From 3601a1e77d7fdd4c98c3dbd6f1bfa0028bc24fdd Mon Sep 17 00:00:00 2001 From: David Crawshaw Date: Mon, 29 Feb 2016 13:07:50 -0900 Subject: [PATCH] cmd/link: write DWARF PC table without seeking This per-symbol table was written with the strategy: 1. record offset and write fake header 2. write body 3. seek back to fake header 4. write real header This CL collects the per-symbol body into a []byte, then writes the real header followed by the body to the output file. This saves two seeks per-symbol and overwriting the fake header. Small performance improvement (3.5%) in best-of-ten links of godoc: tip: real 0m1.132s user 0m1.256s this: real 0m1.090s user 0m1.210s I'm not sure if the performance measured here alone justifies it, but I think this is an easier to read style of code. Change-Id: I1663901eb7c2ee330591b8b6550cdff0402ed5dc Reviewed-on: https://go-review.googlesource.com/20074 Reviewed-by: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/cmd/link/internal/ld/dwarf.go | 143 ++++++++++++------------------ 1 file changed, 58 insertions(+), 85 deletions(-) diff --git a/src/cmd/link/internal/ld/dwarf.go b/src/cmd/link/internal/ld/dwarf.go index 200f4ccd9e..d26689b9ba 100644 --- a/src/cmd/link/internal/ld/dwarf.go +++ b/src/cmd/link/internal/ld/dwarf.go @@ -111,64 +111,47 @@ func addrput(addr int64) { } } -func uleb128enc(v uint64, dst []byte) int { - var c uint8 - - length := uint8(0) +func appendUleb128(b []byte, v uint64) []byte { for { - c = uint8(v & 0x7f) + c := uint8(v & 0x7f) v >>= 7 if v != 0 { c |= 0x80 } - if dst != nil { - dst[0] = byte(c) - dst = dst[1:] - } - length++ + b = append(b, c) if c&0x80 == 0 { break } } - - return int(length) + return b } -func sleb128enc(v int64, dst []byte) int { - var c uint8 - var s uint8 - - length := uint8(0) +func appendSleb128(b []byte, v int64) []byte { for { - c = uint8(v & 0x7f) - s = uint8(v & 0x40) + c := uint8(v & 0x7f) + s := uint8(v & 0x40) v >>= 7 if (v != -1 || s == 0) && (v != 0 || s != 0) { c |= 0x80 } - if dst != nil { - dst[0] = byte(c) - dst = dst[1:] - } - length++ + b = append(b, c) if c&0x80 == 0 { break } } - - return int(length) + return b } var encbuf [10]byte func uleb128put(v int64) { - n := uleb128enc(uint64(v), encbuf[:]) - Cwrite(encbuf[:n]) + b := appendUleb128(encbuf[:0], uint64(v)) + Cwrite(b) } func sleb128put(v int64) { - n := sleb128enc(v, encbuf[:]) - Cwrite(encbuf[:n]) + b := appendSleb128(encbuf[:0], v) + Cwrite(b) } /* @@ -892,12 +875,9 @@ func reversetree(list **DWDie) { func newmemberoffsetattr(die *DWDie, offs int32) { var block [20]byte - - i := 0 - block[i] = DW_OP_plus_uconst - i++ - i += uleb128enc(uint64(offs), block[i:]) - newattr(die, DW_AT_data_member_location, DW_CLS_BLOCK, int64(i), block[:i]) + b := append(block[:0], DW_OP_plus_uconst) + b = appendUleb128(b, uint64(offs)) + newattr(die, DW_AT_data_member_location, DW_CLS_BLOCK, int64(len(b)), b) } // GDB doesn't like DW_FORM_addr for DW_AT_location, so emit a @@ -1462,20 +1442,15 @@ func putpclcdelta(delta_pc int64, delta_lc int64) { func newcfaoffsetattr(die *DWDie, offs int32) { var block [20]byte + b := append(block[:0], DW_OP_call_frame_cfa) - i := 0 - - block[i] = DW_OP_call_frame_cfa - i++ if offs != 0 { - block[i] = DW_OP_consts - i++ - i += sleb128enc(int64(offs), block[i:]) - block[i] = DW_OP_plus - i++ + b = append(b, DW_OP_consts) + b = appendSleb128(b, int64(offs)) + b = append(b, DW_OP_plus) } - newattr(die, DW_AT_location, DW_CLS_BLOCK, int64(i), block[:i]) + newattr(die, DW_AT_location, DW_CLS_BLOCK, int64(len(b)), b) } func mkvarname(name string, da int) string { @@ -1719,22 +1694,25 @@ const ( DATAALIGNMENTFACTOR = -4 ) -func putpccfadelta(deltapc int64, cfa int64) { - Cput(DW_CFA_def_cfa_offset_sf) - sleb128put(cfa / DATAALIGNMENTFACTOR) +// appendPCDeltaCFA appends per-PC CFA deltas to b and returns the final slice. +func appendPCDeltaCFA(b []byte, deltapc, cfa int64) []byte { + b = append(b, DW_CFA_def_cfa_offset_sf) + b = appendSleb128(b, cfa/DATAALIGNMENTFACTOR) - if deltapc < 0x40 { - Cput(uint8(DW_CFA_advance_loc + deltapc)) - } else if deltapc < 0x100 { - Cput(DW_CFA_advance_loc1) - Cput(uint8(deltapc)) - } else if deltapc < 0x10000 { - Cput(DW_CFA_advance_loc2) - Thearch.Wput(uint16(deltapc)) - } else { - Cput(DW_CFA_advance_loc4) - Thearch.Lput(uint32(deltapc)) + switch { + case deltapc < 0x40: + b = append(b, uint8(DW_CFA_advance_loc+deltapc)) + case deltapc < 0x100: + b = append(b, DW_CFA_advance_loc1) + b = append(b, uint8(deltapc)) + case deltapc < 0x10000: + b = append(b, DW_CFA_advance_loc2) + b = Thearch.Append16(b, uint16(deltapc)) + default: + b = append(b, DW_CFA_advance_loc4) + b = Thearch.Append32(b, uint32(deltapc)) } + return b } func writeframes() { @@ -1779,6 +1757,7 @@ func writeframes() { strnput("", int(pad)) + var deltaBuf []byte var pcsp Pciter for Ctxt.Cursym = Ctxt.Textp; Ctxt.Cursym != nil; Ctxt.Cursym = Ctxt.Cursym.Next { s := Ctxt.Cursym @@ -1786,14 +1765,9 @@ func writeframes() { continue } - fdeo := Cpos() - - // Emit a FDE, Section 6.4.1, starting wit a placeholder. - Thearch.Lput(0) // length, must be multiple of thearch.ptrsize - Thearch.Lput(0) // Pointer to the CIE above, at offset 0 - addrput(0) // initial location - addrput(0) // address range - + // Emit a FDE, Section 6.4.1. + // First build the section contents into a byte buffer. + deltaBuf = deltaBuf[:0] for pciterinit(Ctxt, &pcsp, &s.Pcln.Pcsp); pcsp.done == 0; pciternext(&pcsp) { nextpc := pcsp.nextpc @@ -1807,31 +1781,30 @@ func writeframes() { } if haslinkregister() { - putpccfadelta(int64(nextpc)-int64(pcsp.pc), int64(pcsp.value)) + deltaBuf = appendPCDeltaCFA(deltaBuf, int64(nextpc)-int64(pcsp.pc), int64(pcsp.value)) } else { - putpccfadelta(int64(nextpc)-int64(pcsp.pc), int64(Thearch.Ptrsize)+int64(pcsp.value)) + deltaBuf = appendPCDeltaCFA(deltaBuf, int64(nextpc)-int64(pcsp.pc), int64(Thearch.Ptrsize)+int64(pcsp.value)) } } + pad := int(Rnd(int64(len(deltaBuf)), int64(Thearch.Ptrsize))) - len(deltaBuf) + deltaBuf = append(deltaBuf, zeros[:pad]...) - fdesize := Cpos() - fdeo - 4 // exclude the length field. - pad = Rnd(fdesize, int64(Thearch.Ptrsize)) - fdesize - strnput("", int(pad)) - fdesize += pad - - // Emit the FDE header for real, Section 6.4.1. - Cseek(fdeo) - - Thearch.Lput(uint32(fdesize)) + // Emit the FDE header, Section 6.4.1. + // 4 bytes: length, must be multiple of thearch.ptrsize + // 4 bytes: Pointer to the CIE above, at offset 0 + // ptrsize: initial location + // ptrsize: address range + Thearch.Lput(uint32(4 + 2*Thearch.Ptrsize + len(deltaBuf))) // length (excludes itself) if Linkmode == LinkExternal { - adddwarfrel(framesec, framesym, frameo, 4, 0) - adddwarfrel(framesec, s, frameo, Thearch.Ptrsize, 0) + adddwarfrel(framesec, framesym, frameo, 4, 0) // CIE offset + adddwarfrel(framesec, s, frameo, Thearch.Ptrsize, 0) // initial location } else { - Thearch.Lput(0) - addrput(s.Value) + Thearch.Lput(0) // CIE offset + addrput(s.Value) // initial location } + addrput(s.Size) // address range - addrput(s.Size) - Cseek(fdeo + 4 + fdesize) + Cwrite(deltaBuf) } Cflush()