mirror of
https://github.com/golang/go
synced 2024-11-21 22:44:40 -07:00
cmd/internal/obj/arm64: optimize ADRP+ADD+LD/ST to ADRP+LD/ST(offset)
This CL optimizes the sequence of instructions ADRP+ADD+LD/ST to the sequence of ADRP+LD/ST(offset). This saves an ADD instruction. The test result of compilecmp: name old text-bytes new text-bytes delta HelloSize 763kB ± 0% 755kB ± 0% -1.06% (p=0.000 n=20+20) name old data-bytes new data-bytes delta HelloSize 13.5kB ± 0% 13.5kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 227kB ± 0% 227kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.33MB ± 0% 1.33MB ± 0% -0.02% (p=0.000 n=20+20) file before after Δ % addr2line 3760392 3759504 -888 -0.024% api 5361511 5295351 -66160 -1.234% asm 5014157 4948674 -65483 -1.306% buildid 2579949 2579485 -464 -0.018% cgo 4492817 4491737 -1080 -0.024% compile 23359229 23156074 -203155 -0.870% cover 4823337 4756937 -66400 -1.377% dist 3332850 3331794 -1056 -0.032% doc 3902649 3836745 -65904 -1.689% fix 3269708 3268828 -880 -0.027% link 6510760 6443496 -67264 -1.033% nm 3670740 3604348 -66392 -1.809% objdump 4069599 4068967 -632 -0.016% pack 2374824 2374208 -616 -0.026% pprof 13874860 13805700 -69160 -0.498% test2json 2599210 2598530 -680 -0.026% trace 13231640 13162872 -68768 -0.520% vet 7360899 7292267 -68632 -0.932% total 113589131 112775517 -813614 -0.716% Change-Id: Ie1cf277e149ddd3f352d05fa0753d0ced7e0b894 Reviewed-on: https://go-review.googlesource.com/c/go/+/444715 Run-TryBot: Eric Fang <eric.fang@arm.com> Reviewed-by: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Damien Neil <dneil@google.com>
This commit is contained in:
parent
7f255ba065
commit
537c4354cb
@ -37,6 +37,7 @@ import (
|
||||
"log"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ctxt7 holds state while assembling a single function.
|
||||
@ -73,7 +74,7 @@ type Optab struct {
|
||||
a3 uint8
|
||||
a4 uint8
|
||||
type_ int8
|
||||
size int8
|
||||
size_ int8 // the value of this field is not static, use the size() method to return the value
|
||||
param int16
|
||||
flag int8
|
||||
scond uint16
|
||||
@ -1021,6 +1022,27 @@ func pcAlignPadLength(pc int64, alignedValue int64, ctxt *obj.Link) int {
|
||||
return int(-pc & (alignedValue - 1))
|
||||
}
|
||||
|
||||
// size returns the size of the sequence of machine instructions when p is encoded with o.
|
||||
// Usually it just returns o.size directly, in some cases it checks whether the optimization
|
||||
// conditions are met, and if so returns the size of the optimized instruction sequence.
|
||||
// These optimizations need to be synchronized with the asmout function.
|
||||
func (o *Optab) size(ctxt *obj.Link, p *obj.Prog) int {
|
||||
// Optimize adrp+add+ld/st to adrp+ld/st(offset).
|
||||
sz := movesize(p.As)
|
||||
if sz != -1 {
|
||||
// Relocations R_AARCH64_LDST{64,32,16,8}_ABS_LO12_NC can only generate 8-byte, 4-byte,
|
||||
// 2-byte and 1-byte aligned addresses, so the address of load/store must be aligned.
|
||||
// Also symbols with prefix of "go:string." are Go strings, which will go into
|
||||
// the symbol table, their addresses are not necessary aligned, rule this out.
|
||||
align := int64(1 << sz)
|
||||
if o.a1 == C_ADDR && p.From.Offset%align == 0 && !strings.HasPrefix(p.From.Sym.Name, "go:string.") ||
|
||||
o.a4 == C_ADDR && p.To.Offset%align == 0 && !strings.HasPrefix(p.To.Sym.Name, "go:string.") {
|
||||
return 8
|
||||
}
|
||||
}
|
||||
return int(o.size_)
|
||||
}
|
||||
|
||||
func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||
if ctxt.Retpoline {
|
||||
ctxt.Diag("-spectre=ret not supported on arm64")
|
||||
@ -1050,7 +1072,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||
}
|
||||
p.Pc = pc
|
||||
o = c.oplook(p)
|
||||
m = int(o.size)
|
||||
m = o.size(c.ctxt, p)
|
||||
if m == 0 {
|
||||
switch p.As {
|
||||
case obj.APCALIGN:
|
||||
@ -1131,7 +1153,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||
bflag = 1
|
||||
}
|
||||
}
|
||||
m = int(o.size)
|
||||
m = o.size(c.ctxt, p)
|
||||
|
||||
if m == 0 {
|
||||
switch p.As {
|
||||
@ -1176,8 +1198,9 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||
psz += 4
|
||||
}
|
||||
|
||||
if int(o.size) > 4*len(out) {
|
||||
log.Fatalf("out array in span7 is too small, need at least %d for %v", o.size/4, p)
|
||||
sz := o.size(c.ctxt, p)
|
||||
if sz > 4*len(out) {
|
||||
log.Fatalf("out array in span7 is too small, need at least %d for %v", sz/4, p)
|
||||
}
|
||||
if p.As == obj.APCALIGN {
|
||||
alignedValue := p.From.Offset
|
||||
@ -1190,7 +1213,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||
}
|
||||
} else {
|
||||
c.asmout(p, o, out[:])
|
||||
for i = 0; i < int(o.size/4); i++ {
|
||||
for i = 0; i < sz/4; i++ {
|
||||
c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
|
||||
bp = bp[4:]
|
||||
psz += 4
|
||||
@ -1238,7 +1261,7 @@ func (c *ctxt7) isRestartable(p *obj.Prog) bool {
|
||||
// If p doesn't use REGTMP, it can be simply preempted, so we don't
|
||||
// mark it.
|
||||
o := c.oplook(p)
|
||||
return o.size > 4 && o.flag&NOTUSETMP == 0
|
||||
return o.size(c.ctxt, p) > 4 && o.flag&NOTUSETMP == 0
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3414,7 +3437,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
op = int32(c.opirr(p, AADD))
|
||||
}
|
||||
|
||||
if int(o.size) == 8 {
|
||||
if int(o.size(c.ctxt, p)) == 8 {
|
||||
// NOTE: this case does not use REGTMP. If it ever does,
|
||||
// remove the NOTUSETMP flag in optab.
|
||||
o1 = c.oaddi(p, op, v&0xfff000, r, rt)
|
||||
@ -4460,31 +4483,43 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
o2 |= uint32(r&31) << 5
|
||||
o2 |= uint32(rt & 31)
|
||||
|
||||
/* reloc ops */
|
||||
case 64: /* movT R,addr -> adrp + add + movT R, (REGTMP) */
|
||||
/* reloc ops */
|
||||
case 64: /* movT R,addr -> adrp + movT R, (REGTMP) */
|
||||
if p.From.Reg == REGTMP {
|
||||
c.ctxt.Diag("cannot use REGTMP as source: %v\n", p)
|
||||
}
|
||||
o1 = ADR(1, 0, REGTMP)
|
||||
o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31
|
||||
rel := obj.Addrel(c.cursym)
|
||||
rel.Off = int32(c.pc)
|
||||
rel.Siz = 8
|
||||
rel.Sym = p.To.Sym
|
||||
rel.Add = p.To.Offset
|
||||
rel.Type = objabi.R_ADDRARM64
|
||||
o3 = c.olsr12u(p, int32(c.opstr(p, p.As)), 0, REGTMP, int(p.From.Reg))
|
||||
// For unaligned access, fall back to adrp + add + movT R, (REGTMP).
|
||||
if o.size(c.ctxt, p) != 8 {
|
||||
o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31
|
||||
o3 = c.olsr12u(p, int32(c.opstr(p, p.As)), 0, REGTMP, int(p.From.Reg))
|
||||
rel.Type = objabi.R_ADDRARM64
|
||||
break
|
||||
}
|
||||
o2 = c.olsr12u(p, int32(c.opstr(p, p.As)), 0, REGTMP, int(p.From.Reg))
|
||||
rel.Type = c.addrRelocType(p)
|
||||
|
||||
case 65: /* movT addr,R -> adrp + add + movT (REGTMP), R */
|
||||
case 65: /* movT addr,R -> adrp + movT (REGTMP), R */
|
||||
o1 = ADR(1, 0, REGTMP)
|
||||
o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31
|
||||
rel := obj.Addrel(c.cursym)
|
||||
rel.Off = int32(c.pc)
|
||||
rel.Siz = 8
|
||||
rel.Sym = p.From.Sym
|
||||
rel.Add = p.From.Offset
|
||||
rel.Type = objabi.R_ADDRARM64
|
||||
o3 = c.olsr12u(p, int32(c.opldr(p, p.As)), 0, REGTMP, int(p.To.Reg))
|
||||
// For unaligned access, fall back to adrp + add + movT (REGTMP), R.
|
||||
if o.size(c.ctxt, p) != 8 {
|
||||
o2 = c.opirr(p, AADD) | REGTMP&31<<5 | REGTMP&31
|
||||
o3 = c.olsr12u(p, int32(c.opldr(p, p.As)), 0, REGTMP, int(p.To.Reg))
|
||||
rel.Type = objabi.R_ADDRARM64
|
||||
break
|
||||
}
|
||||
o2 = c.olsr12u(p, int32(c.opldr(p, p.As)), 0, REGTMP, int(p.To.Reg))
|
||||
rel.Type = c.addrRelocType(p)
|
||||
|
||||
case 66: /* ldp O(R)!, (r1, r2); ldp (R)O!, (r1, r2) */
|
||||
v := int32(c.regoff(&p.From))
|
||||
@ -5676,6 +5711,22 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
out[4] = o5
|
||||
}
|
||||
|
||||
func (c *ctxt7) addrRelocType(p *obj.Prog) objabi.RelocType {
|
||||
switch movesize(p.As) {
|
||||
case 0:
|
||||
return objabi.R_ARM64_PCREL_LDST8
|
||||
case 1:
|
||||
return objabi.R_ARM64_PCREL_LDST16
|
||||
case 2:
|
||||
return objabi.R_ARM64_PCREL_LDST32
|
||||
case 3:
|
||||
return objabi.R_ARM64_PCREL_LDST64
|
||||
default:
|
||||
c.ctxt.Diag("use R_ADDRARM64 relocation type for: %v\n", p)
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
/*
|
||||
* basic Rm op Rn -> Rd (using shifted register with 0)
|
||||
* also op Rn -> Rt
|
||||
|
@ -154,6 +154,22 @@ const (
|
||||
// adrp followed by another add instruction.
|
||||
R_ARM64_PCREL
|
||||
|
||||
// R_ARM64_PCREL_LDST8 resolves a PC-relative addresses instruction sequence, usually an
|
||||
// adrp followed by a LD8 or ST8 instruction.
|
||||
R_ARM64_PCREL_LDST8
|
||||
|
||||
// R_ARM64_PCREL_LDST16 resolves a PC-relative addresses instruction sequence, usually an
|
||||
// adrp followed by a LD16 or ST16 instruction.
|
||||
R_ARM64_PCREL_LDST16
|
||||
|
||||
// R_ARM64_PCREL_LDST32 resolves a PC-relative addresses instruction sequence, usually an
|
||||
// adrp followed by a LD32 or ST32 instruction.
|
||||
R_ARM64_PCREL_LDST32
|
||||
|
||||
// R_ARM64_PCREL_LDST64 resolves a PC-relative addresses instruction sequence, usually an
|
||||
// adrp followed by a LD64 or ST64 instruction.
|
||||
R_ARM64_PCREL_LDST64
|
||||
|
||||
// R_ARM64_LDST8 sets a LD/ST immediate value to bits [11:0] of a local address.
|
||||
R_ARM64_LDST8
|
||||
|
||||
|
@ -45,42 +45,46 @@ func _() {
|
||||
_ = x[R_ARM64_GOTPCREL-35]
|
||||
_ = x[R_ARM64_GOT-36]
|
||||
_ = x[R_ARM64_PCREL-37]
|
||||
_ = x[R_ARM64_LDST8-38]
|
||||
_ = x[R_ARM64_LDST16-39]
|
||||
_ = x[R_ARM64_LDST32-40]
|
||||
_ = x[R_ARM64_LDST64-41]
|
||||
_ = x[R_ARM64_LDST128-42]
|
||||
_ = x[R_POWER_TLS_LE-43]
|
||||
_ = x[R_POWER_TLS_IE-44]
|
||||
_ = x[R_POWER_TLS-45]
|
||||
_ = x[R_ADDRPOWER_DS-46]
|
||||
_ = x[R_ADDRPOWER_GOT-47]
|
||||
_ = x[R_ADDRPOWER_PCREL-48]
|
||||
_ = x[R_ADDRPOWER_TOCREL-49]
|
||||
_ = x[R_ADDRPOWER_TOCREL_DS-50]
|
||||
_ = x[R_RISCV_CALL-51]
|
||||
_ = x[R_RISCV_CALL_TRAMP-52]
|
||||
_ = x[R_RISCV_PCREL_ITYPE-53]
|
||||
_ = x[R_RISCV_PCREL_STYPE-54]
|
||||
_ = x[R_RISCV_TLS_IE_ITYPE-55]
|
||||
_ = x[R_RISCV_TLS_IE_STYPE-56]
|
||||
_ = x[R_PCRELDBL-57]
|
||||
_ = x[R_ADDRLOONG64-58]
|
||||
_ = x[R_ADDRLOONG64U-59]
|
||||
_ = x[R_ADDRLOONG64TLS-60]
|
||||
_ = x[R_ADDRLOONG64TLSU-61]
|
||||
_ = x[R_CALLLOONG64-62]
|
||||
_ = x[R_JMPLOONG64-63]
|
||||
_ = x[R_ADDRMIPSU-64]
|
||||
_ = x[R_ADDRMIPSTLS-65]
|
||||
_ = x[R_ADDRCUOFF-66]
|
||||
_ = x[R_WASMIMPORT-67]
|
||||
_ = x[R_XCOFFREF-68]
|
||||
_ = x[R_ARM64_PCREL_LDST8-38]
|
||||
_ = x[R_ARM64_PCREL_LDST16-39]
|
||||
_ = x[R_ARM64_PCREL_LDST32-40]
|
||||
_ = x[R_ARM64_PCREL_LDST64-41]
|
||||
_ = x[R_ARM64_LDST8-42]
|
||||
_ = x[R_ARM64_LDST16-43]
|
||||
_ = x[R_ARM64_LDST32-44]
|
||||
_ = x[R_ARM64_LDST64-45]
|
||||
_ = x[R_ARM64_LDST128-46]
|
||||
_ = x[R_POWER_TLS_LE-47]
|
||||
_ = x[R_POWER_TLS_IE-48]
|
||||
_ = x[R_POWER_TLS-49]
|
||||
_ = x[R_ADDRPOWER_DS-50]
|
||||
_ = x[R_ADDRPOWER_GOT-51]
|
||||
_ = x[R_ADDRPOWER_PCREL-52]
|
||||
_ = x[R_ADDRPOWER_TOCREL-53]
|
||||
_ = x[R_ADDRPOWER_TOCREL_DS-54]
|
||||
_ = x[R_RISCV_CALL-55]
|
||||
_ = x[R_RISCV_CALL_TRAMP-56]
|
||||
_ = x[R_RISCV_PCREL_ITYPE-57]
|
||||
_ = x[R_RISCV_PCREL_STYPE-58]
|
||||
_ = x[R_RISCV_TLS_IE_ITYPE-59]
|
||||
_ = x[R_RISCV_TLS_IE_STYPE-60]
|
||||
_ = x[R_PCRELDBL-61]
|
||||
_ = x[R_ADDRLOONG64-62]
|
||||
_ = x[R_ADDRLOONG64U-63]
|
||||
_ = x[R_ADDRLOONG64TLS-64]
|
||||
_ = x[R_ADDRLOONG64TLSU-65]
|
||||
_ = x[R_CALLLOONG64-66]
|
||||
_ = x[R_JMPLOONG64-67]
|
||||
_ = x[R_ADDRMIPSU-68]
|
||||
_ = x[R_ADDRMIPSTLS-69]
|
||||
_ = x[R_ADDRCUOFF-70]
|
||||
_ = x[R_WASMIMPORT-71]
|
||||
_ = x[R_XCOFFREF-72]
|
||||
}
|
||||
|
||||
const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF"
|
||||
const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF"
|
||||
|
||||
var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 388, 402, 416, 430, 445, 459, 473, 484, 498, 513, 530, 548, 569, 581, 599, 618, 637, 657, 677, 687, 700, 714, 730, 747, 760, 772, 783, 796, 807, 819, 829}
|
||||
var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 577, 592, 609, 627, 648, 660, 678, 697, 716, 736, 756, 766, 779, 793, 809, 826, 839, 851, 862, 875, 886, 898, 908}
|
||||
|
||||
func (i RelocType) String() string {
|
||||
i -= 1
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Code generated by "stringer -type=SymKind symkind.go"; DO NOT EDIT.
|
||||
// Code generated by "stringer -type=SymKind"; DO NOT EDIT.
|
||||
|
||||
package objabi
|
||||
|
||||
|
@ -467,6 +467,28 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
|
||||
out.Write64(uint64(r.Xadd))
|
||||
out.Write64(uint64(sectoff + 4))
|
||||
out.Write64(uint64(elf.R_AARCH64_ADD_ABS_LO12_NC) | uint64(elfsym)<<32)
|
||||
|
||||
case objabi.R_ARM64_PCREL_LDST8,
|
||||
objabi.R_ARM64_PCREL_LDST16,
|
||||
objabi.R_ARM64_PCREL_LDST32,
|
||||
objabi.R_ARM64_PCREL_LDST64:
|
||||
// two relocations: R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_LDST{64/32/16/8}_ABS_LO12_NC
|
||||
out.Write64(uint64(elf.R_AARCH64_ADR_PREL_PG_HI21) | uint64(elfsym)<<32)
|
||||
out.Write64(uint64(r.Xadd))
|
||||
out.Write64(uint64(sectoff + 4))
|
||||
var ldstType elf.R_AARCH64
|
||||
switch r.Type {
|
||||
case objabi.R_ARM64_PCREL_LDST8:
|
||||
ldstType = elf.R_AARCH64_LDST8_ABS_LO12_NC
|
||||
case objabi.R_ARM64_PCREL_LDST16:
|
||||
ldstType = elf.R_AARCH64_LDST16_ABS_LO12_NC
|
||||
case objabi.R_ARM64_PCREL_LDST32:
|
||||
ldstType = elf.R_AARCH64_LDST32_ABS_LO12_NC
|
||||
case objabi.R_ARM64_PCREL_LDST64:
|
||||
ldstType = elf.R_AARCH64_LDST64_ABS_LO12_NC
|
||||
}
|
||||
out.Write64(uint64(ldstType) | uint64(elfsym)<<32)
|
||||
|
||||
case objabi.R_ARM64_TLS_LE:
|
||||
out.Write64(uint64(elf.R_AARCH64_TLSLE_MOVW_TPREL_G0) | uint64(elfsym)<<32)
|
||||
case objabi.R_ARM64_TLS_IE:
|
||||
@ -516,7 +538,10 @@ func machoreloc1(arch *sys.Arch, out *ld.OutBuf, ldr *loader.Loader, s loader.Sy
|
||||
}
|
||||
}
|
||||
|
||||
if ldr.SymType(rs) == sym.SHOSTOBJ || rt == objabi.R_CALLARM64 || rt == objabi.R_ADDRARM64 || rt == objabi.R_ARM64_GOTPCREL {
|
||||
if ldr.SymType(rs) == sym.SHOSTOBJ || rt == objabi.R_CALLARM64 ||
|
||||
rt == objabi.R_ARM64_PCREL_LDST8 || rt == objabi.R_ARM64_PCREL_LDST16 ||
|
||||
rt == objabi.R_ARM64_PCREL_LDST32 || rt == objabi.R_ARM64_PCREL_LDST64 ||
|
||||
rt == objabi.R_ADDRARM64 || rt == objabi.R_ARM64_GOTPCREL {
|
||||
if ldr.SymDynid(rs) < 0 {
|
||||
ldr.Errorf(s, "reloc %d (%s) to non-macho symbol %s type=%d (%s)", rt, sym.RelocName(arch, rt), ldr.SymName(rs), ldr.SymType(rs), ldr.SymType(rs))
|
||||
return false
|
||||
@ -545,7 +570,11 @@ func machoreloc1(arch *sys.Arch, out *ld.OutBuf, ldr *loader.Loader, s loader.Sy
|
||||
|
||||
v |= 1 << 24 // pc-relative bit
|
||||
v |= ld.MACHO_ARM64_RELOC_BRANCH26 << 28
|
||||
case objabi.R_ADDRARM64:
|
||||
case objabi.R_ADDRARM64,
|
||||
objabi.R_ARM64_PCREL_LDST8,
|
||||
objabi.R_ARM64_PCREL_LDST16,
|
||||
objabi.R_ARM64_PCREL_LDST32,
|
||||
objabi.R_ARM64_PCREL_LDST64:
|
||||
siz = 4
|
||||
// Two relocation entries: MACHO_ARM64_RELOC_PAGEOFF12 MACHO_ARM64_RELOC_PAGE21
|
||||
// if r.Xadd is non-zero, add two MACHO_ARM64_RELOC_ADDEND.
|
||||
@ -601,7 +630,8 @@ func pereloc1(arch *sys.Arch, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
|
||||
rs := r.Xsym
|
||||
rt := r.Type
|
||||
|
||||
if rt == objabi.R_ADDRARM64 && r.Xadd != signext21(r.Xadd) {
|
||||
if (rt == objabi.R_ADDRARM64 || rt == objabi.R_ARM64_PCREL_LDST8 || rt == objabi.R_ARM64_PCREL_LDST16 ||
|
||||
rt == objabi.R_ARM64_PCREL_LDST32 || rt == objabi.R_ARM64_PCREL_LDST64) && r.Xadd != signext21(r.Xadd) {
|
||||
// If the relocation target would overflow the addend, then target
|
||||
// a linker-manufactured label symbol with a smaller addend instead.
|
||||
label := ldr.Lookup(offsetLabelName(ldr, rs, r.Xadd/peRelocLimit*peRelocLimit), ldr.SymVersion(rs))
|
||||
@ -653,6 +683,19 @@ func pereloc1(arch *sys.Arch, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
|
||||
out.Write32(uint32(symdynid))
|
||||
out.Write16(ld.IMAGE_REL_ARM64_PAGEOFFSET_12A)
|
||||
|
||||
case objabi.R_ARM64_PCREL_LDST8,
|
||||
objabi.R_ARM64_PCREL_LDST16,
|
||||
objabi.R_ARM64_PCREL_LDST32,
|
||||
objabi.R_ARM64_PCREL_LDST64:
|
||||
// Note: r.Xadd has been taken care of below, in archreloc.
|
||||
out.Write32(uint32(sectoff))
|
||||
out.Write32(uint32(symdynid))
|
||||
out.Write16(ld.IMAGE_REL_ARM64_PAGEBASE_REL21)
|
||||
|
||||
out.Write32(uint32(sectoff + 4))
|
||||
out.Write32(uint32(symdynid))
|
||||
out.Write16(ld.IMAGE_REL_ARM64_PAGEOFFSET_12L)
|
||||
|
||||
case objabi.R_CALLARM64:
|
||||
// Note: r.Xadd has been taken care of above, by using a label pointing into the middle of the function.
|
||||
out.Write32(uint32(sectoff))
|
||||
@ -674,6 +717,10 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
|
||||
switch rt := r.Type(); rt {
|
||||
default:
|
||||
case objabi.R_ARM64_GOTPCREL,
|
||||
objabi.R_ARM64_PCREL_LDST8,
|
||||
objabi.R_ARM64_PCREL_LDST16,
|
||||
objabi.R_ARM64_PCREL_LDST32,
|
||||
objabi.R_ARM64_PCREL_LDST64,
|
||||
objabi.R_ADDRARM64:
|
||||
|
||||
// set up addend for eventual relocation via outer symbol.
|
||||
@ -700,17 +747,35 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
|
||||
}
|
||||
|
||||
// The first instruction (ADRP) has a 21-bit immediate field,
|
||||
// and the second (ADD) has a 12-bit immediate field.
|
||||
// and the second (ADD or LD/ST) has a 12-bit immediate field.
|
||||
// The first instruction is only for high bits, but to get the carry bits right we have
|
||||
// to put the full addend, including the bottom 12 bits again.
|
||||
// That limits the distance of any addend to only 21 bits.
|
||||
// But we assume that LDRP's top bit will be interpreted as a sign bit,
|
||||
// But we assume that ADRP's top bit will be interpreted as a sign bit,
|
||||
// so we only use 20 bits.
|
||||
// pereloc takes care of introducing new symbol labels
|
||||
// every megabyte for longer relocations.
|
||||
xadd := uint32(xadd)
|
||||
o0 |= (xadd&3)<<29 | (xadd&0xffffc)<<3
|
||||
o1 |= (xadd & 0xfff) << 10
|
||||
switch rt {
|
||||
case objabi.R_ARM64_PCREL_LDST8, objabi.R_ADDRARM64:
|
||||
o1 |= (xadd & 0xfff) << 10
|
||||
case objabi.R_ARM64_PCREL_LDST16:
|
||||
if xadd&0x1 != 0 {
|
||||
ldr.Errorf(s, "offset for 16-bit load/store has unaligned value %d", xadd&0xfff)
|
||||
}
|
||||
o1 |= ((xadd & 0xfff) >> 1) << 10
|
||||
case objabi.R_ARM64_PCREL_LDST32:
|
||||
if xadd&0x3 != 0 {
|
||||
ldr.Errorf(s, "offset for 32-bit load/store has unaligned value %d", xadd&0xfff)
|
||||
}
|
||||
o1 |= ((xadd & 0xfff) >> 2) << 10
|
||||
case objabi.R_ARM64_PCREL_LDST64:
|
||||
if xadd&0x7 != 0 {
|
||||
ldr.Errorf(s, "offset for 64-bit load/store has unaligned value %d", xadd&0xfff)
|
||||
}
|
||||
o1 |= ((xadd & 0xfff) >> 3) << 10
|
||||
}
|
||||
|
||||
if target.IsBigEndian() {
|
||||
val = int64(o0)<<32 | int64(o1)
|
||||
@ -750,8 +815,12 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
|
||||
}
|
||||
}
|
||||
|
||||
switch r.Type() {
|
||||
case objabi.R_ADDRARM64:
|
||||
switch rt := r.Type(); rt {
|
||||
case objabi.R_ADDRARM64,
|
||||
objabi.R_ARM64_PCREL_LDST8,
|
||||
objabi.R_ARM64_PCREL_LDST16,
|
||||
objabi.R_ARM64_PCREL_LDST32,
|
||||
objabi.R_ARM64_PCREL_LDST64:
|
||||
t := ldr.SymAddr(rs) + r.Add() - ((ldr.SymValue(s) + int64(r.Off())) &^ 0xfff)
|
||||
if t >= 1<<32 || t < -1<<32 {
|
||||
ldr.Errorf(s, "program too large, address relocation distance = %d", t)
|
||||
@ -768,7 +837,25 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
|
||||
}
|
||||
|
||||
o0 |= (uint32((t>>12)&3) << 29) | (uint32((t>>12>>2)&0x7ffff) << 5)
|
||||
o1 |= uint32(t&0xfff) << 10
|
||||
switch rt {
|
||||
case objabi.R_ARM64_PCREL_LDST8, objabi.R_ADDRARM64:
|
||||
o1 |= uint32(t&0xfff) << 10
|
||||
case objabi.R_ARM64_PCREL_LDST16:
|
||||
if t&0x1 != 0 {
|
||||
ldr.Errorf(s, "offset for 16-bit load/store has unaligned value %d", t&0xfff)
|
||||
}
|
||||
o1 |= (uint32(t&0xfff) >> 1) << 10
|
||||
case objabi.R_ARM64_PCREL_LDST32:
|
||||
if t&0x3 != 0 {
|
||||
ldr.Errorf(s, "offset for 32-bit load/store has unaligned value %d", t&0xfff)
|
||||
}
|
||||
o1 |= (uint32(t&0xfff) >> 2) << 10
|
||||
case objabi.R_ARM64_PCREL_LDST64:
|
||||
if t&0x7 != 0 {
|
||||
ldr.Errorf(s, "offset for 64-bit load/store has unaligned value %d", t&0xfff)
|
||||
}
|
||||
o1 |= (uint32(t&0xfff) >> 3) << 10
|
||||
}
|
||||
|
||||
// when laid out, the instruction order must always be o1, o2.
|
||||
if target.IsBigEndian() {
|
||||
@ -952,6 +1039,10 @@ func archrelocvariant(*ld.Target, *loader.Loader, loader.Reloc, sym.RelocVariant
|
||||
func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sym) (loader.ExtReloc, bool) {
|
||||
switch rt := r.Type(); rt {
|
||||
case objabi.R_ARM64_GOTPCREL,
|
||||
objabi.R_ARM64_PCREL_LDST8,
|
||||
objabi.R_ARM64_PCREL_LDST16,
|
||||
objabi.R_ARM64_PCREL_LDST32,
|
||||
objabi.R_ARM64_PCREL_LDST64,
|
||||
objabi.R_ADDRARM64:
|
||||
rr := ld.ExtrelocViaOuterSym(ldr, r, s)
|
||||
return rr, true
|
||||
|
Loading…
Reference in New Issue
Block a user