mirror of
https://github.com/golang/go
synced 2024-11-18 00:54:45 -07:00
cmd/internal/obj/arm64, cmd/link: use two instructions rather than three for loads from memory
Reduces size of godoc .text section by about 75k (or 1.4%). Change-Id: I65850aa569aefbddd6cb07c6ae1addcc39cab6a5 Reviewed-on: https://go-review.googlesource.com/13993 Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
parent
a7d331b368
commit
3a9bc571b0
@ -260,16 +260,16 @@ var optab = []Optab{
|
||||
{AMOVW, C_VCONADDR, C_NONE, C_REG, 68, 8, 0, 0, 0},
|
||||
{AMOVD, C_VCON, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
|
||||
{AMOVD, C_VCONADDR, C_NONE, C_REG, 68, 8, 0, 0, 0},
|
||||
{AMOVB, C_REG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
|
||||
{AMOVBU, C_REG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
|
||||
{AMOVH, C_REG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
|
||||
{AMOVW, C_REG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
|
||||
{AMOVD, C_REG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
|
||||
{AMOVB, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0},
|
||||
{AMOVBU, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0},
|
||||
{AMOVH, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0},
|
||||
{AMOVW, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0},
|
||||
{AMOVD, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0},
|
||||
{AMOVB, C_REG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
|
||||
{AMOVBU, C_REG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
|
||||
{AMOVH, C_REG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
|
||||
{AMOVW, C_REG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
|
||||
{AMOVD, C_REG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
|
||||
{AMOVB, C_ADDR, C_NONE, C_REG, 65, 8, 0, 0, 0},
|
||||
{AMOVBU, C_ADDR, C_NONE, C_REG, 65, 8, 0, 0, 0},
|
||||
{AMOVH, C_ADDR, C_NONE, C_REG, 65, 8, 0, 0, 0},
|
||||
{AMOVW, C_ADDR, C_NONE, C_REG, 65, 8, 0, 0, 0},
|
||||
{AMOVD, C_ADDR, C_NONE, C_REG, 65, 8, 0, 0, 0},
|
||||
{AMOVD, C_TLS_LE, C_NONE, C_REG, 69, 4, 0, 0, 0},
|
||||
{AMOVD, C_TLS_IE, C_NONE, C_REG, 70, 8, 0, 0, 0},
|
||||
{AMUL, C_REG, C_REG, C_REG, 15, 4, 0, 0, 0},
|
||||
@ -450,10 +450,10 @@ var optab = []Optab{
|
||||
{AFMOVS, C_LOREG, C_NONE, C_FREG, 31, 8, 0, LFROM, 0},
|
||||
{AFMOVD, C_LAUTO, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0},
|
||||
{AFMOVD, C_LOREG, C_NONE, C_FREG, 31, 8, 0, LFROM, 0},
|
||||
{AFMOVS, C_FREG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
|
||||
{AFMOVS, C_ADDR, C_NONE, C_FREG, 65, 12, 0, 0, 0},
|
||||
{AFMOVD, C_FREG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
|
||||
{AFMOVD, C_ADDR, C_NONE, C_FREG, 65, 12, 0, 0, 0},
|
||||
{AFMOVS, C_FREG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
|
||||
{AFMOVS, C_ADDR, C_NONE, C_FREG, 65, 8, 0, 0, 0},
|
||||
{AFMOVD, C_FREG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
|
||||
{AFMOVD, C_ADDR, C_NONE, C_FREG, 65, 8, 0, 0, 0},
|
||||
{AFADDS, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0},
|
||||
{AFADDS, C_FREG, C_FREG, C_FREG, 54, 4, 0, 0, 0},
|
||||
{AFADDS, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0},
|
||||
@ -2704,25 +2704,23 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
|
||||
/* reloc ops */
|
||||
case 64: /* movT R,addr -> adrp + add + movT R, (REGTMP) */
|
||||
o1 = ADR(1, 0, REGTMP)
|
||||
o2 = opirr(ctxt, AADD) | REGTMP&31<<5 | REGTMP&31
|
||||
o2 = olsr12u(ctxt, int32(opstr12(ctxt, int(p.As))), 0, REGTMP, int(p.From.Reg))
|
||||
rel := obj.Addrel(ctxt.Cursym)
|
||||
rel.Off = int32(ctxt.Pc)
|
||||
rel.Siz = 8
|
||||
rel.Sym = p.To.Sym
|
||||
rel.Add = p.To.Offset
|
||||
rel.Type = obj.R_ADDRARM64
|
||||
o3 = olsr12u(ctxt, int32(opstr12(ctxt, int(p.As))), 0, REGTMP, int(p.From.Reg))
|
||||
rel.Type = movereloc(p.As)
|
||||
|
||||
case 65: /* movT addr,R -> adrp + add + movT (REGTMP), R */
|
||||
case 65: /* movT addr,R -> adrp REGTMP, 0; ldr R, [REGTMP, #0] + relocs */
|
||||
o1 = ADR(1, 0, REGTMP)
|
||||
o2 = opirr(ctxt, AADD) | REGTMP&31<<5 | REGTMP&31
|
||||
o2 = olsr12u(ctxt, int32(opldr12(ctxt, int(p.As))), 0, REGTMP, int(p.To.Reg))
|
||||
rel := obj.Addrel(ctxt.Cursym)
|
||||
rel.Off = int32(ctxt.Pc)
|
||||
rel.Siz = 8
|
||||
rel.Sym = p.From.Sym
|
||||
rel.Add = p.From.Offset
|
||||
rel.Type = obj.R_ADDRARM64
|
||||
o3 = olsr12u(ctxt, int32(opldr12(ctxt, int(p.As))), 0, REGTMP, int(p.To.Reg))
|
||||
rel.Siz = 8
|
||||
rel.Type = movereloc(p.As)
|
||||
|
||||
case 66: /* ldp O(R)!, (r1, r2); ldp (R)O!, (r1, r2) */
|
||||
v := int32(p.From.Offset)
|
||||
@ -4161,3 +4159,19 @@ func movesize(a int) int {
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
func movereloc(a int16) int32 {
|
||||
switch movesize(int(a)) {
|
||||
case 0:
|
||||
return obj.R_ARM64_LOAD8
|
||||
case 1:
|
||||
return obj.R_ARM64_LOAD16
|
||||
case 2:
|
||||
return obj.R_ARM64_LOAD32
|
||||
case 3:
|
||||
return obj.R_ARM64_LOAD64
|
||||
case -1:
|
||||
panic("xxx")
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
@ -457,6 +457,11 @@ const (
|
||||
// referenced (thread local) symbol from the GOT.
|
||||
R_ARM64_TLS_IE
|
||||
|
||||
R_ARM64_LOAD8
|
||||
R_ARM64_LOAD16
|
||||
R_ARM64_LOAD32
|
||||
R_ARM64_LOAD64
|
||||
|
||||
// PPC64.
|
||||
|
||||
// R_POWER_TLS_LE is used to implement the "local exec" model for tls
|
||||
|
@ -82,6 +82,30 @@ func elfreloc1(r *ld.Reloc, sectoff int64) int {
|
||||
ld.Thearch.Vput(uint64(sectoff + 4))
|
||||
ld.Thearch.Vput(ld.R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC | uint64(elfsym)<<32)
|
||||
|
||||
case obj.R_ARM64_LOAD8:
|
||||
ld.Thearch.Vput(ld.R_AARCH64_ADR_PREL_PG_HI21 | uint64(elfsym)<<32)
|
||||
ld.Thearch.Vput(uint64(r.Xadd))
|
||||
ld.Thearch.Vput(uint64(sectoff + 4))
|
||||
ld.Thearch.Vput(ld.R_AARCH64_LDST8_ABS_LO12_NC | uint64(elfsym)<<32)
|
||||
|
||||
case obj.R_ARM64_LOAD16:
|
||||
ld.Thearch.Vput(ld.R_AARCH64_ADR_PREL_PG_HI21 | uint64(elfsym)<<32)
|
||||
ld.Thearch.Vput(uint64(r.Xadd))
|
||||
ld.Thearch.Vput(uint64(sectoff + 4))
|
||||
ld.Thearch.Vput(ld.R_AARCH64_LDST16_ABS_LO12_NC | uint64(elfsym)<<32)
|
||||
|
||||
case obj.R_ARM64_LOAD32:
|
||||
ld.Thearch.Vput(ld.R_AARCH64_ADR_PREL_PG_HI21 | uint64(elfsym)<<32)
|
||||
ld.Thearch.Vput(uint64(r.Xadd))
|
||||
ld.Thearch.Vput(uint64(sectoff + 4))
|
||||
ld.Thearch.Vput(ld.R_AARCH64_LDST32_ABS_LO12_NC | uint64(elfsym)<<32)
|
||||
|
||||
case obj.R_ARM64_LOAD64:
|
||||
ld.Thearch.Vput(ld.R_AARCH64_ADR_PREL_PG_HI21 | uint64(elfsym)<<32)
|
||||
ld.Thearch.Vput(uint64(r.Xadd))
|
||||
ld.Thearch.Vput(uint64(sectoff + 4))
|
||||
ld.Thearch.Vput(ld.R_AARCH64_LDST64_ABS_LO12_NC | uint64(elfsym)<<32)
|
||||
|
||||
case obj.R_CALLARM64:
|
||||
if r.Siz != 4 {
|
||||
return -1
|
||||
@ -178,13 +202,72 @@ func machoreloc1(r *ld.Reloc, sectoff int64) int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func archrelocaddr(r *ld.Reloc, s *ld.LSym, val *int64) int {
|
||||
var o1, o2 uint32
|
||||
if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
|
||||
o1 = uint32(*val >> 32)
|
||||
o2 = uint32(*val)
|
||||
} else {
|
||||
o1 = uint32(*val)
|
||||
o2 = uint32(*val >> 32)
|
||||
}
|
||||
|
||||
// We are inserting an address into two instructions: adrp and
|
||||
// then either addi or a load.
|
||||
address := ld.Symaddr(r.Sym) + r.Add
|
||||
pgaddress := (address &^ 0xfff) - ((s.Value + int64(r.Off)) &^ 0xfff)
|
||||
if pgaddress < -1<<31 || pgaddress >= 1<<31 {
|
||||
ld.Ctxt.Diag("relocation for %s is too big (>=2G): %d", s.Name, pgaddress)
|
||||
}
|
||||
pgoff := uint32(address & 0xfff)
|
||||
o1 |= uint32((((pgaddress >> 12) & 3) << 29) | (((pgaddress >> 12 >> 2) & 0x7ffff) << 5))
|
||||
|
||||
switch r.Type {
|
||||
case obj.R_ADDRARM64, obj.R_ARM64_LOAD8:
|
||||
o2 |= pgoff << 10
|
||||
|
||||
case obj.R_ARM64_LOAD16:
|
||||
if pgoff&0x1 != 0 {
|
||||
ld.Diag("offset for 16-byte load/store has unaligned value %d", pgoff)
|
||||
}
|
||||
o2 |= pgoff << 9
|
||||
|
||||
case obj.R_ARM64_LOAD32:
|
||||
if pgoff&0x3 != 0 {
|
||||
ld.Diag("offset for 32-byte load/store has unaligned value %d", pgoff)
|
||||
}
|
||||
o2 |= pgoff << 8
|
||||
|
||||
case obj.R_ARM64_LOAD64:
|
||||
if pgoff&0x7 != 0 {
|
||||
ld.Diag("offset for 64-byte load/store has unaligned value %d", pgoff)
|
||||
}
|
||||
o2 |= pgoff << 7
|
||||
|
||||
default:
|
||||
return -1
|
||||
}
|
||||
|
||||
if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
|
||||
*val = int64(o1)<<32 | int64(o2)
|
||||
} else {
|
||||
*val = int64(o2)<<32 | int64(o1)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int {
|
||||
if ld.Linkmode == ld.LinkExternal {
|
||||
switch r.Type {
|
||||
default:
|
||||
return -1
|
||||
|
||||
case obj.R_ADDRARM64:
|
||||
case obj.R_ADDRARM64,
|
||||
obj.R_ARM64_LOAD8,
|
||||
obj.R_ARM64_LOAD16,
|
||||
obj.R_ARM64_LOAD32,
|
||||
obj.R_ARM64_LOAD64:
|
||||
|
||||
r.Done = 0
|
||||
|
||||
// set up addend for eventual relocation via outer symbol.
|
||||
@ -253,32 +336,8 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int {
|
||||
*val = ld.Symaddr(r.Sym) + r.Add - ld.Symaddr(ld.Linklookup(ld.Ctxt, ".got", 0))
|
||||
return 0
|
||||
|
||||
case obj.R_ADDRARM64:
|
||||
t := ld.Symaddr(r.Sym) + r.Add - ((s.Value + int64(r.Off)) &^ 0xfff)
|
||||
if t >= 1<<32 || t < -1<<32 {
|
||||
ld.Diag("program too large, address relocation distance = %d", t)
|
||||
}
|
||||
|
||||
var o0, o1 uint32
|
||||
|
||||
if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
|
||||
o0 = uint32(*val >> 32)
|
||||
o1 = uint32(*val)
|
||||
} else {
|
||||
o0 = uint32(*val)
|
||||
o1 = uint32(*val >> 32)
|
||||
}
|
||||
|
||||
o0 |= (uint32((t>>12)&3) << 29) | (uint32((t>>12>>2)&0x7ffff) << 5)
|
||||
o1 |= uint32(t&0xfff) << 10
|
||||
|
||||
// when laid out, the instruction order must always be o1, o2.
|
||||
if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
|
||||
*val = int64(o0)<<32 | int64(o1)
|
||||
} else {
|
||||
*val = int64(o1)<<32 | int64(o0)
|
||||
}
|
||||
return 0
|
||||
case obj.R_ADDRARM64, obj.R_ARM64_LOAD8, obj.R_ARM64_LOAD16, obj.R_ARM64_LOAD32, obj.R_ARM64_LOAD64:
|
||||
return archrelocaddr(r, s, val)
|
||||
|
||||
case obj.R_ARM64_TLS_LE:
|
||||
r.Done = 0
|
||||
|
@ -372,6 +372,10 @@ const (
|
||||
R_AARCH64_CALL26 = 283
|
||||
R_AARCH64_ADR_PREL_PG_HI21 = 275
|
||||
R_AARCH64_ADD_ABS_LO12_NC = 277
|
||||
R_AARCH64_LDST8_ABS_LO12_NC = 278
|
||||
R_AARCH64_LDST16_ABS_LO12_NC = 284
|
||||
R_AARCH64_LDST32_ABS_LO12_NC = 285
|
||||
R_AARCH64_LDST64_ABS_LO12_NC = 286
|
||||
R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 = 541
|
||||
R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 542
|
||||
R_AARCH64_TLSLE_MOVW_TPREL_G0 = 547
|
||||
|
Loading…
Reference in New Issue
Block a user