mirror of
https://github.com/golang/go
synced 2024-11-07 14:46:14 -07:00
cmd/asm/internal,cmd/internal/obj/ppc64: add alignment directive to asm for ppc64x
This adds support for an alignment directive that can be used within Go asm to indicate preferred code alignment for ppc64x. This is intended to be used with loops to improve performance. This change only adds the directive and aligns the code based on it. Follow up changes will modify asm functions for ppc64x that benefit from preferred alignment. Fixes #14935 Here is one example of the improvement in memmove when the directive is used on the loops in the code: Memmove/64 8.74ns ± 0% 8.64ns ± 0% -1.19% (p=0.000 n=8+8) Memmove/128 11.5ns ± 0% 11.0ns ± 0% -4.35% (p=0.000 n=8+8) Memmove/256 23.0ns ± 0% 15.3ns ± 0% -33.48% (p=0.000 n=8+8) Memmove/512 31.7ns ± 0% 31.8ns ± 0% +0.32% (p=0.000 n=8+8) Memmove/1024 52.3ns ± 0% 43.9ns ± 0% -16.10% (p=0.000 n=8+8) Memmove/2048 93.2ns ± 0% 76.2ns ± 0% -18.24% (p=0.000 n=8+8) Memmove/4096 174ns ± 0% 141ns ± 0% -18.97% (p=0.000 n=8+8) Change-Id: I200d77e923dd5d78c22fe3f8eb142a8fbaff57bf Reviewed-on: https://go-review.googlesource.com/c/144218 Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
e41fbc79cc
commit
bdba55653f
@ -308,6 +308,28 @@ func (p *Parser) asmPCData(operands [][]lex.Token) {
|
|||||||
p.append(prog, "", true)
|
p.append(prog, "", true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// asmPCAlign assembles a PCALIGN pseudo-op.
|
||||||
|
// PCALIGN $16
|
||||||
|
func (p *Parser) asmPCAlign(operands [][]lex.Token) {
|
||||||
|
if len(operands) != 1 {
|
||||||
|
p.errorf("expect one operand for PCALIGN")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Operand 0 must be an immediate constant.
|
||||||
|
key := p.address(operands[0])
|
||||||
|
if !p.validImmediate("PCALIGN", &key) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
prog := &obj.Prog{
|
||||||
|
Ctxt: p.ctxt,
|
||||||
|
As: obj.APCALIGN,
|
||||||
|
From: key,
|
||||||
|
}
|
||||||
|
p.append(prog, "", true)
|
||||||
|
}
|
||||||
|
|
||||||
// asmFuncData assembles a FUNCDATA pseudo-op.
|
// asmFuncData assembles a FUNCDATA pseudo-op.
|
||||||
// FUNCDATA $1, funcdata<>+4(SB)
|
// FUNCDATA $1, funcdata<>+4(SB)
|
||||||
func (p *Parser) asmFuncData(operands [][]lex.Token) {
|
func (p *Parser) asmFuncData(operands [][]lex.Token) {
|
||||||
|
@ -227,6 +227,8 @@ func (p *Parser) pseudo(word string, operands [][]lex.Token) bool {
|
|||||||
p.asmGlobl(operands)
|
p.asmGlobl(operands)
|
||||||
case "PCDATA":
|
case "PCDATA":
|
||||||
p.asmPCData(operands)
|
p.asmPCData(operands)
|
||||||
|
case "PCALIGN":
|
||||||
|
p.asmPCAlign(operands)
|
||||||
case "TEXT":
|
case "TEXT":
|
||||||
p.asmText(operands)
|
p.asmText(operands)
|
||||||
default:
|
default:
|
||||||
|
@ -344,6 +344,7 @@ const (
|
|||||||
AFUNCDATA
|
AFUNCDATA
|
||||||
AJMP
|
AJMP
|
||||||
ANOP
|
ANOP
|
||||||
|
APCALIGN
|
||||||
APCDATA
|
APCDATA
|
||||||
ARET
|
ARET
|
||||||
AGETCALLERPC
|
AGETCALLERPC
|
||||||
|
@ -602,6 +602,7 @@ var optab = []Optab{
|
|||||||
{obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0},
|
{obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0},
|
||||||
{obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
|
{obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
|
||||||
{obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
|
{obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_LBRA, 11, 4, 0}, // same as ABR/ABL
|
||||||
|
{obj.APCALIGN, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0}, // align code
|
||||||
|
|
||||||
{obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0},
|
{obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0},
|
||||||
}
|
}
|
||||||
@ -610,6 +611,28 @@ var oprange [ALAST & obj.AMask][]Optab
|
|||||||
|
|
||||||
var xcmp [C_NCLASS][C_NCLASS]bool
|
var xcmp [C_NCLASS][C_NCLASS]bool
|
||||||
|
|
||||||
|
// padding bytes to add to align code as requested
|
||||||
|
func addpad(pc, a int64, ctxt *obj.Link) int {
|
||||||
|
switch a {
|
||||||
|
case 8:
|
||||||
|
if pc%8 != 0 {
|
||||||
|
return 4
|
||||||
|
}
|
||||||
|
case 16:
|
||||||
|
switch pc % 16 {
|
||||||
|
// When currently aligned to 4, avoid 3 NOPs and set to
|
||||||
|
// 8 byte alignment which should still help.
|
||||||
|
case 4, 12:
|
||||||
|
return 4
|
||||||
|
case 8:
|
||||||
|
return 8
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", a)
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||||
p := cursym.Func.Text
|
p := cursym.Func.Text
|
||||||
if p == nil || p.Link == nil { // handle external functions and ELF section symbols
|
if p == nil || p.Link == nil { // handle external functions and ELF section symbols
|
||||||
@ -632,12 +655,16 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
o = c.oplook(p)
|
o = c.oplook(p)
|
||||||
m = int(o.size)
|
m = int(o.size)
|
||||||
if m == 0 {
|
if m == 0 {
|
||||||
if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
|
if p.As == obj.APCALIGN {
|
||||||
c.ctxt.Diag("zero-width instruction\n%v", p)
|
a := c.vregoff(&p.From)
|
||||||
|
m = addpad(pc, a, ctxt)
|
||||||
|
} else {
|
||||||
|
if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
|
||||||
|
ctxt.Diag("zero-width instruction\n%v", p)
|
||||||
|
}
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pc += int64(m)
|
pc += int64(m)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -686,10 +713,15 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
|
|
||||||
m = int(o.size)
|
m = int(o.size)
|
||||||
if m == 0 {
|
if m == 0 {
|
||||||
if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
|
if p.As == obj.APCALIGN {
|
||||||
c.ctxt.Diag("zero-width instruction\n%v", p)
|
a := c.vregoff(&p.From)
|
||||||
|
m = addpad(pc, a, ctxt)
|
||||||
|
} else {
|
||||||
|
if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
|
||||||
|
ctxt.Diag("zero-width instruction\n%v", p)
|
||||||
|
}
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pc += int64(m)
|
pc += int64(m)
|
||||||
@ -698,7 +730,10 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
c.cursym.Size = pc
|
c.cursym.Size = pc
|
||||||
}
|
}
|
||||||
|
|
||||||
pc += -pc & (funcAlign - 1)
|
if pc%funcAlign != 0 {
|
||||||
|
pc += funcAlign - (pc % funcAlign)
|
||||||
|
}
|
||||||
|
|
||||||
c.cursym.Size = pc
|
c.cursym.Size = pc
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -716,10 +751,19 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
if int(o.size) > 4*len(out) {
|
if int(o.size) > 4*len(out) {
|
||||||
log.Fatalf("out array in span9 is too small, need at least %d for %v", o.size/4, p)
|
log.Fatalf("out array in span9 is too small, need at least %d for %v", o.size/4, p)
|
||||||
}
|
}
|
||||||
|
origsize := o.size
|
||||||
c.asmout(p, o, out[:])
|
c.asmout(p, o, out[:])
|
||||||
for i = 0; i < int32(o.size/4); i++ {
|
if origsize == 0 && o.size > 0 {
|
||||||
c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
|
for i = 0; i < int32(o.size/4); i++ {
|
||||||
bp = bp[4:]
|
c.ctxt.Arch.ByteOrder.PutUint32(bp, out[0])
|
||||||
|
bp = bp[4:]
|
||||||
|
}
|
||||||
|
o.size = origsize
|
||||||
|
} else {
|
||||||
|
for i = 0; i < int32(o.size/4); i++ {
|
||||||
|
c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
|
||||||
|
bp = bp[4:]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1897,6 +1941,7 @@ func buildop(ctxt *obj.Link) {
|
|||||||
obj.ATEXT,
|
obj.ATEXT,
|
||||||
obj.AUNDEF,
|
obj.AUNDEF,
|
||||||
obj.AFUNCDATA,
|
obj.AFUNCDATA,
|
||||||
|
obj.APCALIGN,
|
||||||
obj.APCDATA,
|
obj.APCDATA,
|
||||||
obj.ADUFFZERO,
|
obj.ADUFFZERO,
|
||||||
obj.ADUFFCOPY:
|
obj.ADUFFCOPY:
|
||||||
@ -2305,6 +2350,19 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||||||
prasm(p)
|
prasm(p)
|
||||||
|
|
||||||
case 0: /* pseudo ops */
|
case 0: /* pseudo ops */
|
||||||
|
if p.As == obj.APCALIGN {
|
||||||
|
aln := c.vregoff(&p.From)
|
||||||
|
v := addpad(p.Pc, aln, c.ctxt)
|
||||||
|
if v > 0 {
|
||||||
|
for i := 0; i < 6; i++ {
|
||||||
|
out[i] = uint32(0)
|
||||||
|
}
|
||||||
|
o.size = int8(v)
|
||||||
|
out[0] = LOP_RRR(OP_OR, REGZERO, REGZERO, REGZERO)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
o.size = 0
|
||||||
|
}
|
||||||
break
|
break
|
||||||
|
|
||||||
case 1: /* mov r1,r2 ==> OR Rs,Rs,Ra */
|
case 1: /* mov r1,r2 ==> OR Rs,Rs,Ra */
|
||||||
|
@ -535,6 +535,7 @@ var Anames = []string{
|
|||||||
"FUNCDATA",
|
"FUNCDATA",
|
||||||
"JMP",
|
"JMP",
|
||||||
"NOP",
|
"NOP",
|
||||||
|
"PCALIGN",
|
||||||
"PCDATA",
|
"PCDATA",
|
||||||
"RET",
|
"RET",
|
||||||
"GETCALLERPC",
|
"GETCALLERPC",
|
||||||
|
Loading…
Reference in New Issue
Block a user