1
0
mirror of https://github.com/golang/go synced 2024-11-18 17:54:57 -07:00

cmd/internal/gc, cmd/6g: generate boolean values without jumps

Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.

For example, given

func f(i, j int) bool {
	return i == j
}

Before

"".f t=1 size=32 value=0 args=0x18 locals=0x0
	0x0000 00000 (x.go:3)	TEXT	"".f(SB), $0-24
	0x0000 00000 (x.go:3)	FUNCDATA	$0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
	0x0000 00000 (x.go:3)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0000 00000 (x.go:4)	MOVQ	"".i+8(FP), BX
	0x0005 00005 (x.go:4)	MOVQ	"".j+16(FP), BP
	0x000a 00010 (x.go:4)	CMPQ	BX, BP
	0x000d 00013 (x.go:4)	JEQ	21
	0x000f 00015 (x.go:4)	MOVB	$0, "".~r2+24(FP)
	0x0014 00020 (x.go:4)	RET
	0x0015 00021 (x.go:4)	MOVB	$1, "".~r2+24(FP)
	0x001a 00026 (x.go:4)	JMP	20

After

"".f t=1 size=32 value=0 args=0x18 locals=0x0
	0x0000 00000 (x.go:3)	TEXT	"".f(SB), $0-24
	0x0000 00000 (x.go:3)	FUNCDATA	$0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
	0x0000 00000 (x.go:3)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0000 00000 (x.go:4)	MOVQ	"".i+8(FP), BX
	0x0005 00005 (x.go:4)	MOVQ	"".j+16(FP), BP
	0x000a 00010 (x.go:4)	CMPQ	BX, BP
	0x000d 00013 (x.go:4)	SETEQ	"".~r2+24(FP)
	0x0012 00018 (x.go:4)	RET

regexp benchmarks, best of 12 runs:

benchmark                                 old ns/op      new ns/op      delta
BenchmarkNotOnePassShortB                 782            733            -6.27%
BenchmarkLiteral                          180            171            -5.00%
BenchmarkNotLiteral                       2855           2721           -4.69%
BenchmarkMatchHard_32                     2672           2557           -4.30%
BenchmarkMatchHard_1K                     80182          76732          -4.30%
BenchmarkMatchEasy1_32M                   76440180       73304748       -4.10%
BenchmarkMatchEasy1_32K                   68798          66350          -3.56%
BenchmarkAnchoredLongMatch                482            465            -3.53%
BenchmarkMatchEasy1_1M                    2373042        2292692        -3.39%
BenchmarkReplaceAll                       2776           2690           -3.10%
BenchmarkNotOnePassShortA                 1397           1360           -2.65%
BenchmarkMatchClass_InRange               3842           3742           -2.60%
BenchmarkMatchEasy0_32                    125            122            -2.40%
BenchmarkMatchEasy0_32K                   11414          11164          -2.19%
BenchmarkMatchEasy0_1K                    668            654            -2.10%
BenchmarkAnchoredShortMatch               260            255            -1.92%
BenchmarkAnchoredLiteralShortNonMatch     164            161            -1.83%
BenchmarkOnePassShortB                    623            612            -1.77%
BenchmarkOnePassShortA                    801            788            -1.62%
BenchmarkMatchClass                       4094           4033           -1.49%
BenchmarkMatchEasy0_32M                   14078800       13890704       -1.34%
BenchmarkMatchHard_32K                    4095844        4045820        -1.22%
BenchmarkMatchEasy1_1K                    1663           1643           -1.20%
BenchmarkMatchHard_1M                     131261708      129708215      -1.18%
BenchmarkMatchHard_32M                    4210112412     4169292003     -0.97%
BenchmarkMatchMedium_32K                  2460752        2438611        -0.90%
BenchmarkMatchEasy0_1M                    422914         419672         -0.77%
BenchmarkMatchMedium_1M                   78581121       78040160       -0.69%
BenchmarkMatchMedium_32M                  2515287278     2498464906     -0.67%
BenchmarkMatchMedium_32                   1754           1746           -0.46%
BenchmarkMatchMedium_1K                   52105          52106          +0.00%
BenchmarkAnchoredLiteralLongNonMatch      185            185            +0.00%
BenchmarkMatchEasy1_32                    107            107            +0.00%
BenchmarkOnePassLongNotPrefix             505            505            +0.00%
BenchmarkOnePassLongPrefix                147            147            +0.00%

The godoc binary is ~0.12% smaller after this CL.

Updates #5729.

toolstash -cmp passes for all architectures other than amd64 and amd64p32.

Other architectures can be done in follow-up CLs.

Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
Josh Bleecher Snyder 2015-04-08 09:54:15 -07:00
parent 9c1868d06d
commit 13cb62c764
7 changed files with 286 additions and 55 deletions

View File

@ -10,11 +10,11 @@ import (
"cmd/internal/obj/x86"
)
var thechar int = '6'
var thestring string = "amd64"
var thelinkarch *obj.LinkArch = &x86.Linkamd64
var (
thechar int = '6'
thestring string = "amd64"
thelinkarch *obj.LinkArch = &x86.Linkamd64
)
func linkarchinit() {
if obj.Getgoarch() == "amd64p32" {
@ -27,13 +27,12 @@ func linkarchinit() {
var MAXWIDTH int64 = 1 << 50
var addptr int = x86.AADDQ
var movptr int = x86.AMOVQ
var leaptr int = x86.ALEAQ
var cmpptr int = x86.ACMPQ
var (
addptr int = x86.AADDQ
movptr int = x86.AMOVQ
leaptr int = x86.ALEAQ
cmpptr int = x86.ACMPQ
)
/*
* go declares several platform-specific type aliases:
@ -101,6 +100,7 @@ func main() {
gc.Thearch.Expandchecks = expandchecks
gc.Thearch.Getg = getg
gc.Thearch.Gins = gins
gc.Thearch.Ginsboolval = ginsboolval
gc.Thearch.Ginscon = ginscon
gc.Thearch.Ginsnop = ginsnop
gc.Thearch.Gmove = gmove

View File

@ -99,6 +99,10 @@ func ginscon(as int, c int64, n2 *gc.Node) {
gins(as, &n1, n2)
}
func ginsboolval(a int, n *gc.Node) {
gins(jmptoset(a), nil, n)
}
/*
* set up nodes representing 2^63
*/
@ -698,6 +702,21 @@ func optoas(op int, t *gc.Type) int {
gc.OPS<<16 | gc.TFLOAT64:
a = x86.AJPS
case gc.OPC<<16 | gc.TBOOL,
gc.OPC<<16 | gc.TINT8,
gc.OPC<<16 | gc.TUINT8,
gc.OPC<<16 | gc.TINT16,
gc.OPC<<16 | gc.TUINT16,
gc.OPC<<16 | gc.TINT32,
gc.OPC<<16 | gc.TUINT32,
gc.OPC<<16 | gc.TINT64,
gc.OPC<<16 | gc.TUINT64,
gc.OPC<<16 | gc.TPTR32,
gc.OPC<<16 | gc.TPTR64,
gc.OPC<<16 | gc.TFLOAT32,
gc.OPC<<16 | gc.TFLOAT64:
a = x86.AJPC
case gc.OLT<<16 | gc.TINT8,
gc.OLT<<16 | gc.TINT16,
gc.OLT<<16 | gc.TINT32,
@ -902,7 +921,8 @@ func optoas(op int, t *gc.Type) int {
gc.OMINUS<<16 | gc.TPTR64:
a = x86.ANEGQ
case gc.OAND<<16 | gc.TINT8,
case gc.OAND<<16 | gc.TBOOL,
gc.OAND<<16 | gc.TINT8,
gc.OAND<<16 | gc.TUINT8:
a = x86.AANDB
@ -920,7 +940,8 @@ func optoas(op int, t *gc.Type) int {
gc.OAND<<16 | gc.TPTR64:
a = x86.AANDQ
case gc.OOR<<16 | gc.TINT8,
case gc.OOR<<16 | gc.TBOOL,
gc.OOR<<16 | gc.TINT8,
gc.OOR<<16 | gc.TUINT8:
a = x86.AORB
@ -1134,6 +1155,46 @@ func optoas(op int, t *gc.Type) int {
return a
}
// jmptoset returns ASETxx for AJxx.
func jmptoset(jmp int) int {
switch jmp {
case x86.AJEQ:
return x86.ASETEQ
case x86.AJNE:
return x86.ASETNE
case x86.AJLT:
return x86.ASETLT
case x86.AJCS:
return x86.ASETCS
case x86.AJLE:
return x86.ASETLE
case x86.AJLS:
return x86.ASETLS
case x86.AJGT:
return x86.ASETGT
case x86.AJHI:
return x86.ASETHI
case x86.AJGE:
return x86.ASETGE
case x86.AJCC:
return x86.ASETCC
case x86.AJMI:
return x86.ASETMI
case x86.AJOC:
return x86.ASETOC
case x86.AJOS:
return x86.ASETOS
case x86.AJPC:
return x86.ASETPC
case x86.AJPL:
return x86.ASETPL
case x86.AJPS:
return x86.ASETPS
}
gc.Fatal("jmptoset: no entry for %v", gc.Oconv(jmp, 0))
panic("unreachable")
}
const (
ODynam = 1 << 0
OAddable = 1 << 1

View File

@ -196,6 +196,22 @@ var progtable = [x86.ALAST]obj.ProgInfo{
x86.ASBBL: {gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
x86.ASBBQ: {gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
x86.ASBBW: {gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry, 0, 0, 0},
x86.ASETCC: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETCS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETEQ: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETGE: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETGT: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETHI: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETLE: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETLS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETLT: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETMI: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETNE: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETOC: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETOS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETPC: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETPL: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASETPS: {gc.SizeB | gc.RightWrite | gc.UseCarry, 0, 0, 0},
x86.ASHLB: {gc.SizeB | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
x86.ASHLL: {gc.SizeL | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},
x86.ASHLQ: {gc.SizeQ | gc.LeftRead | RightRdwr | gc.ShiftCX | gc.SetCarry, 0, 0, 0},

View File

@ -345,20 +345,12 @@ func Cgen(n *Node, res *Node) {
Dump("cgen-res", res)
Fatal("cgen: unknown op %v", Nconv(n, obj.FmtShort|obj.FmtSign))
// these call bgen to get a bool value
case OOROR, OANDAND,
OEQ, ONE,
OLT, OLE,
OGE, OGT,
ONOT:
p1 := Gbranch(obj.AJMP, nil, 0)
p2 := Pc
Thearch.Gmove(Nodbool(true), res)
p3 := Gbranch(obj.AJMP, nil, 0)
Patch(p1, Pc)
Bgen(n, true, 0, p2)
Thearch.Gmove(Nodbool(false), res)
Patch(p3, Pc)
Bvgen(n, res, true)
return
case OPLUS:
@ -1640,10 +1632,55 @@ func Igen(n *Node, a *Node, res *Node) {
// goto to
// }
func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
if Debug['g'] != 0 {
fmt.Printf("\nbgen wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to)
Dump("bgen", n)
bgenx(n, nil, wantTrue, likely, to)
}
// Bvgen generates code for calculating boolean values:
// res = n == wantTrue
func Bvgen(n, res *Node, wantTrue bool) {
if Thearch.Ginsboolval == nil {
// Direct value generation not implemented for this architecture.
// Implement using jumps.
bvgenjump(n, res, wantTrue, true)
return
}
bgenx(n, res, wantTrue, 0, nil)
}
// bvgenjump implements boolean value generation using jumps:
// if n == wantTrue {
// res = 1
// } else {
// res = 0
// }
// geninit controls whether n's Ninit is generated.
func bvgenjump(n, res *Node, wantTrue, geninit bool) {
init := n.Ninit
if !geninit {
n.Ninit = nil
}
p1 := Gbranch(obj.AJMP, nil, 0)
p2 := Pc
Thearch.Gmove(Nodbool(true), res)
p3 := Gbranch(obj.AJMP, nil, 0)
Patch(p1, Pc)
Bgen(n, wantTrue, 0, p2)
Thearch.Gmove(Nodbool(false), res)
Patch(p3, Pc)
n.Ninit = init
}
// bgenx is the backend for Bgen and Bvgen.
// If res is nil, it generates a branch.
// Otherwise, it generates a boolean value.
func bgenx(n, res *Node, wantTrue bool, likely int, to *obj.Prog) {
if Debug['g'] != 0 {
fmt.Printf("\nbgenx wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to)
Dump("n", n)
Dump("res", res)
}
genval := res != nil
if n == nil {
n = Nodbool(true)
@ -1659,9 +1696,7 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
}
if n.Type.Etype != TBOOL {
Yyerror("cgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0))
Patch(Thearch.Gins(obj.AEND, nil, nil), to)
return
Fatal("bgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0))
}
for n.Op == OCONVNOP {
@ -1670,44 +1705,90 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
}
if Thearch.Bgen_float != nil && n.Left != nil && Isfloat[n.Left.Type.Etype] {
if genval {
bvgenjump(n, res, wantTrue, false)
return
}
Thearch.Bgen_float(n, wantTrue, likely, to)
return
}
switch n.Op {
default:
if genval {
Cgen(n, res)
if !wantTrue {
Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res)
}
return
}
var tmp Node
Regalloc(&tmp, n.Type, nil)
Cgen(n, &tmp)
bgenNonZero(&tmp, wantTrue, likely, to)
bgenNonZero(&tmp, nil, wantTrue, likely, to)
Regfree(&tmp)
return
case ONAME:
if genval {
// 5g, 7g, and 9g might need a temporary or other help here,
// but they don't support direct generation of a bool value yet.
// We can fix that as we go.
switch Ctxt.Arch.Thechar {
case '5', '7', '9':
Fatal("genval 5g, 7g, 9g ONAMES not fully implemented")
}
Cgen(n, res)
if !wantTrue {
Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res)
}
return
}
if n.Addable && Ctxt.Arch.Thechar != '5' && Ctxt.Arch.Thechar != '7' && Ctxt.Arch.Thechar != '9' {
// no need for a temporary
bgenNonZero(n, wantTrue, likely, to)
bgenNonZero(n, nil, wantTrue, likely, to)
return
}
var tmp Node
Regalloc(&tmp, n.Type, nil)
Cgen(n, &tmp)
bgenNonZero(&tmp, wantTrue, likely, to)
bgenNonZero(&tmp, nil, wantTrue, likely, to)
Regfree(&tmp)
return
case OLITERAL:
// n is a constant. If n == wantTrue, jump; otherwise do nothing.
// n is a constant.
if !Isconst(n, CTBOOL) {
Fatal("bgen: non-bool const %v\n", Nconv(n, obj.FmtLong))
}
if genval {
Cgen(Nodbool(wantTrue == n.Val.U.Bval), res)
return
}
// If n == wantTrue, jump; otherwise do nothing.
if wantTrue == n.Val.U.Bval {
Patch(Gbranch(obj.AJMP, nil, likely), to)
}
return
case OANDAND, OOROR:
if (n.Op == OANDAND) == wantTrue {
and := (n.Op == OANDAND) == wantTrue
if genval {
p1 := Gbranch(obj.AJMP, nil, 0)
p2 := Gbranch(obj.AJMP, nil, 0)
Patch(p2, Pc)
Cgen(Nodbool(!and), res)
p3 := Gbranch(obj.AJMP, nil, 0)
Patch(p1, Pc)
Bgen(n.Left, wantTrue != and, 0, p2)
Bvgen(n.Right, res, wantTrue)
Patch(p3, Pc)
return
}
if and {
p1 := Gbranch(obj.AJMP, nil, 0)
p2 := Gbranch(obj.AJMP, nil, 0)
Patch(p1, Pc)
@ -1726,7 +1807,7 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
if n.Left == nil || n.Left.Type == nil {
return
}
Bgen(n.Left, !wantTrue, likely, to)
bgenx(n.Left, res, !wantTrue, likely, to)
return
case OEQ, ONE, OLT, OGT, OLE, OGE:
@ -1743,15 +1824,21 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
if !wantTrue {
if Isfloat[nr.Type.Etype] {
// Brcom is not valid on floats when NaN is involved.
ll := n.Ninit // avoid re-genning Ninit
n.Ninit = nil
if genval {
bgenx(n, res, true, likely, to)
Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res) // res = !res
n.Ninit = ll
return
}
p1 := Gbranch(obj.AJMP, nil, 0)
p2 := Gbranch(obj.AJMP, nil, 0)
Patch(p1, Pc)
ll := n.Ninit // avoid re-genning Ninit
n.Ninit = nil
Bgen(n, true, -likely, p2)
n.Ninit = ll
bgenx(n, res, true, -likely, p2)
Patch(Gbranch(obj.AJMP, nil, 0), to)
Patch(p2, Pc)
n.Ninit = ll
return
}
@ -1786,17 +1873,22 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
Regalloc(&tmp, ptr.Type, &ptr)
Cgen(&ptr, &tmp)
Regfree(&ptr)
bgenNonZero(&tmp, a == OEQ != wantTrue, likely, to)
bgenNonZero(&tmp, res, a == OEQ != wantTrue, likely, to)
Regfree(&tmp)
return
}
if Iscomplex[nl.Type.Etype] {
complexbool(a, nl, nr, wantTrue, likely, to)
complexbool(a, nl, nr, res, wantTrue, likely, to)
return
}
if Ctxt.Arch.Regsize == 4 && Is64(nr.Type) {
if genval {
// TODO: Teach Cmp64 to generate boolean values and remove this.
bvgenjump(n, res, wantTrue, false)
return
}
if !nl.Addable || Isconst(nl, CTINT) {
nl = CgenTemp(nl)
}
@ -1838,7 +1930,7 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
if Smallintconst(nr) && Ctxt.Arch.Thechar != '9' {
Thearch.Gins(Thearch.Optoas(OCMP, nr.Type), nl, nr)
Patch(Gbranch(Thearch.Optoas(a, nr.Type), nr.Type, likely), to)
bins(nr.Type, res, a, likely, to)
return
}
@ -1869,6 +1961,9 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
if Isfloat[nl.Type.Etype] {
switch Ctxt.Arch.Thechar {
case '5':
if genval {
Fatal("genval 5g Isfloat special cases not implemented")
}
switch n.Op {
case ONE:
Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nr.Type, likely), to)
@ -1883,19 +1978,40 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
switch n.Op {
case OEQ:
// neither NE nor P
p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely)
p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely)
Patch(Gbranch(obj.AJMP, nil, 0), to)
Patch(p1, Pc)
Patch(p2, Pc)
if genval {
var reg Node
Regalloc(&reg, Types[TBOOL], nil)
Thearch.Ginsboolval(Thearch.Optoas(OEQ, nr.Type), &reg)
Thearch.Ginsboolval(Thearch.Optoas(OPC, nr.Type), res)
Thearch.Gins(Thearch.Optoas(OAND, Types[TBOOL]), &reg, res)
Regfree(&reg)
} else {
p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely)
p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely)
Patch(Gbranch(obj.AJMP, nil, 0), to)
Patch(p1, Pc)
Patch(p2, Pc)
}
return
case ONE:
// either NE or P
Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to)
Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to)
if genval {
var reg Node
Regalloc(&reg, Types[TBOOL], nil)
Thearch.Ginsboolval(Thearch.Optoas(ONE, nr.Type), &reg)
Thearch.Ginsboolval(Thearch.Optoas(OPS, nr.Type), res)
Thearch.Gins(Thearch.Optoas(OOR, Types[TBOOL]), &reg, res)
Regfree(&reg)
} else {
Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to)
Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to)
}
return
}
case '7', '9':
if genval {
Fatal("genval 7g, 9g Isfloat special cases not implemented")
}
switch n.Op {
// On arm64 and ppc64, <= and >= mishandle NaN. Must decompose into < or > and =.
// TODO(josh): Convert a <= b to b > a instead?
@ -1912,11 +2028,11 @@ func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
}
}
// Not a special case. Insert an appropriate conditional jump.
Patch(Gbranch(Thearch.Optoas(a, nr.Type), nr.Type, likely), to)
// Not a special case. Insert the conditional jump or value gen.
bins(nr.Type, res, a, likely, to)
}
func bgenNonZero(n *Node, wantTrue bool, likely int, to *obj.Prog) {
func bgenNonZero(n, res *Node, wantTrue bool, likely int, to *obj.Prog) {
// TODO: Optimize on systems that can compare to zero easily.
a := ONE
if !wantTrue {
@ -1925,7 +2041,21 @@ func bgenNonZero(n *Node, wantTrue bool, likely int, to *obj.Prog) {
var zero Node
Nodconst(&zero, n.Type, 0)
Thearch.Gins(Thearch.Optoas(OCMP, n.Type), n, &zero)
Patch(Gbranch(Thearch.Optoas(a, n.Type), n.Type, likely), to)
bins(n.Type, res, a, likely, to)
}
// bins inserts an instruction to handle the result of a compare.
// If res is non-nil, it inserts appropriate value generation instructions.
// If res is nil, it inserts a branch to to.
func bins(typ *Type, res *Node, a, likely int, to *obj.Prog) {
a = Thearch.Optoas(a, typ)
if res != nil {
// value gen
Thearch.Ginsboolval(a, res)
} else {
// jump
Patch(Gbranch(a, typ, likely), to)
}
}
/*

View File

@ -14,7 +14,7 @@ func overlap_cplx(f *Node, t *Node) bool {
return f.Op == OINDREG && t.Op == OINDREG && f.Xoffset+f.Type.Width >= t.Xoffset && t.Xoffset+t.Type.Width >= f.Xoffset
}
func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog) {
func complexbool(op int, nl, nr, res *Node, wantTrue bool, likely int, to *obj.Prog) {
// make both sides addable in ullman order
if nr != nil {
if nl.Ullman > nr.Ullman && !nl.Addable {
@ -35,7 +35,10 @@ func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog)
subnode(&rreal, &rimag, nr)
// build tree
// real(l) == real(r) && imag(l) == imag(r)
// if branching:
// real(l) == real(r) && imag(l) == imag(r)
// if generating a value, use a branch-free version:
// real(l) == real(r) & imag(l) == imag(r)
realeq := Node{
Op: OEQ,
Left: &lreal,
@ -55,6 +58,19 @@ func complexbool(op int, nl, nr *Node, wantTrue bool, likely int, to *obj.Prog)
Type: Types[TBOOL],
}
if res != nil {
// generating a value
and.Op = OAND
if op == ONE {
and.Op = OOR
realeq.Op = ONE
imageq.Op = ONE
}
Bvgen(&and, res, true)
return
}
// generating a branch
if op == ONE {
wantTrue = !wantTrue
}

View File

@ -791,6 +791,13 @@ type Arch struct {
Expandchecks func(*obj.Prog)
Getg func(*Node)
Gins func(int, *Node, *Node) *obj.Prog
// Ginsboolval inserts instructions to convert the result
// of a just-completed comparison to a boolean value.
// The first argument is the conditional jump instruction
// corresponding to the desired value.
// The second argument is the destination.
// If not present, Ginsboolval will be emulated with jumps.
Ginsboolval func(int, *Node)
Ginscon func(int, int64, *Node)
Ginsnop func()
Gmove func(*Node, *Node)

View File

@ -306,6 +306,7 @@ const (
ORROTC // right rotate-carry: ARCR.
ORETJMP // return to other function
OPS // compare parity set (for x86 NaN check)
OPC // compare parity clear (for x86 NaN check)
OSQRT // sqrt(float64), on systems that have hw support
OGETG // runtime.getg() (read g pointer)