From 8e601b23cd77f687407a358d2baba672f5a8e4d6 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 18 Aug 2015 14:39:26 -0400 Subject: [PATCH] [dev.ssa] cmd/compile: add FP comparison ops Basic ops, no particular optimization in the pattern matching yet (e.g. x!=x for Nan detection, x cmp constant, etc.) Change-Id: I0043564081d6dc0eede876c4a9eb3c33cbd1521c Reviewed-on: https://go-review.googlesource.com/13704 Reviewed-by: Keith Randall --- src/cmd/compile/internal/gc/ssa.go | 180 ++- .../compile/internal/gc/testdata/fp_ssa.go | 1039 ++++++++++++++++- src/cmd/compile/internal/ssa/fuse.go | 2 +- src/cmd/compile/internal/ssa/gen/AMD64.rules | 39 +- src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 30 +- .../compile/internal/ssa/gen/genericOps.go | 12 + src/cmd/compile/internal/ssa/gen/main.go | 4 +- src/cmd/compile/internal/ssa/opGen.go | 184 ++- src/cmd/compile/internal/ssa/rewriteAMD64.go | 424 +++++++ 9 files changed, 1800 insertions(+), 114 deletions(-) diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 8e44ede318..676de23115 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -848,6 +848,8 @@ var opToSSA = map[opAndType]ssa.Op{ opAndType{OEQ, TCHAN}: ssa.OpEqPtr, opAndType{OEQ, TUINTPTR}: ssa.OpEqPtr, opAndType{OEQ, TUNSAFEPTR}: ssa.OpEqPtr, + opAndType{OEQ, TFLOAT64}: ssa.OpEq64F, + opAndType{OEQ, TFLOAT32}: ssa.OpEq32F, opAndType{ONE, TBOOL}: ssa.OpNeq8, opAndType{ONE, TINT8}: ssa.OpNeq8, @@ -866,42 +868,52 @@ var opToSSA = map[opAndType]ssa.Op{ opAndType{ONE, TCHAN}: ssa.OpNeqPtr, opAndType{ONE, TUINTPTR}: ssa.OpNeqPtr, opAndType{ONE, TUNSAFEPTR}: ssa.OpNeqPtr, + opAndType{ONE, TFLOAT64}: ssa.OpNeq64F, + opAndType{ONE, TFLOAT32}: ssa.OpNeq32F, - opAndType{OLT, TINT8}: ssa.OpLess8, - opAndType{OLT, TUINT8}: ssa.OpLess8U, - opAndType{OLT, TINT16}: ssa.OpLess16, - opAndType{OLT, TUINT16}: ssa.OpLess16U, - opAndType{OLT, TINT32}: ssa.OpLess32, - opAndType{OLT, TUINT32}: ssa.OpLess32U, - opAndType{OLT, TINT64}: ssa.OpLess64, - opAndType{OLT, TUINT64}: ssa.OpLess64U, + opAndType{OLT, TINT8}: ssa.OpLess8, + opAndType{OLT, TUINT8}: ssa.OpLess8U, + opAndType{OLT, TINT16}: ssa.OpLess16, + opAndType{OLT, TUINT16}: ssa.OpLess16U, + opAndType{OLT, TINT32}: ssa.OpLess32, + opAndType{OLT, TUINT32}: ssa.OpLess32U, + opAndType{OLT, TINT64}: ssa.OpLess64, + opAndType{OLT, TUINT64}: ssa.OpLess64U, + opAndType{OLT, TFLOAT64}: ssa.OpLess64F, + opAndType{OLT, TFLOAT32}: ssa.OpLess32F, - opAndType{OGT, TINT8}: ssa.OpGreater8, - opAndType{OGT, TUINT8}: ssa.OpGreater8U, - opAndType{OGT, TINT16}: ssa.OpGreater16, - opAndType{OGT, TUINT16}: ssa.OpGreater16U, - opAndType{OGT, TINT32}: ssa.OpGreater32, - opAndType{OGT, TUINT32}: ssa.OpGreater32U, - opAndType{OGT, TINT64}: ssa.OpGreater64, - opAndType{OGT, TUINT64}: ssa.OpGreater64U, + opAndType{OGT, TINT8}: ssa.OpGreater8, + opAndType{OGT, TUINT8}: ssa.OpGreater8U, + opAndType{OGT, TINT16}: ssa.OpGreater16, + opAndType{OGT, TUINT16}: ssa.OpGreater16U, + opAndType{OGT, TINT32}: ssa.OpGreater32, + opAndType{OGT, TUINT32}: ssa.OpGreater32U, + opAndType{OGT, TINT64}: ssa.OpGreater64, + opAndType{OGT, TUINT64}: ssa.OpGreater64U, + opAndType{OGT, TFLOAT64}: ssa.OpGreater64F, + opAndType{OGT, TFLOAT32}: ssa.OpGreater32F, - opAndType{OLE, TINT8}: ssa.OpLeq8, - opAndType{OLE, TUINT8}: ssa.OpLeq8U, - opAndType{OLE, TINT16}: ssa.OpLeq16, - opAndType{OLE, TUINT16}: ssa.OpLeq16U, - opAndType{OLE, TINT32}: ssa.OpLeq32, - opAndType{OLE, TUINT32}: ssa.OpLeq32U, - opAndType{OLE, TINT64}: ssa.OpLeq64, - 
opAndType{OLE, TUINT64}: ssa.OpLeq64U, + opAndType{OLE, TINT8}: ssa.OpLeq8, + opAndType{OLE, TUINT8}: ssa.OpLeq8U, + opAndType{OLE, TINT16}: ssa.OpLeq16, + opAndType{OLE, TUINT16}: ssa.OpLeq16U, + opAndType{OLE, TINT32}: ssa.OpLeq32, + opAndType{OLE, TUINT32}: ssa.OpLeq32U, + opAndType{OLE, TINT64}: ssa.OpLeq64, + opAndType{OLE, TUINT64}: ssa.OpLeq64U, + opAndType{OLE, TFLOAT64}: ssa.OpLeq64F, + opAndType{OLE, TFLOAT32}: ssa.OpLeq32F, - opAndType{OGE, TINT8}: ssa.OpGeq8, - opAndType{OGE, TUINT8}: ssa.OpGeq8U, - opAndType{OGE, TINT16}: ssa.OpGeq16, - opAndType{OGE, TUINT16}: ssa.OpGeq16U, - opAndType{OGE, TINT32}: ssa.OpGeq32, - opAndType{OGE, TUINT32}: ssa.OpGeq32U, - opAndType{OGE, TINT64}: ssa.OpGeq64, - opAndType{OGE, TUINT64}: ssa.OpGeq64U, + opAndType{OGE, TINT8}: ssa.OpGeq8, + opAndType{OGE, TUINT8}: ssa.OpGeq8U, + opAndType{OGE, TINT16}: ssa.OpGeq16, + opAndType{OGE, TUINT16}: ssa.OpGeq16U, + opAndType{OGE, TINT32}: ssa.OpGeq32, + opAndType{OGE, TUINT32}: ssa.OpGeq32U, + opAndType{OGE, TINT64}: ssa.OpGeq64, + opAndType{OGE, TUINT64}: ssa.OpGeq64U, + opAndType{OGE, TFLOAT64}: ssa.OpGeq64F, + opAndType{OGE, TFLOAT32}: ssa.OpGeq32F, opAndType{OLROT, TUINT8}: ssa.OpLrot8, opAndType{OLROT, TUINT16}: ssa.OpLrot16, @@ -2198,7 +2210,7 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) { } // opregreg emits instructions for -// dest := dest op src +// dest := dest(To) op src(From) // and also returns the created obj.Prog so it // may be further adjusted (offset, scale, etc). func opregreg(op int, dest, src int16) *obj.Prog { @@ -2522,11 +2534,11 @@ func genValue(v *ssa.Value) { p.To.Reg = regnum(v) case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB, ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB: - p := Prog(v.Op.Asm()) - p.From.Type = obj.TYPE_REG - p.From.Reg = regnum(v.Args[0]) - p.To.Type = obj.TYPE_REG - p.To.Reg = regnum(v.Args[1]) + opregreg(v.Op.Asm(), regnum(v.Args[1]), regnum(v.Args[0])) + case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD: + // Go assembler has swapped operands for UCOMISx relative to CMP, + // must account for that right here. + opregreg(v.Op.Asm(), regnum(v.Args[0]), regnum(v.Args[1])) case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst, ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst: p := Prog(v.Op.Asm()) @@ -2763,11 +2775,34 @@ func genValue(v *ssa.Value) { case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE, ssa.OpAMD64SETL, ssa.OpAMD64SETLE, ssa.OpAMD64SETG, ssa.OpAMD64SETGE, + ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF, ssa.OpAMD64SETB, ssa.OpAMD64SETBE, + ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN, ssa.OpAMD64SETA, ssa.OpAMD64SETAE: p := Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = regnum(v) + + case ssa.OpAMD64SETNEF: + p := Prog(v.Op.Asm()) + p.To.Type = obj.TYPE_REG + p.To.Reg = regnum(v) + q := Prog(x86.ASETPS) + q.To.Type = obj.TYPE_REG + q.To.Reg = x86.REG_AX + // TODO AORQ copied from old code generator, why not AORB? + opregreg(x86.AORQ, regnum(v), x86.REG_AX) + + case ssa.OpAMD64SETEQF: + p := Prog(v.Op.Asm()) + p.To.Type = obj.TYPE_REG + p.To.Reg = regnum(v) + q := Prog(x86.ASETPC) + q.To.Type = obj.TYPE_REG + q.To.Reg = x86.REG_AX + // TODO AANDQ copied from old code generator, why not AANDB? 
+ opregreg(x86.AANDQ, regnum(v), x86.REG_AX) + case ssa.OpAMD64InvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v) case ssa.OpAMD64REPSTOSQ: @@ -2808,7 +2843,9 @@ func movZero(as int, width int64, nbytes int64, offset int64, regnum int16) (nle return nleft, offset } -var blockJump = [...]struct{ asm, invasm int }{ +var blockJump = [...]struct { + asm, invasm int +}{ ssa.BlockAMD64EQ: {x86.AJEQ, x86.AJNE}, ssa.BlockAMD64NE: {x86.AJNE, x86.AJEQ}, ssa.BlockAMD64LT: {x86.AJLT, x86.AJGE}, @@ -2819,6 +2856,63 @@ var blockJump = [...]struct{ asm, invasm int }{ ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS}, ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS}, ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI}, + ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS}, + ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC}, +} + +type floatingEQNEJump struct { + jump, index int +} + +var eqfJumps = [2][2]floatingEQNEJump{ + {{x86.AJNE, 1}, {x86.AJPS, 1}}, // next == b.Succs[0] + {{x86.AJNE, 1}, {x86.AJPC, 0}}, // next == b.Succs[1] +} +var nefJumps = [2][2]floatingEQNEJump{ + {{x86.AJNE, 0}, {x86.AJPC, 1}}, // next == b.Succs[0] + {{x86.AJNE, 0}, {x86.AJPS, 0}}, // next == b.Succs[1] +} + +func oneFPJump(b *ssa.Block, jumps *floatingEQNEJump, likely ssa.BranchPrediction, branches []branch) []branch { + p := Prog(jumps.jump) + p.To.Type = obj.TYPE_BRANCH + to := jumps.index + branches = append(branches, branch{p, b.Succs[to]}) + if to == 1 { + likely = -likely + } + // liblink reorders the instruction stream as it sees fit. + // Pass along what we know so liblink can make use of it. + // TODO: Once we've fully switched to SSA, + // make liblink leave our output alone. + switch likely { + case ssa.BranchUnlikely: + p.From.Type = obj.TYPE_CONST + p.From.Offset = 0 + case ssa.BranchLikely: + p.From.Type = obj.TYPE_CONST + p.From.Offset = 1 + } + return branches +} + +func genFPJump(b, next *ssa.Block, jumps *[2][2]floatingEQNEJump, branches []branch) []branch { + likely := b.Likely + switch next { + case b.Succs[0]: + branches = oneFPJump(b, &jumps[0][0], likely, branches) + branches = oneFPJump(b, &jumps[0][1], likely, branches) + case b.Succs[1]: + branches = oneFPJump(b, &jumps[1][0], likely, branches) + branches = oneFPJump(b, &jumps[1][1], likely, branches) + default: + branches = oneFPJump(b, &jumps[1][0], likely, branches) + branches = oneFPJump(b, &jumps[1][1], likely, branches) + q := Prog(obj.AJMP) + q.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{q, b.Succs[1]}) + } + return branches } func genBlock(b, next *ssa.Block, branches []branch) []branch { @@ -2849,12 +2943,18 @@ func genBlock(b, next *ssa.Block, branches []branch) []branch { p.To.Type = obj.TYPE_BRANCH branches = append(branches, branch{p, b.Succs[0]}) } + + case ssa.BlockAMD64EQF: + branches = genFPJump(b, next, &eqfJumps, branches) + + case ssa.BlockAMD64NEF: + branches = genFPJump(b, next, &nefJumps, branches) + case ssa.BlockAMD64EQ, ssa.BlockAMD64NE, ssa.BlockAMD64LT, ssa.BlockAMD64GE, ssa.BlockAMD64LE, ssa.BlockAMD64GT, ssa.BlockAMD64ULT, ssa.BlockAMD64UGT, ssa.BlockAMD64ULE, ssa.BlockAMD64UGE: - jmp := blockJump[b.Kind] likely := b.Likely var p *obj.Prog diff --git a/src/cmd/compile/internal/gc/testdata/fp_ssa.go b/src/cmd/compile/internal/gc/testdata/fp_ssa.go index 1a52100d6b..95e3cf9196 100644 --- a/src/cmd/compile/internal/gc/testdata/fp_ssa.go +++ b/src/cmd/compile/internal/gc/testdata/fp_ssa.go @@ -10,64 +10,6 @@ package main import "fmt" -func fail64(s string, f func(a, b float64) float64, a, b, e float64) int { - d := f(a, b) - if d 
!= e { - fmt.Printf("For (float64) %v %v %v, expected %v, got %v\n", a, s, b, e, d) - return 1 - } - return 0 -} - -func fail32(s string, f func(a, b float32) float32, a, b, e float32) int { - d := f(a, b) - if d != e { - fmt.Printf("For (float32) %v %v %v, expected %v, got %v\n", a, s, b, e, d) - return 1 - } - return 0 -} - -func expect64(s string, x, expected float64) int { - if x != expected { - println("Expected", expected, "for", s, ", got", x) - return 1 - } - return 0 -} - -func expect32(s string, x, expected float32) int { - if x != expected { - println("Expected", expected, "for", s, ", got", x) - return 1 - } - return 0 -} - -func expectAll64(s string, expected, a, b, c, d, e, f, g, h, i float64) int { - fails := 0 - fails += expect64(s+":a", a, expected) - fails += expect64(s+":b", b, expected) - fails += expect64(s+":c", c, expected) - fails += expect64(s+":d", d, expected) - fails += expect64(s+":e", e, expected) - fails += expect64(s+":f", f, expected) - fails += expect64(s+":g", g, expected) - return fails -} - -func expectAll32(s string, expected, a, b, c, d, e, f, g, h, i float32) int { - fails := 0 - fails += expect32(s+":a", a, expected) - fails += expect32(s+":b", b, expected) - fails += expect32(s+":c", c, expected) - fails += expect32(s+":d", d, expected) - fails += expect32(s+":e", e, expected) - fails += expect32(s+":f", f, expected) - fails += expect32(s+":g", g, expected) - return fails -} - // manysub_ssa is designed to tickle bugs that depend on register // pressure or unfriendly operand ordering in registers (and at // least once it succeeded in this). @@ -245,6 +187,952 @@ func integer2floatConversions() int { return fails } +const ( + aa = 0x1000000000000000 + ab = 0x100000000000000 + ac = 0x10000000000000 + ad = 0x1000000000000 + ba = 0x100000000000 + bb = 0x10000000000 + bc = 0x1000000000 + bd = 0x100000000 + ca = 0x10000000 + cb = 0x1000000 + cc = 0x100000 + cd = 0x10000 + da = 0x1000 + db = 0x100 + dc = 0x10 + dd = 0x1 +) + +func compares64_ssa(a, b, c, d float64) (lt, le, eq, ne, ge, gt uint64) { + + switch { + } + + if a < a { + lt += aa + } + if a < b { + lt += ab + } + if a < c { + lt += ac + } + if a < d { + lt += ad + } + + if b < a { + lt += ba + } + if b < b { + lt += bb + } + if b < c { + lt += bc + } + if b < d { + lt += bd + } + + if c < a { + lt += ca + } + if c < b { + lt += cb + } + if c < c { + lt += cc + } + if c < d { + lt += cd + } + + if d < a { + lt += da + } + if d < b { + lt += db + } + if d < c { + lt += dc + } + if d < d { + lt += dd + } + + if a <= a { + le += aa + } + if a <= b { + le += ab + } + if a <= c { + le += ac + } + if a <= d { + le += ad + } + + if b <= a { + le += ba + } + if b <= b { + le += bb + } + if b <= c { + le += bc + } + if b <= d { + le += bd + } + + if c <= a { + le += ca + } + if c <= b { + le += cb + } + if c <= c { + le += cc + } + if c <= d { + le += cd + } + + if d <= a { + le += da + } + if d <= b { + le += db + } + if d <= c { + le += dc + } + if d <= d { + le += dd + } + + if a == a { + eq += aa + } + if a == b { + eq += ab + } + if a == c { + eq += ac + } + if a == d { + eq += ad + } + + if b == a { + eq += ba + } + if b == b { + eq += bb + } + if b == c { + eq += bc + } + if b == d { + eq += bd + } + + if c == a { + eq += ca + } + if c == b { + eq += cb + } + if c == c { + eq += cc + } + if c == d { + eq += cd + } + + if d == a { + eq += da + } + if d == b { + eq += db + } + if d == c { + eq += dc + } + if d == d { + eq += dd + } + + if a != a { + ne += aa + } + if a != b { + ne += ab + } + if a 
!= c { + ne += ac + } + if a != d { + ne += ad + } + + if b != a { + ne += ba + } + if b != b { + ne += bb + } + if b != c { + ne += bc + } + if b != d { + ne += bd + } + + if c != a { + ne += ca + } + if c != b { + ne += cb + } + if c != c { + ne += cc + } + if c != d { + ne += cd + } + + if d != a { + ne += da + } + if d != b { + ne += db + } + if d != c { + ne += dc + } + if d != d { + ne += dd + } + + if a >= a { + ge += aa + } + if a >= b { + ge += ab + } + if a >= c { + ge += ac + } + if a >= d { + ge += ad + } + + if b >= a { + ge += ba + } + if b >= b { + ge += bb + } + if b >= c { + ge += bc + } + if b >= d { + ge += bd + } + + if c >= a { + ge += ca + } + if c >= b { + ge += cb + } + if c >= c { + ge += cc + } + if c >= d { + ge += cd + } + + if d >= a { + ge += da + } + if d >= b { + ge += db + } + if d >= c { + ge += dc + } + if d >= d { + ge += dd + } + + if a > a { + gt += aa + } + if a > b { + gt += ab + } + if a > c { + gt += ac + } + if a > d { + gt += ad + } + + if b > a { + gt += ba + } + if b > b { + gt += bb + } + if b > c { + gt += bc + } + if b > d { + gt += bd + } + + if c > a { + gt += ca + } + if c > b { + gt += cb + } + if c > c { + gt += cc + } + if c > d { + gt += cd + } + + if d > a { + gt += da + } + if d > b { + gt += db + } + if d > c { + gt += dc + } + if d > d { + gt += dd + } + + return +} + +func compares32_ssa(a, b, c, d float32) (lt, le, eq, ne, ge, gt uint64) { + + switch { + } + + if a < a { + lt += aa + } + if a < b { + lt += ab + } + if a < c { + lt += ac + } + if a < d { + lt += ad + } + + if b < a { + lt += ba + } + if b < b { + lt += bb + } + if b < c { + lt += bc + } + if b < d { + lt += bd + } + + if c < a { + lt += ca + } + if c < b { + lt += cb + } + if c < c { + lt += cc + } + if c < d { + lt += cd + } + + if d < a { + lt += da + } + if d < b { + lt += db + } + if d < c { + lt += dc + } + if d < d { + lt += dd + } + + if a <= a { + le += aa + } + if a <= b { + le += ab + } + if a <= c { + le += ac + } + if a <= d { + le += ad + } + + if b <= a { + le += ba + } + if b <= b { + le += bb + } + if b <= c { + le += bc + } + if b <= d { + le += bd + } + + if c <= a { + le += ca + } + if c <= b { + le += cb + } + if c <= c { + le += cc + } + if c <= d { + le += cd + } + + if d <= a { + le += da + } + if d <= b { + le += db + } + if d <= c { + le += dc + } + if d <= d { + le += dd + } + + if a == a { + eq += aa + } + if a == b { + eq += ab + } + if a == c { + eq += ac + } + if a == d { + eq += ad + } + + if b == a { + eq += ba + } + if b == b { + eq += bb + } + if b == c { + eq += bc + } + if b == d { + eq += bd + } + + if c == a { + eq += ca + } + if c == b { + eq += cb + } + if c == c { + eq += cc + } + if c == d { + eq += cd + } + + if d == a { + eq += da + } + if d == b { + eq += db + } + if d == c { + eq += dc + } + if d == d { + eq += dd + } + + if a != a { + ne += aa + } + if a != b { + ne += ab + } + if a != c { + ne += ac + } + if a != d { + ne += ad + } + + if b != a { + ne += ba + } + if b != b { + ne += bb + } + if b != c { + ne += bc + } + if b != d { + ne += bd + } + + if c != a { + ne += ca + } + if c != b { + ne += cb + } + if c != c { + ne += cc + } + if c != d { + ne += cd + } + + if d != a { + ne += da + } + if d != b { + ne += db + } + if d != c { + ne += dc + } + if d != d { + ne += dd + } + + if a >= a { + ge += aa + } + if a >= b { + ge += ab + } + if a >= c { + ge += ac + } + if a >= d { + ge += ad + } + + if b >= a { + ge += ba + } + if b >= b { + ge += bb + } + if b >= c { + ge += bc + } + if b >= d { + ge += bd + } + + 
if c >= a { + ge += ca + } + if c >= b { + ge += cb + } + if c >= c { + ge += cc + } + if c >= d { + ge += cd + } + + if d >= a { + ge += da + } + if d >= b { + ge += db + } + if d >= c { + ge += dc + } + if d >= d { + ge += dd + } + + if a > a { + gt += aa + } + if a > b { + gt += ab + } + if a > c { + gt += ac + } + if a > d { + gt += ad + } + + if b > a { + gt += ba + } + if b > b { + gt += bb + } + if b > c { + gt += bc + } + if b > d { + gt += bd + } + + if c > a { + gt += ca + } + if c > b { + gt += cb + } + if c > c { + gt += cc + } + if c > d { + gt += cd + } + + if d > a { + gt += da + } + if d > b { + gt += db + } + if d > c { + gt += dc + } + if d > d { + gt += dd + } + + return +} + +func le64_ssa(x, y float64) bool { + switch { + } + return x <= y +} +func ge64_ssa(x, y float64) bool { + switch { + } + return x >= y +} +func lt64_ssa(x, y float64) bool { + switch { + } + return x < y +} +func gt64_ssa(x, y float64) bool { + switch { + } + return x > y +} +func eq64_ssa(x, y float64) bool { + switch { + } + return x == y +} +func ne64_ssa(x, y float64) bool { + switch { + } + return x != y +} + +func eqbr64_ssa(x, y float64) float64 { + switch { + } + if x == y { + return 17 + } + return 42 +} +func nebr64_ssa(x, y float64) float64 { + switch { + } + if x != y { + return 17 + } + return 42 +} +func gebr64_ssa(x, y float64) float64 { + switch { + } + if x >= y { + return 17 + } + return 42 +} +func lebr64_ssa(x, y float64) float64 { + switch { + } + if x <= y { + return 17 + } + return 42 +} +func ltbr64_ssa(x, y float64) float64 { + switch { + } + if x < y { + return 17 + } + return 42 +} +func gtbr64_ssa(x, y float64) float64 { + switch { + } + if x > y { + return 17 + } + return 42 +} + +func le32_ssa(x, y float32) bool { + switch { + } + return x <= y +} +func ge32_ssa(x, y float32) bool { + switch { + } + return x >= y +} +func lt32_ssa(x, y float32) bool { + switch { + } + return x < y +} +func gt32_ssa(x, y float32) bool { + switch { + } + return x > y +} +func eq32_ssa(x, y float32) bool { + switch { + } + return x == y +} +func ne32_ssa(x, y float32) bool { + switch { + } + return x != y +} + +func eqbr32_ssa(x, y float32) float32 { + switch { + } + if x == y { + return 17 + } + return 42 +} +func nebr32_ssa(x, y float32) float32 { + switch { + } + if x != y { + return 17 + } + return 42 +} +func gebr32_ssa(x, y float32) float32 { + switch { + } + if x >= y { + return 17 + } + return 42 +} +func lebr32_ssa(x, y float32) float32 { + switch { + } + if x <= y { + return 17 + } + return 42 +} +func ltbr32_ssa(x, y float32) float32 { + switch { + } + if x < y { + return 17 + } + return 42 +} +func gtbr32_ssa(x, y float32) float32 { + switch { + } + if x > y { + return 17 + } + return 42 +} + +func fail64(s string, f func(a, b float64) float64, a, b, e float64) int { + d := f(a, b) + if d != e { + fmt.Printf("For (float64) %v %v %v, expected %v, got %v\n", a, s, b, e, d) + return 1 + } + return 0 +} + +func fail64bool(s string, f func(a, b float64) bool, a, b float64, e bool) int { + d := f(a, b) + if d != e { + fmt.Printf("For (float64) %v %v %v, expected %v, got %v\n", a, s, b, e, d) + return 1 + } + return 0 +} + +func fail32(s string, f func(a, b float32) float32, a, b, e float32) int { + d := f(a, b) + if d != e { + fmt.Printf("For (float32) %v %v %v, expected %v, got %v\n", a, s, b, e, d) + return 1 + } + return 0 +} + +func fail32bool(s string, f func(a, b float32) bool, a, b float32, e bool) int { + d := f(a, b) + if d != e { + fmt.Printf("For (float32) %v %v %v, 
expected %v, got %v\n", a, s, b, e, d) + return 1 + } + return 0 +} + +func expect64(s string, x, expected float64) int { + if x != expected { + println("Expected", expected, "for", s, ", got", x) + return 1 + } + return 0 +} + +func expect32(s string, x, expected float32) int { + if x != expected { + println("Expected", expected, "for", s, ", got", x) + return 1 + } + return 0 +} + +func expectUint64(s string, x, expected uint64) int { + if x != expected { + fmt.Printf("Expected 0x%016x for %s, got 0x%016x\n", expected, s, x) + return 1 + } + return 0 +} + +func expectAll64(s string, expected, a, b, c, d, e, f, g, h, i float64) int { + fails := 0 + fails += expect64(s+":a", a, expected) + fails += expect64(s+":b", b, expected) + fails += expect64(s+":c", c, expected) + fails += expect64(s+":d", d, expected) + fails += expect64(s+":e", e, expected) + fails += expect64(s+":f", f, expected) + fails += expect64(s+":g", g, expected) + return fails +} + +func expectAll32(s string, expected, a, b, c, d, e, f, g, h, i float32) int { + fails := 0 + fails += expect32(s+":a", a, expected) + fails += expect32(s+":b", b, expected) + fails += expect32(s+":c", c, expected) + fails += expect32(s+":d", d, expected) + fails += expect32(s+":e", e, expected) + fails += expect32(s+":f", f, expected) + fails += expect32(s+":g", g, expected) + return fails +} + +var ev64 [2]float64 = [2]float64{42.0, 17.0} +var ev32 [2]float32 = [2]float32{42.0, 17.0} + +func cmpOpTest(s string, + f func(a, b float64) bool, + g func(a, b float64) float64, + ff func(a, b float32) bool, + gg func(a, b float32) float32, + zero, one, inf, nan float64, result uint) int { + fails := 0 + fails += fail64bool(s, f, zero, zero, result>>16&1 == 1) + fails += fail64bool(s, f, zero, one, result>>12&1 == 1) + fails += fail64bool(s, f, zero, inf, result>>8&1 == 1) + fails += fail64bool(s, f, zero, nan, result>>4&1 == 1) + fails += fail64bool(s, f, nan, nan, result&1 == 1) + + fails += fail64(s, g, zero, zero, ev64[result>>16&1]) + fails += fail64(s, g, zero, one, ev64[result>>12&1]) + fails += fail64(s, g, zero, inf, ev64[result>>8&1]) + fails += fail64(s, g, zero, nan, ev64[result>>4&1]) + fails += fail64(s, g, nan, nan, ev64[result>>0&1]) + + { + zero := float32(zero) + one := float32(one) + inf := float32(inf) + nan := float32(nan) + fails += fail32bool(s, ff, zero, zero, (result>>16)&1 == 1) + fails += fail32bool(s, ff, zero, one, (result>>12)&1 == 1) + fails += fail32bool(s, ff, zero, inf, (result>>8)&1 == 1) + fails += fail32bool(s, ff, zero, nan, (result>>4)&1 == 1) + fails += fail32bool(s, ff, nan, nan, result&1 == 1) + + fails += fail32(s, gg, zero, zero, ev32[(result>>16)&1]) + fails += fail32(s, gg, zero, one, ev32[(result>>12)&1]) + fails += fail32(s, gg, zero, inf, ev32[(result>>8)&1]) + fails += fail32(s, gg, zero, nan, ev32[(result>>4)&1]) + fails += fail32(s, gg, nan, nan, ev32[(result>>0)&1]) + } + + return fails +} + func main() { a := 3.0 @@ -273,6 +1161,8 @@ func main() { // but should not underflow in float and in fact is exactly representable. fails += fail64("*", mul64_ssa, dtiny, dtiny, 1.9636373861190906e-90) + // Intended to create register pressure which forces + // asymmetric op into different code paths. 
aa, ab, ac, ad, ba, bb, bc, bd, ca, cb, cc, cd, da, db, dc, dd := manysub_ssa(1000.0, 100.0, 10.0, 1.0) fails += expect64("aa", aa, 11.0) @@ -297,6 +1187,39 @@ func main() { fails += integer2floatConversions() + var zero64 float64 = 0.0 + var one64 float64 = 1.0 + var inf64 float64 = 1.0 / zero64 + var nan64 float64 = sub64_ssa(inf64, inf64) + + fails += cmpOpTest("!=", ne64_ssa, nebr64_ssa, ne32_ssa, nebr32_ssa, zero64, one64, inf64, nan64, 0x01111) + fails += cmpOpTest("==", eq64_ssa, eqbr64_ssa, eq32_ssa, eqbr32_ssa, zero64, one64, inf64, nan64, 0x10000) + fails += cmpOpTest("<=", le64_ssa, lebr64_ssa, le32_ssa, lebr32_ssa, zero64, one64, inf64, nan64, 0x11100) + fails += cmpOpTest("<", lt64_ssa, ltbr64_ssa, lt32_ssa, ltbr32_ssa, zero64, one64, inf64, nan64, 0x01100) + fails += cmpOpTest(">", gt64_ssa, gtbr64_ssa, gt32_ssa, gtbr32_ssa, zero64, one64, inf64, nan64, 0x00000) + fails += cmpOpTest(">=", ge64_ssa, gebr64_ssa, ge32_ssa, gebr32_ssa, zero64, one64, inf64, nan64, 0x10000) + + { + lt, le, eq, ne, ge, gt := compares64_ssa(0.0, 1.0, inf64, nan64) + fails += expectUint64("lt", lt, 0x0110001000000000) + fails += expectUint64("le", le, 0x1110011000100000) + fails += expectUint64("eq", eq, 0x1000010000100000) + fails += expectUint64("ne", ne, 0x0111101111011111) + fails += expectUint64("ge", ge, 0x1000110011100000) + fails += expectUint64("gt", gt, 0x0000100011000000) + // fmt.Printf("lt=0x%016x, le=0x%016x, eq=0x%016x, ne=0x%016x, ge=0x%016x, gt=0x%016x\n", + // lt, le, eq, ne, ge, gt) + } + { + lt, le, eq, ne, ge, gt := compares32_ssa(0.0, 1.0, float32(inf64), float32(nan64)) + fails += expectUint64("lt", lt, 0x0110001000000000) + fails += expectUint64("le", le, 0x1110011000100000) + fails += expectUint64("eq", eq, 0x1000010000100000) + fails += expectUint64("ne", ne, 0x0111101111011111) + fails += expectUint64("ge", ge, 0x1000110011100000) + fails += expectUint64("gt", gt, 0x0000100011000000) + } + if fails > 0 { fmt.Printf("Saw %v failures\n", fails) panic("Failed.") diff --git a/src/cmd/compile/internal/ssa/fuse.go b/src/cmd/compile/internal/ssa/fuse.go index e6bd44d573..e390fc4998 100644 --- a/src/cmd/compile/internal/ssa/fuse.go +++ b/src/cmd/compile/internal/ssa/fuse.go @@ -35,7 +35,7 @@ func fuse(f *Func) { } // trash b, just in case - b.Kind = blockInvalid + b.Kind = BlockInvalid b.Values = nil b.Preds = nil b.Succs = nil diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 86b443c10d..ff89a7e899 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -198,53 +198,67 @@ (Less32 x y) -> (SETL (CMPL x y)) (Less16 x y) -> (SETL (CMPW x y)) (Less8 x y) -> (SETL (CMPB x y)) - (Less64U x y) -> (SETB (CMPQ x y)) (Less32U x y) -> (SETB (CMPL x y)) (Less16U x y) -> (SETB (CMPW x y)) (Less8U x y) -> (SETB (CMPB x y)) +// Use SETGF with reversed operands to dodge NaN case +(Less64F x y) -> (SETGF (UCOMISD y x)) +(Less32F x y) -> (SETGF (UCOMISS y x)) (Leq64 x y) -> (SETLE (CMPQ x y)) (Leq32 x y) -> (SETLE (CMPL x y)) (Leq16 x y) -> (SETLE (CMPW x y)) (Leq8 x y) -> (SETLE (CMPB x y)) - (Leq64U x y) -> (SETBE (CMPQ x y)) (Leq32U x y) -> (SETBE (CMPL x y)) (Leq16U x y) -> (SETBE (CMPW x y)) (Leq8U x y) -> (SETBE (CMPB x y)) +// Use SETGEF with reversed operands to dodge NaN case +(Leq64F x y) -> (SETGEF (UCOMISD y x)) +(Leq32F x y) -> (SETGEF (UCOMISS y x)) (Greater64 x y) -> (SETG (CMPQ x y)) (Greater32 x y) -> (SETG (CMPL x y)) (Greater16 x y) -> (SETG (CMPW x y)) (Greater8 x 
y) -> (SETG (CMPB x y)) - (Greater64U x y) -> (SETA (CMPQ x y)) (Greater32U x y) -> (SETA (CMPL x y)) (Greater16U x y) -> (SETA (CMPW x y)) (Greater8U x y) -> (SETA (CMPB x y)) +// Note Go assembler gets UCOMISx operand order wrong, but it is right here +// Bug is accommodated at generation of assembly language. +(Greater64F x y) -> (SETGF (UCOMISD x y)) +(Greater32F x y) -> (SETGF (UCOMISS x y)) (Geq64 x y) -> (SETGE (CMPQ x y)) (Geq32 x y) -> (SETGE (CMPL x y)) (Geq16 x y) -> (SETGE (CMPW x y)) (Geq8 x y) -> (SETGE (CMPB x y)) - (Geq64U x y) -> (SETAE (CMPQ x y)) (Geq32U x y) -> (SETAE (CMPL x y)) (Geq16U x y) -> (SETAE (CMPW x y)) (Geq8U x y) -> (SETAE (CMPB x y)) +// Note Go assembler gets UCOMISx operand order wrong, but it is right here +// Bug is accommodated at generation of assembly language. +(Geq64F x y) -> (SETGEF (UCOMISD x y)) +(Geq32F x y) -> (SETGEF (UCOMISS x y)) (Eq64 x y) -> (SETEQ (CMPQ x y)) (Eq32 x y) -> (SETEQ (CMPL x y)) (Eq16 x y) -> (SETEQ (CMPW x y)) (Eq8 x y) -> (SETEQ (CMPB x y)) (EqPtr x y) -> (SETEQ (CMPQ x y)) +(Eq64F x y) -> (SETEQF (UCOMISD x y)) +(Eq32F x y) -> (SETEQF (UCOMISS x y)) (Neq64 x y) -> (SETNE (CMPQ x y)) (Neq32 x y) -> (SETNE (CMPL x y)) (Neq16 x y) -> (SETNE (CMPW x y)) (Neq8 x y) -> (SETNE (CMPB x y)) (NeqPtr x y) -> (SETNE (CMPQ x y)) +(Neq64F x y) -> (SETNEF (UCOMISD x y)) +(Neq32F x y) -> (SETNEF (UCOMISS x y)) (Load ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload ptr mem) (Load ptr mem) && is32BitInt(t) -> (MOVLload ptr mem) @@ -304,6 +318,12 @@ (If (SETA cmp) yes no) -> (UGT cmp yes no) (If (SETAE cmp) yes no) -> (UGE cmp yes no) +// Special case for floating point - LF/LEF not generated +(If (SETGF cmp) yes no) -> (UGT cmp yes no) +(If (SETGEF cmp) yes no) -> (UGE cmp yes no) +(If (SETEQF cmp) yes no) -> (EQF cmp yes no) +(If (SETNEF cmp) yes no) -> (EQF cmp yes no) + (If cond yes no) -> (NE (TESTB cond cond) yes no) (NE (TESTB (SETL cmp)) yes no) -> (LT cmp yes no) @@ -317,6 +337,16 @@ (NE (TESTB (SETA cmp)) yes no) -> (UGT cmp yes no) (NE (TESTB (SETAE cmp)) yes no) -> (UGE cmp yes no) +// Special case for floating point - LF/LEF not generated +(NE (TESTB (SETGF cmp)) yes no) -> (UGT cmp yes no) +(NE (TESTB (SETGEF cmp)) yes no) -> (UGE cmp yes no) +(NE (TESTB (SETEQF cmp)) yes no) -> (EQF cmp yes no) +(NE (TESTB (SETNEF cmp)) yes no) -> (NEF cmp yes no) + +// Disabled because it interferes with the pattern match above and makes worse code. 
+// (SETNEF x) -> (ORQ (SETNE x) (SETNAN x)) +// (SETEQF x) -> (ANDQ (SETEQ x) (SETORD x)) + (StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem) (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem) @@ -519,7 +549,6 @@ (MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) -> (MOVSDstoreidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem) - (ADDQconst [0] x) -> x // lower Zero instructions with word sizes diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 8b8da225d1..e610458c92 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -83,7 +83,6 @@ func init() { flags = buildReg("FLAGS") callerSave = gp | fp | flags ) - // Common slices of register masks var ( gponly = []regMask{gp} @@ -110,8 +109,9 @@ func init() { gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly} gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly} - flagsgp = regInfo{inputs: flagsonly, outputs: gponly, clobbers: flags} + flagsgp = regInfo{inputs: flagsonly, outputs: gponly} readflags = regInfo{inputs: flagsonly, outputs: gponly} + flagsgpax = regInfo{inputs: flagsonly, clobbers: ax, outputs: []regMask{gp &^ ax}} gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly} gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly} @@ -124,10 +124,11 @@ func init() { fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly} fp21x15 = regInfo{inputs: []regMask{fp &^ x15, fp &^ x15}, clobbers: x15, outputs: []regMask{fp &^ x15}} - - fpgp = regInfo{inputs: fponly, outputs: gponly} - gpfp = regInfo{inputs: gponly, outputs: fponly} - fp11 = regInfo{inputs: fponly, outputs: fponly} + fpgp = regInfo{inputs: fponly, outputs: gponly} + gpfp = regInfo{inputs: gponly, outputs: fponly} + fp11 = regInfo{inputs: fponly, outputs: fponly} + fp2flags = regInfo{inputs: []regMask{fp, fp}, outputs: flagsonly} + // fp1flags = regInfo{inputs: fponly, outputs: flagsonly} fpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: fponly} fploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: fponly} @@ -249,6 +250,9 @@ func init() { {name: "CMPWconst", reg: gp1flags, asm: "CMPW"}, // arg0 compare to auxint {name: "CMPBconst", reg: gp1flags, asm: "CMPB"}, // arg0 compare to auxint + {name: "UCOMISS", reg: fp2flags, asm: "UCOMISS"}, // arg0 compare to arg1, f32 + {name: "UCOMISD", reg: fp2flags, asm: "UCOMISD"}, // arg0 compare to arg1, f64 + {name: "TESTQ", reg: gp2flags, asm: "TESTQ"}, // (arg0 & arg1) compare to 0 {name: "TESTL", reg: gp2flags, asm: "TESTL"}, // (arg0 & arg1) compare to 0 {name: "TESTW", reg: gp2flags, asm: "TESTW"}, // (arg0 & arg1) compare to 0 @@ -316,6 +320,16 @@ func init() { {name: "SETBE", reg: readflags, asm: "SETLS"}, // extract unsigned <= condition from arg0 {name: "SETA", reg: readflags, asm: "SETHI"}, // extract unsigned > condition from arg0 {name: "SETAE", reg: readflags, asm: "SETCC"}, // extract unsigned >= condition from arg0 + // Need different opcodes for floating point conditions because + // any comparison involving a NaN is always FALSE and thus + // the patterns for inverting conditions cannot be used. 
+ {name: "SETEQF", reg: flagsgpax, asm: "SETEQ"}, // extract == condition from arg0 + {name: "SETNEF", reg: flagsgpax, asm: "SETNE"}, // extract != condition from arg0 + {name: "SETORD", reg: flagsgp, asm: "SETPC"}, // extract "ordered" (No Nan present) condition from arg0 + {name: "SETNAN", reg: flagsgp, asm: "SETPS"}, // extract "unordered" (Nan present) condition from arg0 + + {name: "SETGF", reg: flagsgp, asm: "SETHI"}, // extract floating > condition from arg0 + {name: "SETGEF", reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0 {name: "MOVBQSX", reg: gp11nf, asm: "MOVBQSX"}, // sign extend arg0 from int8 to int64 {name: "MOVBQZX", reg: gp11nf, asm: "MOVBQZX"}, // zero extend arg0 from int8 to int64 @@ -395,6 +409,10 @@ func init() { {name: "ULE"}, {name: "UGT"}, {name: "UGE"}, + {name: "EQF"}, + {name: "NEF"}, + {name: "ORD"}, // FP, ordered comparison (parity zero) + {name: "NAN"}, // FP, unordered comparison (parity one) } archs = append(archs, arch{"AMD64", AMD64ops, AMD64blocks, regNamesAMD64}) diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go index 4a65a87ea8..a0040d3017 100644 --- a/src/cmd/compile/internal/ssa/gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/gen/genericOps.go @@ -161,6 +161,8 @@ var genericOps = []opData{ {name: "Eq64"}, {name: "EqPtr"}, {name: "EqFat"}, // slice/interface; arg0 or arg1 is nil; other cases handled by frontend + {name: "Eq32F"}, + {name: "Eq64F"}, {name: "Neq8"}, // arg0 != arg1 {name: "Neq16"}, @@ -168,6 +170,8 @@ var genericOps = []opData{ {name: "Neq64"}, {name: "NeqPtr"}, {name: "NeqFat"}, // slice/interface; arg0 or arg1 is nil; other cases handled by frontend + {name: "Neq32F"}, + {name: "Neq64F"}, {name: "Less8"}, // arg0 < arg1 {name: "Less8U"}, @@ -177,6 +181,8 @@ var genericOps = []opData{ {name: "Less32U"}, {name: "Less64"}, {name: "Less64U"}, + {name: "Less32F"}, + {name: "Less64F"}, {name: "Leq8"}, // arg0 <= arg1 {name: "Leq8U"}, @@ -186,6 +192,8 @@ var genericOps = []opData{ {name: "Leq32U"}, {name: "Leq64"}, {name: "Leq64U"}, + {name: "Leq32F"}, + {name: "Leq64F"}, {name: "Greater8"}, // arg0 > arg1 {name: "Greater8U"}, @@ -195,6 +203,8 @@ var genericOps = []opData{ {name: "Greater32U"}, {name: "Greater64"}, {name: "Greater64U"}, + {name: "Greater32F"}, + {name: "Greater64F"}, {name: "Geq8"}, // arg0 <= arg1 {name: "Geq8U"}, @@ -204,6 +214,8 @@ var genericOps = []opData{ {name: "Geq32U"}, {name: "Geq64"}, {name: "Geq64U"}, + {name: "Geq32F"}, + {name: "Geq64F"}, // 1-input ops {name: "Not"}, // !arg0 diff --git a/src/cmd/compile/internal/ssa/gen/main.go b/src/cmd/compile/internal/ssa/gen/main.go index 6620c0a1d0..1383566e3a 100644 --- a/src/cmd/compile/internal/ssa/gen/main.go +++ b/src/cmd/compile/internal/ssa/gen/main.go @@ -76,7 +76,7 @@ func genOp() { // generate Block* declarations fmt.Fprintln(w, "const (") - fmt.Fprintln(w, "blockInvalid BlockKind = iota") + fmt.Fprintln(w, "BlockInvalid BlockKind = iota") for _, a := range archs { fmt.Fprintln(w) for _, d := range a.blocks { @@ -87,7 +87,7 @@ func genOp() { // generate block kind string method fmt.Fprintln(w, "var blockString = [...]string{") - fmt.Fprintln(w, "blockInvalid:\"BlockInvalid\",") + fmt.Fprintln(w, "BlockInvalid:\"BlockInvalid\",") for _, a := range archs { fmt.Fprintln(w) for _, b := range a.blocks { diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index aa51cbc301..4eccb463da 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ 
b/src/cmd/compile/internal/ssa/opGen.go @@ -5,7 +5,7 @@ package ssa import "cmd/internal/obj/x86" const ( - blockInvalid BlockKind = iota + BlockInvalid BlockKind = iota BlockAMD64EQ BlockAMD64NE @@ -17,6 +17,10 @@ const ( BlockAMD64ULE BlockAMD64UGT BlockAMD64UGE + BlockAMD64EQF + BlockAMD64NEF + BlockAMD64ORD + BlockAMD64NAN BlockExit BlockDead @@ -26,7 +30,7 @@ const ( ) var blockString = [...]string{ - blockInvalid: "BlockInvalid", + BlockInvalid: "BlockInvalid", BlockAMD64EQ: "EQ", BlockAMD64NE: "NE", @@ -38,6 +42,10 @@ var blockString = [...]string{ BlockAMD64ULE: "ULE", BlockAMD64UGT: "UGT", BlockAMD64UGE: "UGE", + BlockAMD64EQF: "EQF", + BlockAMD64NEF: "NEF", + BlockAMD64ORD: "ORD", + BlockAMD64NAN: "NAN", BlockExit: "Exit", BlockDead: "Dead", @@ -143,6 +151,8 @@ const ( OpAMD64CMPLconst OpAMD64CMPWconst OpAMD64CMPBconst + OpAMD64UCOMISS + OpAMD64UCOMISD OpAMD64TESTQ OpAMD64TESTL OpAMD64TESTW @@ -199,6 +209,12 @@ const ( OpAMD64SETBE OpAMD64SETA OpAMD64SETAE + OpAMD64SETEQF + OpAMD64SETNEF + OpAMD64SETORD + OpAMD64SETNAN + OpAMD64SETGF + OpAMD64SETGEF OpAMD64MOVBQSX OpAMD64MOVBQZX OpAMD64MOVWQSX @@ -361,12 +377,16 @@ const ( OpEq64 OpEqPtr OpEqFat + OpEq32F + OpEq64F OpNeq8 OpNeq16 OpNeq32 OpNeq64 OpNeqPtr OpNeqFat + OpNeq32F + OpNeq64F OpLess8 OpLess8U OpLess16 @@ -375,6 +395,8 @@ const ( OpLess32U OpLess64 OpLess64U + OpLess32F + OpLess64F OpLeq8 OpLeq8U OpLeq16 @@ -383,6 +405,8 @@ const ( OpLeq32U OpLeq64 OpLeq64U + OpLeq32F + OpLeq64F OpGreater8 OpGreater8U OpGreater16 @@ -391,6 +415,8 @@ const ( OpGreater32U OpGreater64 OpGreater64U + OpGreater32F + OpGreater64F OpGeq8 OpGeq8U OpGeq16 @@ -399,6 +425,8 @@ const ( OpGeq32U OpGeq64 OpGeq64U + OpGeq32F + OpGeq64F OpNot OpNeg8 OpNeg16 @@ -1707,6 +1735,32 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "UCOMISS", + asm: x86.AUCOMISS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // .X0 .X1 .X2 .X3 .X4 .X5 .X6 .X7 .X8 .X9 .X10 .X11 .X12 .X13 .X14 .X15 + {1, 4294901760}, // .X0 .X1 .X2 .X3 .X4 .X5 .X6 .X7 .X8 .X9 .X10 .X11 .X12 .X13 .X14 .X15 + }, + outputs: []regMask{ + 8589934592, // .FLAGS + }, + }, + }, + { + name: "UCOMISD", + asm: x86.AUCOMISD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // .X0 .X1 .X2 .X3 .X4 .X5 .X6 .X7 .X8 .X9 .X10 .X11 .X12 .X13 .X14 .X15 + {1, 4294901760}, // .X0 .X1 .X2 .X3 .X4 .X5 .X6 .X7 .X8 .X9 .X10 .X11 .X12 .X13 .X14 .X15 + }, + outputs: []regMask{ + 8589934592, // .FLAGS + }, + }, + }, { name: "TESTQ", asm: x86.ATESTQ, @@ -2432,6 +2486,84 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "SETEQF", + asm: x86.ASETEQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 8589934592}, // .FLAGS + }, + clobbers: 8589934593, // .AX .FLAGS + outputs: []regMask{ + 65518, // .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + }, + }, + { + name: "SETNEF", + asm: x86.ASETNE, + reg: regInfo{ + inputs: []inputInfo{ + {0, 8589934592}, // .FLAGS + }, + clobbers: 8589934593, // .AX .FLAGS + outputs: []regMask{ + 65518, // .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + }, + }, + { + name: "SETORD", + asm: x86.ASETPC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 8589934592}, // .FLAGS + }, + clobbers: 8589934592, // .FLAGS + outputs: []regMask{ + 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + }, + }, + { + name: "SETNAN", + asm: x86.ASETPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 8589934592}, // .FLAGS + }, + clobbers: 8589934592, // .FLAGS + outputs: []regMask{ + 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 
.R10 .R11 .R12 .R13 .R14 .R15 + }, + }, + }, + { + name: "SETGF", + asm: x86.ASETHI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 8589934592}, // .FLAGS + }, + clobbers: 8589934592, // .FLAGS + outputs: []regMask{ + 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + }, + }, + { + name: "SETGEF", + asm: x86.ASETCC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 8589934592}, // .FLAGS + }, + clobbers: 8589934592, // .FLAGS + outputs: []regMask{ + 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 + }, + }, + }, { name: "MOVBQSX", asm: x86.AMOVBQSX, @@ -3386,6 +3518,14 @@ var opcodeTable = [...]opInfo{ name: "EqFat", generic: true, }, + { + name: "Eq32F", + generic: true, + }, + { + name: "Eq64F", + generic: true, + }, { name: "Neq8", generic: true, @@ -3410,6 +3550,14 @@ var opcodeTable = [...]opInfo{ name: "NeqFat", generic: true, }, + { + name: "Neq32F", + generic: true, + }, + { + name: "Neq64F", + generic: true, + }, { name: "Less8", generic: true, @@ -3442,6 +3590,14 @@ var opcodeTable = [...]opInfo{ name: "Less64U", generic: true, }, + { + name: "Less32F", + generic: true, + }, + { + name: "Less64F", + generic: true, + }, { name: "Leq8", generic: true, @@ -3474,6 +3630,14 @@ var opcodeTable = [...]opInfo{ name: "Leq64U", generic: true, }, + { + name: "Leq32F", + generic: true, + }, + { + name: "Leq64F", + generic: true, + }, { name: "Greater8", generic: true, @@ -3506,6 +3670,14 @@ var opcodeTable = [...]opInfo{ name: "Greater64U", generic: true, }, + { + name: "Greater32F", + generic: true, + }, + { + name: "Greater64F", + generic: true, + }, { name: "Geq8", generic: true, @@ -3538,6 +3710,14 @@ var opcodeTable = [...]opInfo{ name: "Geq64U", generic: true, }, + { + name: "Geq32F", + generic: true, + }, + { + name: "Geq64F", + generic: true, + }, { name: "Not", generic: true, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index b50fecda2e..dc6dce995b 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -2082,6 +2082,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto end4d77d0b016f93817fd6e5f60fa0e7ef2 end4d77d0b016f93817fd6e5f60fa0e7ef2: ; + case OpEq32F: + // match: (Eq32F x y) + // cond: + // result: (SETEQF (UCOMISS x y)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETEQF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + goto end034925b03df528b1ffec9fafdcd56c8e + end034925b03df528b1ffec9fafdcd56c8e: + ; case OpEq64: // match: (Eq64 x y) // cond: @@ -2103,6 +2124,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto endae6c62e4e20b4f62694b6ee40dbd9211 endae6c62e4e20b4f62694b6ee40dbd9211: ; + case OpEq64F: + // match: (Eq64F x y) + // cond: + // result: (SETEQF (UCOMISD x y)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETEQF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + goto end62b2fb60187571e6ab0c53696ef7d030 + end62b2fb60187571e6ab0c53696ef7d030: + ; case OpEq8: // match: (Eq8 x y) // cond: @@ -2208,6 +2250,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto end713c3dfa0f7247dcc232bcfc916fb044 end713c3dfa0f7247dcc232bcfc916fb044: ; + case OpGeq32F: + // match: (Geq32F x y) 
+ // cond: + // result: (SETGEF (UCOMISS x y)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETGEF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + goto end5847ac7f2e264fba4c408ebb60c1e8a5 + end5847ac7f2e264fba4c408ebb60c1e8a5: + ; case OpGeq32U: // match: (Geq32U x y) // cond: @@ -2250,6 +2313,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto end63f44e3fec8d92723b5bde42d6d7eea0 end63f44e3fec8d92723b5bde42d6d7eea0: ; + case OpGeq64F: + // match: (Geq64F x y) + // cond: + // result: (SETGEF (UCOMISD x y)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETGEF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + goto endb40fbc46a8fc04fef95182771e2933c2 + endb40fbc46a8fc04fef95182771e2933c2: + ; case OpGeq64U: // match: (Geq64U x y) // cond: @@ -2390,6 +2474,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto endbf0b2b1368aadff48969a7386eee5795 endbf0b2b1368aadff48969a7386eee5795: ; + case OpGreater32F: + // match: (Greater32F x y) + // cond: + // result: (SETGF (UCOMISS x y)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETGF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + goto endb65b042358784f18002ae59ea6f2c51a + endb65b042358784f18002ae59ea6f2c51a: + ; case OpGreater32U: // match: (Greater32U x y) // cond: @@ -2432,6 +2537,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto endaef0cfa5e27e23cf5e527061cf251069 endaef0cfa5e27e23cf5e527061cf251069: ; + case OpGreater64F: + // match: (Greater64F x y) + // cond: + // result: (SETGF (UCOMISD x y)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETGF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + goto end1a6ca23bbb3e885473865e3b3ea501e7 + end1a6ca23bbb3e885473865e3b3ea501e7: + ; case OpGreater64U: // match: (Greater64U x y) // cond: @@ -2728,6 +2854,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto endf422ecc8da0033e22242de9c67112537 endf422ecc8da0033e22242de9c67112537: ; + case OpLeq32F: + // match: (Leq32F x y) + // cond: + // result: (SETGEF (UCOMISS y x)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETGEF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(y) + v0.AddArg(x) + v.AddArg(v0) + return true + } + goto end98f7b2e6e15ce282d044c812454fe77f + end98f7b2e6e15ce282d044c812454fe77f: + ; case OpLeq32U: // match: (Leq32U x y) // cond: @@ -2770,6 +2917,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto endf03da5e28dccdb4797671f39e824fb10 endf03da5e28dccdb4797671f39e824fb10: ; + case OpLeq64F: + // match: (Leq64F x y) + // cond: + // result: (SETGEF (UCOMISD y x)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETGEF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(y) + v0.AddArg(x) + v.AddArg(v0) + return true + } + goto end7efa164f4e4f5a395f547b1885b7eef4 + end7efa164f4e4f5a395f547b1885b7eef4: + ; 
case OpLeq64U: // match: (Leq64U x y) // cond: @@ -2896,6 +3064,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto end8da8d2030c0a323a84503c1240c566ae end8da8d2030c0a323a84503c1240c566ae: ; + case OpLess32F: + // match: (Less32F x y) + // cond: + // result: (SETGF (UCOMISS y x)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETGF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(y) + v0.AddArg(x) + v.AddArg(v0) + return true + } + goto end54f94ce87c18a1ed2beb8d0161bea907 + end54f94ce87c18a1ed2beb8d0161bea907: + ; case OpLess32U: // match: (Less32U x y) // cond: @@ -2938,6 +3127,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto endf8e7a24c25692045bbcfd2c9356d1a8c endf8e7a24c25692045bbcfd2c9356d1a8c: ; + case OpLess64F: + // match: (Less64F x y) + // cond: + // result: (SETGF (UCOMISD y x)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETGF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(y) + v0.AddArg(x) + v.AddArg(v0) + return true + } + goto end92720155a95cbfae47ea469583c4d3c7 + end92720155a95cbfae47ea469583c4d3c7: + ; case OpLess64U: // match: (Less64U x y) // cond: @@ -5902,6 +6112,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto end39c4bf6d063f8a0b6f0064c96ce25173 end39c4bf6d063f8a0b6f0064c96ce25173: ; + case OpNeq32F: + // match: (Neq32F x y) + // cond: + // result: (SETNEF (UCOMISS x y)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETNEF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISS, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + goto end4eb0af70b64b789e55d83c15e426b0c5 + end4eb0af70b64b789e55d83c15e426b0c5: + ; case OpNeq64: // match: (Neq64 x y) // cond: @@ -5923,6 +6154,27 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto end8ab0bcb910c0d3213dd8726fbcc4848e end8ab0bcb910c0d3213dd8726fbcc4848e: ; + case OpNeq64F: + // match: (Neq64F x y) + // cond: + // result: (SETNEF (UCOMISD x y)) + { + x := v.Args[0] + y := v.Args[1] + v.Op = OpAMD64SETNEF + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + v0 := b.NewValue0(v.Line, OpAMD64UCOMISD, TypeInvalid) + v0.Type = TypeFlags + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + goto end73beb54a015a226bc2e83bdd39e7ee46 + end73beb54a015a226bc2e83bdd39e7ee46: + ; case OpNeq8: // match: (Neq8 x y) // cond: @@ -10358,6 +10610,86 @@ func rewriteBlockAMD64(b *Block) bool { } goto end9bea9963c3c5dfb97249a5feb8287f94 end9bea9963c3c5dfb97249a5feb8287f94: + ; + // match: (If (SETGF cmp) yes no) + // cond: + // result: (UGT cmp yes no) + { + v := b.Control + if v.Op != OpAMD64SETGF { + goto enda72d68674cfa26b5982a43756bca6767 + } + cmp := v.Args[0] + yes := b.Succs[0] + no := b.Succs[1] + b.Kind = BlockAMD64UGT + b.Control = cmp + b.Succs[0] = yes + b.Succs[1] = no + return true + } + goto enda72d68674cfa26b5982a43756bca6767 + enda72d68674cfa26b5982a43756bca6767: + ; + // match: (If (SETGEF cmp) yes no) + // cond: + // result: (UGE cmp yes no) + { + v := b.Control + if v.Op != OpAMD64SETGEF { + goto endccc171c1d66dd60ac0275d1f78259315 + } + cmp := v.Args[0] + yes := b.Succs[0] + no := b.Succs[1] + b.Kind = BlockAMD64UGE + b.Control = cmp + b.Succs[0] = yes + b.Succs[1] = no + return true + } + goto endccc171c1d66dd60ac0275d1f78259315 + endccc171c1d66dd60ac0275d1f78259315: + ; + // match: 
(If (SETEQF cmp) yes no) + // cond: + // result: (EQF cmp yes no) + { + v := b.Control + if v.Op != OpAMD64SETEQF { + goto end58cb74d05266a79003ebdd733afb66fa + } + cmp := v.Args[0] + yes := b.Succs[0] + no := b.Succs[1] + b.Kind = BlockAMD64EQF + b.Control = cmp + b.Succs[0] = yes + b.Succs[1] = no + return true + } + goto end58cb74d05266a79003ebdd733afb66fa + end58cb74d05266a79003ebdd733afb66fa: + ; + // match: (If (SETNEF cmp) yes no) + // cond: + // result: (EQF cmp yes no) + { + v := b.Control + if v.Op != OpAMD64SETNEF { + goto endfe25939ca97349543bc2d2ce4f97ba41 + } + cmp := v.Args[0] + yes := b.Succs[0] + no := b.Succs[1] + b.Kind = BlockAMD64EQF + b.Control = cmp + b.Succs[0] = yes + b.Succs[1] = no + return true + } + goto endfe25939ca97349543bc2d2ce4f97ba41 + endfe25939ca97349543bc2d2ce4f97ba41: ; // match: (If cond yes no) // cond: @@ -10652,6 +10984,98 @@ func rewriteBlockAMD64(b *Block) bool { } goto endbd122fd599aeb9e60881a0fa735e2fde endbd122fd599aeb9e60881a0fa735e2fde: + ; + // match: (NE (TESTB (SETGF cmp)) yes no) + // cond: + // result: (UGT cmp yes no) + { + v := b.Control + if v.Op != OpAMD64TESTB { + goto endb2499521f7f351e24757f8c918c3598e + } + if v.Args[0].Op != OpAMD64SETGF { + goto endb2499521f7f351e24757f8c918c3598e + } + cmp := v.Args[0].Args[0] + yes := b.Succs[0] + no := b.Succs[1] + b.Kind = BlockAMD64UGT + b.Control = cmp + b.Succs[0] = yes + b.Succs[1] = no + return true + } + goto endb2499521f7f351e24757f8c918c3598e + endb2499521f7f351e24757f8c918c3598e: + ; + // match: (NE (TESTB (SETGEF cmp)) yes no) + // cond: + // result: (UGE cmp yes no) + { + v := b.Control + if v.Op != OpAMD64TESTB { + goto end20461774babea665c4ca7c4f790a7209 + } + if v.Args[0].Op != OpAMD64SETGEF { + goto end20461774babea665c4ca7c4f790a7209 + } + cmp := v.Args[0].Args[0] + yes := b.Succs[0] + no := b.Succs[1] + b.Kind = BlockAMD64UGE + b.Control = cmp + b.Succs[0] = yes + b.Succs[1] = no + return true + } + goto end20461774babea665c4ca7c4f790a7209 + end20461774babea665c4ca7c4f790a7209: + ; + // match: (NE (TESTB (SETEQF cmp)) yes no) + // cond: + // result: (EQF cmp yes no) + { + v := b.Control + if v.Op != OpAMD64TESTB { + goto end236616ef13d489b78736cda7bcc1d168 + } + if v.Args[0].Op != OpAMD64SETEQF { + goto end236616ef13d489b78736cda7bcc1d168 + } + cmp := v.Args[0].Args[0] + yes := b.Succs[0] + no := b.Succs[1] + b.Kind = BlockAMD64EQF + b.Control = cmp + b.Succs[0] = yes + b.Succs[1] = no + return true + } + goto end236616ef13d489b78736cda7bcc1d168 + end236616ef13d489b78736cda7bcc1d168: + ; + // match: (NE (TESTB (SETNEF cmp)) yes no) + // cond: + // result: (NEF cmp yes no) + { + v := b.Control + if v.Op != OpAMD64TESTB { + goto endc992f3c266b16cb5f6aa98faa8f55600 + } + if v.Args[0].Op != OpAMD64SETNEF { + goto endc992f3c266b16cb5f6aa98faa8f55600 + } + cmp := v.Args[0].Args[0] + yes := b.Succs[0] + no := b.Succs[1] + b.Kind = BlockAMD64NEF + b.Control = cmp + b.Succs[0] = yes + b.Succs[1] = no + return true + } + goto endc992f3c266b16cb5f6aa98faa8f55600 + endc992f3c266b16cb5f6aa98faa8f55600: ; // match: (NE (InvertFlags cmp) yes no) // cond:
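
Editor's note, separate from the patch above: the rules reproduced in this commit lean on two IEEE-754 facts that the comments mention only briefly. Any ordered comparison involving a NaN is false (so the usual trick of inverting a condition cannot be used, hence the dedicated SETEQF/SETNEF/SETGF/SETGEF ops and the EQF/NEF block kinds that consult the parity flag), and operands are reversed for < and <= (e.g. (Less64F x y) -> (SETGF (UCOMISD y x))) so that only the unsigned-above style conditions are needed. The following is a minimal, self-contained Go sketch, not part of the commit, that demonstrates exactly those comparison semantics using only the standard library:

package main

import (
	"fmt"
	"math"
)

func main() {
	nan := math.NaN()
	one := 1.0

	// Every ordered comparison involving NaN is false ...
	fmt.Println(nan < one, nan <= one, nan == nan, nan >= one, nan > one) // false false false false false

	// ... and only != is true, which is the "x != x" NaN test the
	// commit message lists as a future pattern-matching candidate.
	fmt.Println(nan != nan) // true

	// The rewrite rules compute x < y as y > x (reversed operands),
	// which stays false when either operand is NaN without any extra
	// parity-flag handling.
	x, y := 1.0, nan
	fmt.Println(x < y, y > x) // false false
}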