mirror of
https://github.com/golang/go
synced 2024-11-16 15:50:22 -07:00
854e892ce1
Optimize combinations of left and right shifts by a constant value into a 'rotate then insert selected bits [into zero]' instruction. Use the same instruction for contiguous masks since it has some benefits over 'and immediate' (not restricted to 32-bits, does not overwrite source register). To keep the complexity of this change under control I've only implemented 64 bit operations for now. There are a lot more optimizations that can be done with this instruction family. However, since their function overlaps with other instructions we need to be somewhat careful not to break existing optimization rules by creating optimization dead ends. This is particularly true of the load/store merging rules which contain lots of zero extensions and shifts. This CL does interfere with the store merging rules when an operand is shifted left before it is stored: binary.BigEndian.PutUint64(b, x << 1) This is unfortunate but it's not critical and somewhat complex so I plan to fix that in a follow up CL. 
file before after Δ % addr2line 4117446 4117282 -164 -0.004% api 4945184 4942752 -2432 -0.049% asm 4998079 4991891 -6188 -0.124% buildid 2685158 2684074 -1084 -0.040% cgo 4553732 4553394 -338 -0.007% compile 19294446 19245070 -49376 -0.256% cover 4897105 4891319 -5786 -0.118% dist 3544389 3542785 -1604 -0.045% doc 3926795 3927617 +822 +0.021% fix 3302958 3293868 -9090 -0.275% link 6546274 6543456 -2818 -0.043% nm 4102021 4100825 -1196 -0.029% objdump 4542431 4548483 +6052 +0.133% pack 2482465 2416389 -66076 -2.662% pprof 13366541 13363915 -2626 -0.020% test2json 2829007 2761515 -67492 -2.386% trace 10216164 10219684 +3520 +0.034% vet 6773956 6773572 -384 -0.006% total 107124151 106917891 -206260 -0.193% Change-Id: I7591cce41e06867ba10a745daae9333513062746 Reviewed-on: https://go-review.googlesource.com/c/go/+/233317 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org> Trust: Michael Munday <mike.munday@ibm.com>
742 lines
16 KiB
Go
742 lines
16 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
import "math/bits"
|
|
|
|
// ----------------------- //
|
|
// bits.LeadingZeros //
|
|
// ----------------------- //
|
|
|
|
// LeadingZeros returns bits.LeadingZeros(n), the number of leading zero
// bits in n. The architecture-tagged comments are asmcheck patterns for the
// statement that follows them.
func LeadingZeros(n uint) int {
	// amd64:"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	return bits.LeadingZeros(n)
}
|
|
|
|
// LeadingZeros64 returns bits.LeadingZeros64(n), the number of leading zero
// bits in n (64 for n == 0).
func LeadingZeros64(n uint64) int {
	// amd64:"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	return bits.LeadingZeros64(n)
}
|
|
|
|
// LeadingZeros32 returns bits.LeadingZeros32(n), the number of leading zero
// bits in n (32 for n == 0).
func LeadingZeros32(n uint32) int {
	// amd64:"BSRQ","LEAQ",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZW"
	// mips:"CLZ"
	// wasm:"I64Clz"
	return bits.LeadingZeros32(n)
}
|
|
|
|
// LeadingZeros16 returns bits.LeadingZeros16(n), the number of leading zero
// bits in n (16 for n == 0).
func LeadingZeros16(n uint16) int {
	// amd64:"BSRL","LEAL",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	return bits.LeadingZeros16(n)
}
|
|
|
|
// LeadingZeros8 returns bits.LeadingZeros8(n), the number of leading zero
// bits in n (8 for n == 0).
func LeadingZeros8(n uint8) int {
	// amd64:"BSRL","LEAL",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	return bits.LeadingZeros8(n)
}
|
|
|
|
// --------------- //
|
|
// bits.Len* //
|
|
// --------------- //
|
|
|
|
// Len returns bits.Len(n), the minimum number of bits needed to represent n
// (0 for n == 0).
func Len(n uint) int {
	// amd64:"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	return bits.Len(n)
}
|
|
|
|
// Len64 returns bits.Len64(n), the minimum number of bits needed to
// represent n (0 for n == 0).
func Len64(n uint64) int {
	// amd64:"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	// ppc64le:"SUBC","CNTLZD"
	// ppc64:"SUBC","CNTLZD"
	return bits.Len64(n)
}
|
|
|
|
// SubFromLen64 returns 64 - bits.Len64(n), which equals the number of
// leading zero bits in n.
func SubFromLen64(n uint64) int {
	// ppc64le:"CNTLZD",-"SUBC"
	// ppc64:"CNTLZD",-"SUBC"
	return 64 - bits.Len64(n)
}
|
|
|
|
// Len32 returns bits.Len32(n), the minimum number of bits needed to
// represent n (0 for n == 0).
func Len32(n uint32) int {
	// amd64:"BSRQ","LEAQ",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	return bits.Len32(n)
}
|
|
|
|
// Len16 returns bits.Len16(n), the minimum number of bits needed to
// represent n (0 for n == 0).
func Len16(n uint16) int {
	// amd64:"BSRL","LEAL",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	return bits.Len16(n)
}
|
|
|
|
// Len8 returns bits.Len8(n), the minimum number of bits needed to represent
// n (0 for n == 0).
func Len8(n uint8) int {
	// amd64:"BSRL","LEAL",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	// wasm:"I64Clz"
	return bits.Len8(n)
}
|
|
|
|
// -------------------- //
|
|
// bits.OnesCount //
|
|
// -------------------- //
|
|
|
|
// OnesCount returns bits.OnesCount(n), the number of one bits in n.
// amd64:".*x86HasPOPCNT"
func OnesCount(n uint) int {
	// amd64:"POPCNTQ"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64:"POPCNTD"
	// ppc64le:"POPCNTD"
	// wasm:"I64Popcnt"
	return bits.OnesCount(n)
}
|
|
|
|
// OnesCount64 returns bits.OnesCount64(n), the number of one bits in n.
// amd64:".*x86HasPOPCNT"
func OnesCount64(n uint64) int {
	// amd64:"POPCNTQ"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64:"POPCNTD"
	// ppc64le:"POPCNTD"
	// wasm:"I64Popcnt"
	return bits.OnesCount64(n)
}
|
|
|
|
// OnesCount32 returns bits.OnesCount32(n), the number of one bits in n.
// amd64:".*x86HasPOPCNT"
func OnesCount32(n uint32) int {
	// amd64:"POPCNTL"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64:"POPCNTW"
	// ppc64le:"POPCNTW"
	// wasm:"I64Popcnt"
	return bits.OnesCount32(n)
}
|
|
|
|
// OnesCount16 returns bits.OnesCount16(n), the number of one bits in n.
// amd64:".*x86HasPOPCNT"
func OnesCount16(n uint16) int {
	// amd64:"POPCNTL"
	// arm64:"VCNT","VUADDLV"
	// s390x:"POPCNT"
	// ppc64:"POPCNTW"
	// ppc64le:"POPCNTW"
	// wasm:"I64Popcnt"
	return bits.OnesCount16(n)
}
|
|
|
|
// OnesCount8 returns bits.OnesCount8(n), the number of one bits in n.
func OnesCount8(n uint8) int {
	// s390x:"POPCNT"
	// ppc64:"POPCNTB"
	// ppc64le:"POPCNTB"
	// wasm:"I64Popcnt"
	return bits.OnesCount8(n)
}
|
|
|
|
// ----------------------- //
|
|
// bits.ReverseBytes //
|
|
// ----------------------- //
|
|
|
|
// ReverseBytes returns bits.ReverseBytes(n): n with its bytes in reversed
// order.
func ReverseBytes(n uint) uint {
	// amd64:"BSWAPQ"
	// s390x:"MOVDBR"
	// arm64:"REV"
	return bits.ReverseBytes(n)
}
|
|
|
|
// ReverseBytes64 returns bits.ReverseBytes64(n): n with its eight bytes in
// reversed order.
func ReverseBytes64(n uint64) uint64 {
	// amd64:"BSWAPQ"
	// s390x:"MOVDBR"
	// arm64:"REV"
	return bits.ReverseBytes64(n)
}
|
|
|
|
// ReverseBytes32 returns bits.ReverseBytes32(n): n with its four bytes in
// reversed order.
func ReverseBytes32(n uint32) uint32 {
	// amd64:"BSWAPL"
	// s390x:"MOVWBR"
	// arm64:"REVW"
	return bits.ReverseBytes32(n)
}
|
|
|
|
// ReverseBytes16 returns bits.ReverseBytes16(n): n with its two bytes
// swapped.
func ReverseBytes16(n uint16) uint16 {
	// amd64:"ROLW"
	// arm64:"REV16W",-"UBFX",-"ORR"
	// arm/5:"SLL","SRL","ORR"
	// arm/6:"REV16"
	// arm/7:"REV16"
	return bits.ReverseBytes16(n)
}
|
|
|
|
// --------------------- //
|
|
// bits.RotateLeft //
|
|
// --------------------- //
|
|
|
|
// RotateLeft64 returns n rotated left by the constant 37 bits, via
// bits.RotateLeft64.
func RotateLeft64(n uint64) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// ppc64:"ROTL"
	// ppc64le:"ROTL"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]37, "
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, 37)
}
|
|
|
|
// RotateLeft32 returns n rotated left by the constant 9 bits, via
// bits.RotateLeft32.
func RotateLeft32(n uint32) uint32 {
	// amd64:"ROLL" 386:"ROLL"
	// arm:`MOVW\tR[0-9]+@>23`
	// arm64:"RORW"
	// ppc64:"ROTLW"
	// ppc64le:"ROTLW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, 9)
}
|
|
|
|
// RotateLeft16 returns n rotated left by the constant 5 bits, via
// bits.RotateLeft16.
func RotateLeft16(n uint16) uint16 {
	// amd64:"ROLW" 386:"ROLW"
	return bits.RotateLeft16(n, 5)
}
|
|
|
|
// RotateLeft8 returns n rotated left by the constant 5 bits, via
// bits.RotateLeft8.
func RotateLeft8(n uint8) uint8 {
	// amd64:"ROLB" 386:"ROLB"
	return bits.RotateLeft8(n, 5)
}
|
|
|
|
// RotateLeftVariable returns bits.RotateLeft(n, m): n rotated left by the
// run-time amount m (a negative m rotates right).
func RotateLeftVariable(n uint, m int) uint {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// ppc64:"ROTL"
	// ppc64le:"ROTL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft(n, m)
}
|
|
|
|
// RotateLeftVariable64 returns bits.RotateLeft64(n, m): n rotated left by
// the run-time amount m (a negative m rotates right).
func RotateLeftVariable64(n uint64, m int) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// ppc64:"ROTL"
	// ppc64le:"ROTL"
	// s390x:"RLLG"
	// wasm:"I64Rotl"
	return bits.RotateLeft64(n, m)
}
|
|
|
|
// RotateLeftVariable32 returns bits.RotateLeft32(n, m): n rotated left by
// the run-time amount m (a negative m rotates right).
func RotateLeftVariable32(n uint32, m int) uint32 {
	// arm:`MOVW\tR[0-9]+@>R[0-9]+`
	// amd64:"ROLL"
	// arm64:"RORW"
	// ppc64:"ROTLW"
	// ppc64le:"ROTLW"
	// s390x:"RLL"
	// wasm:"I32Rotl"
	return bits.RotateLeft32(n, m)
}
|
|
|
|
// ------------------------ //
|
|
// bits.TrailingZeros //
|
|
// ------------------------ //
|
|
|
|
// TrailingZeros returns bits.TrailingZeros(n), the number of trailing zero
// bits in n.
func TrailingZeros(n uint) int {
	// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// arm:"CLZ"
	// arm64:"RBIT","CLZ"
	// s390x:"FLOGR"
	// ppc64/power8:"ANDN","POPCNTD"
	// ppc64le/power8:"ANDN","POPCNTD"
	// ppc64/power9: "CNTTZD"
	// ppc64le/power9: "CNTTZD"
	// wasm:"I64Ctz"
	return bits.TrailingZeros(n)
}
|
|
|
|
// TrailingZeros64 returns bits.TrailingZeros64(n), the number of trailing
// zero bits in n (64 for n == 0).
func TrailingZeros64(n uint64) int {
	// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// arm64:"RBIT","CLZ"
	// s390x:"FLOGR"
	// ppc64/power8:"ANDN","POPCNTD"
	// ppc64le/power8:"ANDN","POPCNTD"
	// ppc64/power9: "CNTTZD"
	// ppc64le/power9: "CNTTZD"
	// wasm:"I64Ctz"
	return bits.TrailingZeros64(n)
}
|
|
|
|
// TrailingZeros64Subtract returns bits.TrailingZeros64(1 - n); the
// subtraction wraps modulo 2^64 because n is unsigned.
func TrailingZeros64Subtract(n uint64) int {
	// ppc64le/power8:"NEG","SUBC","ANDN","POPCNTD"
	// ppc64le/power9:"SUBC","CNTTZD"
	return bits.TrailingZeros64(1 - n)
}
|
|
|
|
// TrailingZeros32 returns bits.TrailingZeros32(n), the number of trailing
// zero bits in n (32 for n == 0).
func TrailingZeros32(n uint32) int {
	// amd64:"BTSQ\\t\\$32","BSFQ"
	// arm:"CLZ"
	// arm64:"RBITW","CLZW"
	// s390x:"FLOGR","MOVWZ"
	// ppc64/power8:"ANDN","POPCNTW"
	// ppc64le/power8:"ANDN","POPCNTW"
	// ppc64/power9: "CNTTZW"
	// ppc64le/power9: "CNTTZW"
	// wasm:"I64Ctz"
	return bits.TrailingZeros32(n)
}
|
|
|
|
// TrailingZeros16 returns bits.TrailingZeros16(n), the number of trailing
// zero bits in n (16 for n == 0).
func TrailingZeros16(n uint16) int {
	// amd64:"BSFL","BTSL\\t\\$16"
	// 386:"BSFL\t"
	// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
	// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
	// s390x:"FLOGR","OR\t\\$65536"
	// ppc64/power8:"POPCNTD","OR\\t\\$65536"
	// ppc64le/power8:"POPCNTD","OR\\t\\$65536"
	// ppc64/power9:"CNTTZD","OR\\t\\$65536"
	// ppc64le/power9:"CNTTZD","OR\\t\\$65536"
	// wasm:"I64Ctz"
	return bits.TrailingZeros16(n)
}
|
|
|
|
// TrailingZeros8 returns bits.TrailingZeros8(n), the number of trailing
// zero bits in n (8 for n == 0).
func TrailingZeros8(n uint8) int {
	// amd64:"BSFL","BTSL\\t\\$8"
	// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
	// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
	// s390x:"FLOGR","OR\t\\$256"
	// wasm:"I64Ctz"
	return bits.TrailingZeros8(n)
}
|
|
|
|
// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero.
|
|
|
|
// IterateBits sums bits.TrailingZeros(n) over successive values of n while
// clearing the lowest set bit each round. The loop condition guarantees n is
// non-zero at the TrailingZeros call.
func IterateBits(n uint) int {
	i := 0
	for n != 0 {
		// amd64:"BSFQ",-"CMOVEQ"
		i += bits.TrailingZeros(n)
		// n & (n-1) clears the lowest set bit.
		n &= n - 1
	}
	return i
}
|
|
|
|
// IterateBits64 sums bits.TrailingZeros64(n) over successive values of n
// while clearing the lowest set bit each round. The loop condition
// guarantees n is non-zero at the TrailingZeros64 call.
func IterateBits64(n uint64) int {
	i := 0
	for n != 0 {
		// amd64:"BSFQ",-"CMOVEQ"
		i += bits.TrailingZeros64(n)
		// n & (n-1) clears the lowest set bit.
		n &= n - 1
	}
	return i
}
|
|
|
|
// IterateBits32 sums bits.TrailingZeros32(n) over successive values of n
// while clearing the lowest set bit each round. The loop condition
// guarantees n is non-zero at the TrailingZeros32 call.
func IterateBits32(n uint32) int {
	i := 0
	for n != 0 {
		// amd64:"BSFL",-"BTSQ"
		i += bits.TrailingZeros32(n)
		// n & (n-1) clears the lowest set bit.
		n &= n - 1
	}
	return i
}
|
|
|
|
// IterateBits16 sums bits.TrailingZeros16(n) over successive values of n
// while clearing the lowest set bit each round. The loop condition
// guarantees n is non-zero at the TrailingZeros16 call.
func IterateBits16(n uint16) int {
	i := 0
	for n != 0 {
		// amd64:"BSFL",-"BTSL"
		// arm64:"RBITW","CLZW",-"ORR"
		i += bits.TrailingZeros16(n)
		// n & (n-1) clears the lowest set bit.
		n &= n - 1
	}
	return i
}
|
|
|
|
// IterateBits8 sums bits.TrailingZeros8(n) over successive values of n
// while clearing the lowest set bit each round. The loop condition
// guarantees n is non-zero at the TrailingZeros8 call.
func IterateBits8(n uint8) int {
	i := 0
	for n != 0 {
		// amd64:"BSFL",-"BTSL"
		// arm64:"RBITW","CLZW",-"ORR"
		i += bits.TrailingZeros8(n)
		// n & (n-1) clears the lowest set bit.
		n &= n - 1
	}
	return i
}
|
|
|
|
// --------------- //
|
|
// bits.Add* //
|
|
// --------------- //
|
|
|
|
// Add returns bits.Add(x, y, ci): the sum x + y + ci in r and the carry-out
// in co. Both the sum and the carry-out are used.
func Add(x, y, ci uint) (r, co uint) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// s390x:"ADDE","ADDC\t[$]-1,"
	return bits.Add(x, y, ci)
}
|
|
|
|
// AddC returns bits.Add(x, 7, ci): like Add but with the second operand
// fixed to the constant 7.
func AddC(x, ci uint) (r, co uint) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// s390x:"ADDE","ADDC\t[$]-1,"
	return bits.Add(x, 7, ci)
}
|
|
|
|
// AddZ returns bits.Add(x, y, 0): like Add but with the carry-in fixed to
// zero.
func AddZ(x, y uint) (r, co uint) {
	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
	// s390x:"ADDC",-"ADDC\t[$]-1,"
	return bits.Add(x, y, 0)
}
|
|
|
|
// AddR returns only the sum from bits.Add(x, y, ci); the carry-out is
// deliberately discarded.
func AddR(x, y, ci uint) uint {
	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
	// s390x:"ADDE","ADDC\t[$]-1,"
	r, _ := bits.Add(x, y, ci)
	return r
}
|
|
|
|
// AddM stores the three-word sum of *p and *q into *r, chaining the carry
// from each bits.Add call into the next. The carry-in of the first word is
// zero and the carry-out of the last word is dropped.
func AddM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Add(p[0], q[0], c)
	// arm64:"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
	// s390x:"ADDE",-"ADDC\t[$]-1,"
	r[1], c = bits.Add(p[1], q[1], c)
	r[2], c = bits.Add(p[2], q[2], c)
}
|
|
|
|
// Add64 returns bits.Add64(x, y, ci): the sum x + y + ci in r and the
// carry-out in co. Both the sum and the carry-out are used.
func Add64(x, y, ci uint64) (r, co uint64) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// ppc64: "ADDC", "ADDE", "ADDZE"
	// ppc64le: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	return bits.Add64(x, y, ci)
}
|
|
|
|
// Add64C returns bits.Add64(x, 7, ci): like Add64 but with the second
// operand fixed to the constant 7.
func Add64C(x, ci uint64) (r, co uint64) {
	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
	// ppc64: "ADDC", "ADDE", "ADDZE"
	// ppc64le: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	return bits.Add64(x, 7, ci)
}
|
|
|
|
// Add64Z returns bits.Add64(x, y, 0): like Add64 but with the carry-in
// fixed to zero.
func Add64Z(x, y uint64) (r, co uint64) {
	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
	// ppc64: "ADDC", "ADDE", "ADDZE"
	// ppc64le: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDC",-"ADDC\t[$]-1,"
	return bits.Add64(x, y, 0)
}
|
|
|
|
// Add64R returns only the sum from bits.Add64(x, y, ci); the carry-out is
// deliberately discarded.
func Add64R(x, y, ci uint64) uint64 {
	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
	// ppc64: "ADDC", "ADDE", "ADDZE"
	// ppc64le: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE","ADDC\t[$]-1,"
	r, _ := bits.Add64(x, y, ci)
	return r
}
|
|
// Add64M stores the three-word sum of *p and *q into *r, chaining the carry
// from each bits.Add64 call into the next. The carry-in of the first word
// is zero and the carry-out of the last word is dropped.
func Add64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Add64(p[0], q[0], c)
	// arm64:"ADCS",-"ADD\t",-"CMP"
	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
	// ppc64: "ADDC", "ADDE", "ADDZE"
	// ppc64le: "ADDC", "ADDE", "ADDZE"
	// s390x:"ADDE",-"ADDC\t[$]-1,"
	r[1], c = bits.Add64(p[1], q[1], c)
	r[2], c = bits.Add64(p[2], q[2], c)
}
|
|
|
|
// Add64PanicOnOverflowEQ returns a + b and panics with "overflow" when the
// carry-out of bits.Add64 equals 1.
func Add64PanicOnOverflowEQ(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c == 1 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// Add64PanicOnOverflowNE returns a + b and panics with "overflow" when the
// carry-out of bits.Add64 is not 0.
func Add64PanicOnOverflowNE(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c != 0 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// Add64PanicOnOverflowGT returns a + b and panics with "overflow" when the
// carry-out of bits.Add64 is greater than 0.
func Add64PanicOnOverflowGT(a, b uint64) uint64 {
	r, c := bits.Add64(a, b, 0)
	// s390x:"BRC\t[$]3,",-"ADDE"
	if c > 0 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// Add64MPanicOnOverflowEQ returns the two-word sum of a and b (index 0 is
// added first and its carry feeds index 1). It panics with "overflow" when
// the final carry-out equals 1.
func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c == 1 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// Add64MPanicOnOverflowNE returns the two-word sum of a and b (index 0 is
// added first and its carry feeds index 1). It panics with "overflow" when
// the final carry-out is not 0.
func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c != 0 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// Add64MPanicOnOverflowGT returns the two-word sum of a and b (index 0 is
// added first and its carry feeds index 1). It panics with "overflow" when
// the final carry-out is greater than 0.
func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Add64(a[0], b[0], c)
	r[1], c = bits.Add64(a[1], b[1], c)
	// s390x:"BRC\t[$]3,"
	if c > 0 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// --------------- //
|
|
// bits.Sub* //
|
|
// --------------- //
|
|
|
|
// Sub returns bits.Sub(x, y, ci): the difference x - y - ci in r and the
// borrow-out in co. Both the difference and the borrow-out are used.
func Sub(x, y, ci uint) (r, co uint) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	return bits.Sub(x, y, ci)
}
|
|
|
|
// SubC returns bits.Sub(x, 7, ci): like Sub but with the subtrahend fixed
// to the constant 7.
func SubC(x, ci uint) (r, co uint) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	return bits.Sub(x, 7, ci)
}
|
|
|
|
// SubZ returns bits.Sub(x, y, 0): like Sub but with the borrow-in fixed to
// zero.
func SubZ(x, y uint) (r, co uint) {
	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
	// s390x:"SUBC"
	return bits.Sub(x, y, 0)
}
|
|
|
|
// SubR returns only the difference from bits.Sub(x, y, ci); the borrow-out
// is deliberately discarded.
func SubR(x, y, ci uint) uint {
	// amd64:"NEGL","SBBQ",-"NEGQ"
	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	r, _ := bits.Sub(x, y, ci)
	return r
}
|
|
// SubM stores the three-word difference *p - *q into *r, chaining the
// borrow from each bits.Sub call into the next. The borrow-in of the first
// word is zero and the borrow-out of the last word is dropped.
func SubM(p, q, r *[3]uint) {
	var c uint
	r[0], c = bits.Sub(p[0], q[0], c)
	// amd64:"SBBQ",-"NEGL",-"NEGQ"
	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	r[1], c = bits.Sub(p[1], q[1], c)
	r[2], c = bits.Sub(p[2], q[2], c)
}
|
|
|
|
// Sub64 returns bits.Sub64(x, y, ci): the difference x - y - ci in r and
// the borrow-out in co. Both the difference and the borrow-out are used.
func Sub64(x, y, ci uint64) (r, co uint64) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	return bits.Sub64(x, y, ci)
}
|
|
|
|
// Sub64C returns bits.Sub64(x, 7, ci): like Sub64 but with the subtrahend
// fixed to the constant 7.
func Sub64C(x, ci uint64) (r, co uint64) {
	// amd64:"NEGL","SBBQ","NEGQ"
	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	return bits.Sub64(x, 7, ci)
}
|
|
|
|
// Sub64Z returns bits.Sub64(x, y, 0): like Sub64 but with the borrow-in
// fixed to zero.
func Sub64Z(x, y uint64) (r, co uint64) {
	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
	// s390x:"SUBC"
	return bits.Sub64(x, y, 0)
}
|
|
|
|
// Sub64R returns only the difference from bits.Sub64(x, y, ci); the
// borrow-out is deliberately discarded.
func Sub64R(x, y, ci uint64) uint64 {
	// amd64:"NEGL","SBBQ",-"NEGQ"
	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	r, _ := bits.Sub64(x, y, ci)
	return r
}
|
|
// Sub64M stores the three-word difference *p - *q into *r, chaining the
// borrow from each bits.Sub64 call into the next. The borrow-in of the
// first word is zero and the borrow-out of the last word is dropped.
func Sub64M(p, q, r *[3]uint64) {
	var c uint64
	r[0], c = bits.Sub64(p[0], q[0], c)
	// amd64:"SBBQ",-"NEGL",-"NEGQ"
	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
	// s390x:"SUBE"
	r[1], c = bits.Sub64(p[1], q[1], c)
	r[2], c = bits.Sub64(p[2], q[2], c)
}
|
|
|
|
// Sub64PanicOnOverflowEQ returns a - b and panics with "overflow" when the
// borrow-out of bits.Sub64 equals 1.
func Sub64PanicOnOverflowEQ(a, b uint64) uint64 {
	// Only r is newly declared here; the parameter b is reused to hold the
	// borrow-out.
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b == 1 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// Sub64PanicOnOverflowNE returns a - b and panics with "overflow" when the
// borrow-out of bits.Sub64 is not 0.
func Sub64PanicOnOverflowNE(a, b uint64) uint64 {
	// Only r is newly declared here; the parameter b is reused to hold the
	// borrow-out.
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b != 0 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// Sub64PanicOnOverflowGT returns a - b and panics with "overflow" when the
// borrow-out of bits.Sub64 is greater than 0.
func Sub64PanicOnOverflowGT(a, b uint64) uint64 {
	// Only r is newly declared here; the parameter b is reused to hold the
	// borrow-out.
	r, b := bits.Sub64(a, b, 0)
	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
	if b > 0 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// Sub64MPanicOnOverflowEQ returns the two-word difference a - b (index 0 is
// subtracted first and its borrow feeds index 1). It panics with "overflow"
// when the final borrow-out equals 1.
func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c == 1 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// Sub64MPanicOnOverflowNE returns the two-word difference a - b (index 0 is
// subtracted first and its borrow feeds index 1). It panics with "overflow"
// when the final borrow-out is not 0.
func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c != 0 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// Sub64MPanicOnOverflowGT returns the two-word difference a - b (index 0 is
// subtracted first and its borrow feeds index 1). It panics with "overflow"
// when the final borrow-out is greater than 0.
func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
	var r [2]uint64
	var c uint64
	r[0], c = bits.Sub64(a[0], b[0], c)
	r[1], c = bits.Sub64(a[1], b[1], c)
	// s390x:"BRC\t[$]12,"
	if c > 0 {
		panic("overflow")
	}
	return r
}
|
|
|
|
// --------------- //
|
|
// bits.Mul* //
|
|
// --------------- //
|
|
|
|
// Mul returns bits.Mul(x, y): the full double-width product of x and y,
// upper half in hi and lower half in lo.
func Mul(x, y uint) (hi, lo uint) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64:"MULHDU","MULLD"
	// ppc64le:"MULHDU","MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	return bits.Mul(x, y)
}
|
|
|
|
// Mul64 returns bits.Mul64(x, y): the full 128-bit product of x and y,
// upper 64 bits in hi and lower 64 bits in lo.
func Mul64(x, y uint64) (hi, lo uint64) {
	// amd64:"MULQ"
	// arm64:"UMULH","MUL"
	// ppc64:"MULHDU","MULLD"
	// ppc64le:"MULHDU","MULLD"
	// s390x:"MLGR"
	// mips64: "MULVU"
	return bits.Mul64(x, y)
}
|
|
|
|
// --------------- //
|
|
// bits.Div* //
|
|
// --------------- //
|
|
|
|
// Div returns bits.Div(hi, lo, x): the quotient and remainder of the
// double-width value (hi, lo) divided by x. Per the math/bits
// documentation, bits.Div panics when x == 0 or x <= hi.
func Div(hi, lo, x uint) (q, r uint) {
	// amd64:"DIVQ"
	return bits.Div(hi, lo, x)
}
|
|
|
|
// Div32 returns bits.Div32(hi, lo, x): the quotient and remainder of the
// 64-bit value (hi, lo) divided by x. Per the math/bits documentation,
// bits.Div32 panics when x == 0 or x <= hi.
func Div32(hi, lo, x uint32) (q, r uint32) {
	// arm64:"ORR","UDIV","MSUB",-"UREM"
	return bits.Div32(hi, lo, x)
}
|
|
|
|
// Div64 returns bits.Div64(hi, lo, x): the quotient and remainder of the
// 128-bit value (hi, lo) divided by x. Per the math/bits documentation,
// bits.Div64 panics when x == 0 or x <= hi.
func Div64(hi, lo, x uint64) (q, r uint64) {
	// amd64:"DIVQ"
	return bits.Div64(hi, lo, x)
}
|
|
|
|
// Div64degenerate returns bits.Div64(0, x, 5): with the high word fixed to
// zero and a constant divisor this is just x / 5 and x % 5.
func Div64degenerate(x uint64) (q, r uint64) {
	// amd64:-"DIVQ"
	return bits.Div64(0, x, 5)
}
|