2018-04-30 06:27:50 -06:00
|
|
|
// asmcheck

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package codegen
|
|
|
|
|
2021-09-21 10:46:00 -06:00
|
|
|
// ------------------ //
//   constant shifts  //
// ------------------ //
|
|
|
|
|
|
|
|
// lshConst64x64 returns v shifted left by the constant 33, with the
// shift count written as a uint64.
//
// The count is a compile-time constant, so on riscv64 the out-of-range
// filter can be precomputed away. The check below expects a single SLLI
// and no AND or SLTIU.
func lshConst64x64(v int64) int64 {
	// riscv64:"SLLI",-"AND",-"SLTIU"
	return v << uint64(33)
}
|
|
|
|
|
|
|
|
// rshConst64Ux64 returns v logically shifted right by the constant 33,
// with the shift count written as a uint64.
//
// The count is a compile-time constant, so on riscv64 the out-of-range
// filter can be precomputed away. The check below expects a single SRLI
// and no AND or SLTIU.
func rshConst64Ux64(v uint64) uint64 {
	// riscv64:"SRLI",-"AND",-"SLTIU"
	return v >> uint64(33)
}
|
|
|
|
|
|
|
|
// rshConst64x64 returns v arithmetically shifted right by the constant
// 33, with the shift count written as a uint64.
//
// The count is a compile-time constant, so on riscv64 the shift value
// can be precomputed. The check below expects a single SRAI and no OR
// or SLTIU.
func rshConst64x64(v int64) int64 {
	// riscv64:"SRAI",-"OR",-"SLTIU"
	return v >> uint64(33)
}
|
|
|
|
|
|
|
|
// lshConst32x64 returns the 32-bit value v shifted left by the constant
// 29, with the shift count written as a uint64.
//
// The count is a compile-time constant, so on riscv64 the out-of-range
// filter can be precomputed away. The check below expects an SLLI and
// no AND or SLTIU.
func lshConst32x64(v int32) int32 {
	// riscv64:"SLLI",-"AND",-"SLTIU"
	return v << uint64(29)
}
|
|
|
|
|
|
|
|
// rshConst32Ux64 returns the 32-bit value v logically shifted right by
// the constant 29, with the shift count written as a uint64.
//
// The count is a compile-time constant, so on riscv64 the out-of-range
// filter can be precomputed away. The check below expects an SRLI and
// no AND or SLTIU.
func rshConst32Ux64(v uint32) uint32 {
	// riscv64:"SRLI",-"AND",-"SLTIU"
	return v >> uint64(29)
}
|
|
|
|
|
|
|
|
// rshConst32x64 returns the 32-bit value v arithmetically shifted right
// by the constant 29, with the shift count written as a uint64.
//
// The count is a compile-time constant, so on riscv64 the shift value
// can be precomputed. The check below expects an SRAI and no OR or
// SLTIU.
func rshConst32x64(v int32) int32 {
	// riscv64:"SRAI",-"OR",-"SLTIU"
	return v >> uint64(29)
}
|
|
|
|
|
|
|
|
// lshConst64x32 returns v shifted left by the constant 33, with the
// shift count written as a uint32.
//
// The count is a compile-time constant, so on riscv64 the out-of-range
// filter can be precomputed away. The check below expects a single SLLI
// and no AND or SLTIU.
func lshConst64x32(v int64) int64 {
	// riscv64:"SLLI",-"AND",-"SLTIU"
	return v << uint32(33)
}
|
|
|
|
|
|
|
|
// rshConst64Ux32 returns v logically shifted right by the constant 33,
// with the shift count written as a uint32.
//
// The count is a compile-time constant, so on riscv64 the out-of-range
// filter can be precomputed away. The check below expects a single SRLI
// and no AND or SLTIU.
func rshConst64Ux32(v uint64) uint64 {
	// riscv64:"SRLI",-"AND",-"SLTIU"
	return v >> uint32(33)
}
|
|
|
|
|
|
|
|
// rshConst64x32 returns v arithmetically shifted right by the constant
// 33, with the shift count written as a uint32.
//
// The count is a compile-time constant, so on riscv64 the shift value
// can be precomputed. The check below expects a single SRAI and no OR
// or SLTIU.
func rshConst64x32(v int64) int64 {
	// riscv64:"SRAI",-"OR",-"SLTIU"
	return v >> uint32(33)
}
|
|
|
|
|
2018-04-30 06:27:50 -06:00
|
|
|
// ------------------ //
//   masked shifts    //
// ------------------ //
|
|
|
|
|
|
|
|
// lshMask64x64 returns v shifted left by the low six bits of s.
//
// Masking the 64-bit count with 63 keeps it below the operand width.
// The directives below record what each listed architecture is expected
// to emit, or to avoid emitting, for this masked form.
func lshMask64x64(v int64, s uint64) int64 {
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// riscv64:"SLL","AND","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v << (s & 63)
}
|
|
|
|
|
|
|
|
// rshMask64Ux64 returns v logically shifted right by the low six bits
// of s.
//
// Masking the 64-bit count with 63 keeps it below the operand width.
// The directives below record what each listed architecture is expected
// to emit, or to avoid emitting, for this masked form.
func rshMask64Ux64(v uint64, s uint64) uint64 {
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// riscv64:"SRL","AND","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
|
|
|
|
// rshMask64x64 returns v arithmetically shifted right by the low six
// bits of s.
//
// Masking the 64-bit count with 63 keeps it below the operand width.
// The directives below record what each listed architecture is expected
// to emit, or to avoid emitting, for this masked form.
func rshMask64x64(v int64, s uint64) int64 {
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// riscv64:"SRA","OR","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
|
|
|
|
// lshMask32x64 returns the 32-bit value v shifted left by the low six
// bits of s.
//
// The mask is 63 rather than 31, so counts from 32 to 63 are still
// possible and shift every bit out, giving 0. The directives below
// record what each listed architecture is expected to emit, or to avoid
// emitting, for this form.
func lshMask32x64(v int32, s uint64) int32 {
	// ppc64:"ISEL",-"ORN"
	// ppc64le:"ISEL",-"ORN"
	// riscv64:"SLL","AND","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v << (s & 63)
}
|
|
|
|
|
|
|
|
// rshMask32Ux64 returns the 32-bit value v logically shifted right by
// the low six bits of s.
//
// The mask is 63 rather than 31, so counts from 32 to 63 are still
// possible and give 0. The directives below record what each listed
// architecture is expected to emit, or to avoid emitting, for this
// form.
func rshMask32Ux64(v uint32, s uint64) uint32 {
	// ppc64:"ISEL",-"ORN"
	// ppc64le:"ISEL",-"ORN"
	// riscv64:"SRL","AND","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
|
|
|
|
// rshMask32x64 returns the 32-bit value v arithmetically shifted right
// by the low six bits of s.
//
// The mask is 63 rather than 31, so counts from 32 to 63 are still
// possible and leave only copies of the sign bit. The directives below
// record what each listed architecture is expected to emit, or to avoid
// emitting, for this form.
func rshMask32x64(v int32, s uint64) int32 {
	// ppc64:"ISEL",-"ORN"
	// ppc64le:"ISEL",-"ORN"
	// riscv64:"SRA","OR","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
|
|
|
|
// lshMask64x32 returns v shifted left by the low six bits of the 32-bit
// count s.
//
// Masking with 63 keeps the count below the operand width. The
// directives below record what each listed architecture is expected to
// emit, or to avoid emitting, for this masked form.
func lshMask64x32(v int64, s uint32) int64 {
	// ppc64:"ANDCC",-"ORN"
	// ppc64le:"ANDCC",-"ORN"
	// riscv64:"SLL","AND","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v << (s & 63)
}
|
|
|
|
|
|
|
|
// rshMask64Ux32 returns v logically shifted right by the low six bits
// of the 32-bit count s.
//
// Masking with 63 keeps the count below the operand width. The
// directives below record what each listed architecture is expected to
// emit, or to avoid emitting, for this masked form.
func rshMask64Ux32(v uint64, s uint32) uint64 {
	// ppc64:"ANDCC",-"ORN"
	// ppc64le:"ANDCC",-"ORN"
	// riscv64:"SRL","AND","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
|
|
|
|
// rshMask64x32 returns v arithmetically shifted right by the low six
// bits of the 32-bit count s.
//
// Masking with 63 keeps the count below the operand width. The
// directives below record what each listed architecture is expected to
// emit, or to avoid emitting, for this masked form.
func rshMask64x32(v int64, s uint32) int64 {
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// riscv64:"SRA","OR","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> (s & 63)
}
|
|
|
|
|
|
|
|
// lshMask64x32Ext returns v shifted left by the low six bits of the
// signed 32-bit count s.
//
// The mask is applied to s before the conversion to uint, so the count
// is always in the range 0 to 63, even when s is negative. The
// directives below record what each listed architecture is expected to
// emit, or to avoid emitting, for this form.
func lshMask64x32Ext(v int64, s int32) int64 {
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// riscv64:"SLL","AND","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v << uint(s&63)
}
|
|
|
|
|
|
|
|
// rshMask64Ux32Ext returns v logically shifted right by the low six
// bits of the signed 32-bit count s.
//
// The mask is applied to s before the conversion to uint, so the count
// is always in the range 0 to 63, even when s is negative. The
// directives below record what each listed architecture is expected to
// emit, or to avoid emitting, for this form.
func rshMask64Ux32Ext(v uint64, s int32) uint64 {
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// riscv64:"SRL","AND","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> uint(s&63)
}
|
|
|
|
|
|
|
|
// rshMask64x32Ext returns v arithmetically shifted right by the low six
// bits of the signed 32-bit count s.
//
// The mask is applied to s before the conversion to uint, so the count
// is always in the range 0 to 63, even when s is negative. The
// directives below record what each listed architecture is expected to
// emit, or to avoid emitting, for this form.
func rshMask64x32Ext(v int64, s int32) int64 {
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// riscv64:"SRA","OR","SLTIU"
	// s390x:-"RISBGZ",-"AND",-"LOCGR"
	return v >> uint(s&63)
}
|
|
|
|
|
2019-03-13 14:53:38 -06:00
|
|
|
// --------------- //
//  signed shifts  //
// --------------- //
|
|
|
|
|
|
|
|
// We do want to generate a test + panicshift for these cases.
//
// lshSigned shifts x left by signed counts of each integer width and
// discards the results. Nothing bounds the counts, so each shift needs
// a runtime check for a negative count. The amd64 directives expect a
// TEST instruction of the matching operand size before each shift.
func lshSigned(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
	// amd64:"TESTB"
	_ = x << v8
	// amd64:"TESTW"
	_ = x << v16
	// amd64:"TESTL"
	_ = x << v32
	// amd64:"TESTQ"
	_ = x << v64
}
|
|
|
|
|
|
|
|
// We want to avoid generating a test + panicshift for these cases.
//
// lshSignedMasked shifts x left by signed counts that are first masked
// to a small non-negative range, and discards the results. The mask
// rules out a negative count, so the amd64 directives expect no TEST
// instruction of the matching operand size.
func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
	// amd64:-"TESTB"
	_ = x << (v8 & 7)
	// amd64:-"TESTW"
	_ = x << (v16 & 15)
	// amd64:-"TESTL"
	_ = x << (v32 & 31)
	// amd64:-"TESTQ"
	_ = x << (v64 & 63)
}
|
|
|
|
|
2018-04-30 06:27:50 -06:00
|
|
|
// ------------------ //
//   bounded shifts   //
// ------------------ //
|
|
|
|
|
2021-09-21 10:46:00 -06:00
|
|
|
// lshGuarded64 returns v shifted left by s when s is below 64, and
// panics with "shift too large" otherwise.
//
// The explicit bound on s is what the directives exercise. They record
// what each listed architecture is expected to emit, or to avoid
// emitting, for the guarded shift.
func lshGuarded64(v int64, s uint) int64 {
	if s < 64 {
		// riscv64:"SLL","AND","SLTIU"
		// s390x:-"RISBGZ",-"AND",-"LOCGR"
		// wasm:-"Select",-".*LtU"
		return v << s
	}
	panic("shift too large")
}
|
|
|
|
|
|
|
|
// rshGuarded64U returns v logically shifted right by s when s is below
// 64, and panics with "shift too large" otherwise.
//
// The explicit bound on s is what the directives exercise. They record
// what each listed architecture is expected to emit, or to avoid
// emitting, for the guarded shift.
func rshGuarded64U(v uint64, s uint) uint64 {
	if s < 64 {
		// riscv64:"SRL","AND","SLTIU"
		// s390x:-"RISBGZ",-"AND",-"LOCGR"
		// wasm:-"Select",-".*LtU"
		return v >> s
	}
	panic("shift too large")
}
|
|
|
|
|
2021-09-21 10:46:00 -06:00
|
|
|
// rshGuarded64 returns v arithmetically shifted right by s when s is
// below 64, and panics with "shift too large" otherwise.
//
// The explicit bound on s is what the directives exercise. They record
// what each listed architecture is expected to emit, or to avoid
// emitting, for the guarded shift.
func rshGuarded64(v int64, s uint) int64 {
	if s < 64 {
		// riscv64:"SRA","OR","SLTIU"
		// s390x:-"RISBGZ",-"AND",-"LOCGR"
		// wasm:-"Select",-".*LtU"
		return v >> s
	}
	panic("shift too large")
}
|
2020-03-26 14:01:40 -06:00
|
|
|
|
2020-08-31 07:43:40 -06:00
|
|
|
// checkUnneededTrunc sums table entries selected by truncated and
// masked index expressions, plus doubled truncations of d.
//
// It returns f, the sum of seven tab lookups and 2*uint32(uint16(d)),
// and g, which is 2*uint64(uint32(d)). Each statement pairs a narrowing
// conversion or mask with the scaling implied by indexing or by the
// multiplication by 2. The ppc64 and ppc64le directives record which
// instructions are expected, or must be absent, for that statement.
func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) {

	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f := tab[byte(v)^b]
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[byte(v)&b]
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[byte(v)|b]
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[uint16(v)&h]
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[uint16(v)^h]
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[uint16(v)|h]
	// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
	// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
	f += tab[v&0xff]
	// ppc64le:-".*AND",".*CLRLSLWI"
	// ppc64:-".*AND",".*CLRLSLWI"
	f += 2 * uint32(uint16(d))
	// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
	// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
	g := 2 * uint64(uint32(d))
	return f, g
}
|
|
|
|
|
2020-09-23 09:06:39 -06:00
|
|
|
// checkCombinedShifts evaluates several mask-then-shift and
// truncate-then-shift expressions and returns their results.
//
// The results are:
//
//	f = (v8&0xF)<<2 + byte(v16)<<3
//	g = (v16&0xFF) << 3
//	h = (v32&0xFFFFF) << 2
//	i = (v64&0xFFFFFFFF)<<5 + (v64&0xFFFFFFF)<<38 + (v64&0xFFFF00)<<10
//	j = int64(x32+32) * 8
//
// all computed with wraparound at each result's width. The ppc64 and
// ppc64le directives record which instructions are expected, or must be
// absent, for the statement that follows them.
func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) {

	// ppc64le:-"AND","CLRLSLWI"
	// ppc64:-"AND","CLRLSLWI"
	f := (v8 & 0xF) << 2
	// ppc64le:"CLRLSLWI"
	// ppc64:"CLRLSLWI"
	f += byte(v16) << 3
	// ppc64le:-"AND","CLRLSLWI"
	// ppc64:-"AND","CLRLSLWI"
	g := (v16 & 0xFF) << 3
	// ppc64le:-"AND","CLRLSLWI"
	// ppc64:-"AND","CLRLSLWI"
	h := (v32 & 0xFFFFF) << 2
	// ppc64le:"CLRLSLDI"
	// ppc64:"CLRLSLDI"
	i := (v64 & 0xFFFFFFFF) << 5
	// ppc64le:-"CLRLSLDI"
	// ppc64:-"CLRLSLDI"
	i += (v64 & 0xFFFFFFF) << 38
	// ppc64le/power9:-"CLRLSLDI"
	// ppc64/power9:-"CLRLSLDI"
	i += (v64 & 0xFFFF00) << 10
	// ppc64le/power9:-"SLD","EXTSWSLI"
	// ppc64/power9:-"SLD","EXTSWSLI"
	j := int64(x32+32) * 8
	return f, g, h, i, j
}
|
|
|
|
|
2020-03-26 14:01:40 -06:00
|
|
|
// checkWidenAfterShift narrows right-shifted values to int32, int16 and
// int8, accumulates them, and returns the int8 and int16 accumulators
// widened to int64 and uint64.
//
// The ppc64le directives alternate between forbidding and requiring a
// MOVW, MOVH or MOVB for the statement that follows. In this function
// the forbidden cases are the ones where the shift count leaves no more
// bits than the target type holds. The parameter u is not used.
func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {

	// ppc64le:-".*MOVW"
	f := int32(v >> 32)
	// ppc64le:".*MOVW"
	f += int32(v >> 31)
	// ppc64le:-".*MOVH"
	g := int16(v >> 48)
	// ppc64le:".*MOVH"
	g += int16(v >> 30)
	// ppc64le:-".*MOVH"
	g += int16(f >> 16)
	// ppc64le:-".*MOVB"
	h := int8(v >> 56)
	// ppc64le:".*MOVB"
	h += int8(v >> 28)
	// ppc64le:-".*MOVB"
	h += int8(f >> 24)
	// ppc64le:".*MOVB"
	h += int8(f >> 16)
	return int64(h), uint64(g)
}
|
|
|
|
|
|
|
|
// checkShiftAndMask32 rewrites v[0] through v[7] in place, each with a
// different combination of a constant mask and a constant right shift.
// v must have at least eight elements.
//
// The first four elements are masked then shifted, the last four are
// shifted then masked. The ppc64 and ppc64le directives expect a single
// RLWNM with the given operands where mask and shift overlap. Where no
// bit survives they expect MOVW R0, which stores zero.
func checkShiftAndMask32(v []uint32) {
	i := 0

	// ppc64le: "RLWNM\t[$]24, R[0-9]+, [$]12, [$]19, R[0-9]+"
	// ppc64: "RLWNM\t[$]24, R[0-9]+, [$]12, [$]19, R[0-9]+"
	v[i] = (v[i] & 0xFF00000) >> 8
	i++
	// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]22, [$]29, R[0-9]+"
	// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]22, [$]29, R[0-9]+"
	v[i] = (v[i] & 0xFF00) >> 6
	i++
	// ppc64le: "MOVW\tR0"
	// ppc64: "MOVW\tR0"
	v[i] = (v[i] & 0xFF) >> 8
	i++
	// ppc64le: "MOVW\tR0"
	// ppc64: "MOVW\tR0"
	v[i] = (v[i] & 0xF000000) >> 28
	i++
	// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]24, [$]31, R[0-9]+"
	// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]24, [$]31, R[0-9]+"
	v[i] = (v[i] >> 6) & 0xFF
	i++
	// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]12, [$]19, R[0-9]+"
	// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]12, [$]19, R[0-9]+"
	v[i] = (v[i] >> 6) & 0xFF000
	i++
	// ppc64le: "MOVW\tR0"
	// ppc64: "MOVW\tR0"
	v[i] = (v[i] >> 20) & 0xFF000
	i++
	// ppc64le: "MOVW\tR0"
	// ppc64: "MOVW\tR0"
	v[i] = (v[i] >> 24) & 0xFF00
	i++
}
|
|
|
|
|
|
|
|
// checkMergedShifts32 indexes a and b with right-shifted (and, in one
// case, masked) forms of v and stores the loaded element back into a
// low slot of the same array.
//
// a and b are passed by value, so the stores touch only the local
// copies and nothing is returned. The ppc64 and ppc64le directives
// expect the index shift and the element-size scaling to merge into a
// single RLWNM with the given operands. The parameter u is not used.
func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) {
	//ppc64le: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
	//ppc64: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
	a[0] = a[uint8(v>>24)]
	//ppc64le: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]21, [$]28, R[0-9]+"
	//ppc64: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]21, [$]28, R[0-9]+"
	b[0] = b[uint8(v>>24)]
	//ppc64le: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
	//ppc64: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
	b[1] = b[(v>>20)&0xFF]
	//ppc64le: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]28, R[0-9]+"
	//ppc64: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]28, R[0-9]+"
	b[2] = b[v>>25]
}
|
2021-01-07 20:25:05 -07:00
|
|
|
|
|
|
|
// 128 bit shifts
|
|
|
|
|
|
|
|
// check128bitShifts combines x and y the way a double-width shift by
// bits would, using a count s = bits&63 and the complementary count
// (64-bits)&63.
//
// It returns x>>s | y<<(complement) and x<<s | y>>(complement). The
// amd64 directives expect the three-operand forms of SHRQ and SHLQ for
// these expressions.
func check128bitShifts(x, y uint64, bits uint) (uint64, uint64) {
	s := bits & 63
	ŝ := (64 - bits) & 63
	// check that the shift operation has two commas (three operands)
	// amd64:"SHRQ.*,.*,"
	shr := x>>s | y<<ŝ
	// amd64:"SHLQ.*,.*,"
	shl := x<<s | y>>ŝ
	return shr, shl
}
|