diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index b3d2cb2067..2b4422ebd2 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -20,7 +20,7 @@ var sink16 uint16 // ------------- // func load_le64(b []byte) { - // amd64:`MOVQ\s\(.*\),` + // amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]`,-`OR` // s390x:`MOVDBR\s\(.*\),` // arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]` // ppc64le:`MOVD\s`,-`MOV[BHW]Z` @@ -28,7 +28,7 @@ func load_le64(b []byte) { } func load_le64_idx(b []byte, idx int) { - // amd64:`MOVQ\s\(.*\)\(.*\*1\),` + // amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]`,-`OR` // s390x:`MOVDBR\s\(.*\)\(.*\*1\),` // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]` // ppc64le:`MOVD\s`,-`MOV[BHW]Z\s` @@ -36,7 +36,8 @@ func load_le64_idx(b []byte, idx int) { } func load_le32(b []byte) { - // amd64:`MOVL\s\(.*\),` 386:`MOVL\s\(.*\),` + // amd64:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR` + // 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR` // s390x:`MOVWBR\s\(.*\),` // arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]` // ppc64le:`MOVWZ\s` @@ -44,7 +45,8 @@ func load_le32(b []byte) { } func load_le32_idx(b []byte, idx int) { - // amd64:`MOVL\s\(.*\)\(.*\*1\),` 386:`MOVL\s\(.*\)\(.*\*1\),` + // amd64:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR` + // 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR` // s390x:`MOVWBR\s\(.*\)\(.*\*1\),` // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]` // ppc64le:`MOVWZ\s` @@ -52,21 +54,21 @@ func load_le32_idx(b []byte, idx int) { } func load_le16(b []byte) { - // amd64:`MOVWLZX\s\(.*\),` + // amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR` // ppc64le:`MOVHZ\s` // arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB` sink16 = binary.LittleEndian.Uint16(b) } func load_le16_idx(b []byte, idx int) { - // amd64:`MOVWLZX\s\(.*\),` + // amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR` // ppc64le:`MOVHZ\s` // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB` sink16 = binary.LittleEndian.Uint16(b[idx:]) } func load_be64(b []byte) { - // amd64:`BSWAPQ` + // amd64:`BSWAPQ`,-`MOV[BWL]`,-`OR` // s390x:`MOVD\s\(.*\),` // arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W` // ppc64le:`MOVDBR` @@ -74,7 +76,7 @@ func load_be64(b []byte) { } func load_be64_idx(b []byte, idx int) { - // amd64:`BSWAPQ` + // amd64:`BSWAPQ`,-`MOV[BWL]`,-`OR` // s390x:`MOVD\s\(.*\)\(.*\*1\),` // arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W` // ppc64le:`MOVDBR` @@ -82,7 +84,7 @@ func load_be64_idx(b []byte, idx int) { } func load_be32(b []byte) { - // amd64:`BSWAPL` + // amd64:`BSWAPL`,-`MOV[BW]`,-`OR` // s390x:`MOVWZ\s\(.*\),` // arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W` // ppc64le:`MOVWBR` @@ -90,7 +92,7 @@ func load_be32(b []byte) { } func load_be32_idx(b []byte, idx int) { - // amd64:`BSWAPL` + // amd64:`BSWAPL`,-`MOV[BW]`,-`OR` // s390x:`MOVWZ\s\(.*\)\(.*\*1\),` // arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W` // ppc64le:`MOVWBR` @@ -98,14 +100,14 @@ func load_be32_idx(b []byte, idx int) { } func load_be16(b []byte) { - // amd64:`ROLW\s\$8` + // amd64:`ROLW\s\$8`,-`MOVB`,-`OR` // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB` // ppc64le:`MOVHBR` sink16 = binary.BigEndian.Uint16(b) } func load_be16_idx(b []byte, idx int) { - // amd64:`ROLW\s\$8` + // amd64:`ROLW\s\$8`,-`MOVB`,-`OR` // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB` // ppc64le:`MOVHBR` sink16 = binary.BigEndian.Uint16(b[idx:]) @@ -113,22 +115,22 @@ func load_be16_idx(b []byte, idx int) { func load_le_byte2_uint16(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB` - // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`ORL` - // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`ORL` + // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` + // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` return uint16(s[0]) | uint16(s[1])<<8 } func load_le_byte2_uint16_inv(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB` - // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`ORL` - // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`ORL` + // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` + // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` return uint16(s[1])<<8 | uint16(s[0]) } func load_le_byte4_uint32(s []byte) uint32 { // arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]` - // 386:`MOVL\s\([A-Z]+\)`,-`MOVB`,-`OR`-`MOVW` - // amd64:`MOVL\s\([A-Z]+\)`,-`MOVB`,-`OR`-`MOVW` + // 386:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR` + // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR` return uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24 } @@ -139,25 +141,24 @@ func load_le_byte4_uint32_inv(s []byte) uint32 { func load_le_byte8_uint64(s []byte) uint64 { // arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` - // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+` + // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]`,-`OR` return uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 | uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56 } func load_le_byte8_uint64_inv(s []byte) uint64 { // arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` - // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+` return uint64(s[7])<<56 | uint64(s[6])<<48 | uint64(s[5])<<40 | uint64(s[4])<<32 | uint64(s[3])<<24 | uint64(s[2])<<16 | uint64(s[1])<<8 | uint64(s[0]) } func load_be_byte2_uint16(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` - // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`ORL` + // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` return uint16(s[0])<<8 | uint16(s[1]) } func load_be_byte2_uint16_inv(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` - // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`ORL` + // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` return uint16(s[1]) | uint16(s[0])<<8 } @@ -168,39 +169,38 @@ func load_be_byte4_uint32(s []byte) uint32 { func load_be_byte4_uint32_inv(s []byte) uint32 { // arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]` - // amd64:`MOVL\s\([A-Z]+\)`,-`MOVB`,-`OR`,-`MOVW` + // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR` return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24 } func load_be_byte8_uint64(s []byte) uint64 { // arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]` - // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+` return uint64(s[0])<<56 | uint64(s[1])<<48 | uint64(s[2])<<40 | uint64(s[3])<<32 | uint64(s[4])<<24 | uint64(s[5])<<16 | uint64(s[6])<<8 | uint64(s[7]) } func load_be_byte8_uint64_inv(s []byte) uint64 { // arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]` - // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+` + // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]`,-`OR` return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56 } func load_le_byte2_uint16_idx(s []byte, idx int) uint16 { // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB` // 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB` - // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`OR`,-`MOVB` + // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR` return uint16(s[idx]) | uint16(s[idx+1])<<8 } func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 { // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB` // 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB` - // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`OR`,-`MOVB` + // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR` return uint16(s[idx+1])<<8 | uint16(s[idx]) } func load_le_byte4_uint32_idx(s []byte, idx int) uint32 { // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]` - // amd64:`MOVL\s\([A-Z]+\)\([A-Z]+`,-`OR`,-`MOVB`,-`MOVW` + // amd64:`MOVL\s\([A-Z]+\)\([A-Z]+`,-`MOV[BW]`,-`OR` return uint32(s[idx]) | uint32(s[idx+1])<<8 | uint32(s[idx+2])<<16 | uint32(s[idx+3])<<24 } @@ -211,7 +211,7 @@ func load_le_byte4_uint32_idx_inv(s []byte, idx int) uint32 { func load_le_byte8_uint64_idx(s []byte, idx int) uint64 { // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` - // amd64:`MOVQ\s\([A-Z]+\)\([A-Z]+` + // amd64:`MOVQ\s\([A-Z]+\)\([A-Z]+`,-`MOV[BWL]`,-`OR` return uint64(s[idx]) | uint64(s[idx+1])<<8 | uint64(s[idx+2])<<16 | uint64(s[idx+3])<<24 | uint64(s[idx+4])<<32 | uint64(s[idx+5])<<40 | uint64(s[idx+6])<<48 | uint64(s[idx+7])<<56 } @@ -222,13 +222,13 @@ func load_le_byte8_uint64_idx_inv(s []byte, idx int) uint64 { func load_be_byte2_uint16_idx(s []byte, idx int) uint16 { // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` - // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`OR`,-`MOVB` + // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR` return uint16(s[idx])<<8 | uint16(s[idx+1]) } func load_be_byte2_uint16_idx_inv(s []byte, idx int) uint16 { // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` - // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`OR`,-`MOVB` + // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR` return uint16(s[idx+1]) | uint16(s[idx])<<8 }