diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index 4008fdb2cb..b849eef964 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -116,51 +116,101 @@ func load_le_byte2_uint16(s []byte) uint16 { return uint16(s[0]) | uint16(s[1])<<8 } +func load_le_byte2_uint16_inv(s []byte) uint16 { + // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB` + return uint16(s[1])<<8 | uint16(s[0]) +} + func load_le_byte4_uint32(s []byte) uint32 { // arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]` return uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24 } +func load_le_byte4_uint32_inv(s []byte) uint32 { + // arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]` + return uint32(s[3])<<24 | uint32(s[2])<<16 | uint32(s[1])<<8 | uint32(s[0]) +} + func load_le_byte8_uint64(s []byte) uint64 { // arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` return uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 | uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56 } +func load_le_byte8_uint64_inv(s []byte) uint64 { + // arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` + return uint64(s[7])<<56 | uint64(s[6])<<48 | uint64(s[5])<<40 | uint64(s[4])<<32 | uint64(s[3])<<24 | uint64(s[2])<<16 | uint64(s[1])<<8 | uint64(s[0]) +} + func load_be_byte2_uint16(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` return uint16(s[0])<<8 | uint16(s[1]) } +func load_be_byte2_uint16_inv(s []byte) uint16 { + // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` + return uint16(s[1]) | uint16(s[0])<<8 +} + func load_be_byte4_uint32(s []byte) uint32 { // arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]` return uint32(s[0])<<24 | uint32(s[1])<<16 | uint32(s[2])<<8 | uint32(s[3]) } +func load_be_byte4_uint32_inv(s []byte) uint32 { + // arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]` + return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24 +} + func load_be_byte8_uint64(s []byte) uint64 { // arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]` return uint64(s[0])<<56 | uint64(s[1])<<48 | uint64(s[2])<<40 | uint64(s[3])<<32 | uint64(s[4])<<24 | uint64(s[5])<<16 | uint64(s[6])<<8 | uint64(s[7]) } +func load_be_byte8_uint64_inv(s []byte) uint64 { + // arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]` + return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56 +} + func load_le_byte2_uint16_idx(s []byte, idx int) uint16 { // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB` return uint16(s[idx]) | uint16(s[idx+1])<<8 } +func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB` + return uint16(s[idx+1])<<8 | uint16(s[idx]) +} + func load_le_byte4_uint32_idx(s []byte, idx int) uint32 { // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]` return uint32(s[idx]) | uint32(s[idx+1])<<8 | uint32(s[idx+2])<<16 | uint32(s[idx+3])<<24 } +func load_le_byte4_uint32_idx_inv(s []byte, idx int) uint32 { + // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]` + return uint32(s[idx+3])<<24 | uint32(s[idx+2])<<16 | uint32(s[idx+1])<<8 | uint32(s[idx]) +} + func load_le_byte8_uint64_idx(s []byte, idx int) uint64 { // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` return uint64(s[idx]) | uint64(s[idx+1])<<8 | uint64(s[idx+2])<<16 | uint64(s[idx+3])<<24 | uint64(s[idx+4])<<32 | uint64(s[idx+5])<<40 | uint64(s[idx+6])<<48 | uint64(s[idx+7])<<56 } +func load_le_byte8_uint64_idx_inv(s []byte, idx int) uint64 { + // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` + return uint64(s[idx+7])<<56 | uint64(s[idx+6])<<48 | uint64(s[idx+5])<<40 | uint64(s[idx+4])<<32 | uint64(s[idx+3])<<24 | uint64(s[idx+2])<<16 | uint64(s[idx+1])<<8 | uint64(s[idx]) +} + func load_be_byte2_uint16_idx(s []byte, idx int) uint16 { // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` return uint16(s[idx])<<8 | uint16(s[idx+1]) } +func load_be_byte2_uint16_idx_inv(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` + return uint16(s[idx+1]) | uint16(s[idx])<<8 +} + func load_be_byte4_uint32_idx(s []byte, idx int) uint32 { // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W` return uint32(s[idx])<<24 | uint32(s[idx+1])<<16 | uint32(s[idx+2])<<8 | uint32(s[idx+3]) @@ -176,21 +226,41 @@ func load_le_byte2_uint16_idx2(s []byte, idx int) uint16 { return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8 } +func load_le_byte2_uint16_idx2_inv(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB` + return uint16(s[(idx<<1)+1])<<8 | uint16(s[idx<<1]) +} + func load_le_byte4_uint32_idx4(s []byte, idx int) uint32 { // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]` return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24 } +func load_le_byte4_uint32_idx4_inv(s []byte, idx int) uint32 { + // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]` + return uint32(s[(idx<<2)+3])<<24 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+1])<<8 | uint32(s[idx<<2]) +} + func load_le_byte8_uint64_idx8(s []byte, idx int) uint64 { // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]` return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56 } +func load_le_byte8_uint64_idx8_inv(s []byte, idx int) uint64 { + // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]` + return uint64(s[(idx<<3)+7])<<56 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+1])<<8 | uint64(s[idx<<3]) +} + func load_be_byte2_uint16_idx2(s []byte, idx int) uint16 { // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB` return uint16(s[idx<<1])<<8 | uint16(s[(idx<<1)+1]) } +func load_be_byte2_uint16_idx2_inv(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB` + return uint16(s[(idx<<1)+1]) | uint16(s[idx<<1])<<8 +} + func load_be_byte4_uint32_idx4(s []byte, idx int) uint32 { // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W` return uint32(s[idx<<2])<<24 | uint32(s[(idx<<2)+1])<<16 | uint32(s[(idx<<2)+2])<<8 | uint32(s[(idx<<2)+3])