mirror of
https://github.com/golang/go
synced 2024-11-22 19:54:39 -07:00
4481042c43
This modifies some existing rules to allow more prefixed instructions to be generated when using GOPPC64=power10. Some rules also check if PCRel is available, which is currently supported for linux/ppc64le and linux/ppc64 (internal linking only).

Prior to p10, DS-offset loads and stores had a 16 bit size limit for the offset field. If the offset of the data for a load or store was beyond this range, then an indexed load or store would be selected by the rules. In p10 the assembler can generate prefixed instructions in this case, but does not if an indexed instruction was selected during the lowering pass.

This allows many more cases to use prefixed loads or stores, reducing function sizes and improving performance in some cases where the code change happens in key loops.

For example, in strconv BenchmarkAppendQuoteRune, before this change:

    12c5e4: 15 00 10 06  pla r10,1425660
    12c5e8: fc c0 40 39
    12c5ec: 00 00 6a e8  ld r3,0(r10)
    12c5f0: 10 00 aa e8  ld r5,16(r10)

After this change:

    12a828: 15 00 10 04  pld r3,1433272
    12a82c: b8 de 60 e4
    12a830: 15 00 10 04  pld r5,1433280
    12a834: c0 de a0 e4

The second sequence performs better.

A testcase was added to verify that the rules correctly select a load or store based on the offset and on whether the target is power10 or earlier.

Change-Id: I4335fed0bd9b8aba8a4f84d69b89f819cc464846
Reviewed-on: https://go-review.googlesource.com/c/go/+/477398
Reviewed-by: Heschi Kreinick <heschi@google.com>
Reviewed-by: Archana Ravindar <aravind5@in.ibm.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Paul Murphy <murp@ibm.com>
72 lines
2.5 KiB
Go
72 lines
2.5 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2023 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
// big1 wraps an array of 1<<30 - 1 uint32 values. The array is large
// enough that the constant indexes used by the tests in this file
// (up to 1<<29) are in range, giving byte offsets of up to 1<<31.
type big1 struct {
	w [1<<30 - 1]uint32
}
|
|
// big2 wraps an array of 1<<29 - 1 uint64 values. The array is large
// enough that the constant indexes used by the tests in this file
// (up to 1<<28) are in range, giving byte offsets of up to 1<<31.
type big2 struct {
	d [1<<29 - 1]uint64
}
|
|
|
|
func loadLargeOffset(sw *big1, sd *big2) (uint32, uint64) {
|
|
|
|
// ppc64x:`MOVWZ\s+[0-9]+\(R[0-9]+\)`,-`ADD`
|
|
a3 := sw.w[1<<10]
|
|
// ppc64le/power10:`MOVWZ\s+[0-9]+\(R[0-9]+\),\sR[0-9]+`,-`ADD`
|
|
// ppc64x/power9:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
|
|
// ppc64x/power8:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
|
|
b3 := sw.w[1<<16]
|
|
// ppc64le/power10:`MOVWZ\s+[0-9]+\(R[0-9]+\),\sR[0-9]+`,-`ADD`
|
|
// ppc64x/power9:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
|
|
// ppc64x/power8:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
|
|
c3 := sw.w[1<<28]
|
|
// ppc64x:`MOVWZ\s+\(R[0-9]+\)\(R[0-9]+\),\sR[0-9]+`
|
|
d3 := sw.w[1<<29]
|
|
// ppc64x:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
|
|
a4 := sd.d[1<<10]
|
|
// ppc64le/power10:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
|
|
// ppc64x/power9:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
|
|
// ppc64x/power8:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
|
|
b4 := sd.d[1<<16]
|
|
// ppc64le/power10`:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
|
|
// ppc64x/power9:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
|
|
// ppc64x/power8:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
|
|
c4 := sd.d[1<<27]
|
|
// ppc64x:`MOVD\s+\(R[0-9]+\)\(R[0-9]+\),\sR[0-9]+`
|
|
d4 := sd.d[1<<28]
|
|
|
|
return a3 + b3 + c3 + d3, a4 + b4 + c4 + d4
|
|
}
|
|
|
|
func storeLargeOffset(sw *big1, sd *big2) {
|
|
// ppc64x:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
|
|
sw.w[1<<10] = uint32(10)
|
|
// ppc64le/power10:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
|
|
// ppc64x/power9:`MOVW\s+R[0-9]+\,\s\(R[0-9]+\)`,`ADD`
|
|
// ppc64x/power8:`MOVW\s+R[0-9]+\,\s\(R[0-9]+\)`,`ADD`
|
|
sw.w[1<<16] = uint32(20)
|
|
// ppc64le/power10:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
|
|
// ppc64x/power9:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
|
|
// ppc64x/power8:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
|
|
sw.w[1<<28] = uint32(30)
|
|
// ppc64x:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`
|
|
sw.w[1<<29] = uint32(40)
|
|
// ppc64x:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
|
|
sd.d[1<<10] = uint64(40)
|
|
// ppc64le/power10:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
|
|
// ppc64x/power9:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
|
|
// ppc64x/power8:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
|
|
sd.d[1<<16] = uint64(50)
|
|
// ppc64le/power10`:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
|
|
// ppc64x/power9:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
|
|
// ppc64x/power8:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
|
|
sd.d[1<<27] = uint64(60)
|
|
// ppc64x:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`
|
|
sd.d[1<<28] = uint64(70)
|
|
}
|