1
0
mirror of https://github.com/golang/go synced 2024-09-28 19:24:29 -06:00

cmd/compile: Use XORL and X15 for zeroing in ggen's zerorange on AMD64

Prefer a SSE store from X15 over XORL REG REG -> MOVQ.

Use XORL REG, REG to setup 0 for REP STOS.

Remove the nacl related block.

Intel Alder Lake 12600k (Linux):

name                      old time/op    new time/op    delta
BinaryTree17-16              1.52s ± 1%     1.52s ± 1%    ~
(p=0.932 n=12+12)
Fannkuch11-16                1.40s ± 0%     1.46s ± 0%  +4.12%
(p=0.000 n=12+11)
FmtFprintfEmpty-16          13.0ns ± 1%    13.2ns ± 1%  +1.37%
(p=0.000 n=11+12)
FmtFprintfString-16         24.0ns ± 1%    23.2ns ± 1%  -3.53%
(p=0.000 n=12+11)
FmtFprintfInt-16            27.6ns ± 1%    25.5ns ± 1%  -7.53%
(p=0.000 n=12+12)
FmtFprintfIntInt-16         43.1ns ± 1%    41.4ns ± 1%  -4.00%
(p=0.000 n=12+12)
FmtFprintfPrefixedInt-16    56.8ns ± 0%    54.8ns ± 1%  -3.49%
(p=0.000 n=11+12)
FmtFprintfFloat-16          59.0ns ± 0%    59.7ns ± 1%  +1.11%
(p=0.000 n=12+12)
FmtManyArgs-16               159ns ± 1%     160ns ± 1%    ~
(p=0.070 n=12+12)
GobDecode-16                2.37ms ± 2%    2.39ms ± 1%    ~
(p=0.059 n=12+11)
GobEncode-16                1.99ms ± 2%    2.00ms ± 2%    ~
(p=0.291 n=12+12)
Gzip-16                     98.7ms ± 1%    99.2ms ± 0%  +0.51%
(p=0.024 n=12+12)
Gunzip-16                   13.4ms ± 0%    13.5ms ± 0%  +0.20%
(p=0.001 n=11+12)
HTTPClientServer-16         27.0µs ± 6%    26.5µs ± 4%    ~
(p=0.266 n=12+12)
JSONEncode-16               3.41ms ± 5%    3.44ms ± 2%    ~
(p=0.291 n=12+12)
JSONDecode-16               16.6ms ± 1%    16.6ms ± 2%    ~
(p=0.872 n=10+12)
Mandelbrot200-16            1.78ms ± 0%    1.78ms ± 0%    ~
(p=0.514 n=12+12)
GoParse-16                  1.59ms ± 2%    1.57ms ± 2%  -0.82%
(p=0.016 n=12+11)
RegexpMatchEasy0_32-16      21.5ns ± 1%    21.3ns ± 0%  -1.10%
(p=0.000 n=12+12)
RegexpMatchEasy0_1K-16      71.0ns ± 0%    69.9ns ± 0%  -1.58%
(p=0.000 n=11+11)
RegexpMatchEasy1_32-16      18.0ns ± 1%    17.3ns ± 0%  -3.38%
(p=0.000 n=12+12)
RegexpMatchEasy1_1K-16      97.8ns ± 0%    97.2ns ± 1%  -0.56%
(p=0.001 n=10+10)
RegexpMatchMedium_32-16      269ns ± 0%     270ns ± 2%    ~
(p=0.241 n=11+12)
RegexpMatchMedium_1K-16     11.4µs ± 0%    11.3µs ± 1%  -0.69%
(p=0.000 n=11+12)
RegexpMatchHard_32-16        522ns ± 0%     522ns ± 1%    ~
(p=0.811 n=12+12)
RegexpMatchHard_1K-16       15.1µs ± 2%    14.8µs ± 0%  -2.17%
(p=0.000 n=12+12)
Revcomp-16                   194ms ± 1%     195ms ± 2%    ~
(p=0.059 n=11+12)
Template-16                 22.0ms ± 2%    21.5ms ± 2%  -2.11%
(p=0.001 n=12+12)
TimeParse-16                97.3ns ± 1%    97.2ns ± 0%    ~
(p=0.217 n=11+12)
TimeFormat-16               98.2ns ± 2%    97.1ns ± 2%    ~
(p=0.101 n=12+12)
[Geo mean]                  17.7µs         17.6µs       -0.77%

name                      old speed      new speed      delta
GobDecode-16               324MB/s ± 2%   322MB/s ± 1%    ~
(p=0.058 n=12+11)
GobEncode-16               385MB/s ± 2%   383MB/s ± 2%    ~
(p=0.291 n=12+12)
Gzip-16                    197MB/s ± 1%   196MB/s ± 0%  -0.51%
(p=0.023 n=12+12)
Gunzip-16                 1.44GB/s ± 0%  1.44GB/s ± 0%  -0.20%
(p=0.001 n=11+12)
JSONEncode-16              570MB/s ± 5%   565MB/s ± 2%    ~
(p=0.291 n=12+12)
JSONDecode-16              117MB/s ± 1%   117MB/s ± 2%    ~
(p=0.885 n=10+12)
GoParse-16                36.5MB/s ± 2%  36.8MB/s ± 2%  +0.83%
(p=0.018 n=12+11)
RegexpMatchEasy0_32-16    1.49GB/s ± 1%  1.50GB/s ± 0%  +1.12%
(p=0.000 n=12+12)
RegexpMatchEasy0_1K-16    14.4GB/s ± 0%  14.6GB/s ± 0%  +1.61%
(p=0.000 n=11+11)
RegexpMatchEasy1_32-16    1.78GB/s ± 1%  1.84GB/s ± 0%  +3.50%
(p=0.000 n=12+12)
RegexpMatchEasy1_1K-16    10.5GB/s ± 0%  10.5GB/s ± 1%  +0.57%
(p=0.001 n=10+10)
RegexpMatchMedium_32-16    119MB/s ± 0%   119MB/s ± 2%    ~
(p=0.235 n=11+12)
RegexpMatchMedium_1K-16   90.1MB/s ± 0%  90.8MB/s ± 1%  +0.69%
(p=0.000 n=11+12)
RegexpMatchHard_32-16     61.3MB/s ± 0%  61.3MB/s ± 1%    ~
(p=0.765 n=12+12)
RegexpMatchHard_1K-16     67.6MB/s ± 2%  69.1MB/s ± 0%  +2.20%
(p=0.000 n=12+12)
Revcomp-16                1.31GB/s ± 1%  1.30GB/s ± 2%    ~
(p=0.059 n=11+12)
Template-16               88.3MB/s ± 2%  90.2MB/s ± 2%  +2.16%
(p=0.001 n=12+12)
[Geo mean]                 401MB/s        403MB/s       +0.49%

file                                                        before   after    Δ       %
runtime.s                                                   512467   512447   -20     -0.004%
sync.s                                                      16219    16197    -22     -0.136%
internal/singleflight.s                                     2617     2616     -1      -0.038%
internal/testlog.s                                          2157     2152     -5      -0.232%
io.s                                                        18992    18980    -12     -0.063%
text/tabwriter.s                                            8952     8913     -39     -0.436%
syscall.s                                                   85241    85220    -21     -0.025%
go/build/constraint.s                                       12763    12741    -22     -0.172%
time.s                                                      100682   100672   -10     -0.010%
context.s                                                   12316    12305    -11     -0.089%
internal/poll.s                                             45297    45114    -183    -0.404%
io/fs.s                                                     16767    16763    -4      -0.024%
crypto/hmac.s                                               2546     2537     -9      -0.353%
os.s                                                        53983    53964    -19     -0.035%
os/exec.s                                                   25723    25710    -13     -0.051%
os/user.s                                                   12166    12133    -33     -0.271%
debug/gosym.s                                               36980    36948    -32     -0.087%
database/sql.s                                              90990    90863    -127    -0.140%
archive/zip.s                                               52485    52481    -4      -0.008%
debug/dwarf.s                                               117251   117219   -32     -0.027%
encoding/json.s                                             95637    95579    -58     -0.061%
net.s                                                       278084   278056   -28     -0.010%
log.s                                                       12153    12121    -32     -0.263%
vendor/golang.org/x/net/http2/hpack.s                       22562    22552    -10     -0.044%
mime.s                                                      32872    32851    -21     -0.064%
vendor/golang.org/x/crypto/cryptobyte.s                     32035    32024    -11     -0.034%
go/token.s                                                  13689    13645    -44     -0.321%
image/gif.s                                                 22700    22668    -32     -0.141%
text/template/parse.s                                       81696    81683    -13     -0.016%
image/png.s                                                 37704    37692    -12     -0.032%
go/ast.s                                                    63753    63751    -2      -0.003%
internal/dag.s                                              13123    13122    -1      -0.008%
crypto/x509.s                                               137641   137635   -6      -0.004%
text/template.s                                             106615   106592   -23     -0.022%
os/signal.s                                                 7658     7651     -7      -0.091%
go/printer.s                                                90393    90384    -9      -0.010%
runtime/trace.s                                             2844     2819     -25     -0.879%
go/parser.s                                                 111432   111144   -288    -0.258%
html/template.s                                             91633    91619    -14     -0.015%
log/syslog.s                                                6612     6593     -19     -0.287%
net/internal/socktest.s                                     15715    15684    -31     -0.197%
runtime/pprof.s                                             70273    70177    -96     -0.137%
crypto/tls.s                                                288762   288684   -78     -0.027%
testing.s                                                   112376   112300   -76     -0.068%
internal/fuzz.s                                             89544    89535    -9      -0.010%
net/smtp.s                                                  11357    11325    -32     -0.282%
vendor/golang.org/x/net/nettest.s                           27449    27361    -88     -0.321%
testing/internal/testdeps.s                                 6384     6369     -15     -0.235%
go/types.s                                                  484464   484324   -140    -0.029%
cmd/internal/buildid.s                                      17646    17625    -21     -0.119%
go/internal/gccgoimporter.s                                 44931    44920    -11     -0.024%
go/internal/srcimporter.s                                   9989     9983     -6      -0.060%
cmd/api.s                                                   35098    35075    -23     -0.066%
net/http.s                                                  551134   550680   -454    -0.082%
cmd/internal/obj.s                                          121795   121750   -45     -0.037%
cmd/compile/internal/syntax.s                               170092   170075   -17     -0.010%
expvar.s                                                    8959     8945     -14     -0.156%
net/http/httptest.s                                         16201    16198    -3      -0.019%
net/http/httputil.s                                         44379    44339    -40     -0.090%
net/http/fcgi.s                                             18105    18099    -6      -0.033%
cmd/compile/internal/logopt.s                               9916     9905     -11     -0.111%
cmd/vendor/golang.org/x/tools/cover.s                       9722     9720     -2      -0.021%
cmd/compile/internal/base.s                                 38986    38982    -4      -0.010%
cmd/covdata.s                                               39190    39182    -8      -0.020%
cmd/go/internal/fsys.s                                      17948    17938    -10     -0.056%
cmd/dist.s                                                  169725   169616   -109    -0.064%
cmd/compile/internal/types.s                                74666    74639    -27     -0.036%
cmd/go/internal/par.s                                       4517     4516     -1      -0.022%
cmd/go/internal/lockedfile.s                                22412    22410    -2      -0.009%
cmd/compile/internal/types2.s                               476715   476596   -119    -0.025%
cmd/go/internal/trace.s                                     4888     4877     -11     -0.225%
cmd/vendor/golang.org/x/mod/module.s                        20445    20415    -30     -0.147%
cmd/vendor/golang.org/x/mod/sumdb/dirhash.s                 3596     3587     -9      -0.250%
cmd/go/internal/cache.s                                     23871    23839    -32     -0.134%
cmd/vendor/golang.org/x/mod/zip.s                           36602    36567    -35     -0.096%
cmd/vendor/golang.org/x/mod/sumdb.s                         27669    27552    -117    -0.423%
cmd/compile/internal/ir.s                                   253597   253590   -7      -0.003%
cmd/go/internal/vcs.s                                       45027    45025    -2      -0.004%
cmd/go/internal/modfetch/codehost.s                         80672    80583    -89     -0.110%
cmd/go/internal/modindex.s                                  79273    79271    -2      -0.003%
cmd/go/internal/modconv.s                                   14793    14792    -1      -0.007%
cmd/gofmt.s                                                 29600    29587    -13     -0.044%
cmd/go/internal/modfetch.s                                  111067   111017   -50     -0.045%
cmd/go/internal/vcweb.s                                     37882    37872    -10     -0.026%
cmd/compile/internal/typecheck.s                            319852   319834   -18     -0.006%
cmd/vendor/golang.org/x/term.s                              24398    24338    -60     -0.246%
cmd/vendor/github.com/google/pprof/profile.s                149107   149103   -4      -0.003%
cmd/go/internal/modload.s                                   275078   275020   -58     -0.021%
cmd/vendor/github.com/ianlancetaylor/demangle.s             264051   264013   -38     -0.014%
cmd/compile/internal/staticdata.s                           14019    14011    -8      -0.057%
cmd/go/internal/modcmd.s                                    47591    47582    -9      -0.019%
cmd/vendor/github.com/google/pprof/internal/binutils.s      37978    37926    -52     -0.137%
cmd/vendor/golang.org/x/tools/go/analysis/passes/buildtag.s 6880     6864     -16     -0.233%
cmd/go/internal/work.s                                      287700   287637   -63     -0.022%
cmd/vendor/github.com/google/pprof/internal/symbolizer.s    10171    10138    -33     -0.324%
cmd/go/internal/modget.s                                    58314    58250    -64     -0.110%
cmd/go/internal/test.s                                      52538    52534    -4      -0.008%
cmd/trace.s                                                 100245   100242   -3      -0.003%
cmd/vendor/golang.org/x/tools/go/ast/astutil.s              50731    50724    -7      -0.014%
cmd/vendor/golang.org/x/tools/internal/facts.s              13018    13011    -7      -0.054%
cmd/link/internal/ld.s                                      562438   562377   -61     -0.011%
cmd/vendor/github.com/google/pprof/internal/driver.s        148389   148338   -51     -0.034%
cmd/compile/internal/ssa.s                                  3639799  3639727  -72     -0.002%
cmd/compile/internal/ssagen.s                               359076   359028   -48     -0.013%
cmd/compile/internal/amd64.s                                31084    30582    -502    -1.615%
cmd/compile/internal/noder.s                                407972   407949   -23     -0.006%
cmd/compile/internal/gc.s                                   20101    20069    -32     -0.159%
total                                                       20771294 20766881 -4413   -0.021%

Change-Id: I2a4b01449e94906fa1ed3fb96a790977466368d9
Reviewed-on: https://go-review.googlesource.com/c/go/+/453536
Reviewed-by: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Dmitri Shuralyov <dmitshur@golang.org>
Run-TryBot: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Jakub Ciolek 2022-11-27 16:36:35 +01:00 committed by Gopher Robot
parent 2cac7e89da
commit 326df693d7

View File

@ -5,7 +5,6 @@
package amd64
import (
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
"cmd/compile/internal/objw"
"cmd/compile/internal/types"
@ -63,26 +62,8 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.
return p
}
if cnt%int64(types.RegSize) != 0 {
// should only happen with nacl
if cnt%int64(types.PtrSize) != 0 {
base.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
}
if *state&r13 == 0 {
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
*state |= r13
}
p = pp.Append(p, x86.AMOVL, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off)
off += int64(types.PtrSize)
cnt -= int64(types.PtrSize)
}
if cnt == 8 {
if *state&r13 == 0 {
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
*state |= r13
}
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off)
} else if !isPlan9 && cnt <= int64(8*types.RegSize) {
for i := int64(0); i < cnt/16; i++ {
p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16)
@ -120,7 +101,7 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0)
// Set up the REPSTOSQ and kick it off.
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
p = pp.Append(p, x86.AXORL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_AX, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)