1
0
mirror of https://github.com/golang/go synced 2024-11-23 04:20:03 -07:00

runtime: add wasm bulk memory operations

The existing implementation uses loops to implement bulk memory
operations such as memcpy and memclr. Now that bulk memory operations
have been standardized and are implemented in all major browsers and
engines (see https://webassembly.org/roadmap/), we should use them
to improve performance.

Updates #28360

Change-Id: I28df0e0350287d5e7e1d1c09a4064ea1054e7575
Reviewed-on: https://go-review.googlesource.com/c/go/+/444935
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Richard Musiol <neelance@gmail.com>
Reviewed-by: David Chase <drchase@google.com>
Auto-Submit: Richard Musiol <neelance@gmail.com>
Reviewed-by: Richard Musiol <neelance@gmail.com>
This commit is contained in:
Garet Halliday 2022-10-21 22:22:12 -05:00 committed by Gopher Robot
parent 599a1e40c6
commit 50557edf10
15 changed files with 59 additions and 347 deletions

View File

@ -58,10 +58,6 @@ var Syms struct {
// Wasm
WasmDiv *obj.LSym
// Wasm
WasmMove *obj.LSym
// Wasm
WasmZero *obj.LSym
// Wasm
WasmTruncS *obj.LSym
// Wasm
WasmTruncU *obj.LSym

View File

@ -234,24 +234,9 @@
(I64Store [s-8] dst (I64Load [s-8] src mem)
(I64Store dst (I64Load src mem) mem))
// Adjust moves to be a multiple of 16 bytes.
(Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 <= 8 =>
(Move [s-s%16]
(OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16])
(I64Store dst (I64Load src mem) mem))
(Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 > 8 =>
(Move [s-s%16]
(OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16])
(I64Store [8] dst (I64Load [8] src mem)
(I64Store dst (I64Load src mem) mem)))
// Large copying uses helper.
(Move [s] dst src mem) && s%8 == 0 && logLargeCopy(v, s) =>
(LoweredMove [s/8] dst src mem)
(Move [s] dst src mem) && logLargeCopy(v, s) =>
(LoweredMove [s] dst src mem)
// Lowering Zero instructions
(Zero [0] _ mem) => mem
@ -274,7 +259,7 @@
(I64Store32 destptr (I64Const [0]) mem))
// Strip off any fractional word zeroing.
(Zero [s] destptr mem) && s%8 != 0 && s > 8 =>
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && s < 32 =>
(Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
(I64Store destptr (I64Const [0]) mem))
@ -293,8 +278,8 @@
(I64Store destptr (I64Const [0]) mem))))
// Large zeroing uses helper.
(Zero [s] destptr mem) && s%8 == 0 && s > 32 =>
(LoweredZero [s/8] destptr mem)
(Zero [s] destptr mem) =>
(LoweredZero [s] destptr mem)
// Lowering constants
(Const64 ...) => (I64Const ...)

View File

@ -126,8 +126,8 @@ func init() {
{name: "LoweredInterCall", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem
{name: "LoweredAddr", argLength: 1, reg: gp11, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // returns base+aux+auxint, arg0=base
{name: "LoweredMove", argLength: 3, reg: regInfo{inputs: []regMask{gp, gp}}, aux: "Int64"}, // large move. arg0=dst, arg1=src, arg2=mem, auxint=len/8, returns mem
{name: "LoweredZero", argLength: 2, reg: regInfo{inputs: []regMask{gp}}, aux: "Int64"}, // large zeroing. arg0=start, arg1=mem, auxint=len/8, returns mem
{name: "LoweredMove", argLength: 3, reg: regInfo{inputs: []regMask{gp, gp}}, aux: "Int64"}, // large move. arg0=dst, arg1=src, arg2=mem, auxint=len, returns mem
{name: "LoweredZero", argLength: 2, reg: regInfo{inputs: []regMask{gp}}, aux: "Int64"}, // large zeroing. arg0=start, arg1=mem, auxint=len, returns mem
{name: "LoweredGetClosurePtr", reg: gp01}, // returns wasm.REG_CTXT, the closure pointer
{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // returns the PC of the caller of the current function

View File

@ -2141,76 +2141,18 @@ func rewriteValueWasm_OpMove(v *Value) bool {
return true
}
// match: (Move [s] dst src mem)
// cond: s > 16 && s%16 != 0 && s%16 <= 8
// result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (I64Store dst (I64Load src mem) mem))
// cond: logLargeCopy(v, s)
// result: (LoweredMove [s] dst src mem)
for {
s := auxIntToInt64(v.AuxInt)
dst := v_0
src := v_1
mem := v_2
if !(s > 16 && s%16 != 0 && s%16 <= 8) {
break
}
v.reset(OpMove)
v.AuxInt = int64ToAuxInt(s - s%16)
v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
v0.AuxInt = int64ToAuxInt(s % 16)
v0.AddArg(dst)
v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
v1.AuxInt = int64ToAuxInt(s % 16)
v1.AddArg(src)
v2 := b.NewValue0(v.Pos, OpWasmI64Store, types.TypeMem)
v3 := b.NewValue0(v.Pos, OpWasmI64Load, typ.UInt64)
v3.AddArg2(src, mem)
v2.AddArg3(dst, v3, mem)
v.AddArg3(v0, v1, v2)
return true
}
// match: (Move [s] dst src mem)
// cond: s > 16 && s%16 != 0 && s%16 > 8
// result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (I64Store [8] dst (I64Load [8] src mem) (I64Store dst (I64Load src mem) mem)))
for {
s := auxIntToInt64(v.AuxInt)
dst := v_0
src := v_1
mem := v_2
if !(s > 16 && s%16 != 0 && s%16 > 8) {
break
}
v.reset(OpMove)
v.AuxInt = int64ToAuxInt(s - s%16)
v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
v0.AuxInt = int64ToAuxInt(s % 16)
v0.AddArg(dst)
v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
v1.AuxInt = int64ToAuxInt(s % 16)
v1.AddArg(src)
v2 := b.NewValue0(v.Pos, OpWasmI64Store, types.TypeMem)
v2.AuxInt = int64ToAuxInt(8)
v3 := b.NewValue0(v.Pos, OpWasmI64Load, typ.UInt64)
v3.AuxInt = int64ToAuxInt(8)
v3.AddArg2(src, mem)
v4 := b.NewValue0(v.Pos, OpWasmI64Store, types.TypeMem)
v5 := b.NewValue0(v.Pos, OpWasmI64Load, typ.UInt64)
v5.AddArg2(src, mem)
v4.AddArg3(dst, v5, mem)
v2.AddArg3(dst, v3, v4)
v.AddArg3(v0, v1, v2)
return true
}
// match: (Move [s] dst src mem)
// cond: s%8 == 0 && logLargeCopy(v, s)
// result: (LoweredMove [s/8] dst src mem)
for {
s := auxIntToInt64(v.AuxInt)
dst := v_0
src := v_1
mem := v_2
if !(s%8 == 0 && logLargeCopy(v, s)) {
if !(logLargeCopy(v, s)) {
break
}
v.reset(OpWasmLoweredMove)
v.AuxInt = int64ToAuxInt(s / 8)
v.AuxInt = int64ToAuxInt(s)
v.AddArg3(dst, src, mem)
return true
}
@ -4656,13 +4598,13 @@ func rewriteValueWasm_OpZero(v *Value) bool {
return true
}
// match: (Zero [s] destptr mem)
// cond: s%8 != 0 && s > 8
// cond: s%8 != 0 && s > 8 && s < 32
// result: (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8]) (I64Store destptr (I64Const [0]) mem))
for {
s := auxIntToInt64(v.AuxInt)
destptr := v_0
mem := v_1
if !(s%8 != 0 && s > 8) {
if !(s%8 != 0 && s > 8 && s < 32) {
break
}
v.reset(OpZero)
@ -4738,21 +4680,16 @@ func rewriteValueWasm_OpZero(v *Value) bool {
return true
}
// match: (Zero [s] destptr mem)
// cond: s%8 == 0 && s > 32
// result: (LoweredZero [s/8] destptr mem)
// result: (LoweredZero [s] destptr mem)
for {
s := auxIntToInt64(v.AuxInt)
destptr := v_0
mem := v_1
if !(s%8 == 0 && s > 32) {
break
}
v.reset(OpWasmLoweredZero)
v.AuxInt = int64ToAuxInt(s / 8)
v.AuxInt = int64ToAuxInt(s)
v.AddArg2(destptr, mem)
return true
}
return false
}
func rewriteValueWasm_OpZeroExt16to32(v *Value) bool {
v_0 := v.Args[0]

View File

@ -206,8 +206,6 @@ func InitConfig() {
}
// Wasm (all asm funcs with special ABIs)
ir.Syms.WasmMove = typecheck.LookupRuntimeVar("wasmMove")
ir.Syms.WasmZero = typecheck.LookupRuntimeVar("wasmZero")
ir.Syms.WasmDiv = typecheck.LookupRuntimeVar("wasmDiv")
ir.Syms.WasmTruncS = typecheck.LookupRuntimeVar("wasmTruncS")
ir.Syms.WasmTruncU = typecheck.LookupRuntimeVar("wasmTruncU")

View File

@ -149,14 +149,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
getValue32(s, v.Args[0])
getValue32(s, v.Args[1])
i32Const(s, int32(v.AuxInt))
p := s.Prog(wasm.ACall)
p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: ir.Syms.WasmMove}
s.Prog(wasm.AMemoryCopy)
case ssa.OpWasmLoweredZero:
getValue32(s, v.Args[0])
i32Const(s, 0)
i32Const(s, int32(v.AuxInt))
p := s.Prog(wasm.ACall)
p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: ir.Syms.WasmZero}
s.Prog(wasm.AMemoryFill)
case ssa.OpWasmLoweredNilCheck:
getValue64(s, v.Args[0])

View File

@ -231,6 +231,17 @@ const (
AI64TruncSatF64S
AI64TruncSatF64U
AMemoryInit
ADataDrop
AMemoryCopy
AMemoryFill
ATableInit
AElemDrop
ATableCopy
ATableGrow
ATableSize
ATableFill
ALast // Sentinel: End of low-level WebAssembly instructions.
ARESUMEPOINT

View File

@ -195,6 +195,16 @@ var Anames = []string{
"I64TruncSatF32U",
"I64TruncSatF64S",
"I64TruncSatF64U",
"MemoryInit",
"DataDrop",
"MemoryCopy",
"MemoryFill",
"TableInit",
"ElemDrop",
"TableCopy",
"TableGrow",
"TableSize",
"TableFill",
"Last",
"RESUMEPOINT",
"CALLNORESUME",

View File

@ -799,8 +799,6 @@ var notUsePC_B = map[string]bool{
"wasm_export_resume": true,
"wasm_export_getsp": true,
"wasm_pc_f_loop": true,
"runtime.wasmMove": true,
"runtime.wasmZero": true,
"runtime.wasmDiv": true,
"runtime.wasmTruncS": true,
"runtime.wasmTruncU": true,
@ -844,7 +842,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
// Some functions use a special calling convention.
switch s.Name {
case "_rt0_wasm_js", "wasm_export_run", "wasm_export_resume", "wasm_export_getsp", "wasm_pc_f_loop",
"runtime.wasmMove", "runtime.wasmZero", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
"runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
varDecls = []*varDecl{}
useAssemblyRegMap()
case "memchr", "memcmp":
@ -1088,7 +1086,11 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
writeUleb128(w, align(p.As))
writeUleb128(w, uint64(p.To.Offset))
case ACurrentMemory, AGrowMemory:
case ACurrentMemory, AGrowMemory, AMemoryFill:
w.WriteByte(0x00)
case AMemoryCopy:
w.WriteByte(0x00)
w.WriteByte(0x00)
}

View File

@ -60,8 +60,6 @@ var wasmFuncTypes = map[string]*wasmFuncType{
"wasm_export_resume": {Params: []byte{}}, //
"wasm_export_getsp": {Results: []byte{I32}}, // sp
"wasm_pc_f_loop": {Params: []byte{}}, //
"runtime.wasmMove": {Params: []byte{I32, I32, I32}}, // dst, src, len
"runtime.wasmZero": {Params: []byte{I32, I32}}, // ptr, len
"runtime.wasmDiv": {Params: []byte{I64, I64}, Results: []byte{I64}}, // x, y -> x/y
"runtime.wasmTruncS": {Params: []byte{F64}, Results: []byte{I64}}, // x -> int(x)
"runtime.wasmTruncU": {Params: []byte{F64}, Results: []byte{I64}}, // x -> uint(x)

View File

@ -320,10 +320,8 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
I64Load stackArgs+16(FP); \
I32WrapI64; \
I64Load stackArgsSize+24(FP); \
I64Const $3; \
I64ShrU; \
I32WrapI64; \
Call runtime·wasmMove(SB); \
MemoryCopy; \
End; \
\
MOVD f+8(FP), CTXT; \

View File

@ -11,29 +11,10 @@ TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT, $0-16
MOVD ptr+0(FP), R0
MOVD n+8(FP), R1
loop:
Loop
Get R1
I64Eqz
If
RET
End
Get R0
I32WrapI64
I64Const $0
I64Store8 $0
Get R0
I64Const $1
I64Add
Set R0
I32Const $0
Get R1
I64Const $1
I64Sub
Set R1
Br loop
End
UNDEF
I32WrapI64
MemoryFill
RET

View File

@ -12,143 +12,11 @@ TEXT runtime·memmove(SB), NOSPLIT, $0-24
MOVD from+8(FP), R1
MOVD n+16(FP), R2
Get R0
Get R1
I64LtU
If // forward
exit_forward_64:
Block
loop_forward_64:
Loop
Get R2
I64Const $8
I64LtU
BrIf exit_forward_64
MOVD 0(R1), 0(R0)
Get R0
I64Const $8
I64Add
Set R0
Get R1
I64Const $8
I64Add
Set R1
Get R2
I64Const $8
I64Sub
Set R2
Br loop_forward_64
End
End
loop_forward_8:
Loop
Get R2
I64Eqz
If
RET
End
Get R0
I32WrapI64
I64Load8U (R1)
I64Store8 $0
Get R0
I64Const $1
I64Add
Set R0
Get R1
I64Const $1
I64Add
Set R1
Get R2
I64Const $1
I64Sub
Set R2
Br loop_forward_8
End
Else
// backward
Get R0
Get R2
I64Add
Set R0
Get R1
Get R2
I64Add
Set R1
exit_backward_64:
Block
loop_backward_64:
Loop
Get R2
I64Const $8
I64LtU
BrIf exit_backward_64
Get R0
I64Const $8
I64Sub
Set R0
Get R1
I64Const $8
I64Sub
Set R1
Get R2
I64Const $8
I64Sub
Set R2
MOVD 0(R1), 0(R0)
Br loop_backward_64
End
End
loop_backward_8:
Loop
Get R2
I64Eqz
If
RET
End
Get R0
I64Const $1
I64Sub
Set R0
Get R1
I64Const $1
I64Sub
Set R1
Get R2
I64Const $1
I64Sub
Set R2
Get R0
I32WrapI64
I64Load8U (R1)
I64Store8 $0
Br loop_backward_8
End
End
UNDEF
Get R2
I32WrapI64
MemoryCopy
RET

View File

@ -16,10 +16,6 @@ type m0Stack struct {
var wasmStack m0Stack
func wasmMove()
func wasmZero()
func wasmDiv()
func wasmTruncS()

View File

@ -4,73 +4,6 @@
#include "textflag.h"
TEXT runtime·wasmMove(SB), NOSPLIT, $0-0
loop:
Loop
// *dst = *src
Get R0
Get R1
I64Load $0
I64Store $0
// n--
Get R2
I32Const $1
I32Sub
Tee R2
// n == 0
I32Eqz
If
Return
End
// dst += 8
Get R0
I32Const $8
I32Add
Set R0
// src += 8
Get R1
I32Const $8
I32Add
Set R1
Br loop
End
UNDEF
TEXT runtime·wasmZero(SB), NOSPLIT, $0-0
loop:
Loop
// *dst = 0
Get R0
I64Const $0
I64Store $0
// n--
Get R1
I32Const $1
I32Sub
Tee R1
// n == 0
I32Eqz
If
Return
End
// dst += 8
Get R0
I32Const $8
I32Add
Set R0
Br loop
End
UNDEF
TEXT runtime·wasmDiv(SB), NOSPLIT, $0-0
Get R0
I64Const $-0x8000000000000000