mirror of
https://github.com/golang/go
synced 2024-11-12 01:50:22 -07:00
cmd, runtime: remove PC_F & PC_B globals on Wasm
Following the previous CL, this removes more global variables on Wasm. PC_B is used mostly for intra-function jumps, and for a function telling its callee where to start or resume. This usage can be served by a parameter. The top level loop (wasm_pc_f_loop) uses PC_B for resuming a function. This value is either set by gogo, or loaded from the Go stack at function return. Instead of loading PC_B at each function return, we could make gogo stores PC_B at the same stack location, and let the top level loop do the load. This way, we don't need to use global PC_B to communicate with the top level loop, and we can replace global PC_B with a parameter. PC_F is similar. It is even more so in that the only reader is the top level loop. Let the top level loop read it from the stack, and we can get rid of PC_F entirely. PC_F and PC_B are used less entensively as SP, so this CL has smaller performance gain. Running on Chrome 74.0.3729.108 on Linux/AMD64, name old time/op new time/op delta BinaryTree17 16.6s ± 0% 16.2s ± 1% -2.59% (p=0.016 n=4+5) Fannkuch11 11.1s ± 1% 10.8s ± 0% -2.65% (p=0.008 n=5+5) FmtFprintfEmpty 231ns ± 1% 217ns ± 0% -6.06% (p=0.008 n=5+5) FmtFprintfString 407ns ± 3% 375ns ± 2% -7.81% (p=0.008 n=5+5) FmtFprintfInt 466ns ± 2% 430ns ± 0% -7.79% (p=0.016 n=5+4) FmtFprintfIntInt 719ns ± 2% 673ns ± 2% -6.37% (p=0.008 n=5+5) FmtFprintfPrefixedInt 706ns ± 1% 676ns ± 3% -4.31% (p=0.008 n=5+5) FmtFprintfFloat 1.01µs ± 1% 0.97µs ± 1% -4.30% (p=0.008 n=5+5) FmtManyArgs 2.67µs ± 1% 2.51µs ± 1% -5.95% (p=0.008 n=5+5) GobDecode 30.7ms ± 9% 31.3ms ±34% ~ (p=0.222 n=5+5) GobEncode 24.2ms ±23% 20.2ms ± 0% -16.36% (p=0.016 n=5+4) Gzip 852ms ± 0% 823ms ± 0% -3.38% (p=0.016 n=4+5) Gunzip 160ms ± 1% 151ms ± 1% -5.37% (p=0.008 n=5+5) JSONEncode 35.7ms ± 1% 34.3ms ± 1% -3.81% (p=0.008 n=5+5) JSONDecode 247ms ± 8% 254ms ± 7% ~ (p=0.548 n=5+5) Mandelbrot200 5.39ms ± 0% 5.41ms ± 0% +0.42% (p=0.008 n=5+5) GoParse 18.5ms ± 1% 18.3ms ± 2% ~ (p=0.343 n=4+4) RegexpMatchEasy0_32 424ns ± 2% 397ns ± 0% -6.23% (p=0.008 n=5+5) RegexpMatchEasy0_1K 2.88µs ± 0% 2.86µs ± 1% ~ (p=0.079 n=5+5) RegexpMatchEasy1_32 395ns ± 2% 370ns ± 1% -6.23% (p=0.008 n=5+5) RegexpMatchEasy1_1K 3.26µs ± 0% 3.19µs ± 1% -2.06% (p=0.008 n=5+5) RegexpMatchMedium_32 564ns ± 1% 532ns ± 0% -5.71% (p=0.008 n=5+5) RegexpMatchMedium_1K 146µs ± 2% 140µs ± 1% -4.62% (p=0.008 n=5+5) RegexpMatchHard_32 8.47µs ± 1% 7.91µs ± 1% -6.65% (p=0.008 n=5+5) RegexpMatchHard_1K 253µs ± 1% 236µs ± 2% -6.66% (p=0.008 n=5+5) Revcomp 1.78s ± 4% 1.76s ± 5% ~ (p=1.000 n=5+5) Template 292ms ±29% 269ms ± 5% ~ (p=0.690 n=5+5) TimeParse 1.61µs ± 4% 1.54µs ± 1% -4.42% (p=0.008 n=5+5) TimeFormat 1.66µs ± 3% 1.58µs ± 1% -5.22% (p=0.008 n=5+5) [Geo mean] 232µs 221µs -4.54% name old speed new speed delta GobDecode 25.0MB/s ± 8% 25.1MB/s ±27% ~ (p=0.222 n=5+5) GobEncode 32.8MB/s ±21% 38.0MB/s ± 0% +15.84% (p=0.016 n=5+4) Gzip 22.8MB/s ± 0% 23.6MB/s ± 0% +3.49% (p=0.016 n=4+5) Gunzip 121MB/s ± 1% 128MB/s ± 1% +5.68% (p=0.008 n=5+5) JSONEncode 54.4MB/s ± 1% 56.5MB/s ± 1% +3.97% (p=0.008 n=5+5) JSONDecode 7.88MB/s ± 8% 7.65MB/s ± 8% ~ (p=0.548 n=5+5) GoParse 3.07MB/s ± 8% 3.00MB/s ±22% ~ (p=0.579 n=5+5) RegexpMatchEasy0_32 75.6MB/s ± 2% 80.5MB/s ± 0% +6.58% (p=0.008 n=5+5) RegexpMatchEasy0_1K 356MB/s ± 0% 358MB/s ± 1% ~ (p=0.095 n=5+5) RegexpMatchEasy1_32 81.1MB/s ± 2% 86.5MB/s ± 1% +6.69% (p=0.008 n=5+5) RegexpMatchEasy1_1K 314MB/s ± 0% 320MB/s ± 0% +2.10% (p=0.008 n=5+5) RegexpMatchMedium_32 1.77MB/s ± 1% 1.88MB/s ± 0% +6.09% (p=0.016 n=5+4) RegexpMatchMedium_1K 6.99MB/s ± 2% 7.33MB/s ± 1% +4.83% (p=0.008 n=5+5) RegexpMatchHard_32 3.78MB/s ± 1% 4.04MB/s ± 1% +7.04% (p=0.008 n=5+5) RegexpMatchHard_1K 4.04MB/s ± 1% 4.33MB/s ± 2% +7.17% (p=0.008 n=5+5) Revcomp 143MB/s ± 4% 145MB/s ± 5% ~ (p=1.000 n=5+5) Template 6.77MB/s ±24% 7.22MB/s ± 5% ~ (p=0.690 n=5+5) [Geo mean] 24.4MB/s 25.4MB/s +4.18% Change-Id: Ib80716e62992aec28b2c4a96af280c278f83aa49 Reviewed-on: https://go-review.googlesource.com/c/go/+/173980 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Richard Musiol <neelance@gmail.com>
This commit is contained in:
parent
7ed7669c0d
commit
496b8dbbfc
@ -250,9 +250,7 @@ const (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
// globals
|
// globals
|
||||||
REG_PC_F = obj.RBaseWasm + iota
|
REG_SP = obj.RBaseWasm + iota // SP is currently 32-bit, until 64-bit memory operations are available
|
||||||
REG_PC_B
|
|
||||||
REG_SP // SP is currently 32-bit, until 64-bit memory operations are available
|
|
||||||
REG_CTXT
|
REG_CTXT
|
||||||
REG_g
|
REG_g
|
||||||
// RET* are used by runtime.return0 and runtime.reflectcall. These functions pass return values in registers.
|
// RET* are used by runtime.return0 and runtime.reflectcall. These functions pass return values in registers.
|
||||||
@ -296,9 +294,11 @@ const (
|
|||||||
REG_F14
|
REG_F14
|
||||||
REG_F15
|
REG_F15
|
||||||
|
|
||||||
|
REG_PC_B // also first parameter, i32
|
||||||
|
|
||||||
MAXREG
|
MAXREG
|
||||||
|
|
||||||
MINREG = REG_PC_F
|
MINREG = REG_SP
|
||||||
REGSP = REG_SP
|
REGSP = REG_SP
|
||||||
REGCTXT = REG_CTXT
|
REGCTXT = REG_CTXT
|
||||||
REGG = REG_g
|
REGG = REG_g
|
||||||
|
@ -16,8 +16,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var Register = map[string]int16{
|
var Register = map[string]int16{
|
||||||
"PC_F": REG_PC_F,
|
|
||||||
"PC_B": REG_PC_B,
|
|
||||||
"SP": REG_SP,
|
"SP": REG_SP,
|
||||||
"CTXT": REG_CTXT,
|
"CTXT": REG_CTXT,
|
||||||
"g": REG_g,
|
"g": REG_g,
|
||||||
@ -60,6 +58,8 @@ var Register = map[string]int16{
|
|||||||
"F13": REG_F13,
|
"F13": REG_F13,
|
||||||
"F14": REG_F14,
|
"F14": REG_F14,
|
||||||
"F15": REG_F15,
|
"F15": REG_F15,
|
||||||
|
|
||||||
|
"PC_B": REG_PC_B,
|
||||||
}
|
}
|
||||||
|
|
||||||
var registerNames []string
|
var registerNames []string
|
||||||
@ -368,20 +368,31 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset PC_B to function entry
|
|
||||||
p = appendp(p, AI32Const, constAddr(0))
|
|
||||||
p = appendp(p, ASet, regAddr(REG_PC_B))
|
|
||||||
|
|
||||||
// low-level WebAssembly call to function
|
// low-level WebAssembly call to function
|
||||||
switch jmp.To.Type {
|
switch jmp.To.Type {
|
||||||
case obj.TYPE_MEM:
|
case obj.TYPE_MEM:
|
||||||
|
if !notUsePC_B[jmp.To.Sym.Name] {
|
||||||
|
// Set PC_B parameter to function entry.
|
||||||
|
p = appendp(p, AI32Const, constAddr(0))
|
||||||
|
}
|
||||||
p = appendp(p, ACall, jmp.To)
|
p = appendp(p, ACall, jmp.To)
|
||||||
|
|
||||||
case obj.TYPE_NONE:
|
case obj.TYPE_NONE:
|
||||||
// (target PC is on stack)
|
// (target PC is on stack)
|
||||||
p = appendp(p, AI32WrapI64)
|
p = appendp(p, AI32WrapI64)
|
||||||
p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
|
p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
|
||||||
p = appendp(p, AI32ShrU)
|
p = appendp(p, AI32ShrU)
|
||||||
|
|
||||||
|
// Set PC_B parameter to function entry.
|
||||||
|
// We need to push this before pushing the target PC_F,
|
||||||
|
// so temporarily pop PC_F, using our REG_PC_B as a
|
||||||
|
// scratch register, and push it back after pushing 0.
|
||||||
|
p = appendp(p, ASet, regAddr(REG_PC_B))
|
||||||
|
p = appendp(p, AI32Const, constAddr(0))
|
||||||
|
p = appendp(p, AGet, regAddr(REG_PC_B))
|
||||||
|
|
||||||
p = appendp(p, ACallIndirect)
|
p = appendp(p, ACallIndirect)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
panic("bad target for JMP")
|
panic("bad target for JMP")
|
||||||
}
|
}
|
||||||
@ -419,20 +430,31 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
})
|
})
|
||||||
p = appendp(p, AI64Store, constAddr(0))
|
p = appendp(p, AI64Store, constAddr(0))
|
||||||
|
|
||||||
// reset PC_B to function entry
|
|
||||||
p = appendp(p, AI32Const, constAddr(0))
|
|
||||||
p = appendp(p, ASet, regAddr(REG_PC_B))
|
|
||||||
|
|
||||||
// low-level WebAssembly call to function
|
// low-level WebAssembly call to function
|
||||||
switch call.To.Type {
|
switch call.To.Type {
|
||||||
case obj.TYPE_MEM:
|
case obj.TYPE_MEM:
|
||||||
|
if !notUsePC_B[call.To.Sym.Name] {
|
||||||
|
// Set PC_B parameter to function entry.
|
||||||
|
p = appendp(p, AI32Const, constAddr(0))
|
||||||
|
}
|
||||||
p = appendp(p, ACall, call.To)
|
p = appendp(p, ACall, call.To)
|
||||||
|
|
||||||
case obj.TYPE_NONE:
|
case obj.TYPE_NONE:
|
||||||
// (target PC is on stack)
|
// (target PC is on stack)
|
||||||
p = appendp(p, AI32WrapI64)
|
p = appendp(p, AI32WrapI64)
|
||||||
p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
|
p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
|
||||||
p = appendp(p, AI32ShrU)
|
p = appendp(p, AI32ShrU)
|
||||||
|
|
||||||
|
// Set PC_B parameter to function entry.
|
||||||
|
// We need to push this before pushing the target PC_F,
|
||||||
|
// so temporarily pop PC_F, using our PC_B as a
|
||||||
|
// scratch register, and push it back after pushing 0.
|
||||||
|
p = appendp(p, ASet, regAddr(REG_PC_B))
|
||||||
|
p = appendp(p, AI32Const, constAddr(0))
|
||||||
|
p = appendp(p, AGet, regAddr(REG_PC_B))
|
||||||
|
|
||||||
p = appendp(p, ACallIndirect)
|
p = appendp(p, ACallIndirect)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
panic("bad target for CALL")
|
panic("bad target for CALL")
|
||||||
}
|
}
|
||||||
@ -465,7 +487,13 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
|
|
||||||
// jump to before the call if jmpdefer has reset the return address to the call's PC
|
// jump to before the call if jmpdefer has reset the return address to the call's PC
|
||||||
if call.To.Sym == deferreturn {
|
if call.To.Sym == deferreturn {
|
||||||
p = appendp(p, AGet, regAddr(REG_PC_B))
|
// get PC_B from -8(SP)
|
||||||
|
p = appendp(p, AGet, regAddr(REG_SP))
|
||||||
|
p = appendp(p, AI32Const, constAddr(8))
|
||||||
|
p = appendp(p, AI32Sub)
|
||||||
|
p = appendp(p, AI32Load16U, constAddr(0))
|
||||||
|
p = appendp(p, ATee, regAddr(REG_PC_B))
|
||||||
|
|
||||||
p = appendp(p, AI32Const, constAddr(call.Pc))
|
p = appendp(p, AI32Const, constAddr(call.Pc))
|
||||||
p = appendp(p, AI32Eq)
|
p = appendp(p, AI32Eq)
|
||||||
p = appendp(p, ABrIf, constAddr(0))
|
p = appendp(p, ABrIf, constAddr(0))
|
||||||
@ -487,9 +515,8 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ret.To.Type == obj.TYPE_MEM {
|
if ret.To.Type == obj.TYPE_MEM {
|
||||||
// reset PC_B to function entry
|
// Set PC_B parameter to function entry.
|
||||||
p = appendp(p, AI32Const, constAddr(0))
|
p = appendp(p, AI32Const, constAddr(0))
|
||||||
p = appendp(p, ASet, regAddr(REG_PC_B))
|
|
||||||
|
|
||||||
// low-level WebAssembly call to function
|
// low-level WebAssembly call to function
|
||||||
p = appendp(p, ACall, ret.To)
|
p = appendp(p, ACall, ret.To)
|
||||||
@ -497,16 +524,6 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
// read return PC_F from Go stack
|
|
||||||
p = appendp(p, AGet, regAddr(REG_SP))
|
|
||||||
p = appendp(p, AI32Load16U, constAddr(2))
|
|
||||||
p = appendp(p, ASet, regAddr(REG_PC_F))
|
|
||||||
|
|
||||||
// read return PC_B from Go stack
|
|
||||||
p = appendp(p, AGet, regAddr(REG_SP))
|
|
||||||
p = appendp(p, AI32Load16U, constAddr(0))
|
|
||||||
p = appendp(p, ASet, regAddr(REG_PC_B))
|
|
||||||
|
|
||||||
// SP += 8
|
// SP += 8
|
||||||
p = appendp(p, AGet, regAddr(REG_SP))
|
p = appendp(p, AGet, regAddr(REG_SP))
|
||||||
p = appendp(p, AI32Const, constAddr(8))
|
p = appendp(p, AI32Const, constAddr(8))
|
||||||
@ -771,16 +788,38 @@ func countRegisters(s *obj.LSym) (numI, numF int16) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Most of the Go functions has a single parameter (PC_B) in
|
||||||
|
// Wasm ABI. This is a list of exceptions.
|
||||||
|
var notUsePC_B = map[string]bool{
|
||||||
|
"_rt0_wasm_js": true,
|
||||||
|
"wasm_export_run": true,
|
||||||
|
"wasm_export_resume": true,
|
||||||
|
"wasm_export_getsp": true,
|
||||||
|
"wasm_pc_f_loop": true,
|
||||||
|
"runtime.wasmMove": true,
|
||||||
|
"runtime.wasmZero": true,
|
||||||
|
"runtime.wasmDiv": true,
|
||||||
|
"runtime.wasmTruncS": true,
|
||||||
|
"runtime.wasmTruncU": true,
|
||||||
|
"runtime.gcWriteBarrier": true,
|
||||||
|
"cmpbody": true,
|
||||||
|
"memeqbody": true,
|
||||||
|
"memcmp": true,
|
||||||
|
"memchr": true,
|
||||||
|
}
|
||||||
|
|
||||||
func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
||||||
w := new(bytes.Buffer)
|
w := new(bytes.Buffer)
|
||||||
|
|
||||||
hasLocalSP := false
|
hasLocalSP := false
|
||||||
|
hasPC_B := false
|
||||||
var r0, f0 int16
|
var r0, f0 int16
|
||||||
|
|
||||||
// Function starts with declaration of locals: numbers and types.
|
// Function starts with declaration of locals: numbers and types.
|
||||||
// Some functions use a special calling convention.
|
// Some functions use a special calling convention.
|
||||||
switch s.Name {
|
switch s.Name {
|
||||||
case "wasm_export_run", "runtime.wasmMove", "runtime.wasmZero", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
|
case "_rt0_wasm_js", "wasm_export_run", "wasm_export_resume", "wasm_export_getsp", "wasm_pc_f_loop",
|
||||||
|
"runtime.wasmMove", "runtime.wasmZero", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
|
||||||
writeUleb128(w, 0) // number of sets of locals
|
writeUleb128(w, 0) // number of sets of locals
|
||||||
case "memchr", "memcmp":
|
case "memchr", "memcmp":
|
||||||
writeUleb128(w, 1) // number of sets of locals
|
writeUleb128(w, 1) // number of sets of locals
|
||||||
@ -797,9 +836,10 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
default:
|
default:
|
||||||
// Normal calling convention: No WebAssembly parameters. First local variable is local SP cache.
|
// Normal calling convention: No WebAssembly parameters. First local variable is local SP cache.
|
||||||
hasLocalSP = true
|
hasLocalSP = true
|
||||||
|
hasPC_B = true
|
||||||
numI, numF := countRegisters(s)
|
numI, numF := countRegisters(s)
|
||||||
r0 = 1
|
r0 = 2
|
||||||
f0 = 1 + numI
|
f0 = 2 + numI
|
||||||
|
|
||||||
numTypes := 1
|
numTypes := 1
|
||||||
if numI > 0 {
|
if numI > 0 {
|
||||||
@ -826,6 +866,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
// Copy SP from its global variable into a local variable. Accessing a local variable is more efficient.
|
// Copy SP from its global variable into a local variable. Accessing a local variable is more efficient.
|
||||||
updateLocalSP(w)
|
updateLocalSP(w)
|
||||||
}
|
}
|
||||||
|
|
||||||
for p := s.Func.Text; p != nil; p = p.Link {
|
for p := s.Func.Text; p != nil; p = p.Link {
|
||||||
switch p.As {
|
switch p.As {
|
||||||
case AGet:
|
case AGet:
|
||||||
@ -836,10 +877,16 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
switch {
|
switch {
|
||||||
case reg == REG_SP && hasLocalSP:
|
case reg == REG_SP && hasLocalSP:
|
||||||
w.WriteByte(0x20) // local.get
|
w.WriteByte(0x20) // local.get
|
||||||
writeUleb128(w, 0) // local SP
|
writeUleb128(w, 1) // local SP
|
||||||
case reg >= REG_PC_F && reg <= REG_PAUSE:
|
case reg >= REG_SP && reg <= REG_PAUSE:
|
||||||
w.WriteByte(0x23) // global.get
|
w.WriteByte(0x23) // global.get
|
||||||
writeUleb128(w, uint64(reg-REG_PC_F))
|
writeUleb128(w, uint64(reg-REG_SP))
|
||||||
|
case reg == REG_PC_B:
|
||||||
|
if !hasPC_B {
|
||||||
|
panic(fmt.Sprintf("PC_B is not used in %s", s.Name))
|
||||||
|
}
|
||||||
|
w.WriteByte(0x20) // local.get (i32)
|
||||||
|
writeUleb128(w, 0) // local PC_B
|
||||||
case reg >= REG_R0 && reg <= REG_R15:
|
case reg >= REG_R0 && reg <= REG_R15:
|
||||||
w.WriteByte(0x20) // local.get (i64)
|
w.WriteByte(0x20) // local.get (i64)
|
||||||
writeUleb128(w, uint64(r0+(reg-REG_R0)))
|
writeUleb128(w, uint64(r0+(reg-REG_R0)))
|
||||||
@ -857,21 +904,26 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
}
|
}
|
||||||
reg := p.To.Reg
|
reg := p.To.Reg
|
||||||
switch {
|
switch {
|
||||||
case reg >= REG_PC_F && reg <= REG_PAUSE:
|
case reg >= REG_SP && reg <= REG_PAUSE:
|
||||||
if reg == REG_SP && hasLocalSP {
|
if reg == REG_SP && hasLocalSP {
|
||||||
w.WriteByte(0x22) // local.tee
|
w.WriteByte(0x22) // local.tee
|
||||||
writeUleb128(w, 0) // local SP
|
writeUleb128(w, 1) // local SP
|
||||||
}
|
}
|
||||||
w.WriteByte(0x24) // global.set
|
w.WriteByte(0x24) // global.set
|
||||||
writeUleb128(w, uint64(reg-REG_PC_F))
|
writeUleb128(w, uint64(reg-REG_SP))
|
||||||
case reg >= REG_R0 && reg <= REG_F15:
|
case reg >= REG_R0 && reg <= REG_PC_B:
|
||||||
if p.Link.As == AGet && p.Link.From.Reg == reg {
|
if p.Link.As == AGet && p.Link.From.Reg == reg {
|
||||||
w.WriteByte(0x22) // local.tee
|
w.WriteByte(0x22) // local.tee
|
||||||
p = p.Link
|
p = p.Link
|
||||||
} else {
|
} else {
|
||||||
w.WriteByte(0x21) // local.set
|
w.WriteByte(0x21) // local.set
|
||||||
}
|
}
|
||||||
if reg <= REG_R15 {
|
if reg == REG_PC_B {
|
||||||
|
if !hasPC_B {
|
||||||
|
panic(fmt.Sprintf("PC_B is not used in %s", s.Name))
|
||||||
|
}
|
||||||
|
writeUleb128(w, 0) // local PC_B
|
||||||
|
} else if reg <= REG_R15 {
|
||||||
writeUleb128(w, uint64(r0+(reg-REG_R0)))
|
writeUleb128(w, uint64(r0+(reg-REG_R0)))
|
||||||
} else {
|
} else {
|
||||||
writeUleb128(w, uint64(f0+(reg-REG_F0)))
|
writeUleb128(w, uint64(f0+(reg-REG_F0)))
|
||||||
@ -887,6 +939,12 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
}
|
}
|
||||||
reg := p.To.Reg
|
reg := p.To.Reg
|
||||||
switch {
|
switch {
|
||||||
|
case reg == REG_PC_B:
|
||||||
|
if !hasPC_B {
|
||||||
|
panic(fmt.Sprintf("PC_B is not used in %s", s.Name))
|
||||||
|
}
|
||||||
|
w.WriteByte(0x22) // local.tee (i32)
|
||||||
|
writeUleb128(w, 0) // local PC_B
|
||||||
case reg >= REG_R0 && reg <= REG_R15:
|
case reg >= REG_R0 && reg <= REG_R15:
|
||||||
w.WriteByte(0x22) // local.tee (i64)
|
w.WriteByte(0x22) // local.tee (i64)
|
||||||
writeUleb128(w, uint64(r0+(reg-REG_R0)))
|
writeUleb128(w, uint64(r0+(reg-REG_R0)))
|
||||||
@ -1036,10 +1094,10 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func updateLocalSP(w *bytes.Buffer) {
|
func updateLocalSP(w *bytes.Buffer) {
|
||||||
w.WriteByte(0x23) // global.get
|
w.WriteByte(0x23) // global.get
|
||||||
writeUleb128(w, uint64(REG_SP-REG_PC_F)) // SP
|
writeUleb128(w, 0) // global SP
|
||||||
w.WriteByte(0x21) // local.set
|
w.WriteByte(0x21) // local.set
|
||||||
writeUleb128(w, 0) // local SP
|
writeUleb128(w, 1) // local SP
|
||||||
}
|
}
|
||||||
|
|
||||||
func align(as obj.As) uint64 {
|
func align(as obj.As) uint64 {
|
||||||
|
@ -102,10 +102,11 @@ func asmb2(ctxt *ld.Link) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
types := []*wasmFuncType{
|
types := []*wasmFuncType{
|
||||||
// For normal Go functions the return value is
|
// For normal Go functions, the single parameter is PC_B,
|
||||||
|
// the return value is
|
||||||
// 0 if the function returned normally or
|
// 0 if the function returned normally or
|
||||||
// 1 if the stack needs to be unwound.
|
// 1 if the stack needs to be unwound.
|
||||||
{Results: []byte{I32}},
|
{Params: []byte{I32}, Results: []byte{I32}},
|
||||||
}
|
}
|
||||||
|
|
||||||
// collect host imports (functions that get imported from the WebAssembly host, usually JavaScript)
|
// collect host imports (functions that get imported from the WebAssembly host, usually JavaScript)
|
||||||
@ -320,16 +321,14 @@ func writeGlobalSec(ctxt *ld.Link) {
|
|||||||
sizeOffset := writeSecHeader(ctxt, sectionGlobal)
|
sizeOffset := writeSecHeader(ctxt, sectionGlobal)
|
||||||
|
|
||||||
globalRegs := []byte{
|
globalRegs := []byte{
|
||||||
I32, // 0: PC_F
|
I32, // 0: SP
|
||||||
I32, // 1: PC_B
|
I64, // 1: CTXT
|
||||||
I32, // 2: SP
|
I64, // 2: g
|
||||||
I64, // 3: CTXT
|
I64, // 3: RET0
|
||||||
I64, // 4: g
|
I64, // 4: RET1
|
||||||
I64, // 5: RET0
|
I64, // 5: RET2
|
||||||
I64, // 6: RET1
|
I64, // 6: RET3
|
||||||
I64, // 7: RET2
|
I32, // 7: PAUSE
|
||||||
I64, // 8: RET3
|
|
||||||
I32, // 9: PAUSE
|
|
||||||
}
|
}
|
||||||
|
|
||||||
writeUleb128(ctxt.Out, uint64(len(globalRegs))) // number of globals
|
writeUleb128(ctxt.Out, uint64(len(globalRegs))) // number of globals
|
||||||
|
@ -37,17 +37,12 @@ TEXT runtime·gogo(SB), NOSPLIT, $0-8
|
|||||||
MOVD gobuf_g(R0), g
|
MOVD gobuf_g(R0), g
|
||||||
MOVD gobuf_sp(R0), SP
|
MOVD gobuf_sp(R0), SP
|
||||||
|
|
||||||
|
// Put target PC at -8(SP), wasm_pc_f_loop will pick it up
|
||||||
|
Get SP
|
||||||
|
I32Const $8
|
||||||
|
I32Sub
|
||||||
I64Load gobuf_pc(R0)
|
I64Load gobuf_pc(R0)
|
||||||
I32WrapI64
|
I64Store $0
|
||||||
I32Const $16
|
|
||||||
I32ShrU
|
|
||||||
Set PC_F
|
|
||||||
|
|
||||||
I64Load gobuf_pc(R0)
|
|
||||||
I64Const $0xFFFF
|
|
||||||
I64And
|
|
||||||
I32WrapI64
|
|
||||||
Set PC_B
|
|
||||||
|
|
||||||
MOVD gobuf_ret(R0), RET0
|
MOVD gobuf_ret(R0), RET0
|
||||||
MOVD gobuf_ctxt(R0), CTXT
|
MOVD gobuf_ctxt(R0), CTXT
|
||||||
|
@ -31,14 +31,9 @@ TEXT wasm_export_run(SB),NOSPLIT,$0
|
|||||||
I64ExtendI32U
|
I64ExtendI32U
|
||||||
I64Store $8
|
I64Store $8
|
||||||
|
|
||||||
I32Const $runtime·rt0_go(SB)
|
I32Const $0 // entry PC_B
|
||||||
I32Const $16
|
Call runtime·rt0_go(SB)
|
||||||
I32ShrU
|
Drop
|
||||||
Set PC_F
|
|
||||||
|
|
||||||
I32Const $0
|
|
||||||
Set PC_B
|
|
||||||
|
|
||||||
Call wasm_pc_f_loop(SB)
|
Call wasm_pc_f_loop(SB)
|
||||||
|
|
||||||
Return
|
Return
|
||||||
@ -46,14 +41,9 @@ TEXT wasm_export_run(SB),NOSPLIT,$0
|
|||||||
// wasm_export_resume gets called from JavaScript. It resumes the execution of Go code until it needs to wait for
|
// wasm_export_resume gets called from JavaScript. It resumes the execution of Go code until it needs to wait for
|
||||||
// an event.
|
// an event.
|
||||||
TEXT wasm_export_resume(SB),NOSPLIT,$0
|
TEXT wasm_export_resume(SB),NOSPLIT,$0
|
||||||
I32Const $runtime·handleEvent(SB)
|
|
||||||
I32Const $16
|
|
||||||
I32ShrU
|
|
||||||
Set PC_F
|
|
||||||
|
|
||||||
I32Const $0
|
I32Const $0
|
||||||
Set PC_B
|
Call runtime·handleEvent(SB)
|
||||||
|
Drop
|
||||||
Call wasm_pc_f_loop(SB)
|
Call wasm_pc_f_loop(SB)
|
||||||
|
|
||||||
Return
|
Return
|
||||||
@ -63,15 +53,30 @@ TEXT wasm_pc_f_loop(SB),NOSPLIT,$0
|
|||||||
// The WebAssembly stack may unwind, e.g. when switching goroutines.
|
// The WebAssembly stack may unwind, e.g. when switching goroutines.
|
||||||
// The Go stack on the linear memory is then used to jump to the correct functions
|
// The Go stack on the linear memory is then used to jump to the correct functions
|
||||||
// with this loop, without having to restore the full WebAssembly stack.
|
// with this loop, without having to restore the full WebAssembly stack.
|
||||||
loop:
|
// It is expected to have a pending call before entering the loop, so check PAUSE first.
|
||||||
Loop
|
Get PAUSE
|
||||||
Get PC_F
|
I32Eqz
|
||||||
CallIndirect $0
|
If
|
||||||
Drop
|
loop:
|
||||||
|
Loop
|
||||||
|
// Get PC_B & PC_F from -8(SP)
|
||||||
|
Get SP
|
||||||
|
I32Const $8
|
||||||
|
I32Sub
|
||||||
|
I32Load16U $0 // PC_B
|
||||||
|
|
||||||
Get PAUSE
|
Get SP
|
||||||
I32Eqz
|
I32Const $8
|
||||||
BrIf loop
|
I32Sub
|
||||||
|
I32Load16U $2 // PC_F
|
||||||
|
|
||||||
|
CallIndirect $0
|
||||||
|
Drop
|
||||||
|
|
||||||
|
Get PAUSE
|
||||||
|
I32Eqz
|
||||||
|
BrIf loop
|
||||||
|
End
|
||||||
End
|
End
|
||||||
|
|
||||||
I32Const $0
|
I32Const $0
|
||||||
@ -91,6 +96,7 @@ TEXT runtime·pause(SB), NOSPLIT, $0-8
|
|||||||
RETUNWIND
|
RETUNWIND
|
||||||
|
|
||||||
TEXT runtime·exit(SB), NOSPLIT, $0-4
|
TEXT runtime·exit(SB), NOSPLIT, $0-4
|
||||||
|
I32Const $0
|
||||||
Call runtime·wasmExit(SB)
|
Call runtime·wasmExit(SB)
|
||||||
Drop
|
Drop
|
||||||
I32Const $1
|
I32Const $1
|
||||||
|
Loading…
Reference in New Issue
Block a user