diff --git a/src/cmd/internal/obj/wasm/a.out.go b/src/cmd/internal/obj/wasm/a.out.go index c686f1d6f0e..823777d4fbc 100644 --- a/src/cmd/internal/obj/wasm/a.out.go +++ b/src/cmd/internal/obj/wasm/a.out.go @@ -250,9 +250,7 @@ const ( const ( // globals - REG_PC_F = obj.RBaseWasm + iota - REG_PC_B - REG_SP // SP is currently 32-bit, until 64-bit memory operations are available + REG_SP = obj.RBaseWasm + iota // SP is currently 32-bit, until 64-bit memory operations are available REG_CTXT REG_g // RET* are used by runtime.return0 and runtime.reflectcall. These functions pass return values in registers. @@ -296,9 +294,11 @@ const ( REG_F14 REG_F15 + REG_PC_B // also first parameter, i32 + MAXREG - MINREG = REG_PC_F + MINREG = REG_SP REGSP = REG_SP REGCTXT = REG_CTXT REGG = REG_g diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go index 0ad883470ef..a6388b9ee7e 100644 --- a/src/cmd/internal/obj/wasm/wasmobj.go +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -16,8 +16,6 @@ import ( ) var Register = map[string]int16{ - "PC_F": REG_PC_F, - "PC_B": REG_PC_B, "SP": REG_SP, "CTXT": REG_CTXT, "g": REG_g, @@ -60,6 +58,8 @@ var Register = map[string]int16{ "F13": REG_F13, "F14": REG_F14, "F15": REG_F15, + + "PC_B": REG_PC_B, } var registerNames []string @@ -368,20 +368,31 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { break } - // reset PC_B to function entry - p = appendp(p, AI32Const, constAddr(0)) - p = appendp(p, ASet, regAddr(REG_PC_B)) - // low-level WebAssembly call to function switch jmp.To.Type { case obj.TYPE_MEM: + if !notUsePC_B[jmp.To.Sym.Name] { + // Set PC_B parameter to function entry. + p = appendp(p, AI32Const, constAddr(0)) + } p = appendp(p, ACall, jmp.To) + case obj.TYPE_NONE: // (target PC is on stack) p = appendp(p, AI32WrapI64) p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero p = appendp(p, AI32ShrU) + + // Set PC_B parameter to function entry. + // We need to push this before pushing the target PC_F, + // so temporarily pop PC_F, using our REG_PC_B as a + // scratch register, and push it back after pushing 0. + p = appendp(p, ASet, regAddr(REG_PC_B)) + p = appendp(p, AI32Const, constAddr(0)) + p = appendp(p, AGet, regAddr(REG_PC_B)) + p = appendp(p, ACallIndirect) + default: panic("bad target for JMP") } @@ -419,20 +430,31 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { }) p = appendp(p, AI64Store, constAddr(0)) - // reset PC_B to function entry - p = appendp(p, AI32Const, constAddr(0)) - p = appendp(p, ASet, regAddr(REG_PC_B)) - // low-level WebAssembly call to function switch call.To.Type { case obj.TYPE_MEM: + if !notUsePC_B[call.To.Sym.Name] { + // Set PC_B parameter to function entry. + p = appendp(p, AI32Const, constAddr(0)) + } p = appendp(p, ACall, call.To) + case obj.TYPE_NONE: // (target PC is on stack) p = appendp(p, AI32WrapI64) p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero p = appendp(p, AI32ShrU) + + // Set PC_B parameter to function entry. + // We need to push this before pushing the target PC_F, + // so temporarily pop PC_F, using our PC_B as a + // scratch register, and push it back after pushing 0. + p = appendp(p, ASet, regAddr(REG_PC_B)) + p = appendp(p, AI32Const, constAddr(0)) + p = appendp(p, AGet, regAddr(REG_PC_B)) + p = appendp(p, ACallIndirect) + default: panic("bad target for CALL") } @@ -465,7 +487,13 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { // jump to before the call if jmpdefer has reset the return address to the call's PC if call.To.Sym == deferreturn { - p = appendp(p, AGet, regAddr(REG_PC_B)) + // get PC_B from -8(SP) + p = appendp(p, AGet, regAddr(REG_SP)) + p = appendp(p, AI32Const, constAddr(8)) + p = appendp(p, AI32Sub) + p = appendp(p, AI32Load16U, constAddr(0)) + p = appendp(p, ATee, regAddr(REG_PC_B)) + p = appendp(p, AI32Const, constAddr(call.Pc)) p = appendp(p, AI32Eq) p = appendp(p, ABrIf, constAddr(0)) @@ -487,9 +515,8 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } if ret.To.Type == obj.TYPE_MEM { - // reset PC_B to function entry + // Set PC_B parameter to function entry. p = appendp(p, AI32Const, constAddr(0)) - p = appendp(p, ASet, regAddr(REG_PC_B)) // low-level WebAssembly call to function p = appendp(p, ACall, ret.To) @@ -497,16 +524,6 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { break } - // read return PC_F from Go stack - p = appendp(p, AGet, regAddr(REG_SP)) - p = appendp(p, AI32Load16U, constAddr(2)) - p = appendp(p, ASet, regAddr(REG_PC_F)) - - // read return PC_B from Go stack - p = appendp(p, AGet, regAddr(REG_SP)) - p = appendp(p, AI32Load16U, constAddr(0)) - p = appendp(p, ASet, regAddr(REG_PC_B)) - // SP += 8 p = appendp(p, AGet, regAddr(REG_SP)) p = appendp(p, AI32Const, constAddr(8)) @@ -771,16 +788,38 @@ func countRegisters(s *obj.LSym) (numI, numF int16) { return } +// Most of the Go functions has a single parameter (PC_B) in +// Wasm ABI. This is a list of exceptions. +var notUsePC_B = map[string]bool{ + "_rt0_wasm_js": true, + "wasm_export_run": true, + "wasm_export_resume": true, + "wasm_export_getsp": true, + "wasm_pc_f_loop": true, + "runtime.wasmMove": true, + "runtime.wasmZero": true, + "runtime.wasmDiv": true, + "runtime.wasmTruncS": true, + "runtime.wasmTruncU": true, + "runtime.gcWriteBarrier": true, + "cmpbody": true, + "memeqbody": true, + "memcmp": true, + "memchr": true, +} + func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { w := new(bytes.Buffer) hasLocalSP := false + hasPC_B := false var r0, f0 int16 // Function starts with declaration of locals: numbers and types. // Some functions use a special calling convention. switch s.Name { - case "wasm_export_run", "runtime.wasmMove", "runtime.wasmZero", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody": + case "_rt0_wasm_js", "wasm_export_run", "wasm_export_resume", "wasm_export_getsp", "wasm_pc_f_loop", + "runtime.wasmMove", "runtime.wasmZero", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody": writeUleb128(w, 0) // number of sets of locals case "memchr", "memcmp": writeUleb128(w, 1) // number of sets of locals @@ -797,9 +836,10 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { default: // Normal calling convention: No WebAssembly parameters. First local variable is local SP cache. hasLocalSP = true + hasPC_B = true numI, numF := countRegisters(s) - r0 = 1 - f0 = 1 + numI + r0 = 2 + f0 = 2 + numI numTypes := 1 if numI > 0 { @@ -826,6 +866,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { // Copy SP from its global variable into a local variable. Accessing a local variable is more efficient. updateLocalSP(w) } + for p := s.Func.Text; p != nil; p = p.Link { switch p.As { case AGet: @@ -836,10 +877,16 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { switch { case reg == REG_SP && hasLocalSP: w.WriteByte(0x20) // local.get - writeUleb128(w, 0) // local SP - case reg >= REG_PC_F && reg <= REG_PAUSE: + writeUleb128(w, 1) // local SP + case reg >= REG_SP && reg <= REG_PAUSE: w.WriteByte(0x23) // global.get - writeUleb128(w, uint64(reg-REG_PC_F)) + writeUleb128(w, uint64(reg-REG_SP)) + case reg == REG_PC_B: + if !hasPC_B { + panic(fmt.Sprintf("PC_B is not used in %s", s.Name)) + } + w.WriteByte(0x20) // local.get (i32) + writeUleb128(w, 0) // local PC_B case reg >= REG_R0 && reg <= REG_R15: w.WriteByte(0x20) // local.get (i64) writeUleb128(w, uint64(r0+(reg-REG_R0))) @@ -857,21 +904,26 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } reg := p.To.Reg switch { - case reg >= REG_PC_F && reg <= REG_PAUSE: + case reg >= REG_SP && reg <= REG_PAUSE: if reg == REG_SP && hasLocalSP { w.WriteByte(0x22) // local.tee - writeUleb128(w, 0) // local SP + writeUleb128(w, 1) // local SP } w.WriteByte(0x24) // global.set - writeUleb128(w, uint64(reg-REG_PC_F)) - case reg >= REG_R0 && reg <= REG_F15: + writeUleb128(w, uint64(reg-REG_SP)) + case reg >= REG_R0 && reg <= REG_PC_B: if p.Link.As == AGet && p.Link.From.Reg == reg { w.WriteByte(0x22) // local.tee p = p.Link } else { w.WriteByte(0x21) // local.set } - if reg <= REG_R15 { + if reg == REG_PC_B { + if !hasPC_B { + panic(fmt.Sprintf("PC_B is not used in %s", s.Name)) + } + writeUleb128(w, 0) // local PC_B + } else if reg <= REG_R15 { writeUleb128(w, uint64(r0+(reg-REG_R0))) } else { writeUleb128(w, uint64(f0+(reg-REG_F0))) @@ -887,6 +939,12 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } reg := p.To.Reg switch { + case reg == REG_PC_B: + if !hasPC_B { + panic(fmt.Sprintf("PC_B is not used in %s", s.Name)) + } + w.WriteByte(0x22) // local.tee (i32) + writeUleb128(w, 0) // local PC_B case reg >= REG_R0 && reg <= REG_R15: w.WriteByte(0x22) // local.tee (i64) writeUleb128(w, uint64(r0+(reg-REG_R0))) @@ -1036,10 +1094,10 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } func updateLocalSP(w *bytes.Buffer) { - w.WriteByte(0x23) // global.get - writeUleb128(w, uint64(REG_SP-REG_PC_F)) // SP - w.WriteByte(0x21) // local.set - writeUleb128(w, 0) // local SP + w.WriteByte(0x23) // global.get + writeUleb128(w, 0) // global SP + w.WriteByte(0x21) // local.set + writeUleb128(w, 1) // local SP } func align(as obj.As) uint64 { diff --git a/src/cmd/link/internal/wasm/asm.go b/src/cmd/link/internal/wasm/asm.go index c80e81e5b35..54b265cb19f 100644 --- a/src/cmd/link/internal/wasm/asm.go +++ b/src/cmd/link/internal/wasm/asm.go @@ -102,10 +102,11 @@ func asmb2(ctxt *ld.Link) { } types := []*wasmFuncType{ - // For normal Go functions the return value is + // For normal Go functions, the single parameter is PC_B, + // the return value is // 0 if the function returned normally or // 1 if the stack needs to be unwound. - {Results: []byte{I32}}, + {Params: []byte{I32}, Results: []byte{I32}}, } // collect host imports (functions that get imported from the WebAssembly host, usually JavaScript) @@ -320,16 +321,14 @@ func writeGlobalSec(ctxt *ld.Link) { sizeOffset := writeSecHeader(ctxt, sectionGlobal) globalRegs := []byte{ - I32, // 0: PC_F - I32, // 1: PC_B - I32, // 2: SP - I64, // 3: CTXT - I64, // 4: g - I64, // 5: RET0 - I64, // 6: RET1 - I64, // 7: RET2 - I64, // 8: RET3 - I32, // 9: PAUSE + I32, // 0: SP + I64, // 1: CTXT + I64, // 2: g + I64, // 3: RET0 + I64, // 4: RET1 + I64, // 5: RET2 + I64, // 6: RET3 + I32, // 7: PAUSE } writeUleb128(ctxt.Out, uint64(len(globalRegs))) // number of globals diff --git a/src/runtime/asm_wasm.s b/src/runtime/asm_wasm.s index a10c89d298b..8f3964f08b4 100644 --- a/src/runtime/asm_wasm.s +++ b/src/runtime/asm_wasm.s @@ -37,17 +37,12 @@ TEXT runtime·gogo(SB), NOSPLIT, $0-8 MOVD gobuf_g(R0), g MOVD gobuf_sp(R0), SP + // Put target PC at -8(SP), wasm_pc_f_loop will pick it up + Get SP + I32Const $8 + I32Sub I64Load gobuf_pc(R0) - I32WrapI64 - I32Const $16 - I32ShrU - Set PC_F - - I64Load gobuf_pc(R0) - I64Const $0xFFFF - I64And - I32WrapI64 - Set PC_B + I64Store $0 MOVD gobuf_ret(R0), RET0 MOVD gobuf_ctxt(R0), CTXT diff --git a/src/runtime/rt0_js_wasm.s b/src/runtime/rt0_js_wasm.s index c4efd9637c4..b22c46e2e95 100644 --- a/src/runtime/rt0_js_wasm.s +++ b/src/runtime/rt0_js_wasm.s @@ -31,14 +31,9 @@ TEXT wasm_export_run(SB),NOSPLIT,$0 I64ExtendI32U I64Store $8 - I32Const $runtime·rt0_go(SB) - I32Const $16 - I32ShrU - Set PC_F - - I32Const $0 - Set PC_B - + I32Const $0 // entry PC_B + Call runtime·rt0_go(SB) + Drop Call wasm_pc_f_loop(SB) Return @@ -46,14 +41,9 @@ TEXT wasm_export_run(SB),NOSPLIT,$0 // wasm_export_resume gets called from JavaScript. It resumes the execution of Go code until it needs to wait for // an event. TEXT wasm_export_resume(SB),NOSPLIT,$0 - I32Const $runtime·handleEvent(SB) - I32Const $16 - I32ShrU - Set PC_F - I32Const $0 - Set PC_B - + Call runtime·handleEvent(SB) + Drop Call wasm_pc_f_loop(SB) Return @@ -63,15 +53,30 @@ TEXT wasm_pc_f_loop(SB),NOSPLIT,$0 // The WebAssembly stack may unwind, e.g. when switching goroutines. // The Go stack on the linear memory is then used to jump to the correct functions // with this loop, without having to restore the full WebAssembly stack. -loop: - Loop - Get PC_F - CallIndirect $0 - Drop +// It is expected to have a pending call before entering the loop, so check PAUSE first. + Get PAUSE + I32Eqz + If + loop: + Loop + // Get PC_B & PC_F from -8(SP) + Get SP + I32Const $8 + I32Sub + I32Load16U $0 // PC_B - Get PAUSE - I32Eqz - BrIf loop + Get SP + I32Const $8 + I32Sub + I32Load16U $2 // PC_F + + CallIndirect $0 + Drop + + Get PAUSE + I32Eqz + BrIf loop + End End I32Const $0 @@ -91,6 +96,7 @@ TEXT runtime·pause(SB), NOSPLIT, $0-8 RETUNWIND TEXT runtime·exit(SB), NOSPLIT, $0-4 + I32Const $0 Call runtime·wasmExit(SB) Drop I32Const $1