diff --git a/src/cmd/5l/noop.c b/src/cmd/5l/noop.c index f4de0a0eba..19fc56764b 100644 --- a/src/cmd/5l/noop.c +++ b/src/cmd/5l/noop.c @@ -141,8 +141,8 @@ noops(void) } } // TODO(kaib): make lack of morestack an error -// if(pmorestack == P) -// diag("sys·morestack not defined"); +// if(pmorestack == P) +// diag("sys·morestack not defined"); curframe = 0; curbecome = 0; @@ -356,27 +356,26 @@ noops(void) p->link = q1; } else if (autosize < StackBig) { // split stack check for small functions - // MOVW (REGG), R1 + // MOVW g_stackguard(g), R1 // CMP R1, $-autosize(SP) - // MOVW.W.LT R14,$-autosize(SP) - // MOVW.W.GE R14,$-4(SP) - // MOVW.GE $(args << 24 | autosize), R1 - // BL.GE callmorestack(SB) + // MOVW.LT $args, R2 + // MOVW.W.LT R14, R3 + // BL.LT sys·morestackx(SB) // modifies LR + // MOVW.W R14,$-autosize(SP) - // TODO(kaib): double check we allocate autosize after - // stack has been split - // TODO(kaib): add error in case autosize doesn't pack // TODO(kaib): add more trampolines // TODO(kaib): put stackguard in register // TODO(kaib): add support for -K and underflow detection - p = appendp(p); // load G.stackguard into R1 + // MOVW g_stackguard(g), R1 + p = appendp(p); p->as = AMOVW; p->from.type = D_OREG; p->from.reg = REGG; p->to.type = D_REG; p->to.reg = 1; + // CMP R1, $-autosize(SP) p = appendp(p); p->as = ACMP; p->from.type = D_REG; @@ -384,42 +383,41 @@ noops(void) p->from.offset = -autosize; p->reg = REGSP; + // MOVW.LT $args, R2 p = appendp(p); p->as = AMOVW; - p->scond = C_SCOND_GE | C_WBIT; - p->from.type = D_REG; - p->from.reg = REGLINK; - p->to.type = D_OREG; - p->to.offset = -autosize; - p->to.reg = REGSP; - - p = appendp(p); - p->as = AMOVW; - p->scond = C_SCOND_LT | C_WBIT; - p->from.type = D_REG; - p->from.reg = REGLINK; - p->to.type = D_OREG; - p->to.offset = -4; - p->to.reg = REGSP; - - p = appendp(p); // packs args and autosize - p->as = AMOVW; p->scond = C_SCOND_LT; p->from.type = D_CONST; - // top 8 bits are arg count, lower 24 bits number of 4 byte - // words - p->from.offset = - (curtext->to.offset2 & ~7) << 21 | - (autosize & ~7) >> 3; + p->from.offset = curtext->to.offset2 & ~7; p->to.type = D_REG; - p->to.reg = 1; + p->to.reg = 2; + // MOVW.W.LT R14, R3 + p = appendp(p); + p->as = AMOVW; + p->scond = C_SCOND_LT; + p->from.type = D_REG; + p->from.reg = REGLINK; + p->to.type = D_REG; + p->to.reg = 3; + + // BL.LT sys·morestackx(SB) // modifies LR p = appendp(p); p->as = ABL; p->scond = C_SCOND_LT; p->to.type = D_BRANCH; p->to.sym = symmorestack; p->cond = pmorestack; + + // MOVW.W R14,$-autosize(SP) + p = appendp(p); + p->as = AMOVW; + p->scond |= C_WBIT; + p->from.type = D_REG; + p->from.reg = REGLINK; + p->to.type = D_OREG; + p->to.offset = -autosize; + p->to.reg = REGSP; } else { // > StackBig // MOVW.W R14,$-4(SP) // MOVW $(args << 24 | autosize), R1 diff --git a/src/pkg/runtime/Makefile b/src/pkg/runtime/Makefile index 984c1f5339..281dca9815 100644 --- a/src/pkg/runtime/Makefile +++ b/src/pkg/runtime/Makefile @@ -10,7 +10,10 @@ SIZE=$(SIZE_$(GOARCH)) # Setup CFLAGS. Add -D_64BIT on 64-bit platforms (sorry). CFLAGS_64=-D_64BIT -CFLAGS=-I$(GOOS) -I$(GOOS)/$(GOARCH) -wF $(CFLAGS_$(SIZE)) +# TODO(kaib): fix register allocation to honor extern register so we +# can enable optimizations again. +CFLAGS_arm=-N +CFLAGS=-I$(GOOS) -I$(GOOS)/$(GOARCH) -wF $(CFLAGS_$(SIZE)) $(CFLAGS_$(GOARCH)) # Set O to right letter. O_386=8 @@ -33,6 +36,7 @@ OFILES_386=\ # arm-specific object files OFILES_arm=\ + memset.$O\ vlop.$O\ vlrt.$O\ diff --git a/src/pkg/runtime/arm/asm.s b/src/pkg/runtime/arm/asm.s index 5e68b72ffe..39ac99ec67 100644 --- a/src/pkg/runtime/arm/asm.s +++ b/src/pkg/runtime/arm/asm.s @@ -2,51 +2,55 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -TEXT _rt0_arm(SB),7,$0 +#include "arm/asm.h" + +// using frame size $-4 means do not save LR on stack. +TEXT _rt0_arm(SB),7,$-4 MOVW $setR12(SB), R12 // copy arguments forward on an even stack - MOVW 0(SP), R0 // argc - MOVW 4(SP), R1 // argv - SUB $128, SP // plenty of scratch - AND $~7, SP - MOVW R0, 120(SP) // save argc, argv away - MOVW R1, 124(SP) + // use R13 instead of SP to avoid linker rewriting the offsets + MOVW 0(R13), R0 // argc + MOVW $4(R13), R1 // argv + SUB $128, R13 // plenty of scratch + AND $~7, R13 + MOVW R0, 120(R13) // save argc, argv away + MOVW R1, 124(R13) // set up m and g registers // g is R10, m is R9 - MOVW $g0(SB), R10 - MOVW $m0(SB), R9 + MOVW $g0(SB), g + MOVW $m0(SB), m // save m->g0 = g0 - MOVW R10, 0(R9) + MOVW g, m_g0(m) // create istack out of the OS stack - MOVW $(-8192+104)(SP), R0 - MOVW R0, 0(R10) // 0(g) is stack limit (w 104b guard) - MOVW SP, 4(R10) // 4(g) is base + MOVW $(-8192+104)(R13), R0 + MOVW R0, g_stackguard(g) // (w 104b guard) + MOVW R13, g_stackbase(g) BL emptyfunc(SB) // fault if stack check is wrong BL check(SB) // saved argc, argv - MOVW 120(SP), R0 - MOVW R0, 0(SP) - MOVW 124(SP), R0 - MOVW R0, 4(SP) + MOVW 120(R13), R0 + MOVW R0, 4(R13) + MOVW 124(R13), R1 + MOVW R1, 8(R13) BL args(SB) BL osinit(SB) BL schedinit(SB) // create a new goroutine to start program MOVW $mainstart(SB), R0 - MOVW.W R0, -4(SP) + MOVW.W R0, -4(R13) MOVW $8, R0 - MOVW.W R0, -4(SP) + MOVW.W R0, -4(R13) MOVW $0, R0 - MOVW.W R0, -4(SP) // push $0 as guard + MOVW.W R0, -4(R13) // push $0 as guard BL sys·newproc(SB) - MOVW $12(SP), SP // pop args and LR + MOVW $12(R13), R13 // pop args and LR // start this M BL mstart(SB) @@ -70,73 +74,106 @@ TEXT mainstart(SB),7,$0 // TODO(kaib): remove these once linker works properly // pull in dummy dependencies TEXT _dep_dummy(SB),7,$0 - BL sys·morestack(SB) - BL sys·morestackx(SB) BL _div(SB) BL _divu(SB) BL _mod(SB) BL _modu(SB) BL _modu(SB) - TEXT breakpoint(SB),7,$0 BL abort(SB) // BYTE $0xcc // RET -// go-routine -TEXT gogo(SB), 7, $0 - BL abort(SB) -// MOVL 4(SP), AX // gobuf -// MOVL 0(AX), SP // restore SP -// MOVL 4(AX), AX -// MOVL AX, 0(SP) // put PC on the stack -// MOVL $1, AX -// RET +/* + * go-routine + */ +// uintptr gosave(Gobuf*) +// save state in Gobuf; setjmp TEXT gosave(SB), 7, $0 + MOVW SP, gobuf_sp(R0) + MOVW LR, gobuf_pc(R0) + MOVW g, gobuf_g(R0) + MOVW $0, R0 // return 0 + RET + +// void gogo(Gobuf*, uintptr) +// restore state from Gobuf; longjmp +TEXT gogo(SB), 7, $0 + MOVW R0, R1 // gobuf + MOVW 8(SP), R0 // return 2nd arg + MOVW gobuf_g(R1), g + MOVW 0(g), R2 // make sure g != nil + MOVW gobuf_sp(R1), SP // restore SP + MOVW gobuf_pc(R1), PC + +// void gogocall(Gobuf*, void (*fn)(void)) +// restore state from Gobuf but then call fn. +// (call fn, returning to state in Gobuf) +// TODO(kaib): add R0 to gobuf so it can be restored properly +// using frame size $-4 means do not save LR on stack. +TEXT gogocall(SB), 7, $-4 + MOVW 8(SP), R1 // fn + MOVW gobuf_g(R0), g + MOVW 0(g), R2 // make sure g != nil + MOVW gobuf_sp(R0), SP // restore SP + MOVW gobuf_pc(R0), LR + MOVW R1, PC + +/* + * support for morestack + */ + +// Called during function prolog when more stack is needed. +// R1 frame size +// R2 arg size +// R3 prolog's LR +// using frame size $-4 means do not save LR on stack. +TEXT sys·morestack(SB),7,$-4 + // Cannot grow scheduler stack (m->g0). + MOVW m_g0(m), R4 + CMP g, R4 + BNE 2(PC) BL abort(SB) -// MOVL 4(SP), AX // gobuf -// MOVL SP, 0(AX) // save SP -// MOVL 0(SP), BX -// MOVL BX, 4(AX) // save PC -// MOVL $0, AX // return 0 -// RET -// support for morestack + // Save in m. + MOVW R1, m_moreframe(m) + MOVW R2, m_moreargs(m) -// return point when leaving new stack. -// save R0, jmp to lesstack to switch back -TEXT retfromnewstack(SB),7,$0 - MOVW R0,12(R9) // m->cret - B lessstack(SB) + // Called from f. + // Set m->morebuf to f's caller. + MOVW R3, (m_morebuf+gobuf_pc)(m) // f's caller's PC + MOVW SP, (m_morebuf+gobuf_sp)(m) // f's caller's SP + MOVW g, (m_morebuf+gobuf_g)(m) -// gogo, returning 2nd arg instead of 1 -TEXT gogoret(SB), 7, $0 - MOVW 8(SP), R0 // return 2nd arg - MOVW 4(SP), R1 // gobuf - MOVW 0(R1), SP // restore SP - MOVW 4(R1), PC // restore PC + // Set m->morepc to f's PC. + MOVW LR, m_morepc(m) -TEXT setspgoto(SB), 7, $0 - MOVW 4(SP), R0 // SP - MOVW 8(SP), R1 // fn to call - MOVW 12(SP), R2 // fn to return into - MOVW R2, R14 // restore LR - MOVW R0, SP - MOVW R1, PC // goto + // Call newstack on m's scheduling stack. + MOVW m_g0(m), g + MOVW (m_sched+gobuf_sp)(m), SP + B newstack(SB) + +// Return point when leaving stack. +// using frame size $-4 means do not save LR on stack. +TEXT sys·lessstack(SB), 7, $-4 + // Save return value in m->cret + MOVW R0, m_cret(m) + + // Call oldstack on m's scheduling stack. + MOVW m_g0(m), g + MOVW (m_sched+gobuf_sp)(m), SP + B oldstack(SB) // Optimization to make inline stack splitting code smaller // R0 is original first argument -// R1 is arg_num << 24 | autosize >> 3 -TEXT sys·morestackx(SB), 7, $0 - MOVW R0, 4(SP) // Save arg0 - MOVW R1<<8, R2 - MOVW R2>>5, R2 - MOVW R2, 4(R10) // autooffset into g - MOVW R1>>24, R2 - MOVW R2<<3, R2 - MOVW R2, 8(R10) // argsize into g +// R2 is argsize +// R3 is LR for f (f's caller's PC) +// using frame size $-4 means do not save LR on stack. +TEXT sys·morestackx(SB), 7, $-4 + MOVW R0, 0(FP) // Save arg0 + MOVW $0, R1 // set frame size B sys·morestack(SB) // bool cas(int32 *val, int32 old, int32 new) @@ -180,17 +217,18 @@ TEXT jmpdefer(SB), 7, $0 // SUBL $5, (SP) // return to CALL again // JMP AX // but first run the deferred function -TEXT sys·memclr(SB),7,$0 - BL abort(SB) -// MOVL 4(SP), DI // arg 1 addr -// MOVL 8(SP), CX // arg 2 count -// ADDL $3, CX -// SHRL $2, CX -// MOVL $0, AX -// CLD -// REP -// STOSL -// RET +TEXT sys·memclr(SB),7,$20 +// R0 = addr and passes implicitly to memset + MOVW $0, R1 // c = 0 + MOVW R1, -16(SP) + MOVW 4(FP), R1 // n + MOVW R1, -12(SP) + MOVW m, -8(SP) // Save m and g + MOVW g, -4(SP) + BL memset(SB) + MOVW -8(SP), m // Restore m and g, memset clobbers them + MOVW -4(SP), g + RET TEXT sys·getcallerpc+0(SB),7,$0 BL abort(SB) diff --git a/src/pkg/runtime/arm/memset.s b/src/pkg/runtime/arm/memset.s new file mode 100644 index 0000000000..cce94534c1 --- /dev/null +++ b/src/pkg/runtime/arm/memset.s @@ -0,0 +1,94 @@ +// Inferno's libkern/memset-arm.s +// http://code.google.com/p/inferno-os/source/browse/libkern/memset-arm.s +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. +// Portions Copyright 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +TO = 1 +TOE = 2 +N = 3 +TMP = 3 /* N and TMP don't overlap */ + +// TODO(kaib): memset clobbers R9 and R10 (m and g). This makes the +// registers unpredictable if (when) memset SIGSEGV's. Fix it by +// moving the R4-R11 register bank. +TEXT memset(SB), $0 + MOVW R0, R(TO) + MOVW data+4(FP), R(4) + MOVW n+8(FP), R(N) + + ADD R(N), R(TO), R(TOE) /* to end pointer */ + + CMP $4, R(N) /* need at least 4 bytes to copy */ + BLT _1tail + + AND $0xFF, R(4) /* it's a byte */ + SLL $8, R(4), R(TMP) /* replicate to a word */ + ORR R(TMP), R(4) + SLL $16, R(4), R(TMP) + ORR R(TMP), R(4) + +_4align: /* align on 4 */ + AND.S $3, R(TO), R(TMP) + BEQ _4aligned + + MOVBU.P R(4), 1(R(TO)) /* implicit write back */ + B _4align + +_4aligned: + SUB $31, R(TOE), R(TMP) /* do 32-byte chunks if possible */ + CMP R(TMP), R(TO) + BHS _4tail + + MOVW R4, R5 /* replicate */ + MOVW R4, R6 + MOVW R4, R7 + MOVW R4, R8 + MOVW R4, R9 + MOVW R4, R10 + MOVW R4, R11 + +_f32loop: + CMP R(TMP), R(TO) + BHS _4tail + + MOVM.IA.W [R4-R11], (R(TO)) + B _f32loop + +_4tail: + SUB $3, R(TOE), R(TMP) /* do remaining words if possible */ +_4loop: + CMP R(TMP), R(TO) + BHS _1tail + + MOVW.P R(4), 4(R(TO)) /* implicit write back */ + B _4loop + +_1tail: + CMP R(TO), R(TOE) + BEQ _return + + MOVBU.P R(4), 1(R(TO)) /* implicit write back */ + B _1tail + +_return: + RET diff --git a/src/pkg/runtime/linux/arm/sys.s b/src/pkg/runtime/linux/arm/sys.s index 25e64a3587..c61d08f090 100644 --- a/src/pkg/runtime/linux/arm/sys.s +++ b/src/pkg/runtime/linux/arm/sys.s @@ -6,23 +6,28 @@ // System calls and other sys.stuff for arm, Linux // +#define SYS_BASE 0x00900000 +#define SYS_exit (SYS_BASE + 1) +#define SYS_write (SYS_BASE + 4) +#define SYS_mmap2 (SYS_BASE + 192) + TEXT write(SB),7,$0 MOVW 8(SP), R1 MOVW 12(SP), R2 - SWI $0x00900004 // syscall write + SWI $SYS_write RET TEXT exit(SB),7,$0 - SWI $0x00900001 // exit value in R0 - -TEXT sys·write(SB),7,$0 - MOVW 8(SP), R1 - MOVW 12(SP), R2 - SWI $0x00900004 // syscall write - RET + // Exit value already in R0 + SWI $SYS_exit TEXT sys·mmap(SB),7,$0 - BL abort(SB) + MOVW 4(FP), R1 + MOVW 8(FP), R2 + MOVW 12(FP), R3 + MOVW 16(FP), R4 + MOVW 20(FP), R5 + SWI $SYS_mmap2 RET // int64 futex(int32 *uaddr, int32 op, int32 val,