1
0
mirror of https://github.com/golang/go synced 2024-11-20 10:34:42 -07:00

[dev.cc] cmd/asm: the assembler proper

Add main.go, the simple driver for the assembler, and the
subdirectory internal/asm, which contains the parser and
instruction generator.

It's likely that much of the implementation is superstition,
or at best experimental phenomenology, but it does generate
working binaries.

Change-Id: I322a9ae8a20174b6693153f30e39217ba68f8032
Reviewed-on: https://go-review.googlesource.com/3196
Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
Rob Pike 2015-01-22 10:48:33 -08:00
parent 8642639575
commit fdeee3a538
4 changed files with 1217 additions and 0 deletions

View File

@ -0,0 +1,533 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asm
import (
"fmt"
"strings"
"text/scanner"
"cmd/asm/internal/addr"
"cmd/asm/internal/arch"
"cmd/asm/internal/lex"
"cmd/internal/obj"
)
// TODO: This package has many numeric conversions that should be unnecessary.
// symbolType returns the extern/static etc. type appropriate for the symbol.
func (p *Parser) symbolType(a *addr.Addr) int {
switch a.Register {
case arch.RFP:
return p.arch.D_PARAM
case arch.RSP:
return p.arch.D_AUTO
case arch.RSB:
// See comment in addrToAddr.
if a.IsImmediateAddress {
return p.arch.D_ADDR
}
if a.IsStatic {
return p.arch.D_STATIC
}
return p.arch.D_EXTERN
}
p.errorf("invalid register for symbol %s", a.Symbol)
return 0
}
// TODO: configure the architecture
func (p *Parser) addrToAddr(a *addr.Addr) obj.Addr {
out := p.arch.NoAddr
if a.Has(addr.Symbol) {
// How to encode the symbols:
// syntax = Typ,Index
// $a(SB) = ADDR,EXTERN
// $a<>(SB) = ADDR,STATIC
// a(SB) = EXTERN,NONE
// a<>(SB) = STATIC,NONE
// The call to symbolType does the first column; we need to fix up Index here.
out.Type = int16(p.symbolType(a))
out.Sym = obj.Linklookup(p.linkCtxt, a.Symbol, 0)
if a.IsImmediateAddress {
// Index field says whether it's a static.
switch a.Register {
case arch.RSB:
if a.IsStatic {
out.Index = uint8(p.arch.D_STATIC)
} else {
out.Index = uint8(p.arch.D_EXTERN)
}
default:
p.errorf("can't handle immediate address of %s not (SB)\n", a.Symbol)
}
}
} else if a.Has(addr.Register) {
// TODO: SP is tricky, and this isn't good enough.
// SP = D_SP
// 4(SP) = 4(D_SP)
// x+4(SP) = D_AUTO with sym=x TODO
out.Type = a.Register
if a.Register == arch.RSP {
out.Type = int16(p.arch.SP)
}
if a.IsIndirect {
out.Type += p.arch.D_INDIR
}
// a.Register2 handled in the instruction method; it's bizarre.
}
if a.Has(addr.Index) {
out.Index = uint8(a.Index) // TODO: out.Index == p.NoArch.Index should be same type as Register.
}
if a.Has(addr.Scale) {
out.Scale = a.Scale
}
if a.Has(addr.Offset) {
out.Offset = a.Offset
if a.Is(addr.Offset) {
// RHS of MOVL $0xf1, 0xf1 // crash
out.Type = p.arch.D_INDIR + int16(p.arch.D_NONE)
} else if a.IsImmediateConstant && out.Type == int16(p.arch.D_NONE) {
out.Type = int16(p.arch.D_CONST)
}
}
if a.Has(addr.Float) {
out.U.Dval = a.Float
out.Type = int16(p.arch.D_FCONST)
}
if a.Has(addr.String) {
out.U.Sval = a.String
out.Type = int16(p.arch.D_SCONST)
}
// HACK TODO
if a.IsIndirect && !a.Has(addr.Register) && a.Has(addr.Index) {
// LHS of LEAQ 0(BX*8), CX
out.Type = p.arch.D_INDIR + int16(p.arch.D_NONE)
}
return out
}
func (p *Parser) link(prog *obj.Prog, doLabel bool) {
if p.firstProg == nil {
p.firstProg = prog
} else {
p.lastProg.Link = prog
}
p.lastProg = prog
if doLabel {
p.pc++
for _, label := range p.pendingLabels {
if p.labels[label] != nil {
p.errorf("label %q multiply defined", label)
}
p.labels[label] = prog
}
p.pendingLabels = p.pendingLabels[0:0]
}
prog.Pc = int64(p.pc)
fmt.Println(p.histLineNum, prog)
}
// asmText assembles a TEXT pseudo-op.
// TEXT runtime·sigtramp(SB),4,$0-0
func (p *Parser) asmText(word string, operands [][]lex.Token) {
if len(operands) != 3 {
p.errorf("expect three operands for TEXT")
}
// Operand 0 is the symbol name in the form foo(SB).
// That means symbol plus indirect on SB and no offset.
nameAddr := p.address(operands[0])
if !nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Register != arch.RSB {
p.errorf("TEXT symbol %q must be an offset from SB", nameAddr.Symbol)
}
name := strings.Replace(nameAddr.Symbol, "·", ".", 1)
// Operand 1 is the text flag, a literal integer.
flagAddr := p.address(operands[1])
if !flagAddr.Is(addr.Offset) {
p.errorf("TEXT flag for %s must be an integer", name)
}
flag := int8(flagAddr.Offset)
// Operand 2 is the frame and arg size.
// Bizarre syntax: $a-b is two words, not subtraction.
// We might even see $-b, which means $0-b. Ugly.
// Assume if it has this syntax that b is a plain constant.
// Not clear we can do better, but it doesn't matter.
op := operands[2]
n := len(op)
var locals int64
if n >= 2 && op[n-2].ScanToken == '-' && op[n-1].ScanToken == scanner.Int {
p.start(op[n-1:])
locals = int64(p.expr())
op = op[:n-2]
}
args := int64(0)
if len(op) == 1 && op[0].ScanToken == '$' {
// Special case for $-8.
// Done; args is zero.
} else {
argsAddr := p.address(op)
if !argsAddr.Is(addr.ImmediateConstant | addr.Offset) {
p.errorf("TEXT frame size for %s must be an immediate constant", name)
}
args = argsAddr.Offset
}
prog := &obj.Prog{
Ctxt: p.linkCtxt,
As: int16(p.arch.ATEXT),
Lineno: int32(p.histLineNum),
From: obj.Addr{
Type: int16(p.symbolType(&nameAddr)),
Index: uint8(p.arch.D_NONE),
Sym: obj.Linklookup(p.linkCtxt, name, 0),
Scale: flag,
},
To: obj.Addr{
Index: uint8(p.arch.D_NONE),
},
}
// Encoding of arg and locals depends on architecture.
switch p.arch.Thechar {
case '6':
prog.To.Type = int16(p.arch.D_CONST)
prog.To.Offset = (locals << 32) | args
case '8':
prog.To.Type = p.arch.D_CONST2
prog.To.Offset = args
prog.To.Offset2 = int32(locals)
default:
p.errorf("internal error: can't encode TEXT arg/frame")
}
p.link(prog, true)
}
// asmData assembles a DATA pseudo-op.
// DATA masks<>+0x00(SB)/4, $0x00000000
func (p *Parser) asmData(word string, operands [][]lex.Token) {
if len(operands) != 2 {
p.errorf("expect two operands for DATA")
}
// Operand 0 has the general form foo<>+0x04(SB)/4.
op := operands[0]
n := len(op)
if n < 3 || op[n-2].ScanToken != '/' || op[n-1].ScanToken != scanner.Int {
p.errorf("expect /size for DATA argument")
}
scale := p.scale(op[n-1].String())
op = op[:n-2]
nameAddr := p.address(op)
ok := nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect|addr.Offset)
if !ok || nameAddr.Register != arch.RSB {
p.errorf("DATA symbol %q must be an offset from SB", nameAddr.Symbol)
}
name := strings.Replace(nameAddr.Symbol, "·", ".", 1)
// Operand 1 is an immediate constant or address.
valueAddr := p.address(operands[1])
if !valueAddr.IsImmediateConstant && !valueAddr.IsImmediateAddress {
p.errorf("DATA value must be an immediate constant or address")
}
// The addresses must not overlap. Easiest test: require monotonicity.
if lastAddr, ok := p.dataAddr[name]; ok && nameAddr.Offset < lastAddr {
p.errorf("overlapping DATA entry for %s", nameAddr.Symbol)
}
p.dataAddr[name] = nameAddr.Offset + int64(scale)
prog := &obj.Prog{
Ctxt: p.linkCtxt,
As: int16(p.arch.ADATA),
Lineno: int32(p.histLineNum),
From: obj.Addr{
Type: int16(p.symbolType(&nameAddr)),
Index: uint8(p.arch.D_NONE),
Sym: obj.Linklookup(p.linkCtxt, name, 0),
Offset: nameAddr.Offset,
Scale: scale,
},
To: p.addrToAddr(&valueAddr),
}
p.link(prog, false)
}
// asmGlobl assembles a GLOBL pseudo-op.
// GLOBL shifts<>(SB),8,$256
// GLOBL shifts<>(SB),$256
func (p *Parser) asmGlobl(word string, operands [][]lex.Token) {
if len(operands) != 2 && len(operands) != 3 {
p.errorf("expect two or three operands for GLOBL")
}
// Operand 0 has the general form foo<>+0x04(SB).
nameAddr := p.address(operands[0])
if !nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Register != arch.RSB {
p.errorf("GLOBL symbol %q must be an offset from SB", nameAddr.Symbol)
}
name := strings.Replace(nameAddr.Symbol, "·", ".", 1)
// If three operands, middle operand is a scale.
scale := int8(0)
op := operands[1]
if len(operands) == 3 {
scaleAddr := p.address(op)
if !scaleAddr.Is(addr.Offset) {
p.errorf("GLOBL scale must be a constant")
}
scale = int8(scaleAddr.Offset)
op = operands[2]
}
// Final operand is an immediate constant.
sizeAddr := p.address(op)
if !sizeAddr.Is(addr.ImmediateConstant | addr.Offset) {
p.errorf("GLOBL size must be an immediate constant")
}
size := sizeAddr.Offset
// log.Printf("GLOBL %s %d, $%d", name, scale, size)
prog := &obj.Prog{
Ctxt: p.linkCtxt,
As: int16(p.arch.AGLOBL),
Lineno: int32(p.histLineNum),
From: obj.Addr{
Type: int16(p.symbolType(&nameAddr)),
Index: uint8(p.arch.D_NONE),
Sym: obj.Linklookup(p.linkCtxt, name, 0),
Offset: nameAddr.Offset,
Scale: scale,
},
To: obj.Addr{
Type: int16(p.arch.D_CONST),
Index: uint8(p.arch.D_NONE),
Offset: size,
},
}
p.link(prog, false)
}
// asmPCData assembles a PCDATA pseudo-op.
// PCDATA $2, $705
func (p *Parser) asmPCData(word string, operands [][]lex.Token) {
if len(operands) != 2 {
p.errorf("expect two operands for PCDATA")
}
// Operand 0 must be an immediate constant.
addr0 := p.address(operands[0])
if !addr0.Is(addr.ImmediateConstant | addr.Offset) {
p.errorf("PCDATA value must be an immediate constant")
}
value0 := addr0.Offset
// Operand 1 must be an immediate constant.
addr1 := p.address(operands[1])
if !addr1.Is(addr.ImmediateConstant | addr.Offset) {
p.errorf("PCDATA value must be an immediate constant")
}
value1 := addr1.Offset
// log.Printf("PCDATA $%d, $%d", value0, value1)
prog := &obj.Prog{
Ctxt: p.linkCtxt,
As: int16(p.arch.APCDATA),
Lineno: int32(p.histLineNum),
From: obj.Addr{
Type: int16(p.arch.D_CONST),
Index: uint8(p.arch.D_NONE),
Offset: value0,
},
To: obj.Addr{
Type: int16(p.arch.D_CONST),
Index: uint8(p.arch.D_NONE),
Offset: value1,
},
}
p.link(prog, true)
}
// asmFuncData assembles a FUNCDATA pseudo-op.
// FUNCDATA $1, funcdata<>+4(SB)
func (p *Parser) asmFuncData(word string, operands [][]lex.Token) {
if len(operands) != 2 {
p.errorf("expect two operands for FUNCDATA")
}
// Operand 0 must be an immediate constant.
valueAddr := p.address(operands[0])
if !valueAddr.Is(addr.ImmediateConstant | addr.Offset) {
p.errorf("FUNCDATA value must be an immediate constant")
}
value := valueAddr.Offset
// Operand 1 is a symbol name in the form foo(SB).
// That means symbol plus indirect on SB and no offset.
nameAddr := p.address(operands[1])
if !nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Register != arch.RSB {
p.errorf("FUNCDATA symbol %q must be an offset from SB", nameAddr.Symbol)
}
name := strings.Replace(nameAddr.Symbol, "·", ".", 1)
// log.Printf("FUNCDATA %s, $%d", name, value)
prog := &obj.Prog{
Ctxt: p.linkCtxt,
As: int16(p.arch.AFUNCDATA),
Lineno: int32(p.histLineNum),
From: obj.Addr{
Type: int16(p.arch.D_CONST),
Index: uint8(p.arch.D_NONE),
Offset: value,
},
To: obj.Addr{
Type: int16(p.symbolType(&nameAddr)),
Index: uint8(p.arch.D_NONE),
Sym: obj.Linklookup(p.linkCtxt, name, 0),
},
}
p.link(prog, true)
}
// asmJump assembles a jump instruction.
// JMP R1
// JMP exit
// JMP 3(PC)
func (p *Parser) asmJump(op int, a []addr.Addr) {
var target *addr.Addr
switch len(a) {
default:
p.errorf("jump must have one or two addresses")
case 1:
target = &a[0]
case 2:
if !a[0].Is(0) {
p.errorf("two-address jump must have empty first address")
}
target = &a[1]
}
prog := &obj.Prog{
Ctxt: p.linkCtxt,
Lineno: int32(p.histLineNum),
As: int16(op),
From: p.arch.NoAddr,
}
switch {
case target.Is(addr.Register):
// JMP R1
prog.To = p.addrToAddr(target)
case target.Is(addr.Symbol):
// JMP exit
targetProg := p.labels[target.Symbol]
if targetProg == nil {
p.toPatch = append(p.toPatch, Patch{prog, target.Symbol})
} else {
p.branch(prog, targetProg)
}
case target.Is(addr.Register | addr.Indirect), target.Is(addr.Register | addr.Indirect | addr.Offset):
// JMP 4(AX)
if target.Register == arch.RPC {
prog.To = obj.Addr{
Type: int16(p.arch.D_BRANCH),
Index: uint8(p.arch.D_NONE),
Offset: p.pc + 1 + target.Offset, // +1 because p.pc is incremented in link, below.
}
} else {
prog.To = p.addrToAddr(target)
}
case target.Is(addr.Symbol | addr.Indirect | addr.Register):
// JMP main·morestack(SB)
if target.Register != arch.RSB {
p.errorf("jmp to symbol must be SB-relative")
}
prog.To = obj.Addr{
Type: int16(p.arch.D_BRANCH),
Sym: obj.Linklookup(p.linkCtxt, target.Symbol, 0),
Index: uint8(p.arch.D_NONE),
Offset: target.Offset,
}
default:
p.errorf("cannot assemble jump %+v", target)
}
p.link(prog, true)
}
func (p *Parser) patch() {
for _, patch := range p.toPatch {
targetProg := p.labels[patch.label]
if targetProg == nil {
p.errorf("undefined label %s", patch.label)
} else {
p.branch(patch.prog, targetProg)
}
}
}
func (p *Parser) branch(jmp, target *obj.Prog) {
jmp.To = obj.Addr{
Type: int16(p.arch.D_BRANCH),
Index: uint8(p.arch.D_NONE),
}
jmp.To.U.Branch = target
}
// asmInstruction assembles an instruction.
// MOVW R9, (R10)
func (p *Parser) asmInstruction(op int, a []addr.Addr) {
prog := &obj.Prog{
Ctxt: p.linkCtxt,
Lineno: int32(p.histLineNum),
As: int16(op),
}
switch len(a) {
case 0:
prog.From = p.arch.NoAddr
prog.To = p.arch.NoAddr
case 1:
if p.arch.UnaryDestination[op] {
prog.From = p.arch.NoAddr
prog.To = p.addrToAddr(&a[0])
} else {
prog.From = p.addrToAddr(&a[0])
prog.To = p.arch.NoAddr
}
case 2:
prog.From = p.addrToAddr(&a[0])
prog.To = p.addrToAddr(&a[1])
// DX:AX as a register pair can only appear on the RHS.
// Bizarrely, to obj it's specified by setting index on the LHS.
// TODO: can we fix this?
if a[1].Has(addr.Register2) {
if int(prog.From.Index) != p.arch.D_NONE {
p.errorf("register pair operand on RHS must have register on LHS")
}
prog.From.Index = uint8(a[1].Register2)
}
case 3:
// CMPSD etc.; third operand is imm8, stored in offset, or a register.
prog.From = p.addrToAddr(&a[0])
prog.To = p.addrToAddr(&a[1])
switch {
case a[2].Is(addr.Offset):
prog.To.Offset = a[2].Offset
case a[2].Is(addr.Register):
// Strange reodering.
prog.To = p.addrToAddr(&a[2])
prog.From = p.addrToAddr(&a[1])
if !a[0].IsImmediateConstant {
p.errorf("expected $value for 1st operand")
}
prog.To.Offset = a[0].Offset
default:
p.errorf("expected offset or register for 3rd operand")
}
default:
p.errorf("can't handle instruction with %d operands", len(a))
}
p.link(prog, true)
}

View File

@ -0,0 +1,94 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asm
/*
Tested with uint8s like this:
for a := 0; a <= 255; a++ {
for b := 0; b <= 127; b++ {
ovfl := a+b != int(uint8(a)+uint8(b))
if addOverflows(uint8(a), uint8(b)) != ovfl {
fmt.Printf("%d+%d fails\n", a, b)
break
}
}
}
for a := 0; a <= 255; a++ {
for b := 0; b <= 127; b++ {
ovfl := a-b != int(uint8(a)-uint8(b))
if subOverflows(uint8(a), uint8(b)) != ovfl {
fmt.Printf("%d-%d fails\n", a, b)
break
}
}
}
for a := 0; a <= 255; a++ {
for b := 0; b <= 255; b++ {
ovfl := a*b != int(uint8(a)*uint8(b))
if mulOverflows(uint8(a), uint8(b)) != ovfl {
fmt.Printf("%d*%d fails\n", a, b)
}
}
}
*/
func addOverflows(a, b uint64) bool {
return a+b < a
}
func subOverflows(a, b uint64) bool {
return a-b > a
}
func mulOverflows(a, b uint64) bool {
if a <= 1 || b <= 1 {
return false
}
c := a * b
return c/b != a
}
/*
For the record, signed overflow:
const mostNegative = -(mostPositive + 1)
const mostPositive = 1<<63 - 1
func signedAddOverflows(a, b int64) bool {
if (a >= 0) != (b >= 0) {
// Different signs cannot overflow.
return false
}
if a >= 0 {
// Both are positive.
return a+b < 0
}
return a+b >= 0
}
func signedSubOverflows(a, b int64) bool {
if (a >= 0) == (b >= 0) {
// Same signs cannot overflow.
return false
}
if a >= 0 {
// a positive, b negative.
return a-b < 0
}
return a-b >= 0
}
func signedMulOverflows(a, b int64) bool {
if a == 0 || b == 0 || a == 1 || b == 1 {
return false
}
if a == mostNegative || b == mostNegative {
return true
}
c := a * b
return c/b != a
}
*/

View File

@ -0,0 +1,527 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package asm implements the parser and instruction generator for the assembler.
// TODO: Split apart?
package asm
import (
"fmt"
"log"
"os"
"strconv"
"text/scanner"
"cmd/asm/internal/addr"
"cmd/asm/internal/arch"
"cmd/asm/internal/lex"
"cmd/internal/obj"
)
type Parser struct {
lex lex.TokenReader
lineNum int // Line number in source file.
histLineNum int // Cumulative line number across source files.
errorLine int // (Cumulative) line number of last error.
errorCount int // Number of errors.
pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA.
input []lex.Token
inputPos int
pendingLabels []string // Labels to attach to next instruction.
labels map[string]*obj.Prog
toPatch []Patch
addr []addr.Addr
arch *arch.Arch
linkCtxt *obj.Link
firstProg *obj.Prog
lastProg *obj.Prog
dataAddr map[string]int64 // Most recent address for DATA for this symbol.
}
type Patch struct {
prog *obj.Prog
label string
}
func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser {
return &Parser{
linkCtxt: ctxt,
arch: ar,
lex: lexer,
labels: make(map[string]*obj.Prog),
dataAddr: make(map[string]int64),
}
}
func (p *Parser) errorf(format string, args ...interface{}) {
if p.histLineNum == p.errorLine {
// Only one error per line.
return
}
p.errorLine = p.histLineNum
// Put file and line information on head of message.
format = "%s:%d: " + format + "\n"
args = append([]interface{}{p.lex.File(), p.lineNum}, args...)
fmt.Fprintf(os.Stderr, format, args...)
p.errorCount++
if p.errorCount > 10 {
log.Fatal("too many errors")
}
}
func (p *Parser) Parse() (*obj.Prog, bool) {
for p.line() {
}
if p.errorCount > 0 {
return nil, false
}
p.patch()
return p.firstProg, true
}
// WORD op {, op} '\n'
func (p *Parser) line() bool {
// Skip newlines.
var tok lex.ScanToken
for {
tok = p.lex.Next()
// We save the line number here so error messages from this instruction
// are labeled with this line. Otherwise we complain after we've absorbed
// the terminating newline and the line numbers are off by one in errors.
p.lineNum = p.lex.Line()
p.histLineNum = lex.HistLine()
switch tok {
case '\n':
continue
case scanner.EOF:
return false
}
break
}
// First item must be an identifier.
if tok != scanner.Ident {
p.errorf("expected identifier, found %q", p.lex.Text())
return false // Might as well stop now.
}
word := p.lex.Text()
operands := make([][]lex.Token, 0, 3)
// Zero or more comma-separated operands, one per loop.
first := true // Permit ':' to define this as a label.
for tok != '\n' && tok != ';' {
// Process one operand.
items := make([]lex.Token, 0, 3)
for {
tok = p.lex.Next()
if first {
if tok == ':' {
p.pendingLabels = append(p.pendingLabels, word)
return true
}
first = false
}
if tok == scanner.EOF {
p.errorf("unexpected EOF")
return false
}
if tok == '\n' || tok == ';' || tok == ',' {
break
}
items = append(items, lex.Make(tok, p.lex.Text()))
}
if len(items) > 0 {
operands = append(operands, items)
} else if len(operands) > 0 {
// Had a comma but nothing after.
p.errorf("missing operand")
}
}
i := p.arch.Pseudos[word]
if i != 0 {
p.pseudo(i, word, operands)
return true
}
i = p.arch.Instructions[word]
if i != 0 {
p.instruction(i, word, operands)
return true
}
p.errorf("unrecognized instruction %s", word)
return true
}
func (p *Parser) instruction(op int, word string, operands [][]lex.Token) {
p.addr = p.addr[0:0]
for _, op := range operands {
p.addr = append(p.addr, p.address(op))
}
// Is it a jump? TODO
if word[0] == 'J' || word == "CALL" {
p.asmJump(op, p.addr)
return
}
p.asmInstruction(op, p.addr)
}
func (p *Parser) pseudo(op int, word string, operands [][]lex.Token) {
switch op {
case p.arch.ATEXT:
p.asmText(word, operands)
case p.arch.ADATA:
p.asmData(word, operands)
case p.arch.AGLOBL:
p.asmGlobl(word, operands)
case p.arch.APCDATA:
p.asmPCData(word, operands)
case p.arch.AFUNCDATA:
p.asmFuncData(word, operands)
default:
p.errorf("unimplemented: %s", word)
}
}
func (p *Parser) start(operand []lex.Token) {
p.input = operand
p.inputPos = 0
}
// address parses the operand into a link address structure.
func (p *Parser) address(operand []lex.Token) addr.Addr {
p.start(operand)
addr := addr.Addr{}
p.operand(&addr)
return addr
}
// parse (R). The opening paren is known to be there.
// The return value states whether it was a scaled mode.
func (p *Parser) parenRegister(a *addr.Addr) bool {
p.next()
tok := p.next()
if tok.ScanToken != scanner.Ident {
p.errorf("expected register, got %s", tok)
}
r, present := p.arch.Registers[tok.String()]
if !present {
p.errorf("expected register, found %s", tok.String())
}
a.IsIndirect = true
scaled := p.peek() == '*'
if scaled {
// (R*2)
p.next()
tok := p.get(scanner.Int)
a.Scale = p.scale(tok.String())
a.Index = int16(r) // TODO: r should have type int16 but is uint8.
} else {
if a.HasRegister {
p.errorf("multiple indirections")
}
a.HasRegister = true
a.Register = int16(r)
}
p.expect(')')
p.next()
return scaled
}
// scale converts a decimal string into a valid scale factor.
func (p *Parser) scale(s string) int8 {
switch s {
case "1", "2", "4", "8":
return int8(s[0] - '0')
}
p.errorf("bad scale: %s", s)
return 0
}
// parse (R) or (R)(R*scale). The opening paren is known to be there.
func (p *Parser) addressMode(a *addr.Addr) {
scaled := p.parenRegister(a)
if !scaled && p.peek() == '(' {
p.parenRegister(a)
}
}
// operand parses a general operand and stores the result in *a.
func (p *Parser) operand(a *addr.Addr) bool {
if len(p.input) == 0 {
p.errorf("empty operand: cannot happen")
return false
}
switch p.peek() {
case '$':
p.next()
switch p.peek() {
case scanner.Ident:
a.IsImmediateAddress = true
p.operand(a) // TODO
case scanner.String:
a.IsImmediateConstant = true
a.HasString = true
a.String = p.atos(p.next().String())
case scanner.Int, scanner.Float, '+', '-', '~', '(':
a.IsImmediateConstant = true
if p.have(scanner.Float) {
a.HasFloat = true
a.Float = p.floatExpr()
} else {
a.HasOffset = true
a.Offset = int64(p.expr())
}
default:
p.errorf("illegal %s in immediate operand", p.next().String())
}
case '*':
p.next()
tok := p.next()
r, present := p.arch.Registers[tok.String()]
if !present {
p.errorf("expected register; got %s", tok.String())
}
a.HasRegister = true
a.Register = int16(r)
case '(':
p.next()
if p.peek() == scanner.Ident {
p.back()
p.addressMode(a)
break
}
p.back()
fallthrough
case '+', '-', '~', scanner.Int, scanner.Float:
if p.have(scanner.Float) {
a.HasFloat = true
a.Float = p.floatExpr()
} else {
a.HasOffset = true
a.Offset = int64(p.expr())
}
if p.peek() != scanner.EOF {
p.expect('(')
p.addressMode(a)
}
case scanner.Ident:
tok := p.next()
// Either R or (most general) ident<>+4(SB)(R*scale).
if r, present := p.arch.Registers[tok.String()]; present {
a.HasRegister = true
a.Register = int16(r)
// Possibly register pair: DX:AX.
if p.peek() == ':' {
p.next()
tok = p.get(scanner.Ident)
a.HasRegister2 = true
a.Register2 = int16(p.arch.Registers[tok.String()])
}
break
}
// Weirdness with statics: Might now have "<>".
if p.peek() == '<' {
p.next()
p.get('>')
a.IsStatic = true
}
if p.peek() == '+' || p.peek() == '-' {
a.HasOffset = true
a.Offset = int64(p.expr())
}
a.Symbol = tok.String()
if p.peek() == scanner.EOF {
break
}
// Expect (SB) or (FP)
p.expect('(')
p.parenRegister(a)
if a.Register != arch.RSB && a.Register != arch.RFP && a.Register != arch.RSP {
p.errorf("expected SB, FP, or SP offset for %s", tok)
}
// Possibly have scaled register (CX*8).
if p.peek() != scanner.EOF {
p.expect('(')
p.addressMode(a)
}
default:
p.errorf("unexpected %s in operand", p.next())
}
p.expect(scanner.EOF)
return true
}
// expr = term | term '+' term
func (p *Parser) expr() uint64 {
value := p.term()
for {
switch p.peek() {
case '+':
p.next()
x := p.term()
if addOverflows(x, value) {
p.errorf("overflow in %d+%d", value, x)
}
value += x
case '-':
p.next()
value -= p.term()
case '|':
p.next()
value |= p.term()
case '^':
p.next()
value ^= p.term()
default:
return value
}
}
}
// floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')'
func (p *Parser) floatExpr() float64 {
tok := p.next()
switch tok.ScanToken {
case '(':
v := p.floatExpr()
if p.next().ScanToken != ')' {
p.errorf("missing closing paren")
}
return v
case '+':
return +p.floatExpr()
case '-':
return -p.floatExpr()
case scanner.Float:
return p.atof(tok.String())
}
p.errorf("unexpected %s evaluating float expression", tok)
return 0
}
// term = const | term '*' term | '(' expr ')'
func (p *Parser) term() uint64 {
tok := p.next()
switch tok.ScanToken {
case '(':
v := p.expr()
if p.next().ScanToken != ')' {
p.errorf("missing closing paren")
}
return v
case '+':
return +p.term()
case '-':
return -p.term()
case '~':
return ^p.term()
case scanner.Int:
value := p.atoi(tok.String())
for {
switch p.peek() {
case '*':
p.next()
value *= p.term() // OVERFLOW?
case '/':
p.next()
value /= p.term()
case '%':
p.next()
value %= p.term()
case lex.LSH:
p.next()
shift := p.term()
if shift < 0 {
p.errorf("negative left shift %d", shift)
}
value <<= uint(shift)
case lex.RSH:
p.next()
shift := p.term()
if shift < 0 {
p.errorf("negative right shift %d", shift)
}
value >>= uint(shift)
case '&':
p.next()
value &= p.term()
default:
return value
}
}
}
p.errorf("unexpected %s evaluating expression", tok)
return 0
}
func (p *Parser) atoi(str string) uint64 {
value, err := strconv.ParseUint(str, 0, 64)
if err != nil {
p.errorf("%s", err)
}
return value
}
func (p *Parser) atof(str string) float64 {
value, err := strconv.ParseFloat(str, 64)
if err != nil {
p.errorf("%s", err)
}
return value
}
func (p *Parser) atos(str string) string {
value, err := strconv.Unquote(str)
if err != nil {
p.errorf("%s", err)
}
return value
}
// EOF represents the end of input.
var EOF = lex.Make(scanner.EOF, "EOF")
func (p *Parser) next() lex.Token {
if !p.more() {
return EOF
}
tok := p.input[p.inputPos]
p.inputPos++
return tok
}
func (p *Parser) back() {
p.inputPos--
}
func (p *Parser) peek() lex.ScanToken {
if p.more() {
return p.input[p.inputPos].ScanToken
}
return scanner.EOF
}
func (p *Parser) more() bool {
return p.inputPos < len(p.input)
}
// get verifies that the next item has the expected type and returns it.
func (p *Parser) get(expected lex.ScanToken) lex.Token {
p.expect(expected)
return p.next()
}
// expect verifies that the next item has the expected type. It does not consume it.
func (p *Parser) expect(expected lex.ScanToken) {
if p.peek() != expected {
p.errorf("expected %s, found %s", expected, p.next())
}
}
// have reports whether the remaining tokens contain the specified token.
func (p *Parser) have(token lex.ScanToken) bool {
for i := p.inputPos; i < len(p.input); i++ {
if p.input[i].ScanToken == token {
return true
}
}
return false
}

63
src/cmd/asm/main.go Normal file
View File

@ -0,0 +1,63 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"flag"
"fmt"
"go/build"
"log"
"os"
"cmd/asm/internal/arch"
"cmd/asm/internal/asm"
"cmd/asm/internal/flags"
"cmd/asm/internal/lex"
"cmd/internal/obj"
)
func main() {
log.SetFlags(0)
log.SetPrefix("asm: ")
GOARCH := build.Default.GOARCH
architecture := arch.Set(GOARCH)
if architecture == nil {
log.Fatalf("asm: unrecognized architecture %s", GOARCH)
}
// Is this right?
flags.Parse(build.Default.GOROOT, build.Default.GOOS, GOARCH, architecture.Thechar)
// Create object file, write header.
fd, err := os.Create(*flags.OutputFile)
if err != nil {
log.Fatal(err)
}
ctxt := obj.Linknew(architecture.LinkArch)
if *flags.PrintOut {
ctxt.Debugasm = 1
}
ctxt.Bso = obj.Binitw(os.Stdout)
defer obj.Bflush(ctxt.Bso)
ctxt.Diag = log.Fatalf
output := obj.Binitw(fd)
fmt.Fprintf(output, "go object %s %s %s\n", obj.Getgoos(), obj.Getgoarch(), obj.Getgoversion())
fmt.Fprintf(output, "!\n")
lexer := lex.NewLexer(flag.Arg(0), ctxt)
parser := asm.NewParser(ctxt, architecture, lexer)
pList := obj.Linknewplist(ctxt)
var ok bool
pList.Firstpc, ok = parser.Parse()
if !ok {
log.Print("FAIL TODO")
os.Exit(1)
}
obj.Writeobjdirect(ctxt, output)
obj.Bflush(output)
}