From d6eb21e331ba73d235b9014ce5b710d5e7bde545 Mon Sep 17 00:00:00 2001
From: Rob Pike
Date: Thu, 29 Jan 2015 16:33:36 -0800
Subject: [PATCH] [dev.cc] cmd/asm: rewrite the macro processor

The previous one was too broken, so just rewrite the code that invokes
a macro. Basically it was evaluating things too early, and mishandling
nested invocations. It's also easier to understand now.

Keep backslash-newline around in macro definitions. They get processed
when the body is evaluated.

Write some golden tests.

Change-Id: I27435f77f258a0873f80932bdc8d13ad39821ac1
Reviewed-on: https://go-review.googlesource.com/3550
Reviewed-by: Russ Cox
---
 src/cmd/asm/internal/asm/parse.go    |   2 +-
 src/cmd/asm/internal/lex/input.go    |  73 ++++++++-------
 src/cmd/asm/internal/lex/lex_test.go | 130 +++++++++++++++++++++++++++
 src/cmd/asm/internal/lex/stack.go    |   2 +-
 4 files changed, 174 insertions(+), 33 deletions(-)
 create mode 100644 src/cmd/asm/internal/lex/lex_test.go

diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go
index b0f6ca9f08e..3efea625da4 100644
--- a/src/cmd/asm/internal/asm/parse.go
+++ b/src/cmd/asm/internal/asm/parse.go
@@ -80,7 +80,7 @@ func (p *Parser) Parse() (*obj.Prog, bool) {
 	return p.firstProg, true
 }
 
-// WORD [ arg {, arg} ] '\n'
+// WORD [ arg {, arg} ] (';' | '\n')
 func (p *Parser) line() bool {
 	// Skip newlines.
 	var tok lex.ScanToken
diff --git a/src/cmd/asm/internal/lex/input.go b/src/cmd/asm/internal/lex/input.go
index a193649fee7..529fd13f262 100644
--- a/src/cmd/asm/internal/lex/input.go
+++ b/src/cmd/asm/internal/lex/input.go
@@ -229,10 +229,6 @@ func (in *Input) macroDefinition(name string) ([]string, []Token) {
 			if tok != '\n' && tok != '\\' {
 				in.Error(`can only escape \ or \n in definition for macro:`, name)
 			}
-			if tok == '\n' { // backslash-newline is discarded
-				tok = in.Stack.Next()
-				continue
-			}
 		}
 		tokens = append(tokens, Make(tok, in.Text()))
 		tok = in.Stack.Next()
 	}
@@ -279,35 +275,50 @@ func (in *Input) argsFor(macro *Macro) map[string][]Token {
 	if tok != '(' {
 		in.Error("missing arguments for invocation of macro:", macro.name)
 	}
-	var tokens []Token
-	args := make(map[string][]Token)
-	argNum := 0
-	for {
-		tok = in.Stack.Next()
-		switch tok {
-		case scanner.EOF, '\n':
-			in.Error("unterminated arg list invoking macro:", macro.name)
-		case ',', ')':
-			if argNum >= len(macro.args) {
-				in.Error("too many arguments for macro:", macro.name)
-			}
-			if len(macro.args) == 0 && argNum == 0 && len(tokens) == 0 {
-				// Zero-argument macro invoked with no arguments.
-				return args
-			}
-			args[macro.args[argNum]] = tokens
-			tokens = nil
-			argNum++
-			if tok == ')' {
-				if argNum != len(macro.args) {
-					in.Error("too few arguments for macro:", macro.name)
-				}
-				return args
-			}
-		default:
-			tokens = append(tokens, Make(tok, in.Stack.Text()))
+	var args [][]Token
+	// One macro argument per iteration. Collect them all and check counts afterwards.
+	for argNum := 0; ; argNum++ {
+		tokens, tok := in.collectArgument(macro)
+		args = append(args, tokens)
+		if tok == ')' {
+			break
 		}
 	}
+	// Zero-argument macros are tricky.
+	if len(macro.args) == 0 && len(args) == 1 && args[0] == nil {
+		args = nil
+	} else if len(args) != len(macro.args) {
+		in.Error("wrong arg count for macro", macro.name)
+	}
+	argMap := make(map[string][]Token)
+	for i, arg := range args {
+		argMap[macro.args[i]] = arg
+	}
+	return argMap
+}
+
+// collectArgument returns the actual tokens for a single argument of a macro.
+// It also returns the token that terminated the argument, which will always
+// be either ',' or ')'. The starting '(' has been scanned.
+func (in *Input) collectArgument(macro *Macro) ([]Token, ScanToken) {
+	nesting := 0
+	var tokens []Token
+	for {
+		tok := in.Stack.Next()
+		if tok == scanner.EOF || tok == '\n' {
+			in.Error("unterminated arg list invoking macro:", macro.name)
+		}
+		if nesting == 0 && (tok == ')' || tok == ',') {
+			return tokens, tok
+		}
+		if tok == '(' {
+			nesting++
+		}
+		if tok == ')' {
+			nesting--
+		}
+		tokens = append(tokens, Make(tok, in.Stack.Text()))
+	}
 }
 
 // #ifdef and #ifndef processing.
diff --git a/src/cmd/asm/internal/lex/lex_test.go b/src/cmd/asm/internal/lex/lex_test.go
new file mode 100644
index 00000000000..7ac689fb762
--- /dev/null
+++ b/src/cmd/asm/internal/lex/lex_test.go
@@ -0,0 +1,130 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+	"text/scanner"
+)
+
+type lexTest struct {
+	name   string
+	input  string
+	output string
+}
+
+var lexTests = []lexTest{
+	{
+		"empty",
+		"",
+		"",
+	},
+	{
+		"simple",
+		"1 (a)",
+		"1.(.a.)",
+	},
+	{
+		"simple define",
+		lines(
+			"#define A 1234",
+			"A",
+		),
+		"1234.\n",
+	},
+	{
+		"macro without arguments",
+		"#define A() 1234\n" + "A()\n",
+		"1234.\n",
+	},
+	{
+		"macro with arguments",
+		"#define A(x, y, z) x+z+y\n" + "A(1, 2, 3)\n",
+		"1.+.3.+.2.\n",
+	},
+	{
+		"multiline macro without arguments",
+		lines(
+			"#define A 1\\",
+			"\t2\\",
+			"\t3",
+			"before",
+			"A",
+			"after",
+		),
+		"before.\n.1.\n.2.\n.3.\n.after.\n",
+	},
+	{
+		"multiline macro with arguments",
+		lines(
+			"#define A(a, b, c) a\\",
+			"\tb\\",
+			"\tc",
+			"before",
+			"A(1, 2, 3)",
+			"after",
+		),
+		"before.\n.1.\n.2.\n.3.\n.after.\n",
+	},
+	{
+		"LOAD macro",
+		lines(
+			"#define LOAD(off, reg) \\",
+			"\tMOVBLZX (off*4)(R12), reg \\",
+			"\tADDB reg, DX",
+			"",
+			"LOAD(8, AX)",
+		),
+		"\n.\n.MOVBLZX.(.8.*.4.).(.R12.).,.AX.\n.ADDB.AX.,.DX.\n",
+	},
+	{
+		"nested multiline macro",
+		lines(
+			"#define KEYROUND(xmm, load, off, r1, r2, index) \\",
+			"\tMOVBLZX (BP)(DX*4), R8 \\",
+			"\tload((off+1), r2) \\",
+			"\tMOVB R8, (off*4)(R12) \\",
+			"\tPINSRW $index, (BP)(R8*4), xmm",
+			"#define LOAD(off, reg) \\",
+			"\tMOVBLZX (off*4)(R12), reg \\",
+			"\tADDB reg, DX",
+			"KEYROUND(X0, LOAD, 8, AX, BX, 0)",
+		),
+		"\n.MOVBLZX.(.BP.).(.DX.*.4.).,.R8.\n.\n.MOVBLZX.(.(.8.+.1.).*.4.).(.R12.).,.BX.\n.ADDB.BX.,.DX.\n.MOVB.R8.,.(.8.*.4.).(.R12.).\n.PINSRW.$.0.,.(.BP.).(.R8.*.4.).,.X0.\n",
+	},
+}
+
+func TestLex(t *testing.T) {
+	for _, test := range lexTests {
+		input := NewInput(test.name)
+		input.Push(NewTokenizer(test.name, strings.NewReader(test.input), nil))
+		result := drain(input)
+		if result != test.output {
+			t.Errorf("%s: got %q expected %q", test.name, result, test.output)
+		}
+	}
+}
+
+// lines joins the arguments together as complete lines.
+func lines(a ...string) string {
+	return strings.Join(a, "\n") + "\n"
+}
+
+// drain returns a single string representing the processed input tokens.
+func drain(input *Input) string {
+	var buf bytes.Buffer
+	for {
+		tok := input.Next()
+		if tok == scanner.EOF {
+			return buf.String()
+		}
+		if buf.Len() > 0 {
+			buf.WriteByte('.')
+		}
+		buf.WriteString(input.Text())
+	}
+}
diff --git a/src/cmd/asm/internal/lex/stack.go b/src/cmd/asm/internal/lex/stack.go
index acd44012bff..5e3b7beefb1 100644
--- a/src/cmd/asm/internal/lex/stack.go
+++ b/src/cmd/asm/internal/lex/stack.go
@@ -12,7 +12,7 @@ type Stack struct {
 	tr []TokenReader
 }
 
-// Push adds tr to the top (ehd) of the input stack. (Popping happens automatically.)
+// Push adds tr to the top (end) of the input stack. (Popping happens automatically.)
 func (s *Stack) Push(tr TokenReader) {
 	s.tr = append(s.tr, tr)
 }
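
Illustrative note, not part of the change above: the new collectArgument treats ',' and ')' as argument terminators only at parenthesis-nesting depth zero, which is what lets a single actual argument be a parenthesized expression or another macro invocation, as in the KEYROUND/LOAD golden test. The standalone sketch below applies the same depth rule to a plain string rather than to the assembler's token stream; the splitArgs helper and its string-based handling are hypothetical and only mirror the idea, they are not code from cmd/asm.

package main

import (
	"fmt"
	"strings"
)

// splitArgs splits a macro argument list into its comma-separated arguments,
// honoring nested parentheses the same way collectArgument does: a ',' ends
// an argument only when the nesting depth is zero. Hypothetical sketch.
func splitArgs(s string) []string {
	var args []string
	var cur strings.Builder
	nesting := 0
	for _, r := range s {
		switch {
		case r == ',' && nesting == 0:
			args = append(args, strings.TrimSpace(cur.String()))
			cur.Reset()
			continue
		case r == '(':
			nesting++
		case r == ')':
			nesting--
		}
		cur.WriteRune(r)
	}
	return append(args, strings.TrimSpace(cur.String()))
}

func main() {
	// Without the nesting check, the comma inside "load(8, AX)" would be
	// taken as an argument separator and that argument would be cut in two.
	fmt.Println(splitArgs("X0, load(8, AX), 0"))
	// Prints: [X0 load(8, AX) 0]
}

In the token-based implementation above, the same rule is what keeps a nested invocation such as load((off+1), r2) together as one argument of KEYROUND instead of splitting it at its inner comma.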