[dev.cc] cmd/asm: fix the expression parser and add tests

Rewrite the grammar to have one more production so it parses ~0*0 correctly and write tests to prove it. Change-Id: I0dd652baf65b48a3f26c9287c420702db4eaec59 Reviewed-on: https://go-review.googlesource.com/3443 Reviewed-by: Russ Cox <rsc@golang.org>
2024-09-30 22:48:32 -06:00 · 2015-01-28 11:11:33 -08:00 · 2015-01-28 11:11:33 -08:00 · 0012b83507
commit 0012b83507
parent ad82238653
4 changed files with 132 additions and 46 deletions
--- a/src/cmd/asm/internal/asm/expr_test.go
+++ b/src/cmd/asm/internal/asm/expr_test.go
@ -0,0 +1,71 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package asm
+
+import (
+	"cmd/asm/internal/lex"
+	"testing"
+	"text/scanner"
+)
+
+type exprTest struct {
+	input  string
+	output int64
+	atEOF  bool
+}
+
+var exprTests = []exprTest{
+	// Simple
+	{"0", 0, true},
+	{"3", 3, true},
+	{"070", 8 * 7, true},
+	{"0x0f", 15, true},
+	{"0xFF", 255, true},
+	{"9223372036854775807", 9223372036854775807, true}, // max int64
+	// Unary
+	{"-0", 0, true},
+	{"~0", -1, true},
+	{"~0*0", 0, true},
+	{"+3", 3, true},
+	{"-3", -3, true},
+	{"-9223372036854775808", -9223372036854775808, true}, // min int64
+	// Binary
+	{"3+4", 3 + 4, true},
+	{"3-4", 3 - 4, true},
+	{"2|5", 2 | 5, true},
+	{"3^4", 3 ^ 4, true},
+	{"3*4", 3 * 4, true},
+	{"14/4", 14 / 4, true},
+	{"3<<4", 3 << 4, true},
+	{"48>>3", 48 >> 3, true},
+	{"3&9", 3 & 9, true},
+	// General
+	{"3*2+3", 3*2 + 3, true},
+	{"3+2*3", 3 + 2*3, true},
+	{"3*(2+3)", 3 * (2 + 3), true},
+	{"3*-(2+3)", 3 * -(2 + 3), true},
+	{"3<<2+4", 3<<2 + 4, true},
+	{"3<<2+4", 3<<2 + 4, true},
+	{"3<<(2+4)", 3 << (2 + 4), true},
+	// Junk at EOF.
+	{"3 x", 3, false},
+}
+
+func TestExpr(t *testing.T) {
+	p := NewParser(nil, nil, nil) // Expression evaluation uses none of these fields of the parser.
+	for i, test := range exprTests {
+		p.start(lex.Tokenize(test.input))
+		result := int64(p.expr())
+		if result != test.output {
+			t.Errorf("%d: %q evaluated to %d; expected %d", i, test.input, result, test.output)
+		}
+		tok := p.next()
+		if test.atEOF && tok.ScanToken != scanner.EOF {
+			t.Errorf("%d: %q: at EOF got %s", i, test.input, tok)
+		} else if !test.atEOF && tok.ScanToken == scanner.EOF {
+			t.Errorf("%d: %q: expected not EOF but at EOF", i, test.input)
+		}
+	}
+}
--- a/src/cmd/asm/internal/asm/parse.go
+++ b/src/cmd/asm/internal/asm/parse.go
@ -345,7 +345,15 @@ func (p *Parser) operand(a *addr.Addr) bool {
 	return true
 }

-// expr = term | term '+' term
+// Note: There are two changes in the expression handling here
+// compared to the old yacc/C implemenatations. Neither has
+// much practical consequence because the expressions we
+// see in assembly code are simple, but for the record:
+//
+// 1) Evaluation uses uint64; the old one used int64.
+// 2) Precedence uses Go rules not C rules.
+
+// expr = term | term ('+' | '-' | '|' | '^') term.
 func (p *Parser) expr() uint64 {
 	value := p.term()
 	for {
@ -393,56 +401,63 @@ func (p *Parser) floatExpr() float64 {
 	return 0
 }

-// term = const | term '*' term | '(' expr ')'
+// term = factor | factor ('*' | '/' | '%' | '>>' | '<<' | '&') factor
 func (p *Parser) term() uint64 {
+	value := p.factor()
+	for {
+		switch p.peek() {
+		case '*':
+			p.next()
+			value *= p.factor() // OVERFLOW?
+		case '/':
+			p.next()
+			value /= p.factor()
+		case '%':
+			p.next()
+			value %= p.factor()
+		case lex.LSH:
+			p.next()
+			shift := p.factor()
+			if shift < 0 {
+				p.errorf("negative left shift %d", shift)
+			}
+			value <<= uint(shift) // OVERFLOW?
+		case lex.RSH:
+			p.next()
+			shift := p.term()
+			if shift < 0 {
+				p.errorf("negative right shift %d", shift)
+			}
+			value >>= uint(shift)
+		case '&':
+			p.next()
+			value &= p.factor()
+		default:
+			return value
+		}
+	}
+	p.errorf("unexpected %s evaluating expression", p.peek())
+	return 0
+}
+
+// factor = const | '+' factor | '-' factor | '~' factor | '(' expr ')'
+func (p *Parser) factor() uint64 {
 	tok := p.next()
 	switch tok.ScanToken {
+	case scanner.Int:
+		return p.atoi(tok.String())
+	case '+':
+		return +p.factor()
+	case '-':
+		return -p.factor()
+	case '~':
+		return ^p.factor()
 	case '(':
 		v := p.expr()
 		if p.next().ScanToken != ')' {
 			p.errorf("missing closing paren")
 		}
 		return v
-	case '+':
-		return +p.term()
-	case '-':
-		return -p.term()
-	case '~':
-		return ^p.term()
-	case scanner.Int:
-		value := p.atoi(tok.String())
-		for {
-			switch p.peek() {
-			case '*':
-				p.next()
-				value *= p.term() // OVERFLOW?
-			case '/':
-				p.next()
-				value /= p.term()
-			case '%':
-				p.next()
-				value %= p.term()
-			case lex.LSH:
-				p.next()
-				shift := p.term()
-				if shift < 0 {
-					p.errorf("negative left shift %d", shift)
-				}
-				value <<= uint(shift)
-			case lex.RSH:
-				p.next()
-				shift := p.term()
-				if shift < 0 {
-					p.errorf("negative right shift %d", shift)
-				}
-				value >>= uint(shift)
-			case '&':
-				p.next()
-				value &= p.term()
-			default:
-				return value
-			}
-		}
 	}
 	p.errorf("unexpected %s evaluating expression", tok)
 	return 0
--- a/src/cmd/asm/internal/lex/input.go
+++ b/src/cmd/asm/internal/lex/input.go
@ -46,7 +46,7 @@ func predefine(defines flags.MultiFlag) map[string]*Macro {
 		if i > 0 {
 			name, value = name[:i], name[i+1:]
 		}
-		tokens := tokenize(name)
+		tokens := Tokenize(name)
 		if len(tokens) != 1 || tokens[0].ScanToken != scanner.Ident {
 			fmt.Fprintf(os.Stderr, "asm: parsing -D: %q is not a valid identifier name\n", tokens[0])
 			flags.Usage()
@ -54,7 +54,7 @@ func predefine(defines flags.MultiFlag) map[string]*Macro {
 		macros[name] = &Macro{
 			name:   name,
 			args:   nil,
-			tokens: tokenize(value),
+			tokens: Tokenize(value),
 		}
 	}
 	return macros
--- a/src/cmd/asm/internal/lex/lex.go
+++ b/src/cmd/asm/internal/lex/lex.go
@ -128,8 +128,8 @@ type Macro struct {
 	tokens []Token  // Body of macro.
 }

-// tokenize turns a string into a list of Tokens; used to parse the -D flag.
-func tokenize(str string) []Token {
+// Tokenize turns a string into a list of Tokens; used to parse the -D flag and in tests.
+func Tokenize(str string) []Token {
 	t := NewTokenizer("command line", strings.NewReader(str), nil)
 	var tokens []Token
 	for {