2008-07-02 18:02:55 -06:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package Scanner
|
|
|
|
|
|
|
|
export EOF;
|
|
|
|
// Token values. Each name's value is its position in this list (iota),
// so the order is significant. The keyword tokens must stay contiguous
// between the KEYWORDS_BEG and KEYWORDS_END markers because Init walks
// that range to build the Keywords table.
const (
	// special tokens
	ILLEGAL = iota;
	EOF;
	IDENT;
	STRING;
	NUMBER;

	// separators
	COMMA;
	COLON;
	SEMICOLON;
	PERIOD;

	// brackets
	LPAREN;
	RPAREN;
	LBRACK;
	RBRACK;
	LBRACE;
	RBRACE;

	// assignment and declaration
	ASSIGN;
	DEFINE;

	// unary operators
	INC;
	DEC;
	NOT;

	// bitwise operators
	AND;
	OR;
	XOR;

	// arithmetic operators
	ADD;
	SUB;
	MUL;
	QUO;
	REM;

	// comparison operators
	EQL;
	NEQ;
	LSS;
	LEQ;
	GTR;
	GEQ;

	// shift operators
	SHL;
	SHR;

	// arithmetic assignment operators
	ADD_ASSIGN;
	SUB_ASSIGN;
	MUL_ASSIGN;
	QUO_ASSIGN;
	REM_ASSIGN;

	// bitwise assignment operators
	AND_ASSIGN;
	OR_ASSIGN;
	XOR_ASSIGN;

	// shift assignment operators
	SHL_ASSIGN;
	SHR_ASSIGN;

	// conditional operators
	CAND;
	COR;

	// keywords
	KEYWORDS_BEG;
	BREAK;
	CASE;
	CONST;
	CONTINUE;
	DEFAULT;
	ELSE;
	EXPORT;
	FALLTHROUGH;
	FALSE;
	FOR;
	FUNC;
	GO;
	GOTO;
	IF;
	IMPORT;
	INTERFACE;
	MAP;
	NEW;
	NIL;
	PACKAGE;
	RANGE;
	RETURN;
	SELECT;
	STRUCT;
	SWITCH;
	TRUE;
	TYPE;
	VAR;
	KEYWORDS_END;
)
|
|
|
|
|
|
|
|
|
2008-07-03 17:51:22 -06:00
|
|
|
// Keywords maps token names to token values. It is nil until Init
// allocates it and fills it from TokenName for the token values in the
// KEYWORDS_BEG..KEYWORDS_END range; ScanIdentifier looks identifiers up
// in it to tell keywords from plain identifiers.
var Keywords *map [string] int;
|
2008-07-02 18:02:55 -06:00
|
|
|
|
|
|
|
|
|
|
|
export TokenName
|
|
|
|
func TokenName(tok int) string {
|
|
|
|
switch (tok) {
|
2008-07-03 16:16:51 -06:00
|
|
|
case ILLEGAL: return "illegal";
|
|
|
|
case EOF: return "eof";
|
|
|
|
case IDENT: return "ident";
|
|
|
|
case STRING: return "string";
|
|
|
|
case NUMBER: return "number";
|
|
|
|
|
|
|
|
case COMMA: return ",";
|
|
|
|
case COLON: return ":";
|
|
|
|
case SEMICOLON: return ";";
|
|
|
|
case PERIOD: return ".";
|
|
|
|
|
|
|
|
case LPAREN: return "(";
|
|
|
|
case RPAREN: return ")";
|
|
|
|
case LBRACK: return "[";
|
|
|
|
case RBRACK: return "]";
|
|
|
|
case LBRACE: return "{";
|
|
|
|
case RBRACE: return "}";
|
|
|
|
|
|
|
|
case ASSIGN: return "=";
|
|
|
|
case DEFINE: return ":=";
|
2008-07-02 18:02:55 -06:00
|
|
|
|
2008-07-03 16:16:51 -06:00
|
|
|
case INC: return "++";
|
|
|
|
case DEC: return "--";
|
|
|
|
case NOT: return "!";
|
2008-07-03 00:19:31 -06:00
|
|
|
|
2008-07-03 16:16:51 -06:00
|
|
|
case AND: return "&";
|
|
|
|
case OR: return "|";
|
|
|
|
case XOR: return "^";
|
2008-07-02 18:02:55 -06:00
|
|
|
|
2008-07-03 16:16:51 -06:00
|
|
|
case ADD: return "+";
|
|
|
|
case SUB: return "-";
|
|
|
|
case MUL: return "*";
|
|
|
|
case QUO: return "/";
|
|
|
|
case REM: return "%";
|
2008-07-02 18:02:55 -06:00
|
|
|
|
2008-07-03 16:16:51 -06:00
|
|
|
case EQL: return "==";
|
|
|
|
case NEQ: return "!=";
|
|
|
|
case LSS: return "<";
|
|
|
|
case LEQ: return "<=";
|
|
|
|
case GTR: return ">";
|
|
|
|
case GEQ: return ">=";
|
|
|
|
|
|
|
|
case SHL: return "<<";
|
|
|
|
case SHR: return ">>";
|
|
|
|
|
|
|
|
case ADD_ASSIGN: return "+=";
|
|
|
|
case SUB_ASSIGN: return "-=";
|
|
|
|
case MUL_ASSIGN: return "+=";
|
|
|
|
case QUO_ASSIGN: return "/=";
|
|
|
|
case REM_ASSIGN: return "%=";
|
|
|
|
|
|
|
|
case AND_ASSIGN: return "&=";
|
|
|
|
case OR_ASSIGN: return "|=";
|
|
|
|
case XOR_ASSIGN: return "^=";
|
|
|
|
|
|
|
|
case SHL_ASSIGN: return "<<=";
|
|
|
|
case SHR_ASSIGN: return ">>=";
|
|
|
|
|
|
|
|
case CAND: return "&&";
|
|
|
|
case COR: return "||";
|
|
|
|
|
|
|
|
case BREAK: return "break";
|
|
|
|
case CASE: return "case";
|
|
|
|
case CONST: return "const";
|
|
|
|
case CONTINUE: return "continue";
|
|
|
|
case DEFAULT: return "default";
|
|
|
|
case ELSE: return "else";
|
|
|
|
case EXPORT: return "export";
|
|
|
|
case FALLTHROUGH: return "fallthrough";
|
|
|
|
case FALSE: return "false";
|
|
|
|
case FOR: return "for";
|
|
|
|
case FUNC: return "func";
|
|
|
|
case GO: return "go";
|
|
|
|
case GOTO: return "goto";
|
|
|
|
case IF: return "if";
|
|
|
|
case IMPORT: return "import";
|
|
|
|
case INTERFACE: return "interface";
|
|
|
|
case MAP: return "map";
|
|
|
|
case NEW: return "new";
|
|
|
|
case NIL: return "nil";
|
|
|
|
case PACKAGE: return "package";
|
|
|
|
case RANGE: return "range";
|
|
|
|
case RETURN: return "return";
|
|
|
|
case SELECT: return "select";
|
|
|
|
case STRUCT: return "struct";
|
|
|
|
case SWITCH: return "switch";
|
|
|
|
case TRUE: return "true";
|
|
|
|
case TYPE: return "type";
|
|
|
|
case VAR: return "var";
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
return "???";
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// is_whitespace reports whether ch is a blank, tab, newline or
// carriage return.
func is_whitespace (ch int) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true;
	}
	return false;
}
|
|
|
|
|
|
|
|
|
|
|
|
// is_letter reports whether ch may appear in an identifier as a letter:
// an ASCII letter, an underscore, or any character value of 128 and above.
func is_letter (ch int) bool {
	if ch >= 128 {
		// every non-ASCII character is accepted as a letter
		return true;
	}
	if ch == '_' {
		return true;
	}
	if 'a' <= ch && ch <= 'z' {
		return true;
	}
	return 'A' <= ch && ch <= 'Z';
}
|
|
|
|
|
|
|
|
|
2008-07-03 19:07:03 -06:00
|
|
|
// digit_val returns the numeric value of ch read as a digit:
// 0-9 for '0'-'9' and 10-15 for 'a'-'f' or 'A'-'F'. Any other
// character yields 16, so that "digit_val(ch) < base" fails for every
// base up to 16.
func digit_val (ch int) int {
	switch {
	case '0' <= ch && ch <= '9':
		return ch - '0';
	case 'a' <= ch && ch <= 'f':
		return ch - 'a' + 10;
	case 'A' <= ch && ch <= 'F':
		return ch - 'A' + 10;
	}
	return 16;  // larger than any legal digit val
}
|
|
|
|
|
|
|
|
|
2008-07-03 19:07:03 -06:00
|
|
|
export Scanner
|
|
|
|
// Scanner holds the state needed to split a source string into tokens:
// the text itself, the current read offset and one character of
// look-ahead.
type Scanner struct {
|
|
|
|
// src is the source text being scanned.
src string;
|
|
|
|
// pos is the offset in src at which Next reads the next character.
pos int;
|
|
|
|
ch int; // one char look-ahead; negative at end of file
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 19:07:03 -06:00
|
|
|
export Token
|
|
|
|
// Token is the record filled in by Scanner.Scan for each token it returns.
type Token struct {
	val int;     // token value (one of the token constants)
	beg int;     // offset in the source where the token text begins
	end int;     // offset in the source where the token text ends (exclusive)
	txt string;  // the source text between beg and end
}
|
|
|
|
|
|
|
|
|
2008-07-03 19:07:03 -06:00
|
|
|
// Print writes one line describing the token: its name as given by
// TokenName, its half-open [beg, end[ source range, and its text.
func (T *Token) Print () {
|
|
|
|
print TokenName(T.val), " [", T.beg, ", ", T.end, "[ ", T.txt, "\n";
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 17:51:22 -06:00
|
|
|
// Read the next Unicode char into S.ch.
|
|
|
|
// S.ch < 0 means end-of-file.
|
|
|
|
//
|
2008-07-02 18:02:55 -06:00
|
|
|
func (S *Scanner) Next () {
|
2008-07-03 16:16:51 -06:00
|
|
|
const (
|
|
|
|
Bit1 = 7;
|
|
|
|
Bitx = 6;
|
|
|
|
Bit2 = 5;
|
|
|
|
Bit3 = 4;
|
|
|
|
Bit4 = 3;
|
|
|
|
|
2008-07-03 17:51:22 -06:00
|
|
|
// TODO 6g constant evaluation incomplete
|
2008-07-03 16:16:51 -06:00
|
|
|
T1 = 0x00; // (1 << (Bit1 + 1) - 1) ^ 0xFF; // 0000 0000
|
|
|
|
Tx = 0x80; // (1 << (Bitx + 1) - 1) ^ 0xFF; // 1000 0000
|
|
|
|
T2 = 0xC0; // (1 << (Bit2 + 1) - 1) ^ 0xFF; // 1100 0000
|
|
|
|
T3 = 0xE0; // (1 << (Bit3 + 1) - 1) ^ 0xFF; // 1110 0000
|
|
|
|
T4 = 0xF0; // (1 << (Bit4 + 1) - 1) ^ 0xFF; // 1111 0000
|
|
|
|
|
|
|
|
Rune1 = 1 << (Bit1 + 0*Bitx) - 1; // 0000 0000 0111 1111
|
|
|
|
Rune2 = 1 << (Bit2 + 1*Bitx) - 1; // 0000 0111 1111 1111
|
|
|
|
Rune3 = 1 << (Bit3 + 2*Bitx) - 1; // 1111 1111 1111 1111
|
|
|
|
|
|
|
|
Maskx = 0x3F; // 1 << Bitx - 1; // 0011 1111
|
|
|
|
Testx = 0xC0; // Maskx ^ 0xFF; // 1100 0000
|
|
|
|
|
|
|
|
Bad = 0xFFFD; // Runeerror
|
|
|
|
);
|
|
|
|
|
2008-07-02 18:02:55 -06:00
|
|
|
src := S.src; // TODO only needed because of 6g bug
|
2008-07-03 16:16:51 -06:00
|
|
|
lim := len(src);
|
|
|
|
pos := S.pos;
|
|
|
|
|
|
|
|
// 1-byte sequence
|
|
|
|
// 0000-007F => T1
|
|
|
|
if pos >= lim {
|
2008-07-03 17:51:22 -06:00
|
|
|
S.ch = -1; // end of file
|
|
|
|
return;
|
2008-07-03 16:16:51 -06:00
|
|
|
}
|
2008-07-03 17:51:22 -06:00
|
|
|
c0 := int(src[pos]);
|
|
|
|
pos++;
|
2008-07-03 16:16:51 -06:00
|
|
|
if c0 < Tx {
|
|
|
|
S.ch = c0;
|
2008-07-03 17:51:22 -06:00
|
|
|
S.pos = pos;
|
2008-07-03 16:16:51 -06:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// 2-byte sequence
|
|
|
|
// 0080-07FF => T2 Tx
|
2008-07-03 17:51:22 -06:00
|
|
|
if pos >= lim {
|
|
|
|
goto bad;
|
2008-07-03 16:16:51 -06:00
|
|
|
}
|
2008-07-03 17:51:22 -06:00
|
|
|
c1 := int(src[pos]) ^ Tx;
|
|
|
|
pos++;
|
|
|
|
if c1 & Testx != 0 {
|
2008-07-03 16:16:51 -06:00
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
if c0 < T3 {
|
|
|
|
if c0 < T2 {
|
|
|
|
goto bad;
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
2008-07-03 16:16:51 -06:00
|
|
|
r := (c0 << Bitx | c1) & Rune2;
|
|
|
|
if r <= Rune1 {
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
S.ch = r;
|
2008-07-03 17:51:22 -06:00
|
|
|
S.pos = pos;
|
2008-07-03 16:16:51 -06:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2008-07-03 17:51:22 -06:00
|
|
|
// 3-byte sequence
|
2008-07-03 16:16:51 -06:00
|
|
|
// 0800-FFFF => T3 Tx Tx
|
2008-07-03 17:51:22 -06:00
|
|
|
if pos >= lim {
|
|
|
|
goto bad;
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
2008-07-03 17:51:22 -06:00
|
|
|
c2 := int(src[pos]) ^ Tx;
|
|
|
|
pos++;
|
2008-07-03 16:16:51 -06:00
|
|
|
if c2 & Testx != 0 {
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
if c0 < T4 {
|
|
|
|
r := (((c0 << Bitx | c1) << Bitx) | c2) & Rune3;
|
|
|
|
if r <= Rune2 {
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
S.ch = r;
|
2008-07-03 17:51:22 -06:00
|
|
|
S.pos = pos;
|
2008-07-03 16:16:51 -06:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// bad encoding
|
|
|
|
bad:
|
|
|
|
S.ch = Bad;
|
|
|
|
S.pos += 1;
|
|
|
|
return;
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func Init () {
|
|
|
|
Keywords = new(map [string] int);
|
2008-07-03 16:16:51 -06:00
|
|
|
|
|
|
|
for i := KEYWORDS_BEG; i <= KEYWORDS_END; i++ {
|
|
|
|
Keywords[TokenName(i)] = i;
|
|
|
|
}
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (S *Scanner) Open (src string) {
|
|
|
|
if Keywords == nil {
|
|
|
|
Init();
|
|
|
|
}
|
|
|
|
|
|
|
|
S.src = src;
|
|
|
|
S.pos = 0;
|
|
|
|
S.Next();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 17:51:22 -06:00
|
|
|
func (S *Scanner) Expect (ch int) {
|
|
|
|
if S.ch != ch {
|
|
|
|
panic "expected ", string(ch), " found ", string(S.ch);
|
|
|
|
}
|
|
|
|
S.Next();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-02 18:02:55 -06:00
|
|
|
func (S *Scanner) SkipWhitespace () {
|
|
|
|
for is_whitespace(S.ch) {
|
|
|
|
S.Next();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (S *Scanner) SkipComment () {
|
|
|
|
if S.ch == '/' {
|
|
|
|
// comment
|
2008-07-03 17:51:22 -06:00
|
|
|
S.Next();
|
|
|
|
for S.ch != '\n' && S.ch >= 0 {
|
|
|
|
S.Next();
|
|
|
|
}
|
2008-07-02 18:02:55 -06:00
|
|
|
|
|
|
|
} else {
|
|
|
|
/* comment */
|
2008-07-03 17:51:22 -06:00
|
|
|
S.Next();
|
|
|
|
for S.ch >= 0 {
|
|
|
|
ch := S.ch;
|
2008-07-02 18:02:55 -06:00
|
|
|
S.Next();
|
2008-07-03 17:51:22 -06:00
|
|
|
if ch == '*' && S.ch == '/' {
|
2008-07-02 18:02:55 -06:00
|
|
|
S.Next();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
panic "comment not terminated";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (S *Scanner) ScanIdentifier () int {
|
|
|
|
beg := S.pos - 1;
|
2008-07-03 19:07:03 -06:00
|
|
|
for is_letter(S.ch) || digit_val(S.ch) < 10 {
|
2008-07-02 18:02:55 -06:00
|
|
|
S.Next();
|
|
|
|
}
|
|
|
|
end := S.pos - 1;
|
|
|
|
|
|
|
|
var tok int;
|
|
|
|
var present bool;
|
|
|
|
tok, present = Keywords[S.src[beg : end]];
|
|
|
|
if !present {
|
|
|
|
tok = IDENT;
|
|
|
|
}
|
|
|
|
|
|
|
|
return tok;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 17:51:22 -06:00
|
|
|
func (S *Scanner) ScanMantissa (base int) {
|
2008-07-03 19:07:03 -06:00
|
|
|
for digit_val(S.ch) < base {
|
2008-07-03 16:16:51 -06:00
|
|
|
S.Next();
|
2008-07-03 00:19:31 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 17:51:22 -06:00
|
|
|
func (S *Scanner) ScanNumber (seen_decimal_point bool) int {
|
|
|
|
if seen_decimal_point {
|
|
|
|
S.ScanMantissa(10);
|
|
|
|
goto exponent;
|
|
|
|
}
|
|
|
|
|
|
|
|
if S.ch == '0' {
|
2008-07-03 19:07:03 -06:00
|
|
|
// TODO bug: doesn't accept 09.0 !
|
2008-07-03 17:51:22 -06:00
|
|
|
// int
|
|
|
|
S.Next();
|
|
|
|
if S.ch == 'x' || S.ch == 'X' {
|
|
|
|
// hexadecimal int
|
|
|
|
S.Next();
|
|
|
|
S.ScanMantissa(16);
|
|
|
|
} else {
|
|
|
|
// octal int
|
|
|
|
S.ScanMantissa(8);
|
|
|
|
}
|
|
|
|
return NUMBER;
|
|
|
|
}
|
|
|
|
|
|
|
|
// decimal int or float
|
|
|
|
S.ScanMantissa(10);
|
|
|
|
|
2008-07-03 00:19:31 -06:00
|
|
|
if S.ch == '.' {
|
2008-07-03 17:51:22 -06:00
|
|
|
// float
|
2008-07-03 16:16:51 -06:00
|
|
|
S.Next();
|
2008-07-03 17:51:22 -06:00
|
|
|
S.ScanMantissa(10)
|
2008-07-03 00:19:31 -06:00
|
|
|
}
|
2008-07-03 17:51:22 -06:00
|
|
|
|
|
|
|
exponent:
|
2008-07-03 00:19:31 -06:00
|
|
|
if S.ch == 'e' || S.ch == 'E' {
|
2008-07-03 17:51:22 -06:00
|
|
|
// float
|
2008-07-03 16:16:51 -06:00
|
|
|
S.Next();
|
2008-07-03 00:19:31 -06:00
|
|
|
if S.ch == '-' || S.ch == '+' {
|
2008-07-03 16:16:51 -06:00
|
|
|
S.Next();
|
2008-07-03 00:19:31 -06:00
|
|
|
}
|
2008-07-03 17:51:22 -06:00
|
|
|
S.ScanMantissa(10);
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
2008-07-03 00:19:31 -06:00
|
|
|
return NUMBER;
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 19:07:03 -06:00
|
|
|
func (S *Scanner) ScanDigits(n int, base int) {
|
|
|
|
for digit_val(S.ch) < base {
|
2008-07-02 18:02:55 -06:00
|
|
|
S.Next();
|
2008-07-03 19:07:03 -06:00
|
|
|
n--;
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
2008-07-03 19:07:03 -06:00
|
|
|
if n > 0 {
|
|
|
|
panic "illegal char escape";
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 17:51:22 -06:00
|
|
|
func (S *Scanner) ScanEscape () string {
|
2008-07-02 18:02:55 -06:00
|
|
|
// TODO: fix this routine
|
|
|
|
|
2008-07-03 17:51:22 -06:00
|
|
|
ch := S.ch;
|
|
|
|
S.Next();
|
|
|
|
switch (ch) {
|
|
|
|
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
|
|
|
|
return string(ch);
|
2008-07-02 18:02:55 -06:00
|
|
|
|
|
|
|
case '0', '1', '2', '3', '4', '5', '6', '7':
|
2008-07-03 19:07:03 -06:00
|
|
|
S.ScanDigits(3 - 1, 8); // 1 char already read
|
2008-07-03 17:51:22 -06:00
|
|
|
return ""; // TODO fix this
|
2008-07-02 18:02:55 -06:00
|
|
|
|
|
|
|
case 'x':
|
2008-07-03 19:07:03 -06:00
|
|
|
S.ScanDigits(2, 16);
|
2008-07-03 17:51:22 -06:00
|
|
|
return ""; // TODO fix this
|
2008-07-02 18:02:55 -06:00
|
|
|
|
|
|
|
case 'u':
|
2008-07-03 19:07:03 -06:00
|
|
|
S.ScanDigits(4, 16);
|
2008-07-03 17:51:22 -06:00
|
|
|
return ""; // TODO fix this
|
2008-07-02 18:02:55 -06:00
|
|
|
|
|
|
|
case 'U':
|
2008-07-03 19:07:03 -06:00
|
|
|
S.ScanDigits(8, 16);
|
2008-07-03 17:51:22 -06:00
|
|
|
return ""; // TODO fix this
|
2008-07-02 18:02:55 -06:00
|
|
|
|
|
|
|
default:
|
|
|
|
panic "illegal char escape";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 00:19:31 -06:00
|
|
|
func (S *Scanner) ScanChar () int {
|
2008-07-03 17:51:22 -06:00
|
|
|
// '\'' already consumed
|
|
|
|
|
|
|
|
ch := S.ch;
|
|
|
|
S.Next();
|
|
|
|
if ch == '\\' {
|
2008-07-02 18:02:55 -06:00
|
|
|
S.ScanEscape();
|
|
|
|
}
|
|
|
|
|
2008-07-03 19:07:03 -06:00
|
|
|
S.Expect('\'');
|
2008-07-03 00:19:31 -06:00
|
|
|
return NUMBER;
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 00:19:31 -06:00
|
|
|
func (S *Scanner) ScanString () int {
|
2008-07-03 17:51:22 -06:00
|
|
|
// '"' already consumed
|
|
|
|
|
|
|
|
for S.ch != '"' {
|
|
|
|
ch := S.ch;
|
|
|
|
S.Next();
|
|
|
|
if ch == '\n' || ch < 0 {
|
2008-07-02 18:02:55 -06:00
|
|
|
panic "string not terminated";
|
|
|
|
}
|
2008-07-03 17:51:22 -06:00
|
|
|
if ch == '\\' {
|
|
|
|
S.ScanEscape();
|
|
|
|
}
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
2008-07-03 17:51:22 -06:00
|
|
|
|
2008-07-02 18:02:55 -06:00
|
|
|
S.Next();
|
2008-07-03 00:19:31 -06:00
|
|
|
return STRING;
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 00:19:31 -06:00
|
|
|
func (S *Scanner) ScanRawString () int {
|
2008-07-03 17:51:22 -06:00
|
|
|
// '`' already consumed
|
|
|
|
|
|
|
|
for S.ch != '`' {
|
|
|
|
ch := S.ch;
|
|
|
|
S.Next();
|
|
|
|
if ch == '\n' || ch < 0 {
|
2008-07-02 18:02:55 -06:00
|
|
|
panic "string not terminated";
|
|
|
|
}
|
|
|
|
}
|
2008-07-03 17:51:22 -06:00
|
|
|
|
2008-07-02 18:02:55 -06:00
|
|
|
S.Next();
|
2008-07-03 00:19:31 -06:00
|
|
|
return STRING;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (S *Scanner) Select2 (tok0, tok1 int) int {
|
2008-07-03 16:16:51 -06:00
|
|
|
if S.ch == '=' {
|
|
|
|
S.Next();
|
|
|
|
return tok1;
|
2008-07-03 00:19:31 -06:00
|
|
|
}
|
|
|
|
return tok0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (S *Scanner) Select3 (tok0, tok1, ch2, tok2 int) int {
|
|
|
|
if S.ch == '=' {
|
2008-07-03 16:16:51 -06:00
|
|
|
S.Next();
|
2008-07-03 00:19:31 -06:00
|
|
|
return tok1;
|
|
|
|
}
|
|
|
|
if S.ch == ch2 {
|
2008-07-03 16:16:51 -06:00
|
|
|
S.Next();
|
2008-07-03 00:19:31 -06:00
|
|
|
return tok2;
|
|
|
|
}
|
|
|
|
return tok0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func (S *Scanner) Select4 (tok0, tok1, ch2, tok2, tok3 int) int {
|
|
|
|
if S.ch == '=' {
|
2008-07-03 16:16:51 -06:00
|
|
|
S.Next();
|
2008-07-03 00:19:31 -06:00
|
|
|
return tok1;
|
|
|
|
}
|
|
|
|
if S.ch == ch2 {
|
2008-07-03 16:16:51 -06:00
|
|
|
S.Next();
|
2008-07-03 00:19:31 -06:00
|
|
|
if S.ch == '=' {
|
2008-07-03 16:16:51 -06:00
|
|
|
S.Next();
|
2008-07-03 00:19:31 -06:00
|
|
|
return tok3;
|
|
|
|
}
|
|
|
|
return tok2;
|
|
|
|
}
|
|
|
|
return tok0;
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-03 19:07:03 -06:00
|
|
|
func (S *Scanner) Scan (t *Token) (tok, beg, end int) {
|
2008-07-02 18:02:55 -06:00
|
|
|
S.SkipWhitespace();
|
|
|
|
|
|
|
|
var tok int = ILLEGAL;
|
|
|
|
var beg int = S.pos - 1;
|
|
|
|
var end int = beg;
|
|
|
|
|
2008-07-03 19:07:03 -06:00
|
|
|
ch := S.ch;
|
|
|
|
switch {
|
2008-07-03 00:19:31 -06:00
|
|
|
case is_letter(ch): tok = S.ScanIdentifier();
|
2008-07-03 19:07:03 -06:00
|
|
|
case digit_val(ch) < 10: tok = S.ScanNumber(false);
|
2008-07-03 00:19:31 -06:00
|
|
|
default:
|
|
|
|
S.Next();
|
|
|
|
switch ch {
|
2008-07-03 16:16:51 -06:00
|
|
|
case -1: tok = EOF;
|
|
|
|
case '"': tok = S.ScanString();
|
|
|
|
case '\'': tok = S.ScanChar();
|
|
|
|
case '`': tok = S.ScanRawString();
|
|
|
|
case ':': tok = S.Select2(COLON, DEFINE);
|
|
|
|
case '.':
|
2008-07-03 19:07:03 -06:00
|
|
|
if digit_val(S.ch) < 10 {
|
2008-07-03 17:51:22 -06:00
|
|
|
tok = S.ScanNumber(true);
|
2008-07-03 16:16:51 -06:00
|
|
|
} else {
|
|
|
|
tok = PERIOD;
|
|
|
|
}
|
|
|
|
case ',': tok = COMMA;
|
|
|
|
case ';': tok = SEMICOLON;
|
|
|
|
case '(': tok = LPAREN;
|
|
|
|
case ')': tok = RPAREN;
|
|
|
|
case '[': tok = LBRACK;
|
|
|
|
case ']': tok = RBRACK;
|
|
|
|
case '{': tok = LBRACE;
|
|
|
|
case '}': tok = RBRACE;
|
|
|
|
case '+': tok = S.Select3(ADD, ADD_ASSIGN, '+', INC);
|
|
|
|
case '-': tok = S.Select3(SUB, SUB_ASSIGN, '-', DEC);
|
|
|
|
case '*': tok = S.Select2(MUL, MUL_ASSIGN);
|
|
|
|
case '/':
|
|
|
|
if S.ch == '/' || S.ch == '*' {
|
|
|
|
S.SkipComment();
|
|
|
|
// cannot simply return because of 6g bug
|
2008-07-03 19:07:03 -06:00
|
|
|
tok, beg, end = S.Scan(t);
|
2008-07-03 16:16:51 -06:00
|
|
|
return tok, beg, end;
|
|
|
|
}
|
|
|
|
tok = S.Select2(QUO, QUO_ASSIGN);
|
|
|
|
case '%': tok = S.Select2(REM, REM_ASSIGN);
|
|
|
|
case '^': tok = S.Select2(XOR, XOR_ASSIGN);
|
|
|
|
case '<': tok = S.Select4(LSS, LEQ, '<', SHL, SHL_ASSIGN);
|
|
|
|
case '>': tok = S.Select4(GTR, GEQ, '>', SHR, SHR_ASSIGN);
|
|
|
|
case '=': tok = S.Select2(ASSIGN, EQL);
|
|
|
|
case '!': tok = S.Select2(NOT, NEQ);
|
|
|
|
case '&': tok = S.Select3(AND, AND_ASSIGN, '&', CAND);
|
|
|
|
case '|': tok = S.Select3(OR, OR_ASSIGN, '|', COR);
|
|
|
|
default: tok = ILLEGAL;
|
2008-07-02 18:02:55 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
end = S.pos - 1;
|
2008-07-03 19:07:03 -06:00
|
|
|
|
|
|
|
t.val = tok;
|
|
|
|
t.beg = beg;
|
|
|
|
t.end = end;
|
|
|
|
t.txt = S.src[beg : end];
|
|
|
|
|
2008-07-02 18:02:55 -06:00
|
|
|
return tok, beg, end;
|
|
|
|
}
|