// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include "go.h" #include "y.tab.h" #include #undef getc #undef ungetc #define getc ccgetc #define ungetc ccungetc extern int yychar; int windows; int yyprev; int yylast; static void lexinit(void); static void lexfini(void); static void yytinit(void); static int getc(void); static void ungetc(int); static int32 getr(void); static int escchar(int, int*, vlong*); static void addidir(char*); static int getlinepragma(void); static char *goos, *goarch, *goroot; // Our own isdigit, isspace, isalpha, isalnum that take care // of EOF and other out of range arguments. static int yy_isdigit(int c) { return c >= 0 && c <= 0xFF && isdigit(c); } static int yy_isspace(int c) { return c >= 0 && c <= 0xFF && isspace(c); } static int yy_isalpha(int c) { return c >= 0 && c <= 0xFF && isalpha(c); } static int yy_isalnum(int c) { return c >= 0 && c <= 0xFF && isalnum(c); } // Disallow use of isdigit etc. #undef isdigit #undef isspace #undef isalpha #undef isalnum #define isdigit use_yy_isdigit_instead_of_isdigit #define isspace use_yy_isspace_instead_of_isspace #define isalpha use_yy_isalpha_instead_of_isalpha #define isalnum use_yy_isalnum_instead_of_isalnum #define DBG if(!debug['x']);else print enum { EOF = -1, }; void usage(void) { print("gc: usage: %cg [flags] file.go...\n", thechar); print("flags:\n"); // -A is allow use of "any" type, for bootstrapping print(" -I DIR search for packages in DIR\n"); print(" -d print declarations\n"); print(" -e no limit on number of errors printed\n"); print(" -f print stack frame structure\n"); print(" -h panic on an error\n"); print(" -o file specify output file\n"); print(" -S print the assembly language\n"); print(" -V print the compiler version\n"); print(" -u disable package unsafe\n"); print(" -w print the parse tree after typing\n"); print(" -x print lex tokens\n"); exit(0); } void fault(int s) { // If we've already complained about things // in the program, don't bother complaining // about the seg fault too; let the user clean up // the code and try again. if(nsavederrors + nerrors > 0) errorexit(); fatal("fault"); } int main(int argc, char *argv[]) { int i, c; NodeList *l; char *p; signal(SIGBUS, fault); signal(SIGSEGV, fault); localpkg = mkpkg(strlit("")); localpkg->prefix = "\"\""; builtinpkg = mkpkg(strlit("go.builtin")); gostringpkg = mkpkg(strlit("go.string")); gostringpkg->name = "go.string"; gostringpkg->prefix = "go.string"; // not go%2estring runtimepkg = mkpkg(strlit("runtime")); runtimepkg->name = "runtime"; typepkg = mkpkg(strlit("type")); typepkg->name = "type"; unsafepkg = mkpkg(strlit("unsafe")); unsafepkg->name = "unsafe"; goroot = getgoroot(); goos = getgoos(); goarch = thestring; outfile = nil; ARGBEGIN { default: c = ARGC(); if(c >= 0 && c < sizeof(debug)) debug[c]++; break; case 'o': outfile = EARGF(usage()); break; case 'I': addidir(EARGF(usage())); break; case 'u': safemode = 1; break; case 'V': print("%cg version %s\n", thechar, getgoversion()); exit(0); } ARGEND if(argc < 1) usage(); // special flag to detect compilation of package runtime compiling_runtime = debug['+']; pathname = mal(1000); if(getwd(pathname, 999) == 0) strcpy(pathname, "/???"); if(yy_isalpha(pathname[0]) && pathname[1] == ':') { // On Windows. windows = 1; // Canonicalize path by converting \ to / (Windows accepts both). for(p=pathname; *p; p++) if(*p == '\\') *p = '/'; } fmtinstall('O', Oconv); // node opcodes fmtinstall('E', Econv); // etype opcodes fmtinstall('J', Jconv); // all the node flags fmtinstall('S', Sconv); // sym pointer fmtinstall('T', Tconv); // type pointer fmtinstall('N', Nconv); // node pointer fmtinstall('Z', Zconv); // escaped string fmtinstall('L', Lconv); // line number fmtinstall('B', Bconv); // big numbers fmtinstall('F', Fconv); // big float numbers betypeinit(); if(widthptr == 0) fatal("betypeinit failed"); lexinit(); typeinit(); yytinit(); blockgen = 1; dclcontext = PEXTERN; nerrors = 0; lexlineno = 1; for(i=0; iname); // final import not used checks lexfini(); typecheckok = 1; if(debug['f']) frame(1); // Process top-level declarations in phases. // Phase 1: const, type, and names and types of funcs. // This will gather all the information about types // and methods but doesn't depend on any of it. defercheckwidth(); for(l=xtop; l; l=l->next) if(l->n->op != ODCL && l->n->op != OAS) typecheck(&l->n, Etop); // Phase 2: Variable assignments. // To check interface assignments, depends on phase 1. for(l=xtop; l; l=l->next) if(l->n->op == ODCL || l->n->op == OAS) typecheck(&l->n, Etop); resumetypecopy(); resumecheckwidth(); // Phase 3: Type check function bodies. for(l=xtop; l; l=l->next) { if(l->n->op == ODCLFUNC || l->n->op == OCLOSURE) { curfn = l->n; saveerrors(); typechecklist(l->n->nbody, Etop); if(nerrors != 0) l->n->nbody = nil; // type errors; do not compile } } curfn = nil; if(nsavederrors+nerrors) errorexit(); // Phase 3b: escape analysis. if(debug['s']) escapes(); // Phase 4: Compile function bodies. for(l=xtop; l; l=l->next) if(l->n->op == ODCLFUNC) funccompile(l->n, 0); if(nsavederrors+nerrors == 0) fninit(xtop); // Phase 4b: Compile all closures. while(closures) { l = closures; closures = nil; for(; l; l=l->next) { funccompile(l->n, 1); } } // Phase 5: check external declarations. for(l=externdcl; l; l=l->next) if(l->n->op == ONAME) typecheck(&l->n, Erv); if(nerrors+nsavederrors) errorexit(); dumpobj(); if(nerrors+nsavederrors) errorexit(); flusherrors(); exit(0); return 0; } void saveerrors(void) { nsavederrors += nerrors; nerrors = 0; } static int arsize(Biobuf *b, char *name) { struct ar_hdr *a; if((a = Brdline(b, '\n')) == nil) return -1; if(Blinelen(b) != sizeof(struct ar_hdr)) return -1; if(strncmp(a->name, name, strlen(name)) != 0) return -1; return atoi(a->size); } static int skiptopkgdef(Biobuf *b) { char *p; int sz; /* archive header */ if((p = Brdline(b, '\n')) == nil) return 0; if(Blinelen(b) != 8) return 0; if(memcmp(p, "!\n", 8) != 0) return 0; /* symbol table is first; skip it */ sz = arsize(b, "__.SYMDEF"); if(sz < 0) return 0; Bseek(b, sz, 1); /* package export block is second */ sz = arsize(b, "__.PKGDEF"); if(sz <= 0) return 0; return 1; } static void addidir(char* dir) { Idir** pp; if(dir == nil) return; for(pp = &idirs; *pp != nil; pp = &(*pp)->link) ; *pp = mal(sizeof(Idir)); (*pp)->link = nil; (*pp)->dir = dir; } // is this path a local name? begins with ./ or ../ or / static int islocalname(Strlit *name) { if(!windows && name->len >= 1 && name->s[0] == '/') return 1; if(windows && name->len >= 3 && yy_isalpha(name->s[0]) && name->s[1] == ':' && name->s[2] == '/') return 1; if(name->len >= 2 && strncmp(name->s, "./", 2) == 0) return 1; if(name->len >= 3 && strncmp(name->s, "../", 3) == 0) return 1; return 0; } static int findpkg(Strlit *name) { Idir *p; char *q; if(islocalname(name)) { if(safemode) return 0; // try .a before .6. important for building libraries: // if there is an array.6 in the array.a library, // want to find all of array.a, not just array.6. snprint(namebuf, sizeof(namebuf), "%Z.a", name); if(access(namebuf, 0) >= 0) return 1; snprint(namebuf, sizeof(namebuf), "%Z.%c", name, thechar); if(access(namebuf, 0) >= 0) return 1; return 0; } // local imports should be canonicalized already. // don't want to see "encoding/../encoding/base64" // as different from "encoding/base64". q = mal(name->len+1); memmove(q, name->s, name->len); q[name->len] = '\0'; cleanname(q); if(strlen(q) != name->len || memcmp(q, name->s, name->len) != 0) { yyerror("non-canonical import path %Z (should be %s)", name, q); return 0; } for(p = idirs; p != nil; p = p->link) { snprint(namebuf, sizeof(namebuf), "%s/%Z.a", p->dir, name); if(access(namebuf, 0) >= 0) return 1; snprint(namebuf, sizeof(namebuf), "%s/%Z.%c", p->dir, name, thechar); if(access(namebuf, 0) >= 0) return 1; } if(goroot != nil) { snprint(namebuf, sizeof(namebuf), "%s/pkg/%s_%s/%Z.a", goroot, goos, goarch, name); if(access(namebuf, 0) >= 0) return 1; snprint(namebuf, sizeof(namebuf), "%s/pkg/%s_%s/%Z.%c", goroot, goos, goarch, name, thechar); if(access(namebuf, 0) >= 0) return 1; } return 0; } void importfile(Val *f, int line) { Biobuf *imp; char *file, *p, *q; int32 c; int len; Strlit *path; char *cleanbuf; // TODO(rsc): don't bother reloading imports more than once? if(f->ctype != CTSTR) { yyerror("import statement not a string"); return; } if(strlen(f->u.sval->s) != f->u.sval->len) { yyerror("import path contains NUL"); errorexit(); } // The package name main is no longer reserved, // but we reserve the import path "main" to identify // the main package, just as we reserve the import // path "math" to identify the standard math package. if(strcmp(f->u.sval->s, "main") == 0) { yyerror("cannot import \"main\""); errorexit(); } if(strcmp(f->u.sval->s, "unsafe") == 0) { if(safemode) { yyerror("cannot import package unsafe"); errorexit(); } importpkg = mkpkg(f->u.sval); cannedimports("unsafe.6", unsafeimport); return; } path = f->u.sval; if(islocalname(path)) { cleanbuf = mal(strlen(pathname) + strlen(path->s) + 2); strcpy(cleanbuf, pathname); strcat(cleanbuf, "/"); strcat(cleanbuf, path->s); cleanname(cleanbuf); path = strlit(cleanbuf); } if(!findpkg(path)) { yyerror("can't find import: %Z", f->u.sval); errorexit(); } importpkg = mkpkg(path); imp = Bopen(namebuf, OREAD); if(imp == nil) { yyerror("can't open import: %Z: %r", f->u.sval); errorexit(); } file = strdup(namebuf); len = strlen(namebuf); if(len > 2 && namebuf[len-2] == '.' && namebuf[len-1] == 'a') { if(!skiptopkgdef(imp)) { yyerror("import %s: not a package file", file); errorexit(); } } // check object header p = Brdstr(imp, '\n', 1); if(strcmp(p, "empty archive") != 0) { if(strncmp(p, "go object ", 10) != 0) { yyerror("import %s: not a go object file", file); errorexit(); } q = smprint("%s %s %s", getgoos(), thestring, getgoversion()); if(strcmp(p+10, q) != 0) { yyerror("import %s: object is [%s] expected [%s]", file, p+10, q); errorexit(); } free(q); } // assume files move (get installed) // so don't record the full path. linehist(file + len - path->len - 2, -1, 1); // acts as #pragma lib /* * position the input right * after $$ and return */ pushedio = curio; curio.bin = imp; curio.peekc = 0; curio.peekc1 = 0; curio.infile = file; curio.nlsemi = 0; typecheckok = 1; for(;;) { c = getc(); if(c == EOF) break; if(c != '$') continue; c = getc(); if(c == EOF) break; if(c != '$') continue; return; } yyerror("no import in: %Z", f->u.sval); unimportfile(); } void unimportfile(void) { if(curio.bin != nil) { Bterm(curio.bin); curio.bin = nil; } else lexlineno--; // re correct sys.6 line number curio = pushedio; pushedio.bin = nil; incannedimport = 0; typecheckok = 0; } void cannedimports(char *file, char *cp) { lexlineno++; // if sys.6 is included on line 1, pushedio = curio; curio.bin = nil; curio.peekc = 0; curio.peekc1 = 0; curio.infile = file; curio.cp = cp; curio.nlsemi = 0; curio.importsafe = 0; typecheckok = 1; incannedimport = 1; } static int isfrog(int c) { // complain about possibly invisible control characters if(c < 0) return 1; if(c < ' ') { if(c == '\n' || c== '\r' || c == '\t') // good white space return 0; return 1; } if(0x7f <= c && c <= 0xa0) // DEL, unicode block including unbreakable space. return 1; return 0; } typedef struct Loophack Loophack; struct Loophack { int v; Loophack *next; }; static int32 _yylex(void) { int c, c1, clen, escflag, ncp; vlong v; char *cp, *ep; Rune rune; Sym *s; static Loophack *lstk; Loophack *h; prevlineno = lineno; l0: c = getc(); if(yy_isspace(c)) { if(c == '\n' && curio.nlsemi) { ungetc(c); DBG("lex: implicit semi\n"); return ';'; } goto l0; } lineno = lexlineno; /* start of token */ if(c >= Runeself) { /* all multibyte runes are alpha */ cp = lexbuf; ep = lexbuf+sizeof lexbuf; goto talph; } if(yy_isalpha(c)) { cp = lexbuf; ep = lexbuf+sizeof lexbuf; goto talph; } if(yy_isdigit(c)) goto tnum; switch(c) { case EOF: lineno = prevlineno; ungetc(EOF); return -1; case '_': cp = lexbuf; ep = lexbuf+sizeof lexbuf; goto talph; case '.': c1 = getc(); if(yy_isdigit(c1)) { cp = lexbuf; ep = lexbuf+sizeof lexbuf; *cp++ = c; c = c1; c1 = 0; goto casedot; } if(c1 == '.') { c1 = getc(); if(c1 == '.') { c = LDDD; goto lx; } ungetc(c1); c1 = '.'; } break; case '"': /* "..." */ strcpy(lexbuf, "\"\""); cp = mal(8); clen = sizeof(int32); ncp = 8; for(;;) { if(clen+UTFmax > ncp) { cp = remal(cp, ncp, ncp); ncp += ncp; } if(escchar('"', &escflag, &v)) break; if(v < Runeself || escflag) { cp[clen++] = v; } else { rune = v; c = runelen(rune); runetochar(cp+clen, &rune); clen += c; } } goto strlit; case '`': /* `...` */ strcpy(lexbuf, "``"); cp = mal(8); clen = sizeof(int32); ncp = 8; for(;;) { if(clen+UTFmax > ncp) { cp = remal(cp, ncp, ncp); ncp += ncp; } c = getr(); if(c == EOF) { yyerror("eof in string"); break; } if(c == '`') break; rune = c; clen += runetochar(cp+clen, &rune); } strlit: *(int32*)cp = clen-sizeof(int32); // length do { cp[clen++] = 0; } while(clen & MAXALIGN); yylval.val.u.sval = (Strlit*)cp; yylval.val.ctype = CTSTR; DBG("lex: string literal\n"); strcpy(litbuf, "string literal"); return LLITERAL; case '\'': /* '.' */ if(escchar('\'', &escflag, &v)) { yyerror("empty character literal or unescaped ' in character literal"); v = '\''; } if(!escchar('\'', &escflag, &v)) { yyerror("missing '"); ungetc(v); } yylval.val.u.xval = mal(sizeof(*yylval.val.u.xval)); mpmovecfix(yylval.val.u.xval, v); yylval.val.ctype = CTINT; DBG("lex: codepoint literal\n"); strcpy(litbuf, "string literal"); return LLITERAL; case '/': c1 = getc(); if(c1 == '*') { int nl; nl = 0; for(;;) { c = getr(); if(c == '\n') nl = 1; while(c == '*') { c = getr(); if(c == '/') { if(nl) ungetc('\n'); goto l0; } if(c == '\n') nl = 1; } if(c == EOF) { yyerror("eof in comment"); errorexit(); } } } if(c1 == '/') { c = getlinepragma(); for(;;) { if(c == '\n' || c == EOF) { ungetc(c); goto l0; } c = getr(); } } if(c1 == '=') { c = ODIV; goto asop; } break; case ':': c1 = getc(); if(c1 == '=') { c = LCOLAS; goto lx; } break; case '*': c1 = getc(); if(c1 == '=') { c = OMUL; goto asop; } break; case '%': c1 = getc(); if(c1 == '=') { c = OMOD; goto asop; } break; case '+': c1 = getc(); if(c1 == '+') { c = LINC; goto lx; } if(c1 == '=') { c = OADD; goto asop; } break; case '-': c1 = getc(); if(c1 == '-') { c = LDEC; goto lx; } if(c1 == '=') { c = OSUB; goto asop; } break; case '>': c1 = getc(); if(c1 == '>') { c = LRSH; c1 = getc(); if(c1 == '=') { c = ORSH; goto asop; } break; } if(c1 == '=') { c = LGE; goto lx; } c = LGT; break; case '<': c1 = getc(); if(c1 == '<') { c = LLSH; c1 = getc(); if(c1 == '=') { c = OLSH; goto asop; } break; } if(c1 == '=') { c = LLE; goto lx; } if(c1 == '-') { c = LCOMM; goto lx; } c = LLT; break; case '=': c1 = getc(); if(c1 == '=') { c = LEQ; goto lx; } break; case '!': c1 = getc(); if(c1 == '=') { c = LNE; goto lx; } break; case '&': c1 = getc(); if(c1 == '&') { c = LANDAND; goto lx; } if(c1 == '^') { c = LANDNOT; c1 = getc(); if(c1 == '=') { c = OANDNOT; goto asop; } break; } if(c1 == '=') { c = OAND; goto asop; } break; case '|': c1 = getc(); if(c1 == '|') { c = LOROR; goto lx; } if(c1 == '=') { c = OOR; goto asop; } break; case '^': c1 = getc(); if(c1 == '=') { c = OXOR; goto asop; } break; /* * clumsy dance: * to implement rule that disallows * if T{1}[0] { ... } * but allows * if (T{1}[0]) { ... } * the block bodies for if/for/switch/select * begin with an LBODY token, not '{'. * * when we see the keyword, the next * non-parenthesized '{' becomes an LBODY. * loophack is normally 0. * a keyword makes it go up to 1. * parens push loophack onto a stack and go back to 0. * a '{' with loophack == 1 becomes LBODY and disables loophack. * * i said it was clumsy. */ case '(': case '[': if(loophack || lstk != nil) { h = malloc(sizeof *h); h->v = loophack; h->next = lstk; lstk = h; loophack = 0; } goto lx; case ')': case ']': if(lstk != nil) { h = lstk; loophack = h->v; lstk = h->next; free(h); } goto lx; case '{': if(loophack == 1) { DBG("%L lex: LBODY\n", lexlineno); loophack = 0; return LBODY; } goto lx; default: goto lx; } ungetc(c1); lx: if(c > 0xff) DBG("%L lex: TOKEN %s\n", lexlineno, lexname(c)); else DBG("%L lex: TOKEN '%c'\n", lexlineno, c); if(isfrog(c)) { yyerror("illegal character 0x%ux", c); goto l0; } if(importpkg == nil && (c == '#' || c == '$' || c == '?' || c == '@' || c == '\\')) { yyerror("%s: unexpected %c", "syntax error", c); goto l0; } return c; asop: yylval.lint = c; // rathole to hold which asop DBG("lex: TOKEN ASOP %c\n", c); return LASOP; talph: /* * cp is set to lexbuf and some * prefix has been stored */ for(;;) { if(cp+10 >= ep) { yyerror("identifier too long"); errorexit(); } if(c >= Runeself) { ungetc(c); rune = getr(); // 0xb7 ยท is used for internal names if(!isalpharune(rune) && !isdigitrune(rune) && (importpkg == nil || rune != 0xb7)) yyerror("invalid identifier character 0x%ux", rune); cp += runetochar(cp, &rune); } else if(!yy_isalnum(c) && c != '_') break; else *cp++ = c; c = getc(); } *cp = 0; ungetc(c); s = lookup(lexbuf); switch(s->lexical) { case LIGNORE: goto l0; case LFOR: case LIF: case LSWITCH: case LSELECT: loophack = 1; // see comment about loophack above break; } DBG("lex: %S %s\n", s, lexname(s->lexical)); yylval.sym = s; return s->lexical; tnum: c1 = 0; cp = lexbuf; ep = lexbuf+sizeof lexbuf; if(c != '0') { for(;;) { if(cp+10 >= ep) { yyerror("identifier too long"); errorexit(); } *cp++ = c; c = getc(); if(yy_isdigit(c)) continue; goto dc; } } *cp++ = c; c = getc(); if(c == 'x' || c == 'X') { for(;;) { if(cp+10 >= ep) { yyerror("identifier too long"); errorexit(); } *cp++ = c; c = getc(); if(yy_isdigit(c)) continue; if(c >= 'a' && c <= 'f') continue; if(c >= 'A' && c <= 'F') continue; if(cp == lexbuf+2) yyerror("malformed hex constant"); goto ncu; } } if(c == 'p') // 0p begins floating point zero goto casep; c1 = 0; for(;;) { if(cp+10 >= ep) { yyerror("identifier too long"); errorexit(); } if(!yy_isdigit(c)) break; if(c < '0' || c > '7') c1 = 1; // not octal *cp++ = c; c = getc(); } if(c == '.') goto casedot; if(c == 'e' || c == 'E') goto casee; if(c == 'i') goto casei; if(c1) yyerror("malformed octal constant"); goto ncu; dc: if(c == '.') goto casedot; if(c == 'e' || c == 'E') goto casee; if(c == 'p' || c == 'P') goto casep; if(c == 'i') goto casei; ncu: *cp = 0; ungetc(c); yylval.val.u.xval = mal(sizeof(*yylval.val.u.xval)); mpatofix(yylval.val.u.xval, lexbuf); if(yylval.val.u.xval->ovf) { yyerror("overflow in constant"); mpmovecfix(yylval.val.u.xval, 0); } yylval.val.ctype = CTINT; DBG("lex: integer literal\n"); strcpy(litbuf, "literal "); strcat(litbuf, lexbuf); return LLITERAL; casedot: for(;;) { if(cp+10 >= ep) { yyerror("identifier too long"); errorexit(); } *cp++ = c; c = getc(); if(!yy_isdigit(c)) break; } if(c == 'i') goto casei; if(c != 'e' && c != 'E') goto caseout; casee: *cp++ = 'e'; c = getc(); if(c == '+' || c == '-') { *cp++ = c; c = getc(); } if(!yy_isdigit(c)) yyerror("malformed fp constant exponent"); while(yy_isdigit(c)) { if(cp+10 >= ep) { yyerror("identifier too long"); errorexit(); } *cp++ = c; c = getc(); } if(c == 'i') goto casei; goto caseout; casep: *cp++ = 'p'; c = getc(); if(c == '+' || c == '-') { *cp++ = c; c = getc(); } if(!yy_isdigit(c)) yyerror("malformed fp constant exponent"); while(yy_isdigit(c)) { if(cp+10 >= ep) { yyerror("identifier too long"); errorexit(); } *cp++ = c; c = getc(); } if(c == 'i') goto casei; goto caseout; casei: // imaginary constant *cp = 0; yylval.val.u.cval = mal(sizeof(*yylval.val.u.cval)); mpmovecflt(&yylval.val.u.cval->real, 0.0); mpatoflt(&yylval.val.u.cval->imag, lexbuf); if(yylval.val.u.cval->imag.val.ovf) { yyerror("overflow in imaginary constant"); mpmovecflt(&yylval.val.u.cval->real, 0.0); } yylval.val.ctype = CTCPLX; DBG("lex: imaginary literal\n"); strcpy(litbuf, "literal "); strcat(litbuf, lexbuf); return LLITERAL; caseout: *cp = 0; ungetc(c); yylval.val.u.fval = mal(sizeof(*yylval.val.u.fval)); mpatoflt(yylval.val.u.fval, lexbuf); if(yylval.val.u.fval->val.ovf) { yyerror("overflow in float constant"); mpmovecflt(yylval.val.u.fval, 0.0); } yylval.val.ctype = CTFLT; DBG("lex: floating literal\n"); strcpy(litbuf, "literal "); strcat(litbuf, lexbuf); return LLITERAL; } /* * read and interpret syntax that looks like * //line parse.y:15 * as a discontinuity in sequential line numbers. * the next line of input comes from parse.y:15 */ static int getlinepragma(void) { int i, c, n; char *cp, *ep; Hist *h; for(i=0; i<5; i++) { c = getr(); if(c != "line "[i]) goto out; } cp = lexbuf; ep = lexbuf+sizeof(lexbuf)-5; for(;;) { c = getr(); if(c == '\n' || c == EOF) goto out; if(c == ' ') continue; if(c == ':') break; if(cp < ep) *cp++ = c; } *cp = 0; n = 0; for(;;) { c = getr(); if(!yy_isdigit(c)) break; n = n*10 + (c-'0'); if(n > 1e8) { yyerror("line number out of range"); errorexit(); } } if(c != '\n' || n <= 0) goto out; // try to avoid allocating file name over and over for(h=hist; h!=H; h=h->link) { if(h->name != nil && strcmp(h->name, lexbuf) == 0) { linehist(h->name, n, 0); goto out; } } linehist(strdup(lexbuf), n, 0); out: return c; } int32 yylex(void) { int lx; lx = _yylex(); if(curio.nlsemi && lx == EOF) { // Treat EOF as "end of line" for the purposes // of inserting a semicolon. lx = ';'; } switch(lx) { case LNAME: case LLITERAL: case LBREAK: case LCONTINUE: case LFALL: case LRETURN: case LINC: case LDEC: case ')': case '}': case ']': curio.nlsemi = 1; break; default: curio.nlsemi = 0; break; } // Track last two tokens returned by yylex. yyprev = yylast; yylast = lx; return lx; } static int getc(void) { int c; c = curio.peekc; if(c != 0) { curio.peekc = curio.peekc1; curio.peekc1 = 0; if(c == '\n' && pushedio.bin == nil) lexlineno++; return c; } if(curio.bin == nil) { c = *curio.cp & 0xff; if(c != 0) curio.cp++; } else c = Bgetc(curio.bin); switch(c) { case 0: if(curio.bin != nil) { yyerror("illegal NUL byte"); break; } case EOF: // insert \n at EOF if(curio.eofnl) return EOF; curio.eofnl = 1; c = '\n'; case '\n': if(pushedio.bin == nil) lexlineno++; break; } return c; } static void ungetc(int c) { curio.peekc1 = curio.peekc; curio.peekc = c; if(c == '\n' && pushedio.bin == nil) lexlineno--; } static int32 getr(void) { int c, i; char str[UTFmax+1]; Rune rune; c = getc(); if(c < Runeself) return c; i = 0; str[i++] = c; loop: c = getc(); str[i++] = c; if(!fullrune(str, i)) goto loop; c = chartorune(&rune, str); if(rune == Runeerror && c == 1) { lineno = lexlineno; yyerror("illegal UTF-8 sequence"); flusherrors(); print("\t"); for(c=0; c 0 ? " " : "", *(uchar*)(str+c)); print("\n"); } return rune; } static int escchar(int e, int *escflg, vlong *val) { int i, u, c; vlong l; *escflg = 0; c = getr(); switch(c) { case EOF: yyerror("eof in string"); return 1; case '\n': yyerror("newline in string"); return 1; case '\\': break; default: if(c == e) return 1; *val = c; return 0; } u = 0; c = getr(); switch(c) { case 'x': *escflg = 1; // it's a byte i = 2; goto hex; case 'u': i = 4; u = 1; goto hex; case 'U': i = 8; u = 1; goto hex; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': *escflg = 1; // it's a byte goto oct; case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case '\\': c = '\\'; break; default: if(c != e) yyerror("unknown escape sequence: %c", c); } *val = c; return 0; hex: l = 0; for(; i>0; i--) { c = getc(); if(c >= '0' && c <= '9') { l = l*16 + c-'0'; continue; } if(c >= 'a' && c <= 'f') { l = l*16 + c-'a' + 10; continue; } if(c >= 'A' && c <= 'F') { l = l*16 + c-'A' + 10; continue; } yyerror("non-hex character in escape sequence: %c", c); ungetc(c); break; } if(u && (l > Runemax || (0xd800 <= l && l < 0xe000))) { yyerror("invalid Unicode code point in escape sequence: %#llx", l); l = Runeerror; } *val = l; return 0; oct: l = c - '0'; for(i=2; i>0; i--) { c = getc(); if(c >= '0' && c <= '7') { l = l*8 + c-'0'; continue; } yyerror("non-octal character in escape sequence: %c", c); ungetc(c); } if(l > 255) yyerror("octal escape value > 255: %d", l); *val = l; return 0; } static struct { char* name; int lexical; int etype; int op; } syms[] = { /* name lexical etype op */ /* basic types */ "int8", LNAME, TINT8, OXXX, "int16", LNAME, TINT16, OXXX, "int32", LNAME, TINT32, OXXX, "int64", LNAME, TINT64, OXXX, "uint8", LNAME, TUINT8, OXXX, "uint16", LNAME, TUINT16, OXXX, "uint32", LNAME, TUINT32, OXXX, "uint64", LNAME, TUINT64, OXXX, "float32", LNAME, TFLOAT32, OXXX, "float64", LNAME, TFLOAT64, OXXX, "complex64", LNAME, TCOMPLEX64, OXXX, "complex128", LNAME, TCOMPLEX128, OXXX, "bool", LNAME, TBOOL, OXXX, "byte", LNAME, TUINT8, OXXX, "string", LNAME, TSTRING, OXXX, "any", LNAME, TANY, OXXX, "break", LBREAK, Txxx, OXXX, "case", LCASE, Txxx, OXXX, "chan", LCHAN, Txxx, OXXX, "const", LCONST, Txxx, OXXX, "continue", LCONTINUE, Txxx, OXXX, "default", LDEFAULT, Txxx, OXXX, "else", LELSE, Txxx, OXXX, "defer", LDEFER, Txxx, OXXX, "fallthrough", LFALL, Txxx, OXXX, "for", LFOR, Txxx, OXXX, "func", LFUNC, Txxx, OXXX, "go", LGO, Txxx, OXXX, "goto", LGOTO, Txxx, OXXX, "if", LIF, Txxx, OXXX, "import", LIMPORT, Txxx, OXXX, "interface", LINTERFACE, Txxx, OXXX, "map", LMAP, Txxx, OXXX, "package", LPACKAGE, Txxx, OXXX, "range", LRANGE, Txxx, OXXX, "return", LRETURN, Txxx, OXXX, "select", LSELECT, Txxx, OXXX, "struct", LSTRUCT, Txxx, OXXX, "switch", LSWITCH, Txxx, OXXX, "type", LTYPE, Txxx, OXXX, "var", LVAR, Txxx, OXXX, "append", LNAME, Txxx, OAPPEND, "cap", LNAME, Txxx, OCAP, "close", LNAME, Txxx, OCLOSE, "complex", LNAME, Txxx, OCOMPLEX, "copy", LNAME, Txxx, OCOPY, "imag", LNAME, Txxx, OIMAG, "len", LNAME, Txxx, OLEN, "make", LNAME, Txxx, OMAKE, "new", LNAME, Txxx, ONEW, "panic", LNAME, Txxx, OPANIC, "print", LNAME, Txxx, OPRINT, "println", LNAME, Txxx, OPRINTN, "real", LNAME, Txxx, OREAL, "recover", LNAME, Txxx, ORECOVER, "notwithstanding", LIGNORE, Txxx, OXXX, "thetruthofthematter", LIGNORE, Txxx, OXXX, "despiteallobjections", LIGNORE, Txxx, OXXX, "whereas", LIGNORE, Txxx, OXXX, "insofaras", LIGNORE, Txxx, OXXX, }; static void lexinit(void) { int i, lex; Sym *s, *s1; Type *t; int etype; /* * initialize basic types array * initialize known symbols */ for(i=0; ilexical = lex; etype = syms[i].etype; if(etype != Txxx) { if(etype < 0 || etype >= nelem(types)) fatal("lexinit: %s bad etype", s->name); t = types[etype]; if(t == T) { t = typ(etype); t->sym = s; if(etype != TANY && etype != TSTRING) dowidth(t); types[etype] = t; } s1 = pkglookup(syms[i].name, builtinpkg); s1->lexical = LNAME; s1->def = typenod(t); continue; } } // logically, the type of a string literal. // types[TSTRING] is the named type string // (the type of x in var x string or var x = "hello"). // this is the ideal form // (the type of x in const x = "hello"). idealstring = typ(TSTRING); idealbool = typ(TBOOL); s = pkglookup("true", builtinpkg); s->def = nodbool(1); s->def->sym = lookup("true"); s->def->type = idealbool; s = pkglookup("false", builtinpkg); s->def = nodbool(0); s->def->sym = lookup("false"); s->def->type = idealbool; s = lookup("_"); s->block = -100; s->def = nod(ONAME, N, N); s->def->sym = s; types[TBLANK] = typ(TBLANK); s->def->type = types[TBLANK]; nblank = s->def; } static void lexfini(void) { Sym *s; int lex, etype, i; Val v; for(i=0; ilexical = lex; etype = syms[i].etype; if(etype != Txxx && (etype != TANY || debug['A']) && s->def == N) s->def = typenod(types[etype]); etype = syms[i].op; if(etype != OXXX && s->def == N) { s->def = nod(ONAME, N, N); s->def->sym = s; s->def->etype = etype; s->def->builtin = 1; } } for(i=0; typedefs[i].name; i++) { s = lookup(typedefs[i].name); if(s->def == N) s->def = typenod(types[typedefs[i].etype]); } // there's only so much table-driven we can handle. // these are special cases. types[TNIL] = typ(TNIL); s = lookup("nil"); if(s->def == N) { v.ctype = CTNIL; s->def = nodlit(v); s->def->sym = s; } s = lookup("iota"); if(s->def == N) { s->def = nod(OIOTA, N, N); s->def->sym = s; } s = lookup("true"); if(s->def == N) { s->def = nodbool(1); s->def->sym = s; } s = lookup("false"); if(s->def == N) { s->def = nodbool(0); s->def->sym = s; } nodfp = nod(ONAME, N, N); nodfp->type = types[TINT32]; nodfp->xoffset = 0; nodfp->class = PPARAM; nodfp->sym = lookup(".fp"); } struct { int lex; char* name; } lexn[] = { LANDAND, "ANDAND", LASOP, "ASOP", LBREAK, "BREAK", LCASE, "CASE", LCHAN, "CHAN", LCOLAS, "COLAS", LCONST, "CONST", LCONTINUE, "CONTINUE", LDEC, "DEC", LDEFER, "DEFER", LELSE, "ELSE", LEQ, "EQ", LFALL, "FALL", LFOR, "FOR", LFUNC, "FUNC", LGE, "GE", LGO, "GO", LGOTO, "GOTO", LGT, "GT", LIF, "IF", LIMPORT, "IMPORT", LINC, "INC", LINTERFACE, "INTERFACE", LLE, "LE", LLITERAL, "LITERAL", LLSH, "LSH", LLT, "LT", LMAP, "MAP", LNAME, "NAME", LNE, "NE", LOROR, "OROR", LPACKAGE, "PACKAGE", LRANGE, "RANGE", LRETURN, "RETURN", LRSH, "RSH", LSTRUCT, "STRUCT", LSWITCH, "SWITCH", LTYPE, "TYPE", LVAR, "VAR", }; char* lexname(int lex) { int i; static char buf[100]; for(i=0; i=", "LGT", ">", "LLE", "<=", "LLT", "<", "LLSH", "<<", "LRSH", ">>", "LOROR", "||", "LNE", "!=", // spell out to avoid confusion with punctuation in error messages "';'", "semicolon or newline", "','", "comma", }; static void yytinit(void) { int i, j; extern char *yytname[]; char *s, *t; for(i=0; yytname[i] != nil; i++) { s = yytname[i]; if(strcmp(s, "LLITERAL") == 0) { strcpy(litbuf, "literal"); yytname[i] = litbuf; goto loop; } // apply yytfix if possible for(j=0; jname == nil) { if(strcmp(pkgname, "_") == 0) yyerror("invalid package name _"); localpkg->name = pkgname; } else { if(strcmp(pkgname, localpkg->name) != 0) yyerror("package %s; expected %s", pkgname, localpkg->name); for(h=0; hlink) { if(s->def == N || s->pkg != localpkg) continue; if(s->def->op == OPACK) { // throw away top-level package name leftover // from previous file. // leave s->block set to cause redeclaration // errors if a conflicting top-level name is // introduced by a different file. if(!s->def->used && !nsyntaxerrors) yyerrorl(s->def->lineno, "imported and not used: %Z", s->def->pkg->path); s->def = N; continue; } if(s->def->sym != s) { // throw away top-level name left over // from previous import . "x" if(s->def->pack != N && !s->def->pack->used && !nsyntaxerrors) { yyerrorl(s->def->pack->lineno, "imported and not used: %Z", s->def->pack->pkg->path); s->def->pack->used = 1; } s->def = N; continue; } } } } if(outfile == nil) { p = strrchr(infile, '/'); if(p == nil) p = infile; else p = p+1; snprint(namebuf, sizeof(namebuf), "%s", p); p = strrchr(namebuf, '.'); if(p != nil) *p = 0; outfile = smprint("%s.%c", namebuf, thechar); } }