#!/usr/bin/awk -f
# Copyright 2010 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# This program implements the core idea from
#
#    Clinton L. Jeffery, Generating LR syntax error messages from examples,
#    ACM TOPLAS 25(5) (September 2003). http://doi.acm.org/10.1145/937563.937566
#
# It reads Bison's summary of a grammar followed by a file
# like go.errors, replacing lines beginning with % by the
# yystate and yychar that will be active when an error happens
# while parsing that line.
#
# Unlike the system described in the paper, the lines in go.errors
# give grammar symbol name lists, not actual program fragments.
# This is a little less programmer-friendly but doesn't require being
# able to run the text through lex.c.

# Tables filled in by the rules below while reading the parser report:
#    rulelhs[rule]         left-hand-side symbol of the rule
#    rulesize[rule]        number of right-hand-side symbols of the rule
#    statetext[state]      every report line seen for the state
#    nshift[state]         number of shift/goto entries recorded for the state
#    shifttoken[state,i]   symbol that triggers the i'th shift/goto
#    shift[state,i]        target state of the i'th shift/goto
#    nreduce[state]        number of reduce entries recorded for the state
#    reducetoken[state,i]  symbol that triggers the i'th reduce
#    reduce[state,i]       rule applied by the i'th reduce

BEGIN {
    bison = 1      # still reading the parser report (not yet the pattern file)
    grammar = 0    # currently inside the report's Grammar section
    states = 0     # currently inside the report's state dumps
}

# In Grammar section of y.output,
# record lhs and length of rhs for each rule.
# The two section-boundary rules come first so that the "Terminals" and
# "state 0" lines themselves are never treated as grammar rules.
bison && /^Grammar/ { grammar = 1 }
bison && /^(Terminals|state 0)/ { grammar = 0 }
grammar && NF > 0 {
    # $1 is the rule number. A "|" in $2 marks another alternative of the
    # previous rule, so the remembered lhs in r is reused; otherwise $2 is
    # the new lhs with a trailing colon, which is stripped.
    if($2 != "|") {
        r = $2
        sub(/:$/, "", r)
    }
    rulelhs[$1] = r
    rulesize[$1] = NF-2

    # An empty right-hand side is not blank in the report, so it must not
    # be counted as symbols. Older Bison prints the three fields
    # "/* empty */"; Bison 3.0 and later print the single field "%empty".
    # Neither spelling can be a real grammar symbol.
    # NOTE(review): other spellings may exist in newer Bison releases
    # (e.g. a Unicode epsilon) - confirm against the Bison version in use.
    if(rulesize[$1] == 3 && $3 $4 $5 == "/*empty*/") {
        rulesize[$1] = 0
    } else if(rulesize[$1] == 1 && $3 == "%empty") {
        rulesize[$1] = 0
    }
}

# In state dumps, record shift/reduce actions.
bison && /^state 0/ { grammar = 0; states = 1 }

# Track the state currently being described and keep its full text.
# These run before the action rules below, which end with "next".
states && /^state / { state = $2 }
states { statetext[state] = statetext[state] $0 "\n" }

# Shift on a terminal: $1 is the token, the target state's field position
# depends on which tool wrote the report.
states && / shift/ {
    n = nshift[state]++
    if($0 ~ /and go to/)
        shift[state,n] = $7    # GNU Bison
    else
        shift[state,n] = $3    # Plan 9 Yacc
    shifttoken[state,n] = $1
    next
}

# Goto on a nonterminal: stored in the same tables as shifts.
states && / (go to|goto)/ {
    n = nshift[state]++
    if($0 ~ /go to/)
        shift[state,n] = $5    # GNU Bison
    else
        shift[state,n] = $3    # Plan 9 Yacc
    shifttoken[state,n] = $1
    next
}

# Reduce: $1 is the lookahead symbol, the stored value is the rule number.
states && / reduce/ {
    n = nreduce[state]++
    if($0 ~ /reduce using rule/)
        reduce[state,n] = $5    # GNU Bison
    else
        reduce[state,n] = $3    # Plan 9 yacc
    reducetoken[state,n] = $1
    next
}

# Skip over the summary information printed by Plan 9 yacc.
/nonterminals$/,/^maximum spread/ { next } # First // comment marks the beginning of the pattern file. /^\/\// { bison = 0; grammar = 0; state = 0 } bison { next } # Treat % as first field on line as introducing a pattern (token sequence). # Run it through the LR machine and print the induced "yystate, yychar," # at the point where the error happens. $1 == "%" { nstack = 0 state = 0 f = 2 tok = "" for(;;) { if(tok == "" && f <= NF) { tok = $f f++ } found = 0 for(j=0; j " shift[state,j] stack[nstack++] = state state = shift[state,j] found = 1 tok = "" break } } if(found) continue for(j=0; j