diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go index 9dd5a4091c..d6505c6913 100644 --- a/src/pkg/html/parse.go +++ b/src/pkg/html/parse.go @@ -29,6 +29,8 @@ type parser struct { head, form *Node // Other parsing state flags (section 11.2.3.5). scripting, framesetOK bool + // im is the current insertion mode. + im insertionMode // originalIM is the insertion mode to go back to after completing a text // or inTableText insertion mode. originalIM insertionMode @@ -265,37 +267,22 @@ func (p *parser) acknowledgeSelfClosingTag() { // An insertion mode (section 11.2.3.1) is the state transition function from // a particular state in the HTML5 parser's state machine. It updates the -// parser's fields depending on parser.token (where ErrorToken means EOF). In -// addition to returning the next insertionMode state, it also returns whether -// the token was consumed. -type insertionMode func(*parser) (insertionMode, bool) - -// useTheRulesFor runs the delegate insertionMode over p, returning the actual -// insertionMode unless the delegate caused a state transition. -// Section 11.2.3.1, "using the rules for". -func useTheRulesFor(p *parser, actual, delegate insertionMode) (insertionMode, bool) { - im, consumed := delegate(p) - if p.originalIM == delegate { - p.originalIM = actual - } - if im != delegate { - return im, consumed - } - return actual, consumed -} +// parser's fields depending on parser.tok (where ErrorToken means EOF). +// It returns whether the token was consumed. +type insertionMode func(*parser) bool // setOriginalIM sets the insertion mode to return to after completing a text or // inTableText insertion mode. // Section 11.2.3.1, "using the rules for". -func (p *parser) setOriginalIM(im insertionMode) { +func (p *parser) setOriginalIM() { if p.originalIM != nil { panic("html: bad parser state: originalIM was set twice") } - p.originalIM = im + p.originalIM = p.im } // Section 11.2.3.1, "reset the insertion mode". -func (p *parser) resetInsertionMode() insertionMode { +func (p *parser) resetInsertionMode() { for i := len(p.oe) - 1; i >= 0; i-- { n := p.oe[i] if i == 0 { @@ -303,60 +290,66 @@ func (p *parser) resetInsertionMode() insertionMode { } switch n.Data { case "select": - return inSelectIM + p.im = inSelectIM case "td", "th": - return inCellIM + p.im = inCellIM case "tr": - return inRowIM + p.im = inRowIM case "tbody", "thead", "tfoot": - return inTableBodyIM + p.im = inTableBodyIM case "caption": - // TODO: return inCaptionIM + // TODO: p.im = inCaptionIM case "colgroup": - return inColumnGroupIM + p.im = inColumnGroupIM case "table": - return inTableIM + p.im = inTableIM case "head": - return inBodyIM + p.im = inBodyIM case "body": - return inBodyIM + p.im = inBodyIM case "frameset": - return inFramesetIM + p.im = inFramesetIM case "html": - return beforeHeadIM + p.im = beforeHeadIM + default: + continue } + return } - return inBodyIM + p.im = inBodyIM } // Section 11.2.5.4.1. -func initialIM(p *parser) (insertionMode, bool) { +func initialIM(p *parser) bool { switch p.tok.Type { case CommentToken: p.doc.Add(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return initialIM, true + return true case DoctypeToken: p.doc.Add(&Node{ Type: DoctypeNode, Data: p.tok.Data, }) - return beforeHTMLIM, true + p.im = beforeHTMLIM + return true } // TODO: set "quirks mode"? It's defined in the DOM spec instead of HTML5 proper, // and so switching on "quirks mode" might belong in a different package. - return beforeHTMLIM, false + p.im = beforeHTMLIM + return false } // Section 11.2.5.4.2. -func beforeHTMLIM(p *parser) (insertionMode, bool) { +func beforeHTMLIM(p *parser) bool { switch p.tok.Type { case StartTagToken: if p.tok.Data == "html" { p.addElement(p.tok.Data, p.tok.Attr) - return beforeHeadIM, true + p.im = beforeHeadIM + return true } case EndTagToken: switch p.tok.Data { @@ -364,22 +357,23 @@ func beforeHTMLIM(p *parser) (insertionMode, bool) { // Drop down to creating an implied tag. default: // Ignore the token. - return beforeHTMLIM, true + return true } case CommentToken: p.doc.Add(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return beforeHTMLIM, true + return true } // Create an implied tag. p.addElement("html", nil) - return beforeHeadIM, false + p.im = beforeHeadIM + return false } // Section 11.2.5.4.3. -func beforeHeadIM(p *parser) (insertionMode, bool) { +func beforeHeadIM(p *parser) bool { var ( add bool attr []Attribute @@ -397,7 +391,7 @@ func beforeHeadIM(p *parser) (insertionMode, bool) { add = true attr = p.tok.Attr case "html": - return useTheRulesFor(p, beforeHeadIM, inBodyIM) + return inBodyIM(p) default: implied = true } @@ -413,19 +407,20 @@ func beforeHeadIM(p *parser) (insertionMode, bool) { Type: CommentNode, Data: p.tok.Data, }) - return beforeHeadIM, true + return true } if add || implied { p.addElement("head", attr) p.head = p.top() } - return inHeadIM, !implied + p.im = inHeadIM + return !implied } const whitespace = " \t\r\n\f" // Section 11.2.5.4.4. -func inHeadIM(p *parser) (insertionMode, bool) { +func inHeadIM(p *parser) bool { var ( pop bool implied bool @@ -439,7 +434,7 @@ func inHeadIM(p *parser) (insertionMode, bool) { // Add the initial whitespace to the current node. p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) if s == "" { - return inHeadIM, true + return true } p.tok.Data = s } @@ -452,8 +447,9 @@ func inHeadIM(p *parser) (insertionMode, bool) { p.acknowledgeSelfClosingTag() case "script", "title", "noscript", "noframes", "style": p.addElement(p.tok.Data, p.tok.Attr) - p.setOriginalIM(inHeadIM) - return textIM, true + p.setOriginalIM() + p.im = textIM + return true default: implied = true } @@ -465,27 +461,28 @@ func inHeadIM(p *parser) (insertionMode, bool) { implied = true default: // Ignore the token. - return inHeadIM, true + return true } case CommentToken: p.addChild(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return inHeadIM, true + return true } if pop || implied { n := p.oe.pop() if n.Data != "head" { panic("html: bad parser state: element not found, in the in-head insertion mode") } - return afterHeadIM, !implied + p.im = afterHeadIM + return !implied } - return inHeadIM, true + return true } // Section 11.2.5.4.6. -func afterHeadIM(p *parser) (insertionMode, bool) { +func afterHeadIM(p *parser) bool { var ( add bool attr []Attribute @@ -506,11 +503,12 @@ func afterHeadIM(p *parser) (insertionMode, bool) { framesetOK = false case "frameset": p.addElement(p.tok.Data, p.tok.Attr) - return inFramesetIM, true + p.im = inFramesetIM + return true case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title": p.oe = append(p.oe, p.head) defer p.oe.pop() - return useTheRulesFor(p, afterHeadIM, inHeadIM) + return inHeadIM(p) case "head": // TODO. default: @@ -524,20 +522,21 @@ func afterHeadIM(p *parser) (insertionMode, bool) { framesetOK = true default: // Ignore the token. - return afterHeadIM, true + return true } case CommentToken: p.addChild(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return afterHeadIM, true + return true } if add || implied { p.addElement("body", attr) p.framesetOK = framesetOK } - return inBodyIM, !implied + p.im = inBodyIM + return !implied } // copyAttributes copies attributes of src not found on dst to dst. @@ -558,7 +557,7 @@ func copyAttributes(dst *Node, src Token) { } // Section 11.2.5.4.7. -func inBodyIM(p *parser) (insertionMode, bool) { +func inBodyIM(p *parser) bool { switch p.tok.Type { case TextToken: p.reconstructActiveFormattingElements() @@ -605,7 +604,8 @@ func inBodyIM(p *parser) (insertionMode, bool) { p.popUntil(buttonScopeStopTags, "p") // TODO: skip this step in quirks mode. p.addElement(p.tok.Data, p.tok.Attr) p.framesetOK = false - return inTableIM, true + p.im = inTableIM + return true case "hr": p.popUntil(buttonScopeStopTags, "p") p.addElement(p.tok.Data, p.tok.Attr) @@ -617,7 +617,8 @@ func inBodyIM(p *parser) (insertionMode, bool) { p.addElement(p.tok.Data, p.tok.Attr) p.framesetOK = false // TODO: detect