1
0
mirror of https://github.com/golang/go synced 2024-11-20 03:24:41 -07:00

html: store the current insertion mode in the parser

Currently, the state transition functions in the HTML parser
return the next insertion mode and whether the token is consumed.
This works well except for when one insertion mode needs to use
the rules for another insertion mode. Then the useTheRulesFor
function needs to patch things up. This requires comparing functions
for equality, which is going to stop working.

Adding a field to the parser structure to store the current
insertion mode eliminates the need for useTheRulesFor;
one insertion mode function can now just call the other
directly. The insertion mode will be changed only if it needs to be.

This CL is an alternative to CL 5372078.

R=nigeltao, rsc
CC=golang-dev
https://golang.org/cl/5372079
This commit is contained in:
Andrew Balholm 2011-11-13 12:39:41 +11:00 committed by Nigel Tao
parent 13dc53055f
commit 631a575fd9

View File

@ -29,6 +29,8 @@ type parser struct {
head, form *Node head, form *Node
// Other parsing state flags (section 11.2.3.5). // Other parsing state flags (section 11.2.3.5).
scripting, framesetOK bool scripting, framesetOK bool
// im is the current insertion mode.
im insertionMode
// originalIM is the insertion mode to go back to after completing a text // originalIM is the insertion mode to go back to after completing a text
// or inTableText insertion mode. // or inTableText insertion mode.
originalIM insertionMode originalIM insertionMode
@ -265,37 +267,22 @@ func (p *parser) acknowledgeSelfClosingTag() {
// An insertion mode (section 11.2.3.1) is the state transition function from // An insertion mode (section 11.2.3.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the // a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.token (where ErrorToken means EOF). In // parser's fields depending on parser.tok (where ErrorToken means EOF).
// addition to returning the next insertionMode state, it also returns whether // It returns whether the token was consumed.
// the token was consumed. type insertionMode func(*parser) bool
type insertionMode func(*parser) (insertionMode, bool)
// useTheRulesFor runs the delegate insertionMode over p, returning the actual
// insertionMode unless the delegate caused a state transition.
// Section 11.2.3.1, "using the rules for".
func useTheRulesFor(p *parser, actual, delegate insertionMode) (insertionMode, bool) {
im, consumed := delegate(p)
if p.originalIM == delegate {
p.originalIM = actual
}
if im != delegate {
return im, consumed
}
return actual, consumed
}
// setOriginalIM sets the insertion mode to return to after completing a text or // setOriginalIM sets the insertion mode to return to after completing a text or
// inTableText insertion mode. // inTableText insertion mode.
// Section 11.2.3.1, "using the rules for". // Section 11.2.3.1, "using the rules for".
func (p *parser) setOriginalIM(im insertionMode) { func (p *parser) setOriginalIM() {
if p.originalIM != nil { if p.originalIM != nil {
panic("html: bad parser state: originalIM was set twice") panic("html: bad parser state: originalIM was set twice")
} }
p.originalIM = im p.originalIM = p.im
} }
// Section 11.2.3.1, "reset the insertion mode". // Section 11.2.3.1, "reset the insertion mode".
func (p *parser) resetInsertionMode() insertionMode { func (p *parser) resetInsertionMode() {
for i := len(p.oe) - 1; i >= 0; i-- { for i := len(p.oe) - 1; i >= 0; i-- {
n := p.oe[i] n := p.oe[i]
if i == 0 { if i == 0 {
@ -303,60 +290,66 @@ func (p *parser) resetInsertionMode() insertionMode {
} }
switch n.Data { switch n.Data {
case "select": case "select":
return inSelectIM p.im = inSelectIM
case "td", "th": case "td", "th":
return inCellIM p.im = inCellIM
case "tr": case "tr":
return inRowIM p.im = inRowIM
case "tbody", "thead", "tfoot": case "tbody", "thead", "tfoot":
return inTableBodyIM p.im = inTableBodyIM
case "caption": case "caption":
// TODO: return inCaptionIM // TODO: p.im = inCaptionIM
case "colgroup": case "colgroup":
return inColumnGroupIM p.im = inColumnGroupIM
case "table": case "table":
return inTableIM p.im = inTableIM
case "head": case "head":
return inBodyIM p.im = inBodyIM
case "body": case "body":
return inBodyIM p.im = inBodyIM
case "frameset": case "frameset":
return inFramesetIM p.im = inFramesetIM
case "html": case "html":
return beforeHeadIM p.im = beforeHeadIM
default:
continue
} }
return
} }
return inBodyIM p.im = inBodyIM
} }
// Section 11.2.5.4.1. // Section 11.2.5.4.1.
func initialIM(p *parser) (insertionMode, bool) { func initialIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case CommentToken: case CommentToken:
p.doc.Add(&Node{ p.doc.Add(&Node{
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return initialIM, true return true
case DoctypeToken: case DoctypeToken:
p.doc.Add(&Node{ p.doc.Add(&Node{
Type: DoctypeNode, Type: DoctypeNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return beforeHTMLIM, true p.im = beforeHTMLIM
return true
} }
// TODO: set "quirks mode"? It's defined in the DOM spec instead of HTML5 proper, // TODO: set "quirks mode"? It's defined in the DOM spec instead of HTML5 proper,
// and so switching on "quirks mode" might belong in a different package. // and so switching on "quirks mode" might belong in a different package.
return beforeHTMLIM, false p.im = beforeHTMLIM
return false
} }
// Section 11.2.5.4.2. // Section 11.2.5.4.2.
func beforeHTMLIM(p *parser) (insertionMode, bool) { func beforeHTMLIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case StartTagToken: case StartTagToken:
if p.tok.Data == "html" { if p.tok.Data == "html" {
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
return beforeHeadIM, true p.im = beforeHeadIM
return true
} }
case EndTagToken: case EndTagToken:
switch p.tok.Data { switch p.tok.Data {
@ -364,22 +357,23 @@ func beforeHTMLIM(p *parser) (insertionMode, bool) {
// Drop down to creating an implied <html> tag. // Drop down to creating an implied <html> tag.
default: default:
// Ignore the token. // Ignore the token.
return beforeHTMLIM, true return true
} }
case CommentToken: case CommentToken:
p.doc.Add(&Node{ p.doc.Add(&Node{
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return beforeHTMLIM, true return true
} }
// Create an implied <html> tag. // Create an implied <html> tag.
p.addElement("html", nil) p.addElement("html", nil)
return beforeHeadIM, false p.im = beforeHeadIM
return false
} }
// Section 11.2.5.4.3. // Section 11.2.5.4.3.
func beforeHeadIM(p *parser) (insertionMode, bool) { func beforeHeadIM(p *parser) bool {
var ( var (
add bool add bool
attr []Attribute attr []Attribute
@ -397,7 +391,7 @@ func beforeHeadIM(p *parser) (insertionMode, bool) {
add = true add = true
attr = p.tok.Attr attr = p.tok.Attr
case "html": case "html":
return useTheRulesFor(p, beforeHeadIM, inBodyIM) return inBodyIM(p)
default: default:
implied = true implied = true
} }
@ -413,19 +407,20 @@ func beforeHeadIM(p *parser) (insertionMode, bool) {
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return beforeHeadIM, true return true
} }
if add || implied { if add || implied {
p.addElement("head", attr) p.addElement("head", attr)
p.head = p.top() p.head = p.top()
} }
return inHeadIM, !implied p.im = inHeadIM
return !implied
} }
const whitespace = " \t\r\n\f" const whitespace = " \t\r\n\f"
// Section 11.2.5.4.4. // Section 11.2.5.4.4.
func inHeadIM(p *parser) (insertionMode, bool) { func inHeadIM(p *parser) bool {
var ( var (
pop bool pop bool
implied bool implied bool
@ -439,7 +434,7 @@ func inHeadIM(p *parser) (insertionMode, bool) {
// Add the initial whitespace to the current node. // Add the initial whitespace to the current node.
p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
if s == "" { if s == "" {
return inHeadIM, true return true
} }
p.tok.Data = s p.tok.Data = s
} }
@ -452,8 +447,9 @@ func inHeadIM(p *parser) (insertionMode, bool) {
p.acknowledgeSelfClosingTag() p.acknowledgeSelfClosingTag()
case "script", "title", "noscript", "noframes", "style": case "script", "title", "noscript", "noframes", "style":
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
p.setOriginalIM(inHeadIM) p.setOriginalIM()
return textIM, true p.im = textIM
return true
default: default:
implied = true implied = true
} }
@ -465,27 +461,28 @@ func inHeadIM(p *parser) (insertionMode, bool) {
implied = true implied = true
default: default:
// Ignore the token. // Ignore the token.
return inHeadIM, true return true
} }
case CommentToken: case CommentToken:
p.addChild(&Node{ p.addChild(&Node{
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return inHeadIM, true return true
} }
if pop || implied { if pop || implied {
n := p.oe.pop() n := p.oe.pop()
if n.Data != "head" { if n.Data != "head" {
panic("html: bad parser state: <head> element not found, in the in-head insertion mode") panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
} }
return afterHeadIM, !implied p.im = afterHeadIM
return !implied
} }
return inHeadIM, true return true
} }
// Section 11.2.5.4.6. // Section 11.2.5.4.6.
func afterHeadIM(p *parser) (insertionMode, bool) { func afterHeadIM(p *parser) bool {
var ( var (
add bool add bool
attr []Attribute attr []Attribute
@ -506,11 +503,12 @@ func afterHeadIM(p *parser) (insertionMode, bool) {
framesetOK = false framesetOK = false
case "frameset": case "frameset":
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
return inFramesetIM, true p.im = inFramesetIM
return true
case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title": case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title":
p.oe = append(p.oe, p.head) p.oe = append(p.oe, p.head)
defer p.oe.pop() defer p.oe.pop()
return useTheRulesFor(p, afterHeadIM, inHeadIM) return inHeadIM(p)
case "head": case "head":
// TODO. // TODO.
default: default:
@ -524,20 +522,21 @@ func afterHeadIM(p *parser) (insertionMode, bool) {
framesetOK = true framesetOK = true
default: default:
// Ignore the token. // Ignore the token.
return afterHeadIM, true return true
} }
case CommentToken: case CommentToken:
p.addChild(&Node{ p.addChild(&Node{
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return afterHeadIM, true return true
} }
if add || implied { if add || implied {
p.addElement("body", attr) p.addElement("body", attr)
p.framesetOK = framesetOK p.framesetOK = framesetOK
} }
return inBodyIM, !implied p.im = inBodyIM
return !implied
} }
// copyAttributes copies attributes of src not found on dst to dst. // copyAttributes copies attributes of src not found on dst to dst.
@ -558,7 +557,7 @@ func copyAttributes(dst *Node, src Token) {
} }
// Section 11.2.5.4.7. // Section 11.2.5.4.7.
func inBodyIM(p *parser) (insertionMode, bool) { func inBodyIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case TextToken: case TextToken:
p.reconstructActiveFormattingElements() p.reconstructActiveFormattingElements()
@ -605,7 +604,8 @@ func inBodyIM(p *parser) (insertionMode, bool) {
p.popUntil(buttonScopeStopTags, "p") // TODO: skip this step in quirks mode. p.popUntil(buttonScopeStopTags, "p") // TODO: skip this step in quirks mode.
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
p.framesetOK = false p.framesetOK = false
return inTableIM, true p.im = inTableIM
return true
case "hr": case "hr":
p.popUntil(buttonScopeStopTags, "p") p.popUntil(buttonScopeStopTags, "p")
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
@ -617,7 +617,8 @@ func inBodyIM(p *parser) (insertionMode, bool) {
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
p.framesetOK = false p.framesetOK = false
// TODO: detect <select> inside a table. // TODO: detect <select> inside a table.
return inSelectIM, true p.im = inSelectIM
return true
case "li": case "li":
p.framesetOK = false p.framesetOK = false
for i := len(p.oe) - 1; i >= 0; i-- { for i := len(p.oe) - 1; i >= 0; i-- {
@ -651,10 +652,10 @@ func inBodyIM(p *parser) (insertionMode, bool) {
} }
} }
case "base", "basefont", "bgsound", "command", "link", "meta", "noframes", "script", "style", "title": case "base", "basefont", "bgsound", "command", "link", "meta", "noframes", "script", "style", "title":
return useTheRulesFor(p, inBodyIM, inHeadIM) return inHeadIM(p)
case "image": case "image":
p.tok.Data = "img" p.tok.Data = "img"
return inBodyIM, false return false
case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr": case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
// Ignore the token. // Ignore the token.
default: default:
@ -665,7 +666,8 @@ func inBodyIM(p *parser) (insertionMode, bool) {
switch p.tok.Data { switch p.tok.Data {
case "body": case "body":
// TODO: autoclose the stack of open elements. // TODO: autoclose the stack of open elements.
return afterBodyIM, true p.im = afterBodyIM
return true
case "p": case "p":
if !p.elementInScope(buttonScopeStopTags, "p") { if !p.elementInScope(buttonScopeStopTags, "p") {
p.addElement("p", nil) p.addElement("p", nil)
@ -681,7 +683,7 @@ func inBodyIM(p *parser) (insertionMode, bool) {
} }
case "br": case "br":
p.tok.Type = StartTagToken p.tok.Type = StartTagToken
return inBodyIM, false return false
default: default:
p.inBodyEndTagOther(p.tok.Data) p.inBodyEndTagOther(p.tok.Data)
} }
@ -692,7 +694,7 @@ func inBodyIM(p *parser) (insertionMode, bool) {
}) })
} }
return inBodyIM, true return true
} }
func (p *parser) inBodyEndTagFormatting(tag string) { func (p *parser) inBodyEndTagFormatting(tag string) {
@ -833,27 +835,27 @@ func (p *parser) inBodyEndTagOther(tag string) {
} }
// Section 11.2.5.4.8. // Section 11.2.5.4.8.
func textIM(p *parser) (insertionMode, bool) { func textIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case ErrorToken: case ErrorToken:
p.oe.pop() p.oe.pop()
case TextToken: case TextToken:
p.addText(p.tok.Data) p.addText(p.tok.Data)
return textIM, true return true
case EndTagToken: case EndTagToken:
p.oe.pop() p.oe.pop()
} }
o := p.originalIM p.im = p.originalIM
p.originalIM = nil p.originalIM = nil
return o, p.tok.Type == EndTagToken return p.tok.Type == EndTagToken
} }
// Section 11.2.5.4.9. // Section 11.2.5.4.9.
func inTableIM(p *parser) (insertionMode, bool) { func inTableIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case ErrorToken: case ErrorToken:
// Stop parsing. // Stop parsing.
return nil, true return true
case TextToken: case TextToken:
// TODO. // TODO.
case StartTagToken: case StartTagToken:
@ -861,25 +863,30 @@ func inTableIM(p *parser) (insertionMode, bool) {
case "tbody", "tfoot", "thead": case "tbody", "tfoot", "thead":
p.clearStackToContext(tableScopeStopTags) p.clearStackToContext(tableScopeStopTags)
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
return inTableBodyIM, true p.im = inTableBodyIM
return true
case "td", "th", "tr": case "td", "th", "tr":
p.clearStackToContext(tableScopeStopTags) p.clearStackToContext(tableScopeStopTags)
p.addElement("tbody", nil) p.addElement("tbody", nil)
return inTableBodyIM, false p.im = inTableBodyIM
return false
case "table": case "table":
if p.popUntil(tableScopeStopTags, "table") { if p.popUntil(tableScopeStopTags, "table") {
return p.resetInsertionMode(), false p.resetInsertionMode()
return false
} }
// Ignore the token. // Ignore the token.
return inTableIM, true return true
case "colgroup": case "colgroup":
p.clearStackToContext(tableScopeStopTags) p.clearStackToContext(tableScopeStopTags)
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
return inColumnGroupIM, true p.im = inColumnGroupIM
return true
case "col": case "col":
p.clearStackToContext(tableScopeStopTags) p.clearStackToContext(tableScopeStopTags)
p.addElement("colgroup", p.tok.Attr) p.addElement("colgroup", p.tok.Attr)
return inColumnGroupIM, false p.im = inColumnGroupIM
return false
default: default:
// TODO. // TODO.
} }
@ -887,20 +894,21 @@ func inTableIM(p *parser) (insertionMode, bool) {
switch p.tok.Data { switch p.tok.Data {
case "table": case "table":
if p.popUntil(tableScopeStopTags, "table") { if p.popUntil(tableScopeStopTags, "table") {
return p.resetInsertionMode(), true p.resetInsertionMode()
return true
} }
// Ignore the token. // Ignore the token.
return inTableIM, true return true
case "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr": case "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr":
// Ignore the token. // Ignore the token.
return inTableIM, true return true
} }
case CommentToken: case CommentToken:
p.addChild(&Node{ p.addChild(&Node{
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return inTableIM, true return true
} }
switch p.top().Data { switch p.top().Data {
@ -909,7 +917,7 @@ func inTableIM(p *parser) (insertionMode, bool) {
defer func() { p.fosterParenting = false }() defer func() { p.fosterParenting = false }()
} }
return useTheRulesFor(p, inTableIM, inBodyIM) return inBodyIM(p)
} }
// clearStackToContext pops elements off the stack of open elements // clearStackToContext pops elements off the stack of open elements
@ -926,26 +934,26 @@ func (p *parser) clearStackToContext(stopTags []string) {
} }
// Section 11.2.5.4.12. // Section 11.2.5.4.12.
func inColumnGroupIM(p *parser) (insertionMode, bool) { func inColumnGroupIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case CommentToken: case CommentToken:
p.addChild(&Node{ p.addChild(&Node{
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return inColumnGroupIM, true return true
case DoctypeToken: case DoctypeToken:
// Ignore the token. // Ignore the token.
return inColumnGroupIM, true return true
case StartTagToken: case StartTagToken:
switch p.tok.Data { switch p.tok.Data {
case "html": case "html":
return useTheRulesFor(p, inColumnGroupIM, inBodyIM) return inBodyIM(p)
case "col": case "col":
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
p.oe.pop() p.oe.pop()
p.acknowledgeSelfClosingTag() p.acknowledgeSelfClosingTag()
return inColumnGroupIM, true return true
} }
case EndTagToken: case EndTagToken:
switch p.tok.Data { switch p.tok.Data {
@ -953,20 +961,22 @@ func inColumnGroupIM(p *parser) (insertionMode, bool) {
if p.oe.top().Data != "html" { if p.oe.top().Data != "html" {
p.oe.pop() p.oe.pop()
} }
return inTableIM, true p.im = inTableIM
return true
case "col": case "col":
// Ignore the token. // Ignore the token.
return inColumnGroupIM, true return true
} }
} }
if p.oe.top().Data != "html" { if p.oe.top().Data != "html" {
p.oe.pop() p.oe.pop()
} }
return inTableIM, false p.im = inTableIM
return false
} }
// Section 11.2.5.4.13. // Section 11.2.5.4.13.
func inTableBodyIM(p *parser) (insertionMode, bool) { func inTableBodyIM(p *parser) bool {
var ( var (
add bool add bool
data string data string
@ -996,31 +1006,33 @@ func inTableBodyIM(p *parser) (insertionMode, bool) {
switch p.tok.Data { switch p.tok.Data {
case "table": case "table":
if p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") { if p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") {
return inTableIM, false p.im = inTableIM
return false
} }
// Ignore the token. // Ignore the token.
return inTableBodyIM, true return true
case "body", "caption", "col", "colgroup", "html", "td", "th", "tr": case "body", "caption", "col", "colgroup", "html", "td", "th", "tr":
// Ignore the token. // Ignore the token.
return inTableBodyIM, true return true
} }
case CommentToken: case CommentToken:
p.addChild(&Node{ p.addChild(&Node{
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return inTableBodyIM, true return true
} }
if add { if add {
// TODO: clear the stack back to a table body context. // TODO: clear the stack back to a table body context.
p.addElement(data, attr) p.addElement(data, attr)
return inRowIM, consumed p.im = inRowIM
return consumed
} }
return useTheRulesFor(p, inTableBodyIM, inTableIM) return inTableIM(p)
} }
// Section 11.2.5.4.14. // Section 11.2.5.4.14.
func inRowIM(p *parser) (insertionMode, bool) { func inRowIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case ErrorToken: case ErrorToken:
// TODO. // TODO.
@ -1032,13 +1044,15 @@ func inRowIM(p *parser) (insertionMode, bool) {
p.clearStackToContext(tableRowContextStopTags) p.clearStackToContext(tableRowContextStopTags)
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
p.afe = append(p.afe, &scopeMarker) p.afe = append(p.afe, &scopeMarker)
return inCellIM, true p.im = inCellIM
return true
case "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr": case "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr":
if p.popUntil(tableScopeStopTags, "tr") { if p.popUntil(tableScopeStopTags, "tr") {
return inTableBodyIM, false p.im = inTableBodyIM
return false
} }
// Ignore the token. // Ignore the token.
return inRowIM, true return true
default: default:
// TODO. // TODO.
} }
@ -1046,21 +1060,23 @@ func inRowIM(p *parser) (insertionMode, bool) {
switch p.tok.Data { switch p.tok.Data {
case "tr": case "tr":
if p.popUntil(tableScopeStopTags, "tr") { if p.popUntil(tableScopeStopTags, "tr") {
return inTableBodyIM, true p.im = inTableBodyIM
return true
} }
// Ignore the token. // Ignore the token.
return inRowIM, true return true
case "table": case "table":
if p.popUntil(tableScopeStopTags, "tr") { if p.popUntil(tableScopeStopTags, "tr") {
return inTableBodyIM, false p.im = inTableBodyIM
return false
} }
// Ignore the token. // Ignore the token.
return inRowIM, true return true
case "tbody", "tfoot", "thead": case "tbody", "tfoot", "thead":
// TODO. // TODO.
case "body", "caption", "col", "colgroup", "html", "td", "th": case "body", "caption", "col", "colgroup", "html", "td", "th":
// Ignore the token. // Ignore the token.
return inRowIM, true return true
default: default:
// TODO. // TODO.
} }
@ -1069,13 +1085,13 @@ func inRowIM(p *parser) (insertionMode, bool) {
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return inRowIM, true return true
} }
return useTheRulesFor(p, inRowIM, inTableIM) return inTableIM(p)
} }
// Section 11.2.5.4.15. // Section 11.2.5.4.15.
func inCellIM(p *parser) (insertionMode, bool) { func inCellIM(p *parser) bool {
var ( var (
closeTheCellAndReprocess bool closeTheCellAndReprocess bool
) )
@ -1091,10 +1107,11 @@ func inCellIM(p *parser) (insertionMode, bool) {
case "td", "th": case "td", "th":
if !p.popUntil(tableScopeStopTags, p.tok.Data) { if !p.popUntil(tableScopeStopTags, p.tok.Data) {
// Ignore the token. // Ignore the token.
return inCellIM, true return true
} }
p.clearActiveFormattingElements() p.clearActiveFormattingElements()
return inRowIM, true p.im = inRowIM
return true
case "body", "caption", "col", "colgroup", "html": case "body", "caption", "col", "colgroup", "html":
// TODO. // TODO.
case "table", "tbody", "tfoot", "thead", "tr": case "table", "tbody", "tfoot", "thead", "tr":
@ -1106,19 +1123,20 @@ func inCellIM(p *parser) (insertionMode, bool) {
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return inCellIM, true return true
} }
if closeTheCellAndReprocess { if closeTheCellAndReprocess {
if p.popUntil(tableScopeStopTags, "td") || p.popUntil(tableScopeStopTags, "th") { if p.popUntil(tableScopeStopTags, "td") || p.popUntil(tableScopeStopTags, "th") {
p.clearActiveFormattingElements() p.clearActiveFormattingElements()
return inRowIM, false p.im = inRowIM
return false
} }
} }
return useTheRulesFor(p, inCellIM, inBodyIM) return inBodyIM(p)
} }
// Section 11.2.5.4.16. // Section 11.2.5.4.16.
func inSelectIM(p *parser) (insertionMode, bool) { func inSelectIM(p *parser) bool {
endSelect := false endSelect := false
switch p.tok.Type { switch p.tok.Type {
case ErrorToken: case ErrorToken:
@ -1167,31 +1185,33 @@ func inSelectIM(p *parser) (insertionMode, bool) {
switch p.oe[i].Data { switch p.oe[i].Data {
case "select": case "select":
p.oe = p.oe[:i] p.oe = p.oe[:i]
return p.resetInsertionMode(), true p.resetInsertionMode()
return true
case "option", "optgroup": case "option", "optgroup":
continue continue
default: default:
// Ignore the token. // Ignore the token.
return inSelectIM, true return true
} }
} }
} }
return inSelectIM, true return true
} }
// Section 11.2.5.4.18. // Section 11.2.5.4.18.
func afterBodyIM(p *parser) (insertionMode, bool) { func afterBodyIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case ErrorToken: case ErrorToken:
// Stop parsing. // Stop parsing.
return nil, true return true
case StartTagToken: case StartTagToken:
if p.tok.Data == "html" { if p.tok.Data == "html" {
return useTheRulesFor(p, afterBodyIM, inBodyIM) return inBodyIM(p)
} }
case EndTagToken: case EndTagToken:
if p.tok.Data == "html" { if p.tok.Data == "html" {
return afterAfterBodyIM, true p.im = afterAfterBodyIM
return true
} }
case CommentToken: case CommentToken:
// The comment is attached to the <html> element. // The comment is attached to the <html> element.
@ -1202,13 +1222,14 @@ func afterBodyIM(p *parser) (insertionMode, bool) {
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return afterBodyIM, true return true
} }
return inBodyIM, false p.im = inBodyIM
return false
} }
// Section 11.2.5.4.19. // Section 11.2.5.4.19.
func inFramesetIM(p *parser) (insertionMode, bool) { func inFramesetIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case CommentToken: case CommentToken:
p.addChild(&Node{ p.addChild(&Node{
@ -1218,7 +1239,7 @@ func inFramesetIM(p *parser) (insertionMode, bool) {
case StartTagToken: case StartTagToken:
switch p.tok.Data { switch p.tok.Data {
case "html": case "html":
return useTheRulesFor(p, inFramesetIM, inBodyIM) return inBodyIM(p)
case "frameset": case "frameset":
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
case "frame": case "frame":
@ -1226,7 +1247,7 @@ func inFramesetIM(p *parser) (insertionMode, bool) {
p.oe.pop() p.oe.pop()
p.acknowledgeSelfClosingTag() p.acknowledgeSelfClosingTag()
case "noframes": case "noframes":
return useTheRulesFor(p, inFramesetIM, inHeadIM) return inHeadIM(p)
} }
case EndTagToken: case EndTagToken:
switch p.tok.Data { switch p.tok.Data {
@ -1234,18 +1255,19 @@ func inFramesetIM(p *parser) (insertionMode, bool) {
if p.oe.top().Data != "html" { if p.oe.top().Data != "html" {
p.oe.pop() p.oe.pop()
if p.oe.top().Data != "frameset" { if p.oe.top().Data != "frameset" {
return afterFramesetIM, true p.im = afterFramesetIM
return true
} }
} }
} }
default: default:
// Ignore the token. // Ignore the token.
} }
return inFramesetIM, true return true
} }
// Section 11.2.5.4.20. // Section 11.2.5.4.20.
func afterFramesetIM(p *parser) (insertionMode, bool) { func afterFramesetIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case CommentToken: case CommentToken:
p.addChild(&Node{ p.addChild(&Node{
@ -1255,45 +1277,47 @@ func afterFramesetIM(p *parser) (insertionMode, bool) {
case StartTagToken: case StartTagToken:
switch p.tok.Data { switch p.tok.Data {
case "html": case "html":
return useTheRulesFor(p, inFramesetIM, inBodyIM) return inBodyIM(p)
case "noframes": case "noframes":
return useTheRulesFor(p, inFramesetIM, inHeadIM) return inHeadIM(p)
} }
case EndTagToken: case EndTagToken:
switch p.tok.Data { switch p.tok.Data {
case "html": case "html":
return afterAfterFramesetIM, true p.im = afterAfterFramesetIM
return true
} }
default: default:
// Ignore the token. // Ignore the token.
} }
return afterFramesetIM, true return true
} }
// Section 11.2.5.4.21. // Section 11.2.5.4.21.
func afterAfterBodyIM(p *parser) (insertionMode, bool) { func afterAfterBodyIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case ErrorToken: case ErrorToken:
// Stop parsing. // Stop parsing.
return nil, true return true
case TextToken: case TextToken:
// TODO. // TODO.
case StartTagToken: case StartTagToken:
if p.tok.Data == "html" { if p.tok.Data == "html" {
return useTheRulesFor(p, afterAfterBodyIM, inBodyIM) return inBodyIM(p)
} }
case CommentToken: case CommentToken:
p.doc.Add(&Node{ p.doc.Add(&Node{
Type: CommentNode, Type: CommentNode,
Data: p.tok.Data, Data: p.tok.Data,
}) })
return afterAfterBodyIM, true return true
} }
return inBodyIM, false p.im = inBodyIM
return false
} }
// Section 11.2.5.4.22. // Section 11.2.5.4.22.
func afterAfterFramesetIM(p *parser) (insertionMode, bool) { func afterAfterFramesetIM(p *parser) bool {
switch p.tok.Type { switch p.tok.Type {
case CommentToken: case CommentToken:
p.addChild(&Node{ p.addChild(&Node{
@ -1303,14 +1327,14 @@ func afterAfterFramesetIM(p *parser) (insertionMode, bool) {
case StartTagToken: case StartTagToken:
switch p.tok.Data { switch p.tok.Data {
case "html": case "html":
return useTheRulesFor(p, afterAfterFramesetIM, inBodyIM) return inBodyIM(p)
case "noframes": case "noframes":
return useTheRulesFor(p, afterAfterFramesetIM, inHeadIM) return inHeadIM(p)
} }
default: default:
// Ignore the token. // Ignore the token.
} }
return afterAfterFramesetIM, true return true
} }
// Parse returns the parse tree for the HTML from the given Reader. // Parse returns the parse tree for the HTML from the given Reader.
@ -1323,9 +1347,10 @@ func Parse(r io.Reader) (*Node, error) {
}, },
scripting: true, scripting: true,
framesetOK: true, framesetOK: true,
im: initialIM,
} }
// Iterate until EOF. Any other error will cause an early return. // Iterate until EOF. Any other error will cause an early return.
im, consumed := initialIM, true consumed := true
for { for {
if consumed { if consumed {
if err := p.read(); err != nil { if err := p.read(); err != nil {
@ -1335,11 +1360,11 @@ func Parse(r io.Reader) (*Node, error) {
return nil, err return nil, err
} }
} }
im, consumed = im(p) consumed = p.im(p)
} }
// Loop until the final token (the ErrorToken signifying EOF) is consumed. // Loop until the final token (the ErrorToken signifying EOF) is consumed.
for { for {
if im, consumed = im(p); consumed { if consumed = p.im(p); consumed {
break break
} }
} }