1
0
mirror of https://github.com/golang/go synced 2024-10-03 05:21:22 -06:00

html: parse framesets

Pass tests1.dat, test 106:
<frameset><frame><frameset><frame></frameset><noframes></noframes></frameset>

| <html>
|   <head>
|   <frameset>
|     <frame>
|     <frameset>
|       <frame>
|     <noframes>

Also pass test 107:
<h1><table><td><h3></table><h3></h1>

R=nigeltao
CC=golang-dev
https://golang.org/cl/5373050
This commit is contained in:
Andrew Balholm 2011-11-10 23:56:13 +11:00 committed by Nigel Tao
parent be8025604e
commit e9e874b7fc
2 changed files with 89 additions and 4 deletions

View File

@ -321,7 +321,7 @@ func (p *parser) resetInsertionMode() insertionMode {
case "body": case "body":
return inBodyIM return inBodyIM
case "frameset": case "frameset":
// TODO: return inFramesetIM return inFramesetIM
case "html": case "html":
return beforeHeadIM return beforeHeadIM
} }
@ -517,7 +517,8 @@ func afterHeadIM(p *parser) (insertionMode, bool) {
attr = p.tok.Attr attr = p.tok.Attr
framesetOK = false framesetOK = false
case "frameset": case "frameset":
// TODO. p.addElement(p.tok.Data, p.tok.Attr)
return inFramesetIM, true
case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title": case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title":
p.oe = append(p.oe, p.head) p.oe = append(p.oe, p.head)
defer p.oe.pop() defer p.oe.pop()
@ -646,7 +647,7 @@ func inBodyIM(p *parser) (insertionMode, bool) {
break break
} }
p.popUntil(buttonScopeStopTags, "p") p.popUntil(buttonScopeStopTags, "p")
p.addElement("li", p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
case "optgroup", "option": case "optgroup", "option":
if p.top().Data == "option" { if p.top().Data == "option" {
p.oe.pop() p.oe.pop()
@ -1169,6 +1170,69 @@ func afterBodyIM(p *parser) (insertionMode, bool) {
return afterBodyIM, true return afterBodyIM, true
} }
// Section 11.2.5.4.19.
//
// inFramesetIM handles a single token while the parser is in the
// "in frameset" insertion mode:
//
//   - a comment token is appended as a CommentNode child;
//   - <html> and <noframes> start tags are delegated to inBodyIM and
//     inHeadIM respectively via useTheRulesFor;
//   - a <frameset> start tag adds a new element that stays open;
//   - a <frame> start tag adds an element and pops it straight away, then
//     acknowledges a self-closing tag;
//   - a </frameset> end tag pops the current element unless that element is
//     "html", and moves to afterFramesetIM once the new top of the stack is
//     no longer a "frameset".
//
// Every other token is ignored. Except where a delegate decides otherwise,
// the result is (inFramesetIM, true).
func inFramesetIM(p *parser) (insertionMode, bool) {
	kind, name := p.tok.Type, p.tok.Data
	switch {
	case kind == CommentToken:
		comment := &Node{
			Type: CommentNode,
			Data: name,
		}
		p.addChild(comment)

	case kind == StartTagToken && name == "html":
		return useTheRulesFor(p, inFramesetIM, inBodyIM)

	case kind == StartTagToken && name == "noframes":
		return useTheRulesFor(p, inFramesetIM, inHeadIM)

	case kind == StartTagToken && name == "frameset":
		p.addElement(name, p.tok.Attr)

	case kind == StartTagToken && name == "frame":
		// A frame never has children, so it does not stay on the stack of
		// open elements.
		p.addElement(name, p.tok.Attr)
		p.oe.pop()
		p.acknowledgeSelfClosingTag()

	case kind == EndTagToken && name == "frameset":
		if p.oe.top().Data == "html" {
			// Nothing to close: stay in this mode without popping.
			return inFramesetIM, true
		}
		p.oe.pop()
		if p.oe.top().Data != "frameset" {
			// The outermost frameset has just been closed.
			return afterFramesetIM, true
		}

	default:
		// Ignore the token.
	}
	return inFramesetIM, true
}
// Section 11.2.5.4.20.
//
// afterFramesetIM handles a single token while the parser is in the
// "after frameset" insertion mode:
//
//   - a comment token is appended as a CommentNode child;
//   - <html> and <noframes> start tags are delegated to inBodyIM and
//     inHeadIM respectively via useTheRulesFor;
//   - an </html> end tag moves the parser to afterAfterFramesetIM.
//
// Every other token is ignored. Except where a delegate decides otherwise,
// the result is (afterFramesetIM, true).
func afterFramesetIM(p *parser) (insertionMode, bool) {
	switch p.tok.Type {
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	case StartTagToken:
		switch p.tok.Data {
		case "html":
			// Pass this mode (not inFramesetIM) as the calling mode, as the
			// sibling insertion modes do, so that borrowing another mode's
			// rules does not send the parser back to "in frameset".
			return useTheRulesFor(p, afterFramesetIM, inBodyIM)
		case "noframes":
			return useTheRulesFor(p, afterFramesetIM, inHeadIM)
		}
	case EndTagToken:
		switch p.tok.Data {
		case "html":
			return afterAfterFramesetIM, true
		}
	default:
		// Ignore the token.
	}
	return afterFramesetIM, true
}
// Section 11.2.5.4.21. // Section 11.2.5.4.21.
func afterAfterBodyIM(p *parser) (insertionMode, bool) { func afterAfterBodyIM(p *parser) (insertionMode, bool) {
switch p.tok.Type { switch p.tok.Type {
@ -1191,6 +1255,27 @@ func afterAfterBodyIM(p *parser) (insertionMode, bool) {
return inBodyIM, false return inBodyIM, false
} }
// Section 11.2.5.4.22.
//
// afterAfterFramesetIM handles a single token while the parser is in the
// "after after frameset" insertion mode. A comment token is appended as a
// CommentNode child; <html> and <noframes> start tags are delegated to
// inBodyIM and inHeadIM respectively via useTheRulesFor; every other token
// is ignored. Except where a delegate decides otherwise, the result is
// (afterAfterFramesetIM, true).
func afterAfterFramesetIM(p *parser) (insertionMode, bool) {
	if p.tok.Type == CommentToken {
		comment := &Node{
			Type: CommentNode,
			Data: p.tok.Data,
		}
		p.addChild(comment)
		return afterAfterFramesetIM, true
	}

	if p.tok.Type == StartTagToken {
		if p.tok.Data == "html" {
			return useTheRulesFor(p, afterAfterFramesetIM, inBodyIM)
		}
		if p.tok.Data == "noframes" {
			return useTheRulesFor(p, afterAfterFramesetIM, inHeadIM)
		}
	}

	// Ignore the token.
	return afterAfterFramesetIM, true
}
// Parse returns the parse tree for the HTML from the given Reader. // Parse returns the parse tree for the HTML from the given Reader.
// The input is assumed to be UTF-8 encoded. // The input is assumed to be UTF-8 encoded.
func Parse(r io.Reader) (*Node, error) { func Parse(r io.Reader) (*Node, error) {

View File

@ -133,7 +133,7 @@ func TestParser(t *testing.T) {
n int n int
}{ }{
// TODO(nigeltao): Process all the test cases from all the .dat files. // TODO(nigeltao): Process all the test cases from all the .dat files.
{"tests1.dat", 106}, {"tests1.dat", 108},
{"tests2.dat", 0}, {"tests2.dat", 0},
{"tests3.dat", 0}, {"tests3.dat", 0},
} }