From e9e874b7fcc722e2e9af942761b8fc2cd8e2c240 Mon Sep 17 00:00:00 2001 From: Andrew Balholm Date: Thu, 10 Nov 2011 23:56:13 +1100 Subject: [PATCH] html: parse framesets Pass tests1.dat, test 106: | | | | | | | Also pass test 107: <h1><table><td><h3></table><h3></h1> R=nigeltao CC=golang-dev https://golang.org/cl/5373050 --- src/pkg/html/parse.go | 91 ++++++++++++++++++++++++++++++++++++-- src/pkg/html/parse_test.go | 2 +- 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go index c64eb66fcb..eb0d5c2d09 100644 --- a/src/pkg/html/parse.go +++ b/src/pkg/html/parse.go @@ -321,7 +321,7 @@ func (p *parser) resetInsertionMode() insertionMode { case "body": return inBodyIM case "frameset": - // TODO: return inFramesetIM + return inFramesetIM case "html": return beforeHeadIM } @@ -517,7 +517,8 @@ func afterHeadIM(p *parser) (insertionMode, bool) { attr = p.tok.Attr framesetOK = false case "frameset": - // TODO. + p.addElement(p.tok.Data, p.tok.Attr) + return inFramesetIM, true case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title": p.oe = append(p.oe, p.head) defer p.oe.pop() @@ -646,7 +647,7 @@ func inBodyIM(p *parser) (insertionMode, bool) { break } p.popUntil(buttonScopeStopTags, "p") - p.addElement("li", p.tok.Attr) + p.addElement(p.tok.Data, p.tok.Attr) case "optgroup", "option": if p.top().Data == "option" { p.oe.pop() @@ -1169,6 +1170,69 @@ func afterBodyIM(p *parser) (insertionMode, bool) { return afterBodyIM, true } +// Section 11.2.5.4.19. +func inFramesetIM(p *parser) (insertionMode, bool) { + switch p.tok.Type { + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case StartTagToken: + switch p.tok.Data { + case "html": + return useTheRulesFor(p, inFramesetIM, inBodyIM) + case "frameset": + p.addElement(p.tok.Data, p.tok.Attr) + case "frame": + p.addElement(p.tok.Data, p.tok.Attr) + p.oe.pop() + p.acknowledgeSelfClosingTag() + case "noframes": + return useTheRulesFor(p, inFramesetIM, inHeadIM) + } + case EndTagToken: + switch p.tok.Data { + case "frameset": + if p.oe.top().Data != "html" { + p.oe.pop() + if p.oe.top().Data != "frameset" { + return afterFramesetIM, true + } + } + } + default: + // Ignore the token. + } + return inFramesetIM, true +} + +// Section 11.2.5.4.20. +func afterFramesetIM(p *parser) (insertionMode, bool) { + switch p.tok.Type { + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case StartTagToken: + switch p.tok.Data { + case "html": + return useTheRulesFor(p, inFramesetIM, inBodyIM) + case "noframes": + return useTheRulesFor(p, inFramesetIM, inHeadIM) + } + case EndTagToken: + switch p.tok.Data { + case "html": + return afterAfterFramesetIM, true + } + default: + // Ignore the token. + } + return afterFramesetIM, true +} + // Section 11.2.5.4.21. func afterAfterBodyIM(p *parser) (insertionMode, bool) { switch p.tok.Type { @@ -1191,6 +1255,27 @@ func afterAfterBodyIM(p *parser) (insertionMode, bool) { return inBodyIM, false } +// Section 11.2.5.4.22. +func afterAfterFramesetIM(p *parser) (insertionMode, bool) { + switch p.tok.Type { + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case StartTagToken: + switch p.tok.Data { + case "html": + return useTheRulesFor(p, afterAfterFramesetIM, inBodyIM) + case "noframes": + return useTheRulesFor(p, afterAfterFramesetIM, inHeadIM) + } + default: + // Ignore the token. + } + return afterAfterFramesetIM, true +} + // Parse returns the parse tree for the HTML from the given Reader. // The input is assumed to be UTF-8 encoded. func Parse(r io.Reader) (*Node, error) { diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go index 8cef0fa8e3..0e93a9de84 100644 --- a/src/pkg/html/parse_test.go +++ b/src/pkg/html/parse_test.go @@ -133,7 +133,7 @@ func TestParser(t *testing.T) { n int }{ // TODO(nigeltao): Process all the test cases from all the .dat files. - {"tests1.dat", 106}, + {"tests1.dat", 108}, {"tests2.dat", 0}, {"tests3.dat", 0}, }