diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go index d476f4ac21b..582437f7673 100644 --- a/src/pkg/html/parse.go +++ b/src/pkg/html/parse.go @@ -29,6 +29,9 @@ type parser struct { head, form *Node // Other parsing state flags (section 11.2.3.5). scripting, framesetOK bool + // originalIM is the insertion mode to go back to after completing a text + // or inTableText insertion mode. + originalIM insertionMode } func (p *parser) top() *Node { @@ -214,12 +217,23 @@ type insertionMode func(*parser) (insertionMode, bool) // Section 11.2.3.1, "using the rules for". func useTheRulesFor(p *parser, actual, delegate insertionMode) (insertionMode, bool) { im, consumed := delegate(p) + // TODO: do we need to update p.originalMode if it equals delegate? if im != delegate { return im, consumed } return actual, consumed } +// setOriginalIM sets the insertion mode to return to after completing a text or +// inTableText insertion mode. +// Section 11.2.3.1, "using the rules for". +func (p *parser) setOriginalIM(im insertionMode) { + if p.originalIM != nil { + panic("html: bad parser state: originalIM was set twice") + } + p.originalIM = im +} + // Section 11.2.5.4.1. func initialIM(p *parser) (insertionMode, bool) { if p.tok.Type == DoctypeToken { @@ -318,8 +332,10 @@ func inHeadIM(p *parser) (insertionMode, bool) { switch p.tok.Data { case "meta": // TODO. - case "script": - // TODO. + case "script", "title": + p.addElement(p.tok.Data, p.tok.Attr) + p.setOriginalIM(inHeadIM) + return textIM, true default: implied = true } @@ -574,6 +590,20 @@ func (p *parser) inBodyEndTagFormatting(tag string) { } } +// Section 11.2.5.4.8. +func textIM(p *parser) (insertionMode, bool) { + switch p.tok.Type { + case TextToken: + p.addText(p.tok.Data) + return textIM, true + case EndTagToken: + p.oe.pop() + } + o := p.originalIM + p.originalIM = nil + return o, p.tok.Type == EndTagToken +} + // Section 11.2.5.4.9. func inTableIM(p *parser) (insertionMode, bool) { var ( diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go index c6fd37a10ed..564580c78b2 100644 --- a/src/pkg/html/parse_test.go +++ b/src/pkg/html/parse_test.go @@ -80,13 +80,13 @@ func dumpLevel(w io.Writer, n *Node, level int) os.Error { case DocumentNode: return os.NewError("unexpected DocumentNode") case ElementNode: - fmt.Fprintf(w, "<%s>", EscapeString(n.Data)) + fmt.Fprintf(w, "<%s>", n.Data) case TextNode: - fmt.Fprintf(w, "%q", EscapeString(n.Data)) + fmt.Fprintf(w, "%q", n.Data) case CommentNode: return os.NewError("COMMENT") case DoctypeNode: - fmt.Fprintf(w, "", EscapeString(n.Data)) + fmt.Fprintf(w, "", n.Data) case scopeMarkerNode: return os.NewError("unexpected scopeMarkerNode") default: @@ -123,7 +123,7 @@ func TestParser(t *testing.T) { rc := make(chan io.Reader) go readDat(filename, rc) // TODO(nigeltao): Process all test cases, not just a subset. - for i := 0; i < 26; i++ { + for i := 0; i < 27; i++ { // Parse the #data section. b, err := ioutil.ReadAll(<-rc) if err != nil { diff --git a/src/pkg/html/render.go b/src/pkg/html/render.go index bf7b5995a11..e1ec66ff1ac 100644 --- a/src/pkg/html/render.go +++ b/src/pkg/html/render.go @@ -74,17 +74,6 @@ func render(w writer, n *Node) os.Error { return os.NewError("html: unknown node type") } - // TODO: figure out what to do with " that closes the next token. If + // non-empty, the subsequent call to Next will return a raw or RCDATA text + // token: one that treats "

" as text instead of an element. + // rawTag's contents are lower-cased. + rawTag string + // textIsRaw is whether the current text token's data is not escaped. + textIsRaw bool } // Error returns the error associated with the most recent ErrorToken token. @@ -225,6 +233,54 @@ func (z *Tokenizer) skipWhiteSpace() { } } +// readRawOrRCDATA reads until the next "", where "foo" is z.rawTag and +// is typically something like "script" or "textarea". +func (z *Tokenizer) readRawOrRCDATA() { +loop: + for { + c := z.readByte() + if z.err != nil { + break loop + } + if c != '<' { + continue loop + } + c = z.readByte() + if z.err != nil { + break loop + } + if c != '/' { + continue loop + } + for i := 0; i < len(z.rawTag); i++ { + c = z.readByte() + if z.err != nil { + break loop + } + if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') { + continue loop + } + } + c = z.readByte() + if z.err != nil { + break loop + } + switch c { + case ' ', '\n', '\r', '\t', '\f', '/', '>': + // The 3 is 2 for the leading "". + z.raw.end-- + } + } + z.data.end = z.raw.end + // A textarea's or title's RCDATA can contain escaped entities. + z.textIsRaw = z.rawTag != "textarea" && z.rawTag != "title" + z.rawTag = "" +} + // readComment reads the next comment token starting with "