1
0
mirror of https://github.com/golang/go synced 2024-11-20 00:04:43 -07:00

html: parse and render <plaintext> elements

Pass tests2.dat, test 10:
<table><plaintext><td>

| <html>
|   <head>
|   <body>
|     <plaintext>
|       "<td>"
|     <table>

Also pass tests through test 25:
<!doctypehtml><p><dd>

R=nigeltao
CC=golang-dev
https://golang.org/cl/5369109
This commit is contained in:
Andrew Balholm 2011-11-15 11:39:18 +11:00 committed by Nigel Tao
parent f5cf0a486e
commit 3bd5082f57
4 changed files with 42 additions and 11 deletions

View File

@ -655,6 +655,9 @@ func inBodyIM(p *parser) bool {
}
p.popUntil(buttonScopeStopTags, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "plaintext":
p.popUntil(buttonScopeStopTags, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "optgroup", "option":
if p.top().Data == "option" {
p.oe.pop()

View File

@ -134,7 +134,7 @@ func TestParser(t *testing.T) {
}{
// TODO(nigeltao): Process all the test cases from all the .dat files.
{"tests1.dat", -1},
{"tests2.dat", 10},
{"tests2.dat", 26},
{"tests3.dat", 0},
}
for _, tf := range testFiles {
@ -214,4 +214,7 @@ var renderTestBlacklist = map[string]bool{
`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
`<a><table><a></table><p><a><div><a>`: true,
`<a><table><td><a><table></table><a></tr><a></table><a>`: true,
// A <plaintext> element is reparented, putting it before a table.
// A <plaintext> element can't have anything after it in HTML.
`<table><plaintext><td>`: true,
}

View File

@ -52,7 +52,19 @@ func Render(w io.Writer, n *Node) error {
return buf.Flush()
}
// plaintextAbort is returned from render1 when a <plaintext> element
// has been rendered. No more end tags should be rendered after that.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
// render writes n to w by delegating to render1. The plaintextAbort
// sentinel only signals that a <plaintext> element was rendered, so it
// is reported to the caller as success; any other error is returned as is.
func render(w writer, n *Node) error {
	if err := render1(w, n); err != plaintextAbort {
		return err
	}
	return nil
}
func render1(w writer, n *Node) error {
// Render non-element nodes; these are the easy cases.
switch n.Type {
case ErrorNode:
@ -61,7 +73,7 @@ func render(w writer, n *Node) error {
return escape(w, n.Data)
case DocumentNode:
for _, c := range n.Child {
if err := render(w, c); err != nil {
if err := render1(w, c); err != nil {
return err
}
}
@ -128,7 +140,7 @@ func render(w writer, n *Node) error {
// Render any child nodes.
switch n.Data {
case "noembed", "noframes", "noscript", "script", "style":
case "noembed", "noframes", "noscript", "plaintext", "script", "style":
for _, c := range n.Child {
if c.Type != TextNode {
return fmt.Errorf("html: raw text element <%s> has non-text child node", n.Data)
@ -137,18 +149,23 @@ func render(w writer, n *Node) error {
return err
}
}
if n.Data == "plaintext" {
// Don't render anything else. <plaintext> must be the
// last element in the file, with no closing tag.
return plaintextAbort
}
case "textarea", "title":
for _, c := range n.Child {
if c.Type != TextNode {
return fmt.Errorf("html: RCDATA element <%s> has non-text child node", n.Data)
}
if err := render(w, c); err != nil {
if err := render1(w, c); err != nil {
return err
}
}
default:
for _, c := range n.Child {
if err := render(w, c); err != nil {
if err := render1(w, c); err != nil {
return err
}
}

View File

@ -401,14 +401,14 @@ func (z *Tokenizer) readStartTag() TokenType {
break
}
}
// Any "<noembed>", "<noframes>", "<noscript>", "<script>", "<style>",
// Any "<noembed>", "<noframes>", "<noscript>", "<plaintext>", "<script>", "<style>",
// "<textarea>" or "<title>" tag flags the tokenizer's next token as raw.
// The tag name lengths of these special cases ranges in [5, 8].
if x := z.data.end - z.data.start; 5 <= x && x <= 8 {
// The tag name lengths of these special cases range in [5, 9].
if x := z.data.end - z.data.start; 5 <= x && x <= 9 {
switch z.buf[z.data.start] {
case 'n', 's', 't', 'N', 'S', 'T':
case 'n', 'p', 's', 't', 'N', 'P', 'S', 'T':
switch s := strings.ToLower(string(z.buf[z.data.start:z.data.end])); s {
case "noembed", "noframes", "noscript", "script", "style", "textarea", "title":
case "noembed", "noframes", "noscript", "plaintext", "script", "style", "textarea", "title":
z.rawTag = s
}
}
@ -551,7 +551,15 @@ func (z *Tokenizer) Next() TokenType {
z.data.start = z.raw.end
z.data.end = z.raw.end
if z.rawTag != "" {
z.readRawOrRCDATA()
if z.rawTag == "plaintext" {
// Read everything up to EOF.
for z.err == nil {
z.readByte()
}
z.textIsRaw = true
} else {
z.readRawOrRCDATA()
}
if z.data.end > z.data.start {
z.tt = TextToken
return z.tt