1
0
mirror of https://github.com/golang/go synced 2024-11-18 20:44:45 -07:00

exp/html: parse CDATA sections in foreign content

Also convert NUL to U+FFFD in comments.

Pass 23 additional tests.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6446055
This commit is contained in:
Andrew Balholm 2012-07-27 16:05:25 +10:00 committed by Nigel Tao
parent b9e051e82d
commit a1f340fa1a
4 changed files with 91 additions and 31 deletions

View File

@ -390,6 +390,10 @@ func (p *parser) reconstructActiveFormattingElements() {
// read reads the next token from the tokenizer. // read reads the next token from the tokenizer.
func (p *parser) read() error { func (p *parser) read() error {
// CDATA sections are allowed only in foreign content.
n := p.oe.top()
p.tokenizer.cdataOK = n != nil && n.Namespace != ""
p.tokenizer.Next() p.tokenizer.Next()
p.tok = p.tokenizer.Token() p.tok = p.tokenizer.Token()
if p.tok.Type == ErrorToken { if p.tok.Type == ErrorToken {

View File

@ -8,9 +8,9 @@ PASS "<html><select>\x00"
PASS "\x00" PASS "\x00"
PASS "<body>\x00" PASS "<body>\x00"
PASS "<plaintext>\x00filler\x00text\x00" PASS "<plaintext>\x00filler\x00text\x00"
FAIL "<svg><![CDATA[\x00filler\x00text\x00]]>" PASS "<svg><![CDATA[\x00filler\x00text\x00]]>"
FAIL "<body><!\x00>" PASS "<body><!\x00>"
FAIL "<body><!\x00filler\x00text>" PASS "<body><!\x00filler\x00text>"
PASS "<body><svg><foreignObject>\x00filler\x00text" PASS "<body><svg><foreignObject>\x00filler\x00text"
FAIL "<svg>\x00filler\x00text" FAIL "<svg>\x00filler\x00text"
FAIL "<svg>\x00<frameset>" FAIL "<svg>\x00<frameset>"

View File

@ -1,22 +1,22 @@
FAIL "<svg><![CDATA[foo]]>" PASS "<svg><![CDATA[foo]]>"
FAIL "<math><![CDATA[foo]]>" PASS "<math><![CDATA[foo]]>"
PASS "<div><![CDATA[foo]]>" PASS "<div><![CDATA[foo]]>"
FAIL "<svg><![CDATA[foo" PASS "<svg><![CDATA[foo"
FAIL "<svg><![CDATA[foo" PASS "<svg><![CDATA[foo"
FAIL "<svg><![CDATA[" PASS "<svg><![CDATA["
FAIL "<svg><![CDATA[]]>" PASS "<svg><![CDATA[]]>"
FAIL "<svg><![CDATA[]] >]]>" PASS "<svg><![CDATA[]] >]]>"
FAIL "<svg><![CDATA[]] >]]>" PASS "<svg><![CDATA[]] >]]>"
FAIL "<svg><![CDATA[]]" PASS "<svg><![CDATA[]]"
FAIL "<svg><![CDATA[]" PASS "<svg><![CDATA[]"
FAIL "<svg><![CDATA[]>a" PASS "<svg><![CDATA[]>a"
PASS "<svg><foreignObject><div><![CDATA[foo]]>" PASS "<svg><foreignObject><div><![CDATA[foo]]>"
FAIL "<svg><![CDATA[<svg>]]>" PASS "<svg><![CDATA[<svg>]]>"
FAIL "<svg><![CDATA[</svg>a]]>" PASS "<svg><![CDATA[</svg>a]]>"
FAIL "<svg><![CDATA[<svg>a" PASS "<svg><![CDATA[<svg>a"
FAIL "<svg><![CDATA[</svg>a" PASS "<svg><![CDATA[</svg>a"
FAIL "<svg><![CDATA[<svg>]]><path>" PASS "<svg><![CDATA[<svg>]]><path>"
FAIL "<svg><![CDATA[<svg>]]></path>" PASS "<svg><![CDATA[<svg>]]></path>"
FAIL "<svg><![CDATA[<svg>]]><!--path-->" PASS "<svg><![CDATA[<svg>]]><!--path-->"
FAIL "<svg><![CDATA[<svg>]]>path" PASS "<svg><![CDATA[<svg>]]>path"
FAIL "<svg><![CDATA[<!--svg-->]]>" PASS "<svg><![CDATA[<!--svg-->]]>"

View File

@ -155,6 +155,8 @@ type Tokenizer struct {
// convertNUL is whether NUL bytes in the current token's data should // convertNUL is whether NUL bytes in the current token's data should
// be converted into \ufffd replacement characters. // be converted into \ufffd replacement characters.
convertNUL bool convertNUL bool
// cdataOK is whether CDATA sections are allowed in the current context.
cdataOK bool
} }
// Err returns the error associated with the most recent ErrorToken token. // Err returns the error associated with the most recent ErrorToken token.
@ -347,8 +349,8 @@ func (z *Tokenizer) readUntilCloseAngle() {
} }
// readMarkupDeclaration reads the next token starting with "<!". It might be // readMarkupDeclaration reads the next token starting with "<!". It might be
// a "<!--comment-->", a "<!DOCTYPE foo>", or "<!a bogus comment". The opening // a "<!--comment-->", a "<!DOCTYPE foo>", a "<![CDATA[section]]>" or
// "<!" has already been consumed. // "<!a bogus comment". The opening "<!" has already been consumed.
func (z *Tokenizer) readMarkupDeclaration() TokenType { func (z *Tokenizer) readMarkupDeclaration() TokenType {
z.data.start = z.raw.end z.data.start = z.raw.end
var c [2]byte var c [2]byte
@ -364,27 +366,81 @@ func (z *Tokenizer) readMarkupDeclaration() TokenType {
return CommentToken return CommentToken
} }
z.raw.end -= 2 z.raw.end -= 2
if z.readDoctype() {
return DoctypeToken
}
if z.cdataOK && z.readCDATA() {
z.convertNUL = true
return TextToken
}
// It's a bogus comment.
z.readUntilCloseAngle()
return CommentToken
}
// readDoctype attempts to read a doctype declaration and returns true if
// successful. The opening "<!" has already been consumed.
func (z *Tokenizer) readDoctype() bool {
const s = "DOCTYPE" const s = "DOCTYPE"
for i := 0; i < len(s); i++ { for i := 0; i < len(s); i++ {
c := z.readByte() c := z.readByte()
if z.err != nil { if z.err != nil {
z.data.end = z.raw.end z.data.end = z.raw.end
return CommentToken return false
} }
if c != s[i] && c != s[i]+('a'-'A') { if c != s[i] && c != s[i]+('a'-'A') {
// Back up to read the fragment of "DOCTYPE" again. // Back up to read the fragment of "DOCTYPE" again.
z.raw.end = z.data.start z.raw.end = z.data.start
z.readUntilCloseAngle() return false
return CommentToken
} }
} }
if z.skipWhiteSpace(); z.err != nil { if z.skipWhiteSpace(); z.err != nil {
z.data.start = z.raw.end z.data.start = z.raw.end
z.data.end = z.raw.end z.data.end = z.raw.end
return DoctypeToken return true
} }
z.readUntilCloseAngle() z.readUntilCloseAngle()
return DoctypeToken return true
}
// readCDATA attempts to read a CDATA section and returns true if
// successful. The opening "<!" has already been consumed.
//
// On success, z.data spans the section's contents: it starts just after
// the "[CDATA[" prefix and ends just before the terminating "]]>". If z.err
// becomes set before "]]>" is seen, the section is still accepted and
// z.data.end is set to z.raw.end.
//
// On failure (the "[CDATA[" prefix does not match exactly, or z.err becomes
// set while reading the prefix) it returns false. After a mismatch,
// z.raw.end is reset to z.data.start so the caller can re-read those bytes.
func (z *Tokenizer) readCDATA() bool {
const s = "[CDATA["
// Match the prefix byte for byte; the comparison is case-sensitive.
for i := 0; i < len(s); i++ {
c := z.readByte()
if z.err != nil {
// z.err was set before the whole prefix was read: not a CDATA section.
z.data.end = z.raw.end
return false
}
if c != s[i] {
// Back up to read the fragment of "[CDATA[" again.
z.raw.end = z.data.start
return false
}
}
// The prefix matched, so the section's data starts here.
z.data.start = z.raw.end
// brackets counts the consecutive ']' bytes seen immediately before the
// current position; any other byte resets it to zero.
brackets := 0
for {
c := z.readByte()
if z.err != nil {
// Unterminated section: everything read so far is its data.
z.data.end = z.raw.end
return true
}
switch c {
case ']':
brackets++
case '>':
// The test is ">= 2" rather than "== 2" so that in a run such as
// "]]]>" only the last two brackets belong to the terminator; the
// earlier ones stay inside the data.
if brackets >= 2 {
// Exclude the terminating "]]>" from the data.
z.data.end = z.raw.end - len("]]>")
return true
}
brackets = 0
default:
brackets = 0
}
}
// The loop above has no break and exits only via return, so this
// statement is never executed. NOTE(review): presumably kept because the
// compiler of the time required a terminating statement; confirm.
panic("unreachable")
} }
// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end] // startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
@ -751,7 +807,7 @@ func (z *Tokenizer) Text() []byte {
z.data.start = z.raw.end z.data.start = z.raw.end
z.data.end = z.raw.end z.data.end = z.raw.end
s = convertNewlines(s) s = convertNewlines(s)
if z.convertNUL && bytes.Contains(s, nul) { if (z.convertNUL || z.tt == CommentToken) && bytes.Contains(s, nul) {
s = bytes.Replace(s, nul, replacement, -1) s = bytes.Replace(s, nul, replacement, -1)
} }
if !z.textIsRaw { if !z.textIsRaw {