mirror of
https://github.com/golang/go
synced 2024-11-18 20:44:45 -07:00
exp/html: parse CDATA sections in foreign content
Also convert NUL to U+FFFD in comments.
Pass 23 additional tests.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6446055
This commit is contained in:
parent
b9e051e82d
commit
a1f340fa1a
@ -390,6 +390,10 @@ func (p *parser) reconstructActiveFormattingElements() {
|
|||||||
|
|
||||||
// read reads the next token from the tokenizer.
|
// read reads the next token from the tokenizer.
|
||||||
func (p *parser) read() error {
|
func (p *parser) read() error {
|
||||||
|
// CDATA sections are allowed only in foreign content.
|
||||||
|
n := p.oe.top()
|
||||||
|
p.tokenizer.cdataOK = n != nil && n.Namespace != ""
|
||||||
|
|
||||||
p.tokenizer.Next()
|
p.tokenizer.Next()
|
||||||
p.tok = p.tokenizer.Token()
|
p.tok = p.tokenizer.Token()
|
||||||
if p.tok.Type == ErrorToken {
|
if p.tok.Type == ErrorToken {
|
||||||
|
@ -8,9 +8,9 @@ PASS "<html><select>\x00"
|
|||||||
PASS "\x00"
|
PASS "\x00"
|
||||||
PASS "<body>\x00"
|
PASS "<body>\x00"
|
||||||
PASS "<plaintext>\x00filler\x00text\x00"
|
PASS "<plaintext>\x00filler\x00text\x00"
|
||||||
FAIL "<svg><![CDATA[\x00filler\x00text\x00]]>"
|
PASS "<svg><![CDATA[\x00filler\x00text\x00]]>"
|
||||||
FAIL "<body><!\x00>"
|
PASS "<body><!\x00>"
|
||||||
FAIL "<body><!\x00filler\x00text>"
|
PASS "<body><!\x00filler\x00text>"
|
||||||
PASS "<body><svg><foreignObject>\x00filler\x00text"
|
PASS "<body><svg><foreignObject>\x00filler\x00text"
|
||||||
FAIL "<svg>\x00filler\x00text"
|
FAIL "<svg>\x00filler\x00text"
|
||||||
FAIL "<svg>\x00<frameset>"
|
FAIL "<svg>\x00<frameset>"
|
||||||
|
@ -1,22 +1,22 @@
|
|||||||
FAIL "<svg><![CDATA[foo]]>"
|
PASS "<svg><![CDATA[foo]]>"
|
||||||
FAIL "<math><![CDATA[foo]]>"
|
PASS "<math><![CDATA[foo]]>"
|
||||||
PASS "<div><![CDATA[foo]]>"
|
PASS "<div><![CDATA[foo]]>"
|
||||||
FAIL "<svg><![CDATA[foo"
|
PASS "<svg><![CDATA[foo"
|
||||||
FAIL "<svg><![CDATA[foo"
|
PASS "<svg><![CDATA[foo"
|
||||||
FAIL "<svg><![CDATA["
|
PASS "<svg><![CDATA["
|
||||||
FAIL "<svg><![CDATA[]]>"
|
PASS "<svg><![CDATA[]]>"
|
||||||
FAIL "<svg><![CDATA[]] >]]>"
|
PASS "<svg><![CDATA[]] >]]>"
|
||||||
FAIL "<svg><![CDATA[]] >]]>"
|
PASS "<svg><![CDATA[]] >]]>"
|
||||||
FAIL "<svg><![CDATA[]]"
|
PASS "<svg><![CDATA[]]"
|
||||||
FAIL "<svg><![CDATA[]"
|
PASS "<svg><![CDATA[]"
|
||||||
FAIL "<svg><![CDATA[]>a"
|
PASS "<svg><![CDATA[]>a"
|
||||||
PASS "<svg><foreignObject><div><![CDATA[foo]]>"
|
PASS "<svg><foreignObject><div><![CDATA[foo]]>"
|
||||||
FAIL "<svg><![CDATA[<svg>]]>"
|
PASS "<svg><![CDATA[<svg>]]>"
|
||||||
FAIL "<svg><![CDATA[</svg>a]]>"
|
PASS "<svg><![CDATA[</svg>a]]>"
|
||||||
FAIL "<svg><![CDATA[<svg>a"
|
PASS "<svg><![CDATA[<svg>a"
|
||||||
FAIL "<svg><![CDATA[</svg>a"
|
PASS "<svg><![CDATA[</svg>a"
|
||||||
FAIL "<svg><![CDATA[<svg>]]><path>"
|
PASS "<svg><![CDATA[<svg>]]><path>"
|
||||||
FAIL "<svg><![CDATA[<svg>]]></path>"
|
PASS "<svg><![CDATA[<svg>]]></path>"
|
||||||
FAIL "<svg><![CDATA[<svg>]]><!--path-->"
|
PASS "<svg><![CDATA[<svg>]]><!--path-->"
|
||||||
FAIL "<svg><![CDATA[<svg>]]>path"
|
PASS "<svg><![CDATA[<svg>]]>path"
|
||||||
FAIL "<svg><![CDATA[<!--svg-->]]>"
|
PASS "<svg><![CDATA[<!--svg-->]]>"
|
||||||
|
@ -155,6 +155,8 @@ type Tokenizer struct {
|
|||||||
// convertNUL is whether NUL bytes in the current token's data should
|
// convertNUL is whether NUL bytes in the current token's data should
|
||||||
// be converted into \ufffd replacement characters.
|
// be converted into \ufffd replacement characters.
|
||||||
convertNUL bool
|
convertNUL bool
|
||||||
|
// cdataOK is whether CDATA sections are allowed in the current context.
|
||||||
|
cdataOK bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Err returns the error associated with the most recent ErrorToken token.
|
// Err returns the error associated with the most recent ErrorToken token.
|
||||||
@ -347,8 +349,8 @@ func (z *Tokenizer) readUntilCloseAngle() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// readMarkupDeclaration reads the next token starting with "<!". It might be
|
// readMarkupDeclaration reads the next token starting with "<!". It might be
|
||||||
// a "<!--comment-->", a "<!DOCTYPE foo>", or "<!a bogus comment". The opening
|
// a "<!--comment-->", a "<!DOCTYPE foo>", a "<![CDATA[section]]>" or
|
||||||
// "<!" has already been consumed.
|
// "<!a bogus comment". The opening "<!" has already been consumed.
|
||||||
func (z *Tokenizer) readMarkupDeclaration() TokenType {
|
func (z *Tokenizer) readMarkupDeclaration() TokenType {
|
||||||
z.data.start = z.raw.end
|
z.data.start = z.raw.end
|
||||||
var c [2]byte
|
var c [2]byte
|
||||||
@ -364,27 +366,81 @@ func (z *Tokenizer) readMarkupDeclaration() TokenType {
|
|||||||
return CommentToken
|
return CommentToken
|
||||||
}
|
}
|
||||||
z.raw.end -= 2
|
z.raw.end -= 2
|
||||||
|
if z.readDoctype() {
|
||||||
|
return DoctypeToken
|
||||||
|
}
|
||||||
|
if z.cdataOK && z.readCDATA() {
|
||||||
|
z.convertNUL = true
|
||||||
|
return TextToken
|
||||||
|
}
|
||||||
|
// It's a bogus comment.
|
||||||
|
z.readUntilCloseAngle()
|
||||||
|
return CommentToken
|
||||||
|
}
|
||||||
|
|
||||||
|
// readDoctype attempts to read a doctype declaration and returns true if
|
||||||
|
// successful. The opening "<!" has already been consumed.
|
||||||
|
func (z *Tokenizer) readDoctype() bool {
|
||||||
const s = "DOCTYPE"
|
const s = "DOCTYPE"
|
||||||
for i := 0; i < len(s); i++ {
|
for i := 0; i < len(s); i++ {
|
||||||
c := z.readByte()
|
c := z.readByte()
|
||||||
if z.err != nil {
|
if z.err != nil {
|
||||||
z.data.end = z.raw.end
|
z.data.end = z.raw.end
|
||||||
return CommentToken
|
return false
|
||||||
}
|
}
|
||||||
if c != s[i] && c != s[i]+('a'-'A') {
|
if c != s[i] && c != s[i]+('a'-'A') {
|
||||||
// Back up to read the fragment of "DOCTYPE" again.
|
// Back up to read the fragment of "DOCTYPE" again.
|
||||||
z.raw.end = z.data.start
|
z.raw.end = z.data.start
|
||||||
z.readUntilCloseAngle()
|
return false
|
||||||
return CommentToken
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if z.skipWhiteSpace(); z.err != nil {
|
if z.skipWhiteSpace(); z.err != nil {
|
||||||
z.data.start = z.raw.end
|
z.data.start = z.raw.end
|
||||||
z.data.end = z.raw.end
|
z.data.end = z.raw.end
|
||||||
return DoctypeToken
|
return true
|
||||||
}
|
}
|
||||||
z.readUntilCloseAngle()
|
z.readUntilCloseAngle()
|
||||||
return DoctypeToken
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// readCDATA attempts to read a CDATA section and returns true if
|
||||||
|
// successful. The opening "<!" has already been consumed.
|
||||||
|
func (z *Tokenizer) readCDATA() bool {
|
||||||
|
const s = "[CDATA["
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c := z.readByte()
|
||||||
|
if z.err != nil {
|
||||||
|
z.data.end = z.raw.end
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if c != s[i] {
|
||||||
|
// Back up to read the fragment of "[CDATA[" again.
|
||||||
|
z.raw.end = z.data.start
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
z.data.start = z.raw.end
|
||||||
|
brackets := 0
|
||||||
|
for {
|
||||||
|
c := z.readByte()
|
||||||
|
if z.err != nil {
|
||||||
|
z.data.end = z.raw.end
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
switch c {
|
||||||
|
case ']':
|
||||||
|
brackets++
|
||||||
|
case '>':
|
||||||
|
if brackets >= 2 {
|
||||||
|
z.data.end = z.raw.end - len("]]>")
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
brackets = 0
|
||||||
|
default:
|
||||||
|
brackets = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
panic("unreachable")
|
||||||
}
|
}
|
||||||
|
|
||||||
// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
|
// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
|
||||||
@ -751,7 +807,7 @@ func (z *Tokenizer) Text() []byte {
|
|||||||
z.data.start = z.raw.end
|
z.data.start = z.raw.end
|
||||||
z.data.end = z.raw.end
|
z.data.end = z.raw.end
|
||||||
s = convertNewlines(s)
|
s = convertNewlines(s)
|
||||||
if z.convertNUL && bytes.Contains(s, nul) {
|
if (z.convertNUL || z.tt == CommentToken) && bytes.Contains(s, nul) {
|
||||||
s = bytes.Replace(s, nul, replacement, -1)
|
s = bytes.Replace(s, nul, replacement, -1)
|
||||||
}
|
}
|
||||||
if !z.textIsRaw {
|
if !z.textIsRaw {
|
||||||
|
Loading…
Reference in New Issue
Block a user