mirror of
https://github.com/golang/go
synced 2024-11-22 09:24:41 -07:00
html: parse empty, unquoted, and single-quoted attribute values
Fixes #1391 R=nigeltao CC=golang-dev https://golang.org/cl/4453054
This commit is contained in:
parent
5473103666
commit
f4e5f364c7
@ -331,10 +331,10 @@ func (z *Tokenizer) trim(i int) int {
|
|||||||
return k
|
return k
|
||||||
}
|
}
|
||||||
|
|
||||||
// lower finds the largest alphanumeric [0-9A-Za-z]* word at the start of z.buf[i:]
|
// word finds the largest alphanumeric [0-9A-Za-z]* word at the start
|
||||||
// and returns that word lower-cased, as well as the trimmed cursor location
|
// of z.buf[i:] and returns that word (optionally lower-cased), as
|
||||||
// after that word.
|
// well as the trimmed cursor location after that word.
|
||||||
func (z *Tokenizer) lower(i int) ([]byte, int) {
|
func (z *Tokenizer) word(i int, lower bool) ([]byte, int) {
|
||||||
i0 := i
|
i0 := i
|
||||||
loop:
|
loop:
|
||||||
for ; i < z.p1; i++ {
|
for ; i < z.p1; i++ {
|
||||||
@ -343,7 +343,9 @@ loop:
|
|||||||
case '0' <= c && c <= '9':
|
case '0' <= c && c <= '9':
|
||||||
// No-op.
|
// No-op.
|
||||||
case 'A' <= c && c <= 'Z':
|
case 'A' <= c && c <= 'Z':
|
||||||
|
if lower {
|
||||||
z.buf[i] = c + 'a' - 'A'
|
z.buf[i] = c + 'a' - 'A'
|
||||||
|
}
|
||||||
case 'a' <= c && c <= 'z':
|
case 'a' <= c && c <= 'z':
|
||||||
// No-op.
|
// No-op.
|
||||||
default:
|
default:
|
||||||
@ -388,7 +390,7 @@ func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
|
|||||||
if z.buf[i] == '/' {
|
if z.buf[i] == '/' {
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
name, z.p0 = z.lower(i)
|
name, z.p0 = z.word(i, true)
|
||||||
hasAttr = z.p0 != z.p1
|
hasAttr = z.p0 != z.p1
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -397,23 +399,36 @@ func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
|
|||||||
// attribute for the current tag token and whether there are more attributes.
|
// attribute for the current tag token and whether there are more attributes.
|
||||||
// The contents of the returned slices may change on the next call to Next.
|
// The contents of the returned slices may change on the next call to Next.
|
||||||
func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
|
func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
|
||||||
key, i := z.lower(z.p0)
|
key, i := z.word(z.p0, true)
|
||||||
// Get past the "=\"".
|
// Check for an empty attribute value.
|
||||||
if i == z.p1 || z.buf[i] != '=' {
|
if i == z.p1 {
|
||||||
|
z.p0 = i
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Get past the equals and quote characters.
|
||||||
|
if z.buf[i] != '=' {
|
||||||
|
z.p0, moreAttr = i, true
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
i = z.trim(i + 1)
|
i = z.trim(i + 1)
|
||||||
if i == z.p1 || z.buf[i] != '"' {
|
if i == z.p1 {
|
||||||
|
z.p0 = i
|
||||||
|
return
|
||||||
|
}
|
||||||
|
closeQuote := z.buf[i]
|
||||||
|
if closeQuote != '\'' && closeQuote != '"' {
|
||||||
|
val, z.p0 = z.word(i, false)
|
||||||
|
moreAttr = z.p0 != z.p1
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
i = z.trim(i + 1)
|
i = z.trim(i + 1)
|
||||||
// Copy and unescape everything up to the closing '"'.
|
// Copy and unescape everything up to the closing quote.
|
||||||
dst, src := i, i
|
dst, src := i, i
|
||||||
loop:
|
loop:
|
||||||
for src < z.p1 {
|
for src < z.p1 {
|
||||||
c := z.buf[src]
|
c := z.buf[src]
|
||||||
switch c {
|
switch c {
|
||||||
case '"':
|
case closeQuote:
|
||||||
src++
|
src++
|
||||||
break loop
|
break loop
|
||||||
case '&':
|
case '&':
|
||||||
|
@ -107,6 +107,44 @@ var tokenTests = []tokenTest{
|
|||||||
`<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`,
|
`<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`,
|
||||||
`<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`,
|
`<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Attribute tests:
|
||||||
|
// http://dev.w3.org/html5/spec/Overview.html#attributes-0
|
||||||
|
{
|
||||||
|
"Empty attribute",
|
||||||
|
`<input disabled FOO>`,
|
||||||
|
`<input disabled="" foo="">`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Empty attribute, whitespace",
|
||||||
|
`<input disabled FOO >`,
|
||||||
|
`<input disabled="" foo="">`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Unquoted attribute value",
|
||||||
|
`<input value=yes FOO=BAR>`,
|
||||||
|
`<input value="yes" foo="BAR">`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Unquoted attribute value, trailing space",
|
||||||
|
`<input value=yes FOO=BAR >`,
|
||||||
|
`<input value="yes" foo="BAR">`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Single-quoted attribute value",
|
||||||
|
`<input value='yes' FOO='BAR'>`,
|
||||||
|
`<input value="yes" foo="BAR">`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Single-quoted attribute value, trailing space",
|
||||||
|
`<input value='yes' FOO='BAR' >`,
|
||||||
|
`<input value="yes" foo="BAR">`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Double-quoted attribute value",
|
||||||
|
`<input value="I'm an attribute" FOO="BAR">`,
|
||||||
|
`<input value="I'm an attribute" foo="BAR">`,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestTokenizer(t *testing.T) {
|
func TestTokenizer(t *testing.T) {
|
||||||
|
Loading…
Reference in New Issue
Block a user