mirror of
https://github.com/golang/go
synced 2024-11-24 17:20:12 -07:00
html: improve attribute parsing, note package status
Fixes #1890.
R=nigeltao
CC=golang-dev
https://golang.org/cl/4528102
This commit is contained in:
parent
9e857dbdcc
commit
5e03143c1a
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
Package html implements an HTML5-compliant tokenizer and parser.
|
Package html implements an HTML5-compliant tokenizer and parser.
|
||||||
|
INCOMPLETE.
|
||||||
|
|
||||||
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
|
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
|
||||||
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
|
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
|
||||||
|
@ -355,6 +355,33 @@ loop:
|
|||||||
return z.buf[i0:i], z.trim(i)
|
return z.buf[i0:i], z.trim(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// attrName finds the longest attribute name at the start
|
||||||
|
// of z.buf[i:] and returns it lower-cased, as well
|
||||||
|
// as the trimmed cursor location after that word.
|
||||||
|
//
|
||||||
|
// http://dev.w3.org/html5/spec/Overview.html#syntax-attribute-name
|
||||||
|
// TODO: handle non-ASCII (Unicode) characters; any byte >= 0x7f currently ends the name.
|
||||||
|
func (z *Tokenizer) attrName(i int) ([]byte, int) {
|
||||||
|
i0 := i
|
||||||
|
loop:
|
||||||
|
for ; i < z.p1; i++ {
|
||||||
|
c := z.buf[i]
|
||||||
|
switch c {
|
||||||
|
case '<', '>', '"', '\'', '/', '=':
|
||||||
|
break loop
|
||||||
|
}
|
||||||
|
switch {
|
||||||
|
case 'A' <= c && c <= 'Z':
|
||||||
|
z.buf[i] = c + 'a' - 'A'
|
||||||
|
case c > ' ' && c < 0x7f:
|
||||||
|
// No-op.
|
||||||
|
default:
|
||||||
|
break loop
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return z.buf[i0:i], z.trim(i)
|
||||||
|
}
|
||||||
|
|
||||||
// Text returns the unescaped text of a TextToken or a CommentToken.
|
// Text returns the unescaped text of a TextToken or a CommentToken.
|
||||||
// The contents of the returned slice may change on the next call to Next.
|
// The contents of the returned slice may change on the next call to Next.
|
||||||
func (z *Tokenizer) Text() []byte {
|
func (z *Tokenizer) Text() []byte {
|
||||||
@ -399,7 +426,7 @@ func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
|
|||||||
// attribute for the current tag token and whether there are more attributes.
|
// attribute for the current tag token and whether there are more attributes.
|
||||||
// The contents of the returned slices may change on the next call to Next.
|
// The contents of the returned slices may change on the next call to Next.
|
||||||
func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
|
func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
|
||||||
key, i := z.word(z.p0, true)
|
key, i := z.attrName(z.p0)
|
||||||
// Check for an empty attribute value.
|
// Check for an empty attribute value.
|
||||||
if i == z.p1 {
|
if i == z.p1 {
|
||||||
z.p0 = i
|
z.p0 = i
|
||||||
|
@ -125,6 +125,11 @@ var tokenTests = []tokenTest{
|
|||||||
`<input value=yes FOO=BAR>`,
|
`<input value=yes FOO=BAR>`,
|
||||||
`<input value="yes" foo="BAR">`,
|
`<input value="yes" foo="BAR">`,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"Unquoted attribute value, spaces",
|
||||||
|
`<input value = yes FOO = BAR>`,
|
||||||
|
`<input value="yes" foo="BAR">`,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"Unquoted attribute value, trailing space",
|
"Unquoted attribute value, trailing space",
|
||||||
`<input value=yes FOO=BAR >`,
|
`<input value=yes FOO=BAR >`,
|
||||||
@ -145,6 +150,11 @@ var tokenTests = []tokenTest{
|
|||||||
`<input value="I'm an attribute" FOO="BAR">`,
|
`<input value="I'm an attribute" FOO="BAR">`,
|
||||||
`<input value="I'm an attribute" foo="BAR">`,
|
`<input value="I'm an attribute" foo="BAR">`,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"Attribute name characters",
|
||||||
|
`<meta http-equiv="content-type">`,
|
||||||
|
`<meta http-equiv="content-type">`,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestTokenizer(t *testing.T) {
|
func TestTokenizer(t *testing.T) {
|
||||||
|
Loading…
Reference in New Issue
Block a user