mirror of
https://github.com/golang/go
synced 2024-11-26 05:37:57 -07:00
exp/html: Convert \r and \r\n to \n when tokenizing
Also escape "\r" as "&#13;" when rendering HTML. Pass 2 additional tests. R=nigeltao CC=golang-dev https://golang.org/cl/6260046
This commit is contained in:
parent
afe0e97aa6
commit
4e0749a478
@ -192,7 +192,7 @@ func lower(b []byte) []byte {
|
|||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
const escapedChars = `&'<>"`
|
const escapedChars = "&'<>\"\r"
|
||||||
|
|
||||||
func escape(w writer, s string) error {
|
func escape(w writer, s string) error {
|
||||||
i := strings.IndexAny(s, escapedChars)
|
i := strings.IndexAny(s, escapedChars)
|
||||||
@ -214,6 +214,8 @@ func escape(w writer, s string) error {
|
|||||||
case '"':
|
case '"':
|
||||||
// "&#34;" is shorter than "&quot;".
|
// "&#34;" is shorter than "&quot;".
|
||||||
esc = "&#34;"
|
esc = "&#34;"
|
||||||
|
case '\r':
|
||||||
|
esc = "&#13;"
|
||||||
default:
|
default:
|
||||||
panic("unrecognized escape character")
|
panic("unrecognized escape character")
|
||||||
}
|
}
|
||||||
|
@ -21,8 +21,8 @@ PASS "<svg>\x00 </svg><frameset>"
|
|||||||
FAIL "<svg>\x00a</svg><frameset>"
|
FAIL "<svg>\x00a</svg><frameset>"
|
||||||
PASS "<svg><path></path></svg><frameset>"
|
PASS "<svg><path></path></svg><frameset>"
|
||||||
PASS "<svg><p><frameset>"
|
PASS "<svg><p><frameset>"
|
||||||
FAIL "<!DOCTYPE html><pre>\r\n\r\nA</pre>"
|
PASS "<!DOCTYPE html><pre>\r\n\r\nA</pre>"
|
||||||
FAIL "<!DOCTYPE html><pre>\r\rA</pre>"
|
PASS "<!DOCTYPE html><pre>\r\rA</pre>"
|
||||||
PASS "<!DOCTYPE html><pre>\rA</pre>"
|
PASS "<!DOCTYPE html><pre>\rA</pre>"
|
||||||
PASS "<!DOCTYPE html><table><tr><td><math><mtext>\x00a"
|
PASS "<!DOCTYPE html><table><tr><td><math><mtext>\x00a"
|
||||||
PASS "<!DOCTYPE html><table><tr><td><svg><foreignObject>\x00a"
|
PASS "<!DOCTYPE html><table><tr><td><svg><foreignObject>\x00a"
|
||||||
|
@ -696,6 +696,38 @@ func (z *Tokenizer) Raw() []byte {
|
|||||||
return z.buf[z.raw.start:z.raw.end]
|
return z.buf[z.raw.start:z.raw.end]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// convertNewlines converts "\r" and "\r\n" in s to "\n".
// The conversion happens in place, but the resulting slice may be shorter.
func convertNewlines(s []byte) []byte {
	for read := 0; read < len(s); read++ {
		if s[read] != '\r' {
			continue
		}
		// s[read] is the first carriage return. Everything before it is
		// already clean; compact the rest of the slice in place.
		write := read
		for read < len(s) {
			c := s[read]
			if c == '\r' {
				// A "\r\n" pair collapses to a single '\n': skip the '\n'.
				if read+1 < len(s) && s[read+1] == '\n' {
					read++
				}
				c = '\n'
			}
			s[write] = c
			write++
			read++
		}
		return s[:write]
	}
	// No carriage returns at all: return s untouched.
	return s
}
|
||||||
|
|
||||||
// Text returns the unescaped text of a text, comment or doctype token. The
|
// Text returns the unescaped text of a text, comment or doctype token. The
|
||||||
// contents of the returned slice may change on the next call to Next.
|
// contents of the returned slice may change on the next call to Next.
|
||||||
func (z *Tokenizer) Text() []byte {
|
func (z *Tokenizer) Text() []byte {
|
||||||
@ -704,6 +736,7 @@ func (z *Tokenizer) Text() []byte {
|
|||||||
s := z.buf[z.data.start:z.data.end]
|
s := z.buf[z.data.start:z.data.end]
|
||||||
z.data.start = z.raw.end
|
z.data.start = z.raw.end
|
||||||
z.data.end = z.raw.end
|
z.data.end = z.raw.end
|
||||||
|
s = convertNewlines(s)
|
||||||
if !z.textIsRaw {
|
if !z.textIsRaw {
|
||||||
s = unescape(s)
|
s = unescape(s)
|
||||||
}
|
}
|
||||||
@ -739,7 +772,7 @@ func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
|
|||||||
z.nAttrReturned++
|
z.nAttrReturned++
|
||||||
key = z.buf[x[0].start:x[0].end]
|
key = z.buf[x[0].start:x[0].end]
|
||||||
val = z.buf[x[1].start:x[1].end]
|
val = z.buf[x[1].start:x[1].end]
|
||||||
return lower(key), unescape(val), z.nAttrReturned < len(z.attr)
|
return lower(key), unescape(convertNewlines(val)), z.nAttrReturned < len(z.attr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil, nil, false
|
return nil, nil, false
|
||||||
|
@ -592,6 +592,33 @@ loop:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConvertNewlines(t *testing.T) {
|
||||||
|
testCases := map[string]string{
|
||||||
|
"Mac\rDOS\r\nUnix\n": "Mac\nDOS\nUnix\n",
|
||||||
|
"Unix\nMac\rDOS\r\n": "Unix\nMac\nDOS\n",
|
||||||
|
"DOS\r\nDOS\r\nDOS\r\n": "DOS\nDOS\nDOS\n",
|
||||||
|
"": "",
|
||||||
|
"\n": "\n",
|
||||||
|
"\n\r": "\n\n",
|
||||||
|
"\r": "\n",
|
||||||
|
"\r\n": "\n",
|
||||||
|
"\r\n\n": "\n\n",
|
||||||
|
"\r\n\r": "\n\n",
|
||||||
|
"\r\n\r\n": "\n\n",
|
||||||
|
"\r\r": "\n\n",
|
||||||
|
"\r\r\n": "\n\n",
|
||||||
|
"\r\r\n\n": "\n\n\n",
|
||||||
|
"\r\r\r\n": "\n\n\n",
|
||||||
|
"\r \n": "\n \n",
|
||||||
|
"xyz": "xyz",
|
||||||
|
}
|
||||||
|
for in, want := range testCases {
|
||||||
|
if got := string(convertNewlines([]byte(in))); got != want {
|
||||||
|
t.Errorf("input %q: got %q, want %q", in, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
rawLevel = iota
|
rawLevel = iota
|
||||||
lowLevel
|
lowLevel
|
||||||
|
Loading…
Reference in New Issue
Block a user