mirror of
https://github.com/golang/go
synced 2024-11-12 08:40:21 -07:00
Small performance improvements to the HTML tokenizer based on your 'TODO's.
R=nigeltao_golang CC=golang-dev https://golang.org/cl/1941042
This commit is contained in:
parent
bca3151042
commit
8fcdc6a1e2
@ -5,6 +5,7 @@
|
||||
package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"utf8"
|
||||
)
|
||||
@ -60,18 +61,45 @@ func unescape(b []byte) []byte {
|
||||
return b
|
||||
}
|
||||
|
||||
// escapedChars lists the five bytes that escape rewrites as HTML entities.
const escapedChars = `&'<>"`

// escape appends s to buf, replacing every occurrence of a character in
// escapedChars with the corresponding HTML entity (&amp;, &apos;, &lt;, &gt;
// or &quot;). All other bytes are copied through unchanged.
func escape(buf *bytes.Buffer, s string) {
	i := strings.IndexAny(s, escapedChars)
	for i != -1 {
		// Copy the run of ordinary bytes that precedes the special character.
		buf.WriteString(s[0:i])
		var esc string
		switch s[i] {
		case '&':
			esc = "&amp;"
		case '\'':
			esc = "&apos;"
		case '<':
			esc = "&lt;"
		case '>':
			esc = "&gt;"
		case '"':
			esc = "&quot;"
		default:
			// IndexAny only reports bytes from escapedChars, so reaching this
			// branch means the switch and escapedChars have drifted apart.
			panic("unrecognized escape character")
		}
		// Continue scanning after the character that was just replaced.
		s = s[i+1:]
		buf.WriteString(esc)
		i = strings.IndexAny(s, escapedChars)
	}
	// Whatever remains contains no special characters.
	buf.WriteString(s)
}
|
||||
|
||||
// EscapeString escapes special characters like "<" to become "<". It
|
||||
// escapes only five such characters: amp, apos, lt, gt and quot.
|
||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||
// always true.
|
||||
func EscapeString(s string) string {
|
||||
// TODO(nigeltao): Do this much more efficiently.
|
||||
s = strings.Replace(s, `&`, `&`, -1)
|
||||
s = strings.Replace(s, `'`, `'`, -1)
|
||||
s = strings.Replace(s, `<`, `<`, -1)
|
||||
s = strings.Replace(s, `>`, `>`, -1)
|
||||
s = strings.Replace(s, `"`, `"`, -1)
|
||||
return s
|
||||
if strings.IndexAny(s, escapedChars) == -1 {
|
||||
return s
|
||||
}
|
||||
buf := bytes.NewBuffer(nil)
|
||||
escape(buf, s)
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
|
||||
|
@ -5,6 +5,7 @@
|
||||
package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
@ -68,12 +69,19 @@ type Token struct {
|
||||
|
||||
// tagString returns a string representation of a tag Token's Data and Attr.
|
||||
func (t Token) tagString() string {
|
||||
// TODO(nigeltao): Don't use string concatenation; it is inefficient.
|
||||
s := string(t.Data)
|
||||
for _, a := range t.Attr {
|
||||
s += ` ` + a.Key + `="` + EscapeString(a.Val) + `"`
|
||||
if len(t.Attr) == 0 {
|
||||
return t.Data
|
||||
}
|
||||
return s
|
||||
buf := bytes.NewBuffer(nil)
|
||||
buf.WriteString(t.Data)
|
||||
for _, a := range t.Attr {
|
||||
buf.WriteByte(' ')
|
||||
buf.WriteString(a.Key)
|
||||
buf.WriteString(`="`)
|
||||
escape(buf, a.Val)
|
||||
buf.WriteByte('"')
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// String returns a string representation of the Token.
|
||||
|
Loading…
Reference in New Issue
Block a user