mirror of
https://github.com/golang/go
synced 2024-11-24 22:57:57 -07:00
strings: implement a faster byte->string Replacer
This implements a replacer for when all old strings are single bytes, but new values are not. BenchmarkHTMLEscapeNew 1000000 1090 ns/op BenchmarkHTMLEscapeOld 1000000 2049 ns/op R=rsc CC=golang-dev https://golang.org/cl/5176043
This commit is contained in:
parent
e419535f2a
commit
bba7396fbd
@ -47,6 +47,8 @@ func (h Header) Write(w io.Writer) os.Error {
|
||||
return h.WriteSubset(w, nil)
|
||||
}
|
||||
|
||||
var headerNewlineToSpace = strings.NewReplacer("\n", " ", "\r", " ")
|
||||
|
||||
// WriteSubset writes a header in wire format.
|
||||
// If exclude is not nil, keys where exclude[key] == true are not written.
|
||||
func (h Header) WriteSubset(w io.Writer, exclude map[string]bool) os.Error {
|
||||
@ -59,8 +61,7 @@ func (h Header) WriteSubset(w io.Writer, exclude map[string]bool) os.Error {
|
||||
sort.Strings(keys)
|
||||
for _, k := range keys {
|
||||
for _, v := range h[k] {
|
||||
v = strings.Replace(v, "\n", " ", -1)
|
||||
v = strings.Replace(v, "\r", " ", -1)
|
||||
v = headerNewlineToSpace.Replace(v)
|
||||
v = strings.TrimSpace(v)
|
||||
if _, err := fmt.Fprintf(w, "%s: %s\r\n", k, v); err != nil {
|
||||
return err
|
||||
|
@ -752,13 +752,16 @@ func Redirect(w ResponseWriter, r *Request, urlStr string, code int) {
|
||||
}
|
||||
}
|
||||
|
||||
var htmlReplacer = strings.NewReplacer(
|
||||
"&", "&",
|
||||
"<", "<",
|
||||
">", ">",
|
||||
`"`, """,
|
||||
"'", "'",
|
||||
)
|
||||
|
||||
func htmlEscape(s string) string {
|
||||
s = strings.Replace(s, "&", "&", -1)
|
||||
s = strings.Replace(s, "<", "<", -1)
|
||||
s = strings.Replace(s, ">", ">", -1)
|
||||
s = strings.Replace(s, "\"", """, -1)
|
||||
s = strings.Replace(s, "'", "'", -1)
|
||||
return s
|
||||
return htmlReplacer.Replace(s)
|
||||
}
|
||||
|
||||
// Redirect to a fixed URL
|
||||
|
@ -85,10 +85,10 @@ func (w *Writer) CreatePart(header textproto.MIMEHeader) (io.Writer, os.Error) {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
var quoteEscaper = strings.NewReplacer("\\", "\\\\", `"`, "\\\"")
|
||||
|
||||
func escapeQuotes(s string) string {
|
||||
s = strings.Replace(s, "\\", "\\\\", -1)
|
||||
s = strings.Replace(s, "\"", "\\\"", -1)
|
||||
return s
|
||||
return quoteEscaper.Replace(s)
|
||||
}
|
||||
|
||||
// CreateFormFile is a convenience wrapper around CreatePart. It creates
|
||||
|
@ -36,8 +36,12 @@ func NewReplacer(oldnew ...string) *Replacer {
|
||||
panic("strings.NewReplacer: odd argument count")
|
||||
}
|
||||
|
||||
var bb byteReplacer
|
||||
var gen genericReplacer
|
||||
// Possible implementations.
|
||||
var (
|
||||
bb byteReplacer
|
||||
bs byteStringReplacer
|
||||
gen genericReplacer
|
||||
)
|
||||
|
||||
allOldBytes, allNewBytes := true, true
|
||||
for len(oldnew) > 0 {
|
||||
@ -49,7 +53,17 @@ func NewReplacer(oldnew ...string) *Replacer {
|
||||
if len(new) != 1 {
|
||||
allNewBytes = false
|
||||
}
|
||||
|
||||
// generic
|
||||
gen.p = append(gen.p, pair{old, new})
|
||||
|
||||
// byte -> string
|
||||
if allOldBytes {
|
||||
bs.old.set(old[0])
|
||||
bs.new[old[0]] = []byte(new)
|
||||
}
|
||||
|
||||
// byte -> byte
|
||||
if allOldBytes && allNewBytes {
|
||||
bb.old.set(old[0])
|
||||
bb.new[old[0]] = new[0]
|
||||
@ -59,6 +73,9 @@ func NewReplacer(oldnew ...string) *Replacer {
|
||||
if allOldBytes && allNewBytes {
|
||||
return &Replacer{r: &bb}
|
||||
}
|
||||
if allOldBytes {
|
||||
return &Replacer{r: &bs}
|
||||
}
|
||||
return &Replacer{r: &gen}
|
||||
}
|
||||
|
||||
@ -176,6 +193,7 @@ func (r *byteReplacer) Replace(s string) string {
|
||||
}
|
||||
|
||||
func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) {
|
||||
// TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
|
||||
bufsize := 32 << 10
|
||||
if len(s) < bufsize {
|
||||
bufsize = len(s)
|
||||
@ -199,6 +217,94 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error)
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// byteStringReplacer is the implementation that's used when all the
|
||||
// "old" values are single ASCII bytes but the "new" values vary in
|
||||
// size.
|
||||
type byteStringReplacer struct {
|
||||
// old has a bit set for each old byte that should be replaced.
|
||||
old byteBitmap
|
||||
|
||||
// replacement string, indexed by old byte. only valid if
|
||||
// corresponding old bit is set.
|
||||
new [256][]byte
|
||||
}
|
||||
|
||||
func (r *byteStringReplacer) Replace(s string) string {
|
||||
newSize := 0
|
||||
anyChanges := false
|
||||
for i := 0; i < len(s); i++ {
|
||||
b := s[i]
|
||||
if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
|
||||
anyChanges = true
|
||||
newSize += len(r.new[b])
|
||||
} else {
|
||||
newSize++
|
||||
}
|
||||
}
|
||||
if !anyChanges {
|
||||
return s
|
||||
}
|
||||
buf := make([]byte, newSize)
|
||||
bi := buf
|
||||
for i := 0; i < len(s); i++ {
|
||||
b := s[i]
|
||||
if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
|
||||
n := copy(bi[:], r.new[b])
|
||||
bi = bi[n:]
|
||||
} else {
|
||||
bi[0] = b
|
||||
bi = bi[1:]
|
||||
}
|
||||
}
|
||||
return string(buf)
|
||||
}
|
||||
|
||||
// WriteString maintains one buffer that's at most 32KB. The bytes in
|
||||
// s are enumerated and the buffer is filled. If it reaches its
|
||||
// capacity or a byte has a replacement, the buffer is flushed to w.
|
||||
func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) {
|
||||
// TODO(bradfitz): use io.WriteString with slices of s instead.
|
||||
bufsize := 32 << 10
|
||||
if len(s) < bufsize {
|
||||
bufsize = len(s)
|
||||
}
|
||||
buf := make([]byte, bufsize)
|
||||
bi := buf[:0]
|
||||
|
||||
for i := 0; i < len(s); i++ {
|
||||
b := s[i]
|
||||
var new []byte
|
||||
if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
|
||||
new = r.new[b]
|
||||
} else {
|
||||
bi = append(bi, b)
|
||||
}
|
||||
if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) {
|
||||
nw, err := w.Write(bi)
|
||||
n += nw
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
bi = buf[:0]
|
||||
}
|
||||
if len(new) > 0 {
|
||||
nw, err := w.Write(new)
|
||||
n += nw
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(bi) > 0 {
|
||||
nw, err := w.Write(bi)
|
||||
n += nw
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// strings is too low-level to import io/ioutil
|
||||
var discard io.Writer = devNull(0)
|
||||
|
||||
|
@ -41,14 +41,21 @@ var capitalLetters = NewReplacer("a", "A", "b", "B")
|
||||
var blankToXReplacer = NewReplacer("", "X", "o", "O")
|
||||
|
||||
var ReplacerTests = []ReplacerTest{
|
||||
// byte->string
|
||||
{htmlEscaper, "No changes", "No changes"},
|
||||
{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"},
|
||||
{htmlEscaper, "&&&", "&&&"},
|
||||
|
||||
// generic
|
||||
{replacer, "fooaaabar", "foo3[aaa]b1[a]r"},
|
||||
{replacer, "long, longerst, longer", "short, most long, medium"},
|
||||
{replacer, "XiX", "YiY"},
|
||||
|
||||
// byte->byte
|
||||
{capitalLetters, "brad", "BrAd"},
|
||||
{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
|
||||
|
||||
// hitting "" special case
|
||||
{blankToXReplacer, "oo", "XOXOX"},
|
||||
}
|
||||
|
||||
@ -84,7 +91,9 @@ type pickAlgorithmTest struct {
|
||||
|
||||
var pickAlgorithmTests = []pickAlgorithmTest{
|
||||
{capitalLetters, "*strings.byteReplacer"},
|
||||
{NewReplacer("a", "A", "b", "Bb"), "*strings.genericReplacer"},
|
||||
{NewReplacer("12", "123"), "*strings.genericReplacer"},
|
||||
{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
|
||||
{htmlEscaper, "*strings.byteStringReplacer"},
|
||||
}
|
||||
|
||||
func TestPickAlgorithm(t *testing.T) {
|
||||
@ -118,6 +127,27 @@ func BenchmarkByteByteMatch(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteStringMatch(b *testing.B) {
|
||||
str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHTMLEscapeNew(b *testing.B) {
|
||||
str := "I <3 to escape HTML & other text too."
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHTMLEscapeOld(b *testing.B) {
|
||||
str := "I <3 to escape HTML & other text too."
|
||||
for i := 0; i < b.N; i++ {
|
||||
oldhtmlEscape(str)
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
|
||||
func BenchmarkByteByteReplaces(b *testing.B) {
|
||||
str := Repeat("a", 100) + Repeat("b", 100)
|
||||
|
Loading…
Reference in New Issue
Block a user