mirror of
https://github.com/golang/go
synced 2024-11-18 05:54:49 -07:00
5b92028155
Add benchmarks for sparsely escaped and densely escaped strings. Then speed up the sparse unescaping part heavily by using IndexByte and copy to skip the parts containing no escaping very fast. Unescaping densely escaped strings is slower because of the new function call overhead. But sparsely encoded strings are seen more often in the utf8 enabled web. We win part of the speed back by looking up entityName differently. benchmark old ns/op new ns/op delta BenchmarkEscape 31680 31396 -0.90% BenchmarkEscapeNone 6507 6872 +5.61% BenchmarkUnescape 36481 48298 +32.39% BenchmarkUnescapeNone 332 325 -2.11% BenchmarkUnescapeSparse 8836 3221 -63.55% BenchmarkUnescapeDense 30639 32224 +5.17% Change-Id: If606cb01897a40eefe35ba98f2ff23bb25251606 Reviewed-on: https://go-review.googlesource.com/10172 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
170 lines
3.3 KiB
Go
170 lines
3.3 KiB
Go
// Copyright 2013 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package html
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
// unescapeTest is one table entry for TestUnescape: an input string and the
// text UnescapeString is expected to return for it.
//
// The field order matters: the unescapeTests table uses positional
// (unkeyed) composite literals.
type unescapeTest struct {
	// A short description of the test case, printed on failure.
	desc string
	// The HTML text passed to UnescapeString.
	html string
	// The expected unescaped text.
	unescaped string
}
|
|
|
|
var unescapeTests = []unescapeTest{
|
|
// Handle no entities.
|
|
{
|
|
"copy",
|
|
"A\ttext\nstring",
|
|
"A\ttext\nstring",
|
|
},
|
|
// Handle simple named entities.
|
|
{
|
|
"simple",
|
|
"& > <",
|
|
"& > <",
|
|
},
|
|
// Handle hitting the end of the string.
|
|
{
|
|
"stringEnd",
|
|
"& &",
|
|
"& &",
|
|
},
|
|
// Handle entities with two codepoints.
|
|
{
|
|
"multiCodepoint",
|
|
"text ⋛︀ blah",
|
|
"text \u22db\ufe00 blah",
|
|
},
|
|
// Handle decimal numeric entities.
|
|
{
|
|
"decimalEntity",
|
|
"Delta = Δ ",
|
|
"Delta = Δ ",
|
|
},
|
|
// Handle hexadecimal numeric entities.
|
|
{
|
|
"hexadecimalEntity",
|
|
"Lambda = λ = λ ",
|
|
"Lambda = λ = λ ",
|
|
},
|
|
// Handle numeric early termination.
|
|
{
|
|
"numericEnds",
|
|
"&# &#x €43 © = ©f = ©",
|
|
"&# &#x €43 © = ©f = ©",
|
|
},
|
|
// Handle numeric ISO-8859-1 entity replacements.
|
|
{
|
|
"numericReplacements",
|
|
"Footnote‡",
|
|
"Footnote‡",
|
|
},
|
|
// Handle single ampersand.
|
|
{
|
|
"copySingleAmpersand",
|
|
"&",
|
|
"&",
|
|
},
|
|
// Handle ampersand followed by non-entity.
|
|
{
|
|
"copyAmpersandNonEntity",
|
|
"text &test",
|
|
"text &test",
|
|
},
|
|
// Handle "&#".
|
|
{
|
|
"copyAmpersandHash",
|
|
"text &#",
|
|
"text &#",
|
|
},
|
|
}
|
|
|
|
func TestUnescape(t *testing.T) {
|
|
for _, tt := range unescapeTests {
|
|
unescaped := UnescapeString(tt.html)
|
|
if unescaped != tt.unescaped {
|
|
t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestUnescapeEscape(t *testing.T) {
|
|
ss := []string{
|
|
``,
|
|
`abc def`,
|
|
`a & b`,
|
|
`a&b`,
|
|
`a & b`,
|
|
`"`,
|
|
`"`,
|
|
`"<&>"`,
|
|
`"<&>"`,
|
|
`3&5==1 && 0<1, "0<1", a+acute=á`,
|
|
`The special characters are: <, >, &, ' and "`,
|
|
}
|
|
for _, s := range ss {
|
|
if got := UnescapeString(EscapeString(s)); got != s {
|
|
t.Errorf("got %q want %q", got, s)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Inputs shared by the benchmarks below.
var (
	// benchEscapeData repeats a 40-byte unit containing one each of
	// <, >, &, ' and " 100 times.
	benchEscapeData = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100)
	// benchEscapeNone has the same shape as benchEscapeData, with every
	// special character replaced by a plain "x".
	benchEscapeNone = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100)
	// benchUnescapeSparse is mostly plain text: ten runs of 400 plain bytes,
	// each followed by a single "&amp;" entity.
	benchUnescapeSparse = strings.Repeat(strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 10)+"&amp;", 10)
	// benchUnescapeDense is almost entirely entities: four entities per
	// 20-byte unit, repeated 100 times.
	benchUnescapeDense = strings.Repeat("&amp;&lt; &amp; &lt;", 100)
)
|
|
|
|
func BenchmarkEscape(b *testing.B) {
|
|
n := 0
|
|
for i := 0; i < b.N; i++ {
|
|
n += len(EscapeString(benchEscapeData))
|
|
}
|
|
}
|
|
|
|
func BenchmarkEscapeNone(b *testing.B) {
|
|
n := 0
|
|
for i := 0; i < b.N; i++ {
|
|
n += len(EscapeString(benchEscapeNone))
|
|
}
|
|
}
|
|
|
|
func BenchmarkUnescape(b *testing.B) {
|
|
s := EscapeString(benchEscapeData)
|
|
n := 0
|
|
for i := 0; i < b.N; i++ {
|
|
n += len(UnescapeString(s))
|
|
}
|
|
}
|
|
|
|
func BenchmarkUnescapeNone(b *testing.B) {
|
|
s := EscapeString(benchEscapeNone)
|
|
n := 0
|
|
for i := 0; i < b.N; i++ {
|
|
n += len(UnescapeString(s))
|
|
}
|
|
}
|
|
|
|
func BenchmarkUnescapeSparse(b *testing.B) {
|
|
n := 0
|
|
for i := 0; i < b.N; i++ {
|
|
n += len(UnescapeString(benchUnescapeSparse))
|
|
}
|
|
}
|
|
|
|
func BenchmarkUnescapeDense(b *testing.B) {
|
|
n := 0
|
|
for i := 0; i < b.N; i++ {
|
|
n += len(UnescapeString(benchUnescapeDense))
|
|
}
|
|
}
|