mirror of
https://github.com/golang/go
synced 2024-11-20 09:54:45 -07:00
net/textproto: faster header canonicalization with fewer allocations
By keeping a single copy of the strings that commonly show up in headers, we can avoid one string allocation per header.

benchmark                  old ns/op    new ns/op    delta
BenchmarkReadMIMEHeader        19590        10824  -44.75%
BenchmarkUncommon               3168         1861  -41.26%

benchmark                 old allocs   new allocs    delta
BenchmarkReadMIMEHeader           32           25  -21.88%
BenchmarkUncommon                  5            5    0.00%

R=bradfitz, golang-dev, dave, rsc, jra
CC=golang-dev
https://golang.org/cl/6721055
This commit is contained in:
parent
73c67606e9
commit
7e7b89f7d0
@ -486,6 +486,7 @@ func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
|
|||||||
// letter and any letter following a hyphen to upper case;
|
// letter and any letter following a hyphen to upper case;
|
||||||
// the rest are converted to lowercase. For example, the
|
// the rest are converted to lowercase. For example, the
|
||||||
// canonical key for "accept-encoding" is "Accept-Encoding".
|
// canonical key for "accept-encoding" is "Accept-Encoding".
|
||||||
|
// MIME header keys are assumed to be ASCII only.
|
||||||
func CanonicalMIMEHeaderKey(s string) string {
|
func CanonicalMIMEHeaderKey(s string) string {
|
||||||
// Quick check for canonical encoding.
|
// Quick check for canonical encoding.
|
||||||
upper := true
|
upper := true
|
||||||
@ -502,28 +503,90 @@ func CanonicalMIMEHeaderKey(s string) string {
|
|||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const toLower = 'a' - 'A'
|
||||||
|
|
||||||
// canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
|
// canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
|
||||||
// allowed to mutate the provided byte slice before returning the
|
// allowed to mutate the provided byte slice before returning the
|
||||||
// string.
|
// string.
|
||||||
func canonicalMIMEHeaderKey(a []byte) string {
|
func canonicalMIMEHeaderKey(a []byte) string {
|
||||||
|
// Look for it in commonHeaders, so that we can avoid an
|
||||||
|
// allocation by sharing the strings among all users
|
||||||
|
// of textproto. If we don't find it, the slice a has been canonicalized
|
||||||
|
// so just return string(a).
|
||||||
|
upper := true
|
||||||
|
lo := 0
|
||||||
|
hi := len(commonHeaders)
|
||||||
|
for i := 0; i < len(a); i++ {
|
||||||
// Canonicalize: first letter upper case
|
// Canonicalize: first letter upper case
|
||||||
// and upper case after each dash.
|
// and upper case after each dash.
|
||||||
// (Host, User-Agent, If-Modified-Since).
|
// (Host, User-Agent, If-Modified-Since).
|
||||||
// MIME headers are ASCII only, so no Unicode issues.
|
// MIME headers are ASCII only, so no Unicode issues.
|
||||||
upper := true
|
if a[i] == ' ' {
|
||||||
for i, v := range a {
|
|
||||||
if v == ' ' {
|
|
||||||
a[i] = '-'
|
a[i] = '-'
|
||||||
upper = true
|
upper = true
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if upper && 'a' <= v && v <= 'z' {
|
c := a[i]
|
||||||
a[i] = v + 'A' - 'a'
|
if upper && 'a' <= c && c <= 'z' {
|
||||||
|
c -= toLower
|
||||||
|
} else if !upper && 'A' <= c && c <= 'Z' {
|
||||||
|
c += toLower
|
||||||
}
|
}
|
||||||
if !upper && 'A' <= v && v <= 'Z' {
|
a[i] = c
|
||||||
a[i] = v + 'a' - 'A'
|
upper = c == '-' // for next time
|
||||||
|
|
||||||
|
if lo < hi {
|
||||||
|
for lo < hi && (len(commonHeaders[lo]) <= i || commonHeaders[lo][i] < c) {
|
||||||
|
lo++
|
||||||
}
|
}
|
||||||
upper = v == '-'
|
for hi > lo && commonHeaders[hi-1][i] > c {
|
||||||
|
hi--
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if lo < hi && len(commonHeaders[lo]) == len(a) {
|
||||||
|
return commonHeaders[lo]
|
||||||
}
|
}
|
||||||
return string(a)
|
return string(a)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var commonHeaders = []string{
|
||||||
|
"Accept",
|
||||||
|
"Accept-Charset",
|
||||||
|
"Accept-Encoding",
|
||||||
|
"Accept-Language",
|
||||||
|
"Accept-Ranges",
|
||||||
|
"Cache-Control",
|
||||||
|
"Cc",
|
||||||
|
"Connection",
|
||||||
|
"Content-Id",
|
||||||
|
"Content-Language",
|
||||||
|
"Content-Length",
|
||||||
|
"Content-Transfer-Encoding",
|
||||||
|
"Content-Type",
|
||||||
|
"Date",
|
||||||
|
"Dkim-Signature",
|
||||||
|
"Etag",
|
||||||
|
"Expires",
|
||||||
|
"From",
|
||||||
|
"Host",
|
||||||
|
"If-Modified-Since",
|
||||||
|
"If-None-Match",
|
||||||
|
"In-Reply-To",
|
||||||
|
"Last-Modified",
|
||||||
|
"Location",
|
||||||
|
"Message-Id",
|
||||||
|
"Mime-Version",
|
||||||
|
"Pragma",
|
||||||
|
"Received",
|
||||||
|
"Return-Path",
|
||||||
|
"Server",
|
||||||
|
"Set-Cookie",
|
||||||
|
"Subject",
|
||||||
|
"To",
|
||||||
|
"User-Agent",
|
||||||
|
"Via",
|
||||||
|
"X-Forwarded-For",
|
||||||
|
"X-Imforwards",
|
||||||
|
"X-Powered-By",
|
||||||
|
}
|
||||||
|
@ -24,6 +24,7 @@ var canonicalHeaderKeyTests = []canonicalHeaderKeyTest{
|
|||||||
{"uSER-aGENT", "User-Agent"},
|
{"uSER-aGENT", "User-Agent"},
|
||||||
{"user-agent", "User-Agent"},
|
{"user-agent", "User-Agent"},
|
||||||
{"USER-AGENT", "User-Agent"},
|
{"USER-AGENT", "User-Agent"},
|
||||||
|
{"üser-agenT", "üser-Agent"}, // non-ASCII unchanged
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCanonicalMIMEHeaderKey(t *testing.T) {
|
func TestCanonicalMIMEHeaderKey(t *testing.T) {
|
||||||
@ -241,18 +242,94 @@ func TestRFC959Lines(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCommonHeaders(t *testing.T) {
|
||||||
|
// need to disable the commonHeaders-based optimization
|
||||||
|
// during this check, or we'd not be testing anything
|
||||||
|
oldch := commonHeaders
|
||||||
|
commonHeaders = []string{}
|
||||||
|
defer func() { commonHeaders = oldch }()
|
||||||
|
|
||||||
|
last := ""
|
||||||
|
for _, h := range oldch {
|
||||||
|
if last > h {
|
||||||
|
t.Errorf("%v is out of order", h)
|
||||||
|
}
|
||||||
|
if last == h {
|
||||||
|
t.Errorf("%v is duplicated", h)
|
||||||
|
}
|
||||||
|
if canon := CanonicalMIMEHeaderKey(h); h != canon {
|
||||||
|
t.Errorf("%v is not canonical", h)
|
||||||
|
}
|
||||||
|
last = h
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var clientHeaders = strings.Replace(`Host: golang.org
|
||||||
|
Connection: keep-alive
|
||||||
|
Cache-Control: max-age=0
|
||||||
|
Accept: application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
|
||||||
|
User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3
|
||||||
|
Accept-Encoding: gzip,deflate,sdch
|
||||||
|
Accept-Language: en-US,en;q=0.8,fr-CH;q=0.6
|
||||||
|
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3
|
||||||
|
COOKIE: __utma=000000000.0000000000.0000000000.0000000000.0000000000.00; __utmb=000000000.0.00.0000000000; __utmc=000000000; __utmz=000000000.0000000000.00.0.utmcsr=code.google.com|utmccn=(referral)|utmcmd=referral|utmcct=/p/go/issues/detail
|
||||||
|
Non-Interned: test
|
||||||
|
|
||||||
|
`, "\n", "\r\n", -1)
|
||||||
|
|
||||||
|
var serverHeaders = strings.Replace(`Content-Type: text/html; charset=utf-8
|
||||||
|
Content-Encoding: gzip
|
||||||
|
Date: Thu, 27 Sep 2012 09:03:33 GMT
|
||||||
|
Server: Google Frontend
|
||||||
|
Cache-Control: private
|
||||||
|
Content-Length: 2298
|
||||||
|
VIA: 1.1 proxy.example.com:80 (XXX/n.n.n-nnn)
|
||||||
|
Connection: Close
|
||||||
|
Non-Interned: test
|
||||||
|
|
||||||
|
`, "\n", "\r\n", -1)
|
||||||
|
|
||||||
func BenchmarkReadMIMEHeader(b *testing.B) {
|
func BenchmarkReadMIMEHeader(b *testing.B) {
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
br := bufio.NewReader(&buf)
|
br := bufio.NewReader(&buf)
|
||||||
r := NewReader(br)
|
r := NewReader(br)
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
buf.WriteString("User-Agent: not mozilla\r\nContent-Length: 23452\r\nContent-Type: text/html; charset-utf8\r\nFoo-Bar: foobar\r\nfoo-bar: some more string\r\n\r\n")
|
var want int
|
||||||
|
var find string
|
||||||
|
if (i & 1) == 1 {
|
||||||
|
buf.WriteString(clientHeaders)
|
||||||
|
want = 10
|
||||||
|
find = "Cookie"
|
||||||
|
} else {
|
||||||
|
buf.WriteString(serverHeaders)
|
||||||
|
want = 9
|
||||||
|
find = "Via"
|
||||||
|
}
|
||||||
h, err := r.ReadMIMEHeader()
|
h, err := r.ReadMIMEHeader()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
b.Fatal(err)
|
b.Fatal(err)
|
||||||
}
|
}
|
||||||
if len(h) != 4 {
|
if len(h) != want {
|
||||||
b.Fatalf("want 4")
|
b.Fatalf("wrong number of headers: got %d, want %d", len(h), want)
|
||||||
|
}
|
||||||
|
if _, ok := h[find]; !ok {
|
||||||
|
b.Fatalf("did not find key %s", find)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkUncommon(b *testing.B) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
br := bufio.NewReader(&buf)
|
||||||
|
r := NewReader(br)
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
buf.WriteString("uncommon-header-for-benchmark: foo\r\n\r\n")
|
||||||
|
h, err := r.ReadMIMEHeader()
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
if _, ok := h["Uncommon-Header-For-Benchmark"]; !ok {
|
||||||
|
b.Fatal("Missing result header.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user