1
0
mirror of https://github.com/golang/go synced 2024-10-03 16:31:27 -06:00

net/textproto: faster header canonicalization with fewer allocations

By keeping a single copy of the strings that commonly show up
in headers, we can avoid one string allocation per header.

benchmark                  old ns/op    new ns/op    delta
BenchmarkReadMIMEHeader        19590        10824  -44.75%
BenchmarkUncommon               3168         1861  -41.26%

benchmark                 old allocs   new allocs    delta
BenchmarkReadMIMEHeader           32           25  -21.88%
BenchmarkUncommon                  5            5    0.00%

R=bradfitz, golang-dev, dave, rsc, jra
CC=golang-dev
https://golang.org/cl/6721055
This commit is contained in:
Jeff R. Allen 2012-11-12 15:31:42 -08:00 committed by Brad Fitzpatrick
parent 73c67606e9
commit 7e7b89f7d0
2 changed files with 154 additions and 14 deletions

View File

@ -486,6 +486,7 @@ func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
// letter and any letter following a hyphen to upper case; // letter and any letter following a hyphen to upper case;
// the rest are converted to lowercase. For example, the // the rest are converted to lowercase. For example, the
// canonical key for "accept-encoding" is "Accept-Encoding". // canonical key for "accept-encoding" is "Accept-Encoding".
// MIME header keys are assumed to be ASCII only.
func CanonicalMIMEHeaderKey(s string) string { func CanonicalMIMEHeaderKey(s string) string {
// Quick check for canonical encoding. // Quick check for canonical encoding.
upper := true upper := true
@ -502,28 +503,90 @@ func CanonicalMIMEHeaderKey(s string) string {
return s return s
} }
const toLower = 'a' - 'A'
// canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
// allowed to mutate the provided byte slice before returning the // allowed to mutate the provided byte slice before returning the
// string. // string.
func canonicalMIMEHeaderKey(a []byte) string { func canonicalMIMEHeaderKey(a []byte) string {
// Canonicalize: first letter upper case // Look for it in commonHeaders , so that we can avoid an
// and upper case after each dash. // allocation by sharing the strings among all users
// (Host, User-Agent, If-Modified-Since). // of textproto. If we don't find it, a has been canonicalized
// MIME headers are ASCII only, so no Unicode issues. // so just return string(a).
upper := true upper := true
for i, v := range a { lo := 0
if v == ' ' { hi := len(commonHeaders)
for i := 0; i < len(a); i++ {
// Canonicalize: first letter upper case
// and upper case after each dash.
// (Host, User-Agent, If-Modified-Since).
// MIME headers are ASCII only, so no Unicode issues.
if a[i] == ' ' {
a[i] = '-' a[i] = '-'
upper = true upper = true
continue continue
} }
if upper && 'a' <= v && v <= 'z' { c := a[i]
a[i] = v + 'A' - 'a' if upper && 'a' <= c && c <= 'z' {
c -= toLower
} else if !upper && 'A' <= c && c <= 'Z' {
c += toLower
} }
if !upper && 'A' <= v && v <= 'Z' { a[i] = c
a[i] = v + 'a' - 'A' upper = c == '-' // for next time
if lo < hi {
for lo < hi && (len(commonHeaders[lo]) <= i || commonHeaders[lo][i] < c) {
lo++
}
for hi > lo && commonHeaders[hi-1][i] > c {
hi--
}
} }
upper = v == '-' }
if lo < hi && len(commonHeaders[lo]) == len(a) {
return commonHeaders[lo]
} }
return string(a) return string(a)
} }
var commonHeaders = []string{
"Accept",
"Accept-Charset",
"Accept-Encoding",
"Accept-Language",
"Accept-Ranges",
"Cache-Control",
"Cc",
"Connection",
"Content-Id",
"Content-Language",
"Content-Length",
"Content-Transfer-Encoding",
"Content-Type",
"Date",
"Dkim-Signature",
"Etag",
"Expires",
"From",
"Host",
"If-Modified-Since",
"If-None-Match",
"In-Reply-To",
"Last-Modified",
"Location",
"Message-Id",
"Mime-Version",
"Pragma",
"Received",
"Return-Path",
"Server",
"Set-Cookie",
"Subject",
"To",
"User-Agent",
"Via",
"X-Forwarded-For",
"X-Imforwards",
"X-Powered-By",
}

View File

@ -24,6 +24,7 @@ var canonicalHeaderKeyTests = []canonicalHeaderKeyTest{
{"uSER-aGENT", "User-Agent"}, {"uSER-aGENT", "User-Agent"},
{"user-agent", "User-Agent"}, {"user-agent", "User-Agent"},
{"USER-AGENT", "User-Agent"}, {"USER-AGENT", "User-Agent"},
{"üser-agenT", "üser-Agent"}, // non-ASCII unchanged
} }
func TestCanonicalMIMEHeaderKey(t *testing.T) { func TestCanonicalMIMEHeaderKey(t *testing.T) {
@ -241,18 +242,94 @@ func TestRFC959Lines(t *testing.T) {
} }
} }
func TestCommonHeaders(t *testing.T) {
// need to disable the commonHeaders-based optimization
// during this check, or we'd not be testing anything
oldch := commonHeaders
commonHeaders = []string{}
defer func() { commonHeaders = oldch }()
last := ""
for _, h := range oldch {
if last > h {
t.Errorf("%v is out of order", h)
}
if last == h {
t.Errorf("%v is duplicated", h)
}
if canon := CanonicalMIMEHeaderKey(h); h != canon {
t.Errorf("%v is not canonical", h)
}
last = h
}
}
var clientHeaders = strings.Replace(`Host: golang.org
Connection: keep-alive
Cache-Control: max-age=0
Accept: application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3
Accept-Encoding: gzip,deflate,sdch
Accept-Language: en-US,en;q=0.8,fr-CH;q=0.6
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3
COOKIE: __utma=000000000.0000000000.0000000000.0000000000.0000000000.00; __utmb=000000000.0.00.0000000000; __utmc=000000000; __utmz=000000000.0000000000.00.0.utmcsr=code.google.com|utmccn=(referral)|utmcmd=referral|utmcct=/p/go/issues/detail
Non-Interned: test
`, "\n", "\r\n", -1)
var serverHeaders = strings.Replace(`Content-Type: text/html; charset=utf-8
Content-Encoding: gzip
Date: Thu, 27 Sep 2012 09:03:33 GMT
Server: Google Frontend
Cache-Control: private
Content-Length: 2298
VIA: 1.1 proxy.example.com:80 (XXX/n.n.n-nnn)
Connection: Close
Non-Interned: test
`, "\n", "\r\n", -1)
func BenchmarkReadMIMEHeader(b *testing.B) { func BenchmarkReadMIMEHeader(b *testing.B) {
var buf bytes.Buffer var buf bytes.Buffer
br := bufio.NewReader(&buf) br := bufio.NewReader(&buf)
r := NewReader(br) r := NewReader(br)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
buf.WriteString("User-Agent: not mozilla\r\nContent-Length: 23452\r\nContent-Type: text/html; charset-utf8\r\nFoo-Bar: foobar\r\nfoo-bar: some more string\r\n\r\n") var want int
var find string
if (i & 1) == 1 {
buf.WriteString(clientHeaders)
want = 10
find = "Cookie"
} else {
buf.WriteString(serverHeaders)
want = 9
find = "Via"
}
h, err := r.ReadMIMEHeader() h, err := r.ReadMIMEHeader()
if err != nil { if err != nil {
b.Fatal(err) b.Fatal(err)
} }
if len(h) != 4 { if len(h) != want {
b.Fatalf("want 4") b.Fatalf("wrong number of headers: got %d, want %d", len(h), want)
}
if _, ok := h[find]; !ok {
b.Fatalf("did not find key %s", find)
}
}
}
func BenchmarkUncommon(b *testing.B) {
var buf bytes.Buffer
br := bufio.NewReader(&buf)
r := NewReader(br)
for i := 0; i < b.N; i++ {
buf.WriteString("uncommon-header-for-benchmark: foo\r\n\r\n")
h, err := r.ReadMIMEHeader()
if err != nil {
b.Fatal(err)
}
if _, ok := h["Uncommon-Header-For-Benchmark"]; !ok {
b.Fatal("Missing result header.")
} }
} }
} }