diff --git a/src/pkg/net/textproto/reader.go b/src/pkg/net/textproto/reader.go index 3777424534d..855350c31f0 100644 --- a/src/pkg/net/textproto/reader.go +++ b/src/pkg/net/textproto/reader.go @@ -486,6 +486,7 @@ func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) { // letter and any letter following a hyphen to upper case; // the rest are converted to lowercase. For example, the // canonical key for "accept-encoding" is "Accept-Encoding". +// MIME header keys are assumed to be ASCII only. func CanonicalMIMEHeaderKey(s string) string { // Quick check for canonical encoding. upper := true @@ -502,28 +503,90 @@ func CanonicalMIMEHeaderKey(s string) string { return s } +const toLower = 'a' - 'A' + // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is // allowed to mutate the provided byte slice before returning the // string. func canonicalMIMEHeaderKey(a []byte) string { - // Canonicalize: first letter upper case - // and upper case after each dash. - // (Host, User-Agent, If-Modified-Since). - // MIME headers are ASCII only, so no Unicode issues. + // Look for it in commonHeaders , so that we can avoid an + // allocation by sharing the strings among all users + // of textproto. If we don't find it, a has been canonicalized + // so just return string(a). upper := true - for i, v := range a { - if v == ' ' { + lo := 0 + hi := len(commonHeaders) + for i := 0; i < len(a); i++ { + // Canonicalize: first letter upper case + // and upper case after each dash. + // (Host, User-Agent, If-Modified-Since). + // MIME headers are ASCII only, so no Unicode issues. + if a[i] == ' ' { a[i] = '-' upper = true continue } - if upper && 'a' <= v && v <= 'z' { - a[i] = v + 'A' - 'a' + c := a[i] + if upper && 'a' <= c && c <= 'z' { + c -= toLower + } else if !upper && 'A' <= c && c <= 'Z' { + c += toLower } - if !upper && 'A' <= v && v <= 'Z' { - a[i] = v + 'a' - 'A' + a[i] = c + upper = c == '-' // for next time + + if lo < hi { + for lo < hi && (len(commonHeaders[lo]) <= i || commonHeaders[lo][i] < c) { + lo++ + } + for hi > lo && commonHeaders[hi-1][i] > c { + hi-- + } } - upper = v == '-' + } + if lo < hi && len(commonHeaders[lo]) == len(a) { + return commonHeaders[lo] } return string(a) } + +var commonHeaders = []string{ + "Accept", + "Accept-Charset", + "Accept-Encoding", + "Accept-Language", + "Accept-Ranges", + "Cache-Control", + "Cc", + "Connection", + "Content-Id", + "Content-Language", + "Content-Length", + "Content-Transfer-Encoding", + "Content-Type", + "Date", + "Dkim-Signature", + "Etag", + "Expires", + "From", + "Host", + "If-Modified-Since", + "If-None-Match", + "In-Reply-To", + "Last-Modified", + "Location", + "Message-Id", + "Mime-Version", + "Pragma", + "Received", + "Return-Path", + "Server", + "Set-Cookie", + "Subject", + "To", + "User-Agent", + "Via", + "X-Forwarded-For", + "X-Imforwards", + "X-Powered-By", +} diff --git a/src/pkg/net/textproto/reader_test.go b/src/pkg/net/textproto/reader_test.go index 9b6c76a0d08..26987f61132 100644 --- a/src/pkg/net/textproto/reader_test.go +++ b/src/pkg/net/textproto/reader_test.go @@ -24,6 +24,7 @@ var canonicalHeaderKeyTests = []canonicalHeaderKeyTest{ {"uSER-aGENT", "User-Agent"}, {"user-agent", "User-Agent"}, {"USER-AGENT", "User-Agent"}, + {"üser-agenT", "üser-Agent"}, // non-ASCII unchanged } func TestCanonicalMIMEHeaderKey(t *testing.T) { @@ -241,18 +242,94 @@ func TestRFC959Lines(t *testing.T) { } } +func TestCommonHeaders(t *testing.T) { + // need to disable the commonHeaders-based optimization + // during this check, or we'd not be testing anything + oldch := commonHeaders + commonHeaders = []string{} + defer func() { commonHeaders = oldch }() + + last := "" + for _, h := range oldch { + if last > h { + t.Errorf("%v is out of order", h) + } + if last == h { + t.Errorf("%v is duplicated", h) + } + if canon := CanonicalMIMEHeaderKey(h); h != canon { + t.Errorf("%v is not canonical", h) + } + last = h + } +} + +var clientHeaders = strings.Replace(`Host: golang.org +Connection: keep-alive +Cache-Control: max-age=0 +Accept: application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5 +User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3 +Accept-Encoding: gzip,deflate,sdch +Accept-Language: en-US,en;q=0.8,fr-CH;q=0.6 +Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3 +COOKIE: __utma=000000000.0000000000.0000000000.0000000000.0000000000.00; __utmb=000000000.0.00.0000000000; __utmc=000000000; __utmz=000000000.0000000000.00.0.utmcsr=code.google.com|utmccn=(referral)|utmcmd=referral|utmcct=/p/go/issues/detail +Non-Interned: test + +`, "\n", "\r\n", -1) + +var serverHeaders = strings.Replace(`Content-Type: text/html; charset=utf-8 +Content-Encoding: gzip +Date: Thu, 27 Sep 2012 09:03:33 GMT +Server: Google Frontend +Cache-Control: private +Content-Length: 2298 +VIA: 1.1 proxy.example.com:80 (XXX/n.n.n-nnn) +Connection: Close +Non-Interned: test + +`, "\n", "\r\n", -1) + func BenchmarkReadMIMEHeader(b *testing.B) { var buf bytes.Buffer br := bufio.NewReader(&buf) r := NewReader(br) for i := 0; i < b.N; i++ { - buf.WriteString("User-Agent: not mozilla\r\nContent-Length: 23452\r\nContent-Type: text/html; charset-utf8\r\nFoo-Bar: foobar\r\nfoo-bar: some more string\r\n\r\n") + var want int + var find string + if (i & 1) == 1 { + buf.WriteString(clientHeaders) + want = 10 + find = "Cookie" + } else { + buf.WriteString(serverHeaders) + want = 9 + find = "Via" + } h, err := r.ReadMIMEHeader() if err != nil { b.Fatal(err) } - if len(h) != 4 { - b.Fatalf("want 4") + if len(h) != want { + b.Fatalf("wrong number of headers: got %d, want %d", len(h), want) + } + if _, ok := h[find]; !ok { + b.Fatalf("did not find key %s", find) + } + } +} + +func BenchmarkUncommon(b *testing.B) { + var buf bytes.Buffer + br := bufio.NewReader(&buf) + r := NewReader(br) + for i := 0; i < b.N; i++ { + buf.WriteString("uncommon-header-for-benchmark: foo\r\n\r\n") + h, err := r.ReadMIMEHeader() + if err != nil { + b.Fatal(err) + } + if _, ok := h["Uncommon-Header-For-Benchmark"]; !ok { + b.Fatal("Missing result header.") } } }