diff --git a/src/pkg/http/Makefile b/src/pkg/http/Makefile index 2a2a2a3beb8..b8bc093d49c 100644 --- a/src/pkg/http/Makefile +++ b/src/pkg/http/Makefile @@ -18,6 +18,7 @@ GOFILES=\ response.go\ reverseproxy.go\ server.go\ + sniff.go\ status.go\ transfer.go\ transport.go\ diff --git a/src/pkg/http/server.go b/src/pkg/http/server.go index ab960f4f0a8..d71a24959a5 100644 --- a/src/pkg/http/server.go +++ b/src/pkg/http/server.go @@ -98,7 +98,8 @@ type conn struct { rwc net.Conn // i/o connection buf *bufio.ReadWriter // buffered rwc hijacked bool // connection has been hijacked by handler - tlsState *tls.ConnectionState // or nil when not using TLS + tlsState *tls.ConnectionState // or nil when not using TLS + body []byte } // A response represents the server side of an HTTP response. @@ -112,6 +113,7 @@ type response struct { written int64 // number of bytes written in body contentLength int64 // explicitly-declared Content-Length; or -1 status int // status code passed to WriteHeader + needSniff bool // need to sniff to find Content-Type // close connection after this reply. set on request and // updated after response from handler if there's a @@ -147,6 +149,7 @@ func newConn(rwc net.Conn, handler Handler) (c *conn, err os.Error) { c.remoteAddr = rwc.RemoteAddr().String() c.handler = handler c.rwc = rwc + c.body = make([]byte, sniffLen) br := bufio.NewReader(rwc) bw := bufio.NewWriter(rwc) c.buf = bufio.NewReadWriter(br, bw) @@ -209,6 +212,7 @@ func (c *conn) readRequest() (w *response, err os.Error) { w.req = req w.header = make(Header) w.contentLength = -1 + c.body = c.body[:0] return w, nil } @@ -249,9 +253,9 @@ func (w *response) WriteHeader(code int) { } } } else { - // Default output is HTML encoded in UTF-8. + // If no content type, apply sniffing algorithm to body. if w.header.Get("Content-Type") == "" { - w.header.Set("Content-Type", "text/html; charset=utf-8") + w.needSniff = true } } @@ -337,7 +341,34 @@ func (w *response) WriteHeader(code int) { } io.WriteString(w.conn.buf, proto+" "+codestring+" "+text+"\r\n") w.header.Write(w.conn.buf) + + // If we need to sniff the body, leave the header open. + // Otherwise, end it here. + if !w.needSniff { + io.WriteString(w.conn.buf, "\r\n") + } +} + +// sniff uses the first block of written data, +// stored in w.conn.body, to decide the Content-Type +// for the HTTP body. +func (w *response) sniff() { + if !w.needSniff { + return + } + w.needSniff = false + + data := w.conn.body + ctype := detectContentType(data) + if ctype != "" { + fmt.Fprintf(w.conn.buf, "Content-Type: %s\r\n", ctype) + } io.WriteString(w.conn.buf, "\r\n") + + if w.chunking && len(data) > 0 { + fmt.Fprintf(w.conn.buf, "%x\r\n", len(data)) + w.conn.buf.Write(data) + } } // bodyAllowed returns true if a Write is allowed for this response type. @@ -369,6 +400,18 @@ func (w *response) Write(data []byte) (n int, err os.Error) { return 0, ErrContentLength } + var m int + if w.needSniff { + body := w.conn.body + m = copy(body[len(body):], data) + w.conn.body = body[:len(body)+m] + if m == len(data) { + return m, nil + } + w.sniff() + data = data[m:] + } + // TODO(rsc): if chunking happened after the buffering, // then there would be fewer chunk headers. // On the other hand, it would make hijacking more difficult. @@ -385,7 +428,7 @@ func (w *response) Write(data []byte) (n int, err os.Error) { } } - return n, err + return m + n, err } // If this is an error reply (4xx or 5xx) @@ -449,6 +492,9 @@ func (w *response) finishRequest() { if !w.wroteHeader { w.WriteHeader(StatusOK) } + if w.needSniff { + w.sniff() + } errorKludge(w) if w.chunking { io.WriteString(w.conn.buf, "0\r\n") @@ -471,6 +517,7 @@ func (w *response) Flush() { if !w.wroteHeader { w.WriteHeader(StatusOK) } + w.sniff() w.conn.buf.Flush() } @@ -517,6 +564,7 @@ func (c *conn) serve() { if req.ContentLength == 0 { w.Header().Set("Connection", "close") w.WriteHeader(StatusBadRequest) + w.finishRequest() break } req.Header.Del("Expect") @@ -535,6 +583,7 @@ func (c *conn) serve() { // respond with a 417 (Expectation Failed) status." w.Header().Set("Connection", "close") w.WriteHeader(StatusExpectationFailed) + w.finishRequest() break } diff --git a/src/pkg/http/sniff.go b/src/pkg/http/sniff.go new file mode 100644 index 00000000000..9fd6fee9b51 --- /dev/null +++ b/src/pkg/http/sniff.go @@ -0,0 +1,18 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +// Content-type sniffing algorithm. +// http://tools.ietf.org/html/draft-ietf-websec-mime-sniff-03 + +// The algorithm prefers to use sniffLen bytes to make its decision. +const sniffLen = 1024 + +// detectContentType returns the sniffed Content-Type string +// for the given data. +func detectContentType(data []byte) string { + // TODO(dsymonds,rsc): Implement algorithm from draft. + return "text/html; charset=utf-8" +}