1
0
mirror of https://github.com/golang/go synced 2024-11-22 16:25:07 -07:00

mime/multipart: fix handling of empty parts without CRLF before next part

Empty parts can be either of the form:

a) "--separator\r\n", header (w/ trailing 2xCRLF), \r\n "--separator"...
or
b) "--separator\r\n", header (w/ trailing 2xCRLF), "--separator"...

We never handled case b).  In fact the RFC seems kinda vague about
it, but browsers seem to do a), and App Engine's synthetic POST
bodies after blob uploads are of form b).

So handle them both, and add a bunch of tests.

(I can't promise these are the last fixes to multipart, especially
considering its history, but I'm growing increasingly confident at
least, and I've never submitted a multipart CL with known bugs
outstanding, including this time.)

R=golang-dev, adg
CC=golang-dev
https://golang.org/cl/6212046
This commit is contained in:
Brad Fitzpatrick 2012-05-14 18:16:47 -07:00
parent a90cbd741a
commit e393a8292e
2 changed files with 292 additions and 68 deletions

View File

@ -22,11 +22,6 @@ import (
"net/textproto" "net/textproto"
) )
// TODO(bradfitz): inline these once the compiler can inline them in
// read-only situation (such as bytes.HasSuffix)
var lf = []byte("\n")
var crlf = []byte("\r\n")
var emptyParams = make(map[string]string) var emptyParams = make(map[string]string)
// A Part represents a single part in a multipart body. // A Part represents a single part in a multipart body.
@ -38,6 +33,7 @@ type Part struct {
buffer *bytes.Buffer buffer *bytes.Buffer
mr *Reader mr *Reader
bytesRead int
disposition string disposition string
dispositionParams map[string]string dispositionParams map[string]string
@ -113,14 +109,26 @@ func (bp *Part) populateHeaders() error {
// Read reads the body of a part, after its headers and before the // Read reads the body of a part, after its headers and before the
// next part (if any) begins. // next part (if any) begins.
func (p *Part) Read(d []byte) (n int, err error) { func (p *Part) Read(d []byte) (n int, err error) {
defer func() {
p.bytesRead += n
}()
if p.buffer.Len() >= len(d) { if p.buffer.Len() >= len(d) {
// Internal buffer of unconsumed data is large enough for // Internal buffer of unconsumed data is large enough for
// the read request. No need to parse more at the moment. // the read request. No need to parse more at the moment.
return p.buffer.Read(d) return p.buffer.Read(d)
} }
peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor
unexpectedEof := err == io.EOF
if err != nil && !unexpectedEof { // Look for an immediate empty part without a leading \r\n
// before the boundary separator. Some MIME code makes empty
// parts like this. Most browsers, however, write the \r\n
// before the subsequent boundary even for empty parts and
// won't hit this path.
if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) {
return 0, io.EOF
}
unexpectedEOF := err == io.EOF
if err != nil && !unexpectedEOF {
return 0, fmt.Errorf("multipart: Part Read: %v", err) return 0, fmt.Errorf("multipart: Part Read: %v", err)
} }
if peek == nil { if peek == nil {
@ -138,7 +146,7 @@ func (p *Part) Read(d []byte) (n int, err error) {
foundBoundary = true foundBoundary = true
} else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 { } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
nCopy = safeCount nCopy = safeCount
} else if unexpectedEof { } else if unexpectedEOF {
// If we've run out of peek buffer and the boundary // If we've run out of peek buffer and the boundary
// wasn't found (and can't possibly fit), we must have // wasn't found (and can't possibly fit), we must have
// hit the end of the file unexpectedly. // hit the end of the file unexpectedly.
@ -172,7 +180,10 @@ type Reader struct {
currentPart *Part currentPart *Part
partsRead int partsRead int
nl, nlDashBoundary, dashBoundaryDash, dashBoundary []byte nl []byte // "\r\n" or "\n" (set after seeing first boundary line)
nlDashBoundary []byte // nl + "--boundary"
dashBoundaryDash []byte // "--boundary--"
dashBoundary []byte // "--boundary"
} }
// NextPart returns the next part in the multipart or an error. // NextPart returns the next part in the multipart or an error.
@ -185,7 +196,7 @@ func (r *Reader) NextPart() (*Part, error) {
expectNewPart := false expectNewPart := false
for { for {
line, err := r.bufReader.ReadSlice('\n') line, err := r.bufReader.ReadSlice('\n')
if err == io.EOF && bytes.Equal(line, r.dashBoundaryDash) { if err == io.EOF && r.isFinalBoundary(line) {
// If the buffer ends in "--boundary--" without the // If the buffer ends in "--boundary--" without the
// trailing "\r\n", ReadSlice will return an error // trailing "\r\n", ReadSlice will return an error
// (since it's missing the '\n'), but this is a valid // (since it's missing the '\n'), but this is a valid
@ -207,7 +218,7 @@ func (r *Reader) NextPart() (*Part, error) {
return bp, nil return bp, nil
} }
if hasPrefixThenNewline(line, r.dashBoundaryDash) { if r.isFinalBoundary(line) {
// Expected EOF // Expected EOF
return nil, io.EOF return nil, io.EOF
} }
@ -235,7 +246,19 @@ func (r *Reader) NextPart() (*Part, error) {
panic("unreachable") panic("unreachable")
} }
func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool { // isFinalBoundary returns whether line is the final boundary line
// indicating that all parts are over.
// It matches `^--boundary--[ \t]*(\r\n)?$`
func (mr *Reader) isFinalBoundary(line []byte) bool {
	// Anything that does not begin with "--boundary--" cannot be the
	// terminating delimiter.
	if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
		return false
	}
	// Drop the delimiter and any spaces or tabs after it; what remains
	// must be nothing at all or exactly the reader's newline sequence.
	tail := skipLWSPChar(line[len(mr.dashBoundaryDash):])
	if len(tail) == 0 {
		return true
	}
	return bytes.Equal(tail, mr.nl)
}
func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
// http://tools.ietf.org/html/rfc2046#section-5.1 // http://tools.ietf.org/html/rfc2046#section-5.1
// The boundary delimiter line is then defined as a line // The boundary delimiter line is then defined as a line
// consisting entirely of two hyphen characters ("-", // consisting entirely of two hyphen characters ("-",
@ -245,32 +268,52 @@ func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
if !bytes.HasPrefix(line, mr.dashBoundary) { if !bytes.HasPrefix(line, mr.dashBoundary) {
return false return false
} }
if bytes.HasSuffix(line, mr.nl) { rest := line[len(mr.dashBoundary):]
return onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-len(mr.nl)]) rest = skipLWSPChar(rest)
}
// Violate the spec and also support newlines without the // On the first part, see our lines are ending in \n instead of \r\n
// carriage return... // and switch into that mode if so. This is a violation of the spec,
if mr.partsRead == 0 && bytes.HasSuffix(line, lf) { // but occurs in practice.
if onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-1]) { if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
mr.nl = mr.nl[1:] mr.nl = mr.nl[1:]
mr.nlDashBoundary = mr.nlDashBoundary[1:] mr.nlDashBoundary = mr.nlDashBoundary[1:]
return true
} }
} return bytes.Equal(rest, mr.nl)
return false
} }
func onlyHorizontalWhitespace(s []byte) bool { // peekBufferIsEmptyPart returns whether the provided peek-ahead
for _, b := range s { // buffer represents an empty part. This is only called if we've not
if b != ' ' && b != '\t' { // already read any bytes in this part and checks for the case of MIME
// software not writing the \r\n on empty parts. Some does, some
// doesn't.
//
// This checks that what follows the "--boundary" is actually the end
// ("--boundary--" with optional whitespace) or optional whitespace
// and then a newline, so we don't catch "--boundaryFAKE", in which
// case the whole line is part of the data.
func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
// End of parts case.
// Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
rest := peek[len(mr.dashBoundaryDash):]
rest = skipLWSPChar(rest)
return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
}
if !bytes.HasPrefix(peek, mr.dashBoundary) {
return false return false
} }
} // Test whether rest matches `^[ \t]*\r\n`)
return true rest := peek[len(mr.dashBoundary):]
rest = skipLWSPChar(rest)
return bytes.HasPrefix(rest, mr.nl)
} }
func hasPrefixThenNewline(s, prefix []byte) bool { // skipLWSPChar returns b with leading spaces and tabs removed.
return bytes.HasPrefix(s, prefix) && // RFC 822 defines:
(len(s) == len(prefix)+1 && s[len(s)-1] == '\n' || // LWSP-char = SPACE / HTAB
len(s) == len(prefix)+2 && bytes.HasSuffix(s, crlf)) func skipLWSPChar(b []byte) []byte {
for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
b = b[1:]
}
return b
} }

View File

@ -10,20 +10,13 @@ import (
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"net/textproto"
"os" "os"
"reflect"
"strings" "strings"
"testing" "testing"
) )
func TestHorizontalWhitespace(t *testing.T) {
if !onlyHorizontalWhitespace([]byte(" \t")) {
t.Error("expected pass")
}
if onlyHorizontalWhitespace([]byte("foo bar")) {
t.Error("expected failure")
}
}
func TestBoundaryLine(t *testing.T) { func TestBoundaryLine(t *testing.T) {
mr := NewReader(strings.NewReader(""), "myBoundary") mr := NewReader(strings.NewReader(""), "myBoundary")
if !mr.isBoundaryDelimiterLine([]byte("--myBoundary\r\n")) { if !mr.isBoundaryDelimiterLine([]byte("--myBoundary\r\n")) {
@ -319,29 +312,6 @@ Oh no, premature EOF!
} }
} }
func TestZeroLengthBody(t *testing.T) {
testBody := strings.Replace(`
This is a multi-part message. This line is ignored.
--MyBoundary
foo: bar
--MyBoundary--
`, "\n", "\r\n", -1)
r := NewReader(strings.NewReader(testBody), "MyBoundary")
part, err := r.NextPart()
if err != nil {
t.Fatalf("didn't get a part")
}
n, err := io.Copy(ioutil.Discard, part)
if err != nil {
t.Errorf("error reading part: %v", err)
}
if n != 0 {
t.Errorf("read %d bytes; expected 0", n)
}
}
type slowReader struct { type slowReader struct {
r io.Reader r io.Reader
} }
@ -427,3 +397,214 @@ func TestNested(t *testing.T) {
t.Fatalf("final outer NextPart = %v; want io.EOF", err) t.Fatalf("final outer NextPart = %v; want io.EOF", err)
} }
} }
// headerBody pairs the MIME header of one part with that part's body.
// TestParse builds one of these per part returned by the Reader and
// compares the result against the expected values.
type headerBody struct {
// header is the part's MIME header.
header textproto.MIMEHeader
// body is the complete body of the part, as a string.
body string
}
// formData returns the headerBody for a plain-text form-data part whose
// Content-Disposition name is key and whose body is value.
func formData(key, value string) headerBody {
	// Every generated part shares the same Content-Type; only the
	// form-data name and the body vary.
	hdr := textproto.MIMEHeader{
		"Content-Type":        {"text/plain; charset=ISO-8859-1"},
		"Content-Disposition": {"form-data; name=" + key},
	}
	return headerBody{header: hdr, body: value}
}
// parseTest is a single table-driven case for TestParse.
type parseTest struct {
// name identifies the case in failure messages.
name string
// in is the raw multipart input; sep is the boundary handed to NewReader.
in, sep string
// want lists the parts the Reader is expected to yield, in order.
want []headerBody
}
var parseTests = []parseTest{
// Actual body from App Engine on a blob upload. The final part (the
// Content-Type: message/external-body) is what App Engine replaces
// the uploaded file with. The other form fields (prefixed with
// "other" in their form-data name) are unchanged. A bug was
// reported with blob uploads failing when the other fields were
// empty. This was the MIME POST body that previously failed.
{
name: "App Engine post",
sep: "00151757727e9583fd04bfbca4c6",
in: "--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty1\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo1\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo2\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty2\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=submit\r\n\r\nSubmit\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\nContent-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n\r\n--00151757727e9583fd04bfbca4c6--",
want: []headerBody{
formData("otherEmpty1", ""),
formData("otherFoo1", "foo"),
formData("otherFoo2", "foo"),
formData("otherEmpty2", ""),
formData("otherRepeatFoo", "foo"),
formData("otherRepeatFoo", "foo"),
formData("otherRepeatEmpty", ""),
formData("otherRepeatEmpty", ""),
formData("submit", "Submit"),
{textproto.MIMEHeader{
"Content-Type": {"message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q"},
"Content-Disposition": {"form-data; name=file; filename=\"fall.png\""},
}, "Content-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n"},
},
},
// Single empty part, ended with --boundary immediately after headers.
{
name: "single empty part, --boundary",
sep: "abc",
in: "--abc\r\nFoo: bar\r\n\r\n--abc--",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
},
},
// Single empty part, ended with \r\n--boundary immediately after headers.
{
name: "single empty part, \r\n--boundary",
sep: "abc",
in: "--abc\r\nFoo: bar\r\n\r\n\r\n--abc--",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
},
},
// Final part empty.
{
name: "final part empty",
sep: "abc",
in: "--abc\r\nFoo: bar\r\n\r\n--abc\r\nFoo2: bar2\r\n\r\n--abc--",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
{textproto.MIMEHeader{"Foo2": {"bar2"}}, ""},
},
},
// Final part empty with newlines after final separator.
{
name: "final part empty then crlf",
sep: "abc",
in: "--abc\r\nFoo: bar\r\n\r\n--abc--\r\n",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
},
},
// Final part empty with lwsp-chars after final separator.
{
name: "final part empty then lwsp",
sep: "abc",
in: "--abc\r\nFoo: bar\r\n\r\n--abc-- \t",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
},
},
// No parts (empty form as submitted by Chrome)
{
name: "no parts",
sep: "----WebKitFormBoundaryQfEAfzFOiSemeHfA",
in: "------WebKitFormBoundaryQfEAfzFOiSemeHfA--\r\n",
want: []headerBody{},
},
// Part containing data starting with the boundary, but with additional suffix.
{
name: "fake separator as data",
sep: "sep",
in: "--sep\r\nFoo: bar\r\n\r\n--sepFAKE\r\n--sep--",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, "--sepFAKE"},
},
},
// Part containing a boundary with whitespace following it.
{
name: "boundary with whitespace",
sep: "sep",
in: "--sep \r\nFoo: bar\r\n\r\ntext\r\n--sep--",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, "text"},
},
},
// With ignored leading line.
{
name: "leading line",
sep: "MyBoundary",
in: strings.Replace(`This is a multi-part message. This line is ignored.
--MyBoundary
foo: bar
--MyBoundary--`, "\n", "\r\n", -1),
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
},
},
roundTripParseTest(),
}
// TestParse runs every entry of parseTests through a Reader and checks that
// the headers and bodies of the parts it yields match the expected list.
func TestParse(t *testing.T) {
nextCase:
	for _, tc := range parseTests {
		mr := NewReader(strings.NewReader(tc.in), tc.sep)
		// Start from an empty, non-nil slice so that a case expecting
		// zero parts compares equal under reflect.DeepEqual.
		parts := []headerBody{}
		for {
			part, err := mr.NextPart()
			if err == io.EOF {
				break
			}
			if err != nil {
				t.Errorf("in test %q, NextPart: %v", tc.name, err)
				continue nextCase
			}
			data, err := ioutil.ReadAll(part)
			if err != nil {
				t.Errorf("in test %q, error reading part: %v", tc.name, err)
				continue nextCase
			}
			parts = append(parts, headerBody{part.Header, string(data)})
		}
		if reflect.DeepEqual(tc.want, parts) {
			continue
		}
		t.Errorf("test %q:\n got: %v\nwant: %v", tc.name, parts, tc.want)
		// Narrow the failure down: report a count mismatch, or, when the
		// counts agree and there is more than one part, each differing part.
		switch {
		case len(tc.want) != len(parts):
			t.Errorf("test %q: got %d parts, want %d", tc.name, len(parts), len(tc.want))
		case len(parts) > 1:
			for i := range tc.want {
				if w, g := tc.want[i], parts[i]; !reflect.DeepEqual(w, g) {
					t.Errorf("test %q, part %d:\n got: %v\nwant: %v", tc.name, i, g, w)
				}
			}
		}
	}
}
// roundTripParseTest builds a parseTest whose input is produced by this
// package's Writer: each wanted part is written with CreatePart, and the
// resulting bytes and boundary become the case's in and sep. The wanted
// parts are the same ones that were written.
//
// It panics if any Writer call reports an error, since a broken fixture
// would make the test table meaningless.
func roundTripParseTest() parseTest {
	t := parseTest{
		name: "round trip",
		want: []headerBody{
			formData("empty", ""),
			formData("lf", "\n"),
			formData("cr", "\r"),
			formData("crlf", "\r\n"),
			formData("foo", "bar"),
		},
	}
	var buf bytes.Buffer
	w := NewWriter(&buf)
	for _, p := range t.want {
		pw, err := w.CreatePart(p.header)
		if err != nil {
			panic(err)
		}
		if _, err := pw.Write([]byte(p.body)); err != nil {
			panic(err)
		}
	}
	// Close finishes the message; check its error like the other Writer
	// calls so an incomplete fixture cannot slip through unnoticed.
	if err := w.Close(); err != nil {
		panic(err)
	}
	t.in = buf.String()
	t.sep = w.Boundary()
	return t
}