mime/multipart: fix handling of empty parts without CRLF before next part

Empty parts can be either of the form: a) "--separator\r\n", header (w/ trailing 2xCRLF), \r\n "--separator"... or b) "--separator\r\n", header (w/ trailing 2xCRLF), "--separator"... We never handled case b). In fact the RFC seems kinda vague about it, but browsers seem to do a), and App Engine's synthetic POST bodies after blob uploads is of form b). So handle them both, and add a bunch of tests. (I can't promise these are the last fixes to multipart, especially considering its history, but I'm growing increasingly confident at least, and I've never submitted a multipart CL with known bugs outstanding, including this time.) R=golang-dev, adg CC=golang-dev https://golang.org/cl/6212046
2024-11-22 16:25:07 -07:00 · 2012-05-14 18:16:47 -07:00 · 2012-05-14 18:16:47 -07:00 · e393a8292e
commit e393a8292e
parent a90cbd741a
2 changed files with 292 additions and 68 deletions
--- a/src/pkg/mime/multipart/multipart.go
+++ b/src/pkg/mime/multipart/multipart.go
@ -22,11 +22,6 @@ import (
 	"net/textproto"
 )
 // TODO(bradfitz): inline these once the compiler can inline them in
 // read-only situation (such as bytes.HasSuffix)
 var lf = []byte("\n")
 var crlf = []byte("\r\n")
 var emptyParams = make(map[string]string)
 // A Part represents a single part in a multipart body.
@ -38,6 +33,7 @@ type Part struct {
 	buffer    *bytes.Buffer
 	mr        *Reader
 	bytesRead int
 	disposition       string
 	dispositionParams map[string]string
@ -113,14 +109,26 @@ func (bp *Part) populateHeaders() error {
 // Read reads the body of a part, after its headers and before the
 // next part (if any) begins.
 func (p *Part) Read(d []byte) (n int, err error) {
 	defer func() {
 		p.bytesRead += n
 	}()
 	if p.buffer.Len() >= len(d) {
 		// Internal buffer of unconsumed data is large enough for
 		// the read request.  No need to parse more at the moment.
 		return p.buffer.Read(d)
 	}
 	peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor
-	unexpectedEof := err == io.EOF
+
-	if err != nil && !unexpectedEof {
+	// Look for an immediate empty part without a leading \r\n
 	// before the boundary separator.  Some MIME code makes empty
 	// parts like this. Most browsers, however, write the \r\n
 	// before the subsequent boundary even for empty parts and
 	// won't hit this path.
 	if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) {
 		return 0, io.EOF
 	}
 	unexpectedEOF := err == io.EOF
 	if err != nil && !unexpectedEOF {
 		return 0, fmt.Errorf("multipart: Part Read: %v", err)
 	}
 	if peek == nil {
@ -138,7 +146,7 @@ func (p *Part) Read(d []byte) (n int, err error) {
 		foundBoundary = true
 	} else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
 		nCopy = safeCount
-	} else if unexpectedEof {
+	} else if unexpectedEOF {
 		// If we've run out of peek buffer and the boundary
 		// wasn't found (and can't possibly fit), we must have
 		// hit the end of the file unexpectedly.
@ -172,7 +180,10 @@ type Reader struct {
 	currentPart *Part
 	partsRead   int
-	nl, nlDashBoundary, dashBoundaryDash, dashBoundary []byte
+	nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
 	nlDashBoundary   []byte // nl + "--boundary"
 	dashBoundaryDash []byte // "--boundary--"
 	dashBoundary     []byte // "--boundary"
 }
 // NextPart returns the next part in the multipart or an error.
@ -185,7 +196,7 @@ func (r *Reader) NextPart() (*Part, error) {
 	expectNewPart := false
 	for {
 		line, err := r.bufReader.ReadSlice('\n')
-		if err == io.EOF && bytes.Equal(line, r.dashBoundaryDash) {
+		if err == io.EOF && r.isFinalBoundary(line) {
 			// If the buffer ends in "--boundary--" without the
 			// trailing "\r\n", ReadSlice will return an error
 			// (since it's missing the '\n'), but this is a valid
@ -207,7 +218,7 @@ func (r *Reader) NextPart() (*Part, error) {
 			return bp, nil
 		}
-		if hasPrefixThenNewline(line, r.dashBoundaryDash) {
+		if r.isFinalBoundary(line) {
 			// Expected EOF
 			return nil, io.EOF
 		}
@ -235,7 +246,19 @@ func (r *Reader) NextPart() (*Part, error) {
 	panic("unreachable")
 }
-func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
+// isFinalBoundary returns whether line is the final boundary line
 // indiciating that all parts are over.
 // It matches `^--boundary--[ \t]*(\r\n)?$`
 func (mr *Reader) isFinalBoundary(line []byte) bool {
 	if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
 		return false
 	}
 	rest := line[len(mr.dashBoundaryDash):]
 	rest = skipLWSPChar(rest)
 	return len(rest) == 0 || bytes.Equal(rest, mr.nl)
 }
 func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
 	// http://tools.ietf.org/html/rfc2046#section-5.1
 	//   The boundary delimiter line is then defined as a line
 	//   consisting entirely of two hyphen characters ("-",
@ -245,32 +268,52 @@ func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
 	if !bytes.HasPrefix(line, mr.dashBoundary) {
 		return false
 	}
-	if bytes.HasSuffix(line, mr.nl) {
+	rest := line[len(mr.dashBoundary):]
-		return onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-len(mr.nl)])
+	rest = skipLWSPChar(rest)
-	}
+
-	// Violate the spec and also support newlines without the
+	// On the first part, see our lines are ending in \n instead of \r\n
-	// carriage return...
+	// and switch into that mode if so.  This is a violation of the spec,
-	if mr.partsRead == 0 && bytes.HasSuffix(line, lf) {
+	// but occurs in practice.
-		if onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-1]) {
+	if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
 		mr.nl = mr.nl[1:]
 		mr.nlDashBoundary = mr.nlDashBoundary[1:]
 			return true
 	}
-	}
+	return bytes.Equal(rest, mr.nl)
 	return false
 }
-func onlyHorizontalWhitespace(s []byte) bool {
+// peekBufferIsEmptyPart returns whether the provided peek-ahead
-	for _, b := range s {
+// buffer represents an empty part.  This is only called if we've not
-		if b != ' ' && b != '\t' {
+// already read any bytes in this part and checks for the case of MIME
 // software not writing the \r\n on empty parts. Some does, some
 // doesn't.
 //
 // This checks that what follows the "--boundary" is actually the end
 // ("--boundary--" with optional whitespace) or optional whitespace
 // and then a newline, so we don't catch "--boundaryFAKE", in which
 // case the whole line is part of the data.
 func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
 	// End of parts case.
 	// Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
 	if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
 		rest := peek[len(mr.dashBoundaryDash):]
 		rest = skipLWSPChar(rest)
 		return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
 	}
 	if !bytes.HasPrefix(peek, mr.dashBoundary) {
 		return false
 	}
-	}
+	// Test whether rest matches `^[ \t]*\r\n`)
-	return true
+	rest := peek[len(mr.dashBoundary):]
 	rest = skipLWSPChar(rest)
 	return bytes.HasPrefix(rest, mr.nl)
 }
-func hasPrefixThenNewline(s, prefix []byte) bool {
+// skipLWSPChar returns b with leading spaces and tabs removed.
-	return bytes.HasPrefix(s, prefix) &&
+// RFC 822 defines:
-		(len(s) == len(prefix)+1 && s[len(s)-1] == '\n' ||
+//    LWSP-char = SPACE / HTAB
-			len(s) == len(prefix)+2 && bytes.HasSuffix(s, crlf))
+func skipLWSPChar(b []byte) []byte {
 	for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
 		b = b[1:]
 	}
 	return b
 }
--- a/src/pkg/mime/multipart/multipart_test.go
+++ b/src/pkg/mime/multipart/multipart_test.go
@ -10,20 +10,13 @@ import (
 	"fmt"
 	"io"
 	"io/ioutil"
 	"net/textproto"
 	"os"
 	"reflect"
 	"strings"
 	"testing"
 )
 func TestHorizontalWhitespace(t *testing.T) {
 	if !onlyHorizontalWhitespace([]byte(" \t")) {
 		t.Error("expected pass")
 	}
 	if onlyHorizontalWhitespace([]byte("foo bar")) {
 		t.Error("expected failure")
 	}
 }
 func TestBoundaryLine(t *testing.T) {
 	mr := NewReader(strings.NewReader(""), "myBoundary")
 	if !mr.isBoundaryDelimiterLine([]byte("--myBoundary\r\n")) {
@ -319,29 +312,6 @@ Oh no, premature EOF!
 	}
 }
 func TestZeroLengthBody(t *testing.T) {
 	testBody := strings.Replace(`
 This is a multi-part message.  This line is ignored.
 --MyBoundary
 foo: bar
 --MyBoundary--
 `, "\n", "\r\n", -1)
 	r := NewReader(strings.NewReader(testBody), "MyBoundary")
 	part, err := r.NextPart()
 	if err != nil {
 		t.Fatalf("didn't get a part")
 	}
 	n, err := io.Copy(ioutil.Discard, part)
 	if err != nil {
 		t.Errorf("error reading part: %v", err)
 	}
 	if n != 0 {
 		t.Errorf("read %d bytes; expected 0", n)
 	}
 }
 type slowReader struct {
 	r io.Reader
 }
@ -427,3 +397,214 @@ func TestNested(t *testing.T) {
 		t.Fatalf("final outer NextPart = %v; want io.EOF", err)
 	}
 }
 type headerBody struct {
 	header textproto.MIMEHeader
 	body   string
 }
 func formData(key, value string) headerBody {
 	return headerBody{
 		textproto.MIMEHeader{
 			"Content-Type":        {"text/plain; charset=ISO-8859-1"},
 			"Content-Disposition": {"form-data; name=" + key},
 		},
 		value,
 	}
 }
 type parseTest struct {
 	name    string
 	in, sep string
 	want    []headerBody
 }
 var parseTests = []parseTest{
 	// Actual body from App Engine on a blob upload. The final part (the
 	// Content-Type: message/external-body) is what App Engine replaces
 	// the uploaded file with.  The other form fields (prefixed with
 	// "other" in their form-data name) are unchanged.  A bug was
 	// reported with blob uploads failing when the other fields were
 	// empty. This was the MIME POST body that previously failed.
 	{
 		name: "App Engine post",
 		sep:  "00151757727e9583fd04bfbca4c6",
 		in:   "--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty1\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo1\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo2\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty2\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=submit\r\n\r\nSubmit\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\nContent-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n\r\n--00151757727e9583fd04bfbca4c6--",
 		want: []headerBody{
 			formData("otherEmpty1", ""),
 			formData("otherFoo1", "foo"),
 			formData("otherFoo2", "foo"),
 			formData("otherEmpty2", ""),
 			formData("otherRepeatFoo", "foo"),
 			formData("otherRepeatFoo", "foo"),
 			formData("otherRepeatEmpty", ""),
 			formData("otherRepeatEmpty", ""),
 			formData("submit", "Submit"),
 			{textproto.MIMEHeader{
 				"Content-Type":        {"message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q"},
 				"Content-Disposition": {"form-data; name=file; filename=\"fall.png\""},
 			}, "Content-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n"},
 		},
 	},
 	// Single empty part, ended with --boundary immediately after headers.
 	{
 		name: "single empty part, --boundary",
 		sep:  "abc",
 		in:   "--abc\r\nFoo: bar\r\n\r\n--abc--",
 		want: []headerBody{
 			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
 		},
 	},
 	// Single empty part, ended with \r\n--boundary immediately after headers.
 	{
 		name: "single empty part, \r\n--boundary",
 		sep:  "abc",
 		in:   "--abc\r\nFoo: bar\r\n\r\n\r\n--abc--",
 		want: []headerBody{
 			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
 		},
 	},
 	// Final part empty.
 	{
 		name: "final part empty",
 		sep:  "abc",
 		in:   "--abc\r\nFoo: bar\r\n\r\n--abc\r\nFoo2: bar2\r\n\r\n--abc--",
 		want: []headerBody{
 			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
 			{textproto.MIMEHeader{"Foo2": {"bar2"}}, ""},
 		},
 	},
 	// Final part empty with newlines after final separator.
 	{
 		name: "final part empty then crlf",
 		sep:  "abc",
 		in:   "--abc\r\nFoo: bar\r\n\r\n--abc--\r\n",
 		want: []headerBody{
 			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
 		},
 	},
 	// Final part empty with lwsp-chars after final separator.
 	{
 		name: "final part empty then lwsp",
 		sep:  "abc",
 		in:   "--abc\r\nFoo: bar\r\n\r\n--abc-- \t",
 		want: []headerBody{
 			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
 		},
 	},
 	// No parts (empty form as submitted by Chrome)
 	{
 		name: "no parts",
 		sep:  "----WebKitFormBoundaryQfEAfzFOiSemeHfA",
 		in:   "------WebKitFormBoundaryQfEAfzFOiSemeHfA--\r\n",
 		want: []headerBody{},
 	},
 	// Part containing data starting with the boundary, but with additional suffix.
 	{
 		name: "fake separator as data",
 		sep:  "sep",
 		in:   "--sep\r\nFoo: bar\r\n\r\n--sepFAKE\r\n--sep--",
 		want: []headerBody{
 			{textproto.MIMEHeader{"Foo": {"bar"}}, "--sepFAKE"},
 		},
 	},
 	// Part containing a boundary with whitespace following it.
 	{
 		name: "boundary with whitespace",
 		sep:  "sep",
 		in:   "--sep \r\nFoo: bar\r\n\r\ntext\r\n--sep--",
 		want: []headerBody{
 			{textproto.MIMEHeader{"Foo": {"bar"}}, "text"},
 		},
 	},
 	// With ignored leading line.
 	{
 		name: "leading line",
 		sep:  "MyBoundary",
 		in: strings.Replace(`This is a multi-part message.  This line is ignored.
 --MyBoundary
 foo: bar
 --MyBoundary--`, "\n", "\r\n", -1),
 		want: []headerBody{
 			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
 		},
 	},
 	roundTripParseTest(),
 }
 func TestParse(t *testing.T) {
 Cases:
 	for _, tt := range parseTests {
 		r := NewReader(strings.NewReader(tt.in), tt.sep)
 		got := []headerBody{}
 		for {
 			p, err := r.NextPart()
 			if err == io.EOF {
 				break
 			}
 			if err != nil {
 				t.Errorf("in test %q, NextPart: %v", tt.name, err)
 				continue Cases
 			}
 			pbody, err := ioutil.ReadAll(p)
 			if err != nil {
 				t.Errorf("in test %q, error reading part: %v", tt.name, err)
 				continue Cases
 			}
 			got = append(got, headerBody{p.Header, string(pbody)})
 		}
 		if !reflect.DeepEqual(tt.want, got) {
 			t.Errorf("test %q:\n got: %v\nwant: %v", tt.name, got, tt.want)
 			if len(tt.want) != len(got) {
 				t.Errorf("test %q: got %d parts, want %d", tt.name, len(got), len(tt.want))
 			} else if len(got) > 1 {
 				for pi, wantPart := range tt.want {
 					if !reflect.DeepEqual(wantPart, got[pi]) {
 						t.Errorf("test %q, part %d:\n got: %v\nwant: %v", tt.name, pi, got[pi], wantPart)
 					}
 				}
 			}
 		}
 	}
 }
 func roundTripParseTest() parseTest {
 	t := parseTest{
 		name: "round trip",
 		want: []headerBody{
 			formData("empty", ""),
 			formData("lf", "\n"),
 			formData("cr", "\r"),
 			formData("crlf", "\r\n"),
 			formData("foo", "bar"),
 		},
 	}
 	var buf bytes.Buffer
 	w := NewWriter(&buf)
 	for _, p := range t.want {
 		pw, err := w.CreatePart(p.header)
 		if err != nil {
 			panic(err)
 		}
 		_, err = pw.Write([]byte(p.body))
 		if err != nil {
 			panic(err)
 		}
 	}
 	w.Close()
 	t.in = buf.String()
 	t.sep = w.Boundary()
 	return t
 }