1
0
mirror of https://github.com/golang/go synced 2024-11-18 02:44:48 -07:00

mime/multipart: simplify Part.Read

The basic structure of Part.Read should be simple:
do what you can with the current buffered data,
reading more as you need it. Make it that way.

Working entirely in the bufio.Reader's buffer eliminates
the need for an additional bytes.Buffer.

This structure should be easier to extend in the future as
more special cases arise.

Change-Id: I83cb24a755a1767c4c037f9ece6716460c3ecd01
Reviewed-on: https://go-review.googlesource.com/32092
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
Russ Cox 2016-10-25 23:15:30 -04:00
parent 14f3284ddb
commit ef3df18944

View File

@ -42,9 +42,7 @@ type Part struct {
// during Read calls. // during Read calls.
Header textproto.MIMEHeader Header textproto.MIMEHeader
buffer *bytes.Buffer mr *Reader
mr *Reader
bytesRead int
disposition string disposition string
dispositionParams map[string]string dispositionParams map[string]string
@ -53,6 +51,11 @@ type Part struct {
// wrapper around such a reader, decoding the // wrapper around such a reader, decoding the
// Content-Transfer-Encoding // Content-Transfer-Encoding
r io.Reader r io.Reader
n int // known data bytes waiting in mr.bufReader
total int64 // total data bytes read already
err error // error to return when n == 0
readErr error // read error observed from mr.bufReader
} }
// FormName returns the name parameter if p has a Content-Disposition // FormName returns the name parameter if p has a Content-Disposition
@ -126,7 +129,6 @@ func newPart(mr *Reader) (*Part, error) {
bp := &Part{ bp := &Part{
Header: make(map[string][]string), Header: make(map[string][]string),
mr: mr, mr: mr,
buffer: new(bytes.Buffer),
} }
if err := bp.populateHeaders(); err != nil { if err := bp.populateHeaders(); err != nil {
return nil, err return nil, err
@ -161,65 +163,118 @@ type partReader struct {
p *Part p *Part
} }
func (pr partReader) Read(d []byte) (n int, err error) { func (pr partReader) Read(d []byte) (int, error) {
p := pr.p p := pr.p
defer func() { br := p.mr.bufReader
p.bytesRead += n
}()
if p.buffer.Len() >= len(d) {
// Internal buffer of unconsumed data is large enough for
// the read request. No need to parse more at the moment.
return p.buffer.Read(d)
}
peek, err := p.mr.bufReader.Peek(peekBufferSize) // TODO(bradfitz): add buffer size accessor
// Look for an immediate empty part without a leading \r\n // Read into buffer until we identify some data to return,
// before the boundary separator. Some MIME code makes empty // or we find a reason to stop (boundary or read error).
// parts like this. Most browsers, however, write the \r\n for p.n == 0 && p.err == nil {
// before the subsequent boundary even for empty parts and peek, _ := br.Peek(br.Buffered())
// won't hit this path. p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr)
if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) { if p.n == 0 && p.err == nil {
return 0, io.EOF // Force buffered I/O to read more into buffer.
} _, p.readErr = br.Peek(len(peek) + 1)
unexpectedEOF := err == io.EOF if p.readErr == io.EOF {
if err != nil && !unexpectedEOF { p.readErr = io.ErrUnexpectedEOF
return 0, fmt.Errorf("multipart: Part Read: %v", err) }
}
if peek == nil {
panic("nil peek buf")
}
// Search the peek buffer for "\r\n--boundary". If found,
// consume everything up to the boundary. If not, consume only
// as much of the peek buffer as cannot hold the boundary
// string.
nCopy := 0
foundBoundary := false
if idx, isEnd := p.mr.peekBufferSeparatorIndex(peek); idx != -1 {
nCopy = idx
foundBoundary = isEnd
if !isEnd && nCopy == 0 {
nCopy = 1 // make some progress.
}
} else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
nCopy = safeCount
} else if unexpectedEOF {
// If we've run out of peek buffer and the boundary
// wasn't found (and can't possibly fit), we must have
// hit the end of the file unexpectedly.
return 0, io.ErrUnexpectedEOF
}
if nCopy > 0 {
if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil {
return 0, err
} }
} }
n, err = p.buffer.Read(d)
if err == io.EOF && !foundBoundary { // Read out from "data to return" part of buffer.
// If the boundary hasn't been reached there's more to if p.n == 0 {
// read, so don't pass through an EOF from the buffer return 0, p.err
err = nil
} }
return n := len(d)
if n > p.n {
n = p.n
}
n, _ = br.Read(d[:n])
p.total += int64(n)
p.n -= n
if p.n == 0 {
return n, p.err
}
return n, nil
}
// scanUntilBoundary scans buf to identify how much of it can be safely
// returned as part of the Part body.
// dashBoundary is "--boundary".
// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in.
// The comments below (and the name) assume "\n--boundary", but either is accepted.
// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized.
// readErr is the read error, if any, that followed reading the bytes in buf.
// scanUntilBoundary returns the number of data bytes from buf that can be
// returned as part of the Part body and also the error to return (if any)
// once those data bytes are done.
func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) {
if total == 0 {
// At beginning of body, allow dashBoundary.
if bytes.HasPrefix(buf, dashBoundary) {
switch matchAfterPrefix(buf, dashBoundary, readErr) {
case -1:
return len(dashBoundary), nil
case 0:
return 0, nil
case +1:
return 0, io.EOF
}
}
if bytes.HasPrefix(dashBoundary, buf) {
return 0, readErr
}
}
// Search for "\n--boundary".
if i := bytes.Index(buf, nlDashBoundary); i >= 0 {
switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) {
case -1:
return i + len(nlDashBoundary), nil
case 0:
return i, nil
case +1:
return i, io.EOF
}
}
if bytes.HasPrefix(nlDashBoundary, buf) {
return 0, readErr
}
// Otherwise, anything up to the final \n is not part of the boundary
// and so must be part of the body.
// Also if the section from the final \n onward is not a prefix of the boundary,
// it too must be part of the body.
i := bytes.LastIndexByte(buf, nlDashBoundary[0])
if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) {
return i, nil
}
return len(buf), readErr
}
// matchAfterPrefix checks whether buf should be considered to match the boundary.
// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary",
// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true.
//
// matchAfterPrefix returns +1 if the buffer does match the boundary,
// meaning the prefix is followed by a dash, space, tab, cr, nl, or end of input.
// It returns -1 if the buffer definitely does NOT match the boundary,
// meaning the prefix is followed by some other character.
// For example, "--foobar" does not match "--foo".
// It returns 0 more input needs to be read to make the decision,
// meaning that len(buf) == len(prefix) and readErr == nil.
func matchAfterPrefix(buf, prefix []byte, readErr error) int {
if len(buf) == len(prefix) {
if readErr != nil {
return +1
}
return 0
}
c := buf[len(prefix)]
if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '-' {
return +1
}
return -1
} }
func (p *Part) Close() error { func (p *Part) Close() error {
@ -337,64 +392,6 @@ func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
return bytes.Equal(rest, mr.nl) return bytes.Equal(rest, mr.nl)
} }
// peekBufferIsEmptyPart reports whether the provided peek-ahead
// buffer represents an empty part. It is called only if we've not
// already read any bytes in this part and checks for the case of MIME
// software not writing the \r\n on empty parts. Some does, some
// doesn't.
//
// This checks that what follows the "--boundary" is actually the end
// ("--boundary--" with optional whitespace) or optional whitespace
// and then a newline, so we don't catch "--boundaryFAKE", in which
// case the whole line is part of the data.
func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
// End of parts case.
// Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
rest := peek[len(mr.dashBoundaryDash):]
rest = skipLWSPChar(rest)
return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
}
if !bytes.HasPrefix(peek, mr.dashBoundary) {
return false
}
// Test whether rest matches `^[ \t]*\r\n`)
rest := peek[len(mr.dashBoundary):]
rest = skipLWSPChar(rest)
return bytes.HasPrefix(rest, mr.nl)
}
// peekBufferSeparatorIndex returns the index of mr.nlDashBoundary in
// peek and whether it is a real boundary (and not a prefix of an
// unrelated separator). To be the end, the peek buffer must contain a
// newline after the boundary or contain the ending boundary (--separator--).
func (mr *Reader) peekBufferSeparatorIndex(peek []byte) (idx int, isEnd bool) {
idx = bytes.Index(peek, mr.nlDashBoundary)
if idx == -1 {
return
}
peek = peek[idx+len(mr.nlDashBoundary):]
if len(peek) == 0 || len(peek) == 1 && peek[0] == '-' {
return idx, false
}
if len(peek) > 1 && peek[0] == '-' && peek[1] == '-' {
return idx, true
}
peek = skipLWSPChar(peek)
// Don't have a complete line after the peek.
if bytes.IndexByte(peek, '\n') == -1 {
return idx, false
}
if len(peek) > 0 && peek[0] == '\n' {
return idx, true
}
if len(peek) > 1 && peek[0] == '\r' && peek[1] == '\n' {
return idx, true
}
return idx, false
}
// skipLWSPChar returns b with leading spaces and tabs removed. // skipLWSPChar returns b with leading spaces and tabs removed.
// RFC 822 defines: // RFC 822 defines:
// LWSP-char = SPACE / HTAB // LWSP-char = SPACE / HTAB