mirror of
https://github.com/golang/go
synced 2024-11-22 01:54:42 -07:00
mime: RFC 2231 continuation / non-ASCII support
Fixes #1119. R=rsc, r CC=golang-dev https://golang.org/cl/4437052
This commit is contained in:
parent
23fc9c84bd
commit
98176b7760
@ -6,6 +6,8 @@ package mime
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
)
|
)
|
||||||
@ -46,11 +48,16 @@ func ParseMediaType(v string) (mediatype string, params map[string]string) {
|
|||||||
|
|
||||||
params = make(map[string]string)
|
params = make(map[string]string)
|
||||||
|
|
||||||
|
// Map of base parameter name -> parameter name -> value
|
||||||
|
// for parameters containing a '*' character.
|
||||||
|
// Lazily initialized.
|
||||||
|
var continuation map[string]map[string]string
|
||||||
|
|
||||||
v = v[i:]
|
v = v[i:]
|
||||||
for len(v) > 0 {
|
for len(v) > 0 {
|
||||||
v = strings.TrimLeftFunc(v, unicode.IsSpace)
|
v = strings.TrimLeftFunc(v, unicode.IsSpace)
|
||||||
if len(v) == 0 {
|
if len(v) == 0 {
|
||||||
return
|
break
|
||||||
}
|
}
|
||||||
key, value, rest := consumeMediaParam(v)
|
key, value, rest := consumeMediaParam(v)
|
||||||
if key == "" {
|
if key == "" {
|
||||||
@ -62,12 +69,83 @@ func ParseMediaType(v string) (mediatype string, params map[string]string) {
|
|||||||
// Parse error.
|
// Parse error.
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
params[key] = value
|
|
||||||
|
pmap := params
|
||||||
|
if idx := strings.Index(key, "*"); idx != -1 {
|
||||||
|
baseName := key[:idx]
|
||||||
|
if continuation == nil {
|
||||||
|
continuation = make(map[string]map[string]string)
|
||||||
|
}
|
||||||
|
var ok bool
|
||||||
|
if pmap, ok = continuation[baseName]; !ok {
|
||||||
|
continuation[baseName] = make(map[string]string)
|
||||||
|
pmap = continuation[baseName]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if _, exists := pmap[key]; exists {
|
||||||
|
// Duplicate parameter name is bogus.
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
pmap[key] = value
|
||||||
v = rest
|
v = rest
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stitch together any continuations or things with stars
|
||||||
|
// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
|
||||||
|
var buf bytes.Buffer
|
||||||
|
for key, pieceMap := range continuation {
|
||||||
|
singlePartKey := key + "*"
|
||||||
|
if v, ok := pieceMap[singlePartKey]; ok {
|
||||||
|
decv := decode2231Enc(v)
|
||||||
|
params[key] = decv
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.Reset()
|
||||||
|
valid := false
|
||||||
|
for n := 0; ; n++ {
|
||||||
|
simplePart := fmt.Sprintf("%s*%d", key, n)
|
||||||
|
if v, ok := pieceMap[simplePart]; ok {
|
||||||
|
valid = true
|
||||||
|
buf.WriteString(v)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
encodedPart := simplePart + "*"
|
||||||
|
if v, ok := pieceMap[encodedPart]; ok {
|
||||||
|
valid = true
|
||||||
|
if n == 0 {
|
||||||
|
buf.WriteString(decode2231Enc(v))
|
||||||
|
} else {
|
||||||
|
decv, _ := percentHexUnescape(v)
|
||||||
|
buf.WriteString(decv)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if valid {
|
||||||
|
params[key] = buf.String()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func decode2231Enc(v string) string {
|
||||||
|
sv := strings.Split(v, "'", 3)
|
||||||
|
if len(sv) != 3 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
// Ignoring lang in sv[1] for now.
|
||||||
|
charset := strings.ToLower(sv[0])
|
||||||
|
if charset != "us-ascii" && charset != "utf-8" {
|
||||||
|
// TODO: unsupported encoding
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
encv, _ := percentHexUnescape(sv[2])
|
||||||
|
return encv
|
||||||
|
}
|
||||||
|
|
||||||
func isNotTokenChar(rune int) bool {
|
func isNotTokenChar(rune int) bool {
|
||||||
return !IsTokenChar(rune)
|
return !IsTokenChar(rune)
|
||||||
}
|
}
|
||||||
@ -107,17 +185,14 @@ func consumeValue(v string) (value, rest string) {
|
|||||||
for idx, rune = range rest {
|
for idx, rune = range rest {
|
||||||
switch {
|
switch {
|
||||||
case nextIsLiteral:
|
case nextIsLiteral:
|
||||||
if rune >= 0x80 {
|
|
||||||
return "", v
|
|
||||||
}
|
|
||||||
buffer.WriteRune(rune)
|
buffer.WriteRune(rune)
|
||||||
nextIsLiteral = false
|
nextIsLiteral = false
|
||||||
case rune == leadQuote:
|
case rune == leadQuote:
|
||||||
return buffer.String(), rest[idx+1:]
|
return buffer.String(), rest[idx+1:]
|
||||||
case IsQText(rune):
|
|
||||||
buffer.WriteRune(rune)
|
|
||||||
case rune == '\\':
|
case rune == '\\':
|
||||||
nextIsLiteral = true
|
nextIsLiteral = true
|
||||||
|
case rune != '\r' && rune != '\n':
|
||||||
|
buffer.WriteRune(rune)
|
||||||
default:
|
default:
|
||||||
return "", v
|
return "", v
|
||||||
}
|
}
|
||||||
@ -137,6 +212,7 @@ func consumeMediaParam(v string) (param, value, rest string) {
|
|||||||
if param == "" {
|
if param == "" {
|
||||||
return "", "", v
|
return "", "", v
|
||||||
}
|
}
|
||||||
|
|
||||||
rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
|
rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
|
||||||
if !strings.HasPrefix(rest, "=") {
|
if !strings.HasPrefix(rest, "=") {
|
||||||
return "", "", v
|
return "", "", v
|
||||||
@ -149,3 +225,66 @@ func consumeMediaParam(v string) (param, value, rest string) {
|
|||||||
}
|
}
|
||||||
return param, value, rest
|
return param, value, rest
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func percentHexUnescape(s string) (string, os.Error) {
|
||||||
|
// Count %, check that they're well-formed.
|
||||||
|
percents := 0
|
||||||
|
for i := 0; i < len(s); {
|
||||||
|
if s[i] != '%' {
|
||||||
|
i++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
percents++
|
||||||
|
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
|
||||||
|
s = s[i:]
|
||||||
|
if len(s) > 3 {
|
||||||
|
s = s[0:3]
|
||||||
|
}
|
||||||
|
return "", fmt.Errorf("Bogus characters after %: %q", s)
|
||||||
|
}
|
||||||
|
i += 3
|
||||||
|
}
|
||||||
|
if percents == 0 {
|
||||||
|
return s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
t := make([]byte, len(s)-2*percents)
|
||||||
|
j := 0
|
||||||
|
for i := 0; i < len(s); {
|
||||||
|
switch s[i] {
|
||||||
|
case '%':
|
||||||
|
t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
|
||||||
|
j++
|
||||||
|
i += 3
|
||||||
|
default:
|
||||||
|
t[j] = s[i]
|
||||||
|
j++
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return string(t), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ishex reports whether c is an ASCII hexadecimal digit:
// '0'-'9', 'a'-'f' or 'A'-'F'.
func ishex(c byte) bool {
	isDigit := '0' <= c && c <= '9'
	isLower := 'a' <= c && c <= 'f'
	isUpper := 'A' <= c && c <= 'F'
	return isDigit || isLower || isUpper
}
|
||||||
|
|
||||||
|
// unhex returns the numeric value (0-15) of the ASCII hexadecimal
// digit c. Any byte that is not a hex digit yields 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return 10 + (c - 'a')
	}
	if 'A' <= c && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
|
||||||
|
@ -114,6 +114,28 @@ func TestParseMediaType(t *testing.T) {
|
|||||||
"form-data",
|
"form-data",
|
||||||
m("key", "value", "blah", "value", "name", "foo")},
|
m("key", "value", "blah", "value", "name", "foo")},
|
||||||
|
|
||||||
|
{`foo; key=val1; key=the-key-appears-again-which-is-bogus`,
|
||||||
|
"", m()},
|
||||||
|
|
||||||
|
// From RFC 2231:
|
||||||
|
{`application/x-stuff; title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A`,
|
||||||
|
"application/x-stuff",
|
||||||
|
m("title", "This is ***fun***")},
|
||||||
|
|
||||||
|
{`message/external-body; access-type=URL; ` +
|
||||||
|
`URL*0="ftp://";` +
|
||||||
|
`URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"`,
|
||||||
|
"message/external-body",
|
||||||
|
m("access-type", "URL",
|
||||||
|
"URL", "ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar")},
|
||||||
|
|
||||||
|
{`application/x-stuff; ` +
|
||||||
|
`title*0*=us-ascii'en'This%20is%20even%20more%20; ` +
|
||||||
|
`title*1*=%2A%2A%2Afun%2A%2A%2A%20; ` +
|
||||||
|
`title*2="isn't it!"`,
|
||||||
|
"application/x-stuff",
|
||||||
|
m("title", "This is even more ***fun*** isn't it!")},
|
||||||
|
|
||||||
// Tests from http://greenbytes.de/tech/tc2231/
|
// Tests from http://greenbytes.de/tech/tc2231/
|
||||||
// TODO(bradfitz): add the rest of the tests from that site.
|
// TODO(bradfitz): add the rest of the tests from that site.
|
||||||
{`attachment; filename="f\oo.html"`,
|
{`attachment; filename="f\oo.html"`,
|
||||||
@ -159,8 +181,41 @@ func TestParseMediaType(t *testing.T) {
|
|||||||
"attachment",
|
"attachment",
|
||||||
m("creation-date", "Wed, 12 Feb 1997 16:29:51 -0500")},
|
m("creation-date", "Wed, 12 Feb 1997 16:29:51 -0500")},
|
||||||
{`foobar`, "foobar", m()},
|
{`foobar`, "foobar", m()},
|
||||||
// TODO(bradfitz): rest of them, including RFC2231 encoded UTF-8 and
|
{`attachment; filename* =UTF-8''foo-%c3%a4.html`,
|
||||||
// other charsets.
|
"attachment",
|
||||||
|
m("filename", "foo-ä.html")},
|
||||||
|
{`attachment; filename*=UTF-8''A-%2541.html`,
|
||||||
|
"attachment",
|
||||||
|
m("filename", "A-%41.html")},
|
||||||
|
{`attachment; filename*0="foo."; filename*1="html"`,
|
||||||
|
"attachment",
|
||||||
|
m("filename", "foo.html")},
|
||||||
|
{`attachment; filename*0*=UTF-8''foo-%c3%a4; filename*1=".html"`,
|
||||||
|
"attachment",
|
||||||
|
m("filename", "foo-ä.html")},
|
||||||
|
{`attachment; filename*0="foo"; filename*01="bar"`,
|
||||||
|
"attachment",
|
||||||
|
m("filename", "foo")},
|
||||||
|
{`attachment; filename*0="foo"; filename*2="bar"`,
|
||||||
|
"attachment",
|
||||||
|
m("filename", "foo")},
|
||||||
|
{`attachment; filename*1="foo"; filename*2="bar"`,
|
||||||
|
"attachment", m()},
|
||||||
|
{`attachment; filename*1="bar"; filename*0="foo"`,
|
||||||
|
"attachment",
|
||||||
|
m("filename", "foobar")},
|
||||||
|
{`attachment; filename="foo-ae.html"; filename*=UTF-8''foo-%c3%a4.html`,
|
||||||
|
"attachment",
|
||||||
|
m("filename", "foo-ä.html")},
|
||||||
|
{`attachment; filename*=UTF-8''foo-%c3%a4.html; filename="foo-ae.html"`,
|
||||||
|
"attachment",
|
||||||
|
m("filename", "foo-ä.html")},
|
||||||
|
|
||||||
|
// Browsers also just send UTF-8 directly without RFC 2231,
|
||||||
|
// at least when the source page is served with UTF-8.
|
||||||
|
{`form-data; firstname="Брэд"; lastname="Фицпатрик"`,
|
||||||
|
"form-data",
|
||||||
|
m("firstname", "Брэд", "lastname", "Фицпатрик")},
|
||||||
}
|
}
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
mt, params := ParseMediaType(test.in)
|
mt, params := ParseMediaType(test.in)
|
||||||
|
Loading…
Reference in New Issue
Block a user