1
0
mirror of https://github.com/golang/go synced 2024-11-23 13:00:07 -07:00

mime: Export RFC 2047 code

Fixes #4943
Fixes #4687
Fixes #7079

Change-Id: Ia96f07d650a3af935cd75fd7e3253f4af2977429
Reviewed-on: https://go-review.googlesource.com/7890
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
Alexandre Cesaro 2015-03-20 10:22:55 +01:00 committed by Brad Fitzpatrick
parent e3a9a08a0b
commit 2b03610842
5 changed files with 638 additions and 166 deletions

View File

@ -184,42 +184,43 @@ var pkgDeps = map[string][]string{
},
// One of a kind.
"archive/tar": {"L4", "OS", "syscall"},
"archive/zip": {"L4", "OS", "compress/flate"},
"compress/bzip2": {"L4"},
"compress/flate": {"L4"},
"compress/gzip": {"L4", "compress/flate"},
"compress/lzw": {"L4"},
"compress/zlib": {"L4", "compress/flate"},
"database/sql": {"L4", "container/list", "database/sql/driver"},
"database/sql/driver": {"L4", "time"},
"debug/dwarf": {"L4"},
"debug/elf": {"L4", "OS", "debug/dwarf"},
"debug/gosym": {"L4"},
"debug/macho": {"L4", "OS", "debug/dwarf"},
"debug/pe": {"L4", "OS", "debug/dwarf"},
"encoding": {"L4"},
"encoding/ascii85": {"L4"},
"encoding/asn1": {"L4", "math/big"},
"encoding/csv": {"L4"},
"encoding/gob": {"L4", "OS", "encoding"},
"encoding/hex": {"L4"},
"encoding/json": {"L4", "encoding"},
"encoding/pem": {"L4"},
"encoding/xml": {"L4", "encoding"},
"flag": {"L4", "OS"},
"go/build": {"L4", "OS", "GOPARSER"},
"html": {"L4"},
"image/draw": {"L4", "image/internal/imageutil"},
"image/gif": {"L4", "compress/lzw", "image/color/palette", "image/draw"},
"image/jpeg": {"L4", "image/internal/imageutil"},
"image/png": {"L4", "compress/zlib"},
"index/suffixarray": {"L4", "regexp"},
"math/big": {"L4"},
"mime": {"L4", "OS", "syscall", "internal/syscall/windows/registry"},
"net/url": {"L4"},
"text/scanner": {"L4", "OS"},
"text/template/parse": {"L4"},
"archive/tar": {"L4", "OS", "syscall"},
"archive/zip": {"L4", "OS", "compress/flate"},
"compress/bzip2": {"L4"},
"compress/flate": {"L4"},
"compress/gzip": {"L4", "compress/flate"},
"compress/lzw": {"L4"},
"compress/zlib": {"L4", "compress/flate"},
"database/sql": {"L4", "container/list", "database/sql/driver"},
"database/sql/driver": {"L4", "time"},
"debug/dwarf": {"L4"},
"debug/elf": {"L4", "OS", "debug/dwarf"},
"debug/gosym": {"L4"},
"debug/macho": {"L4", "OS", "debug/dwarf"},
"debug/pe": {"L4", "OS", "debug/dwarf"},
"encoding": {"L4"},
"encoding/ascii85": {"L4"},
"encoding/asn1": {"L4", "math/big"},
"encoding/csv": {"L4"},
"encoding/gob": {"L4", "OS", "encoding"},
"encoding/hex": {"L4"},
"encoding/json": {"L4", "encoding"},
"encoding/pem": {"L4"},
"encoding/xml": {"L4", "encoding"},
"flag": {"L4", "OS"},
"go/build": {"L4", "OS", "GOPARSER"},
"html": {"L4"},
"image/draw": {"L4", "image/internal/imageutil"},
"image/gif": {"L4", "compress/lzw", "image/color/palette", "image/draw"},
"image/jpeg": {"L4", "image/internal/imageutil"},
"image/png": {"L4", "compress/zlib"},
"index/suffixarray": {"L4", "regexp"},
"math/big": {"L4"},
"mime": {"L4", "OS", "syscall", "internal/syscall/windows/registry"},
"mime/quotedprintable": {"L4"},
"net/url": {"L4"},
"text/scanner": {"L4", "OS"},
"text/template/parse": {"L4"},
"html/template": {
"L4", "OS", "encoding/json", "html", "text/template",
@ -255,7 +256,7 @@ var pkgDeps = map[string][]string{
// Uses of networking.
"log/syslog": {"L4", "OS", "net"},
"net/mail": {"L4", "NET", "OS", "internal/mime"},
"net/mail": {"L4", "NET", "OS", "mime"},
"net/textproto": {"L4", "OS", "net"},
// Core crypto.
@ -347,13 +348,11 @@ var pkgDeps = map[string][]string{
"go/types": {"bytes", "container/heap", "fmt", "go/ast", "go/constants", "go/parser", "go/token", "io", "math", "path", "sort", "strconv", "strings", "sync", "unicode"},
"image/internal/imageutil": {"image"},
"internal/format": {"bytes", "go/ast", "go/parser", "go/printer", "go/token", "strings"},
"internal/mime": {"bytes", "encoding/base64", "errors", "fmt", "io", "io/ioutil", "strconv", "strings", "unicode"},
"internal/singleflight": {"sync"},
"internal/syscall/unix": {"runtime", "sync/atomic", "syscall", "unsafe"},
"internal/syscall/windows": {"syscall", "unsafe"},
"internal/syscall/windows/registry": {"errors", "io", "syscall", "unicode/utf16", "unsafe"},
"internal/trace": {"bufio", "bytes", "fmt", "io", "os", "os/exec", "sort", "strconv", "strings"},
"mime/quotedprintable": {"bufio", "bytes", "fmt", "io"},
"net/http/cookiejar": {"errors", "fmt", "net", "net/http", "net/url", "sort", "strings", "sync", "time", "unicode/utf8"},
"net/http/internal": {"bufio", "bytes", "errors", "fmt", "io"},
"net/internal/socktest": {"fmt", "sync", "syscall"},

View File

@ -1,122 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mime
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
"io"
"io/ioutil"
"strconv"
"strings"
"unicode"
)
// EncodeWord encodes a string into an RFC 2047 encoded-word.
func EncodeWord(s string) string {
// UTF-8 "Q" encoding
b := bytes.NewBufferString("=?utf-8?q?")
for i := 0; i < len(s); i++ {
switch c := s[i]; {
case c == ' ':
b.WriteByte('_')
case isVchar(c) && c != '=' && c != '?' && c != '_':
b.WriteByte(c)
default:
fmt.Fprintf(b, "=%02X", c)
}
}
b.WriteString("?=")
return b.String()
}
// DecodeWord decodes an RFC 2047 encoded-word.
func DecodeWord(s string) (string, error) {
fields := strings.Split(s, "?")
if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" {
return "", errors.New("address not RFC 2047 encoded")
}
charset, enc := strings.ToLower(fields[1]), strings.ToLower(fields[2])
if charset != "us-ascii" && charset != "iso-8859-1" && charset != "utf-8" {
return "", fmt.Errorf("charset not supported: %q", charset)
}
in := bytes.NewBufferString(fields[3])
var r io.Reader
switch enc {
case "b":
r = base64.NewDecoder(base64.StdEncoding, in)
case "q":
r = qDecoder{r: in}
default:
return "", fmt.Errorf("RFC 2047 encoding not supported: %q", enc)
}
dec, err := ioutil.ReadAll(r)
if err != nil {
return "", err
}
switch charset {
case "us-ascii":
b := new(bytes.Buffer)
for _, c := range dec {
if c >= 0x80 {
b.WriteRune(unicode.ReplacementChar)
} else {
b.WriteRune(rune(c))
}
}
return b.String(), nil
case "iso-8859-1":
b := new(bytes.Buffer)
for _, c := range dec {
b.WriteRune(rune(c))
}
return b.String(), nil
case "utf-8":
return string(dec), nil
}
panic("unreachable")
}
type qDecoder struct {
r io.Reader
scratch [2]byte
}
func (qd qDecoder) Read(p []byte) (n int, err error) {
// This method writes at most one byte into p.
if len(p) == 0 {
return 0, nil
}
if _, err := qd.r.Read(qd.scratch[:1]); err != nil {
return 0, err
}
switch c := qd.scratch[0]; {
case c == '=':
if _, err := io.ReadFull(qd.r, qd.scratch[:2]); err != nil {
return 0, err
}
x, err := strconv.ParseInt(string(qd.scratch[:2]), 16, 64)
if err != nil {
return 0, fmt.Errorf("mime: invalid RFC 2047 encoding: %q", qd.scratch[:2])
}
p[0] = byte(x)
case c == '_':
p[0] = ' '
default:
p[0] = c
}
return 1, nil
}
// isVchar returns true if c is an RFC 5322 VCHAR character.
func isVchar(c byte) bool {
// Visible (printing) characters.
return '!' <= c && c <= '~'
}

329
src/mime/encodedword.go Normal file
View File

@ -0,0 +1,329 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mime
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
"io"
"strings"
"sync"
"unicode"
"unicode/utf8"
)
// A WordEncoder is a RFC 2047 encoded-word encoder.
type WordEncoder byte
const (
// BEncoding represents Base64 encoding scheme as defined by RFC 2045.
BEncoding = WordEncoder('b')
// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
QEncoding = WordEncoder('q')
)
var (
errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
)
// Encode returns the encoded-word form of s. If s is ASCII without special
// characters, it is returned unchanged. The provided charset is the IANA
// charset name of s. It is case insensitive.
func (e WordEncoder) Encode(charset, s string) string {
if !needsEncoding(s) {
return s
}
return e.encodeWord(charset, s)
}
func needsEncoding(s string) bool {
for _, b := range s {
if (b < ' ' || b > '~') && b != '\t' {
return true
}
}
return false
}
// encodeWord encodes a string into an encoded-word.
func (e WordEncoder) encodeWord(charset, s string) string {
buf := getBuffer()
defer putBuffer(buf)
buf.WriteString("=?")
buf.WriteString(charset)
buf.WriteByte('?')
buf.WriteByte(byte(e))
buf.WriteByte('?')
if e == BEncoding {
w := base64.NewEncoder(base64.StdEncoding, buf)
io.WriteString(w, s)
w.Close()
} else {
enc := make([]byte, 3)
for i := 0; i < len(s); i++ {
b := s[i]
switch {
case b == ' ':
buf.WriteByte('_')
case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
buf.WriteByte(b)
default:
enc[0] = '='
enc[1] = upperhex[b>>4]
enc[2] = upperhex[b&0x0f]
buf.Write(enc)
}
}
}
buf.WriteString("?=")
return buf.String()
}
const upperhex = "0123456789ABCDEF"
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
// CharsetReader, if non-nil, defines a function to generate
// charset-conversion readers, converting from the provided
// charset into UTF-8.
// Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
// are handled by default.
// One of the the CharsetReader's result values must be non-nil.
CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
// Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word,
// word is returned unchanged.
func (d *WordDecoder) Decode(word string) (string, error) {
fields := strings.Split(word, "?") // TODO: remove allocation?
if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 {
return "", errInvalidWord
}
content, err := decode(fields[2][0], fields[3])
if err != nil {
return "", err
}
buf := getBuffer()
defer putBuffer(buf)
if err := d.convert(buf, fields[1], content); err != nil {
return "", err
}
return buf.String(), nil
}
// DecodeHeader decodes all encoded-words of the given string. It returns an
// error if and only if CharsetReader of d returns an error.
func (d *WordDecoder) DecodeHeader(header string) (string, error) {
// If there is no encoded-word, returns before creating a buffer.
i := strings.Index(header, "=?")
if i == -1 {
return header, nil
}
buf := getBuffer()
defer putBuffer(buf)
buf.WriteString(header[:i])
header = header[i:]
betweenWords := false
for {
start := strings.Index(header, "=?")
if start == -1 {
break
}
cur := start + len("=?")
i := strings.Index(header[cur:], "?")
if i == -1 {
break
}
charset := header[cur : cur+i]
cur += i + len("?")
if len(header) < cur+len("Q??=") {
break
}
encoding := header[cur]
cur++
if header[cur] != '?' {
break
}
cur++
j := strings.Index(header[cur:], "?=")
if j == -1 {
break
}
text := header[cur : cur+j]
end := cur + j + len("?=")
content, err := decode(encoding, text)
if err != nil {
betweenWords = false
buf.WriteString(header[:start+2])
header = header[start+2:]
continue
}
// Write characters before the encoded-word. White-space and newline
// characters separating two encoded-words must be deleted.
if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
buf.WriteString(header[:start])
}
if err := d.convert(buf, charset, content); err != nil {
return "", err
}
header = header[end:]
betweenWords = true
}
if len(header) > 0 {
buf.WriteString(header)
}
return buf.String(), nil
}
func decode(encoding byte, text string) ([]byte, error) {
switch encoding {
case 'B', 'b':
return base64.StdEncoding.DecodeString(text)
case 'Q', 'q':
return qDecode(text)
default:
return nil, errInvalidWord
}
}
func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
switch {
case strings.EqualFold("utf-8", charset):
buf.Write(content)
case strings.EqualFold("iso-8859-1", charset):
for _, c := range content {
buf.WriteRune(rune(c))
}
case strings.EqualFold("us-ascii", charset):
for _, c := range content {
if c >= utf8.RuneSelf {
buf.WriteRune(unicode.ReplacementChar)
} else {
buf.WriteByte(c)
}
}
default:
if d.CharsetReader == nil {
return fmt.Errorf("mime: unhandled charset %q", charset)
}
r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
if err != nil {
return err
}
if _, err = buf.ReadFrom(r); err != nil {
return err
}
}
return nil
}
// hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
// one byte of non-whitespace.
func hasNonWhitespace(s string) bool {
for _, b := range s {
switch b {
// Encoded-words can only be separated by linear white spaces which does
// not include vertical tabs (\v).
case ' ', '\t', '\n', '\r':
default:
return true
}
}
return false
}
// qDecode decodes a Q encoded string.
func qDecode(s string) ([]byte, error) {
dec := make([]byte, len(s))
n := 0
for i := 0; i < len(s); i++ {
switch c := s[i]; {
case c == '_':
dec[n] = ' '
case c == '=':
if i+2 >= len(s) {
return nil, errInvalidWord
}
b, err := readHexByte(s[i+1], s[i+2])
if err != nil {
return nil, err
}
dec[n] = b
i += 2
case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
dec[n] = c
default:
return nil, errInvalidWord
}
n++
}
return dec[:n], nil
}
// readHexByte returns the byte from its quoted-printable representation.
func readHexByte(a, b byte) (byte, error) {
var hb, lb byte
var err error
if hb, err = fromHex(a); err != nil {
return 0, err
}
if lb, err = fromHex(b); err != nil {
return 0, err
}
return hb<<4 | lb, nil
}
func fromHex(b byte) (byte, error) {
switch {
case b >= '0' && b <= '9':
return b - '0', nil
case b >= 'A' && b <= 'F':
return b - 'A' + 10, nil
// Accept badly encoded bytes.
case b >= 'a' && b <= 'f':
return b - 'a' + 10, nil
}
return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
}
var bufPool = sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
}
func getBuffer() *bytes.Buffer {
return bufPool.Get().(*bytes.Buffer)
}
func putBuffer(buf *bytes.Buffer) {
if buf.Len() > 1024 {
return
}
buf.Reset()
bufPool.Put(buf)
}

View File

@ -0,0 +1,241 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mime
import (
"errors"
"fmt"
"io"
"io/ioutil"
"strings"
"testing"
)
func ExampleEncodeWord() {
fmt.Println(QEncoding.Encode("utf-8", "¡Hola, señor!"))
fmt.Println(QEncoding.Encode("utf-8", "Hello!"))
fmt.Println(BEncoding.Encode("UTF-8", "¡Hola, señor!"))
fmt.Println(QEncoding.Encode("ISO-8859-1", "Caf\xE9"))
// Output:
// =?utf-8?q?=C2=A1Hola,_se=C3=B1or!?=
// Hello!
// =?UTF-8?b?wqFIb2xhLCBzZcOxb3Ih?=
// =?ISO-8859-1?q?Caf=E9?=
}
func ExampleDecodeWord() {
dec := new(WordDecoder)
header, err := dec.DecodeHeader("=?utf-8?q?=C2=A1Hola,_se=C3=B1or!?=")
if err != nil {
panic(err)
}
fmt.Println(header)
// Output: ¡Hola, señor!
}
func ExampleDecodeHeader() {
dec := new(WordDecoder)
header, err := dec.DecodeHeader("=?utf-8?q?=C3=89ric?= <eric@example.org>, =?utf-8?q?Ana=C3=AFs?= <anais@example.org>")
if err != nil {
panic(err)
}
fmt.Println(header)
header, err = dec.DecodeHeader("=?utf-8?q?=C2=A1Hola,?= =?utf-8?q?_se=C3=B1or!?=")
if err != nil {
panic(err)
}
fmt.Println(header)
// Output:
// Éric <eric@example.org>, Anaïs <anais@example.org>
// ¡Hola, señor!
}
func TestEncodeWord(t *testing.T) {
utf8, iso88591 := "utf-8", "iso-8859-1"
tests := []struct {
enc WordEncoder
charset string
src, exp string
}{
{QEncoding, utf8, "François-Jérôme", "=?utf-8?q?Fran=C3=A7ois-J=C3=A9r=C3=B4me?="},
{BEncoding, utf8, "Café", "=?utf-8?b?Q2Fmw6k=?="},
{QEncoding, iso88591, "La Seleção", "=?iso-8859-1?q?La_Sele=C3=A7=C3=A3o?="},
{QEncoding, utf8, "", ""},
{QEncoding, utf8, "A", "A"},
{QEncoding, iso88591, "a", "a"},
{QEncoding, utf8, "123 456", "123 456"},
{QEncoding, utf8, "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~", "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~"},
}
for _, test := range tests {
if s := test.enc.Encode(test.charset, test.src); s != test.exp {
t.Errorf("Encode(%q) = %q, want %q", test.src, s, test.exp)
}
}
}
func TestDecodeWord(t *testing.T) {
tests := []struct {
src, exp string
hasErr bool
}{
{"=?UTF-8?Q?=C2=A1Hola,_se=C3=B1or!?=", "¡Hola, señor!", false},
{"=?UTF-8?Q?Fran=C3=A7ois-J=C3=A9r=C3=B4me?=", "François-Jérôme", false},
{"=?UTF-8?q?ascii?=", "ascii", false},
{"=?utf-8?B?QW5kcsOp?=", "André", false},
{"=?ISO-8859-1?Q?Rapha=EBl_Dupont?=", "Raphaël Dupont", false},
{"=?utf-8?b?IkFudG9uaW8gSm9zw6kiIDxqb3NlQGV4YW1wbGUub3JnPg==?=", `"Antonio José" <jose@example.org>`, false},
{"=?UTF-8?A?Test?=", "", true},
{"=?UTF-8?Q?A=B?=", "", true},
{"=?UTF-8?Q?=A?=", "", true},
{"=?UTF-8?A?A?=", "", true},
}
for _, test := range tests {
dec := new(WordDecoder)
s, err := dec.Decode(test.src)
if test.hasErr && err == nil {
t.Errorf("Decode(%q) should return an error", test.src)
continue
}
if !test.hasErr && err != nil {
t.Errorf("Decode(%q): %v", test.src, err)
continue
}
if s != test.exp {
t.Errorf("Decode(%q) = %q, want %q", test.src, s, test.exp)
}
}
}
func TestDecodeHeader(t *testing.T) {
tests := []struct {
src, exp string
}{
{"=?UTF-8?Q?=C2=A1Hola,_se=C3=B1or!?=", "¡Hola, señor!"},
{"=?UTF-8?Q?Fran=C3=A7ois-J=C3=A9r=C3=B4me?=", "François-Jérôme"},
{"=?UTF-8?q?ascii?=", "ascii"},
{"=?utf-8?B?QW5kcsOp?=", "André"},
{"=?ISO-8859-1?Q?Rapha=EBl_Dupont?=", "Raphaël Dupont"},
{"Jean", "Jean"},
{"=?utf-8?b?IkFudG9uaW8gSm9zw6kiIDxqb3NlQGV4YW1wbGUub3JnPg==?=", `"Antonio José" <jose@example.org>`},
{"=?UTF-8?A?Test?=", "=?UTF-8?A?Test?="},
{"=?UTF-8?Q?A=B?=", "=?UTF-8?Q?A=B?="},
{"=?UTF-8?Q?=A?=", "=?UTF-8?Q?=A?="},
{"=?UTF-8?A?A?=", "=?UTF-8?A?A?="},
// Incomplete words
{"=?", "=?"},
{"=?UTF-8?", "=?UTF-8?"},
{"=?UTF-8?=", "=?UTF-8?="},
{"=?UTF-8?Q", "=?UTF-8?Q"},
{"=?UTF-8?Q?", "=?UTF-8?Q?"},
{"=?UTF-8?Q?=", "=?UTF-8?Q?="},
{"=?UTF-8?Q?A", "=?UTF-8?Q?A"},
{"=?UTF-8?Q?A?", "=?UTF-8?Q?A?"},
// Tests from RFC 2047
{"=?ISO-8859-1?Q?a?=", "a"},
{"=?ISO-8859-1?Q?a?= b", "a b"},
{"=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=", "ab"},
{"=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=", "ab"},
{"=?ISO-8859-1?Q?a?= \r\n\t =?ISO-8859-1?Q?b?=", "ab"},
{"=?ISO-8859-1?Q?a_b?=", "a b"},
}
for _, test := range tests {
dec := new(WordDecoder)
s, err := dec.DecodeHeader(test.src)
if err != nil {
t.Errorf("DecodeHeader(%q): %v", test.src, err)
}
if s != test.exp {
t.Errorf("DecodeHeader(%q) = %q, want %q", test.src, s, test.exp)
}
}
}
func TestCharsetDecoder(t *testing.T) {
tests := []struct {
src string
want string
charsets []string
content []string
}{
{"=?utf-8?b?Q2Fmw6k=?=", "Café", nil, nil},
{"=?ISO-8859-1?Q?caf=E9?=", "café", nil, nil},
{"=?US-ASCII?Q?foo_bar?=", "foo bar", nil, nil},
{"=?utf-8?Q?=?=", "=?utf-8?Q?=?=", nil, nil},
{"=?utf-8?Q?=A?=", "=?utf-8?Q?=A?=", nil, nil},
{
"=?ISO-8859-15?Q?f=F5=F6?= =?windows-1252?Q?b=E0r?=",
"f\xf5\xf6b\xe0r",
[]string{"iso-8859-15", "windows-1252"},
[]string{"f\xf5\xf6", "b\xe0r"},
},
}
for _, test := range tests {
i := 0
dec := &WordDecoder{
CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
if charset != test.charsets[i] {
t.Errorf("DecodeHeader(%q), got charset %q, want %q", test.src, charset, test.charsets[i])
}
content, err := ioutil.ReadAll(input)
if err != nil {
t.Errorf("DecodeHeader(%q), error in reader: %v", test.src, err)
}
got := string(content)
if got != test.content[i] {
t.Errorf("DecodeHeader(%q), got content %q, want %q", test.src, got, test.content[i])
}
i++
return strings.NewReader(got), nil
},
}
got, err := dec.DecodeHeader(test.src)
if err != nil {
t.Errorf("DecodeHeader(%q): %v", test.src, err)
}
if got != test.want {
t.Errorf("DecodeHeader(%q) = %q, want %q", test.src, got, test.want)
}
}
}
func TestCharsetDecoderError(t *testing.T) {
dec := &WordDecoder{
CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
return nil, errors.New("Test error")
},
}
if _, err := dec.DecodeHeader("=?charset?Q?foo?="); err == nil {
t.Error("DecodeHeader should return an error")
}
}
func BenchmarkQEncodeWord(b *testing.B) {
for i := 0; i < b.N; i++ {
QEncoding.Encode("UTF-8", "¡Hola, señor!")
}
}
func BenchmarkQDecodeWord(b *testing.B) {
dec := new(WordDecoder)
for i := 0; i < b.N; i++ {
dec.Decode("=?utf-8?q?=C2=A1Hola,_se=C3=B1or!?=")
}
}
func BenchmarkQDecodeHeader(b *testing.B) {
dec := new(WordDecoder)
for i := 0; i < b.N; i++ {
dec.Decode("=?utf-8?q?=C2=A1Hola,_se=C3=B1or!?=")
}
}

View File

@ -20,9 +20,9 @@ import (
"bytes"
"errors"
"fmt"
"internal/mime"
"io"
"log"
"mime"
"net/textproto"
"strings"
"time"
@ -177,7 +177,7 @@ func (a *Address) String() string {
return b.String()
}
return mime.EncodeWord(a.Name) + " " + s
return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
}
type addrParser []byte
@ -333,9 +333,8 @@ func (p *addrParser) consumePhrase() (phrase string, err error) {
word, err = p.consumeAtom(true)
}
// RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s.
if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 {
word, err = mime.DecodeWord(word)
if err == nil {
word, err = decodeRFC2047Word(word)
}
if err != nil {
@ -423,6 +422,32 @@ func (p *addrParser) len() int {
return len(*p)
}
func decodeRFC2047Word(s string) (string, error) {
dec, err := rfc2047Decoder.Decode(s)
if err == nil {
return dec, nil
}
if _, ok := err.(charsetError); ok {
return s, err
}
// Ignore invalid RFC 2047 encoded-word errors.
return s, nil
}
var rfc2047Decoder = mime.WordDecoder{
CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
return nil, charsetError(charset)
},
}
type charsetError string
func (e charsetError) Error() string {
return fmt.Sprintf("charset not supported: %q", string(e))
}
var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
"abcdefghijklmnopqrstuvwxyz" +
"0123456789" +