mirror of
https://github.com/golang/go
synced 2024-11-18 08:44:43 -07:00
mime: limit UTF-8 encoded-word length to 75 characters
As specified by RFC 2047 section 2, encoded-words may not be more than 75 characters long. We only enforce this rule when the charset is UTF-8, since multi-byte characters must not be split across encoded-words (see section 5.3). Fixes #12300 Change-Id: I72a43fc3fe6ddeb3dab54dcdce0837d7ebf658f0 Reviewed-on: https://go-review.googlesource.com/14957 Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
9f60a0a2b0
commit
65fc379dae
@ -54,35 +54,129 @@ func (e WordEncoder) encodeWord(charset, s string) string {
|
||||
buf := getBuffer()
|
||||
defer putBuffer(buf)
|
||||
|
||||
e.openWord(buf, charset)
|
||||
if e == BEncoding {
|
||||
e.bEncode(buf, charset, s)
|
||||
} else {
|
||||
e.qEncode(buf, charset, s)
|
||||
}
|
||||
closeWord(buf)
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
const (
	// maxEncodedWordLen is the maximum length of an encoded-word, delimiters,
	// charset and encoding included: 75 characters.
	// See RFC 2047, section 2.
	maxEncodedWordLen = 75
	// maxContentLen is how much encoded content fits in a single UTF-8
	// encoded-word once the 10-byte header ("=?UTF-8?", the encoding letter
	// and "?") and the 2-byte footer ("?=") are accounted for.
	maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=")
)

// maxBase64Len is the largest number of raw bytes whose base64 encoding fits
// in maxContentLen characters.
var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)
|
||||
|
||||
// bEncode encodes s using base64 encoding and writes it to buf.
|
||||
func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) {
|
||||
w := base64.NewEncoder(base64.StdEncoding, buf)
|
||||
// If the charset is not UTF-8 or if the content is short, do not bother
|
||||
// splitting the encoded-word.
|
||||
if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
|
||||
io.WriteString(w, s)
|
||||
w.Close()
|
||||
return
|
||||
}
|
||||
|
||||
var currentLen, last, runeLen int
|
||||
for i := 0; i < len(s); i += runeLen {
|
||||
// Multi-byte characters must not be split accross encoded-words.
|
||||
// See RFC 2047, section 5.3.
|
||||
_, runeLen = utf8.DecodeRuneInString(s[i:])
|
||||
|
||||
if currentLen+runeLen <= maxBase64Len {
|
||||
currentLen += runeLen
|
||||
} else {
|
||||
io.WriteString(w, s[last:i])
|
||||
w.Close()
|
||||
e.splitWord(buf, charset)
|
||||
last = i
|
||||
currentLen = runeLen
|
||||
}
|
||||
}
|
||||
io.WriteString(w, s[last:])
|
||||
w.Close()
|
||||
}
|
||||
|
||||
// qEncode encodes s using Q encoding and writes it to buf. It splits the
|
||||
// encoded-words when necessary.
|
||||
func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) {
|
||||
// We only split encoded-words when the charset is UTF-8.
|
||||
if !isUTF8(charset) {
|
||||
writeQString(buf, s)
|
||||
return
|
||||
}
|
||||
|
||||
var currentLen, runeLen int
|
||||
for i := 0; i < len(s); i += runeLen {
|
||||
b := s[i]
|
||||
// Multi-byte characters must not be split accross encoded-words.
|
||||
// See RFC 2047, section 5.3.
|
||||
var encLen int
|
||||
if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' {
|
||||
runeLen, encLen = 1, 1
|
||||
} else {
|
||||
_, runeLen = utf8.DecodeRuneInString(s[i:])
|
||||
encLen = 3 * runeLen
|
||||
}
|
||||
|
||||
if currentLen+encLen > maxContentLen {
|
||||
e.splitWord(buf, charset)
|
||||
currentLen = 0
|
||||
}
|
||||
writeQString(buf, s[i:i+runeLen])
|
||||
currentLen += encLen
|
||||
}
|
||||
}
|
||||
|
||||
// writeQString encodes s using Q encoding and writes it to buf.
|
||||
func writeQString(buf *bytes.Buffer, s string) {
|
||||
for i := 0; i < len(s); i++ {
|
||||
switch b := s[i]; {
|
||||
case b == ' ':
|
||||
buf.WriteByte('_')
|
||||
case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
|
||||
buf.WriteByte(b)
|
||||
default:
|
||||
buf.WriteByte('=')
|
||||
buf.WriteByte(upperhex[b>>4])
|
||||
buf.WriteByte(upperhex[b&0x0f])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// openWord writes the beginning of an encoded-word into buf.
|
||||
func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) {
|
||||
buf.WriteString("=?")
|
||||
buf.WriteString(charset)
|
||||
buf.WriteByte('?')
|
||||
buf.WriteByte(byte(e))
|
||||
buf.WriteByte('?')
|
||||
}
|
||||
|
||||
if e == BEncoding {
|
||||
w := base64.NewEncoder(base64.StdEncoding, buf)
|
||||
io.WriteString(w, s)
|
||||
w.Close()
|
||||
} else {
|
||||
enc := make([]byte, 3)
|
||||
for i := 0; i < len(s); i++ {
|
||||
b := s[i]
|
||||
switch {
|
||||
case b == ' ':
|
||||
buf.WriteByte('_')
|
||||
case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
|
||||
buf.WriteByte(b)
|
||||
default:
|
||||
enc[0] = '='
|
||||
enc[1] = upperhex[b>>4]
|
||||
enc[2] = upperhex[b&0x0f]
|
||||
buf.Write(enc)
|
||||
}
|
||||
}
|
||||
}
|
||||
// closeWord appends the "?=" terminator of an encoded-word to buf. It returns
// nothing; callers read the finished text from buf themselves.
func closeWord(buf *bytes.Buffer) {
	buf.WriteString("?=")
}
|
||||
|
||||
// splitWord closes the current encoded-word and opens a new one.
|
||||
func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) {
|
||||
closeWord(buf)
|
||||
buf.WriteByte(' ')
|
||||
e.openWord(buf, charset)
|
||||
}
|
||||
|
||||
// isUTF8 reports whether charset names UTF-8, ignoring letter case.
func isUTF8(charset string) bool {
	const name = "UTF-8"
	return strings.EqualFold(name, charset)
}
|
||||
|
||||
// upperhex maps a 4-bit value (0-15) to its uppercase hexadecimal digit; it
// is indexed with the high and low nibble of a byte to build "=XX" escapes.
const upperhex = "0123456789ABCDEF"
|
||||
|
@ -27,6 +27,14 @@ func TestEncodeWord(t *testing.T) {
|
||||
{QEncoding, iso88591, "a", "a"},
|
||||
{QEncoding, utf8, "123 456", "123 456"},
|
||||
{QEncoding, utf8, "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~", "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~"},
|
||||
{QEncoding, utf8, strings.Repeat("é", 10), "=?utf-8?q?" + strings.Repeat("=C3=A9", 10) + "?="},
|
||||
{QEncoding, utf8, strings.Repeat("é", 11), "=?utf-8?q?" + strings.Repeat("=C3=A9", 10) + "?= =?utf-8?q?=C3=A9?="},
|
||||
{QEncoding, iso88591, strings.Repeat("\xe9", 22), "=?iso-8859-1?q?" + strings.Repeat("=E9", 22) + "?="},
|
||||
{QEncoding, utf8, strings.Repeat("\x80", 22), "=?utf-8?q?" + strings.Repeat("=80", 21) + "?= =?utf-8?q?=80?="},
|
||||
{BEncoding, utf8, strings.Repeat("é", 24), "=?utf-8?b?" + strings.Repeat("w6nDqcOp", 8) + "?="},
|
||||
{BEncoding, utf8, strings.Repeat("é", 27), "=?utf-8?b?" + strings.Repeat("w6nDqcOp", 8) + "?= =?utf-8?b?w6nDqcOp?="},
|
||||
{BEncoding, iso88591, strings.Repeat("\xe9", 45), "=?iso-8859-1?b?" + strings.Repeat("6enp", 15) + "?="},
|
||||
{BEncoding, utf8, strings.Repeat("\x80", 51), "=?utf-8?b?" + strings.Repeat("gICA", 16) + "?= =?utf-8?b?gICA?="},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
Loading…
Reference in New Issue
Block a user