1
0
mirror of https://github.com/golang/go synced 2024-10-02 20:41:21 -06:00

net/mail: treat comment in address as display name

I verified this change on a corpus of > 200 GB of emails dating back to the
mid-90s. With this change, more addresses parse than before, and everything
that parsed before still parses.

In said corpus, I came across the edge case of a comment preceding an
addr-spec (with angle brackets!), e.g. “(John Doe) <john@example.com>”. As per
my reading of RFC 2822, such a comment does not satisfy the conditions to be
treated as a fallback.

This change does not parse quoted-strings within comments (a corresponding TODO
is in the code), but I have not seen that in the wild.

Fixes #22670

Change-Id: I526fcf7c6390aa1c219fdec1852f26c514506f76
Reviewed-on: https://go-review.googlesource.com/77474
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Michael Stapelberg 2017-11-14 04:46:03 -08:00 committed by Brad Fitzpatrick
parent 01c979dabe
commit fcee189776
2 changed files with 103 additions and 5 deletions

View File

@ -303,7 +303,17 @@ func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) {
// TODO(dsymonds): Is this really correct? // TODO(dsymonds): Is this really correct?
spec, err := p.consumeAddrSpec() spec, err := p.consumeAddrSpec()
if err == nil { if err == nil {
var displayName string
p.skipSpace()
if !p.empty() && p.peek() == '(' {
displayName, err = p.consumeDisplayNameComment()
if err != nil {
return nil, err
}
}
return []*Address{{ return []*Address{{
Name: displayName,
Address: spec, Address: spec,
}}, err }}, err
} }
@ -570,6 +580,30 @@ Loop:
return atom, nil return atom, nil
} }
// consumeDisplayNameComment reads a parenthesized comment at the current
// parser position and returns its text for use as a display name.
//
// The comment text is split into words on spaces and tabs. Each word is
// handed to decodeRFC2047Word and, when that reports the word as encoded,
// replaced by the decoded form. The words are then rejoined with single
// spaces.
//
// An error is returned when the input does not begin with '(', when
// consumeComment reports failure, or when decoding a word fails.
func (p *addrParser) consumeDisplayNameComment() (string, error) {
	if !p.consume('(') {
		return "", errors.New("mail: comment does not start with (")
	}
	text, closed := p.consumeComment()
	if !closed {
		return "", errors.New("mail: misformatted parenthetical comment")
	}

	// TODO(stapelberg): parse quoted-string within comment
	isBlank := func(r rune) bool { return r == ' ' || r == '\t' }
	fields := strings.FieldsFunc(text, isBlank)
	for i := range fields {
		decoded, isEncoded, err := p.decodeRFC2047Word(fields[i])
		if err != nil {
			return "", err
		}
		if !isEncoded {
			// Plain words are kept exactly as they appeared in the comment.
			continue
		}
		fields[i] = decoded
	}
	return strings.Join(fields, " "), nil
}
func (p *addrParser) consume(c byte) bool { func (p *addrParser) consume(c byte) bool {
if p.empty() || p.peek() != c { if p.empty() || p.peek() != c {
return false return false
@ -604,7 +638,7 @@ func (p *addrParser) skipCFWS() bool {
break break
} }
if !p.skipComment() { if _, ok := p.consumeComment(); !ok {
return false return false
} }
@ -614,10 +648,11 @@ func (p *addrParser) skipCFWS() bool {
return true return true
} }
func (p *addrParser) skipComment() bool { func (p *addrParser) consumeComment() (string, bool) {
// '(' already consumed. // '(' already consumed.
depth := 1 depth := 1
var comment string
for { for {
if p.empty() || depth == 0 { if p.empty() || depth == 0 {
break break
@ -630,10 +665,13 @@ func (p *addrParser) skipComment() bool {
} else if p.peek() == ')' { } else if p.peek() == ')' {
depth-- depth--
} }
if depth > 0 {
comment += p.s[:1]
}
p.s = p.s[1:] p.s = p.s[1:]
} }
return depth == 0 return comment, depth == 0
} }
func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) { func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {

View File

@ -426,7 +426,7 @@ func TestAddressParsing(t *testing.T) {
}, },
// CFWS // CFWS
{ {
`cfws@example.com (CFWS (cfws)) (another comment)`, `<cfws@example.com> (CFWS (cfws)) (another comment)`,
[]*Address{ []*Address{
{ {
Name: "", Name: "",
@ -435,7 +435,7 @@ func TestAddressParsing(t *testing.T) {
}, },
}, },
{ {
`cfws@example.com () (another comment), cfws2@example.com (another)`, `<cfws@example.com> () (another comment), <cfws2@example.com> (another)`,
[]*Address{ []*Address{
{ {
Name: "", Name: "",
@ -447,6 +447,66 @@ func TestAddressParsing(t *testing.T) {
}, },
}, },
}, },
// Comment as display name
{
`john@example.com (John Doe)`,
[]*Address{
{
Name: "John Doe",
Address: "john@example.com",
},
},
},
// Comment and display name
{
`John Doe <john@example.com> (Joey)`,
[]*Address{
{
Name: "John Doe",
Address: "john@example.com",
},
},
},
// Comment as display name, no space
{
`john@example.com(John Doe)`,
[]*Address{
{
Name: "John Doe",
Address: "john@example.com",
},
},
},
// Comment as display name, Q-encoded
{
`asjo@example.com (Adam =?utf-8?Q?Sj=C3=B8gren?=)`,
[]*Address{
{
Name: "Adam Sjøgren",
Address: "asjo@example.com",
},
},
},
// Comment as display name, Q-encoded and tab-separated
{
`asjo@example.com (Adam =?utf-8?Q?Sj=C3=B8gren?=)`,
[]*Address{
{
Name: "Adam Sjøgren",
Address: "asjo@example.com",
},
},
},
// Nested comment as display name, Q-encoded
{
`asjo@example.com (Adam =?utf-8?Q?Sj=C3=B8gren?= (Debian))`,
[]*Address{
{
Name: "Adam Sjøgren (Debian)",
Address: "asjo@example.com",
},
},
},
} }
for _, test := range tests { for _, test := range tests {
if len(test.exp) == 1 { if len(test.exp) == 1 {