mirror of
https://github.com/golang/go
synced 2024-11-19 23:44:43 -07:00
net/mail: treat comment in address as display name
I verified this change on a corpus of > 200 GB of emails since the mid-90s. With this change, more addresses parse than before, and anything which parsed before still parses. In said corpus, I came across the edge case of comments preceding an addr-spec (with angle brackets!), e.g. “(John Doe) <john@example.com>”, which does not satisfy the conditions to be treated as a fallback, as per my reading of RFC2822. This change does not parse quoted-strings within comments (a corresponding TODO is in the code), but I have not seen that in the wild. Fixes #22670 Change-Id: I526fcf7c6390aa1c219fdec1852f26c514506f76 Reviewed-on: https://go-review.googlesource.com/77474 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
01c979dabe
commit
fcee189776
@ -303,7 +303,17 @@ func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) {
|
|||||||
// TODO(dsymonds): Is this really correct?
|
// TODO(dsymonds): Is this really correct?
|
||||||
spec, err := p.consumeAddrSpec()
|
spec, err := p.consumeAddrSpec()
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
var displayName string
|
||||||
|
p.skipSpace()
|
||||||
|
if !p.empty() && p.peek() == '(' {
|
||||||
|
displayName, err = p.consumeDisplayNameComment()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return []*Address{{
|
return []*Address{{
|
||||||
|
Name: displayName,
|
||||||
Address: spec,
|
Address: spec,
|
||||||
}}, err
|
}}, err
|
||||||
}
|
}
|
||||||
@ -570,6 +580,30 @@ Loop:
|
|||||||
return atom, nil
|
return atom, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *addrParser) consumeDisplayNameComment() (string, error) {
|
||||||
|
if !p.consume('(') {
|
||||||
|
return "", errors.New("mail: comment does not start with (")
|
||||||
|
}
|
||||||
|
comment, ok := p.consumeComment()
|
||||||
|
if !ok {
|
||||||
|
return "", errors.New("mail: misformatted parenthetical comment")
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(stapelberg): parse quoted-string within comment
|
||||||
|
words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' })
|
||||||
|
for idx, word := range words {
|
||||||
|
decoded, isEncoded, err := p.decodeRFC2047Word(word)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if isEncoded {
|
||||||
|
words[idx] = decoded
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.Join(words, " "), nil
|
||||||
|
}
|
||||||
|
|
||||||
func (p *addrParser) consume(c byte) bool {
|
func (p *addrParser) consume(c byte) bool {
|
||||||
if p.empty() || p.peek() != c {
|
if p.empty() || p.peek() != c {
|
||||||
return false
|
return false
|
||||||
@ -604,7 +638,7 @@ func (p *addrParser) skipCFWS() bool {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
if !p.skipComment() {
|
if _, ok := p.consumeComment(); !ok {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -614,10 +648,11 @@ func (p *addrParser) skipCFWS() bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *addrParser) skipComment() bool {
|
func (p *addrParser) consumeComment() (string, bool) {
|
||||||
// '(' already consumed.
|
// '(' already consumed.
|
||||||
depth := 1
|
depth := 1
|
||||||
|
|
||||||
|
var comment string
|
||||||
for {
|
for {
|
||||||
if p.empty() || depth == 0 {
|
if p.empty() || depth == 0 {
|
||||||
break
|
break
|
||||||
@ -630,10 +665,13 @@ func (p *addrParser) skipComment() bool {
|
|||||||
} else if p.peek() == ')' {
|
} else if p.peek() == ')' {
|
||||||
depth--
|
depth--
|
||||||
}
|
}
|
||||||
|
if depth > 0 {
|
||||||
|
comment += p.s[:1]
|
||||||
|
}
|
||||||
p.s = p.s[1:]
|
p.s = p.s[1:]
|
||||||
}
|
}
|
||||||
|
|
||||||
return depth == 0
|
return comment, depth == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
|
func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
|
||||||
|
@ -426,7 +426,7 @@ func TestAddressParsing(t *testing.T) {
|
|||||||
},
|
},
|
||||||
// CFWS
|
// CFWS
|
||||||
{
|
{
|
||||||
`cfws@example.com (CFWS (cfws)) (another comment)`,
|
`<cfws@example.com> (CFWS (cfws)) (another comment)`,
|
||||||
[]*Address{
|
[]*Address{
|
||||||
{
|
{
|
||||||
Name: "",
|
Name: "",
|
||||||
@ -435,7 +435,7 @@ func TestAddressParsing(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
`cfws@example.com () (another comment), cfws2@example.com (another)`,
|
`<cfws@example.com> () (another comment), <cfws2@example.com> (another)`,
|
||||||
[]*Address{
|
[]*Address{
|
||||||
{
|
{
|
||||||
Name: "",
|
Name: "",
|
||||||
@ -447,6 +447,66 @@ func TestAddressParsing(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
// Comment as display name
|
||||||
|
{
|
||||||
|
`john@example.com (John Doe)`,
|
||||||
|
[]*Address{
|
||||||
|
{
|
||||||
|
Name: "John Doe",
|
||||||
|
Address: "john@example.com",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
// Comment and display name
|
||||||
|
{
|
||||||
|
`John Doe <john@example.com> (Joey)`,
|
||||||
|
[]*Address{
|
||||||
|
{
|
||||||
|
Name: "John Doe",
|
||||||
|
Address: "john@example.com",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
// Comment as display name, no space
|
||||||
|
{
|
||||||
|
`john@example.com(John Doe)`,
|
||||||
|
[]*Address{
|
||||||
|
{
|
||||||
|
Name: "John Doe",
|
||||||
|
Address: "john@example.com",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
// Comment as display name, Q-encoded
|
||||||
|
{
|
||||||
|
`asjo@example.com (Adam =?utf-8?Q?Sj=C3=B8gren?=)`,
|
||||||
|
[]*Address{
|
||||||
|
{
|
||||||
|
Name: "Adam Sjøgren",
|
||||||
|
Address: "asjo@example.com",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
// Comment as display name, Q-encoded and tab-separated
|
||||||
|
{
|
||||||
|
`asjo@example.com (Adam =?utf-8?Q?Sj=C3=B8gren?=)`,
|
||||||
|
[]*Address{
|
||||||
|
{
|
||||||
|
Name: "Adam Sjøgren",
|
||||||
|
Address: "asjo@example.com",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
// Nested comment as display name, Q-encoded
|
||||||
|
{
|
||||||
|
`asjo@example.com (Adam =?utf-8?Q?Sj=C3=B8gren?= (Debian))`,
|
||||||
|
[]*Address{
|
||||||
|
{
|
||||||
|
Name: "Adam Sjøgren (Debian)",
|
||||||
|
Address: "asjo@example.com",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
if len(test.exp) == 1 {
|
if len(test.exp) == 1 {
|
||||||
|
Loading…
Reference in New Issue
Block a user