mirror of
https://github.com/golang/go
synced 2024-11-19 15:05:00 -07:00
mail: address list parsing.
R=golang-dev, r, r CC=golang-dev https://golang.org/cl/4547084
This commit is contained in:
parent
a028a02395
commit
ff0198b72f
@ -2,17 +2,42 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package mail implements parsing of mail messages according to RFC 5322.
|
||||
/*
|
||||
Package mail implements parsing of mail messages.
|
||||
|
||||
For the most part, this package follows the syntax as specified by RFC 5322.
|
||||
Notable divergences:
|
||||
* Obsolete address formats are not parsed, including addresses with
|
||||
embedded route information.
|
||||
* Group addresses are not parsed.
|
||||
* The full range of spacing (the CFWS syntax element) is not supported,
|
||||
such as breaking addresses across lines.
|
||||
*/
|
||||
package mail
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/textproto"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// debug is the package-wide diagnostic logger. It is constructed from
// false, so its Printf calls produce no output.
var debug = debugT(false)

// debugT is a boolean switch that doubles as a conditional logger.
type debugT bool

// Printf forwards format and args to log.Printf when d is true and
// does nothing otherwise.
func (d debugT) Printf(format string, args ...interface{}) {
	if !d {
		return
	}
	log.Printf(format, args...)
}
|
||||
|
||||
// A Message represents a parsed mail message.
|
||||
type Message struct {
|
||||
Header Header
|
||||
@ -93,3 +118,277 @@ func (h Header) Date() (*time.Time, os.Error) {
|
||||
}
|
||||
return parseDate(hdr)
|
||||
}
|
||||
|
||||
// AddressList parses the named header field as a list of addresses.
|
||||
func (h Header) AddressList(key string) ([]*Address, os.Error) {
|
||||
hdr := h.Get(key)
|
||||
if hdr == "" {
|
||||
return nil, ErrHeaderNotPresent
|
||||
}
|
||||
return newAddrParser(hdr).parseAddressList()
|
||||
}
|
||||
|
||||
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}

// String formats the address for display.
// With an empty Name the result is just the angle-bracketed address,
// e.g. <bg@example.com>. Otherwise the quoted Name and a single space
// precede it, e.g. "Barry Gibbs" <bg@example.com>.
func (a *Address) String() string {
	s := "<" + a.Address + ">"
	if a.Name == "" {
		return s
	}
	// strconv.Quote already surrounds its result with double quotes and
	// escapes embedded quotes and backslashes, so no extra quote
	// characters are added around it.
	return strconv.Quote(a.Name) + " " + s
}
|
||||
|
||||
// addrParser holds the not-yet-consumed bytes of an address list.
// Its methods shrink the slice from the front as input is parsed.
type addrParser []byte

// newAddrParser returns a parser positioned at the start of s.
func newAddrParser(s string) *addrParser {
	parser := new(addrParser)
	*parser = addrParser([]byte(s))
	return parser
}
|
||||
|
||||
func (p *addrParser) parseAddressList() ([]*Address, os.Error) {
|
||||
var list []*Address
|
||||
for {
|
||||
p.skipSpace()
|
||||
addr, err := p.parseAddress()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
list = append(list, addr)
|
||||
|
||||
p.skipSpace()
|
||||
if p.empty() {
|
||||
break
|
||||
}
|
||||
if !p.consume(',') {
|
||||
return nil, os.ErrorString("mail: expected comma")
|
||||
}
|
||||
}
|
||||
return list, nil
|
||||
}
|
||||
|
||||
// parseAddress parses a single RFC 5322 address at the start of p.
|
||||
func (p *addrParser) parseAddress() (addr *Address, err os.Error) {
|
||||
debug.Printf("parseAddress: %q", *p)
|
||||
p.skipSpace()
|
||||
if p.empty() {
|
||||
return nil, os.ErrorString("mail: no address")
|
||||
}
|
||||
|
||||
// address = name-addr / addr-spec
|
||||
// TODO(dsymonds): Support parsing group address.
|
||||
|
||||
// addr-spec has a more restricted grammar than name-addr,
|
||||
// so try parsing it first, and fallback to name-addr.
|
||||
// TODO(dsymonds): Is this really correct?
|
||||
spec, err := p.consumeAddrSpec()
|
||||
if err == nil {
|
||||
return &Address{
|
||||
Address: spec,
|
||||
}, err
|
||||
}
|
||||
debug.Printf("parseAddress: not an addr-spec: %v", err)
|
||||
debug.Printf("parseAddress: state is now %q", *p)
|
||||
|
||||
// display-name
|
||||
var displayName string
|
||||
if p.peek() != '<' {
|
||||
displayName, err = p.consumePhrase()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
debug.Printf("parseAddress: displayName=%q", displayName)
|
||||
|
||||
// angle-addr = "<" addr-spec ">"
|
||||
p.skipSpace()
|
||||
if !p.consume('<') {
|
||||
return nil, os.ErrorString("mail: no angle-addr")
|
||||
}
|
||||
spec, err = p.consumeAddrSpec()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !p.consume('>') {
|
||||
return nil, os.ErrorString("mail: unclosed angle-addr")
|
||||
}
|
||||
debug.Printf("parseAddress: spec=%q", spec)
|
||||
|
||||
return &Address{
|
||||
Name: displayName,
|
||||
Address: spec,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
|
||||
func (p *addrParser) consumeAddrSpec() (spec string, err os.Error) {
|
||||
debug.Printf("consumeAddrSpec: %q", *p)
|
||||
|
||||
orig := *p
|
||||
defer func() {
|
||||
if err != nil {
|
||||
*p = orig
|
||||
}
|
||||
}()
|
||||
|
||||
// local-part = dot-atom / quoted-string
|
||||
var localPart string
|
||||
p.skipSpace()
|
||||
if p.empty() {
|
||||
return "", os.ErrorString("mail: no addr-spec")
|
||||
}
|
||||
if p.peek() == '"' {
|
||||
// quoted-string
|
||||
debug.Printf("consumeAddrSpec: parsing quoted-string")
|
||||
localPart, err = p.consumeQuotedString()
|
||||
} else {
|
||||
// dot-atom
|
||||
debug.Printf("consumeAddrSpec: parsing dot-atom")
|
||||
localPart, err = p.consumeAtom(true)
|
||||
}
|
||||
if err != nil {
|
||||
debug.Printf("consumeAddrSpec: failed: %v", err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
if !p.consume('@') {
|
||||
return "", os.ErrorString("mail: missing @ in addr-spec")
|
||||
}
|
||||
|
||||
// domain = dot-atom / domain-literal
|
||||
var domain string
|
||||
p.skipSpace()
|
||||
if p.empty() {
|
||||
return "", os.ErrorString("mail: no domain in addr-spec")
|
||||
}
|
||||
// TODO(dsymonds): Handle domain-literal
|
||||
domain, err = p.consumeAtom(true)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return localPart + "@" + domain, nil
|
||||
}
|
||||
|
||||
// consumePhrase parses the RFC 5322 phrase at the start of p.
|
||||
func (p *addrParser) consumePhrase() (phrase string, err os.Error) {
|
||||
debug.Printf("consumePhrase: [%s]", *p)
|
||||
// phrase = 1*word
|
||||
var words []string
|
||||
for {
|
||||
// word = atom / quoted-string
|
||||
var word string
|
||||
p.skipSpace()
|
||||
if p.empty() {
|
||||
return "", os.ErrorString("mail: missing phrase")
|
||||
}
|
||||
if p.peek() == '"' {
|
||||
// quoted-string
|
||||
word, err = p.consumeQuotedString()
|
||||
} else {
|
||||
// atom
|
||||
word, err = p.consumeAtom(false)
|
||||
}
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
debug.Printf("consumePhrase: consumed %q", word)
|
||||
words = append(words, word)
|
||||
}
|
||||
// Ignore any error if we got at least one word.
|
||||
if err != nil && len(words) == 0 {
|
||||
debug.Printf("consumePhrase: hit err: %v", err)
|
||||
return "", os.ErrorString("mail: missing word in phrase")
|
||||
}
|
||||
return strings.Join(words, " "), nil
|
||||
}
|
||||
|
||||
// consumeQuotedString parses the quoted string at the start of p.
|
||||
func (p *addrParser) consumeQuotedString() (qs string, err os.Error) {
|
||||
// Assume first byte is '"'.
|
||||
i := 1
|
||||
qsb := make([]byte, 0, 10)
|
||||
Loop:
|
||||
for {
|
||||
if i >= p.len() {
|
||||
return "", os.ErrorString("mail: unclosed quoted-string")
|
||||
}
|
||||
switch c := (*p)[i]; {
|
||||
case c == '"':
|
||||
break Loop
|
||||
case c == '\\':
|
||||
if i+1 == p.len() {
|
||||
return "", os.ErrorString("mail: unclosed quoted-string")
|
||||
}
|
||||
qsb = append(qsb, (*p)[i+1])
|
||||
i += 2
|
||||
case '!' <= c && c <= '~', c == ' ' || c == '\t':
|
||||
// qtext (printable US-ASCII excluding " and \), or
|
||||
// FWS (almost; we're ignoring CRLF)
|
||||
qsb = append(qsb, c)
|
||||
i++
|
||||
default:
|
||||
return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
|
||||
}
|
||||
}
|
||||
*p = (*p)[i+1:]
|
||||
return string(qsb), nil
|
||||
}
|
||||
|
||||
// consumeAtom parses an RFC 5322 atom at the start of p.
|
||||
// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
|
||||
func (p *addrParser) consumeAtom(dot bool) (atom string, err os.Error) {
|
||||
if !isAtext(p.peek(), false) {
|
||||
return "", os.ErrorString("mail: invalid string")
|
||||
}
|
||||
i := 1
|
||||
for ; i < p.len() && isAtext((*p)[i], dot); i++ {
|
||||
}
|
||||
// TODO(dsymonds): Remove the []byte() conversion here when 6g doesn't need it.
|
||||
atom, *p = string([]byte((*p)[:i])), (*p)[i:]
|
||||
return atom, nil
|
||||
}
|
||||
|
||||
func (p *addrParser) consume(c byte) bool {
|
||||
if p.empty() || p.peek() != c {
|
||||
return false
|
||||
}
|
||||
*p = (*p)[1:]
|
||||
return true
|
||||
}
|
||||
|
||||
// skipSpace skips the leading space and tab characters.
|
||||
func (p *addrParser) skipSpace() {
|
||||
*p = bytes.TrimLeft(*p, " \t")
|
||||
}
|
||||
|
||||
func (p *addrParser) peek() byte {
|
||||
return (*p)[0]
|
||||
}
|
||||
|
||||
func (p *addrParser) empty() bool {
|
||||
return p.len() == 0
|
||||
}
|
||||
|
||||
func (p *addrParser) len() int {
|
||||
return len(*p)
|
||||
}
|
||||
|
||||
// atextChars lists the bytes that isAtext accepts: ASCII letters,
// digits, and the punctuation below. The period is not in the list.
var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
	"abcdefghijklmnopqrstuvwxyz" +
	"0123456789" +
	"!#$%&'*+-/=?^_`{|}~")

// isAtext returns true if c is an RFC 5322 atext character.
// If dot is true, period is included.
func isAtext(c byte, dot bool) bool {
	if c == '.' {
		// '.' is never in atextChars, so the answer is exactly dot.
		return dot
	}
	return bytes.IndexByte(atextChars, c) != -1
}
|
||||
|
@ -127,3 +127,74 @@ func TestDateParsing(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAddressParsing(t *testing.T) {
|
||||
tests := []struct {
|
||||
addrsStr string
|
||||
exp []*Address
|
||||
}{
|
||||
// Bare address
|
||||
{
|
||||
`jdoe@machine.example`,
|
||||
[]*Address{&Address{
|
||||
Address: "jdoe@machine.example",
|
||||
}},
|
||||
},
|
||||
// RFC 5322, Appendix A.1.1
|
||||
{
|
||||
`John Doe <jdoe@machine.example>`,
|
||||
[]*Address{&Address{
|
||||
Name: "John Doe",
|
||||
Address: "jdoe@machine.example",
|
||||
}},
|
||||
},
|
||||
// RFC 5322, Appendix A.1.2
|
||||
{
|
||||
`"Joe Q. Public" <john.q.public@example.com>`,
|
||||
[]*Address{&Address{
|
||||
Name: "Joe Q. Public",
|
||||
Address: "john.q.public@example.com",
|
||||
}},
|
||||
},
|
||||
{
|
||||
`Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>`,
|
||||
[]*Address{
|
||||
&Address{
|
||||
Name: "Mary Smith",
|
||||
Address: "mary@x.test",
|
||||
},
|
||||
&Address{
|
||||
Address: "jdoe@example.org",
|
||||
},
|
||||
&Address{
|
||||
Name: "Who?",
|
||||
Address: "one@y.test",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
`<boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>`,
|
||||
[]*Address{
|
||||
&Address{
|
||||
Address: "boss@nil.test",
|
||||
},
|
||||
&Address{
|
||||
Name: `Giant; "Big" Box`,
|
||||
Address: "sysservices@example.net",
|
||||
},
|
||||
},
|
||||
},
|
||||
// RFC 5322, Appendix A.1.3
|
||||
// TODO(dsymonds): Group addresses.
|
||||
}
|
||||
for _, test := range tests {
|
||||
addrs, err := newAddrParser(test.addrsStr).parseAddressList()
|
||||
if err != nil {
|
||||
t.Errorf("Failed parsing %q: %v", test.addrsStr, err)
|
||||
continue
|
||||
}
|
||||
if !reflect.DeepEqual(addrs, test.exp) {
|
||||
t.Errorf("Parse of %q: got %+v, want %+v", test.addrsStr, addrs, test.exp)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user