1
0
mirror of https://github.com/golang/go synced 2024-11-19 15:05:00 -07:00

mail: address list parsing.

R=golang-dev, r, r
CC=golang-dev
https://golang.org/cl/4547084
This commit is contained in:
David Symonds 2011-06-06 16:46:14 +10:00
parent a028a02395
commit ff0198b72f
2 changed files with 371 additions and 1 deletions

View File

@ -2,17 +2,42 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package mail implements parsing of mail messages according to RFC 5322.
/*
Package mail implements parsing of mail messages.
For the most part, this package follows the syntax as specified by RFC 5322.
Notable divergences:
* Obsolete address formats are not parsed, including addresses with
embedded route information.
* Group addresses are not parsed.
* The full range of spacing (the CFWS syntax element) is not supported,
such as breaking addresses across lines.
*/
package mail
import (
"bufio"
"bytes"
"fmt"
"io"
"log"
"net/textproto"
"os"
"strconv"
"strings"
"time"
)
// debugT is a boolean switch that gates diagnostic logging.
type debugT bool

// debug controls the parser's trace output; its zero value (false)
// leaves tracing off.
var debug debugT

// Printf forwards format and args to log.Printf when d is true and
// does nothing otherwise.
func (d debugT) Printf(format string, args ...interface{}) {
	if !d {
		return
	}
	log.Printf(format, args...)
}
// A Message represents a parsed mail message.
type Message struct {
Header Header
@ -93,3 +118,277 @@ func (h Header) Date() (*time.Time, os.Error) {
}
return parseDate(hdr)
}
// AddressList fetches the header field named key and parses its value
// as a list of addresses. It returns ErrHeaderNotPresent when h.Get(key)
// yields the empty string.
func (h Header) AddressList(key string) ([]*Address, os.Error) {
	if value := h.Get(key); value != "" {
		return newAddrParser(value).parseAddressList()
	}
	return nil, ErrHeaderNotPresent
}
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}

// String formats the address for display.
// With an empty Name the result is the bare angle-addr, "<user@domain>".
// Otherwise the Name is emitted as a quoted string followed by a space
// and the angle-addr, e.g. `"Barry Gibbs" <bg@example.com>`.
//
// Quoting is delegated to strconv.Quote, which backslash-escapes embedded
// double quotes and backslashes. Note that it renders control characters
// using Go escape sequences, which is not RFC 5322 syntax.
func (a *Address) String() string {
	s := "<" + a.Address + ">"
	if a.Name == "" {
		return s
	}
	// strconv.Quote already surrounds its result with double quotes;
	// adding another pair would produce ""Name"" <addr>.
	return strconv.Quote(a.Name) + " " + s
}
// addrParser holds the not-yet-consumed bytes of the input being parsed.
type addrParser []byte

// newAddrParser returns a parser positioned at the start of s.
func newAddrParser(s string) *addrParser {
	parser := new(addrParser)
	*parser = addrParser(s)
	return parser
}
// parseAddressList parses one or more comma-separated addresses from p
// until the input is exhausted. Any parse failure, or a separator other
// than ',' between addresses, aborts with an error and a nil list.
func (p *addrParser) parseAddressList() ([]*Address, os.Error) {
	var addrs []*Address
	for done := false; !done; {
		p.skipSpace()
		a, err := p.parseAddress()
		if err != nil {
			return nil, err
		}
		addrs = append(addrs, a)

		p.skipSpace()
		switch {
		case p.empty():
			done = true
		case !p.consume(','):
			return nil, os.ErrorString("mail: expected comma")
		}
	}
	return addrs, nil
}
// parseAddress parses a single RFC 5322 address at the start of p.
// It accepts either a bare addr-spec or a name-addr (an optional
// display name followed by an addr-spec in angle brackets).
func (p *addrParser) parseAddress() (addr *Address, err os.Error) {
	debug.Printf("parseAddress: %q", *p)
	p.skipSpace()
	if p.empty() {
		return nil, os.ErrorString("mail: no address")
	}

	// address = name-addr / addr-spec
	// TODO(dsymonds): Support parsing group address.

	// addr-spec has a more restricted grammar than name-addr,
	// so try parsing it first, and fallback to name-addr.
	// TODO(dsymonds): Is this really correct?
	var spec string
	if spec, err = p.consumeAddrSpec(); err == nil {
		return &Address{Address: spec}, nil
	}
	debug.Printf("parseAddress: not an addr-spec: %v", err)
	debug.Printf("parseAddress: state is now %q", *p)

	// display-name; absent when the input starts directly with '<'.
	var name string
	if p.peek() != '<' {
		if name, err = p.consumePhrase(); err != nil {
			return nil, err
		}
	}
	debug.Printf("parseAddress: displayName=%q", name)

	// angle-addr = "<" addr-spec ">"
	p.skipSpace()
	if !p.consume('<') {
		return nil, os.ErrorString("mail: no angle-addr")
	}
	if spec, err = p.consumeAddrSpec(); err != nil {
		return nil, err
	}
	if !p.consume('>') {
		return nil, os.ErrorString("mail: unclosed angle-addr")
	}
	debug.Printf("parseAddress: spec=%q", spec)

	return &Address{Name: name, Address: spec}, nil
}
// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p
// and returns it as "local-part@domain".
// On failure p is rewound to the state it had on entry, so the caller
// can retry with a different grammar production.
func (p *addrParser) consumeAddrSpec() (spec string, err os.Error) {
	debug.Printf("consumeAddrSpec: %q", *p)

	// Snapshot the input; the deferred func reads the named result err
	// to decide whether to roll back.
	saved := *p
	defer func() {
		if err == nil {
			return
		}
		*p = saved
	}()

	p.skipSpace()
	if p.empty() {
		return "", os.ErrorString("mail: no addr-spec")
	}

	// local-part = dot-atom / quoted-string
	var local string
	switch p.peek() {
	case '"':
		// quoted-string
		debug.Printf("consumeAddrSpec: parsing quoted-string")
		local, err = p.consumeQuotedString()
	default:
		// dot-atom
		debug.Printf("consumeAddrSpec: parsing dot-atom")
		local, err = p.consumeAtom(true)
	}
	if err != nil {
		debug.Printf("consumeAddrSpec: failed: %v", err)
		return "", err
	}

	if !p.consume('@') {
		return "", os.ErrorString("mail: missing @ in addr-spec")
	}

	// domain = dot-atom / domain-literal
	p.skipSpace()
	if p.empty() {
		return "", os.ErrorString("mail: no domain in addr-spec")
	}
	// TODO(dsymonds): Handle domain-literal
	var host string
	host, err = p.consumeAtom(true)
	if err != nil {
		return "", err
	}

	return local + "@" + host, nil
}
// consumePhrase parses the RFC 5322 phrase at the start of p.
// A phrase is one or more words, each an atom or a quoted-string; the
// words are returned joined by single spaces.
//
// Parsing stops at the first byte that cannot start a word or at the end
// of the input. Either is an error only if no word was consumed; once at
// least one word has been collected, the phrase is returned.
func (p *addrParser) consumePhrase() (phrase string, err os.Error) {
	debug.Printf("consumePhrase: [%s]", *p)
	// phrase = 1*word
	var words []string
	for {
		// word = atom / quoted-string
		var word string
		p.skipSpace()
		if p.empty() {
			// End of input terminates the phrase. It is only an error
			// when nothing was collected; otherwise keep the words
			// instead of discarding them.
			if len(words) == 0 {
				return "", os.ErrorString("mail: missing phrase")
			}
			break
		}
		if p.peek() == '"' {
			// quoted-string
			word, err = p.consumeQuotedString()
		} else {
			// atom
			word, err = p.consumeAtom(false)
		}

		if err != nil {
			break
		}
		debug.Printf("consumePhrase: consumed %q", word)
		words = append(words, word)
	}
	// Ignore any error if we got at least one word.
	if err != nil && len(words) == 0 {
		debug.Printf("consumePhrase: hit err: %v", err)
		return "", os.ErrorString("mail: missing word in phrase")
	}
	return strings.Join(words, " "), nil
}
// consumeQuotedString parses the quoted string at the start of p and
// returns its contents with the surrounding quotes removed and each
// backslash escape replaced by the byte it escapes.
// The caller must ensure the first byte of p is '"'; it is not checked.
// p is advanced past the closing quote only on success.
func (p *addrParser) consumeQuotedString() (qs string, err os.Error) {
	buf := *p
	out := make([]byte, 0, 10)

	// Start after the opening quote, which is assumed to be at index 0.
	pos := 1
	for {
		if pos >= len(buf) {
			return "", os.ErrorString("mail: unclosed quoted-string")
		}
		c := buf[pos]
		if c == '"' {
			// Closing quote.
			break
		}
		if c == '\\' {
			// quoted-pair: take the following byte verbatim.
			if pos+1 == len(buf) {
				return "", os.ErrorString("mail: unclosed quoted-string")
			}
			out = append(out, buf[pos+1])
			pos += 2
			continue
		}
		if ('!' <= c && c <= '~') || c == ' ' || c == '\t' {
			// qtext (printable US-ASCII excluding " and \), or
			// FWS (almost; we're ignoring CRLF)
			out = append(out, c)
			pos++
			continue
		}
		return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
	}

	*p = buf[pos+1:]
	return string(out), nil
}
// consumeAtom parses an RFC 5322 atom at the start of p.
// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
//
// The first byte must be an atext character other than '.'. In dot mode
// every later byte may be atext or '.', so runs of dots and a trailing
// dot are accepted. An error is returned, and p is left unchanged, if p
// is empty or does not start with an atext character.
func (p *addrParser) consumeAtom(dot bool) (atom string, err os.Error) {
	// Check for empty input first: peek indexes the first byte and
	// would panic on an empty parser.
	if p.empty() || !isAtext(p.peek(), false) {
		return "", os.ErrorString("mail: invalid string")
	}
	i := 1
	for ; i < p.len() && isAtext((*p)[i], dot); i++ {
	}
	// TODO(dsymonds): Remove the []byte() conversion here when 6g doesn't need it.
	atom, *p = string([]byte((*p)[:i])), (*p)[i:]
	return atom, nil
}
// consume advances p past its first byte and reports true if that byte
// equals c. Otherwise (including when p is empty) it leaves p unchanged
// and reports false.
func (p *addrParser) consume(c byte) bool {
	if !p.empty() && p.peek() == c {
		*p = (*p)[1:]
		return true
	}
	return false
}
// skipSpace advances p past any run of leading space and tab bytes.
func (p *addrParser) skipSpace() {
	rest := bytes.TrimLeft(*p, " \t")
	*p = rest
}
// peek returns the first unconsumed byte without advancing p.
// It indexes element 0 directly, so callers must ensure p is non-empty.
func (p *addrParser) peek() byte {
	buf := *p
	return buf[0]
}
// empty reports whether no unconsumed input remains in p.
func (p *addrParser) empty() bool {
	return len(*p) == 0
}
// len returns the number of unconsumed bytes remaining in p.
func (p *addrParser) len() int {
	buf := *p
	return len(buf)
}
// atextChars is the set of bytes accepted as RFC 5322 atext:
// ASCII letters, digits and a fixed list of punctuation.
var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
	"abcdefghijklmnopqrstuvwxyz" +
	"0123456789" +
	"!#$%&'*+-/=?^_`{|}~")

// isAtext returns true if c is an RFC 5322 atext character.
// A period is not atext, so it is accepted only when dot is true.
func isAtext(c byte, dot bool) bool {
	if c == '.' {
		// '.' is not in atextChars; dot alone decides.
		return dot
	}
	return bytes.IndexByte(atextChars, c) != -1
}

View File

@ -127,3 +127,74 @@ func TestDateParsing(t *testing.T) {
}
}
}
// TestAddressParsing runs parseAddressList over a table of address-list
// strings and compares each result with the expected Address values.
func TestAddressParsing(t *testing.T) {
	type testCase struct {
		input string
		want  []*Address
	}
	cases := []testCase{
		// Bare address
		{
			`jdoe@machine.example`,
			[]*Address{&Address{
				Address: "jdoe@machine.example",
			}},
		},
		// RFC 5322, Appendix A.1.1
		{
			`John Doe <jdoe@machine.example>`,
			[]*Address{&Address{
				Name:    "John Doe",
				Address: "jdoe@machine.example",
			}},
		},
		// RFC 5322, Appendix A.1.2
		{
			`"Joe Q. Public" <john.q.public@example.com>`,
			[]*Address{&Address{
				Name:    "Joe Q. Public",
				Address: "john.q.public@example.com",
			}},
		},
		{
			`Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>`,
			[]*Address{
				&Address{
					Name:    "Mary Smith",
					Address: "mary@x.test",
				},
				&Address{
					Address: "jdoe@example.org",
				},
				&Address{
					Name:    "Who?",
					Address: "one@y.test",
				},
			},
		},
		{
			`<boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>`,
			[]*Address{
				&Address{
					Address: "boss@nil.test",
				},
				&Address{
					Name:    `Giant; "Big" Box`,
					Address: "sysservices@example.net",
				},
			},
		},
		// RFC 5322, Appendix A.1.3
		// TODO(dsymonds): Group addresses.
	}

	for _, tc := range cases {
		got, err := newAddrParser(tc.input).parseAddressList()
		if err != nil {
			t.Errorf("Failed parsing %q: %v", tc.input, err)
			continue
		}
		if reflect.DeepEqual(got, tc.want) {
			continue
		}
		t.Errorf("Parse of %q: got %+v, want %+v", tc.input, got, tc.want)
	}
}