mirror of
https://github.com/golang/go
synced 2024-11-22 09:44:40 -07:00
encoding/xml: add, support Unmarshaler interface
See golang.org/s/go12xml for design. R=golang-dev, dominik.honnef, dan.kortschak CC=golang-dev https://golang.org/cl/12556043
This commit is contained in:
parent
7e886740d1
commit
84b0842a59
@ -7,6 +7,7 @@ package xml
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@ -137,6 +138,100 @@ type UnmarshalError string
|
|||||||
|
|
||||||
func (e UnmarshalError) Error() string { return string(e) }
|
func (e UnmarshalError) Error() string { return string(e) }
|
||||||
|
|
||||||
|
// Unmarshaler is the interface implemented by objects that can unmarshal
|
||||||
|
// an XML element description of themselves.
|
||||||
|
//
|
||||||
|
// UnmarshalXML decodes a single XML element
|
||||||
|
// beginning with the given start element.
|
||||||
|
// If it returns an error, the outer call to Unmarshal stops and
|
||||||
|
// returns that error.
|
||||||
|
// UnmarshalXML must consume exactly one XML element.
|
||||||
|
// One common implementation strategy is to unmarshal into
|
||||||
|
// a separate value with a layout matching the expected XML
|
||||||
|
// using d.DecodeElement, and then to copy the data from
|
||||||
|
// that value into the receiver.
|
||||||
|
// Another common strategy is to use d.Token to process the
|
||||||
|
// XML object one token at a time.
|
||||||
|
// UnmarshalXML may not use d.RawToken.
|
||||||
|
type Unmarshaler interface {
|
||||||
|
UnmarshalXML(d *Decoder, start StartElement) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnmarshalerAttr is the interface implemented by objects that can unmarshal
|
||||||
|
// an XML attribute description of themselves.
|
||||||
|
//
|
||||||
|
// UnmarshalXMLAttr decodes a single XML attribute.
|
||||||
|
// If it returns an error, the outer call to Unmarshal stops and
|
||||||
|
// returns that error.
|
||||||
|
// UnmarshalXMLAttr is used only for struct fields with the
|
||||||
|
// "attr" option in the field tag.
|
||||||
|
type UnmarshalerAttr interface {
|
||||||
|
UnmarshalXMLAttr(attr Attr) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// receiverType returns the receiver type to use in an expression like "%s.MethodName".
//
// Named types are returned unchanged (for example "pkg.T"). Unnamed types,
// such as pointers, are wrapped in parentheses (for example "(*pkg.T)") so
// that a method selector appended to the result reads correctly.
//
// A nil interface value carries no type information: reflect.TypeOf returns
// a nil Type for it, and calling a method on that would panic. In that case
// receiverType returns "<nil>".
func receiverType(val interface{}) string {
	t := reflect.TypeOf(val)
	if t == nil {
		return "<nil>"
	}
	name := t.String()
	if t.Name() == "" {
		// Unnamed (composite) type: parenthesize so "(*T).Method" is unambiguous.
		return "(" + name + ")"
	}
	return name
}
|
||||||
|
|
||||||
|
// unmarshalInterface unmarshals a single XML element into val,
|
||||||
|
// which is known to implement Unmarshaler.
|
||||||
|
// start is the opening tag of the element.
|
||||||
|
func (p *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error {
|
||||||
|
// Record that decoder must stop at end tag corresponding to start.
|
||||||
|
p.pushEOF()
|
||||||
|
|
||||||
|
p.unmarshalDepth++
|
||||||
|
err := val.UnmarshalXML(p, *start)
|
||||||
|
p.unmarshalDepth--
|
||||||
|
if err != nil {
|
||||||
|
p.popEOF()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !p.popEOF() {
|
||||||
|
return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// unmarshalAttr unmarshals a single XML attribute into val.
|
||||||
|
func (p *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error {
|
||||||
|
if val.Kind() == reflect.Ptr {
|
||||||
|
if val.IsNil() {
|
||||||
|
val.Set(reflect.New(val.Type().Elem()))
|
||||||
|
}
|
||||||
|
val = val.Elem()
|
||||||
|
}
|
||||||
|
|
||||||
|
if val.CanInterface() && val.Type().Implements(unmarshalerAttrType) {
|
||||||
|
// This is an unmarshaler with a non-pointer receiver,
|
||||||
|
// so it's likely to be incorrect, but we do what we're told.
|
||||||
|
return val.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr)
|
||||||
|
}
|
||||||
|
if val.CanAddr() {
|
||||||
|
pv := val.Addr()
|
||||||
|
if pv.CanInterface() && pv.Type().Implements(unmarshalerAttrType) {
|
||||||
|
return pv.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Check for and use encoding.TextUnmarshaler.
|
||||||
|
|
||||||
|
copyValue(val, []byte(attr.Value))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
unmarshalerType = reflect.TypeOf((*Unmarshaler)(nil)).Elem()
|
||||||
|
unmarshalerAttrType = reflect.TypeOf((*UnmarshalerAttr)(nil)).Elem()
|
||||||
|
)
|
||||||
|
|
||||||
// Unmarshal a single XML element into val.
|
// Unmarshal a single XML element into val.
|
||||||
func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error {
|
func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error {
|
||||||
// Find start element if we need it.
|
// Find start element if we need it.
|
||||||
@ -153,13 +248,28 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if pv := val; pv.Kind() == reflect.Ptr {
|
if val.Kind() == reflect.Ptr {
|
||||||
if pv.IsNil() {
|
if val.IsNil() {
|
||||||
pv.Set(reflect.New(pv.Type().Elem()))
|
val.Set(reflect.New(val.Type().Elem()))
|
||||||
}
|
}
|
||||||
val = pv.Elem()
|
val = val.Elem()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if val.CanInterface() && val.Type().Implements(unmarshalerType) {
|
||||||
|
// This is an unmarshaler with a non-pointer receiver,
|
||||||
|
// so it's likely to be incorrect, but we do what we're told.
|
||||||
|
return p.unmarshalInterface(val.Interface().(Unmarshaler), start)
|
||||||
|
}
|
||||||
|
|
||||||
|
if val.CanAddr() {
|
||||||
|
pv := val.Addr()
|
||||||
|
if pv.CanInterface() && pv.Type().Implements(unmarshalerType) {
|
||||||
|
return p.unmarshalInterface(pv.Interface().(Unmarshaler), start)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Check for and use encoding.TextUnmarshaler.
|
||||||
|
|
||||||
var (
|
var (
|
||||||
data []byte
|
data []byte
|
||||||
saveData reflect.Value
|
saveData reflect.Value
|
||||||
@ -264,7 +374,9 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error {
|
|||||||
// Look for attribute.
|
// Look for attribute.
|
||||||
for _, a := range start.Attr {
|
for _, a := range start.Attr {
|
||||||
if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) {
|
if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) {
|
||||||
copyValue(strv, []byte(a.Value))
|
if err := p.unmarshalAttr(strv, a); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
package xml
|
package xml
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"io"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
@ -621,3 +622,66 @@ func TestMarshalNSAttr(t *testing.T) {
|
|||||||
t.Errorf("Unmarshal = %q, want %q", dst, src)
|
t.Errorf("Unmarshal = %q, want %q", dst, src)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MyCharData is a test type whose UnmarshalXML method collects the
// character data of an element into body.
type MyCharData struct {
	body string // concatenated character data seen so far
}
|
||||||
|
|
||||||
|
func (m *MyCharData) UnmarshalXML(d *Decoder, start StartElement) error {
|
||||||
|
for {
|
||||||
|
t, err := d.Token()
|
||||||
|
if err == io.EOF { // found end of element
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if char, ok := t.(CharData); ok {
|
||||||
|
m.body += string(char)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ Unmarshaler = (*MyCharData)(nil)
|
||||||
|
|
||||||
|
func (m *MyCharData) UnmarshalXMLAttr(attr Attr) error {
|
||||||
|
panic("must not call")
|
||||||
|
}
|
||||||
|
|
||||||
|
// MyAttr is a test type whose UnmarshalXMLAttr method records the value
// of the attribute it is given.
type MyAttr struct {
	attr string // value of the decoded attribute
}
|
||||||
|
|
||||||
|
func (m *MyAttr) UnmarshalXMLAttr(attr Attr) error {
|
||||||
|
m.attr = attr.Value
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ UnmarshalerAttr = (*MyAttr)(nil)
|
||||||
|
|
||||||
|
type MyStruct struct {
|
||||||
|
Data *MyCharData
|
||||||
|
Attr *MyAttr `xml:",attr"`
|
||||||
|
|
||||||
|
Data2 MyCharData
|
||||||
|
Attr2 MyAttr `xml:",attr"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUnmarshaler(t *testing.T) {
|
||||||
|
xml := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<MyStruct Attr="attr1" Attr2="attr2">
|
||||||
|
<Data>hello <!-- comment -->world</Data>
|
||||||
|
<Data2>howdy <!-- comment -->world</Data2>
|
||||||
|
</MyStruct>
|
||||||
|
`
|
||||||
|
|
||||||
|
var m MyStruct
|
||||||
|
if err := Unmarshal([]byte(xml), &m); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if m.Data == nil || m.Attr == nil || m.Data.body != "hello world" || m.Attr.attr != "attr1" || m.Data2.body != "howdy world" || m.Attr2.attr != "attr2" {
|
||||||
|
t.Errorf("m=%#+v\n", m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -16,6 +16,7 @@ package xml
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"strconv"
|
"strconv"
|
||||||
@ -186,6 +187,7 @@ type Decoder struct {
|
|||||||
ns map[string]string
|
ns map[string]string
|
||||||
err error
|
err error
|
||||||
line int
|
line int
|
||||||
|
unmarshalDepth int
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewDecoder creates a new XML parser reading from r.
|
// NewDecoder creates a new XML parser reading from r.
|
||||||
@ -223,10 +225,14 @@ func NewDecoder(r io.Reader) *Decoder {
|
|||||||
// If Token encounters an unrecognized name space prefix,
|
// If Token encounters an unrecognized name space prefix,
|
||||||
// it uses the prefix as the Space rather than report an error.
|
// it uses the prefix as the Space rather than report an error.
|
||||||
func (d *Decoder) Token() (t Token, err error) {
|
func (d *Decoder) Token() (t Token, err error) {
|
||||||
|
if d.stk != nil && d.stk.kind == stkEOF {
|
||||||
|
err = io.EOF
|
||||||
|
return
|
||||||
|
}
|
||||||
if d.nextToken != nil {
|
if d.nextToken != nil {
|
||||||
t = d.nextToken
|
t = d.nextToken
|
||||||
d.nextToken = nil
|
d.nextToken = nil
|
||||||
} else if t, err = d.RawToken(); err != nil {
|
} else if t, err = d.rawToken(); err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -322,6 +328,7 @@ type stack struct {
|
|||||||
const (
|
const (
|
||||||
stkStart = iota
|
stkStart = iota
|
||||||
stkNs
|
stkNs
|
||||||
|
stkEOF
|
||||||
)
|
)
|
||||||
|
|
||||||
func (d *Decoder) push(kind int) *stack {
|
func (d *Decoder) push(kind int) *stack {
|
||||||
@ -347,6 +354,43 @@ func (d *Decoder) pop() *stack {
|
|||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Record that after the current element is finished
|
||||||
|
// (that element is already pushed on the stack)
|
||||||
|
// Token should return EOF until popEOF is called.
|
||||||
|
func (d *Decoder) pushEOF() {
|
||||||
|
// Walk down stack to find Start.
|
||||||
|
// It might not be the top, because there might be stkNs
|
||||||
|
// entries above it.
|
||||||
|
start := d.stk
|
||||||
|
for start.kind != stkStart {
|
||||||
|
start = start.next
|
||||||
|
}
|
||||||
|
// The stkNs entries below a start are associated with that
|
||||||
|
// element too; skip over them.
|
||||||
|
for start.next != nil && start.next.kind == stkNs {
|
||||||
|
start = start.next
|
||||||
|
}
|
||||||
|
s := d.free
|
||||||
|
if s != nil {
|
||||||
|
d.free = s.next
|
||||||
|
} else {
|
||||||
|
s = new(stack)
|
||||||
|
}
|
||||||
|
s.kind = stkEOF
|
||||||
|
s.next = start.next
|
||||||
|
start.next = s
|
||||||
|
}
|
||||||
|
|
||||||
|
// Undo a pushEOF.
|
||||||
|
// The element must have been finished, so the EOF should be at the top of the stack.
|
||||||
|
func (d *Decoder) popEOF() bool {
|
||||||
|
if d.stk == nil || d.stk.kind != stkEOF {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
d.pop()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// Record that we are starting an element with the given name.
|
// Record that we are starting an element with the given name.
|
||||||
func (d *Decoder) pushElement(name Name) {
|
func (d *Decoder) pushElement(name Name) {
|
||||||
s := d.push(stkStart)
|
s := d.push(stkStart)
|
||||||
@ -395,9 +439,9 @@ func (d *Decoder) popElement(t *EndElement) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pop stack until a Start is on the top, undoing the
|
// Pop stack until a Start or EOF is on the top, undoing the
|
||||||
// translations that were associated with the element we just closed.
|
// translations that were associated with the element we just closed.
|
||||||
for d.stk != nil && d.stk.kind != stkStart {
|
for d.stk != nil && d.stk.kind != stkStart && d.stk.kind != stkEOF {
|
||||||
s := d.pop()
|
s := d.pop()
|
||||||
if s.ok {
|
if s.ok {
|
||||||
d.ns[s.name.Local] = s.name.Space
|
d.ns[s.name.Local] = s.name.Space
|
||||||
@ -429,10 +473,19 @@ func (d *Decoder) autoClose(t Token) (Token, bool) {
|
|||||||
return nil, false
|
return nil, false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// errRawToken is returned by RawToken when it is called while an
// UnmarshalXML method is running.
var errRawToken = errors.New("xml: cannot use RawToken from UnmarshalXML method")
|
||||||
|
|
||||||
// RawToken is like Token but does not verify that
|
// RawToken is like Token but does not verify that
|
||||||
// start and end elements match and does not translate
|
// start and end elements match and does not translate
|
||||||
// name space prefixes to their corresponding URLs.
|
// name space prefixes to their corresponding URLs.
|
||||||
func (d *Decoder) RawToken() (Token, error) {
|
func (d *Decoder) RawToken() (Token, error) {
|
||||||
|
if d.unmarshalDepth > 0 {
|
||||||
|
return nil, errRawToken
|
||||||
|
}
|
||||||
|
return d.rawToken()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Decoder) rawToken() (Token, error) {
|
||||||
if d.err != nil {
|
if d.err != nil {
|
||||||
return nil, d.err
|
return nil, d.err
|
||||||
}
|
}
|
||||||
@ -484,8 +537,7 @@ func (d *Decoder) RawToken() (Token, error) {
|
|||||||
|
|
||||||
case '?':
|
case '?':
|
||||||
// <?: Processing instruction.
|
// <?: Processing instruction.
|
||||||
// TODO(rsc): Should parse the <?xml declaration to make sure
|
// TODO(rsc): Should parse the <?xml declaration to make sure the version is 1.0.
|
||||||
// the version is 1.0 and the encoding is UTF-8.
|
|
||||||
var target string
|
var target string
|
||||||
if target, ok = d.name(); !ok {
|
if target, ok = d.name(); !ok {
|
||||||
if d.err == nil {
|
if d.err == nil {
|
||||||
@ -1112,6 +1164,30 @@ func isName(s []byte) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isNameString(s string) bool {
|
||||||
|
if len(s) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
c, n := utf8.DecodeRuneInString(s)
|
||||||
|
if c == utf8.RuneError && n == 1 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !unicode.Is(first, c) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for n < len(s) {
|
||||||
|
s = s[n:]
|
||||||
|
c, n = utf8.DecodeRuneInString(s)
|
||||||
|
if c == utf8.RuneError && n == 1 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !unicode.Is(first, c) && !unicode.Is(second, c) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// These tables were generated by cut and paste from Appendix B of
|
// These tables were generated by cut and paste from Appendix B of
|
||||||
// the XML spec at http://www.xml.com/axml/testaxml.htm
|
// the XML spec at http://www.xml.com/axml/testaxml.htm
|
||||||
// and then reformatting. First corresponds to (Letter | '_' | ':')
|
// and then reformatting. First corresponds to (Letter | '_' | ':')
|
||||||
@ -1778,6 +1854,45 @@ func EscapeText(w io.Writer, s []byte) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EscapeString writes to p the properly escaped XML equivalent
|
||||||
|
// of the plain text data s.
|
||||||
|
func (p *printer) EscapeString(s string) {
|
||||||
|
var esc []byte
|
||||||
|
last := 0
|
||||||
|
for i := 0; i < len(s); {
|
||||||
|
r, width := utf8.DecodeRuneInString(s[i:])
|
||||||
|
i += width
|
||||||
|
switch r {
|
||||||
|
case '"':
|
||||||
|
esc = esc_quot
|
||||||
|
case '\'':
|
||||||
|
esc = esc_apos
|
||||||
|
case '&':
|
||||||
|
esc = esc_amp
|
||||||
|
case '<':
|
||||||
|
esc = esc_lt
|
||||||
|
case '>':
|
||||||
|
esc = esc_gt
|
||||||
|
case '\t':
|
||||||
|
esc = esc_tab
|
||||||
|
case '\n':
|
||||||
|
esc = esc_nl
|
||||||
|
case '\r':
|
||||||
|
esc = esc_cr
|
||||||
|
default:
|
||||||
|
if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
|
||||||
|
esc = esc_fffd
|
||||||
|
break
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
p.WriteString(s[last : i-width])
|
||||||
|
p.Write(esc)
|
||||||
|
last = i
|
||||||
|
}
|
||||||
|
p.WriteString(s[last:])
|
||||||
|
}
|
||||||
|
|
||||||
// Escape is like EscapeText but omits the error return value.
|
// Escape is like EscapeText but omits the error return value.
|
||||||
// It is provided for backwards compatibility with Go 1.0.
|
// It is provided for backwards compatibility with Go 1.0.
|
||||||
// Code targeting Go 1.1 or later should use EscapeText.
|
// Code targeting Go 1.1 or later should use EscapeText.
|
||||||
|
Loading…
Reference in New Issue
Block a user