From c52ad23461953a8017199fe0a6fa78ba486ec2ef Mon Sep 17 00:00:00 2001 From: Gustavo Niemeyer Date: Wed, 19 Jan 2011 15:43:58 -0500 Subject: [PATCH] xml: handle tag paths through the same element With the current implementation, xml unmarshalling will silently fail to unmarshal any paths passing through the same element, such as: type T struct { A string "dummy>a" B string "dummy>b" } This change tweaks the algorithm so that this works correctly. Also, using paths that would cause the same element to unmarshal twice will error out ahead of time explaining the problem, rather than silently misbehaving. R=rsc CC=golang-dev https://golang.org/cl/4082041 --- src/pkg/xml/read.go | 93 ++++++++++++++++++++++++++++++---------- src/pkg/xml/read_test.go | 53 +++++++++++++++++------ 2 files changed, 111 insertions(+), 35 deletions(-) diff --git a/src/pkg/xml/read.go b/src/pkg/xml/read.go index 1f50e0b86b8..9ae3bb8eee9 100644 --- a/src/pkg/xml/read.go +++ b/src/pkg/xml/read.go @@ -6,6 +6,7 @@ package xml import ( "bytes" + "fmt" "io" "os" "reflect" @@ -156,6 +157,18 @@ type UnmarshalError string func (e UnmarshalError) String() string { return string(e) } +// A TagPathError represents an error in the unmarshalling process +// caused by the use of field tags with conflicting paths. +type TagPathError struct { + Struct reflect.Type + Field1, Tag1 string + Field2, Tag2 string +} + +func (e *TagPathError) String() string { + return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2) +} + // The Parser's Unmarshal method is like xml.Unmarshal // except that it can be passed a pointer to the initial start element, // useful when a client reads some raw XML tokens itself @@ -226,7 +239,7 @@ func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error { saveXMLData []byte sv *reflect.StructValue styp *reflect.StructType - fieldPaths map[string]fieldPath + fieldPaths map[string]pathInfo ) switch v := val.(type) { @@ -349,20 +362,21 @@ func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error { } default: - i := strings.Index(f.Tag, ">") - if i != -1 { + if strings.Contains(f.Tag, ">") { if fieldPaths == nil { - fieldPaths = make(map[string]fieldPath) + fieldPaths = make(map[string]pathInfo) } path := strings.ToLower(f.Tag) - if i == 0 { + if strings.HasPrefix(f.Tag, ">") { path = strings.ToLower(f.Name) + path } - if path[len(path)-1] == '>' { + if strings.HasSuffix(f.Tag, ">") { path = path[:len(path)-1] } - s := strings.Split(path, ">", -1) - fieldPaths[s[0]] = fieldPath{s[1:], f.Index} + err := addFieldPath(sv, fieldPaths, path, f.Index) + if err != nil { + return err + } } } } @@ -385,12 +399,11 @@ Loop: // Sub-element. // Look up by tag name. if sv != nil { - k := strings.ToLower(fieldName(t.Name.Local)) + k := fieldName(t.Name.Local) if fieldPaths != nil { - if fp, ok := fieldPaths[k]; ok { - val := sv.FieldByIndex(fp.Index) - if err := p.unmarshalPath(val, &t, fp.Path); err != nil { + if _, found := fieldPaths[k]; found { + if err := p.unmarshalPaths(sv, fieldPaths, k, &t); err != nil { return err } continue Loop @@ -515,16 +528,50 @@ Loop: return nil } -type fieldPath struct { - Path []string - Index []int +type pathInfo struct { + fieldIdx []int + complete bool } -// unmarshalPath finds the nested elements matching the -// provided path and calls unmarshal on the tip elements. -func (p *Parser) unmarshalPath(val reflect.Value, start *StartElement, path []string) os.Error { - if len(path) == 0 { - return p.unmarshal(val, start) +// addFieldPath takes an element path such as "a>b>c" and fills the +// paths map with all paths leading to it ("a", "a>b", and "a>b>c"). +// It is okay for paths to share a common, shorter prefix but not ok +// for one path to itself be a prefix of another. +func addFieldPath(sv *reflect.StructValue, paths map[string]pathInfo, path string, fieldIdx []int) os.Error { + if info, found := paths[path]; found { + return tagError(sv, info.fieldIdx, fieldIdx) + } + paths[path] = pathInfo{fieldIdx, true} + for { + i := strings.LastIndex(path, ">") + if i < 0 { + break + } + path = path[:i] + if info, found := paths[path]; found { + if info.complete { + return tagError(sv, info.fieldIdx, fieldIdx) + } + } else { + paths[path] = pathInfo{fieldIdx, false} + } + } + return nil + +} + +func tagError(sv *reflect.StructValue, idx1 []int, idx2 []int) os.Error { + t := sv.Type().(*reflect.StructType) + f1 := t.FieldByIndex(idx1) + f2 := t.FieldByIndex(idx2) + return &TagPathError{t, f1.Name, f1.Tag, f2.Name, f2.Tag} +} + +// unmarshalPaths walks down an XML structure looking for +// wanted paths, and calls unmarshal on them. +func (p *Parser) unmarshalPaths(sv *reflect.StructValue, paths map[string]pathInfo, path string, start *StartElement) os.Error { + if info, _ := paths[path]; info.complete { + return p.unmarshal(sv.FieldByIndex(info.fieldIdx), start) } for { tok, err := p.Token() @@ -533,9 +580,9 @@ func (p *Parser) unmarshalPath(val reflect.Value, start *StartElement, path []st } switch t := tok.(type) { case StartElement: - k := fieldName(t.Name.Local) - if k == path[0] { - if err := p.unmarshalPath(val, &t, path[1:]); err != nil { + k := path + ">" + fieldName(t.Name.Local) + if _, found := paths[k]; found { + if err := p.unmarshalPaths(sv, paths, k, &t); err != nil { return err } continue diff --git a/src/pkg/xml/read_test.go b/src/pkg/xml/read_test.go index 72b907704ba..71ceddce4a4 100644 --- a/src/pkg/xml/read_test.go +++ b/src/pkg/xml/read_test.go @@ -235,16 +235,16 @@ const pathTestString = ` 1 - + A - - + + B - - + + C D - + 2 @@ -255,22 +255,23 @@ type PathTestItem struct { } type PathTestA struct { - Items []PathTestItem ">item" + Items []PathTestItem ">item1" Before, After string } type PathTestB struct { - Other []PathTestItem "items>Item" + Other []PathTestItem "items>Item1" Before, After string } type PathTestC struct { - Values []string "items>item>value" + Values1 []string "items>item1>value" + Values2 []string "items>item2>value" Before, After string } type PathTestSet struct { - Item []PathTestItem + Item1 []PathTestItem } type PathTestD struct { @@ -281,8 +282,8 @@ type PathTestD struct { var pathTests = []interface{}{ &PathTestA{Items: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, &PathTestB{Other: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, - &PathTestC{Values: []string{"A", "C", "D"}, Before: "1", After: "2"}, - &PathTestD{Other: PathTestSet{Item: []PathTestItem{{"A"}, {"D"}}}, Before: "1", After: "2"}, + &PathTestC{Values1: []string{"A", "C", "D"}, Values2: []string{"B"}, Before: "1", After: "2"}, + &PathTestD{Other: PathTestSet{Item1: []PathTestItem{{"A"}, {"D"}}}, Before: "1", After: "2"}, } func TestUnmarshalPaths(t *testing.T) { @@ -298,3 +299,31 @@ func TestUnmarshalPaths(t *testing.T) { } } } + +type BadPathTestA struct { + First string "items>item1" + Other string "items>item2" + Second string "items>" +} + +type BadPathTestB struct { + Other string "items>item2>value" + First string "items>item1" + Second string "items>item1>value" +} + +var badPathTests = []struct { + v, e interface{} +}{ + {&BadPathTestA{}, &TagPathError{reflect.Typeof(BadPathTestA{}), "First", "items>item1", "Second", "items>"}}, + {&BadPathTestB{}, &TagPathError{reflect.Typeof(BadPathTestB{}), "First", "items>item1", "Second", "items>item1>value"}}, +} + +func TestUnmarshalBadPaths(t *testing.T) { + for _, tt := range badPathTests { + err := Unmarshal(StringReader(pathTestString), tt.v) + if !reflect.DeepEqual(err, tt.e) { + t.Fatalf("Unmarshal with %#v didn't fail properly: %#v", tt.v, err) + } + } +}