1
0
mirror of https://github.com/golang/go synced 2024-11-23 22:10:04 -07:00

encoding/xml: treat a namespaced name as two names, not one

An XML QName is syntactically two Names separated by a colon, rather
than a single name that has a colon in it.  This fixes multiple bugs in
XML QName reading.

Fixes: #68294
Fixes: #68392
Fixes: #68393
This commit is contained in:
Demi Marie Obenour 2024-07-14 14:39:12 -04:00
parent 239666cd73
commit e1351cd054
2 changed files with 107 additions and 13 deletions

View File

@ -1169,15 +1169,28 @@ func (d *Decoder) nsname() (name Name, ok bool) {
if !ok {
return
}
if strings.Count(s, ":") > 1 {
return name, false
} else if space, local, ok := strings.Cut(s, ":"); !ok || space == "" || local == "" {
name.Local = s
} else {
name.Space = space
name.Local = local
// XML does not allow a document to end with a name, so there must
// be another byte.
b, ok := d.mustgetc()
if !ok {
return
}
return name, true
if b != ':' {
d.ungetc(b)
name.Local = s
return
}
n, ok := d.name()
if ok {
// give a better error message than would otherwise be possible
if d.nextByte == ':' {
d.err = d.syntaxError("colon after prefixed XML name " + string(s) + ":" + string(n))
return name, false
}
name.Space = s
name.Local = n
}
return
}
// Get name: /first(first|second)*/
@ -1229,7 +1242,7 @@ func isNameByte(c byte) bool {
return 'A' <= c && c <= 'Z' ||
'a' <= c && c <= 'z' ||
'0' <= c && c <= '9' ||
c == '_' || c == ':' || c == '.' || c == '-'
c == '_' || c == '.' || c == '-'
}
func isName(s []byte) bool {
@ -1287,7 +1300,6 @@ func isNameString(s string) bool {
var first = &unicode.RangeTable{
R16: []unicode.Range16{
{0x003A, 0x003A, 1},
{0x0041, 0x005A, 1},
{0x005F, 0x005F, 1},
{0x0061, 0x007A, 1},

View File

@ -31,6 +31,89 @@ func (t *toks) Token() (Token, error) {
return tok, nil
}
// TestDecodeBadName checks that RawToken rejects malformed element and
// attribute names. For each input the decoder must return a nil token and a
// *SyntaxError whose Msg equals the expected message.
func TestDecodeBadName(t *testing.T) {
	tests := []struct {
		name    string // subtest name; unique so that -run can select a single case
		input   string // malformed XML fed to the decoder
		wantMsg string // expected SyntaxError.Msg
	}{
		// Malformed element names.
		{
			name:    "element: number after colon",
			input:   `<a:1/>`,
			wantMsg: "invalid XML name: 1",
		},
		{
			name:    "element: two colons at end",
			input:   `<a::/>`,
			wantMsg: "expected element name after <",
		},
		{
			name:    "element: two colons together in middle",
			input:   `<a::a/>`,
			wantMsg: "expected element name after <",
		},
		{
			name:    "element: colon at end",
			input:   `<a:/>`,
			wantMsg: "expected element name after <",
		},
		{
			name:    "element: colon at start",
			input:   `<:a/>`,
			wantMsg: "expected element name after <",
		},
		// Malformed attribute names.
		{
			name:    "attribute: number after colon",
			input:   `<a a:1=""/>`,
			wantMsg: "invalid XML name: 1",
		},
		{
			name:    "attribute: two colons separate",
			input:   `<a a:b:c="1"/>`,
			wantMsg: "colon after prefixed XML name a:b",
		},
		{
			name:    "attribute: two colons at end",
			input:   `<a a::="1"/>`,
			wantMsg: "expected attribute name in element",
		},
		{
			name:    "attribute: two colons together in middle",
			input:   `<a a::a="1"/>`,
			wantMsg: "expected attribute name in element",
		},
		{
			name:    "attribute: colon at end",
			input:   `<a a:="1"/>`,
			wantMsg: "expected attribute name in element",
		},
		{
			name:    "attribute: colon at start",
			input:   `<a :a="1"/>`,
			wantMsg: "expected attribute name in element",
		},
	}
	for i, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			d := NewDecoder(strings.NewReader(tc.input))
			tok, err := d.RawToken()
			if tok != nil {
				t.Fatalf("%d: d.RawToken(%q): expected nil token, got %#v", i, tc.input, tok)
			}
			if err == nil {
				t.Fatalf("%d: d.RawToken(%q): expected non-nil error, got nil", i, tc.input)
			}
			syntaxErr, ok := err.(*SyntaxError)
			if !ok {
				// Report the concrete type and text so that a failure is diagnosable.
				t.Fatalf("%d: d.RawToken(%q): expected *SyntaxError, got %T: %v", i, tc.input, err, err)
			}
			if syntaxErr.Msg != tc.wantMsg {
				t.Errorf("%d: bad message: expected %q, got %q", i, tc.wantMsg, syntaxErr.Msg)
			}
		})
	}
}
func TestDecodeEOF(t *testing.T) {
start := StartElement{Name: Name{Local: "test"}}
tests := []struct {
@ -1130,12 +1213,12 @@ func TestIssue20396(t *testing.T) {
wantErr error
}{
{`<a:te:st xmlns:a="abcd"/>`, // Issue 20396
UnmarshalError("XML syntax error on line 1: expected element name after <")},
UnmarshalError("XML syntax error on line 1: colon after prefixed XML name a:te")},
{`<a:te=st xmlns:a="abcd"/>`, attrError},
{`<a:te&st xmlns:a="abcd"/>`, attrError},
{`<a:test xmlns:a="abcd"/>`, nil},
{`<a:te:st xmlns:a="abcd">1</a:te:st>`,
UnmarshalError("XML syntax error on line 1: expected element name after <")},
UnmarshalError("XML syntax error on line 1: colon after prefixed XML name a:te")},
{`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
{`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
{`<a:test xmlns:a="abcd">1</a:test>`, nil},
@ -1324,7 +1407,6 @@ func testRoundTrip(t *testing.T, input string) {
func TestRoundTrip(t *testing.T) {
tests := map[string]string{
"trailing colon": `<foo abc:="x"></foo>`,
"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
}
for name, input := range tests {