mirror of
https://github.com/golang/go
synced 2024-11-21 14:34:41 -07:00
xml: disallow invalid Unicode code points
Fixes #1259. R=rsc CC=golang-dev https://golang.org/cl/2967041
This commit is contained in:
parent
3e2231e41f
commit
27f2d5ce8c
@ -16,6 +16,7 @@ package xml
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
@ -871,6 +872,21 @@ Input:
|
||||
data := p.buf.Bytes()
|
||||
data = data[0 : len(data)-trunc]
|
||||
|
||||
// Inspect each rune for being a disallowed character.
|
||||
buf := data
|
||||
for len(buf) > 0 {
|
||||
r, size := utf8.DecodeRune(buf)
|
||||
if r == utf8.RuneError && size == 1 {
|
||||
p.err = p.syntaxError("invalid UTF-8")
|
||||
return nil
|
||||
}
|
||||
buf = buf[size:]
|
||||
if !isInCharacterRange(r) {
|
||||
p.err = p.syntaxError(fmt.Sprintf("illegal character code %U", r))
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Must rewrite \r and \r\n into \n.
|
||||
w := 0
|
||||
for r := 0; r < len(data); r++ {
|
||||
@ -887,6 +903,18 @@ Input:
|
||||
return data[0:w]
|
||||
}
|
||||
|
||||
// Decide whether the given rune is in the XML Character Range, per
|
||||
// the Char production of http://www.xml.com/axml/testaxml.htm,
|
||||
// Section 2.2 Characters.
|
||||
func isInCharacterRange(rune int) (inrange bool) {
|
||||
return rune == 0x09 ||
|
||||
rune == 0x0A ||
|
||||
rune == 0x0D ||
|
||||
rune >= 0x20 && rune <= 0xDF77 ||
|
||||
rune >= 0xE000 && rune <= 0xFFFD ||
|
||||
rune >= 0x10000 && rune <= 0x10FFFF
|
||||
}
|
||||
|
||||
// Get name space name: name with a : stuck in the middle.
|
||||
// The part before the : is the name space identifier.
|
||||
func (p *Parser) nsname() (name Name, ok bool) {
|
||||
|
@ -398,3 +398,44 @@ func TestEntityInsideCDATA(t *testing.T) {
|
||||
t.Fatalf("p.Token() = _, %v, want _, os.EOF", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// The last three tests (respectively one for characters in attribute
|
||||
// names and two for character entities) pass not because of code
|
||||
// changed for issue 1259, but instead pass with the given messages
|
||||
// from other parts of xml.Parser. I provide these to note the
|
||||
// current behavior of situations where one might think that character
|
||||
// range checking would detect the error, but it does not in fact.
|
||||
|
||||
var characterTests = []struct {
|
||||
in string
|
||||
err string
|
||||
}{
|
||||
{"\x12<doc/>", "illegal character code U+0012"},
|
||||
{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
|
||||
{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
|
||||
{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
|
||||
{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
|
||||
{"<doc>&\x01;</doc>", "invalid character entity &;"},
|
||||
{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &;"},
|
||||
}
|
||||
|
||||
|
||||
func TestDisallowedCharacters(t *testing.T) {
|
||||
|
||||
for i, tt := range characterTests {
|
||||
p := NewParser(StringReader(tt.in))
|
||||
var err os.Error
|
||||
|
||||
for err == nil {
|
||||
_, err = p.Token()
|
||||
}
|
||||
synerr, ok := err.(*SyntaxError)
|
||||
if !ok {
|
||||
t.Fatalf("input %d p.Token() = _, %v, want _, *SyntaxError", i, err)
|
||||
}
|
||||
if synerr.Msg != tt.err {
|
||||
t.Fatalf("input %d synerr.Msg wrong: want '%s', got '%s'", i, tt.err, synerr.Msg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user