mirror of
https://github.com/golang/go
synced 2024-11-22 00:54:43 -07:00
xml: disallow invalid Unicode code points
Fixes #1259. R=rsc CC=golang-dev https://golang.org/cl/2967041
This commit is contained in:
parent
3e2231e41f
commit
27f2d5ce8c
@ -16,6 +16,7 @@ package xml
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
@ -871,6 +872,21 @@ Input:
|
|||||||
data := p.buf.Bytes()
|
data := p.buf.Bytes()
|
||||||
data = data[0 : len(data)-trunc]
|
data = data[0 : len(data)-trunc]
|
||||||
|
|
||||||
|
// Inspect each rune for being a disallowed character.
|
||||||
|
buf := data
|
||||||
|
for len(buf) > 0 {
|
||||||
|
r, size := utf8.DecodeRune(buf)
|
||||||
|
if r == utf8.RuneError && size == 1 {
|
||||||
|
p.err = p.syntaxError("invalid UTF-8")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
buf = buf[size:]
|
||||||
|
if !isInCharacterRange(r) {
|
||||||
|
p.err = p.syntaxError(fmt.Sprintf("illegal character code %U", r))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Must rewrite \r and \r\n into \n.
|
// Must rewrite \r and \r\n into \n.
|
||||||
w := 0
|
w := 0
|
||||||
for r := 0; r < len(data); r++ {
|
for r := 0; r < len(data); r++ {
|
||||||
@ -887,6 +903,18 @@ Input:
|
|||||||
return data[0:w]
|
return data[0:w]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decide whether the given rune is in the XML Character Range, per
|
||||||
|
// the Char production of http://www.xml.com/axml/testaxml.htm,
|
||||||
|
// Section 2.2 Characters.
|
||||||
|
func isInCharacterRange(rune int) (inrange bool) {
|
||||||
|
return rune == 0x09 ||
|
||||||
|
rune == 0x0A ||
|
||||||
|
rune == 0x0D ||
|
||||||
|
rune >= 0x20 && rune <= 0xDF77 ||
|
||||||
|
rune >= 0xE000 && rune <= 0xFFFD ||
|
||||||
|
rune >= 0x10000 && rune <= 0x10FFFF
|
||||||
|
}
|
||||||
|
|
||||||
// Get name space name: name with a : stuck in the middle.
|
// Get name space name: name with a : stuck in the middle.
|
||||||
// The part before the : is the name space identifier.
|
// The part before the : is the name space identifier.
|
||||||
func (p *Parser) nsname() (name Name, ok bool) {
|
func (p *Parser) nsname() (name Name, ok bool) {
|
||||||
|
@ -398,3 +398,44 @@ func TestEntityInsideCDATA(t *testing.T) {
|
|||||||
t.Fatalf("p.Token() = _, %v, want _, os.EOF", err)
|
t.Fatalf("p.Token() = _, %v, want _, os.EOF", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// The last three tests (respectively one for characters in attribute
|
||||||
|
// names and two for character entities) pass not because of code
|
||||||
|
// changed for issue 1259, but instead pass with the given messages
|
||||||
|
// from other parts of xml.Parser. I provide these to note the
|
||||||
|
// current behavior of situations where one might think that character
|
||||||
|
// range checking would detect the error, but it does not in fact.
|
||||||
|
|
||||||
|
var characterTests = []struct {
|
||||||
|
in string
|
||||||
|
err string
|
||||||
|
}{
|
||||||
|
{"\x12<doc/>", "illegal character code U+0012"},
|
||||||
|
{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
|
||||||
|
{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
|
||||||
|
{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
|
||||||
|
{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
|
||||||
|
{"<doc>&\x01;</doc>", "invalid character entity &;"},
|
||||||
|
{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &;"},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func TestDisallowedCharacters(t *testing.T) {
|
||||||
|
|
||||||
|
for i, tt := range characterTests {
|
||||||
|
p := NewParser(StringReader(tt.in))
|
||||||
|
var err os.Error
|
||||||
|
|
||||||
|
for err == nil {
|
||||||
|
_, err = p.Token()
|
||||||
|
}
|
||||||
|
synerr, ok := err.(*SyntaxError)
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("input %d p.Token() = _, %v, want _, *SyntaxError", i, err)
|
||||||
|
}
|
||||||
|
if synerr.Msg != tt.err {
|
||||||
|
t.Fatalf("input %d synerr.Msg wrong: want '%s', got '%s'", i, tt.err, synerr.Msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user