mirror of
https://github.com/golang/go
synced 2024-10-04 08:31:22 -06:00
unicode/utf8: reject out-of-range runes.
Surrogates are still admitted, but I have sent mail to golang-dev on that topic.
Fixes #3785.
R=golang-dev, rogpeppe, iant
CC=golang-dev
https://golang.org/cl/6398049
This commit is contained in:
parent
55ff3f7076
commit
fc360f2381
@ -102,7 +102,7 @@ func decodeRuneInternal(p []byte) (r rune, size int, short bool) {
|
|||||||
// 4-byte, 21-bit sequence?
|
// 4-byte, 21-bit sequence?
|
||||||
if c0 < t5 {
|
if c0 < t5 {
|
||||||
r = rune(c0&mask4)<<18 | rune(c1&maskx)<<12 | rune(c2&maskx)<<6 | rune(c3&maskx)
|
r = rune(c0&mask4)<<18 | rune(c1&maskx)<<12 | rune(c2&maskx)<<6 | rune(c3&maskx)
|
||||||
if r <= rune3Max {
|
if r <= rune3Max || MaxRune < r {
|
||||||
return RuneError, 1, false
|
return RuneError, 1, false
|
||||||
}
|
}
|
||||||
return r, 4, false
|
return r, 4, false
|
||||||
@ -177,7 +177,7 @@ func decodeRuneInStringInternal(s string) (r rune, size int, short bool) {
|
|||||||
// 4-byte, 21-bit sequence?
|
// 4-byte, 21-bit sequence?
|
||||||
if c0 < t5 {
|
if c0 < t5 {
|
||||||
r = rune(c0&mask4)<<18 | rune(c1&maskx)<<12 | rune(c2&maskx)<<6 | rune(c3&maskx)
|
r = rune(c0&mask4)<<18 | rune(c1&maskx)<<12 | rune(c2&maskx)<<6 | rune(c3&maskx)
|
||||||
if r <= rune3Max {
|
if r <= rune3Max || MaxRune < r {
|
||||||
return RuneError, 1, false
|
return RuneError, 1, false
|
||||||
}
|
}
|
||||||
return r, 4, false
|
return r, 4, false
|
||||||
@ -202,6 +202,9 @@ func FullRuneInString(s string) bool {
|
|||||||
|
|
||||||
// DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes.
|
// DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes.
|
||||||
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
|
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
|
||||||
|
// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
|
||||||
|
// out of range, or is not the shortest possible UTF-8 encoding for the
|
||||||
|
// value. No other validation is performed.
|
||||||
func DecodeRune(p []byte) (r rune, size int) {
|
func DecodeRune(p []byte) (r rune, size int) {
|
||||||
r, size, _ = decodeRuneInternal(p)
|
r, size, _ = decodeRuneInternal(p)
|
||||||
return
|
return
|
||||||
@ -209,6 +212,9 @@ func DecodeRune(p []byte) (r rune, size int) {
|
|||||||
|
|
||||||
// DecodeRuneInString is like DecodeRune but its input is a string.
|
// DecodeRuneInString is like DecodeRune but its input is a string.
|
||||||
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
|
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
|
||||||
|
// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
|
||||||
|
// out of range, or is not the shortest possible UTF-8 encoding for the
|
||||||
|
// value. No other validation is performed.
|
||||||
func DecodeRuneInString(s string) (r rune, size int) {
|
func DecodeRuneInString(s string) (r rune, size int) {
|
||||||
r, size, _ = decodeRuneInStringInternal(s)
|
r, size, _ = decodeRuneInStringInternal(s)
|
||||||
return
|
return
|
||||||
@ -216,6 +222,9 @@ func DecodeRuneInString(s string) (r rune, size int) {
|
|||||||
|
|
||||||
// DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes.
|
// DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes.
|
||||||
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
|
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
|
||||||
|
// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
|
||||||
|
// out of range, or is not the shortest possible UTF-8 encoding for the
|
||||||
|
// value. No other validation is performed.
|
||||||
func DecodeLastRune(p []byte) (r rune, size int) {
|
func DecodeLastRune(p []byte) (r rune, size int) {
|
||||||
end := len(p)
|
end := len(p)
|
||||||
if end == 0 {
|
if end == 0 {
|
||||||
@ -250,6 +259,9 @@ func DecodeLastRune(p []byte) (r rune, size int) {
|
|||||||
|
|
||||||
// DecodeLastRuneInString is like DecodeLastRune but its input is a string.
|
// DecodeLastRuneInString is like DecodeLastRune but its input is a string.
|
||||||
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
|
// If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8.
|
||||||
|
// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
|
||||||
|
// out of range, or is not the shortest possible UTF-8 encoding for the
|
||||||
|
// value. No other validation is performed.
|
||||||
func DecodeLastRuneInString(s string) (r rune, size int) {
|
func DecodeLastRuneInString(s string) (r rune, size int) {
|
||||||
end := len(s)
|
end := len(s)
|
||||||
if end == 0 {
|
if end == 0 {
|
||||||
|
@ -311,6 +311,11 @@ var validTests = []ValidTest{
|
|||||||
{string([]byte{66, 250}), false},
|
{string([]byte{66, 250}), false},
|
||||||
{string([]byte{66, 250, 67}), false},
|
{string([]byte{66, 250, 67}), false},
|
||||||
{"a\uFFFDb", true},
|
{"a\uFFFDb", true},
|
||||||
|
{string("\xF7\xBF\xBF\xBF"), true}, // U+1FFFFF
|
||||||
|
{string("\xFB\xBF\xBF\xBF\xBF"), false}, // 0x3FFFFFF; out of range
|
||||||
|
{string("\xc0\x80"), false}, // U+0000 encoded in two bytes: incorrect
|
||||||
|
// TODO {string("\xed\xa0\x80"), false }, // U+D800 high surrogate (sic)
|
||||||
|
// TODO {string("\xed\xbf\xbf"), false }, // U+DFFF low surrogate (sic)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestValid(t *testing.T) {
|
func TestValid(t *testing.T) {
|
||||||
|
Loading…
Reference in New Issue
Block a user