From fc360f238137717e7246cc0fde908b71a3f1e5c7 Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Thu, 19 Jul 2012 11:58:14 -0700 Subject: [PATCH] unicode/utf8: reject out-of-range runes. Surrogates are still admitted, but I have sent mail to golang-dev on that topic. Fixes #3785. R=golang-dev, rogpeppe, iant CC=golang-dev https://golang.org/cl/6398049 --- src/pkg/unicode/utf8/utf8.go | 16 ++++++++++++++-- src/pkg/unicode/utf8/utf8_test.go | 5 +++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/pkg/unicode/utf8/utf8.go b/src/pkg/unicode/utf8/utf8.go index 57ea19e96d4..cd9c80c5a59 100644 --- a/src/pkg/unicode/utf8/utf8.go +++ b/src/pkg/unicode/utf8/utf8.go @@ -102,7 +102,7 @@ func decodeRuneInternal(p []byte) (r rune, size int, short bool) { // 4-byte, 21-bit sequence? if c0 < t5 { r = rune(c0&mask4)<<18 | rune(c1&maskx)<<12 | rune(c2&maskx)<<6 | rune(c3&maskx) - if r <= rune3Max { + if r <= rune3Max || MaxRune < r { return RuneError, 1, false } return r, 4, false @@ -177,7 +177,7 @@ func decodeRuneInStringInternal(s string) (r rune, size int, short bool) { // 4-byte, 21-bit sequence? if c0 < t5 { r = rune(c0&mask4)<<18 | rune(c1&maskx)<<12 | rune(c2&maskx)<<6 | rune(c3&maskx) - if r <= rune3Max { + if r <= rune3Max || MaxRune < r { return RuneError, 1, false } return r, 4, false @@ -202,6 +202,9 @@ func FullRuneInString(s string) bool { // DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes. // If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8. +// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is +// out of range, or is not the shortest possible UTF-8 encoding for the +// value. No other validation is performed. func DecodeRune(p []byte) (r rune, size int) { r, size, _ = decodeRuneInternal(p) return @@ -209,6 +212,9 @@ func DecodeRune(p []byte) (r rune, size int) { // DecodeRuneInString is like DecodeRune but its input is a string. // If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8. +// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is +// out of range, or is not the shortest possible UTF-8 encoding for the +// value. No other validation is performed. func DecodeRuneInString(s string) (r rune, size int) { r, size, _ = decodeRuneInStringInternal(s) return @@ -216,6 +222,9 @@ func DecodeRuneInString(s string) (r rune, size int) { // DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes. // If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8. +// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is +// out of range, or is not the shortest possible UTF-8 encoding for the +// value. No other validation is performed. func DecodeLastRune(p []byte) (r rune, size int) { end := len(p) if end == 0 { @@ -250,6 +259,9 @@ func DecodeLastRune(p []byte) (r rune, size int) { // DecodeLastRuneInString is like DecodeLastRune but its input is a string. // If the encoding is invalid, it returns (RuneError, 1), an impossible result for correct UTF-8. +// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is +// out of range, or is not the shortest possible UTF-8 encoding for the +// value. No other validation is performed. func DecodeLastRuneInString(s string) (r rune, size int) { end := len(s) if end == 0 { diff --git a/src/pkg/unicode/utf8/utf8_test.go b/src/pkg/unicode/utf8/utf8_test.go index 4f73c8fb81a..65e6c7e8b35 100644 --- a/src/pkg/unicode/utf8/utf8_test.go +++ b/src/pkg/unicode/utf8/utf8_test.go @@ -311,6 +311,11 @@ var validTests = []ValidTest{ {string([]byte{66, 250}), false}, {string([]byte{66, 250, 67}), false}, {"a\uFFFDb", true}, + {string("\xF7\xBF\xBF\xBF"), true}, // U+1FFFFF + {string("\xFB\xBF\xBF\xBF\xBF"), false}, // 0x3FFFFFF; out of range + {string("\xc0\x80"), false}, // U+0000 encoded in two bytes: incorrect + // TODO {string("\xed\xa0\x80"), false }, // U+D800 high surrogate (sic) + // TODO {string("\xed\xbf\xbf"), false }, // U+DFFF low surrogate (sic) } func TestValid(t *testing.T) {