From 8c62fc0ca3c96ecbd3a6e81546aa8c53e32ff500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20M=C3=B6hrmann?= Date: Fri, 4 May 2018 06:54:18 +0200 Subject: [PATCH] strings: fix encoding of \u0080 in map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix encoding of PAD (U+0080) which has the same value as utf8.RuneSelf being incorrectly encoded as \x80 in strings.Map due to using <= instead of a < comparison operator to check one byte encodings for utf8. Fixes #25242 Change-Id: Ib6c7d1f425a7ba81e431b6d64009e713d94ea3bc Reviewed-on: https://go-review.googlesource.com/111286 Run-TryBot: Martin Möhrmann TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- src/strings/strings.go | 4 ++-- src/strings/strings_test.go | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/strings/strings.go b/src/strings/strings.go index 45345e0088..adbbe742fc 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -479,7 +479,7 @@ func Map(mapping func(rune) rune, s string) string { b = make([]byte, len(s)+utf8.UTFMax) nbytes = copy(b, s[:i]) if r >= 0 { - if r <= utf8.RuneSelf { + if r < utf8.RuneSelf { b[nbytes] = byte(r) nbytes++ } else { @@ -509,7 +509,7 @@ func Map(mapping func(rune) rune, s string) string { r := mapping(c) // common case - if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) { + if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) { b[nbytes] = byte(r) nbytes++ continue diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index 876f06c674..78bc573e5f 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -528,6 +528,7 @@ var upperTests = []StringTest{ {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"}, {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"}, {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char + {"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune } var lowerTests = []StringTest{ @@ -538,6 +539,7 @@ var lowerTests = []StringTest{ {"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"}, {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"}, {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char + {"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune } const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000" @@ -650,6 +652,27 @@ func TestMap(t *testing.T) { if m != expect { t.Errorf("replace invalid sequence: expected %q got %q", expect, m) } + + // 8. Check utf8.RuneSelf and utf8.MaxRune encoding + encode := func(r rune) rune { + switch r { + case utf8.RuneSelf: + return unicode.MaxRune + case unicode.MaxRune: + return utf8.RuneSelf + } + return r + } + s := string(utf8.RuneSelf) + string(utf8.MaxRune) + r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s + m = Map(encode, s) + if m != r { + t.Errorf("encoding not handled correctly: expected %q got %q", r, m) + } + m = Map(encode, r) + if m != s { + t.Errorf("encoding not handled correctly: expected %q got %q", s, m) + } } func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }