From 05f8b44d5edc2960eff106e5e780cf83535d0533 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Thu, 7 Jun 2018 18:18:50 +0000 Subject: [PATCH] unicode: fix SpecialCase to follow its docs & respect explict no-op mappings If SpecialCase contains an explicit CaseRange with zero deltas, respect those and don't fall back to the default behavior. Fixes #25636 Change-Id: Ic554c6b3dd462b1b39c75194eec469b6ff4aa55b Reviewed-on: https://go-review.googlesource.com/117155 Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot Reviewed-by: Marcel van Lohuizen --- src/unicode/letter.go | 26 ++++++++++++++------------ src/unicode/letter_test.go | 12 ++++++++++++ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/unicode/letter.go b/src/unicode/letter.go index 4d9fc67165..8be9a7b7c9 100644 --- a/src/unicode/letter.go +++ b/src/unicode/letter.go @@ -206,9 +206,10 @@ func IsTitle(r rune) bool { } // to maps the rune using the specified case mapping. -func to(_case int, r rune, caseRange []CaseRange) rune { +// It additionally reports whether caseRange contained a mapping for r. +func to(_case int, r rune, caseRange []CaseRange) (mappedRune rune, foundMapping bool) { if _case < 0 || MaxCase <= _case { - return ReplacementChar // as reasonable an error as any + return ReplacementChar, false // as reasonable an error as any } // binary search over ranges lo := 0 @@ -229,9 +230,9 @@ func to(_case int, r rune, caseRange []CaseRange) rune { // bit in the sequence offset. // The constants UpperCase and TitleCase are even while LowerCase // is odd so we take the low bit from _case. - return rune(cr.Lo) + ((r-rune(cr.Lo))&^1 | rune(_case&1)) + return rune(cr.Lo) + ((r-rune(cr.Lo))&^1 | rune(_case&1)), true } - return r + delta + return r + delta, true } if r < rune(cr.Lo) { hi = m @@ -239,12 +240,13 @@ func to(_case int, r rune, caseRange []CaseRange) rune { lo = m + 1 } } - return r + return r, false } // To maps the rune to the specified case: UpperCase, LowerCase, or TitleCase. func To(_case int, r rune) rune { - return to(_case, r, CaseRanges) + r, _ = to(_case, r, CaseRanges) + return r } // ToUpper maps the rune to upper case. @@ -282,8 +284,8 @@ func ToTitle(r rune) rune { // ToUpper maps the rune to upper case giving priority to the special mapping. func (special SpecialCase) ToUpper(r rune) rune { - r1 := to(UpperCase, r, []CaseRange(special)) - if r1 == r { + r1, hadMapping := to(UpperCase, r, []CaseRange(special)) + if r1 == r && !hadMapping { r1 = ToUpper(r) } return r1 @@ -291,8 +293,8 @@ func (special SpecialCase) ToUpper(r rune) rune { // ToTitle maps the rune to title case giving priority to the special mapping. func (special SpecialCase) ToTitle(r rune) rune { - r1 := to(TitleCase, r, []CaseRange(special)) - if r1 == r { + r1, hadMapping := to(TitleCase, r, []CaseRange(special)) + if r1 == r && !hadMapping { r1 = ToTitle(r) } return r1 @@ -300,8 +302,8 @@ func (special SpecialCase) ToTitle(r rune) rune { // ToLower maps the rune to lower case giving priority to the special mapping. func (special SpecialCase) ToLower(r rune) rune { - r1 := to(LowerCase, r, []CaseRange(special)) - if r1 == r { + r1, hadMapping := to(LowerCase, r, []CaseRange(special)) + if r1 == r && !hadMapping { r1 = ToLower(r) } return r1 diff --git a/src/unicode/letter_test.go b/src/unicode/letter_test.go index 3fe72ff13d..19ee535d57 100644 --- a/src/unicode/letter_test.go +++ b/src/unicode/letter_test.go @@ -9,6 +9,7 @@ import ( "fmt" "runtime" "sort" + "strings" "testing" . "unicode" ) @@ -551,3 +552,14 @@ func TestLatinOffset(t *testing.T) { } } } + +func TestSpecialCaseNoMapping(t *testing.T) { + // Issue 25636 + // no change for rune 'A', zero delta, under upper/lower/title case change. + var noChangeForCapitalA = CaseRange{'A', 'A', [MaxCase]rune{0, 0, 0}} + got := strings.ToLowerSpecial(SpecialCase([]CaseRange{noChangeForCapitalA}), "ABC") + want := "Abc" + if got != want { + t.Errorf("got %q; want %q", got, want) + } +}