1
0
mirror of https://github.com/golang/go synced 2024-10-04 16:31:22 -06:00
go/src/pkg/strings/strings_test.go
Rob Pike 4e2b7f8f41 Unicode: provide an ability to supplement the case-mapping tables
in character and string case mapping routines.

Add a custom mapper for Turkish and Azeri.

A more general solution for deriving the case information from Unicode's
SpecialCasing.txt will require more work.

Fixes #703.

R=rsc, rsc1
CC=golang-dev, mdakin
https://golang.org/cl/824043
2010-03-30 17:51:03 -07:00

496 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
import (
. "strings"
"testing"
"unicode"
"utf8"
)
func eq(a, b []string) bool {
if len(a) != len(b) {
return false
}
for i := 0; i < len(a); i++ {
if a[i] != b[i] {
return false
}
}
return true
}
var abcd = "abcd"
var faces = "☺☻☹"
var commas = "1,2,3,4"
var dots = "1....2....3....4"
type IndexTest struct {
s string
sep string
out int
}
var indexTests = []IndexTest{
IndexTest{"", "", 0},
IndexTest{"", "a", -1},
IndexTest{"", "foo", -1},
IndexTest{"fo", "foo", -1},
IndexTest{"foo", "foo", 0},
IndexTest{"oofofoofooo", "f", 2},
IndexTest{"oofofoofooo", "foo", 4},
IndexTest{"barfoobarfoo", "foo", 3},
IndexTest{"foo", "", 0},
IndexTest{"foo", "o", 1},
IndexTest{"abcABCabc", "A", 3},
// cases with one byte strings - test special case in Index()
IndexTest{"", "a", -1},
IndexTest{"x", "a", -1},
IndexTest{"x", "x", 0},
IndexTest{"abc", "a", 0},
IndexTest{"abc", "b", 1},
IndexTest{"abc", "c", 2},
IndexTest{"abc", "x", -1},
}
var lastIndexTests = []IndexTest{
IndexTest{"", "", 0},
IndexTest{"", "a", -1},
IndexTest{"", "foo", -1},
IndexTest{"fo", "foo", -1},
IndexTest{"foo", "foo", 0},
IndexTest{"foo", "f", 0},
IndexTest{"oofofoofooo", "f", 7},
IndexTest{"oofofoofooo", "foo", 7},
IndexTest{"barfoobarfoo", "foo", 9},
IndexTest{"foo", "", 3},
IndexTest{"foo", "o", 2},
IndexTest{"abcABCabc", "A", 3},
IndexTest{"abcABCabc", "a", 6},
}
var indexAnyTests = []IndexTest{
IndexTest{"", "", -1},
IndexTest{"", "a", -1},
IndexTest{"", "abc", -1},
IndexTest{"a", "", -1},
IndexTest{"a", "a", 0},
IndexTest{"aaa", "a", 0},
IndexTest{"abc", "xyz", -1},
IndexTest{"abc", "xcz", 2},
IndexTest{"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
IndexTest{"aRegExp*", ".(|)*+?^$[]", 7},
IndexTest{dots + dots + dots, " ", -1},
}
// Execute f on each test case. funcName should be the name of f; it's used
// in failure reports.
func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) {
for _, test := range testCases {
actual := f(test.s, test.sep)
if actual != test.out {
t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out)
}
}
}
func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) }
func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) }
func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) }
type ExplodeTest struct {
s string
n int
a []string
}
var explodetests = []ExplodeTest{
ExplodeTest{abcd, 4, []string{"a", "b", "c", "d"}},
ExplodeTest{faces, 3, []string{"☺", "☻", "☹"}},
ExplodeTest{abcd, 2, []string{"a", "bcd"}},
}
func TestExplode(t *testing.T) {
for _, tt := range explodetests {
a := Split(tt.s, "", tt.n)
if !eq(a, tt.a) {
t.Errorf("explode(%q, %d) = %v; want %v", tt.s, tt.n, a, tt.a)
continue
}
s := Join(a, "")
if s != tt.s {
t.Errorf(`Join(explode(%q, %d), "") = %q`, tt.s, tt.n, s)
}
}
}
type SplitTest struct {
s string
sep string
n int
a []string
}
var splittests = []SplitTest{
SplitTest{abcd, "a", 0, []string{"", "bcd"}},
SplitTest{abcd, "z", 0, []string{"abcd"}},
SplitTest{abcd, "", 0, []string{"a", "b", "c", "d"}},
SplitTest{commas, ",", 0, []string{"1", "2", "3", "4"}},
SplitTest{dots, "...", 0, []string{"1", ".2", ".3", ".4"}},
SplitTest{faces, "☹", 0, []string{"☺☻", ""}},
SplitTest{faces, "~", 0, []string{faces}},
SplitTest{faces, "", 0, []string{"☺", "☻", "☹"}},
SplitTest{"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}},
SplitTest{"1 2", " ", 3, []string{"1", "2"}},
SplitTest{"123", "", 2, []string{"1", "23"}},
SplitTest{"123", "", 17, []string{"1", "2", "3"}},
}
func TestSplit(t *testing.T) {
for _, tt := range splittests {
a := Split(tt.s, tt.sep, tt.n)
if !eq(a, tt.a) {
t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a)
continue
}
s := Join(a, tt.sep)
if s != tt.s {
t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s)
}
}
}
var splitaftertests = []SplitTest{
SplitTest{abcd, "a", 0, []string{"a", "bcd"}},
SplitTest{abcd, "z", 0, []string{"abcd"}},
SplitTest{abcd, "", 0, []string{"a", "b", "c", "d"}},
SplitTest{commas, ",", 0, []string{"1,", "2,", "3,", "4"}},
SplitTest{dots, "...", 0, []string{"1...", ".2...", ".3...", ".4"}},
SplitTest{faces, "☹", 0, []string{"☺☻☹", ""}},
SplitTest{faces, "~", 0, []string{faces}},
SplitTest{faces, "", 0, []string{"☺", "☻", "☹"}},
SplitTest{"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
SplitTest{"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
SplitTest{"1 2", " ", 3, []string{"1 ", "2"}},
SplitTest{"123", "", 2, []string{"1", "23"}},
SplitTest{"123", "", 17, []string{"1", "2", "3"}},
}
func TestSplitAfter(t *testing.T) {
for _, tt := range splitaftertests {
a := SplitAfter(tt.s, tt.sep, tt.n)
if !eq(a, tt.a) {
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a)
continue
}
s := Join(a, "")
if s != tt.s {
t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
}
}
}
type FieldsTest struct {
s string
a []string
}
var fieldstests = []FieldsTest{
FieldsTest{"", []string{}},
FieldsTest{" ", []string{}},
FieldsTest{" \t ", []string{}},
FieldsTest{" abc ", []string{"abc"}},
FieldsTest{"1 2 3 4", []string{"1", "2", "3", "4"}},
FieldsTest{"1 2 3 4", []string{"1", "2", "3", "4"}},
FieldsTest{"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
FieldsTest{"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
FieldsTest{"\u2000\u2001\u2002", []string{}},
FieldsTest{"\n™\t™\n", []string{"™", "™"}},
FieldsTest{faces, []string{faces}},
}
func TestFields(t *testing.T) {
for _, tt := range fieldstests {
a := Fields(tt.s)
if !eq(a, tt.a) {
t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
continue
}
}
}
// Test case for any function which accepts and returns a single string.
type StringTest struct {
in, out string
}
// Execute f on each test case. funcName should be the name of f; it's used
// in failure reports.
func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) {
for _, tc := range testCases {
actual := f(tc.in)
if actual != tc.out {
t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out)
}
}
}
var upperTests = []StringTest{
StringTest{"", ""},
StringTest{"abc", "ABC"},
StringTest{"AbC123", "ABC123"},
StringTest{"azAZ09_", "AZAZ09_"},
StringTest{"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
}
var lowerTests = []StringTest{
StringTest{"", ""},
StringTest{"abc", "abc"},
StringTest{"AbC123", "abc123"},
StringTest{"azAZ09_", "azaz09_"},
StringTest{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
}
const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
var trimSpaceTests = []StringTest{
StringTest{"", ""},
StringTest{"abc", "abc"},
StringTest{space + "abc" + space, "abc"},
StringTest{" ", ""},
StringTest{" \t\r\n \t\t\r\r\n\n ", ""},
StringTest{" \t\r\n x\t\t\r\r\n\n ", "x"},
StringTest{" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"},
StringTest{"1 \t\r\n2", "1 \t\r\n2"},
StringTest{" x\x80", "x\x80"}, // invalid UTF-8 on end
StringTest{" x\xc0", "x\xc0"}, // invalid UTF-8 on end
}
func tenRunes(rune int) string {
r := make([]int, 10)
for i := range r {
r[i] = rune
}
return string(r)
}
// User-defined self-inverse mapping function
func rot13(rune int) int {
step := 13
if rune >= 'a' && rune <= 'z' {
return ((rune - 'a' + step) % 26) + 'a'
}
if rune >= 'A' && rune <= 'Z' {
return ((rune - 'A' + step) % 26) + 'A'
}
return rune
}
func TestMap(t *testing.T) {
// Run a couple of awful growth/shrinkage tests
a := tenRunes('a')
// 1. Grow. This triggers two reallocations in Map.
maxRune := func(rune int) int { return unicode.MaxRune }
m := Map(maxRune, a)
expect := tenRunes(unicode.MaxRune)
if m != expect {
t.Errorf("growing: expected %q got %q", expect, m)
}
// 2. Shrink
minRune := func(rune int) int { return 'a' }
m = Map(minRune, tenRunes(unicode.MaxRune))
expect = a
if m != expect {
t.Errorf("shrinking: expected %q got %q", expect, m)
}
// 3. Rot13
m = Map(rot13, "a to zed")
expect = "n gb mrq"
if m != expect {
t.Errorf("rot13: expected %q got %q", expect, m)
}
// 4. Rot13^2
m = Map(rot13, Map(rot13, "a to zed"))
expect = "a to zed"
if m != expect {
t.Errorf("rot13: expected %q got %q", expect, m)
}
// 5. Drop
dropNotLatin := func(rune int) int {
if unicode.Is(unicode.Latin, rune) {
return rune
}
return -1
}
m = Map(dropNotLatin, "Hello, 세계")
expect = "Hello"
if m != expect {
t.Errorf("drop: expected %q got %q", expect, m)
}
}
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
func TestSpecialCase(t *testing.T) {
lower := "abcçdefgğhıijklmnoöprsştuüvyz"
upper := "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ"
u := ToUpperSpecial(unicode.TurkishCase, upper)
if u != upper {
t.Errorf("Upper(upper) is %s not %s", u, upper)
}
u = ToUpperSpecial(unicode.TurkishCase, lower)
if u != upper {
t.Errorf("Upper(lower) is %s not %s", u, upper)
}
l := ToLowerSpecial(unicode.TurkishCase, lower)
if l != lower {
t.Errorf("Lower(lower) is %s not %s", l, lower)
}
l = ToLowerSpecial(unicode.TurkishCase, upper)
if l != lower {
t.Errorf("Lower(upper) is %s not %s", l, lower)
}
}
func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
func equal(m string, s1, s2 string, t *testing.T) bool {
if s1 == s2 {
return true
}
e1 := Split(s1, "", 0)
e2 := Split(s2, "", 0)
for i, c1 := range e1 {
if i > len(e2) {
break
}
r1, _ := utf8.DecodeRuneInString(c1)
r2, _ := utf8.DecodeRuneInString(e2[i])
if r1 != r2 {
t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2)
}
}
return false
}
func TestCaseConsistency(t *testing.T) {
// Make a string of all the runes.
a := make([]int, unicode.MaxRune+1)
for i := range a {
a[i] = i
}
s := string(a)
// convert the cases.
upper := ToUpper(s)
lower := ToLower(s)
// Consistency checks
if n := utf8.RuneCountInString(upper); n != unicode.MaxRune+1 {
t.Error("rune count wrong in upper:", n)
}
if n := utf8.RuneCountInString(lower); n != unicode.MaxRune+1 {
t.Error("rune count wrong in lower:", n)
}
if !equal("ToUpper(upper)", ToUpper(upper), upper, t) {
t.Error("ToUpper(upper) consistency fail")
}
if !equal("ToLower(lower)", ToLower(lower), lower, t) {
t.Error("ToLower(lower) consistency fail")
}
/*
These fail because of non-one-to-oneness of the data, such as multiple
upper case 'I' mapping to 'i'. We comment them out but keep them for
interest.
For instance: CAPITAL LETTER I WITH DOT ABOVE:
unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130'
if !equal("ToUpper(lower)", ToUpper(lower), upper, t) {
t.Error("ToUpper(lower) consistency fail");
}
if !equal("ToLower(upper)", ToLower(upper), lower, t) {
t.Error("ToLower(upper) consistency fail");
}
*/
}
type RepeatTest struct {
in, out string
count int
}
var RepeatTests = []RepeatTest{
RepeatTest{"", "", 0},
RepeatTest{"", "", 1},
RepeatTest{"", "", 2},
RepeatTest{"-", "", 0},
RepeatTest{"-", "-", 1},
RepeatTest{"-", "----------", 10},
RepeatTest{"abc ", "abc abc abc ", 3},
}
func TestRepeat(t *testing.T) {
for _, tt := range RepeatTests {
a := Repeat(tt.in, tt.count)
if !equal("Repeat(s)", a, tt.out, t) {
t.Errorf("Repeat(%v, %d) = %v; want %v", tt.in, tt.count, a, tt.out)
continue
}
}
}
func runesEqual(a, b []int) bool {
if len(a) != len(b) {
return false
}
for i, r := range a {
if r != b[i] {
return false
}
}
return true
}
type RunesTest struct {
in string
out []int
lossy bool
}
var RunesTests = []RunesTest{
RunesTest{"", []int{}, false},
RunesTest{" ", []int{32}, false},
RunesTest{"ABC", []int{65, 66, 67}, false},
RunesTest{"abc", []int{97, 98, 99}, false},
RunesTest{"\u65e5\u672c\u8a9e", []int{26085, 26412, 35486}, false},
RunesTest{"ab\x80c", []int{97, 98, 0xFFFD, 99}, true},
RunesTest{"ab\xc0c", []int{97, 98, 0xFFFD, 99}, true},
}
func TestRunes(t *testing.T) {
for _, tt := range RunesTests {
a := []int(tt.in)
if !runesEqual(a, tt.out) {
t.Errorf("[]int(%q) = %v; want %v", tt.in, a, tt.out)
continue
}
if !tt.lossy {
// can only test reassembly if we didn't lose information
s := string(a)
if s != tt.in {
t.Errorf("string([]int(%q)) = %x; want %x", tt.in, s, tt.in)
}
}
}
}