diff --git a/src/pkg/exp/locale/collate/regtest.go b/src/pkg/exp/locale/collate/regtest.go index 38c5783aca7..14a447c1e4e 100644 --- a/src/pkg/exp/locale/collate/regtest.go +++ b/src/pkg/exp/locale/collate/regtest.go @@ -24,6 +24,7 @@ import ( "strconv" "strings" "unicode" + "unicode/utf8" ) // This regression test runs tests for the test files in CollationTest.zip @@ -53,7 +54,7 @@ var localFiles = flag.Bool("local", type Test struct { name string - str []string + str [][]byte comment []string } @@ -186,14 +187,23 @@ func loadTestData() []Test { if m == nil || len(m) < 3 { log.Fatalf(`Failed to parse: "%s" result: %#v`, line, m) } - str := "" + str := []byte{} + // In the regression test data (unpaired) surrogates are assigned a weight + // corresponding to their code point value. However, utf8.DecodeRune, + // which is used to compute the implicit weight, assigns FFFD to surrogates. + // We therefore skip tests with surrogates. This skips about 35 entries + // per test. + valid := true for _, split := range strings.Split(m[1], " ") { r, err := strconv.ParseUint(split, 16, 64) Error(err) - str += string(rune(r)) + valid = valid && utf8.ValidRune(rune(r)) + str = append(str, string(rune(r))...) + } + if valid { + test.str = append(test.str, str) + test.comment = append(test.comment, m[2]) } - test.str = append(test.str, str) - test.comment = append(test.comment, m[2]) } tests = append(tests, test) } @@ -227,13 +237,13 @@ func doTest(t Test) { c.Alternate = collate.AltNonIgnorable } - prev := []byte(t.str[0]) + prev := t.str[0] for i := 1; i < len(t.str); i++ { - s := []byte(t.str[i]) + s := t.str[i] ka := c.Key(b, prev) kb := c.Key(b, s) if r := bytes.Compare(ka, kb); r == 1 { - fail(t, "%d: Key(%.4X) < Key(%.4X) (%X < %X) == %d; want -1 or 0", i, runes(prev), runes(s), ka, kb, r) + fail(t, "%d: Key(%.4X) < Key(%.4X) (%X < %X) == %d; want -1 or 0", i, []rune(string(prev)), []rune(string(s)), ka, kb, r) prev = s continue }