1
0
mirror of https://github.com/golang/go synced 2024-11-25 10:17:57 -07:00

exp/locale/collate: from the regression test we derive that the spec

dictates a CJK rune is only part of a certain specified range if it
is explicitly defined in the Unicode Codepoint Database.
Fixed the code and some of the tests accordingly.

R=r
CC=golang-dev
https://golang.org/cl/6160044
This commit is contained in:
Marcel van Lohuizen 2012-05-07 11:51:40 +02:00
parent 18aded7ab9
commit 56a76c88f8
4 changed files with 22 additions and 23 deletions

View File

@ -63,7 +63,7 @@ type convertTest struct {
var convLargeTests = []convertTest{
{pCE(0xFB39), pCE(0xFB39), false},
{cjk(0x2F9B2), pqCE(0x7F4F2, 0x2F9B2), false},
{cjk(0x2F9B2), pqCE(0x4F4F2, 0x2F9B2), false},
{pCE(0xFB40), pCE(0), true},
{append(pCE(0xFB40), pCE(0)[0]), pCE(0), true},
{pCE(0xFFFE), pCE(illegalOffset), false},

View File

@ -162,16 +162,16 @@ const (
// http://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset
}
if r >= minCompatibility && r <= maxCompatibility {
// This will never hit as long as we don't remove the characters
// that would match from the table.
return int(r) + commonUnifiedOffset
}
if unicode.Is(unicode.Unified_Ideograph, r) {
if unicode.Is(unicode.Ideographic, r) {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset
}
if r >= minCompatibility && r <= maxCompatibility {
// This will typically not hit. The DUCET explicitly specifies mappings
// for all characters that do not decompose.
return int(r) + commonUnifiedOffset
}
return int(r) + rareUnifiedOffset
}
return int(r) + otherOffset

View File

@ -154,17 +154,16 @@ const (
// http://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset
}
if r >= minCompatibility && r <= maxCompatibility {
// This will never hit as long as we don't remove the characters
// that would match from the table.
return int(r) + commonUnifiedOffset
}
if unicode.Is(unicode.Unified_Ideograph, r) {
if unicode.Is(unicode.Ideographic, r) {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset
}
if r >= minCompatibility && r <= maxCompatibility {
// This will typically not hit. The DUCET explicitly specifies mappings
// for all characters that do not decompose.
return int(r) + commonUnifiedOffset
}
return int(r) + rareUnifiedOffset
}
return int(r) + otherOffset

View File

@ -141,7 +141,7 @@ var implicitTests = []implicitTest{
{0xF8FF, 0x5F43F},
{0xF900, 0x1F440},
{0xFA23, 0x1F563},
{0xFAFF, 0x1F63F},
{0xFAD9, 0x1F619},
{0xFB00, 0x5F640},
{0x20000, 0x3FB40},
{0x2B81C, 0x4B35C},