1
0
mirror of https://github.com/golang/go synced 2024-10-01 09:28:37 -06:00

exp/locale/collate: include composed characters into the table. This eliminates

the need to decompose characters for the majority of cases.  This considerably
speeds up collation while increasing the table size minimally.

To detect non-normalized strings, rather than relying on exp/norm, the table
now includes CCC information. The inclusion of this information does not
increase table size.

DETAILS
 - Raw collation elements are now a struct that includes the CCC, rather
   than a slice of ints.
 - Builder now ensures that NFD and NFC counterparts are included in the table.
   This also fixes a bug for Korean which is responsible for most of the growth
   of the table size.
 - As there is no more normalization step, code should now handle both strings
   and byte slices as input. Introduced source type to facilitate this.

NOTES
 - This change does not yet entirely handle normalization correctly for contractions.
   This causes a few failures with the regtest. table_test.go contains a few
   commented-out tests that can be enabled once this is fixed.  The easiest approach
   is to fix this once we have the new norm.Iter.
 - Removed test cases in table_test that cover cases that are now guaranteed
   not to exist.

R=rsc, mpvl
CC=golang-dev
https://golang.org/cl/6971044
This commit is contained in:
Marcel van Lohuizen 2012-12-24 16:42:29 +01:00
parent 43f2fc308b
commit 9aa70984a9
16 changed files with 46794 additions and 42287 deletions

View File

@ -98,24 +98,24 @@ func (b *Builder) Tailoring(locale string) *Tailoring {
// a value for each colelem that is a variable. (See the reference above.)
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
str := string(runes)
elems := make([][]int, len(colelems))
elems := make([]rawCE, len(colelems))
for i, ce := range colelems {
elems[i] = append(elems[i], ce...)
if len(ce) == 0 {
elems[i] = append(elems[i], []int{0, 0, 0, 0}...)
break
}
elems[i] = makeRawCE(ce, 0)
if len(ce) == 1 {
elems[i] = append(elems[i], defaultSecondary)
elems[i].w[1] = defaultSecondary
}
if len(ce) <= 2 {
elems[i] = append(elems[i], defaultTertiary)
elems[i].w[2] = defaultTertiary
}
if len(ce) <= 3 {
elems[i] = append(elems[i], ce[0])
elems[i].w[3] = ce[0]
}
}
for i, ce := range elems {
p := ce.w[0]
isvar := false
for _, j := range variables {
if i == j {
@ -123,18 +123,18 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
}
}
if isvar {
if ce[0] >= b.minNonVar && b.minNonVar > 0 {
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", ce[0], b.minNonVar)
if p >= b.minNonVar && b.minNonVar > 0 {
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
}
if ce[0] > b.varTop {
b.varTop = ce[0]
if p > b.varTop {
b.varTop = p
}
} else if ce[0] > 1 { // 1 is a special primary value reserved for FFFE
if ce[0] <= b.varTop {
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", ce[0], b.varTop)
} else if p > 1 { // 1 is a special primary value reserved for FFFE
if p <= b.varTop {
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
}
if b.minNonVar == 0 || ce[0] < b.minNonVar {
b.minNonVar = ce[0]
if b.minNonVar == 0 || p < b.minNonVar {
b.minNonVar = p
}
}
}
@ -142,16 +142,42 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
if err != nil {
return err
}
cccs := []uint8{}
nfd := norm.NFD.String(str)
for i := range nfd {
cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
}
if len(cccs) < len(elems) {
if len(cccs) > 2 {
return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
}
p := len(elems) - 1
for ; p > 0 && elems[p].w[0] == 0; p-- {
elems[p].ccc = cccs[len(cccs)-1]
}
for ; p >= 0; p-- {
elems[p].ccc = cccs[0]
}
} else {
for i := range elems {
elems[i].ccc = cccs[i]
}
}
// doNorm in collate.go assumes that the following conditions hold.
if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
}
b.root.newEntry(str, elems)
return nil
}
func (t *Tailoring) setAnchor(anchor string) error {
anchor = norm.NFD.String(anchor)
anchor = norm.NFC.String(anchor)
a := t.index.find(anchor)
if a == nil {
a = t.index.newEntry(anchor, nil)
a.implicit = true
a.modified = true
for _, r := range []rune(anchor) {
e := t.index.find(string(r))
e.lock = true
@ -221,7 +247,7 @@ func (t *Tailoring) Insert(level collate.Level, str, extend string) error {
if t.anchor == nil {
return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
}
str = norm.NFD.String(str)
str = norm.NFC.String(str)
e := t.index.find(str)
if e == nil {
e = t.index.newEntry(str, nil)
@ -262,12 +288,13 @@ func (t *Tailoring) Insert(level collate.Level, str, extend string) error {
}
e.extend = norm.NFD.String(extend)
e.exclude = false
e.modified = true
e.elems = nil
t.anchor = e
return nil
}
func (o *ordering) getWeight(e *entry) [][]int {
func (o *ordering) getWeight(e *entry) []rawCE {
if len(e.elems) == 0 && e.logical == noAnchor {
if e.implicit {
for _, r := range e.runes {
@ -279,11 +306,10 @@ func (o *ordering) getWeight(e *entry) [][]int {
for ; a.elems == nil && !a.implicit; a = a.next {
count[a.level]++
}
e.elems = append([][]int(nil), make([]int, len(a.elems[0])))
copy(e.elems[0], a.elems[0])
e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
for i := collate.Primary; i < collate.Quaternary; i++ {
if count[i] != 0 {
e.elems[0][i] -= count[i]
e.elems[0].w[i] -= count[i]
break
}
}
@ -315,11 +341,11 @@ func (o *ordering) verifyWeights(a, b *entry, level collate.Level) error {
return nil
}
for i := collate.Primary; i < level; i++ {
if a.elems[0][i] < b.elems[0][i] {
if a.elems[0].w[i] < b.elems[0].w[i] {
return nil
}
}
if a.elems[0][level] >= b.elems[0][level] {
if a.elems[0].w[level] >= b.elems[0].w[level] {
err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
log.Println(err)
// TODO: return the error instead, or better, fix the conflicting entry by making room.
@ -339,6 +365,54 @@ func (b *Builder) errorID(locale string, e error) {
}
}
// patchNorm ensures that NFC and NFD counterparts are consistent.
func (o *ordering) patchNorm() {
// Insert the NFD counterparts, if necessary.
for _, e := range o.ordered {
nfd := norm.NFD.String(e.str)
if nfd != e.str {
if e0 := o.find(nfd); e0 != nil && !e0.modified {
e0.elems = e.elems
} else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
e := o.newEntry(nfd, e.elems)
e.modified = true
}
}
}
// Update unchanged composed forms if one of their parts changed.
for _, e := range o.ordered {
nfd := norm.NFD.String(e.str)
if e.modified || nfd == e.str {
continue
}
if e0 := o.find(nfd); e0 != nil {
e.elems = e0.elems
} else {
e.elems = o.genColElems(nfd)
if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
r := []rune(nfd)
head := string(r[0])
tail := ""
for i := 1; i < len(r); i++ {
s := norm.NFC.String(head + string(r[i]))
if e0 := o.find(s); e0 != nil && e0.modified {
head = s
} else {
tail += string(r[i])
}
}
e.elems = append(o.genColElems(head), o.genColElems(tail)...)
}
}
}
// Exclude entries for which the individual runes generate the same collation elements.
for _, e := range o.ordered {
if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
e.exclude = true
}
}
}
func (b *Builder) buildOrdering(o *ordering) {
for _, e := range o.ordered {
o.getWeight(e)
@ -346,6 +420,7 @@ func (b *Builder) buildOrdering(o *ordering) {
for _, e := range o.ordered {
o.addExtension(e)
}
o.patchNorm()
o.sort()
simplify(o)
b.processExpansions(o) // requires simplify
@ -436,20 +511,20 @@ func (b *Builder) Print(w io.Writer) (n int, err error) {
// reproducibleFromNFKD checks whether the given expansion could be generated
// from an NFKD expansion.
func reproducibleFromNFKD(e *entry, exp, nfkd [][]int) bool {
func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
// Length must be equal.
if len(exp) != len(nfkd) {
return false
}
for i, ce := range exp {
// Primary and secondary values should be equal.
if ce[0] != nfkd[i][0] || ce[1] != nfkd[i][1] {
if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
return false
}
// Tertiary values should be equal to maxTertiary for third element onwards.
// TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
// simply be dropped. Try this out by dropping the following code.
if i >= 2 && ce[2] != maxTertiary {
if i >= 2 && ce.w[2] != maxTertiary {
return false
}
if _, err := makeCE(ce); err != nil {
@ -469,22 +544,12 @@ func simplify(o *ordering) {
keep[e.runes[0]] = true
}
}
// Remove entries for which the runes normalize (using NFD) to identical values.
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
s := e.str
nfd := norm.NFD.String(s)
if len(e.runes) > 1 || keep[e.runes[0]] || nfd == s {
continue
}
if equalCEArrays(o.genColElems(nfd), e.elems) {
e.remove()
}
}
// Tag entries for which the runes NFKD decompose to identical values.
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
s := e.str
nfkd := norm.NFKD.String(s)
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == s {
nfd := norm.NFD.String(s)
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
continue
}
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
@ -589,18 +654,18 @@ func (b *Builder) processContractions(o *ordering) {
// Bucket sort entries in index order.
es := make([]*entry, len(l))
for _, e := range l {
var o, sn int
var p, sn int
if len(e.runes) > 1 {
str := []byte(string(e.runes[1:]))
o, sn = t.contractTries.lookup(handle, str)
p, sn = t.contractTries.lookup(handle, str)
if sn != len(str) {
log.Fatalf("processContractions: unexpected length for '%X'; len=%d; want %d", e.runes, sn, len(str))
log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
}
}
if es[o] != nil {
log.Fatalf("Multiple contractions for position %d for rune %U", o, e.runes[0])
if es[p] != nil {
log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
}
es[o] = e
es[p] = e
}
// Create collation elements for contractions.
elems := []uint32{}

View File

@ -7,48 +7,64 @@ package build
import "testing"
// cjk returns an implicit collation element for a CJK rune.
func cjk(r rune) [][]int {
func cjk(r rune) []rawCE {
// A CJK character C is represented in the DUCET as
// [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
// Where AAAA is the most significant 15 bits plus a base value.
// Any base value will work for the test, so we pick the common value of FB40.
const base = 0xFB40
return [][]int{
{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)},
{int(r&0x7FFF) | 0x8000, 0, 0, int(r)},
return []rawCE{
{w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}},
{w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}},
}
}
func pCE(p int) [][]int {
return [][]int{{p, defaultSecondary, defaultTertiary, 0}}
func pCE(p int) []rawCE {
return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0)
}
func pqCE(p, q int) [][]int {
return [][]int{{p, defaultSecondary, defaultTertiary, q}}
func pqCE(p, q int) []rawCE {
return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0)
}
func ptCE(p, t int) [][]int {
return [][]int{{p, defaultSecondary, t, 0}}
func ptCE(p, t int) []rawCE {
return mkCE([]int{p, defaultSecondary, t, 0}, 0)
}
func sCE(s int) [][]int {
return [][]int{{0, s, defaultTertiary, 0}}
func ptcCE(p, t int, ccc uint8) []rawCE {
return mkCE([]int{p, defaultSecondary, t, 0}, ccc)
}
func stCE(s, t int) [][]int {
return [][]int{{0, s, t, 0}}
func sCE(s int) []rawCE {
return mkCE([]int{0, s, defaultTertiary, 0}, 0)
}
func stCE(s, t int) []rawCE {
return mkCE([]int{0, s, t, 0}, 0)
}
func scCE(s int, ccc uint8) []rawCE {
return mkCE([]int{0, s, defaultTertiary, 0}, ccc)
}
func mkCE(w []int, ccc uint8) []rawCE {
return []rawCE{rawCE{w, ccc}}
}
// ducetElem is used to define test data that is used to generate a table.
type ducetElem struct {
str string
ces [][]int
ces []rawCE
}
func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
b := NewBuilder()
for _, e := range ducet {
if err := b.Add([]rune(e.str), e.ces, nil); err != nil {
ces := [][]int{}
for _, ce := range e.ces {
ces = append(ces, ce.w)
}
if err := b.Add([]rune(e.str), ces, nil); err != nil {
t.Errorf(err.Error())
}
}
@ -58,7 +74,7 @@ func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
}
type convertTest struct {
in, out [][]int
in, out []rawCE
err bool
}
@ -173,16 +189,18 @@ func TestSimplify(t *testing.T) {
}
var expandTest = []ducetElem{
{"\u00C0", append(ptCE(100, 8), sCE(30)...)},
{"\u00C8", append(ptCE(105, 8), sCE(30)...)},
{"\u00C9", append(ptCE(105, 8), sCE(30)...)}, // identical expansion
{"\u0300", append(scCE(29, 230), scCE(30, 230)...)},
{"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)},
{"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)},
{"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
{"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
}
func TestExpand(t *testing.T) {
const (
totalExpansions = 3
totalElements = 2 + 2 + 3 + totalExpansions
totalExpansions = 5
totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions
)
b := newBuilder(t, expandTest)
o := &b.root

View File

@ -16,6 +16,17 @@ const (
maxTertiary = 0x1F
)
type rawCE struct {
w []int
ccc uint8
}
func makeRawCE(w []int, ccc uint8) rawCE {
ce := rawCE{w: make([]int, 4), ccc: ccc}
copy(ce.w, w)
return ce
}
// A collation element is represented as an uint32.
// In the typical case, a rune maps to a single collation element. If a rune
// can be the start of a contraction or expands into multiple collation elements,
@ -29,29 +40,36 @@ const (
// 01pppppp pppppppp ppppppp0 ssssssss
// - p* is primary collation value
// - s* is the secondary collation value
// or
// 00pppppp pppppppp ppppppps sssttttt, where
// - p* is primary collation value
// - s* offset of secondary from default value.
// - t* is the tertiary collation value
// 100ttttt cccccccc pppppppp pppppppp
// - t* is the tertiary collation value
// - c* is the canonical combining class
// - p* is the primary collation value
// Collation elements with a secondary value are of the form
// 10000000 0000ssss ssssssss tttttttt, where
// - 16 BMP implicit -> weight
// - 8 bit s
// - default tertiary
// 1010cccc ccccssss ssssssss tttttttt, where
// - c* is the canonical combining class
// - s* is the secondary collation value
// - t* is the tertiary collation value
const (
maxPrimaryBits = 21
maxPrimaryCompactBits = 16
maxSecondaryBits = 12
maxSecondaryCompactBits = 8
maxCCCBits = 8
maxSecondaryDiffBits = 4
maxTertiaryBits = 8
maxTertiaryCompactBits = 5
isSecondary = 0x80000000
isPrimary = 0x40000000
isPrimaryCCC = 0x80000000
isSecondary = 0xA0000000
)
func makeCE(weights []int) (uint32, error) {
func makeCE(rce rawCE) (uint32, error) {
weights := rce.w
if w := weights[0]; w >= 1<<maxPrimaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
}
@ -63,14 +81,25 @@ func makeCE(weights []int) (uint32, error) {
}
ce := uint32(0)
if weights[0] != 0 {
if weights[2] == defaultTertiary {
if rce.ccc != 0 {
if weights[0] >= 1<<maxPrimaryCompactBits {
return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", weights[0], 1<<maxPrimaryCompactBits)
}
if weights[1] != defaultSecondary {
return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", weights[1], rce.ccc)
}
ce = uint32(weights[2] << (maxPrimaryCompactBits + maxCCCBits))
ce |= uint32(rce.ccc) << maxPrimaryCompactBits
ce |= uint32(weights[0])
ce |= isPrimaryCCC
} else if weights[2] == defaultTertiary {
if weights[1] >= 1<<maxSecondaryCompactBits {
return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", weights[1], 1<<maxSecondaryCompactBits)
}
ce = uint32(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
ce |= isPrimary
} else {
d := weights[1] - defaultSecondary + 4
d := weights[1] - defaultSecondary + maxSecondaryDiffBits
if d >= 1<<maxSecondaryDiffBits || d < 0 {
return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
}
@ -82,6 +111,7 @@ func makeCE(weights []int) (uint32, error) {
}
} else {
ce = uint32(weights[1]<<maxTertiaryBits + weights[2])
ce += uint32(rce.ccc) << (maxSecondaryBits + maxTertiaryBits)
ce |= isSecondary
}
return ce, nil
@ -207,7 +237,7 @@ func implicitPrimary(r rune) int {
// We will rewrite these characters to a single CE.
// We assume the CJK values start at 0x8000.
// See http://unicode.org/reports/tr10/#Implicit_Weights
func convertLargeWeights(elems [][]int) (res [][]int, err error) {
func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
const (
cjkPrimaryStart = 0xFB40
rarePrimaryStart = 0xFB80
@ -219,7 +249,7 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) {
shiftBits = 15
)
for i := 0; i < len(elems); i++ {
ce := elems[i]
ce := elems[i].w
p := ce[0]
if p < cjkPrimaryStart {
continue
@ -233,10 +263,10 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) {
if i+1 >= len(elems) {
return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
}
if elems[i+1][0]&lowBitsFlag == 0 {
if elems[i+1].w[0]&lowBitsFlag == 0 {
return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
}
np := ((p & highBitsMask) << shiftBits) + elems[i+1][0]&lowBitsMask
np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask
switch {
case p < rarePrimaryStart:
np += commonUnifiedOffset
@ -257,26 +287,25 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) {
// nextWeight computes the first possible collation weights following elems
// for the given level.
func nextWeight(level collate.Level, elems [][]int) [][]int {
func nextWeight(level collate.Level, elems []rawCE) []rawCE {
if level == collate.Identity {
next := make([][]int, len(elems))
next := make([]rawCE, len(elems))
copy(next, elems)
return next
}
next := [][]int{make([]int, len(elems[0]))}
copy(next[0], elems[0])
next[0][level]++
next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
next[0].w[level]++
if level < collate.Secondary {
next[0][collate.Secondary] = defaultSecondary
next[0].w[collate.Secondary] = defaultSecondary
}
if level < collate.Tertiary {
next[0][collate.Tertiary] = defaultTertiary
next[0].w[collate.Tertiary] = defaultTertiary
}
// Filter entries that cannot influence ordering.
for _, ce := range elems[1:] {
skip := true
for i := collate.Primary; i < level; i++ {
skip = skip && ce[i] == 0
skip = skip && ce.w[i] == 0
}
if !skip {
next = append(next, ce)
@ -285,18 +314,18 @@ func nextWeight(level collate.Level, elems [][]int) [][]int {
return next
}
func nextVal(elems [][]int, i int, level collate.Level) (index, value int) {
for ; i < len(elems) && elems[i][level] == 0; i++ {
func nextVal(elems []rawCE, i int, level collate.Level) (index, value int) {
for ; i < len(elems) && elems[i].w[level] == 0; i++ {
}
if i < len(elems) {
return i, elems[i][level]
return i, elems[i].w[level]
}
return i, 0
}
// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
// It also returns the collation level at which the difference is found.
func compareWeights(a, b [][]int) (result int, level collate.Level) {
func compareWeights(a, b []rawCE) (result int, level collate.Level) {
for level := collate.Primary; level < collate.Identity; level++ {
var va, vb int
for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
@ -314,19 +343,16 @@ func compareWeights(a, b [][]int) (result int, level collate.Level) {
return 0, collate.Identity
}
func equalCE(a, b []int) bool {
if len(a) != len(b) {
return false
}
func equalCE(a, b rawCE) bool {
for i := 0; i < 3; i++ {
if b[i] != a[i] {
if b.w[i] != a.w[i] {
return false
}
}
return true
}
func equalCEArrays(a, b [][]int) bool {
func equalCEArrays(a, b []rawCE) bool {
if len(a) != len(b) {
return false
}

View File

@ -16,7 +16,7 @@ type ceTest struct {
}
func normalCE(in []int) (ce uint32, err error) {
return makeCE(in)
return makeCE(rawCE{w: in[:3], ccc: uint8(in[3])})
}
func expandCE(in []int) (ce uint32, err error) {
@ -32,17 +32,20 @@ func decompCE(in []int) (ce uint32, err error) {
}
var ceTests = []ceTest{
{normalCE, []int{0, 0, 0}, 0x80000000},
{normalCE, []int{0, 0x28, 3}, 0x80002803},
{normalCE, []int{100, defaultSecondary, 3}, 0x0000C883},
{normalCE, []int{0, 0, 0, 0}, 0xA0000000},
{normalCE, []int{0, 0x28, 3, 0}, 0xA0002803},
{normalCE, []int{0, 0x28, 3, 0xFF}, 0xAFF02803},
{normalCE, []int{100, defaultSecondary, 3, 0}, 0x0000C883},
// non-ignorable primary with non-default secondary
{normalCE, []int{100, 0x28, defaultTertiary}, 0x4000C828},
{normalCE, []int{100, defaultSecondary + 8, 3}, 0x0000C983},
{normalCE, []int{100, 0, 3}, 0xFFFF}, // non-ignorable primary with non-supported secondary
{normalCE, []int{100, 1, 3}, 0xFFFF},
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0}, 0xFFFF},
{normalCE, []int{0, 1 << maxSecondaryBits, 0}, 0xFFFF},
{normalCE, []int{100, defaultSecondary, 1 << maxTertiaryBits}, 0xFFFF},
{normalCE, []int{100, 0x28, defaultTertiary, 0}, 0x4000C828},
{normalCE, []int{100, defaultSecondary + 8, 3, 0}, 0x0000C983},
{normalCE, []int{100, 0, 3, 0}, 0xFFFF}, // non-ignorable primary with non-supported secondary
{normalCE, []int{100, 1, 3, 0}, 0xFFFF},
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0, 0}, 0xFFFF},
{normalCE, []int{0, 1 << maxSecondaryBits, 0, 0}, 0xFFFF},
{normalCE, []int{100, defaultSecondary, 1 << maxTertiaryBits, 0}, 0xFFFF},
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}, 0x88FF0123},
{normalCE, []int{0x123, defaultSecondary + 1, 8, 0xFF}, 0xFFFF},
{contractCE, []int{0, 0, 0}, 0xC0000000},
{contractCE, []int{1, 1, 1}, 0xC0010011},
@ -85,6 +88,14 @@ func TestColElem(t *testing.T) {
}
}
func mkRawCES(in [][]int) []rawCE {
out := []rawCE{}
for _, w := range in {
out = append(out, rawCE{w: w})
}
return out
}
type weightsTest struct {
a, b [][]int
level collate.Level
@ -119,8 +130,8 @@ var extra = [][]int{{200, 32, 8, 0}, {0, 32, 8, 0}, {0, 0, 8, 0}, {0, 0, 0, 0}}
func TestNextWeight(t *testing.T) {
for i, tt := range nextWeightTests {
test := func(l collate.Level, tt weightsTest, a, gold [][]int) {
res := nextWeight(tt.level, a)
if !equalCEArrays(gold, res) {
res := nextWeight(tt.level, mkRawCES(a))
if !equalCEArrays(mkRawCES(gold), res) {
t.Errorf("%d:%d: expected weights %d; found %d", i, l, gold, res)
}
}
@ -189,7 +200,7 @@ var compareTests = []weightsTest{
func TestCompareWeights(t *testing.T) {
for i, tt := range compareTests {
test := func(tt weightsTest, a, b [][]int) {
res, level := compareWeights(a, b)
res, level := compareWeights(mkRawCES(a), mkRawCES(b))
if res != tt.result {
t.Errorf("%d: expected comparisson result %d; found %d", i, tt.result, res)
}

View File

@ -6,6 +6,7 @@ package build
import (
"exp/locale/collate"
"exp/norm"
"fmt"
"log"
"sort"
@ -28,7 +29,7 @@ const (
type entry struct {
str string // same as string(runes)
runes []rune
elems [][]int // the collation elements
elems []rawCE // the collation elements
extend string // weights of extend to be appended to elems
before bool // weights relative to next instead of previous.
lock bool // entry is used in extension and can no longer be moved.
@ -41,6 +42,7 @@ type entry struct {
decompose bool // can use NFKD decomposition to generate elems
exclude bool // do not include in table
implicit bool // derived, is not included in the list
modified bool // entry was modified in tailoring
logical logicalAnchor
expansionIndex int // used to store index into expansion table
@ -162,10 +164,10 @@ func (e *entry) encode() (ce uint32, err error) {
}
switch {
case e.decompose:
t1 := e.elems[0][2]
t1 := e.elems[0].w[2]
t2 := 0
if len(e.elems) > 1 {
t2 = e.elems[1][2]
t2 = e.elems[1].w[2]
}
ce, err = makeDecompose(t1, t2)
case e.contractionStarter():
@ -231,7 +233,7 @@ func (o *ordering) insert(e *entry) {
// newEntry creates a new entry for the given info and inserts it into
// the index.
func (o *ordering) newEntry(s string, ces [][]int) *entry {
func (o *ordering) newEntry(s string, ces []rawCE) *entry {
e := &entry{
runes: []rune(s),
elems: ces,
@ -249,14 +251,29 @@ func (o *ordering) find(str string) *entry {
if e == nil {
r := []rune(str)
if len(r) == 1 {
e = o.newEntry(string(r[0]), [][]int{
{
const (
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r[0] >= firstHangul && r[0] <= lastHangul {
ce := []rawCE{}
nfd := norm.NFD.String(str)
for _, r := range nfd {
ce = append(ce, o.find(string(r)).elems...)
}
e = o.newEntry(nfd, ce)
} else {
e = o.newEntry(string(r[0]), []rawCE{
{w: []int{
implicitPrimary(r[0]),
defaultSecondary,
defaultTertiary,
int(r[0]),
},
},
})
e.modified = true
}
e.exclude = true // do not index implicits
}
}
@ -275,7 +292,7 @@ func makeRootOrdering() ordering {
}
insert := func(typ logicalAnchor, s string, ce []int) {
e := &entry{
elems: [][]int{ce},
elems: []rawCE{{w: ce}},
str: s,
exclude: true,
logical: typ,
@ -362,10 +379,14 @@ func (o *ordering) sort() {
// genColElems generates a collation element array from the runes in str. This
// assumes that all collation elements have already been added to the Builder.
func (o *ordering) genColElems(str string) [][]int {
elems := [][]int{}
func (o *ordering) genColElems(str string) []rawCE {
elems := []rawCE{}
for _, r := range []rune(str) {
elems = append(elems, o.find(string(r)).elems...)
for _, ce := range o.find(string(r)).elems {
if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
elems = append(elems, ce)
}
}
}
return elems
}

View File

@ -20,7 +20,7 @@ type entryTest struct {
// entries plus a leading and trailing anchor.
func makeList(n int) []*entry {
es := make([]*entry, n+2)
weights := [][]int{{100, 20, 5, 0}}
weights := []rawCE{{w: []int{100, 20, 5, 0}}}
for i := range es {
runes := []rune{rune(i)}
es[i] = &entry{
@ -176,8 +176,8 @@ type entryLessTest struct {
}
var (
w1 = [][]int{{100, 20, 5, 5}}
w2 = [][]int{{101, 20, 5, 5}}
w1 = []rawCE{{w: []int{100, 20, 5, 5}}}
w2 = []rawCE{{w: []int{101, 20, 5, 5}}}
)
var entryLessTests = []entryLessTest{

View File

@ -23,7 +23,7 @@ const (
type colElem uint32
const (
maxCE colElem = 0x80FFFFFF
maxCE colElem = 0xAFFFFFFF
minContract = 0xC0000000
maxContract = 0xDFFFFFFF
minExpand = 0xE0000000
@ -62,30 +62,37 @@ func (ce colElem) ctype() ceType {
// 01pppppp pppppppp ppppppp0 ssssssss
// - p* is primary collation value
// - s* is the secondary collation value
// or
// 00pppppp pppppppp ppppppps sssttttt, where
// - p* is primary collation value
// - s* offset of secondary from default value.
// - t* is the tertiary collation value
// 100ttttt cccccccc pppppppp pppppppp
// - t* is the tertiary collation value
// - c* is the canonical combining class
// - p* is the primary collation value
// Collation elements with a secondary value are of the form
// 10000000 0000ssss ssssssss tttttttt, where
// - 16 BMP implicit -> weight
// - 8 bit s
// - default tertiary
// 1010cccc ccccssss ssssssss tttttttt, where
// - c* is the canonical combining class
// - s* is the secondary collation value
// - t* is the tertiary collation value
// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
// - q* quaternary value
const (
ceTypeMask = 0xC0000000
ceTypeMaskExt = 0xE0000000
ceType1 = 0x40000000
ceType2 = 0x00000000
ceType3 = 0x80000000
ceType3or4 = 0x80000000
ceType4 = 0xA0000000
ceTypeQ = 0xC0000000
ceIgnore = ceType3
ceIgnore = ceType4
firstNonPrimary = 0x80000000
lastSpecialPrimary = 0xA0000000
secondaryMask = 0x80000000
hasTertiaryMask = 0x40000000
primaryValueMask = 0x3FFFFE00
primaryShift = 9
compactPrimaryBits = 16
compactSecondaryShift = 5
minCompactSecondary = defaultSecondary - 4
)
@ -98,10 +105,23 @@ func makeQuaternary(primary int) colElem {
return ceTypeQ | colElem(primary<<primaryShift)
}
func (ce colElem) ccc() uint8 {
if ce&ceType3or4 != 0 {
if ce&ceType4 == ceType3or4 {
return uint8(ce >> 16)
}
return uint8(ce >> 20)
}
return 0
}
func (ce colElem) primary() int {
if ce >= firstNonPrimary {
if ce > lastSpecialPrimary {
return 0
}
return int(uint16(ce))
}
return int(ce&primaryValueMask) >> primaryShift
}
@ -111,8 +131,11 @@ func (ce colElem) secondary() int {
return int(uint8(ce))
case ceType2:
return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
case ceType3:
return int(uint16(ce >> 8))
case ceType3or4:
if ce < ceType4 {
return defaultSecondary
}
return int(ce>>8) & 0xFFF
case ceTypeQ:
return 0
}
@ -121,10 +144,13 @@ func (ce colElem) secondary() int {
func (ce colElem) tertiary() uint8 {
if ce&hasTertiaryMask == 0 {
if ce&ceType3 == 0 {
if ce&ceType3or4 == 0 {
return uint8(ce & 0x1F)
}
if ce&ceType4 == ceType4 {
return uint8(ce)
}
return uint8(ce>>24) & 0x1F // type 2
} else if ce&ceTypeMask == ceType1 {
return defaultTertiary
}
@ -134,10 +160,15 @@ func (ce colElem) tertiary() uint8 {
func (ce colElem) updateTertiary(t uint8) colElem {
if ce&ceTypeMask == ceType1 {
// convert to type 4
nce := ce & primaryValueMask
nce |= colElem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
ce = nce
} else if ce&ceTypeMaskExt == ceType3or4 {
ce &= ^colElem(maxTertiary << 24)
return ce | (colElem(t) << 24)
} else {
// type 2 or 4
ce &= ^colElem(maxTertiary)
}
return ce | colElem(t)

View File

@ -23,12 +23,19 @@ func makeCE(weights []int) colElem {
maxSecondaryDiffBits = 4
maxTertiaryBits = 8
maxTertiaryCompactBits = 5
isSecondary = 0x80000000
isPrimary = 0x40000000
isPrimaryCCC = 0x80000000
isSecondary = 0xA0000000
)
var ce colElem
ccc := weights[3]
if weights[0] != 0 {
if weights[2] == defaultTertiary {
if ccc != 0 {
ce = colElem(weights[2] << 24)
ce |= colElem(ccc) << 16
ce |= colElem(weights[0])
ce |= isPrimaryCCC
} else if weights[2] == defaultTertiary {
ce = colElem(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
ce |= isPrimary
} else {
@ -38,6 +45,7 @@ func makeCE(weights []int) colElem {
}
} else {
ce = colElem(weights[1]<<maxTertiaryBits + weights[2])
ce += colElem(ccc) << 20
ce |= isSecondary
}
return ce
@ -68,10 +76,11 @@ func makeDecompose(t1, t2 int) colElem {
}
func normalCE(inout []int) (ce colElem, t ceType) {
w := makeCE(inout)
inout[0] = w.primary()
inout[1] = w.secondary()
inout[2] = int(w.tertiary())
ce = makeCE(inout)
inout[0] = ce.primary()
inout[1] = ce.secondary()
inout[2] = int(ce.tertiary())
inout[3] = int(ce.ccc())
return ce, ceNormal
}
@ -102,9 +111,13 @@ const (
)
var ceTests = []ceTest{
{normalCE, []int{0, 0, 0}},
{normalCE, []int{0, 30, 3}},
{normalCE, []int{100, defaultSecondary, 3}},
{normalCE, []int{0, 0, 0, 0}},
{normalCE, []int{0, 30, 3, 0}},
{normalCE, []int{0, 30, 3, 0xFF}},
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0}},
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0xFF}},
{normalCE, []int{100, defaultSecondary, 3, 0}},
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}},
{contractCE, []int{0, 0, 0}},
{contractCE, []int{1, 1, 1}},
@ -127,11 +140,11 @@ func TestColElem(t *testing.T) {
copy(inout, tt.arg)
ce, typ := tt.f(inout)
if ce.ctype() != typ {
t.Errorf("%d: type is %d; want %d", i, ce.ctype(), typ)
t.Errorf("%d: type is %d; want %d (ColElem: %X)", i, ce.ctype(), typ, ce)
}
for j, a := range tt.arg {
if inout[j] != a {
t.Errorf("%d: argument %d is %X; want %X", i, j, inout[j], a)
t.Errorf("%d: argument %d is %X; want %X (ColElem: %X)", i, j, inout[j], a, ce)
}
}
}
@ -176,7 +189,8 @@ func TestUpdateTertiary(t *testing.T) {
{0x4000FE20, 0x0000FE8A, 0x0A},
{0x4000FE21, 0x0000FEAA, 0x0A},
{0x0000FE8B, 0x0000FE83, 0x03},
{0x8000CC02, 0x8000CC1B, 0x1B},
{0x82FF0188, 0x9BFF0188, 0x1B},
{0xAFF0CC02, 0xAFF0CC1B, 0x1B},
}
for i, tt := range tests {
if out := tt.in.updateTertiary(tt.t); out != tt.out {
@ -184,3 +198,77 @@ func TestUpdateTertiary(t *testing.T) {
}
}
}
// TestDoNorm exercises iter.doNorm directly. Each test case encodes a
// sequence of CCC values: entries before div become collation elements
// with primary weight 0, entries after div become the block to be
// reordered (primary weight 100), and div itself marks the insertion
// point p passed to doNorm. out lists the expected CCC order afterwards.
func TestDoNorm(t *testing.T) {
	const div = -1 // The insertion point of the next block.
	tests := []struct {
		in, out []int
	}{
		{in: []int{4, div, 3},
			out: []int{3, 4},
		},
		{in: []int{4, div, 3, 3, 3},
			out: []int{3, 3, 3, 4},
		},
		{in: []int{0, 4, div, 3},
			out: []int{0, 3, 4},
		},
		{in: []int{0, 0, 4, 5, div, 3, 3},
			out: []int{0, 0, 3, 3, 4, 5},
		},
		{in: []int{0, 0, 1, 4, 5, div, 3, 3},
			out: []int{0, 0, 1, 3, 3, 4, 5},
		},
		{in: []int{0, 0, 1, 4, 5, div, 4, 4},
			out: []int{0, 0, 1, 4, 4, 4, 5},
		},
	}
	for j, tt := range tests {
		i := iter{}
		// w: primary weight for generated elements (0 before div, 100 after);
		// p: index of the first element of the block to insert;
		// s: index recorded as the last starter (CCC == 0).
		var w, p, s int
		for k, cc := range tt.in {
			if cc == 0 {
				// NOTE(review): s is reset to 0 rather than set to k, so
				// pStarter always ends up 0. The test inputs never
				// distinguish the two because doNorm's scan stops at a
				// CCC==0 element anyway — confirm the intent.
				s = 0
			}
			if cc == div {
				w = 100
				p = k
				i.pStarter = s
				continue
			}
			i.ce = append(i.ce, makeCE([]int{w, 20, 2, cc}))
		}
		i.prevCCC = i.ce[p-1].ccc()
		i.doNorm(p, i.ce[p].ccc())
		if len(i.ce) != len(tt.out) {
			t.Errorf("%d: length was %d; want %d", j, len(i.ce), len(tt.out))
		}
		prevCCC := uint8(0)
		for k, ce := range i.ce {
			if int(ce.ccc()) != tt.out[k] {
				t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.ccc(), tt.out[k])
			}
			// Elements with equal CCC must keep their original relative
			// order (the block has primary 100, the prefix primary 0).
			// NOTE(review): prevCCC is never updated inside this loop, so
			// this check only ever fires for CCC==0 elements; it likely
			// was meant to track the previous element's CCC — verify.
			if k > 0 && ce.ccc() == prevCCC && i.ce[k-1].primary() > ce.primary() {
				t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k)
			}
		}
	}
	// test cutoff of large sequence of combining characters.
	// Around the maxCombiningCharacters limit, doNorm must give up
	// reordering (prevCCC stays 8) or accept the new element as a
	// fresh starting point (prevCCC becomes 5 and pStarter moves to p).
	result := []uint8{8, 8, 8, 5, 5}
	for o := -2; o <= 2; o++ {
		i := iter{pStarter: 2, prevCCC: 8}
		n := maxCombiningCharacters + 1 + o
		for j := 1; j < n+i.pStarter; j++ {
			i.ce = append(i.ce, makeCE([]int{100, 20, 2, 8}))
		}
		p := len(i.ce)
		i.ce = append(i.ce, makeCE([]int{0, 20, 2, 5}))
		i.doNorm(p, 5)
		if i.prevCCC != result[o+2] {
			t.Errorf("%d: i.prevCCC was %d; want %d", n, i.prevCCC, result[o+2])
		}
		if result[o+2] == 5 && i.pStarter != p {
			t.Errorf("%d: i.pStarter was %d; want %d", n, i.pStarter, p)
		}
	}
}

View File

@ -10,6 +10,7 @@ package collate
import (
"bytes"
"exp/norm"
"unicode/utf8"
)
// Level identifies the collation comparison level.
@ -112,7 +113,7 @@ func New(loc string) *Collator {
func newCollator(t *table) *Collator {
c := &Collator{
Strength: Quaternary,
Strength: Tertiary,
f: norm.NFD,
t: t,
}
@ -269,8 +270,7 @@ func (c *Collator) key(buf *Buffer, w []colElem) []byte {
func (c *Collator) getColElems(str []byte) []colElem {
i := c.iter(0)
i.setInput(c, str)
for !i.done() {
i.next()
for i.next() {
}
return i.ce
}
@ -278,88 +278,185 @@ func (c *Collator) getColElems(str []byte) []colElem {
func (c *Collator) getColElemsString(str string) []colElem {
i := c.iter(0)
i.setInputString(c, str)
for !i.done() {
i.next()
for i.next() {
}
return i.ce
}
// source abstracts the input of the collation functions so that both
// strings and byte slices can be processed with the same code. A nil
// bytes field means str is the active input; otherwise bytes is used.
type source struct {
	str   string
	bytes []byte
	buf   [16]byte // Used for decomposing Hangul.
}
// done reports whether no input remains, in either representation.
func (src *source) done() bool {
	if len(src.bytes) > 0 {
		return false
	}
	return len(src.str) == 0
}
// tail returns a source holding the remaining input after the first n
// bytes. The active representation (string or byte slice) is preserved;
// the scratch buffer is not carried over.
func (src *source) tail(n int) source {
	if src.bytes != nil {
		return source{bytes: src.bytes[n:]}
	}
	return source{str: src.str[n:]}
}
// nfd decomposes the first end bytes of the input to NFD form, writing
// the result into src.buf and returning it. The 16-byte buffer is
// sufficient because this is only used for short sequences (Hangul
// decomposition; see the buf field comment and the caller in appendNext).
func (src *source) nfd(end int) []byte {
	if src.bytes == nil {
		return norm.NFD.AppendString(src.buf[:0], src.str[:end])
	}
	return norm.NFD.Append(src.buf[:0], src.bytes[:end]...)
}
// properties returns the normalization properties, under form f, of the
// rune at the start of the remaining input.
func (src *source) properties(f norm.Form) norm.Properties {
	if src.bytes == nil {
		return f.PropertiesString(src.str)
	}
	return f.Properties(src.bytes)
}
// lookup looks up the collation element for the start of the remaining
// input in t's main trie, returning the element and the number of bytes
// consumed.
func (src *source) lookup(t *table) (ce colElem, sz int) {
	if src.bytes == nil {
		return t.index.lookupString(src.str)
	}
	return t.index.lookup(src.bytes)
}
// rune decodes and returns the first rune of the remaining input
// together with its size in bytes.
func (src *source) rune() (rune, int) {
	if b := src.bytes; b != nil {
		return utf8.DecodeRune(b)
	}
	return utf8.DecodeRuneInString(src.str)
}
// iter produces the stream of collation elements for an input string
// or byte slice.
//
// NOTE(review): this declaration appears to contain diff residue — both
// the old buffered-iterator fields (the norm.Iter src, norm, buf, p,
// minBufSize, _done, eof) and their replacements (the source-typed src,
// nce, prevCCC, pStarter) are present, including two fields named src,
// which cannot compile. Confirm against the original change and drop
// the superseded set.
type iter struct {
	src norm.Iter   // old input iterator; superseded by the source field below
	norm [1024]byte // old normalization buffer backing buf
	buf []byte
	p int
	minBufSize int
	src source        // remaining input
	wa [512]colElem   // backing array for ce
	ce []colElem      // collation elements produced so far
	pce int           // read position in ce for nextPrimary
	nce int           // nce <= len(ce); number of elements in ce that are final (in canonical order)
	prevCCC uint8     // CCC of the last element appended
	pStarter int      // index in ce of the last element with CCC == 0
	t *table
	_done, eof bool   // old state flags for the buffered iterator
}
func (i *iter) init(c *Collator) {
i.t = c.t
i.minBufSize = c.t.maxContractLen
i.ce = i.wa[:0]
i.buf = i.norm[:0]
}
func (i *iter) reset() {
i.ce = i.ce[:0]
i.buf = i.buf[:0]
i.p = 0
i.eof = i.src.Done()
i._done = i.eof
i.nce = 0
i.prevCCC = 0
i.pStarter = 0
}
func (i *iter) setInput(c *Collator, s []byte) *iter {
i.src.SetInput(c.f, s)
i.src.bytes = s
i.src.str = ""
i.reset()
return i
}
func (i *iter) setInputString(c *Collator, s string) *iter {
i.src.SetInputString(c.f, s)
i.src.str = s
i.src.bytes = nil
i.reset()
return i
}
func (i *iter) done() bool {
return i._done
// next appends colElems to the internal array until it adds an element with CCC=0.
// In the majority of cases, a colElem with a primary value > 0 will have
// a CCC of 0. The CCC values of collation elements are also used to detect if the
// input string was not normalized and to adjust the result accordingly.
// It returns false once the input is exhausted and no new elements remain.
func (i *iter) next() bool {
	sz := 0
	for !i.src.done() {
		p0 := len(i.ce) // start of the block appended in this iteration
		i.ce, sz = i.t.appendNext(i.ce, i.src)
		i.src = i.src.tail(sz)
		last := len(i.ce) - 1
		if ccc := i.ce[last].ccc(); ccc == 0 {
			// Block ends on a starter: everything so far is final.
			i.nce = len(i.ce)
			i.pStarter = last
			i.prevCCC = 0
			return true
		} else if p0 < last && i.ce[p0].ccc() == 0 {
			// Set i.nce to only cover the part of i.ce for which ccc == 0 and
			// use the rest in the next call to next.
			for p0++; p0 < last && i.ce[p0].ccc() == 0; p0++ {
			}
			i.nce = p0
			i.pStarter = p0 - 1
			i.prevCCC = ccc
			return true
		} else if ccc < i.prevCCC {
			// A CCC lower than its predecessor means the input was not
			// in canonical order; reorder the pending elements.
			i.doNorm(p0, ccc) // should be rare for most common cases
		} else {
			i.prevCCC = ccc
		}
	}
	if len(i.ce) != i.nce {
		// Flush any trailing combining characters.
		i.nce = len(i.ce)
		return true
	}
	return false
}
func (i *iter) next() {
if !i.eof && len(i.buf)-i.p < i.minBufSize {
// replenish buffer
n := copy(i.buf, i.buf[i.p:])
n += i.src.Next(i.buf[n:cap(i.buf)])
i.buf = i.buf[:n]
i.p = 0
i.eof = i.src.Done()
}
if i.p == len(i.buf) {
i._done = true
return
// nextPlain is the same as next, but does not "normalize" the collation
// elements.
// TODO: remove this function. Using this instead of next does not seem
// to improve performance in any significant way. We retain this until
// later for evaluation purposes.
func (i *iter) nextPlain() bool {
if i.src.done() {
return false
}
sz := 0
i.ce, sz = i.t.appendNext(i.ce, i.buf[i.p:])
i.p += sz
i.ce, sz = i.t.appendNext(i.ce, i.src)
i.src = i.src.tail(sz)
i.nce = len(i.ce)
return true
}
// maxCombiningCharacters bounds how far back doNorm will reorder past
// the last starter before giving up.
const maxCombiningCharacters = 30

// doNorm reorders the collation elements in i.ce.
// It assumes that blocks of collation elements added with appendNext
// either start and end with the same CCC or start with CCC == 0.
// This allows for a single insertion point for the entire block.
// The correctness of this assumption is verified in builder.go.
func (i *iter) doNorm(p int, ccc uint8) {
	if p-i.pStarter > maxCombiningCharacters {
		// Too many combining characters since the last starter: stop
		// reordering to bound the cost and treat the last element as a
		// new starting point.
		i.prevCCC = i.ce[len(i.ce)-1].ccc()
		i.pStarter = len(i.ce) - 1
		return
	}
	n := len(i.ce)
	k := p
	// Scan backwards for the insertion point: the first position after
	// pStarter whose preceding element has a CCC <= ccc.
	for p--; p > i.pStarter && ccc < i.ce[p-1].ccc(); p-- {
	}
	// Rotate the elements in [p:k] behind the block starting at k:
	// append a copy of [p:k], slide [k:n] down into place, then truncate
	// back to the original length.
	i.ce = append(i.ce, i.ce[p:k]...)
	copy(i.ce[p:], i.ce[k:])
	i.ce = i.ce[:n]
}
func (i *iter) nextPrimary() int {
for {
for ; i.pce < len(i.ce); i.pce++ {
for ; i.pce < i.nce; i.pce++ {
if v := i.ce[i.pce].primary(); v != 0 {
i.pce++
return v
}
}
if i.done() {
if !i.next() {
return 0
}
i.next()
}
panic("should not reach here")
}

View File

@ -378,6 +378,7 @@ var keyTests = []keyTest{
func TestKey(t *testing.T) {
c, _ := makeTable(appendNextTests[4].in)
c.Alternate = collate.AltShifted
c.Strength = collate.Quaternary
buf := collate.Buffer{}
keys1 := [][]byte{}
keys2 := [][]byte{}

View File

@ -27,8 +27,21 @@ type ctScanner struct {
done bool
}
// ctScannerString is the string counterpart of ctScanner: it holds the
// state of an in-progress match of a string against the contraction trie.
type ctScannerString struct {
	states contractTrieSet // remaining trie states reachable from the current position
	s string               // the input being matched
	n int                  // number of entries in the current state block
	index int              // offset of the longest match found so far
	pindex int             // bytes of s consumed for that match
	done bool              // no further match is possible
}
func (t contractTrieSet) scanner(index, n int, b []byte) ctScanner {
return ctScanner{states: t[index:], s: b, n: n}
return ctScanner{s: b, states: t[index:], n: n}
}
// scannerString returns a scanner for matching str against the
// contraction trie states starting at t[index:] with n entries.
func (t contractTrieSet) scannerString(index, n int, str string) ctScannerString {
	return ctScannerString{s: str, states: t[index:], n: n}
}
// result returns the offset i and bytes consumed p so far. If no suffix
@ -37,6 +50,10 @@ func (s *ctScanner) result() (i, p int) {
return s.index, s.pindex
}
// result returns the offset i of the longest match found so far and the
// number of bytes p of the input consumed for it.
func (s *ctScannerString) result() (i, p int) {
	return s.index, s.pindex
}
const (
final = 0
noIndex = 0xFF
@ -84,3 +101,45 @@ func (s *ctScanner) scan(p int) int {
}
return pr
}
// scan is a verbatim copy of ctScanner.scan.
// It walks the contraction trie for as long as bytes of s.s starting at
// p match, recording the longest match found in s.index/s.pindex, and
// returns the position after the last complete rune that still has a
// continuation in the trie.
func (s *ctScannerString) scan(p int) int {
	pr := p // the p at the rune start
	str := s.s
	states, n := s.states, s.n
	for i := 0; i < n && p < len(str); {
		e := states[i]
		c := str[p]
		// TODO: a significant number of contractions are of a form that
		// cannot match discontiguous UTF-8 in a normalized string. We could let
		// a negative value of e.n mean that we can set s.done = true and avoid
		// the need for additional matches.
		if c >= e.l {
			if e.l == c {
				// Exact byte match for this entry.
				p++
				if e.i != noIndex {
					// This state is itself a (possibly partial) match.
					s.index = int(e.i)
					s.pindex = p
				}
				if e.n != final {
					// Descend into this entry's child block.
					i, states, n = 0, states[int(e.h)+n:], int(e.n)
					if p >= len(str) || utf8.RuneStart(str[p]) {
						// Commit scanner state only at rune boundaries.
						s.states, s.n, pr = states, n, p
					}
				} else {
					s.done = true
					return p
				}
				continue
			} else if e.n == final && c <= e.h {
				// c falls within [e.l, e.h] of a final range entry; the
				// offset into the range selects the result index.
				p++
				s.done = true
				s.index = int(c-e.l) + int(e.i)
				s.pindex = p
				return p
			}
		}
		i++
	}
	return pr
}

View File

@ -30,7 +30,7 @@ func W(ce ...int) Weights {
return w
}
func (w Weights) String() string {
return fmt.Sprintf("[%d.%d.%d.%d]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
return fmt.Sprintf("[%X.%X.%X.%X]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
}
type Table struct {
@ -52,7 +52,7 @@ func convertToWeights(ws []colElem) []Weights {
func convertFromWeights(ws []Weights) []colElem {
out := make([]colElem, len(ws))
for i, w := range ws {
out[i] = makeCE([]int{w.Primary, w.Secondary, w.Tertiary})
out[i] = makeCE([]int{w.Primary, w.Secondary, w.Tertiary, 0})
if out[i] == ceIgnore && w.Quaternary > 0 {
out[i] = makeQuaternary(w.Quaternary)
}
@ -61,7 +61,7 @@ func convertFromWeights(ws []Weights) []colElem {
}
func (t *Table) AppendNext(s []byte) ([]Weights, int) {
w, n := t.t.appendNext(nil, s)
w, n := t.t.appendNext(nil, source{bytes: s})
return convertToWeights(w), n
}

View File

@ -42,13 +42,26 @@ func (t *table) indexedTable(idx tableIndex) *table {
// sequence of runes, the weights for the interstitial runes are
// appended as well. It returns a new slice that includes the appended
// weights and the number of bytes consumed from s.
func (t *table) appendNext(w []colElem, s []byte) ([]colElem, int) {
v, sz := t.index.lookup(s)
ce := colElem(v)
func (t *table) appendNext(w []colElem, src source) (res []colElem, n int) {
ce, sz := src.lookup(t)
tp := ce.ctype()
if tp == ceNormal {
if ce == 0 {
r, _ := utf8.DecodeRune(s)
r, _ := src.rune()
const (
hangulSize = 3
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r >= firstHangul && r <= lastHangul {
// TODO: performance can be considerably improved here.
n = sz
for b := src.nfd(hangulSize); len(b) > 0; b = b[sz:] {
ce, sz = t.index.lookup(b)
w = append(w, ce)
}
return w, n
}
ce = makeImplicitCE(implicitPrimary(r))
}
w = append(w, ce)
@ -56,15 +69,20 @@ func (t *table) appendNext(w []colElem, s []byte) ([]colElem, int) {
w = t.appendExpansion(w, ce)
} else if tp == ceContractionIndex {
n := 0
w, n = t.matchContraction(w, ce, s[sz:])
src = src.tail(sz)
if src.bytes == nil {
w, n = t.matchContractionString(w, ce, src.str)
} else {
w, n = t.matchContraction(w, ce, src.bytes)
}
sz += n
} else if tp == ceDecompose {
// Decompose using NFCK and replace tertiary weights.
// Decompose using NFKD and replace tertiary weights.
t1, t2 := splitDecompose(ce)
i := len(w)
nfkd := norm.NFKD.Properties(s).Decomposition()
nfkd := src.properties(norm.NFKD).Decomposition()
for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
w, p = t.appendNext(w, nfkd)
w, p = t.appendNext(w, source{bytes: nfkd})
}
w[i] = w[i].updateTertiary(t1)
if i++; i < len(w) {
@ -99,16 +117,17 @@ func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colE
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFC.Properties(suffix[p:])
rune := norm.NFD.Properties(suffix[p:])
p += rune.Size()
if prevCC := rune.TrailCCC(); prevCC != 0 {
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFC.FirstBoundary(suffix[p:]); end != -1 {
if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFC.Properties(suffix[p:])
rune = norm.NFD.Properties(suffix[p:])
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
@ -136,7 +155,65 @@ func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colE
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, b)
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}
// TODO: unify the two implementations. This is best done after first simplifying
// the algorithm taking into account the inclusion of both NFC and NFD forms
// in the table.

// matchContractionString is the string counterpart of matchContraction.
// Given the contraction-index element ce for the rune preceding suffix,
// it matches the longest contraction, scanning past interstitial
// combining characters (discontiguous contractions) where the CCC rules
// permit, and appends the weights for the match — plus those of any
// skipped runes — to w. It returns the extended slice and the number of
// bytes of suffix consumed.
func (t *table) matchContractionString(w []colElem, ce colElem, suffix string) ([]colElem, int) {
	index, n, offset := splitContractIndex(ce)
	scan := t.contractTries.scannerString(index, n, suffix)
	buf := [norm.MaxSegmentSize]byte{} // runes skipped over during a discontiguous match
	bufp := 0
	p := scan.scan(0)
	if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
		// By now we should have filtered most cases.
		p0 := p
		bufn := 0
		rune := norm.NFD.PropertiesString(suffix[p:])
		p += rune.Size()
		if rune.LeadCCC() != 0 {
			prevCC := rune.TrailCCC()
			// A gap may only occur in the last normalization segment.
			// This also ensures that len(scan.s) < norm.MaxSegmentSize.
			if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
				scan.s = suffix[:p+end]
			}
			for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
				rune = norm.NFD.PropertiesString(suffix[p:])
				if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
					// A starter, or a non-increasing CCC, blocks any
					// further discontiguous matching.
					break
				}
				prevCC = rune.TrailCCC()
				if pp := scan.scan(p); pp != p {
					// Copy the interstitial runes for later processing.
					bufn += copy(buf[bufn:], suffix[p0:p])
					if scan.pindex == pp {
						bufp = bufn
					}
					p, p0 = pp, pp
				} else {
					p += rune.Size()
				}
			}
		}
	}
	// Append weights for the matched contraction, which may be an expansion.
	i, n := scan.result()
	ce = colElem(t.contractElem[i+offset])
	if ce.ctype() == ceNormal {
		w = append(w, ce)
	} else {
		w = t.appendExpansion(w, ce)
	}
	// Append weights for the runes in the segment not part of the contraction.
	for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
		w, p = t.appendNext(w, source{bytes: b})
	}
	return w, n
}

View File

@ -42,7 +42,9 @@ func pt(p, t int) []int {
func makeTable(in []input) (*collate.Collator, error) {
b := build.NewBuilder()
for _, r := range in {
b.Add([]rune(r.str), r.ces, nil)
if e := b.Add([]rune(r.str), r.ces, nil); e != nil {
panic(e)
}
}
return b.Build()
}
@ -159,6 +161,7 @@ var appendNextTests = []tableTest{
{"b", [][]int{{200}}},
{"c", [][]int{{300}}},
{"\u03B1", [][]int{{900}}},
{"\x01", [][]int{{0, 0, 0, 0}}},
// contractions
{"a\u0300", [][]int{{101}}},
@ -171,10 +174,11 @@ var appendNextTests = []tableTest{
{"a\u0301\u035F", [][]int{{121}}},
{"a\u0301\u035Fb", [][]int{{119}}},
{"\u03B1\u0345", [][]int{{901}, {902}}},
{"\u302E\u18A9", [][]int{{0, 131}, {0, 132}}},
{"\u302E\u302F", [][]int{{0, 131}, {0, 131}}},
{"\u302F\u18A9", [][]int{{0, 130}}},
}...),
[]check{
{"a\x01\u0300", 1, ColElems{w(100)}},
{"ab", 1, ColElems{w(100)}}, // closing segment
{"a\u0316\u0300b", 5, ColElems{w(101), w(0, 220)}}, // closing segment
{"a\u0316\u0300", 5, ColElems{w(101), w(0, 220)}}, // no closing segment
@ -239,12 +243,17 @@ var appendNextTests = []tableTest{
{"a\u302F\u18A9\u0301", 9, ColElems{w(102), w(0, 130)}},
// expansion within a gap
{"a\u0317\u0301", 5, ColElems{w(102), w(0, 220), w(0, 220)}},
{"a\u302E\u18A9\u0301", 9, ColElems{w(102), w(0, 131), w(0, 132)}},
{
"a\u0317\u302E\u18A9\u0301",
11,
ColElems{w(102), w(0, 220), w(0, 220), w(0, 131), w(0, 132)},
},
// repeating CCC blocks last modifier
{"a\u302E\u302F\u0301", 1, ColElems{w(100)}},
// The trailing combining characters (with lower CCC) should block the first one.
// TODO: make the following pass.
// {"a\u035E\u0316\u0316", 1, ColElems{w(100)}},
{"a\u035F\u035Eb", 5, ColElems{w(110), w(0, 233)}},
// Last combiner should match after normalization.
// TODO: make the following pass.
// {"a\u035D\u0301", 3, ColElems{w(102), w(0, 234)}},
// The first combiner is blocking the second one as they have the same CCC.
{"a\u035D\u035Eb", 1, ColElems{w(100)}},
},
},
}

File diff suppressed because it is too large Load Diff

View File

@ -97,3 +97,64 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) {
// Illegal rune
return 0, 1
}
// The body of lookupString is a verbatim copy of that of lookup.
// It returns the value for the first UTF-8 encoding in s and the size
// in bytes of that encoding, dispatching on the lead byte: ASCII is
// answered from values0 directly; multi-byte sequences walk the index
// blocks one continuation byte at a time. It returns sz == 0 when s is
// too short to hold the full encoding, and (0, n) after n bytes when a
// continuation byte is invalid.
func (t *trie) lookupString(s string) (v colElem, sz int) {
	c0 := s[0]
	switch {
	case c0 < tx: // ASCII: value stored directly
		return colElem(t.values0[c0]), 1
	case c0 < t2: // illegal lead byte (continuation byte first)
		return 0, 1
	case c0 < t3: // 2-byte encoding
		if len(s) < 2 {
			return 0, 0
		}
		i := t.index0[c0]
		c1 := s[1]
		if c1 < tx || t2 <= c1 {
			return 0, 1
		}
		return t.lookupValue(i, c1), 2
	case c0 < t4: // 3-byte encoding
		if len(s) < 3 {
			return 0, 0
		}
		i := t.index0[c0]
		c1 := s[1]
		if c1 < tx || t2 <= c1 {
			return 0, 1
		}
		o := int(i)<<6 + int(c1)
		i = t.index[o]
		c2 := s[2]
		if c2 < tx || t2 <= c2 {
			return 0, 2
		}
		return t.lookupValue(i, c2), 3
	case c0 < t5: // 4-byte encoding
		if len(s) < 4 {
			return 0, 0
		}
		i := t.index0[c0]
		c1 := s[1]
		if c1 < tx || t2 <= c1 {
			return 0, 1
		}
		o := int(i)<<6 + int(c1)
		i = t.index[o]
		c2 := s[2]
		if c2 < tx || t2 <= c2 {
			return 0, 2
		}
		o = int(i)<<6 + int(c2)
		i = t.index[o]
		c3 := s[3]
		if c3 < tx || t2 <= c3 {
			return 0, 3
		}
		return t.lookupValue(i, c3), 4
	}
	// Illegal rune
	return 0, 1
}