From df07b6d14aa72eee879d13f197dd861aad4b2465 Mon Sep 17 00:00:00 2001 From: Marcel van Lohuizen Date: Wed, 20 Jul 2011 19:46:05 +1000 Subject: [PATCH] exp/norm: API for normalization library. R=r, r, mpvl, rsc CC=golang-dev https://golang.org/cl/4678041 --- src/pkg/exp/norm/normalize.go | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 src/pkg/exp/norm/normalize.go diff --git a/src/pkg/exp/norm/normalize.go b/src/pkg/exp/norm/normalize.go new file mode 100644 index 00000000000..e219263d4a3 --- /dev/null +++ b/src/pkg/exp/norm/normalize.go @@ -0,0 +1,75 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package norm contains types and functions for normalizing Unicode strings. +package norm + +// A Form denotes a canonical representation of Unicode code points. +// The Unicode-defined normalization and equivalence forms are: +// +// NFC Unicode Normalization Form C +// NFD Unicode Normalization Form D +// NFKC Unicode Normalization Form KC +// NFKD Unicode Normalization Form KD +// +// For a Form f, this documentation uses the notation f(x) to mean +// the bytes or string x converted to the given form. +// A position n in x is called a boundary if conversion to the form can +// proceed independently on both sides: +// f(x) == append(f(x[0:n]), f(x[n:])...) +// +// References: http://unicode.org/reports/tr15/ and +// http://unicode.org/notes/tn5/. +type Form int + +const ( + NFC Form = iota + NFD + NFKC + NFKD +) + +// Bytes returns f(b). May return b if f(b) == b. +func (f Form) Bytes(b []byte) []byte + +// String returns f(s). +func (f Form) String(s string) string + +// IsNormal returns true if b == f(b). +func (f Form) IsNormal(b []byte) bool + +// IsNormalString returns true if s == f(s). +func (f Form) IsNormalString(s string) bool + +// Append returns f(append(out, b...)). 
+// The buffer out must be empty or equal to f(out). +func (f Form) Append(out, b []byte) []byte + +// AppendString returns f(append(out, []byte(s)...)). +// The buffer out must be empty or equal to f(out). +func (f Form) AppendString(out []byte, s string) []byte + +// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]). +// It is not guaranteed to return the largest such n. +func (f Form) QuickSpan(b []byte) int + +// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]). +// It is not guaranteed to return the largest such n. +func (f Form) QuickSpanString(s string) int + +// FirstBoundary returns the position i of the first boundary in b. +// It returns len(b), false if b contains no boundaries. +func (f Form) FirstBoundary(b []byte) (i int, ok bool) + +// FirstBoundaryInString returns the position i of the first boundary in s. +// It returns len(s), false if s contains no boundaries. +func (f Form) FirstBoundaryInString(s string) (i int, ok bool) + +// LastBoundary returns the position i of the last boundary in b. +// It returns 0, false if b contains no boundary. +func (f Form) LastBoundary(b []byte) (i int, ok bool) + +// LastBoundaryInString returns the position i of the last boundary in s. +// It returns 0, false if s contains no boundary. +func (f Form) LastBoundaryInString(s string) (i int, ok bool)