From 7585aa6ae591a7fecb806d230205f8d12d64c957 Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Tue, 17 Jan 2012 14:21:50 -0800 Subject: [PATCH] utf8.String: move to exp/utf8string.String R=golang-dev, bradfitz CC=golang-dev https://golang.org/cl/5528115 --- doc/go1.html | 6 ++++- doc/go1.tmpl | 6 ++++- src/buildscript/darwin_386.sh | 2 +- src/buildscript/darwin_amd64.sh | 2 +- src/buildscript/freebsd_386.sh | 2 +- src/buildscript/freebsd_amd64.sh | 2 +- src/buildscript/linux_386.sh | 2 +- src/buildscript/linux_amd64.sh | 2 +- src/buildscript/linux_arm.sh | 2 +- src/buildscript/netbsd_386.sh | 2 +- src/buildscript/netbsd_amd64.sh | 2 +- src/buildscript/openbsd_386.sh | 2 +- src/buildscript/openbsd_amd64.sh | 2 +- src/buildscript/plan9_386.sh | 2 +- src/buildscript/windows_386.sh | 2 +- src/buildscript/windows_amd64.sh | 2 +- src/pkg/exp/utf8string/Makefile | 11 ++++++++ .../utf8 => exp/utf8string}/string.go | 26 +++++++++++-------- .../utf8 => exp/utf8string}/string_test.go | 15 ++++++++--- src/pkg/unicode/utf8/Makefile | 1 - test/alias.go | 5 +--- 21 files changed, 63 insertions(+), 35 deletions(-) create mode 100644 src/pkg/exp/utf8string/Makefile rename src/pkg/{unicode/utf8 => exp/utf8string}/string.go (88%) rename src/pkg/{unicode/utf8 => exp/utf8string}/string_test.go (88%) diff --git a/doc/go1.html b/doc/go1.html index 2168ba56288..af9bbd779a8 100644 --- a/doc/go1.html +++ b/doc/go1.html @@ -922,6 +922,10 @@ Several packages have moved under exp at the time of Go 1's release
  • http/spdy
  • +

    +Also, the utf8.String type has been moved to its own package, exp/utf8string. +

    +

    All these packages are available under the same names, with exp/ prefixed: exp/ebnf etc.

    @@ -935,7 +939,7 @@ Also, the gotype command now resides in exp/gotype, wh Updating: Code that uses packages in exp will need to be updated by hand, or else compiled from an installation that has exp available. -Gofix will warn about such uses. +Gofix or the compiler will complain about such uses.
    TODO: gofix should warn about such uses.

    diff --git a/doc/go1.tmpl b/doc/go1.tmpl index 21256ed7f3a..98517041192 100644 --- a/doc/go1.tmpl +++ b/doc/go1.tmpl @@ -826,6 +826,10 @@ Several packages have moved under exp at the time of Go 1's release
  • http/spdy
  • +

    +Also, the utf8.String type has been moved to its own package, exp/utf8string. +

    +

    All these packages are available under the same names, with exp/ prefixed: exp/ebnf etc.

    @@ -839,7 +843,7 @@ Also, the gotype command now resides in exp/gotype, wh Updating: Code that uses packages in exp will need to be updated by hand, or else compiled from an installation that has exp available. -Gofix will warn about such uses. +Gofix or the compiler will complain about such uses.
    TODO: gofix should warn about such uses.

    diff --git a/src/buildscript/darwin_386.sh b/src/buildscript/darwin_386.sh index 42dbf5e17a9..6c8b997f0d8 100755 --- a/src/buildscript/darwin_386.sh +++ b/src/buildscript/darwin_386.sh @@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/darwin_386/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8 mkdir -p "$GOROOT"/pkg/darwin_386/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/darwin_386/unicode/utf8.a diff --git a/src/buildscript/darwin_amd64.sh b/src/buildscript/darwin_amd64.sh index 0766ff262c0..417e03fde39 100755 --- a/src/buildscript/darwin_amd64.sh +++ b/src/buildscript/darwin_amd64.sh @@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/darwin_amd64/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6 mkdir -p "$GOROOT"/pkg/darwin_amd64/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/darwin_amd64/unicode/utf8.a diff --git a/src/buildscript/freebsd_386.sh b/src/buildscript/freebsd_386.sh index 310f3dcf7b5..2427f914397 100755 --- a/src/buildscript/freebsd_386.sh +++ b/src/buildscript/freebsd_386.sh @@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/freebsd_386/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8 mkdir -p "$GOROOT"/pkg/freebsd_386/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/freebsd_386/unicode/utf8.a diff --git a/src/buildscript/freebsd_amd64.sh b/src/buildscript/freebsd_amd64.sh index 9a91017ac5d..6f632b4a5ff 100755 --- a/src/buildscript/freebsd_amd64.sh +++ b/src/buildscript/freebsd_amd64.sh @@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/freebsd_amd64/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6 mkdir -p "$GOROOT"/pkg/freebsd_amd64/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/freebsd_amd64/unicode/utf8.a diff --git a/src/buildscript/linux_386.sh b/src/buildscript/linux_386.sh index 26009320ce1..4deb775f735 100755 --- a/src/buildscript/linux_386.sh +++ b/src/buildscript/linux_386.sh @@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/linux_386/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8 mkdir -p "$GOROOT"/pkg/linux_386/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_386/unicode/utf8.a diff --git a/src/buildscript/linux_amd64.sh b/src/buildscript/linux_amd64.sh index f2eae0a59b4..da27acc3fb8 100755 --- a/src/buildscript/linux_amd64.sh +++ b/src/buildscript/linux_amd64.sh @@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/linux_amd64/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6 mkdir -p "$GOROOT"/pkg/linux_amd64/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_amd64/unicode/utf8.a diff --git a/src/buildscript/linux_arm.sh b/src/buildscript/linux_arm.sh index 7589a5719e1..aa68cf802d4 100755 --- a/src/buildscript/linux_arm.sh +++ b/src/buildscript/linux_arm.sh @@ -129,7 +129,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/linux_arm/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -5g -o "$WORK"/unicode/utf8/_obj/_go_.5 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +5g -o "$WORK"/unicode/utf8/_obj/_go_.5 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.5 mkdir -p "$GOROOT"/pkg/linux_arm/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_arm/unicode/utf8.a diff --git a/src/buildscript/netbsd_386.sh b/src/buildscript/netbsd_386.sh index 8f9491c80f0..5c822f345b5 100755 --- a/src/buildscript/netbsd_386.sh +++ b/src/buildscript/netbsd_386.sh @@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/netbsd_386/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8 mkdir -p "$GOROOT"/pkg/netbsd_386/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/netbsd_386/unicode/utf8.a diff --git a/src/buildscript/netbsd_amd64.sh b/src/buildscript/netbsd_amd64.sh index be0053d066f..0bfea9c114e 100755 --- a/src/buildscript/netbsd_amd64.sh +++ b/src/buildscript/netbsd_amd64.sh @@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/netbsd_amd64/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6 mkdir -p "$GOROOT"/pkg/netbsd_amd64/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/netbsd_amd64/unicode/utf8.a diff --git a/src/buildscript/openbsd_386.sh b/src/buildscript/openbsd_386.sh index 30ddec6a5dd..be38d39441b 100755 --- a/src/buildscript/openbsd_386.sh +++ b/src/buildscript/openbsd_386.sh @@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/openbsd_386/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8 mkdir -p "$GOROOT"/pkg/openbsd_386/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/openbsd_386/unicode/utf8.a diff --git a/src/buildscript/openbsd_amd64.sh b/src/buildscript/openbsd_amd64.sh index c315d59e163..00b32a139cf 100755 --- a/src/buildscript/openbsd_amd64.sh +++ b/src/buildscript/openbsd_amd64.sh @@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/openbsd_amd64/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6 mkdir -p "$GOROOT"/pkg/openbsd_amd64/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/openbsd_amd64/unicode/utf8.a diff --git a/src/buildscript/plan9_386.sh b/src/buildscript/plan9_386.sh index 10a24724f1b..d1c3ec6474f 100755 --- a/src/buildscript/plan9_386.sh +++ b/src/buildscript/plan9_386.sh @@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/plan9_386/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8 mkdir -p "$GOROOT"/pkg/plan9_386/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/plan9_386/unicode/utf8.a diff --git a/src/buildscript/windows_386.sh b/src/buildscript/windows_386.sh index b5e79288bd9..13885d8a2dd 100755 --- a/src/buildscript/windows_386.sh +++ b/src/buildscript/windows_386.sh @@ -128,7 +128,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/windows_386/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8 mkdir -p "$GOROOT"/pkg/windows_386/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/windows_386/unicode/utf8.a diff --git a/src/buildscript/windows_amd64.sh b/src/buildscript/windows_amd64.sh index bf87f6af988..6eb6349c93d 100755 --- a/src/buildscript/windows_amd64.sh +++ b/src/buildscript/windows_amd64.sh @@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/windows_amd64/unicode.a mkdir -p "$WORK"/unicode/utf8/_obj/ cd "$GOROOT"/src/pkg/unicode/utf8 -6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go +6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6 mkdir -p "$GOROOT"/pkg/windows_amd64/unicode/ cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/windows_amd64/unicode/utf8.a diff --git a/src/pkg/exp/utf8string/Makefile b/src/pkg/exp/utf8string/Makefile new file mode 100644 index 00000000000..49c192861cb --- /dev/null +++ b/src/pkg/exp/utf8string/Makefile @@ -0,0 +1,11 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=exp/utf8string +GOFILES=\ + string.go\ + +include ../../../Make.pkg diff --git a/src/pkg/unicode/utf8/string.go b/src/pkg/exp/utf8string/string.go similarity index 88% rename from src/pkg/unicode/utf8/string.go rename to src/pkg/exp/utf8string/string.go index 443decf056c..da1e2de1ea2 100644 --- a/src/pkg/unicode/utf8/string.go +++ b/src/pkg/exp/utf8string/string.go @@ -2,9 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package utf8 +// Package utf8string provides an efficient way to index strings by rune rather than by byte. +package utf8string -import "errors" +import ( + "errors" + "unicode/utf8" +) // String wraps a regular string with a small structure that provides more // efficient indexing by code point index, as opposed to byte index. @@ -37,10 +41,10 @@ func (s *String) Init(contents string) *String { s.bytePos = 0 s.runePos = 0 for i := 0; i < len(contents); i++ { - if contents[i] >= RuneSelf { + if contents[i] >= utf8.RuneSelf { // Not ASCII. - s.numRunes = RuneCountInString(contents) - _, s.width = DecodeRuneInString(contents) + s.numRunes = utf8.RuneCountInString(contents) + _, s.width = utf8.DecodeRuneInString(contents) s.nonASCII = i return s } @@ -121,7 +125,7 @@ func (s *String) At(i int) rune { switch { case i == s.runePos-1: // backing up one rune - r, s.width = DecodeLastRuneInString(s.str[0:s.bytePos]) + r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos]) s.runePos = i s.bytePos -= s.width return r @@ -130,16 +134,16 @@ func (s *String) At(i int) rune { s.bytePos += s.width fallthrough case i == s.runePos: - r, s.width = DecodeRuneInString(s.str[s.bytePos:]) + r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:]) return r case i == 0: // start of string - r, s.width = DecodeRuneInString(s.str) + r, s.width = utf8.DecodeRuneInString(s.str) s.runePos = 0 s.bytePos = 0 return r case i == s.numRunes-1: // last rune in string - r, s.width = DecodeLastRuneInString(s.str) + r, s.width = utf8.DecodeLastRuneInString(s.str) s.runePos = i s.bytePos = len(s.str) - s.width return r @@ -175,7 +179,7 @@ func (s *String) At(i int) rune { if forward { // TODO: Is it much faster to use a range loop for this scan? for { - r, s.width = DecodeRuneInString(s.str[s.bytePos:]) + r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:]) if s.runePos == i { break } @@ -184,7 +188,7 @@ func (s *String) At(i int) rune { } } else { for { - r, s.width = DecodeLastRuneInString(s.str[0:s.bytePos]) + r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos]) s.runePos-- s.bytePos -= s.width if s.runePos == i { diff --git a/src/pkg/unicode/utf8/string_test.go b/src/pkg/exp/utf8string/string_test.go similarity index 88% rename from src/pkg/unicode/utf8/string_test.go rename to src/pkg/exp/utf8string/string_test.go index 2c139bea98c..28511b2f5f1 100644 --- a/src/pkg/unicode/utf8/string_test.go +++ b/src/pkg/exp/utf8string/string_test.go @@ -2,14 +2,23 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package utf8_test +package utf8string import ( "math/rand" "testing" - . "unicode/utf8" + "unicode/utf8" ) +var testStrings = []string{ + "", + "abcd", + "☺☻☹", + "日a本b語ç日ð本Ê語þ日¥本¼語i日©", + "日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©", + "\x80\x80\x80\x80", +} + func TestScanForwards(t *testing.T) { for _, s := range testStrings { runes := []rune(s) @@ -106,7 +115,7 @@ func TestLimitSliceAccess(t *testing.T) { if str.Slice(0, 0) != "" { t.Error("failure with empty slice at beginning") } - nr := RuneCountInString(s) + nr := utf8.RuneCountInString(s) if str.Slice(nr, nr) != "" { t.Error("failure with empty slice at end") } diff --git a/src/pkg/unicode/utf8/Makefile b/src/pkg/unicode/utf8/Makefile index 9863433b021..b6c36f07b20 100644 --- a/src/pkg/unicode/utf8/Makefile +++ b/src/pkg/unicode/utf8/Makefile @@ -6,7 +6,6 @@ include ../../../Make.inc TARG=unicode/utf8 GOFILES=\ - string.go\ utf8.go\ include ../../../Make.pkg diff --git a/test/alias.go b/test/alias.go index 410a44e6c06..639a9cabbb4 100644 --- a/test/alias.go +++ b/test/alias.go @@ -7,7 +7,7 @@ package main // Test that error messages say what the source file says -// (uint8 vs byte). +// (uint8 vs byte, int32 vs. rune). import ( "fmt" @@ -29,7 +29,4 @@ func main() { ff.Format(fs, x) // ERROR "rune" utf8.RuneStart(x) // ERROR "byte" - - var s utf8.String - s.At(x) // ERROR "int" }