mirror of
https://github.com/golang/go
synced 2024-11-21 22:34:48 -07:00
utf8.String: move to exp/utf8string.String
R=golang-dev, bradfitz CC=golang-dev https://golang.org/cl/5528115
This commit is contained in:
parent
3b87d68a07
commit
7585aa6ae5
@ -922,6 +922,10 @@ Several packages have moved under <code>exp</code> at the time of Go 1's release
|
||||
<li><code>http/spdy</code></li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
Also, the <code>utf8.String</code> type has been moved to its own package, <code>exp/utf8string</code>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
All these packages are available under the same names, with <code>exp/</code> prefixed: <code>exp/ebnf</code> etc.
|
||||
</p>
|
||||
@ -935,7 +939,7 @@ Also, the <code>gotype</code> command now resides in <code>exp/gotype</code>, wh
|
||||
<em>Updating</em>:
|
||||
Code that uses packages in <code>exp</code> will need to be updated by hand,
|
||||
or else compiled from an installation that has <code>exp</code> available.
|
||||
Gofix will warn about such uses.
|
||||
Gofix or the compiler will complain about such uses.
|
||||
<br>
|
||||
<font color="red">TODO: gofix should warn about such uses.</font>
|
||||
</p>
|
||||
|
@ -826,6 +826,10 @@ Several packages have moved under <code>exp</code> at the time of Go 1's release
|
||||
<li><code>http/spdy</code></li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
Also, the <code>utf8.String</code> type has been moved to its own package, <code>exp/utf8string</code>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
All these packages are available under the same names, with <code>exp/</code> prefixed: <code>exp/ebnf</code> etc.
|
||||
</p>
|
||||
@ -839,7 +843,7 @@ Also, the <code>gotype</code> command now resides in <code>exp/gotype</code>, wh
|
||||
<em>Updating</em>:
|
||||
Code that uses packages in <code>exp</code> will need to be updated by hand,
|
||||
or else compiled from an installation that has <code>exp</code> available.
|
||||
Gofix will warn about such uses.
|
||||
Gofix or the compiler will complain about such uses.
|
||||
<br>
|
||||
<font color="red">TODO: gofix should warn about such uses.</font>
|
||||
</p>
|
||||
|
@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/darwin_386/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
|
||||
mkdir -p "$GOROOT"/pkg/darwin_386/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/darwin_386/unicode/utf8.a
|
||||
|
@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/darwin_amd64/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
|
||||
mkdir -p "$GOROOT"/pkg/darwin_amd64/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/darwin_amd64/unicode/utf8.a
|
||||
|
@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/freebsd_386/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
|
||||
mkdir -p "$GOROOT"/pkg/freebsd_386/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/freebsd_386/unicode/utf8.a
|
||||
|
@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/freebsd_amd64/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
|
||||
mkdir -p "$GOROOT"/pkg/freebsd_amd64/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/freebsd_amd64/unicode/utf8.a
|
||||
|
@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/linux_386/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
|
||||
mkdir -p "$GOROOT"/pkg/linux_386/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_386/unicode/utf8.a
|
||||
|
@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/linux_amd64/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
|
||||
mkdir -p "$GOROOT"/pkg/linux_amd64/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_amd64/unicode/utf8.a
|
||||
|
@ -129,7 +129,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/linux_arm/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
5g -o "$WORK"/unicode/utf8/_obj/_go_.5 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
5g -o "$WORK"/unicode/utf8/_obj/_go_.5 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.5
|
||||
mkdir -p "$GOROOT"/pkg/linux_arm/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_arm/unicode/utf8.a
|
||||
|
@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/netbsd_386/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
|
||||
mkdir -p "$GOROOT"/pkg/netbsd_386/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/netbsd_386/unicode/utf8.a
|
||||
|
@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/netbsd_amd64/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
|
||||
mkdir -p "$GOROOT"/pkg/netbsd_amd64/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/netbsd_amd64/unicode/utf8.a
|
||||
|
@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/openbsd_386/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
|
||||
mkdir -p "$GOROOT"/pkg/openbsd_386/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/openbsd_386/unicode/utf8.a
|
||||
|
@ -124,7 +124,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/openbsd_amd64/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
|
||||
mkdir -p "$GOROOT"/pkg/openbsd_amd64/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/openbsd_amd64/unicode/utf8.a
|
||||
|
@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/plan9_386/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
|
||||
mkdir -p "$GOROOT"/pkg/plan9_386/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/plan9_386/unicode/utf8.a
|
||||
|
@ -128,7 +128,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/windows_386/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
|
||||
mkdir -p "$GOROOT"/pkg/windows_386/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/windows_386/unicode/utf8.a
|
||||
|
@ -126,7 +126,7 @@ cp "$WORK"/unicode.a "$GOROOT"/pkg/windows_amd64/unicode.a
|
||||
|
||||
mkdir -p "$WORK"/unicode/utf8/_obj/
|
||||
cd "$GOROOT"/src/pkg/unicode/utf8
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
|
||||
6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
|
||||
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
|
||||
mkdir -p "$GOROOT"/pkg/windows_amd64/unicode/
|
||||
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/windows_amd64/unicode/utf8.a
|
||||
|
11
src/pkg/exp/utf8string/Makefile
Normal file
11
src/pkg/exp/utf8string/Makefile
Normal file
@ -0,0 +1,11 @@
|
||||
# Copyright 2009 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
include ../../../Make.inc
|
||||
|
||||
TARG=exp/utf8string
|
||||
GOFILES=\
|
||||
string.go\
|
||||
|
||||
include ../../../Make.pkg
|
@ -2,9 +2,13 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package utf8
|
||||
// Package utf8string provides an efficient way to index strings by rune rather than by byte.
|
||||
package utf8string
|
||||
|
||||
import "errors"
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// String wraps a regular string with a small structure that provides more
|
||||
// efficient indexing by code point index, as opposed to byte index.
|
||||
@ -37,10 +41,10 @@ func (s *String) Init(contents string) *String {
|
||||
s.bytePos = 0
|
||||
s.runePos = 0
|
||||
for i := 0; i < len(contents); i++ {
|
||||
if contents[i] >= RuneSelf {
|
||||
if contents[i] >= utf8.RuneSelf {
|
||||
// Not ASCII.
|
||||
s.numRunes = RuneCountInString(contents)
|
||||
_, s.width = DecodeRuneInString(contents)
|
||||
s.numRunes = utf8.RuneCountInString(contents)
|
||||
_, s.width = utf8.DecodeRuneInString(contents)
|
||||
s.nonASCII = i
|
||||
return s
|
||||
}
|
||||
@ -121,7 +125,7 @@ func (s *String) At(i int) rune {
|
||||
switch {
|
||||
|
||||
case i == s.runePos-1: // backing up one rune
|
||||
r, s.width = DecodeLastRuneInString(s.str[0:s.bytePos])
|
||||
r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
|
||||
s.runePos = i
|
||||
s.bytePos -= s.width
|
||||
return r
|
||||
@ -130,16 +134,16 @@ func (s *String) At(i int) rune {
|
||||
s.bytePos += s.width
|
||||
fallthrough
|
||||
case i == s.runePos:
|
||||
r, s.width = DecodeRuneInString(s.str[s.bytePos:])
|
||||
r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
|
||||
return r
|
||||
case i == 0: // start of string
|
||||
r, s.width = DecodeRuneInString(s.str)
|
||||
r, s.width = utf8.DecodeRuneInString(s.str)
|
||||
s.runePos = 0
|
||||
s.bytePos = 0
|
||||
return r
|
||||
|
||||
case i == s.numRunes-1: // last rune in string
|
||||
r, s.width = DecodeLastRuneInString(s.str)
|
||||
r, s.width = utf8.DecodeLastRuneInString(s.str)
|
||||
s.runePos = i
|
||||
s.bytePos = len(s.str) - s.width
|
||||
return r
|
||||
@ -175,7 +179,7 @@ func (s *String) At(i int) rune {
|
||||
if forward {
|
||||
// TODO: Is it much faster to use a range loop for this scan?
|
||||
for {
|
||||
r, s.width = DecodeRuneInString(s.str[s.bytePos:])
|
||||
r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
|
||||
if s.runePos == i {
|
||||
break
|
||||
}
|
||||
@ -184,7 +188,7 @@ func (s *String) At(i int) rune {
|
||||
}
|
||||
} else {
|
||||
for {
|
||||
r, s.width = DecodeLastRuneInString(s.str[0:s.bytePos])
|
||||
r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
|
||||
s.runePos--
|
||||
s.bytePos -= s.width
|
||||
if s.runePos == i {
|
@ -2,14 +2,23 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package utf8_test
|
||||
package utf8string
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"testing"
|
||||
. "unicode/utf8"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
var testStrings = []string{
|
||||
"",
|
||||
"abcd",
|
||||
"☺☻☹",
|
||||
"日a本b語ç日ð本Ê語þ日¥本¼語i日©",
|
||||
"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©",
|
||||
"\x80\x80\x80\x80",
|
||||
}
|
||||
|
||||
func TestScanForwards(t *testing.T) {
|
||||
for _, s := range testStrings {
|
||||
runes := []rune(s)
|
||||
@ -106,7 +115,7 @@ func TestLimitSliceAccess(t *testing.T) {
|
||||
if str.Slice(0, 0) != "" {
|
||||
t.Error("failure with empty slice at beginning")
|
||||
}
|
||||
nr := RuneCountInString(s)
|
||||
nr := utf8.RuneCountInString(s)
|
||||
if str.Slice(nr, nr) != "" {
|
||||
t.Error("failure with empty slice at end")
|
||||
}
|
@ -6,7 +6,6 @@ include ../../../Make.inc
|
||||
|
||||
TARG=unicode/utf8
|
||||
GOFILES=\
|
||||
string.go\
|
||||
utf8.go\
|
||||
|
||||
include ../../../Make.pkg
|
||||
|
@ -7,7 +7,7 @@
|
||||
package main
|
||||
|
||||
// Test that error messages say what the source file says
|
||||
// (uint8 vs byte).
|
||||
// (uint8 vs byte, int32 vs. rune).
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@ -29,7 +29,4 @@ func main() {
|
||||
ff.Format(fs, x) // ERROR "rune"
|
||||
|
||||
utf8.RuneStart(x) // ERROR "byte"
|
||||
|
||||
var s utf8.String
|
||||
s.At(x) // ERROR "int"
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user