mirror of
https://github.com/golang/go
synced 2024-11-22 03:34:40 -07:00
utf8: add DecodeLastRune and DecodeLastRuneInString to
enable traversing rune-by-rune backwards in strings R=r, rsc CC=golang-dev https://golang.org/cl/2192050
This commit is contained in:
parent
1959c3ac5b
commit
f11271b82e
@ -209,6 +209,73 @@ func DecodeRuneInString(s string) (rune, size int) {
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeLastRune unpacks the last UTF-8 encoding in p
|
||||
// and returns the rune and its width in bytes.
|
||||
func DecodeLastRune(p []byte) (rune, size int) {
|
||||
end := len(p)
|
||||
if end == 0 {
|
||||
return RuneError, 0
|
||||
}
|
||||
start := end - 1
|
||||
rune = int(p[start])
|
||||
if rune < RuneSelf {
|
||||
return rune, 1
|
||||
}
|
||||
// guard against O(n^2) behavior when traversing
|
||||
// backwards through strings with long sequences of
|
||||
// invalid UTF-8.
|
||||
lim := end - UTFMax
|
||||
if lim < 0 {
|
||||
lim = 0
|
||||
}
|
||||
for start--; start >= lim; start-- {
|
||||
if RuneStart(p[start]) {
|
||||
break
|
||||
}
|
||||
}
|
||||
if start < 0 {
|
||||
start = 0
|
||||
}
|
||||
rune, size = DecodeRune(p[start:end])
|
||||
if start+size != end {
|
||||
return RuneError, 1
|
||||
}
|
||||
return rune, size
|
||||
}
|
||||
|
||||
// DecodeLastRuneInString is like DecodeLastRune but its input is a string.
|
||||
func DecodeLastRuneInString(s string) (rune, size int) {
|
||||
end := len(s)
|
||||
if end == 0 {
|
||||
return RuneError, 0
|
||||
}
|
||||
start := end - 1
|
||||
rune = int(s[start])
|
||||
if rune < RuneSelf {
|
||||
return rune, 1
|
||||
}
|
||||
// guard against O(n^2) behavior when traversing
|
||||
// backwards through strings with long sequences of
|
||||
// invalid UTF-8.
|
||||
lim := end - UTFMax
|
||||
if lim < 0 {
|
||||
lim = 0
|
||||
}
|
||||
for start--; start >= lim; start-- {
|
||||
if RuneStart(s[start]) {
|
||||
break
|
||||
}
|
||||
}
|
||||
if start < 0 {
|
||||
start = 0
|
||||
}
|
||||
rune, size = DecodeRuneInString(s[start:end])
|
||||
if start+size != end {
|
||||
return RuneError, 1
|
||||
}
|
||||
return rune, size
|
||||
}
|
||||
|
||||
// RuneLen returns the number of bytes required to encode the rune.
|
||||
func RuneLen(rune int) int {
|
||||
switch {
|
||||
|
@ -44,6 +44,12 @@ var utf8map = []Utf8Map{
|
||||
Utf8Map{0xFFFD, "\xef\xbf\xbd"},
|
||||
}
|
||||
|
||||
var testStrings = []string{
|
||||
"",
|
||||
"abcd",
|
||||
"\x80\x80\x80\x80",
|
||||
}
|
||||
|
||||
// strings.Bytes with one extra byte at end
|
||||
func makeBytes(s string) []byte {
|
||||
s += "\x00"
|
||||
@ -141,6 +147,79 @@ func TestDecodeRune(t *testing.T) {
|
||||
if rune != RuneError || size != 1 {
|
||||
t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, RuneError, 1)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Check that DecodeRune and DecodeLastRune correspond to
|
||||
// the equivalent range loop.
|
||||
func TestSequencing(t *testing.T) {
|
||||
for _, ts := range testStrings {
|
||||
for _, m := range utf8map {
|
||||
for _, s := range []string{ts + m.str, m.str + ts, ts + m.str + ts} {
|
||||
testSequence(t, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testSequence(t *testing.T, s string) {
|
||||
type info struct {
|
||||
index int
|
||||
rune int
|
||||
}
|
||||
index := make([]info, len(s))
|
||||
b := []byte(s)
|
||||
si := 0
|
||||
j := 0
|
||||
for i, r := range s {
|
||||
if si != i {
|
||||
t.Errorf("Sequence(%q) mismatched index %d, want %d", s, si, i)
|
||||
return
|
||||
}
|
||||
index[j] = info{i, r}
|
||||
j++
|
||||
rune1, size1 := DecodeRune(b[i:])
|
||||
if r != rune1 {
|
||||
t.Errorf("DecodeRune(%q) = %#04x, want %#04x", s[i:], rune1, r)
|
||||
return
|
||||
}
|
||||
rune2, size2 := DecodeRuneInString(s[i:])
|
||||
if r != rune2 {
|
||||
t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s[i:], rune2, r)
|
||||
return
|
||||
}
|
||||
if size1 != size2 {
|
||||
t.Errorf("DecodeRune/DecodeRuneInString(%q) size mismatch %d/%d", s[i:], size1, size2)
|
||||
return
|
||||
}
|
||||
si += size1
|
||||
}
|
||||
j--
|
||||
for si = len(s); si > 0; {
|
||||
rune1, size1 := DecodeLastRune(b[0:si])
|
||||
rune2, size2 := DecodeLastRuneInString(s[0:si])
|
||||
if size1 != size2 {
|
||||
t.Errorf("DecodeLastRune/DecodeLastRuneInString(%q, %d) size mismatch %d/%d", s, si, size1, size2)
|
||||
return
|
||||
}
|
||||
if rune1 != index[j].rune {
|
||||
t.Errorf("DecodeLastRune(%q, %d) = %#04x, want %#04x", s, si, rune1, index[j].rune)
|
||||
return
|
||||
}
|
||||
if rune2 != index[j].rune {
|
||||
t.Errorf("DecodeLastRuneInString(%q, %d) = %#04x, want %#04x", s, si, rune2, index[j].rune)
|
||||
return
|
||||
}
|
||||
si -= size1
|
||||
if si != index[j].index {
|
||||
t.Errorf("DecodeLastRune(%q) index mismatch at %d, want %d", s, si, index[j].index)
|
||||
return
|
||||
}
|
||||
j--
|
||||
}
|
||||
if si != 0 {
|
||||
t.Errorf("DecodeLastRune(%q) finished at %d, not 0", s, si)
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user