1
0
mirror of https://github.com/golang/go synced 2024-11-21 20:34:40 -07:00

change utf8.FullRuneInString and utf8.DecodeRuneInString

to use single string argument instead of string, index.

R=r
DELTA=136  (9 added, 7 deleted, 120 changed)
OCL=28642
CL=28644
This commit is contained in:
Russ Cox 2009-05-11 14:10:34 -07:00
parent 5a11a46e2d
commit 3619f1ea6a
11 changed files with 121 additions and 119 deletions

View File

@ -469,7 +469,7 @@ func (p *pp) doprintf(format string, v reflect.StructValue) {
end := len(format) - 1;
fieldnum := 0; // we process one field per non-trivial format
for i := 0; i <= end; {
c, w := utf8.DecodeRuneInString(format, i);
c, w := utf8.DecodeRuneInString(format[i:len(format)]);
if c != '%' || i == end {
p.add(c);
i += w;
@ -500,7 +500,7 @@ func (p *pp) doprintf(format string, v reflect.StructValue) {
if i < end && format[i] == '.' {
p.fmt.prec, p.fmt.prec_present, i = parsenum(format, i+1, end);
}
c, w = utf8.DecodeRuneInString(format, i);
c, w = utf8.DecodeRuneInString(format[i:len(format)]);
i += w;
// percent is special - absorbs no operand
if c == '%' {

View File

@ -426,7 +426,7 @@ func (x *ChanType) Visit(v ExprVisitor) { v.DoChanType(x); }
// IsExported returns whether name is an exported Go symbol
// (i.e., whether it begins with an uppercase letter).
func IsExported(name string) bool {
ch, len := utf8.DecodeRuneInString(name, 0);
ch, len := utf8.DecodeRuneInString(name);
return unicode.IsUpper(ch);
}

View File

@ -114,7 +114,7 @@ func Unquote(s string) (t string, ok bool) {
w++;
// Coerce to well-formed UTF-8.
default:
rune, size := utf8.DecodeRuneInString(s, r);
rune, size := utf8.DecodeRuneInString(s[r:len(s)]);
r += size;
w += utf8.EncodeRune(rune, b[w:len(b)]);
}

View File

@ -698,7 +698,7 @@ func (p *typeParser) Next() {
return;
}
start := p.index;
c, w := utf8.DecodeRuneInString(p.str, p.index);
c, w := utf8.DecodeRuneInString(p.str[p.index:len(p.str)]);
p.index += w;
switch {
case c == '<':

View File

@ -263,7 +263,7 @@ func (p *parser) nextc() int {
if p.pos >= len(p.re.expr) {
p.ch = endOfFile
} else {
c, w := utf8.DecodeRuneInString(p.re.expr, p.pos);
c, w := utf8.DecodeRuneInString(p.re.expr[p.pos:len(p.re.expr)]);
p.ch = c;
p.pos += w;
}
@ -653,7 +653,7 @@ func (re *Regexp) doExecute(str string, pos int) []int {
charwidth := 1;
c := endOfFile;
if pos < len(str) {
c, charwidth = utf8.DecodeRuneInString(str, pos);
c, charwidth = utf8.DecodeRuneInString(str[pos:len(str)]);
}
for i := 0; i < len(s[in]); i++ {
st := s[in][i];

View File

@ -18,38 +18,38 @@ const lowerhex = "0123456789abcdef"
func Quote(s string) string {
// TODO(rsc): String accumulation could be more efficient.
t := `"`;
for i := 0; i < len(s); i++ {
switch {
case s[i] == '"':
for ; len(s) > 0; s = s[1:len(s)] {
switch c := s[0]; {
case c == '"':
t += `\"`;
case s[i] == '\\':
case c == '\\':
t += `\\`;
case ' ' <= s[i] && s[i] <= '~':
t += string(s[i]);
case s[i] == '\a':
case ' ' <= c && c <= '~':
t += string(c);
case c == '\a':
t += `\a`;
case s[i] == '\b':
case c == '\b':
t += `\b`;
case s[i] == '\f':
case c == '\f':
t += `\f`;
case s[i] == '\n':
case c == '\n':
t += `\n`;
case s[i] == '\r':
case c == '\r':
t += `\r`;
case s[i] == '\t':
case c == '\t':
t += `\t`;
case s[i] == '\v':
case c == '\v':
t += `\v`;
case s[i] < utf8.RuneSelf:
t += `\x` + string(lowerhex[s[i]>>4]) + string(lowerhex[s[i]&0xF]);
case c < utf8.RuneSelf:
t += `\x` + string(lowerhex[c>>4]) + string(lowerhex[c&0xF]);
case utf8.FullRuneInString(s, i):
r, size := utf8.DecodeRuneInString(s, i);
case utf8.FullRuneInString(s):
r, size := utf8.DecodeRuneInString(s);
if r == utf8.RuneError && size == 1 {
goto EscX;
}
i += size-1; // i++ on next iteration
s = s[size-1:len(s)]; // next iteration will slice off 1 more
if r < 0x10000 {
t += `\u`;
for j:=uint(0); j<4; j++ {
@ -65,8 +65,8 @@ func Quote(s string) string {
default:
EscX:
t += `\x`;
t += string(lowerhex[s[i]>>4]);
t += string(lowerhex[s[i]&0xF]);
t += string(lowerhex[c>>4]);
t += string(lowerhex[c&0xF]);
}
}
t += `"`;
@ -97,42 +97,42 @@ func unhex(b byte) (v int, ok bool) {
return;
}
func unquoteChar(s string, i int, q byte) (t string, ii int, err os.Error) {
func unquoteChar(s string, q byte) (t, ns string, err os.Error) {
err = os.EINVAL; // assume error for easy return
// easy cases
switch c := s[i]; {
switch c := s[0]; {
case c >= utf8.RuneSelf:
r, size := utf8.DecodeRuneInString(s, i);
return s[i:i+size], i+size, nil;
r, size := utf8.DecodeRuneInString(s);
return s[0:size], s[size:len(s)], nil;
case c == q:
return;
case c != '\\':
return s[i:i+1], i+1, nil;
return s[0:1], s[1:len(s)], nil;
}
// hard case: c is backslash
if i+1 >= len(s) {
if len(s) <= 1 {
return;
}
c := s[i+1];
i += 2;
c := s[1];
s = s[2:len(s)];
switch c {
case 'a':
return "\a", i, nil;
return "\a", s, nil;
case 'b':
return "\b", i, nil;
return "\b", s, nil;
case 'f':
return "\f", i, nil;
return "\f", s, nil;
case 'n':
return "\n", i, nil;
return "\n", s, nil;
case 'r':
return "\r", i, nil;
return "\r", s, nil;
case 't':
return "\t", i, nil;
return "\t", s, nil;
case 'v':
return "\v", i, nil;
return "\v", s, nil;
case 'x', 'u', 'U':
n := 0;
switch c {
@ -144,43 +144,45 @@ func unquoteChar(s string, i int, q byte) (t string, ii int, err os.Error) {
n = 8;
}
v := 0;
if len(s) < n {
return;
}
for j := 0; j < n; j++ {
if i+j >= len(s) {
return;
}
x, ok := unhex(s[i+j]);
x, ok := unhex(s[j]);
if !ok {
return;
}
v = v<<4 | x;
}
s = s[n:len(s)];
if c == 'x' {
return string([]byte{byte(v)}), i+n, nil;
// single-byte string, possibly not UTF-8
return string([]byte{byte(v)}), s, nil;
}
if v > utf8.RuneMax {
return;
}
return string(v), i+n, nil;
return string(v), s, nil;
case '0', '1', '2', '3', '4', '5', '6', '7':
v := 0;
i--;
for j := 0; j < 3; j++ {
if i+j >= len(s) {
return;
}
x := int(s[i+j]) - '0';
v := int(c) - '0';
if len(s) < 2 {
return;
}
for j := 0; j < 2; j++ { // one digit already; two more
x := int(s[j]) - '0';
if x < 0 || x > 7 {
return;
}
v = (v<<3) | x;
}
s = s[2:len(s)];
if v > 255 {
return;
}
return string(v), i+3, nil;
return string(v), s, nil;
case '\\', q:
return string(c), i, nil;
return string(c), s, nil;
}
return;
}
@ -193,37 +195,35 @@ func unquoteChar(s string, i int, q byte) (t string, ii int, err os.Error) {
func Unquote(s string) (t string, err os.Error) {
err = os.EINVAL; // assume error for easy return
n := len(s);
if n < 2 || s[0] != s[n-1] {
if n < 2 {
return;
}
quote := s[0];
if quote != s[n-1] {
return;
}
s = s[1:n-1];
if quote == '`' {
return s, nil;
}
if quote != '"' && quote != '\'' {
return;
}
switch s[0] {
case '`':
t := s[1:n-1];
return t, nil;
case '"', '\'':
// TODO(rsc): String accumulation could be more efficient.
t := "";
q := s[0];
var c string;
var err os.Error;
for i := 1; i < n-1; {
c, i, err = unquoteChar(s, i, q);
if err != nil {
return "", err;
}
t += c;
if q == '\'' && i != n-1 {
// single-quoted must be single character
return;
}
if i > n-1 {
// read too far
return;
}
// TODO(rsc): String accumulation could be more efficient.
var c, tt string;
var err1 os.Error;
for len(s) > 0 {
if c, s, err1 = unquoteChar(s, quote); err1 != nil {
err = err1;
return;
}
tt += c;
if quote == '\'' && len(s) != 0 {
// single-quoted must be single character
return;
}
return t, nil
}
return;
return tt, nil
}

View File

@ -11,12 +11,13 @@ import "utf8"
// Invalid UTF-8 sequences become correct encodings of U+FFF8.
func Explode(s string) []string {
a := make([]string, utf8.RuneCountInString(s));
j := 0;
var size, rune int;
for i := 0; i < len(a); i++ {
rune, size = utf8.DecodeRuneInString(s, j);
i := 0;
for len(s) > 0 {
rune, size = utf8.DecodeRuneInString(s);
s = s[size:len(s)];
a[i] = string(rune);
j += size;
i++;
}
return a
}

View File

@ -108,11 +108,12 @@ func decodeRuneInternal(p []byte) (rune, size int, short bool) {
return RuneError, 1, false
}
func decodeRuneInStringInternal(s string, i int, n int) (rune, size int, short bool) {
func decodeRuneInStringInternal(s string) (rune, size int, short bool) {
n := len(s);
if n < 1 {
return RuneError, 0, true;
}
c0 := s[i];
c0 := s[0];
// 1-byte, 7-bit sequence?
if c0 < _Tx {
@ -128,7 +129,7 @@ func decodeRuneInStringInternal(s string, i int, n int) (rune, size int, short b
if n < 2 {
return RuneError, 1, true
}
c1 := s[i+1];
c1 := s[1];
if c1 < _Tx || _T2 <= c1 {
return RuneError, 1, false
}
@ -146,7 +147,7 @@ func decodeRuneInStringInternal(s string, i int, n int) (rune, size int, short b
if n < 3 {
return RuneError, 1, true
}
c2 := s[i+2];
c2 := s[2];
if c2 < _Tx || _T2 <= c2 {
return RuneError, 1, false
}
@ -164,7 +165,7 @@ func decodeRuneInStringInternal(s string, i int, n int) (rune, size int, short b
if n < 4 {
return RuneError, 1, true
}
c3 := s[i+3];
c3 := s[3];
if c3 < _Tx || _T2 <= c3 {
return RuneError, 1, false
}
@ -190,8 +191,8 @@ func FullRune(p []byte) bool {
}
// FullRuneInString is like FullRune but its input is a string.
func FullRuneInString(s string, i int) bool {
rune, size, short := decodeRuneInStringInternal(s, i, len(s) - i);
func FullRuneInString(s string) bool {
rune, size, short := decodeRuneInStringInternal(s);
return !short
}
@ -203,9 +204,9 @@ func DecodeRune(p []byte) (rune, size int) {
}
// DecodeRuneInString is like DecodeRune but its input is a string.
func DecodeRuneInString(s string, i int) (rune, size int) {
func DecodeRuneInString(s string) (rune, size int) {
var short bool;
rune, size, short = decodeRuneInStringInternal(s, i, len(s) - i);
rune, size, short = decodeRuneInStringInternal(s);
return;
}
@ -281,7 +282,7 @@ func RuneCountInString(s string) int {
if s[i] < RuneSelf {
i++;
} else {
rune, size, short := decodeRuneInStringInternal(s, i, ei - i);
rune, size, short := decodeRuneInStringInternal(s[i:ei]);
i += size;
}
}

View File

@ -58,17 +58,17 @@ func TestFullRune(t *testing.T) {
if !utf8.FullRune(b) {
t.Errorf("FullRune(%q) (rune %04x) = false, want true", b, m.rune);
}
s := "xx"+m.str;
if !utf8.FullRuneInString(s, 2) {
t.Errorf("FullRuneInString(%q, 2) (rune %04x) = false, want true", s, m.rune);
s := m.str;
if !utf8.FullRuneInString(s) {
t.Errorf("FullRuneInString(%q) (rune %04x) = false, want true", s, m.rune);
}
b1 := b[0:len(b)-1];
if utf8.FullRune(b1) {
t.Errorf("FullRune(%q) = true, want false", b1);
}
s1 := "xxx"+string(b1);
if utf8.FullRuneInString(s1, 3) {
t.Errorf("FullRune(%q, 3) = true, want false", s1);
s1 := string(b1);
if utf8.FullRuneInString(s1) {
t.Errorf("FullRune(%q) = true, want false", s1);
}
}
}
@ -106,10 +106,10 @@ func TestDecodeRune(t *testing.T) {
if rune != m.rune || size != len(b) {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, m.rune, len(b));
}
s := "xx"+m.str;
rune, size = utf8.DecodeRuneInString(s, 2);
s := m.str;
rune, size = utf8.DecodeRuneInString(s);
if rune != m.rune || size != len(b) {
t.Errorf("DecodeRune(%q, 2) = 0x%04x, %d want 0x%04x, %d", s, rune, size, m.rune, len(b));
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, m.rune, len(b));
}
// there's an extra byte that bytes left behind - make sure trailing byte works
@ -117,10 +117,10 @@ func TestDecodeRune(t *testing.T) {
if rune != m.rune || size != len(b) {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, m.rune, len(b));
}
s = "x"+m.str+"\x00";
rune, size = utf8.DecodeRuneInString(s, 1);
s = m.str+"\x00";
rune, size = utf8.DecodeRuneInString(s);
if rune != m.rune || size != len(b) {
t.Errorf("DecodeRuneInString(%q, 1) = 0x%04x, %d want 0x%04x, %d", s, rune, size, m.rune, len(b));
t.Errorf("DecodeRuneInString(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, m.rune, len(b));
}
// make sure missing bytes fail
@ -132,10 +132,10 @@ func TestDecodeRune(t *testing.T) {
if rune != RuneError || size != wantsize {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b[0:len(b)-1], rune, size, RuneError, wantsize);
}
s = "xxx"+m.str[0:len(m.str)-1];
rune, size = utf8.DecodeRuneInString(s, 3);
s = m.str[0:len(m.str)-1];
rune, size = utf8.DecodeRuneInString(s);
if rune != RuneError || size != wantsize {
t.Errorf("DecodeRuneInString(%q, 3) = 0x%04x, %d want 0x%04x, %d", s, rune, size, RuneError, wantsize);
t.Errorf("DecodeRuneInString(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, RuneError, wantsize);
}
// make sure bad sequences fail
@ -148,10 +148,10 @@ func TestDecodeRune(t *testing.T) {
if rune != RuneError || size != 1 {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, RuneError, 1);
}
s = "xxxx"+string(b);
s = string(b);
rune, size = utf8.DecodeRune(b);
if rune != RuneError || size != 1 {
t.Errorf("DecodeRuneInString(%q, 4) = 0x%04x, %d want 0x%04x, %d", s, rune, size, RuneError, 1);
t.Errorf("DecodeRuneInString(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, RuneError, 1);
}
}
}

View File

@ -21,7 +21,7 @@ func main() {
ok := true;
cnum := 0;
for i, c = range s {
rune, size := utf8.DecodeRuneInString(s, i); // check it another way
rune, size := utf8.DecodeRuneInString(s[i:len(s)]); // check it another way
if i != offset {
fmt.Printf("unexpected offset %d not %d\n", i, offset);
ok = false;

View File

@ -23,7 +23,7 @@ func main() {
var l = len(s);
for w, i, j := 0,0,0; i < l; i += w {
var r int;
r, w = utf8.DecodeRuneInString(s, i);
r, w = utf8.DecodeRuneInString(s[i:len(s)]);
if w == 0 { panic("zero width in string") }
if r != chars[j] { panic("wrong value from string") }
j++;