1
0
mirror of https://github.com/golang/go synced 2024-11-22 01:54:42 -07:00

change utf8.FullRuneInString and utf8.DecodeRuneInString to take a single string argument instead of a (string, index) pair.

R=r
DELTA=136  (9 added, 7 deleted, 120 changed)
OCL=28642
CL=28644
This commit is contained in:
Russ Cox 2009-05-11 14:10:34 -07:00
parent 5a11a46e2d
commit 3619f1ea6a
11 changed files with 121 additions and 119 deletions

View File

@ -469,7 +469,7 @@ func (p *pp) doprintf(format string, v reflect.StructValue) {
end := len(format) - 1; end := len(format) - 1;
fieldnum := 0; // we process one field per non-trivial format fieldnum := 0; // we process one field per non-trivial format
for i := 0; i <= end; { for i := 0; i <= end; {
c, w := utf8.DecodeRuneInString(format, i); c, w := utf8.DecodeRuneInString(format[i:len(format)]);
if c != '%' || i == end { if c != '%' || i == end {
p.add(c); p.add(c);
i += w; i += w;
@ -500,7 +500,7 @@ func (p *pp) doprintf(format string, v reflect.StructValue) {
if i < end && format[i] == '.' { if i < end && format[i] == '.' {
p.fmt.prec, p.fmt.prec_present, i = parsenum(format, i+1, end); p.fmt.prec, p.fmt.prec_present, i = parsenum(format, i+1, end);
} }
c, w = utf8.DecodeRuneInString(format, i); c, w = utf8.DecodeRuneInString(format[i:len(format)]);
i += w; i += w;
// percent is special - absorbs no operand // percent is special - absorbs no operand
if c == '%' { if c == '%' {

View File

@ -426,7 +426,7 @@ func (x *ChanType) Visit(v ExprVisitor) { v.DoChanType(x); }
// IsExported returns whether name is an exported Go symbol // IsExported returns whether name is an exported Go symbol
// (i.e., whether it begins with an uppercase letter). // (i.e., whether it begins with an uppercase letter).
func IsExported(name string) bool { func IsExported(name string) bool {
ch, len := utf8.DecodeRuneInString(name, 0); ch, len := utf8.DecodeRuneInString(name);
return unicode.IsUpper(ch); return unicode.IsUpper(ch);
} }

View File

@ -114,7 +114,7 @@ func Unquote(s string) (t string, ok bool) {
w++; w++;
// Coerce to well-formed UTF-8. // Coerce to well-formed UTF-8.
default: default:
rune, size := utf8.DecodeRuneInString(s, r); rune, size := utf8.DecodeRuneInString(s[r:len(s)]);
r += size; r += size;
w += utf8.EncodeRune(rune, b[w:len(b)]); w += utf8.EncodeRune(rune, b[w:len(b)]);
} }

View File

@ -698,7 +698,7 @@ func (p *typeParser) Next() {
return; return;
} }
start := p.index; start := p.index;
c, w := utf8.DecodeRuneInString(p.str, p.index); c, w := utf8.DecodeRuneInString(p.str[p.index:len(p.str)]);
p.index += w; p.index += w;
switch { switch {
case c == '<': case c == '<':

View File

@ -263,7 +263,7 @@ func (p *parser) nextc() int {
if p.pos >= len(p.re.expr) { if p.pos >= len(p.re.expr) {
p.ch = endOfFile p.ch = endOfFile
} else { } else {
c, w := utf8.DecodeRuneInString(p.re.expr, p.pos); c, w := utf8.DecodeRuneInString(p.re.expr[p.pos:len(p.re.expr)]);
p.ch = c; p.ch = c;
p.pos += w; p.pos += w;
} }
@ -653,7 +653,7 @@ func (re *Regexp) doExecute(str string, pos int) []int {
charwidth := 1; charwidth := 1;
c := endOfFile; c := endOfFile;
if pos < len(str) { if pos < len(str) {
c, charwidth = utf8.DecodeRuneInString(str, pos); c, charwidth = utf8.DecodeRuneInString(str[pos:len(str)]);
} }
for i := 0; i < len(s[in]); i++ { for i := 0; i < len(s[in]); i++ {
st := s[in][i]; st := s[in][i];

View File

@ -18,38 +18,38 @@ const lowerhex = "0123456789abcdef"
func Quote(s string) string { func Quote(s string) string {
// TODO(rsc): String accumulation could be more efficient. // TODO(rsc): String accumulation could be more efficient.
t := `"`; t := `"`;
for i := 0; i < len(s); i++ { for ; len(s) > 0; s = s[1:len(s)] {
switch { switch c := s[0]; {
case s[i] == '"': case c == '"':
t += `\"`; t += `\"`;
case s[i] == '\\': case c == '\\':
t += `\\`; t += `\\`;
case ' ' <= s[i] && s[i] <= '~': case ' ' <= c && c <= '~':
t += string(s[i]); t += string(c);
case s[i] == '\a': case c == '\a':
t += `\a`; t += `\a`;
case s[i] == '\b': case c == '\b':
t += `\b`; t += `\b`;
case s[i] == '\f': case c == '\f':
t += `\f`; t += `\f`;
case s[i] == '\n': case c == '\n':
t += `\n`; t += `\n`;
case s[i] == '\r': case c == '\r':
t += `\r`; t += `\r`;
case s[i] == '\t': case c == '\t':
t += `\t`; t += `\t`;
case s[i] == '\v': case c == '\v':
t += `\v`; t += `\v`;
case s[i] < utf8.RuneSelf: case c < utf8.RuneSelf:
t += `\x` + string(lowerhex[s[i]>>4]) + string(lowerhex[s[i]&0xF]); t += `\x` + string(lowerhex[c>>4]) + string(lowerhex[c&0xF]);
case utf8.FullRuneInString(s, i): case utf8.FullRuneInString(s):
r, size := utf8.DecodeRuneInString(s, i); r, size := utf8.DecodeRuneInString(s);
if r == utf8.RuneError && size == 1 { if r == utf8.RuneError && size == 1 {
goto EscX; goto EscX;
} }
i += size-1; // i++ on next iteration s = s[size-1:len(s)]; // next iteration will slice off 1 more
if r < 0x10000 { if r < 0x10000 {
t += `\u`; t += `\u`;
for j:=uint(0); j<4; j++ { for j:=uint(0); j<4; j++ {
@ -65,8 +65,8 @@ func Quote(s string) string {
default: default:
EscX: EscX:
t += `\x`; t += `\x`;
t += string(lowerhex[s[i]>>4]); t += string(lowerhex[c>>4]);
t += string(lowerhex[s[i]&0xF]); t += string(lowerhex[c&0xF]);
} }
} }
t += `"`; t += `"`;
@ -97,42 +97,42 @@ func unhex(b byte) (v int, ok bool) {
return; return;
} }
func unquoteChar(s string, i int, q byte) (t string, ii int, err os.Error) { func unquoteChar(s string, q byte) (t, ns string, err os.Error) {
err = os.EINVAL; // assume error for easy return err = os.EINVAL; // assume error for easy return
// easy cases // easy cases
switch c := s[i]; { switch c := s[0]; {
case c >= utf8.RuneSelf: case c >= utf8.RuneSelf:
r, size := utf8.DecodeRuneInString(s, i); r, size := utf8.DecodeRuneInString(s);
return s[i:i+size], i+size, nil; return s[0:size], s[size:len(s)], nil;
case c == q: case c == q:
return; return;
case c != '\\': case c != '\\':
return s[i:i+1], i+1, nil; return s[0:1], s[1:len(s)], nil;
} }
// hard case: c is backslash // hard case: c is backslash
if i+1 >= len(s) { if len(s) <= 1 {
return; return;
} }
c := s[i+1]; c := s[1];
i += 2; s = s[2:len(s)];
switch c { switch c {
case 'a': case 'a':
return "\a", i, nil; return "\a", s, nil;
case 'b': case 'b':
return "\b", i, nil; return "\b", s, nil;
case 'f': case 'f':
return "\f", i, nil; return "\f", s, nil;
case 'n': case 'n':
return "\n", i, nil; return "\n", s, nil;
case 'r': case 'r':
return "\r", i, nil; return "\r", s, nil;
case 't': case 't':
return "\t", i, nil; return "\t", s, nil;
case 'v': case 'v':
return "\v", i, nil; return "\v", s, nil;
case 'x', 'u', 'U': case 'x', 'u', 'U':
n := 0; n := 0;
switch c { switch c {
@ -144,43 +144,45 @@ func unquoteChar(s string, i int, q byte) (t string, ii int, err os.Error) {
n = 8; n = 8;
} }
v := 0; v := 0;
for j := 0; j < n; j++ { if len(s) < n {
if i+j >= len(s) {
return; return;
} }
x, ok := unhex(s[i+j]); for j := 0; j < n; j++ {
x, ok := unhex(s[j]);
if !ok { if !ok {
return; return;
} }
v = v<<4 | x; v = v<<4 | x;
} }
s = s[n:len(s)];
if c == 'x' { if c == 'x' {
return string([]byte{byte(v)}), i+n, nil; // single-byte string, possibly not UTF-8
return string([]byte{byte(v)}), s, nil;
} }
if v > utf8.RuneMax { if v > utf8.RuneMax {
return; return;
} }
return string(v), i+n, nil; return string(v), s, nil;
case '0', '1', '2', '3', '4', '5', '6', '7': case '0', '1', '2', '3', '4', '5', '6', '7':
v := 0; v := int(c) - '0';
i--; if len(s) < 2 {
for j := 0; j < 3; j++ {
if i+j >= len(s) {
return; return;
} }
x := int(s[i+j]) - '0'; for j := 0; j < 2; j++ { // one digit already; two more
x := int(s[j]) - '0';
if x < 0 || x > 7 { if x < 0 || x > 7 {
return; return;
} }
v = (v<<3) | x; v = (v<<3) | x;
} }
s = s[2:len(s)];
if v > 255 { if v > 255 {
return; return;
} }
return string(v), i+3, nil; return string(v), s, nil;
case '\\', q: case '\\', q:
return string(c), i, nil; return string(c), s, nil;
} }
return; return;
} }
@ -193,37 +195,35 @@ func unquoteChar(s string, i int, q byte) (t string, ii int, err os.Error) {
func Unquote(s string) (t string, err os.Error) { func Unquote(s string) (t string, err os.Error) {
err = os.EINVAL; // assume error for easy return err = os.EINVAL; // assume error for easy return
n := len(s); n := len(s);
if n < 2 || s[0] != s[n-1] { if n < 2 {
return;
}
quote := s[0];
if quote != s[n-1] {
return;
}
s = s[1:n-1];
if quote == '`' {
return s, nil;
}
if quote != '"' && quote != '\'' {
return; return;
} }
switch s[0] {
case '`':
t := s[1:n-1];
return t, nil;
case '"', '\'':
// TODO(rsc): String accumulation could be more efficient. // TODO(rsc): String accumulation could be more efficient.
t := ""; var c, tt string;
q := s[0]; var err1 os.Error;
var c string; for len(s) > 0 {
var err os.Error; if c, s, err1 = unquoteChar(s, quote); err1 != nil {
for i := 1; i < n-1; { err = err1;
c, i, err = unquoteChar(s, i, q); return;
if err != nil {
return "", err;
} }
t += c; tt += c;
if q == '\'' && i != n-1 { if quote == '\'' && len(s) != 0 {
// single-quoted must be single character // single-quoted must be single character
return; return;
} }
if i > n-1 {
// read too far
return;
} }
} return tt, nil
return t, nil
}
return;
} }

View File

@ -11,12 +11,13 @@ import "utf8"
// Invalid UTF-8 sequences become correct encodings of U+FFF8. // Invalid UTF-8 sequences become correct encodings of U+FFF8.
func Explode(s string) []string { func Explode(s string) []string {
a := make([]string, utf8.RuneCountInString(s)); a := make([]string, utf8.RuneCountInString(s));
j := 0;
var size, rune int; var size, rune int;
for i := 0; i < len(a); i++ { i := 0;
rune, size = utf8.DecodeRuneInString(s, j); for len(s) > 0 {
rune, size = utf8.DecodeRuneInString(s);
s = s[size:len(s)];
a[i] = string(rune); a[i] = string(rune);
j += size; i++;
} }
return a return a
} }

View File

@ -108,11 +108,12 @@ func decodeRuneInternal(p []byte) (rune, size int, short bool) {
return RuneError, 1, false return RuneError, 1, false
} }
func decodeRuneInStringInternal(s string, i int, n int) (rune, size int, short bool) { func decodeRuneInStringInternal(s string) (rune, size int, short bool) {
n := len(s);
if n < 1 { if n < 1 {
return RuneError, 0, true; return RuneError, 0, true;
} }
c0 := s[i]; c0 := s[0];
// 1-byte, 7-bit sequence? // 1-byte, 7-bit sequence?
if c0 < _Tx { if c0 < _Tx {
@ -128,7 +129,7 @@ func decodeRuneInStringInternal(s string, i int, n int) (rune, size int, short b
if n < 2 { if n < 2 {
return RuneError, 1, true return RuneError, 1, true
} }
c1 := s[i+1]; c1 := s[1];
if c1 < _Tx || _T2 <= c1 { if c1 < _Tx || _T2 <= c1 {
return RuneError, 1, false return RuneError, 1, false
} }
@ -146,7 +147,7 @@ func decodeRuneInStringInternal(s string, i int, n int) (rune, size int, short b
if n < 3 { if n < 3 {
return RuneError, 1, true return RuneError, 1, true
} }
c2 := s[i+2]; c2 := s[2];
if c2 < _Tx || _T2 <= c2 { if c2 < _Tx || _T2 <= c2 {
return RuneError, 1, false return RuneError, 1, false
} }
@ -164,7 +165,7 @@ func decodeRuneInStringInternal(s string, i int, n int) (rune, size int, short b
if n < 4 { if n < 4 {
return RuneError, 1, true return RuneError, 1, true
} }
c3 := s[i+3]; c3 := s[3];
if c3 < _Tx || _T2 <= c3 { if c3 < _Tx || _T2 <= c3 {
return RuneError, 1, false return RuneError, 1, false
} }
@ -190,8 +191,8 @@ func FullRune(p []byte) bool {
} }
// FullRuneInString is like FullRune but its input is a string. // FullRuneInString is like FullRune but its input is a string.
func FullRuneInString(s string, i int) bool { func FullRuneInString(s string) bool {
rune, size, short := decodeRuneInStringInternal(s, i, len(s) - i); rune, size, short := decodeRuneInStringInternal(s);
return !short return !short
} }
@ -203,9 +204,9 @@ func DecodeRune(p []byte) (rune, size int) {
} }
// DecodeRuneInString is like DecodeRune but its input is a string. // DecodeRuneInString is like DecodeRune but its input is a string.
func DecodeRuneInString(s string, i int) (rune, size int) { func DecodeRuneInString(s string) (rune, size int) {
var short bool; var short bool;
rune, size, short = decodeRuneInStringInternal(s, i, len(s) - i); rune, size, short = decodeRuneInStringInternal(s);
return; return;
} }
@ -281,7 +282,7 @@ func RuneCountInString(s string) int {
if s[i] < RuneSelf { if s[i] < RuneSelf {
i++; i++;
} else { } else {
rune, size, short := decodeRuneInStringInternal(s, i, ei - i); rune, size, short := decodeRuneInStringInternal(s[i:ei]);
i += size; i += size;
} }
} }

View File

@ -58,17 +58,17 @@ func TestFullRune(t *testing.T) {
if !utf8.FullRune(b) { if !utf8.FullRune(b) {
t.Errorf("FullRune(%q) (rune %04x) = false, want true", b, m.rune); t.Errorf("FullRune(%q) (rune %04x) = false, want true", b, m.rune);
} }
s := "xx"+m.str; s := m.str;
if !utf8.FullRuneInString(s, 2) { if !utf8.FullRuneInString(s) {
t.Errorf("FullRuneInString(%q, 2) (rune %04x) = false, want true", s, m.rune); t.Errorf("FullRuneInString(%q) (rune %04x) = false, want true", s, m.rune);
} }
b1 := b[0:len(b)-1]; b1 := b[0:len(b)-1];
if utf8.FullRune(b1) { if utf8.FullRune(b1) {
t.Errorf("FullRune(%q) = true, want false", b1); t.Errorf("FullRune(%q) = true, want false", b1);
} }
s1 := "xxx"+string(b1); s1 := string(b1);
if utf8.FullRuneInString(s1, 3) { if utf8.FullRuneInString(s1) {
t.Errorf("FullRune(%q, 3) = true, want false", s1); t.Errorf("FullRune(%q) = true, want false", s1);
} }
} }
} }
@ -106,10 +106,10 @@ func TestDecodeRune(t *testing.T) {
if rune != m.rune || size != len(b) { if rune != m.rune || size != len(b) {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, m.rune, len(b)); t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, m.rune, len(b));
} }
s := "xx"+m.str; s := m.str;
rune, size = utf8.DecodeRuneInString(s, 2); rune, size = utf8.DecodeRuneInString(s);
if rune != m.rune || size != len(b) { if rune != m.rune || size != len(b) {
t.Errorf("DecodeRune(%q, 2) = 0x%04x, %d want 0x%04x, %d", s, rune, size, m.rune, len(b)); t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, m.rune, len(b));
} }
// there's an extra byte that bytes left behind - make sure trailing byte works // there's an extra byte that bytes left behind - make sure trailing byte works
@ -117,10 +117,10 @@ func TestDecodeRune(t *testing.T) {
if rune != m.rune || size != len(b) { if rune != m.rune || size != len(b) {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, m.rune, len(b)); t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, m.rune, len(b));
} }
s = "x"+m.str+"\x00"; s = m.str+"\x00";
rune, size = utf8.DecodeRuneInString(s, 1); rune, size = utf8.DecodeRuneInString(s);
if rune != m.rune || size != len(b) { if rune != m.rune || size != len(b) {
t.Errorf("DecodeRuneInString(%q, 1) = 0x%04x, %d want 0x%04x, %d", s, rune, size, m.rune, len(b)); t.Errorf("DecodeRuneInString(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, m.rune, len(b));
} }
// make sure missing bytes fail // make sure missing bytes fail
@ -132,10 +132,10 @@ func TestDecodeRune(t *testing.T) {
if rune != RuneError || size != wantsize { if rune != RuneError || size != wantsize {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b[0:len(b)-1], rune, size, RuneError, wantsize); t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b[0:len(b)-1], rune, size, RuneError, wantsize);
} }
s = "xxx"+m.str[0:len(m.str)-1]; s = m.str[0:len(m.str)-1];
rune, size = utf8.DecodeRuneInString(s, 3); rune, size = utf8.DecodeRuneInString(s);
if rune != RuneError || size != wantsize { if rune != RuneError || size != wantsize {
t.Errorf("DecodeRuneInString(%q, 3) = 0x%04x, %d want 0x%04x, %d", s, rune, size, RuneError, wantsize); t.Errorf("DecodeRuneInString(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, RuneError, wantsize);
} }
// make sure bad sequences fail // make sure bad sequences fail
@ -148,10 +148,10 @@ func TestDecodeRune(t *testing.T) {
if rune != RuneError || size != 1 { if rune != RuneError || size != 1 {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, RuneError, 1); t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, RuneError, 1);
} }
s = "xxxx"+string(b); s = string(b);
rune, size = utf8.DecodeRune(b); rune, size = utf8.DecodeRune(b);
if rune != RuneError || size != 1 { if rune != RuneError || size != 1 {
t.Errorf("DecodeRuneInString(%q, 4) = 0x%04x, %d want 0x%04x, %d", s, rune, size, RuneError, 1); t.Errorf("DecodeRuneInString(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, RuneError, 1);
} }
} }
} }

View File

@ -21,7 +21,7 @@ func main() {
ok := true; ok := true;
cnum := 0; cnum := 0;
for i, c = range s { for i, c = range s {
rune, size := utf8.DecodeRuneInString(s, i); // check it another way rune, size := utf8.DecodeRuneInString(s[i:len(s)]); // check it another way
if i != offset { if i != offset {
fmt.Printf("unexpected offset %d not %d\n", i, offset); fmt.Printf("unexpected offset %d not %d\n", i, offset);
ok = false; ok = false;

View File

@ -23,7 +23,7 @@ func main() {
var l = len(s); var l = len(s);
for w, i, j := 0,0,0; i < l; i += w { for w, i, j := 0,0,0; i < l; i += w {
var r int; var r int;
r, w = utf8.DecodeRuneInString(s, i); r, w = utf8.DecodeRuneInString(s[i:len(s)]);
if w == 0 { panic("zero width in string") } if w == 0 { panic("zero width in string") }
if r != chars[j] { panic("wrong value from string") } if r != chars[j] { panic("wrong value from string") }
j++; j++;