mirror of
https://github.com/golang/go
synced 2024-11-08 18:46:16 -07:00
c48b77b1b5
Surrogate halves are part of UTF-16 and should never appear in UTF-8. (The rune that two combined halves represent in UTF-16 should be encoded directly.) Encoding: encode as RuneError. Decoding: convert to RuneError, consume one byte. This requires changing: (1) package unicode/utf8; (2) the runtime, for range over string. Also added utf8.ValidRune and fixed a bug in utf8.RuneLen. Fixes #3927. R=golang-dev, rsc, bsiegert CC=golang-dev https://golang.org/cl/6458099
72 lines
1.5 KiB
Go
72 lines
1.5 KiB
Go
// run
|
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Test range over strings.
|
|
|
|
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// main checks three properties of ranging over a string:
//
//   - each iteration yields the same rune and byte offset that
//     utf8.DecodeRuneInString reports for the same position, and those runes
//     match the expected table;
//   - ranging over an empty string leaves the iteration variables untouched;
//   - the bytes of a UTF-8-encoded surrogate half never surface as anything
//     other than utf8.RuneError.
//
// On any mismatch it prints a diagnostic, then "BUG: stringrange", and exits
// with status 1. On success it prints nothing.
func main() {
	// The string mixes NULs, an octal escape, two bytes that are not valid
	// UTF-8 on their own (\xca, \xFE), and 2-, 3- and 4-byte encodings.
	s := "\000\123\x00\xca\xFE\u0123\ubabe\U0000babe\U0010FFFFx"
	expect := []rune{0, 0123, 0, 0xFFFD, 0xFFFD, 0x123, 0xbabe, 0xbabe, 0x10FFFF, 'x'}
	offset := 0
	// i and c are declared outside the loops on purpose: the checks after
	// each loop inspect the values the range statements left behind.
	var i int
	var c rune
	ok := true
	cnum := 0
	for i, c = range s {
		// Guard the table lookup so a surplus rune is reported, not a panic.
		if cnum >= len(expect) {
			fmt.Printf("unexpected extra rune at offset %d: %x\n", i, c)
			ok = false
			break
		}
		r, size := utf8.DecodeRuneInString(s[i:]) // check it another way
		if i != offset {
			fmt.Printf("unexpected offset %d not %d\n", i, offset)
			ok = false
		}
		if r != expect[cnum] {
			fmt.Printf("unexpected rune %d from DecodeRuneInString: %x not %x\n", i, r, expect[cnum])
			ok = false
		}
		if c != expect[cnum] {
			// Report c, the rune produced by range, not the decoder's r.
			fmt.Printf("unexpected rune %d from range: %x not %x\n", i, c, expect[cnum])
			ok = false
		}
		offset += size
		cnum++
	}
	if cnum != len(expect) {
		fmt.Printf("range yielded %d runes, expected %d\n", cnum, len(expect))
		ok = false
	}
	// The last rune is the single byte 'x', so the final index is len(s)-1.
	if i != len(s)-1 {
		fmt.Println("after loop i is", i, "not", len(s)-1)
		ok = false
	}

	// Ranging over an empty string must not assign to the iteration
	// variables; the sentinel values must survive the loop.
	i = 12345
	c = 23456
	for i, c = range "" {
	}
	if i != 12345 {
		fmt.Println("range empty string assigned to index:", i)
		ok = false
	}
	if c != 23456 {
		fmt.Println("range empty string assigned to value:", c)
		ok = false
	}

	// \xed\xa0\x80 is the UTF-8-style encoding of the surrogate half U+D800;
	// range must yield only 'a' and utf8.RuneError for this string.
	for _, c := range "a\xed\xa0\x80a" {
		if c != 'a' && c != utf8.RuneError {
			fmt.Printf("surrogate UTF-8 does not error: %U\n", c)
			ok = false
		}
	}

	if !ok {
		fmt.Println("BUG: stringrange")
		os.Exit(1)
	}
}
|