mirror of
https://github.com/golang/go
synced 2024-11-22 07:24:47 -07:00
utf16: new package
Needed for interacting with various legacy interfaces, like Windows and the Mac OS clipboard.

R=r
CC=golang-dev
https://golang.org/cl/595041
This commit is contained in:
parent
67148530d2
commit
c75f891a72
@ -117,6 +117,7 @@ DIRS=\
|
||||
testing/script\
|
||||
time\
|
||||
unicode\
|
||||
utf16\
|
||||
utf8\
|
||||
websocket\
|
||||
xgb\
|
||||
|
11
src/pkg/utf16/Makefile
Normal file
11
src/pkg/utf16/Makefile
Normal file
@ -0,0 +1,11 @@
|
||||
# Copyright 2009 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# Pull in the makefile fragment whose name is selected by $(GOARCH).
include ../../Make.$(GOARCH)
|
||||
|
||||
# Name of the package built from this directory.
TARG=utf16
|
||||
# Go source files that make up the package (test files are not listed here).
GOFILES=\
|
||||
utf16.go\
|
||||
|
||||
# Presumably supplies the shared package build rules that consume TARG and
# GOFILES -- NOTE(review): Make.pkg is not visible here; confirm.
include ../../Make.pkg
|
74
src/pkg/utf16/utf16.go
Normal file
74
src/pkg/utf16/utf16.go
Normal file
@ -0,0 +1,74 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package utf16 implements encoding and decoding of UTF-16 sequences.
|
||||
package utf16
|
||||
|
||||
import "unicode"
|
||||
|
||||
const (
|
||||
// Code units in [0xd800, 0xdc00) carry the high 10 bits of a surrogate pair.
|
||||
// Code units in [0xdc00, 0xe000) carry the low 10 bits of a surrogate pair.
|
||||
// The encoded code point is those 20 bits plus 0x10000.
|
||||
surr1 = 0xd800 // first high-surrogate code unit
|
||||
surr2 = 0xdc00 // first low-surrogate code unit (one past the last high surrogate)
|
||||
surr3 = 0xe000 // one past the last low-surrogate code unit
|
||||
|
||||
surrSelf = 0x10000 // smallest code point that is encoded as a surrogate pair
|
||||
)
|
||||
|
||||
// Encode returns the UTF-16 encoding of the Unicode code point sequence s.
|
||||
func Encode(s []int) []uint16 {
|
||||
n := len(s)
|
||||
for _, v := range s {
|
||||
if v >= surrSelf {
|
||||
n++
|
||||
}
|
||||
}
|
||||
|
||||
a := make([]uint16, n)
|
||||
n = 0
|
||||
for _, v := range s {
|
||||
switch {
|
||||
case v < 0, surr1 <= v && v < surr3, v > unicode.MaxRune:
|
||||
v = unicode.ReplacementChar
|
||||
fallthrough
|
||||
case v < surrSelf:
|
||||
a[n] = uint16(v)
|
||||
n++
|
||||
default:
|
||||
v -= surrSelf
|
||||
a[n] = uint16(surr1 + (v>>10)&0x3ff)
|
||||
a[n+1] = uint16(surr2 + v&0x3ff)
|
||||
n += 2
|
||||
}
|
||||
}
|
||||
return a[0:n]
|
||||
}
|
||||
|
||||
// Decode returns the Unicode code point sequence represented
|
||||
// by the UTF-16 encoding s.
|
||||
func Decode(s []uint16) []int {
|
||||
a := make([]int, len(s))
|
||||
n := 0
|
||||
for i := 0; i < len(s); i++ {
|
||||
switch r := s[i]; {
|
||||
case surr1 <= r && r < surr2 && i+1 < len(s) &&
|
||||
surr2 <= s[i+1] && s[i+1] < surr3:
|
||||
// valid surrogate sequence
|
||||
a[n] = (int(r)-surr1)<<10 | (int(s[i+1]) - surr2) + 0x10000
|
||||
i++
|
||||
n++
|
||||
case surr1 <= r && r < surr3:
|
||||
// invalid surrogate sequence
|
||||
a[n] = unicode.ReplacementChar
|
||||
n++
|
||||
default:
|
||||
// normal rune
|
||||
a[n] = int(r)
|
||||
n++
|
||||
}
|
||||
}
|
||||
return a[0:n]
|
||||
}
|
81
src/pkg/utf16/utf16_test.go
Normal file
81
src/pkg/utf16/utf16_test.go
Normal file
@ -0,0 +1,81 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package utf16
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// encodeTest pairs an input for Encode with the output it must produce.
type encodeTest struct {
|
||||
in []int // code points handed to Encode
|
||||
out []uint16 // expected UTF-16 code units
|
||||
}
|
||||
|
||||
// encodeTests is the table walked by TestEncode.
var encodeTests = []encodeTest{
|
||||
// Small values pass through unchanged.
encodeTest{[]int{1, 2, 3, 4}, []uint16{1, 2, 3, 4}},
|
||||
// 0xffff is the last single-unit value; everything from 0x10000 up to
// 0x10ffff becomes a surrogate pair.
encodeTest{[]int{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
|
||||
[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}},
|
||||
// Surrogate values, values above 0x10ffff and negative values are replaced
// by 0xfffd; the neighbours 0xd7ff and 0xe000 are kept.
encodeTest{[]int{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1},
|
||||
[]uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}},
|
||||
}
|
||||
|
||||
func TestEncode(t *testing.T) {
|
||||
for _, tt := range encodeTests {
|
||||
out := Encode(tt.in)
|
||||
if !reflect.DeepEqual(out, tt.out) {
|
||||
t.Errorf("Encode(%v) = %v; want %v", hex(tt.in), hex16(out), hex16(tt.out))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// decodeTest pairs an input for Decode with the output it must produce.
type decodeTest struct {
|
||||
in []uint16 // UTF-16 code units handed to Decode
|
||||
out []int // expected code points
|
||||
}
|
||||
|
||||
// decodeTests is the table walked by TestDecode.
var decodeTests = []decodeTest{
|
||||
// Small values pass through unchanged.
decodeTest{[]uint16{1, 2, 3, 4}, []int{1, 2, 3, 4}},
|
||||
// Well-formed surrogate pairs are combined into a single code point.
decodeTest{[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff},
|
||||
[]int{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}},
|
||||
// A high surrogate not followed by a low surrogate becomes 0xfffd.
decodeTest{[]uint16{0xd800, 'a'}, []int{0xfffd, 'a'}},
|
||||
// A low surrogate on its own becomes 0xfffd.
decodeTest{[]uint16{0xdfff}, []int{0xfffd}},
|
||||
}
|
||||
|
||||
func TestDecode(t *testing.T) {
|
||||
for _, tt := range decodeTests {
|
||||
out := Decode(tt.in)
|
||||
if !reflect.DeepEqual(out, tt.out) {
|
||||
t.Errorf("Decode(%v) = %v; want %v", hex16(tt.in), hex(out), hex(tt.out))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type hex []int
|
||||
|
||||
func (h hex) Format(f fmt.State, c int) {
|
||||
fmt.Fprint(f, "[")
|
||||
for i, v := range h {
|
||||
if i > 0 {
|
||||
fmt.Fprint(f, " ")
|
||||
}
|
||||
fmt.Fprintf(f, "%x", v)
|
||||
}
|
||||
fmt.Fprint(f, "]")
|
||||
}
|
||||
|
||||
type hex16 []uint16
|
||||
|
||||
func (h hex16) Format(f fmt.State, c int) {
|
||||
fmt.Fprint(f, "[")
|
||||
for i, v := range h {
|
||||
if i > 0 {
|
||||
fmt.Fprint(f, " ")
|
||||
}
|
||||
fmt.Fprintf(f, "%x", v)
|
||||
}
|
||||
fmt.Fprint(f, "]")
|
||||
}
|
Loading…
Reference in New Issue
Block a user