1
0
mirror of https://github.com/golang/go synced 2024-09-30 14:18:32 -06:00

internal/lsp: adding utf16 handling to the span package

Change-Id: Icf8a531c4257e31178beea8f98b755648938fa7a
Reviewed-on: https://go-review.googlesource.com/c/tools/+/166777
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
This commit is contained in:
Ian Cottrell 2019-03-08 13:22:06 -05:00
parent 11955173bd
commit 5c2858a9cf
2 changed files with 132 additions and 0 deletions

71
internal/span/utf16.go Normal file
View File

@ -0,0 +1,71 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package span
import (
"unicode/utf16"
"unicode/utf8"
)
// ToUTF16Column calculates the utf16 column expressed by the point given the
// supplied file contents.
// This is used to convert between the native (always in bytes) column
// representation and the utf16 counts used by some editors.
//
// Both the byte column of p and the returned utf16 column are 1-based: the
// result is the number of utf16 code units that precede the column on its
// line, plus one. It returns -1 if content is nil, if the column is less
// than 1, or if the line start or the point lies outside content.
func ToUTF16Column(offsets Offsets, p Point, content []byte) int {
	if content == nil {
		return -1
	}
	// make sure we have a valid offset; p is a copy, so the caller's point
	// is not modified
	p.updateOffset(offsets)
	if p.Column < 1 {
		return -1
	}
	// the column is a 1-based byte count, so stepping back column-1 bytes
	// from the point gives the offset of the start of the line
	lineOffset := p.Offset - (p.Column - 1)
	if lineOffset < 0 || p.Offset > len(content) {
		return -1
	}
	// pick out only the bytes of the line that precede the column; slicing
	// up to p.Offset (rather than one byte past it) keeps a point at the very
	// end of content inside the slice bounds
	before := content[lineOffset:p.Offset]
	// and count the number of utf16 characters
	// in theory we could do this by hand more efficiently...
	return len(utf16.Encode([]rune(string(before)))) + 1
}
// FromUTF16Column calculates the byte column expressed by the utf16 character
// offset given the supplied file contents.
// This is used to convert from the utf16 counts used by some editors to the
// native (always in bytes) column representation.
//
// line and chr are 1-based. A chr of 1 or less yields the point at the start
// of the line without consulting content. If chr lands on the second half of
// a surrogate pair the returned point refers to the start of that rune.
// Point{Offset: -1} is returned when the line start lies outside content, or
// when chr cannot be reached before a newline or the end of content.
func FromUTF16Column(offsets Offsets, line, chr int, content []byte) Point {
	// first build a point for the start of the line the normal way
	p := Point{Line: line, Column: 1, Offset: 0}
	// now use that to work out the byte offset of the start of the line
	p.updateOffset(offsets)
	if chr <= 1 {
		return p
	}
	// the line has to start inside content, otherwise slicing below would
	// panic instead of reporting the failure
	if p.Offset < 0 || p.Offset > len(content) {
		return Point{Offset: -1}
	}
	// use that to pick the line out of the file content
	remains := content[p.Offset:]
	// and now scan forward the specified number of characters
	for count := 1; count < chr; count++ {
		if len(remains) <= 0 {
			return Point{Offset: -1}
		}
		r, w := utf8.DecodeRune(remains)
		if r == '\n' {
			// chr is beyond the end of this line
			return Point{Offset: -1}
		}
		remains = remains[w:]
		if r >= 0x10000 {
			// a two point rune: it occupies two utf16 code units, so
			// account for the extra one here
			count++
			// if we finished in a two point rune, do not advance past the first
			if count >= chr {
				break
			}
		}
		p.Column += w
		p.Offset += w
	}
	return p
}

View File

@ -0,0 +1,61 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package span_test
import (
"testing"
"golang.org/x/tools/internal/span"
)
// TestUTF16 checks that column information survives a round trip between the
// native byte form and the utf16 form.
//
// The input is nine lines of nine characters each; line N carries the rune 𐐀
// as its Nth character and plain digits everywhere else. For every line and
// every utf16 position the test verifies ToUTF16Column, the converter's
// ToOffset, and FromUTF16Column against hand computed expectations.
func TestUTF16(t *testing.T) {
	var input = []byte(`
𐐀23456789
1𐐀3456789
12𐐀456789
123𐐀56789
1234𐐀6789
12345𐐀789
123456𐐀89
1234567𐐀9
12345678𐐀
`[1:])
	// every line is eight single byte digits, one four byte rune and a newline
	const bytesPerLine = 13
	converter := span.NewContentConverter("test", input)
	for line := 1; line <= 9; line++ {
		for chr := 1; chr <= 9; chr++ {
			// positions up to and including the wide rune map one to one
			wantChr, wantColumn := chr, chr
			switch {
			case chr == line+1:
				// second half of the wide rune: expect the rune's start
				wantChr, wantColumn = chr-1, chr-1
			case chr > line+1:
				// past the wide rune: the byte column runs two ahead of chr
				wantColumn = chr + 2
			}
			p := span.Point{Line: line, Column: wantColumn}
			// check conversion to utf16 format
			if gotChr := span.ToUTF16Column(converter, p, input); wantChr != gotChr {
				t.Errorf("ToUTF16Column(%v): expected %v, got %v", p, wantChr, gotChr)
			}
			// we deliberately delay setting the point's offset
			p.Offset = (line-1)*bytesPerLine + (p.Column - 1)
			if offset := converter.ToOffset(p.Line, p.Column); p.Offset != offset {
				t.Errorf("ToOffset(%v,%v): expected %v, got %v", p.Line, p.Column, p.Offset, offset)
			}
			// and check the conversion back
			if gotPoint := span.FromUTF16Column(converter, p.Line, chr, input); p != gotPoint {
				t.Errorf("FromUTF16Column(%v,%v): expected %v, got %v", p.Line, chr, p, gotPoint)
			}
		}
	}
}