mirror of
https://github.com/golang/go
synced 2024-11-18 11:04:42 -07:00
internal/lsp: adding utf16 handling to the span package
Change-Id: Icf8a531c4257e31178beea8f98b755648938fa7a Reviewed-on: https://go-review.googlesource.com/c/tools/+/166777 Reviewed-by: Rebecca Stambler <rstambler@golang.org>
This commit is contained in:
parent
11955173bd
commit
5c2858a9cf
71
internal/span/utf16.go
Normal file
71
internal/span/utf16.go
Normal file
@ -0,0 +1,71 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package span
|
||||
|
||||
import (
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// ToUTF16Column calculates the utf16 column expressed by the point given the
|
||||
// supplied file contents.
|
||||
// This is used to convert from the native (always in bytes) column
|
||||
// representation and the utf16 counts used by some editors.
|
||||
func ToUTF16Column(offsets Offsets, p Point, content []byte) int {
|
||||
if content == nil {
|
||||
return -1
|
||||
}
|
||||
// make sure we have a valid offset
|
||||
p.updateOffset(offsets)
|
||||
lineOffset := p.Offset - (p.Column - 1)
|
||||
if lineOffset < 0 || p.Offset > len(content) {
|
||||
return -1
|
||||
}
|
||||
// use the offset to pick out the line start
|
||||
start := content[lineOffset:]
|
||||
// now truncate down to the supplied column
|
||||
start = start[:p.Column]
|
||||
// and count the number of utf16 characters
|
||||
// in theory we could do this by hand more efficiently...
|
||||
return len(utf16.Encode([]rune(string(start))))
|
||||
}
|
||||
|
||||
// FromUTF16Column calculates the byte column expressed by the utf16 character
|
||||
// offset given the supplied file contents.
|
||||
// This is used to convert from the utf16 counts used by some editors to the
|
||||
// native (always in bytes) column representation.
|
||||
func FromUTF16Column(offsets Offsets, line, chr int, content []byte) Point {
|
||||
// first build a point for the start of the line the normal way
|
||||
p := Point{Line: line, Column: 1, Offset: 0}
|
||||
// now use that to work out the byte offset of the start of the line
|
||||
p.updateOffset(offsets)
|
||||
if chr <= 1 {
|
||||
return p
|
||||
}
|
||||
// use that to pick the line out of the file content
|
||||
remains := content[p.Offset:]
|
||||
// and now scan forward the specified number of characters
|
||||
for count := 1; count < chr; count++ {
|
||||
if len(remains) <= 0 {
|
||||
return Point{Offset: -1}
|
||||
}
|
||||
r, w := utf8.DecodeRune(remains)
|
||||
if r == '\n' {
|
||||
return Point{Offset: -1}
|
||||
}
|
||||
remains = remains[w:]
|
||||
if r >= 0x10000 {
|
||||
// a two point rune
|
||||
count++
|
||||
// if we finished in a two point rune, do not advance past the first
|
||||
if count >= chr {
|
||||
break
|
||||
}
|
||||
}
|
||||
p.Column += w
|
||||
p.Offset += w
|
||||
}
|
||||
return p
|
||||
}
|
61
internal/span/utf16_test.go
Normal file
61
internal/span/utf16_test.go
Normal file
@ -0,0 +1,61 @@
|
||||
// Copyright 2019 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package span_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/tools/internal/span"
|
||||
)
|
||||
|
||||
// TestUTF16 tests the conversion of column information between the native
|
||||
// byte offset and the utf16 form.
|
||||
func TestUTF16(t *testing.T) {
|
||||
var input = []byte(`
|
||||
𐐀23456789
|
||||
1𐐀3456789
|
||||
12𐐀456789
|
||||
123𐐀56789
|
||||
1234𐐀6789
|
||||
12345𐐀789
|
||||
123456𐐀89
|
||||
1234567𐐀9
|
||||
12345678𐐀
|
||||
`[1:])
|
||||
c := span.NewContentConverter("test", input)
|
||||
for line := 1; line <= 9; line++ {
|
||||
runeColumn, runeChr := 0, 0
|
||||
for chr := 1; chr <= 9; chr++ {
|
||||
switch {
|
||||
case chr <= line:
|
||||
runeChr = chr
|
||||
runeColumn = chr
|
||||
case chr == line+1:
|
||||
runeChr = chr - 1
|
||||
runeColumn = chr - 1
|
||||
default:
|
||||
runeChr = chr
|
||||
runeColumn = chr + 2
|
||||
}
|
||||
p := span.Point{Line: line, Column: runeColumn}
|
||||
// check conversion to utf16 format
|
||||
gotChr := span.ToUTF16Column(c, p, input)
|
||||
if runeChr != gotChr {
|
||||
t.Errorf("ToUTF16Column(%v): expected %v, got %v", p, runeChr, gotChr)
|
||||
}
|
||||
// we deliberately delay setting the point's offset
|
||||
p.Offset = (line-1)*13 + (p.Column - 1)
|
||||
offset := c.ToOffset(p.Line, p.Column)
|
||||
if p.Offset != offset {
|
||||
t.Errorf("ToOffset(%v,%v): expected %v, got %v", p.Line, p.Column, p.Offset, offset)
|
||||
}
|
||||
// and check the conversion back
|
||||
gotPoint := span.FromUTF16Column(c, p.Line, chr, input)
|
||||
if p != gotPoint {
|
||||
t.Errorf("FromUTF16Column(%v,%v): expected %v, got %v", p.Line, chr, p, gotPoint)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user