internal/lsp: adding utf16 handling to the span package

Change-Id: Icf8a531c4257e31178beea8f98b755648938fa7a Reviewed-on: https://go-review.googlesource.com/c/tools/+/166777 Reviewed-by: Rebecca Stambler <rstambler@golang.org>
2024-09-30 14:18:32 -06:00 · 2019-03-08 13:22:06 -05:00 · 2019-03-08 13:22:06 -05:00 · 5c2858a9cf
commit 5c2858a9cf
parent 11955173bd
2 changed files with 132 additions and 0 deletions
--- a/internal/span/utf16.go
+++ b/internal/span/utf16.go
@ -0,0 +1,71 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package span
+
+import (
+	"unicode/utf16"
+	"unicode/utf8"
+)
+
+// ToUTF16Column calculates the 1-based utf16 column expressed by the point
+// given the supplied file contents.
+// This is used to convert from the native (always in bytes) column
+// representation to the utf16 counts used by some editors.
+//
+// p.updateOffset(offsets) is called before p.Offset is read, so the caller
+// does not need to have filled in the offset.
+// The result is -1 if content is nil, if p.Column is less than 1, or if
+// either the start of the line or the point itself lies outside content.
+func ToUTF16Column(offsets Offsets, p Point, content []byte) int {
+	if content == nil {
+		return -1
+	}
+	// make sure we have a valid offset
+	p.updateOffset(offsets)
+	// number of bytes on the line that come before the point
+	before := p.Column - 1
+	if before < 0 {
+		return -1
+	}
+	lineOffset := p.Offset - before
+	if lineOffset < 0 || p.Offset > len(content) {
+		return -1
+	}
+	// Only the bytes strictly before the point are measured, the point itself
+	// is then one utf16 unit further on.
+	// Stopping at p.Offset rather than one byte past it keeps the slice in
+	// bounds when the point is at the very end of content, and avoids cutting
+	// into a multi byte rune that starts at the point.
+	prefix := content[lineOffset:p.Offset]
+	// and count the number of utf16 characters
+	// in theory we could do this by hand more efficiently...
+	return len(utf16.Encode([]rune(string(prefix)))) + 1
+}
+
+// FromUTF16Column calculates the byte column expressed by the utf16 character
+// offset given the supplied file contents.
+// This is used to convert from the utf16 counts used by some editors to the
+// native (always in bytes) column representation.
+//
+// line and chr are both 1-based. A chr of 1 or less yields the point for the
+// start of the line without looking at content.
+// A chr that lands on the second half of a two unit (surrogate pair) rune
+// yields the point at the start of that rune.
+// The result is Point{Offset: -1} if the start of the line lies outside
+// content, or if a newline or the end of content is reached before chr.
+func FromUTF16Column(offsets Offsets, line, chr int, content []byte) Point {
+	// first build a point for the start of the line the normal way
+	p := Point{Line: line, Column: 1, Offset: 0}
+	// now use that to work out the byte offset of the start of the line
+	p.updateOffset(offsets)
+	if chr <= 1 {
+		return p
+	}
+	// a line start outside the content cannot be scanned, report it the same
+	// way as running off the end rather than panicking on the slice below
+	if p.Offset < 0 || p.Offset > len(content) {
+		return Point{Offset: -1}
+	}
+	// use that to pick the line out of the file content
+	remains := content[p.Offset:]
+	// and now scan forward the specified number of characters
+	for count := 1; count < chr; count++ {
+		if len(remains) <= 0 {
+			return Point{Offset: -1}
+		}
+		r, w := utf8.DecodeRune(remains)
+		if r == '\n' {
+			// chr is past the end of this line
+			return Point{Offset: -1}
+		}
+		remains = remains[w:]
+		if r >= 0x10000 {
+			// a two point rune, it uses up an extra utf16 unit
+			count++
+			// if we finished in a two point rune, do not advance past the first
+			if count >= chr {
+				break
+			}
+		}
+		// columns and offsets are both in bytes, so step by the encoded width
+		p.Column += w
+		p.Offset += w
+	}
+	return p
+}
--- a/internal/span/utf16_test.go
+++ b/internal/span/utf16_test.go
@ -0,0 +1,61 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package span_test
+
+import (
+	"testing"
+
+	"golang.org/x/tools/internal/span"
+)
+
+// TestUTF16 round trips columns between the native byte offset form and the
+// utf16 form, using lines that each hold a single two unit rune at a
+// different position.
+func TestUTF16(t *testing.T) {
+	// every line is 8 single byte digits, one 4 byte rune and a newline
+	const bytesPerLine = 13
+	var input = []byte(`
+𐐀23456789
+1𐐀3456789
+12𐐀456789
+123𐐀56789
+1234𐐀6789
+12345𐐀789
+123456𐐀89
+1234567𐐀9
+12345678𐐀
+`[1:])
+	converter := span.NewContentConverter("test", input)
+	for line := 1; line <= 9; line++ {
+		for chr := 1; chr <= 9; chr++ {
+			// on line N the wide rune occupies utf16 characters N and N+1
+			wantChr, wantColumn := chr, chr
+			if chr == line+1 {
+				// second half of the wide rune, expect the start of the rune
+				wantChr, wantColumn = chr-1, chr-1
+			} else if chr > line+1 {
+				// past the wide rune, which is 4 bytes but only 2 utf16 units
+				wantColumn = chr + 2
+			}
+			want := span.Point{Line: line, Column: wantColumn}
+			// check conversion to utf16 format
+			if got := span.ToUTF16Column(converter, want, input); wantChr != got {
+				t.Errorf("ToUTF16Column(%v): expected %v, got %v", want, wantChr, got)
+			}
+			// we deliberately delay setting the point's offset
+			want.Offset = (line-1)*bytesPerLine + (want.Column - 1)
+			if got := converter.ToOffset(want.Line, want.Column); want.Offset != got {
+				t.Errorf("ToOffset(%v,%v): expected %v, got %v", want.Line, want.Column, want.Offset, got)
+			}
+			// and check the conversion back
+			if got := span.FromUTF16Column(converter, want.Line, chr, input); want != got {
+				t.Errorf("FromUTF16Column(%v,%v): expected %v, got %v", want.Line, chr, want, got)
+			}
+		}
+	}
+}