From 5c2858a9cfe5880270ceee8da4391f3c44612b47 Mon Sep 17 00:00:00 2001 From: Ian Cottrell Date: Fri, 8 Mar 2019 13:22:06 -0500 Subject: [PATCH] internal/lsp: adding utf16 handling to the span package Change-Id: Icf8a531c4257e31178beea8f98b755648938fa7a Reviewed-on: https://go-review.googlesource.com/c/tools/+/166777 Reviewed-by: Rebecca Stambler --- internal/span/utf16.go | 71 +++++++++++++++++++++++++++++++++++++ internal/span/utf16_test.go | 61 +++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 internal/span/utf16.go create mode 100644 internal/span/utf16_test.go diff --git a/internal/span/utf16.go b/internal/span/utf16.go new file mode 100644 index 0000000000..94339993a6 --- /dev/null +++ b/internal/span/utf16.go @@ -0,0 +1,71 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package span + +import ( + "unicode/utf16" + "unicode/utf8" +) + +// ToUTF16Column calculates the utf16 column expressed by the point given the +// supplied file contents. +// This is used to convert from the native (always in bytes) column +// representation and the utf16 counts used by some editors. +func ToUTF16Column(offsets Offsets, p Point, content []byte) int { + if content == nil { + return -1 + } + // make sure we have a valid offset + p.updateOffset(offsets) + lineOffset := p.Offset - (p.Column - 1) + if lineOffset < 0 || p.Offset > len(content) { + return -1 + } + // use the offset to pick out the line start + start := content[lineOffset:] + // now truncate down to the supplied column + start = start[:p.Column] + // and count the number of utf16 characters + // in theory we could do this by hand more efficiently... + return len(utf16.Encode([]rune(string(start)))) +} + +// FromUTF16Column calculates the byte column expressed by the utf16 character +// offset given the supplied file contents. +// This is used to convert from the utf16 counts used by some editors to the +// native (always in bytes) column representation. +func FromUTF16Column(offsets Offsets, line, chr int, content []byte) Point { + // first build a point for the start of the line the normal way + p := Point{Line: line, Column: 1, Offset: 0} + // now use that to work out the byte offset of the start of the line + p.updateOffset(offsets) + if chr <= 1 { + return p + } + // use that to pick the line out of the file content + remains := content[p.Offset:] + // and now scan forward the specified number of characters + for count := 1; count < chr; count++ { + if len(remains) <= 0 { + return Point{Offset: -1} + } + r, w := utf8.DecodeRune(remains) + if r == '\n' { + return Point{Offset: -1} + } + remains = remains[w:] + if r >= 0x10000 { + // a two point rune + count++ + // if we finished in a two point rune, do not advance past the first + if count >= chr { + break + } + } + p.Column += w + p.Offset += w + } + return p +} diff --git a/internal/span/utf16_test.go b/internal/span/utf16_test.go new file mode 100644 index 0000000000..007bab77af --- /dev/null +++ b/internal/span/utf16_test.go @@ -0,0 +1,61 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package span_test + +import ( + "testing" + + "golang.org/x/tools/internal/span" +) + +// TestUTF16 tests the conversion of column information between the native +// byte offset and the utf16 form. +func TestUTF16(t *testing.T) { + var input = []byte(` +𐐀23456789 +1𐐀3456789 +12𐐀456789 +123𐐀56789 +1234𐐀6789 +12345𐐀789 +123456𐐀89 +1234567𐐀9 +12345678𐐀 +`[1:]) + c := span.NewContentConverter("test", input) + for line := 1; line <= 9; line++ { + runeColumn, runeChr := 0, 0 + for chr := 1; chr <= 9; chr++ { + switch { + case chr <= line: + runeChr = chr + runeColumn = chr + case chr == line+1: + runeChr = chr - 1 + runeColumn = chr - 1 + default: + runeChr = chr + runeColumn = chr + 2 + } + p := span.Point{Line: line, Column: runeColumn} + // check conversion to utf16 format + gotChr := span.ToUTF16Column(c, p, input) + if runeChr != gotChr { + t.Errorf("ToUTF16Column(%v): expected %v, got %v", p, runeChr, gotChr) + } + // we deliberately delay setting the point's offset + p.Offset = (line-1)*13 + (p.Column - 1) + offset := c.ToOffset(p.Line, p.Column) + if p.Offset != offset { + t.Errorf("ToOffset(%v,%v): expected %v, got %v", p.Line, p.Column, p.Offset, offset) + } + // and check the conversion back + gotPoint := span.FromUTF16Column(c, p.Line, chr, input) + if p != gotPoint { + t.Errorf("FromUTF16Column(%v,%v): expected %v, got %v", p.Line, chr, p, gotPoint) + } + } + } +}