1
0
mirror of https://github.com/golang/go synced 2024-09-30 16:18:35 -06:00
go/internal/span/utf16.go
Paul Jolly 7af746645d internal/span: fix another off-by-one in ToUTF16Column
The current tests contain a bug in the priming of funnyString; the
subslicing leaves the resulting content slice with a capacity greater
than its length. This allowed a bug ToUTF16Column to sneak through where
we were not using 0-based column as the offset within the line.

Fix the priming of funnyString, and fix the implementation of
ToUTF16Column.

Change-Id: I2618878d85bba26f52f99a3fc136ad21fe198dfc
Reviewed-on: https://go-review.googlesource.com/c/tools/+/174357
Reviewed-by: Ian Cottrell <iancottrell@google.com>
Run-TryBot: Ian Cottrell <iancottrell@google.com>
2019-04-29 18:16:56 +00:00

88 lines
2.8 KiB
Go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package span
import (
"fmt"
"unicode/utf16"
"unicode/utf8"
)
// ToUTF16Column calculates the utf16 column expressed by the point given the
// supplied file contents.
// This is used to convert from the native (always in bytes) column
// representation and the utf16 counts used by some editors.
func ToUTF16Column(p Point, content []byte) (int, error) {
if content == nil {
return -1, fmt.Errorf("ToUTF16Column: missing content")
}
if !p.HasPosition() {
return -1, fmt.Errorf("ToUTF16Column: point is missing position")
}
if !p.HasOffset() {
return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
}
offset := p.Offset() // 0-based
colZero := p.Column() - 1 // 0-based
if colZero == 0 {
// 0-based column 0, so it must be chr 1
return 1, nil
}
// work out the offset at the start of the line using the column
lineOffset := offset - colZero
if lineOffset < 0 || offset > len(content) {
return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
}
// Use the offset to pick out the line start.
// This cannot panic: offset > len(content) and lineOffset < offset.
start := content[lineOffset:]
// Now, truncate down to the supplied column.
start = start[:colZero]
// and count the number of utf16 characters
// in theory we could do this by hand more efficiently...
return len(utf16.Encode([]rune(string(start)))) + 1, nil
}
// FromUTF16Column advances the point by the utf16 character offset given the
// supplied line contents.
// This is used to convert from the utf16 counts used by some editors to the
// native (always in bytes) column representation.
func FromUTF16Column(p Point, chr int, content []byte) (Point, error) {
if !p.HasOffset() {
return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset")
}
// if chr is 1 then no adjustment needed
if chr <= 1 {
return p, nil
}
if p.Offset() >= len(content) {
return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content))
}
remains := content[p.Offset():]
// scan forward the specified number of characters
for count := 1; count < chr; count++ {
if len(remains) <= 0 {
return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content")
}
r, w := utf8.DecodeRune(remains)
if r == '\n' {
return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the line")
}
remains = remains[w:]
if r >= 0x10000 {
// a two point rune
count++
// if we finished in a two point rune, do not advance past the first
if count >= chr {
break
}
}
p.v.Column += w
p.v.Offset += w
}
return p, nil
}