From 7af746645d5165109de0b5cb499980c22812dfc2 Mon Sep 17 00:00:00 2001 From: Paul Jolly Date: Mon, 29 Apr 2019 18:36:30 +0100 Subject: [PATCH] internal/span: fix another off-by-one in ToUTF16Column The current tests contain a bug in the priming of funnyString; the subslicing leaves the resulting content slice with a capacity greater than its length. This allowed a bug in ToUTF16Column to sneak through where we were not using the 0-based column as the offset within the line. Fix the priming of funnyString, and fix the implementation of ToUTF16Column. Change-Id: I2618878d85bba26f52f99a3fc136ad21fe198dfc Reviewed-on: https://go-review.googlesource.com/c/tools/+/174357 Reviewed-by: Ian Cottrell Run-TryBot: Ian Cottrell --- internal/span/utf16.go | 14 +++++++------- internal/span/utf16_test.go | 4 +--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/internal/span/utf16.go b/internal/span/utf16.go index 97c06c951a..5f90a51b01 100644 --- a/internal/span/utf16.go +++ b/internal/span/utf16.go @@ -24,14 +24,14 @@ func ToUTF16Column(p Point, content []byte) (int, error) { if !p.HasOffset() { return -1, fmt.Errorf("ToUTF16Column: point is missing offset") } - offset := p.Offset() - col := p.Column() - if col == 1 { - // column 1, so it must be chr 1 + offset := p.Offset() // 0-based + colZero := p.Column() - 1 // 0-based + if colZero == 0 { + // 0-based column 0, so it must be chr 1 return 1, nil } // work out the offset at the start of the line using the column - lineOffset := offset - (col - 1) + lineOffset := offset - colZero if lineOffset < 0 || offset > len(content) { return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content)) } @@ -40,10 +40,10 @@ func ToUTF16Column(p Point, content []byte) (int, error) { start := content[lineOffset:] // Now, truncate down to the supplied column. 
- start = start[:col] + start = start[:colZero] // and count the number of utf16 characters // in theory we could do this by hand more efficiently... - return len(utf16.Encode([]rune(string(start)))), nil + return len(utf16.Encode([]rune(string(start)))) + 1, nil } // FromUTF16Column advances the point by the utf16 character offset given the diff --git a/internal/span/utf16_test.go b/internal/span/utf16_test.go index 32e9568336..2762772cd1 100644 --- a/internal/span/utf16_test.go +++ b/internal/span/utf16_test.go @@ -12,9 +12,7 @@ import ( ) // The funny character below is 4 bytes long in UTF-8; two UTF-16 code points -var funnyString = []byte(` -𐐀23 -𐐀45`[1:]) +var funnyString = []byte("𐐀23\n𐐀45") var toUTF16Tests = []struct { scenario string