// Input specifies the type of the input. This influences how the runes are interpreted with
// respect to segmenting the input.
type Input int

const (
	// Text represents a text input type. Input is not segmented.
	Text Input = iota
	// Filename represents a filepath input type with '/' segment delimiters.
	Filename
	// Symbol represents a symbol input type with '.' and ':' segment delimiters.
	Symbol
)
+ RNone RuneRole = iota + // RSep specifies a rune with the role of segment separator. + RSep + // RTail specifies a rune which is a lower-case tail in a word in the input. + RTail + // RUCTail specifies a rune which is an upper-case tail in a word in the input. + RUCTail + // RHead specifies a rune which is the first character in a word in the input. + RHead +) + +// RuneRoles detects the roles of each byte rune in an input string and stores it in the output +// slice. The rune role depends on the input type. Stops when it parsed all the runes in the string +// or when it filled the output. If output is nil, then it gets created. +func RuneRoles(str string, input Input, reuse []RuneRole) []RuneRole { + var output []RuneRole + if cap(reuse) < len(str) { + output = make([]RuneRole, 0, len(str)) + } else { + output = reuse[:0] + } + + prev, prev2 := rtNone, rtNone + for i := 0; i < len(str); i++ { + r := rune(str[i]) + + role := RNone + + curr := rtLower + if str[i] <= unicode.MaxASCII { + curr = runeType(rt[str[i]] - '0') + } + + if curr == rtLower { + if prev == rtNone || prev == rtPunct { + role = RHead + } else { + role = RTail + } + } else if curr == rtUpper { + role = RHead + + if prev == rtUpper { + // This and previous characters are both upper case. + + if i+1 == len(str) { + // This is last character, previous was also uppercase -> this is UCTail + // i.e., (current char is C): aBC / BC / ABC + role = RUCTail + } + } + } else if curr == rtPunct { + switch { + case input == Filename && r == '/': + role = RSep + case input == Symbol && r == '.': + role = RSep + case input == Symbol && r == ':': + role = RSep + } + } + if curr != rtLower { + if i > 1 && output[i-1] == RHead && prev2 == rtUpper && (output[i-2] == RHead || output[i-2] == RUCTail) { + // The previous two characters were uppercase. The current one is not a lower case, so the + // previous one can't be a HEAD. Make it a UCTail. 
// ToLower returns the ASCII lower-case form of input as a byte slice of length len(input).
// Only the ASCII letters 'A'-'Z' are changed; every other byte, including the bytes that make up
// multi-byte UTF-8 sequences, is copied through unmodified.
//
// If reuse has capacity for len(input) bytes, its backing array is used for the result regardless
// of its current length; otherwise (or if reuse is nil) a new slice is allocated.
func ToLower(input string, reuse []byte) []byte {
	var output []byte
	if cap(reuse) < len(input) {
		output = make([]byte, len(input))
	} else {
		// Re-slice to the full result length: reuse may be shorter than input (e.g. buf[:0]) even
		// though its capacity is sufficient, and indexing past its length would panic.
		output = reuse[:len(input)]
	}

	for i := 0; i < len(input); i++ {
		c := input[i]
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		output[i] = c
	}
	return output
}
+func Words(roles []RuneRole, consume WordConsumer) { + var wordStart int + for i, r := range roles { + switch r { + case RUCTail, RTail: + case RHead, RNone, RSep: + if i != wordStart { + consume(wordStart, i) + } + wordStart = i + if r != RHead { + // Skip this character. + wordStart = i + 1 + } + } + } + if wordStart != len(roles) { + consume(wordStart, len(roles)) + } +} diff --git a/internal/lsp/fuzzy/input_test.go b/internal/lsp/fuzzy/input_test.go new file mode 100644 index 0000000000..351ac9bb4a --- /dev/null +++ b/internal/lsp/fuzzy/input_test.go @@ -0,0 +1,186 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzzy_test + +import ( + "bytes" + "sort" + "testing" + + "golang.org/x/tools/internal/lsp/fuzzy" +) + +var rolesTests = []struct { + str string + input fuzzy.Input + want string +}{ + {str: "abc", want: "Ccc", input: fuzzy.Text}, + {str: ".abc", want: " Ccc", input: fuzzy.Text}, + {str: "abc def", want: "Ccc Ccc", input: fuzzy.Text}, + {str: "SWT MyID", want: "Cuu CcCu", input: fuzzy.Text}, + {str: "ID", want: "Cu", input: fuzzy.Text}, + {str: "IDD", want: "Cuu", input: fuzzy.Text}, + {str: " ID ", want: " Cu ", input: fuzzy.Text}, + {str: "IDSome", want: "CuCccc", input: fuzzy.Text}, + {str: "0123456789", want: "Cccccccccc", input: fuzzy.Text}, + {str: "abcdefghigklmnopqrstuvwxyz", want: "Cccccccccccccccccccccccccc", input: fuzzy.Text}, + {str: "ABCDEFGHIGKLMNOPQRSTUVWXYZ", want: "Cuuuuuuuuuuuuuuuuuuuuuuuuu", input: fuzzy.Text}, + {str: "こんにちは", want: "Ccccccccccccccc", input: fuzzy.Text}, // We don't parse unicode + {str: ":/.", want: " ", input: fuzzy.Text}, + + // Filenames + {str: "abc/def", want: "Ccc/Ccc", input: fuzzy.Filename}, + {str: " abc_def", want: " Ccc Ccc", input: fuzzy.Filename}, + {str: " abc_DDf", want: " Ccc CCc", input: fuzzy.Filename}, + {str: ":.", want: " ", input: fuzzy.Filename}, + + // Symbols + 
// diffStringLists reports whether a and b contain the same strings, ignoring order. Note that,
// despite its name, it returns true when the two lists are equal.
//
// The comparison is done on sorted copies so that the callers' slices (for instance the expected
// values stored in a shared test table) are not reordered as a side effect.
func diffStringLists(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	sortedA := append([]string(nil), a...)
	sortedB := append([]string(nil), b...)
	sort.Strings(sortedA)
	sort.Strings(sortedB)
	for i := range sortedA {
		if sortedA[i] != sortedB[i] {
			return false
		}
	}
	return true
}
+var lastSegmentSplitTests = []struct { + str string + input fuzzy.Input + want string +}{ + { + str: "identifier", + input: fuzzy.Symbol, + want: "identifier", + }, + { + str: "two_words", + input: fuzzy.Symbol, + want: "two_words", + }, + { + str: "first::second", + input: fuzzy.Symbol, + want: "second", + }, + { + str: "foo.bar.FOOBar_buz123_test", + input: fuzzy.Symbol, + want: "FOOBar_buz123_test", + }, + { + str: "golang.org/x/tools/internal/lsp/fuzzy_matcher.go", + input: fuzzy.Filename, + want: "fuzzy_matcher.go", + }, + { + str: "golang.org/x/tools/internal/lsp/fuzzy_matcher.go", + input: fuzzy.Text, + want: "golang.org/x/tools/internal/lsp/fuzzy_matcher.go", + }, +} + +func TestLastSegment(t *testing.T) { + for _, tc := range lastSegmentSplitTests { + roles := fuzzy.RuneRoles(tc.str, tc.input, nil) + + got := fuzzy.LastSegment(tc.str, roles) + + if got != tc.want { + t.Errorf("str %v: want %v; got %v", tc.str, tc.want, got) + } + } +} + +func BenchmarkRoles(b *testing.B) { + str := "AbstractSWTFactory" + out := make([]fuzzy.RuneRole, len(str)) + + for i := 0; i < b.N; i++ { + fuzzy.RuneRoles(str, fuzzy.Symbol, out) + } + b.SetBytes(int64(len(str))) +} diff --git a/internal/lsp/fuzzy/matcher.go b/internal/lsp/fuzzy/matcher.go new file mode 100644 index 0000000000..a38a6b4057 --- /dev/null +++ b/internal/lsp/fuzzy/matcher.go @@ -0,0 +1,437 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package fuzzy implements a fuzzy matching algorithm. +package fuzzy + +import ( + "bytes" + "fmt" +) + +const ( + // MaxInputSize is the maximum size of the input scored against the fuzzy matcher. Longer inputs + // will be truncated to this size. + MaxInputSize = 127 + // MaxPatternSize is the maximum size of the pattern used to construct the fuzzy matcher. Longer + // inputs are truncated to this size. 
// scoreVal packs one cell of the score table into a single integer: the lowest bit records the
// k index (0 or 1) of the predecessor cell, the remaining bits hold the signed score.
type scoreVal int

// val extracts the signed score, discarding the predecessor bit.
func (s scoreVal) val() int {
	return int(s >> 1)
}

// prevK extracts the predecessor index stored in the lowest bit.
func (s scoreVal) prevK() int {
	return int(s & 1)
}

// score packs val and prevK into a scoreVal. prevK is expected to be 0 or 1.
func score(val int, prevK int /*0 or 1*/) scoreVal {
	packed := val << 1
	packed += prevK
	return scoreVal(packed)
}
+func NewMatcher(pattern string, input Input) *Matcher { + if len(pattern) > MaxPatternSize { + pattern = pattern[:MaxPatternSize] + } + + m := &Matcher{ + input: input, + pattern: pattern, + patternLower: ToLower(pattern, nil), + } + + for i, c := range m.patternLower { + if pattern[i] != c { + m.caseSensitive = true + break + } + } + + if len(pattern) > 3 { + m.patternShort = m.patternLower[:3] + } else { + m.patternShort = m.patternLower + } + + m.patternRoles = RuneRoles(pattern, input, nil) + + if len(pattern) > 0 { + maxCharScore := 4 + if input == Text { + maxCharScore = 6 + } + m.scoreScale = 1 / float32(maxCharScore*len(pattern)) + } + + return m +} + +// SetInput updates the input type for subsequent scoring attempts. +func (m *Matcher) SetInput(input Input) { + if m.input == input { + return + } + m.input = input + m.patternRoles = RuneRoles(m.pattern, input, m.patternRoles) +} + +// Score returns the score returned by matching the candidate to the pattern. +// This is not designed for parallel use. Multiple candidates must be scored sequentally. +// Returns a score between 0 and 1 (0 - no match, 1 - perfect match). +func (m *Matcher) Score(candidate string) float32 { + if len(candidate) > MaxInputSize { + candidate = candidate[:MaxInputSize] + } + lower := ToLower(candidate, m.lowerBuf[:]) + m.lastCandidateLen = len(candidate) + + if len(m.pattern) == 0 { + // Empty patterns perfectly match candidates. + return 1 + } + + if m.match(candidate, lower) { + sc := m.computeScore(candidate, lower) + if sc > minScore/2 && !m.poorMatch() { + m.lastCandidateMatched = true + if len(m.pattern) == len(candidate) { + // Perfect match. 
+ return 1 + } + + if sc < 0 { + sc = 0 + } + normalizedScore := float32(sc) * m.scoreScale + if normalizedScore > 1 { + normalizedScore = 1 + } + + return normalizedScore + } + } + + m.lastCandidateMatched = false + return -1 +} + +const minScore = -10000 + +// MatchedRanges returns matches ranges for the last scored string as a flattened array of +// [begin, end) byte offset pairs. +func (m *Matcher) MatchedRanges() []int { + if len(m.pattern) == 0 || !m.lastCandidateMatched { + return nil + } + i, j := m.lastCandidateLen, len(m.pattern) + if m.scores[i][j][0].val() < minScore/2 && m.scores[i][j][1].val() < minScore/2 { + return nil + } + + var ret []int + k := m.bestK(i, j) + for i > 0 { + take := (k == 1) + k = m.scores[i][j][k].prevK() + if take { + if len(ret) == 0 || ret[len(ret)-1] != i { + ret = append(ret, i) + ret = append(ret, i-1) + } else { + ret[len(ret)-1] = i - 1 + } + j-- + } + i-- + } + // Reverse slice. + for i := 0; i < len(ret)/2; i++ { + ret[i], ret[len(ret)-1-i] = ret[len(ret)-1-i], ret[i] + } + return ret +} + +func (m *Matcher) match(candidate string, candidateLower []byte) bool { + i, j := 0, 0 + for ; i < len(candidateLower) && j < len(m.patternLower); i++ { + if candidateLower[i] == m.patternLower[j] { + j++ + } + } + if j != len(m.patternLower) { + return false + } + + // The input passes the simple test against pattern, so it is time to classify its characters. + // Character roles are used below to find the last segment. + m.roles = RuneRoles(candidate, m.input, m.rolesBuf[:]) + if m.input != Text { + sep := len(candidateLower) - 1 + for sep >= i && m.roles[sep] != RSep { + sep-- + } + if sep >= i { + // We are not in the last segment, check that we have at least one character match in the last + // segment of the candidate. 
+ return bytes.IndexByte(candidateLower[sep:], m.patternLower[len(m.pattern)-1]) != -1 + } + } + return true +} + +func (m *Matcher) computeScore(candidate string, candidateLower []byte) int { + pattLen, candLen := len(m.pattern), len(candidate) + + for j := 0; j <= len(m.pattern); j++ { + m.scores[0][j][0] = minScore << 1 + m.scores[0][j][1] = minScore << 1 + } + m.scores[0][0][0] = score(0, 0) // Start with 0. + + segmentsLeft, lastSegStart := 1, 0 + for i := 0; i < candLen; i++ { + if m.roles[i] == RSep { + segmentsLeft++ + lastSegStart = i + 1 + } + } + + // A per-character bonus for a consecutive match. + consecutiveBonus := 2 + if m.input == Text { + // Consecutive matches for text are more important. + consecutiveBonus = 4 + } + wordIdx := 0 // Word count within segment. + for i := 1; i <= candLen; i++ { + + role := m.roles[i-1] + isHead := role == RHead + + if isHead { + wordIdx++ + } else if role == RSep && segmentsLeft > 1 { + wordIdx = 0 + segmentsLeft-- + } + + var skipPenalty int + if segmentsLeft == 1 && isHead && m.input != Text { + // Skipping a word. + skipPenalty++ + } + if i-1 == lastSegStart { + // Skipping the start of the last segment. + skipPenalty += 3 + } + + for j := 0; j <= pattLen; j++ { + // By default, we don't have a match. Fill in the skip data. + m.scores[i][j][1] = minScore << 1 + + if segmentsLeft > 1 && j == pattLen { + // The very last pattern character can only be matched in the last segment. + m.scores[i][j][0] = minScore << 1 + continue + } + + // Compute the skip score. + k := 0 + if m.scores[i-1][j][0].val() < m.scores[i-1][j][1].val() { + k = 1 + } + + skipScore := m.scores[i-1][j][k].val() + // Do not penalize missing characters after the last matched segment. + if j != pattLen { + skipScore -= skipPenalty + } + m.scores[i][j][0] = score(skipScore, k) + + if j == 0 || candidateLower[i-1] != m.patternLower[j-1] { + // Not a match. 
+ continue + } + pRole := m.patternRoles[j-1] + + if role == RTail && pRole == RHead { + if j > 1 { + // Not a match: a head in the pattern matches a tail character in the candidate. + continue + } + // Special treatment for the first character of the pattern. We allow + // matches in the middle of a word if they are long enough, at least + // min(3, pattern.length) characters. + if !bytes.HasPrefix(candidateLower[i-1:], m.patternShort) { + continue + } + } + + // Compute the char score. + var charScore int + // Bonus 1: the char is in the candidate's last segment. + if segmentsLeft <= 1 { + charScore++ + } + // Bonus 2: Case match or a Head in the pattern aligns with one in the word. + // Single-case patterns lack segmentation signals and we assume any character + // can be a head of a segment. + if candidate[i-1] == m.pattern[j-1] || role == RHead && (!m.caseSensitive || pRole == RHead) { + charScore++ + } + + // Penalty 1: pattern char is Head, candidate char is Tail. + if role == RTail && pRole == RHead { + charScore-- + } + // Penalty 2: first pattern character matched in the middle of a word. + if j == 1 && role == RTail { + charScore -= 4 + } + + // Third dimension encodes whether there is a gap between the previous match and the current + // one. + for k := 0; k < 2; k++ { + sc := m.scores[i-1][j-1][k].val() + charScore + + isConsecutive := k == 1 || i-1 == 0 || i-1 == lastSegStart + if isConsecutive || (m.input == Text && j-1 == 0) { + // Bonus 3: a consecutive match. First character match also gets a bonus to + // ensure prefix final match score normalizes to 1.0. + // Logically, this is a part of charScore, but we have to compute it here because it + // only applies for consecutive matches (k == 1). + sc += consecutiveBonus + } + if k == 0 { + // Penalty 3: Matching inside a segment (and previous char wasn't matched). Penalize for the lack + // of alignment. 
+ if role == RTail || role == RUCTail { + sc -= 3 + } + } + + if sc > m.scores[i][j][1].val() { + m.scores[i][j][1] = score(sc, k) + } + } + } + } + + result := m.scores[len(candidate)][len(m.pattern)][m.bestK(len(candidate), len(m.pattern))].val() + + return result +} + +// ScoreTable returns the score table computed for the provided candidate. Used only for debugging. +func (m *Matcher) ScoreTable(candidate string) string { + var buf bytes.Buffer + + var line1, line2, separator bytes.Buffer + line1.WriteString("\t") + line2.WriteString("\t") + for j := 0; j < len(m.pattern); j++ { + line1.WriteString(fmt.Sprintf("%c\t\t", m.pattern[j])) + separator.WriteString("----------------") + } + + buf.WriteString(line1.String()) + buf.WriteString("\n") + buf.WriteString(separator.String()) + buf.WriteString("\n") + + for i := 1; i <= len(candidate); i++ { + line1.Reset() + line2.Reset() + + line1.WriteString(fmt.Sprintf("%c\t", candidate[i-1])) + line2.WriteString("\t") + + for j := 1; j <= len(m.pattern); j++ { + line1.WriteString(fmt.Sprintf("M%6d(%c)\t", m.scores[i][j][0].val(), dir(m.scores[i][j][0].prevK()))) + line2.WriteString(fmt.Sprintf("H%6d(%c)\t", m.scores[i][j][1].val(), dir(m.scores[i][j][1].prevK()))) + } + buf.WriteString(line1.String()) + buf.WriteString("\n") + buf.WriteString(line2.String()) + buf.WriteString("\n") + buf.WriteString(separator.String()) + buf.WriteString("\n") + } + + return buf.String() +} + +func dir(prevK int) rune { + if prevK == 0 { + return 'M' + } + return 'H' +} + +func (m *Matcher) poorMatch() bool { + if len(m.pattern) < 2 { + return false + } + + i, j := m.lastCandidateLen, len(m.pattern) + k := m.bestK(i, j) + + var counter, len int + for i > 0 { + take := (k == 1) + k = m.scores[i][j][k].prevK() + if take { + len++ + if k == 0 && len < 3 && m.roles[i-1] == RTail { + // Short match in the middle of a word + counter++ + if counter > 1 { + return true + } + } + j-- + } else { + len = 0 + } + i-- + } + return false +} diff --git 
a/internal/lsp/fuzzy/matcher_test.go b/internal/lsp/fuzzy/matcher_test.go new file mode 100644 index 0000000000..49f1caa810 --- /dev/null +++ b/internal/lsp/fuzzy/matcher_test.go @@ -0,0 +1,352 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Benchmark results: +// +// BenchmarkMatcher-12 1000000 1615 ns/op 30.95 MB/s 0 B/op 0 allocs/op +// +package fuzzy_test + +import ( + "bytes" + "fmt" + "math" + "testing" + + "golang.org/x/tools/internal/lsp/fuzzy" +) + +func ExampleFuzzyMatcher() { + pattern := "TEdit" + candidates := []string{"fuzzy.TextEdit", "ArtEdit", "TED talks about IT"} + + // Create a fuzzy matcher for the pattern. + matcher := fuzzy.NewMatcher(pattern, fuzzy.Text) + + for _, candidate := range candidates { + // Compute candidate's score against the matcher. + score := matcher.Score(candidate) + + if score > -1 { + // Get the substrings in the candidate matching the pattern. + ranges := matcher.MatchedRanges() + + fmt.Println(ranges) // Do something with the ranges. 
+ } + } +} + +type comparator struct { + f func(val, ref float32) bool + descr string +} + +var ( + eq = comparator{ + f: func(val, ref float32) bool { + return val == ref + }, + descr: "==", + } + ge = comparator{ + f: func(val, ref float32) bool { + return val >= ref + }, + descr: ">=", + } +) + +func (c comparator) eval(val, ref float32) bool { + return c.f(val, ref) +} + +func (c comparator) String() string { + return c.descr +} + +type scoreTest struct { + candidate string + comparator + ref float32 +} + +var matcherTests = []struct { + pattern string + input fuzzy.Input + tests []scoreTest +}{ + { + pattern: "", + input: fuzzy.Text, + tests: []scoreTest{ + {"def", eq, 1}, + {"Ab stuff c", eq, 1}, + }, + }, + { + pattern: "abc", + input: fuzzy.Text, + tests: []scoreTest{ + {"def", eq, -1}, + {"abd", eq, -1}, + {"abc", ge, 0}, + {"Abc", ge, 0}, + {"Ab stuff c", ge, 0}, + }, + }, + { + pattern: "Abc", + input: fuzzy.Text, + tests: []scoreTest{ + {"def", eq, -1}, + {"abd", eq, -1}, + {"abc", ge, 0}, + {"Abc", ge, 0}, + {"Ab stuff c", ge, 0}, + }, + }, + { + pattern: "subs", + input: fuzzy.Filename, + tests: []scoreTest{ + {"sub/seq", ge, 0}, + {"sub/seq/end", eq, -1}, + {"sub/seq/base", ge, 0}, + }, + }, + { + pattern: "subs", + input: fuzzy.Filename, + tests: []scoreTest{ + {"//sub/seq", ge, 0}, + {"//sub/seq/end", eq, -1}, + {"//sub/seq/base", ge, 0}, + }, + }, +} + +func TestScore(t *testing.T) { + for _, tc := range matcherTests { + m := fuzzy.NewMatcher(tc.pattern, tc.input) + for _, sct := range tc.tests { + score := m.Score(sct.candidate) + if !sct.comparator.eval(score, sct.ref) { + t.Errorf("not true that m.Score(%s)[=%v] %s %v", sct.candidate, score, sct.comparator, sct.ref) + } + } + } +} + +type candidateCompTest struct { + c1 string + comparator comparator + c2 string +} + +var compareCandidatesTestCases = []struct { + pattern string + input fuzzy.Input + orderedCandidates []string +}{ + { + pattern: "aa", + input: fuzzy.Filename, + 
orderedCandidates: []string{ + "baab", + "bb_aa", + "a/a/a", + "aa_bb", + "aa_b", + "aabb", + "aab", + "b/aa", + }, + }, + { + pattern: "Foo", + input: fuzzy.Text, + orderedCandidates: []string{ + "Barfoo", + "F_o_o", + "Faoo", + "F__oo", + "F_oo", + "FaoFooa", + "BarFoo", + "FooA", + "FooBar", + "Foo", + }, + }, +} + +func TestCompareCandidateScores(t *testing.T) { + for _, tc := range compareCandidatesTestCases { + m := fuzzy.NewMatcher(tc.pattern, tc.input) + + var prevScore float32 + prevCand := "MIN_SCORE" + for _, cand := range tc.orderedCandidates { + score := m.Score(cand) + if prevScore > score { + t.Errorf("%s[=%v] is scored lower than %s[=%v]", cand, score, prevCand, prevScore) + } + if score < -1 || score > 1 { + t.Errorf("%s score is %v; want value between [-1, 1]", cand, score) + } + prevScore = score + prevCand = cand + } + } +} + +var fuzzyMatcherTestCases = []struct { + p string + str string + want string + input fuzzy.Input +}{ + // fuzzy.Filename + {p: "aa", str: "a_a/a_a", want: "[a]_a/[a]_a", input: fuzzy.Filename}, + {p: "aaaa", str: "a_a/a_a", want: "[a]_[a]/[a]_[a]", input: fuzzy.Filename}, + {p: "aaaa", str: "aaaa", want: "[aaaa]", input: fuzzy.Filename}, + {p: "aaaa", str: "a_a/a_aaaa", want: "a_a/[a]_[aaa]a", input: fuzzy.Filename}, + {p: "aaaa", str: "a_a/aaaaa", want: "a_a/[aaaa]a", input: fuzzy.Filename}, + {p: "aaaa", str: "aabaaa", want: "[aa]b[aa]a", input: fuzzy.Filename}, + {p: "aaaa", str: "a/baaa", want: "[a]/b[aaa]", input: fuzzy.Filename}, + {p: "abcxz", str: "d/abc/abcd/oxz", want: "d/[abc]/abcd/o[xz]", input: fuzzy.Filename}, + {p: "abcxz", str: "d/abcd/abc/oxz", want: "d/[abc]d/abc/o[xz]", input: fuzzy.Filename}, + + // fuzzy.Symbol + {p: "foo", str: "abc::foo", want: "abc::[foo]", input: fuzzy.Symbol}, + {p: "foo", str: "foo.foo", want: "foo.[foo]", input: fuzzy.Symbol}, + {p: "foo", str: "fo_oo.o_oo", want: "[fo]_oo.[o]_oo", input: fuzzy.Symbol}, + {p: "foo", str: "fo_oo.fo_oo", want: "fo_oo.[fo]_[o]o", input: 
fuzzy.Symbol}, + {p: "fo_o", str: "fo_oo.o_oo", want: "[f]o_oo.[o_o]o", input: fuzzy.Symbol}, + {p: "fOO", str: "fo_oo.o_oo", want: "[f]o_oo.[o]_[o]o", input: fuzzy.Symbol}, + {p: "tedit", str: "foo.TextEdit", want: "foo.[T]ext[Edit]", input: fuzzy.Symbol}, + {p: "TEdit", str: "foo.TextEdit", want: "foo.[T]ext[Edit]", input: fuzzy.Symbol}, + {p: "Tedit", str: "foo.TextEdit", want: "foo.[T]ext[Edit]", input: fuzzy.Symbol}, + {p: "Tedit", str: "foo.Textedit", want: "foo.[Te]xte[dit]", input: fuzzy.Symbol}, + {p: "TEdit", str: "foo.Textedit", want: "", input: fuzzy.Symbol}, + {p: "te", str: "foo.Textedit", want: "foo.[Te]xtedit", input: fuzzy.Symbol}, + {p: "ee", str: "foo.Textedit", want: "", input: fuzzy.Symbol}, // short middle of the word match + {p: "ex", str: "foo.Textedit", want: "foo.T[ex]tedit", input: fuzzy.Symbol}, + {p: "exdi", str: "foo.Textedit", want: "", input: fuzzy.Symbol}, // short middle of the word match + {p: "exdit", str: "foo.Textedit", want: "", input: fuzzy.Symbol}, // short middle of the word match + {p: "extdit", str: "foo.Textedit", want: "foo.T[ext]e[dit]", input: fuzzy.Symbol}, + {p: "e", str: "foo.Textedit", want: "foo.T[e]xtedit", input: fuzzy.Symbol}, + {p: "E", str: "foo.Textedit", want: "foo.T[e]xtedit", input: fuzzy.Symbol}, + {p: "ed", str: "foo.Textedit", want: "foo.Text[ed]it", input: fuzzy.Symbol}, + {p: "edt", str: "foo.Textedit", want: "", input: fuzzy.Symbol}, // short middle of the word match + {p: "edit", str: "foo.Textedit", want: "foo.Text[edit]", input: fuzzy.Symbol}, + {p: "edin", str: "foo.TexteditNum", want: "foo.Text[edi]t[N]um", input: fuzzy.Symbol}, + {p: "n", str: "node.GoNodeMax", want: "node.Go[N]odeMax", input: fuzzy.Symbol}, + {p: "N", str: "node.GoNodeMax", want: "node.Go[N]odeMax", input: fuzzy.Symbol}, + {p: "completio", str: "completion", want: "[completio]n", input: fuzzy.Symbol}, + {p: "completio", str: "completion.None", want: "[completi]on.N[o]ne", input: fuzzy.Symbol}, +} + +func 
TestFuzzyMatcherRanges(t *testing.T) { + for _, tc := range fuzzyMatcherTestCases { + matcher := fuzzy.NewMatcher(tc.p, tc.input) + score := matcher.Score(tc.str) + if tc.want == "" { + if score >= 0 { + t.Errorf("Score(%s, %s) = %v; want: <= 0", tc.p, tc.str, score) + } + continue + } + if score < 0 { + t.Errorf("Score(%s, %s) = %v, want: > 0", tc.p, tc.str, score) + continue + } + got := highlightMatches(tc.str, matcher) + if tc.want != got { + t.Errorf("highlightMatches(%s, %s) = %v, want: %v", tc.p, tc.str, got, tc.want) + } + } +} + +var scoreTestCases = []struct { + p string + str string + want float64 +}{ + // Score precision up to five digits. Modify if changing the score, but make sure the new values + // are reasonable. + {p: "abc", str: "abc", want: 1}, + {p: "abc", str: "Abc", want: 1}, + {p: "abc", str: "Abcdef", want: 1}, + {p: "strc", str: "StrCat", want: 1}, + {p: "abc_def", str: "abc_def_xyz", want: 1}, + {p: "abcdef", str: "abc_def_xyz", want: 0.91667}, + {p: "abcxyz", str: "abc_def_xyz", want: 0.875}, + {p: "sc", str: "StrCat", want: 0.75}, + {p: "abc", str: "AbstrBasicCtor", want: 0.75}, + {p: "foo", str: "abc::foo", want: 1}, + {p: "afoo", str: "abc::foo", want: 0.9375}, + {p: "abr", str: "abc::bar", want: 0.5}, + {p: "br", str: "abc::bar", want: 0.375}, + {p: "aar", str: "abc::bar", want: 0.16667}, + {p: "edin", str: "foo.TexteditNum", want: 0}, + {p: "ediu", str: "foo.TexteditNum", want: 0}, + // We want the next two items to have roughly similar scores. 
+ {p: "up", str: "unique_ptr", want: 0.75}, + {p: "up", str: "upper_bound", want: 1}, +} + +func TestScores(t *testing.T) { + for _, tc := range scoreTestCases { + matcher := fuzzy.NewMatcher(tc.p, fuzzy.Symbol) + got := math.Round(float64(matcher.Score(tc.str))*1e5) / 1e5 + if got != tc.want { + t.Errorf("Score(%s, %s) = %v, want: %v", tc.p, tc.str, got, tc.want) + } + } +} + +func highlightMatches(str string, matcher *fuzzy.Matcher) string { + matches := matcher.MatchedRanges() + + var buf bytes.Buffer + index := 0 + for i := 0; i < len(matches)-1; i += 2 { + s, e := matches[i], matches[i+1] + fmt.Fprintf(&buf, "%s[%s]", str[index:s], str[s:e]) + index = e + } + buf.WriteString(str[index:]) + return buf.String() +} + +func BenchmarkMatcher(b *testing.B) { + pattern := "Foo" + candidates := []string{ + "F_o_o", + "Barfoo", + "Faoo", + "F__oo", + "F_oo", + "FaoFooa", + "BarFoo", + "FooA", + "FooBar", + "Foo", + } + + matcher := fuzzy.NewMatcher(pattern, fuzzy.Text) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, c := range candidates { + matcher.Score(c) + } + } + var numBytes int + for _, c := range candidates { + numBytes += len(c) + } + b.SetBytes(int64(numBytes)) +} diff --git a/internal/lsp/source/completion.go b/internal/lsp/source/completion.go index 8bf7e21b52..33980eb0c4 100644 --- a/internal/lsp/source/completion.go +++ b/internal/lsp/source/completion.go @@ -12,6 +12,7 @@ import ( "go/types" "golang.org/x/tools/go/ast/astutil" + "golang.org/x/tools/internal/lsp/fuzzy" "golang.org/x/tools/internal/lsp/snippet" "golang.org/x/tools/internal/lsp/telemetry/trace" "golang.org/x/tools/internal/span" @@ -149,6 +150,9 @@ type completer struct { // deepState contains the current state of our deep completion search. deepState deepCompletionState + + // matcher does fuzzy matching of the candidates for the surrounding prefix. 
+ matcher *fuzzy.Matcher } type compLitInfo struct { @@ -187,16 +191,17 @@ func (c *completer) setSurrounding(ident *ast.Ident) { if c.surrounding != nil { return } - if !(ident.Pos() <= c.pos && c.pos <= ident.End()) { return } - c.surrounding = &Selection{ Content: ident.Name, Range: span.NewRange(c.view.Session().Cache().FileSet(), ident.Pos(), ident.End()), Cursor: c.pos, } + if c.surrounding.Prefix() != "" { + c.matcher = fuzzy.NewMatcher(c.surrounding.Prefix(), fuzzy.Symbol) + } } // found adds a candidate completion. We will also search through the object's