mirror of
https://github.com/golang/go
synced 2024-11-22 08:44:41 -07:00
suffixarray: use binary search for both ends of Lookup
This prevents many unnecessary comparisons when n is large.

R=gri, gri1, rsc
CC=golang-dev
https://golang.org/cl/4068043
This commit is contained in:
parent
2aa6cf3e7e
commit
ab036abdf0
@ -50,27 +50,33 @@ func (x *Index) at(i int) []byte {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func (x *Index) search(s []byte) int {
|
// lookupAll returns a slice into the matching region of the index.
|
||||||
return sort.Search(len(x.sa), func(i int) bool { return bytes.Compare(x.at(i), s) >= 0 })
|
// The runtime is O(log(N)*len(s)).
|
||||||
|
func (x *Index) lookupAll(s []byte) []int {
|
||||||
|
// find matching suffix index range [i:j]
|
||||||
|
// find the first index where s would be the prefix
|
||||||
|
i := sort.Search(len(x.sa), func(i int) bool { return bytes.Compare(x.at(i), s) >= 0 })
|
||||||
|
// starting at i, find the first index at which s is not a prefix
|
||||||
|
j := i + sort.Search(len(x.sa)-i, func(j int) bool { return !bytes.HasPrefix(x.at(j+i), s) })
|
||||||
|
return x.sa[i:j]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Lookup returns an unsorted list of at most n indices where the byte string s
|
// Lookup returns an unsorted list of at most n indices where the byte string s
|
||||||
// occurs in the indexed data. If n < 0, all occurrences are returned.
|
// occurs in the indexed data. If n < 0, all occurrences are returned.
|
||||||
// The result is nil if s is empty, s is not found, or n == 0.
|
// The result is nil if s is empty, s is not found, or n == 0.
|
||||||
// Lookup time is O((log(N) + len(result))*len(s)) where N is the
|
// Lookup time is O(log(N)*len(s) + len(result)) where N is the
|
||||||
// size of the indexed data.
|
// size of the indexed data.
|
||||||
//
|
//
|
||||||
func (x *Index) Lookup(s []byte, n int) (result []int) {
|
func (x *Index) Lookup(s []byte, n int) (result []int) {
|
||||||
if len(s) > 0 && n != 0 {
|
if len(s) > 0 && n != 0 {
|
||||||
// find matching suffix index i
|
matches := x.lookupAll(s)
|
||||||
i := x.search(s)
|
if len(matches) < n || n < 0 {
|
||||||
// x.at(i-1) < s <= x.at(i)
|
n = len(matches)
|
||||||
|
}
|
||||||
// collect the following suffixes with matching prefixes
|
if n > 0 {
|
||||||
for (n < 0 || len(result) < n) && i < len(x.sa) && bytes.HasPrefix(x.at(i), s) {
|
result = make([]int, n)
|
||||||
result = append(result, x.sa[i])
|
copy(result, matches)
|
||||||
i++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
|
Loading…
Reference in New Issue
Block a user