mirror of
https://github.com/golang/go
synced 2024-11-25 01:17:56 -07:00
test: simplify issue 69434 test
Updates #69434

Change-Id: I780c5ed63561eb8fa998bb0e6cdc77a904ff29c8
Reviewed-on: https://go-review.googlesource.com/c/go/+/615915
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Auto-Submit: Cuong Manh Le <cuong.manhle.vn@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
676d427f77
commit
677b6cc175
@ -1,4 +1,4 @@
|
|||||||
// run
|
// run -gcflags=-d=maymorestack=runtime.mayMoreStackMove
|
||||||
|
|
||||||
// Copyright 2024 The Go Authors. All rights reserved.
|
// Copyright 2024 The Go Authors. All rights reserved.
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
@ -7,167 +7,47 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"iter"
|
"iter"
|
||||||
"math/rand"
|
|
||||||
"os"
|
|
||||||
"strings"
|
|
||||||
"unicode"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// WordReader is the struct that implements io.Reader
|
func All() iter.Seq[int] {
|
||||||
type WordReader struct {
|
return func(yield func(int) bool) {
|
||||||
scanner *bufio.Scanner
|
for i := 0; i < 10; i++ {
|
||||||
}
|
if !yield(i) {
|
||||||
|
|
||||||
// NewWordReader creates a new WordReader from an io.Reader
|
|
||||||
func NewWordReader(r io.Reader) *WordReader {
|
|
||||||
scanner := bufio.NewScanner(r)
|
|
||||||
scanner.Split(bufio.ScanWords)
|
|
||||||
return &WordReader{
|
|
||||||
scanner: scanner,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read reads data from the input stream and returns a single lowercase word at a time
|
|
||||||
func (wr *WordReader) Read(p []byte) (n int, err error) {
|
|
||||||
if !wr.scanner.Scan() {
|
|
||||||
if err := wr.scanner.Err(); err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
return 0, io.EOF
|
|
||||||
}
|
|
||||||
word := wr.scanner.Text()
|
|
||||||
cleanedWord := removeNonAlphabetic(word)
|
|
||||||
if len(cleanedWord) == 0 {
|
|
||||||
return wr.Read(p)
|
|
||||||
}
|
|
||||||
n = copy(p, []byte(cleanedWord))
|
|
||||||
return n, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// All returns an iterator allowing the caller to iterate over the WordReader using for/range.
|
|
||||||
func (wr *WordReader) All() iter.Seq[string] {
|
|
||||||
word := make([]byte, 1024)
|
|
||||||
return func(yield func(string) bool) {
|
|
||||||
var err error
|
|
||||||
var n int
|
|
||||||
for n, err = wr.Read(word); err == nil; n, err = wr.Read(word) {
|
|
||||||
if !yield(string(word[:n])) {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err != io.EOF {
|
|
||||||
fmt.Fprintf(os.Stderr, "error reading word: %v\n", err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// removeNonAlphabetic removes non-alphabetic characters from a word using strings.Map
|
type S struct {
|
||||||
func removeNonAlphabetic(word string) string {
|
round int
|
||||||
return strings.Map(func(r rune) rune {
|
|
||||||
if unicode.IsLetter(r) {
|
|
||||||
return unicode.ToLower(r)
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}, word)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ProbabilisticSkipper determines if an item should be retained with probability 1/(1<<n)
|
func NewS(round int) *S {
|
||||||
type ProbabilisticSkipper struct {
|
s := &S{round: round}
|
||||||
n int
|
return s
|
||||||
counter uint64
|
|
||||||
bitmask uint64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewProbabilisticSkipper initializes the ProbabilisticSkipper
|
func (s *S) check(round int) {
|
||||||
func NewProbabilisticSkipper(n int) *ProbabilisticSkipper {
|
if s.round != round {
|
||||||
pr := &ProbabilisticSkipper{n: n}
|
panic("bad round")
|
||||||
pr.refreshCounter()
|
|
||||||
return pr
|
|
||||||
}
|
|
||||||
|
|
||||||
// check panics if pr.n is not the expected value
|
|
||||||
func (pr *ProbabilisticSkipper) check(n int) {
|
|
||||||
if pr.n != n {
|
|
||||||
panic(fmt.Sprintf("check: pr.n != n %d != %d", pr.n, n))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// refreshCounter refreshes the counter with a new random value
|
func f() {
|
||||||
func (pr *ProbabilisticSkipper) refreshCounter() {
|
|
||||||
if pr.n == 0 {
|
|
||||||
pr.bitmask = ^uint64(0) // All bits set to 1
|
|
||||||
} else {
|
|
||||||
pr.bitmask = rand.Uint64()
|
|
||||||
for i := 0; i < pr.n-1; i++ {
|
|
||||||
pr.bitmask &= rand.Uint64()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pr.counter = 64
|
|
||||||
}
|
|
||||||
|
|
||||||
// ShouldSkip returns true with probability 1/(1<<n)
|
|
||||||
func (pr *ProbabilisticSkipper) ShouldSkip() bool {
|
|
||||||
remove := pr.bitmask&1 == 0
|
|
||||||
pr.bitmask >>= 1
|
|
||||||
pr.counter--
|
|
||||||
if pr.counter == 0 {
|
|
||||||
pr.refreshCounter()
|
|
||||||
}
|
|
||||||
return remove
|
|
||||||
}
|
|
||||||
|
|
||||||
// EstimateUniqueWordsIter estimates the number of unique words using a probabilistic counting method
|
|
||||||
func EstimateUniqueWordsIter(reader io.Reader, memorySize int) int {
|
|
||||||
wordReader := NewWordReader(reader)
|
|
||||||
words := make(map[string]struct{}, memorySize)
|
|
||||||
|
|
||||||
rounds := 0
|
rounds := 0
|
||||||
roundRemover := NewProbabilisticSkipper(1)
|
s := NewS(rounds)
|
||||||
wordSkipper := NewProbabilisticSkipper(rounds)
|
s.check(rounds)
|
||||||
wordSkipper.check(rounds)
|
|
||||||
|
|
||||||
for word := range wordReader.All() {
|
for range All() {
|
||||||
wordSkipper.check(rounds)
|
s.check(rounds)
|
||||||
if wordSkipper.ShouldSkip() {
|
|
||||||
delete(words, word)
|
|
||||||
} else {
|
|
||||||
words[word] = struct{}{}
|
|
||||||
|
|
||||||
if len(words) >= memorySize {
|
|
||||||
rounds++
|
rounds++
|
||||||
|
s = NewS(rounds)
|
||||||
wordSkipper = NewProbabilisticSkipper(rounds)
|
s.check(rounds)
|
||||||
for w := range words {
|
|
||||||
if roundRemover.ShouldSkip() {
|
|
||||||
delete(words, w)
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
wordSkipper.check(rounds)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(words) == 0 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
invProbability := 1 << rounds
|
|
||||||
estimatedUniqueWords := len(words) * invProbability
|
|
||||||
return estimatedUniqueWords
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
input := "Hello, world! This is a test. Hello, world, hello!"
|
f()
|
||||||
expectedUniqueWords := 6 // "hello", "world", "this", "is", "a", "test" (but "hello" and "world" are repeated)
|
|
||||||
memorySize := 6
|
|
||||||
|
|
||||||
reader := strings.NewReader(input)
|
|
||||||
estimatedUniqueWords := EstimateUniqueWordsIter(reader, memorySize)
|
|
||||||
if estimatedUniqueWords != expectedUniqueWords {
|
|
||||||
// ...
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user