1
0
mirror of https://github.com/golang/go synced 2024-11-25 01:17:56 -07:00

test: simplify issue 69434 test

Updates #69434

Change-Id: I780c5ed63561eb8fa998bb0e6cdc77a904ff29c8
Reviewed-on: https://go-review.googlesource.com/c/go/+/615915
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Auto-Submit: Cuong Manh Le <cuong.manhle.vn@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Cuong Manh Le 2024-09-26 01:37:07 +07:00 committed by Gopher Robot
parent 676d427f77
commit 677b6cc175

View File

@ -1,4 +1,4 @@
// run // run -gcflags=-d=maymorestack=runtime.mayMoreStackMove
// Copyright 2024 The Go Authors. All rights reserved. // Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
@ -7,167 +7,47 @@
package main package main
import ( import (
"bufio"
"fmt"
"io"
"iter" "iter"
"math/rand"
"os"
"strings"
"unicode"
) )
// WordReader is the struct that implements io.Reader func All() iter.Seq[int] {
type WordReader struct { return func(yield func(int) bool) {
scanner *bufio.Scanner for i := 0; i < 10; i++ {
} if !yield(i) {
// NewWordReader returns a WordReader that pulls its input from r.
// The underlying bufio.Scanner is configured with bufio.ScanWords, so each
// successful scan yields one space-separated token.
func NewWordReader(r io.Reader) *WordReader {
	wr := &WordReader{scanner: bufio.NewScanner(r)}
	wr.scanner.Split(bufio.ScanWords)
	return wr
}
// Read copies the next non-empty cleaned word into p and returns the number
// of bytes copied.
//
// Tokens are taken from the scanner one at a time and passed through
// removeNonAlphabetic; tokens that clean down to the empty string are skipped.
// When the scanner is exhausted, Read returns the scanner's error if it has
// one and io.EOF otherwise. A word longer than len(p) is truncated, because
// copy stops at the shorter of the two lengths.
func (wr *WordReader) Read(p []byte) (n int, err error) {
	for wr.scanner.Scan() {
		cleaned := removeNonAlphabetic(wr.scanner.Text())
		if len(cleaned) != 0 {
			// copy accepts a string source directly when the destination is []byte.
			return copy(p, cleaned), nil
		}
	}
	if scanErr := wr.scanner.Err(); scanErr != nil {
		return 0, scanErr
	}
	return 0, io.EOF
}
// All returns an iterator allowing the caller to iterate over the WordReader using for/range.
func (wr *WordReader) All() iter.Seq[string] {
word := make([]byte, 1024)
return func(yield func(string) bool) {
var err error
var n int
for n, err = wr.Read(word); err == nil; n, err = wr.Read(word) {
if !yield(string(word[:n])) {
return return
} }
} }
if err != io.EOF {
fmt.Fprintf(os.Stderr, "error reading word: %v\n", err)
}
} }
} }
// removeNonAlphabetic removes non-alphabetic characters from a word using strings.Map type S struct {
func removeNonAlphabetic(word string) string { round int
return strings.Map(func(r rune) rune {
if unicode.IsLetter(r) {
return unicode.ToLower(r)
}
return -1
}, word)
} }
// ProbabilisticSkipper determines if an item should be retained with probability 1/(1<<n) func NewS(round int) *S {
type ProbabilisticSkipper struct { s := &S{round: round}
n int return s
counter uint64
bitmask uint64
} }
// NewProbabilisticSkipper initializes the ProbabilisticSkipper func (s *S) check(round int) {
func NewProbabilisticSkipper(n int) *ProbabilisticSkipper { if s.round != round {
pr := &ProbabilisticSkipper{n: n} panic("bad round")
pr.refreshCounter()
return pr
}
// check panics if pr.n is not the expected value
func (pr *ProbabilisticSkipper) check(n int) {
if pr.n != n {
panic(fmt.Sprintf("check: pr.n != n %d != %d", pr.n, n))
} }
} }
// refreshCounter refreshes the counter with a new random value func f() {
// refreshCounter installs a new bitmask and resets counter to 64.
//
// When pr.n is zero the mask has every bit set. Otherwise the mask starts as
// one rand.Uint64 value and is ANDed with pr.n-1 further rand.Uint64 values.
func (pr *ProbabilisticSkipper) refreshCounter() {
	mask := ^uint64(0) // all bits set; kept as-is when pr.n == 0
	if pr.n != 0 {
		mask = rand.Uint64()
		for remaining := pr.n - 1; remaining > 0; remaining-- {
			mask &= rand.Uint64()
		}
	}
	pr.bitmask = mask
	pr.counter = 64
}
// ShouldSkip consumes the lowest bit of the bitmask and reports whether that
// bit was zero.
//
// After taking the bit, the mask is shifted right by one and counter is
// decremented; when counter reaches zero, refreshCounter is called to install
// a new mask. Since refreshCounter ANDs n random words together, a given bit
// is set with probability 1/(1<<n), so this method returns true with
// probability 1 - 1/(1<<n) (never, when n is 0).
func (pr *ProbabilisticSkipper) ShouldSkip() bool {
	lowBit := pr.bitmask & 1
	pr.bitmask >>= 1
	if pr.counter--; pr.counter == 0 {
		pr.refreshCounter()
	}
	return lowBit == 0
}
// EstimateUniqueWordsIter estimates the number of unique words using a probabilistic counting method
func EstimateUniqueWordsIter(reader io.Reader, memorySize int) int {
wordReader := NewWordReader(reader)
words := make(map[string]struct{}, memorySize)
rounds := 0 rounds := 0
roundRemover := NewProbabilisticSkipper(1) s := NewS(rounds)
wordSkipper := NewProbabilisticSkipper(rounds) s.check(rounds)
wordSkipper.check(rounds)
for word := range wordReader.All() { for range All() {
wordSkipper.check(rounds) s.check(rounds)
if wordSkipper.ShouldSkip() {
delete(words, word)
} else {
words[word] = struct{}{}
if len(words) >= memorySize {
rounds++ rounds++
s = NewS(rounds)
wordSkipper = NewProbabilisticSkipper(rounds) s.check(rounds)
for w := range words {
if roundRemover.ShouldSkip() {
delete(words, w)
} }
}
}
}
wordSkipper.check(rounds)
}
if len(words) == 0 {
return 0
}
invProbability := 1 << rounds
estimatedUniqueWords := len(words) * invProbability
return estimatedUniqueWords
} }
func main() { func main() {
input := "Hello, world! This is a test. Hello, world, hello!" f()
expectedUniqueWords := 6 // "hello", "world", "this", "is", "a", "test" (but "hello" and "world" are repeated)
memorySize := 6
reader := strings.NewReader(input)
estimatedUniqueWords := EstimateUniqueWordsIter(reader, memorySize)
if estimatedUniqueWords != expectedUniqueWords {
// ...
}
} }