mirror of
https://github.com/golang/go
synced 2024-11-18 08:44:43 -07:00
container/intsets: popcount: use POPCNT on amd64, Hacker's Delight algorithm on 386
This function accounts for 2% of "godoc -analysis=pointer" and this change makes it twice as fast---and simpler. Added test and benchmark. Change-Id: I8578fa42dce34df057d81f6c522a7b4e0506d09d Reviewed-on: https://go-review.googlesource.com/15211 Run-TryBot: Robert Griesemer <gri@golang.org> Reviewed-by: Ilya Tocar <ilya.tocar@intel.com> Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
parent
3f8a7a0787
commit
b7f0150d16
20
container/intsets/popcnt_amd64.go
Normal file
20
container/intsets/popcnt_amd64.go
Normal file
@ -0,0 +1,20 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build amd64
|
||||
|
||||
package intsets
|
||||
|
||||
// popcnt has no Go body; it is implemented in popcnt_amd64.s, where it
// executes the hardware POPCNT instruction on x.
func popcnt(x word) int
|
||||
// havePOPCNT has no Go body; it is implemented in popcnt_amd64.s using CPUID.
func havePOPCNT() bool
|
||||
|
||||
// hasPOPCNT caches the result of havePOPCNT, evaluated once when the package
// is initialized, so popcount does not have to call havePOPCNT on every use.
var hasPOPCNT = havePOPCNT()
|
||||
|
||||
// popcount returns the population count (number of set bits) of x.
|
||||
func popcount(x word) int {
|
||||
if hasPOPCNT {
|
||||
return popcnt(x)
|
||||
}
|
||||
return popcountTable(x) // faster than Hacker's Delight
|
||||
}
|
28
container/intsets/popcnt_amd64.s
Normal file
28
container/intsets/popcnt_amd64.s
Normal file
@ -0,0 +1,28 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func havePOPCNT() bool
//
// havePOPCNT runs CPUID with AX=1 and returns bit 23 of the CX result
// (the POPCNT feature flag) as a bool.
// The frame is $0-1: no locals, and one byte of result.
TEXT ·havePOPCNT(SB),NOSPLIT,$0-1
	MOVQ	$1, AX
	CPUID
	// Shift feature bit 23 down to bit 0 and discard everything else.
	SHRQ	$23, CX
	ANDQ	$1, CX
	MOVB	CX, ret+0(FP)
	RET
|
||||
|
||||
// func popcnt(word) int
//
// popcnt returns the number of set bits in its argument using the hardware
// POPCNT instruction. Callers must first confirm the CPU supports it.
// The frame is $0-16: no locals, 8 bytes of argument (x+0) plus 8 bytes of
// result (ret+8).
TEXT ·popcnt(SB),NOSPLIT,$0-16
	XORQ	AX, AX
	MOVQ	x+0(FP), SI
	// The POPCNT mnemonic was not recognized by the Go assembler when this
	// was written, so the instruction is assembled by hand:
	// F3 REX.W(48) 0F B8 with ModRM C6, the register form POPCNTQ SI, AX
	// (AX = number of set bits in SI).
	BYTE	$0xf3
	BYTE	$0x48
	BYTE	$0x0f
	BYTE	$0xb8
	BYTE	$0xc6
	MOVQ	AX, ret+8(FP)
	RET
|
32
container/intsets/popcnt_generic.go
Normal file
32
container/intsets/popcnt_generic.go
Normal file
@ -0,0 +1,32 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64
|
||||
|
||||
package intsets
|
||||
|
||||
import "runtime"
|
||||
|
||||
// We compared three algorithms---Hacker's Delight, table lookup,
|
||||
// and AMD64's hardware POPCNT instruction (introduced alongside SSE4.2)---on a 2.67GHz Xeon X5550.
|
||||
//
|
||||
// % GOARCH=amd64 go test -run=NONE -bench=Popcount
|
||||
// POPCNT 5.12 ns/op
|
||||
// Table 8.53 ns/op
|
||||
// HackersDelight 9.96 ns/op
|
||||
//
|
||||
// % GOARCH=386 go test -run=NONE -bench=Popcount
|
||||
// Table 10.4 ns/op
|
||||
// HackersDelight 5.23 ns/op
|
||||
//
|
||||
// (AMD64's ABM and BMI1 extensions provide nlz (LZCNT) and ntz (TZCNT) too,
|
||||
// but they aren't critical.)
|
||||
|
||||
// popcount returns the population count (number of set bits) of x.
|
||||
func popcount(x word) int {
|
||||
if runtime.GOARCH == "386" {
|
||||
return popcountHD(uint32(x))
|
||||
}
|
||||
return popcountTable(x)
|
||||
}
|
@ -4,6 +4,16 @@
|
||||
|
||||
package intsets
|
||||
|
||||
// popcountHD returns the number of set bits in the 32-bit value x.
//
// It is the branch-free parallel-summing routine from Hacker's Delight,
// fig 5.2: adjacent bit fields are added pairwise, doubling the field width
// at each step, until the total sits in the low bits of x.
func popcountHD(x uint32) int {
	const (
		oddBits    = 0x55555555
		bitPairs   = 0x33333333
		lowNibbles = 0x0f0f0f0f
	)

	// Each 2-bit field now holds the count of its two original bits.
	x = x - ((x >> 1) & oddBits)

	// Each 4-bit field holds the sum of two neighbouring 2-bit counts.
	evenPairs := x & bitPairs
	oddPairs := (x >> 2) & bitPairs
	x = evenPairs + oddPairs

	// Each byte holds the sum of its two nibble counts.
	x = (x + (x >> 4)) & lowNibbles

	// Fold the four byte counts into the low byte.
	x += x >> 8
	x += x >> 16

	// The total is at most 32, so six bits are enough.
	return int(x & 0x3f)
}
|
||||
|
||||
// a is a 256-entry byte table indexed by a single byte value; popcountTable
// adds up the entries selected by each byte of a word.
// NOTE(review): presumably a[i] holds the number of set bits in i and is
// filled in by init; the init body is not visible here, so confirm.
var a [1 << 8]byte
|
||||
|
||||
func init() {
|
||||
@ -18,8 +28,7 @@ func init() {
|
||||
}
|
||||
}
|
||||
|
||||
// popcount returns the population count (number of set bits) of x.
|
||||
func popcount(x word) int {
|
||||
func popcountTable(x word) int {
|
||||
return int(a[byte(x>>(0*8))] +
|
||||
a[byte(x>>(1*8))] +
|
||||
a[byte(x>>(2*8))] +
|
||||
|
@ -4,7 +4,10 @@
|
||||
|
||||
package intsets
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNLZ(t *testing.T) {
|
||||
// Test the platform-specific edge case.
|
||||
@ -23,3 +26,33 @@ func TestNLZ(t *testing.T) {
|
||||
|
||||
// Backdoor for testing.
|
||||
func (s *Sparse) Check() error { return s.check() }
|
||||
|
||||
func dumbPopcount(x word) int {
|
||||
var popcnt int
|
||||
for i := uint(0); i < bitsPerWord; i++ {
|
||||
if x&(1<<i) != 0 {
|
||||
popcnt++
|
||||
}
|
||||
}
|
||||
return popcnt
|
||||
}
|
||||
|
||||
func TestPopcount(t *testing.T) {
|
||||
for i := 0; i < 1e5; i++ {
|
||||
x := word(rand.Uint32())
|
||||
if bitsPerWord == 64 {
|
||||
x = x | (word(rand.Uint32()) << 32)
|
||||
}
|
||||
want := dumbPopcount(x)
|
||||
got := popcount(x)
|
||||
if got != want {
|
||||
t.Errorf("popcount(%d) = %d, want %d", x, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkPopcount(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
popcount(word(i))
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user