1
0
mirror of https://github.com/golang/go synced 2024-11-18 11:44:45 -07:00

container/intsets: popcount: use POPCNT on amd64, Hacker's Delight algorithm on 386

This function accounts for 2% of "godoc -analysis=pointer"
and this change makes it twice as fast---and simpler.

Added test and benchmark.

Change-Id: I8578fa42dce34df057d81f6c522a7b4e0506d09d
Reviewed-on: https://go-review.googlesource.com/15211
Run-TryBot: Robert Griesemer <gri@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
Alan Donovan 2015-09-30 17:47:19 -04:00
parent 3f8a7a0787
commit b7f0150d16
5 changed files with 125 additions and 3 deletions

View File

@ -0,0 +1,20 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64
package intsets
// popcnt returns the number of set bits in x using the hardware POPCNT
// instruction. It has no Go body; the implementation is the assembly
// routine ·popcnt. Within this file it is only called when hasPOPCNT is true.
func popcnt(x word) int

// havePOPCNT reports whether the CPU advertises POPCNT support
// (CPUID leaf 1, bit 23 of CX). It is implemented in assembly.
func havePOPCNT() bool

// hasPOPCNT holds the result of havePOPCNT, computed once during
// package initialization so popcount does not re-run CPUID on every call.
var hasPOPCNT = havePOPCNT()
// popcount returns the population count (number of set bits) of x.
// It prefers the hardware POPCNT instruction and falls back to the
// byte-table implementation when the CPU does not provide it.
func popcount(x word) int {
	if !hasPOPCNT {
		// On amd64 the table lookup is faster than Hacker's Delight.
		return popcountTable(x)
	}
	return popcnt(x)
}

View File

@ -0,0 +1,28 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// func havePOPCNT() bool
//
// havePOPCNT executes CPUID with AX=1 and returns bit 23 of CX,
// the POPCNT feature flag, as a bool.
// The function has no stack frame and one byte of results ($0-1).
TEXT ·havePOPCNT(SB),NOSPLIT,$0-1
	MOVQ	$1, AX
	CPUID
	SHRQ	$23, CX		// move the feature bit down to bit 0
	ANDQ	$1, CX		// discard all other feature bits
	MOVB	CX, ret+0(FP)
	RET
// func popcnt(word) int
//
// popcnt returns the number of set bits in its argument using the
// hardware POPCNT instruction. Callers must first verify that the
// CPU supports it (see havePOPCNT).
// The frame is empty; arguments plus result occupy 16 bytes
// (8-byte word at x+0(FP), 8-byte int at ret+8(FP)).
TEXT ·popcnt(SB),NOSPLIT,$0-16
	XORQ	AX, AX
	MOVQ	x+0(FP), SI
	// POPCNTQ SI, AX is not recognized by the Go assembler,
	// so we assemble it ourselves:
	// F3 prefix, REX.W, opcode 0F B8, ModRM C6 (register form, SI -> AX).
	BYTE	$0xf3
	BYTE	$0x48
	BYTE	$0x0f
	BYTE	$0xb8
	BYTE	$0xc6
	MOVQ	AX, ret+8(FP)
	RET

View File

@ -0,0 +1,32 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64
package intsets
import "runtime"
// We compared three algorithms---Hacker's Delight, table lookup,
// and AMD64's SSE4.2 hardware POPCNT---on a 2.67GHz Xeon X5550.
//
// % GOARCH=amd64 go test -run=NONE -bench=Popcount
// POPCNT 5.12 ns/op
// Table 8.53 ns/op
// HackersDelight 9.96 ns/op
//
// % GOARCH=386 go test -run=NONE -bench=Popcount
// Table 10.4 ns/op
// HackersDelight 5.23 ns/op
//
// (AMD64's ABM and BMI1 extensions support nlz (LZCNT) and ntz (TZCNT) too,
// but they aren't critical.)
// popcount returns the population count (number of set bits) of x.
// On 386 it uses the Hacker's Delight bit-twiddling routine, which the
// benchmarks recorded in this file show to be faster there; every other
// architecture built from this file uses the byte-table lookup.
func popcount(x word) int {
	switch runtime.GOARCH {
	case "386":
		return popcountHD(uint32(x))
	default:
		return popcountTable(x)
	}
}

View File

@ -4,6 +4,16 @@
package intsets
// popcountHD returns the number of set bits in x.
//
// It is the divide-and-conquer bit count from Hacker's Delight, fig 5.2:
// adjacent fields are summed in parallel, doubling the field width at
// each step, until the total sits in the low six bits.
func popcountHD(x uint32) int {
	const (
		m1 = 0x55555555 // alternating single bits
		m2 = 0x33333333 // alternating bit pairs
		m4 = 0x0f0f0f0f // alternating nibbles
	)
	x = x - ((x >> 1) & m1)      // 2-bit fields each hold a count of 0..2
	x = (x & m2) + ((x >> 2) & m2) // 4-bit fields each hold a count of 0..4
	x = (x + (x >> 4)) & m4      // 8-bit fields each hold a count of 0..8
	x += x >> 8
	x += x >> 16
	// Only the low six bits are meaningful: the maximum count is 32.
	return int(x & 0x3f)
}
var a [1 << 8]byte
func init() {
@ -18,8 +28,7 @@ func init() {
}
}
// popcount returns the population count (number of set bits) of x.
func popcount(x word) int {
func popcountTable(x word) int {
return int(a[byte(x>>(0*8))] +
a[byte(x>>(1*8))] +
a[byte(x>>(2*8))] +

View File

@ -4,7 +4,10 @@
package intsets
import "testing"
import (
"math/rand"
"testing"
)
func TestNLZ(t *testing.T) {
// Test the platform-specific edge case.
@ -23,3 +26,33 @@ func TestNLZ(t *testing.T) {
// Check is a backdoor for testing: it exposes the unexported check
// method of Sparse and returns whatever error that method reports.
func (s *Sparse) Check() error {
	return s.check()
}
// dumbPopcount is a deliberately naive reference implementation of
// popcount: it inspects each of the bitsPerWord bit positions of x in
// turn and counts those that are set.
func dumbPopcount(x word) int {
	count := 0
	for bit := uint(0); bit < bitsPerWord; bit++ {
		if (x>>bit)&1 == 1 {
			count++
		}
	}
	return count
}
// TestPopcount checks popcount against the naive dumbPopcount reference
// on 1e5 pseudo-random words. On 64-bit platforms both halves of the
// word are filled with random bits.
func TestPopcount(t *testing.T) {
	const trials = 1e5
	for n := 0; n < trials; n++ {
		x := word(rand.Uint32())
		if bitsPerWord == 64 {
			x |= word(rand.Uint32()) << 32
		}
		if got, want := popcount(x), dumbPopcount(x); got != want {
			t.Errorf("popcount(%d) = %d, want %d", x, got, want)
		}
	}
}
// popcountSink receives the benchmark's accumulated result so the
// compiler cannot treat the popcount calls as dead code and remove them.
var popcountSink int

// BenchmarkPopcount measures popcount over the words 0..b.N-1.
// The results are summed and stored in popcountSink; without a use of
// the return value an inlined popcount could be optimized away, making
// the benchmark measure an empty loop.
func BenchmarkPopcount(b *testing.B) {
	total := 0
	for i := 0; i < b.N; i++ {
		total += popcount(word(i))
	}
	popcountSink = total
}