1
0
mirror of https://github.com/golang/go synced 2024-11-25 02:57:57 -07:00

test/bench: import new fasta C reference, update Go, optimizations

OLD fasta -n 25000000
	gcc -O2 fasta.c	7.59u 0.06s 7.74r
	gc fasta	9.54u 0.15s 9.84r
	gc_B fasta	9.48u 0.10s 9.62r

NEW fasta -n 25000000
	gcc -O2 fasta.c	2.59u 0.02s 2.66r
	gc fasta	3.00u 0.03s 3.09r
	gc_B fasta	2.72u 0.03s 2.81r

R=r
CC=golang-dev
https://golang.org/cl/1054041
This commit is contained in:
Russ Cox 2010-05-03 17:47:59 -07:00
parent 7d7ebd2fe1
commit f8f83e80b1
2 changed files with 294 additions and 219 deletions

View File

@ -28,146 +28,190 @@ POSSIBILITY OF SUCH DAMAGE.
*/ */
/* /*
* http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gcc&id=4 * http://shootout.alioth.debian.org/u32/program.php?test=fasta&lang=gcc&id=3
*/ */
/* The Computer Language Benchmarks Game
* http://shootout.alioth.debian.org/ /* The Computer Language Benchmarks Game
* Contributed by Joern Inge Vestgaarden * http://shootout.alioth.debian.org/
* Modified by Jorge Peixoto de Morais Neto *
* contributed by Petr Prokhorenkov
*/ */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <err.h>
#define WIDTH 60 // not available on OS X
#define MIN(a,b) ((a) <= (b) ? (a) : (b)) #define fwrite_unlocked fwrite
#define NELEMENTS(x) (sizeof (x) / sizeof ((x)[0])) #define fputc_unlocked fputc
#define fputs_unlocked fputs
typedef struct { #define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
float p; #define unlikely(x) __builtin_expect((x), 0)
char c;
} aminoacid_t;
static inline float myrandom (float max) { #define IM 139968
unsigned long const IM = 139968; #define IA 3877
unsigned long const IA = 3877; #define IC 29573
unsigned long const IC = 29573;
static unsigned long last = 42; #define LINE_LEN 60
last = (last * IA + IC) % IM; #define LOOKUP_SIZE 4096
/*Integer to float conversions are faster if the integer is signed*/ #define LOOKUP_SCALE ((float)(LOOKUP_SIZE - 1))
return max * (long) last / IM;
typedef unsigned random_t;
/* Reset the generator state pointed to by random to the fixed seed 42. */
void
random_init(random_t *random) {
*random = 42;
} }
static inline void accumulate_probabilities (aminoacid_t *genelist, size_t len) { // Special version with result rescaled to LOOKUP_SCALE.
float cp = 0.0; static inline
size_t i; float
for (i = 0; i < len; i++) { random_next_lookup(random_t *random) {
cp += genelist[i].p; *random = (*random*IA + IC)%IM;
genelist[i].p = cp;
return (*random)*(LOOKUP_SCALE/IM);
}
/* One entry of a weighted alphabet from which random symbols are drawn. */
struct amino_acid {
char sym; /* character written to the output for this entry */
float prob; /* probability of this entry on its own */
float cprob_lookup; /* cumulative probability scaled by LOOKUP_SCALE; filled in by fill_lookup() */
};
/*
 * Write title, then n characters of alu to stdout, cycling back to the
 * start of alu whenever its end is reached. The characters are broken into
 * lines of at most LINE_LEN, each terminated by '\n'.
 *
 * alu   - NUL-terminated sequence to repeat
 * title - header text written verbatim before the sequence
 * n     - number of sequence characters to emit; only the title is written
 *         when n <= 0 or alu is empty
 */
void
repeat(const char *alu, const char *title, int n) {
    int len = strlen(alu);
    char buffer[len + LINE_LEN];
    int pos = 0;
    int i;

    fputs_unlocked(title, stdout);

    // An empty sequence has nothing to cycle through; without this guard
    // pos would never wrap and the writes would run off the end of buffer.
    if (len == 0) {
        return;
    }

    memcpy(buffer, alu, len);
    // Append LINE_LEN wrapped-around characters so that a full line starting
    // anywhere in [0, len) can be written with a single call. Indexing alu
    // modulo len keeps the reads in bounds even when alu is shorter than
    // LINE_LEN.
    for (i = 0; i < LINE_LEN; i++) {
        buffer[len + i] = alu[i % len];
    }

    while (n > 0) {
        int bytes = n > LINE_LEN ? LINE_LEN : n;

        fwrite_unlocked(buffer + pos, bytes, 1, stdout);
        fputc_unlocked('\n', stdout);

        // Reduce modulo len (not just one subtraction) so pos stays inside
        // [0, len) for sequences shorter than a line as well.
        pos = (pos + bytes) % len;
        n -= bytes;
    }
}
/* This function prints the characters of the string s. When it */ /*
/* reaches the end of the string, it goes back to the beginning */ * Lookup table contains mapping from real values to cumulative
/* It stops when the total number of characters printed is count. */ * probabilities. Careful selection of table size allows lookup
/* Between each WIDTH consecutive characters it prints a newline */ * virtually in constant time.
/* This function assumes that WIDTH <= strlen (s) + 1 */ *
static void repeat_fasta (char const *s, size_t count) { * All cumulative probabilities are rescaled to LOOKUP_SCALE,
size_t pos = 0; * this allows to save one multiplication operation on each iteration
size_t len = strlen (s); * in randomize().
char *s2 = malloc (len + WIDTH); */
memcpy (s2, s, len);
memcpy (s2 + len, s, WIDTH);
do {
size_t line = MIN(WIDTH, count);
fwrite (s2 + pos,1,line,stdout);
putchar_unlocked ('\n');
pos += line;
if (pos >= len) pos -= len;
count -= line;
} while (count);
free (s2);
}
/* This function takes a pointer to the first element of an array */ void *
/* Each element of the array is a struct with a character and */ fill_lookup(struct amino_acid **lookup, struct amino_acid *amino_acid, int amino_acid_size) {
/* a float number p between 0 and 1. */ float p = 0;
/* The function generates a random float number r and */ int i, j;
/* finds the first array element such that p >= r. */
/* This is a weighted random selection. */
/* The function then prints the character of the array element. */
/* This is done count times. */
/* Between each WIDTH consecutive characters, the function prints a newline */
static void random_fasta (aminoacid_t const *genelist, size_t count) {
do {
size_t line = MIN(WIDTH, count);
size_t pos = 0;
char buf[WIDTH + 1];
do {
float r = myrandom (1.0);
size_t i = 0;
while (genelist[i].p < r)
++i; /* Linear search */
buf[pos++] = genelist[i].c;
} while (pos < line);
buf[line] = '\n';
fwrite (buf, 1, line + 1, stdout);
count -= line;
} while (count);
}
int main (int argc, char **argv) { for (i = 0; i < amino_acid_size; i++) {
size_t n; p += amino_acid[i].prob;
if (argc > 1) { amino_acid[i].cprob_lookup = p*LOOKUP_SCALE;
char const *arg = argv[1]; }
char *tail;
n = strtoul (arg, &tail, 0);
if (tail == arg)
errx (1, "Could not convert \"%s\" to an unsigned long integer", arg);
} else n = 1000;
static aminoacid_t iub[] = { // Prevent rounding error.
{ 0.27, 'a' }, amino_acid[amino_acid_size - 1].cprob_lookup = LOOKUP_SIZE - 1;
{ 0.12, 'c' },
{ 0.12, 'g' },
{ 0.27, 't' },
{ 0.02, 'B' },
{ 0.02, 'D' },
{ 0.02, 'H' },
{ 0.02, 'K' },
{ 0.02, 'M' },
{ 0.02, 'N' },
{ 0.02, 'R' },
{ 0.02, 'S' },
{ 0.02, 'V' },
{ 0.02, 'W' },
{ 0.02, 'Y' }};
static aminoacid_t homosapiens[] = { for (i = 0, j = 0; i < LOOKUP_SIZE; i++) {
{ 0.3029549426680, 'a' }, while (amino_acid[j].cprob_lookup < i) {
{ 0.1979883004921, 'c' }, j++;
{ 0.1975473066391, 'g' }, }
{ 0.3015094502008, 't' }}; lookup[i] = &amino_acid[j];
}
accumulate_probabilities (iub, NELEMENTS(iub));
accumulate_probabilities (homosapiens, NELEMENTS(homosapiens));
static char const *const alu ="\
GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
fputs (">ONE Homo sapiens alu\n", stdout);
repeat_fasta (alu, 2 * n);
fputs (">TWO IUB ambiguity codes\n", stdout);
random_fasta (iub, 3 * n);
fputs (">THREE Homo sapiens frequency\n", stdout);
random_fasta (homosapiens, 5 * n);
return 0; return 0;
} }
/*
 * Write title, then n randomly selected symbols to stdout, LINE_LEN per
 * line with a '\n' after each line.
 *
 * amino_acid      - alphabet to draw from; its cprob_lookup fields are
 *                   (re)computed here via fill_lookup()
 * amino_acid_size - number of entries in amino_acid
 * title           - header text written verbatim before the symbols
 * n               - number of symbols to generate
 * rand            - generator state, advanced once per symbol
 *
 * The final (possibly partial) line is always written, so a single '\n'
 * follows the title even when n <= 0.
 */
void
randomize(struct amino_acid *amino_acid, int amino_acid_size,
        const char *title, int n, random_t *rand) {
    struct amino_acid *lookup[LOOKUP_SIZE];
    char line_buffer[LINE_LEN + 1];
    int produced;
    int col = 0;

    fill_lookup(lookup, amino_acid, amino_acid_size);

    // A full line always ends in a newline, so set it once up front.
    line_buffer[LINE_LEN] = '\n';

    fputs_unlocked(title, stdout);

    for (produced = 0; produced < n; produced++) {
        // Flush a completed line before starting the next one.
        if (col == LINE_LEN) {
            fwrite_unlocked(line_buffer, LINE_LEN + 1, 1, stdout);
            col = 0;
        }

        float r = random_next_lookup(rand);

        // The table gives a starting entry for the integer part of r; step
        // forward until the entry's cumulative probability covers r.
        struct amino_acid *pick = lookup[(short)r];
        while (unlikely(pick->cprob_lookup < r)) {
            pick++;
        }

        line_buffer[col++] = pick->sym;
    }

    // Emit whatever is left in the buffer as the last line.
    line_buffer[col] = '\n';
    fwrite_unlocked(line_buffer, col + 1, 1, stdout);
}
/*
 * Symbols and individual probabilities used by main() for the
 * "IUB ambiguity codes" section. cprob_lookup is left zero here.
 */
struct amino_acid amino_acid[] = {
    { .sym = 'a', .prob = 0.27 },
    { .sym = 'c', .prob = 0.12 },
    { .sym = 'g', .prob = 0.12 },
    { .sym = 't', .prob = 0.27 },

    { .sym = 'B', .prob = 0.02 },
    { .sym = 'D', .prob = 0.02 },
    { .sym = 'H', .prob = 0.02 },
    { .sym = 'K', .prob = 0.02 },
    { .sym = 'M', .prob = 0.02 },
    { .sym = 'N', .prob = 0.02 },
    { .sym = 'R', .prob = 0.02 },
    { .sym = 'S', .prob = 0.02 },
    { .sym = 'V', .prob = 0.02 },
    { .sym = 'W', .prob = 0.02 },
    { .sym = 'Y', .prob = 0.02 },
};
/*
 * Symbols and individual probabilities used by main() for the
 * "Homo sapiens frequency" section. cprob_lookup is left zero here.
 */
struct amino_acid homo_sapiens[] = {
    { .sym = 'a', .prob = 0.3029549426680 },
    { .sym = 'c', .prob = 0.1979883004921 },
    { .sym = 'g', .prob = 0.1975473066391 },
    { .sym = 't', .prob = 0.3015094502008 },
};
/*
 * Fixed sequence that main() passes to repeat() for the
 * "Homo sapiens alu" section. The adjacent literals are concatenated into
 * one string by the compiler.
 */
static const char alu[] =
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG"
"GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA"
"GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA"
"AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT"
"CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC"
"CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG"
"CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
/*
 * Entry point: writes the three FASTA sections to stdout.
 *
 * argv[1], when present, is the base length n as a non-negative decimal
 * integer; n defaults to 512. The sections are generated with 2n, 3n and
 * 5n characters respectively.
 *
 * Returns 0 on success, or 1 (after a message on stderr) when argv[1] is
 * not a number, is negative, or is too large for 5n to fit in an int.
 */
int
main(int argc, const char **argv) {
    // Largest n for which n*5 cannot overflow: INT_MAX / 5, spelled via
    // unsigned arithmetic so no extra header is needed.
    const long max_n = (long)((unsigned)-1 / 2) / 5;
    int n = 512;

    if (argc > 1) {
        // strtol, unlike atoi, lets us detect malformed and out-of-range
        // input (it saturates to LONG_MIN/LONG_MAX, which the range check
        // below rejects).
        char *end;
        long value = strtol(argv[1], &end, 10);

        if (end == argv[1] || *end != '\0' || value < 0 || value > max_n) {
            fprintf(stderr, "invalid length: %s\n", argv[1]);
            return 1;
        }
        n = (int)value;
    }

    // A single generator state is shared by both random sections so the
    // second one continues the sequence where the first stopped.
    random_t state;
    random_init(&state);

    repeat(alu, ">ONE Homo sapiens alu\n", n*2);
    randomize(amino_acid, ARRAY_SIZE(amino_acid),
            ">TWO IUB ambiguity codes\n", n*3, &state);
    randomize(homo_sapiens, ARRAY_SIZE(homo_sapiens),
            ">THREE Homo sapiens frequency\n", n*5, &state);

    return 0;
}

View File

@ -31,135 +31,137 @@ POSSIBILITY OF SUCH DAMAGE.
* http://shootout.alioth.debian.org/ * http://shootout.alioth.debian.org/
* *
* contributed by The Go Authors. * contributed by The Go Authors.
* Based on C program by Joern Inge Vestgaarden * Based on C program by Petr Prokhorenkov.
* and Jorge Peixoto de Morais Neto.
*/ */
package main package main
import ( import (
"bufio" "bytes"
"flag" "flag"
"os" "os"
) )
var out *bufio.Writer var out = make(buffer, 0, 32768)
var n = flag.Int("n", 1000, "length of result") var n = flag.Int("n", 1000, "length of result")
const WIDTH = 60 // Fold lines after WIDTH bytes const Line = 60
func min(a, b int) int { func Repeat(alu []byte, n int) {
if a < b { buf := bytes.Add(alu, alu)
return a off := 0
} for n > 0 {
return b m := n
} if m > Line {
m = Line
type AminoAcid struct {
p float
c byte
}
func AccumulateProbabilities(genelist []AminoAcid) {
for i := 1; i < len(genelist); i++ {
genelist[i].p += genelist[i-1].p
}
}
// RepeatFasta prints the characters of the byte slice s. When it
// reaches the end of the slice, it goes back to the beginning.
// It stops after generating count characters.
// After each WIDTH characters it prints a newline.
// It assumes that WIDTH <= len(s) + 1.
func RepeatFasta(s []byte, count int) {
pos := 0
s2 := make([]byte, len(s)+WIDTH)
copy(s2, s)
copy(s2[len(s):], s)
for count > 0 {
line := min(WIDTH, count)
out.Write(s2[pos : pos+line])
out.WriteByte('\n')
pos += line
if pos >= len(s) {
pos -= len(s)
} }
count -= line buf1 := out.NextWrite(m + 1)
copy(buf1, buf[off:])
buf1[m] = '\n'
if off += m; off >= len(alu) {
off -= len(alu)
}
n -= m
} }
} }
var lastrandom uint32 = 42
const ( const (
IM = 139968 IM = 139968
IA = 3877 IA = 3877
IC = 29573 IC = 29573
LookupSize = 4096
LookupScale float64 = LookupSize - 1
) )
// Each element of genelist is a struct with a character and var rand uint32 = 42
// a floating point number p between 0 and 1.
// RandomFasta generates a random float r and type Acid struct {
// finds the first element such that p >= r. sym byte
// This is a weighted random selection. prob float64
// RandomFasta then prints the character of the array element. cprob float64
// This sequence is repeated count times. next *Acid
// Between each WIDTH consecutive characters, the function prints a newline. }
func RandomFasta(genelist []AminoAcid, count int) {
buf := make([]byte, WIDTH+1) func computeLookup(acid []Acid) *[LookupSize]*Acid {
for count > 0 { var lookup [LookupSize]*Acid
line := min(WIDTH, count) var p float64
for pos := 0; pos < line; pos++ { for i := range acid {
lastrandom = (lastrandom*IA + IC) % IM p += acid[i].prob
// Integer to float conversions are faster if the integer is signed. acid[i].cprob = p * LookupScale
r := float(int32(lastrandom)) / IM if i > 0 {
for _, v := range genelist { acid[i-1].next = &acid[i]
if v.p >= r {
buf[pos] = v.c
break
}
}
} }
buf[line] = '\n' }
out.Write(buf[0 : line+1]) acid[len(acid)-1].cprob = 1.0 * LookupScale
count -= line
j := 0
for i := range lookup {
for acid[j].cprob < float64(i) {
j++
}
lookup[i] = &acid[j]
}
return &lookup
}
func Random(acid []Acid, n int) {
lookup := computeLookup(acid)
for n > 0 {
m := n
if m > Line {
m = Line
}
buf := out.NextWrite(m + 1)
f := LookupScale / IM
myrand := rand
for i := 0; i < m; i++ {
myrand = (myrand*IA + IC) % IM
r := float64(int(myrand)) * f
a := lookup[int(r)]
for a.cprob < r {
a = a.next
}
buf[i] = a.sym
}
rand = myrand
buf[m] = '\n'
n -= m
} }
} }
func main() { func main() {
out = bufio.NewWriter(os.Stdout)
defer out.Flush() defer out.Flush()
flag.Parse() flag.Parse()
iub := []AminoAcid{ iub := []Acid{
AminoAcid{0.27, 'a'}, Acid{prob: 0.27, sym: 'a'},
AminoAcid{0.12, 'c'}, Acid{prob: 0.12, sym: 'c'},
AminoAcid{0.12, 'g'}, Acid{prob: 0.12, sym: 'g'},
AminoAcid{0.27, 't'}, Acid{prob: 0.27, sym: 't'},
AminoAcid{0.02, 'B'}, Acid{prob: 0.02, sym: 'B'},
AminoAcid{0.02, 'D'}, Acid{prob: 0.02, sym: 'D'},
AminoAcid{0.02, 'H'}, Acid{prob: 0.02, sym: 'H'},
AminoAcid{0.02, 'K'}, Acid{prob: 0.02, sym: 'K'},
AminoAcid{0.02, 'M'}, Acid{prob: 0.02, sym: 'M'},
AminoAcid{0.02, 'N'}, Acid{prob: 0.02, sym: 'N'},
AminoAcid{0.02, 'R'}, Acid{prob: 0.02, sym: 'R'},
AminoAcid{0.02, 'S'}, Acid{prob: 0.02, sym: 'S'},
AminoAcid{0.02, 'V'}, Acid{prob: 0.02, sym: 'V'},
AminoAcid{0.02, 'W'}, Acid{prob: 0.02, sym: 'W'},
AminoAcid{0.02, 'Y'}, Acid{prob: 0.02, sym: 'Y'},
} }
homosapiens := []AminoAcid{ homosapiens := []Acid{
AminoAcid{0.3029549426680, 'a'}, Acid{prob: 0.3029549426680, sym: 'a'},
AminoAcid{0.1979883004921, 'c'}, Acid{prob: 0.1979883004921, sym: 'c'},
AminoAcid{0.1975473066391, 'g'}, Acid{prob: 0.1975473066391, sym: 'g'},
AminoAcid{0.3015094502008, 't'}, Acid{prob: 0.3015094502008, sym: 't'},
} }
AccumulateProbabilities(iub)
AccumulateProbabilities(homosapiens)
alu := []byte( alu := []byte(
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" + "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
@ -170,9 +172,38 @@ func main() {
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA") "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
out.WriteString(">ONE Homo sapiens alu\n") out.WriteString(">ONE Homo sapiens alu\n")
RepeatFasta(alu, 2**n) Repeat(alu, 2**n)
out.WriteString(">TWO IUB ambiguity codes\n") out.WriteString(">TWO IUB ambiguity codes\n")
RandomFasta(iub, 3**n) Random(iub, 3**n)
out.WriteString(">THREE Homo sapiens frequency\n") out.WriteString(">THREE Homo sapiens frequency\n")
RandomFasta(homosapiens, 5**n) Random(homosapiens, 5**n)
}
// buffer is a byte slice used as an output buffer: NextWrite hands out
// space at its end and Flush writes the accumulated bytes to os.Stdout.
type buffer []byte
// Flush writes any buffered bytes to os.Stdout and resets the buffer to
// length zero, keeping its backing storage. The result of the write is not
// checked.
func (b *buffer) Flush() {
	pending := *b
	if len(pending) != 0 {
		os.Stdout.Write(pending)
	}
	*b = pending[:0]
}
// WriteString appends the bytes of s to the buffer.
func (b *buffer) WriteString(s string) {
	// NextWrite reserves exactly len(s) bytes, so copy fills all of them.
	copy(b.NextWrite(len(s)), s)
}
func (b *buffer) NextWrite(n int) []byte {
p := *b
if len(p)+n > cap(p) {
b.Flush()
p = *b
}
out := p[len(p) : len(p)+n]
*b = p[0 : len(p)+n]
return out
} }