mirror of
https://github.com/golang/go
synced 2024-11-25 02:57:57 -07:00
test/bench: import new fasta C reference, update Go, optimizations
OLD fasta -n 25000000 gcc -O2 fasta.c 7.59u 0.06s 7.74r gc fasta 9.54u 0.15s 9.84r gc_B fasta 9.48u 0.10s 9.62r NEW fasta -n 25000000 gcc -O2 fasta.c 2.59u 0.02s 2.66r gc fasta 3.00u 0.03s 3.09r gc_B fasta 2.72u 0.03s 2.81r R=r CC=golang-dev https://golang.org/cl/1054041
This commit is contained in:
parent
7d7ebd2fe1
commit
f8f83e80b1
@ -28,146 +28,190 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gcc&id=4
|
* http://shootout.alioth.debian.org/u32/program.php?test=fasta&lang=gcc&id=3
|
||||||
*/
|
*/
|
||||||
/* The Computer Language Benchmarks Game
|
|
||||||
* http://shootout.alioth.debian.org/
|
/* The Computer Language Benchmarks Game
|
||||||
* Contributed by Joern Inge Vestgaarden
|
* http://shootout.alioth.debian.org/
|
||||||
* Modified by Jorge Peixoto de Morais Neto
|
*
|
||||||
|
* contributed by Petr Prokhorenkov
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <err.h>
|
|
||||||
|
|
||||||
#define WIDTH 60
|
// not available on OS X
|
||||||
#define MIN(a,b) ((a) <= (b) ? (a) : (b))
|
#define fwrite_unlocked fwrite
|
||||||
#define NELEMENTS(x) (sizeof (x) / sizeof ((x)[0]))
|
#define fputc_unlocked fputc
|
||||||
|
#define fputs_unlocked fputs
|
||||||
|
|
||||||
typedef struct {
|
#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
|
||||||
float p;
|
#define unlikely(x) __builtin_expect((x), 0)
|
||||||
char c;
|
|
||||||
} aminoacid_t;
|
|
||||||
|
|
||||||
static inline float myrandom (float max) {
|
#define IM 139968
|
||||||
unsigned long const IM = 139968;
|
#define IA 3877
|
||||||
unsigned long const IA = 3877;
|
#define IC 29573
|
||||||
unsigned long const IC = 29573;
|
|
||||||
static unsigned long last = 42;
|
#define LINE_LEN 60
|
||||||
last = (last * IA + IC) % IM;
|
#define LOOKUP_SIZE 4096
|
||||||
/*Integer to float conversions are faster if the integer is signed*/
|
#define LOOKUP_SCALE ((float)(LOOKUP_SIZE - 1))
|
||||||
return max * (long) last / IM;
|
|
||||||
|
typedef unsigned random_t;
|
||||||
|
|
||||||
|
void
|
||||||
|
random_init(random_t *random) {
|
||||||
|
*random = 42;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void accumulate_probabilities (aminoacid_t *genelist, size_t len) {
|
// Special version with result rescaled to LOOKUP_SCALE.
|
||||||
float cp = 0.0;
|
static inline
|
||||||
size_t i;
|
float
|
||||||
for (i = 0; i < len; i++) {
|
random_next_lookup(random_t *random) {
|
||||||
cp += genelist[i].p;
|
*random = (*random*IA + IC)%IM;
|
||||||
genelist[i].p = cp;
|
|
||||||
|
return (*random)*(LOOKUP_SCALE/IM);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct amino_acid {
|
||||||
|
char sym;
|
||||||
|
float prob;
|
||||||
|
float cprob_lookup;
|
||||||
|
};
|
||||||
|
|
||||||
|
void
|
||||||
|
repeat(const char *alu, const char *title, int n) {
|
||||||
|
int len = strlen(alu);
|
||||||
|
char buffer[len + LINE_LEN];
|
||||||
|
int pos = 0;
|
||||||
|
|
||||||
|
memcpy(buffer, alu, len);
|
||||||
|
memcpy(buffer + len, alu, LINE_LEN);
|
||||||
|
|
||||||
|
fputs_unlocked(title, stdout);
|
||||||
|
while (n > 0) {
|
||||||
|
int bytes = n > LINE_LEN ? LINE_LEN : n;
|
||||||
|
|
||||||
|
fwrite_unlocked(buffer + pos, bytes, 1, stdout);
|
||||||
|
pos += bytes;
|
||||||
|
if (pos > len) {
|
||||||
|
pos -= len;
|
||||||
|
}
|
||||||
|
fputc_unlocked('\n', stdout);
|
||||||
|
n -= bytes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This function prints the characters of the string s. When it */
|
/*
|
||||||
/* reaches the end of the string, it goes back to the beginning */
|
* Lookup table contains mapping from real values to cumulative
|
||||||
/* It stops when the total number of characters printed is count. */
|
* probabilities. Careful selection of table size allows lookup
|
||||||
/* Between each WIDTH consecutive characters it prints a newline */
|
* virtually in constant time.
|
||||||
/* This function assumes that WIDTH <= strlen (s) + 1 */
|
*
|
||||||
static void repeat_fasta (char const *s, size_t count) {
|
* All cumulative probabilities are rescaled to LOOKUP_SCALE,
|
||||||
size_t pos = 0;
|
* this allows to save one multiplication operation on each iteration
|
||||||
size_t len = strlen (s);
|
* in randomize().
|
||||||
char *s2 = malloc (len + WIDTH);
|
*/
|
||||||
memcpy (s2, s, len);
|
|
||||||
memcpy (s2 + len, s, WIDTH);
|
|
||||||
do {
|
|
||||||
size_t line = MIN(WIDTH, count);
|
|
||||||
fwrite (s2 + pos,1,line,stdout);
|
|
||||||
putchar_unlocked ('\n');
|
|
||||||
pos += line;
|
|
||||||
if (pos >= len) pos -= len;
|
|
||||||
count -= line;
|
|
||||||
} while (count);
|
|
||||||
free (s2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This function takes a pointer to the first element of an array */
|
void *
|
||||||
/* Each element of the array is a struct with a character and */
|
fill_lookup(struct amino_acid **lookup, struct amino_acid *amino_acid, int amino_acid_size) {
|
||||||
/* a float number p between 0 and 1. */
|
float p = 0;
|
||||||
/* The function generates a random float number r and */
|
int i, j;
|
||||||
/* finds the first array element such that p >= r. */
|
|
||||||
/* This is a weighted random selection. */
|
|
||||||
/* The function then prints the character of the array element. */
|
|
||||||
/* This is done count times. */
|
|
||||||
/* Between each WIDTH consecutive characters, the function prints a newline */
|
|
||||||
static void random_fasta (aminoacid_t const *genelist, size_t count) {
|
|
||||||
do {
|
|
||||||
size_t line = MIN(WIDTH, count);
|
|
||||||
size_t pos = 0;
|
|
||||||
char buf[WIDTH + 1];
|
|
||||||
do {
|
|
||||||
float r = myrandom (1.0);
|
|
||||||
size_t i = 0;
|
|
||||||
while (genelist[i].p < r)
|
|
||||||
++i; /* Linear search */
|
|
||||||
buf[pos++] = genelist[i].c;
|
|
||||||
} while (pos < line);
|
|
||||||
buf[line] = '\n';
|
|
||||||
fwrite (buf, 1, line + 1, stdout);
|
|
||||||
count -= line;
|
|
||||||
} while (count);
|
|
||||||
}
|
|
||||||
|
|
||||||
int main (int argc, char **argv) {
|
for (i = 0; i < amino_acid_size; i++) {
|
||||||
size_t n;
|
p += amino_acid[i].prob;
|
||||||
if (argc > 1) {
|
amino_acid[i].cprob_lookup = p*LOOKUP_SCALE;
|
||||||
char const *arg = argv[1];
|
}
|
||||||
char *tail;
|
|
||||||
n = strtoul (arg, &tail, 0);
|
|
||||||
if (tail == arg)
|
|
||||||
errx (1, "Could not convert \"%s\" to an unsigned long integer", arg);
|
|
||||||
} else n = 1000;
|
|
||||||
|
|
||||||
static aminoacid_t iub[] = {
|
// Prevent rounding error.
|
||||||
{ 0.27, 'a' },
|
amino_acid[amino_acid_size - 1].cprob_lookup = LOOKUP_SIZE - 1;
|
||||||
{ 0.12, 'c' },
|
|
||||||
{ 0.12, 'g' },
|
|
||||||
{ 0.27, 't' },
|
|
||||||
{ 0.02, 'B' },
|
|
||||||
{ 0.02, 'D' },
|
|
||||||
{ 0.02, 'H' },
|
|
||||||
{ 0.02, 'K' },
|
|
||||||
{ 0.02, 'M' },
|
|
||||||
{ 0.02, 'N' },
|
|
||||||
{ 0.02, 'R' },
|
|
||||||
{ 0.02, 'S' },
|
|
||||||
{ 0.02, 'V' },
|
|
||||||
{ 0.02, 'W' },
|
|
||||||
{ 0.02, 'Y' }};
|
|
||||||
|
|
||||||
static aminoacid_t homosapiens[] = {
|
for (i = 0, j = 0; i < LOOKUP_SIZE; i++) {
|
||||||
{ 0.3029549426680, 'a' },
|
while (amino_acid[j].cprob_lookup < i) {
|
||||||
{ 0.1979883004921, 'c' },
|
j++;
|
||||||
{ 0.1975473066391, 'g' },
|
}
|
||||||
{ 0.3015094502008, 't' }};
|
lookup[i] = &amino_acid[j];
|
||||||
|
}
|
||||||
|
|
||||||
accumulate_probabilities (iub, NELEMENTS(iub));
|
|
||||||
accumulate_probabilities (homosapiens, NELEMENTS(homosapiens));
|
|
||||||
|
|
||||||
static char const *const alu ="\
|
|
||||||
GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
|
|
||||||
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
|
|
||||||
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
|
|
||||||
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
|
|
||||||
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
|
|
||||||
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
|
|
||||||
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
|
|
||||||
|
|
||||||
fputs (">ONE Homo sapiens alu\n", stdout);
|
|
||||||
repeat_fasta (alu, 2 * n);
|
|
||||||
fputs (">TWO IUB ambiguity codes\n", stdout);
|
|
||||||
random_fasta (iub, 3 * n);
|
|
||||||
fputs (">THREE Homo sapiens frequency\n", stdout);
|
|
||||||
random_fasta (homosapiens, 5 * n);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
randomize(struct amino_acid *amino_acid, int amino_acid_size,
|
||||||
|
const char *title, int n, random_t *rand) {
|
||||||
|
struct amino_acid *lookup[LOOKUP_SIZE];
|
||||||
|
char line_buffer[LINE_LEN + 1];
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
line_buffer[LINE_LEN] = '\n';
|
||||||
|
|
||||||
|
fill_lookup(lookup, amino_acid, amino_acid_size);
|
||||||
|
|
||||||
|
fputs_unlocked(title, stdout);
|
||||||
|
|
||||||
|
for (i = 0, j = 0; i < n; i++, j++) {
|
||||||
|
if (j == LINE_LEN) {
|
||||||
|
fwrite_unlocked(line_buffer, LINE_LEN + 1, 1, stdout);
|
||||||
|
j = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
float r = random_next_lookup(rand);
|
||||||
|
struct amino_acid *u = lookup[(short)r];
|
||||||
|
while (unlikely(u->cprob_lookup < r)) {
|
||||||
|
++u;
|
||||||
|
}
|
||||||
|
line_buffer[j] = u->sym;
|
||||||
|
}
|
||||||
|
line_buffer[j] = '\n';
|
||||||
|
fwrite_unlocked(line_buffer, j + 1, 1, stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct amino_acid amino_acid[] = {
|
||||||
|
{ 'a', 0.27 },
|
||||||
|
{ 'c', 0.12 },
|
||||||
|
{ 'g', 0.12 },
|
||||||
|
{ 't', 0.27 },
|
||||||
|
|
||||||
|
{ 'B', 0.02 },
|
||||||
|
{ 'D', 0.02 },
|
||||||
|
{ 'H', 0.02 },
|
||||||
|
{ 'K', 0.02 },
|
||||||
|
{ 'M', 0.02 },
|
||||||
|
{ 'N', 0.02 },
|
||||||
|
{ 'R', 0.02 },
|
||||||
|
{ 'S', 0.02 },
|
||||||
|
{ 'V', 0.02 },
|
||||||
|
{ 'W', 0.02 },
|
||||||
|
{ 'Y', 0.02 },
|
||||||
|
};
|
||||||
|
|
||||||
|
struct amino_acid homo_sapiens[] = {
|
||||||
|
{ 'a', 0.3029549426680 },
|
||||||
|
{ 'c', 0.1979883004921 },
|
||||||
|
{ 'g', 0.1975473066391 },
|
||||||
|
{ 't', 0.3015094502008 },
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char alu[] =
|
||||||
|
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG"
|
||||||
|
"GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA"
|
||||||
|
"GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA"
|
||||||
|
"AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT"
|
||||||
|
"CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC"
|
||||||
|
"CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG"
|
||||||
|
"CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, const char **argv) {
|
||||||
|
int n = argc > 1 ? atoi( argv[1] ) : 512;
|
||||||
|
random_t rand;
|
||||||
|
|
||||||
|
random_init(&rand);
|
||||||
|
|
||||||
|
repeat(alu, ">ONE Homo sapiens alu\n", n*2);
|
||||||
|
randomize(amino_acid, ARRAY_SIZE(amino_acid),
|
||||||
|
">TWO IUB ambiguity codes\n", n*3, &rand);
|
||||||
|
randomize(homo_sapiens, ARRAY_SIZE(homo_sapiens),
|
||||||
|
">THREE Homo sapiens frequency\n", n*5, &rand);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
@ -31,135 +31,137 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
* http://shootout.alioth.debian.org/
|
* http://shootout.alioth.debian.org/
|
||||||
*
|
*
|
||||||
* contributed by The Go Authors.
|
* contributed by The Go Authors.
|
||||||
* Based on C program by Joern Inge Vestgaarden
|
* Based on C program by by Petr Prokhorenkov.
|
||||||
* and Jorge Peixoto de Morais Neto.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bytes"
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
)
|
)
|
||||||
|
|
||||||
var out *bufio.Writer
|
var out = make(buffer, 0, 32768)
|
||||||
|
|
||||||
var n = flag.Int("n", 1000, "length of result")
|
var n = flag.Int("n", 1000, "length of result")
|
||||||
|
|
||||||
const WIDTH = 60 // Fold lines after WIDTH bytes
|
const Line = 60
|
||||||
|
|
||||||
func min(a, b int) int {
|
func Repeat(alu []byte, n int) {
|
||||||
if a < b {
|
buf := bytes.Add(alu, alu)
|
||||||
return a
|
off := 0
|
||||||
}
|
for n > 0 {
|
||||||
return b
|
m := n
|
||||||
}
|
if m > Line {
|
||||||
|
m = Line
|
||||||
type AminoAcid struct {
|
|
||||||
p float
|
|
||||||
c byte
|
|
||||||
}
|
|
||||||
|
|
||||||
func AccumulateProbabilities(genelist []AminoAcid) {
|
|
||||||
for i := 1; i < len(genelist); i++ {
|
|
||||||
genelist[i].p += genelist[i-1].p
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RepeatFasta prints the characters of the byte slice s. When it
|
|
||||||
// reaches the end of the slice, it goes back to the beginning.
|
|
||||||
// It stops after generating count characters.
|
|
||||||
// After each WIDTH characters it prints a newline.
|
|
||||||
// It assumes that WIDTH <= len(s) + 1.
|
|
||||||
func RepeatFasta(s []byte, count int) {
|
|
||||||
pos := 0
|
|
||||||
s2 := make([]byte, len(s)+WIDTH)
|
|
||||||
copy(s2, s)
|
|
||||||
copy(s2[len(s):], s)
|
|
||||||
for count > 0 {
|
|
||||||
line := min(WIDTH, count)
|
|
||||||
out.Write(s2[pos : pos+line])
|
|
||||||
out.WriteByte('\n')
|
|
||||||
pos += line
|
|
||||||
if pos >= len(s) {
|
|
||||||
pos -= len(s)
|
|
||||||
}
|
}
|
||||||
count -= line
|
buf1 := out.NextWrite(m + 1)
|
||||||
|
copy(buf1, buf[off:])
|
||||||
|
buf1[m] = '\n'
|
||||||
|
if off += m; off >= len(alu) {
|
||||||
|
off -= len(alu)
|
||||||
|
}
|
||||||
|
n -= m
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var lastrandom uint32 = 42
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
IM = 139968
|
IM = 139968
|
||||||
IA = 3877
|
IA = 3877
|
||||||
IC = 29573
|
IC = 29573
|
||||||
|
|
||||||
|
LookupSize = 4096
|
||||||
|
LookupScale float64 = LookupSize - 1
|
||||||
)
|
)
|
||||||
|
|
||||||
// Each element of genelist is a struct with a character and
|
var rand uint32 = 42
|
||||||
// a floating point number p between 0 and 1.
|
|
||||||
// RandomFasta generates a random float r and
|
type Acid struct {
|
||||||
// finds the first element such that p >= r.
|
sym byte
|
||||||
// This is a weighted random selection.
|
prob float64
|
||||||
// RandomFasta then prints the character of the array element.
|
cprob float64
|
||||||
// This sequence is repeated count times.
|
next *Acid
|
||||||
// Between each WIDTH consecutive characters, the function prints a newline.
|
}
|
||||||
func RandomFasta(genelist []AminoAcid, count int) {
|
|
||||||
buf := make([]byte, WIDTH+1)
|
func computeLookup(acid []Acid) *[LookupSize]*Acid {
|
||||||
for count > 0 {
|
var lookup [LookupSize]*Acid
|
||||||
line := min(WIDTH, count)
|
var p float64
|
||||||
for pos := 0; pos < line; pos++ {
|
for i := range acid {
|
||||||
lastrandom = (lastrandom*IA + IC) % IM
|
p += acid[i].prob
|
||||||
// Integer to float conversions are faster if the integer is signed.
|
acid[i].cprob = p * LookupScale
|
||||||
r := float(int32(lastrandom)) / IM
|
if i > 0 {
|
||||||
for _, v := range genelist {
|
acid[i-1].next = &acid[i]
|
||||||
if v.p >= r {
|
|
||||||
buf[pos] = v.c
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
buf[line] = '\n'
|
}
|
||||||
out.Write(buf[0 : line+1])
|
acid[len(acid)-1].cprob = 1.0 * LookupScale
|
||||||
count -= line
|
|
||||||
|
j := 0
|
||||||
|
for i := range lookup {
|
||||||
|
for acid[j].cprob < float64(i) {
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
lookup[i] = &acid[j]
|
||||||
|
}
|
||||||
|
|
||||||
|
return &lookup
|
||||||
|
}
|
||||||
|
|
||||||
|
func Random(acid []Acid, n int) {
|
||||||
|
lookup := computeLookup(acid)
|
||||||
|
for n > 0 {
|
||||||
|
m := n
|
||||||
|
if m > Line {
|
||||||
|
m = Line
|
||||||
|
}
|
||||||
|
buf := out.NextWrite(m + 1)
|
||||||
|
f := LookupScale / IM
|
||||||
|
myrand := rand
|
||||||
|
for i := 0; i < m; i++ {
|
||||||
|
myrand = (myrand*IA + IC) % IM
|
||||||
|
r := float64(int(myrand)) * f
|
||||||
|
a := lookup[int(r)]
|
||||||
|
for a.cprob < r {
|
||||||
|
a = a.next
|
||||||
|
}
|
||||||
|
buf[i] = a.sym
|
||||||
|
}
|
||||||
|
rand = myrand
|
||||||
|
buf[m] = '\n'
|
||||||
|
n -= m
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
out = bufio.NewWriter(os.Stdout)
|
|
||||||
defer out.Flush()
|
defer out.Flush()
|
||||||
|
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
iub := []AminoAcid{
|
iub := []Acid{
|
||||||
AminoAcid{0.27, 'a'},
|
Acid{prob: 0.27, sym: 'a'},
|
||||||
AminoAcid{0.12, 'c'},
|
Acid{prob: 0.12, sym: 'c'},
|
||||||
AminoAcid{0.12, 'g'},
|
Acid{prob: 0.12, sym: 'g'},
|
||||||
AminoAcid{0.27, 't'},
|
Acid{prob: 0.27, sym: 't'},
|
||||||
AminoAcid{0.02, 'B'},
|
Acid{prob: 0.02, sym: 'B'},
|
||||||
AminoAcid{0.02, 'D'},
|
Acid{prob: 0.02, sym: 'D'},
|
||||||
AminoAcid{0.02, 'H'},
|
Acid{prob: 0.02, sym: 'H'},
|
||||||
AminoAcid{0.02, 'K'},
|
Acid{prob: 0.02, sym: 'K'},
|
||||||
AminoAcid{0.02, 'M'},
|
Acid{prob: 0.02, sym: 'M'},
|
||||||
AminoAcid{0.02, 'N'},
|
Acid{prob: 0.02, sym: 'N'},
|
||||||
AminoAcid{0.02, 'R'},
|
Acid{prob: 0.02, sym: 'R'},
|
||||||
AminoAcid{0.02, 'S'},
|
Acid{prob: 0.02, sym: 'S'},
|
||||||
AminoAcid{0.02, 'V'},
|
Acid{prob: 0.02, sym: 'V'},
|
||||||
AminoAcid{0.02, 'W'},
|
Acid{prob: 0.02, sym: 'W'},
|
||||||
AminoAcid{0.02, 'Y'},
|
Acid{prob: 0.02, sym: 'Y'},
|
||||||
}
|
}
|
||||||
|
|
||||||
homosapiens := []AminoAcid{
|
homosapiens := []Acid{
|
||||||
AminoAcid{0.3029549426680, 'a'},
|
Acid{prob: 0.3029549426680, sym: 'a'},
|
||||||
AminoAcid{0.1979883004921, 'c'},
|
Acid{prob: 0.1979883004921, sym: 'c'},
|
||||||
AminoAcid{0.1975473066391, 'g'},
|
Acid{prob: 0.1975473066391, sym: 'g'},
|
||||||
AminoAcid{0.3015094502008, 't'},
|
Acid{prob: 0.3015094502008, sym: 't'},
|
||||||
}
|
}
|
||||||
|
|
||||||
AccumulateProbabilities(iub)
|
|
||||||
AccumulateProbabilities(homosapiens)
|
|
||||||
|
|
||||||
alu := []byte(
|
alu := []byte(
|
||||||
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
|
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
|
||||||
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
|
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
|
||||||
@ -170,9 +172,38 @@ func main() {
|
|||||||
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
|
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
|
||||||
|
|
||||||
out.WriteString(">ONE Homo sapiens alu\n")
|
out.WriteString(">ONE Homo sapiens alu\n")
|
||||||
RepeatFasta(alu, 2**n)
|
Repeat(alu, 2**n)
|
||||||
out.WriteString(">TWO IUB ambiguity codes\n")
|
out.WriteString(">TWO IUB ambiguity codes\n")
|
||||||
RandomFasta(iub, 3**n)
|
Random(iub, 3**n)
|
||||||
out.WriteString(">THREE Homo sapiens frequency\n")
|
out.WriteString(">THREE Homo sapiens frequency\n")
|
||||||
RandomFasta(homosapiens, 5**n)
|
Random(homosapiens, 5**n)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
type buffer []byte
|
||||||
|
|
||||||
|
func (b *buffer) Flush() {
|
||||||
|
p := *b
|
||||||
|
if len(p) > 0 {
|
||||||
|
os.Stdout.Write(p)
|
||||||
|
}
|
||||||
|
*b = p[0:0]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *buffer) WriteString(s string) {
|
||||||
|
p := b.NextWrite(len(s))
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
p[i] = s[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *buffer) NextWrite(n int) []byte {
|
||||||
|
p := *b
|
||||||
|
if len(p)+n > cap(p) {
|
||||||
|
b.Flush()
|
||||||
|
p = *b
|
||||||
|
}
|
||||||
|
out := p[len(p) : len(p)+n]
|
||||||
|
*b = p[0 : len(p)+n]
|
||||||
|
return out
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user