2009-08-03 22:03:58 -06:00
|
|
|
/*
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

  * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

  * Neither the name of "The Computer Language Benchmarks Game" nor the
    name of "The Computer Language Shootout Benchmarks" nor the names of
    its contributors may be used to endorse or promote products derived
    from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.
*/
|
|
|
|
|
|
|
|
/*
 * http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gcc&id=4
 */
|
|
|
|
/* The Computer Language Benchmarks Game
 * http://shootout.alioth.debian.org/
 * Contributed by Joern Inge Vestgaarden
 * Modified by Jorge Peixoto de Morais Neto
 */
|
|
|
|
|
|
|
|
#include <err.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
|
|
|
|
|
|
|
/* Number of sequence characters written per output line. */
#define WIDTH 60
/* Smaller of a and b.  The chosen argument is evaluated twice, so do  */
/* not pass expressions with side effects.                             */
#define MIN(a,b) ((a) <= (b) ? (a) : (b))
/* Element count of a true array x (not valid for a pointer). */
#define NELEMENTS(x) (sizeof (x) / sizeof ((x)[0]))
|
|
|
|
|
|
|
|
/* One entry of a weighted symbol table.  The tables in this file are  */
/* initialized positionally as { p, c }, so the field order matters.   */
typedef struct {
    float p;   /* probability of the symbol */
    char c;    /* the symbol itself */
} aminoacid_t;
|
|
|
|
|
|
|
|
/* Linear congruential pseudo-random number generator.                 */
/* Each call advances a state kept in a function-local static          */
/* variable (initially 42) with last = (last * IA + IC) % IM and       */
/* returns max * last / IM, where 0 <= last < IM.  The sequence is     */
/* therefore the same on every run of the program.                     */
static inline float myrandom (float max) {
    unsigned long const IM = 139968;   /* modulus */
    unsigned long const IA = 3877;     /* multiplier */
    unsigned long const IC = 29573;    /* increment */
    static unsigned long last = 42;    /* generator state, kept across calls */

    last = (last * IA + IC) % IM;
    /* Integer to float conversions are faster if the integer is signed */
    return max * (long) last / IM;
}
|
|
|
|
|
|
|
|
/* Replaces the probabilities in genelist[0..len-1] by their running   */
/* sum, in place: afterwards genelist[i].p holds the sum of the        */
/* original p values of elements 0 through i.                          */
static inline void accumulate_probabilities (aminoacid_t *genelist, size_t len) {
    float cp = 0.0f;   /* running total so far */
    size_t i;

    for (i = 0; i < len; i++) {
        cp += genelist[i].p;
        genelist[i].p = cp;
    }
}
|
|
|
|
|
|
|
|
/* This function prints the characters of the string s. When it */
|
|
|
|
/* reaches the end of the string, it goes back to the beginning */
|
|
|
|
/* It stops when the total number of characters printed is count. */
|
|
|
|
/* Between each WIDTH consecutive characters it prints a newline */
|
|
|
|
/* This function assumes that WIDTH <= strlen (s) + 1 */
|
|
|
|
static void repeat_fasta (char const *s, size_t count) {
|
|
|
|
size_t pos = 0;
|
|
|
|
size_t len = strlen (s);
|
|
|
|
char *s2 = malloc (len + WIDTH);
|
|
|
|
memcpy (s2, s, len);
|
|
|
|
memcpy (s2 + len, s, WIDTH);
|
|
|
|
do {
|
|
|
|
size_t line = MIN(WIDTH, count);
|
2009-11-20 09:59:11 -07:00
|
|
|
fwrite (s2 + pos,1,line,stdout);
|
2009-08-03 22:03:58 -06:00
|
|
|
putchar_unlocked ('\n');
|
|
|
|
pos += line;
|
|
|
|
if (pos >= len) pos -= len;
|
|
|
|
count -= line;
|
|
|
|
} while (count);
|
|
|
|
free (s2);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This function takes a pointer to the first element of an array */
|
|
|
|
/* Each element of the array is a struct with a character and */
|
|
|
|
/* a float number p between 0 and 1. */
|
|
|
|
/* The function generates a random float number r and */
|
|
|
|
/* finds the first array element such that p >= r. */
|
|
|
|
/* This is a weighted random selection. */
|
|
|
|
/* The function then prints the character of the array element. */
|
|
|
|
/* This is done count times. */
|
|
|
|
/* Between each WIDTH consecutive characters, the function prints a newline */
|
|
|
|
static void random_fasta (aminoacid_t const *genelist, size_t count) {
|
|
|
|
do {
|
|
|
|
size_t line = MIN(WIDTH, count);
|
|
|
|
size_t pos = 0;
|
|
|
|
char buf[WIDTH + 1];
|
|
|
|
do {
|
|
|
|
float r = myrandom (1.0);
|
|
|
|
size_t i = 0;
|
|
|
|
while (genelist[i].p < r)
|
|
|
|
++i; /* Linear search */
|
|
|
|
buf[pos++] = genelist[i].c;
|
|
|
|
} while (pos < line);
|
|
|
|
buf[line] = '\n';
|
2009-11-20 09:59:11 -07:00
|
|
|
fwrite (buf, 1, line + 1, stdout);
|
2009-08-03 22:03:58 -06:00
|
|
|
count -= line;
|
|
|
|
} while (count);
|
|
|
|
}
|
|
|
|
|
|
|
|
int main (int argc, char **argv) {
|
|
|
|
size_t n;
|
|
|
|
if (argc > 1) {
|
|
|
|
char const *arg = argv[1];
|
|
|
|
char *tail;
|
|
|
|
n = strtoul (arg, &tail, 0);
|
|
|
|
if (tail == arg)
|
|
|
|
errx (1, "Could not convert \"%s\" to an unsigned long integer", arg);
|
|
|
|
} else n = 1000;
|
|
|
|
|
|
|
|
static aminoacid_t iub[] = {
|
|
|
|
{ 0.27, 'a' },
|
|
|
|
{ 0.12, 'c' },
|
|
|
|
{ 0.12, 'g' },
|
|
|
|
{ 0.27, 't' },
|
|
|
|
{ 0.02, 'B' },
|
|
|
|
{ 0.02, 'D' },
|
|
|
|
{ 0.02, 'H' },
|
|
|
|
{ 0.02, 'K' },
|
|
|
|
{ 0.02, 'M' },
|
|
|
|
{ 0.02, 'N' },
|
|
|
|
{ 0.02, 'R' },
|
|
|
|
{ 0.02, 'S' },
|
|
|
|
{ 0.02, 'V' },
|
|
|
|
{ 0.02, 'W' },
|
|
|
|
{ 0.02, 'Y' }};
|
|
|
|
|
|
|
|
static aminoacid_t homosapiens[] = {
|
|
|
|
{ 0.3029549426680, 'a' },
|
|
|
|
{ 0.1979883004921, 'c' },
|
|
|
|
{ 0.1975473066391, 'g' },
|
|
|
|
{ 0.3015094502008, 't' }};
|
|
|
|
|
|
|
|
accumulate_probabilities (iub, NELEMENTS(iub));
|
|
|
|
accumulate_probabilities (homosapiens, NELEMENTS(homosapiens));
|
|
|
|
|
|
|
|
static char const *const alu ="\
|
|
|
|
GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
|
|
|
|
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
|
|
|
|
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
|
|
|
|
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
|
|
|
|
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
|
|
|
|
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
|
|
|
|
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
|
|
|
|
|
2009-11-20 09:59:11 -07:00
|
|
|
fputs (">ONE Homo sapiens alu\n", stdout);
|
2009-08-03 22:03:58 -06:00
|
|
|
repeat_fasta (alu, 2 * n);
|
2009-11-20 09:59:11 -07:00
|
|
|
fputs (">TWO IUB ambiguity codes\n", stdout);
|
2009-08-03 22:03:58 -06:00
|
|
|
random_fasta (iub, 3 * n);
|
2009-11-20 09:59:11 -07:00
|
|
|
fputs (">THREE Homo sapiens frequency\n", stdout);
|
2009-08-03 22:03:58 -06:00
|
|
|
random_fasta (homosapiens, 5 * n);
|
|
|
|
return 0;
|
|
|
|
}
|