Home | History | Annotate | Download | only in go1
      1 // Copyright 2011 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package go1
      6 
      7 import "runtime"
      8 
      9 // Not a benchmark; input for revcomp.
     10 
     11 var fastabytes = makefasta()
     12 
     13 func makefasta() []byte {
     14 	var n int = 25e6
     15 	if runtime.GOARCH == "arm" {
     16 		// TODO(dfc) remove this limitation after precise gc.
     17 		// A value of 25e6 consumes 465mb of heap on 32bit 
     18 		// platforms, which is too much for most ARM systems. 
     19 		// A value of 25e5 produces a memory layout that 
     20 		// confuses the gc on 32bit platforms. So 25e4 it is.
     21 		n = 25e4
     22 	}
     23 	return fasta(n)
     24 }
     25 
     26 func fasta(n int) []byte {
     27 	out := make(fastaBuffer, 0, 11*n)
     28 
     29 	iub := []fastaAcid{
     30 		{prob: 0.27, sym: 'a'},
     31 		{prob: 0.12, sym: 'c'},
     32 		{prob: 0.12, sym: 'g'},
     33 		{prob: 0.27, sym: 't'},
     34 		{prob: 0.02, sym: 'B'},
     35 		{prob: 0.02, sym: 'D'},
     36 		{prob: 0.02, sym: 'H'},
     37 		{prob: 0.02, sym: 'K'},
     38 		{prob: 0.02, sym: 'M'},
     39 		{prob: 0.02, sym: 'N'},
     40 		{prob: 0.02, sym: 'R'},
     41 		{prob: 0.02, sym: 'S'},
     42 		{prob: 0.02, sym: 'V'},
     43 		{prob: 0.02, sym: 'W'},
     44 		{prob: 0.02, sym: 'Y'},
     45 	}
     46 
     47 	homosapiens := []fastaAcid{
     48 		{prob: 0.3029549426680, sym: 'a'},
     49 		{prob: 0.1979883004921, sym: 'c'},
     50 		{prob: 0.1975473066391, sym: 'g'},
     51 		{prob: 0.3015094502008, sym: 't'},
     52 	}
     53 
     54 	alu := []byte(
     55 		"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
     56 			"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
     57 			"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
     58 			"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
     59 			"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
     60 			"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
     61 			"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
     62 
     63 	out.WriteString(">ONE Homo sapiens alu\n")
     64 	fastaRepeat(&out, alu, 2*n)
     65 	out.WriteString(">TWO IUB ambiguity codes\n")
     66 	fastaRandom(&out, iub, 3*n)
     67 	out.WriteString(">THREE Homo sapiens frequency\n")
     68 	fastaRandom(&out, homosapiens, 5*n)
     69 	return out
     70 }
     71 
     72 type fastaBuffer []byte
     73 
     74 func (b *fastaBuffer) Flush() {
     75 	panic("flush")
     76 }
     77 
     78 func (b *fastaBuffer) WriteString(s string) {
     79 	p := b.NextWrite(len(s))
     80 	copy(p, s)
     81 }
     82 
     83 func (b *fastaBuffer) NextWrite(n int) []byte {
     84 	p := *b
     85 	if len(p)+n > cap(p) {
     86 		b.Flush()
     87 		p = *b
     88 	}
     89 	out := p[len(p) : len(p)+n]
     90 	*b = p[:len(p)+n]
     91 	return out
     92 }
     93 
     94 const fastaLine = 60
     95 
     96 func fastaRepeat(out *fastaBuffer, alu []byte, n int) {
     97 	buf := append(alu, alu...)
     98 	off := 0
     99 	for n > 0 {
    100 		m := n
    101 		if m > fastaLine {
    102 			m = fastaLine
    103 		}
    104 		buf1 := out.NextWrite(m + 1)
    105 		copy(buf1, buf[off:])
    106 		buf1[m] = '\n'
    107 		if off += m; off >= len(alu) {
    108 			off -= len(alu)
    109 		}
    110 		n -= m
    111 	}
    112 }
    113 
    114 const (
    115 	fastaLookupSize          = 4096
    116 	fastaLookupScale float64 = fastaLookupSize - 1
    117 )
    118 
    119 var fastaRand uint32 = 42
    120 
    121 type fastaAcid struct {
    122 	sym   byte
    123 	prob  float64
    124 	cprob float64
    125 	next  *fastaAcid
    126 }
    127 
    128 func fastaComputeLookup(acid []fastaAcid) *[fastaLookupSize]*fastaAcid {
    129 	var lookup [fastaLookupSize]*fastaAcid
    130 	var p float64
    131 	for i := range acid {
    132 		p += acid[i].prob
    133 		acid[i].cprob = p * fastaLookupScale
    134 		if i > 0 {
    135 			acid[i-1].next = &acid[i]
    136 		}
    137 	}
    138 	acid[len(acid)-1].cprob = 1.0 * fastaLookupScale
    139 
    140 	j := 0
    141 	for i := range lookup {
    142 		for acid[j].cprob < float64(i) {
    143 			j++
    144 		}
    145 		lookup[i] = &acid[j]
    146 	}
    147 
    148 	return &lookup
    149 }
    150 
    151 func fastaRandom(out *fastaBuffer, acid []fastaAcid, n int) {
    152 	const (
    153 		IM = 139968
    154 		IA = 3877
    155 		IC = 29573
    156 	)
    157 	lookup := fastaComputeLookup(acid)
    158 	for n > 0 {
    159 		m := n
    160 		if m > fastaLine {
    161 			m = fastaLine
    162 		}
    163 		buf := out.NextWrite(m + 1)
    164 		f := fastaLookupScale / IM
    165 		myrand := fastaRand
    166 		for i := 0; i < m; i++ {
    167 			myrand = (myrand*IA + IC) % IM
    168 			r := float64(int(myrand)) * f
    169 			a := lookup[int(r)]
    170 			for a.cprob < r {
    171 				a = a.next
    172 			}
    173 			buf[i] = a.sym
    174 		}
    175 		fastaRand = myrand
    176 		buf[m] = '\n'
    177 		n -= m
    178 	}
    179 }
    180