1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package go1 6 7 import "runtime" 8 9 // Not a benchmark; input for revcomp. 10 11 var fastabytes = makefasta() 12 13 func makefasta() []byte { 14 var n int = 25e6 15 if runtime.GOARCH == "arm" { 16 // TODO(dfc) remove this limitation after precise gc. 17 // A value of 25e6 consumes 465mb of heap on 32bit 18 // platforms, which is too much for most ARM systems. 19 // A value of 25e5 produces a memory layout that 20 // confuses the gc on 32bit platforms. So 25e4 it is. 21 n = 25e4 22 } 23 return fasta(n) 24 } 25 26 func fasta(n int) []byte { 27 out := make(fastaBuffer, 0, 11*n) 28 29 iub := []fastaAcid{ 30 {prob: 0.27, sym: 'a'}, 31 {prob: 0.12, sym: 'c'}, 32 {prob: 0.12, sym: 'g'}, 33 {prob: 0.27, sym: 't'}, 34 {prob: 0.02, sym: 'B'}, 35 {prob: 0.02, sym: 'D'}, 36 {prob: 0.02, sym: 'H'}, 37 {prob: 0.02, sym: 'K'}, 38 {prob: 0.02, sym: 'M'}, 39 {prob: 0.02, sym: 'N'}, 40 {prob: 0.02, sym: 'R'}, 41 {prob: 0.02, sym: 'S'}, 42 {prob: 0.02, sym: 'V'}, 43 {prob: 0.02, sym: 'W'}, 44 {prob: 0.02, sym: 'Y'}, 45 } 46 47 homosapiens := []fastaAcid{ 48 {prob: 0.3029549426680, sym: 'a'}, 49 {prob: 0.1979883004921, sym: 'c'}, 50 {prob: 0.1975473066391, sym: 'g'}, 51 {prob: 0.3015094502008, sym: 't'}, 52 } 53 54 alu := []byte( 55 "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" + 56 "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" + 57 "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" + 58 "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" + 59 "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" + 60 "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" + 61 "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA") 62 63 out.WriteString(">ONE Homo sapiens alu\n") 64 fastaRepeat(&out, alu, 2*n) 65 out.WriteString(">TWO IUB ambiguity codes\n") 66 fastaRandom(&out, iub, 3*n) 67 out.WriteString(">THREE Homo sapiens frequency\n") 68 fastaRandom(&out, homosapiens, 5*n) 69 return out 70 } 71 72 type fastaBuffer []byte 73 74 func (b *fastaBuffer) Flush() { 75 panic("flush") 76 } 77 78 func (b *fastaBuffer) WriteString(s string) { 79 p := b.NextWrite(len(s)) 80 copy(p, s) 81 } 82 83 func (b *fastaBuffer) NextWrite(n int) []byte { 84 p := *b 85 if len(p)+n > cap(p) { 86 b.Flush() 87 p = *b 88 } 89 out := p[len(p) : len(p)+n] 90 *b = p[:len(p)+n] 91 return out 92 } 93 94 const fastaLine = 60 95 96 func fastaRepeat(out *fastaBuffer, alu []byte, n int) { 97 buf := append(alu, alu...) 98 off := 0 99 for n > 0 { 100 m := n 101 if m > fastaLine { 102 m = fastaLine 103 } 104 buf1 := out.NextWrite(m + 1) 105 copy(buf1, buf[off:]) 106 buf1[m] = '\n' 107 if off += m; off >= len(alu) { 108 off -= len(alu) 109 } 110 n -= m 111 } 112 } 113 114 const ( 115 fastaLookupSize = 4096 116 fastaLookupScale float64 = fastaLookupSize - 1 117 ) 118 119 var fastaRand uint32 = 42 120 121 type fastaAcid struct { 122 sym byte 123 prob float64 124 cprob float64 125 next *fastaAcid 126 } 127 128 func fastaComputeLookup(acid []fastaAcid) *[fastaLookupSize]*fastaAcid { 129 var lookup [fastaLookupSize]*fastaAcid 130 var p float64 131 for i := range acid { 132 p += acid[i].prob 133 acid[i].cprob = p * fastaLookupScale 134 if i > 0 { 135 acid[i-1].next = &acid[i] 136 } 137 } 138 acid[len(acid)-1].cprob = 1.0 * fastaLookupScale 139 140 j := 0 141 for i := range lookup { 142 for acid[j].cprob < float64(i) { 143 j++ 144 } 145 lookup[i] = &acid[j] 146 } 147 148 return &lookup 149 } 150 151 func fastaRandom(out *fastaBuffer, acid []fastaAcid, n int) { 152 const ( 153 IM = 139968 154 IA = 3877 155 IC = 29573 156 ) 157 lookup := fastaComputeLookup(acid) 158 for n > 0 { 159 m := n 160 if m > fastaLine { 161 m = fastaLine 162 } 163 buf := out.NextWrite(m + 1) 164 f := fastaLookupScale / IM 165 myrand := fastaRand 166 for i := 0; i < m; i++ { 167 myrand = (myrand*IA + IC) % IM 168 r := float64(int(myrand)) * f 169 a := lookup[int(r)] 170 for a.cprob < r { 171 a = a.next 172 } 173 buf[i] = a.sym 174 } 175 fastaRand = myrand 176 buf[m] = '\n' 177 n -= m 178 } 179 } 180