fasta benchmark

Discipline of Computer Language Benchmarks Game.


 #!/usr/bin/tclsh

 # Fasta benchmark
 #
 # Contributed by Michael Schlenker

 foreach {IM IA IC last} {139968 3877 29573 42} break

 set alu GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGT
 append alu CAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCC
 append alu GGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCC
 append alu GGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACT
 append alu CCGTCTCAAAAA

 set iub {a 0.27 c 0.12 g 0.12 t 0.27 B 0.02 D 0.02 H 0.02 K 0.02 M 0.02 N 0.02
     R 0.02 S 0.02 V 0.02 W 0.02 Y 0.02}

 set hsapiens {a 0.3029549426680 c 0.1979883004921 g 0.1975473066391
     t 0.3015094502008}

 proc make_gen_random {} {
     set params [list IM $::IM IA $::IA IC $::IC]
     set body [string map $params {
         expr {($max * [set ::last [expr {($::last * IA + IC) % IM}]]) / IM}}]
     proc gen_random {max} $body
 }

 proc make_cumulative {table} {
     set prob 0.0
     set pl [list]

     foreach {char p} $table {lappend pl [set prob [expr {$prob + $p}]] $char}
     return $pl
 }

 proc make_repeat_fasta {id desc src n} {
     foreach {width s e} {59 0 59} break

     puts ">$id $desc"
     set l [string length $src]
     set ls [string repeat $src [expr {([incr n -1] / $l)+1 }]]
     while {$e < $n} {
         puts [string range $ls $s $e]
         set s [incr e]
         incr e $width
     }
     puts [string range $ls $s $n]
 }

 proc make_random_fasta {id desc src n} {
     foreach {width line} {60 ""} break

     puts ">$id $desc"
     set prob [make_cumulative $src]
     for {set i 0} {$i < $n} {incr i} {
         set rand [gen_random 1.0]
         foreach {p c} $prob {
             if {$p > $rand} {
                 append line $c
                 break
             }
         }
         if {[string length $line] == $width} {
             puts $line
             set line ""
         }
     }
     if {[string length $line]} {puts $line}
 }

 proc main {n} {
     make_gen_random
     make_repeat_fasta ONE "Homo sapiens alu" $::alu [expr {$n*2}]
     make_random_fasta TWO "IUB ambiguity codes" $::iub [expr {$n*3}]
     make_random_fasta THREE "Homo sapiens frequency" $::hsapiens [expr {$n*5}]
 }

 set N [lindex $argv 0]
 if {$N < 1} {set N 1}
 main $N