n50_simple.pl

# Differences

This shows you the differences between two versions of the page.

 — n50_simple.pl [2019/11/12 15:17] (current)hyjeong created 2019/11/12 15:17 hyjeong created 2019/11/12 15:17 hyjeong created Line 1: Line 1: + #​!/​usr/​bin/​perl + # + # Source: + #    http://​genomics-array.blogspot.com/​2011/​02/​calculating-n50-of-contig-assembly-file.html + #    Corrected by Haeyoung Jeong + ## Read Fasta File and compute length ### + # + + my \$length; + my \$totalLength;​ + my @arr; + my \$num; + + while(<>​){ + ​chomp;​ + ​if(/​^>​([^\s]+)/​){ + ​\$num++;​ + push (@arr, \$length); + ​\$totalLength += \$length; + ​\$length=0;​ + } else { + s/\s//g; + s/\t//g; + \$length += length(\$_);​ + } + } + push (@arr, \$length); # for the last contig + \$totalLength += \$length; + + close(FH);​ + + my @sort = sort {\$b <=> \$a} @arr; + my \$n50; + #print "Total \$num contigs (\$totalLength)\n";​ + foreach my \$val(@sort){ + ​\$n50+=\$val;​ + if(\$n50 >= \$totalLength/​2){ + #         print "N50 contig length is \$n50 and N50 value is: \$val\n";​ + ​\$n50val = \$val; + last; + } + } + #print "​Average contig length is ", \$totalLength / \$num, "​\n";​ + #print "Max. contig length is ", \$sort, "​\n";​ + + \$avg = sprintf("​%d",​ \$totalLength / \$num); + + # number, total length, max, N50, average length + print join " ", \$num, \$totalLength,​ \$sort, \$n50val, \$avg, " (count, total, max, n50, average)\n";​ 