n50_simple.pl

# Differences

This shows you the differences between two versions of the page.

 — n50_simple.pl [2019/11/12 15:17] (current)hyjeong created 2019/11/12 15:17 hyjeong created 2019/11/12 15:17 hyjeong created Line 1: Line 1: + #!/usr/bin/perl + # + # Source: + #    http://genomics-array.blogspot.com/2011/02/calculating-n50-of-contig-assembly-file.html + #    Corrected by Haeyoung Jeong + ## Read Fasta File and compute length ### + # + + my \$length; + my \$totalLength; + my @arr; + my \$num; + + while(<>){ + chomp; + if(/^>([^\s]+)/){ + \$num++; + push (@arr, \$length); + \$totalLength += \$length; + \$length=0; + } else { + s/\s//g; + s/\t//g; + \$length += length(\$_); + } + } + push (@arr, \$length); # for the last contig + \$totalLength += \$length; + + close(FH); + + my @sort = sort {\$b <=> \$a} @arr; + my \$n50; + #print "Total \$num contigs (\$totalLength)\n"; + foreach my \$val(@sort){ + \$n50+=\$val; + if(\$n50 >= \$totalLength/2){ + #         print "N50 contig length is \$n50 and N50 value is: \$val\n"; + \$n50val = \$val; + last; + } + } + #print "Average contig length is ", \$totalLength / \$num, "\n"; + #print "Max. contig length is ", \$sort, "\n"; + + \$avg = sprintf("%d", \$totalLength / \$num); + + # number, total length, max, N50, average length + print join " ", \$num, \$totalLength, \$sort, \$n50val, \$avg, " (count, total, max, n50, average)\n"; 