User Tools

Site Tools


Sidebar

This is the sidebar. Without it, the main text is too wide!


2019년 11월 교육 자료

cogclass2018.pl

This is an old revision of the document!


COGclass2018.pl

Written by Haeyoung Jeong.

  #!/usr/bin/perl
# $ARGV[0]: GenQuery.COG.csv.bestHit;
#
# Purpose:
#   Appends a one-letter COG functional category and the COG name to every
#   line of a comma-separated best-hit file, then prints a per-category
#   summary (lines prefixed with '#') after the annotated records.
#
# Inputs:
#   $fun      tab-separated table: column 0 = one-letter category code,
#             column 1 = category description. Lines starting with '#' are
#             skipped.
#   $name     tab-separated table: column 0 = COG id, column 1 = one or more
#             category letters, column 2 = COG name. Lines starting with '#'
#             are skipped.
#   $ARGV[0]  comma-separated file whose field index 5 holds either a COG id
#             or -1 (meaning "no COG assigned").
#
# Output:
#   STDOUT  every input line followed by ",<code>,<COG name>", then the
#           summary table.
#   STDERR  one line per multi-letter COG describing which letter was picked,
#           plus a warning for every COG id that is missing from $name.
#
# NOTE: a COG that belongs to several categories is assigned ONE of them at
# random, so category counts are not reproducible between runs.

use strict;
use warnings;

my $fun  = '/data/Utilities/DB/COG/COG2014/fun2003-2014.tab';
my $name = '/data/Utilities/DB/COG/COG2014/cognames2003-2014.tab';

my $hit_file = $ARGV[0];
die "Usage: $0 GenQuery.COG.csv.bestHit\n" unless defined $hit_file;

# ---------------------------------------------------------------------------
# Functional category table: one-letter code -> description.
# @oneLetter keeps the codes in file order so the summary is printed in the
# same order as the table.
# ---------------------------------------------------------------------------
my %code2fun;
my @oneLetter;

open(my $fun_fh, '<', $fun) or die "Cannot open $fun: $!\n";
while (my $line = <$fun_fh>) {
    chomp $line;
    next if $line =~ /^#/;
    next if $line !~ /\S/;    # a blank line would create an empty category
    my ($code, $description) = split /\t/, $line;
    $code2fun{$code} = defined $description ? $description : '';
    push @oneLetter, $code;
}
close $fun_fh;

# ---------------------------------------------------------------------------
# COG table: COG id -> single category letter, COG id -> COG name.
# ---------------------------------------------------------------------------
my %cog2code;
my %cog2name;

open(my $name_fh, '<', $name) or die "Cannot open $name: $!\n";
while (my $line = <$name_fh>) {
    chomp $line;
    next if $line =~ /^#/;
    next if $line !~ /\S/;
    my ($cog_id, $codes, $cog_name) = split /\t/, $line;
    $codes = '' unless defined $codes;

    if (length($codes) > 1) {
        # Several categories: keep exactly one, chosen at random, and log
        # the decision so the run can be audited afterwards.
        my @letters = split //, $codes;
        my $picked  = $letters[ rand @letters ];
        $cog2code{$cog_id} = $picked;
        print STDERR "$cog_id $codes (random selection) ===> $picked\n";
    }
    else {
        $cog2code{$cog_id} = $codes;
    }
    $cog2name{$cog_id} = defined $cog_name ? $cog_name : '';
}
close $name_fh;

# ---------------------------------------------------------------------------
# Annotate the best-hit file and tally hits per category.
# ---------------------------------------------------------------------------
my %codeNum;
my $notAssigned = 0;    # records whose COG field is -1
my $num         = 0;    # records processed
my $lines       = 0;    # lines read from the best-hit file

open(my $cog_fh, '<', $hit_file) or die "Cannot open $hit_file: $!\n";
while (my $line = <$cog_fh>) {
    chomp $line;
    my $cog_id = (split /,/, $line)[5];

    # The field is normally a non-numeric COG id, so the numeric comparison
    # against -1 is deliberate; silence the "isn't numeric" warning locally.
    my $is_unassigned = do {
        no warnings 'numeric';
        defined($cog_id) && $cog_id == -1;
    };

    my ($code, $cog_name) = ('', '');
    if ($is_unassigned) {
        $notAssigned++;
    }
    elsif (defined($cog_id) && exists $cog2code{$cog_id}) {
        $code     = $cog2code{$cog_id};
        $cog_name = $cog2name{$cog_id};
        $codeNum{$code}++;
    }
    else {
        # Unknown or missing COG id: the record is still echoed with empty
        # annotation fields and counted in the total, but it belongs to no
        # category, so say so instead of dropping it silently.
        my $shown = defined $cog_id ? "'$cog_id'" : '(missing field 6)';
        print STDERR "$hit_file line $.: COG id $shown not found in $name\n";
    }
    $num++;

    print $line, ',', $code, ',', $cog_name, "\n";
    $lines++;
}
close $cog_fh;

# ---------------------------------------------------------------------------
# Summary: one row per category in table order, then the unassigned count.
# ---------------------------------------------------------------------------
print "#--------------------------------------------\n";
for my $code (@oneLetter) {
    my $count = exists $codeNum{$code} ? $codeNum{$code} : 0;
    print join("\t", '#', $code2fun{$code}, $count), "\n";
}
print join("\t", '#', 'Not assigned', $notAssigned), "\n";
print "#--------------------------------------------\n";
print "#total: $num (line numbers of $hit_file file: $lines)\n";
cogclass2018.pl.1525047861.txt.gz · Last modified: 2018/04/30 09:24 by hyjeong