#!/usr/bin/perl -w

#===============================================================================
# Add the common name to the taxInfo column of an annotated .feature or .profile
# file. Names are extracted from taxdump.tar.gz, downloaded from:
# ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/
#===============================================================================

use strict;
use warnings;

# Path of the annotated feature file to process (first command-line argument).
my $feature = $ARGV[0];
# reference database folder, also for taxonomy file
my $dataDIR = "/var/www/html/database";


# Without an argument $feature is undef; fail with a usage line instead of
# letting the file tests below emit "uninitialized value" warnings.
if ( !defined $feature or $feature eq "" ) {
	die "   Usage: $0 <annotated .feature file>\n";
}
# test if input feature file exists
if ( !-e $feature ) {
	die "   Can not open $feature !!!\n";
}
# test if input feature file has annotation information (both description and taxonomy information)
# The check is purely name-based: the path must contain "_anno" and end in ".feature".
if ( ($feature !~ /\_anno/) or ($feature !~ /\.feature$/) ) {
	print "   This is NOT a feature file with annotation information!!!\n";
	print "   Please use desFeature.pl and taxFeature.pl to annotate a feature (.feature) file first!\n\n";
	exit;
}

# get both scientific name and common name by taxID
#
# Reads $dataDIR/NCBI/names.dmp and fills two lookup tables:
#   %taxIDByName : scientific name -> tax_id
#   %gbName      : tax_id -> common name to report
# For %gbName the "genbank common name" entry always wins; when a taxon has
# none, the shortest "common name" entry is used. The result therefore does
# not depend on the order of the lines in names.dmp.
my %taxIDByName = ();
my %gbName = ();
# tax_ids whose %gbName entry came from a "genbank common name" row
my %hasGenbankName = ();
my $namesFile = "$dataDIR/NCBI/names.dmp";
open(my $namesFH, "<", $namesFile) or die "can't open file - names.dmp ($namesFile): $!\n";
print "Loading taxonomy name (for both scientific name and common name) file ...\n";
while ( my $line = <$namesFH> )	{
	chomp $line;
	# Columns are separated by "|" with surrounding whitespace:
	# tax_id | name | unique name | name class |
	my ($tax_id, $name, undef, $class) = split(/\s*\|\s*/, $line);
	# skip blank or truncated rows rather than indexing hashes with undef
	next if !defined $tax_id or !defined $name or !defined $class;
	# Compare the name-class column exactly; matching the whole line would
	# misclassify rows whose name text happens to contain "common name".
	if ( $class eq "scientific name" ) {
		$taxIDByName{$name} = $tax_id;
	} elsif ( $class eq "genbank common name" ) {
		$gbName{$tax_id} = $name;
		$hasGenbankName{$tax_id} = 1;
	} elsif ( $class eq "common name" ) {
		# never replace a GenBank common name with a plain common name
		next if $hasGenbankName{$tax_id};
		if ( !exists $gbName{$tax_id} or length($name) < length($gbName{$tax_id}) ) {
			$gbName{$tax_id} = $name;
		}
	}
}
close($namesFH);

# read taxInfo and add genbank common name
#
# Copies $feature to a temporary file, appending ";<name>" to the third
# tab-separated column (taxInfo) of every row after the header, then replaces
# $feature with the temporary file. <name> is the common name from %gbName
# when the 8th ";"-separated taxInfo field is a known scientific name;
# otherwise it is that field itself, or "n/a" when the field is missing.
open(my $featureIn, "<", $feature) or die "Can't open $feature !!! ($!)\n";
# include the process id so concurrent runs in one directory do not share a temp file
my $outputFile = "tmp.$$.feature";
open(my $featureOut, ">", $outputFile) or die "Can't write $outputFile: $!\n";
print "read and add taxonomy infomation ...\n";
# the first line is a header and is copied through unchanged (absent for an empty file)
my $firstLine = <$featureIn>;
print {$featureOut} $firstLine if defined $firstLine;
while ( my $line = <$featureIn> ) {
	chomp $line;
	# limit -1 keeps trailing empty columns so rows keep their column count
	my @array = split(/\t/, $line, -1);
	# pad short rows so the taxInfo column (index 2) always exists
	push @array, "" while @array < 3;
	my $taxInfo = $array[2];
	my @taxAnno = split(/\;/, $taxInfo);
	my $name = "n/a";
	if ( $taxAnno[7] ) {
		$name = $taxAnno[7];
	}
	my $tax_id = "";
	if ( exists $taxIDByName{$name} ) {
		$tax_id = $taxIDByName{$name};
		if ( exists $gbName{$tax_id} ) {
			$name = $gbName{$tax_id};
		}
	}
	$taxInfo = $taxInfo."\;".$name;
	$array[2] = $taxInfo;
	print {$featureOut} join("\t", @array)."\n";
}
close($featureIn);
# buffered write errors (e.g. disk full) only surface at close
close($featureOut) or die "Can't finish writing $outputFile: $!\n";

# List form bypasses the shell, so spaces or metacharacters in the file name
# are passed to mv verbatim; "--" guards names that start with "-".
system("mv", "--", $outputFile, $feature) == 0
	or die "Can't replace $feature with $outputFile (mv status $?)\n";

