#!/usr/bin/perl -w

#===============================================================================
# test taxonomy information (especially NCBI taxonomy ID) in organisms.txt downloaded from miRBase at:
# http://www.mirbase.org/
#===============================================================================

use strict;
use warnings;

# Lookup tables shared by the two loading passes below.
my %taxName;		# NCBI taxid -> taxonomy name
my %taxInfoByID;	# NCBI taxid -> taxonomy info column
my %taxInfoByName;	# taxonomy name -> taxonomy info column
my %taxIDByName;	# taxonomy name -> NCBI taxid
my %org_taxid;		# miRBase organism code -> taxid listed in organisms.txt
my $taxonomy;		# declared but not referenced in the code visible here

# Root folder of the reference databases; the taxonomy file lives under it too.
my $dataDIR = "/var/www/html/database";

# get taxonomy index and information in the file - "taxonomyAnno", built by buildTaxonamy.pl
# The file is read as tab-separated text: the first line is skipped as a header,
# and columns 1-3 of every following line are taken as taxid, name and info.
# When a name occurs on several lines, the last one wins in the by-name tables.
my $taxonomyFile = "$dataDIR/NCBI/taxonomyAnno";
open(my $taxonomyFH, "<", $taxonomyFile) or die "can not open file: taxonomyAnno ($!)\n";
print "Loading taxonomy file ...\n";
my $firstLine = <$taxonomyFH>;	# header row, read only to skip it
while (my $row = <$taxonomyFH>) {
	chomp $row;
	$row =~ s/\r\z//;	# tolerate CRLF line endings so the last column stays clean
	my @cols = split(/\t/, $row);
	my ($taxid, $name, $info) = @cols[0 .. 2];

	# Blank or truncated rows carry no usable taxid/name pair; storing them
	# would create undef hash keys and "uninitialized" warnings.
	next unless defined $taxid && defined $name;

	# split() drops trailing empty fields, so an empty info column arrives as undef.
	$info = "" unless defined $info;

	$taxName{$taxid} = $name;
	$taxInfoByID{$taxid} = $info;
	$taxInfoByName{$name} = $info;
	$taxIDByName{$name} = $taxid;
}
close($taxonomyFH);

# get miRBase organism taxid in "organisms.txt" downloaded from ftp://mirbase.org/pub/mirbase/CURRENT/
# Every input row is copied to "organisms_check_list.txt" (written to the current
# working directory) followed by five extra columns:
#   taxonomy name and info looked up by the row's taxid (column 5), the taxid
#   looked up by the row's name (column 3), and the taxonomy name and info for
#   that name-derived taxid. A failed lookup is reported as "n/a".
# Completely empty lines are skipped; rows with missing columns are still
# reported, with "n/a" for whatever cannot be looked up.
my $inputFile = "$dataDIR/miRBase/organisms.txt";
my $outputFile = "organisms_check_list.txt";
open(my $organismsFH, "<", $inputFile) or die "can not open file: $inputFile ($!)\n";
print "Loading miRBase organism taxid mapping file ...\n";
$firstLine = <$organismsFH>;
$firstLine = "" unless defined $firstLine;	# empty input file: still emit the added header columns
chomp($firstLine);
$firstLine =~ s/\r\z//;	# tolerate CRLF line endings
open(my $checkListFH, ">", $outputFile) or die "can not open file: $outputFile ($!)\n";
print {$checkListFH} join("\t",
	$firstLine,
	"#NCBI_taxName",
	"#NCBI_taxInfo_by_taxID",
	"#NCBI_taxID_by_name",
	"#NCBI_taxName_by_name",
	"#NCBI_taxInfo_by_name",
), "\n";

# Return the value stored under $key in the given table, or "n/a" when the key
# is missing or undefined. A stored undef value is reported as an empty string.
my $valueOrNA = sub {
	my ($table, $key) = @_;
	return "n/a" unless defined $key && exists $table->{$key};
	my $value = $table->{$key};
	return defined $value ? $value : "";
};

while (my $line = <$organismsFH>) {
	chomp $line;
	$line =~ s/\r\z//;	# the taxid is the last column; a stray CR would break every lookup
	next if $line eq "";

	my @fields = split(/\t/, $line);
	my ($organism, $name, $taxid) = @fields[0, 2, 4];
	$org_taxid{$organism} = $taxid if defined $organism;

	# Resolve the taxid independently from the organism name so that it can be
	# compared with the taxid miRBase lists for the same row.
	my $taxIDfromName = $valueOrNA->(\%taxIDByName, $name);

	print {$checkListFH} join("\t",
		$line,
		$valueOrNA->(\%taxName, $taxid),
		$valueOrNA->(\%taxInfoByID, $taxid),
		$taxIDfromName,
		$valueOrNA->(\%taxName, $taxIDfromName),
		$valueOrNA->(\%taxInfoByID, $taxIDfromName),
	), "\n";
}
close($organismsFH);
# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close($checkListFH) or die "can not close file: $outputFile ($!)\n";
