#!/usr/bin/perl -w

#===============================================================================
# Add description information to features (matchType, matchID, description, readNum).
# The description information is taken from the corresponding refDB (built by buildDescrition.pl).
# ARGV[0]: 	any feature file (*.feature) for single sample or all samples
# Output: 	feature file (*_des.feature) with description information annotated 
# Usage:	desFeature.pl all_sample.feature
#===============================================================================

use strict;
use warnings;

# Command-line input and file-scoped working variables.
# These are declared up front so the later sections of the script can rely
# on them.
my $feature = $ARGV[0];		# path of the feature file to annotate
my %indexDes = ();		# $indexDes{refName}{matchID} = description
my $refName;
my $matchType;
my $matchID;
my $description;

# test feature file first!!
# A missing argument is reported as a usage error; otherwise the undefined
# value would reach the file test below and only produce "uninitialized"
# warnings plus a misleading message.
if ( !defined $feature || $feature eq "" ) {
	die "   Usage: desFeature.pl all_sample.feature\n";
}
if ( !-e $feature ) {
	die "   Can not open $feature !!!\n";
}

# reference database for mapping, also for description file
# Root directory of the reference databases used for mapping; the
# description files are looked up below this directory as well.
my $dataDIR = '/var/www/html/database/bowtie';

# Names of the reference databases whose descriptions are loaded.
my @refDB = (
	'bacteria_DNA_Enterococcus_faecium',
	'bacteria_DNA_Haemophilus_influenzae',
	'bacteria_DNA_Klebsiella_oxytoca',
	'bacteria_DNA_Klebsiella_pneumoniae',
	'bacteria_DNA_Pseudomonas_aeruginosa',
	'bacteria_DNA_Serratia_liquefaciens',
	'bacteria_DNA_Serratia_marcescens',
	'bacteria_DNA_Staphylococcus_aureus',
	'bacteria_DNA_Stenotrophomonas_maltophilia',
	'bacteria_DNA_Streptococcus_pneumoniae',
);

# get all description index
# For every reference database, read "<bowtie index>.description" and store
# each well-formed line (exactly two tab-separated fields: matchID and
# description) as $indexDes{refName}{matchID} = description.
foreach $refName (@refDB) {
	print "Loading description file for ".$refName." ...\n";
	my $index = getBowtieIndex($refName).".description";

	# A missing description file is reported but is not fatal: skip this
	# database instead of reading from (and closing) a handle that was
	# never opened.
	my $desIn;
	if ( !open($desIn, "<", $index) ) {
		print "need to build description index file: $index\n";
		next;
	}

	while ( my $line = <$desIn> ) {
		chomp $line;
		my @head = split(/\t/, $line);
		# lines that do not have exactly two fields are ignored
		if ( scalar(@head) == 2 ) {
			($matchID, $description) = @head;
			$indexDes{$refName}{$matchID} = $description;
		}
	}
	close($desIn);
}

# Annotate the feature file: every row is copied to the output file with a
# description column inserted after the matchType and matchID columns.
my $featIn;
open($featIn, "<", $feature) or die "Can not open $feature !!!\n";

# Build the output name from a trailing ".feature" only. When the input name
# does not end in ".feature" the suffix is appended instead, so the output
# path can never equal the input path (opening the input with ">" would
# truncate it while it is being read).
my $outputFile = $feature;
unless ( $outputFile =~ s/\.feature\z/_des.feature/ ) {
	$outputFile .= "_des.feature";
}
my $desOut;
open($desOut, ">", $outputFile) or die "Can not write $outputFile: $!\n";
print "Output described feature file: $outputFile\n";

# read and output head (first line)
my @array;
my $firstLine = <$featIn>;
if ( defined $firstLine ) {
	# chomp first so that a header with only two columns does not carry its
	# newline inside the matchID field; limit -1 keeps trailing empty columns
	chomp $firstLine;
	@array = split(/\t/, $firstLine, -1);
	$matchType = shift(@array);
	$matchID = shift(@array);
	$matchType = "" if !defined $matchType;
	$matchID = "" if !defined $matchID;
	print {$desOut} $matchType."\t".$matchID."\t"."description"."\t".join("\t", @array)."\n";
} else {
	warn "   $feature is empty, nothing to describe\n";
}

# read and output data
while ( my $line = <$featIn> ) {
	chomp $line;
	# limit -1 keeps trailing empty columns, so data rows stay aligned with
	# the header
	@array = split(/\t/, $line, -1);
	$matchType = shift(@array);
	$matchID = shift(@array);
	# blank or short rows are still written, with empty leading fields
	$matchType = "" if !defined $matchType;
	$matchID = "" if !defined $matchID;

	# the reference database name is the part of matchType before ".mis_"
	($refName) = split(/\.mis_/, $matchType, 2);
	$refName = "" if !defined $refName;

	$description = "n/a"; # annotated with 'n/a' - representing not available!!!
	# test the outer key first so that a lookup does not create empty
	# entries in %indexDes
	if ( exists $indexDes{$refName} && exists $indexDes{$refName}{$matchID} ) {
		$description = $indexDes{$refName}{$matchID};
	}
	# output matchType, matchID, description, ...
	print {$desOut} $matchType."\t".$matchID."\t".$description."\t".join("\t", @array)."\n";
}
close($featIn);
# buffered write errors only show up when the handle is closed
close($desOut) or die "Can not finish writing $outputFile: $!\n";

#================================ sub functions ================================
# Get the bowtie index location for a reference database.
#   $refName : name of the reference database
#   returns  : $dataDIR followed by the sub-directory registered for that
#              name, or "" when the name is not registered
sub getBowtieIndex {
	my ($refName) = @_;

	# reference database name => location below $dataDIR
	my %subDirOf = (
		"bacteria_DNA_Enterococcus_faecium"         => "/BacteriaSpec/all.dna_Enterococcus_faecium",
		"bacteria_DNA_Haemophilus_influenzae"       => "/BacteriaSpec/all.dna_Haemophilus_influenzae",
		"bacteria_DNA_Klebsiella_oxytoca"           => "/BacteriaSpec/all.dna_Klebsiella_oxytoca",
		"bacteria_DNA_Klebsiella_pneumoniae"        => "/BacteriaSpec/all.dna_Klebsiella_pneumoniae",
		"bacteria_DNA_Pseudomonas_aeruginosa"       => "/BacteriaSpec/all.dna_Pseudomonas_aeruginosa",
		"bacteria_DNA_Serratia_liquefaciens"        => "/BacteriaSpec/all.dna_Serratia_liquefaciens",
		"bacteria_DNA_Serratia_marcescens"          => "/BacteriaSpec/all.dna_Serratia_marcescens",
		"bacteria_DNA_Staphylococcus_aureus"        => "/BacteriaSpec/all.dna_Staphylococcus_aureus",
		"bacteria_DNA_Stenotrophomonas_maltophilia" => "/BacteriaSpec/all.dna_Stenotrophomonas_maltophilia",
		"bacteria_DNA_Streptococcus_pneumoniae"     => "/BacteriaSpec/all.dna_Streptococcus_pneumoniae",
		"nt_virus_rtRNA"                            => "/NCBI/nt_virus_rtRNA",
		"nt_bacteria_rtRNA"                         => "/NCBI/nt_bacteria_rtRNA",
		"nt_fungi_rtRNA"                            => "/NCBI/nt_fungi_rtRNA",
		"nt_plant_rtRNA"                            => "/NCBI/nt_plant_rtRNA",
		"nt_human_rtRNA"                            => "/NCBI/nt_human_rtRNA",
		"nt_mouse_rtRNA"                            => "/NCBI/nt_mouse_rtRNA",
		"nt_mammal_rtRNA"                           => "/NCBI/nt_mammal_rtRNA",
		"nt_chordata_rtRNA"                         => "/NCBI/nt_chordata_rtRNA",
		# the next two names differ from their directory names
		"nt_bug_rtRNA"                              => "/NCBI/nt_arthropod_rtRNA",
		"nt_worm_rtRNA"                             => "/NCBI/nt_nematode_rtRNA",
		"nt_other_rtRNA"                            => "/NCBI/nt_other_rtRNA",
	);

	# unregistered names yield an empty string
	return "" unless exists $subDirOf{$refName};
	return $dataDIR.$subDirOf{$refName};
}