#!/usr/bin/perl -w

#===============================================================================
# Add description information to matchID for a matched read sequence file (.profile).
# The description information is taken from the corresponding refDB (built by buildDescrition.pl)
# ARGV[0]: 	any matched read sequence file (*.profile) for single sample or all samples
# ARGV[1]:  path to the database config file
# Output: 	matched read sequence file (*_des.profile) with description information annotated 
# Usage:	desProfile.pl all_sample.profile
#===============================================================================

use strict;
use warnings;
use FindBin qw($Bin);
use lib "$Bin";
use ReadDBConfig;

# Command-line arguments:
#   $profile      - matched read sequence file (*.profile) to annotate
#   $dbConfigFile - database config file passed to ReadDBConfig::readDBConfig
my $profile = $ARGV[0];
my $dbConfigFile = $ARGV[1];

# Fail early with a usage message when the profile argument is missing,
# instead of running the file test below on an undefined value.
# NOTE(review): $dbConfigFile is not checked here because readDBConfig is not
# visible from this file and may handle a missing path itself - confirm.
if ( !defined $profile ) {
	die "Usage: $0 <profile_file> <db_config_file>\n";
}

# Description lookup table: $indexDes{refName}{matchID} = description
my %indexDes = ();

# File-scope working variables used by the loading and annotation sections below.
my $refName;
my $matchType;
my $readSeq;
my $readLength;
my $matchID;
my $offset;
my $matchNum;
my $description;

# test profile file first!!
if ( !-e $profile ) {
	die "   Can not open $profile !!!\n";
}

# Each value of %db_names is used below as the path prefix of a
# "<value>.description" index file, keyed by reference database name.
my %db_names = ReadDBConfig::readDBConfig($dbConfigFile);

# Load the description index of every reference database.
# For each name in %db_names the file "<value>.description" is read. Every line
# holding exactly two tab-separated fields (matchID, description) is stored as
# $indexDes{name}{matchID} = description; lines of any other shape are ignored.
foreach my $dbName (keys(%db_names)) {
	print "Loading description file for ".$dbName." ...\n";
	my $index = $db_names{$dbName}.".description";

	my $indexFh;
	if ( !open($indexFh, "<", $index) ) {
		# A missing index is reported but not fatal. Skip this database rather
		# than reading from a handle that was never opened; its matches are
		# simply left without a description.
		print "need to build description index file: $index\n";
		next;
	}

	while (my $line = <$indexFh>) {
		chomp $line;
		my @head = split(/\t/, $line);
		if ( scalar(@head) == 2 ) {
			$indexDes{$dbName}{$head[0]} = $head[1];
		}
	}
	close($indexFh);
}

# Annotate the profile: copy every row to the output file and insert a
# "description" column right after the matchID column (the 4th column).
open(my $profileFh, "<", $profile) or die "Can not open $profile !!!\n";

# Derive the output name by turning a trailing ".profile" into "_des.profile".
# The pattern is anchored so a ".profile" inside a directory name is left
# alone. If the input does not end in ".profile", the suffix is appended, so
# the output can never be the same path as the input being read.
my $outputFile = $profile;
if ( !( $outputFile =~ s/\.profile$/_des.profile/ ) ) {
	$outputFile .= "_des.profile";
}
open(my $outFh, ">", $outputFile) or die "Can not write $outputFile: $!\n";
print "Output described profile file: $outputFile\n";

# read and output head (first line); an empty input yields an empty output
my $firstLine = <$profileFh>;
if ( defined $firstLine ) {
	chomp $firstLine;
	# Limit -1 keeps trailing empty columns so the header stays aligned.
	my @headerCols = split(/\t/, $firstLine, -1);
	# Pad short headers so the four leading columns are always defined.
	push(@headerCols, "") while scalar(@headerCols) < 4;
	my @leading = splice(@headerCols, 0, 4);
	print {$outFh} join("\t", @leading, "description", join("\t", @headerCols))."\n";
}

# read and output data
while (my $line = <$profileFh>) {
	chomp $line;
	# Limit -1 keeps trailing empty columns, matching the header handling.
	my @cols = split(/\t/, $line, -1);
	# Pad short or blank rows so the four leading columns are always defined.
	push(@cols, "") while scalar(@cols) < 4;
	my ($type, $seq, $len, $id) = splice(@cols, 0, 4);

	# The reference database name is the part of matchType before ".mis_".
	my ($dbName) = split(/\.mis_/, $type);
	$dbName = "" if !defined $dbName;

	# annotated with 'n/a' (not available) when no description is known
	my $des = "n/a";
	if ( exists $indexDes{$dbName} && exists $indexDes{$dbName}{$id} ) {
		$des = $indexDes{$dbName}{$id};
	}

	# output matchType, readSeq, readLength, matchID, description, offset, matchNum, ...
	print {$outFh} join("\t", $type, $seq, $len, $id, $des, join("\t", @cols))."\n";
}
close($profileFh);
# Buffered write errors (e.g. a full disk) only surface when the handle is closed.
close($outFh) or die "Can not finish writing $outputFile: $!\n";
