#!/usr/bin/perl -w

#===============================================================================
# summarize matched features (.feature)
# for all samples with or without sample order file
# if no sample order file provided, samples will be ordered by alphabet automatically.
# ARGV[0]:	sample order file (optional), e.g. sampleOrder,
# 	1	CCACTC_4
# 	2	AAGCTA_3
# 	3	AAGCTA_7
# Need:		"all_sample_matchCount.sum", generated by "sumDist.pl"
# Output:	matched feature file (all_sample.feature) for all samples
# Usage:	sumFeature.pl [--project NAME] [sampleOrder]
# ------Note------
# features are ranked by matchOrder and then by alphabet on matchID
# matchOrder is an ordered array for matchType
# this version keeps maxMismatch (0 - 2) and strand information in matchType
# matchType = refName + maxMismatch + strand, e.g., "human_miRNA.mis_0+"
#===============================================================================

use strict;
use warnings;
use Getopt::Long qw(GetOptions);

# Parse command-line options and set up the file-level variables.
#   --project NAME   prefix of the input/output file names (default "all_sample"):
#                    the script reads NAME_matchCount.sum and writes NAME.feature
# GetOptions removes the options it recognises from @ARGV, so whatever is left
# in $ARGV[0] afterwards is the (optional) sample order file.
my $project_name = "all_sample";
# GetOptions has already reported the offending option on STDERR when it
# returns false; stop here instead of silently running with the defaults.
GetOptions("project=s" => \$project_name)
	or die "Usage: $0 [--project NAME] [sampleOrder]\n";

my $sampleOrder = $ARGV[0];		# optional; false means "discover samples automatically"
my $dataDIR = "./";			# directory scanned for per-sample feature files
my @sampleList = ();			# sample names, in output column order
my $sample;				# shared scratch/loop variable for the sections below
my $suffix = "_Processed.feature";	# per-sample feature file = sample name + this suffix

# get sample order
# Fills @sampleList either from the sample order file (two tab-separated
# columns: rank, sample name) or, when no file was given, from every file in
# $dataDIR whose name ends with $suffix, sorted alphabetically.
my $i = 0;	# reused further down as the running sample counter
if ( $sampleOrder ) {
	# An unreadable order file is fatal: carrying on would silently produce
	# an empty sample list and a header-only output table.
	open(my $orderFH, "<", $sampleOrder)
		or die "can't open sample order file - $sampleOrder: $!\n";
	while ( my $line = <$orderFH> ) {
		# make sure to get rid of any kind of carriage return sign
		$line =~ s/[\r\n]//g;
		my @fields = split(/\t/, $line);
		# only lines with exactly two columns are used; anything else
		# (blank lines, extra columns) is skipped
		push @sampleList, $fields[1] if @fields == 2;
	}
	close($orderFH);
} else {
	print "no sample order file input, will retrieve and order all samples automatically.\n";
	opendir(my $dir, $dataDIR) or die "Cannot open directory: $dataDIR: $!";
	my @fileList = readdir $dir;
	closedir $dir;
	# \Q...\E: the suffix is a literal string, so its "." must not act as a
	# regex wildcard
	my $suffixRE = qr/\Q$suffix\E$/;
	foreach my $profile ( grep { $_ =~ $suffixRE } @fileList ) {
		(my $name = $profile) =~ s/$suffixRE//;
		push @sampleList, $name;
	}
	@sampleList = sort @sampleList;
}

# get match order (ordered by matchType, e.g., "human_miRNA.mis_0")
# The order is taken from the header line of <project>_matchCount.sum: its
# tab-separated columns minus the first and the last one.
# NOTE(review): presumably the first column is a row label and the last one a
# summary column - confirm against the output of sumDist.pl.
my @matchOrder = ();
my $inputFile = $project_name."_matchCount.sum";
open(my $sumFH, "<", $inputFile) or die "cannot open $inputFile: $!";
my $firstLine = <$sumFH>;
close($sumFH);
# an empty file has no header line; fail clearly instead of splitting undef
defined $firstLine
	or die "$inputFile is empty: no header line to read the match order from";
# strip LF and CR alike so that CRLF files are parsed like LF files
$firstLine =~ s/[\r\n]//g;
@matchOrder = split(/\t/, $firstLine);
shift @matchOrder;
pop @matchOrder;

# get features for all samples
# %match maps matchType -> matchID -> sample name -> read count, accumulated
# over every line of every per-sample feature file (sample name + $suffix).
# Each line is expected to hold three tab-separated columns:
# matchType, matchID, read count.
my %match = ();
# declared at file scope because the output section further down reuses them
my $matchType;
my $matchID;
my $readNum;
$i = 0;
foreach my $sample (@sampleList) {
	my $profile = $sample.$suffix;
	open(my $featureFH, "<", $profile)
		or die "cannot open profile for sample: $profile: $!";
	$i++;
	print "--- Read matched features for #$i sample: $sample\n";
	while ( my $line = <$featureFH> ) {
		# strip LF and CR alike, so a count read from a CRLF file stays
		# numeric and no stray "\r" ends up in the IDs or the output
		$line =~ s/[\r\n]//g;
		my @array = split(/\t/, $line);
		# blank or truncated lines carry no matchType/matchID pair
		next if @array < 2;
		($matchType, $matchID, $readNum) = @array[0 .. 2];
		# a missing or empty count is treated as 0
		$readNum ||= 0;
		# the first occurrence is stored as read; repeats of the same
		# feature within one sample are summed
		if ( exists $match{$matchType}{$matchID}{$sample} ) {
			$match{$matchType}{$matchID}{$sample} += $readNum;
		} else {
			$match{$matchType}{$matchID}{$sample} = $readNum;
		}
	}
	close($featureFH);
}

# output formatted features for all samples
# Writes <project>.feature: one header row, then one row per
# (matchType, matchID). Match types follow @matchOrder, with the "+" strand
# before the "-" strand of each type; matchIDs are sorted alphabetically.
# A sample with no entry for a feature gets an empty cell.
my $outputFile = $project_name.".feature";
print "=== Write formated features for all samples to: $outputFile\n";
open(my $outFH, ">", $outputFile) or die "cannot write $outputFile: $!";
# generate table head
print $outFH "matchType\t"."matchID\t".join("\t", @sampleList)."\n";
foreach my $type (@matchOrder) {
	foreach my $strand ("+", "-") {
		my $strandedType = $type.$strand;
		# a type/strand combination that never occurred has no rows
		next unless exists $match{$strandedType};
		my $countsFor = $match{$strandedType};
		foreach my $id ( sort keys %{$countsFor} ) {
			my @cells = map {
				exists $countsFor->{$id}{$_} ? $countsFor->{$id}{$_} : ""
			} @sampleList;
			print $outFH join("\t", $strandedType, $id, @cells), "\n";
		}
	}
}
# buffered write errors (e.g. a full disk) only surface at close
close($outFH) or die "error while writing $outputFile: $!";
