#!/usr/bin/perl -w

#===============================================================================
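# ARGV[0]:	any feature file (*.feature) or matched read file (*.profile) with
# 			matchType ($refName."\.mis_".$maxMismatch.$strand, e.g., "human_miRNA.mis_0+")
# 			labeled in the first column
# ARGV[1]:	optional divide option; "-by-refDB" groups all *.mis_N+/- matchTypes of
# 			one refDB into a single file, anything else (default "-by-matchType")
# 			writes one file per matchType
# Output:	feature or profile files divided by matchTypes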
# Usage:	divideMatchType.pl all_sample.feature
# 			divideMatchType.pl all_sample_des.feature
#			divideMatchType.pl all_sample_anno.feature
# 			divideMatchType.pl all_sample_human_anno.feature
# 			divideMatchType.pl all_sample.profile
# 			divideMatchType.pl all_sample_des.profile
# 			divideMatchType.pl all_sample_anno.profile
# 			...
#===============================================================================

use strict;
use warnings;

# ---------------------------------------------------------------------------
# Command line
#   ARGV[0]  input file (required); its first line must start with "matchType"
#   ARGV[1]  divide option (optional); "-by-refDB" merges all mismatch/strand
#            variants of one reference DB, anything else divides by matchType
# ---------------------------------------------------------------------------
my $inputFile = $ARGV[0];
if ( !defined($inputFile) || $inputFile eq "" ) {
	die "   Usage: divideMatchType.pl <input.feature|input.profile> [-by-matchType|-by-refDB]\n";
}
my $divideOption = $ARGV[1];
if ( !$ARGV[1] ) {
	$divideOption = "-by-matchType";
}

# Split the input name into prefix and suffix (extension). Only a dot inside
# the last path component counts as an extension separator, so a dotted
# directory name (e.g. "run.v2/all_sample") does not end up in the suffix.
# Without an extension the suffix defaults to "div".
my $prefix = $inputFile;
my $suffix = "div";
if ( $inputFile =~ m{\A(.*)\.([^./]+)\z}s ) {
	$prefix = $1;
	$suffix = $2;
}

# Read the input file whose first column carries the matchType label and
# write divided files named <prefix>.<matchType>.<suffix>
# (e.g., all_sample.human_miRNA.mis_0+.feature).
open(my $in, "<", $inputFile) or die "   Can't open $inputFile !!!\n";
my $firstLine = <$in>;
# An empty file yields undef here; treat it like any other bad header.
if ( !defined($firstLine) || $firstLine !~ /^matchType/ ) {
	die "   Wrong file format in $inputFile - no matchType info!!!\n";
}

my %created;      # output files already created (header written) in this run
my $matchType;    # label of the output file currently open, undef before the first
my $out;          # handle of the output file currently open

while ( my $line = <$in> ) {
	# Work on a copy without the line ending so that a single-column line
	# cannot leak "\n" into the output file name.
	( my $key = $line ) =~ s/[\r\n]+\z//;
	next if $key eq "";    # blank line: nothing to assign to a matchType
	$key =~ s/\t.*//s;     # keep the first column only

	# aggregate all *.mis_0+/-, *.mis_1+/-, and *.mis_2+/- matchTypes of each
	# refDB together by cutting the label at "[_sub].mis_"
	if ( $divideOption eq "-by-refDB" ) {
		$key =~ s/(?:_sub)?\.mis_.*//s;
	}
	$key =~ s/_subSeq/_repSeq/;
	$key =~ s/_sub//;

	if ( !defined($matchType) || $key ne $matchType ) {
		if ( defined($out) ) {
			close($out) or die "   Can't close output for $matchType: $!\n";
			undef $out;
		}
		$matchType = $key;
		my $outputFile = $prefix . "." . $matchType . "." . $suffix;
		print "=== Write divieded features for all samples to: $outputFile ===\n";
		if ( $created{$outputFile} ) {
			# Same matchType seen again after other types: continue its file.
			open($out, ">>", $outputFile) or die "   Can't append to $outputFile: $!\n";
		} else {
			# First occurrence in this run: start from scratch so that files
			# left over from an earlier run are replaced, not extended.
			open($out, ">", $outputFile) or die "   Can't write to $outputFile: $!\n";
			print {$out} $firstLine;
			$created{$outputFile} = 1;
		}
	}
	print {$out} $line;
}

# No output handle exists when the input held nothing but the header line.
if ( defined($out) ) {
	close($out) or die "   Can't close output for $matchType: $!\n";
}
close($in);
