#!/usr/bin/perl -w

#===============================================================================
# extract features according to annotation (description or taxonomy) information 
#
# ARGV[0]:	feature file (*_anno.feature) with both description and taxonomy information annotated
# ARGV[1]:	filter file containing a gene/word list (one gene/word per line)
# Output:	feature file (*_anno.feature) filtered by annotation
# Usage:	extractFeature.pl all_sample_human_anno.feature brain
#===============================================================================

use strict;
use warnings;

# ------------------------------------------------------------------------------
# Main script body.
#
# Inputs (command line):
#   $ARGV[0]  annotated feature file (*_anno.feature), tab separated, whose
#             first four columns are: match type, match ID, taxonomy, description;
#             the remaining columns are per-sample values.
#   $ARGV[1]  filter file holding one gene/word per line.
#
# Output:
#   A file named "<input without .feature>(<filter>).feature" containing the
#   header line, every row whose description matches at least one word (with
#   the first column replaced by the filter name), and a final "total" row
#   holding the per-sample sums of the matched rows.
#
# Dies when an argument is missing, when a file cannot be opened/written, or
# when the feature file is empty.
# ------------------------------------------------------------------------------
if ( @ARGV < 2 ) {
	die "   Usage: $0 <*_anno.feature> <gene/word list file>\n";
}

my $feature = $ARGV[0];
my $filter = $ARGV[1];
my @wordList = ();
my $prefix = $feature;
my $suffix = ".feature";
# \Q...\E keeps the dot literal, so only a real ".feature" extension is removed
$prefix =~ s/\Q$suffix\E$//;
my $fileName = $prefix."(".$filter.")".$suffix;

# test if input feature file exists
if ( !-e $feature ) {
	die "   Can not open $feature !!!\n";
}
# test if input feature file has annotation information (both description and taxonomy information)
if ( $feature !~ /\_anno.*\.feature/ ) {
	print "   This is NOT a feature file (*_anno.feature) with annotation information!!!\n";
	print "   Please use desFeature.pl and taxFeature.pl to annotate a feature (.feature) file first!\n\n";
	exit;
}

# get specific filter defined by a gene/word list
open(my $wordFh, "<", $filter) or die "Can not open $filter !!! ($!)\n";
while ( my $word = <$wordFh> ) {
	chomp $word;
	# tolerate word lists saved with Windows (CRLF) line endings
	$word =~ s/\r$//;
	# an empty pattern would not mean "match nothing" in Perl, so skip blank lines
	next if $word eq '';
	# Compile each word once. Words are used as regular expressions, as before;
	# a word that is not a valid regex (e.g. contains an unbalanced "(") is
	# matched literally instead of aborting the whole run.
	my $pattern = eval { qr/$word/ };
	$pattern = qr/\Q$word\E/ if !defined $pattern;
	push @wordList, $pattern;
}
close($wordFh);

# filter features according to the description column of the annotation
# NOTE(review): the file header mentions "description or taxonomy", but only
# the description column is (and always was) tested here -- confirm intent.
open(my $in, "<", $feature) or die "Can not open $feature !!! ($!)\n";

# Read head/first line before creating the output, so an empty input does not
# leave a useless output file behind.
my $firstLine = <$in>;
if ( !defined $firstLine ) {
	die "   $feature is empty !!!\n";
}

open(my $out, ">", $fileName) or die "Can not write $fileName !!! ($!)\n";
print $fileName."\n";
print {$out} $firstLine;

# every column after the four annotation columns is a sample column
my @header = split(/\t/, $firstLine);
my $sampleSize = scalar(@header) - 4;
$sampleSize = 0 if $sampleSize < 0;

# start all sums at 0 so the total row is well formed even without any match
my @total = (0) x $sampleSize;
# NOTE(review): the total row reports the taxonomy of the last data row read
# (matched or not), as the original script did -- confirm this is intended.
my $taxonomy = '';

while ( my $line = <$in> ) {
	chomp $line;
	my @fields = split(/\t/, $line);
	# blank or truncated rows have no description and therefore cannot match
	next if @fields < 4;

	shift @fields;	# match type: replaced by the filter name in the output
	my $matchID = shift @fields;
	$taxonomy = shift @fields;
	my $description = shift @fields;

	my $sign = 0;
	foreach my $pattern (@wordList) {
		if ( $description =~ $pattern ) {
			$sign = 1;
			last;	# one matching word is enough
		}
	}
	next if !$sign;

	print {$out} $filter."\t".$matchID."\t".$taxonomy."\t".$description."\t".join("\t", @fields)."\n";
	for my $i (0..($sampleSize - 1)) {
		# missing or empty sample values count as 0
		$total[$i] += $fields[$i] || 0;
	}
}
print {$out} $filter."\t"."total\t".$taxonomy."\t".$filter."\t".join("\t", @total)."\n";
close($in);
# buffered write errors only surface at close time
close($out) or die "Can not write $fileName !!! ($!)\n";
