#!/usr/bin/perl -w

#===============================================================================
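# ARGV[0]: a .profile file that is too big for Excel and needs to be divided
# into batches of about 600,000 lines or fewer each. The split is done by
# size, not by line count: a new part starts once the current part exceeds
# roughly 60 MiB (see the limits defined below).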
# e.g., divideProfile.pl all_sample.human_miRNA_precursor.mis_2.profile
#===============================================================================

use strict;
use warnings;

# Split the tab-separated profile named by $ARGV[0] into numbered part files
# "<prefix>(N).<suffix>". Every part starts with a copy of the input's first
# (header) line, and the name of each part is printed to STDOUT as it is
# created.
my $profile = $ARGV[0];
die "Usage: $0 <file.profile>\n"
	unless defined $profile and length $profile;

# Derive the part-file name pieces from the input name. Only a dot in the
# final path component counts as the extension separator, so dots in directory
# names (e.g. "../data/profile") no longer corrupt the output names. A name
# without a usable extension keeps the default suffix "profile".
my $prefix = $profile;
my $suffix = "profile";
if ($profile =~ m{\A(.*)\.([^./\\]+)\z}s) {
	$prefix = $1;
	$suffix = $2;
}

# Size thresholds, counted in length() units of the data lines written to the
# current part (the header line is not counted).
my $soft_limit = 60 * (2**20); # 60 MiB: split here, but only where column 4 changes
my $hard_limit = 68 * (2**20); # 68 MiB: split here unconditionally

open(my $in, "<", $profile)
	or die "Cannot open '$profile' for reading: $!\n";

# The first input line is treated as the header; an empty input yields an
# empty header instead of an "uninitialized value" warning.
my $header = <$in>;
$header = "" unless defined $header;

# Create a part file, announce its name on STDOUT, write the header into it
# and return the open handle.
my $open_part = sub {
	my ($part_no) = @_;
	my $part_file = $prefix . "($part_no)." . $suffix;
	open(my $fh, ">", $part_file)
		or die "Cannot open '$part_file' for writing: $!\n";
	print $part_file . "\n";
	print {$fh} $header
		or die "Cannot write to '$part_file': $!\n";
	return $fh;
};

my $part_no       = 1;
my $bytes_in_part = 0;
my $match_id      = "";
my $out           = $open_part->($part_no);

while (my $line = <$in>) {
	my @fields = split(/\t/, $line);

	# Column 4 (index 3) groups consecutive lines; a line with fewer than four
	# columns is treated as having an empty ID.
	my $id = defined $fields[3] ? $fields[3] : "";
	my $is_new_id = ($id ne $match_id);
	$match_id = $id;

	# Start a new part before writing this line, preferably at a group
	# boundary so that lines sharing an ID stay in the same part.
	if (($bytes_in_part > $soft_limit and $is_new_id)
		or $bytes_in_part > $hard_limit) {
		close($out)
			or die "Error closing part $part_no: $!\n";
		$part_no++;
		$bytes_in_part = 0;
		$out = $open_part->($part_no);
	}

	print {$out} $line
		or die "Cannot write to part $part_no: $!\n";
	$bytes_in_part += length($line);
}

close($in);
# Buffered write errors surface at close, so check it on the output handle.
close($out)
	or die "Error closing part $part_no: $!\n";
