#!/usr/bin/perl -w
# Options:
# --config CONFIG_FILE   read options from the config file
# --kit - can be NEB, Illumina, or Bioo
# --barcode - path to the sampleBarcode file (optional)
# --gzip or -z - .fastq.gz input files
# --stp - use stop oligo
# user can also provide their own adapters
# -3p   3 prime adapter
# -5p   5 prime adapter

use strict;
use warnings;
use Getopt::Long qw(GetOptions);
use File::Basename;
use FindBin qw($Bin); # $Bin is now the directory where the script is
use lib "$Bin/Tools"; # add sRNAnalyzer/Tools to the loading module loading path
use YAML::Tiny; # load the YAML::Tiny module

# Raw command line option values. Each one stays undefined unless the
# corresponding option is given on the command line.
my $gzip;          # --gzip / -z
my $stop_oligo;    # --stp
my $kit;           # --kit / -k <name>
my $barcode_file;  # --barcode <file>
my $user_3p;       # --3p <sequence>
my $user_5p;       # --5p <sequence>
my $config_file;   # --config <file>

# Get the command line options. GetOptions returns false when it meets an
# unknown or malformed option; stop in that case instead of continuing with
# settings the user did not intend.
GetOptions(
    "gzip|z"    => \$gzip,
    "stp"       => \$stop_oligo,
    "kit|k=s"   => \$kit,
    "barcode=s" => \$barcode_file,
    "3p=s"      => \$user_3p,
    "5p=s"      => \$user_5p,
    "config=s"  => \$config_file,
) or die "Error in command line arguments; run $0 without options for usage.\n";

# (disabled) map from the command line kit option to a kit-specific preprocessing perl script;
# a single preprocessing script is now used for every kit
#my %kit_map = (
#    "NEB" => "preprocess_SE.pl",
#    "Illumina" => "preprocess_SEi.pl",
#    "4N" => "preprocess_SE4N.pl"
#);

# Path of the preprocessing script that this wrapper runs; it is looked up
# in the selfDev directory next to this script.
my $preprocess_script = "$Bin/selfDev/preprocess_SE.pl";

# Adapter sequences of each supported kit, stored as [3p, 5p].
# All kits listed here share the same 5p adapter sequence.
my %kit_map = do {
    my $shared_5p = 'GTTCAGAGTTCTACAGTCCGACGATC';
    (
        'NEB'      => [ 'AGATCGGAAGAGCACACGTCT',                 $shared_5p ],
        'Illumina' => [ 'TGGAATTCTCGGGTGCCAAG',                  $shared_5p ],
        'Bioo'     => [ 'NNNNTGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC', $shared_5p ],
    );
};

# Per-kit sequences, again stored as [3p, 5p], that are passed on through the
# --empty3p / --empty5p options. An empty string means the option is left out.
my %kit_map_empty = (
    'NEB'      => [ 'AGATCGGAAGAGCACACG', 'AGAGTTCTACAGTCCGA' ],
    'Illumina' => [ 'TGGAATTCTCGGGTGCC',  'AGAGTTCTACAGTCCGA' ],
    'Bioo'     => [ '',                   'AGAGTTCTACAGTCCGAC' ],
);

# Settings handed on to the preprocessing script. The flags start out at
# their defaults (empty, except for the minimum length) and are overridden
# below from either the config file or the command line options.
my $z_flag = "";
my $stop_flag = "";
my $barcode_flag = "";
my $min_len_flag = "--min-length 15";
my $adapter_3p;
my $adapter_5p;
my $kit_option;

# if a config file is specified, ignore the other command line options and
# take every setting from the "preprocess" section of the config file
if ($config_file) {
    # NOTE(review): some YAML::Tiny releases report a read failure by
    # returning a false value rather than dying - the check covers that case.
    my $yaml = YAML::Tiny->read($config_file)
        or die "Could not read config file '$config_file'\n";
    my $config = $yaml->[0];
    my $prep_conf = $config->{preprocess};

    # get the config for the gzip flag (only the literal string "true" enables it)
    my $gzip_config = $prep_conf->{gzip};
    if ($gzip_config and $gzip_config eq "true") {
        $z_flag = "-z";
    }

    # get the config for the stop oligo flag (only the literal string "true" enables it)
    my $stop_oligo_config = $prep_conf->{"stop-oligo"};
    if ($stop_oligo_config and $stop_oligo_config eq "true") {
        $stop_flag = "--stp";
    }

    # get the min-length attribute; anything that is not a positive integer
    # is ignored and the default minimum length is kept
    my $min_len_config = $prep_conf->{"min-length"};
    if ($min_len_config and $min_len_config =~ /\A\d+\z/) {
        $min_len_flag = "--min-length $min_len_config";
    }

    # get the barcode from the config file if there is one
    my $barcode_config = $prep_conf->{barcode};
    if ($barcode_config) {
        $barcode_flag = "--barcode $barcode_config";
    }

    # get adapter sequences and the kit name from the config file if they exist
    $adapter_3p = $prep_conf->{"adapter-3p"};
    $adapter_5p = $prep_conf->{"adapter-5p"};
    $kit_option = $prep_conf->{kit};
}
else {
    # no config file specified, so use the command line arguments
    # for the options

    # set the z flag
    if ($gzip) { $z_flag = "-z"; }
    # set the stop oligo flag
    if ($stop_oligo) { $stop_flag = "--stp"; }
    # set the --barcode flag (it keeps its empty default otherwise)
    if ($barcode_file) { $barcode_flag = "--barcode $barcode_file"; }

    $adapter_3p = $user_3p;
    $adapter_5p = $user_5p;
    $kit_option = $kit;

    # neither a kit nor any adapter was given: show the usage and stop
    # with a non-zero status so that calling scripts can detect the error
    if (!$kit and !$user_3p and !$user_5p) {
        print "You must specify a kit.\n";
        print "Usage: $0 [options] --kit <kit>   or\n";
        print "Usage: $0 [options] --3p <3p-adapter> --5p <5p-adapter>   or\n";
        print "Usage: $0 --config <config-file>.\n\n";
        print "Main arguments:\n";
        print "<kit> - Can be NEB, Illumina, or Bioo, corresponding to the\n";
        print "sRNA library construction kits\n";
        print "<3p-adapter> and <5p-adapter> - the 3 prime and 5 prime adapter sequences\n";
        print "<config-file> - path to the pipeline configuration file\n\n";
        print "Options:\n";
        print "  --gzip or -z      read in .fastq.gz files\n";
        print "  --stp             use stop oligo\n";
        print "  --barcode FILE    specify a FILE which contains a sample barcode\n";
        exit 1;
    }
}

# Build the command line for the preprocessing script. A complete pair of
# explicit adapter sequences takes precedence over a kit name; without
# either one the script stops with an error.
my $command;
if ($adapter_3p and $adapter_5p) {
    # the user specified their own adapters; the same sequences are also
    # passed as the --empty3p / --empty5p settings
    my $adapter_flags = "--3p $adapter_3p --5p $adapter_5p";
    my $empty_flags = "--empty3p $adapter_3p --empty5p $adapter_5p";
    $command = "$preprocess_script $z_flag $stop_flag $barcode_flag $adapter_flags $empty_flags $min_len_flag";
}
elsif ($kit_option and exists($kit_map{$kit_option})) {
    # no complete adapter pair, but a valid kit name: select the adapter
    # sequences that belong to the kit
    $adapter_3p = $kit_map{$kit_option}[0];
    $adapter_5p = $kit_map{$kit_option}[1];
    my $emptyIn_3 = $kit_map_empty{$kit_option}[0];
    my $emptyIn_5 = $kit_map_empty{$kit_option}[1];

    # set the flags for the empty read trimming settings; a kit without a
    # sequence for one side gets no flag for that side
    my $emptyIn_3_flag = "";
    my $emptyIn_5_flag = "";
    if ($emptyIn_3) { $emptyIn_3_flag = "--empty3p $emptyIn_3"; }
    if ($emptyIn_5) { $emptyIn_5_flag = "--empty5p $emptyIn_5"; }

    $command = "$preprocess_script $z_flag $stop_flag $barcode_flag --3p $adapter_3p --5p $adapter_5p $emptyIn_3_flag $emptyIn_5_flag $min_len_flag";
}
else {
    # reached both when nothing usable was given and when the kit name is
    # not one of the known kits
    print "Must specify a valid kit (NEB, Illumina, Bioo) or provide 3p and 5' adapters\n";
    exit 1;
}

# Run the preprocessing script and hand its outcome on to our caller
# instead of always reporting success.
my $status = system($command);
if ($status == -1) {
    # the command could not be started at all
    print STDERR "Failed to run $preprocess_script: $!\n";
    exit 1;
}
# the exit code of the child is in the high byte; a child that was killed by
# a signal has a zero exit code but a non-zero status, which maps to 1
exit(($status >> 8) || ($status ? 1 : 0));
