#!/usr/bin/perl -w

# [2016/03/22]	Retrieve SNP on miRNA (precursor miRNA sequence area) using BioMart API
# An example script demonstrating the use of BioMart API.
# This perl API representation is only available for configuration versions >=  0.5
# Usage:		BioMart_SNP_miR_Retrival.pl > output.txt

# Installation of BioMart Perl API
# http://uswest.ensembl.org/info/data/biomart/biomart_perl_api.html
# To install CPAN on CentOS, use:
#	sudo yum install perl-CPAN
#	or
#	sudo yum -y install perl-CPAN
# To use the BioMart API, the following modules must be installed:
#	sudo cpan -i XML::Simple
#	sudo cpan -i Log::Log4perl
#	sudo cpan -i Exception::Class
#	sudo cpan -i XML::DOM
#	sudo cpan -i ExtUtils::Config
#	sudo cpan -i ExtUtils::Helpers
#	sudo cpan -i ExtUtils::InstallPaths
#	sudo cpan -i Module::Build::Tiny
#	sudo cpan -i Readonly

use strict;
use warnings;

# Set this to path where you installed biomart-perl
use lib "/var/www/html/Tool/biomart-perl/lib/";
use BioMart::Initializer;
use BioMart::Query;
use BioMart::QueryRunner;

# Path to the registry file under biomart-perl/conf/ -- adjust to your installation.
# The BioMart Central Registry can be browsed at
# http://www.biomart.org/biomart/martservice?type=registry
my $confFile = '/var/www/html/Tool/biomart-perl/conf/martURLLocation.xml';

#
# NB: set the initializer action to 'clean' to start a fresh configuration,
# or to 'cached' to skip the configuration step on subsequent runs that use
# the same registry.
#

# Directory holding the reference databases used for mapping.
my $dataDIR = "/var/www/html/database/";

# Read the HGNC_miRBase_list file (tab-separated).
# The first line is read separately and never stored (presumably a header row).
# For every following row, column 1 (0-based) goes to @miRBase_id and
# column 2 goes to @HGNC_symbol; the two arrays stay index-aligned.
my @HGNC_symbol = ();
my @miRBase_id = ();
my $refDBFile = "$dataDIR/miRBase/HGNC_miRBase_list_20160323_curated.txt";
# Lexical filehandle instead of a bareword global; $! reports why open failed.
open(my $refDB_fh, "<", $refDBFile) or die "can not open file: $refDBFile: $!\n";
my $firstLine = <$refDB_fh>;	# consumed only to skip it; may be undef for an empty file
while (my $line = <$refDB_fh>) {
	$line =~ s/[\r\n]+\z//;	# strip LF as well as CRLF line endings
	next if $line !~ /\S/;	# ignore blank lines
	my @fields = split(/\t/, $line);
	# A row without an HGNC symbol would push undef into the list that is
	# later joined and used as the query filter, so the whole row is skipped
	# (dropping both columns keeps the two arrays aligned).
	unless (defined $fields[2] && length $fields[2]) {
		warn "skipping line $. of $refDBFile: no HGNC symbol\n";
		next;
	}
	push @miRBase_id, $fields[1];
	push @HGNC_symbol, $fields[2];
}
close $refDB_fh;

# SNP_miR data is retrieved batch by batch from the HGNC_miRBase_list:
# $segment symbols per batch, with $count as the 1-based batch number.
my $segment = 1;
my $lastNum = $#HGNC_symbol;	# index of the last symbol (-1 when the list is empty)
my $count = 1;

# Per-batch working variables, assigned inside the batch loop.
# ($segment_string is declared but not used in the visible code.)
my ($i, $j);
my @segment_list = ();
my ($outputFileNo, $segment_string);

# Build the BioMart registry from the registry file; the 'clean' action
# starts a fresh configuration.
my $action = 'clean';
my $initializer = BioMart::Initializer->new(
	registryFile => $confFile,
	action       => $action,
);
my $registry = $initializer->getRegistry();

# Query BioMart one batch of HGNC symbols at a time. For every batch a
# "[first-last] symbol, symbol, ..." line is printed to STDOUT, followed by
# the header, result rows (TSV) and footer produced by the query runner.
#
# A pre-checked while loop is used instead of do/while: do/while always runs
# its body once, so an empty symbol list would still send a query with an
# empty hgnc_symbol filter. For a non-empty list the iterations are the same.
warn "no HGNC symbols loaded; nothing to query\n" if $lastNum < 0;

while ((($count - 1) * $segment) <= $lastNum) {
	# 0-based index range of the current batch, clamped to the end of the list
	$i = ($count - 1) * $segment;
	$j = ($count * $segment) - 1;
	if ($j > $lastNum) {
		$j = $lastNum;
	}
#	@segment_list = @miRBase_id[$i..$j];
	@segment_list = @HGNC_symbol[$i..$j];
	# 1-based, zero-padded label of the batch, e.g. "0001-0001"
	$outputFileNo = sprintf("%04d-%04d", $i + 1, $j + 1);
	print "[".$outputFileNo."] ".join(", ", @segment_list)."\n";

	my $query = BioMart::Query->new('registry'=>$registry,'virtualSchemaName'=>'default');

	# First dataset: variation attributes.
#	$query->setDataset("hsapiens_snp");		# [Ensembl Variation 84] Homo sapiens Short Variants (SNPs and indels excluding flagged variants) (GRCh38.p5)
	$query->setDataset("hsapiens_snp_som");	# [Ensembl Variation 84] Homo sapiens Somatic Short Variants (SNPs and indels excluding flagged variants) (GRCh38.p5)

	$query->addAttribute("refsnp_id");
	$query->addAttribute("refsnp_source");
	$query->addAttribute("chr_name");
	$query->addAttribute("chrom_start");
	$query->addAttribute("chrom_end");
	$query->addAttribute("allele");
	$query->addAttribute("consequence_allele_string");
	$query->addAttribute("consequence_type_tv");
	$query->addAttribute("ensembl_type");
	$query->addAttribute("ensembl_transcript_chrom_strand");

	# Second dataset: gene filters (current batch, miRNA biotype) and gene attributes.
	$query->setDataset("hsapiens_gene_ensembl");	# [Ensembl Genes 84] Homo sapiens genes (GRCh38.p5)
#	$query->addFilter("mirbase_id", ["@segment_list"]);	# Input external references ID list [Max 500 advised]
	$query->addFilter("hgnc_symbol", [@segment_list]);	# Input external references ID list [Max 500 advised]
	$query->addFilter("biotype", ["miRNA"]);
	$query->addAttribute("hgnc_symbol");
	$query->addAttribute("mirbase_id");

	$query->formatter("TSV");

	my $query_runner = BioMart::QueryRunner->new();
	############################## GET COUNT ############################
	# Doesn't work at all
	# $query->count(1);
	# $query_runner->execute($query);
	# print $query_runner->getCount();
	#####################################################################


	############################## GET RESULTS ##########################
	# to obtain unique rows only
	$query_runner->uniqueRowsOnly(1);

	$query_runner->execute($query);
	$query_runner->printHeader();
	$query_runner->printResults();
	$query_runner->printFooter();
	#####################################################################


	$count++;

}
