#!/usr/local/bin/perl -w

# Launch XGRAIL on the FASTA sequences (*.tfa) found in the directory given as
# first parameter, restricted to the sequences listed in the file given as
# second parameter.

# XGRAIL server URL and the external helper programs this script relies on.
# They are package globals because the rest of the script reads them by name.
$XGRAIL = "http://compbio.ornl.gov/Grail-1.3-bin/RequestForm.DoPost";
$POST = "/usr/local/bin/POST";
$HTML2TXT = "/usr/local/bin/html2txt";

# Both helpers must be installed and executable before any work starts.
# NOTE(review): nothing visible in this script ever runs $HTML2TXT; the check
# is kept so existing behaviour does not change -- confirm it is still needed.
foreach my $program ($POST, $HTML2TXT)
{
    if (!-x $program)
    {
        die "need program $program";
    }
}

# Exactly two arguments are expected: the FASTA directory and the list file.
if (@ARGV != 2)
{
    die "usage: param1 = directory where FASTA sequence files are; param2 = file containing a list of sequence name and length";
}

if (!-d $ARGV[0])
{
    die "$ARGV[0] is not a directory";
}

$fasta_dir = $ARGV[0];

# Make sure the directory name ends with a slash so file names can simply be
# appended to it.
if ($fasta_dir !~ /\/$/)
{
    $fasta_dir .= "/";
}

# retrieve fasta files
# The directory is read directly rather than through glob(): glob splits its
# pattern on whitespace and expands wildcard characters, so a directory name
# containing spaces or characters such as '[', '{' or '~' would silently
# produce the wrong file list. As with the "*.tfa" pattern, names starting
# with a dot are ignored. The list is sorted to keep a stable processing order.
opendir(my $fasta_dh, $fasta_dir)
    or die "can't read directory $fasta_dir: $!";
my @tfa_names = grep { /\A[^.].*\.tfa\z/s } readdir($fasta_dh);
closedir($fasta_dh);

@fasta_files = map { $fasta_dir . $_ } sort @tfa_names;

if (!@fasta_files)
{
    die "not FASTA files (*.tfa) in $fasta_dir";
}

# The second argument must be an existing plain file (the sequence list).
if (!-f $ARGV[1])
{
    die "$ARGV[1] is not a file";
}

# read sequence name to be computed
# Every line of the form "seq<digits> <something>" selects one sequence; the
# numeric part is stored (leading zeros dropped) as a key of %good_seq, which
# the main loop consults to decide which FASTA files to submit.
open(my $list_fh, "<", $ARGV[1])
    or die "can't open $ARGV[1]: $!";
while (my $line = <$list_fh>)
{
    my ($listed_id) = ($line =~ /seq(\d+)\s+.+/);

    # Lines that carry no "seq<number>" entry (blank lines, headers, ...) are
    # skipped. Without this guard they would be recorded as sequence 0 and
    # trigger "uninitialized value" warnings under -w.
    next if !defined $listed_id;

    $good_seq{int($listed_id)} = 1;
}
close $list_fh;

# get PID for temp file
# The form data sent to the server is staged in a per-process file created in
# the current directory; it is rewritten for every sequence.
my $temp_file = "$$.form_out";

# loop on FASTA files
# For each selected file: read the sequence, write the form data, POST it to
# the XGRAIL server and save the content of the reply's <textarea> element to
# seq<NN>grail.txt in the current directory.
foreach my $fasta_file (@fasta_files)
{
    # retrieve sequence number
    # Only the file name is inspected, so a "seq<digits>" component in the
    # directory part of the path cannot be mistaken for the sequence number.
    (my $base_name = $fasta_file) =~ s{.*/}{}s;
    my ($seq_id) = ($base_name =~ /seq(\d+)/);

    # A file whose name carries no sequence number cannot be matched against
    # the list, so it is skipped instead of being treated as sequence 0.
    next if !defined $seq_id;
    $seq_id = int($seq_id);

    # check if this sequence has to be computed
    next if !exists $good_seq{$seq_id};

    print STDOUT "compute seq $seq_id\n";

    # read the sequence file
    if (!-r $fasta_file)
    {
        die "can't read file $fasta_file";
    }

    open(my $fasta_fh, "<", $fasta_file)
        or die "can't open $fasta_file: $!";

    # The first line is the FASTA header and is not part of the sequence.
    my $header_line = <$fasta_fh>;

    # Every following line is stripped of all whitespace, upper-cased and
    # appended to the sequence.
    my $seq = "";
    while (my $line = <$fasta_fh>)
    {
        $line =~ s/\s+//g;
        $seq .= uc($line);
    }
    close $fasta_fh;

    # generate form output file
    # NOTE(review): "grail2exons" lacks the hyphen used by the two sibling
    # fields; it is reproduced unchanged -- confirm against the server form.
    open(my $form_fh, ">", $temp_file)
        or die "can't create temporary file $temp_file: $!";
    print {$form_fh} "organism=arabd",
                     "&grail2exons=on",
                     "&grail2-clusters=on",
                     "&grail2-translation=on",
                     "&sequence=$seq";
    close $form_fh
        or die "can't write temporary file $temp_file: $!";

    # launch the program and reformat the output
    # The form data is fed to POST on its standard input.
    open(my $reply_fh, "-|", "$POST -e $XGRAIL < $temp_file")
        or die "can't open pipe: $!";
    my $out_file = sprintf "seq%02dgrail.txt", $seq_id;
    open(my $result_fh, ">", $out_file)
        or die "can't open result file $out_file: $!";

    # Lines strictly between the line containing "<textarea" and the line
    # containing "</textarea" go to the result file. Every line read, except
    # the opening "<textarea" line itself, is also echoed to STDOUT. Reading
    # stops at the closing tag.
    my $in_textarea = 0;
    while (my $line = <$reply_fh>)
    {
        last if $line =~ /<\/textarea/i;

        if ($line =~ /<textarea/i)
        {
            $in_textarea = 1;
            next;
        }

        print {$result_fh} $line if $in_textarea;
        print STDOUT $line;
    }

    # Without this message a failed request would only show up as an empty
    # result file.
    if (!$in_textarea)
    {
        warn "no <textarea> block in server reply for seq $seq_id\n";
    }

    # The exit status of the pipe is deliberately not checked: reading may
    # have stopped before the end of the reply, which can make the child
    # terminate abnormally even though the request itself succeeded.
    close $reply_fh;
    close $result_fh
        or die "can't write result file $out_file: $!";
}

# Remove the temporary form file when the script ends, including when it
# stops early through die(), so no stale "<pid>.form_out" file is left behind.
END
{
    if (defined $temp_file && -e $temp_file)
    {
        unlink $temp_file;
    }
}
