#!/usr/local/bin/perl -w

# launch SPL on all fasta sequence present in the directory given as parameter

# SPL server URL
$SPL = "http://genomic.sanger.ac.uk/cgi-bin/gf/gf1.pl";
$POST = "/usr/local/bin/POST";
$HTML2TXT = "/usr/local/bin/html2txt";

if (!-x $POST)
{
    die "need program $POST";
}
if (!-x $HTML2TXT)
{
    die "need program $HTML2TXT";
}

#------------------------------------------------------------

sub complement
{
    if ($_[0] eq "A") {return("T");}
    if ($_[0] eq "T") {return("A");}
    if ($_[0] eq "C") {return("G");}
    if ($_[0] eq "G") {return("C");}
    return($_[0]);
}

#------------------------------------------------------------


if ($#ARGV != 1)
{
    die "usage: param1 = directory where FASTA sequence files are; param2 = file containing a list of sequence name and length";
}

if (!-d $ARGV[0])
{
    die "$ARGV[0] is not a directory";
}

$fasta_dir = $ARGV[0];

if ($fasta_dir !~ /\/$/)
{
    $fasta_dir .= "/";
}

# retrieve fasta files
@fasta_files = glob "${fasta_dir}*.tfa";

if ($#fasta_files == -1)
{
    die "not FASTA files (*.tfa) in $fasta_dir";
}

if (!-f $ARGV[1])
{
    die "$ARGV[1] is not a file";
}

# read sequence name to be computed
open (F_IN, "$ARGV[1]") ||
    die "can't open $ARGV[1]";
while (<F_IN>)
{
    ($seq_id) = ($_ =~ /seq(\d+)\s+.+/);
    $seq_id = int $seq_id;
    $good_seq{$seq_id} = 1;
}
close F_IN;

# get PID for temp file
$temp_file = "$$.form_out";

# loop on FASTA files
for ($seq_no=0;$seq_no<=$#fasta_files;$seq_no++)
{
    # retrieve sequence number
    ($seq_id) = ($fasta_files[$seq_no] =~ /seq(\d+)/);
    $seq_id = int $seq_id;

    # check if this sequence has to be computed
    if (!exists $good_seq{$seq_id})
    {
	next;
    }

    print STDOUT "compute seq $seq_id\n";

    # read the sequence file
    if (!-r $fasta_files[$seq_no])
    {
	die "can't read file $fasta_files[$seq_no]";
    }

    open (F_IN, "$fasta_files[$seq_no]") ||
	die "can't open $fasta_files[$seq_no]";

    $seq = "";
    $first=0;
    while (<F_IN>)
    {
	if ($first == 0)
	{
	    $first=1;
	    next;
	}
	$_ =~ s/[\s\n\r\f]//g;
	$seq .= uc($_);
    }
    close F_IN;

    $seq_length = length($seq);

    # launch program for each strand
    for ($strand=1; $strand<3; $strand++)
    {
	print STDOUT "\tstrand $strand\n";

	# generate form output file
	open (F_OUT, ">$temp_file") ||
	    die "can't create temporary file $temp_file";
	    
	# sequence must contain bases ONLY, NO header
	print F_OUT  "seq_data=$seq";
	printf F_OUT "&seq_name=seq%02d_%d", $seq_id, $strand;
	print F_OUT "&org=a";
	print F_OUT "&program_name=spl";
	close F_OUT;

	# launch the program
	$cmd = sprintf "cat $temp_file|$POST -e $SPL | $HTML2TXT > seq%02dspl%d.txt", $seq_id, $strand;
	system "$cmd";

	# prepare for next strand and reverse the sequence
	$rev = "";
	for ($i = $seq_length - 1; $i >= 0; $i--)
	{
	    $rev .= &complement(substr($seq,$i,1));
	}
	$seq = $rev;
    }
}

unlink "$temp_file";
