#!/usr/local/bin/perl

###############################################################################################
# Perl script
# Author C.Mathe 19/10/98, last modified 23/02/99 to keep the frame from ORF.
# This script extracts data from fgenea output and writes it in a standard format.
###############################################################################################

# Locate util.pl in the directory this script was started from and load it.
# $dir is the directory part of $0 (with its trailing slash; undefined when $0
# has no directory part or does not end in ".pl"); $name_script is the
# script's own file name.
($dir,$name_script)= ($0 =~/(.+\/)*(.+\.pl)$/);
$need=$dir."util.pl";
# `require` searches @INC for any path that is neither absolute nor starting
# with "./" or "../".  Since Perl 5.26 the current directory is no longer in
# @INC, so a relative $need (e.g. "util.pl" or "bin/util.pl") would not be
# found.  When the file exists relative to the current directory, anchor it
# with "./"; otherwise keep the @INC search exactly as before.
use File::Spec;
if (!File::Spec->file_name_is_absolute($need) && $need !~ m{^\.\.?/} && -e $need) {
    $need="./".$need;
}
require "$need";

#require "/home/camat/BIOCOMP/Perl/util.pl";

sub readFGENED{ #read fgenea output on direct strand input.
    # Argument: $_[0] - path of the fgenea output file for the direct strand.
    #
    # Resets the global exon counter $nbE and the global list @Lend_list, then
    # stores one entry per matching line, at index $nbE, in the global arrays
    # @Lend, @Rend, @proba, @orfL, @orfR, @strand and @frame.
    # Dies if the file cannot be opened.
    # calcF() is not defined in this file (presumably in util.pl - TODO confirm).
    my ($path) = @_;

    # Three-argument open: the file name is never interpreted as a mode or
    # stripped of whitespace, and $! reports why the open failed.
    open(my $fgd, '<', $path) or die "Unable to open $path: $!";
    $nbE=0;
    undef @Lend_list;
    while (<$fgd>)
    {
        # Clear the per-line scalar globals; they are not read in this sub
        # (presumably shared with helpers elsewhere - TODO confirm).
        $type=$Lend=$Rend=$lg=$ph=$F=$A=$D=$proba="";
        $str= "+";
        # Expected line layout: "  <Lend> - <Rend> w= <score>  ORF: <orfL> - <orfR>".
        # On a non-matching line the list assignment receives an empty list, so
        # slot $nbE of each array is set to undef and overwritten by the next match.
        if (($Lend[$nbE],$Rend[$nbE],$proba[$nbE],$orfL[$nbE],$orfR[$nbE])=($_ =~ /^\s+(\d+)\s\-\s+(\d+)\sw=\s+(\d{1,2}\.\d{2})\s\sORF:\s+(\d+)\s\-\s+(\d+)/))
        {
            push(@Lend_list,$Lend[$nbE]);
            $strand[$nbE]=$str;
            $frame[$nbE]=&calcF($orfL[$nbE],$orfR[$nbE],$strand[$nbE]);
            $nbE++;
        }
    }
    close $fgd;
}

sub readFGENER{ #read fgenea output on reverse strand input
    # Argument: $_[0] - path of the fgenea output file for the reverse strand.
    #
    # Does NOT reset $nbE or @Lend_list: entries are appended after whatever
    # is already stored in the global arrays @Lend, @Rend, @proba, @orfL,
    # @orfR, @strand, @frame and in @Lend_list.
    # Dies if the file cannot be opened.
    # reverse() here is the user-defined sub called via &reverse, not the Perl
    # builtin; it and calcF() are not defined in this file (presumably in
    # util.pl - TODO confirm).
    my ($path) = @_;

    # Three-argument open: the file name is never interpreted as a mode or
    # stripped of whitespace, and $! reports why the open failed.
    open(my $fgr, '<', $path) or die "Unable to open $path: $!";
    while (<$fgr>)
    {
        # Clear the per-line scalar globals; they are not read in this sub
        # (presumably shared with helpers elsewhere - TODO confirm).
        $type=$Lend=$Rend=$lg=$ph=$F=$A=$D=$proba="";
        $str="-";
        # Same line layout as the direct-strand file; the first two numbers are
        # coordinates on the reverse input and are converted below.
        if (($Revbeg,$Revend,$proba[$nbE],$orfL[$nbE],$orfR[$nbE])=($_ =~ /^\s+(\d+)\s\-\s+(\d+)\sw=\s+(\d{1,2}\.\d{2})\s\sORF:\s+(\d+)\s\-\s+(\d+)/))
        {
            $Lend[$nbE]=&reverse($Revend); # recalculate coordinate in 5'->3'
            $Rend[$nbE]=&reverse($Revbeg);
            push(@Lend_list,$Lend[$nbE]);
            # The frame is computed from the unconverted ORF coordinates with
            # strand '+', while the entry itself is recorded as '-'.
            # NOTE(review): looks deliberate ("keep the frame from ORF") - confirm.
            $frame[$nbE]=&calcF($orfL[$nbE],$orfR[$nbE],'+');
            $strand[$nbE]=$str;
            $nbE++;
        }
    }

    close $fgr;
}

sub readLS{
    # Batch mode: process every sequence named in the list file $liste (global).
    #
    # Each line of the list holds two whitespace-separated fields: a sequence
    # file prefix and the sequence length.  For each line the direct-strand file
    # "<prefix>fgene1.txt" and the reverse-strand file "<prefix>fgene2.txt" are
    # read, the combined results are passed to ordon(), and a blank line is
    # written to the ST handle.
    # Dies if the list cannot be opened or a line lacks either field.
    # openST(), usage() and ordon() are not defined in this file (presumably in
    # util.pl - TODO confirm); ST is presumably opened by openST().

    # Three-argument open: the file name is never interpreted as a mode or
    # stripped of whitespace, and $! reports why the open failed.
    open(my $list_fh, '<', $liste) or die "Unable to open $liste: $!\n";
    $ST=&openST("fgene");
    while (<$list_fh>) {
        $count++;                        # line number, used in the error message below
        ($seq,$Ltot)=split;              # sequence file prefix and sequence length
        if (($seq eq "")||($Ltot eq "")) # check that the list is correctly formatted
        {
            &usage("Fgenea");
            die "!!! Incorrect input list at line $count!!!\n";
        }
        $fileD=$seq . "fgene1.txt";
        $fileR=$seq . "fgene2.txt";
        &readFGENED($fileD);             # resets $nbE and @Lend_list
        &readFGENER($fileR);             # appends reverse-strand entries
        &ordon(@Lend_list);
        print ST "\n";
    }
    close $list_fh;
}

sub createST{
    # Entry point: convert fgenea output into the tab-separated "ST" format.
    #
    # Start("Fgenea") returns a two-element list (Start() is not defined in this
    # file; presumably it parses the command line - TODO confirm):
    #   - $data[1] == 0 : $data[0] is a list file, handled by readLS();
    #   - otherwise     : $data[0] is the direct-strand file and $data[1] the
    #                     sequence length.  The reverse-strand and output file
    #                     names are derived from the text before 'fgene1' in
    #                     the direct-strand file name.
    # ordon() is not defined in this file either (presumably in util.pl).
    @data=&Start("Fgenea");
    if ($data[1]==0) {
        $liste=$data[0];
        &readLS();
        # $standart is not set in this file (presumably by openST - TODO confirm).
        print "output file: $standart\n";
    }
    else
    {
        $Ltot=$data[1];
        $fileD=$data[0];
        @tmp=split('fgene1',$fileD);
        $fileR=$tmp[0] . "fgene2.txt";
        print "Take $fileR as output name on reverse strand.\n";
        $seq=$tmp[0];
        $file2 =$tmp[0] . "fgene12.txtST";
        # ST stays a bareword handle because other subs print to it by name.
        # Check the open: an unwritable path would otherwise silently discard
        # every result.
        open(ST, '>', $file2) or die "Unable to open $file2 for writing: $!\n";
        print ST "Contig\tType\tStrand\tLend\tRend\tlength\tPhase\tFrame\tAcceptor\tDonor\tScore\n";
        &readFGENED($fileD);
        &readFGENER($fileR);
        &ordon(@Lend_list); #mix results from strand + and -, and order them in ascending order.
        print "output file: $file2\n";
    }
    # Buffered write errors only surface at close, so report them.
    close(ST) or warn "Error while closing the output file: $!\n";
}

# Script entry point: run the conversion.
createST();
   
