#!/usr/local/bin/perl

###############################################################################################
# Perl script
# Author C.Mathe 21/01/99
# This script extracts data from NetStart output and writes it in a standard format.
###############################################################################################

# Split $0 into its directory part ($dir, with trailing slash) and the script
# file name ($name_script). $dir is undefined when $0 has no directory part;
# both are undefined when $0 does not end in ".pl".
($dir,$name_script)= ($0 =~/(.+\/)*(.+\.pl)$/);

# util.pl is expected to sit next to this script. Always build a path that
# contains a directory so that require does not fall back to an @INC search:
# since Perl 5.26 the current directory is no longer in @INC, so a bare
# "util.pl" is not found when the script is started as `perl script.pl`.
if (defined($dir)) {
    $need=$dir."util.pl";
}
elsif ($0 =~ m{^(.*/)}) {
    # $0 has a directory part but no ".pl" suffix
    $need=$1."util.pl";
}
else {
    $need="./util.pl";
}
require "$need";


#require "/home/camat/BIOCOMP/Perl/util.pl";

# Calculate the frame of the first exon.
#   $first_base : position used as the first base of the exon
#   $Strand     : '+' selects the direct-strand formula; any other value
#                 selects the reverse-strand formula
#   $LGtot      : total length, only used by the reverse-strand formula
# Returns (offset % 3) + 1, i.e. 1, 2 or 3 for integer arguments, where the
# offset is counted from position 1 on '+' and from position $LGtot otherwise.
sub calcNetF {
    my ($first_base, $Strand, $LGtot) = @_;
    my $offset = ($Strand eq '+')
               ? $first_base - 1
               : $LGtot - $first_base;
    return $offset % 3 + 1;
}


# Read a NetStart output file obtained on the direct strand.
#   $_[0] : path of the NetStart output file
# Resets the global counter $nbS and the global list @Start_list, then, for
# every line holding a position, a score of the form d.ddd (first digit 0 or 1)
# and the word "Yes", records:
#   $start[$nbS]  - the position
#   $proba[$nbS]  - the score
#   $strand[$nbS] - the current value of the global $str
#   $frame[$nbS]  - the frame computed by calcNetF (uses the global $Ltot)
# and appends the position to @Start_list.
# Dies, reporting the system error, when the file cannot be opened.
sub readSTARTD{
    my ($file) = @_;
    # Three-argument open: the file name can never be mistaken for a mode.
    open(my $in, '<', $file) or die "Unable to open $file: $!";
    $nbS=0;
    undef @Start_list;
    # A lexical line variable keeps the caller's $_ untouched.
    while (my $line = <$in>)
    {
        # The decimal point is escaped so that only a real "." is accepted.
        if (my ($pos, $score) = ($line =~ /\s+(\d+)\s+([01]\.\d{3})\s+Yes/))
        {
            $start[$nbS]  = $pos;
            $proba[$nbS]  = $score;
            push(@Start_list, $pos);
            $strand[$nbS] = $str;
            $frame[$nbS]  = &calcNetF($start[$nbS], $strand[$nbS], $Ltot);
            $nbS++;
        }
    }
    close($in);
}

# Read a NetStart output file obtained on the reverse-strand input.
#   $_[0] : path of the NetStart output file
# Unlike readSTARTD this routine does NOT reset $nbS or @Start_list: its
# results are appended after those already stored. For every line holding a
# position, a score of the form d.ddd (first digit 0 or 1) and the word "Yes",
# it records:
#   $Revstart[$nbS] - the position as printed in the file
#   $start[$nbS]    - that position converted by &reverse
#   $proba[$nbS]    - the score
#   $strand[$nbS]   - the current value of the global $str
#   $frame[$nbS]    - the frame computed by calcNetF (uses the global $Ltot)
# and appends the converted position to @Start_list.
# Dies, reporting the system error, when the file cannot be opened.
sub readSTARTR{
    my ($file) = @_;
    # Three-argument open: the file name can never be mistaken for a mode.
    open(my $in, '<', $file) or die "Unable to open $file: $!";
    # A lexical line variable keeps the caller's $_ untouched.
    while (my $line = <$in>)
    {
        # The decimal point is escaped so that only a real "." is accepted.
        if (my ($pos, $score) = ($line =~ /\s+(\d+)\s+([01]\.\d{3})\s+Yes/))
        {
            $Revstart[$nbS] = $pos;
            $proba[$nbS]    = $score;
            # The leading & is required: it calls the user-defined sub named
            # "reverse" (not defined in this file) instead of Perl's builtin.
            # NOTE(review): presumably converts a reverse-strand coordinate to
            # a direct-strand one - confirm against its definition.
            $start[$nbS]    = &reverse($Revstart[$nbS]);
            push(@Start_list, $start[$nbS]);
            $strand[$nbS]   = $str;
            $frame[$nbS]    = &calcNetF($start[$nbS], $strand[$nbS], $Ltot);
            $nbS++;
        }
    }
    close($in);
}

    
# List mode: process every entry of the list file named by the global $liste.
# Each line of the list must hold two whitespace-separated fields, stored in
# the globals $seq (file-name prefix) and $Ltot (sequence length). For each
# entry the files "<seq>start1.txt" (direct strand) and "<seq>start2.txt"
# (reverse strand) are read, the collected starts are written through
# class_start, and an empty line is printed to ST as separator.
# Dies when the list cannot be opened or when a line lacks one of the fields.
sub readLS{
    # Three-argument open: the list name can never be mistaken for a mode.
    open(my $list_fh, '<', $liste) or die "Unable to open $liste: $!\n";
    # openST is not defined in this file; the code below prints to the
    # filehandle ST, so it presumably opens it - confirm against its definition.
    $ST=&openST("start");
    while (my $entry = <$list_fh>) {
        $count++;
        # read each file name and each corresponding sequence length
        ($seq,$Ltot) = split(' ', $entry);
        # check if the list is correctly formatted
        if (($seq eq "")||($Ltot eq ""))
        {
            # NOTE(review): "Mzef" looks like a leftover from another script
            # (createST passes "NetStart" to Start) - confirm which usage text
            # is intended before changing it.
            &usage("Mzef");
            die "!!! Incorrect input list at line $count!!!\n";
        }
        $fileD=$seq . "start1.txt";
        $fileR=$seq . "start2.txt";
        $str="+";
        &readSTARTD($fileD); #read direct strand file
        $str="-";
        &readSTARTR($fileR); #read reverse strand file
        &class_start(@Start_list);
        print ST "\n";
    }
    close($list_fh);
}

# Main driver: build the standard-format table from NetStart output.
# &Start("NetStart") (not defined in this file) returns a list stored in @data:
#   - when $data[1] is 0, $data[0] is used as a list file and handed to readLS;
#   - otherwise $data[1] is used as the sequence length and $data[0] as the
#     direct-strand NetStart file. The reverse-strand file name is derived by
#     replacing everything from "start1" on with "start2.txt", and the table
#     is written to "<prefix>start12.txtST".
# Dies when the output file of the single-sequence mode cannot be created.
sub createST{
    @data=&Start("NetStart");
    $type = "Init";
    if ($data[1]==0) {
        $liste=$data[0];
        &readLS();
        # NOTE(review): $standart is not assigned anywhere in this file;
        # presumably set by openST - confirm.
        print "output file: $standart\n";
    }
    else
    {
        $Ltot=$data[1];
        $fileD=$data[0];
        @tmp=split('start1',$fileD);
        $fileR=$tmp[0] . "start2.txt";
        print "Take $fileR as output name on reverse strand.\n";
        $seq=$tmp[0];
        $file2 =$tmp[0] . "start12.txtST";
        # ST stays a bareword handle because readLS prints to it by name.
        # The open is checked: an unwritable output file would otherwise make
        # every following print silently discard its data.
        open(ST, '>', $file2) or die "Unable to open $file2 for writing: $!\n";
        print ST "Contig\tType\tStrand\tLend\tRend\tlength\tPhase\tFrame\tAcceptor\tDonor\tProba\n";
        $str="+";
        &readSTARTD($fileD);
        $str="-";
        &readSTARTR($fileR);
        &class_start(@Start_list);
        print "output file: $file2\n";
    }
    close ST;
}

# Script entry point: run the conversion (explicit empty argument list).
&createST();



# Write the recorded starts in ascending position order, both strands mixed.
#   @_ : list of start positions to output (the callers pass @Start_list)
# For every distinct position, each record k < $nbS whose $start[k] equals it
# is handed to &EcrireST (not defined in this file) with the globals $seq and
# $type, its strand, frame and score. The position goes in the Lend argument
# for '+' records and in the Rend argument otherwise; the length, phase,
# acceptor and donor arguments are passed as empty strings.
sub class_start {
    # Sort numerically and keep a single copy of each position: the inner loop
    # already emits every record found at a position, so a position listed
    # twice (e.g. once per strand) would otherwise print its records twice.
    my %seen;
    my @S_list = grep { !$seen{$_ + 0}++ } sort { $a <=> $b } @_;

    foreach my $pos (@S_list) {
        for (my $k = 0; $k < $nbS; $k++) {
            next unless $pos == $start[$k];
            my ($Lend, $Rend) = ($strand[$k] eq '+')
                              ? ($start[$k], "")
                              : ("", $start[$k]);
            my ($lg, $ph, $A, $D) = ("") x 4;
            &EcrireST($seq,$type,$strand[$k],$Lend,$Rend,$lg,$ph,$frame[$k],$A,$D,$proba[$k]);
        }
    }
}









