#!/usr/local/bin/perl

###############################################################################################
# Perl script
# Author C.Mathe 21/01/99, last modified 18/02/99 to keep frame with best probability
# This script extracts data from MZEF output and writes it in a standard format.
###############################################################################################


($dir,$name_script)= ($0 =~/(.+\/)*(.+\.pl)$/);
$need=$dir."util.pl";
require "$need";

#require "/home/camat/BIOCOMP/Perl/util.pl";

sub readMZEF{ #read Mzef output.
open(MZEF,"<$_[0]") ||die "Unable to open $_[0]";

while(<MZEF>)
{
    if (($Lend[$nbE],$Rend[$nbE],$Ppost[$nbE],$Pfr[1],$Pfr[2],$Pfr[3])=
       ($_ =~ /^\s(\d+)\-(\d+)\t\s([01].\d+)\t\s([01].\d+)\t\s([01].\d+)\t\s([01].\d+)\t\s/)) # read line containing exon coordinates and proba
    {push(@Lend_list,$Lend[$nbE]);
     $frame[$nbE]=$Pfr[1]>=$Pfr[2] ? 1 : 2; #compare probability for each frame and keep the frame with highest probability.
     if ($frame[$nbE]==1) {
	 $frame[$nbE]=$Pfr[1]>$Pfr[3]?1:3;}
     else {$frame[$nbE]=$Pfr[2]>$Pfr[3]?2:3;}
     $proba[$nbE]=$Pfr[$frame[$nbE]];
     $strand[$nbE]=$str;
     $type[$nbE]='Intr';
     $nbE++;
 }
}
close(MZEF);
}

    
sub readLS{
    open(LS,"< $liste") || die "Unable to open $liste.\n";
    $ST=&openST("mzef");
    while(<LS>){
	$count++;
	($seq,$Ltot)=split;              # read each file name and each corresponding sequence length
	if (($seq eq "")||($Ltot eq "")) #check if the list is correctly formatted
	{
	    &usage("Mzef");
	    die "!!! Incorrect input list at line $count!!!\n";}
	$fileD=$seq . "mzef1.txt";
	$fileR=$seq . "mzef2.txt";
	$str="+";
	$nbE=0;
	undef @Lend_list;
	&readMZEF($fileD); #read direct strand file
	$str="-";
	&readMZEF($fileR); #read reverse strand file
	&ordon(@Lend_list);
	print ST "\n";}
close LS;	
}

sub createST{
     @data=&Start("Mzef");
     if ($data[1]==0) { 
	 $liste=$data[0];
	 &readLS;
	 print "output file: $standart\n";
     }
     else 
     {
	 $nbE=0;
	 undef @Lend_list;
	 $Ltot=$data[1];
	 $fileD=$data[0];
	 @tmp=split('mzef1',$fileD);
	 $fileR=$tmp[0] . "mzef2.txt";
	 print "Take $fileR as output name on reverse strand.\n";
	 $seq=$tmp[0];
	 $file2 =$tmp[0] . "mzef12.txtST";
	 open(ST,">$file2");
	 print ST "Contig\tType\tStrand\tLend\tRend\tlength\tPhase\tFrame\tAcceptor\tDonor\tProba\n";
	 $str="+";
	 &readMZEF($fileD);
	 $str="-";
	 &readMZEF($fileR);
	 &ordon(@Lend_list);
	 print "output file: $file2\n";
     }
     close ST;
}

&createST;


