#!/usr/local/bin/perl

###############################################################################
# Perl script
# Author C.Mathe 09/10/98,modified 21/10/98
# This script extracts data from a NetPlantGene output file and writes it in a standard file format
# file of results : dataset_NPGST
###############################################################################

# Load the helper library util.pl that ships next to this script.
#   $dir         directory part of $0 including the trailing "/" ("" if none)
#   $name_script file-name part of $0
#   $need        path finally handed to require
# The three names stay package globals; code outside this block may read
# them (NOTE(review): not verifiable from this file alone).
($dir,$name_script)= ($0 =~ m{^(.*/)?([^/]+)$});   # any script name, not only *.pl
$dir="" unless defined $dir;
if ($dir eq "") {
    # No directory in $0 (e.g. "perl script.pl").  Since Perl 5.26 "." is no
    # longer part of @INC, so name the current directory explicitly when the
    # file is there; otherwise keep the historical @INC search.
    $need = (-e "./util.pl") ? "./util.pl" : "util.pl";
}
else {
    $need = $dir."util.pl";
}
require $need;

#require "/home/camat/BIOCOMP/Perl/util.pl";


# readNPG(FILE)
#
# Read one NetPlantGene output file line by line and pass every predicted
# splice site to EcrireST; finally print an empty line on the ST handle.
#
# Parsing is a small state machine kept in $splice:
#   0  outside any section
#   1  inside a "Donor" section     (entered when the first word is 'Donor')
#   2  inside an "Acceptor" section (entered when the first word is 'Acceptor')
# A blank line, or a line whose first word is 'No', closes the current section.
# Inside a section a line is a prediction when
#   - its 2nd word is '+' : columns are  position strand proba motif
#   - its 3rd word is '-' : columns are  position other-position strand proba motif
#
# Input : FILE, the path of the NetPlantGene output; the global $seq is read
#         and forwarded to EcrireST as the sequence name.
# Output: none; calls EcrireST once per prediction, writes "\n" to ST.
# Dies  : when FILE cannot be opened.
#
# All working variables are deliberately left as package globals: calcEnd,
# calcBeg and EcrireST are defined outside this file and might read some of
# them -- TODO confirm before turning them into lexicals.
sub readNPG{
    my $npg_file = "@_";
    # Three-argument open: the name is used literally, so surrounding blanks or
    # characters such as '>' or '|' in it can no longer alter the open mode.
    open(NPG,"<",$npg_file) || die "Unable to open @_.\n";

    $splice=0;
    while(<NPG>)
    {
        # Fields that this format does not provide stay empty in the output.
        $lg=$F=$type=$ph=$Rend=$Lend=$D=$A=$proba="";
        @word=split;

        # A section header is only looked for while outside a section.
        if ($splice==0){
            if ($word[0] eq 'Donor'){$splice=1;}
            if ($word[0] eq 'Acceptor'){$splice=2;}
        }

        if ($splice==1){
            # Donor site: the position read is the donor, calcEnd supplies the
            # two ends and the acceptor value.
            if ($word[1] eq '+') {
                ($D,$str,$proba,$motif)=@word;
                ($Rend,$Lend,$A)=&calcEnd($D,$str);
                &EcrireST($seq,$type,$str,$Lend,$Rend,$lg,$ph,$F,$A,$D,$proba);
            }
            if ($word[2] eq '-') {
                ($D,$R,$str,$proba,$motif)=@word;
                $D = $D + 1; # correction of the donor position: NPG gives (position - 1) on the reverse strand
                ($Rend,$Lend,$A)=&calcEnd($D,$str);
                &EcrireST($seq,$type,$str,$Lend,$Rend,$lg,$ph,$F,$A,$D,$proba);
            }
        }
        elsif ($splice==2){
            # Acceptor site: the position read is the acceptor, calcBeg supplies
            # the two ends and the donor value.
            if ($word[1] eq '+') {
                ($A,$str,$proba,$motif)=@word;
                ($Rend,$Lend,$D)=&calcBeg($A,$str);
                &EcrireST($seq,$type,$str,$Lend,$Rend,$lg,$ph,$F,$A,$D,$proba);
            }
            if ($word[2] eq '-') {
                ($A,$R,$str,$proba,$motif)=@word;
                $A = $A + 1; # correction of the acceptor position: NPG gives (position - 1) on the reverse strand
                ($Rend,$Lend,$D)=&calcBeg($A,$str);
                &EcrireST($seq,$type,$str,$Lend,$Rend,$lg,$ph,$F,$A,$D,$proba);
            }
        }

        # End of section: blank line (no first word) or a line starting with
        # 'No'.  A header line just consumed starts with 'Donor'/'Acceptor', so
        # running this test for every state is equivalent to testing it inside
        # each section.
        if (!defined($word[0]) || ($word[0] eq "") || ($word[0] eq 'No')) {$splice=0;}
    }
    close (NPG);
    print ST "\n";
}

# readLS(LIST)
#
# Process a list of NetPlantGene results.  Every line of the list file holds
# a sequence name and a length, separated by white space; for each line the
# file "<name>netplantgene.txt" is converted with readNPG and an empty line is
# written to ST.
#
# The list path is taken from the global $liste (the caller sets it before the
# call and also passes it as argument).  $seq and $Ltot are set as globals for
# each line because readNPG reads $seq; $Ltot is presumably consumed by
# helpers outside this file -- TODO confirm.
# openST is defined outside this file; it is presumably what opens the ST
# handle -- TODO confirm.
#
# Dies when the list cannot be opened or when a line lacks one of its two
# fields (usage is called first).
sub readLS{
    # Three-argument open: the path is used literally, never parsed for a mode.
    open(LS,"<",$liste) || die "Unable to open $liste.\n";
    $ST=&openST("netplantgene");

    #read the file containing all the NetPlantGene output files
    while(<LS>)
    {
        ($seq,$Ltot)=split;   # read each sequence name and each length
        #check if the list is correctly formatted
        if (!defined($seq) || ($seq eq "") || !defined($Ltot) || ($Ltot eq ""))
        {
            # The former  die "..."&usage(...)  was parsed as a bitwise AND of
            # the message with usage's return value, which garbled the text.
            # usage is still called first, as before; if it terminates the
            # program itself the die below is simply not reached.
            &usage("NetPlantGene");
            die "!!! Incorrect input list !!!\n";
        }
        $file=$seq . "netplantgene.txt";
        &readNPG($file);
        print ST "\n";
    }
    close LS;
}

# createST()
#
# Entry point of the conversion.  Start (defined outside this file) returns a
# two-element list; its second element selects the mode:
#   == 0  : the first element is a list file, processed by readLS;
#   other : the first element is a single NetPlantGene output file and the
#           second is stored in $Ltot.  The result is written to
#           "<file>ST", starting with a header line of column names, and the
#           sequence name $seq is the part of the file name that precedes
#           'netplantgene'.
# The name of the output is printed on STDOUT.  In list mode that name comes
# from the global $standart, which is set outside this file -- presumably by
# openST; TODO confirm.
#
# Dies when the single-file output cannot be created or completely written.
sub createST{
    @data=&Start("NetPlantGene");
    if ($data[1]==0) {
        $liste=$data[0];
        &readLS($liste);
        print "output file: $standart\n";
        # ST was not opened in this file for this mode, so its close is left
        # unchecked as before.
        close ST;
    }
    else
    {
        $Ltot=$data[1];
        $file=$data[0];
        @tmp=split('netplantgene',$file);
        $seq=$tmp[0];
        $file2 =$file . "ST";
        # Checked three-argument open: an unwritable destination used to be
        # ignored, silently discarding every result.
        open(ST,">",$file2) || die "Unable to open $file2 for writing: $!\n";
        print ST "Contig\tType\tStrand\tLend\tRend\tlength\tPhase\tFrame\tAcceptor\tDonor\tProba\n";
        &readNPG($file);
        # Buffered write errors only surface when the handle is closed.
        close(ST) || die "Unable to finish writing $file2: $!\n";
        print "output file $file2\n";
    }
}

# Script entry point: run the conversion.
createST();

