#!/usr/local/bin/perl -w

######################################################################
# script to compar two standart output files of two splice predictors
# author: C.Mathe, 09/06/99
#######################################################################


require "/home/camat/BIOCOMP/Perl/util.pl";



if($#ARGV!=1) {die "\nPerl script to compar two standart outputs, from two splice site predictors.\n 
USAGE:\ncompar_pred.pl file1 file2\n(file1 and file2 are at standard format)\n"}

# read splice site predictions of file 1

open(PRED1, $ARGV[0]) || die "can't open $ARGV[0]";
$prevSeq = 0;

    while (<PRED1>)
    {
    $line=$_;
    $Donor=$Acceptor="";
    if (($seq_id,$strand,$Lend, $Rend,$Acceptor,$Donor) =
	    (/^seq(\d+)\s+([+-])\t(\d*)\t(\d*)\t\t[012]?\t[+-]?[123]?\t(\d*)\t(\d*)\t(\d\.)?\d{1,2}/))
	{
	    $seq_id = int($seq_id);
	    if ($prevSeq != $seq_id)
	    {
		$nb_spliceD=$nb_spliceA=0;
		$prevSeq = $seq_id;
	    }
	    
	    if ($Acceptor ne "") {
		$pred1A{$seq_id}{$nb_spliceA}{'Ac'} = $Acceptor;
		$pred1A{$seq_id}{$nb_spliceA}{'Strand'}=$strand;
		$pred1A{$seq_id}{$nb_spliceA}{'all'}=$line;
		$nb_spliceA++;
	    }
	    elsif ($Donor ne "") {
		$pred1D{$seq_id}{$nb_spliceD}{'Do'} = $Donor;
		$pred1D{$seq_id}{$nb_spliceD}{'Strand'}=$strand;
		$pred1D{$seq_id}{$nb_spliceD}{'all'}=$line;
		$nb_spliceD++;
	    }
	}
    }
    close(PRED1);

print "Contig\tType\tS\tLend\tRend\tLength\tPhase\tFrame\tAc\tDo\tProba\n";

#read splice site predictions of file2
open(PRED2,$ARGV[1]) || die "can't open $ARGV[1]";;
$prevSeq=0;

while (<PRED2>)
{
    $line=$_;
    $Acceptor=$Donor="";
    if (($seq_id,$strand,$Lend,$Rend,$Acceptor,$Donor) =
	(/^seq(\d+)\s+([+-])\t(\d*)\t(\d*)\t\t[012]?\t[+-]?[123]?\t(\d*)\t(\d*)\t(\d\.)?\d{1,2}/))
	{
	    $seq_id = int($seq_id);
	if ($prevSeq != $seq_id)
	{
	    $nb_splice2A=$nb_splice2D=0;
	    $prevSeq = $seq_id;
	    push(@sequence,$seq_id);
	}
	
	if ($Acceptor ne "") {
	    $pred2A{$seq_id}{$nb_splice2A}{'Ac'} = $Acceptor;
	    $pred2A{$seq_id}{$nb_splice2A}{'Strand'}=$strand;
	    $pred2A{$seq_id}{$nb_splice2A}{'all'}=$line;
	    $nb_splice2A++;
	}
	elsif ($Donor ne "") {
	    $pred2D{$seq_id}{$nb_splice2D}{'Do'} = $Donor;
	    $pred2D{$seq_id}{$nb_splice2D}{'Strand'}=$strand;
	    $pred2D{$seq_id}{$nb_splice2D}{'all'}=$line;
	    $nb_splice2D++;
	}
    }
}
close(PRED2);

# make consensus list
$Lastseq=0;
foreach $seq (@sequence)
{
# class in ascending order
    @PRED1A = sort {$pred1A{$seq}{$a}{'Ac'} <=> $pred1A{$seq}{$b}{'Ac'}} keys %{$pred1A{$seq}};
    @PRED1D = sort {$pred1D{$seq}{$a}{'Do'} <=> $pred1D{$seq}{$b}{'Do'}} keys %{$pred1D{$seq}};
    @PRED2A = sort {$pred2A{$seq}{$a}{'Ac'} <=> $pred2A{$seq}{$b}{'Ac'}} keys %{$pred2A{$seq}};
    @PRED2D = sort {$pred2D{$seq}{$a}{'Do'} <=> $pred2D{$seq}{$b}{'Do'}} keys %{$pred2D{$seq}};

    if ($Lastseq !=$seq)
    {
	$Lastseq=$seq;
	print "\n";
    }
    foreach $p (@PRED1A) #loop on Acceptor of predictions 1
    {
	foreach $q (@PRED2A) #loop on Acceptor of predictions 2
	{
	    if (($pred2A{$seq}{$q}{'Ac'}==$pred1A{$seq}{$p}{'Ac'}) &&
		($pred2A{$seq}{$q}{'Strand'} eq $pred1A{$seq}{$p}{'Strand'}))
	    {
		print $pred1A{$seq}{$p}{'all'}; # write the line read in file1
	    }
	}
    }

    foreach $r (@PRED1D) #lopp on Donor of predictions 1
    {
	foreach $t(@PRED2D) #loop on Donor of predictions 2
	{
	    if (($pred2D{$seq}{$t}{'Do'}==$pred1D{$seq}{$r}{'Do'})&&
		($pred2D{$seq}{$t}{'Strand'} eq $pred1D{$seq}{$r}{'Strand'}))
	    {
		print $pred1D{$seq}{$r}{'all'};
	    }
	}
    }
}


