#!/usr/local/bin/perl

######################################################################
# script to compar two standart output files from exon predictors
#author: C.Mathe, 06/05/99.
#######################################################################


require "/home/camat/BIOCOMP/Perl/util.pl";


if($#ARGV!=1) {die "\nPerl script to compar two standart outputs.\n 
USAGE:\ncompar_pred.pl file1 file2
\nFile1 and file2 are two exon predictors. \nFile1 is taken as the *best* one, and its exon labels (Init,Intr,Ter,Sngl) are kept\n"}

open(PRED1, $ARGV[0]) || die "can't open $ARGV[0]";
open(PRED2, $ARGV[1]) || die "can't open $ARGV[1]";

# read prediction1

$prevSeq = 0;
while (<PRED1>)
{
    if (($seq_id, $type,$Lend, $Rend, $Length, $Frame) =
	(/^seq(\d+)\s+(.{0,4})\s+[+-]\s+(\d+)\s+(\d+)\s+(\d+)\s+[+-]?\d?\s+([+-]\d)/))
    {
	$seq_id = int($seq_id);
	if ($prevSeq != $seq_id)
	{
	    $exonNb = 0;
	    $prevSeq = $seq_id;
	}
	$pdata1{$seq_id}{$exonNb}{'Lend'} = $Lend;
	$pdata1{$seq_id}{$exonNb}{'Rend'} = $Rend;
	$pdata1{$seq_id}{$exonNb}{'Length'} = $Length;
	$pdata1{$seq_id}{$exonNb}{'Frame'} = $Frame;
	$pdata1{$seq_id}{$exonNb}{'Type'} = $type;
	$exonNb++;
    }
}
close(PRED1);

# read prediction2
$prevSeq = 0;
while (<PRED2>)
{
    if (($seq_id, $Lend, $Rend, $Length, $Frame) =
	(/^seq(\d+)\s+.{0,4}\s+[+-]\s+(\d+)\s+(\d+)\s+(\d+)\s+[+-]?\d?\s+([+-]\d)/))
    {
	$seq_id = int($seq_id);
	if ($prevSeq != $seq_id)
	{
	    $exonNb = 0;
	    $prevSeq = $seq_id;
	}
	$pdata2{$seq_id}{$exonNb}{'Lend'} = $Lend;
	$pdata2{$seq_id}{$exonNb}{'Rend'} = $Rend;
	$pdata2{$seq_id}{$exonNb}{'Length'} = $Length;
	$pdata2{$seq_id}{$exonNb}{'Frame'} = $Frame;
	$exonNb++;
    }
}
close(PRED2);

# build consensus predictions
print "Contig\tType\tS\tLend\tRend\tLength\tPhase\tFrame\tAc\tDo\tProba\n";
$lastseq=0;
@sequences = sort {$a <=> $b} keys %pdata1;
foreach $s (@sequences) 
{
    @Pexons1 = sort {$pdata1{$s}{$a}{'Lend'} <=> $pdata1{$s}{$b}{'Lend'}} keys %{$pdata1{$s}};
    if (!exists($pdata2{$s}))
    {
	print STDERR "sequence $s not in prediction\n";
	next;
    }
    @Pexons2 = sort {$pdata2{$s}{$a}{'Lend'} <=> $pdata2{$s}{$b}{'Lend'}} keys %{$pdata2{$s}};
    foreach $r (@Pexons1) # loop on prediction1 and compare to prediction2
    {
	foreach $p (@Pexons2) # loop then on prediction2
	{
	    # check if predicted exon2 is in the same frame as prediction1
	    # if not skip it 
	    if ($pdata1{$s}{$r}{'Frame'} != $pdata2{$s}{$p}{'Frame'})
	    {
		next;
	    }
	    # check if prediction1 = prediction2
	    if ($pdata1{$s}{$r}{'Lend'} == $pdata2{$s}{$p}{'Lend'} &&
		 $pdata1{$s}{$r}{'Rend'} == $pdata2{$s}{$p}{'Rend'})
	    {
		if ($lastseq != $s) {print "\n";
				     $lastseq=$s; }
		($str) = ($pdata1{$s}{$r}{'Frame'}=~ /([+-])/);
        	($lg,$A,$D)=&calcAD($pdata1{$s}{$r}{'Lend'},$pdata1{$s}{$r}{'Rend'},$str);
		printf "seq%02d\t$pdata1{$s}{$r}{'Type'}\t$str\t$pdata1{$s}{$r}{'Lend'}\t$pdata1{$s}{$r}{'Rend'}\t$lg\t\t$pdata1{$s}{$r}{'Frame'}\t$A\t$D\n",$s;
	    }
	}
    }
}

