#!/usr/local/bin/perl -w

# takes 2 standard files as input: one real, one prediction 
# plus a data list and compute stats

#============================================================

sub max
  {
    return $_[0] > $_[1] ? $_[0] : $_[1];
  }

#============================================================

sub min
  {
    return $_[0] < $_[1] ? $_[0] : $_[1];
  }

#============================================================

if ($#ARGV != 1)
  {
    die "usage: param1: standard read data param2: standard prediction";
  }

open(RDATA, $ARGV[0]) || die "can't open $ARGV[0]";
open(PDATA, $ARGV[1]) || die "can't open $ARGV[1]";

# read real data
$prevSeq = 0;
while (<RDATA>)
  {
    if (($seq_id, $Lend, $Rend, $Length, $Frame) =
	(/^seq(\d+)\s+.{0,4}\s+[+-]\s+(\d+)\s+(\d+)\s+(\d+)\s+[+-]?\d?\s+([+-]\d)/))
      {
	$seq_id = int($seq_id);
	if ($prevSeq != $seq_id)
	  {
	    $exonNb = 0;
	    $prevSeq = $seq_id;
	  }
	$rdata{$seq_id}{$exonNb}{'Lend'} = $Lend;
	$rdata{$seq_id}{$exonNb}{'Rend'} = $Rend;
	$rdata{$seq_id}{$exonNb}{'Length'} = $Length;
	$rdata{$seq_id}{$exonNb}{'Frame'} = int($Frame);
	$exonNb++;
      }
  }
close(RDATA);

# read prediction
$prevSeq = 0;
while (<PDATA>)
  {
    if (($seq_id, $Lend, $Rend, $Length, $Frame) =
	(/^seq(\d+)\s+.{0,4}\s+[+-]\s+(\d+)\s+(\d+)\s+(\d+)\s+[+-]?\d?\s+([+-]\d)/))
      {
	$seq_id = int($seq_id);
	if ($prevSeq != $seq_id)
	  {
	    $exonNb = 0;
	    $prevSeq = $seq_id;
	  }
	$pdata{$seq_id}{$exonNb}{'Lend'} = $Lend;
	$pdata{$seq_id}{$exonNb}{'Rend'} = $Rend;
	$pdata{$seq_id}{$exonNb}{'Length'} = $Length;
	$pdata{$seq_id}{$exonNb}{'Frame'} = int($Frame);
	$exonNb++;
      }
  }
close(PDATA);

# build overlapping list
@sequences = sort {$a <=> $b} keys %rdata;
foreach $s (@sequences) 
  {
    @Rexons = sort {$rdata{$s}{$a}{'Lend'} <=> $rdata{$s}{$b}{'Lend'}} keys %{$rdata{$s}};
    if (!exists($pdata{$s}))
      {
	print STDERR "sequence $s not in prediction\n";
	next;
      }
    @Pexons = sort {$pdata{$s}{$a}{'Lend'} <=> $pdata{$s}{$b}{'Lend'}} keys %{$pdata{$s}};
    foreach $r (@Rexons) # loop on reality and compare to prediction
      {
	foreach $p (@Pexons) # loop then on prediction
	  {
	    # check if predicted exon is in the same frame as reality
	    # if not skip it 
	    if ($rdata{$s}{$r}{'Frame'} != $pdata{$s}{$p}{'Frame'})
	      {
		next;
	      }
	    # check if prediction overlap reality
	    if (($rdata{$s}{$r}{'Lend'} <= $pdata{$s}{$p}{'Lend'} &&
		 $pdata{$s}{$p}{'Lend'} <= $rdata{$s}{$r}{'Rend'}) ||
		($pdata{$s}{$p}{'Lend'} <= $rdata{$s}{$r}{'Lend'} &&
		 $rdata{$s}{$r}{'Lend'} <= $pdata{$s}{$p}{'Rend'}))
	      {
		push (@{$overlap{$s}{$r}}, $p);
	      }
	  }
      }
  }

# compute TP and FP
print "Contig\tReality\tLength\tPrediction\tTPf\t%TPf\tFPf\t%FPf\t(1+%TPf)/(1+%FPf)\n";
@sequences = sort {$a <=> $b} keys %overlap;
foreach $s (@sequences)
  {
    @Rexons = sort {$a <=> $b} keys %{$overlap{$s}};
    foreach $r (@Rexons)
      {
	foreach $p (@{$overlap{$s}{$r}})
	  {
	    printf "seq%03d\t", $s;
	    printf "%2d [%5d,%5d]\t", $r, $rdata{$s}{$r}{'Lend'}, $rdata{$s}{$r}{'Rend'};
	    $Length =  $rdata{$s}{$r}{'Rend'} -  $rdata{$s}{$r}{'Lend'} + 1;
	    printf "%d\t", $Length;
	    printf  "%2d [%5d,%5d]\t", $p, $pdata{$s}{$p}{'Lend'}, $pdata{$s}{$p}{'Rend'};
	    $TP = &min($rdata{$s}{$r}{'Rend'},  $pdata{$s}{$p}{'Rend'}) -
	      &max($rdata{$s}{$r}{'Lend'},  $pdata{$s}{$p}{'Lend'}) + 1;
	    printf "%d\t", $TP;
	    $pTP =  $TP / $Length * 100;
	    printf "%.2f\t",$pTP;
	    $FP = $pdata{$s}{$p}{'Rend'} - $pdata{$s}{$p}{'Lend'} + 1 - $TP;
	    printf "%d\t", $FP;
	    $pFP = $FP / $Length * 100;
	    printf "%.2f\t", $pFP;
	    printf "%.3f\n", (1 + $pTP)/(1 + $pFP);
	  }
      }
  }
