#!/usr/local/bin/perl -w

# Reads from STDIN the (possibly filtered) output of cmpexons.pl.
# Takes as its single parameter a ST real data file.
# Prints the number of FN (real exons that have not been predicted) per
# sequence: the total number of exons minus the number of distinct real
# exons predicted in frame according to the output of cmpexons.pl.

use strict;
use warnings;

# Exactly one command-line argument is required: the path of the ST real
# data file. The cmpexons.pl output itself is read from STDIN.
if (@ARGV != 1) {
    die "need a ST real data file as parameter";
}

my $rdata_path = $ARGV[0];

# Three-argument open with a lexical handle: the path is never interpreted
# as a mode or a pipe, and the OS error is reported on failure.
open(my $rdata, '<', $rdata_path) or die "can't open $rdata_path: $!";

# Read the real data file and count, for each sequence, the lines that start
# with "seq<digits>" (presumably one line per real exon -- confirm against
# the ST file format). Counting per key, rather than per run of identical
# consecutive ids, keeps the total correct even when the lines of one
# sequence are not contiguous in the file.
my %total_exons;
while (my $line = <$rdata>) {
    if ($line =~ /^seq(\d+)/) {
        # int() normalises zero-padded ids ("007" -> 7).
        $total_exons{ int($1) }++;
    }
}

close($rdata);

# Read the cmpexons.pl output. For every matching line, remember the value of
# the second column (presumably the index of the real exon -- confirm against
# cmpexons.pl) under its sequence; using hash keys makes the values distinct.
my %pred_exons;
while (my $line = <STDIN>) {
    if (my ($seq_id, $real_exon) =
        $line =~ /^seq(\d+)\s+(\d+)\s+\[\s*\d+,\s*\d+\]\s+\d+\s+\d+/)
    {
        $pred_exons{ int($seq_id) }{$real_exon} = "";
    }
}

# Compute FN (real exons that have not been predicted) for every sequence
# present in the real data file, in numeric order of sequence id. Sequences
# that appear only in the cmpexons.pl output are not reported.
print "Contig\t#FNf\n";
for my $seq (sort { $a <=> $b } keys %total_exons) {
    # The exists test avoids autovivifying an entry for sequences that have
    # no predicted exon at all.
    my $found = exists $pred_exons{$seq}
              ? scalar keys %{ $pred_exons{$seq} }
              : 0;
    printf "seq%03d\t%d\n", $seq, $total_exons{$seq} - $found;
}
