#!/usr/bin/perl
#
# Usage: filter_eval.pl <qrels_file> <submission_file>
#     where qrels is the name of the file containing the relevance
#                  judgments made by assessors
#     and submission is the name of the file containing the answers
#     to be evaluated
# Evaluation is done with the Utility, F-beta, Precision and Recall
#  scores provided in the TREC filtering guidelines
# Output: score for each topic and normalized average across topics
#
# (c) Rada Mihalcea, 2002 --- Initial version 06/26/2002
#
#NOTE: There is an embedded sort that may cause strange behavior on
#some machines; this software was implemented and tested on Linux/RedHat 7.3 


#predefined values; may be changed later on
$MinNU = -0.5;

$#ARGV==1 || die "Usage: filter_eval.pl qrels_file submission_file\n";
$qrels = $ARGV[0];
$submission = $ARGV[1];

if ( (! -e $qrels) || (! open QRELS, "<$qrels") ) {
    die "Can't find/open qrels file `$qrels': $!\n";
}

#read the topic ids and the relevant files into an array
while ($line = <QRELS>) {
    chomp $line;
    ($id, $docid, $rel) = split " ", $line;
    $id =~ /.(\d+)/;
    $tid = $1;
    if($rel == 1) {
	push @{$qrels[$tid]}, $docid;
    }
}
close QRELS || die "Cannot close qrels file: $!\n";

# process submission file in sorted order
if ( (! -e $submission) ||
     (! open INPUT, "sort -k1,1 $submission |") ) {
    die "Can't find/open/sort submission file `$submission': $!\n";
}

# The following figures are important for scoring
# RetRel = retrieved and relevant
# RetNotRel = retrieved and not relevant
# NotRetRel = not retrieved and relevant
 
$RetRel = 0;
$RetNotRetl = 0;
$NotRetRel = 0;

$totT11F = 0;
$totT11SU = 0;
$totPrec = 0;
$totRecall = 0;
$totTopics = 0;

$oldtid = -1;
while($line = <INPUT>) {
    chomp $line;
    ($id, $tmp1, $docid, $tmp2, $tmp3, $teamname) = split /\s+/, $line;
    $id =~ /.(\d+)/;
    $tid = $1;
    #print "$tid $docid\n";
    
    #if this is a new topic, print out the evaluation for the previous topic
    if($tid != $oldtid) {
	
	#nothing to print out, if tid is the very first query
	if(-1 != $oldtid) {
	    &displayScores;
	}
	$RetRel = 0;
	$RetNotRel = 0;
	$NotRetRel = 0;
    }

    $found = 0;
    foreach $keyid (@{$qrels[$tid]}) {
	if($docid == $keyid) {
	    $RetRel++;
	    $found = 1;
	    last;
	}
    }
    if(0 == $found) {
	$RetNotRel++;
    } 
    $oldtid = $tid;
	    
}

#now print the very last topic
if(-1 != $oldtid) {
    &displayScores;
}


#and finally display the averaged scores
if(0 != $totTopics) {
    printf "\n\nAveraged utility score: %1.4f\nAveraged F-beta score:  %1.4f\nAveraged Precision: %1.4f\nAveraged Recall: %1.4f\n",
    $totT11SU/$totTopics,
    $totT11F/$totTopics,
    $totPrec/$totTopics,
    $totRecall/$totTopics;
    
}


sub displayScores{
    $NotRetRel = scalar(@{$qrels[$oldtid]}) - $RetRel;
    
    #compute the utility scores
    $T11U = 2*$RetRel-$RetNotRel;
    $T11NU = $T11U / (2*scalar(@{$qrels[$oldtid]}));
    if($T11NU <= $MinNU) {
	$T11SU = 0;
    }
    else {
	$T11SU = ($T11NU - $MinNU) / (1 - $MinNU);
    }
    
    #and the F-beta scores
    if(0 == $RetNotRel &&
       0 == $RetRel) {
	$T11F = 0;
    }
    else {
	$T11F = (1.25*$RetRel) / (0.25*$NotRetRel + $RetNotRel + 1.25*$RetRel);
    }
    
    # Added on 11-02-2002
    # The Precision score
    if(0 == $RetNotRel &&
       0 == $RetRel) {
	$Prec = 0;
    }
    else {
	$Prec = $RetRel / ($RetRel + $RetNotRel);
    }

    # Added on 11-02-2002
    # The Recall score
    if(0 == $NotRetRel &&
       0 == $RetRel) {
	$Recall = 0;
    }
    else {
	$Recall = $RetRel / ($RetRel + $NotRetRel);
    }

    # T11U is available as well at this point, and 
    # can be printed out if needed
    printf "Topic %d: R+=%d N+=%d R-=%d T11SU=%1.4f T11F=%1.4f P=%1.4f R=%1.4f\n", 
    $oldtid, $RetRel, $RetNotRel, $NotRetRel, $T11SU, $T11F, 
    $Prec, $Recall ;

    $totT11F += $T11F;
    $totT11SU += $T11SU;
    $totPrec += $Prec;
    $totRecall += $Recall;
    $totTopics++;
}


