#!/usr/local/bin/perl -w # pairwise: perform pairwise voting # usage: pairwise tunefile testfile # reference: Van Halteren et.al. ACL98 # 20000420 erikt@uia.ua.ac.be # version date 20000730 use strict; if (not $ARGV[1]) { die "usage: pairwise tunefile testfile\n"; } if (not -f $ARGV[0]) { die "cannot open tunefile\n"; } if (not -f $ARGV[1]) { die "cannot open testfile\n"; } my $tuneFileName = shift(@ARGV); my $testFileName = shift(@ARGV); my $sep = ":"; my ($bestValue,$bestTag,$correct,%double,%doubleTotal,%history,$i,$j, $key1,$key2,$key3,@keys,$line,%single,%singleTotal,%voted,@words); open(INFILE,$tuneFileName); while () { chomp($line = $_); @words = split(/\s+/,$line); $correct = pop(@words); for ($i=0;$i<@words;$i++) { if ($words[$i] =~ /$sep/) { die "illegal word $words[$i]\n"; } $key2 = "$i$sep$words[$i]"; $key1 = "$key2$sep$correct"; $single{$key1} = $single{$key1} ? $single{$key1}+1 : 1; $singleTotal{$key2} = $singleTotal{$key2} ? $singleTotal{$key2}+1 : 1; for ($j=$i+1;$j<@words;$j++) { if ($words[$j] =~ /$sep/) { die "illegal word $words[$j]\n"; } $key2 = "$i$sep$words[$i]$sep$j$sep$words[$j]"; $key1 = "$key2$sep$correct"; $double{$key1} = $double{$key1} ? $double{$key1}+1 : 1; $doubleTotal{$key2} = $doubleTotal{$key2} ? $doubleTotal{$key2}+1 : 1; } } } close(INFILE); open(INFILE,$testFileName); LOOP: while () { chomp($line = $_); @words = split(/\s+/,$line); $correct = pop(@words); $line = join(' ',@words); if ($history{$line}) { print "$line $correct $history{$line}\n"; next LOOP; } %voted = (); for ($i=0;$i<@words;$i++) { if ($words[$i] =~ /$sep/) { die "illegal word $words[$i]\n"; } for ($j=$i+1;$j<@words;$j++) { if ($words[$j] =~ /$sep/) { die "illegal word $words[$j]\n"; } $key2 = "$i$sep$words[$i]$sep$j$sep$words[$j]"; @keys = (sort grep { $key2 eq &removeAnswer($_) } keys %double); if (@keys) { foreach $key1 (@keys) { $key3 = &getAnswer($key1); $voted{$key3} = $voted{$key3} ? $voted{$key3} : 0; $voted{$key3} += $double{$key1}/$doubleTotal{$key2}; } } else { # pair did not occur in tune data: use single occurrences $key2 = "$i$sep$words[$i]"; @keys = (sort grep { $key2 eq &removeAnswer($_) } keys %single); foreach $key1 (@keys) { $key3 = &getAnswer($key1); $voted{$key3} = $voted{$key3} ? $voted{$key3} : 0; $voted{$key3} += $single{$key1}/($singleTotal{$key2}*2); } $key2 = "$j$sep$words[$j]"; @keys = (sort grep { $key2 eq &removeAnswer($_) } keys %single); foreach $key1 (@keys) { $key3 = &getAnswer($key1); $voted{$key3} = $voted{$key3} ? $voted{$key3} : 0; $voted{$key3} += $single{$key1}/($singleTotal{$key2}*2); } } } } $bestValue = -1; $bestTag = "???"; foreach $key3 (keys %voted) { if ($voted{$key3} > $bestValue) { $bestValue = $voted{$key3}; $bestTag = $key3; } } $history{$line} = $bestTag; print "$line $correct $bestTag\n"; } close(INFILE); exit(0); # translates key1 (=key2.key3) to its family key2 sub removeAnswer { my @words = split(/$sep/,shift(@_)); pop(@words); return(join("$sep",@words)); } # returns key3 part of key1 (=key2.key3) sub getAnswer { my @words = split(/$sep/,shift(@_)); return(pop(@words)); }