forked from gilnoh/gigaword-lm-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsketch.pl
46 lines (40 loc) · 1.41 KB
/
sketch.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Temporary sketch of the experiment.
use warnings;
use strict;
use proto_condprob qw(:DEFAULT set_num_thread $DEBUG);
use octave_call;
use Benchmark qw(:all);
# Driver settings: $DEBUG is the variable requested from proto_condprob in the
# import list above; the thread count is handed to set_num_thread().
our $DEBUG = 2;
set_num_thread(2);

# Smoke-test inputs. This run only checks that the pipeline works end to end:
# per the original author's note, the small test corpus holds no plane-crash
# event and barely contains these terms, so the numbers are not meaningful.
# NOTE(review): the original note spoke of a "2009 small set" (May 2009), while
# the document-model path below points at afp_eng_2010 -- confirm which corpus
# is intended.
my $text = "there was an airplane accident";
my $hypothesis = "everyone died";

# Disabled alternative experiment: compute P(t) per document model, dump the
# values to a file, and print their average.
# my %r = P_t($text);
# my %r = P_t_multithread($text);
# print "Done\n";
# export_hash_to_file(\%r, "Pt_from_sketch.txt");
# my @a = values %r;
# print "Average logprob from the doc-models: ", mean(\@a), "\n";

## P_h_t_multithread call
# Arguments: hypothesis, text, lambda, collection model path, document models.
# Returns (per the original author's note):
# ( P(h|t) / P(h) as non-log, P(h|t) as log, P(h) as log, P(t) as log,
#   evidences of un-normalized contributions as a hash reference ).
# The return value is deliberately ignored here; only the run time is reported.
my $lambda = 0.5;
my $collection_model = "./models/collection/collection.model";
my $document_models = "./models/document/afp_eng_2010";

# Take a Benchmark snapshot on each side of the call.
my $started = Benchmark->new;
P_h_t_multithread($hypothesis, $text, $lambda, $collection_model, $document_models);
my $finished = Benchmark->new;

printf "the code took:%s\n", timestr(timediff($finished, $started));
# export_hash_to_file(\%hash, $filename)
#
# Writes the given hash to $filename as text, one entry per line in the form
# "key \t value" (key, space, tab, space, value), with keys in default
# (string) sort order. An existing file is truncated and overwritten.
#
# Arguments:
#   $_[0]  reference to the hash to export
#   $_[1]  path of the output file
#
# Returns 1 on success. Dies with a message including $! if the file cannot be
# opened for writing or cannot be closed cleanly.
sub export_hash_to_file
{
    my ($hash_ref, $filename) = @_;

    # A lexical handle avoids clobbering a global FILE handle used elsewhere;
    # the checked open stops an unwritable path from being silently ignored.
    open(my $out, ">", $filename)
        or die "export_hash_to_file: cannot open '$filename' for writing: $!";

    foreach my $key (sort keys %{$hash_ref})
    {
        print {$out} "$key \t $hash_ref->{$key}\n";
    }

    # Buffered write errors (e.g. a full disk) only surface at close time.
    close($out)
        or die "export_hash_to_file: cannot close '$filename': $!";

    return 1;
}