-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsketch_proto.pl
60 lines (45 loc) · 2.23 KB
/
sketch_proto.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Temporary sketch of the experiment: runs P_h_t_index twice on one text/hypothesis pair and prints how long each run took.
use warnings;
use strict;
use proto_condprob qw(:DEFAULT set_num_thread $DEBUG $APPROXIMATE_WITH_TOP_N_HITS export_hash_to_file plucene_query solr_query P_t_index P_h_t_index $SOLR_URL);
use octave_call;
use Benchmark qw(:all);
use POSIX qw(_exit);
# ---------------------------------------------------------------------------
# Settings for proto_condprob. These package variables and set_num_thread are
# imported by the `use proto_condprob qw(...)` line at the top of the file.
# ---------------------------------------------------------------------------
our $DEBUG    = 0;
our $SOLR_URL = "http://127.0.0.1:9911/solr";
#set_num_thread(4);
set_num_thread(6);
our $APPROXIMATE_WITH_TOP_N_HITS = 4000;

# Model locations shared by every P_h_t_index call below.
my $collection_model = "./models/collection/collection.model";
my $document_models  = "./models/document";

# Built-in example pair, used when no pair is given on the command line.
my $text = lc "A bus collision with a truck in Uganda has resulted in at least 30 fatalities and has left a further 21 injured";
my $hypothesis = lc "30 dies in a bus collision in Uganda";

# Optional override: sketch_proto.pl "<text>" "<hypothesis>"
# Arguments are checked for presence and non-emptiness (not truthiness), so a
# literal "0" is accepted. They are lower-cased exactly like the built-in pair
# so both input paths reach P_h_t_index in the same form.
# NOTE(review): assumes the models expect lower-cased input, as the built-in
# example suggests -- confirm against proto_condprob.
my ($arg_text, $arg_hypothesis) = @ARGV;
if (    defined $arg_text
    and defined $arg_hypothesis
    and length $arg_text
    and length $arg_hypothesis)
{
    $text       = lc $arg_text;
    $hypothesis = lc $arg_hypothesis;
}
elsif (@ARGV) {
    # Do not fall back silently on a partial or empty argument list.
    warn "$0: expected two non-empty arguments (text, hypothesis); "
        . "using the built-in example pair\n";
}

# time in
my $t0 = Benchmark->new;

# Earlier experiments, kept for reference:
#my %r = P_t_multithread($text, 0.5, "./models/collection/collection.model", "./models/document");
##P_h_t_multithread($hypothesis, $text, 0.5, "./models/collection/collection.model", "./models/document/afp_eng_2010");
## testing new P_t_index, with P_t_multithread_index
#my $href = P_t_multithread_index($text, 0.5, "./models/collection/collection.model", "./models/document", "./models_index");
#my $href = P_t_index($text, 0.5, "./models/collection/collection.model", "./models/document");
#export_hash_to_file($href, "sketch_test.txt");
# The following two lines need octave.
#my @a = values %{$href};
#print "\naverage logprob from the doc-models:", mean(\@a), "\n";
#P_h_t_multithread_index($hypothesis, $text, 0.5, "./models/collection/collection.model", "./models/document", "./models_index");

# 1st run.
P_h_t_index($hypothesis, $text, 0.5, $collection_model, $document_models);
#P_h_t_index($text, $hypothesis, 0.5, "./models/collection/collection.model", "./models/document");
#P_h_t_index($hypothesis, $text, 0.5, "./models/collection/collection.model", "./models/document");

# time out
my $t1 = Benchmark->new;
my $td = timediff($t1, $t0);
#$| = 1; # for _exit
print "the code took:", timestr($td), "\n";
#exit()

# 2nd run, real, normal expected time for pairs.
# A fresh start mark is taken here so that computing and printing the first
# report is not counted as part of the second run.
my $t1_second = Benchmark->new;
P_h_t_index($hypothesis, $text, 0.5, $collection_model, $document_models);
my $t2 = Benchmark->new;
$td = timediff($t2, $t1_second);
print "2nd time (the normal time) it took: ", timestr($td), "\n";