-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathhtml_to_table.py
64 lines (58 loc) · 2.02 KB
/
html_to_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from collections import Counter
from pathlib import Path

import pandas as pd
def get_results(table):
    """Collect the evaluation verdict of every model from one HTML table.

    The markup is cut into question sections at each
    ``<td colspan=2 style="text-align: center"><b>`` header cell, and every
    section is cut into rows at the ``</tr>`` / ``<tr>`` boundary.  In a data
    row, the text before the first ``</td>`` is the model name and the
    second-to-last ``</td>``-delimited chunk holds the verdict text.

    Rows that contain ``<b>`` or ``<caption>``, are blank, are at most 10
    characters long, have no ``</td>`` at all, or carry no recognised verdict
    are skipped.

    Args:
        table: HTML source of a single table, as a string.

    Returns:
        A dict mapping each model name to the list of its verdict labels
        (``"correct"``, ``"incorrect"`` or ``"part_correct"``), one entry per
        recognised row, in document order.
    """
    question_marker = '<td colspan=2 style="text-align: center"><b>'
    # NOTE(review): the separator must match the exact whitespace between
    # rows in the input HTML -- confirm against the real file.
    row_separator = "</tr>\n<tr>"
    # Checked in priority order: a cell matching several markers gets the
    # first label listed here.
    verdict_labels = (
        ("Partially correct", "part_correct"),
        ("Incorrect", "incorrect"),
        ("Correct", "correct"),
    )

    results = {}
    for question in table.split(question_marker):
        for row in question.split(row_separator):
            if (
                not row.strip()
                or len(row) <= 10
                or "<b>" in row
                or "<caption>" in row
            ):
                continue

            cells = row.split("</td>")
            if len(cells) < 2:
                # No closed cell, so there is no verdict column to read.
                continue

            model_name = (
                cells[0].replace("<td>", "").replace("<tr>", "").strip()
            )
            verdict_cell = cells[-2]
            result = next(
                (
                    label
                    for marker, label in verdict_labels
                    if marker in verdict_cell
                ),
                None,
            )
            if result is None:
                # Unrecognised verdict: skip the row instead of recording a
                # value left over from a previous row.
                continue

            results.setdefault(model_name, []).append(result)
    return results
def make_tables(table_file, list_prompts=("spacex", "pizza", "dreambot")):
    """Build one verdict-count table per prompt and save each as a CSV file.

    The text of ``table_file`` is split on the
    ``<table BORDER=1 style="width: 100%">`` opening tag; the first, second
    and third tables are taken to belong to the ``"spacex"``, ``"pizza"`` and
    ``"dreambot"`` prompts respectively.  Each requested table is parsed with
    ``get_results`` and the verdicts are counted per model.

    Args:
        table_file: Path of the text file containing the HTML tables.
        list_prompts: Iterable of prompt names to process.  Each must be one
            of ``"spacex"``, ``"pizza"`` or ``"dreambot"``.

    Returns:
        A list of ``pandas.DataFrame`` objects in the order of
        ``list_prompts``.  Rows are model names, columns are verdict labels,
        cells are integer counts; both axes are sorted.  Each frame is also
        written to ``tables_models_eval/<prompt>.csv``.

    Raises:
        KeyError: If a prompt name is not one of the known prompts.
        IndexError: If the file has fewer tables than a requested prompt
            needs.
    """
    # Position of each prompt's table after splitting on the opening tag;
    # index 0 is whatever precedes the first table.
    table_positions = {"spacex": 1, "pizza": 2, "dreambot": 3}

    with open(table_file, "r") as handle:
        tables_split = handle.read().split(
            '<table BORDER=1 style="width: 100%">'
        )

    # to_csv does not create missing directories.
    output_dir = Path("tables_models_eval")
    output_dir.mkdir(parents=True, exist_ok=True)

    all_tables = []
    for prompt in list_prompts:
        # Only the requested tables are parsed, so a file lacking a later
        # table does not break processing of an earlier one.
        per_model = get_results(tables_split[table_positions[prompt]])
        count_dict = {
            name: Counter(verdicts) for name, verdicts in per_model.items()
        }
        res = (
            pd.DataFrame.from_dict(count_dict, orient="index")
            .fillna(0)
            .astype("int")
            .sort_index()
            # axis must be passed by keyword: pandas 2.0 made the arguments
            # of sort_index keyword-only.
            .sort_index(axis=1)
        )
        all_tables.append(res)
        res.to_csv(output_dir / f"{prompt}.csv")
    return all_tables
# Runs when the module is executed or imported: parse "html_table.txt"
# (resolved against the current working directory) and write the CSV tables.
make_tables(table_file="html_table.txt")