Skip to content

Commit

Permalink
Extract example translation list in some fr edition pages
Browse files Browse the repository at this point in the history
  • Loading branch information
xxyzz committed Feb 27, 2024
1 parent 10eb259 commit 6b426a7
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 11 deletions.
29 changes: 18 additions & 11 deletions src/wiktextract/extractor/fr/gloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,22 +108,29 @@ def extract_examples(
):
process_exemple_template(wxr, first_child, gloss_data)
else:
example_nodes = []
source_template = None
for example_template in example_node.find_child(NodeKind.TEMPLATE):
if example_template.template_name == "source":
source_template = example_template
example_data = Example()
ignored_nodes = []
for node in example_node.find_child(
NodeKind.TEMPLATE | NodeKind.LIST
):
if (
node.kind == NodeKind.TEMPLATE
and node.template_name == "source"
):
example_data.ref = clean_node(wxr, None, node).strip("— ()")
ignored_nodes.append(node)
elif node.kind == NodeKind.LIST:
for tr_item in node.find_child(NodeKind.LIST_ITEM):
example_data.translation = clean_node(
wxr, None, tr_item.children
)
ignored_nodes.append(node)
example_nodes = [
node
for node in example_node_children
if node != source_template
if node not in ignored_nodes
]
example_data = Example()
example_data.text = clean_node(wxr, None, example_nodes)
if source_template is not None:
example_data.ref = clean_node(wxr, None, source_template).strip(
"— ()"
)
gloss_data.examples.append(example_data)


Expand Down
33 changes: 33 additions & 0 deletions tests/test_fr_gloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,3 +475,36 @@ def test_variante_kyujitai_de(self):
}
],
)

def test_example_translation_list(self):
    """Check that a list nested under an example yields its translation.

    The wikitext holds one gloss line, one example line that ends with a
    ``{{source|...}}`` template, and one ``#*:`` line below the example.
    After ``extract_gloss`` runs, the single example is expected to carry
    the quoted text in ``text``, the template argument in ``ref`` and the
    ``#*:`` line in ``translation``.
    """
    wikitext = (
        "# [[étranger|Étranger]], de passage, venu du dehors.\n"
        "#* '''''advena''' belli'' {{source|Sil.}}\n"
        "#*: étranger à la guerre."
    )
    wtp = self.wxr.wtp
    wtp.start_page("advena")
    # Stub for the "source" template; its body is just "{{{1}}}".
    wtp.add_page("Modèle:source", 10, "{{{1}}}")
    parsed_root = wtp.parse(wikitext)

    entries = [
        WordEntry(word="advena", lang_code="la", lang="Latin", pos="adj")
    ]
    extract_gloss(self.wxr, entries, parsed_root.children[0])

    expected_example = {
        "text": "advena belli",
        "ref": "Sil.",
        "translation": "étranger à la guerre.",
    }
    expected_sense = {
        "examples": [expected_example],
        "glosses": ["Étranger, de passage, venu du dehors."],
    }
    # Drop the identifying fields so only the extracted senses are compared.
    dumped = entries[0].model_dump(
        exclude_defaults=True,
        exclude=["word", "lang_code", "lang", "pos"],
    )
    self.assertEqual(dumped, {"senses": [expected_sense]})

0 comments on commit 6b426a7

Please sign in to comment.