From 6b426a7c299b21253d94130f98270b3c57fc5546 Mon Sep 17 00:00:00 2001 From: xxyzz Date: Tue, 27 Feb 2024 15:17:57 +0800 Subject: [PATCH] Extract example translation list in some fr edition pages Page: https://fr.wiktionary.org/wiki/advena --- src/wiktextract/extractor/fr/gloss.py | 29 ++++++++++++++--------- tests/test_fr_gloss.py | 33 +++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/src/wiktextract/extractor/fr/gloss.py b/src/wiktextract/extractor/fr/gloss.py index ff5328eb..f1fc86c5 100644 --- a/src/wiktextract/extractor/fr/gloss.py +++ b/src/wiktextract/extractor/fr/gloss.py @@ -108,22 +108,29 @@ def extract_examples( ): process_exemple_template(wxr, first_child, gloss_data) else: - example_nodes = [] - source_template = None - for example_template in example_node.find_child(NodeKind.TEMPLATE): - if example_template.template_name == "source": - source_template = example_template + example_data = Example() + ignored_nodes = [] + for node in example_node.find_child( + NodeKind.TEMPLATE | NodeKind.LIST + ): + if ( + node.kind == NodeKind.TEMPLATE + and node.template_name == "source" + ): + example_data.ref = clean_node(wxr, None, node).strip("— ()") + ignored_nodes.append(node) + elif node.kind == NodeKind.LIST: + for tr_item in node.find_child(NodeKind.LIST_ITEM): + example_data.translation = clean_node( + wxr, None, tr_item.children + ) + ignored_nodes.append(node) example_nodes = [ node for node in example_node_children - if node != source_template + if node not in ignored_nodes ] - example_data = Example() example_data.text = clean_node(wxr, None, example_nodes) - if source_template is not None: - example_data.ref = clean_node(wxr, None, source_template).strip( - "— ()" - ) gloss_data.examples.append(example_data) diff --git a/tests/test_fr_gloss.py b/tests/test_fr_gloss.py index d2d32de4..8777d95d 100644 --- a/tests/test_fr_gloss.py +++ b/tests/test_fr_gloss.py @@ -475,3 +475,36 @@ def test_variante_kyujitai_de(self): } ], ) + + def test_example_translation_list(self): + self.wxr.wtp.start_page("advena") + self.wxr.wtp.add_page("Modèle:source", 10, "{{{1}}}") + root = self.wxr.wtp.parse( + """# [[étranger|Étranger]], de passage, venu du dehors. +#* '''''advena''' belli'' {{source|Sil.}} +#*: étranger à la guerre.""" + ) + page_data = [ + WordEntry(word="advena", lang_code="la", lang="Latin", pos="adj") + ] + extract_gloss(self.wxr, page_data, root.children[0]) + self.assertEqual( + page_data[0].model_dump( + exclude_defaults=True, + exclude=["word", "lang_code", "lang", "pos"], + ), + { + "senses": [ + { + "examples": [ + { + "text": "advena belli", + "ref": "Sil.", + "translation": "étranger à la guerre.", + } + ], + "glosses": ["Étranger, de passage, venu du dehors."], + } + ] + }, + )