From 2d10d328f5ac607383913a53c56cdd2847de65d0 Mon Sep 17 00:00:00 2001 From: Empiriker Date: Tue, 28 Nov 2023 12:12:20 +0100 Subject: [PATCH] Avoid dict-like assignment to pydantic classes in Spanish Wiktionary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This work is a contribution to the EWOK project, which receives funding from LABEX ASLAN (ANR–10–LABX–0081) at the Université de Lyon, as part of the "Investissements d'Avenir" program initiated and overseen by the Agence Nationale de la Recherche (ANR) in France. --- src/wiktextract/extractor/es/gloss.py | 12 +++++++----- src/wiktextract/extractor/es/page.py | 6 +++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/wiktextract/extractor/es/gloss.py b/src/wiktextract/extractor/es/gloss.py index 51562d41..44f209e7 100644 --- a/src/wiktextract/extractor/es/gloss.py +++ b/src/wiktextract/extractor/es/gloss.py @@ -1,10 +1,12 @@ import re from typing import List + +from wikitextprocessor import NodeKind, WikiNode +from wikitextprocessor.parser import WikiNodeChildrenList + from wiktextract.extractor.es.models import Sense, WordEntry from wiktextract.page import clean_node from wiktextract.wxr_context import WiktextractContext -from wikitextprocessor import WikiNode, NodeKind -from wikitextprocessor.parser import WikiNodeChildrenList def extract_gloss( @@ -34,10 +36,10 @@ def extract_gloss( match = re.match(r"^(\d+)", gloss_note) if match: - gloss_data["senseid"] = int(match.group(1)) + gloss_data.senseid = int(match.group(1)) tag_string = gloss_note[len(match.group(1)) :].strip() else: - tag_string = gloss_data["tags"] = gloss_note.strip() + tag_string = gloss_data.tags = gloss_note.strip() # split tags by comma or "y" tags = re.split(r",|y", tag_string) @@ -49,7 +51,7 @@ def extract_gloss( .removeprefix("Main") ) if tag: - gloss_data["tags"].append(tag) + gloss_data.tags.append(tag) if other: wxr.wtp.debug( diff --git a/src/wiktextract/extractor/es/page.py b/src/wiktextract/extractor/es/page.py index 3d764225..688d70f3 100644 --- a/src/wiktextract/extractor/es/page.py +++ b/src/wiktextract/extractor/es/page.py @@ -4,11 +4,11 @@ from typing import Dict, List from wikitextprocessor import NodeKind, WikiNode + from wiktextract.datautils import append_base_data from wiktextract.extractor.es.gloss import extract_gloss +from wiktextract.extractor.es.models import PydanticLogger, WordEntry from wiktextract.extractor.es.pronunciation import extract_pronunciation -from wiktextract.extractor.es.models import WordEntry, PydanticLogger - from wiktextract.page import clean_node from wiktextract.wxr_context import WiktextractContext @@ -63,7 +63,7 @@ def process_pos_block( ): pos_type = wxr.config.POS_SUBTITLES[pos_template_name]["pos"] append_base_data(page_data, "pos", pos_type, base_data) - page_data[-1]["pos_title"] = pos_title + page_data[-1].pos_title = pos_title child_nodes = list(pos_level_node.filter_empty_str_child()) for child in child_nodes: