Skip to content

Commit

Permalink
📝 Improve documentation, type hints in hikari.dataframes.cif
Browse files Browse the repository at this point in the history
  • Loading branch information
Baharis committed Jan 16, 2025
1 parent b8f178a commit 2018a6c
Showing 1 changed file with 51 additions and 38 deletions.
89 changes: 51 additions & 38 deletions hikari/dataframes/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pathlib
import re
import tempfile
from typing import Any, Callable, Sequence, TypeVar

from enum import Enum
from functools import lru_cache
Expand All @@ -12,26 +13,25 @@
from hikari.utility import make_abspath


T = TypeVar('T')


class CifBlock(UserDict):
"""
CifBlock object handles all data inside an individual block of Cif file.
As a subclass of `UserDict`, in Python 3.7+ it is ordered by design.
Individual Cif items can be accessed or assigned using a dict-like syntax.
"""

def get_as_type(self, key, typ, default=None):
def get_as_type(self, key: str, typ: Callable[[Any], T], default: Any = None) -> T:
"""
Get value of `self[key]` converted to `typ`. If value is a list,
convert its contents element-wise.
:param key: key associated with accessed element
:type key: str
:param typ: type/function applied to a value or its every element
:type typ: Callable
:param default: if given, return it on KeyError
:type default: Any
:return: converted value of `self[key]` or `default`
:rtype: Union[list, str]
:return: value of `self[key]` or `default` converted to `typ`
"""
value = self.get(key)
if value is None:
Expand All @@ -51,9 +51,7 @@ def read(self, path: str, block: str) -> None:
access and store only the `block` data block in self.
:param path: Absolute or relative path to the .cif file.
:type path: str
:param block: Name of the cif data block to be accessed
:type block: str
"""
reader = CifReader(cif_file_path=path)
self.update(reader.read()[block])
Expand All @@ -64,7 +62,6 @@ def write(self, path: str) -> None:
by the `path` parameter, using 'hikari' as block name.
:param path: Absolute or relative path to the .cif file.
"""
writer = CifWriter(cif_file_path=path)
writer.write(cif_frame=CifFrame({'hikari': self}))
Expand Down Expand Up @@ -109,14 +106,22 @@ def write(self, path: str) -> None:

class CifValidator(UserDict):
"""
This object reads an appropriate cif core dictionary and uses it in order to
format or validate all entries passing through it.
The `CifValidator` contains all keys from core cif dictionary. In order
This object is used to validate individual cif keys when parsing cif files.
It knows the metadata about each key based on its entry
in the cif core dictionary v.2.4.5 packaged with the project.
Since the specification itself is written in cif format,
it is also read using the same `CifReader` (but without `CifValidator`).
Upon initialization, `CifValidator` becomes a dictionary whose
keys are all valid cif keys, according to the cif specification used.
Individual values are themselves dictionaries that store information
about key's contents, `_category`, `_type`, whether they are a `_list` etc.
The validator contains all keys from the core cif dictionary. In order
to access individual values, use `.get()` instead of bracket notation.
"""

def __init__(self):
def __init__(self) -> None:
super().__init__()
with tempfile.TemporaryDirectory() as temp_dir:
temp_dic_path = str(pathlib.Path(temp_dir) / 'cif_core.dic')
Expand All @@ -125,15 +130,17 @@ def __init__(self):
reader = CifReader(cif_file_path=temp_dic_path, validate=False)
self.update(reader.read())

def __contains__(self, item):
def __contains__(self, item) -> bool:
try:
_ = self.get(item)
except KeyError:
return False
else:
return True

def get(self, key, default=None):
def get(self, key: str, default: UserDict = None) -> UserDict:
"""Get the dictionary containing information about input cif `key`."""
# def get(self, key: str, default: Any = str) -> UserDict: FAILED
key, _key = (key[1:], key) if key.startswith('_') else (key, '_' + key)
value = UserDict()
try:
Expand All @@ -147,15 +154,17 @@ def get(self, key, default=None):
value = default
return value

def get__category(self, key, default=None):
def get__category(self, key: str, default: str = None) -> str:
"""Close equivalent to `self.get(key).get('_category', default)`"""
value = self.get(key)
if value is not None:
_category = value.get('_category', default)
else:
_category = default
return _category

def get__list(self, key, default=None):
def get__list(self, key: str, default: bool = None) -> bool:
"""Close equivalent to `self.get(key).get('_list', default) == 'yes'`"""
value = self.get(key)
if value is not None:
got = value.get('_list')
Expand All @@ -166,7 +175,14 @@ def get__list(self, key, default=None):


class CifIOBuffer(abc.ABC):
def __init__(self, target):
"""
An abstract base class for Cif reader and writer buffers.
Specifies that data can be added and flushed; names and values are stored
in lists, and output is stored in target (dict if reading, file if writing).
"""

@abc.abstractmethod
def __init__(self, target: Any) -> None:
self.names = []
self.values = []

Expand Down Expand Up @@ -204,11 +220,11 @@ def __init__(self, cif_file_path, validate=True):
class CifReaderBuffer(CifIOBuffer):
"""Buffer for reading data from cif file into `CifReader`"""

def __init__(self, target):
def __init__(self, target: dict) -> None:
super().__init__(target=target)
self.target: UserDict = target
self.target: dict = target

def add(self, word):
def add(self, word: str) -> None:
"""Append the word to names or values based on its first char"""
if word.startswith('_'):
if self.values:
Expand All @@ -217,7 +233,7 @@ def add(self, word):
else:
self.values.append(CifReader.revert_delimiters_and_whitespace(word))

def flush(self):
def flush(self) -> None:
"""Update the target dict with names and values stored hitherto"""
d = UserDict()
lv = len(self.values)
Expand Down Expand Up @@ -245,16 +261,16 @@ class CifReader(CifIO):
"""

@property
def blocks(self):
"""A dictionary of all blocks names and their positions in cif file."""
def blocks(self) -> dict[str, int]:
"""A dict mapping block names to line numbers where they start in cif file."""
return self._blocks(lines=tuple(self.file_lines))

@lru_cache(maxsize=1)
def _blocks(self, lines):
def _blocks(self, lines: Sequence[str]) -> dict[str, int]:
return {l[5:]: i for i, l in enumerate(lines) if l.startswith('data_')}

class State(Enum):
"""This class stores current cif reading state (eg. inside loop etc.)"""
"""This class stores current cif reading state (e.g. inside a loop)"""
default = 0
loop_keys = 1
loop_values = 2
Expand Down Expand Up @@ -284,17 +300,14 @@ def item_value_should_be_a_list(k_, v_):
new_dict[k] = v[0]
return new_dict

def parse_lines(self, start, end):
def parse_lines(self, start: int, end: int) -> dict:
"""
Read the data from :attr:`~.CifIO.lines` numbered `start` to `end`,
interpret it, and return it as an instance of a dict.
:param start: number of the first line which data should be read from
:type start: int
:param end: number of the first line which should not be read anymore
:type end: int
:return: ordered dictionary with name: value pairs for all parsed lines
:rtype: dict
"""
parsed_data = dict()
buffer = CifReaderBuffer(target=parsed_data)
Expand Down Expand Up @@ -398,14 +411,14 @@ class CifWriterBuffer(CifIOBuffer):
MIN_STEP_LENGTH = 2
WHITESPACE = {' ', '\t', '\n'}

def __init__(self, target):
def __init__(self, target: TextIO) -> None:
super().__init__(target=target)
self.target: TextIO = target
self.current__category = ''
self.current__list = False
self.current_len = 0

def add(self, data: tuple):
def add(self, data: tuple) -> None:
k_, v_ = data
k__category = cif_core_validator.get__category(k_)
k__list = cif_core_validator.get__list(k_) or isinstance(v_, list)
Expand All @@ -429,7 +442,7 @@ def add(self, data: tuple):
self.current__list = k__list
self.current_len = v_len

def flush(self):
def flush(self) -> None:
s = '\n'
if self.current__list is True:
s += self.format_table()
Expand All @@ -440,7 +453,7 @@ def flush(self):
self.names = []
self.values = []

def format_line(self, k, v):
def format_line(self, k, v) -> str:
name_string = f'{k:<{self.MAX_NAME_LENGTH}}'
step_string = ' ' * self.MIN_STEP_LENGTH
value_string = self.enquote(v)
Expand All @@ -451,7 +464,7 @@ def format_line(self, k, v):
step_string = '\n'
return name_string + step_string + value_string

def format_table(self):
def format_table(self) -> str:
column_widths = [max(map(len, v)) for v in self.values]
if sum(column_widths) + len(column_widths) >= self.MAX_LINE_LENGTH:
pass # TODO: break long loop table rows into multiple lines
Expand All @@ -463,7 +476,7 @@ def format_table(self):
formatted_string += f' {" ".join(enquoted_value_row)}\n'
return formatted_string

def enquote(self, text, force=False):
def enquote(self, text: str, force: bool = False) -> str:
if text == '':
quoted = "''"
elif any(whitespace in text for whitespace in self.WHITESPACE) or force:
Expand All @@ -488,7 +501,7 @@ class CifWriter(CifIO):
into cif files
"""

def write(self, cif_frame):
def write(self, cif_frame: CifFrame) -> None:
with open(self.file_path, 'w') as cif_file:
buffer = CifWriterBuffer(target=cif_file)
first_block = True
Expand Down

0 comments on commit 2018a6c

Please sign in to comment.