Add tests and rewrite documentation

- Hypothesis tests in particular
- Changed the API a bit
- Rewrote the documentation
kai-tub · Jul 18, 2022 · b4ab2c3 · b4ab2c3
1 parent a98f405
commit b4ab2c3
Show file tree

Hide file tree

Showing 23 changed files with 1,447 additions and 327 deletions.
diff --git a/common_nb_preprocessors/_patterns.py b/common_nb_preprocessors/_patterns.py
@@ -1,9 +1,14 @@
 import re
 
+from pydantic import constr, validate_arguments
+
 __all__ = ["build_prefixed_regex_pattern", "build_prefixed_regex_pattern_with_value"]
 
 
-def build_prefixed_regex_pattern(prefix: str, key_term: str) -> re.Pattern:
+@validate_arguments
+def build_prefixed_regex_pattern(
+    *, prefix: constr(min_length=1), key_term: constr(min_length=1)
+) -> re.Pattern:
     """
     A regular expression builder that returns a compiled
     regular expression that matches a string if:
@@ -29,25 +34,30 @@ def build_prefixed_regex_pattern(prefix: str, key_term: str) -> re.Pattern:
     return pattern
 
 
+@validate_arguments
 def build_prefixed_regex_pattern_with_value(
-    prefix: str, key_term: str, delimiter=r"\s*"
+    *,
+    prefix: constr(min_length=1),
+    key_term: constr(min_length=1),
+    delimiter: constr(min_length=1) = "=",
 ) -> re.Pattern:
     """
     A regular expression builder that returns a compiled
-    regular expression that matches a string if:
-    - An escaped `prefix` string (may have whitespaces before or after)
-    - The escape `key_term` to capture with the group name `key`
-    - Followed by an *unescaped* `delimiter`
+    regular expression that matches a string with:
+    - The (escaped) `prefix` string (may have whitespaces before or after)
+    - The (escaped) `key_term` to capture with the group name `key`
+    - Followed by an (escaped) `delimiter` (may have whitespaces before or after)
     - and captures the following line until the end of the line with the group name `value`
     """
     prefix = re.escape(prefix)
     key_term = re.escape(key_term)
+    delimiter = re.escape(delimiter)
     pattern = re.compile(
         rf"""
         ^ # match start of each line
         \s*{prefix}\s* # allow whitespace before and after prefix
         (?P<key>{key_term}) # term to capture
-        {delimiter}
+        \s*{delimiter}\s* # allow whitespace before and after delimiter
         (?P<value>[^\n\r]+)
         $ # match end of each line (excludes \n in MULTILINE)
         [\r\n]* # Capture current and all following empty newlines

diff --git a/common_nb_preprocessors/metadata_injector.py b/common_nb_preprocessors/metadata_injector.py
@@ -1,8 +1,6 @@
-from pprint import pprint
-
-import nbformat
+import yaml
 from nbconvert.preprocessors import Preprocessor
-from traitlets import Bool, List, Unicode
+from traitlets import Bool, List, TraitError, Unicode, validate
 
 from ._patterns import (
     build_prefixed_regex_pattern,
@@ -11,65 +9,36 @@
 
 __all__ = [
     "MetaDataListInjectorPreprocessor",
-    "GlobalMetaDataInjectorPreprocessor",
+    "MetaDataMapInjectorPreprocessor",
 ]
 
 
 class MetaDataListInjectorPreprocessor(Preprocessor):
     """
-    Parse all *code* cells and append the matched `strings` with the
-    `prefix` to the `metadata_group` list, which is the `tags` list by default.
-    These `strings` must be on their own line and only contain the `prefix`
-    a `string` from `strings` and whitespace characters.
-
-    With `remove_line=True` (default) the matched line will be removed from
-    the output.
-
-    With the `Preprocessor` configured as:
-
-    - `metadata_group="tags"`
-    - `strings=["hide"]`
-    - `remove_line=True`
-
-    the code cell with the contents:
-
-    .. code-block:: python
-
-        # hide
-        import os
-
-    and the following notebook metadata json:
-
-    .. code-block:: json
-
-        {"metadata": {}}
-
-    Will be transformed to the code-cell with the contents:
-
-    .. code-block:: python
-
-        import os
-
-    and the metadata set as:
-
-    .. code-block:: json
-
-        {"metadata": {"tags": ["hide"]}}
-
-    All matched `strings` (also sometimes called *magic* comments)
-    will be *appended* to the `metadata_group` list.
-    If the entry already exists, it won't be added again.
+    Parse all *code* cells and append the matched magic comments with the
+    `prefix` to the `metadata_group` list.
+    These `strings` must be on their own line and only contain the `prefix`,
+    a `string` from `strings` (i.e., the magic comment) and whitespace characters.
     """
 
     metadata_group = Unicode(default_value="tags").tag(config=True)
-    """Metadata group into which the matched `strings` will be written."""
-    strings = List(Unicode(), default_value=[]).tag(config=True)
-    """List of strings (magic comments) that define the text that will be matched and injected into the selected metadata group."""
+    """Metadata group to which the matched magic comment will be appended if
+    it is not already present. Default is `tags`."""
+    strings = List(Unicode(), minlen=1).tag(config=True)
+    """List of strings (magic comments) that define the text that will be matched and
+    injected into the selected metadata group."""
     prefix = Unicode(default_value="#").tag(config=True)
-    """The prefix that indicates the possible start of a magic comment line. Should be comment character of the language."""
+    """The prefix that indicates the possible start of a magic comment line.
+    Should be the comment character of the language. Default is `#`."""
     remove_line = Bool(default_value=True).tag(config=True)
     """By default remove the matching line in the code-cell."""
 
+    @validate("metadata_group")
+    def _valid_metadata_group(self, proposal):
+        if proposal["value"] == "":
+            raise TraitError("metadata_group must be non-empty string!")
+        return proposal["value"]
+
     def _write_tag(self, tag, cell):
         tags = cell.setdefault("metadata", {}).setdefault(self.metadata_group, [])
         if tag not in tags:
@@ -82,7 +51,7 @@ def preprocess_cell(self, cell, resource, index):
         if cell["cell_type"] == "markdown":
             return cell, resource
         for string in self.strings:
-            pattern = build_prefixed_regex_pattern(self.prefix, string)
+            pattern = build_prefixed_regex_pattern(prefix=self.prefix, key_term=string)
             m = pattern.search(cell.source)
             if m is not None:
                 tag = m.group("key")
@@ -93,27 +62,41 @@ def preprocess_cell(self, cell, resource, index):
 
 
 class MetaDataMapInjectorPreprocessor(Preprocessor):
-    metadata_group = Unicode(default_value="tags").tag(config=True)
-    """Metadata group into which the matched `strings` will be written."""
-    keys = List(Unicode(), default_value=[]).tag(config=True)
+    """
+    Parse all *code* cells and add the matched key-value pairs with the
+    `prefix` to the `metadata_group` dictionary.
+    The key-value pairs are generated by searching for each `key` of `keys` followed
+    by `delimiter` and the value.
+    """
+
+    metadata_group = Unicode().tag(config=True)
+    """Metadata group into which the matched key-value pairs will be written."""
+    keys = List(Unicode()).tag(config=True)
     """List of keys that will be used as a key for the `metadata_group` dictionary entry and is followed by the `delimiter` and `value`."""
     prefix = Unicode(default_value="#").tag(config=True)
     """The prefix that indicates the possible start of a magic comment line. Should be comment character of the language."""
     remove_line = Bool(default_value=True).tag(config=True)
     """By default remove the matching line in the code-cell."""
-    delimiter = Unicode(default_value=r"\s*=\s*").tag(config=True)
+    delimiter = Unicode(default_value="=").tag(config=True)
+    """Delimiter that separates the key from the value."""
+    value_to_yaml = Bool(default_value=False).tag(config=True)
+    """Parse the value as YAML before writing it into the `metadata_group` dictionary. Default is `False`."""
+
+    @validate("metadata_group")
+    def _valid_metadata_group(self, proposal):
+        if proposal["value"] == "":
+            raise TraitError("metadata_group must be non-empty string!")
+        return proposal["value"]
 
     def _write_entry(self, key, value, cell):
         entries = cell.setdefault("metadata", {}).setdefault(self.metadata_group, {})
         if isinstance(entries, list):
-            raise ValueError(
+            raise TraitError(
                 "Trying to overwrite metadata list type with metadata dictionary.",
                 self.metadata_group,
             )
-        # should include an option to parse it as yaml
-        # or similar
-        value = True if value == "true" else value
-        value = False if value == "false" else value
+        if self.value_to_yaml:
+            value = yaml.safe_load(value)
         entries[key] = value
         return cell
 
@@ -136,53 +119,38 @@ def preprocess_cell(self, cell, resource, index):
         return cell, resource
 
 
-class GlobalMetaDataInjectorPreprocessor(Preprocessor):
-    """
-    Parse all *code* cells and convert the matching `prefix` `key` `value`
-    lines to the global `metadata` field.
-
-    To clean up the output, the lines containing any `string` may be removed
-    by setting `remove_line=True` (default).
-
-    The provided list of `keys` will be used to access the *global* `metadata` field
-    and insert the value that is followed by the `key` in the code cell.
-    Note that the global metadata field will be overwritten if multiple cells define the
-    field's value.
-
-    .. code-block:: python
-
-        # publish true
-        import os
-
-    Will be transformed to:
-
-    ```python
-    import os
-    ```
-
-    where the _notebooks_ cell metadata `publish` field may be created and contain the additional entry `true`.
-
-    To only add a specific value to a metadata field (usually `tags`) look at `MetaDataInjectorPreprocessor`.
-    """
-
-    keys = List(Unicode()).tag(config=True)
-    prefix = Unicode(default_value="#").tag(config=True)
-    delimiter = Unicode(default_value=r"=").tag(config=True)
-
-    def preprocess(self, nb, resources):
-        if len(self.keys) == 0:
-            return nb, resources
-
-        for cell in nb.cells:
-            if cell["cell_type"] == "markdown":
-                continue
-            for key in self.keys:
-                pattern = build_prefixed_regex_pattern_with_value(
-                    self.prefix, key, delimiter=self.delimiter
-                )
-                m = pattern.search(cell.source)
-                if m is not None:
-                    value = m.group("value")
-                    nb.setdefault("metadata", {})
-                    nb["metadata"][key] = value
-        return nb, resources
+# class GlobalMetaDataInjectorPreprocessor(Preprocessor):
+#     """
+#     Parse all *code* cells and convert the matching `prefix` `key` `value`
+#     lines to the global `metadata` field.
+
+#     To clean up the output, the lines containing any `string` may be removed
+#     by setting `remove_line=True` (default).
+
+#     The provided list of `keys` will be used to access the *global* `metadata` field
+#     and insert the value that is followed by the `key` in the code cell.
+#     Note that the global metadata field will be overwritten if multiple cells define the
+#     field's value.
+#     """
+
+#     keys = List(Unicode()).tag(config=True)
+#     prefix = Unicode(default_value="#").tag(config=True)
+#     delimiter = Unicode(default_value=r"=").tag(config=True)
+
+#     def preprocess(self, nb, resources):
+#         if len(self.keys) == 0:
+#             return nb, resources
+
+#         for cell in nb.cells:
+#             if cell["cell_type"] == "markdown":
+#                 continue
+#             for key in self.keys:
+#                 pattern = build_prefixed_regex_pattern_with_value(
+#                     self.prefix, key, delimiter=self.delimiter
+#                 )
+#                 m = pattern.search(cell.source)
+#                 if m is not None:
+#                     value = m.group("value")
+#                     nb.setdefault("metadata", {})
+#                     nb["metadata"][key] = value
+#         return nb, resources