Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ExpandDateTime convenience (datetime components) #20

Merged
merged 2 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 83 additions & 82 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,90 +94,91 @@ quartodoc:
- title: Core
package: ibisml
contents:
- kind: page
path: core
summary:
name: Common
desc: Core APIs
contents:
- Recipe
- RecipeTransform
- TransformResult

- kind: page
path: selectors
summary:
name: Selectors
desc: Select sets of columns by name, type, or other properties
contents:
- cols
- contains
- endswith
- startswith
- matches
- numeric
- nominal
- categorical
- string
- integer
- floating
- temporal
- date
- time
- timestamp
- has_type
- where
- everything
- selector
- kind: page
path: core
summary:
name: Common
desc: Core APIs
contents:
- Recipe
- RecipeTransform
- TransformResult

- kind: page
path: selectors
summary:
name: Selectors
desc: Select sets of columns by name, type, or other properties
contents:
- cols
- contains
- endswith
- startswith
- matches
- numeric
- nominal
- categorical
- string
- integer
- floating
- temporal
- date
- time
- timestamp
- has_type
- where
- everything
- selector

- title: Steps
desc: Define steps in a recipe
package: ibisml
contents:
- kind: page
path: steps-imputation
summary:
name: Imputation
desc: Imputation and handling of missing values
contents:
- ImputeMean
- ImputeMode
- ImputeMedian
- FillNA

- kind: page
path: steps-encoding
summary:
name: Encoding
desc: Encoding of categorical and string columns
contents:
- OneHotEncode
- CategoricalEncode

- kind: page
path: steps-standardization
summary:
name: Standardization
desc: Standardization and normalization of numeric columns
contents:
- ScaleStandard

- kind: page
path: steps-temporal
summary:
name: Temporal
desc: Feature extraction for temporal columns
contents:
- ExpandDate
- ExpandTime

- kind: page
path: steps-other
summary:
name: Other
desc: Other common tabular operations
contents:
- Cast
- Drop
- MutateAt
- Mutate
- kind: page
path: steps-imputation
summary:
name: Imputation
desc: Imputation and handling of missing values
contents:
- ImputeMean
- ImputeMode
- ImputeMedian
- FillNA

- kind: page
path: steps-encoding
summary:
name: Encoding
desc: Encoding of categorical and string columns
contents:
- OneHotEncode
- CategoricalEncode

- kind: page
path: steps-standardization
summary:
name: Standardization
desc: Standardization and normalization of numeric columns
contents:
- ScaleStandard

- kind: page
path: steps-temporal
summary:
name: Temporal
desc: Feature extraction for temporal columns
contents:
- ExpandDateTime
- ExpandDate
- ExpandTime

- kind: page
path: steps-other
summary:
name: Other
desc: Other common tabular operations
contents:
- Cast
- Drop
- MutateAt
- Mutate
3 changes: 2 additions & 1 deletion ibisml/steps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from ibisml.steps.impute import FillNA, ImputeMean, ImputeMedian, ImputeMode
from ibisml.steps.standardize import ScaleStandard
from ibisml.steps.encode import OneHotEncode, CategoricalEncode
from ibisml.steps.temporal import ExpandDate, ExpandTime
from ibisml.steps.temporal import ExpandDateTime, ExpandDate, ExpandTime


__all__ = (
Expand All @@ -17,6 +17,7 @@
"ScaleStandard",
"OneHotEncode",
"CategoricalEncode",
"ExpandDateTime",
"ExpandDate",
"ExpandTime",
)
115 changes: 115 additions & 0 deletions ibisml/steps/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,121 @@
from ibisml.select import SelectionType, selector


class ExpandDateTime(Step):
    """A step for expanding date and time columns into one or more features.

    New features will be named ``{input_column}_{component}``. For example, if
    expanding a ``"year"`` component from column ``"x"``, the feature column
    would be named ``"x_year"``.

    Parameters
    ----------
    inputs
        A selection of date and time columns to expand into new features.
    datetime_components
        A sequence of date and time components to expand. Options include

        - ``day``: the day of the month as a numeric value
        - ``week``: the week of the year as a numeric value
        - ``month``: the month of the year as a categorical value
        - ``year``: the year as a numeric value
        - ``dow``: the day of the week as a categorical value
        - ``doy``: the day of the year as a numeric value
        - ``hour``: the hour component
        - ``minute``: the minute component
        - ``second``: the second component
        - ``millisecond``: the millisecond component

        Defaults to ``["day", "week", "month", "year", "dow", "doy", "hour",
        "minute"]``.

    Examples
    --------
    >>> import ibisml as ml

    Expand date and time columns using the default components

    >>> step = ml.ExpandDateTime(ml.timestamp())

    Expand specific columns using specific components for date and time

    >>> step = ml.ExpandDateTime(["x", "y"], ["day", "year", "hour"])
    """

    def __init__(
        self,
        inputs: SelectionType,
        datetime_components: list[
            Literal[
                "day",
                "week",
                "month",
                "year",
                "dow",
                "doy",
                "hour",
                "minute",
                "second",
                "millisecond",
            ]
        ] = (
            "day",
            "week",
            "month",
            "year",
            "dow",
            "doy",
            "hour",
            "minute",
        ),
    ):
        self.inputs = selector(inputs)
        # Copy into a list so the immutable default tuple (or a caller-owned
        # sequence) is never shared with this instance.
        self.datetime_components = list(datetime_components)

    def _repr(self) -> Iterable[tuple[str, Any]]:
        yield ("", self.inputs)
        yield ("datetime_components", self.datetime_components)

    def fit(self, table: ir.Table, metadata: Metadata) -> Transform:
        """Resolve the input columns and register category labels.

        For every selected column, the ``month`` and ``dow`` components (when
        requested) get their category labels recorded on ``metadata`` under
        ``{column}_month`` / ``{column}_dow``. Returns the transform built
        from the selected columns and the requested components.
        """
        columns = self.inputs.select_columns(table, metadata)

        month_names = (
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        )
        # NOTE(review): Monday-first ordering presumably mirrors the index the
        # transform emits for day-of-week -- confirm against the transform.
        dow_names = (
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        )

        # Only these two components are categorical; "month" is registered
        # before "dow" for all columns.
        for component, names in (("month", month_names), ("dow", dow_names)):
            if component in self.datetime_components:
                for col in columns:
                    # Hand over a fresh list per column so metadata entries
                    # never alias one another.
                    metadata.set_categories(f"{col}_{component}", list(names))

        return ml.transforms.ExpandDateTime(columns, self.datetime_components)


class ExpandDate(Step):
"""A step for expanding date columns into one or more features.

Expand Down
3 changes: 2 additions & 1 deletion ibisml/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from ibisml.transforms.impute import FillNA
from ibisml.transforms.standardize import ScaleStandard
from ibisml.transforms.encode import OneHotEncode, CategoricalEncode
from ibisml.transforms.temporal import ExpandDate, ExpandTime
from ibisml.transforms.temporal import ExpandDateTime, ExpandDate, ExpandTime

__all__ = (
"Cast",
Expand All @@ -13,6 +13,7 @@
"ScaleStandard",
"OneHotEncode",
"CategoricalEncode",
"ExpandDateTime",
"ExpandDate",
"ExpandTime",
)
Loading
Loading