Skip to content

Commit

Permalink
feat: add metrics module with classification metrics
Browse files Browse the repository at this point in the history
docs: apply proper spacing separation in docstrings

Co-authored-by: Deepyaman Datta <deepyaman.datta@utexas.edu>

feat: lazify classification metrics and clean tests
  • Loading branch information
IndexSeek committed Dec 20, 2024
1 parent 7d2d021 commit d017b8b
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 0 deletions.
147 changes: 147 additions & 0 deletions ibis_ml/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import ibis.expr.datatypes as dt
import ibis.expr.types as ir


def accuracy_score(
    y_true: "ir.IntegerColumn", y_pred: "ir.IntegerColumn"
) -> "ir.FloatingScalar":
    """Calculate the accuracy score of predicted values against true values.

    The score is built as an expression and is not evaluated here; call
    ``.to_pyarrow().as_py()`` on the result to obtain a plain Python float.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    FloatingScalar
        Scalar expression for the accuracy score, i.e. the fraction of rows
        where the predicted label equals the true label.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import accuracy_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> accuracy_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.583333 │
    └──────────┘
    """
    # The mean of the row-wise equality flags is the share of correct rows.
    return (y_true == y_pred).mean()


def precision_score(
    y_true: "ir.IntegerColumn", y_pred: "ir.IntegerColumn"
) -> "ir.FloatingScalar":
    """Calculate the precision score of predicted values against true values.

    Labels are assumed to be binary and encoded as 0/1: positives are counted
    by summing the columns, and true positives by summing ``y_true & y_pred``.
    The score is built as an expression and is not evaluated here; call
    ``.to_pyarrow().as_py()`` on the result to obtain a plain Python float.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    FloatingScalar
        Scalar expression for the precision score, i.e. the fraction of
        positive predictions that are actually positive.

    Notes
    -----
    If ``y_pred`` contains no positive predictions the denominator is zero.
    That case is not special-cased here, so the outcome is whatever the
    executing backend yields for a division by zero.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import precision_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> precision_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.666667 │
    └──────────┘
    """
    # With 0/1 labels, `&` is 1 only where both the truth and the prediction
    # are positive, so its sum is the true-positive count.
    true_positive = (y_true & y_pred).sum()
    predicted_positive = y_pred.sum()
    return true_positive / predicted_positive


def recall_score(
    y_true: "ir.IntegerColumn", y_pred: "ir.IntegerColumn"
) -> "ir.FloatingScalar":
    """Calculate the recall score of predicted values against true values.

    Labels are assumed to be binary and encoded as 0/1: positives are counted
    by summing the columns, and true positives by summing ``y_true & y_pred``.
    The score is built as an expression and is not evaluated here; call
    ``.to_pyarrow().as_py()`` on the result to obtain a plain Python float.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    FloatingScalar
        Scalar expression for the recall score, i.e. the fraction of actual
        positives that were predicted as positive.

    Notes
    -----
    If ``y_true`` contains no positive labels the denominator is zero. That
    case is not special-cased here, so the outcome is whatever the executing
    backend yields for a division by zero.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import recall_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> recall_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.571429 │
    └──────────┘
    """
    # With 0/1 labels, `&` is 1 only where both the truth and the prediction
    # are positive, so its sum is the true-positive count.
    true_positive = (y_true & y_pred).sum()
    actual_positive = y_true.sum()
    return true_positive / actual_positive


def f1_score(
    y_true: "ir.IntegerColumn", y_pred: "ir.IntegerColumn"
) -> "ir.FloatingScalar":
    """Calculate the F1 score of predicted values against true values.

    Labels are assumed to be binary and encoded as 0/1: positives are counted
    by summing the columns, and true positives by summing ``y_true & y_pred``.
    The score is built as an expression and is not evaluated here; call
    ``.to_pyarrow().as_py()`` on the result to obtain a plain Python float.

    Parameters
    ----------
    y_true
        Table expression column containing the true labels.
    y_pred
        Table expression column containing the predicted labels.

    Returns
    -------
    FloatingScalar
        Scalar expression for the F1 score, representing the harmonic mean of
        precision and recall.

    Notes
    -----
    The score is computed from counts as
    ``2 * TP / (predicted positives + actual positives)``, which equals
    ``2 * precision * recall / (precision + recall)`` whenever the latter is
    defined. The count form also yields 0 when there are no true positives
    but at least one predicted or actual positive, where the
    precision/recall form would divide zero by zero. If there are neither
    predicted nor actual positives the denominator is still zero and the
    outcome is whatever the executing backend yields for a division by zero.

    Examples
    --------
    >>> import ibis
    >>> from ibis_ml.metrics import f1_score
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(
    ...     {
    ...         "id": range(1, 13),
    ...         "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
    ...         "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    ...     }
    ... )
    >>> f1_score(t.actual, t.prediction)
    ┌──────────┐
    │ 0.615385 │
    └──────────┘
    """
    # With 0/1 labels, `&` is 1 only where both the truth and the prediction
    # are positive, so its sum is the true-positive count.
    true_positive = (y_true & y_pred).sum()
    predicted_positive = y_pred.sum()
    actual_positive = y_true.sum()
    return (2 * true_positive) / (predicted_positive + actual_positive)
35 changes: 35 additions & 0 deletions tests/test_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import ibis
import pytest
import sklearn.metrics

import ibis_ml.metrics


@pytest.fixture
def results_table():
    """Provide an in-memory table of ids, true labels, and predicted labels."""
    columns = {
        "id": range(1, 13),
        "actual": [1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1],
        "prediction": [1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
    }
    return ibis.memtable(columns)


@pytest.mark.parametrize(
    "metric_name",
    ["accuracy_score", "precision_score", "recall_score", "f1_score"],
)
def test_classification_metrics(results_table, metric_name):
    """Check each ibis_ml metric against the same-named scikit-learn metric."""
    # Both libraries expose the metric under the same name, so one
    # parametrized name drives the implementation under test and the reference.
    compute_metric = getattr(ibis_ml.metrics, metric_name)
    reference_metric = getattr(sklearn.metrics, metric_name)

    frame = results_table.to_pandas()
    expected = reference_metric(frame["actual"], frame["prediction"])

    # The ibis_ml result is an expression; evaluate it to a Python value.
    metric_expr = compute_metric(results_table.actual, results_table.prediction)
    result = metric_expr.to_pyarrow().as_py()

    assert result == pytest.approx(expected, abs=1e-4)

0 comments on commit d017b8b

Please sign in to comment.