Skip to content

Commit

Permalink
BLD, TST: Build and test Pyodide wheels for pandas in CI (pandas-dev#57896)
Browse files Browse the repository at this point in the history

* Create initial Pyodide workflow

* Do not import pandas folder from the repo

* Install hypothesis for testing

* Add pytest decorator to skip tests on WASM

* Skip `time.tzset()` tests on WASM platforms

* Skip file system access tests on WASM

* Skip two more tzset test failures

* Skip two more FS failures on WASM

* Resolve last two tzset failures on WASM

* Add a `WASM` constant for Emscripten platform checks

* Fix floating point imprecision with `np.timedelta64`

* Mark tz OverflowError as xfail on WASM

* Try to fix OverflowError with date ranges

* Move job to unit tests workflow, withdraw env vars

* Fix up a few style errors, use WASM variable

* Bump Pyodide to `0.25.1`

See pyodide/pyodide#4654 for
more discussion. This commit resolves a build error
in the `pyodide build` command, which broke after
PyPA published a new release of `build`.

* Use shorter job name

* Skip test where warning is not raised properly

* Don't run `test_date_time` loc check on WASM

* Don't run additional loc checks in `test_sas7bdat`

* Disable WASM OverflowError

* Skip tests requiring fp exception support

* xfail tests that require stricter tolerances

* xfail test where `OverflowError`s are received

* Remove upper-pin from `pydantic`

* Better skip messages via `pytest.skipif` decorator

* Import `WASM` var via public API where possible

* Unpin `pytest` for Pyodide job

* Add reason attr when using boolean to skip test

* Skip, rather than xfail, tests that raise `OverflowError`s

* Skip timedelta test that runs well only on 64-bit

* Skip tests that use `np.timedelta64`

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
  • Loading branch information
agriyakhetarpal and mroeschke authored May 8, 2024
1 parent 231d652 commit 4f743f9
Show file tree
Hide file tree
Showing 20 changed files with 146 additions and 17 deletions.
61 changes: 60 additions & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ jobs:
timeout-minutes: 90

concurrency:
#https://github.community/t/concurrecy-not-work-for-push/183068/7
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-${{ matrix.pytest_target }}-dev
cancel-in-progress: true

Expand Down Expand Up @@ -346,3 +346,62 @@ jobs:
- name: Run Tests
uses: ./.github/actions/run-tests

emscripten:
# Purpose: build a pandas wheel with `pyodide build`, install it into a
# Pyodide virtual environment, and run the pandas test suite from the
# installed wheel (not from the checked-out source tree).
#
# Note: the Python version and the Emscripten toolchain version are both
# determined by the Pyodide version. The appropriate versions can be found
# in the Pyodide repodata.json "info" field, or in the Makefile.envs file:
# https://github.com/pyodide/pyodide/blob/stable/Makefile.envs#L2
# The Node.js version can be determined via Pyodide:
# https://pyodide.org/en/stable/usage/index.html#node-js
name: Pyodide build
runs-on: ubuntu-22.04
concurrency:
# The group key is the run number for push events and the ref for all
# other events, suffixed with "-wasm"; see the linked thread for why
# pushes are keyed differently.
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-wasm
cancel-in-progress: true
steps:
# NOTE(review): fetch-depth 0 presumably fetches full history so the
# package version can be derived from git metadata - confirm.
- name: Checkout pandas Repo
uses: actions/checkout@v4
with:
fetch-depth: 0

# Host Python used to run pyodide-build; the pinned version has to match
# the one required by the Pyodide release pinned below (see note at the
# top of this job).
- name: Set up Python for Pyodide
id: setup-python
uses: actions/setup-python@v5
with:
python-version: '3.11.3'

# Emscripten version is likewise dictated by the pinned Pyodide release.
- name: Set up Emscripten toolchain
uses: mymindstorm/setup-emsdk@v14
with:
version: '3.1.46'
actions-cache-folder: emsdk-cache

# Pinning pyodide-build fixes the Pyodide version; when bumping it, update
# the Python, Emscripten and Node.js versions in this job to match.
- name: Install pyodide-build
run: pip install "pyodide-build==0.25.1"

# Produces the wheel that the virtual-environment step installs from
# dist/*.whl.
- name: Build pandas for Pyodide
run: |
pyodide build
# Node.js version should be one supported by the pinned Pyodide release
# (see the Node.js link at the top of this job).
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: '18'

# Create a Pyodide virtual environment and install the freshly built wheel
# into it.
- name: Set up Pyodide virtual environment
run: |
pyodide venv .venv-pyodide
source .venv-pyodide/bin/activate
pip install dist/*.whl
# Run pd.test() from the installed wheel, deselecting tests marked
# clipboard, single_cpu, slow, network or db.
# NOTE(review): PANDAS_CI=1 presumably enables CI-specific behaviour in the
# test suite - confirm against the pandas test configuration.
- name: Test pandas for Pyodide
env:
PANDAS_CI: 1
run: |
source .venv-pyodide/bin/activate
pip install pytest hypothesis
# do not import pandas from the checked out repo
cd ..
python -c 'import pandas as pd; pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db"])'
2 changes: 2 additions & 0 deletions pandas/compat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
PY311,
PY312,
PYPY,
WASM,
)
import pandas.compat.compressors
from pandas.compat.numpy import is_numpy_dev
Expand Down Expand Up @@ -207,4 +208,5 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
"PY311",
"PY312",
"PYPY",
"WASM",
]
2 changes: 2 additions & 0 deletions pandas/compat/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
PY311 = sys.version_info >= (3, 11)
PY312 = sys.version_info >= (3, 12)
PYPY = platform.python_implementation() == "PyPy"
WASM = (sys.platform == "emscripten") or (platform.machine() in ["wasm32", "wasm64"])
ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "")
REF_COUNT = 2 if PY311 else 3

Expand All @@ -27,4 +28,5 @@
"PY311",
"PY312",
"PYPY",
"WASM",
]
3 changes: 3 additions & 0 deletions pandas/tests/apply/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np
import pytest

from pandas.compat import WASM

from pandas.core.dtypes.common import is_number

from pandas import (
Expand Down Expand Up @@ -54,6 +56,7 @@ def test_apply_np_reducer(op, how):
tm.assert_series_equal(result, expected)


@pytest.mark.skipif(WASM, reason="No fp exception support in wasm")
@pytest.mark.parametrize(
"op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
)
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import pytest

from pandas.compat import WASM
from pandas.errors import OutOfBoundsDatetime

import pandas as pd
Expand Down Expand Up @@ -1741,6 +1742,7 @@ def test_td64_div_object_mixed_result(self, box_with_array):
# ------------------------------------------------------------------
# __floordiv__, __rfloordiv__

@pytest.mark.skipif(WASM, reason="no fp exception support in wasm")
def test_td64arr_floordiv_td64arr_with_nat(self, box_with_array):
# GH#35529
box = box_with_array
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/datetimes/methods/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import pytest

from pandas.compat import WASM
import pandas.util._test_decorators as td

from pandas import (
Expand Down Expand Up @@ -70,6 +71,9 @@ def test_normalize_tz(self):
assert not rng.is_normalized

@td.skip_if_windows
@pytest.mark.skipif(
WASM, reason="tzset is available only on Unix-like systems, not WASM"
)
@pytest.mark.parametrize(
"timezone",
[
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/indexes/datetimes/methods/test_resolution.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from dateutil.tz import tzlocal
import pytest

from pandas.compat import IS64
from pandas.compat import (
IS64,
WASM,
)

from pandas import date_range

Expand All @@ -20,9 +23,10 @@
("us", "microsecond"),
],
)
@pytest.mark.skipif(WASM, reason="OverflowError received on WASM")
def test_dti_resolution(request, tz_naive_fixture, freq, expected):
tz = tz_naive_fixture
if freq == "YE" and not IS64 and isinstance(tz, tzlocal):
if freq == "YE" and ((not IS64) or WASM) and isinstance(tz, tzlocal):
request.applymarker(
pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038")
)
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/io/parser/common/test_file_buffer_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import numpy as np
import pytest

from pandas.compat import WASM
from pandas.errors import (
EmptyDataError,
ParserError,
Expand Down Expand Up @@ -80,6 +81,7 @@ def test_path_path_lib(all_parsers):
tm.assert_frame_equal(df, result)


@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
def test_nonexistent_path(all_parsers):
# gh-2428: pls no segfault
# gh-14086: raise more helpful FileNotFoundError
Expand All @@ -93,6 +95,7 @@ def test_nonexistent_path(all_parsers):
assert path == e.value.filename


@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
@td.skip_if_windows # os.chmod does not work in windows
def test_no_permission(all_parsers):
# GH 23784
Expand Down
21 changes: 12 additions & 9 deletions pandas/tests/io/parser/test_c_parser_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import numpy as np
import pytest

from pandas.compat import WASM
from pandas.compat.numpy import np_version_gte1p24
from pandas.errors import (
ParserError,
Expand Down Expand Up @@ -94,15 +95,16 @@ def test_dtype_and_names_error(c_parser_only):
"""
# fallback casting, but not castable
warning = RuntimeWarning if np_version_gte1p24 else None
with pytest.raises(ValueError, match="cannot safely convert"):
with tm.assert_produces_warning(warning, check_stacklevel=False):
parser.read_csv(
StringIO(data),
sep=r"\s+",
header=None,
names=["a", "b"],
dtype={"a": np.int32},
)
if not WASM: # no fp exception support in wasm
with pytest.raises(ValueError, match="cannot safely convert"):
with tm.assert_produces_warning(warning, check_stacklevel=False):
parser.read_csv(
StringIO(data),
sep=r"\s+",
header=None,
names=["a", "b"],
dtype={"a": np.int32},
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -550,6 +552,7 @@ def test_chunk_whitespace_on_boundary(c_parser_only):
tm.assert_frame_equal(result, expected)


@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
def test_file_handles_mmap(c_parser_only, csv1):
# gh-14418
#
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/io/sas/test_sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import numpy as np
import pytest

from pandas.compat import IS64
from pandas.compat._constants import (
IS64,
WASM,
)
from pandas.errors import EmptyDataError

import pandas as pd
Expand Down Expand Up @@ -168,6 +171,7 @@ def test_airline(datapath):
tm.assert_frame_equal(df, df0)


@pytest.mark.skipif(WASM, reason="Pyodide/WASM has 32-bitness")
def test_date_time(datapath):
# Support of different SAS date/datetime formats (PR #15871)
fname = datapath("io", "sas", "data", "datetime.sas7bdat")
Expand Down Expand Up @@ -253,6 +257,7 @@ def test_corrupt_read(datapath):
pd.read_sas(fname)


@pytest.mark.xfail(WASM, reason="failing with currently set tolerances on WASM")
def test_max_sas_date(datapath):
# GH 20927
# NB. max datetime in SAS dataset is 31DEC9999:23:59:59.999
Expand Down Expand Up @@ -292,6 +297,7 @@ def test_max_sas_date(datapath):
tm.assert_frame_equal(df, expected)


@pytest.mark.xfail(WASM, reason="failing with currently set tolerances on WASM")
def test_max_sas_date_iterator(datapath):
# GH 20927
# when called as an iterator, only those chunks with a date > pd.Timestamp.max
Expand Down Expand Up @@ -337,6 +343,7 @@ def test_max_sas_date_iterator(datapath):
tm.assert_frame_equal(results[1], expected[1])


@pytest.mark.skipif(WASM, reason="Pyodide/WASM has 32-bitness")
def test_null_date(datapath):
fname = datapath("io", "sas", "data", "dates_null.sas7bdat")
df = pd.read_sas(fname, encoding="utf-8")
Expand Down
10 changes: 9 additions & 1 deletion pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
import numpy as np
import pytest

from pandas.compat import is_platform_windows
from pandas.compat import (
WASM,
is_platform_windows,
)

import pandas as pd
import pandas._testing as tm
Expand Down Expand Up @@ -163,6 +166,7 @@ def test_iterator(self):
tm.assert_frame_equal(first, expected.iloc[[0]])
tm.assert_frame_equal(pd.concat(it), expected.iloc[1:])

@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
@pytest.mark.parametrize(
"reader, module, error_class, fn_ext",
[
Expand Down Expand Up @@ -228,6 +232,7 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex
):
method(dummy_frame, path)

@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
@pytest.mark.parametrize(
"reader, module, error_class, fn_ext",
[
Expand Down Expand Up @@ -382,6 +387,7 @@ def mmap_file(datapath):


class TestMMapWrapper:
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
def test_constructor_bad_file(self, mmap_file):
non_file = StringIO("I am not a file")
non_file.fileno = lambda: -1
Expand All @@ -404,6 +410,7 @@ def test_constructor_bad_file(self, mmap_file):
with pytest.raises(ValueError, match=msg):
icom._maybe_memory_map(target, True)

@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
def test_next(self, mmap_file):
with open(mmap_file, encoding="utf-8") as target:
lines = target.readlines()
Expand Down Expand Up @@ -587,6 +594,7 @@ def test_bad_encdoing_errors():
icom.get_handle(path, "w", errors="bad")


@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
def test_errno_attribute():
# GH 13872
with pytest.raises(FileNotFoundError, match="\\[Errno 2\\]") as err:
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/io/xml/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import numpy as np
import pytest

from pandas.compat import WASM
from pandas.compat._optional import import_optional_dependency
from pandas.errors import (
EmptyDataError,
Expand Down Expand Up @@ -485,6 +486,7 @@ def test_empty_string_etree(val):
read_xml(data, parser="etree")


@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
def test_wrong_file_path(parser):
filename = os.path.join("does", "not", "exist", "books.xml")

Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/scalar/timestamp/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
conversion,
)
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas.compat import WASM
import pandas.util._test_decorators as td

import pandas._testing as tm
Expand Down Expand Up @@ -99,13 +100,15 @@ def test_replace_integer_args(self, tz_aware_fixture):
with pytest.raises(ValueError, match=msg):
ts.replace(hour=0.1)

@pytest.mark.skipif(WASM, reason="tzset is not available on WASM")
def test_replace_tzinfo_equiv_tz_localize_none(self):
# GH#14621, GH#7825
# assert conversion to naive is the same as replacing tzinfo with None
ts = Timestamp("2013-11-03 01:59:59.999999-0400", tz="US/Eastern")
assert ts.tz_localize(None) == ts.replace(tzinfo=None)

@td.skip_if_windows
@pytest.mark.skipif(WASM, reason="tzset is not available on WASM")
def test_replace_tzinfo(self):
# GH#15683
dt = datetime(2016, 3, 27, 1)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
# NB: This is for the Timestamp.timestamp *method* specifically, not
# the Timestamp class in general.

import pytest
from pytz import utc

from pandas._libs.tslibs import Timestamp
from pandas.compat import WASM
import pandas.util._test_decorators as td

import pandas._testing as tm


class TestTimestampMethod:
@td.skip_if_windows
@pytest.mark.skipif(WASM, reason="tzset is not available on WASM")
def test_timestamp(self, fixed_now_ts):
# GH#17329
# tz-naive --> treat it as if it were UTC for purposes of timestamp()
Expand Down
Loading

0 comments on commit 4f743f9

Please sign in to comment.