Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer committed Nov 29, 2023
1 parent d4d97ec commit 794902e
Show file tree
Hide file tree
Showing 12 changed files with 39 additions and 94 deletions.
2 changes: 1 addition & 1 deletion opteryx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
# deepcode ignore PythonSameEvalBinaryExpressiontrue: false +ve, values can be different
if _env_path.exists() and (dotenv is None): # pragma: no cover
# using a logger here will tie us in knots
print(f"{datetime.datetime.now()} [LOADER] `.env` file exists but `dotEnv` not installed.")
print(f"{datetime.datetime.now()} [LOADER] `.env` file exists but `pydotenv` not installed.")
elif dotenv is not None: # pragma: no cover
dotenv.load_dotenv(dotenv_path=_env_path)
print(f"{datetime.datetime.now()} [LOADER] Loading `.env` file.")
Expand Down
5 changes: 5 additions & 0 deletions opteryx/components/binder/binder_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,11 @@ def visit_function_dataset(
node.relation_name = node.alias
node.rows = int(node.args[0].value)

if len(node.args) < 2:
raise InvalidFunctionParameterError(
f"FAKE function expects at least two parameters, the number of rows, and then either the number of columns, or an array of the column types."
)

if node.args[1].node_type == NodeType.NESTED:
column_definition = [node.args[1].centre]
else:
Expand Down

This file was deleted.

15 changes: 14 additions & 1 deletion opteryx/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from opteryx import utils
from opteryx.constants import QueryStatus
from opteryx.constants import ResultType
from opteryx.exceptions import InconsistentSchemaError
from opteryx.exceptions import InvalidCursorStateError
from opteryx.exceptions import MissingSqlStatement
from opteryx.exceptions import UnsupportedSyntaxError
Expand Down Expand Up @@ -308,7 +309,19 @@ def execute_to_arrow(
result_data, self._result_type = next(results, (ResultType._UNDEFINED, None))
if limit is not None:
result_data = utils.arrow.limit_records(result_data, limit)
return pyarrow.concat_tables(result_data, mode="default")
try:
return pyarrow.concat_tables(
result_data, promote_optionsstr="permissive", mode="default"
)
except pyarrow.ArrowInvalid as err:
print(dir(err))
if "struct" in str(err):
raise InconsistentSchemaError(
"Unable to resolve different schemas, most likely related to a STRUCT column."
)
raise InconsistentSchemaError(
"Unable to resolve different schemas, this may be due to uncoercible column types."
)

@property
def stats(self) -> Dict[str, Any]:
Expand Down
5 changes: 5 additions & 0 deletions opteryx/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
├── UnsupportedTypeError
└── ProgrammingError [PEP-0249] *
├── DataError *
│ ├── InconsistentSchemaError
│ ├── EmptyDatasetError
│ └── EmptyResultSetError
├── ExecutionError *
Expand Down Expand Up @@ -328,6 +329,10 @@ class MissingSqlStatement(ProgrammingError):
"""Exception raised for missing SQL statement."""


class InconsistentSchemaError(DataError):
    """Raised when, despite efforts, we can't get a consistent schema."""


class EmptyDatasetError(DataError):
"""Exception raised when a dataset is empty."""

Expand Down
9 changes: 4 additions & 5 deletions opteryx/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
"""

import datetime
import json

import numpy
import orjson
import pyarrow
from orso.cityhash import CityHash64
from pyarrow import ArrowNotImplementedError
Expand All @@ -26,7 +26,6 @@
import opteryx
from opteryx.exceptions import FunctionNotFoundError
from opteryx.exceptions import IncorrectTypeError
from opteryx.exceptions import InvalidFunctionParameterError
from opteryx.exceptions import UnsupportedSyntaxError
from opteryx.functions import date_functions
from opteryx.functions import number_functions
Expand Down Expand Up @@ -126,7 +125,7 @@ def try_cast(_type):
"DECIMAL": decimal.Decimal,
"VARCHAR": str,
"TIMESTAMP": numpy.datetime64,
"STRUCT": json.loads,
"STRUCT": orjson.loads,
"DATE": lambda x: dates.parse_iso(x).date(),
}
if _type in casters:
Expand All @@ -141,7 +140,7 @@ def _inner(arr):

def _iterate_single_parameter(func):
    """
    Wrap a single-argument function so it is applied to every item of an array.

    Parameters:
        func: callable
            Function which accepts one item and returns the converted value.

    Returns:
        A function which takes an iterable and returns a numpy array holding
        the result of calling `func` on each item, in the original order.
    """

    def _inner(array):
        # a single return only; `map` applies `func` item by item and the
        # results are materialised before numpy builds the array
        return numpy.array(list(map(func, array)))

    return _inner

Expand Down Expand Up @@ -234,7 +233,7 @@ def _coalesce(*arrays):
"VARCHAR": cast("VARCHAR"),
"STRING": cast("VARCHAR"), # alias for VARCHAR
"STR": cast("VARCHAR"),
"STRUCT": _iterate_single_parameter(json.loads),
"STRUCT": _iterate_single_parameter(lambda x: orjson.loads(str(x))),
"DATE": cast("DATE"),
"TRY_TIMESTAMP": try_cast("TIMESTAMP"),
"TRY_BOOLEAN": try_cast("BOOLEAN"),
Expand Down
8 changes: 5 additions & 3 deletions opteryx/utils/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ def generate_series(*args):
OrsoTypes.INTEGER,
OrsoTypes.DOUBLE,
):
if arg_len not in (2, 3):
raise SqlError("generate_series for numbers takes 2 or 3 parameters.")
if arg_len not in (1, 2, 3):
raise SqlError(
"generate_series for numbers takes 1 (stop), 2 (start, stop) or 3 (start, stop, interval) parameters."
)
return numeric_range(*arg_vals)

# if the params are timestamps, we create time intervals
Expand Down Expand Up @@ -59,7 +61,7 @@ def numeric_range(*args) -> numpy.array:
generate_range(1, 5, 0.5)
"""
# Define defaults
start, step, dtype = numpy.int64(0), numpy.int64(1), numpy.float64
start, step, dtype = numpy.int64(1), numpy.int64(1), numpy.float64

# Process arguments
if len(args) == 1:
Expand Down
1 change: 1 addition & 0 deletions testdata/flat/struct/001.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"dict": "{\"string\": \"string\"}"}
1 change: 1 addition & 0 deletions testdata/flat/struct/002.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"dict": "{\"string\": \"string\", \"float\": 1.2, \"once\":\"true\"}"}
1 change: 1 addition & 0 deletions testdata/flat/struct/003.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"dict": "{\"int\": 1, \"float\": 1.2, \"once\":\"true\"}"}
3 changes: 0 additions & 3 deletions testdata/flat/struct/files.jsonl

This file was deleted.

3 changes: 2 additions & 1 deletion tests/sql_battery/test_shapes_and_errors_battery.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
ColumnReferencedBeforeEvaluationError,
DatasetNotFoundError,
EmptyDatasetError,
InconsistentSchemaError,
IncompatibleTypesError,
IncorrectTypeError,
InvalidFunctionParameterError,
Expand Down Expand Up @@ -1087,7 +1088,7 @@
("SELECT CAST('abc' AS LIST)", None, None, SqlError),
("SELECT TRY_CAST('abc' AS LIST)", None, None, SqlError),

("SELECT STRUCT(dict) FROM testdata.flat.struct", 3, 1, None),
("SELECT STRUCT(dict) FROM testdata.flat.struct", 3, 1, InconsistentSchemaError),

# V2 Negative Tests
("SELECT $planets.id, name FROM $planets INNER JOIN $satellites ON planetId = $planets.id", None, None, AmbiguousIdentifierError),
Expand Down

0 comments on commit 794902e

Please sign in to comment.