Merge pull request #2305 from mabel-dev/#2303
joocer authored Jan 24, 2025
2 parents 4036505 + 178f82e commit 68ed0fc
Showing 5 changed files with 88 additions and 81 deletions.
2 changes: 1 addition & 1 deletion opteryx/__version__.py
@@ -1,4 +1,4 @@
__build__ = 1023
__build__ = 1029

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
128 changes: 65 additions & 63 deletions tests/fuzzing/test_sql_fuzzer_connectors.py
@@ -18,77 +18,84 @@
from orso.tools import random_int, random_string
from orso.types import OrsoTypes

from pyiceberg.catalog import load_catalog
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster

import opteryx
from opteryx.utils.formatter import format_sql
from opteryx.connectors import SqlConnector
from opteryx.connectors import CqlConnector
from opteryx.connectors import IcebergConnector
from opteryx.connectors import MongoDbConnector
from opteryx import virtual_datasets
from orso.tools import lru_cache_with_expiry

from tests.tools import create_duck_db, populate_mongo
from tests.tools import is_arm, is_mac, is_windows, skip_if, is_version

TEST_CYCLES: int = 100
TEST_CYCLES: int = 20

DATA_CATALOG_CONNECTION = os.environ.get("DATA_CATALOG_CONNECTION")
DATA_CATALOG_STORAGE = os.environ.get("DATA_CATALOG_STORAGE")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD")
POSTGRES_USER = os.environ.get("POSTGRES_USER")
DATASTAX_CLIENT_ID = os.environ["DATASTAX_CLIENT_ID"]
DATASTAX_CLIENT_SECRET = os.environ["DATASTAX_CLIENT_SECRET"]
COCKROACH_PASSWORD = os.environ.get("COCKROACH_PASSWORD")
COCKROACH_USER = os.environ.get("COCKROACH_USER")
COCKROACH_CONNECTION = f"cockroachdb://{COCKROACH_USER}:{COCKROACH_PASSWORD}@redleg-hunter-12763.5xj.cockroachlabs.cloud:26257/opteryx?sslmode=require"
MONGO_CONNECTION = os.environ.get("MONGODB_CONNECTION")
MONGO_DATABASE = os.environ.get("MONGODB_DATABASE")

TABLES = {
"planets": {
"fields": virtual_datasets.planets.schema().columns,
"connectors": [
"$planets", # virtual data
"testdata.planets", # blob/file storage
"'testdata/planets/planets.parquet'", # file-as-table data
"sqlite.planets", # sqlite
"iceberg.planets", # iceberg
# "cockroach.planets", # cockroach (disabled, field names in lowercase)
# "datastax.planets", # datastax (disabled, dataset not present)
"duckdb.planets", # duckdb
# "mongo.planets" # mongo (disabled, typing system not compatible)
]
},
"satellites": {
"fields": virtual_datasets.satellites.schema().columns,
"connectors": [
"$satellites",
"testdata.satellites",
"'testdata/satellites/satellites.parquet'",
"sqlite.satellites"
]
}
}

# Datastax Astra Connection
cloud_config = {"secure_connect_bundle": "secure-connect.zip"}
auth_provider = PlainTextAuthProvider(DATASTAX_CLIENT_ID, DATASTAX_CLIENT_SECRET)
cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)
# Iceberg
iceberg_catalog = load_catalog("opteryx", **{"uri": DATA_CATALOG_CONNECTION, "warehouse": DATA_CATALOG_STORAGE})
# DuckDB
create_duck_db()
# Mongo
populate_mongo()

opteryx.register_store("iceberg", IcebergConnector, catalog=iceberg_catalog)
opteryx.register_store("cockroach", SqlConnector, remove_prefix=True, connection=COCKROACH_CONNECTION)
opteryx.register_store("datastax", CqlConnector, remove_prefix=True, cluster=cluster)
opteryx.register_store("duckdb", SqlConnector, remove_prefix=True, connection="duckdb:///planets.duckdb")
opteryx.register_store("mongo", MongoDbConnector, database=MONGO_DATABASE, connection=MONGO_CONNECTION, remove_prefix=True)
"planets": {
"fields": virtual_datasets.planets.schema().columns,
"connectors": [
"$planets", # virtual data
"testdata.planets", # blob/file storage
"'testdata/planets/planets.parquet'", # file-as-table data
"sqlite.planets", # sqlite
"iceberg.planets", # iceberg
# "cockroach.planets", # cockroach (disabled, field names in lowercase)
# "datastax.planets", # datastax (disabled, dataset not present)
"duckdb.planets", # duckdb
# "mongo.planets" # mongo (disabled, typing system not compatible)
]
},
"satellites": {
"fields": virtual_datasets.satellites.schema().columns,
"connectors": [
"$satellites",
"testdata.satellites",
"'testdata/satellites/satellites.parquet'",
"sqlite.satellites"
]
}
}


@lru_cache_with_expiry
def set_up_connections():

from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
from pyiceberg.catalog import load_catalog

DATA_CATALOG_CONNECTION = os.environ.get("DATA_CATALOG_CONNECTION")
DATA_CATALOG_STORAGE = os.environ.get("DATA_CATALOG_STORAGE")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD")
POSTGRES_USER = os.environ.get("POSTGRES_USER")
DATASTAX_CLIENT_ID = os.environ["DATASTAX_CLIENT_ID"]
DATASTAX_CLIENT_SECRET = os.environ["DATASTAX_CLIENT_SECRET"]
COCKROACH_PASSWORD = os.environ.get("COCKROACH_PASSWORD")
COCKROACH_USER = os.environ.get("COCKROACH_USER")
COCKROACH_CONNECTION = f"cockroachdb://{COCKROACH_USER}:{COCKROACH_PASSWORD}@redleg-hunter-12763.5xj.cockroachlabs.cloud:26257/opteryx?sslmode=require"
MONGO_CONNECTION = os.environ.get("MONGODB_CONNECTION")
MONGO_DATABASE = os.environ.get("MONGODB_DATABASE")

# Datastax Astra Connection
# cloud_config = {"secure_connect_bundle": "secure-connect.zip"}
# auth_provider = PlainTextAuthProvider(DATASTAX_CLIENT_ID, DATASTAX_CLIENT_SECRET)
# cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)
# Iceberg
iceberg_catalog = load_catalog("opteryx", **{"uri": DATA_CATALOG_CONNECTION, "warehouse": DATA_CATALOG_STORAGE})
# DuckDB
create_duck_db()
# Mongo
# populate_mongo()

opteryx.register_store("iceberg", IcebergConnector, catalog=iceberg_catalog)
#opteryx.register_store("cockroach", SqlConnector, remove_prefix=True, connection=COCKROACH_CONNECTION)
#opteryx.register_store("datastax", CqlConnector, remove_prefix=True, cluster=cluster)
opteryx.register_store("duckdb", SqlConnector, remove_prefix=True, connection="duckdb:///planets.duckdb")
#opteryx.register_store("mongo", MongoDbConnector, database=MONGO_DATABASE, connection=MONGO_CONNECTION, remove_prefix=True)
opteryx.register_store("sqlite", SqlConnector, remove_prefix=True, connection="sqlite:///testdata/sqlite/database.db")

def random_value(t):
if t == OrsoTypes.VARCHAR:
@@ -180,12 +187,7 @@ def generate_random_sql_select(columns, table):
@pytest.mark.parametrize("i", range(TEST_CYCLES))
def test_sql_fuzzing_connector_comparisons(i):

opteryx.register_store(
"sqlite",
SqlConnector,
remove_prefix=True,
connection="sqlite:///testdata/sqlite/database.db",
)
set_up_connections()

seed = random_int()
random.seed(seed)
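The substantive change in this file is that connector registration no longer happens at import time: the environment lookups and opteryx.register_store calls now live inside set_up_connections, memoised with orso's lru_cache_with_expiry, and each test cycle calls that function instead. A minimal sketch of the pattern, restricted to the SQLite registration already visible in the diff (the test body is illustrative only):

```python
import opteryx
from opteryx.connectors import SqlConnector
from orso.tools import lru_cache_with_expiry


@lru_cache_with_expiry
def set_up_connections():
    # Deferred registration: nothing is configured at import time, and the
    # cache means repeated calls across TEST_CYCLES only do the work once
    # (until the cached entry expires).
    opteryx.register_store(
        "sqlite",
        SqlConnector,
        remove_prefix=True,
        connection="sqlite:///testdata/sqlite/database.db",
    )


def test_example():
    set_up_connections()  # cheap to call from every parametrised test case
    cur = opteryx.query("SELECT name FROM sqlite.planets")
    cur.materialize()
```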
35 changes: 19 additions & 16 deletions tests/plan_optimization/test_limit_pushdown_datastax.py
@@ -7,26 +7,25 @@
import opteryx
from opteryx.connectors import CqlConnector
from opteryx.utils.formatter import format_sql
from orso.tools import lru_cache_with_expiry
from tests.tools import is_arm, is_mac, is_windows, skip_if, is_version

from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
@lru_cache_with_expiry
def set_up_connection():

# We're connecting to DataStax
cloud_config = {"secure_connect_bundle": "secure-connect.zip"}
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster

CLIENT_ID = os.environ["DATASTAX_CLIENT_ID"]
CLIENT_SECRET = os.environ["DATASTAX_CLIENT_SECRET"]

auth_provider = PlainTextAuthProvider(CLIENT_ID, CLIENT_SECRET)
cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)

opteryx.register_store(
"datastax",
CqlConnector,
remove_prefix=True,
cluster=cluster,
)
DATASTAX_CLIENT_ID = os.environ["DATASTAX_CLIENT_ID"]
DATASTAX_CLIENT_SECRET = os.environ["DATASTAX_CLIENT_SECRET"]

# Datastax Astra Connection
cloud_config = {"secure_connect_bundle": "secure-connect.zip"}
auth_provider = PlainTextAuthProvider(DATASTAX_CLIENT_ID, DATASTAX_CLIENT_SECRET)
cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)

opteryx.register_store("datastax", CqlConnector, remove_prefix=True, cluster=cluster)

STATEMENTS = [
# baseline
("SELECT name FROM datastax.opteryx.planets;", 9),
@@ -44,8 +43,12 @@
("SELECT name FROM (SELECT * FROM datastax.opteryx.planets) AS S LIMIT 3", 3),
]

@skip_if(is_arm() or is_windows() or is_mac() or not is_version("3.10"))
@pytest.mark.parametrize("query, expected_rows", STATEMENTS)
def test_datastax_limit_pushdown(query, expected_rows):

set_up_connection()

cur = opteryx.query(query)
cur.materialize()
assert cur.stats["rows_read"] == expected_rows, cur.stats
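For context on what these assertions measure: rows_read comes from the cursor statistics after materialisation, and a connector that honours limit pushdown should report only the limited number of rows read. A hedged sketch of the same check against the built-in $planets relation; whether that particular connector pushes the LIMIT down (and so what the second call prints) is not guaranteed here:

```python
import opteryx


def rows_read(query: str) -> int:
    # Execute, force materialisation, then inspect the engine statistics,
    # mirroring the pattern used by test_datastax_limit_pushdown.
    cur = opteryx.query(query)
    cur.materialize()
    return cur.stats["rows_read"]


print(rows_read("SELECT name FROM $planets;"))          # every row of the relation is read
print(rows_read("SELECT name FROM $planets LIMIT 3;"))  # fewer rows only if the LIMIT is pushed down
```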
2 changes: 2 additions & 0 deletions tests/requirements_arm.txt
@@ -20,6 +20,8 @@ pymysql
psycopg2-binary
duckdb==1.1.3 # 1040
duckdb-engine==0.15.0 # 1040
cassandra-driver
pyiceberg[sql-sqlite]
openpyxl
pyiceberg

2 changes: 1 addition & 1 deletion tests/tools.py
@@ -357,7 +357,7 @@ def run_tests(): # pragma: no cover
CREATE TABLE planets (
id INTEGER PRIMARY KEY,
name VARCHAR(20),
mass DECIMAL(5, 1),
mass DECIMAL(8, 4),
diameter INTEGER,
density DECIMAL(5, 1),
gravity DECIMAL(5, 1),
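The widened type matters because DECIMAL(5, 1) keeps a single fractional digit, so small mass values (for example 0.33) would be rounded to 0.3 in what appears to be the DuckDB test table, while DECIMAL(8, 4) preserves them. A quick illustration using the duckdb Python client (the exact rounding rule is the engine's, not something asserted by this commit):

```python
import duckdb

con = duckdb.connect()

# One fractional digit: the cast rounds and the original precision is lost.
print(con.execute("SELECT CAST(0.33 AS DECIMAL(5, 1))").fetchone())  # (Decimal('0.3'),)

# Four fractional digits: the value survives the cast intact.
print(con.execute("SELECT CAST(0.33 AS DECIMAL(8, 4))").fetchone())  # (Decimal('0.3300'),)
```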
