Skip to content

Commit

Permalink
Merge pull request #2246 from mabel-dev/#2245
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer authored Jan 14, 2025
2 parents 88e6e70 + f17d286 commit 9853e78
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 5 deletions.
2 changes: 1 addition & 1 deletion opteryx/__version__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__build__ = 982
__build__ = 983

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
4 changes: 3 additions & 1 deletion opteryx/connectors/disk_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,9 @@ def get_dataset_schema(self) -> RelationSchema:
if self.schema:
return self.schema

self.schema = next(self.read_dataset(just_schema=True), None)
for schema in self.read_dataset(just_schema=True):
self.schema = schema
break

if self.schema is None:
if os.path.isdir(self.dataset):
Expand Down
4 changes: 3 additions & 1 deletion opteryx/connectors/gcp_cloudstorage_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,9 @@ def get_dataset_schema(self) -> RelationSchema:
number_of_blobs = sum(len(b) for b in self.blob_list.values())

# Read first blob for schema inference and cache it
self.schema = next(self.read_dataset(just_schema=True), None)
for schema in self.read_dataset(just_schema=True):
self.schema = schema
break

if self.schema is None:
raise DatasetNotFoundError(dataset=self.dataset)
Expand Down
12 changes: 11 additions & 1 deletion opteryx/virtual_datasets/missions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
Space Mission dataset acquired from [Kaggle](https://www.kaggle.com/datasets/agirlcoding/all-space-missions-from-1957).
"""

import datetime

from orso.schema import FlatColumn
from orso.schema import RelationSchema
from orso.types import OrsoTypes
Expand Down Expand Up @@ -62,4 +64,12 @@ def schema():


def statistics() -> RelationStatistics:
    """Return pre-computed statistics for the space missions dataset.

    The values are hard-coded rather than derived at runtime: the total
    record count, plus per-column lower bounds, upper bounds and null
    counts, each keyed by column name.

    Returns:
        RelationStatistics: a freshly created statistics object with
        ``record_count``, ``lower_bounds``, ``upper_bounds`` and
        ``null_count`` populated.
    """
    stats = RelationStatistics()

    # Keep each per-column mapping on a single line; the formatter is
    # switched off so the literals are not exploded across many lines.
    # fmt:off
    stats.record_count = 4630
    stats.lower_bounds = {'Company': 'AEB', 'Location': 'Blue Origin Launch Site, West Texas, Texas, USA', 'Price': 2.5, 'Lauched_at': datetime.datetime(1957, 10, 4, 19, 28), 'Rocket': 'ASLV', 'Rocket_Status': 'Active', 'Mission': '-TJS_6.00', 'Mission_Status': 'Failure'}
    stats.upper_bounds = {'Company': 'i-Space', 'Location': 'Xichang Satellite Launch Center, China', 'Price': 450.0, 'Lauched_at': datetime.datetime(2022, 7, 29, 13, 28), 'Rocket': 'Zoljanah', 'Rocket_Status': 'Retired', 'Mission': 'iPStar-1', 'Mission_Status': 'Success'}
    stats.null_count = {'Company': 0, 'Location': 0, 'Price': 3380, 'Lauched_at': 127, 'Rocket': 0, 'Rocket_Status': 0, 'Mission': 0, 'Mission_Status': 0}
    # fmt:on
    return stats
10 changes: 9 additions & 1 deletion opteryx/virtual_datasets/satellite_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,12 @@ def schema():


def statistics() -> RelationStatistics:
    """Return pre-computed statistics for the satellite dataset.

    The values are hard-coded rather than derived at runtime: the total
    record count, plus per-column lower bounds, upper bounds and null
    counts, each keyed by column name.

    Returns:
        RelationStatistics: a freshly created statistics object with
        ``record_count``, ``lower_bounds``, ``upper_bounds`` and
        ``null_count`` populated.
    """
    stats = RelationStatistics()

    # Keep each per-column mapping on a single line; the formatter is
    # switched off so the literals are not exploded across many lines.
    # fmt:off
    stats.record_count = 177
    stats.lower_bounds = {'id': 1, 'planetId': 3, 'name': 'Adrastea', 'gm': -0.0, 'radius': 0.3, 'density': 0.34, 'magnitude': -12.74, 'albedo': 0.04}
    stats.upper_bounds = {'id': 177, 'planetId': 9, 'name': 'Ymir', 'gm': 9887.834, 'radius': 2631.2, 'density': 3.528, 'magnitude': 27.0, 'albedo': 1.67}
    stats.null_count = {'id': 0, 'planetId': 0, 'name': 0, 'gm': 0, 'radius': 0, 'density': 0, 'magnitude': 0, 'albedo': 0}
    # fmt:on
    return stats

0 comments on commit 9853e78

Please sign in to comment.