diff --git a/cfpq_data/__init__.py b/cfpq_data/__init__.py index 7465df9e..2802b14d 100644 --- a/cfpq_data/__init__.py +++ b/cfpq_data/__init__.py @@ -7,7 +7,7 @@ experimental analysis of context-free path querying algorithms """ -__version__ = "1.0.1-dev" +__version__ = "1.0.1" import cfpq_data.config from cfpq_data.config import * diff --git a/cfpq_data/graphs/readwrite/rdf.py b/cfpq_data/graphs/readwrite/rdf.py index 66fbab7e..6eab0d34 100644 --- a/cfpq_data/graphs/readwrite/rdf.py +++ b/cfpq_data/graphs/readwrite/rdf.py @@ -1,6 +1,7 @@ """Read (and write) a graph from (and to) RDF file. """ +import re from os import path, remove from pathlib import Path from shutil import unpack_archive @@ -26,9 +27,6 @@ "graph_to_rdf", ] -if "dev" in VERSION: - VERSION = "dev" - def graph_from_dataset(graph_name: str, verbose: bool = True) -> MultiDiGraph: """Returns a graph from @@ -65,6 +63,17 @@ def graph_from_dataset(graph_name: str, verbose: bool = True) -> MultiDiGraph: graph_file_path = str(dst / graph_file) if not path.isfile(graph_file_path): + + DATASET_VERSION = VERSION + + if re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION) is not None: + DATASET_VERSION = ( + str( + re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION).group(1) + ) + + ".0.0" + ) + graph_archive = ( graph_file + DATASET[graph_class][graph_name]["ArchiveExtension"] ) @@ -86,7 +95,7 @@ def _inner(bytes_amount): file_size_in_bytes = s3.head_object( Bucket=BUCKET_NAME, - Key=f"{VERSION}/{graph_class}/{graph_archive}", + Key=f"{DATASET_VERSION}/{graph_class}/{graph_archive}", )["ContentLength"] with tqdm( @@ -97,14 +106,14 @@ def _inner(bytes_amount): ) as t: s3.download_file( Bucket=BUCKET_NAME, - Key=f"{VERSION}/{graph_class}/{graph_archive}", + Key=f"{DATASET_VERSION}/{graph_class}/{graph_archive}", Filename=graph_archive_path, Callback=_hook(t), ) else: s3.download_file( Bucket=BUCKET_NAME, - Key=f"{VERSION}/{graph_class}/{graph_archive}", + Key=f"{DATASET_VERSION}/{graph_class}/{graph_archive}", Filename=graph_archive_path, ) diff --git a/utils/fetch_dataset.py b/utils/fetch_dataset.py index 1891abad..3d01f25c 100644 --- a/utils/fetch_dataset.py +++ b/utils/fetch_dataset.py @@ -1,3 +1,4 @@ +import re from collections import defaultdict from json import dumps @@ -7,11 +8,15 @@ from cfpq_data.config import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, BUCKET_NAME from config import MAIN_FOLDER -if "dev" in VERSION: - VERSION = "dev" - def fetch_dataset(): + DATASET_VERSION = VERSION + + if re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION) is not None: + DATASET_VERSION = ( + str(re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION).group(1)) + ".0.0" + ) + s3 = client( "s3", aws_access_key_id=AWS_ACCESS_KEY_ID, @@ -20,7 +25,9 @@ def fetch_dataset(): dataset = defaultdict(dict) - for graph in s3.list_objects(Bucket="cfpq-data", Prefix=VERSION)["Contents"]: + for graph in s3.list_objects(Bucket="cfpq-data", Prefix=DATASET_VERSION)[ + "Contents" + ]: graph_key = graph["Key"] graph_class, graph_full_name = graph_key.split("/")[1:] graph_name = graph_full_name.split(".")[0]