Skip to content

Commit

Permalink
Fixed bugs and updated version
Browse files: browse the repository at this point in the history.
  • Loading branch information
vadyushkins committed May 29, 2021
1 parent bea10d5 commit 39073f3
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 11 deletions.
2 changes: 1 addition & 1 deletion cfpq_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
experimental analysis of context-free path querying algorithms
"""

__version__ = "1.0.1-dev"
__version__ = "1.0.1"

import cfpq_data.config
from cfpq_data.config import *
Expand Down
21 changes: 15 additions & 6 deletions cfpq_data/graphs/readwrite/rdf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Read (and write) a graph
from (and to) RDF file.
"""
import re
from os import path, remove
from pathlib import Path
from shutil import unpack_archive
Expand All @@ -26,9 +27,6 @@
"graph_to_rdf",
]

if "dev" in VERSION:
VERSION = "dev"


def graph_from_dataset(graph_name: str, verbose: bool = True) -> MultiDiGraph:
"""Returns a graph from
Expand Down Expand Up @@ -65,6 +63,17 @@ def graph_from_dataset(graph_name: str, verbose: bool = True) -> MultiDiGraph:
graph_file_path = str(dst / graph_file)

if not path.isfile(graph_file_path):

DATASET_VERSION = VERSION

if re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION) is not None:
DATASET_VERSION = (
str(
re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION).group(1)
)
+ ".0.0"
)

graph_archive = (
graph_file + DATASET[graph_class][graph_name]["ArchiveExtension"]
)
Expand All @@ -86,7 +95,7 @@ def _inner(bytes_amount):

file_size_in_bytes = s3.head_object(
Bucket=BUCKET_NAME,
Key=f"{VERSION}/{graph_class}/{graph_archive}",
Key=f"{DATASET_VERSION}/{graph_class}/{graph_archive}",
)["ContentLength"]

with tqdm(
Expand All @@ -97,14 +106,14 @@ def _inner(bytes_amount):
) as t:
s3.download_file(
Bucket=BUCKET_NAME,
Key=f"{VERSION}/{graph_class}/{graph_archive}",
Key=f"{DATASET_VERSION}/{graph_class}/{graph_archive}",
Filename=graph_archive_path,
Callback=_hook(t),
)
else:
s3.download_file(
Bucket=BUCKET_NAME,
Key=f"{VERSION}/{graph_class}/{graph_archive}",
Key=f"{DATASET_VERSION}/{graph_class}/{graph_archive}",
Filename=graph_archive_path,
)

Expand Down
15 changes: 11 additions & 4 deletions utils/fetch_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from collections import defaultdict
from json import dumps

Expand All @@ -7,11 +8,15 @@
from cfpq_data.config import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, BUCKET_NAME
from config import MAIN_FOLDER

if "dev" in VERSION:
VERSION = "dev"


def fetch_dataset():
DATASET_VERSION = VERSION

if re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION) is not None:
DATASET_VERSION = (
str(re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION).group(1)) + ".0.0"
)

s3 = client(
"s3",
aws_access_key_id=AWS_ACCESS_KEY_ID,
Expand All @@ -20,7 +25,9 @@ def fetch_dataset():

dataset = defaultdict(dict)

for graph in s3.list_objects(Bucket="cfpq-data", Prefix=VERSION)["Contents"]:
for graph in s3.list_objects(Bucket="cfpq-data", Prefix=DATASET_VERSION)[
"Contents"
]:
graph_key = graph["Key"]
graph_class, graph_full_name = graph_key.split("/")[1:]
graph_name = graph_full_name.split(".")[0]
Expand Down

0 comments on commit 39073f3

Please sign in to comment.