Skip to content

Commit

Permalink
Updated project utils
Browse files Browse the repository at this point in the history
  • Loading branch information
vadyushkins committed May 8, 2021
1 parent c5deb9d commit 29c9f84
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 136 deletions.
90 changes: 0 additions & 90 deletions meta_table_for_readme.py

This file was deleted.

46 changes: 0 additions & 46 deletions script.py

This file was deleted.

3 changes: 3 additions & 0 deletions utils/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from pathlib import Path

# Root folder of the repository: two levels above this file (utils/config.py).
# The path is resolved first because a relative ``__file__`` (for example
# ``config.py``) would make ``.parent.parent`` collapse to ``.`` instead of
# pointing at the real parent directory.
MAIN_FOLDER = Path(__file__).resolve().parent.parent
44 changes: 44 additions & 0 deletions utils/fetch_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from collections import defaultdict
from json import dumps

from boto3 import client

from cfpq_data import __version__ as cfpq_data_version
from cfpq_data.config import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, BUCKET_NAME
from config import MAIN_FOLDER


def fetch_dataset():
    """Collect metadata about every graph stored for the current version.

    Lists the objects of the ``BUCKET_NAME`` S3 bucket whose keys start with
    the installed ``cfpq_data`` version and groups them by graph class.
    Keys are expected to look like
    ``<version>/<graph_class>/<graph_name>.<extension>[.<archive>]``.

    Returns
    -------
    dataset : defaultdict
        Mapping ``graph_class -> graph_name -> info`` where ``info`` is a
        dict with the keys ``"VersionId"`` (taken from ``head_object``),
        ``"FileExtension"`` and ``"ArchiveExtension"``. The mapping is empty
        when no object matches the prefix.

    Raises
    ------
    ValueError
        If a key does not consist of exactly three ``/``-separated parts.
    IndexError
        If the file name part of a key contains no ``.``.
    """
    s3 = client(
        "s3",
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )

    dataset = defaultdict(dict)

    # A single list call returns one page only, so walk all pages; the same
    # bucket constant is used for listing and for the head_object lookup.
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=BUCKET_NAME, Prefix=cfpq_data_version):
        # "Contents" is absent from the response when nothing matches.
        for graph in page.get("Contents", []):
            graph_key = graph["Key"]
            graph_class, graph_full_name = graph_key.split("/")[1:]

            name_parts = graph_full_name.split(".")
            graph_name = name_parts[0]
            graph_file_extension = "." + name_parts[1]
            # Everything after "<name>.<extension>" is the archive suffix
            # (possibly empty); slicing stays correct even when the
            # extension text occurs again later in the file name.
            graph_archive_extension = graph_full_name[
                len(graph_name) + len(graph_file_extension):
            ]

            dataset[graph_class][graph_name] = {
                "VersionId": s3.head_object(Bucket=BUCKET_NAME, Key=graph_key)[
                    "VersionId"
                ],
                "FileExtension": graph_file_extension,
                "ArchiveExtension": graph_archive_extension,
            }

    return dataset


def update_dataset(dataset):
    """Write *dataset* to ``cfpq_data/dataset.py`` as a ``dataset = ...`` assignment.

    The mapping is serialised with ``json.dumps`` using a four-space indent
    and the target file is overwritten.
    """
    target = MAIN_FOLDER / "cfpq_data" / "dataset.py"
    target.write_text(f"dataset = {dumps(dataset, indent=4)}")


if __name__ == "__main__":
    # Script entry point: fetch the metadata and write it out immediately.
    update_dataset(fetch_dataset())
35 changes: 35 additions & 0 deletions utils/update_dataset_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from csv import DictWriter

from cfpq_data import graph_from_dataset
from config import MAIN_FOLDER
from fetch_dataset import fetch_dataset


def update_dataset_tables(dataset):
    """Regenerate the per-class CSV tables in ``docs/dataset``.

    For every graph class in *dataset* the file ``<graph_class>.csv`` is
    overwritten with a header row and one row per graph containing the graph
    name, its number of vertices and its number of edges (both formatted
    with ``,`` as thousands separator).

    Parameters
    ----------
    dataset : dict
        Mapping ``graph_class -> graphs`` where iterating ``graphs`` yields
        graph names accepted by ``graph_from_dataset``.
    """
    fieldnames = ["Graph", "#Vertices", "#Edges"]
    tables_folder = MAIN_FOLDER / "docs" / "dataset"

    for graph_class, graphs in dataset.items():
        # newline="" lets the csv module control line endings itself;
        # without it rows end in "\r\r\n" on platforms that translate "\n".
        with open(
            tables_folder / f"{graph_class}.csv", mode="w", newline=""
        ) as csv_file:
            csv_writer = DictWriter(csv_file, fieldnames=fieldnames)
            csv_writer.writeheader()

            for graph_name in graphs:
                graph = graph_from_dataset(graph_name)
                csv_writer.writerow(
                    {
                        "Graph": graph_name,
                        "#Vertices": f"{graph.number_of_nodes():,}",
                        "#Edges": f"{graph.number_of_edges():,}",
                    }
                )


if __name__ == "__main__":
    # Script entry point: rebuild the tables from freshly fetched metadata.
    update_dataset_tables(fetch_dataset())

0 comments on commit 29c9f84

Please sign in to comment.