-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c5deb9d
commit 29c9f84
Showing
5 changed files
with
82 additions
and
136 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from pathlib import Path | ||
|
||
# Directory two levels above this file. The path is resolved first so the
# result is absolute even when ``__file__`` is a bare relative name (in which
# case ``.parent.parent`` would otherwise collapse to ".").
MAIN_FOLDER = Path(__file__).resolve().parent.parent
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from collections import defaultdict | ||
from json import dumps | ||
|
||
from boto3 import client | ||
|
||
from cfpq_data import __version__ as cfpq_data_version | ||
from cfpq_data.config import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, BUCKET_NAME | ||
from config import MAIN_FOLDER | ||
|
||
|
||
def fetch_dataset():
    """Build an index of the graphs stored in the S3 bucket.

    Lists every object in ``BUCKET_NAME`` whose key starts with
    ``cfpq_data_version`` and parses keys of the form
    ``<version>/<graph_class>/<graph_name>.<ext>[.<archive_ext>]``.
    Keys that do not match this layout (for example folder placeholder
    objects) are skipped.

    Returns:
        A ``defaultdict(dict)`` mapping graph class -> graph name -> a dict
        with the keys ``"VersionId"``, ``"FileExtension"`` and
        ``"ArchiveExtension"`` (the latter is ``""`` when the file name has
        only one extension).
    """
    s3 = client(
        "s3",
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )

    dataset = defaultdict(dict)

    # A single list call returns at most 1000 keys, so walk every page.
    # The same bucket constant is used for listing and for head_object.
    paginator = s3.get_paginator("list_objects")
    pages = paginator.paginate(Bucket=BUCKET_NAME, Prefix=cfpq_data_version)
    for page in pages:
        # "Contents" is absent from the response when nothing matches.
        for graph in page.get("Contents", []):
            graph_key = graph["Key"]

            key_parts = graph_key.split("/")
            if len(key_parts) != 3:
                continue
            _, graph_class, graph_full_name = key_parts

            graph_name, first_dot, remainder = graph_full_name.partition(".")
            if not first_dot:
                # No extension at all: not a graph file.
                continue

            # Everything after the first extension is the archive suffix;
            # partition keeps this correct even if the extension text
            # happens to occur again later in the file name.
            extension, archive_dot, archive_rest = remainder.partition(".")

            dataset[graph_class][graph_name] = {
                "VersionId": s3.head_object(Bucket=BUCKET_NAME, Key=graph_key)[
                    "VersionId"
                ],
                "FileExtension": "." + extension,
                "ArchiveExtension": archive_dot + archive_rest,
            }

    return dataset
|
||
|
||
def update_dataset(dataset):
    """Overwrite ``cfpq_data/dataset.py`` with the given dataset index.

    The file consists of a single assignment, ``dataset = <json>``, where
    the right-hand side is ``dataset`` serialized by ``json.dumps`` with a
    four-space indent.
    """
    target_path = MAIN_FOLDER / "cfpq_data" / "dataset.py"
    with open(target_path, "w") as module_file:
        module_file.write(f"dataset = {dumps(dataset, indent=4)}")
|
||
|
||
if __name__ == "__main__":
    # Script entry point: fetch the index and write it straight to disk.
    update_dataset(fetch_dataset())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from csv import DictWriter | ||
|
||
from cfpq_data import graph_from_dataset | ||
from config import MAIN_FOLDER | ||
from fetch_dataset import fetch_dataset | ||
|
||
|
||
def update_dataset_tables(dataset):
    """Write one CSV statistics table per graph class into ``docs/dataset``.

    For every graph class in ``dataset`` a file ``<graph_class>.csv`` is
    (re)created with the columns ``Graph``, ``#Vertices`` and ``#Edges``.
    Each graph is loaded with ``graph_from_dataset`` and its node and edge
    counts are written with thousands separators.

    Args:
        dataset: Mapping of graph class -> mapping keyed by graph name
            (only the keys of the inner mapping are used).
    """
    fieldnames = ["Graph", "#Vertices", "#Edges"]

    for graph_class, graphs in dataset.items():
        table_path = MAIN_FOLDER / "docs" / "dataset" / f"{graph_class}.csv"
        # newline="" is required by the csv module; without it the writer's
        # "\r\n" row terminator becomes "\r\r\n" on platforms that translate
        # newlines.
        with open(table_path, mode="w", newline="") as csv_file:
            csv_writer = DictWriter(csv_file, fieldnames=fieldnames)
            csv_writer.writeheader()

            for graph_name in graphs:
                graph = graph_from_dataset(graph_name)
                csv_writer.writerow(
                    {
                        "Graph": graph_name,
                        "#Vertices": f"{graph.number_of_nodes():,}",
                        "#Edges": f"{graph.number_of_edges():,}",
                    }
                )
|
||
|
||
if __name__ == "__main__":
    # Script entry point: fetch the index and rebuild the CSV tables from it.
    update_dataset_tables(fetch_dataset())