Updated project utils

FormalLanguageConstrainedPathQuerying · May 8, 2021 · 29c9f84 · 29c9f84
1 parent c5deb9d
commit 29c9f84
Show file tree

Hide file tree

Showing 5 changed files with 82 additions and 136 deletions.
diff --git a/meta_table_for_readme.py b/meta_table_for_readme.py
diff --git a/script.py b/script.py
diff --git a/utils/config.py b/utils/config.py
@@ -0,0 +1,3 @@
+from pathlib import Path
+
+# Repository root: this file lives in utils/, so go up two levels from it.
+_UTILS_FOLDER = Path(__file__).parent
+MAIN_FOLDER = _UTILS_FOLDER.parent
diff --git a/utils/fetch_dataset.py b/utils/fetch_dataset.py
@@ -0,0 +1,44 @@
+from collections import defaultdict
+from json import dumps
+
+from boto3 import client
+
+from cfpq_data import __version__ as cfpq_data_version
+from cfpq_data.config import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, BUCKET_NAME
+from config import MAIN_FOLDER
+
+
+def fetch_dataset():
+    """Build the dataset index from the objects stored in the S3 bucket.
+
+    Lists every object in ``BUCKET_NAME`` whose key starts with
+    ``cfpq_data.__version__`` and parses each key, which is expected to
+    look like ``<prefix>/<graph_class>/<name>.<ext>[<archive ext>]``.
+
+    Returns:
+        A ``defaultdict`` mapping graph class -> graph name -> a dict with
+        the keys ``"VersionId"``, ``"FileExtension"`` and
+        ``"ArchiveExtension"``. It is empty when no object matches.
+
+    Raises:
+        ValueError: If a key does not consist of exactly three
+            "/"-separated parts, or its file name contains no ".".
+    """
+    s3 = client(
+        "s3",
+        aws_access_key_id=AWS_ACCESS_KEY_ID,
+        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+    )
+
+    dataset = defaultdict(dict)
+
+    # One list_objects call returns at most 1000 keys, so walk every page.
+    # BUCKET_NAME is used for listing as well as for head_object below so the
+    # two calls can never address different buckets.
+    # NOTE(review): assumes BUCKET_NAME is the bucket previously hard-coded
+    # here as "cfpq-data" - confirm against cfpq_data.config.
+    pages = s3.get_paginator("list_objects").paginate(
+        Bucket=BUCKET_NAME, Prefix=cfpq_data_version
+    )
+
+    for page in pages:
+        # "Contents" is missing from the response when nothing matches.
+        for graph in page.get("Contents", []):
+            graph_key = graph["Key"]
+            # Drop the leading prefix part; exactly two parts must remain,
+            # otherwise the unpacking raises ValueError.
+            graph_class, graph_full_name = graph_key.split("/")[1:]
+
+            graph_name, dot, remainder = graph_full_name.partition(".")
+            if not dot:
+                raise ValueError(
+                    f"Object key {graph_key!r} has no file extension"
+                )
+            # Everything after the first extension is the archive extension.
+            # Partitioning (instead of splitting on the extension text) stays
+            # correct when that text recurs, e.g. ".t" inside ".tar.gz".
+            extension, archive_dot, archive_rest = remainder.partition(".")
+
+            dataset[graph_class][graph_name] = {
+                "VersionId": s3.head_object(Bucket=BUCKET_NAME, Key=graph_key)[
+                    "VersionId"
+                ],
+                "FileExtension": "." + extension,
+                "ArchiveExtension": archive_dot + archive_rest,
+            }
+
+    return dataset
+
+
+def update_dataset(dataset):
+    """Regenerate ``cfpq_data/dataset.py`` under ``MAIN_FOLDER``.
+
+    The file is overwritten with a single assignment of the form
+    ``dataset = <JSON dump of *dataset*, indented by 4 spaces>``.
+    """
+    target_path = MAIN_FOLDER / "cfpq_data" / "dataset.py"
+    with open(target_path, "w") as module_file:
+        serialized = dumps(dataset, indent=4)
+        module_file.write("dataset = " + serialized)
+
+
+if __name__ == "__main__":
+    # Script entry point: fetch the index from S3 and write it to disk.
+    update_dataset(fetch_dataset())
diff --git a/utils/update_dataset_tables.py b/utils/update_dataset_tables.py
@@ -0,0 +1,35 @@
+from csv import DictWriter
+
+from cfpq_data import graph_from_dataset
+from config import MAIN_FOLDER
+from fetch_dataset import fetch_dataset
+
+
+def update_dataset_tables(dataset):
+    """Write one CSV table of graph sizes per graph class.
+
+    For every graph class in *dataset* the file
+    ``docs/dataset/<graph_class>.csv`` under ``MAIN_FOLDER`` is overwritten
+    with the columns "Graph", "#Vertices" and "#Edges". Each graph is loaded
+    with ``graph_from_dataset`` and its node and edge counts are written with
+    "," as the thousands separator.
+
+    Args:
+        dataset: Mapping of graph class -> iterable of graph names, such as
+            the mapping returned by ``fetch_dataset``.
+    """
+    fieldnames = ["Graph", "#Vertices", "#Edges"]
+
+    for graph_class, graph_names in dataset.items():
+        table_path = MAIN_FOLDER / "docs" / "dataset" / f"{graph_class}.csv"
+        # newline="" is what the csv module requires for files it writes to;
+        # without it rows end in "\r\r\n" on platforms that translate "\n".
+        with open(table_path, mode="w", newline="") as csv_file:
+            csv_writer = DictWriter(csv_file, fieldnames=fieldnames)
+            csv_writer.writeheader()
+
+            for graph_name in graph_names:
+                graph = graph_from_dataset(graph_name)
+                csv_writer.writerow(
+                    {
+                        "Graph": graph_name,
+                        "#Vertices": f"{graph.number_of_nodes():,}",
+                        "#Edges": f"{graph.number_of_edges():,}",
+                    }
+                )
+
+
+if __name__ == "__main__":
+    # Script entry point: fetch the index from S3 and rebuild the CSV tables.
+    update_dataset_tables(fetch_dataset())
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from pathlib import Path

		MAIN_FOLDER = Path(__file__).parent.parent