From 89cf9c17882717d72f515ecf0c6d9e5a4a450ad9 Mon Sep 17 00:00:00 2001 From: rderbier Date: Tue, 14 Jan 2025 16:52:42 -0800 Subject: [PATCH] add IBM Anti Money Laundering (aml) dataset --- data/aml/AML_Trans.schema | 37 ++++++++++++++++++++++++++++++++ data/aml/HI-Small_Trans.rdf.gz | 3 +++ data/aml/HI-Small_Trans.template | 23 ++++++++++++++++++++ data/aml/README.md | 14 ++++++++++++ 4 files changed, 77 insertions(+) create mode 100644 data/aml/AML_Trans.schema create mode 100644 data/aml/HI-Small_Trans.rdf.gz create mode 100644 data/aml/HI-Small_Trans.template create mode 100644 data/aml/README.md diff --git a/data/aml/AML_Trans.schema b/data/aml/AML_Trans.schema new file mode 100644 index 0000000..b60382f --- /dev/null +++ b/data/aml/AML_Trans.schema @@ -0,0 +1,37 @@ +: uid . +: string @index(hash) . +: string @index(hash) . +: float @index(float) . +: float @index(float) . +: uid @reverse . +: string . +: string @index(hash) . +: string . +: uid @reverse . +: datetime @index(day) . +: bool @index(bool) . +: string @index(hash) . + +: string @index(exact) @upsert . + +type { + +} + +type { + + +} + +type { + + + + + + + + + + +} diff --git a/data/aml/HI-Small_Trans.rdf.gz b/data/aml/HI-Small_Trans.rdf.gz new file mode 100644 index 0000000..a7bc10c --- /dev/null +++ b/data/aml/HI-Small_Trans.rdf.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66a9f0656a50495bd268d03dccd0f1df4523a30eeca94129db4cb8ce1469c908 +size 164364288 diff --git a/data/aml/HI-Small_Trans.template b/data/aml/HI-Small_Trans.template new file mode 100644 index 0000000..519d1a1 --- /dev/null +++ b/data/aml/HI-Small_Trans.template @@ -0,0 +1,23 @@ +# Timestamp,From Bank,Account,To Bank,Account,Amount Received,Receiving Currency,Amount Paid,Payment Currency,Payment Format,Is Laundering + +<_:Bank_[From Bank]> "Bank" . +<_:Bank_[From Bank]> "[From Bank]" . +<_:Bank_[To Bank]> "Bank" . +<_:Bank_[To Bank]> "[To Bank]" . +<_:A_[From Account]> "Account" . 
+<_:A_[From Account]> "[From Account]" . +<_:A_[From Account]> <_:Bank_[From Bank]> . +<_:A_[To Account]> "Account" . +<_:A_[To Account]> "[To Account]" . +<_:A_[To Account]> <_:Bank_[To Bank]> . +<_:T_[LINENUMBER]> "[Timestamp],[From Bank],[From Account]" . +<_:T_[LINENUMBER]> "Transaction" . +<_:T_[LINENUMBER]> <_:A_[From Account]> . +<_:T_[LINENUMBER]> <_:A_[To Account]> . +<_:T_[LINENUMBER]> "=datetime([Timestamp],%Y/%m/%d %H:%M)" . +<_:T_[LINENUMBER]> "[Amount Received]" . +<_:T_[LINENUMBER]> "[Receiving Currency]" . +<_:T_[LINENUMBER]> "[Amount Paid]" . +<_:T_[LINENUMBER]> "[Payment Currency]" . +<_:T_[LINENUMBER]> "[Payment Format]" . +<_:T_[LINENUMBER]> "[Is Laundering]" . diff --git a/data/aml/README.md b/data/aml/README.md new file mode 100644 index 0000000..22663d3 --- /dev/null +++ b/data/aml/README.md @@ -0,0 +1,14 @@ + +IBM Transactions for Anti Money Laundering (AML) +from +https://www.kaggle.com/datasets/ealtman2019/ibm-transactions-for-anti-money-laundering-aml + +This folder contains a schema file and an RDF file created from +HI-Small_Trans.csv + +The RDF file was created with the [csv_to_rdf](https://github.com/hypermodeinc/dgraph-experimental/blob/main/data-import/csv-to-rdf/csv_to_rdf.py) Python script from +the dgraph-experimental repository, using the template file provided in this folder. + +See the corresponding HI-Small_Patterns.txt file (part of the Kaggle dataset) for the generated fraudulent transactions. These transactions are labeled with Transaction.laundering set to true in the dataset. + +