
Commit

removed hardcoding
dup05 committed Mar 7, 2024
1 parent d2c449f commit 0fadfd5
Showing 5 changed files with 36 additions and 78 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/cleanup/action.yml
@@ -10,7 +10,7 @@ inputs:
dataset:
description: "dataset"
required: true
input_gcs_bucket:
input_gcs_bucket_path:
description: "Bucket with run time created files"
required: true
job_type:
@@ -27,4 +27,9 @@ runs:
- name: Cleanup GCS files
if: always() && inputs.job_type == 'streaming'
shell: bash
run: gcloud storage rm gs://${{inputs.input_gcs_bucket}}/*
run: gcloud storage rm ${{inputs.input_gcs_bucket_path}}

- name: Delete pub/sub notification config
if: always() && inputs.job_type == 'streaming'
shell: bash
run: gsutil notification delete ${{inputs.input_gcs_bucket_path}}
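
With the rename from input_gcs_bucket to input_gcs_bucket_path, the cleanup action now receives a full gs:// object path (wildcards included) rather than a bare bucket name, and the same input drives both teardown steps. A minimal local sketch of the equivalent commands, using a placeholder path; note that gsutil notification delete operates on a bucket URL, so the sketch strips the object suffix before calling it:

    # Placeholder standing in for the run-time value of inputs.input_gcs_bucket_path
    input_gcs_bucket_path="gs://example-streaming-input/*.csv"

    # Remove the files created during the streaming run
    gcloud storage rm "$input_gcs_bucket_path"

    # Derive the bucket URL (third /-separated field) and drop its notification configs
    bucket_url="gs://$(echo "$input_gcs_bucket_path" | awk -F "/" '{print $3}')"
    gsutil notification delete "$bucket_url"
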
60 changes: 3 additions & 57 deletions .github/workflows/configs/load_tests_details.json
@@ -6,69 +6,15 @@
"file_size": "100MB/min",
"gcs_file_path": "gs://input_load_test_streaming_job/*.csv",
"deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019",
"source_file_bucket": "input_dlp_load_test_2",
"source_file_pattern": "largecsv100MB.csv"
},
{
"name": "Streaming2_csv",
"type": "streaming",
"file_type": "CSV",
"file_size": "500MB/min",
"gcs_file_path": "gs://input_load_test_streaming_job/*.csv",
"deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019",
"source_file_bucket": "input_dlp_load_test_2",
"source_file_pattern": "largecsv500MB.csv"
"raw_file_pattern": "gs://input_dlp_load_test_2/largecsv100MB.csv"
},
{
"name": "Batch1_csv",
"type": "batch",
"file_type": "CSV",
"file_size": "500MB",
"gcs_file_path": "gs://input_dlp_load_test_2/largecsv500MB.csv",
"deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"

},
{
"name": "Batch2_csv",
"type": "batch",
"file_type": "CSV",
"file_size": "1GB",
"gcs_file_path": "gs://input_dlp_load_test_2/largecsv1GB.csv",
"file_size": "100MB",
"gcs_file_path": "gs://input_dlp_load_test_2/largecsv100MB.csv",
"deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"

},
{
"name": "Batch3_csv",
"type": "batch",
"file_type": "CSV",
"file_size": "2GB",
"gcs_file_path": "gs://input_dlp_load_test_2/largecsv2GB.csv",
"deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
},
{
"name": "Batch1_avro",
"type": "batch",
"file_type": "AVRO",
"file_size": "500MB",
"gcs_file_path": "gs://input_dlp_load_test_2/largeavro500MB.avro",
"deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"

},
{
"name": "Batch2_avro",
"type": "batch",
"file_type": "AVRO",
"file_size": "750MB",
"gcs_file_path": "gs://input_dlp_load_test_2/largeavro750MB.avro",
"deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"

},
{
"name": "Batch3_avro",
"type": "batch",
"file_type": "AVRO",
"file_size": "1500MB",
"gcs_file_path": "gs://input_dlp_load_test_2/largeavro1500MB.avro",
"deid_template": "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
}
]
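
Each object in this pared-down config becomes one matrix row for the load-test workflow (see the matrix generation in load-testing-main.yml below). An illustrative jq one-liner to confirm the trimmed file still parses and to list the surviving tests:

    jq -r '.[] | [.name, .type, .file_size] | @tsv' .github/workflows/configs/load_tests_details.json
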
6 changes: 4 additions & 2 deletions .github/workflows/execute-copy-workflow/action.yml
@@ -5,7 +5,7 @@ inputs:
raw_bucket:
description: "GCS Raw bucket name"
required: true
source_file:
raw_file_pattern:
description: "File name pattern"
required: true
input_gcs_bucket:
@@ -28,14 +28,16 @@ runs:
- name: Execute the workflow
shell: bash
run: |
raw_file_pattern=$(echo "${{inputs.raw_file_pattern}}" | awk -F "/" '{print $NF}')
raw_bucket=$(gcloud storage objects list gs://input_dlp_load_test_2/*.csv | grep "bucket:" | head -1 | awk '{print $2}')
not_finished=true
num_executions=1
while [ $num_executions -le 10 ];
do
echo "Executing workflow: "
gcloud workflows run ${{inputs.workflow_name}} \
--call-log-level=log-errors-only \
--data='{"input_bucket": "${{inputs.input_gcs_bucket}}","raw_bucket": "${{inputs.raw_bucket}}","source_file": "${{inputs.source_file}}"}'
--data="{\"input_bucket\": \"${{inputs.input_gcs_bucket}}\",\"raw_bucket\": \"$raw_bucket\",\"source_file\": \"$raw_file_pattern\"}"
num_executions=$((num_executions+1))
sleep 60s
done
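
Both derived variables exist to replace the old hardcoded inputs: awk takes the last /-separated field of raw_file_pattern as the object name, while the bucket name comes back from the gcloud object listing. An equivalent stand-alone sketch, under the assumption that raw_file_pattern is always a full gs:// path (which would also make the bucket derivable without the listing call):

    raw_file_pattern_input="gs://example-raw-bucket/largecsv100MB.csv"   # placeholder input

    # Last path component -> object name: largecsv100MB.csv
    raw_file_pattern=$(echo "$raw_file_pattern_input" | awk -F "/" '{print $NF}')

    # Third /-separated field of gs://bucket/object -> bucket name: example-raw-bucket
    raw_bucket=$(echo "$raw_file_pattern_input" | awk -F "/" '{print $3}')
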
29 changes: 13 additions & 16 deletions .github/workflows/load-testing-main.yml
@@ -22,6 +22,7 @@ env:
INSPECT_TEMPLATE: "projects/dlp-dataflow-load-test/inspectTemplates/dlp-demo-inspect-latest-1706594483019"
DEID_TEMPLATE: "projects/dlp-dataflow-load-test/deidentifyTemplates/dlp-demo-deid-latest-1706594483019"
PUB_SUB_TOPIC: "projects/dlp-dataflow-load-test/topics/load_test_pub_sub_topic"
CLOUD_WORKFLOW: "generate_files_workflow"


jobs:
@@ -54,16 +55,9 @@
id: set-matrix
run: |
matrix=$(jq -c . < .github/workflows/configs/load_tests_details.json)
echo "matrix={\"include\":$(echo $matrix)}" >> $GITHUB_OUTPUT
echo "matrix={\"include\":$(echo $matrix)}" >> $GITHUB_OUTPUT
- name: Test if
id: test-step
if: always() && env.REGION == 'us-west2'
run : |
echo "If condition true"
run-load-test:
run-test:
needs:
- generate-uuid
- pre-processing
@@ -84,31 +78,34 @@
echo "dataset=dataset_${{needs.generate-uuid.outputs.uuid}}_${{matrix.name}}" >> $GITHUB_OUTPUT
echo "Test details: ${{matrix.name}}"
echo "job_name=load-test-${{needs.generate-uuid.outputs.uuid}}-test-$test_name"
echo "dataset=dataset_${{needs.generate-uuid.outputs.uuid}}_${{matrix.name}}"
echo "dataset=dataset_${{needs.generate-uuid.outputs.uuid}}_${{matrix.name}}"
input_gcs_bucket=$(echo "${{ matrix.gcs_file_path }}" | awk -F "/" '{print $3}')
echo "input_gcs_bucket=$input_gcs_bucket" >> $GITHUB_OUTPUT
- name: Submit dataflow job
id: submit-dataflow-job
uses: ./.github/workflows/submit-dataflow-job
with:
project_id: ${{env.PROJECT_ID}}
input_gcs_bucket: "input_dlp_load_test_2"
input_gcs_bucket: ${{ steps.set-job-params.outputs.input_gcs_bucket }}
gcs_file_path: ${{ matrix.gcs_file_path }}
dataset: ${{ steps.set-job-params.outputs.dataset }}
inspect_template: ${{env.INSPECT_TEMPLATE}}
deid_template: ${{ matrix.deid_template }}
job_name: ${{steps.set-job-params.outputs.job_name}}
job_type: ${{ matrix.type }}
gcs_notification_topic: ${{env.PUB_SUB_TOPIC}}

- name: execute copy files workflow for streaming jobs
id: copy-files
if: always() && matrix.type == 'streaming'
uses: ./.github/workflows/execute-copy-workflow
with:
raw_bucket: ${{ matrix.source_file_bucket }}
source_file: ${{ matrix.source_file_pattern }}
input_gcs_bucket: "input_load_test_streaming_job"
raw_file_pattern: ${{ matrix.raw_file_pattern }}
input_gcs_bucket: ${{ steps.set-job-params.outputs.input_gcs_bucket }}
job_id: ${{steps.submit-dataflow-job.outputs.job_id}}
workflow_name: "generate_files_workflow"
workflow_name: ${{env.CLOUD_WORKFLOW}}
region: ${{env.REGION}}

- name: Poll till job finishes
@@ -133,14 +130,14 @@
project_id: ${{env.PROJECT_ID}}
job_id: ${{steps.submit-dataflow-job.outputs.job_id}}
dataset: ${{steps.set-job-params.outputs.dataset}}
input_gcs_bucket: "input_load_test_streaming_job"
input_gcs_bucket_path: ${{ matrix.gcs_file_path }}
job_type: ${{ matrix.type }}

publish-test-results:
needs:
- generate-uuid
- pre-processing
- run-load-test
- run-test
runs-on: [self-hosted, load-testing]
steps:
- uses: actions/checkout@v2
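
The job-level hardcoding goes away the same way: input_gcs_bucket is now computed from the matrix entry's gcs_file_path with the same awk field split. For a sample matrix value taken from the config above:

    gcs_file_path="gs://input_load_test_streaming_job/*.csv"   # value from the matrix
    input_gcs_bucket=$(echo "$gcs_file_path" | awk -F "/" '{print $3}')
    echo "$input_gcs_bucket"   # prints: input_load_test_streaming_job
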
10 changes: 9 additions & 1 deletion .github/workflows/submit-dataflow-job/action.yml
@@ -25,6 +25,9 @@ inputs:
job_type:
description: "Dataflow job type - streaming/batch"
required: false
gcs_notification_topic:
description: "Pub/sub topic"
required: false
outputs:
job_id:
description: "Job id of deid pipeline"
@@ -48,6 +51,11 @@
shell: bash
run: bq --location=US mk -d --description "GitHub load test dataset" ${{inputs.dataset}}

- name: Create pub/sub notification config
if: always() && inputs.job_type == 'streaming'
shell: bash
run: gsutil notification create -e OBJECT_FINALIZE -t ${{inputs.gcs_notification_topic}} -f json gs://${{inputs.input_gcs_bucket}}

- name: Run Batch DLP Pipeline
if: always() && inputs.job_type == 'batch'
shell: bash
@@ -91,7 +99,7 @@ runs:
--serviceAccount=demo-service-account@dlp-dataflow-load-test.iam.gserviceaccount.com \
--jobName=${{ inputs.job_name }} \
--processExistingFiles=false \
--gcsNotificationTopic=projects/dlp-dataflow-load-test/topics/load_test_pub_sub_topic"
--gcsNotificationTopic=${{inputs.gcs_notification_topic}}"
- name: Get Job ID #TODO: Poll till status not active
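
The new notification step is what lets the streaming pipeline discover files as they land: every OBJECT_FINALIZE event in the input bucket is published as JSON to the supplied topic, and the same topic is handed to the pipeline via --gcsNotificationTopic. A sketch of the wiring with placeholder names, plus a listing call to verify it afterwards:

    # Placeholders standing in for inputs.input_gcs_bucket and inputs.gcs_notification_topic
    bucket="example-streaming-input"
    topic="projects/example-project/topics/example_topic"

    gsutil notification create -e OBJECT_FINALIZE -t "$topic" -f json "gs://$bucket"

    # List the configs on the bucket to confirm the topic is attached
    gsutil notification list "gs://$bucket"
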
