Skip to content

Commit

Permalink
feat: add AMI house keeping lambda (github-aws-runners#3570)
Browse files Browse the repository at this point in the history
## Description
It is quite common to build AMIs with Packer, and cleaning up old images can be
a challenge. This housekeeping lambda can remove old AMIs based on a few
criteria.

The AMI housekeeper is implemented as an extra terraform module with a
lambda that runs once a day. The housekeeper can be configured to exclude
images based on references in SSM and on tags. The module is
disabled by default and can be used as part of the main module,
multi-runner or stand alone.

This PR also updates outdated lambda dependencies. And updates terraform
workflows to 1.5.6 as main version.

## Tested

- [x] default example (with new housekeeper)
- [x] multi runner example

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
npalm and github-actions[bot] authored Oct 30, 2023
1 parent 76f45dc commit 87104e8
Show file tree
Hide file tree
Showing 39 changed files with 2,599 additions and 950 deletions.
29 changes: 14 additions & 15 deletions .github/workflows/terraform.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ on:
branches:
- main
pull_request:
paths-ignore:
- "modules/*/lambdas/**"
paths: ["**/*.tf", "**/*.hcl", ".github/workflows/terraform.yml"]

permissions:
contents: read
Expand All @@ -18,7 +17,7 @@ jobs:
name: Verify module
strategy:
matrix:
terraform: [1.3.2, "latest"]
terraform: [1.5.6, "latest"]
runs-on: ubuntu-latest
container:
image: hashicorp/terraform:${{ matrix.terraform }}
Expand All @@ -30,9 +29,10 @@ jobs:
touch lambdas/functions/webhook/webhook.zip
touch lambdas/functions/control-plane/runners.zip
touch lambdas/functions/gh-agent-syncer/runner-binaries-syncer.zip
touch lambdas/functions/ami-housekeeper/ami-housekeeper.zip
- name: terraform init
run: terraform init -get -backend=false -input=false
- if: contains(matrix.terraform, '1.3.')
- if: contains(matrix.terraform, '1.5.')
name: check terraform formatting
run: terraform fmt -recursive -check=true -write=false
- if: contains(matrix.terraform, 'latest') # check formatting for the latest release but avoid failing the build
Expand All @@ -41,17 +41,17 @@ jobs:
continue-on-error: true
- name: validate terraform
run: terraform validate
- if: contains(matrix.terraform, '1.3.')
- if: contains(matrix.terraform, '1.5.')
name: Fix for actions/cache on alpine
run: apk add --no-cache tar
continue-on-error: true
- if: contains(matrix.terraform, '1.3.')
- if: contains(matrix.terraform, '1.5.')
uses: actions/cache@v3.3.1
name: Cache TFLint plugin dir
with:
path: ~/.tflint.d/plugins
key: tflint-${{ hashFiles('.tflint.hcl') }}
- if: contains(matrix.terraform, '1.3.')
- if: contains(matrix.terraform, '1.5.')
name: Setup TFLint
uses: terraform-linters/setup-tflint@v4
with:
Expand All @@ -69,7 +69,7 @@ jobs:
matrix:
terraform: [1.3.2, "latest"]
module:
["download-lambda", "multi-runner", "runner-binaries-syncer", "runners", "setup-iam-permissions", "ssm", "webhook"]
["ami-housekeeper", "download-lambda", "multi-runner", "runner-binaries-syncer", "runners", "setup-iam-permissions", "ssm", "webhook"]
defaults:
run:
working-directory: modules/${{ matrix.module }}
Expand Down Expand Up @@ -116,7 +116,7 @@ jobs:
strategy:
fail-fast: false
matrix:
terraform: [1.3.2, "latest"]
terraform: [1.5.6, "latest"]
example:
["default", "ubuntu", "prebuilt", "arm64", "ephemeral", "windows", "multi-runner"]
defaults:
Expand All @@ -129,7 +129,7 @@ jobs:
- uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608
- name: terraform init
run: terraform init -get -backend=false -input=false
- if: contains(matrix.terraform, '1.3.')
- if: contains(matrix.terraform, '1.5.')
name: check terraform formatting
run: terraform fmt -recursive -check=true -write=false
- if: contains(matrix.terraform, 'latest') # check formatting for the latest release but avoid failing the build
Expand All @@ -138,25 +138,24 @@ jobs:
continue-on-error: true
- name: validate terraform
run: terraform validate
- if: contains(matrix.terraform, '1.3.')
- if: contains(matrix.terraform, '1.5.')
name: Fix for actions/cache on alpine
run: apk add --no-cache tar
continue-on-error: true
- if: contains(matrix.terraform, '1.3.')
- if: contains(matrix.terraform, '1.5.')
uses: actions/cache@v3.3.1
name: Cache TFLint plugin dir
with:
path: ~/.tflint.d/plugins
key: tflint-${{ hashFiles('.tflint.hcl') }}
- if: contains(matrix.terraform, '1.3.')
- if: contains(matrix.terraform, '1.5.')
name: Setup TFLint
uses: terraform-linters/setup-tflint@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- if: contains(matrix.terraform, '1.3.')
- if: contains(matrix.terraform, '1.5.')
name: Run TFLint
working-directory: ${{ github.workspace }}
run: |
tflint --init -c ${GITHUB_WORKSPACE}/.tflint.hcl --chdir modules/${{ matrix.module }}
tflint -f compact -c ${GITHUB_WORKSPACE}/.tflint.hcl --var-file ${GITHUB_WORKSPACE}/.github/lint/tflint.tfvars --chdir examples/${{ matrix.example }}
4 changes: 2 additions & 2 deletions .github/workflows/update-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ name: Update docs
on:
push:
paths:
- "*.tf"
- "*.md"
- "**/*.tf"
- "**/*.md"
- ".github/workflows/update-docs.yml"

permissions:
Expand Down
4 changes: 4 additions & 0 deletions .vscode/gh-runners.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
"name": "✨ root",
"path": ".."
},
{
"name": "🚀 @lambda/functions/ami-housekeeper",
"path": "../lambdas/functions/ami-housekeeper"
},
{
"name": "🚀 @lambda/functions/control-plane",
"path": "../lambdas/functions/control-plane"
Expand Down
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ The example for [ephemeral runners](./examples/ephemeral) is based on the [defau

### Prebuilt Images

This module also allows you to run agents from a prebuilt AMI to gain faster startup times. You can find more information in [the image README.md](/images/README.md)
This module also allows you to run agents from a prebuilt AMI to gain faster startup times. The module provides several examples to build your own custom AMI. To remove old images, an [AMI housekeeper module](./modules/ami-housekeeper/README.md) can be used. You can find more information in [the image README.md](/images/README.md) for building custom images.

### Experimental - Optional queue to publish GitHub workflow job events

Expand Down Expand Up @@ -468,6 +468,7 @@ We welcome any improvement to the standard module to make the default as secure

| Name | Source | Version |
|------|--------|---------|
| <a name="module_ami_housekeeper"></a> [ami\_housekeeper](#module\_ami\_housekeeper) | ./modules/ami-housekeeper | n/a |
| <a name="module_runner_binaries"></a> [runner\_binaries](#module\_runner\_binaries) | ./modules/runner-binaries-syncer | n/a |
| <a name="module_runners"></a> [runners](#module\_runners) | ./modules/runners | n/a |
| <a name="module_ssm"></a> [ssm](#module\_ssm) | ./modules/ssm | n/a |
Expand All @@ -491,6 +492,12 @@ We welcome any improvement to the standard module to make the default as secure
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_ami_filter"></a> [ami\_filter](#input\_ami\_filter) | Map of lists used to create the AMI filter for the action runner AMI. | `map(list(string))` | <pre>{<br> "state": [<br> "available"<br> ]<br>}</pre> | no |
| <a name="input_ami_housekeeper_cleanup_config"></a> [ami\_housekeeper\_cleanup\_config](#input\_ami\_housekeeper\_cleanup\_config) | Configuration for AMI cleanup.<br><br> `amiFilters` - Filters to use when searching for AMIs to cleanup. Default filter for images owned by the account and that are available.<br> `dryRun` - If true, no AMIs will be deregistered. Default false.<br> `launchTemplateNames` - Launch template names to use when searching for AMIs to cleanup. Default no launch templates.<br> `maxItems` - The maximum number of AMIs that will be queried for cleanup. Default no maximum.<br> `minimumDaysOld` - Minimum number of days old an AMI must be to be considered for cleanup. Default 30.<br> `ssmParameterNames` - SSM parameter names to use when searching for AMIs to cleanup. This parameter should be set when using SSM to configure the AMI to use. Default no SSM parameters. | <pre>object({<br> amiFilters = optional(list(object({<br> Name = string<br> Values = list(string)<br> })),<br> [{<br> Name : "state",<br> Values : ["available"],<br> },<br> {<br> Name : "image-type",<br> Values : ["machine"],<br> }]<br> )<br> dryRun = optional(bool, false)<br> launchTemplateNames = optional(list(string))<br> maxItems = optional(number)<br> minimumDaysOld = optional(number, 30)<br> ssmParameterNames = optional(list(string))<br> })</pre> | `{}` | no |
| <a name="input_ami_housekeeper_lambda_s3_key"></a> [ami\_housekeeper\_lambda\_s3\_key](#input\_ami\_housekeeper\_lambda\_s3\_key) | S3 key for the AMI housekeeper lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no |
| <a name="input_ami_housekeeper_lambda_s3_object_version"></a> [ami\_housekeeper\_lambda\_s3\_object\_version](#input\_ami\_housekeeper\_lambda\_s3\_object\_version) | S3 object version for the AMI housekeeper lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no |
| <a name="input_ami_housekeeper_lambda_schedule_expression"></a> [ami\_housekeeper\_lambda\_schedule\_expression](#input\_ami\_housekeeper\_lambda\_schedule\_expression) | Scheduler expression for the AMI housekeeper lambda. | `string` | `"rate(1 day)"` | no |
| <a name="input_ami_housekeeper_lambda_timeout"></a> [ami\_housekeeper\_lambda\_timeout](#input\_ami\_housekeeper\_lambda\_timeout) | Time out of the lambda in seconds. | `number` | `300` | no |
| <a name="input_ami_housekeeper_lambda_zip"></a> [ami\_housekeeper\_lambda\_zip](#input\_ami\_housekeeper\_lambda\_zip) | File location of the lambda zip file. | `string` | `null` | no |
| <a name="input_ami_id_ssm_parameter_name"></a> [ami\_id\_ssm\_parameter\_name](#input\_ami\_id\_ssm\_parameter\_name) | Externally managed SSM parameter (of data type aws:ec2:image) that contains the AMI ID to launch runner instances from. Overrides ami\_filter | `string` | `null` | no |
| <a name="input_ami_kms_key_arn"></a> [ami\_kms\_key\_arn](#input\_ami\_kms\_key\_arn) | Optional CMK Key ARN to be used to launch an instance from a shared encrypted AMI | `string` | `null` | no |
| <a name="input_ami_owners"></a> [ami\_owners](#input\_ami\_owners) | The list of owners used to select the AMI of action runner instances. | `list(string)` | <pre>[<br> "amazon"<br>]</pre> | no |
Expand All @@ -501,6 +508,7 @@ We welcome any improvement to the standard module to make the default as secure
| <a name="input_create_service_linked_role_spot"></a> [create\_service\_linked\_role\_spot](#input\_create\_service\_linked\_role\_spot) | (optional) create the service linked role for spot instances that is required by the scale-up lambda. | `bool` | `false` | no |
| <a name="input_delay_webhook_event"></a> [delay\_webhook\_event](#input\_delay\_webhook\_event) | The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event. | `number` | `30` | no |
| <a name="input_disable_runner_autoupdate"></a> [disable\_runner\_autoupdate](#input\_disable\_runner\_autoupdate) | Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/) | `bool` | `false` | no |
| <a name="input_enable_ami_housekeeper"></a> [enable\_ami\_housekeeper](#input\_enable\_ami\_housekeeper) | Option to enable the lambda to clean up old AMIs. | `bool` | `false` | no |
| <a name="input_enable_cloudwatch_agent"></a> [enable\_cloudwatch\_agent](#input\_enable\_cloudwatch\_agent) | Enables the cloudwatch agent on the ec2 runner instances. The runner uses a default config that can be overridden via `cloudwatch_config`. | `bool` | `true` | no |
| <a name="input_enable_ephemeral_runners"></a> [enable\_ephemeral\_runners](#input\_enable\_ephemeral\_runners) | Enable ephemeral runners, runners will only be used once. | `bool` | `false` | no |
| <a name="input_enable_event_rule_binaries_syncer"></a> [enable\_event\_rule\_binaries\_syncer](#input\_enable\_event\_rule\_binaries\_syncer) | Option to disable EventBridge Lambda trigger for the binary syncer, useful to stop automatic updates of binary distribution. | `bool` | `true` | no |
Expand Down
13 changes: 13 additions & 0 deletions examples/default/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,19 @@ module "runners" {

# Enable debug logging for the lambda functions
# log_level = "debug"

enable_ami_housekeeper = true
ami_housekeeper_cleanup_config = {
ssmParameterNames = ["*/ami-id"]
minimumDaysOld = 10
amiFilters = [
{
Name = "name"
Values = ["*al2023*"]
}
]
}

}

module "webhook_github_app" {
Expand Down
2 changes: 2 additions & 0 deletions images/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ These images share the same scripting as used in the user-data mechanism in `/mo

The examples in `linux-al2023` and `windows-core-2019` also upload a `start-runner` script that uses the exact same startup process as used in the user-data mechanism. This means that the image created here does not need any extra scripts injected or changes to boot up and connect to GH.

To remove old images, the [AMI housekeeper module](../modules/ami-housekeeper/README.md) can be used.

## Building your own

To build these images you first need to install packer.
Expand Down
17 changes: 17 additions & 0 deletions lambdas/functions/ami-housekeeper/jest.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import type { Config } from 'jest';

import defaultConfig from '../../jest.base.config';

// Coverage floor for this package: every metric must be fully covered,
// otherwise jest fails the run.
const fullCoverage = {
  statements: 100,
  branches: 100,
  functions: 100,
  lines: 100,
};

// Jest configuration for the ami-housekeeper lambda: start from the shared
// base configuration and override only the global coverage threshold.
const config: Config = {
  ...defaultConfig,
  coverageThreshold: { global: fullCoverage },
};

export default config;
48 changes: 48 additions & 0 deletions lambdas/functions/ami-housekeeper/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"name": "@terraform-aws-github-runner/ami-housekeeper",
"version": "1.0.0",
"main": "lambda.ts",
"license": "MIT",
"scripts": {
"start": "ts-node-dev src/local.ts",
"test": "NODE_ENV=test jest",
"test:watch": "NODE_ENV=test jest --watch",
"lint": "yarn eslint src",
"watch": "ts-node-dev --respawn --exit-child src/local.ts",
"build": "ncc build src/lambda.ts -o dist",
"dist": "yarn build && cd dist && zip ../ami-housekeeper.zip index.js",
"format": "prettier --write \"**/*.ts\"",
"format-check": "prettier --check \"**/*.ts\"",
"all": "yarn build && yarn format && yarn lint && yarn test"
},
"devDependencies": {
"@trivago/prettier-plugin-sort-imports": "^4.2.1",
"@types/aws-lambda": "^8.10.125",
"@types/jest": "^29.5.6",
"@types/node": "^20.8.9",
"@typescript-eslint/eslint-plugin": "^6.9.0",
"@typescript-eslint/parser": "^6.9.0",
"@vercel/ncc": "^0.38.1",
"aws-sdk-client-mock": "^3.0.0",
"aws-sdk-client-mock-jest": "^3.0.0",
"eslint": "^8.52.0",
"eslint-plugin-prettier": "5.0.1",
"jest": "^29.5",
"jest-mock": "^29.5.0",
"jest-mock-extended": "^3.0.4",
"nock": "^13.3.6",
"prettier": "3.0.3",
"ts-jest": "^29.1.0",
"ts-node": "^10.9.1",
"ts-node-dev": "^2.0.0"
},
"dependencies": {
"@aws-sdk/client-ec2": "^3.436.0",
"@aws-sdk/client-ssm": "^3.436.0",
"@aws-sdk/types": "^3.433.0",
"@terraform-aws-github-runner/aws-powertools-util": "*",
"@terraform-aws-github-runner/aws-ssm-util": "*",
"cron-parser": "^4.8.1",
"typescript": "^5.0.4"
}
}
Loading

0 comments on commit 87104e8

Please sign in to comment.