Skip to content

Commit

Permalink
Allow specifying precise timeframes that should be downloaded from S3 (
Browse files Browse the repository at this point in the history
  • Loading branch information
flosell committed Dec 22, 2017
1 parent 360758c commit 4327f45
Show file tree
Hide file tree
Showing 12 changed files with 172 additions and 47 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
* Support for CloudTrail `lookup_events` API that allows users to generate a policy without downloading logs from an S3 bucket.
Note that this API only returns _["create, modify, and delete API calls"](https://docs.aws.amazon.com/awscloudtrail/latest/userguide/view-cloudtrail-events-supported-services.html)_

### Changed

* `trailscraper download` now supports `--from` and `--to` flags to specify the timeframe that should be downloaded. Accepts precise (e.g. "2017-10-12") and relative (e.g. "-2days") arguments.
This replaces the `--past-days` parameter.

## 0.3.1

### Added
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
boto3>=1.4.7
click>=6.7
toolz>=0.8.2
toolz>=0.8.2
dateparser>=0.6.0
pytz>=2017.3
47 changes: 47 additions & 0 deletions tests/integration/cli_download_s3_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import datetime

from backports import tempfile
from click.testing import CliRunner
from moto import mock_s3

from tests.test_utils_s3 import file_content, given_a_bucket, given_an_object, given_a_file
from tests.test_utils_testdata import cloudtrail_data_dir
from trailscraper import cli
from trailscraper.s3_download import download_cloudtrail_logs

TEST_LOG_KEY = "some-prefix/AWSLogs/000/CloudTrail/some-region-1/2017/01/01/file_name.json.gz"
TEST_LOG_KEY_EXISTING = "some-prefix/AWSLogs/000/CloudTrail/some-region-1/2017/01/01/file_name_that_exists.json.gz"


@mock_s3
def test_download_log_files_and_skip_existing_files():
    """The ``download`` CLI command fetches missing logs and keeps existing files.

    A mocked bucket holds two log objects for 2017-01-01; one of them already
    exists in the target directory with different content. After running the
    command for that day, the missing file must have been downloaded and the
    pre-existing file must be left untouched.
    """
    with tempfile.TemporaryDirectory() as dirpath:
        given_a_bucket("some-bucket")
        given_an_object("some-bucket", TEST_LOG_KEY, "some-file-content")
        given_an_object("some-bucket", TEST_LOG_KEY_EXISTING, "some-file-content")

        given_a_file(dirpath, TEST_LOG_KEY_EXISTING, "some-content-already-existing")

        runner = CliRunner()
        # --log-dir has to point at the temporary directory: without it the
        # command writes to its default (~/.trailscraper/logs) and the file
        # assertions below would not observe what the CLI actually did.
        result = runner.invoke(cli.root_group, args=[
            "download",
            "--bucket", "some-bucket",
            "--region", "some-region-1",
            "--account-id", "000",
            "--prefix", "some-prefix/",
            "--log-dir", dirpath,
            "--from", "2017-01-01",
            "--to", "2017-01-01",
        ])
        assert result.exit_code == 0

        assert file_content(dirpath, TEST_LOG_KEY) == "some-file-content"
        assert file_content(dirpath, TEST_LOG_KEY_EXISTING) == "some-content-already-existing"
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,6 @@
from trailscraper import cli


def test_should_output_help_message_by_default():
runner = CliRunner()
result = runner.invoke(cli.root_group)
assert result.exit_code == 0
assert 'Usage:' in result.output


def test_should_output_an_iam_policy_for_a_set_of_cloudtrail_records():
runner = CliRunner()
result = runner.invoke(cli.root_group, args=["generate-policy", "--log-dir", cloudtrail_data_dir()])
Expand Down
10 changes: 10 additions & 0 deletions tests/integration/cli_help_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from click.testing import CliRunner

from trailscraper import cli


def test_should_output_help_message_by_default():
    """Invoking the root command group without arguments exits 0 and prints usage."""
    outcome = CliRunner().invoke(cli.root_group)

    assert outcome.exit_code == 0
    assert 'Usage:' in outcome.output
52 changes: 30 additions & 22 deletions tests/s3/key_prefixes_test.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,51 @@
from freezegun import freeze_time
import datetime

from trailscraper.s3_download import _s3_key_prefixes


@freeze_time("2017-01-01")
def test_should_generate_a_single_prefix_for_today():
def test_should_generate_prefixes_for_one_day():
assert _s3_key_prefixes(prefix="some-prefix/",
past_days=0,
account_ids=["111"],
regions=["some-region-1"]) == \
["some-prefix/AWSLogs/111/CloudTrail/some-region-1/2017/01/01"]
account_ids=["000"],
regions=["some-region-1"],
from_date=datetime.datetime(2017, 1, 1),
to_date=datetime.datetime(2017, 1, 1)) == \
["some-prefix/AWSLogs/000/CloudTrail/some-region-1/2017/01/01"]


@freeze_time("2017-01-01")
def test_should_generate_prefixes_for_multiple_accounts_today():
def test_should_generate_prefixes_for_multiple_accounts_on_one_day():
assert _s3_key_prefixes(prefix="some-prefix/",
past_days=0,
from_date=datetime.datetime(2017, 1, 1),
to_date=datetime.datetime(2017, 1, 1),
account_ids=["000", "111"],
regions=["some-region-1"]) == \
["some-prefix/AWSLogs/000/CloudTrail/some-region-1/2017/01/01",
"some-prefix/AWSLogs/111/CloudTrail/some-region-1/2017/01/01"]


@freeze_time("2017-01-01")
def test_should_generate_prefixes_for_regions():
def test_should_generate_prefixes_for_one_day_when_datetime_contains_time():
assert _s3_key_prefixes(prefix="some-prefix/",
past_days=0,
account_ids=["000"],
regions=["some-region-1","some-region-2"]) == \
["some-prefix/AWSLogs/000/CloudTrail/some-region-1/2017/01/01",
"some-prefix/AWSLogs/000/CloudTrail/some-region-2/2017/01/01"]
regions=["some-region-1"],
from_date=datetime.datetime(2017, 1, 1, 10, 0, 0),
to_date=datetime.datetime(2017, 1, 1, 11, 0, 0)) == \
["some-prefix/AWSLogs/000/CloudTrail/some-region-1/2017/01/01"]


@freeze_time("2017-01-01")
def test_should_generate_prefixes_for_multiple_days_in_the_past():
def test_should_generate_prefixes_for_multiple_days():
assert _s3_key_prefixes(prefix="some-prefix/",
past_days=2,
account_ids=["000"],
regions=["some-region-1"]) == \
regions=["some-region-1"],
from_date=datetime.datetime(2017, 1, 1),
to_date=datetime.datetime(2017, 1, 2)) == \
["some-prefix/AWSLogs/000/CloudTrail/some-region-1/2017/01/02",
"some-prefix/AWSLogs/000/CloudTrail/some-region-1/2017/01/01", ]


def test_should_generate_prefixes_for_regions():
assert _s3_key_prefixes(prefix="some-prefix/",
from_date=datetime.datetime(2017, 1, 1),
to_date=datetime.datetime(2017, 1, 1),
account_ids=["000"],
regions=["some-region-1", "some-region-2"]) == \
["some-prefix/AWSLogs/000/CloudTrail/some-region-1/2017/01/01",
"some-prefix/AWSLogs/000/CloudTrail/some-region-1/2016/12/31",
"some-prefix/AWSLogs/000/CloudTrail/some-region-1/2016/12/30", ]
"some-prefix/AWSLogs/000/CloudTrail/some-region-2/2017/01/01"]
8 changes: 5 additions & 3 deletions tests/s3/s3_download_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import datetime

from backports import tempfile
from freezegun import freeze_time
from moto import mock_s3

from tests.test_utils_s3 import file_content, given_a_bucket, given_an_object, given_a_file
from trailscraper.s3_download import download_cloudtrail_logs

Expand All @@ -9,7 +11,6 @@


@mock_s3
@freeze_time("2017-01-01")
def test_download_log_files_and_skip_existing_files():
with tempfile.TemporaryDirectory() as dirpath:
given_a_bucket("some-bucket")
Expand All @@ -22,7 +23,8 @@ def test_download_log_files_and_skip_existing_files():
target_dir=dirpath,
bucket="some-bucket",
cloudtrail_prefix="some-prefix/",
past_days=0,
from_date=datetime.datetime(2017, 1, 1),
to_date=datetime.datetime(2017, 1, 1),
account_ids=["000"],
regions=["some-region-1"])

Expand Down
Empty file added tests/time_utils/__init__.py
Empty file.
36 changes: 36 additions & 0 deletions tests/time_utils/human_readable_time_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import datetime

import pytz

from freezegun import freeze_time

from trailscraper.time_utils import parse_human_readable_time


def test_should_parse_full_dates():
    """A bare ISO date is parsed to midnight of that day."""
    expected = datetime.datetime(2017, 12, 22, 0, 0, 0)
    parsed = parse_human_readable_time("2017-12-22")
    assert parsed == expected


def test_should_parse_full_datetimes():
    """A date with a time of day is parsed down to the second."""
    expected = datetime.datetime(2017, 12, 22, 10, 11, 12)
    parsed = parse_human_readable_time("2017-12-22 10:11:12")
    assert parsed == expected

@freeze_time("2010-11-12 13:14:15")
def test_should_parse_human_readable_current_time():
    """With the clock frozen, "now" resolves to the frozen instant (compared in UTC)."""
    frozen_instant = datetime.datetime(2010, 11, 12, 13, 14, 15, tzinfo=pytz.utc)
    parsed_utc = parse_human_readable_time("now").astimezone(pytz.utc)
    assert parsed_utc == frozen_instant

@freeze_time("2010-11-12 13:14:15")
def test_should_parse_human_readable_relative_times():
    """Relative expressions are resolved against the frozen clock (compared in UTC)."""
    # (input string, expected UTC datetime) pairs, checked in this order.
    cases = [
        ("one hour ago", datetime.datetime(2010, 11, 12, 12, 14, 15, tzinfo=pytz.utc)),
        ("in 10 minutes", datetime.datetime(2010, 11, 12, 13, 24, 15, tzinfo=pytz.utc)),
        ("-1 hour", datetime.datetime(2010, 11, 12, 12, 14, 15, tzinfo=pytz.utc)),
        ("-1 day", datetime.datetime(2010, 11, 11, 13, 14, 15, tzinfo=pytz.utc)),
        ("-10 minutes", datetime.datetime(2010, 11, 12, 13, 4, 15, tzinfo=pytz.utc)),
    ]

    for time_string, expected_utc in cases:
        parsed_utc = parse_human_readable_time(time_string).astimezone(pytz.utc)
        assert parsed_utc == expected_utc
31 changes: 22 additions & 9 deletions trailscraper/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import click

import trailscraper
from trailscraper import time_utils
from trailscraper.cloudtrail import load_from_dir, load_from_api
from trailscraper.policy_generator import generate_policy_from_records
from trailscraper.s3_download import download_cloudtrail_logs
Expand All @@ -23,22 +24,34 @@ def root_group(verbose):


@click.command()
@click.option('--past-days', default=0, help='How many days to look into the past. 0 means today')
@click.option('--bucket', required=True, help='The S3 bucket that contains cloud-trail logs')
@click.option('--prefix', default="", help='Prefix in the S3 bucket (including trailing slash)')
@click.option('--account-id', multiple=True, required=True, help='ID of the account we want to look at')
@click.option('--region', multiple=True, required=True, help='Regions we want to look at')
@click.option('--log-dir', default="~/.trailscraper/logs", type=click.Path(), help='Where to put logfiles')
@click.option('--bucket', required=True,
help='The S3 bucket that contains cloud-trail logs')
@click.option('--prefix', default="",
help='Prefix in the S3 bucket (including trailing slash)')
@click.option('--account-id', multiple=True, required=True,
help='ID of the account we want to look at')
@click.option('--region', multiple=True, required=True,
help='Regions we want to look at')
@click.option('--log-dir', default="~/.trailscraper/logs", type=click.Path(),
help='Where to put logfiles')
@click.option('--from', 'from_s', default="one day ago", type=click.STRING,
help='Start date, e.g. "2017-01-01" or "-1days"')
@click.option('--to', 'to_s', default="now", type=click.STRING,
help='End date, e.g. "2017-01-01" or "now"')
# pylint: disable=too-many-arguments
def download(past_days, bucket, prefix, account_id, region, log_dir):
def download(bucket, prefix, account_id, region, log_dir, from_s, to_s):
"""Downloads CloudTrail Logs from S3."""
log_dir = os.path.expanduser(log_dir)

download_cloudtrail_logs(log_dir, bucket, prefix, past_days, account_id, region)
from_date = time_utils.parse_human_readable_time(from_s)
to_date = time_utils.parse_human_readable_time(to_s)

download_cloudtrail_logs(log_dir, bucket, prefix, account_id, region, from_date, to_date)


@click.command("generate-policy")
@click.option('--log-dir', default="~/.trailscraper/logs", type=click.Path(), help='Where to put logfiles')
@click.option('--log-dir', default="~/.trailscraper/logs", type=click.Path(),
help='Where to put logfiles')
@click.option('--filter-assumed-role-arn', multiple=True,
help='only consider events from this role (can be used multiple times)')
@click.option('--use-cloudtrail-api', is_flag=True, default=False,
Expand Down
12 changes: 7 additions & 5 deletions trailscraper/s3_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ def _s3_key_prefix(prefix, date, account_id, region):
.format(prefix, account_id, region, date.year, date.month, date.day)


def _s3_key_prefixes(prefix, past_days, account_ids, regions):
now = datetime.datetime.now()
days = [now - datetime.timedelta(days=delta_days) for delta_days in range(past_days + 1)]
def _s3_key_prefixes(prefix, account_ids, regions, from_date, to_date):
delta = to_date - from_date

days = [to_date - datetime.timedelta(days=delta_days) for delta_days in range(delta.days + 1)]
return [_s3_key_prefix(prefix, day, account_id, region)
for account_id in account_ids
for day in days
Expand Down Expand Up @@ -49,9 +50,10 @@ def _download_dir(dist):

_download_dir(prefix)


# pylint: disable=too-many-arguments
def download_cloudtrail_logs(target_dir, bucket, cloudtrail_prefix, past_days, account_ids, regions):
def download_cloudtrail_logs(target_dir, bucket, cloudtrail_prefix, account_ids, regions, from_date, to_date):
"""Downloads cloudtrail logs matching the given arguments to the target dir"""
for prefix in _s3_key_prefixes(cloudtrail_prefix, past_days, account_ids, regions):
for prefix in _s3_key_prefixes(cloudtrail_prefix, account_ids, regions, from_date, to_date):
logging.debug("Downloading logs for %s", prefix)
_s3_download_recursive(bucket, prefix, target_dir)
7 changes: 7 additions & 0 deletions trailscraper/time_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Functions to help parse strings into datetime objects"""
import dateparser


def parse_human_readable_time(time_string):
    """Parse human readable strings (e.g. "now", "2017-01-01" and "one hour ago") into datetime

    The string is handed to ``dateparser.parse``. If dateparser cannot
    interpret it, a ``ValueError`` naming the offending input is raised.
    """
    parsed = dateparser.parse(time_string)
    if parsed is None:
        # dateparser reports an unparseable string by returning None; fail here
        # with a clear message instead of letting None reach date arithmetic.
        raise ValueError("Unable to parse time specification: {!r}".format(time_string))
    return parsed

0 comments on commit 4327f45

Please sign in to comment.