From 1d5a0b1ca4afe78fc344a15f13a666e8837dd691 Mon Sep 17 00:00:00 2001 From: grobolom Date: Mon, 29 May 2017 17:59:33 -0400 Subject: [PATCH] Add filtering for very old PRs --- scrounger/app/scrounger.py | 16 ++++++++++++---- scrounger/app/settings.py | 2 ++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/scrounger/app/scrounger.py b/scrounger/app/scrounger.py index f3abd04..e986c03 100644 --- a/scrounger/app/scrounger.py +++ b/scrounger/app/scrounger.py @@ -2,6 +2,8 @@ import os from logging import getLogger from logging.config import dictConfig +from time import strptime +from datetime import datetime, timedelta import requests from bson.json_util import dumps @@ -12,7 +14,7 @@ from graphql import query, flatten_response from settings import ( LOGGING, SCHEDULER_JOBS, SCHEDULER_TIMEZONE, SCHEDULER_API_ENABLED, ORGS_TO_TRACK, TOKEN, - RUN_SCHEDULER + RUN_SCHEDULER, TIMESTAMP_FORMAT ) @@ -76,7 +78,7 @@ def update(): url = 'https://api.github.com/graphql' headers = {'Authorization': 'Bearer {}'.format(TOKEN)} - new_issues = {} + new_prs = {} for org in ORGS_TO_TRACK: logger.debug('finding all issues in {}'.format(org)) @@ -86,11 +88,17 @@ def update(): resp = flatten_response(r.json()) logger.debug('found {} issues for {}'.format(len(resp), org)) - new_issues.update(resp) + new_prs.update(resp) logger.debug('dropping all issues and updating') db.gitdb.everything.delete_many({}) - db.gitdb.everything.insert_many(list(new_issues.values())) + + # filter out PRs that are greater than 90 days old + for pr in new_prs.values(): + issue_time = pr['updated_at'] + parsed_time = datetime(*(strptime(issue_time, TIMESTAMP_FORMAT))[:6]) + if datetime.now() - parsed_time < timedelta(days=90): + db.gitdb.everything.insert_one(pr) return 'success' diff --git a/scrounger/app/settings.py b/scrounger/app/settings.py index 1ee3d83..022d381 100644 --- a/scrounger/app/settings.py +++ b/scrounger/app/settings.py @@ -31,6 +31,8 @@ ORGS_TO_TRACK = os.environ.get('ORGS', 'HearstCorp,Hearst-Hatchery,HearstDigitalStudios').split(',') +TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ' + SCHEDULER_JOBS = [ { 'id': 'update',