Skip to content

Commit

Permalink
Merge pull request #352 from DataRecce/feature/drc-519-enhancement-pr…
Browse files Browse the repository at this point in the history
…ovide-a-recce-command-to-manage-the-state

[Draft] DRC-519 command to purge existing state file from cloud
  • Loading branch information
kentwelcome authored Jun 24, 2024
2 parents 8ab38b0 + 291cdbb commit a5d65ad
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 53 deletions.
66 changes: 66 additions & 0 deletions recce/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ def server(host, port, state_file=None, **kwargs):
@add_options(recce_options)
@add_options(recce_cloud_options)
def run(output, **kwargs):
"""
Run recce to generate the state file in CI/CD pipeline
"""
from rich.console import Console
handle_debug_flag(**kwargs)
console = Console()
Expand Down Expand Up @@ -267,6 +270,9 @@ def run(output, **kwargs):
@add_options(recce_options)
@add_options(recce_cloud_options)
def summary(state_file, **kwargs):
"""
Generate a summary of the recce state file
"""
from rich.console import Console
from .core import load_context
handle_debug_flag(**kwargs)
Expand Down Expand Up @@ -301,6 +307,66 @@ def summary(state_file, **kwargs):
print(output)


@cli.command(cls=TrackCommand)
@click.option('--cloud-token', help='The token used by Recce Cloud.', type=click.STRING,
              envvar='GITHUB_TOKEN')
@click.option('--state-file-host', help='The host to fetch the state file from.', type=click.STRING,
              envvar='RECCE_STATE_FILE_HOST', default='cloud.datarecce.io', hidden=True)
@click.option('--force', '-f', help='Bypasses the confirmation prompt. Purge the state file directly.', is_flag=True)
@add_options(recce_options)
def purge_cloud_state(**kwargs):
    """
    Purge the state file from cloud
    """
    # NOTE: the docstring above doubles as the click help text, so it is kept
    # short and user-facing; implementation notes live in comments.
    #
    # Flow: load the cloud state -> verify it -> show which pull request it
    # belongs to -> ask for confirmation (unless --force) -> purge.
    # Returns 0 when the purge succeeded, was skipped or was declined, and 1
    # on any failure (load, verification, or the purge itself).
    from rich.console import Console
    handle_debug_flag(**kwargs)
    console = Console()
    cloud_options = {
        'host': kwargs.get('state_file_host'),
        'token': kwargs.get('cloud_token'),
    }
    force_to_purge = kwargs.get('force', False)

    try:
        console.rule('Check Recce State from Cloud')
        recce_state = RecceStateLoader(review_mode=False, cloud_mode=True,
                                       state_file=None, cloud_options=cloud_options)
    except Exception as e:
        console.print("[[red]Error[/red]] Failed to load recce state file.")
        console.print(f"  Reason: {e}")
        return 1

    if not recce_state.verify():
        error, hint = recce_state.error_and_hint
        console.print(f"[[red]Error[/red]] {error}")
        console.print(f"{hint}")
        return 1

    info = recce_state.info()
    if info is None:
        console.print("[[yellow]Skip[/yellow]] No state file found in cloud.")
        return 0

    # Show the user exactly what is about to be removed before asking.
    pr_info = info.get('pull_request')
    console.print('[green]State File hosted by[/green]', info.get('source'))
    console.print('[green]GitHub Repository[/green]', pr_info.repository)
    console.print(f'[green]GitHub Pull Request[/green]\n{pr_info.title} #{pr_info.id}')
    console.print(f'Branch merged into [blue]{pr_info.base_branch}[/blue] from [blue]{pr_info.branch}[/blue]')
    console.print(pr_info.url)

    try:
        confirmed = force_to_purge is True or click.confirm('\nDo you want to purge the state file?')
    except click.exceptions.Abort:
        # The prompt was aborted: treat it like a declined confirmation.
        return 0

    if not confirmed:
        return 0

    if recce_state.purge() is True:
        console.rule('Purged Successfully')
        return 0

    # A failed purge is reported with a non-zero code, like the other
    # failure paths of this command.
    console.rule('Failed to Purge', style='red')
    console.print(f'Reason: {recce_state.error_message}')
    return 1


@cli.group('github', short_help='GitHub related commands', hidden=True)
def github(**kwargs):
    # Container group only: it does no work itself. Documented with a comment
    # rather than a docstring so the generated help output stays unchanged.
    return None
Expand Down
17 changes: 15 additions & 2 deletions recce/pull_request.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,25 @@
import json
import os
from typing import Optional
from typing import Optional, Union

import requests
from pydantic import BaseModel

from recce.git import hosting_repo
from recce.github import recce_pr_information
from recce.state import PullRequestInfo
from recce.util.pydantic_model import pydantic_model_dump


class PullRequestInfo(BaseModel):
    # Description of a pull request; every field is optional and defaults to None.
    # Pull request identifier, accepted either as an int or as a str.
    id: Optional[Union[int, str]] = None
    # Title and URL of the pull request.
    title: Optional[str] = None
    url: Optional[str] = None
    # Head branch and the base branch it targets.
    branch: Optional[str] = None
    base_branch: Optional[str] = None
    # Repository the pull request belongs to.
    repository: Optional[str] = None

    def to_dict(self):
        """Return this model as a plain dict via ``pydantic_model_dump``."""
        as_dict = pydantic_model_dump(self)
        return as_dict


def fetch_pr_metadata(**kwargs):
Expand Down
149 changes: 98 additions & 51 deletions recce/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
from typing import List, Optional, Dict, Union

import botocore.exceptions
import pydantic.version
from pydantic import BaseModel
from pydantic import Field

from recce import get_version
from recce.git import current_branch
from recce.models.types import Run, Check
from recce.pull_request import fetch_pr_metadata, PullRequestInfo
from recce.util.pydantic_model import pydantic_model_json_dump, pydantic_model_dump

logger = logging.getLogger('uvicorn')

Expand All @@ -40,26 +41,6 @@ def check_s3_bucket(bucket_name: str):
return True, None


def pydantic_model_json_dump(model: BaseModel):
    """Serialize ``model`` to a JSON string, omitting fields that are None.

    Picks the serialization method from the major version of the installed
    pydantic: ``json()`` on 1.x, ``model_dump_json()`` otherwise.
    """
    major = pydantic.version.VERSION.split(".")[0]
    if major != "1":
        return model.model_dump_json(exclude_none=True)
    return model.json(exclude_none=True)


def pydantic_model_dump(model: BaseModel):
    """Convert ``model`` to a dict.

    Picks the conversion method from the major version of the installed
    pydantic: ``dict()`` on 1.x, ``model_dump()`` otherwise.
    """
    major = pydantic.version.VERSION.split(".")[0]
    if major != "1":
        return model.model_dump()
    return model.dict()


class GitRepoInfo(BaseModel):
branch: Optional[str] = None

Expand All @@ -75,18 +56,6 @@ def to_dict(self):
return pydantic_model_dump(self)


class PullRequestInfo(BaseModel):
    # Description of a pull request; every field is optional and defaults to None.
    # Pull request identifier, accepted either as an int or as a str.
    id: Optional[Union[int, str]] = None
    # Title and URL of the pull request.
    title: Optional[str] = None
    url: Optional[str] = None
    # Head branch and the base branch it targets.
    branch: Optional[str] = None
    base_branch: Optional[str] = None
    # Repository the pull request belongs to.
    repository: Optional[str] = None

    def to_dict(self):
        """Return this model as a plain dict via ``pydantic_model_dump``."""
        as_dict = pydantic_model_dump(self)
        return as_dict


class RecceStateMetadata(BaseModel):
schema_version: str = 'v0'
recce_version: str = Field(default_factory=lambda: get_version())
Expand Down Expand Up @@ -167,6 +136,13 @@ def __init__(self,
self.hint_message = None
self.state: RecceState | None = None
self.state_lock = threading.Lock()
self.pr_info = None

if self.cloud_mode:
if self.cloud_options.get('token'):
self.pr_info = fetch_pr_metadata(github_token=self.cloud_options.get('token'))
else:
raise Exception('No GitHub token is provided to access the pull request information.')

# Load the state
self.load()
Expand Down Expand Up @@ -230,6 +206,44 @@ def refresh(self):
new_state = self.load(refresh=True)
return new_state

def info(self):
    """Describe where the loaded state comes from.

    Returns None (and sets ``error_message``) when no state is loaded.
    Otherwise returns a dict with ``mode`` ('cloud' or 'local') and
    ``source``; in cloud mode it also carries ``pull_request``.
    """
    if self.state is None:
        self.error_message = 'No state is loaded.'
        return None

    if not self.cloud_mode:
        # Local mode: the source is simply the state file path.
        return {'mode': 'local', 'source': self.state_file}

    # Cloud mode: an 's3://' host is reported verbatim, anything else is
    # reported as Recce Cloud.
    hosted_on_s3 = self.cloud_options.get('host', '').startswith('s3://')
    origin = self.cloud_options.get('host') if hosted_on_s3 else 'Recce Cloud'
    return {
        'mode': 'cloud',
        'source': origin,
        'pull_request': self.pr_info,
    }

def purge(self) -> bool:
    """Remove the stored state and report whether it succeeded.

    In cloud mode the work is delegated to the S3 or Recce Cloud purge
    helper, chosen by whether the configured host starts with 's3://'.
    In local mode the state file is deleted from disk.

    Returns:
        True when the state was removed, False otherwise. On a local
        failure ``error_message`` is set to the reason.
    """
    if self.cloud_mode is True:
        # A missing or None host is treated as "not S3".
        host = self.cloud_options.get('host') or ''
        if host.startswith('s3://'):
            return self._purge_state_from_s3_bucket()
        return self._purge_state_from_cloud()

    if self.state_file is None:
        self.error_message = 'No state file is provided. Skip removing the state file.'
        return False

    try:
        os.remove(self.state_file)
    except (OSError, ValueError) as e:
        self.error_message = f'Failed to remove the state file: {e}'
        return False
    # Report success explicitly: callers compare the result with True.
    return True

def _get_presigned_url(self, pr_info: PullRequestInfo, artifact_name: str, method: str = 'upload') -> str:
import requests
# Step 1: Get the token
Expand All @@ -254,23 +268,21 @@ def _load_state_from_file(self, file_path: Optional[str] = None) -> RecceState:
return RecceState.from_file(file_path) if file_path else None

def _load_state_from_cloud(self) -> RecceState:
from recce.pull_request import fetch_pr_metadata
pr_info = fetch_pr_metadata(github_token=self.cloud_options.get('token'))
if (pr_info.id is None) or (pr_info.repository is None):
if (self.pr_info is None) or (self.pr_info.id is None) or (self.pr_info.repository is None):
raise Exception('Cannot get the pull request information from GitHub.')

if self.cloud_options.get('host', '').startswith('s3://'):
logger.debug('Fetching state from AWS S3 bucket...')
return self._load_state_from_s3_bucket(pr_info)
return self._load_state_from_s3_bucket()
else:
logger.debug('Fetching state from Recce Cloud...')
return self._load_state_from_recce_cloud(pr_info)
return self._load_state_from_recce_cloud()

def _load_state_from_recce_cloud(self, pr_info) -> Union[RecceState, None]:
def _load_state_from_recce_cloud(self) -> Union[RecceState, None]:
import tempfile
import requests

presigned_url = self._get_presigned_url(pr_info, RECCE_STATE_COMPRESSED_FILE, method='download')
presigned_url = self._get_presigned_url(self.pr_info, RECCE_STATE_COMPRESSED_FILE, method='download')

with tempfile.NamedTemporaryFile() as tmp:
response = requests.get(presigned_url)
Expand All @@ -284,12 +296,12 @@ def _load_state_from_recce_cloud(self, pr_info) -> Union[RecceState, None]:
f.write(response.content)
return RecceState.from_file(tmp.name, compressed=True)

def _load_state_from_s3_bucket(self, pr_info) -> Union[RecceState, None]:
def _load_state_from_s3_bucket(self) -> Union[RecceState, None]:
import boto3
import tempfile
s3_client = boto3.client('s3')
s3_bucket_name = self.cloud_options.get('host').replace('s3://', '')
s3_bucket_key = f'github/{pr_info.repository}/pulls/{pr_info.id}/{RECCE_STATE_COMPRESSED_FILE}'
s3_bucket_key = f'github/{self.pr_info.repository}/pulls/{self.pr_info.id}/{RECCE_STATE_COMPRESSED_FILE}'

rc, error_message = check_s3_bucket(s3_bucket_name)
if rc is False:
Expand All @@ -308,23 +320,21 @@ def _load_state_from_s3_bucket(self, pr_info) -> Union[RecceState, None]:
return RecceState.from_file(tmp.name, compressed=True)

def _export_state_to_cloud(self) -> Union[str, None]:
from recce.pull_request import fetch_pr_metadata
pr_info = fetch_pr_metadata(github_token=self.cloud_options.get('token'))
if (pr_info.id is None) or (pr_info.repository is None):
if (self.pr_info is None) or (self.pr_info.id is None) or (self.pr_info.repository is None):
raise Exception('Cannot get the pull request information from GitHub.')

if self.cloud_options.get('host', '').startswith('s3://'):
logger.info("Store recce state to AWS S3 bucket")
return self._export_state_to_s3_bucket(pr_info)
return self._export_state_to_s3_bucket()
else:
logger.info("Store recce state to Recce Cloud")
return self._export_state_to_recce_cloud(pr_info)
return self._export_state_to_recce_cloud()

def _export_state_to_recce_cloud(self, pr_info) -> Union[str, None]:
def _export_state_to_recce_cloud(self) -> Union[str, None]:
import tempfile
import requests

presigned_url = self._get_presigned_url(pr_info, RECCE_STATE_COMPRESSED_FILE, method='upload')
presigned_url = self._get_presigned_url(self.pr_info, RECCE_STATE_COMPRESSED_FILE, method='upload')
with tempfile.NamedTemporaryFile() as tmp:
self._export_state_to_file(tmp.name, compress=True)
response = requests.put(presigned_url, data=open(tmp.name, 'rb').read())
Expand All @@ -333,12 +343,12 @@ def _export_state_to_recce_cloud(self, pr_info) -> Union[str, None]:
return 'Failed to upload the state file to Recce Cloud.'
return 'The state file is uploaded to Recce Cloud.'

def _export_state_to_s3_bucket(self, pr_info) -> Union[str, None]:
def _export_state_to_s3_bucket(self) -> Union[str, None]:
import boto3
import tempfile
s3_client = boto3.client('s3')
s3_bucket_name = self.cloud_options.get('host').replace('s3://', '')
s3_bucket_key = f'github/{pr_info.repository}/pulls/{pr_info.id}/{RECCE_STATE_COMPRESSED_FILE}'
s3_bucket_key = f'github/{self.pr_info.repository}/pulls/{self.pr_info.id}/{RECCE_STATE_COMPRESSED_FILE}'

rc, error_message = check_s3_bucket(s3_bucket_name)
if rc is False:
Expand All @@ -364,3 +374,40 @@ def _export_state_to_file(self, file_path: Optional[str] = None, compress: bool
with open(file_path, 'w') as f:
f.write(json_data)
return f'The state file is stored at \'{file_path}\''

def _purge_state_from_cloud(self) -> bool:
    """Ask Recce Cloud to delete the artifacts of the current pull request.

    Sends an HTTP DELETE, authorized with the configured token, to the
    artifacts endpoint of the pull request. Returns True on a 204
    response; otherwise stores the response text in ``error_message``
    and returns False.
    """
    import requests
    logger.debug('Purging the state from Recce Cloud...')
    bearer = self.cloud_options.get('token')
    pull_request = self.pr_info
    endpoint = (
        f'{RECCE_CLOUD_API_HOST}/api/v1/{pull_request.repository}'
        f'/pulls/{pull_request.id}/artifacts'
    )
    reply = requests.delete(endpoint, headers={'Authorization': f'Bearer {bearer}'})
    if reply.status_code == 204:
        return True
    self.error_message = reply.text
    return False

def _purge_state_from_s3_bucket(self) -> bool:
    """Delete every object stored for the current pull request in the S3 bucket.

    Lists all keys under ``github/<repository>/pulls/<id>/`` in the bucket
    named by the configured host and deletes them.

    Returns:
        True when S3 confirmed at least one deletion and reported no
        per-key errors. Otherwise False, with ``error_message`` set to
        the reason.
    """
    import boto3
    from rich.console import Console
    console = Console()
    logger.debug('Purging the state from AWS S3 bucket...')
    s3_client = boto3.client('s3')
    s3_bucket_name = self.cloud_options.get('host').replace('s3://', '')
    s3_key_prefix = f'github/{self.pr_info.repository}/pulls/{self.pr_info.id}/'

    # A single list_objects_v2 call returns a limited page of keys, so use
    # the paginator to collect every key under the prefix.
    paginator = s3_client.get_paginator('list_objects_v2')
    delete_objects = [
        {'Key': obj['Key']}
        for page in paginator.paginate(Bucket=s3_bucket_name, Prefix=s3_key_prefix)
        for obj in page.get('Contents', [])
    ]
    if not delete_objects:
        self.error_message = f'No objects found under \'s3://{s3_bucket_name}/{s3_key_prefix}\'.'
        return False

    # delete_objects accepts at most 1000 keys per request.
    batch_size = 1000
    deleted_count = 0
    failures = []
    for start in range(0, len(delete_objects), batch_size):
        batch = delete_objects[start:start + batch_size]
        delete_response = s3_client.delete_objects(Bucket=s3_bucket_name, Delete={'Objects': batch})
        # Only announce keys that S3 confirmed as deleted.
        for deleted in delete_response.get('Deleted', []):
            deleted_count += 1
            console.print(f'[green]Deleted[/green]: {deleted.get("Key")}')
        failures.extend(delete_response.get('Errors', []))

    if failures:
        details = ', '.join(
            f'{failure.get("Key")} ({failure.get("Code")}: {failure.get("Message")})'
            for failure in failures
        )
        self.error_message = f'Failed to delete objects from the S3 bucket: {details}'
        return False
    if deleted_count == 0:
        self.error_message = 'The S3 bucket did not confirm any deletion.'
        return False
    return True
22 changes: 22 additions & 0 deletions recce/util/pydantic_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pydantic
from pydantic import BaseModel


def pydantic_model_json_dump(model: BaseModel):
    """Serialize ``model`` to a JSON string, omitting fields that are None.

    Picks the serialization method from the major version of the installed
    pydantic: ``json()`` on 1.x, ``model_dump_json()`` otherwise.
    """
    major = pydantic.version.VERSION.split(".")[0]
    if major != "1":
        return model.model_dump_json(exclude_none=True)
    return model.json(exclude_none=True)


def pydantic_model_dump(model: BaseModel):
    """Convert ``model`` to a dict.

    Picks the conversion method from the major version of the installed
    pydantic: ``dict()`` on 1.x, ``model_dump()`` otherwise.
    """
    major = pydantic.version.VERSION.split(".")[0]
    if major != "1":
        return model.model_dump()
    return model.dict()

0 comments on commit a5d65ad

Please sign in to comment.