diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..9d8da28 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..9dec07c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. 
+ +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/workflows/dockerpublish.yml b/.github/workflows/dockerpublish.yml new file mode 100644 index 0000000..9d3231f --- /dev/null +++ b/.github/workflows/dockerpublish.yml @@ -0,0 +1,37 @@ +name: Docker Image Publish + +on: + release: + types: [published] + +jobs: + github-cache: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Cache Docker layers + uses: actions/cache@v2 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx- + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Build and push + uses: docker/build-push-action@v2 + with: + context: . + file: ./Dockerfile + push: true + tags: papermountain/gitlab-watchman:latest + cache-from: type=local,src=/tmp/.buildx-cache + cache-to: type=local,dest=/tmp/.buildx-cache \ No newline at end of file diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index e73c78c..9564245 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: ['3.7', '3.8', '3.9', '3.10'] steps: - uses: actions/checkout@v2 @@ -36,12 +36,12 @@ jobs: flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test setup & install run: | - pip install wheel setuptools requests colorama termcolor PyYAML - python setup.py sdist bdist_wheel - pip install dist/*.whl + pip install build + python3 -m build + python3 -m pip install dist/*.whl - name: Test rules run: | - python3 -m unittest tests/test_rules.py + python3 -m unittest tests/test_signatures.py - name: Test run run: | gitlab-watchman --version @@ -52,7 +52,7 @@ jobs: runs-on: windows-latest strategy: matrix: - python-version: [3.5, 3.6, 3.7, 3.8] + python-version: ['3.7', '3.8', '3.9', '3.10'] steps: - uses: actions/checkout@v2 @@ -63,10 +63,10 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install wheel setuptools requests colorama termcolor + pip install build - name: Test setup & install run: | - python setup.py sdist bdist_wheel + python -m build pip install --find-links=dist\ gitlab-watchman - name: Test run run: | diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml index f891d6e..f002b5c 100644 --- a/.github/workflows/pythonpublish.yml +++ b/.github/workflows/pythonpublish.yml @@ -20,12 +20,11 @@ jobs: python-version: '3.x' - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine requests colorama termcolor PyYAML + python3 -m pip install --upgrade pip twine build - name: Build and publish env: TWINE_USERNAME: ${{ '__token__' }} TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} run: | - python setup.py sdist bdist_wheel + python3 -m build twine upload dist/* diff --git a/.gitignore b/.gitignore index 6c480f0..25be5ab 100644 --- a/.gitignore +++ b/.gitignore @@ -123,8 +123,3 @@ venv.bak/ .mypy_cache/ .dmypy.json dmypy.json - -# package related -config/ -*.txt -*.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index 2dbf490..bcec91c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,24 @@ +## 2.0.0 - 2022-04-01 
+### Added: +- New scopes for finding exposed data in: + - notes + - snippets +- Docker image now available from the Docker hub, or by building from source. (Credit [@adioss](https://github.com/adioss) for the inspiration) +- Complete rewrite of the codebase to make searching faster and more efficient. + - More modern packaging and distribution. +- Logs now include more data +- Additional signatures added to find more leaked data +- Updated logo to play nicely with dark mode displays + +### Removed: + +- Logging to file and TCP stream (the `--output` flag) - logs now go to stdout like a true 12 factor app. Reroute stdout as you see fit. +- .conf file for configuration options. Pass the environment variables `GITLAB_WATCHMAN_TOKEN` and `GITLAB_WATCHMAN_URL` + +**Breaking changes:** +- The --output flag is no longer needed, and is therefore no longer supported + + ## 1.4.0 - 2020-12-24 ### Added: - Refactor of rules into directories for easier management @@ -18,7 +39,7 @@ ## 1.3.0 - 2020-12-12 ### Added: - Add more information about the namespaces a project is in to logs -- Added details owner of that namespace, for groups and users +- Added owner details of that namespace, for groups and users - Time based searching now looks at the time a file was committed, not when a project was active, which greatly reduces multiples of the same detection because a project is active but a file has not been modified. 
- Rules added: - SSH private keys diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..8318757 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem 
inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +papermtn@protonmail.com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. 
This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..6b600cc --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,26 @@ +# Contributing + +Thanks for being interested in contributing to GitLab Watchman. I'm always looking for more contributions towards the project. 
+ +The two main areas where you can contribute are: +- Signature files +- Additional functionality + + +## Adding new signatures +GitLab Watchman runs using YAML signature files that are stored in the `src/signatures` directory. They define what to search GitLab for, and are the heart of the application. + +Instructions on how to create your own signature files can be found in `docs/signatures.md` + +If you do write your own signatures, please contribute them to the project by creating a pull request. + + +## Additional functionality +You can make recommendations for new functionality by raising issues using the feature request template. Even better, you could contribute the additional functionality yourself and create a pull request for the changes to be added to a future release. + +## Style + +GitLab Watchman follows Google style guidelines mostly, but the main thing is to stay consistent with the style already in use. + +## Documentation +Currently, documentation and additional resources are kept on [my blog](https://papermtn.co.uk/category/tools/gitlab-watchman/) \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..dc407e7 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +FROM alpine/git AS initlayer +WORKDIR /workdir +RUN git clone https://github.com/PaperMtn/gitlab-watchman.git + +FROM python:buster +RUN addgroup --gid 1000 gitlab-watchman +RUN useradd -u 1000 -g 1000 gitlab-watchman +RUN mkdir /home/gitlab-watchman +COPY --from=initlayer /workdir/gitlab-watchman /home/gitlab-watchman +RUN chown -R gitlab-watchman: /home/gitlab-watchman +WORKDIR /home/gitlab-watchman + +RUN python3 -m pip install --upgrade pip +RUN python3 -m pip install requests build PyYAML +RUN python3 -m build +RUN python3 -m pip install dist/*.whl + +USER gitlab-watchman + +ENTRYPOINT ["/usr/local/bin/gitlab-watchman"] \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index df1e71e..fa52e25 100644 --- a/MANIFEST.in +++ 
b/MANIFEST.in @@ -1,3 +1,3 @@ include *.txt -recursive-include gitlab_watchman *.yaml *.yml -recursive-exclude gitlab_watchman test*.yaml test*.yml +recursive-include src/signatures *.yaml *.yml +recursive-exclude src/signatures test*.yaml test*.yml diff --git a/README.md b/README.md index 29e85ba..df5479c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ - + # GitLab Watchman ![Python 2.7 and 3 compatible](https://img.shields.io/pypi/pyversions/gitlab-watchman) @@ -17,6 +17,8 @@ It searches GitLab for internally shared projects and looks at: - Issues - Merge requests - Milestones +- Notes +- Snippets For the following data: - GCP keys and service account files @@ -40,8 +42,8 @@ You can run GitLab Watchman to look for results going back as far as: This means after one deep scan, you can schedule GitLab Watchman to run regularly and only return results from your chosen timeframe. -### Rules -GitLab Watchman uses custom YAML rules to detect matches in GitLab. +### Signatures +GitLab Watchman uses custom YAML signatures to detect matches in GitLab. They follow this format: @@ -62,6 +64,8 @@ scope: #what to search, any combination of the below# - wiki_blobs - issues - merge_requests +- notes +- snippet_titles test_cases: match_cases: - #test case that should match the regex# @@ -71,22 +75,13 @@ strings: - #search query to use in GitLab# pattern: #Regex pattern to filter out false positives# ``` -There are Python tests to ensure rules are formatted properly and that the Regex patterns work in the `tests` dir +There are Python tests to ensure signatures are formatted properly and that the Regex patterns work in the `tests` dir -More information about rules, and how you can add your own, is in the file `docs/rules.md`. +More information about signatures, and how you can add your own, is in the file `docs/signatures.md`. 
### Logging -GitLab Watchman gives the following logging options: -- Log file -- Stdout -- TCP stream - -Results are output in JSON format, perfect for ingesting into a SIEM or other log analysis platform. - -For file and TCP stream logging, configuration options need to be passed via `.conf` file or environment variable. See the file `docs/logging.md` for instructions on how to set it up. - -If no logging option is given, GitLab Watchman defaults to Stdout logging. +Results are output to stdout in JSON format, perfect for ingesting into a SIEM or other log analysis platform. ## Requirements @@ -122,47 +117,46 @@ api You also need to provide the URL of your GitLab instance. #### Providing token & URL -GitLab Watchman will first try to get the the GitLab token and URL from the environment variables `GITLAB_WATCHMAN_TOKEN` and `GITLAB_WATCHMAN_URL`, if this fails they will be taken from .conf file (see below). +GitLab Watchman will get the GitLab token and URL from the environment variables `GITLAB_WATCHMAN_TOKEN` and `GITLAB_WATCHMAN_URL`. -### .conf file -Configuration options can be passed in a file named `watchman.conf` which must be stored in your home directory. The file should follow the YAML format, and should look like below: -```yaml -gitlab_watchman: - token: abc123 - url: https://gitlab.example.com - logging: - file_logging: - path: - json_tcp: - host: - port: +## Installation +You can install the latest stable version via pip: + +`python3 -m pip install gitlab-watchman` + +Or build from source yourself, which is useful if you intend to add your own signatures: + +Download the release source files, then from the top level of the repository run: +```shell +python3 -m build +python3 -m pip install --force-reinstall dist/*.whl ``` -GitLab Watchman will look for this file at runtime, and use the configuration options from here. If you are not using the advanced logging features, leave them blank. 
-If you are having issues with your .conf file, run it through a YAML linter. +## Docker Image -An example file is in `docs/example.conf` +GitLab Watchman is also available from the Docker hub as a Docker image: -**Note** If you use any other Watchman applications and already have a `watchman.conf` file, just append the conf data for GitLab Watchman to the existing file. +`docker pull papermountain/gitlab-watchman:latest` -## Installation -Install via pip +You can then run GitLab Watchman in a container, making sure you pass the required environment variables: -`pip install gitlab-watchman` +``` +// help +docker run --rm papermountain/gitlab-watchman -h -Or via source +// scan all +docker run --rm -e GITLAB_WATCHMAN_TOKEN=abc123 -e GITLAB_WATCHMAN_URL=https://example.gitlab.com papermountain/gitlab-watchman --timeframe a --all +docker run --rm --env-file .env papermountain/gitlab-watchman --timeframe a --all +``` ## Usage GitLab Watchman will be installed as a global command, use as follows: ``` -usage: gitlab-watchman [-h] --timeframe {d,w,m,a} --output - {file,stdout,stream} [--version] [--all] [--blobs] - [--commits] [--wiki-blobs] [--issues] [--merge-requests] - [--milestones] [--comments] +usage: gitlab-watchman [-h] --timeframe {d,w,m,a} [--version] [--all] [--blobs] [--commits] [--wiki-blobs] [--issues] [--merge-requests] [--milestones] [--notes] [--snippets] Monitoring GitLab for sensitive data shared publicly -optional arguments: +options: -h, --help show this help message and exit --version show program's version number and exit --all Find everything @@ -172,29 +166,17 @@ optional arguments: --issues Search issues --merge-requests Search merge requests --milestones Search milestones - --comments Search comments - -required arguments: - --timeframe {d,w,m,a} - How far back to search: d = 24 hours w = 7 days, m = - 30 days, a = all time - --output {file,stdout,stream} - Where to send results + --notes Search notes + --snippets Search snippets ``` -You can 
run GitLab Watchman to look for everything, and output to default Stdout: - -`gitlab-watchman --timeframe a --all` - -Or arguments can be grouped together to search more granularly. This will look for commits and milestones for the last 30 days, and output the results to a TCP stream: - -`gitlab-watchman --timeframe m --commits --milestones --output stream` - ## Other Watchman apps You may be interested in some of the other apps in the Watchman family: - [Slack Watchman](https://github.com/PaperMtn/slack-watchman) +- [Slack Watchman for Enterprise Grid](https://github.com/PaperMtn/slack-watchman-enterprise-grid) - [GitHub Watchman](https://github.com/PaperMtn/github-watchman) +- [Trello Watchman](https://github.com/PaperMtn/trello-watchman) ## License The source code for this project is released under the [GNU General Public Licence](https://www.gnu.org/licenses/licenses.html#GPL). This project is not associated with GitLab. diff --git a/docs/example.conf b/docs/example.conf deleted file mode 100644 index be8844e..0000000 --- a/docs/example.conf +++ /dev/null @@ -1,9 +0,0 @@ -gitlab_watchman: - token: abc123 - url: https://gitlab.example.com - logging: - file_logging: - path: /var/log/ - json_tcp: - host: localhost - port: 9020 diff --git a/docs/logging.md b/docs/logging.md deleted file mode 100644 index 14c160d..0000000 --- a/docs/logging.md +++ /dev/null @@ -1,58 +0,0 @@ -# Logging -GitLab Watchman gives the following logging options: -- Log file -- Stdout -- TCP stream - -## JSON formatted logging -All other logging options output their logs in JSON format. 
Here is an example: - -```json -{"localtime": "2020-01-01 00:00:00,000", "level": "NOTIFY", "source": "GitLab Watchman", "scope": "blobs", "type": "Interesting Potentially Sensitive Files", "severity": "70", "detection": {"basename": "vendor/k8s.io/kubernetes/vendor/github.com/abbot/go-http-auth/test", "blob_id": null, "data": ".........", "path": "westeros_inc/lannister_docs/my.htpasswd", "project_id": 1001, "project_name": "westeros_inc", "project_url": "https://gitlab.westeros.inc/...."}} -``` -This should contain all the information you require to ingest these logs into a SIEM, or other log analysis platform. - - -### File logging -File logging saves JSON formatted logs to a file. - -The path where you want to output the file needs to be passed when running GitLab Watchman. This can be done via the .conf file: -```yaml -gitlab_watchman: - token: abc123 - url: https://gitlab.example.com - logging: - file_logging: - path: /var/put_my_logs_here/ - json_tcp: - host: - port: -``` -Or by setting your log path in the environment variable: `GITLAB_WATCHMAN_LOG_PATH` - -If file logging is selected as the output option, but no path is give, GitLab Watchman defaults to the user's home directory. - -The filename will be `gitlab_watchman.log` - -Note: GitLab Watchman does not handle the rotation of the file. You would need a solution such as logrotate for this. - -### Stdout logging -Stdout logging sends JSON formatted logs to Stdout, for you to capture however you want. 
- -### TCP stream logging -With this option, JSON formmatted logs are sent to a destination of your choosing via TCP - -You will need to pass GitLab Watchman a host and port to receive the logs, either via .conf file: - -```yaml -gitlab_watchman: - token: abc123 - url: https://gitlab.example.com - logging: - file_logging: - path: - json_tcp: - host: localhost - port: 9020 -``` -Or by setting the environment variables `GITLAB_WATCHMAN_HOST` and `GITLAB_WATCHMAN_PORT` diff --git a/docs/rules.md b/docs/signatures.md similarity index 74% rename from docs/rules.md rename to docs/signatures.md index 092f5f2..0856053 100644 --- a/docs/rules.md +++ b/docs/signatures.md @@ -1,24 +1,26 @@ -# Rules -GitLab Watchman uses rules to provide the search terms to query GitLab and Regex patterns to filter out true positives. +# Signatures +GitLab Watchman uses signatures to provide the search terms to query GitLab and Regex patterns to filter out true positives. They are written in YAML, and follow this format: ```yaml --- -filename: +filename: enabled: #[true|false] meta: name: - author: - date: + author: + date: description: #what the search should find# severity: #rating out of 100# scope: #what to search, any combination of the below# - blobs - commits - milestones -- wiki_blobs +- wiki_blobs - issues - merge_requests +- notes +- snippet_titles test_cases: match_cases: - #test case that should match the regex# @@ -29,29 +31,31 @@ strings: pattern: #Regex pattern to filter out false positives# ``` -Rules are stored in the directory watchman/rules, so you can see examples there. +Signatures are stored in the directory src/signatures, so you can see examples there. 
**Scope** -This is where GitLab should look: +This is where GitLab should look: - blobs - commits - milestones - wiki_blobs - issues - merge_requests +- notes +- snippet_titles You can search for any combination of these, with each on its own line **Test cases** -These test cases are used to check that the regex pattern works. Each rule should have at least one match (pass) and one fail case. +These test cases are used to check that the regex pattern works. Each signature should have at least one match (pass) and one fail case. If you want to return all results found by a query, enter the value `blank` for both cases. -## Creating your own rules -You can easily create your own rules for GitLab Watchman. The two most important parts are the search queries and the regex pattern. +## Creating your own signatures +You can easily create your own signatures for GitLab Watchman. The two most important parts are the search queries and the regex pattern. ### Search queries -These are stored as the entries in the 'strings' section of the rule, and are the search terms used to query GitLab to find results. +These are stored as the entries in the 'strings' section of the signature, and are the search terms used to query GitLab to find results. Multiple entries can be put under strings to find as many potential hits as you can. So if I wanted to find passwords, I might use both of these search terms: `- password` @@ -69,7 +73,7 @@ Here is an excerpt from the article: - To match a partial word, use \*. In this example, I want to find bugs with any 500 errors. : `bug error 50*` - To use one of symbols above literally, escape the symbol with a preceding \: `argument \-last` -Using this syntax, you can build rules with queries to find very specific files and information. +Using this syntax, you can build signatures with queries to find very specific files and information. ### Regex pattern This pattern is used to filter results that are returned by the search query. 
diff --git a/gitlab_watchman/__about__.py b/gitlab_watchman/__about__.py deleted file mode 100644 index e01d7c1..0000000 --- a/gitlab_watchman/__about__.py +++ /dev/null @@ -1,34 +0,0 @@ -# GitLab Watchman -# Copyright (C) 2020 PaperMtn -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -__all__ = [ - '__title__', - '__summary__', - '__uri__', - '__version__', - '__author__', - '__email__', - '__license__', -] - -__title__ = 'GitLab Watchman' -__summary__ = 'Monitoring GitLab for sensitive data shared publicly' -__uri__ = 'https://github.com/PaperMtn/gitlab-watchman' -__version__ = '1.4.0' -__author__ = 'PaperMtn' -__email__ = 'papermtn@protonmail.com' -__license__ = 'GPL-3.0' -__copyright__ = '2020 {}'.format(__author__) diff --git a/gitlab_watchman/__init__.py b/gitlab_watchman/__init__.py deleted file mode 100644 index 0bdebeb..0000000 --- a/gitlab_watchman/__init__.py +++ /dev/null @@ -1,268 +0,0 @@ -import builtins -import argparse -import os -import yaml -import time -from pathlib import Path -from datetime import date -from colorama import init, deinit -from termcolor import colored - -import gitlab_watchman.gitlab_wrapper as gitlab -import gitlab_watchman.__about__ as a -import gitlab_watchman.config as cfg -import gitlab_watchman.logger as logger - -RULES_PATH = (Path(__file__).parent / 'rules').resolve() -OUTPUT_LOGGER = '' - - -def validate_conf(path): - """Check the file watchman.conf exists""" - 
- if os.environ.get('GITLAB_WATCHMAN_TOKEN') and os.environ.get('GITLAB_WATCHMAN_URL'): - return True - if os.path.exists(path): - with open(path) as yaml_file: - return yaml.safe_load(yaml_file).get('gitlab_watchman') - - -def search(gitlab_connection, rule, tf, scope): - if isinstance(OUTPUT_LOGGER, logger.StdoutLogger): - print = OUTPUT_LOGGER.log_info - else: - print = builtins.print - try: - print(colored('Searching for {} in {}'.format(rule.get('meta').get('name'), scope), 'yellow')) - - results = gitlab.search(gitlab_connection, OUTPUT_LOGGER, rule, scope, tf) - if results: - if isinstance(OUTPUT_LOGGER, logger.CSVLogger): - OUTPUT_LOGGER.write_csv('exposed_{}'.format(rule.get('filename').split('.')[0]), scope, results) - else: - for log_data in results: - OUTPUT_LOGGER.log_notification(log_data, scope, rule.get('meta').get('name'), - rule.get('meta').get('severity')) - except Exception as e: - if isinstance(OUTPUT_LOGGER, logger.StdoutLogger): - print = OUTPUT_LOGGER.log_critical - else: - print = builtins.print - - print(colored(e, 'red')) - - -def load_rules(): - """Import YAML rules""" - - rules = [] - try: - for root, dirs, files in os.walk(RULES_PATH): - for rule in files: - rule_path = (Path(root) / rule).resolve() - if rule_path.name.endswith('.yaml'): - with open(rule_path) as yaml_file: - rule = yaml.safe_load(yaml_file) - if rule.get('enabled'): - rules.append(rule) - return rules - except Exception as e: - if isinstance(OUTPUT_LOGGER, logger.StdoutLogger): - print = OUTPUT_LOGGER.log_critical - else: - print = builtins.print - - print(colored(e, 'red')) - - -def main(): - global OUTPUT_LOGGER - try: - init() - - if isinstance(OUTPUT_LOGGER, logger.StdoutLogger): - print = OUTPUT_LOGGER.log_critical - else: - print = builtins.print - - parser = argparse.ArgumentParser(description=a.__summary__) - required = parser.add_argument_group('required arguments') - required.add_argument('--timeframe', choices=['d', 'w', 'm', 'a'], dest='time', - help='How 
far back to search: d = 24 hours w = 7 days, m = 30 days, a = all time', - required=True) - required.add_argument('--output', choices=['file', 'stdout', 'stream'], dest='logging_type', - help='Where to send results') - parser.add_argument('--version', action='version', - version='gitlab-watchman {}'.format(a.__version__)) - parser.add_argument('--all', dest='everything', action='store_true', - help='Find everything') - parser.add_argument('--blobs', dest='blobs', action='store_true', - help='Search code blobs') - parser.add_argument('--commits', dest='commits', action='store_true', - help='Search commits') - parser.add_argument('--wiki-blobs', dest='wiki', action='store_true', - help='Search wiki blobs') - parser.add_argument('--issues', dest='issues', action='store_true', - help='Search issues') - parser.add_argument('--merge-requests', dest='merge', action='store_true', - help='Search merge requests') - parser.add_argument('--milestones', dest='milestones', action='store_true', - help='Search milestones') - - args = parser.parse_args() - tm = args.time - everything = args.everything - blobs = args.blobs - commits = args.commits - wiki = args.wiki - issues = args.issues - merge = args.merge - milestones = args.milestones - logging_type = args.logging_type - - if tm == 'd': - tf = cfg.DAY_TIMEFRAME - elif tm == 'w': - tf = cfg.WEEK_TIMEFRAME - elif tm == 'm': - tf = cfg.MONTH_TIMEFRAME - else: - tf = cfg.ALL_TIME - conf_path = '{}/watchman.conf'.format(os.path.expanduser('~')) - - if not validate_conf(conf_path): - raise Exception( - colored('GITLAB_WATCHMAN_TOKEN environment variable or watchman.conf file not detected. 
' - '\nEnsure environment variable is set or a valid file is located in your home ' - 'directory: {} ', 'red') - .format(os.path.expanduser('~'))) - else: - config = validate_conf(conf_path) - connection = gitlab.initiate_gitlab_connection() - - if logging_type: - if logging_type == 'file': - if os.environ.get('GITLAB_WATCHMAN_LOG_PATH'): - OUTPUT_LOGGER = logger.FileLogger(os.environ.get('GITLAB_WATCHMAN_LOG_PATH')) - elif config.get('logging').get('file_logging').get('path') and \ - os.path.exists(config.get('logging').get('file_logging').get('path')): - OUTPUT_LOGGER = logger.FileLogger(log_path=config.get('logging').get('file_logging').get('path')) - else: - print('No config given, outputting gitlab_watchman.log file to home path') - OUTPUT_LOGGER = logger.FileLogger(log_path=os.path.expanduser('~')) - elif logging_type == 'stdout': - OUTPUT_LOGGER = logger.StdoutLogger() - elif logging_type == 'stream': - if os.environ.get('GITLAB_WATCHMAN_HOST') and os.environ.get('GITLAB_WATCHMAN_PORT'): - OUTPUT_LOGGER = logger.SocketJSONLogger(os.environ.get('GITLAB_WATCHMAN_HOST'), - os.environ.get('GITLAB_WATCHMAN_PORT')) - elif config.get('logging').get('json_tcp').get('host') and \ - config.get('logging').get('json_tcp').get('port'): - OUTPUT_LOGGER = logger.SocketJSONLogger(config.get('logging').get('json_tcp').get('host'), - config.get('logging').get('json_tcp').get('port')) - else: - raise Exception("JSON TCP stream selected with no config") - else: - print('CSV logging is no longer supported, defaulting to Stdout') - OUTPUT_LOGGER = logger.StdoutLogger() - else: - print('No logging option selected, defaulting to Stdout') - OUTPUT_LOGGER = logger.StdoutLogger() - - now = int(time.time()) - today = date.today().strftime('%Y-%m-%d') - start_date = time.strftime('%Y-%m-%d', time.localtime(now - tf)) - - if not isinstance(OUTPUT_LOGGER, logger.StdoutLogger): - print = builtins.print - print(colored(''' - ##### # - # # # ##### # ## ##### - # # # # # # # # - # #### # # # 
# # ##### - # # # # # ###### # # - # # # # # # # # # - ##### # # ####### # # ##### - - # # - # # # ## ##### #### # # # # ## # # - # # # # # # # # # # ## ## # # ## # - # # # # # # # ###### # ## # # # # # # - # # # ###### # # # # # # ###### # # # - # # # # # # # # # # # # # # # ## - ## ## # # # #### # # # # # # # # - ''', 'magenta')) - print('Version: {}\n'.format(a.__version__)) - print('Searching from {} to {}'.format(start_date, today)) - print('Importing rules...') - rules_list = load_rules() - print('{} rules loaded'.format(len(rules_list))) - else: - OUTPUT_LOGGER.log_info('GitLab Watchman started execution') - OUTPUT_LOGGER.log_info('Version: {}'.format(a.__version__)) - OUTPUT_LOGGER.log_info('Importing rules...') - rules_list = load_rules() - OUTPUT_LOGGER.log_info('{} rules loaded'.format(len(rules_list))) - print = OUTPUT_LOGGER.log_info - - if everything: - print(colored('Getting everything...', 'magenta')) - for rule in rules_list: - if 'blobs' in rule.get('scope'): - search(connection, rule, tf, 'blobs') - if 'commits' in rule.get('scope'): - search(connection, rule, tf, 'commits') - if 'issues' in rule.get('scope'): - search(connection, rule, tf, 'issues') - if 'merge_requests' in rule.get('scope'): - search(connection, rule, tf, 'merge_requests') - if 'wiki_blobs' in rule.get('scope'): - search(connection, rule, tf, 'wiki_blobs') - if 'milestones' in rule.get('scope'): - search(connection, rule, tf, 'milestones') - else: - if blobs: - print(colored('Searching blobs', 'magenta')) - for rule in rules_list: - if 'blobs' in rule.get('scope'): - search(connection, rule, tf, 'blobs') - if commits: - print(colored('Searching commits', 'magenta')) - for rule in rules_list: - if 'commits' in rule.get('scope'): - search(connection, rule, tf, 'commits') - if issues: - print(colored('Searching issues', 'magenta')) - for rule in rules_list: - if 'issues' in rule.get('scope'): - search(connection, rule, tf, 'issues') - if merge: - print(colored('Searching merge 
requests', 'magenta')) - for rule in rules_list: - if 'merge_requests' in rule.get('scope'): - search(connection, rule, tf, 'merge_requests') - if wiki: - print(colored('Searching wiki blobs', 'magenta')) - for rule in rules_list: - if 'wiki_blobs' in rule.get('scope'): - search(connection, rule, tf, 'wiki_blobs') - if milestones: - print(colored('Searching milestones', 'magenta')) - for rule in rules_list: - if 'milestones' in rule.get('scope'): - search(connection, rule, tf, 'milestones') - print(colored('++++++Audit completed++++++', 'green')) - - deinit() - - except Exception as e: - if isinstance(OUTPUT_LOGGER, logger.StdoutLogger): - print = OUTPUT_LOGGER.log_critical - else: - print = builtins.print - - print(colored(e, 'red')) - - -if __name__ == '__main__': - main() diff --git a/gitlab_watchman/__main__.py b/gitlab_watchman/__main__.py deleted file mode 100644 index fb69ced..0000000 --- a/gitlab_watchman/__main__.py +++ /dev/null @@ -1,3 +0,0 @@ -from gitlab_watchman import main - -main() diff --git a/gitlab_watchman/config.py b/gitlab_watchman/config.py deleted file mode 100644 index 5c81af4..0000000 --- a/gitlab_watchman/config.py +++ /dev/null @@ -1,11 +0,0 @@ -import calendar -import time - -# Epoch time for 24 hours -DAY_TIMEFRAME = 86400 -# Epoch time for 30 days -MONTH_TIMEFRAME = 2592000 -# Epoch time for 7 days -WEEK_TIMEFRAME = 604800 -# Epoch time for a very long time -ALL_TIME = calendar.timegm(time.gmtime()) + 1576800000 diff --git a/gitlab_watchman/gitlab_wrapper.py b/gitlab_watchman/gitlab_wrapper.py deleted file mode 100644 index 7066d7c..0000000 --- a/gitlab_watchman/gitlab_wrapper.py +++ /dev/null @@ -1,590 +0,0 @@ -import builtins -import calendar -import json -import os -import re -import time -import requests -import yaml -import multiprocessing -from requests.exceptions import HTTPError -from requests.packages.urllib3.util import Retry -from requests.adapters import HTTPAdapter -from urllib.parse import quote - -import 
gitlab_watchman.config as cfg -import gitlab_watchman.logger as logger - - -class GitLabAPIClient(object): - - def __init__(self, token, base_url): - self.token = token - self.base_url = base_url.rstrip('\\') - self.per_page = 100 - self.session = session = requests.session() - session.mount(self.base_url, HTTPAdapter(max_retries=Retry(connect=3, backoff_factor=1))) - session.headers.update({'Authorization': 'Bearer {}'.format(self.token)}) - - def make_request(self, url, params=None, data=None, method='GET', verify_ssl=True): - try: - relative_url = '/'.join((self.base_url, 'api/v4', url)) - response = self.session.request(method, relative_url, params=params, data=data, verify=verify_ssl) - response.raise_for_status() - - return response - - except HTTPError as http_error: - if response.status_code == 400: - if response.json().get('message').get('error'): - raise Exception(response.json().get('message').get('error')) - else: - raise http_error - elif response.status_code == 502 or response.status_code == 500: - print('Retrying...') - time.sleep(30) - response = self.session.request(method, relative_url, params=params, data=data, verify=verify_ssl) - response.raise_for_status() - - return response - elif response.status_code == 429: - print('Rate limit hit, cooling off...') - time.sleep(30) - response = self.session.request(method, relative_url, params=params, data=data, verify=verify_ssl) - response.raise_for_status() - - return response - else: - raise http_error - - except Exception as e: - print(e) - - def get_user_by_id(self, user_id): - return self.make_request('users/{}'.format(user_id)).json() - - def get_user_by_username(self, username): - return self.make_request('users?username={}'.format(username)).json() - - def get_token_user(self): - return self.make_request('user').json() - - def get_licence_info(self): - return self.make_request('license').json() - - def get_project(self, project_id): - return 
self.make_request('projects/{}'.format(project_id)).json() - - def get_variables(self, project_id): - return self.make_request('projects/{}/variables'.format(project_id)).json() - - def get_project_members(self, project_id): - return self.make_request('projects/{}/members'.format(project_id)).json() - - def get_file(self, project_id, path, ref): - path = ''.join((quote(path, safe=''), '?ref=', ref)) - return self.make_request('projects/{}/repository/files/{}'.format(project_id, path)).json() - - def get_group_members(self, project_id): - return self.make_request('groups/{}/members'.format(project_id)).json() - - def get_commit(self, project_id, commit_id): - return self.make_request('projects/{}/repository/commits/{}'.format(project_id, commit_id)).json() - - def global_search(self, url, search_term='', search_scope=''): - - results = [] - page = 1 - params = { - 'scope': search_scope, - 'search': search_term, - 'per_page': self.per_page, - 'page': '' - } - - response = self.make_request(url, params=params) - page_count = response.headers.get('X-Total-Pages') - - if page_count: - while page <= int(page_count): - params = { - 'scope': search_scope, - 'search': search_term, - 'per_page': self.per_page, - 'page': page - } - r = self.make_request(url, params=params).json() - for value in r: - results.append(value) - page += 1 - else: - params = { - 'scope': search_scope, - 'search': search_term, - } - r = self.make_request(url, params=params).json() - for value in r: - results.append(value) - - return results - - def get_all_projects(self): - """Get all public projects. 
Uses keyset pagination, which currently - is only available for the Projects resource in the GitLab API""" - - results = [] - - params = { - 'pagination': 'keyset', - 'per_page': self.per_page, - 'order_by': 'id', - 'sort': 'asc' - } - - response = self.make_request('projects', params=params) - while 'link' in response.headers: - next_url = response.headers.get('link') - params = { - 'pagination': 'keyset', - 'per_page': self.per_page, - 'order_by': 'id', - 'sort': 'asc', - 'id_after': next_url.split('id_after=')[1].split('&')[0] - } - response = self.make_request('projects', params=params) - for value in response.json(): - results.append(value) - - return results - - -def initiate_gitlab_connection(): - """Create a GitLab API client object""" - - try: - token = os.environ['GITLAB_WATCHMAN_TOKEN'] - except KeyError: - with open('{}/watchman.conf'.format(os.path.expanduser('~'))) as yaml_file: - config = yaml.safe_load(yaml_file) - - token = config.get('gitlab_watchman').get('token') - - try: - url = os.environ['GITLAB_WATCHMAN_URL'] - except KeyError: - with open('{}/watchman.conf'.format(os.path.expanduser('~'))) as yaml_file: - config = yaml.safe_load(yaml_file) - - url = config.get('gitlab_watchman').get('url') - - return GitLabAPIClient(token, url) - - -def convert_time(timestamp): - """Convert ISO 8601 timestamp to epoch """ - - pattern = '%Y-%m-%dT%H:%M:%S.%f%z' - return int(time.mktime(time.strptime(timestamp, pattern))) - - -def deduplicate(input_list): - """Removes duplicates where results are returned by multiple queries""" - - list_of_strings = [json.dumps(d) for d in input_list] - list_of_strings = set(list_of_strings) - return [json.loads(s) for s in list_of_strings] - - -def split_to_chunks(input_list, no_of_chunks): - """Split the input list into n amount of chunks""" - - return (input_list[i::no_of_chunks] for i in range(no_of_chunks)) - - -def find_group_owners(group_members): - """Return all users who are both active and group Owners""" - - 
member_list = [] - for user in group_members: - if user.get('state') == 'active' and user.get('access_level') == 50: - member_list.append({ - 'user_id': user.get('id'), - 'name': user.get('name'), - 'username': user.get('username'), - 'access_level': 'Owner' - }) - - return member_list - - -def find_user_owner(user_list): - """Return user who owns a namespace""" - - owner_list = [] - for user in user_list: - owner_list.append({ - 'user_id': user.get('id'), - 'name': user.get('name'), - 'username': user.get('username'), - 'state': user.get('state') - }) - - return owner_list - - -def search(gitlab: GitLabAPIClient, log_handler, rule, scope, timeframe=cfg.ALL_TIME): - """Uses the Search API to get search results for the given scope. These results are then split into (No of cores - - 1) number of chunks, and Multiprocessing is then used to concurrently filter them against Regex using the relevant - worker function """ - - results = [] - if isinstance(log_handler, logger.StdoutLogger): - print = log_handler.log_info - else: - print = builtins.print - - for query in rule.get('strings'): - regex = re.compile(rule.get('pattern')) - search_result_list = gitlab.global_search('search', query, search_scope=scope) - print('{} {} found matching search term: {}'.format(len(search_result_list), scope, query.replace('"', ''))) - result = multiprocessing.Manager().list() - - chunks = multiprocessing.cpu_count() - 1 - list_of_chunks = split_to_chunks(search_result_list, chunks) - - processes = [] - - if scope == 'blobs': - target = blob_worker - elif scope == 'wiki_blobs': - target = wiki_blob_worker - elif scope == 'commits': - target = commit_worker - elif scope == 'issues': - target = issue_worker - elif scope == 'milestones': - target = milestone_worker - else: - target = merge_request_worker - - for search_list in list_of_chunks: - p = multiprocessing.Process(target=target, - args=(gitlab, search_list, regex, timeframe, result)) - processes.append(p) - p.start() - - for process 
in processes: - process.join() - - results.append(list(result)) - - if results: - results = deduplicate([item for sublist in results for item in sublist]) - print('{} total matches found after filtering'.format(len(results))) - return results - else: - print('No matches found after filtering') - - -def blob_worker(gitlab, blob_list, regex, timeframe, results): - """Worker function for multiprocessing search of blobs. Outputs a list of matches formatted as dicts - ready for logging """ - - now = calendar.timegm(time.gmtime()) - for blob in blob_list: - project = gitlab.get_project(blob.get('project_id')) - file = gitlab.get_file(blob.get('project_id'), blob.get('path'), blob.get('ref')) - if file: - commit = gitlab.get_commit(blob.get('project_id'), file.get('last_commit_id')) - if convert_time(commit.get('committed_date')) > (now - timeframe) and regex.search(str(blob.get('data'))): - results_dict = { - 'blob_id': blob.get('id'), - 'basename': blob.get('basename'), - 'data': blob.get('data'), - 'path': blob.get('path'), - 'ref_branch': blob.get('ref'), - 'commited_date': commit.get('committed_date'), - 'match_string': regex.search(str(blob.get('data'))).group(0), - 'project': { - 'project_url': project.get('web_url'), - 'project_id': blob.get('project_id'), - 'project_name': project.get('name'), - 'last_activity_at': project.get('last_activity_at'), - 'namespace': { - 'namespace_id': project.get('namespace').get('id'), - 'name': project.get('namespace').get('name'), - 'kind': project.get('namespace').get('kind'), - 'full_path': project.get('namespace').get('full_path'), - 'parent_id': project.get('namespace').get('parent_id'), - 'web_url': project.get('namespace').get('web_url') - } - } - } - if project.get('namespace').get('kind') == 'group': - group_members = gitlab.get_group_members(project.get('namespace').get('id')) - owners = find_group_owners(group_members) - if owners: - results_dict['project']['namespace']['members'] = owners - elif 
project.get('namespace').get('kind') == 'user': - namespace_user = gitlab.get_user_by_username(project.get('namespace').get('full_path')) - user = find_user_owner(namespace_user) - if user: - results_dict['project']['namespace']['owner'] = user - - results.append(results_dict) - - return results - - -def wiki_blob_worker(gitlab, blob_list, regex, timeframe, results): - """Worker function for multiprocessing search of wiki blobs. Outputs a list of matches formatted as dicts - ready for logging """ - - now = calendar.timegm(time.gmtime()) - for blob in blob_list: - project = gitlab.get_project(blob.get('project_id')) - if convert_time(project.get('last_activity_at')) > (now - timeframe) and regex.search(str(blob.get('data'))): - results_dict = { - 'wiki_blob_id': blob.get('id'), - 'basename': blob.get('basename'), - 'data': blob.get('data'), - 'path': blob.get('path'), - 'match_string': regex.search(str(blob.get('data'))).group(0), - 'project': { - 'project_url': project.get('web_url'), - 'project_id': blob.get('project_id'), - 'project_name': project.get('name'), - 'last_activity_at': project.get('last_activity_at'), - 'namespace': { - 'namespace_id': project.get('namespace').get('id'), - 'name': project.get('namespace').get('name'), - 'kind': project.get('namespace').get('kind'), - 'full_path': project.get('namespace').get('full_path'), - 'parent_id': project.get('namespace').get('parent_id'), - 'web_url': project.get('namespace').get('web_url') - } - } - } - if project.get('namespace').get('kind') == 'group': - group_members = gitlab.get_group_members(project.get('namespace').get('id')) - owners = find_group_owners(group_members) - if owners: - results_dict['project']['namespace']['members'] = owners - elif project.get('namespace').get('kind') == 'user': - namespace_user = gitlab.get_user_by_username(project.get('namespace').get('full_path')) - user = find_user_owner(namespace_user) - if user: - results_dict['project']['namespace']['owner'] = user - - 
results.append(results_dict) - - return results - - -def commit_worker(gitlab, commit_list, regex, timeframe, results): - """Worker function for multiprocessing search of commits. Outputs a list of matches formatted as dicts - ready for logging """ - - now = calendar.timegm(time.gmtime()) - for commit in commit_list: - if convert_time(commit.get('committed_date')) > (now - timeframe) and regex.search(str(commit.get('message'))): - project = gitlab.get_project(commit.get('project_id')) - results_dict = { - 'commit_id': commit.get('id'), - 'title': commit.get('title'), - 'data': commit.get('message'), - 'committed_date': commit.get('committed_date'), - 'committer_name': commit.get('committer_name'), - 'committer_email': commit.get('committer_email'), - 'match_string': regex.search(str(commit.get('message'))).group(0), - 'project': { - 'project_url': project.get('web_url'), - 'project_id': commit.get('project_id'), - 'project_name': project.get('name'), - 'last_activity_at': project.get('last_activity_at'), - 'namespace': { - 'namespace_id': project.get('namespace').get('id'), - 'name': project.get('namespace').get('name'), - 'kind': project.get('namespace').get('kind'), - 'full_path': project.get('namespace').get('full_path'), - 'parent_id': project.get('namespace').get('parent_id'), - 'web_url': project.get('namespace').get('web_url') - } - } - } - if project.get('namespace').get('kind') == 'group': - group_members = gitlab.get_group_members(project.get('namespace').get('id')) - owners = find_group_owners(group_members) - if owners: - results_dict['project']['namespace']['members'] = owners - elif project.get('namespace').get('kind') == 'user': - namespace_user = gitlab.get_user_by_username(project.get('namespace').get('full_path')) - user = find_user_owner(namespace_user) - if user: - results_dict['project']['namespace']['owner'] = user - - results.append(results_dict) - - return results - - -def issue_worker(gitlab, issue_list, regex, timeframe, results): - 
"""Worker function for multiprocessing search of issues. Outputs a list of matches formatted as dicts - ready for logging """ - - now = calendar.timegm(time.gmtime()) - for issue in issue_list: - if convert_time(issue.get('updated_at')) > (now - timeframe) and regex.search(str(issue.get('description'))): - project = gitlab.get_project(issue.get('project_id')) - results_dict = { - 'issue_id': issue.get('id'), - 'title': issue.get('title'), - 'description': issue.get('description'), - 'web_url': issue.get('web_url'), - 'state': issue.get('state'), - 'created_at': issue.get('created_at'), - 'updated_at': issue.get('updated_at'), - 'closed_at': issue.get('closed_at'), - 'author_id': issue.get('author').get('id'), - 'author_username': issue.get('author').get('username'), - 'due_date': issue.get('due_date'), - 'confidential': issue.get('confidential'), - 'match_string': regex.search(str(issue.get('description'))).group(0), - 'project': { - 'project_url': project.get('web_url'), - 'project_id': issue.get('project_id'), - 'project_name': project.get('name'), - 'last_activity_at': project.get('last_activity_at'), - 'namespace': { - 'namespace_id': project.get('namespace').get('id'), - 'name': project.get('namespace').get('name'), - 'kind': project.get('namespace').get('kind'), - 'full_path': project.get('namespace').get('full_path'), - 'parent_id': project.get('namespace').get('parent_id'), - 'web_url': project.get('namespace').get('web_url') - } - } - } - if issue.get('assignee'): - results_dict['assignee_id'] = issue.get('assignee').get('id') - results_dict['assignee_username'] = issue.get('assignee').get('username') - - if project.get('namespace').get('kind') == 'group': - group_members = gitlab.get_group_members(project.get('namespace').get('id')) - owners = find_group_owners(group_members) - if owners: - results_dict['project']['namespace']['members'] = owners - elif project.get('namespace').get('kind') == 'user': - namespace_user = 
gitlab.get_user_by_username(project.get('namespace').get('full_path')) - user = find_user_owner(namespace_user) - if user: - results_dict['project']['namespace']['owner'] = user - - results.append(results_dict) - - return results - - -def milestone_worker(gitlab, milestone_list, regex, timeframe, results): - """Worker function for multiprocessing search of milestones. Outputs a list of matches formatted as dicts - ready for logging """ - - now = calendar.timegm(time.gmtime()) - for milestone in milestone_list: - if convert_time(milestone.get('updated_at')) > (now - timeframe) and regex.search( - str(milestone.get('description'))): - project = gitlab.get_project(milestone.get('project_id')) - results_dict = { - 'milestone_id': milestone.get('id'), - 'title': milestone.get('title'), - 'description': milestone.get('description'), - 'created_at': milestone.get('created_at'), - 'updated_at': milestone.get('updated_at'), - 'due_date': milestone.get('due_date'), - 'start_date': milestone.get('start_date'), - 'match_string': regex.search(str(milestone.get('description'))).group(0), - 'project': { - 'project_url': project.get('web_url'), - 'project_id': milestone.get('project_id'), - 'project_name': project.get('name'), - 'last_activity_at': project.get('last_activity_at'), - 'namespace': { - 'namespace_id': project.get('namespace').get('id'), - 'name': project.get('namespace').get('name'), - 'kind': project.get('namespace').get('kind'), - 'full_path': project.get('namespace').get('full_path'), - 'parent_id': project.get('namespace').get('parent_id'), - 'web_url': project.get('namespace').get('web_url') - } - } - } - if project.get('namespace').get('kind') == 'group': - group_members = gitlab.get_group_members(project.get('namespace').get('id')) - owners = find_group_owners(group_members) - if owners: - results_dict['project']['namespace']['members'] = owners - elif project.get('namespace').get('kind') == 'user': - namespace_user = 
gitlab.get_user_by_username(project.get('namespace').get('full_path')) - user = find_user_owner(namespace_user) - if user: - results_dict['project']['namespace']['owner'] = user - - results.append(results_dict) - - return results - - -def merge_request_worker(gitlab, merge_request_list, regex, timeframe, results): - """Worker function for multiprocessing search of merge requests. Outputs a list of matches formatted as dicts - ready for logging """ - - now = calendar.timegm(time.gmtime()) - for merge_request in merge_request_list: - if convert_time(merge_request.get('updated_at')) > (now - timeframe) and \ - regex.search(str(merge_request.get('description'))): - project = gitlab.get_project(merge_request.get('project_id')) - results_dict = { - 'merge_request_id': merge_request.get('id'), - 'title': merge_request.get('title'), - 'description': merge_request.get('description'), - 'state': merge_request.get('state'), - 'created_at': merge_request.get('created_at'), - 'updated_at': merge_request.get('updated_at'), - 'author_id': merge_request.get('author').get('id'), - 'author_username': merge_request.get('author').get('username'), - 'merge_status': merge_request.get('merge_status'), - 'url': merge_request.get('url'), - 'match_string': regex.search(str(merge_request.get('description'))).group(0), - 'project': { - 'project_url': project.get('web_url'), - 'project_id': merge_request.get('project_id'), - 'project_name': project.get('name'), - 'last_activity_at': project.get('last_activity_at'), - 'namespace': { - 'namespace_id': project.get('namespace').get('id'), - 'name': project.get('namespace').get('name'), - 'kind': project.get('namespace').get('kind'), - 'full_path': project.get('namespace').get('full_path'), - 'parent_id': project.get('namespace').get('parent_id'), - 'web_url': project.get('namespace').get('web_url') - } - } - } - if merge_request.get('assignee'): - results_dict['assignee_id'] = merge_request.get('assignee').get('id') - 
results_dict['assignee_username'] = merge_request.get('assignee').get('username') - - if project.get('namespace').get('kind') == 'group': - group_members = gitlab.get_group_members(project.get('namespace').get('id')) - owners = find_group_owners(group_members) - if owners: - results_dict['project']['namespace']['members'] = owners - elif project.get('namespace').get('kind') == 'user': - namespace_user = gitlab.get_user_by_username(project.get('namespace').get('full_path')) - user = find_user_owner(namespace_user) - if user: - results_dict['project']['namespace']['owner'] = user - - results.append(results_dict) - - return results \ No newline at end of file diff --git a/gitlab_watchman/logger.py b/gitlab_watchman/logger.py deleted file mode 100644 index 24aa9df..0000000 --- a/gitlab_watchman/logger.py +++ /dev/null @@ -1,115 +0,0 @@ -import json -import os -import logging -import socket -import sys -import logging.handlers -from datetime import datetime -from logging import Logger - - -class LoggingBase(Logger): - def __init__(self, name='GitLab Watchman'): - super().__init__(name) - self.notify_format = logging.Formatter( - '{"localtime": "%(asctime)s", "level": "NOTIFY", "source": "%(name)s", "scope": "%(scope)s",' - ' "severity": "%(severity)s", "detection_type": "%(type)s", "detection_data": %(message)s}') - self.info_format = logging.Formatter( - '{"localtime": "%(asctime)s", "level": "%(levelname)s", "source": "%(name)s", "message":' - ' "%(message)s"}') - self.log_path = '' - self.logger = logging.getLogger(self.name) - self.logger.setLevel(logging.DEBUG) - - -class FileLogger(LoggingBase): - def __init__(self, log_path): - LoggingBase.__init__(self) - self.handler = logging.handlers.WatchedFileHandler(os.path.join(log_path, 'gitlab_watchman.log')) - self.logger.addHandler(self.handler) - - def log_notification(self, log_data, scope, detect_type, severity): - self.handler.setFormatter(self.notify_format) - self.logger.warning(json.dumps(log_data), extra={ - 
'scope': scope, - 'type': detect_type, - 'severity': severity - }) - - def log_info(self, log_data): - self.handler.setFormatter(self.info_format) - self.logger.info(log_data) - - def log_critical(self, log_data): - self.handler.setFormatter(self.info_format) - self.logger.critical(log_data) - - -class StdoutLogger(LoggingBase): - def __init__(self): - LoggingBase.__init__(self) - self.handler = logging.StreamHandler(sys.stdout) - self.logger.addHandler(self.handler) - - def log_notification(self, log_data, scope, detect_type, severity): - self.handler.setFormatter(self.notify_format) - self.logger.warning(json.dumps(log_data), extra={ - 'scope': scope, - 'type': detect_type, - 'severity': severity - }) - - def log_info(self, log_data): - self.handler.setFormatter(self.info_format) - self.logger.info(log_data) - - def log_critical(self, log_data): - self.handler.setFormatter(self.info_format) - self.logger.critical(log_data) - - -class SocketJSONLogger(object): - def __init__(self, host, port): - self.host = host - self.port = port - self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - try: - self.sock.connect((self.host, self.port)) - except socket.error as error: - print(error) - - def send(self, data): - try: - self.sock.sendall(bytes(data, encoding="utf-8")) - except Exception as e: - print(e) - - def log_notification(self, log_data, scope, detect_type, severity): - message = json.dumps({ - 'localtime': datetime.now().strftime('%Y-%m-%d %H:%M:%S,%f'), - 'level': 'NOTIFY', - 'source': 'GitLab Watchman', - 'scope': scope, - 'severity': severity, - 'detection_type': detect_type, - 'detection_data': log_data - }) + '\n' - self.send(message) - - def log_info(self, log_data): - message = json.dumps({ - 'localtime': datetime.now().strftime('%Y-%m-%d %H:%M:%S,%f'), - 'level': 'INFO', - 'source': 'GitLab Watchman', - 'message': log_data - }) + '\n' - self.send(message) - - def log_critical(self, log_data): - message = json.dumps({ - 'localtime': 
datetime.now().strftime('%Y-%m-%d %H:%M:%S,%f'), - 'level': 'CRITICAL', - 'source': 'GitLab Watchman', - 'message': log_data - }) + '\n' - self.send(message) diff --git a/gitlab_watchman/rules/files/interesting_files.yaml b/gitlab_watchman/rules/files/interesting_files.yaml deleted file mode 100644 index 3ba4b20..0000000 --- a/gitlab_watchman/rules/files/interesting_files.yaml +++ /dev/null @@ -1,41 +0,0 @@ ---- -filename: interesting_files.yaml -enabled: true -meta: - name: Interesting Potentially Sensitive Files - author: PaperMtn - date: '2020-08-14' - description: Detects other misc potentially sensitive files that have been exposed. - Pattern left blank to return every hit - severity: '70' -scope: -- blobs -test_cases: - match_cases: - - blank - fail_cases: - - blank -strings: -- "* extension:openvpn" -- "* extension:ovpn" -- "* extension:cscfg" -- "* extension:rdp" -- "* extension:jks" -- "* extension:psafe3" -- "* extension:agilekeychain" -- "* extension:keychain" -- "* extension:kwallet" -- "* extension:tblk" -- "* filename:otr.private_key" -- "* filename:secret_token.rb" -- "* filename:publish_over_ssh.BapSshPublisherPlugin.xml" -- "* filename:credentials.xml" -- "* filename:knife.rb" -- "* filename:filezilla.xml" -- "* filename:terraform.tfvars" -- "* extension:tfvars" -- "* extension:htpasswd" -- "* extension:trc" -- "* filename:.ssh" -- "* filename:.chef/*.pem" -pattern: '' diff --git a/gitlab_watchman/rules/passwords.yaml b/gitlab_watchman/rules/passwords.yaml deleted file mode 100644 index 3a94cce..0000000 --- a/gitlab_watchman/rules/passwords.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -filename: passwords.yaml -enabled: true -meta: - name: Passwords - author: PaperMtn - date: '2020-08-14' - description: Detects potentially exposed passwords in plaintext - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - 'Password: WeakPassword123' - - The password is WeakPassword123 - 
fail_cases: - - You need a strong password -strings: -- '"password:" -(svg|png|jpeg|jpg)' -- '"password is" -(svg|png|jpeg|jpg)' -- '"passwd" -(svg|png|jpeg|jpg)' -pattern: (?i)(password\s*[`=:\"]+\s*[^\s]+|password is\s*[`=:\"]*\s*[^\s]+|pwd\s*[`=:\"]*\s*[^\s]+|passwd\s*[`=:\"]+\s*[^\s]+) diff --git a/gitlab_watchman/rules/tokens/facebook_access_tokens.yaml b/gitlab_watchman/rules/tokens/facebook_access_tokens.yaml deleted file mode 100644 index 97ec80c..0000000 --- a/gitlab_watchman/rules/tokens/facebook_access_tokens.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -filename: facebook_access_tokens.yaml -enabled: true -meta: - name: Facebook Access Tokens - author: PaperMtn - date: '2020-12-12' - description: Detects exposed Facebook API tokens - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - 'EAACEdEose0cBATestAccessCodeForFaceb00k' - fail_cases: - - 'EABACEdEose0cBATestAccessCodeForFaceb00k' -strings: -- graph.facebook.com -(svg|png|jpeg) -- facebook.com/dialog/oauth -- eaaced -(svg|png|jpeg) -- facebook_* -(svg|png|jpeg) -pattern: 'EAACEdEose0cBA[0-9A-Za-z]+' diff --git a/gitlab_watchman/rules/tokens/facebook_secret_tokens.yaml b/gitlab_watchman/rules/tokens/facebook_secret_tokens.yaml deleted file mode 100644 index 0016e9b..0000000 --- a/gitlab_watchman/rules/tokens/facebook_secret_tokens.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -filename: facebook_secret_tokens.yaml -enabled: true -meta: - name: Facebook Secret Tokens - author: PaperMtn - date: '2020-12-12' - description: Detects exposed Facebook API tokens - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - 'facebook.com client_secret: 111111111111111111111111111111111' - fail_cases: - - 'client_secret: 111111111111111111111111111111111' -strings: -- graph.facebook.com -(svg|png|jpeg) -- facebook.com/dialog/oauth -- eaaced -(svg|png|jpeg) -- facebook_* 
-(svg|png|jpeg) -pattern: '[f|F][a|A][c|C][e|E][b|B][o|O][o|O][k|K].*[0-9a-f]{32}' diff --git a/gitlab_watchman/rules/tokens/github_api_tokens.yaml b/gitlab_watchman/rules/tokens/github_api_tokens.yaml deleted file mode 100644 index 96b3d19..0000000 --- a/gitlab_watchman/rules/tokens/github_api_tokens.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- -filename: github_api_tokens.yaml -enabled: true -meta: - name: GitHub API Tokens - author: PaperMtn - date: '2020-12-12' - description: Detects exposed GitHub API tokens - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - 'https://github.com/login/oauth/authorize/$access_token=abcabc123123abcabc123123&token_type=bearer' - fail_cases: - - 'GET https://github.com/login/oauth/authorize' -strings: -- api.github.com -(svg|png|jpeg) -- github.com/login/oauth/ -- github_* -(svg|png|jpeg) -pattern: '[0-9a-zA-Z]{20,40}' diff --git a/gitlab_watchman/rules/tokens/mailgun_api_tokens.yaml b/gitlab_watchman/rules/tokens/mailgun_api_tokens.yaml deleted file mode 100644 index 3301071..0000000 --- a/gitlab_watchman/rules/tokens/mailgun_api_tokens.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- -filename: mailgun_api_tokens.yaml -enabled: true -meta: - name: Mailgun API Tokens - author: PaperMtn - date: '2020-12-17' - description: Detects exposed Mailgun API tokens - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - https://api:key-62833bf8c07c531abxxxxxxxxxxa6678@api.mailgun.net/ - fail_cases: - - https://api:62833bf8c07c531abxxxxxxxxxxa6678@api.mailgun.net/ -strings: -- api.mailgun.net -- mailgun_* -pattern: 'key-[0-9a-zA-Z]{32}' diff --git a/gitlab_watchman/rules/tokens/misc_private_keys.yaml b/gitlab_watchman/rules/tokens/misc_private_keys.yaml deleted file mode 100644 index 5c8662e..0000000 --- a/gitlab_watchman/rules/tokens/misc_private_keys.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- 
-filename: misc_private_keys.yaml -enabled: true -meta: - name: Miscelaneous Private Keys - author: PaperMtn - date: '2020-08-14' - description: Detects miscelaneous private keys that arent covered in other rules - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - '"private_key": "-----BEGIN PRIVATE KEY-----AABBCCDDEEFFGGHHIIJJKK=\n-----END - PRIVATE KEY-----\n' - fail_cases: - - '"private_key": $PRIVATE_KEY_FILE' -strings: -- '"BEGIN DSA PRIVATE"' -- '"BEGIN EC PRIVATE"' -- private -RSA extension:key -pattern: "(?s)(-----BEGIN .+?-----)\\S{0,}" diff --git a/gitlab_watchman/rules/tokens/ms_nuget_key.yaml b/gitlab_watchman/rules/tokens/ms_nuget_key.yaml deleted file mode 100644 index de82c29..0000000 --- a/gitlab_watchman/rules/tokens/ms_nuget_key.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- -filename: ms_nuget_key.yaml -enabled: true -meta: - name: Microsoft NuGet Keys - author: PaperMtn - date: '2020-12-24' - description: Detects exposed Microsoft NuGet Keys - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - nuget setapikey 4003d786-aaaa-bbbb-cccc-c4f3e8ef9b3a - fail_cases: - - nuget setapikey 4003d786-xxxx-bbbb-cccc-c4f3e8ef9b3a -strings: -- api.nuget.org -- nuget_* -- nuget -pattern: '[n|N][u|U][g|G][e|E][t|T].{0,80}[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}' diff --git a/gitlab_watchman/rules/tokens/pgp_private_keys.yaml b/gitlab_watchman/rules/tokens/pgp_private_keys.yaml deleted file mode 100644 index 69b4afc..0000000 --- a/gitlab_watchman/rules/tokens/pgp_private_keys.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- -filename: pgp_private_keys.yaml -enabled: true -meta: - name: PGP Private Keys - author: PaperMtn - date: '2020-08-14' - description: Detects exposed PGP private keys - severity: '90' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: 
- match_cases: - - '"private_key": "-----BEGIN PRIVATE KEY-----AABBCCDDEEFFGGHHIIJJKK=\n-----END - PRIVATE KEY-----\n' - fail_cases: - - '"private_key": $PRIVATE_KEY_FILE' -strings: -- '"BEGIN PGP PRIVATE KEY BLOCK"' -pattern: (?s)(-----BEGIN .+?-----)\\S{0,} diff --git a/gitlab_watchman/rules/tokens/slack_api_tokens.yaml b/gitlab_watchman/rules/tokens/slack_api_tokens.yaml deleted file mode 100644 index 0a1f09f..0000000 --- a/gitlab_watchman/rules/tokens/slack_api_tokens.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -filename: slack_api_tokens.yaml -enabled: true -meta: - name: Slack API Tokens - author: PaperMtn - date: '2020-08-14' - description: Detects exposed Slack API tokens - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - API_KEY = xoxb-11111111111-a1a1a1a1a1a1a1a1a1a1a1a1 - fail_cases: - - API_KEY = xoxb_slack_key12 -strings: -- 'xoxb -(svg|png|jpeg|jpg)' -- 'xoxa -(svg|png|jpeg|jpg)' -- 'xoxp -(svg|png|jpeg|jpg)' -- 'xoxr -(svg|png|jpeg|jpg)' -- 'xoxs -(svg|png|jpeg|jpg)' -pattern: xox[baprs]([0-9a-zA-Z-]{10,72}) diff --git a/gitlab_watchman/rules/tokens/slack_webhooks.yaml b/gitlab_watchman/rules/tokens/slack_webhooks.yaml deleted file mode 100644 index 7fe6d13..0000000 --- a/gitlab_watchman/rules/tokens/slack_webhooks.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- -filename: slack_webhooks.yaml -enabled: true -meta: - name: Slack Webhooks - author: PaperMtns - date: '2020-08-14' - description: Detects exposed Slack webhooks - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - https://hooks.slack.com/services/T01010101/Babababab/aabab1212abab1212abababa - fail_cases: - - https://hooks.slack.com/ - - http://hooks.slack.com/ -strings: -- https://hooks.slack.com/ -pattern: https://hooks.slack.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8}/[a-zA-Z0-9_]{24} diff --git 
a/gitlab_watchman/rules/tokens/stripe_api_tokens.yaml b/gitlab_watchman/rules/tokens/stripe_api_tokens.yaml deleted file mode 100644 index d3daa8f..0000000 --- a/gitlab_watchman/rules/tokens/stripe_api_tokens.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -filename: stripe_api_tokens.yaml -enabled: true -meta: - name: Stripe API Tokens - author: PaperMtn - date: '2020-12-17' - description: Detects exposed Stripe API tokens - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - sk_live_aaaaaarmZltoKlnSZMaaaaaa - - rk_live_aaaaaarmZltoKlnSZMaaaaaa - - sk_test_aaaaaarmZltoKlnSZMaaaaaa - fail_cases: - - uk_live_aaaaaarmZltoKlnSZMaaaaaa -strings: -- api.stripe.com -- stripe_* -(svg|png|jpeg) -pattern: '[s|r]k_(live|test)_[0-9a-zA-Z]{24}' diff --git a/gitlab_watchman/rules/tokens/twitter_api_tokens.yaml b/gitlab_watchman/rules/tokens/twitter_api_tokens.yaml deleted file mode 100644 index ceaf155..0000000 --- a/gitlab_watchman/rules/tokens/twitter_api_tokens.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -filename: twitter_api_tokens.yaml -enabled: true -meta: - name: Twitter API Tokens - author: PaperMtn - date: '2020-12-12' - description: Detects exposed Twitter API tokens - severity: '70' -scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests -test_cases: - match_cases: - - 'https://api.twitter.com/oauth/authorize?oauth_token=NPabcdefg0yU5T3abcdefg7iCotZ3cnetKwcabcdefg' - - 'twitter NPabcdefg0yU5T3abcdefg7iCotZ3cnetKwcabcdefg' - fail_cases: - - 'https://api.twitter.com/oauth/authorize?oauth_token=NPcudxy0yU5T3tBzho7iswcTIRlX0iwRl0' -strings: -- api.twitter.com -(svg|png|jpeg) -- twitter_* -(svg|png|jpeg) -- oauth_token_secret -(svg|png|jpeg) -pattern: 'api\.twitter\.com\/oauth.*[0-9a-zA-Z]{35,44}|api\.twitter\.com\/oauth.*[1-9][0-9]+-[0-9a-zA-Z]{40}|([t|T][w|W][i|I][t|T][t|T][e|E][r|R]|oauth_token).*[0-9a-zA-Z]{35,44}' diff --git a/pyproject.toml b/pyproject.toml new file 
mode 100644 index 0000000..374b58c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel" +] +build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..33f05f7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +requests +PyYAML diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..2f340ff --- /dev/null +++ b/setup.cfg @@ -0,0 +1,39 @@ +[metadata] +name = gitlab-watchman +version = attr: gitlab_watchman.__version__.__version__ +description = Monitoring GitLab for sensitive data shared publicly +long_description = file: README.md, CHANGELOG.md +license = GPL-3.0 +url = https://github.com/PaperMtn/gitlab-watchman +author = PaperMtn +author_email = papermtn@protonmail.com +keywords = audit, gitlab, gitlab-watchman, watchman, blue-team, red-team, threat-hunting +classifiers = + Intended Audience :: Information Technology + Topic :: Security + License :: OSI Approved :: GNU General Public License v3 (GPLv3) + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + +[options] +zip_safe = False +package_dir = + = src +include_package_data = True +packages = find: +python_requires = >=3.6 +install_requires = + requests + PyYAML + +[options.package_data] +* = *.yml, *.yaml + +[options.entry_points] +console_scripts = + gitlab-watchman = gitlab_watchman:main + +[options.packages.find] +where = src diff --git a/setup.py b/setup.py deleted file mode 100644 index 4f0fb7d..0000000 --- a/setup.py +++ /dev/null @@ -1,42 +0,0 @@ -import os -import gitlab_watchman.__about__ as a -from setuptools import setup - -with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'README.md')) as f: - README = f.read() - -setup( - name='gitlab-watchman', - version=a.__version__, - url=a.__uri__, - license=a.__license__, - classifiers=[ - 
'Intended Audience :: Information Technology', - 'Topic :: Security', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - ], - author=a.__author__, - author_email=a.__email__, - long_description=README, - long_description_content_type='text/markdown', - description=a.__summary__, - install_requires=[ - 'requests', - 'colorama', - 'termcolor', - 'PyYAML', - ], - packages=['gitlab_watchman'], - include_package_data=True, - package_data={ - "": ["*.yml", "*.yaml"], - }, - entry_points={ - 'console_scripts': ['gitlab-watchman=gitlab_watchman:main'] - } -) diff --git a/src/gitlab_watchman/__init__.py b/src/gitlab_watchman/__init__.py new file mode 100644 index 0000000..fcc87f9 --- /dev/null +++ b/src/gitlab_watchman/__init__.py @@ -0,0 +1,222 @@ +import argparse +import calendar +import os +import time +from pathlib import Path +from datetime import date + +from . import gitlab_wrapper +from . import __version__ +from . import logger as logger +from . import signature + +SIGNATURE_PATH = (Path(__file__).parents[1] / 'signatures').resolve() +OUTPUT_LOGGER = logger.StdoutLogger + + +def validate_env_vars() -> bool: + """ Check the required environment variables have been set + + Returns: + True if variables have been set + """ + + if os.environ.get('GITLAB_WATCHMAN_TOKEN') and os.environ.get('GITLAB_WATCHMAN_URL'): + return True + else: + return False + + +def search(gitlab_connection: gitlab_wrapper.GitLabAPIClient, + sig: signature.Signature, + tf: int, + scope: str): + """ Use the appropriate search function to search GitLab based on the contents + of the signature file. 
Output results to stdout + + Args: + gitlab_connection: GitLab API object + sig: Signature object + tf: Timeframe to search for + scope: What sort of GitLab objects to search + """ + + try: + OUTPUT_LOGGER.log_info(f'Searching for {sig.meta.name} in {scope}') + + results = gitlab_wrapper.search(gitlab_connection, OUTPUT_LOGGER, sig, scope, tf) + if results: + for log_data in results: + OUTPUT_LOGGER.log_notification(log_data, scope, sig.meta.name, sig.meta.severity) + except Exception as e: + + OUTPUT_LOGGER.log_critical(e) + + +def init_logger() -> logger.StdoutLogger: + """ Create a logger object + + Returns: + Logging object for outputting results + """ + + return logger.StdoutLogger() + + +def load_signatures() -> [signature.Signature]: + """ Load signatures from YAML files + + Returns: + List containing loaded definitions as signatures objects + """ + + loaded_signatures = [] + try: + for root, dirs, files in os.walk(SIGNATURE_PATH): + for sig_file in files: + sig_path = (Path(root) / sig_file).resolve() + if sig_path.name.endswith('.yaml'): + loaded_def = signature.load_from_yaml(sig_path) + if loaded_def.enabled: + loaded_signatures.append(loaded_def) + return loaded_signatures + except Exception as e: + raise e + + +def main(): + global OUTPUT_LOGGER + try: + OUTPUT_LOGGER = init_logger() + parser = argparse.ArgumentParser(description=__version__.__summary__) + required = parser.add_argument_group('required arguments') + required.add_argument('--timeframe', choices=['d', 'w', 'm', 'a'], dest='time', + help='How far back to search: d = 24 hours w = 7 days, m = 30 days, a = all time', + required=True) + parser.add_argument('--version', action='version', + version=f'gitlab-watchman {__version__.__version__}') + parser.add_argument('--all', dest='everything', action='store_true', + help='Find everything') + parser.add_argument('--blobs', dest='blobs', action='store_true', + help='Search code blobs') + parser.add_argument('--commits', dest='commits', 
action='store_true', + help='Search commits') + parser.add_argument('--wiki-blobs', dest='wiki', action='store_true', + help='Search wiki blobs') + parser.add_argument('--issues', dest='issues', action='store_true', + help='Search issues') + parser.add_argument('--merge-requests', dest='merge', action='store_true', + help='Search merge requests') + parser.add_argument('--milestones', dest='milestones', action='store_true', + help='Search milestones') + parser.add_argument('--notes', dest='notes', action='store_true', + help='Search notes') + parser.add_argument('--snippets', dest='snippets', action='store_true', + help='Search snippets') + + args = parser.parse_args() + tm = args.time + everything = args.everything + blobs = args.blobs + commits = args.commits + wiki = args.wiki + issues = args.issues + merge = args.merge + milestones = args.milestones + notes = args.notes + snippets = args.snippets + + if tm == 'd': + tf = 86400 + elif tm == 'w': + tf = 604800 + elif tm == 'm': + tf = 2592000 + else: + tf = calendar.timegm(time.gmtime()) + 1576800000 + + if not validate_env_vars(): + raise Exception('GITLAB_WATCHMAN_TOKEN environment variable not detected. 
' + 'Ensure environment variable is set') + else: + connection = gitlab_wrapper.initiate_gitlab_connection() + + now = int(time.time()) + today = date.today().strftime('%Y-%m-%d') + start_date = time.strftime('%Y-%m-%d', time.localtime(now - tf)) + + OUTPUT_LOGGER.log_info(f'Version: {__version__.__version__}') + OUTPUT_LOGGER.log_info(f'Searching from {start_date} to {today}') + OUTPUT_LOGGER.log_info('Importing signatures...') + sig_list = load_signatures() + OUTPUT_LOGGER.log_info(f'{len(sig_list)} signatures loaded') + + if everything: + OUTPUT_LOGGER.log_info('Getting everything...') + for sig in sig_list: + if 'blobs' in sig.scope: + search(connection, sig, tf, 'blobs') + if 'commits' in sig.scope: + search(connection, sig, tf, 'commits') + if 'issues' in sig.scope: + search(connection, sig, tf, 'issues') + if 'merge_requests' in sig.scope: + search(connection, sig, tf, 'merge_requests') + if 'wiki_blobs' in sig.scope: + search(connection, sig, tf, 'wiki_blobs') + if 'milestones' in sig.scope: + search(connection, sig, tf, 'milestones') + if 'notes' in sig.scope: + search(connection, sig, tf, 'notes') + if 'snippet_titles' in sig.scope: + search(connection, sig, tf, 'snippet_titles') + else: + if blobs: + OUTPUT_LOGGER.log_info('Searching blobs') + for sig in sig_list: + if 'blobs' in sig.scope: + search(connection, sig, tf, 'blobs') + if commits: + OUTPUT_LOGGER.log_info('Searching commits') + for sig in sig_list: + if 'commits' in sig.scope: + search(connection, sig, tf, 'commits') + if issues: + OUTPUT_LOGGER.log_info('Searching issues') + for sig in sig_list: + if 'issues' in sig.scope: + search(connection, sig, tf, 'issues') + if merge: + OUTPUT_LOGGER.log_info('Searching merge requests') + for sig in sig_list: + if 'merge_requests' in sig.scope: + search(connection, sig, tf, 'merge_requests') + if wiki: + OUTPUT_LOGGER.log_info('Searching wiki blobs') + for sig in sig_list: + if 'wiki_blobs' in sig.scope: + search(connection, sig, tf, 'wiki_blobs') + if 
milestones: + OUTPUT_LOGGER.log_info('Searching milestones') + for sig in sig_list: + if 'milestones' in sig.scope: + search(connection, sig, tf, 'milestones') + if notes: + OUTPUT_LOGGER.log_info('Searching notes') + for sig in sig_list: + if 'notes' in sig.scope: + search(connection, sig, tf, 'notes') + if snippets: + OUTPUT_LOGGER.log_info('Searching snippets') + for sig in sig_list: + if 'snippet_titles' in sig.scope: + search(connection, sig, tf, 'snippet_titles') + + OUTPUT_LOGGER.log_info('++++++Audit completed++++++') + + except Exception as e: + OUTPUT_LOGGER.log_critical(e) + + +if __name__ == '__main__': + main() diff --git a/src/gitlab_watchman/__main__.py b/src/gitlab_watchman/__main__.py new file mode 100644 index 0000000..427963d --- /dev/null +++ b/src/gitlab_watchman/__main__.py @@ -0,0 +1,3 @@ +from . import main + +main() \ No newline at end of file diff --git a/src/gitlab_watchman/__version__.py b/src/gitlab_watchman/__version__.py new file mode 100644 index 0000000..b3d5abb --- /dev/null +++ b/src/gitlab_watchman/__version__.py @@ -0,0 +1,18 @@ +__all__ = [ + '__title__', + '__summary__', + '__uri__', + '__version__', + '__author__', + '__email__', + '__license__', +] + +__title__ = 'GitLab Watchman' +__summary__ = 'Monitoring GitLab for sensitive data shared publicly' +__uri__ = 'https://github.com/PaperMtn/gitlab-watchman' +__version__ = '2.0.0' +__author__ = 'PaperMtn' +__email__ = 'papermtn@protonmail.com' +__license__ = 'GPL-3.0' +__copyright__ = f'2022 {__author__}' diff --git a/src/gitlab_watchman/gitlab_objects/__init__.py b/src/gitlab_watchman/gitlab_objects/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/gitlab_watchman/gitlab_objects/blob.py b/src/gitlab_watchman/gitlab_objects/blob.py new file mode 100644 index 0000000..9f123ab --- /dev/null +++ b/src/gitlab_watchman/gitlab_objects/blob.py @@ -0,0 +1,46 @@ +from dataclasses import dataclass + + +@dataclass +class Blob(object): + """ Class that defines Blob 
objects for GitLab blobs""" + + __slots__ = [ + 'basename', + 'data', + 'path', + 'filename', + 'id', + 'ref', + 'project_id' + ] + + basename: str + data: str + path: str + filename: str + id: str + ref: str + project_id: str + + +def create_from_dict(blob_dict: dict) -> Blob: + """ Create a Blob object from a dict response from the GitLab API + + Args: + blob_dict: dict/JSON format data from GitLab API + Returns: + A new Blob object + """ + + blob_object = Blob( + id=blob_dict.get('id'), + basename=blob_dict.get('basename'), + data=blob_dict.get('data'), + path=blob_dict.get('path'), + filename=blob_dict.get('filename'), + ref=blob_dict.get('ref'), + project_id=blob_dict.get('project_id') + ) + + return blob_object diff --git a/src/gitlab_watchman/gitlab_objects/commit.py b/src/gitlab_watchman/gitlab_objects/commit.py new file mode 100644 index 0000000..74e0016 --- /dev/null +++ b/src/gitlab_watchman/gitlab_objects/commit.py @@ -0,0 +1,64 @@ +from dataclasses import dataclass + + +@dataclass +class Commit(object): + """ Class that defines Commit objects for GitLab commits""" + + __slots__ = [ + 'id', + 'created_at', + 'title', + 'message', + 'author_name', + 'author_email', + 'authored_date', + 'committer_name', + 'committer_email', + 'committed_date', + 'web_url', + 'status', + 'project_id' + ] + + id: str + created_at: str + title: str + message: str + author_name: str + author_email: str + authored_date: str + committer_name: str + committer_email: str + committed_date: str + web_url: str + status: str + project_id: str + + +def create_from_dict(commit_dict: dict) -> Commit: + """ Create a Commit object from a dict response from the GitLab API + + Args: + commit_dict: dict/JSON format data from GitLab API + Returns: + A new Commit object + """ + + commit_object = Commit( + id=commit_dict.get('id'), + created_at=commit_dict.get('created_at'), + title=commit_dict.get('title'), + message=commit_dict.get('message'), + author_name=commit_dict.get('author_name'), + 
author_email=commit_dict.get('author_email'), + authored_date=commit_dict.get('authored_date'), + committer_name=commit_dict.get('committer_name'), + committed_date=commit_dict.get('committed_date'), + committer_email=commit_dict.get('committer_email'), + web_url=commit_dict.get('web_url'), + status=commit_dict.get('status'), + project_id=commit_dict.get('project_id') + ) + + return commit_object diff --git a/src/gitlab_watchman/gitlab_objects/file.py b/src/gitlab_watchman/gitlab_objects/file.py new file mode 100644 index 0000000..904a9ec --- /dev/null +++ b/src/gitlab_watchman/gitlab_objects/file.py @@ -0,0 +1,50 @@ +from dataclasses import dataclass + + +@dataclass +class File(object): + """ Class that defines File objects for GitLab files""" + + __slots__ = [ + 'file_name', + 'file_path', + 'size', + 'encoding', + 'ref', + 'blob_id', + 'commit_id', + 'last_commit_id', + 'content' + ] + + file_name: str + file_path: str + size: str + encoding: str + ref: str + commit_id: str + last_commit_id: str + content: str + + +def create_from_dict(file_dict: dict) -> File: + """ Create a File object from a dict response from the GitLab API + + Args: + file_dict: dict/JSON format data from GitLab API + Returns: + A new File object + """ + + file_object = File( + file_name=file_dict.get('file_name'), + file_path=file_dict.get('file_path'), + size=file_dict.get('size'), + encoding=file_dict.get('encoding'), + ref=file_dict.get('ref'), + commit_id=file_dict.get('commit_id'), + last_commit_id=file_dict.get('last_commit_id'), + content=file_dict.get('content') + ) + + return file_object diff --git a/src/gitlab_watchman/gitlab_objects/issue.py b/src/gitlab_watchman/gitlab_objects/issue.py new file mode 100644 index 0000000..12729f7 --- /dev/null +++ b/src/gitlab_watchman/gitlab_objects/issue.py @@ -0,0 +1,75 @@ +from dataclasses import dataclass + +from . 
import user + + +@dataclass +class Issue(object): + """ Class that defines Issue objects for GitLab issues""" + + __slots__ = [ + 'id', + 'iid', + 'project_id', + 'title', + 'description', + 'state', + 'created_at', + 'updated_at', + 'closed_at', + 'closed_by', + 'author', + 'type', + 'author', + 'confidential', + 'web_url' + ] + + id: str + iid: str + project_id: str + title: str + description: str + state: str + created_at: str + updated_at: str + closed_by: user.User + closed_at: str + author: str + type: str + author: user.User + confidential: str + web_url: str + + +def create_from_dict(issue_dict: dict) -> Issue: + """ Create an Issue object from a dict response from the GitLab API + + Args: + issue_dict: dict/JSON format data from GitLab API + Returns: + A new Issue object + """ + if issue_dict.get('closed_by'): + closed_by = user.create_from_dict(issue_dict.get('closed_by')) + else: + closed_by = None + + issue_object = Issue( + id=issue_dict.get('id'), + iid=issue_dict.get('iid'), + project_id=issue_dict.get('project_id'), + title=issue_dict.get('title'), + description=issue_dict.get('description'), + state=issue_dict.get('state'), + created_at=issue_dict.get('created_at'), + updated_at=issue_dict.get('updated_at'), + closed_by=closed_by, + closed_at=issue_dict.get('closed_at'), + type=issue_dict.get('type'), + author=user.create_from_dict(issue_dict.get('author')), + confidential=issue_dict.get('confidential'), + web_url=issue_dict.get('web_url'), + ) + + return issue_object diff --git a/src/gitlab_watchman/gitlab_objects/merge_request.py b/src/gitlab_watchman/gitlab_objects/merge_request.py new file mode 100644 index 0000000..3c4a710 --- /dev/null +++ b/src/gitlab_watchman/gitlab_objects/merge_request.py @@ -0,0 +1,82 @@ +from dataclasses import dataclass + +from . 
import user + + +@dataclass +class MergeRequest(object): + """ Class that defines MergeRequest objects for GitLab merge requests""" + + __slots__ = [ + 'id', + 'iid', + 'project_id', + 'title', + 'description', + 'state', + 'created_at', + 'updated_at', + 'merged_by', + 'merged_at', + 'target_branch', + 'source_branch', + 'author', + 'source_project_id', + 'target_project_id', + 'merge_status', + 'web_url' + ] + + id: str + iid: str + project_id: str + title: str + description: str + state: str + created_at: str + updated_at: str + merged_by: user.User + merged_at: str + target_branch: str + source_branch: str + author: user.User + source_project_id: str + target_project_id: str + merge_status: str + web_url: str + + +def create_from_dict(mr_dict: dict) -> MergeRequest: + """ Create a MergeRequest object from a dict response from the GitLab API + + Args: + mr_dict: dict/JSON format data from GitLab API + Returns: + A new MergeRequest object + """ + if mr_dict.get('merged_by'): + merged_by = user.create_from_dict(mr_dict.get('merged_by')) + else: + merged_by = None + + mr_object = MergeRequest( + id=mr_dict.get('id'), + iid=mr_dict.get('iid'), + project_id=mr_dict.get('project_id'), + title=mr_dict.get('title'), + description=mr_dict.get('description'), + state=mr_dict.get('state'), + created_at=mr_dict.get('created_at'), + updated_at=mr_dict.get('updated_at'), + merged_by=merged_by, + merged_at=mr_dict.get('merged_at'), + target_branch=mr_dict.get('target_branch'), + source_branch=mr_dict.get('source_branch'), + author=user.create_from_dict(mr_dict.get('author')), + source_project_id=mr_dict.get('source_project_id'), + target_project_id=mr_dict.get('target_project_id'), + merge_status=mr_dict.get('merge_status'), + web_url=mr_dict.get('web_url'), + ) + + return mr_object diff --git a/src/gitlab_watchman/gitlab_objects/milestone.py b/src/gitlab_watchman/gitlab_objects/milestone.py new file mode 100644 index 0000000..38f3f78 --- /dev/null +++ 
b/src/gitlab_watchman/gitlab_objects/milestone.py @@ -0,0 +1,61 @@ +from dataclasses import dataclass + + +@dataclass +class Milestone(object): + """ Class that defines Milestone objects for GitLab milestones""" + + __slots__ = [ + 'id', + 'iid', + 'project_id', + 'title', + 'description', + 'state', + 'created_at', + 'updated_at', + 'due_date', + 'start_date', + 'expired', + 'web_url' + ] + + id: str + iid: str + project_id: str + title: str + description: str + state: str + created_at: str + updated_at: str + due_date: str + start_date: str + expired: str + web_url: str + + +def create_from_dict(milestone_dict: dict) -> Milestone: + """ Create a Milestone object from a dict response from the GitLab API + + Args: + milestone_dict: dict/JSON format data from GitLab API + Returns: + A new Milestone object + """ + + milestone_object = Milestone( + id=milestone_dict.get('id'), + iid=milestone_dict.get('iid'), + title=milestone_dict.get('title'), + description=milestone_dict.get('description'), + state=milestone_dict.get('state'), + created_at=milestone_dict.get('created_at'), + updated_at=milestone_dict.get('updated_at'), + due_date=milestone_dict.get('due_date'), + start_date=milestone_dict.get('start_date'), + expired=milestone_dict.get('expired'), + web_url=milestone_dict.get('web_url'), + project_id=milestone_dict.get('project_id') + ) + + return milestone_object diff --git a/src/gitlab_watchman/gitlab_objects/note.py b/src/gitlab_watchman/gitlab_objects/note.py new file mode 100644 index 0000000..3386d1a --- /dev/null +++ b/src/gitlab_watchman/gitlab_objects/note.py @@ -0,0 +1,83 @@ +from dataclasses import dataclass + +from . 
import user  # completes the package-relative "from . import user" begun on the previous line


@dataclass
class Note(object):
    """Container for a GitLab note (comment) built from a GitLab API response.

    Each dataclass field holds the value of the identically named key of the
    API response. ``resolved`` is a plain slot rather than a dataclass field:
    it is not a constructor argument and is set by create_from_dict.
    """

    __slots__ = [
        'id',
        'type',
        'body',
        'attachment',
        'author',
        'created_at',
        'updated_at',
        'system',
        'noteable_id',
        'noteable_type',
        'commit_id',
        'resolvable',
        'resolved',
        'resolved_by',
        'resolved_at',
        'confidential',
        'noteable_iid',
        'command_changes'
    ]

    id: str
    type: str
    body: str
    attachment: str or bool  # NOTE: the expression "str or bool" evaluates to plain str
    author: user.User
    created_at: str
    updated_at: str
    system: str
    noteable_id: str
    noteable_type: str
    commit_id: str
    resolvable: bool
    resolved_by: user.User  # None when the response carries no 'resolved_by' value
    resolved_at: str
    confidential: str
    noteable_iid: str
    command_changes: str


def create_from_dict(note_dict: dict) -> Note:
    """ Create a Note object from a dict response from the GitLab API

    Args:
        note_dict: dict/JSON format data from GitLab API
    Returns:
        A new Note object. ``resolved_by`` is None when the response has no
        (or an empty) 'resolved_by' entry.
    """
    resolved_by_dict = note_dict.get('resolved_by')
    resolved_by = user.create_from_dict(resolved_by_dict) if resolved_by_dict else None

    note_object = Note(
        id=note_dict.get('id'),
        type=note_dict.get('type'),
        body=note_dict.get('body'),
        attachment=note_dict.get('attachment'),
        # 'or {}' also covers an explicit null author, which .get(key, {}) lets through
        author=user.create_from_dict(note_dict.get('author') or {}),
        created_at=note_dict.get('created_at'),
        updated_at=note_dict.get('updated_at'),
        system=note_dict.get('system'),
        noteable_id=note_dict.get('noteable_id'),
        noteable_type=note_dict.get('noteable_type'),
        commit_id=note_dict.get('commit_id'),
        resolvable=note_dict.get('resolvable'),
        resolved_by=resolved_by,
        resolved_at=note_dict.get('resolved_at'),
        confidential=note_dict.get('confidential'),
        noteable_iid=note_dict.get('noteable_iid'),
        command_changes=note_dict.get('command_changes'),
    )
    # The 'resolved' slot is declared on the class but is not a constructor
    # argument; fill it here so reading note.resolved does not raise.
    note_object.resolved = note_dict.get('resolved')

    return note_object
# diff --git a/src/gitlab_watchman/gitlab_objects/project.py b/src/gitlab_watchman/gitlab_objects/project.py new file mode 100644 index 0000000..0a324fd --- /dev/null +++
# b/src/gitlab_watchman/gitlab_objects/project.py @@ -0,0 +1,81 @@
from dataclasses import dataclass
from typing import List, Optional

from . import user


@dataclass
class Namespace(object):
    """Namespace (group or user) that owns a GitLab project.

    ``members`` and ``owner`` are placeholders when the object is created by
    create_from_dict; they are not read from the API response.
    """

    id: str
    name: str
    path: str
    kind: str
    full_path: str
    parent_id: str
    web_url: str
    members: Optional[List[user.User]]
    owner: Optional[user.User]


@dataclass
class Project(object):
    """Container for a GitLab project built from a GitLab API response."""

    __slots__ = [
        'id',
        'description',
        'name',
        'name_with_namespace',
        'path',
        'path_with_namespace',
        'created_at',
        'web_url',
        'last_activity_at',
        'namespace',
    ]

    id: str
    description: str
    name: str
    name_with_namespace: str
    path: str
    path_with_namespace: str
    created_at: str
    web_url: str
    last_activity_at: str
    namespace: Namespace


def create_from_dict(project_dict: dict) -> Project:
    """ Create a Project object from a dict response from the GitLab API

    Args:
        project_dict: dict/JSON format data from GitLab API
    Returns:
        A new Project object. Its namespace has an empty ``members`` list and
        no ``owner``; neither is read from project_dict.
    """

    # A response without a 'namespace' entry yields a Namespace of None values
    # instead of failing on None.get(...).
    namespace_dict = project_dict.get('namespace') or {}

    return Project(
        id=project_dict.get('id'),
        description=project_dict.get('description'),
        name=project_dict.get('name'),
        name_with_namespace=project_dict.get('name_with_namespace'),
        path=project_dict.get('path'),
        created_at=project_dict.get('created_at'),
        path_with_namespace=project_dict.get('path_with_namespace'),
        web_url=project_dict.get('web_url'),
        last_activity_at=project_dict.get('last_activity_at'),
        namespace=Namespace(
            id=namespace_dict.get('id'),
            name=namespace_dict.get('name'),
            path=namespace_dict.get('path'),
            kind=namespace_dict.get('kind'),
            full_path=namespace_dict.get('full_path'),
            parent_id=namespace_dict.get('parent_id'),
            web_url=namespace_dict.get('web_url'),
            members=[],
            owner=None  # a single user or None, never a list
        )
    )
# diff --git
# a/src/gitlab_watchman/gitlab_objects/snippet.py b/src/gitlab_watchman/gitlab_objects/snippet.py new file mode 100644 index 0000000..2ef6866 --- /dev/null +++ b/src/gitlab_watchman/gitlab_objects/snippet.py @@ -0,0 +1,77 @@
from dataclasses import dataclass

from . import user


@dataclass
class Snippet(object):
    """Container for a GitLab snippet built from a GitLab API response."""

    __slots__ = [
        'id',
        'title',
        'description',
        'visibility',
        'updated_at',
        'created_at',
        'web_url',
        'author',
        'file_name',
        'files',
    ]

    id: str
    title: str
    description: str
    visibility: str or bool  # NOTE: the expression "str or bool" evaluates to plain str
    created_at: str
    updated_at: str
    web_url: str
    author: user.User
    file_name: str
    files: list  # list of File objects, or None when the response lists no files


@dataclass
class File(object):
    """A single file attached to a snippet: its path and raw download URL."""

    __slots__ = [
        'path',
        'raw_url',
    ]

    path: str
    raw_url: str


def create_from_dict(snip_dict: dict) -> Snippet:
    """ Create a Snippet object from a dict response from the GitLab API

    Args:
        snip_dict: dict/JSON format data from GitLab API
    Returns:
        A new Snippet object. ``files`` is None when the response has no
        (or an empty) 'files' entry.
    """
    raw_files = snip_dict.get('files')
    if raw_files:
        file_list = [File(path=f.get('path'), raw_url=f.get('raw_url')) for f in raw_files]
    else:
        file_list = None

    return Snippet(
        id=snip_dict.get('id'),
        title=snip_dict.get('title'),
        description=snip_dict.get('description'),
        visibility=snip_dict.get('visibility'),
        # 'or {}' also covers an explicit null author, which .get(key, {}) lets through
        author=user.create_from_dict(snip_dict.get('author') or {}),
        created_at=snip_dict.get('created_at'),
        updated_at=snip_dict.get('updated_at'),
        web_url=snip_dict.get('web_url'),
        file_name=snip_dict.get('file_name'),
        files=file_list
    )


# diff --git a/src/gitlab_watchman/gitlab_objects/user.py b/src/gitlab_watchman/gitlab_objects/user.py new file mode 100644 index 0000000..eef54e8 --- /dev/null +++ b/src/gitlab_watchman/gitlab_objects/user.py @@ -0,0 +1,38 @@
from dataclasses import dataclass


@dataclass
class User(object):
    """ Class that defines User objects for GitLab users"""

    __slots__ = [
        'id',
        'name',
        'username',
        'state',
        'web_url'
    ]

    id: str
    name: str
    username: str
    state: str
    web_url: str


def create_from_dict(user_dict: dict) -> User:
    """ Create a User object from a dict response from the GitLab API

    Args:
        user_dict: dict/JSON format data from GitLab API. None is treated
            like an empty dict.
    Returns:
        A new User object; keys missing from user_dict become None
    """

    # Callers pass nested values such as mr_dict.get('author'), which can be None.
    user_dict = user_dict or {}

    return User(
        id=user_dict.get('id'),
        name=user_dict.get('name'),
        username=user_dict.get('username'),
        state=user_dict.get('state'),
        web_url=user_dict.get('web_url')
    )


# diff --git a/src/gitlab_watchman/gitlab_objects/wiki_blob.py b/src/gitlab_watchman/gitlab_objects/wiki_blob.py new file mode 100644 index 0000000..b452636 --- /dev/null +++ b/src/gitlab_watchman/gitlab_objects/wiki_blob.py @@ -0,0 +1,46 @@
from dataclasses import dataclass


@dataclass
class WikiBlob(object):
    """ Class that defines WikiBlob objects for GitLab blobs"""

    __slots__ = [
        'basename',
        'data',
        'path',
        'filename',
        'id',
        'ref',
        'project_id'
    ]

    basename: str
    data: str
    path: str
    filename: str
    id: str
    ref: str
    project_id: str


def create_from_dict(blob_dict: dict) -> WikiBlob:
    """ Create a WikiBlob object from a dict response from the GitLab API

    Args:
        blob_dict: dict/JSON format data from GitLab API
    Returns:
        A new WikiBlob object; keys missing from blob_dict become None
    """

    return WikiBlob(
        id=blob_dict.get('id'),
        basename=blob_dict.get('basename'),
        data=blob_dict.get('data'),
        path=blob_dict.get('path'),
        filename=blob_dict.get('filename'),
        ref=blob_dict.get('ref'),
        project_id=blob_dict.get('project_id')
    )


# diff --git a/src/gitlab_watchman/gitlab_wrapper.py b/src/gitlab_watchman/gitlab_wrapper.py new file mode 100644 index 0000000..f15927c --- /dev/null +++ b/src/gitlab_watchman/gitlab_wrapper.py @@ -0,0 +1,737 @@
import calendar
import dataclasses
import json
import os
import re
import time
import requests
import yaml
import multiprocessing
from requests.exceptions import HTTPError
from requests.packages.urllib3.util import Retry
from requests.adapters import HTTPAdapter
from urllib.parse import quote

from . import logger as logger
from . import signature
from .gitlab_objects import note
from .gitlab_objects import snippet
from .gitlab_objects import project
from .gitlab_objects import blob
from .gitlab_objects import wiki_blob
from .gitlab_objects import file
from .gitlab_objects import commit
from .gitlab_objects import user
from .gitlab_objects import merge_request
from .gitlab_objects import milestone
from .gitlab_objects import issue

# Default search window in seconds: seconds since the epoch at import time plus
# 1576800000, so "now - ALL_TIME" is negative and no timestamp is filtered out.
ALL_TIME = calendar.timegm(time.gmtime()) + 1576800000


class GitLabAPIClient(object):
    """Thin wrapper around the GitLab v4 REST API using a shared requests session."""

    def __init__(self, token, base_url):
        """ Create a client for one GitLab instance

        Args:
            token: Access token sent as a Bearer token on every request
            base_url: Base URL of the GitLab instance
        """
        self.token = token
        # Trailing slashes (and stray backslashes) would otherwise give
        # '<base>//api/v4/...' once the path is joined in make_request.
        self.base_url = base_url.rstrip('/\\')
        self.per_page = 100
        self.session = session = requests.session()
        session.mount(self.base_url, HTTPAdapter(max_retries=Retry(connect=3, backoff_factor=1)))
        session.headers.update({'Authorization': f'Bearer {self.token}'})

    @staticmethod
    def _error_from_response(response):
        """Return the 'error' text nested under 'message' in a JSON error body, or None."""
        try:
            payload = response.json()
        except ValueError:
            return None
        message = payload.get('message') if isinstance(payload, dict) else None
        # 'message' can be a plain string; only a dict can carry an 'error' entry
        return message.get('error') if isinstance(message, dict) else None

    def make_request(self, url, params=None, data=None, method='GET', verify_ssl=True):
        """ Send a request to the GitLab API

        A 500, 502 or 429 response is retried once after a 30 second pause.

        Args:
            url: Path below 'api/v4' to request
            params: Query string parameters
            data: Request body
            method: HTTP method
            verify_ssl: Whether to verify the TLS certificate
        Returns:
            The requests response object, or None when a non-HTTP error
            (which is printed) occurs while sending the request
        Raises:
            Exception: on a 400 response whose JSON body carries message.error
            HTTPError: on any other unsuccessful response, including a failed retry
        """
        try:
            relative_url = '/'.join((self.base_url, 'api/v4', url))
            response = self.session.request(method, relative_url, params=params, data=data, verify=verify_ssl)
            response.raise_for_status()

            return response

        except HTTPError as http_error:
            status = response.status_code
            if status == 400:
                error = self._error_from_response(response)
                if error:
                    raise Exception(error) from http_error
                raise http_error
            if status in (500, 502, 429):
                print('Rate limit hit, cooling off...' if status == 429 else 'Retrying...')
                time.sleep(30)
                response = self.session.request(method, relative_url, params=params, data=data, verify=verify_ssl)
                response.raise_for_status()

                return response
            raise http_error

        except Exception as e:
            # Best effort kept from the original: non-HTTP failures are only
            # printed and the method returns None.
            print(e)

    def get_user_by_id(self, user_id: str) -> json:
        """ Get a GitLab user by their ID

        Args:
            user_id: ID of the user
        Returns:
            JSON object containing user data
        """
        return self.make_request(f'users/{user_id}').json()

    def get_user_by_username(self, username: str) -> json:
        """ Get a GitLab user by their username

        Args:
            username: Username of the user
        Returns:
            JSON object containing user data
        """
        # Passed as a query parameter so requests URL-encodes the value
        return self.make_request('users', params={'username': username}).json()

    def get_token_user(self) -> json:
        """ Get the details of the user whose token is being used

        Returns:
            JSON object containing user data
        """
        return self.make_request('user').json()

    def get_licence_info(self) -> json:
        """ Get information on the GitLab licence

        Returns:
            JSON object containing licence information
        """
        return self.make_request('license').json()

    def get_project(self, project_id: str) -> json:
        """ Get a GitLab project by its ID

        Args:
            project_id: ID of the project to return
        Returns:
            JSON object with project information
        """
        return self.make_request(f'projects/{project_id}').json()

    def get_variables(self, project_id: str) -> json:
        """ Get CICD variables for a project

        Args:
            project_id: ID of the project to search
        Returns:
            JSON object containing variable information
        """
        return self.make_request(f'projects/{project_id}/variables').json()

    def get_project_members(self, project_id: str) -> json:
        """ Get members of a project

        Args:
            project_id: ID of the project to retrieve
        Returns:
            JSON object containing project members
        """
        return self.make_request(f'projects/{project_id}/members').json()

    def get_file(self,
                 project_id: str,
                 path: str,
                 ref: str) -> json:
        """ Get a file stored in a project

        Args:
            project_id: ID of the project to retrieve
            path: Full path to the file; it is URL encoded by this method
            ref: The name of branch, tag or commit
        Returns:
            JSON object with file information
        """
        encoded_path = quote(path, safe='')
        # ref goes through params so characters such as '/' or '#' are encoded too
        return self.make_request(
            f'projects/{project_id}/repository/files/{encoded_path}', params={'ref': ref}).json()

    def get_group_members(self, group_id: str) -> json:
        """ Get members of a GitLab group

        Args:
            group_id: ID of the group to get members for
        Returns:
            JSON object with group member information
        """
        return self.make_request(f'groups/{group_id}/members').json()

    def get_commit(self,
                   project_id: str,
                   commit_id: str) -> json:
        """ Get commit information

        Args:
            project_id: ID for the project the commit exists in
            commit_id: ID of the commit
        Returns:
            JSON object containing commit data
        """
        return self.make_request(f'projects/{project_id}/repository/commits/{commit_id}').json()

    def get_wiki_page(self,
                      project_id: str,
                      slug: str) -> json:
        """ Get a wiki page of a project

        Args:
            project_id: ID of the project the wiki page is in
            slug: URL slug for the wiki page
        Returns:
            JSON object containing wiki data
        """
        return self.make_request(f'projects/{project_id}/wikis/{slug}').json()

    def global_search(self,
                      search_term: str = '',
                      search_scope: str = '') -> [json]:
        """ Wrapper for the GitLab advanced search API. Uses search term and scope to
        decide what to search for.

        Args:
            search_term: Search string to use
            search_scope: Scope of what to look for (blobs, commits etc.)
        Returns:
            List containing JSON objects with matches for the search string
        """

        params = {
            'scope': search_scope,
            'search': search_term,
            'per_page': self.per_page,
            'page': 1
        }

        # The first page is kept, not only used to read the page count
        response = self.make_request('search', params=params)
        results = list(response.json())

        page_count = int(response.headers.get('X-Total-Pages') or 1)
        for page in range(2, page_count + 1):
            params['page'] = page
            results.extend(self.make_request('search', params=params).json())

        return results

    def get_all_projects(self) -> list:
        """ Get all projects from the 'projects' endpoint, following keyset
        pagination via the 'link' response header

        Returns:
            List of all projects
        """

        params = {
            'pagination': 'keyset',
            'per_page': self.per_page,
            'order_by': 'id',
            'sort': 'asc'
        }

        response = self.make_request('projects', params=params)
        results = list(response.json())  # the first page counts too

        while True:
            # The header names the next page by an id_after value; stop when
            # the header is missing or no longer carries one.
            next_page = re.search(r'id_after=(\d+)', response.headers.get('link', ''))
            if not next_page:
                break
            response = self.make_request('projects', params=dict(params, id_after=next_page.group(1)))
            results.extend(response.json())

        return results


def _load_setting(env_var: str, config_key: str) -> str:
    """Read a setting from the environment, falling back to ~/watchman.conf (YAML)."""

    try:
        return os.environ[env_var]
    except KeyError:
        with open(f'{os.path.expanduser("~")}/watchman.conf') as yaml_file:
            config = yaml.safe_load(yaml_file)

        return config.get('gitlab_watchman').get(config_key)


def initiate_gitlab_connection() -> GitLabAPIClient:
    """ Create a GitLab API client object

    The token and URL come from GITLAB_WATCHMAN_TOKEN and GITLAB_WATCHMAN_URL,
    or from the 'gitlab_watchman' section of ~/watchman.conf when unset.

    Returns:
        GitLab API client object
    """

    token = _load_setting('GITLAB_WATCHMAN_TOKEN', 'token')
    url = _load_setting('GITLAB_WATCHMAN_URL', 'url')

    return GitLabAPIClient(token, url)


def _convert_time(timestamp: str) -> int:
    """Convert ISO 8601 timestamp to epoch

    Args:
        timestamp: Timestamp with a UTC offset, with or without fractional seconds
    Returns:
        Seconds since the epoch
    Raises:
        ValueError: if the timestamp matches neither supported layout
    """
    from datetime import datetime  # local import: the file-level import block is left untouched

    # An aware datetime honours the parsed offset; time.mktime would treat the
    # value as local time, while callers compare against a UTC epoch.
    for pattern in ('%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%dT%H:%M:%S%z'):
        try:
            return int(datetime.strptime(timestamp, pattern).timestamp())
        except ValueError:
            continue
    raise ValueError(f'Unsupported timestamp format: {timestamp!r}')


def _deduplicate(input_list: list) -> [dict]:
    """ Removes duplicates where results are returned by multiple queries
    Nested class handles JSON encoding for dataclass objects

    Args:
        input_list: List of dataclass objects
    Returns:
        List of JSON objects with duplicates removed, in first-seen order
    """

    class EnhancedJSONEncoder(json.JSONEncoder):
        def default(self, o):
            if dataclasses.is_dataclass(o):
                return dataclasses.asdict(o)
            return super().default(o)

    # Equal entries serialise to the same string; dict keys keep the first
    # occurrence and, unlike a set, a stable order.
    unique = dict.fromkeys(
        json.dumps(entry, sort_keys=True, cls=EnhancedJSONEncoder) for entry in input_list)

    return [json.loads(serialised) for serialised in unique]


def _split_to_chunks(input_list, no_of_chunks):
    """Split the input list into n amount of chunks (at least one)"""

    # A count of 0 (e.g. cpu_count() - 1 on a single core) would yield no chunks
    # at all and silently drop every element.
    no_of_chunks = max(1, no_of_chunks)
    return (input_list[i::no_of_chunks] for i in range(no_of_chunks))


def find_group_owners(group_members: list) -> list:
    """ Return all users who are both active and group Owners

    Args:
        group_members: Members of a GitLab group
    Returns:
        List of owners of a group
    """

    return [
        {
            'user_id': member.get('id'),
            'name': member.get('name'),
            'username': member.get('username'),
            'access_level': 'Owner'
        }
        for member in group_members
        if member.get('state') == 'active' and member.get('access_level') == 50
    ]


def find_user_owner(user_list: list) -> list:
    """ Return user who owns a namespace

    Args:
        user_list: List of users
    Returns:
        List of formatted users owning a namespace
    """

    return [
        {
            'user_id': owner.get('id'),
            'name': owner.get('name'),
            'username': owner.get('username'),
            'state': owner.get('state')
        }
        for owner in user_list
    ]


def search(gitlab: GitLabAPIClient,
           log_handler: logger.StdoutLogger,
           sig: signature.Signature,
           scope: str,
           timeframe: int = ALL_TIME) -> list:
    """ Uses the Search API to get search results for the given scope. These results are then split into
    (number of cores - 1, minimum 1) chunks, and multiprocessing is used to filter them concurrently
    against the signature regex using the worker function for the scope

    Args:
        gitlab: GitLab API object
        log_handler: Logger object for outputting results
        sig: Signature object
        scope: What sort of GitLab objects to search
        timeframe: Timeframe to search in
    Returns:
        List of JSON formatted results, empty if none are found
    """

    workers = {
        'blobs': _blob_worker,
        'wiki_blobs': _wiki_blob_worker,
        'commits': _commit_worker,
        'issues': _issue_worker,
        'milestones': _milestone_worker,
        'notes': _note_worker,
        'snippet_titles': _snippet_worker,
    }
    target = workers.get(scope, _merge_request_worker)
    regex = re.compile(sig.pattern)
    chunk_count = max(1, multiprocessing.cpu_count() - 1)

    matches = []
    # One manager process for the whole search instead of one per query
    with multiprocessing.Manager() as manager:
        for query in sig.strings:
            search_result_list = gitlab.global_search(query, search_scope=scope)
            query_formatted = query.replace('"', '')
            log_handler.log_info(
                f'{len(search_result_list)} {scope} found matching search term: {query_formatted}')
            result = manager.list()

            processes = []
            for search_list in _split_to_chunks(search_result_list, chunk_count):
                p = multiprocessing.Process(target=target,
                                            args=(gitlab, search_list, regex, timeframe, result))
                processes.append(p)
                p.start()

            for process in processes:
                process.join()

            # Copy out of the proxy while the manager is still running
            matches.extend(list(result))

    if matches:
        results = _deduplicate(matches)
        log_handler.log_info(f'{len(results)} total matches found after filtering')
        return results

    log_handler.log_info('No matches found after filtering')
    return []


def _populate_project_owners(gitlab: GitLabAPIClient,
                             project_object: project.Project) -> project.Project:
    """ Populates a given project with either the user who owns it if the namespace kind == user,
    or members of the group who are owners if the namespace kind == group

    Args:
        gitlab: GitLab API object
        project_object: Project to populate the owners of
    Returns:
        Project object with owners populated
    """

    namespace = project_object.namespace
    if namespace.kind == 'group':
        owners = find_group_owners(gitlab.get_group_members(namespace.id))
        if owners:
            # The formatted owner dicts are stored as-is. The original also built
            # a list of User objects here but never used it; that dead code is gone.
            namespace.members = owners
            namespace.owner = None
    elif namespace.kind == 'user':
        namespace_user = gitlab.get_user_by_username(namespace.full_path)
        if namespace_user:
            namespace.owner = user.create_from_dict(namespace_user[0])
            namespace.members = None

    return project_object


def _blob_worker(gitlab: GitLabAPIClient,
                 blob_list: list,
                 regex: re.Pattern,
                 timeframe: int,
                 results: list) -> list:
    """ MULTIPROCESSING WORKER - Iterates through a list of blobs to find matches against the regex

    Args:
        gitlab: GitLab API object
        blob_list: List of blobs to process
        regex: Regex pattern to search for
        timeframe: Timeframe to search in
        results: List of output results
    Returns:
        Multiprocessing list to be combined by the parent process
    """

    now = calendar.timegm(time.gmtime())
    for b in blob_list:
        blob_object = blob.create_from_dict(b)
        project_object = project.create_from_dict(gitlab.get_project(blob_object.project_id))
        file_object = file.create_from_dict(gitlab.get_file(blob_object.project_id, blob_object.path, blob_object.ref))
        if not file_object:
            continue
        commit_object = commit.create_from_dict(
            gitlab.get_commit(blob_object.project_id, file_object.last_commit_id))
        if _convert_time(commit_object.committed_date) <= (now - timeframe):
            continue
        match = regex.search(str(blob_object.data))
        if match:
            results.append({
                'match_string': match.group(0),
                'blob': blob_object,
                'commit': commit_object,
                'project': _populate_project_owners(gitlab, project_object),
            })

    return results


def _wiki_blob_worker(gitlab: GitLabAPIClient,
                      blob_list: list,
                      regex: re.Pattern,
                      timeframe: int,
                      results: list) -> list:
    """ MULTIPROCESSING WORKER - Iterates through a list of wiki_blobs to find matches against the regex

    Args:
        gitlab: GitLab API object
        blob_list: List of wiki_blobs to process
        regex: Regex pattern to search for
        timeframe: Timeframe to search in
        results: List of output results
    Returns:
        Multiprocessing list to be combined by the parent process
    """

    now = calendar.timegm(time.gmtime())
    for wb in blob_list:
        wikiblob_object = wiki_blob.create_from_dict(wb)
        project_object = project.create_from_dict(gitlab.get_project(wikiblob_object.project_id))
        if _convert_time(project_object.last_activity_at) <= (now - timeframe):
            continue
        match = regex.search(str(wikiblob_object.data))
        if match:
            results.append({
                'match_string': match.group(0),
                'wiki_blob': wikiblob_object,
                'project': _populate_project_owners(gitlab, project_object),
            })

    return results


def _commit_worker(gitlab: GitLabAPIClient,
                   commit_list: list,
                   regex: re.Pattern,
                   timeframe: int,
                   results: list) -> list:
    """ MULTIPROCESSING WORKER - Iterates through a list of commits to find matches against the regex

    Args:
        gitlab: GitLab API object
        commit_list: List of commits to process
        regex: Regex pattern to search for
        timeframe: Timeframe to search in
        results: List of output results
    Returns:
        Multiprocessing list to be combined by the parent process
    """

    now = calendar.timegm(time.gmtime())
    for c in commit_list:
        commit_object = commit.create_from_dict(c)
        if _convert_time(commit_object.committed_date) <= (now - timeframe):
            continue
        match = regex.search(str(commit_object.message))
        if match:
            project_object = project.create_from_dict(gitlab.get_project(commit_object.project_id))
            results.append({
                'match_string': match.group(0),
                'commit': commit_object,
                'project': _populate_project_owners(gitlab, project_object)
            })

    return results


def _issue_worker(gitlab: GitLabAPIClient,
                  issue_list: list,
                  regex: re.Pattern,
                  timeframe: int,
                  results: list):
    """ MULTIPROCESSING WORKER - Iterates through a list of issues to find matches against the regex

    Args:
        gitlab: GitLab API object
        issue_list: List of issues to process
        regex: Regex pattern to search for
        timeframe: Timeframe to search in
        results: List of output results
    Returns:
        Multiprocessing list to be combined by the parent process
    """

    now = calendar.timegm(time.gmtime())
    for i in issue_list:
        issue_object = issue.create_from_dict(i)
        if _convert_time(issue_object.updated_at) <= (now - timeframe):
            continue
        match = regex.search(str(issue_object.description))
        if match:
            project_object = project.create_from_dict(gitlab.get_project(issue_object.project_id))
            results.append({
                'match_string': match.group(0),
                'issue': issue_object,
                'project': _populate_project_owners(gitlab, project_object)
            })

    return results


def _milestone_worker(gitlab: GitLabAPIClient,
                      milestone_list: list,
                      regex: re.Pattern,
                      timeframe: int,
                      results: list):
    """ MULTIPROCESSING WORKER - Iterates through a list of milestones to find matches against the regex

    Args:
        gitlab: GitLab API object
        milestone_list: List of milestones to process
        regex: Regex pattern to search for
        timeframe: Timeframe to search in
        results: List of output results
    Returns:
        Multiprocessing list to be combined by the parent process
    """

    now = calendar.timegm(time.gmtime())
    for m in milestone_list:
        milestone_object = milestone.create_from_dict(m)
        if _convert_time(milestone_object.updated_at) <= (now - timeframe):
            continue
        match = regex.search(str(milestone_object.description))
        if match:
            project_object = project.create_from_dict(gitlab.get_project(milestone_object.project_id))
            results.append({
                'match_string': match.group(0),
                'milestone': milestone_object,
                'project': _populate_project_owners(gitlab, project_object)
            })

    return results


def _merge_request_worker(gitlab: GitLabAPIClient,
                          merge_request_list: list,
                          regex: re.Pattern,
                          timeframe: int,
                          results: list):
    """ MULTIPROCESSING WORKER - Iterates through a list of merge requests to find matches against the regex

    Args:
        gitlab: GitLab API object
        merge_request_list: List of merge requests to process
        regex: Regex pattern to search for
        timeframe: Timeframe to search in
        results: List of output results
    Returns:
        Multiprocessing list to be combined by the parent process
    """

    now = calendar.timegm(time.gmtime())
    for mr in merge_request_list:
        mr_object = merge_request.create_from_dict(mr)
        if _convert_time(mr_object.updated_at) <= (now - timeframe):
            continue
        match = regex.search(str(mr_object.description))
        if match:
            project_object = project.create_from_dict(gitlab.get_project(mr_object.project_id))
            results.append({
                'match_string': match.group(0),
                'merge_request': mr_object,
                'project': _populate_project_owners(gitlab, project_object)
            })

    return results


def _note_worker(gitlab_object: GitLabAPIClient,
                 note_list: list,
                 regex: re.Pattern,
                 timeframe: int,
                 results: list):
    """ MULTIPROCESSING WORKER - Iterates through a list of notes to find matches against the regex

    Args:
        gitlab_object: GitLab API object (not used by this worker)
        note_list: List of notes to process
        regex: Regex pattern to search for
        timeframe: Timeframe to search in
        results: List of output results
    Returns:
        Multiprocessing list to be combined by the parent process
    """

    now = calendar.timegm(time.gmtime())
    for n in note_list:
        note_object = note.create_from_dict(n)
        if _convert_time(note_object.created_at) <= (now - timeframe):
            continue
        match = regex.search(str(note_object.body))
        if match:
            results.append({
                'note': note_object,
                'match_string': match.group(0)
            })

    return results


def _snippet_worker(gitlab_object: GitLabAPIClient,
                    snippet_list: list,
                    regex: re.Pattern,
                    timeframe: int,
                    results: list):
    """ MULTIPROCESSING WORKER - Iterates through a list of snippets to find matches against the regex.
    The title is checked first, then the description.

    Args:
        gitlab_object: GitLab API object (not used by this worker)
        snippet_list: List of snippets to process
        regex: Regex pattern to search for
        timeframe: Timeframe to search in
        results: List of output results
    Returns:
        Multiprocessing list to be combined by the parent process
    """

    now = calendar.timegm(time.gmtime())
    for s in snippet_list:
        snippet_object = snippet.create_from_dict(s)
        if _convert_time(snippet_object.created_at) <= (now - timeframe):
            continue
        match = regex.search(str(snippet_object.title)) or regex.search(str(snippet_object.description))
        if match:
            results.append({
                'snippet': snippet_object,
                'match_string': match.group(0)
            })

    return results


# diff --git a/src/gitlab_watchman/logger.py b/src/gitlab_watchman/logger.py new file mode 100644 index 0000000..754f0df --- /dev/null +++ b/src/gitlab_watchman/logger.py @@ -0,0 +1,55 @@
import json
import dataclasses
import logging
import sys
import logging.handlers
from logging import Logger


class EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that serialises dataclass instances as dicts."""

    def default(self, o):
        if dataclasses.is_dataclass(o):
            return dataclasses.asdict(o)
        return super().default(o)


class LoggingBase(Logger):
    """Holds the JSON-style formatters and the underlying named logger."""

    # NOTE(review): the default name looks copied from another project - confirm
    # before changing it, since it selects the logger returned by getLogger.
    def __init__(self, name='Slack Watchman Enterprise'):
        super().__init__(name)
        self.notify_format = logging.Formatter(
            '{"timestamp": "%(asctime)s", "level": "NOTIFY", "scope": "%(scope)s", "severity": '
            '"%(severity)s", "detection_type": "%(type)s", "detection_data": %(message)s}')
        self.info_format = logging.Formatter(
            '{"timestamp": "%(asctime)s", "level": "%(levelname)s", "message": "%(message)s"}')
        self.critical_format = logging.Formatter(
            '{"timestamp": "%(asctime)s", "level": "%(levelname)s", "message": "%(message)s"}')
        self.logger = logging.getLogger(self.name)
        self.logger.setLevel(logging.DEBUG)


class StdoutLogger(LoggingBase):
    """Logger that writes JSON-style records to stdout."""

    def __init__(self):
        LoggingBase.__init__(self)
        self.handler = logging.StreamHandler(sys.stdout)
        self.logger.addHandler(self.handler)

    def log_notification(self,
                         log_data: str or dict,
                         scope: str = None,
                         detect_type: str = None,
                         severity: int = None):
        """Emit a detection: log_data is JSON-encoded and logged at WARNING level
        with the notify format."""
        self.handler.setFormatter(self.notify_format)
        self.logger.warning(json.dumps(log_data, cls=EnhancedJSONEncoder),
                            extra={
                                'scope': scope,
                                'type': detect_type,
                                'severity': severity
                            })

    def log_info(self, log_data: str or dict):
        """Emit log_data at INFO level with the info format."""
        self.handler.setFormatter(self.info_format)
        self.logger.info(log_data)

    def log_critical(self, log_data: str or dict):
        """Emit log_data at CRITICAL level with the critical format."""
        self.handler.setFormatter(self.critical_format)
        self.logger.critical(log_data)


# diff --git a/src/gitlab_watchman/signature.py b/src/gitlab_watchman/signature.py new file mode 100644 index 0000000..2ad3099 --- /dev/null +++ b/src/gitlab_watchman/signature.py @@ -0,0 +1,101 @@
import pathlib
import yaml
from dataclasses import dataclass


@dataclass
class Signature(object):
    """ Class that handles loaded signature objects. Signatures
    define what to search for in GitLab and where to search for it.
    They also contain regex patterns to validate data that is found"""

    __slots__ = [
        'filename',
        'enabled',
        'meta',
        'scope',
        'test_cases',
        'strings',
        'pattern'
    ]

    filename: str
    enabled: bool
    meta: 'Meta'
    scope: list
    test_cases: 'TestCases'
    strings: list  # search() iterates this as a collection of query strings
    pattern: str

    def _as_dict(self) -> dict:
        """Map each slot name to its value; slotted instances have no __dict__."""
        return {name: getattr(self, name, None) for name in self.__slots__}

    def __repr__(self):
        return f'{self.__class__.__name__}({self._as_dict()!r})'

    def __str__(self):
        return ' '.join(f'{k}: {v!s}' for k, v in self._as_dict().items())


@dataclass
class Meta(object):
    """Descriptive metadata of a signature (the 'meta' section of its YAML file)."""

    __slots__ = [
        'name',
        'author',
        'date',
        'version',
        'description',
        'severity'
    ]

    name: str
    author: str
    date: str
    version: str
    description: str
    severity: int


@dataclass
class TestCases(object):
    """Example strings from the 'test_cases' section of a signature YAML file."""

    __slots__ = [
        'match_cases',
        'fail_cases',
    ]

    match_cases: list
    fail_cases: list


def load_from_yaml(sig_path: pathlib.Path) -> Signature:
    """Load YAML file and return a Signature object

    Args:
        sig_path: Path of YAML file
    Returns:
        Signature object with fields populated from the YAML
        signature file; keys missing from the file become None
    """

    with open(sig_path) as yaml_file:
        yaml_import = yaml.safe_load(yaml_file)

    # A file without a 'meta' or 'test_cases' section gives None fields
    # instead of failing on None.get(...).
    meta_section = yaml_import.get('meta') or {}
    test_section = yaml_import.get('test_cases') or {}

    meta = Meta(
        name=meta_section.get('name'),
        author=meta_section.get('author'),
        date=meta_section.get('date'),
        version=meta_section.get('version'),
        description=meta_section.get('description'),
        severity=meta_section.get('severity')
    )

    test_cases = TestCases(
        match_cases=test_section.get('match_cases'),
        fail_cases=test_section.get('fail_cases')
    )

    return Signature(filename=yaml_import.get('filename'),
                     enabled=yaml_import.get('enabled'),
                     meta=meta,
                     scope=yaml_import.get('scope'),
                     test_cases=test_cases,
                     strings=yaml_import.get('strings'),
                     pattern=yaml_import.get('pattern'))
# diff --git a/src/signatures/__init__.py b/src/signatures/__init__.py new file mode 100644 index
0000000..e69de29 diff --git a/gitlab_watchman/rules/files/azure_service_account_files.yaml b/src/signatures/files/azure_service_account_files.yaml similarity index 74% rename from gitlab_watchman/rules/files/azure_service_account_files.yaml rename to src/signatures/files/azure_service_account_files.yaml index e18f317..bff3458 100644 --- a/gitlab_watchman/rules/files/azure_service_account_files.yaml +++ b/src/signatures/files/azure_service_account_files.yaml @@ -8,12 +8,12 @@ meta: description: Detects exposed Azure service account files severity: '90' scope: -- blobs + - blobs test_cases: match_cases: - - '"accessToken": "123123123123123123123123123123123123"' + - '"accessToken": "123123123123123123123123123123123123"' fail_cases: - - '"accessToken": $AZURE_ACCESS_TOKEN' + - '"accessToken": $AZURE_ACCESS_TOKEN' strings: -- azure extension:json + - azure extension:json pattern: (?i)('|"){0,2}(refreshtoken|accesstoken|_clientId)('|"){0,2}:(\s*)('|"){0,2}([0-9a-zA-Z!@#$&()\/\-`_.+,"]{20,})('|"){0,2} diff --git a/gitlab_watchman/rules/files/gcp_service_account_files.yaml b/src/signatures/files/gcp_service_account_files.yaml similarity index 66% rename from gitlab_watchman/rules/files/gcp_service_account_files.yaml rename to src/signatures/files/gcp_service_account_files.yaml index e671864..14b0540 100644 --- a/gitlab_watchman/rules/files/gcp_service_account_files.yaml +++ b/src/signatures/files/gcp_service_account_files.yaml @@ -8,13 +8,13 @@ meta: description: Detects exposed GPC service account files severity: '90' scope: -- blobs + - blobs test_cases: match_cases: - - '"private_key": "-----BEGIN PRIVATE KEY-----AABBCCDDEEFFGGHHIIJJKK=\n-----END + - '"private_key": "-----BEGIN PRIVATE KEY-----AABBCCDDEEFFGGHHIIJJKK=\n-----END PRIVATE KEY-----\n' fail_cases: - - '"private_key": $PRIVATE_KEY_FILE' + - '"private_key": $PRIVATE_KEY_FILE' strings: -- private_key extension:json + - 'private_key extension:json' pattern: "(?s)(-----BEGIN .+?-----)\\S{0,}" diff --git 
a/src/signatures/files/interesting_files.yaml b/src/signatures/files/interesting_files.yaml new file mode 100644 index 0000000..d4cff6c --- /dev/null +++ b/src/signatures/files/interesting_files.yaml @@ -0,0 +1,39 @@ +--- +filename: interesting_files.yaml +enabled: true +meta: + name: Interesting Potentially Sensitive Files + author: PaperMtn + date: '2020-08-14' + description: Detects other misc potentially sensitive files that have been exposed. + Pattern left blank to return every hit + severity: '40' +scope: + - blobs +test_cases: + match_cases: + - blank + fail_cases: + - blank +strings: + - "* extension:openvpn" + - "* extension:ovpn" + - "* extension:cscfg" + - "* extension:rdp" + - "* extension:jks" + - "* extension:psafe3" + - "* extension:agilekeychain" + - "* extension:keychain" + - "* extension:kwallet" + - "* extension:tblk" + - "* filename:otr.private_key" + - "* filename:secret_token.rb" + - "* filename:publish_over_ssh.BapSshPublisherPlugin.xml" + - "* filename:credentials.xml" + - "* filename:knife.rb" + - "* filename:filezilla.xml" + - "* extension:htpasswd" + - "* extension:trc" + - "* filename:.ssh" + - "* filename:.chef/*.pem" +pattern: '' diff --git a/gitlab_watchman/rules/files/s3_config_files.yaml b/src/signatures/files/s3_config_files.yaml similarity index 68% rename from gitlab_watchman/rules/files/s3_config_files.yaml rename to src/signatures/files/s3_config_files.yaml index 9ad6f97..39a84f6 100644 --- a/gitlab_watchman/rules/files/s3_config_files.yaml +++ b/src/signatures/files/s3_config_files.yaml @@ -8,14 +8,14 @@ meta: description: Detects exposed S3 configuration files severity: '70' scope: -- blobs + - blobs test_cases: match_cases: - - secret_key = 123456789abcdefg123456789123456789 + - secret_key = 123456789abcdefg123456789123456789 fail_cases: - - secret_key = $S3_SECRET_KEY + - secret_key = $S3_SECRET_KEY strings: -- secret extension:s3cfg -- secret extension:ini -- s3cmd.ini + - secret extension:s3cfg + - secret extension:ini + 
- s3cmd.ini pattern: (?i)('|"){0,2}secret_key('|"){0,2} = ('|"){0,2}([0-9a-zA-Z!@#$&()\/\-`_.+,"]{20,})('|"){0,2} diff --git a/src/signatures/passwords.yaml b/src/signatures/passwords.yaml new file mode 100644 index 0000000..0b2162c --- /dev/null +++ b/src/signatures/passwords.yaml @@ -0,0 +1,29 @@ +--- +filename: passwords.yaml +enabled: false +meta: + name: Passwords + author: PaperMtn + date: '2020-08-14' + description: Detects potentially exposed passwords in plaintext + severity: '40' +scope: + - blobs + - commits + - milestones + - wiki_blobs + - issues + - merge_requests + - notes + - snippet_titles +test_cases: + match_cases: + - 'Password: WeakPassword123' + - The password is WeakPassword123 + fail_cases: + - You need a strong password +strings: + - '"password:" -(svg|png|jpeg)' + - '"password is" -(svg|png|jpeg)' + - '"passwd" -(svg|png|jpeg)' +pattern: (?i)(password\s*[`=:\"]+\s*[^\s]+|password is\s*[`=:\"]*\s*[^\s]+|pwd\s*[`=:\"]*\s*[^\s]+|passwd\s*[`=:\"]+\s*[^\s]+) diff --git a/gitlab_watchman/rules/tokens/access_tokens.yaml b/src/signatures/tokens/access_tokens.yaml similarity index 52% rename from gitlab_watchman/rules/tokens/access_tokens.yaml rename to src/signatures/tokens/access_tokens.yaml index ecd8b54..eae343b 100644 --- a/gitlab_watchman/rules/tokens/access_tokens.yaml +++ b/src/signatures/tokens/access_tokens.yaml @@ -8,17 +8,19 @@ meta: description: Detects exposed access_tokens severity: '70' scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests + - blobs + - commits + - milestones + - wiki_blobs + - issues + - merge_requests + - notes + - snippet_titles test_cases: match_cases: - - 'ACCESS_TOKEN: 123456789abcdefg123456789123456789' + - 'ACCESS_TOKEN: 123456789abcdefg123456789123456789' fail_cases: - - 'ACCESS_TOKEN: $ACCESS_TOKEN' + - 'ACCESS_TOKEN: $ACCESS_TOKEN' strings: -- '"access_token:"' -pattern: (?i)('|"){0,2}access_token('|"){0,2}:(\s*)('|"){0,2}([0-9a-zA-Z!@#$&()\/\-`_.+,"]{30,})('|"){0,2} \ No 
newline at end of file + - '"access_token:"' +pattern: (?i)('|"){0,2}access_token('|"){0,2}:(\s*)('|"){0,2}([0-9a-zA-Z!@#$&()\/\-`_.+,"]{30,})('|"){0,2} diff --git a/gitlab_watchman/rules/tokens/aws_api_tokens.yaml b/src/signatures/tokens/aws_api_tokens.yaml similarity index 54% rename from gitlab_watchman/rules/tokens/aws_api_tokens.yaml rename to src/signatures/tokens/aws_api_tokens.yaml index 20b048b..f204733 100644 --- a/gitlab_watchman/rules/tokens/aws_api_tokens.yaml +++ b/src/signatures/tokens/aws_api_tokens.yaml @@ -8,18 +8,20 @@ meta: description: Detects exposed AWS API secret tokens severity: '90' scope: -- blobs -- commits -- milestones -- wiki_blobs -- issues -- merge_requests + - blobs + - commits + - milestones + - wiki_blobs + - issues + - merge_requests + - notes + - snippet_titles test_cases: match_cases: - - AbCdEfGh1234567890AbAbAb+aAb1111111111+= + - AbCdEfGh1234567890AbAbAb+aAb1111111111+= fail_cases: - - AbCdEfGh1234567890AbAbAb+aAb1111111111+=AAA + - AbCdEfGh1234567890AbAbAb+aAb1111111111+=AAA strings: -- 'ASIA -(html|svg|png|jpeg|jpg)' -- 'AKIA -(html|svg|png|jpeg|jpg)' + - 'ASIA -(html|svg|png)' + - 'AKIA -(html|svg|png)' pattern: "(? 
list: + """Load signatures from YAML files + + Returns: + List containing loaded definitions as Signatures objects + """ + + loaded_definitions = [] + try: + for root, dirs, files in os.walk(SIGNATURES_PATH): + for sig_file in files: + sig_path = (Path(root) / sig_file).resolve() + if sig_path.name.endswith('.yaml'): + loaded_def = signature.load_from_yaml(sig_path) + if loaded_def.enabled: + loaded_definitions.append(loaded_def) + return loaded_definitions + except Exception as e: + raise e + + +def check_yaml(sig): + try: + yaml_sig = yaml.safe_load(sig) + except: + return False + return True + + +class TestSigs(unittest.TestCase): + def test_signatures_format(self): + """Check signatures are properly formed YAML ready to be ingested""" + + for root, dirs, files in os.walk(SIGNATURES_PATH): + for sig_file in files: + sig_path = (Path(root) / sig_file).resolve() + if sig_path.name.endswith('.yaml'): + with open(sig_path) as yaml_file: + self.assertTrue(check_yaml(yaml_file.read()), msg=f'Malformed YAML: {yaml_file.name}') + + def test_signature_matching_cases(self): + """Test that the match case strings match the regex. Skip if the match case is 'blank'""" + + sig_list = load_signatures() + for signature in sig_list: + for test_case in signature.test_cases.match_cases: + if not test_case == 'blank': + self.assertRegex(test_case, signature.pattern, msg='Regex does not detect given match case') + + def test_signature_failing_cases(self): + """Test that the fail case strings don't match the regex. Skip if the fail case is 'blank'""" + + sig_list = load_signatures() + for signature in sig_list: + if signature.test_cases.fail_cases: + for test_case in signature.test_cases.fail_cases: + if not test_case == 'blank': + self.assertNotRegex(test_case, signature.pattern, + msg='Regex does detect given failure case, it should ' + 'not') + + +if __name__ == '__main__': + unittest.main()