From e432e5456a68d02c12fd96068bff87dc14e2e759 Mon Sep 17 00:00:00 2001
From: Michelangelo Mori <328978+blkt@users.noreply.github.com>
Date: Mon, 20 Jan 2025 21:29:41 +0100
Subject: [PATCH] Add utilities to dump Copilot traffic.

This change adds a utility routine to dump raw data tapped from the
wire to multiple files in a temporary directory under the path
specified via `CODEGATE_DUMP_DIR`. No instrumentation is performed if
`CODEGATE_DUMP_DIR` is unset or empty.

Data is buffered separately for each object whose method is
instrumented, so dumps from different objects are never mixed.

The temporary directory is not deleted when the process exits.

NOTE: the utility does not ensure the folder specified via
`CODEGATE_DUMP_DIR` is writable, which might cause user-visible
failures at startup.

Co-authored-by: Radoslav Dimitrov
---
 src/codegate/providers/copilot/provider.py | 62 ++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/src/codegate/providers/copilot/provider.py b/src/codegate/providers/copilot/provider.py
index d1ade810..b774bdcd 100644
--- a/src/codegate/providers/copilot/provider.py
+++ b/src/codegate/providers/copilot/provider.py
@@ -1,6 +1,11 @@
 import asyncio
+import contextlib
+import datetime
+import functools
+import os
 import re
 import ssl
+import tempfile
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple, Union
 from urllib.parse import unquote, urljoin, urlparse
@@ -26,6 +31,62 @@ setup_logging()
 
 logger = structlog.get_logger("codegate").bind(origin="copilot_proxy")
 
+
+TEMPDIR = None
+if os.getenv("CODEGATE_DUMP_DIR"):
+    basedir = os.getenv("CODEGATE_DUMP_DIR")
+    TEMPDIR = tempfile.TemporaryDirectory(prefix="codegate-", dir=basedir, delete=False)
+
+
+def _dump_data(suffix, func):
+    """Wrap a ``(self, data)`` method so the bytes passed to it are dumped to disk.
+
+    Returns ``func`` unchanged when no dump directory was configured at import
+    time. Otherwise returns a wrapper that forwards every call to ``func``,
+    accumulates ``data`` in a buffer kept on ``self``, and, when ``data``
+    equals the end-of-stream marker checked below, writes the buffer to
+    ``<suffix>-<timestamp>.txt`` under ``TEMPDIR`` and empties it.
+    """
+    if TEMPDIR is None:
+        return func
+
+    # Keep one buffer per instance: a single buffer captured by the closure
+    # would be shared by every object whose method is wrapped, mixing their
+    # data into the same dump file.
+    attr = f"_codegate_dump_{suffix}_buf"
+
+    @functools.wraps(func)
+    def inner(self, data: bytes):
+        result = func(self, data)
+
+        buf = getattr(self, attr, None)
+        if buf is None:
+            buf = bytearray()
+            setattr(self, attr, buf)
+        buf.extend(data)
+
+        if data == b"0\r\n\r\n":
+            ts = datetime.datetime.now()
+            fname = os.path.join(TEMPDIR.name, ts.strftime(f"{suffix}-%Y%m%dT%H%M%S%f.txt"))
+            with open(fname, mode="wb") as fd:
+                fd.write(buf)
+            buf.clear()
+
+        return result
+
+    return inner
+
+
+def _dump_request(func):
+    """Decorate a write method so its traffic is dumped with the ``request`` prefix."""
+    return _dump_data("request", func)
+
+
+def _dump_response(func):
+    """Decorate a write method so its traffic is dumped with the ``response`` prefix."""
+    return _dump_data("response", func)
+
+
 # Constants
 MAX_BUFFER_SIZE = 10 * 1024 * 1024  # 10MB
 CHUNK_SIZE = 64 * 1024  # 64KB
@@ -911,6 +972,7 @@ def _process_chunk(self, chunk: bytes):
 
             self.stream_queue.put_nowait(record)
 
+    @_dump_response
     def _proxy_transport_write(self, data: bytes):
         # For debugging only
         # self.data_sent.append(data)