From ec11b039230d5ca36b525fbb561d3923794227b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 13 Jan 2025 13:02:42 +0100 Subject: [PATCH 1/3] Disable Referer by default for Zyte API requests --- docs/index.rst | 1 + docs/reference/settings.rst | 16 +++ docs/setup.rst | 1 + docs/usage/referer.rst | 53 ++++++++++ scrapy_zyte_api/__init__.py | 1 + scrapy_zyte_api/_middlewares.py | 35 +++++++ scrapy_zyte_api/addon.py | 4 + tests/__init__.py | 1 + tests/mockserver.py | 22 +++++ tests/test_addon.py | 2 + tests/test_referer.py | 168 ++++++++++++++++++++++++++++++++ 11 files changed, 304 insertions(+) create mode 100644 docs/usage/referer.rst create mode 100644 tests/test_referer.py diff --git a/docs/index.rst b/docs/index.rst index 0be78fa6..0383a6fc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,6 +30,7 @@ either :ref:`globally ` or :ref:`per request `, or usage/stats usage/fingerprint usage/proxy + usage/referer .. toctree:: :caption: Reference diff --git a/docs/reference/settings.rst b/docs/reference/settings.rst index 563052f9..ccde6b31 100644 --- a/docs/reference/settings.rst +++ b/docs/reference/settings.rst @@ -286,6 +286,22 @@ For example: } +.. setting:: ZYTE_API_REFERRER_POLICY + +ZYTE_API_REFERRER_POLICY +======================== + +Default: ``"no-referrer"`` + +:setting:`REFERRER_POLICY` to apply to Zyte API requests when using +:ref:`transparent mode ` or :ref:`automatic request parameters +`. + +The :reqmeta:`referrer_policy` request metadata key takes precedence. + +See :ref:`referer`. + + .. 
setting:: ZYTE_API_RETRY_POLICY ZYTE_API_RETRY_POLICY diff --git a/docs/setup.rst b/docs/setup.rst index 1aea2bde..a6b85e24 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -108,6 +108,7 @@ scrapy-zyte-api integration as follows: } SPIDER_MIDDLEWARES = { "scrapy_zyte_api.ScrapyZyteAPISpiderMiddleware": 100, + "scrapy_zyte_api.ScrapyZyteAPIRefererSpiderMiddleware": 1000, } REQUEST_FINGERPRINTER_CLASS = "scrapy_zyte_api.ScrapyZyteAPIRequestFingerprinter" TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor" diff --git a/docs/usage/referer.rst b/docs/usage/referer.rst new file mode 100644 index 00000000..4a0a7f13 --- /dev/null +++ b/docs/usage/referer.rst @@ -0,0 +1,53 @@ +.. _referer: + +================== +The Referer header +================== + +By default, Scrapy automatically sets a `Referer header`_ on every request +yielded from a callback (see the +:class:`~scrapy.spidermiddlewares.referer.RefererMiddleware`). + +However, when using :ref:`transparent mode ` or :ref:`automatic +request parameters `, this behavior is disabled by default for Zyte +API requests, and when using :ref:`manual request parameters `, all +request headers are always ignored for Zyte API requests. + +.. _Referer header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referer + +Why is it disabled by default? +============================== + +A misuse of the ``Referer`` header can increase the risk of :ref:`bans `. + +By *not* setting the header, your Zyte API requests let Zyte API choose which +value to use, if any, to minimize bans. + +If you *do* set the header, while Zyte API might still ignore your value to +avoid bans, it may also keep your value regardless of its impact on bans. + +How to override? 
+================ + +To set the header anyway when using :ref:`transparent mode ` or +:ref:`automatic request parameters `, do any of the following: + +- Set :setting:`ZYTE_API_REFERRER_POLICY` to ``"scrapy-default"`` or to some + other value supported by the :setting:`REFERRER_POLICY` setting. + +- Set the :reqmeta:`referrer_policy` request metadata key on the specific Zyte + API requests where you want to allow the header, to ``"scrapy-default"`` or + to some other value supported by the :setting:`REFERRER_POLICY` setting. + +- Set the header through + :attr:`Request.headers `. + +- Set the header through the :http:`request:customHttpRequestHeaders` field + (for :ref:`HTTP requests `) or the :http:`request:requestHeaders` + field (for :ref:`browser requests `) in the + :reqmeta:`zyte_api_automap` request metadata key. + +When using :ref:`manual request parameters `, you always need to set +the header through the :http:`request:customHttpRequestHeaders` or +:http:`request:requestHeaders` field in the :reqmeta:`zyte_api` request +metadata key. 
diff --git a/scrapy_zyte_api/__init__.py b/scrapy_zyte_api/__init__.py index 3fb7584e..ff25d9a1 100644 --- a/scrapy_zyte_api/__init__.py +++ b/scrapy_zyte_api/__init__.py @@ -8,6 +8,7 @@ from ._annotations import ExtractFrom, actions, custom_attrs from ._middlewares import ( ScrapyZyteAPIDownloaderMiddleware, + ScrapyZyteAPIRefererSpiderMiddleware, ScrapyZyteAPISpiderMiddleware, ) from ._page_inputs import Actions, Geolocation, Screenshot diff --git a/scrapy_zyte_api/_middlewares.py b/scrapy_zyte_api/_middlewares.py index 080ce52b..70762028 100644 --- a/scrapy_zyte_api/_middlewares.py +++ b/scrapy_zyte_api/_middlewares.py @@ -201,3 +201,38 @@ async def process_spider_output_async(self, response, result, spider): async for item_or_request in result: self._process_output_item_or_request(item_or_request, spider) yield item_or_request + + +class ScrapyZyteAPIRefererSpiderMiddleware: + + @classmethod + def from_crawler(cls, crawler): + return cls(crawler) + + def __init__(self, crawler): + self._default_policy = crawler.settings.get( + "ZYTE_API_REFERRER_POLICY", "no-referrer" + ) + self._param_parser = _ParamParser(crawler, cookies_enabled=False) + + def process_spider_output(self, response, result, spider): + for item_or_request in result: + self._process_output_item_or_request(item_or_request, spider) + yield item_or_request + + async def process_spider_output_async(self, response, result, spider): + async for item_or_request in result: + self._process_output_item_or_request(item_or_request, spider) + yield item_or_request + + def _process_output_item_or_request(self, item_or_request, spider): + if not isinstance(item_or_request, Request): + return + self._process_output_request(item_or_request, spider) + + def _process_output_request(self, request, spider): + if self._is_zyte_api_request(request): + request.meta.setdefault("referrer_policy", self._default_policy) + + def _is_zyte_api_request(self, request): + return self._param_parser.parse(request) is not None diff 
--git a/scrapy_zyte_api/addon.py b/scrapy_zyte_api/addon.py index 1e6f8daa..fbfb9429 100644 --- a/scrapy_zyte_api/addon.py +++ b/scrapy_zyte_api/addon.py @@ -6,6 +6,7 @@ from scrapy_zyte_api import ( ScrapyZyteAPIDownloaderMiddleware, + ScrapyZyteAPIRefererSpiderMiddleware, ScrapyZyteAPISessionDownloaderMiddleware, ScrapyZyteAPISpiderMiddleware, ) @@ -101,6 +102,9 @@ def update_settings(self, settings: BaseSettings) -> None: 667, ) _setdefault(settings, "SPIDER_MIDDLEWARES", ScrapyZyteAPISpiderMiddleware, 100) + _setdefault( + settings, "SPIDER_MIDDLEWARES", ScrapyZyteAPIRefererSpiderMiddleware, 1000 + ) settings.set( "TWISTED_REACTOR", "twisted.internet.asyncioreactor.AsyncioSelectorReactor", diff --git a/tests/__init__.py b/tests/__init__.py index 12e4b861..b17e1590 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -29,6 +29,7 @@ "REQUEST_FINGERPRINTER_CLASS": "scrapy_zyte_api.ScrapyZyteAPIRequestFingerprinter", "SPIDER_MIDDLEWARES": { "scrapy_zyte_api.ScrapyZyteAPISpiderMiddleware": 100, + "scrapy_zyte_api.ScrapyZyteAPIRefererSpiderMiddleware": 1000, }, "ZYTE_API_KEY": _API_KEY, "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor", diff --git a/tests/mockserver.py b/tests/mockserver.py index 583323a7..256eef5c 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -72,6 +72,12 @@ def getChild(self, path, request): return RequestCountResource() return self + def render_GET(self, request): + referer = request.getHeader(b"Referer") + if referer: + request.responseHeaders.setRawHeaders(b"Referer", [referer]) + return b"" + def render_POST(self, request): DefaultResource.request_count += 1 request_data = json.loads(request.content.read()) @@ -184,6 +190,22 @@ def render_POST(self, request): response_data["httpResponseHeaders"] = [ {"name": "test_header", "value": "test_value"} ] + headers = request_data.get("customHttpRequestHeaders", []) + for header in headers: + if header["name"].strip().lower() == "referer": + referer = 
header["value"] + break + else: + headers = request_data.get("requestHeaders", {}) + if "referer" in headers: + referer = headers["referer"] + else: + referer = None + if referer is not None: + assert isinstance(response_data["httpResponseHeaders"], list) + response_data["httpResponseHeaders"].append( + {"name": "Referer", "value": referer} + ) actions = request_data.get("actions") if actions: diff --git a/tests/test_addon.py b/tests/test_addon.py index a69d7030..5aa8b634 100644 --- a/tests/test_addon.py +++ b/tests/test_addon.py @@ -8,6 +8,7 @@ from scrapy_zyte_api import ( ScrapyZyteAPIDownloaderMiddleware, + ScrapyZyteAPIRefererSpiderMiddleware, ScrapyZyteAPISessionDownloaderMiddleware, ScrapyZyteAPISpiderMiddleware, ) @@ -148,6 +149,7 @@ def _test_setting_changes(initial_settings, expected_settings): "REQUEST_FINGERPRINTER_CLASS": "scrapy_zyte_api.ScrapyZyteAPIRequestFingerprinter", "SPIDER_MIDDLEWARES": { ScrapyZyteAPISpiderMiddleware: 100, + ScrapyZyteAPIRefererSpiderMiddleware: 1000, }, "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor", "ZYTE_API_FALLBACK_HTTPS_HANDLER": "scrapy.core.downloader.handlers.http.HTTPDownloadHandler", diff --git a/tests/test_referer.py b/tests/test_referer.py new file mode 100644 index 00000000..67e8f078 --- /dev/null +++ b/tests/test_referer.py @@ -0,0 +1,168 @@ +import pytest +from pytest_twisted import ensureDeferred +from scrapy import Spider, signals +from scrapy.utils.test import get_crawler + +try: + import scrapy.addons # noqa: F401 +except ImportError: + ADDON_SUPPORT = False + from . import SETTINGS +else: + ADDON_SUPPORT = True + from . import SETTINGS_ADDON as SETTINGS + + +@pytest.mark.parametrize( + ("settings", "meta", "headers", "expected"), + ( + # Default behavior of non-Zyte-API, transparent/automap, and manual + # Zyte API requests. 
+ ({}, {}, {}, True), + (SETTINGS, {"zyte_api_automap": False}, {}, True), + (SETTINGS, {}, {}, False if ADDON_SUPPORT else True), + ( + SETTINGS, + {"zyte_api": {"httpResponseBody": True, "httpResponseHeaders": True}}, + {}, + False, + ), + # Setting ZYTE_API_REFERRER_POLICY to "scrapy-default" changes that + # for transparent/automap. + ({"ZYTE_API_REFERRER_POLICY": "scrapy-default"}, {}, {}, True), + ( + {**SETTINGS, "ZYTE_API_REFERRER_POLICY": "scrapy-default"}, + {"zyte_api_automap": False}, + {}, + True, + ), + ({**SETTINGS, "ZYTE_API_REFERRER_POLICY": "scrapy-default"}, {}, {}, True), + ( + {**SETTINGS, "ZYTE_API_REFERRER_POLICY": "scrapy-default"}, + {"zyte_api": {"httpResponseBody": True, "httpResponseHeaders": True}}, + {}, + False, + ), + # Setting referrer_policy achieves the same. + ({}, {"referrer_policy": "scrapy-default"}, {}, True), + ( + SETTINGS, + {"referrer_policy": "scrapy-default", "zyte_api_automap": False}, + {}, + True, + ), + (SETTINGS, {"referrer_policy": "scrapy-default"}, {}, True), + ( + SETTINGS, + { + "referrer_policy": "scrapy-default", + "zyte_api": {"httpResponseBody": True, "httpResponseHeaders": True}, + }, + {}, + False, + ), + # Setting Request.headers["Referer"] works for non-Zyte API and for + # transparent/automap. + ({}, {}, {"Referer": "https://example.com"}, "https://example.com"), + ( + SETTINGS, + {"zyte_api_automap": False}, + {"Referer": "https://example.com"}, + "https://example.com", + ), + (SETTINGS, {}, {"Referer": "https://example.com"}, "https://example.com"), + ( + SETTINGS, + {"zyte_api": {"httpResponseBody": True, "httpResponseHeaders": True}}, + {"Referer": "https://example.com"}, + False, + ), + # Setting the header through a Zyte API parameter + # (customHttpRequestHeaders or requestHeaders) always works. 
+ ( + SETTINGS, + { + "zyte_api_automap": { + "customHttpRequestHeaders": [ + {"name": "Referer", "value": "https://example.com"} + ] + } + }, + {}, + "https://example.com", + ), + ( + SETTINGS, + { + "zyte_api_automap": { + "requestHeaders": {"referer": "https://example.com"} + } + }, + {}, + "https://example.com", + ), + ( + SETTINGS, + { + "zyte_api": { + "httpResponseBody": True, + "httpResponseHeaders": True, + "customHttpRequestHeaders": [ + {"name": "Referer", "value": "https://example.com"} + ], + } + }, + {}, + "https://example.com", + ), + ( + SETTINGS, + { + "zyte_api": { + "httpResponseBody": True, + "httpResponseHeaders": True, + "requestHeaders": {"referer": "https://example.com"}, + } + }, + {}, + "https://example.com", + ), + ), +) +@ensureDeferred +async def test_main(settings, meta, headers, expected, mockserver): + items = [] + settings["ZYTE_API_URL"] = mockserver.urljoin("/") + start_url = mockserver.urljoin("/a") + follow_up_url = mockserver.urljoin("/b") + + class TestSpider(Spider): + name = "test" + start_urls = [start_url] + + def parse(self, response): + yield response.follow( + follow_up_url, headers=headers, meta=meta, callback=self.parse_referer + ) + + def parse_referer(self, response): + referer = response.headers.get(b"Referer", None) + if referer is not None: + referer = referer.decode() + yield {"Referer": referer} + + def track_items(item, response, spider): + items.append(item) + + crawler = get_crawler(settings_dict=settings, spidercls=TestSpider) + crawler.signals.connect(track_items, signal=signals.item_scraped) + await crawler.crawl() + + assert len(items) == 1 + item = items[0] + if isinstance(expected, str): + assert item["Referer"] == expected + elif expected: + assert item["Referer"] == start_url + else: + assert item["Referer"] is None From c1b3df5c07f01bcdc369d9da241d42eb966d6a3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 13 Jan 2025 15:42:49 +0100 Subject: [PATCH 2/3] Update test 
expectations, extend coverage and docs --- docs/usage/referer.rst | 17 +++-- tests/test_api_requests.py | 24 +------ tests/test_referer.py | 130 +++++++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 31 deletions(-) diff --git a/docs/usage/referer.rst b/docs/usage/referer.rst index 4a0a7f13..5adb0421 100644 --- a/docs/usage/referer.rst +++ b/docs/usage/referer.rst @@ -32,22 +32,21 @@ How to override? To set the header anyway when using :ref:`transparent mode ` or :ref:`automatic request parameters `, do any of the following: -- Set :setting:`ZYTE_API_REFERRER_POLICY` to ``"scrapy-default"`` or to some - other value supported by the :setting:`REFERRER_POLICY` setting. - -- Set the :reqmeta:`referrer_policy` request metadata key on the specific Zyte - API requests where you want to allow the header, to ``"scrapy-default"`` or +- Set the :setting:`ZYTE_API_REFERRER_POLICY` setting or the + :reqmeta:`referrer_policy` request metadata key to ``"scrapy-default"`` or to some other value supported by the :setting:`REFERRER_POLICY` setting. -- Set the header through - :attr:`Request.headers `. +- Set the header through the :setting:`DEFAULT_REQUEST_HEADERS` setting or + the :attr:`Request.headers ` attribute. - Set the header through the :http:`request:customHttpRequestHeaders` field (for :ref:`HTTP requests `) or the :http:`request:requestHeaders` - field (for :ref:`browser requests `) in the + field (for :ref:`browser requests `) in the + :setting:`ZYTE_API_AUTOMAP_PARAMS` setting or the :reqmeta:`zyte_api_automap` request metadata key. When using :ref:`manual request parameters `, you always need to set the header through the :http:`request:customHttpRequestHeaders` or -:http:`request:requestHeaders` field in the :reqmeta:`zyte_api` request +:http:`request:requestHeaders` field in the +:setting:`ZYTE_API_DEFAULT_PARAMS` setting or the :reqmeta:`zyte_api` request metadata key. 
diff --git a/tests/test_api_requests.py b/tests/test_api_requests.py index b9ca96fc..91a82915 100644 --- a/tests/test_api_requests.py +++ b/tests/test_api_requests.py @@ -3291,22 +3291,6 @@ async def test_middleware_headers_start_requests(): assert "customHttpRequestHeaders" not in api_params -@ensureDeferred -async def test_middleware_headers_cb_requests(): - """Callback requests will include the Referer parameter if the Referer - middleware is not disabled.""" - crawler = await get_crawler({"ZYTE_API_TRANSPARENT_MODE": True}) - request = Request(url="https://example.com") - await _process_request(crawler, request) - - handler = get_download_handler(crawler, "https") - param_parser = handler._param_parser - api_params = param_parser.parse(request) - assert api_params["customHttpRequestHeaders"] == [ - {"name": "Referer", "value": request.url}, - ] - - @ensureDeferred async def test_middleware_headers_cb_requests_disable(): """Callback requests will not include the Referer parameter if the Referer @@ -3370,7 +3354,6 @@ async def test_middleware_headers_default(): param_parser = handler._param_parser api_params = param_parser.parse(request) assert api_params["customHttpRequestHeaders"] == [ - {"name": "Referer", "value": request.url}, { "name": "Accept", "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", @@ -3482,7 +3465,6 @@ async def test_middleware_headers_request_headers(): "value": DEFAULT_ACCEPT_ENCODING, }, {"name": "User-Agent", "value": DEFAULT_USER_AGENT}, - {"name": "Referer", "value": request.url}, ] @@ -3581,9 +3563,7 @@ async def test_middleware_headers_custom_middleware_before(): handler = get_download_handler(crawler, "https") param_parser = handler._param_parser api_params = param_parser.parse(request) - assert api_params["customHttpRequestHeaders"] == [ - {"name": "Referer", "value": request.url}, - ] + assert "customHttpRequestHeaders" not in api_params class CustomValuesDownloaderMiddleware: @@ -3620,7 +3600,6 @@ async 
def test_middleware_headers_custom_middleware_before_custom(): param_parser = handler._param_parser api_params = param_parser.parse(request) assert api_params["customHttpRequestHeaders"] == [ - {"name": "Referer", "value": "https://referrer.example"}, { "name": "Accept", "value": "text/html", @@ -3631,6 +3610,7 @@ async def test_middleware_headers_custom_middleware_before_custom(): "name": "Accept-Encoding", "value": "br", }, + {"name": "Referer", "value": "https://referrer.example"}, ] diff --git a/tests/test_referer.py b/tests/test_referer.py index 67e8f078..c93adb8f 100644 --- a/tests/test_referer.py +++ b/tests/test_referer.py @@ -20,6 +20,7 @@ # Zyte API requests. ({}, {}, {}, True), (SETTINGS, {"zyte_api_automap": False}, {}, True), + (SETTINGS, {"zyte_api_automap": True}, {}, False), (SETTINGS, {}, {}, False if ADDON_SUPPORT else True), ( SETTINGS, @@ -77,6 +78,53 @@ {"Referer": "https://example.com"}, False, ), + # Setting DEFAULT_REQUEST_HEADERS["Referer"] works as long as the + # middleware is not configured to set the Referer, since the + # middleware takes precedence. 
+ ({"DEFAULT_REQUEST_HEADERS": {"Referer": "https://example.com"}}, {}, {}, True), + ( + { + "DEFAULT_REQUEST_HEADERS": {"Referer": "https://example.com"}, + "REFERER_ENABLED": False, + }, + {}, + {}, + "https://example.com", + ), + ( + {**SETTINGS, "DEFAULT_REQUEST_HEADERS": {"Referer": "https://example.com"}}, + {"zyte_api_automap": False}, + {}, + True, + ), + ( + { + **SETTINGS, + "DEFAULT_REQUEST_HEADERS": {"Referer": "https://example.com"}, + "REFERER_ENABLED": False, + }, + {"zyte_api_automap": False}, + {}, + "https://example.com", + ), + ( + {**SETTINGS, "DEFAULT_REQUEST_HEADERS": {"Referer": "https://example.com"}}, + {"zyte_api_automap": True}, + {}, + "https://example.com", + ), + ( + {**SETTINGS, "DEFAULT_REQUEST_HEADERS": {"Referer": "https://example.com"}}, + {}, + {}, + "https://example.com" if ADDON_SUPPORT else True, + ), + ( + {**SETTINGS, "DEFAULT_REQUEST_HEADERS": {"Referer": "https://example.com"}}, + {"zyte_api": {"httpResponseBody": True, "httpResponseHeaders": True}}, + {}, + False, + ), # Setting the header through a Zyte API parameter # (customHttpRequestHeaders or requestHeaders) always works. 
( @@ -91,6 +139,32 @@ {}, "https://example.com", ), + ( + { + **SETTINGS, + "ZYTE_API_AUTOMAP_PARAMS": { + "customHttpRequestHeaders": [ + {"name": "Referer", "value": "https://example.com"}, + ], + }, + }, + {"zyte_api_automap": True}, + {}, + "https://example.com", + ), + ( + { + **SETTINGS, + "ZYTE_API_AUTOMAP_PARAMS": { + "customHttpRequestHeaders": [ + {"name": "Referer", "value": "https://example.com"}, + ], + }, + }, + {}, + {}, + "https://example.com" if ADDON_SUPPORT else True, + ), ( SETTINGS, { @@ -101,6 +175,28 @@ {}, "https://example.com", ), + ( + { + **SETTINGS, + "ZYTE_API_AUTOMAP_PARAMS": { + "requestHeaders": {"referer": "https://example.com"}, + }, + }, + {"zyte_api_automap": True}, + {}, + "https://example.com", + ), + ( + { + **SETTINGS, + "ZYTE_API_AUTOMAP_PARAMS": { + "requestHeaders": {"referer": "https://example.com"}, + }, + }, + {}, + {}, + "https://example.com" if ADDON_SUPPORT else True, + ), ( SETTINGS, { @@ -115,6 +211,24 @@ {}, "https://example.com", ), + ( + { + **SETTINGS, + "ZYTE_API_DEFAULT_PARAMS": { + "customHttpRequestHeaders": [ + {"name": "Referer", "value": "https://example.com"}, + ], + }, + }, + { + "zyte_api": { + "httpResponseBody": True, + "httpResponseHeaders": True, + } + }, + {}, + "https://example.com", + ), ( SETTINGS, { @@ -127,6 +241,22 @@ {}, "https://example.com", ), + ( + { + **SETTINGS, + "ZYTE_API_DEFAULT_PARAMS": { + "requestHeaders": {"referer": "https://example.com"}, + }, + }, + { + "zyte_api": { + "httpResponseBody": True, + "httpResponseHeaders": True, + } + }, + {}, + "https://example.com", + ), ), ) @ensureDeferred From be233fbf7d365e3fe41f34cd7dc341424629cc2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 13 Jan 2025 16:10:07 +0100 Subject: [PATCH 3/3] Do not change the settings constant in place --- tests/test_referer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_referer.py b/tests/test_referer.py index c93adb8f..f7ebb8a5 100644 --- 
a/tests/test_referer.py +++ b/tests/test_referer.py @@ -1,3 +1,5 @@ +from copy import copy + import pytest from pytest_twisted import ensureDeferred from scrapy import Spider, signals @@ -262,6 +264,7 @@ @ensureDeferred async def test_main(settings, meta, headers, expected, mockserver): items = [] + settings = copy(settings) settings["ZYTE_API_URL"] = mockserver.urljoin("/") start_url = mockserver.urljoin("/a") follow_up_url = mockserver.urljoin("/b")