From 937a89dd0469955093383dda9f1979bfc9614abf Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 22 Jun 2026 21:15:47 +0000 Subject: [PATCH] [SYNSD-2567] Fix httpx.ResponseNotRead crash in download chunk retry path The multi-threaded chunk download passes read_response_content=False to the retry layer because the chunk's HTTP body is an unconsumed httpx stream. That flag was forwarded to _log_for_retry but not to _is_retryable, so on a retryable status (e.g. 503/429 from S3) _is_retryable called _get_message() -> response.text on the unread stream, raising httpx.ResponseNotRead. Because that is a RuntimeError subclass, _get_message's except did not catch it and the exception escaped the retry loop, aborting the file and forcing a wasteful full-file restart via the outer retry. - Thread read_response_content into _is_retryable and guard both _get_message call sites with it (status-code retry decisions need no body) - Defensively widen _get_message's except to include httpx.StreamError (parent of httpx.ResponseNotRead) - Add sync + async regression tests using a genuinely unread httpx streaming response --- synapseclient/core/retry.py | 18 ++++- .../synapseclient/core/unit_test_retry.py | 76 ++++++++++++++++++- 2 files changed, 88 insertions(+), 6 deletions(-) diff --git a/synapseclient/core/retry.py b/synapseclient/core/retry.py index 8f7a3d545..f8e78e77c 100644 --- a/synapseclient/core/retry.py +++ b/synapseclient/core/retry.py @@ -370,6 +370,7 @@ async def foo(a, b, c): return [a, b, c] retry_exceptions=retry_exceptions, retry_errors=retry_errors, non_retryable_errors=non_retry_errors, + read_response_content=read_response_content, ) # Wait then retry @@ -504,6 +505,7 @@ async def foo(a, b, c): return [a, b, c] retry_exceptions=retry_exceptions, retry_errors=retry_errors, non_retryable_errors=non_retry_errors, + read_response_content=read_response_content, ) # Wait then retry @@ -551,6 +553,7 @@ def _is_retryable( retry_exceptions: List[Union[Exception, str]], retry_errors: List[str], non_retryable_errors: List[str], + read_response_content: bool = True, ) -> bool: """Determines if a request should be retried based on the response and caught exception. @@ -564,6 +567,11 @@ def _is_retryable( retry_exceptions: The exceptions that should be retried. retry_errors: The errors that should be retried. non_retryable_errors: The errors that should not be retried. + read_response_content: Whether the response body may be read. For streaming + responses (e.g. the multi-threaded download path) the body is never read, + so accessing it would raise ``httpx.ResponseNotRead``. When False, the + retry decision is made from the status code alone and the body is not + inspected. Returns: True if the request should be retried, False otherwise. @@ -572,7 +580,7 @@ def _is_retryable( # Check if we got a retry-able HTTP error if response is not None and hasattr(response, "status_code"): # First check for non-retryable error patterns even in retry status codes - if response.status_code in retry_status_codes: + if read_response_content and response.status_code in retry_status_codes: response_message = response_message or _get_message(response) # Check for non-retryable error patterns that should never be retried if response_message and any( @@ -585,7 +593,7 @@ def _is_retryable( ) or (response.status_code in retry_status_codes): return True - elif response.status_code not in range(200, 299): + elif read_response_content and response.status_code not in range(200, 299): # For all other non 200 messages look for retryable errors in the body or reason field response_message = response_message or _get_message(response) if ( @@ -659,6 +667,8 @@ def _get_message(response): else: # if the response is not JSON, return the text content return response.text - except (AttributeError, ValueError): - # The response can be truncated. In which case, the message cannot be retrieved. + except (AttributeError, ValueError, httpx.StreamError): + # The response can be truncated, or it may be an unread streaming response + # (httpx.ResponseNotRead is a subclass of httpx.StreamError). In either case + # the message cannot be retrieved. return None diff --git a/tests/unit/synapseclient/core/unit_test_retry.py b/tests/unit/synapseclient/core/unit_test_retry.py index 90ab6b7aa..1fc4f5cd9 100644 --- a/tests/unit/synapseclient/core/unit_test_retry.py +++ b/tests/unit/synapseclient/core/unit_test_retry.py @@ -1,10 +1,82 @@ +from http import HTTPStatus from unittest.mock import AsyncMock, MagicMock +import httpx import pytest from requests import Response -from synapseclient.core.exceptions import SynapseError -from synapseclient.core.retry import with_retry, with_retry_time_based_async +from synapseclient.core.exceptions import SynapseError, SynapseHTTPError +from synapseclient.core.retry import ( + with_retry, + with_retry_time_based, + with_retry_time_based_async, +) + + +def _unread_streaming_http_error() -> SynapseHTTPError: + """Build a SynapseHTTPError wrapping a genuinely-unread httpx streaming response. + + This mirrors the multi-threaded chunk download path: ``session.stream(...)`` + yields a response whose body has NOT been read, and ``_raise_for_status_httpx`` + attaches that unread response to the raised ``SynapseHTTPError``. Accessing + ``response.text`` on it raises ``httpx.ResponseNotRead``. + """ + request = httpx.Request("GET", "https://example.org/presigned-download") + response = httpx.Response( + status_code=503, + request=request, + stream=httpx.ByteStream(b"Slow Down"), + ) + # Sanity check: this is the exact failure the user reported. + with pytest.raises(httpx.ResponseNotRead): + _ = response.text + return SynapseHTTPError("503 Server Error", response=response) + + +def test_with_retry_time_based__unread_streaming_response_retryable_status() -> None: + """Regression test for the ``httpx.ResponseNotRead`` crash in the download path. + + When a chunk range request fails with a retryable status, the retry layer is + called with ``read_response_content=False`` because the streaming body was never + read. ``_is_retryable`` must decide to retry purely from the status code WITHOUT + touching the body. The bug caused it to call ``_get_message`` -> ``response.text``, + raising ``httpx.ResponseNotRead`` and escaping the retry loop entirely. + + Correct behavior: the request is retried, and once the (tiny) retry budget is + exhausted the original ``SynapseHTTPError`` is re-raised -- never + ``httpx.ResponseNotRead``. + """ + error = _unread_streaming_http_error() + function = MagicMock(side_effect=error) + + with pytest.raises(SynapseHTTPError): + with_retry_time_based( + function, + expected_status_codes=(HTTPStatus.PARTIAL_CONTENT,), + read_response_content=False, + retry_max_wait_before_failure=0.01, + ) + + # The status code alone should have driven the retry decision. + assert function.call_count > 1 + + +async def test_with_retry_time_based_async__unread_streaming_response_retryable_status() -> ( + None +): + """Async twin of the unread-streaming-response regression test.""" + error = _unread_streaming_http_error() + function = AsyncMock(side_effect=error) + + with pytest.raises(SynapseHTTPError): + await with_retry_time_based_async( + function, + expected_status_codes=(HTTPStatus.PARTIAL_CONTENT,), + read_response_content=False, + retry_max_wait_before_failure=0.01, + ) + + assert function.call_count > 1 def test_with_retry():