diff --git a/HISTORY.rst b/HISTORY.rst index cbfc4d4..6d0bf0a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,22 @@ History ======= +1.2.0 (2026-06-26) +------------------ + +* Added ``spcs_pat`` to ``DataMasqueInstanceConfig`` for authenticating to + DataMasque instances hosted behind Snowflake SPCS (Snowpark Container Services) + app ingress. When set, the client sends the Programmatic Access Token on the + ``X-SF-SPCS-Authorization`` header to clear the Snowflake gateway, independently + of the instance's own DataMasque auth. +* Added ``SpcsGatewayAuthError``, raised when the SPCS gateway rejects the PAT + before the request reaches DataMasque (with the Snowflake detail and a hint at + the likely cause). +* Added ``spcs`` to ``SnowflakeStageLocation`` so connections staged inside + Snowflake SPCS deserialise correctly. Previously, listing connections on an + instance that held an SPCS-staged Snowflake connection raised a + ``ValidationError`` on the unknown stage value. + 1.1.1 (2026-06-25) ------------------ diff --git a/README.rst b/README.rst index ace7e0b..2e6353b 100644 --- a/README.rst +++ b/README.rst @@ -42,6 +42,13 @@ Authentication is performed on the first request if ``authenticate()`` is not ca and is automatically retried once on a 401 response. ``client.healthcheck()`` is available as a lightweight readiness probe that does not consume credentials. +For a DataMasque instance hosted behind Snowflake SPCS (Snowpark Container Services) app ingress +(a ``*.snowflakecomputing.app`` ``base_url``), +pass a Snowflake Programmatic Access Token as ``spcs_pat`` on ``DataMasqueInstanceConfig``; +the client sends it on the ``X-SF-SPCS-Authorization`` header to clear the Snowflake gateway, +which strips it before forwarding so your DataMasque auth is unaffected. +See the `usage docs `_ for details. + Error handling ============== @@ -60,6 +67,9 @@ All methods raise subclasses of ``DataMasqueException`` on failure: raised by ``start_masking_run`` when the server rejects the run. - ``DataMasqueUserError`` — raised by user-management methods when the input is invalid. +- ``SpcsGatewayAuthError`` — + raised when a Snowflake SPCS app gateway rejects the configured ``spcs_pat`` + before the request reaches DataMasque. Documentation ============= diff --git a/datamasque/client/base.py b/datamasque/client/base.py index ffd726a..9aa3bea 100644 --- a/datamasque/client/base.py +++ b/datamasque/client/base.py @@ -21,6 +21,7 @@ DataMasqueTransportError, ) from datamasque.client.models.dm_instance import DataMasqueInstanceConfig +from datamasque.client.spcs import install_spcs_gateway_auth logger = logging.getLogger(__name__) @@ -137,6 +138,8 @@ def __init__(self, connection_config: DataMasqueInstanceConfig) -> None: self.verify_ssl = connection_config.verify_ssl self.token_source = connection_config.token_source self._session = _build_session(self.verify_ssl) + if connection_config.spcs_pat: + install_spcs_gateway_auth(self._session, connection_config.spcs_pat) @contextmanager def _maybe_suppress_insecure_warning(self) -> Iterator[None]: diff --git a/datamasque/client/exceptions.py b/datamasque/client/exceptions.py index 39c90fa..a7ed612 100644 --- a/datamasque/client/exceptions.py +++ b/datamasque/client/exceptions.py @@ -84,6 +84,22 @@ class IfmAuthError(DataMasqueIfmError): """Raised when the IFM client cannot obtain or refresh a JWT (e.g. invalid credentials, missing scope).""" +class SpcsGatewayAuthError(DataMasqueException): + """ + Raised when a Snowflake SPCS app gateway rejects the configured ``spcs_pat``. + + Only relevant when the client is configured with ``spcs_pat`` for an + instance behind Snowflake SPCS app ingress. The message includes the + Snowflake-provided detail, request id, and a hint at the likely cause + (e.g. an expired PAT or a network policy that excludes your IP). + + Deliberately a direct subclass of `DataMasqueException` rather than + `DataMasqueApiError`: the client's 401 re-authenticate-and-retry path keys + off `DataMasqueApiError`/HTTP status, so keeping this outside that subtree + ensures a gateway rejection aborts immediately instead of looping. + """ + + class RunNotCancellableError(DataMasqueUserError): """ Raised when `cancel_run` is called against a run that is no longer eligible for cancellation. diff --git a/datamasque/client/models/connection.py b/datamasque/client/models/connection.py index 912d46f..720d14b 100644 --- a/datamasque/client/models/connection.py +++ b/datamasque/client/models/connection.py @@ -55,6 +55,7 @@ class SnowflakeStageLocation(str, Enum): local = "local" # Not supported for production use aws_s3 = "aws_s3" azure_blob_storage = "azure_blob_storage" + spcs = "spcs" # DataMasque running inside Snowflake SPCS; staged on the container's own storage class SseSelection(Enum): diff --git a/datamasque/client/models/dm_instance.py b/datamasque/client/models/dm_instance.py index 5026134..321adc8 100644 --- a/datamasque/client/models/dm_instance.py +++ b/datamasque/client/models/dm_instance.py @@ -20,6 +20,14 @@ class DataMasqueInstanceConfig(BaseModel): the client prepends it with `Token ` when sending the `Authorization` header. The client calls `token_source` on each authentication attempt, so the callable is free to fetch and refresh tokens out-of-band (e.g. from a secrets manager). + + `spcs_pat` is an optional Snowflake Programmatic Access Token for reaching a + DataMasque instance hosted behind Snowflake SPCS (Snowpark Container Services) + app ingress, where `base_url` ends in `.snowflakecomputing.app`. It is sent on + every request via the `X-SF-SPCS-Authorization` header to clear the Snowflake + gateway, which strips it before forwarding — so it is independent of, and + layers underneath, whichever DataMasque auth method (`password` or + `token_source`) you choose. """ model_config = ConfigDict(arbitrary_types_allowed=True) @@ -29,6 +37,16 @@ class DataMasqueInstanceConfig(BaseModel): password: Optional[str] = None verify_ssl: bool = True token_source: Optional[Callable[[], str]] = None + spcs_pat: Optional[str] = None + """Snowflake Programmatic Access Token for a DataMasque instance hosted behind + Snowflake SPCS app ingress (a ``*.snowflakecomputing.app`` ``base_url``). + + Mint the PAT in Snowsight (User profile → Programmatic access tokens) for an + account that can reach the SPCS app. The client sends it on the + ``X-SF-SPCS-Authorization`` header so the Snowflake gateway lets the request + through to DataMasque; the gateway strips the header before forwarding, leaving + DataMasque's own ``Authorization`` flow untouched. Leave it unset for + instances that are not behind an SPCS gateway.""" @model_validator(mode="after") def _validate_auth_source(self) -> "DataMasqueInstanceConfig": diff --git a/datamasque/client/spcs.py b/datamasque/client/spcs.py new file mode 100644 index 0000000..900566d --- /dev/null +++ b/datamasque/client/spcs.py @@ -0,0 +1,176 @@ +""" +Snowflake SPCS app gateway authentication for :class:`DataMasqueClient`. + +When a DataMasque instance is hosted behind Snowflake SPCS (Snowpark Container +Services) app ingress (``*.snowflakecomputing.app``), every request must first +clear the Snowflake gateway. We authenticate to the gateway with a Programmatic +Access Token (PAT) sent on ``X-SF-SPCS-Authorization: Snowflake Token=""``. +The gateway accepts the PAT on this alternate header and strips it before +forwarding to the container, so DataMasque's own ``Authorization: Token `` +flow rides through untouched. + +:func:`install_spcs_gateway_auth` attaches this behaviour to a client's +``requests.Session``: it sets the header on the session (so it is sent on every +request, including the unauthenticated login) and registers a response hook that +turns a gateway-originated rejection into a clear :class:`SpcsGatewayAuthError`. +""" + +import re +from typing import Any, Optional + +import requests + +from datamasque.client.exceptions import SpcsGatewayAuthError + +SPCS_GATEWAY_AUTH_HEADER = "X-SF-SPCS-Authorization" + +# Body-shape discriminators for SPCS gateway error responses. +# The gateway emits JSON with `responseType` (ERROR_), `requestId` +# (canonical UUID), and `detail` (free text). All three must be present and +# match these patterns for the body to count as gateway-originated. +_GATEWAY_RESPONSE_TYPE_RE = re.compile(r"^ERROR_[A-Z][A-Z0-9_]+$") +_UUID_RE = re.compile( + r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", + re.IGNORECASE, +) + +# Header-shape discriminators for "this response transited a Snowflake SPCS +# gateway". The Server header and the `sfc-ss-` cookie name prefix both appear +# on every gateway-handled response (success and error alike) and aren't +# plausible to spoof by accident. +_SPCS_GATEWAY_SERVER_VALUE = "_" +_SPCS_COOKIE_PREFIX = "sfc-ss-" + + +def _has_spcs_gateway_header_signature(response: requests.Response) -> bool: + """ + True if at least one header-level Snowflake gateway marker is present. + + Looks for either ``Server: _`` (the gateway's literal Server header value) + or any ``Set-Cookie`` carrying the ``sfc-ss-`` cookie name prefix. Either is + sufficient — both indicate the response was emitted by, or transited, + Snowflake's SPCS ingress. + """ + if response.headers.get("server", "").strip() == _SPCS_GATEWAY_SERVER_VALUE: + return True + # `Set-Cookie` may appear multiple times; `requests` flattens duplicates + # via a comma-separated value in `.headers`, but our prefix substring + # check is order- and count-insensitive. + if _SPCS_COOKIE_PREFIX in response.headers.get("set-cookie", ""): + return True + return False + + +def _is_spcs_gateway_error_body(response: requests.Response) -> Optional[dict]: + """ + Return the parsed body iff it is a structurally-valid gateway error. + + All four conditions must hold: + 1. The body parses as JSON and is a dict. + 2. Keys ``responseType``, ``requestId``, ``detail`` are all present and string-typed. + 3. ``responseType`` matches ``^ERROR_$``. + 4. ``requestId`` is a canonical 8-4-4-4-12 UUID. + + Returns the parsed dict (truthy) on match, ``None`` on miss. + """ + try: + data = response.json() + except ValueError: + return None + if not isinstance(data, dict): + return None + response_type = data.get("responseType") + request_id = data.get("requestId") + detail = data.get("detail") + if not (isinstance(response_type, str) and isinstance(request_id, str) and isinstance(detail, str)): + return None + if not _GATEWAY_RESPONSE_TYPE_RE.match(response_type): + return None + if not _UUID_RE.match(request_id): + return None + return data + + +def _hint_for_gateway_detail(detail: str) -> str: + """Map common Snowflake gateway ``detail`` strings to a one-line cause hint.""" + d = (detail or "").lower() + if "network policy" in d: + return ( + "PAT requires a network policy attached to the user (or account) " + "that permits your current public IP. Run `CREATE NETWORK POLICY " + "... ALLOWED_IP_LIST = ('/32')` and `ALTER USER " + "SET NETWORK_POLICY = `." + ) + if "invalid" in d and "token" in d: + return ( + "PAT is malformed, expired, or revoked. Re-mint a PAT in Snowsight " + "(User profile -> Programmatic access tokens) and update `spcs_pat`." + ) + if "expired" in d: + return "PAT has expired. Mint a fresh one in Snowsight and update `spcs_pat`." + if "authentication" in d or "unauthorized" in d: + return ( + "Generic auth rejection. Verify the PAT was minted by a user that " + "has access to this SPCS app, and that any account-level network " + "policy includes your current public IP." + ) + return "Unknown gateway rejection — see the Snowflake `detail` string above and the Snowflake PAT docs." + + +def _check_spcs_gateway_response(response: requests.Response) -> None: + """ + Raise :class:`SpcsGatewayAuthError` iff ``response`` is a gateway-originated rejection. + + Two-layer discriminator — both must hold: + * **Body originated at the gateway**: strict shape match on the JSON body + (multiple fields, typed, with format constraints) via + :func:`_is_spcs_gateway_error_body`. + * **Response transited an SPCS gateway**: header signature confirms via + :func:`_has_spcs_gateway_header_signature`. + + Either layer alone could in principle false-positive on an unrelated + upstream that happened to emit one of those signals; the conjunction is what + makes the check robust. Legitimate DataMasque 401s (DRF ``{"detail": "..."}``) + have the gateway header signature but fail the body shape — so they correctly + flow through to the client's normal re-auth-and-retry path untouched. + """ + if response.status_code not in (401, 403): + return + if not _has_spcs_gateway_header_signature(response): + return + data = _is_spcs_gateway_error_body(response) + if data is None: + return + + response_type = data["responseType"] + request_id = data["requestId"] + detail = data["detail"] + hint = _hint_for_gateway_detail(detail) + raise SpcsGatewayAuthError( + f"SPCS gateway rejected the PAT (HTTP {response.status_code}, " + f"{response_type}). The request never reached DataMasque.\n" + f" Snowflake said: {detail!r}\n" + f" Snowflake reqId: {request_id}\n" + f" Likely cause: {hint}\n" + f" Fix in Snowsight on the account hosting this SPCS app, then retry." + ) + + +def _spcs_gateway_response_hook(response: requests.Response, *args: Any, **kwargs: Any) -> None: + """``requests`` response hook: raise on a gateway-originated auth rejection.""" + _check_spcs_gateway_response(response) + + +def install_spcs_gateway_auth(session: requests.Session, pat: str) -> None: + """ + Configure ``session`` to authenticate to a Snowflake SPCS app gateway. + + Sets the ``X-SF-SPCS-Authorization`` header on the session (so it rides on + every request, including the unauthenticated login) and registers a response + hook that raises :class:`SpcsGatewayAuthError` on a gateway rejection. + + Scoping is automatic: the client's session only ever talks to its own + ``base_url``, so there is no need to match per-request hosts. + """ + session.headers[SPCS_GATEWAY_AUTH_HEADER] = f'Snowflake Token="{pat}"' + session.hooks["response"].append(_spcs_gateway_response_hook) diff --git a/docs/client.rst b/docs/client.rst index e8c7764..87180d0 100644 --- a/docs/client.rst +++ b/docs/client.rst @@ -60,6 +60,14 @@ datamasque.client.files module :undoc-members: :show-inheritance: +datamasque.client.spcs module +----------------------------- + +.. automodule:: datamasque.client.spcs + :members: + :undoc-members: + :show-inheritance: + datamasque.client.ifm module ---------------------------- diff --git a/docs/usage.rst b/docs/usage.rst index 0d92cfe..e6d9438 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -19,3 +19,31 @@ To use DataMasque Python in a project: for connection in client.list_connections(): print(connection.name) + +Connecting to an SPCS-hosted instance +===================================== + +When DataMasque is hosted behind Snowflake SPCS (Snowpark Container Services) +app ingress, its ``base_url`` ends in ``.snowflakecomputing.app`` and every +request must first clear the Snowflake gateway. Pass a Snowflake Programmatic +Access Token (PAT) as ``spcs_pat`` and the client sends it on the +``X-SF-SPCS-Authorization`` header automatically; the gateway strips that header +before forwarding, so your DataMasque ``username``/``password`` (or +``token_source``) auth is unaffected. + +.. code-block:: python + + config = DataMasqueInstanceConfig( + base_url="https://my-app.snowflakecomputing.app", + username="api_user", + password="api_password", + spcs_pat="", + ) + client = DataMasqueClient(config) + client.authenticate() + +Mint the PAT in Snowsight (User profile → Programmatic access tokens) for an +account that can reach the SPCS app. If the gateway rejects the PAT (for example +it has expired, or a network policy excludes your IP), the client raises +``SpcsGatewayAuthError`` with the Snowflake-provided detail and a hint at the +likely cause. diff --git a/pyproject.toml b/pyproject.toml index eca07af..a94090e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "datamasque-python" -version = "1.1.1" +version = "1.2.0" description = "Official Python client for the DataMasque data-masking API." authors = [ { name = "DataMasque Ltd" }, diff --git a/tests/test_connections.py b/tests/test_connections.py index 367aed5..8ea5207 100644 --- a/tests/test_connections.py +++ b/tests/test_connections.py @@ -951,6 +951,36 @@ def test_snowflake_connection_model_validate_with_stage_location(): assert conn.password is None +def test_snowflake_connection_model_validate_with_spcs_stage_location(): + """ + A Snowflake connection staged in SPCS must deserialise (regression for ui-testing MR !185). + + When DataMasque runs inside Snowflake SPCS it saves connections with + `snowflake_stage_location=spcs`. Listing connections deserialises every + one, so an unknown stage value used to raise `ValidationError` and break + `create_or_update_connection` for unrelated connections on a shared instance. + """ + payload = { + "id": "a1b2c3d4-0000-0000-0000-000000000000", + "name": "snowflake_spcs", + "mask_type": "database", + "db_type": "snowflake", + "user": "snowman", + "database": "icicle", + "snowflake_account_id": "ABCDEF-123456", + "snowflake_warehouse": "warehouse1", + "snowflake_storage_integration_name": "mysi", + "snowflake_stage_location": "spcs", + } + + conn = SnowflakeConnectionConfig.model_validate(payload) + + assert conn.snowflake_stage_location is SnowflakeStageLocation.spcs + # SPCS staging carries no external-storage fields. + assert conn.s3_bucket_name is None + assert conn.snowflake_azure_container_name is None + + def test_snowflake_connection_model_validate_without_stage_location(): payload = { "id": "id-3", diff --git a/tests/test_spcs.py b/tests/test_spcs.py new file mode 100644 index 0000000..b57b752 --- /dev/null +++ b/tests/test_spcs.py @@ -0,0 +1,157 @@ +"""Tests for Snowflake SPCS app gateway auth (`spcs_pat` on the client).""" + +from unittest.mock import patch + +import pytest +import requests_mock + +from datamasque.client import DataMasqueClient +from datamasque.client.exceptions import SpcsGatewayAuthError +from datamasque.client.models.dm_instance import DataMasqueInstanceConfig +from datamasque.client.spcs import ( + SPCS_GATEWAY_AUTH_HEADER, + _hint_for_gateway_detail, +) + +BASE_URL = "https://my-app.snowflakecomputing.app" +PAT = "PAT123" +EXPECTED_HEADER = 'Snowflake Token="PAT123"' +VALID_UUID = "12345678-1234-1234-1234-123456789abc" + +# Headers/body that together mark a response as a gateway-originated rejection. +GATEWAY_HEADERS = {"Server": "_"} + + +def _gateway_error_body(detail="Invalid token", response_type="ERROR_INVALID_TOKEN"): + return {"responseType": response_type, "requestId": VALID_UUID, "detail": detail} + + +@pytest.fixture +def spcs_config(): + return DataMasqueInstanceConfig( + base_url=BASE_URL, + username="api_user", + password="api_password", + spcs_pat=PAT, + ) + + +@pytest.fixture +def spcs_client(spcs_config): + client = DataMasqueClient(spcs_config) + client.token = "Token dm-token" # pretend we're already authenticated with DM + return client + + +def test_spcs_header_present_on_authenticated_request(spcs_client): + with requests_mock.Mocker() as m: + m.get(f"{BASE_URL}/api/anything/", json={}, status_code=200) + spcs_client.make_request("GET", "/api/anything/") + + assert m.last_request.headers[SPCS_GATEWAY_AUTH_HEADER] == EXPECTED_HEADER + # DM's own auth header rides alongside, untouched. + assert m.last_request.headers["Authorization"] == "Token dm-token" + + +def test_spcs_header_present_on_login_request(spcs_config): + """The header must ride on the unauthenticated login POST too (it must clear the gateway).""" + client = DataMasqueClient(spcs_config) + with requests_mock.Mocker() as m: + m.post(f"{BASE_URL}/api/auth/token/login/", json={"key": "k"}, status_code=200) + client.authenticate() + + assert m.last_request.headers[SPCS_GATEWAY_AUTH_HEADER] == EXPECTED_HEADER + # Login is unauthenticated — no DM Authorization header on this request. + assert "Authorization" not in m.last_request.headers + + +def test_no_spcs_pat_means_no_header_and_no_hook(client): + """The default client (no spcs_pat) is entirely unaffected.""" + assert SPCS_GATEWAY_AUTH_HEADER not in client._session.headers + assert client._session.hooks["response"] == [] + + client.token = "Token dm-token" + with requests_mock.Mocker() as m: + m.get("http://test-server/api/anything/", json={}, status_code=200) + client.make_request("GET", "/api/anything/") + assert SPCS_GATEWAY_AUTH_HEADER not in m.last_request.headers + + +def test_gateway_401_raises_and_does_not_retry(spcs_config): + """A gateway rejection aborts immediately — no re-auth, no retry loop.""" + with patch.object(DataMasqueClient, "authenticate") as mock_auth: + client = DataMasqueClient(spcs_config) + client.token = "Token dm-token" + with requests_mock.Mocker() as m: + # A single response: a second call would 404 and fail the test loudly. + m.get( + f"{BASE_URL}/api/anything/", + json=_gateway_error_body(), + status_code=401, + headers=GATEWAY_HEADERS, + ) + with pytest.raises(SpcsGatewayAuthError) as exc: + client.make_request("GET", "/api/anything/") + + assert m.call_count == 1 + mock_auth.assert_not_called() + # The helpful hint is surfaced. + assert "PAT is malformed, expired, or revoked" in str(exc.value) + assert VALID_UUID in str(exc.value) + + +def test_normal_dm_401_still_retries(spcs_config): + """A genuine DataMasque 401 (no gateway signature) flows to the normal re-auth retry.""" + client = DataMasqueClient(spcs_config) + with requests_mock.Mocker() as m: + m.post(f"{BASE_URL}/api/auth/token/login/", json={"key": "k"}, status_code=200) + m.get( + f"{BASE_URL}/api/anything/", + [ + # DRF-shaped 401, no gateway Server header → not a gateway rejection. + {"json": {"detail": "Authentication credentials were not provided."}, "status_code": 401}, + {"json": {"ok": True}, "status_code": 200}, + ], + ) + # Should NOT raise SpcsGatewayAuthError; should re-auth and succeed. + response = client.make_request("GET", "/api/anything/") + + assert response.status_code == 200 + assert client.token == "Token k" # re-auth happened + + +def test_gateway_signature_without_body_shape_passes_through(spcs_config): + """Gateway header present but DM-shaped body → treated as a normal DM 401, not a gateway rejection.""" + client = DataMasqueClient(spcs_config) + with requests_mock.Mocker() as m: + m.post(f"{BASE_URL}/api/auth/token/login/", json={"key": "k"}, status_code=200) + m.get( + f"{BASE_URL}/api/anything/", + [ + {"json": {"detail": "nope"}, "status_code": 401, "headers": GATEWAY_HEADERS}, + {"json": {"ok": True}, "status_code": 200}, + ], + ) + response = client.make_request("GET", "/api/anything/") # must not raise + + assert response.status_code == 200 + + +@pytest.mark.parametrize( + "detail, expected_substring", + [ + ("Request failed network policy check", "network policy"), + ("Invalid token supplied", "malformed, expired, or revoked"), + ("The token has expired", "PAT has expired"), + ("Unauthorized request", "Generic auth rejection"), + ("Something totally unexpected", "Unknown gateway rejection"), + ], +) +def test_hint_for_gateway_detail(detail, expected_substring): + assert expected_substring in _hint_for_gateway_detail(detail) + + +def test_spcs_pat_coexists_with_password_and_token_source(): + """`spcs_pat` is orthogonal to the password/token_source XOR — both combos validate.""" + DataMasqueInstanceConfig(base_url=BASE_URL, username="u", password="p", spcs_pat=PAT) + DataMasqueInstanceConfig(base_url=BASE_URL, username="u", token_source=lambda: "t", spcs_pat=PAT) diff --git a/uv.lock b/uv.lock index 777b461..c6ed95e 100644 --- a/uv.lock +++ b/uv.lock @@ -428,7 +428,7 @@ toml = [ [[package]] name = "datamasque-python" -version = "1.1.1" +version = "1.2.0" source = { editable = "." } dependencies = [ { name = "pydantic" },