datamasque · jiatolentino · Jun 25, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 16, 2026
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -2,6 +2,15 @@
 History
 =======
 
+1.1.1 (2026-06-25)
+------------------
+
+* Made ``DiscoveryMatch.label`` optional (it is absent for non-sensitive and ignored matches).
+* Added the ``finished_with_warnings`` status to ``AsyncRulesetGenerationTaskStatus``.
+* ``get_db_discovery_result_report`` may now return ``bytes`` (a zip of CSV parts)
+  when the server splits a large DB-discovery report.
+* ``start_async_ruleset_generation_from_csv`` now detects zip content and uploads it as a zip.
+
 1.1.0 (2026-06-24)
 ------------------
 

diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
@@ -98,6 +98,10 @@ def start_async_ruleset_generation_from_csv(
         - A text file handle (e.g. `open(path)`)
         - A binary file handle (e.g. `open(path, 'rb')`)
 
+        If the content is a zip (for example a split report from `get_db_discovery_result_report()`),
+        it is detected by its magic bytes (seekable content only) and uploaded as a zip;
+        otherwise it is uploaded as CSV.
+
         Generation runs asynchronously on the server.
         Poll `get_async_ruleset_generation_task_status` until it returns
         `AsyncRulesetGenerationTaskStatus.finished`,
@@ -114,14 +118,22 @@ def start_async_ruleset_generation_from_csv(
         else:
             content = csv_content
 
+        is_zip = False
+        if content.seekable():
+            is_zip = content.read(4) == b"PK\x03\x04"
+            content.seek(0)
+        filename = "ruleset.zip" if is_zip else "ruleset.csv"
+        content_type = "application/zip" if is_zip else "text/csv"
+
         files = [
             UploadFile(
                 field_name="csv_or_zip_file",
-                filename="ruleset.csv",
+                filename=filename,
                 content=content,
-                content_type="text/csv",
+                content_type=content_type,
             ),
         ]
+
         self.make_request(
             method="POST",
             path=f"/api/async-generate-ruleset/{connection_id}/from-csv/",

diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
@@ -252,7 +252,7 @@ class DiscoveryMatch(BaseModel):
 
     model_config = ConfigDict(extra="allow")
 
-    label: str
+    label: Optional[str] = None
     categories: list[str]
     flagged_by: str
     description: str
@@ -343,8 +343,8 @@ class FileDiscoveryMatch(BaseModel):
 
     flagged_by: str
     description: str
-    label: Optional[str] = None  # Omitted for non-sensitive and ignored matches.
-    categories: Optional[list[str]] = None  # Omitted for ignored matches.
+    label: Optional[str] = None  # Omitted for non-sensitive matches.
+    categories: Optional[list[str]] = None
     hit_ratio: Optional[int] = None  # None for metadata matches, percentage 0-100 for IDD matches.
 
 

diff --git a/datamasque/client/models/status.py b/datamasque/client/models/status.py
@@ -60,6 +60,7 @@ class AsyncRulesetGenerationTaskStatus(enum.Enum):
     """List of statuses of async ruleset generation tasks."""
 
     finished = "finished"
+    finished_with_warnings = "finished_with_warnings"
     failed = "failed"
     running = "running"
     queued = "queued"

diff --git a/datamasque/client/runs.py b/datamasque/client/runs.py
@@ -1,5 +1,6 @@
 import logging
 import re
+from typing import Union
 
 from datamasque.client.base import BaseClient
 from datamasque.client.exceptions import (
@@ -43,9 +44,12 @@ def get_run_report(self, run_id: RunId) -> str:
         response = self.make_request("GET", f"api/runs/{run_id}/run-report/")
         return response.text
 
-    def get_db_discovery_result_report(self, run_id: RunId, include_selection_column: bool = True) -> str:
+    def get_db_discovery_result_report(self, run_id: RunId, include_selection_column: bool = True) -> Union[str, bytes]:
         """
-        Returns the database-discovery result report for the specified run as CSV.
+        Returns the database-discovery result report for the specified run.
+
+        Returns CSV text (`str`),
+        or a zip of numbered CSV parts as `bytes` when the server splits a large report.
 
         When `include_selection_column` is true (the default),
         the CSV includes a `selected` column suitable for feeding back into ruleset generation.
@@ -54,6 +58,9 @@ def get_db_discovery_result_report(self, run_id: RunId, include_selection_column
         url = f"api/runs/{run_id}/db-discovery-results/report/"
         params = None if include_selection_column else {"include_selection_column": "false"}
         response = self.make_request("GET", url, params=params)
+
+        if response.headers.get("Content-Type", "").startswith("application/zip"):
+            return response.content
         return response.text
 
     def get_unfinished_runs(self) -> dict[str, UnfinishedRun]:

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "datamasque-python"
-version = "1.1.0"
+version = "1.1.1"
 description = "Official Python client for the DataMasque data-masking API."
 authors = [
     { name = "DataMasque Ltd" },

diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.1.0
+current_version = 1.1.1
 commit = True
 tag = True
 

diff --git a/tests/test_discovery.py b/tests/test_discovery.py
@@ -108,6 +108,17 @@ def test_get_db_discovery_result_report(client):
         assert result == "db discovery report without selection column"
 
 
+def test_get_db_discovery_result_report_returns_zip_bytes_when_split(client):
+    run_id = RunId(1)
+    zip_bytes = b"PK\x03\x04 split report zip bytes"  # Zip magic prefix only; not a real archive.
+    with requests_mock.Mocker() as m:
+        url = f"http://test-server/api/runs/{run_id}/db-discovery-results/report/"
+        m.get(url, content=zip_bytes, headers={"Content-Type": "application/zip"}, status_code=200)
+        result = client.get_db_discovery_result_report(run_id)  # Zip Content-Type selects the bytes path.
+        assert result == zip_bytes
+        assert isinstance(result, bytes)  # Raw response body, not decoded text.
+
+
 def test_poll_async_ruleset_generation(client):
     connection_id = ConnectionId("1")
     with requests_mock.Mocker() as m:
@@ -463,6 +474,31 @@ def test_start_async_ruleset_generation_from_csv_success(client, csv_content):
         assert form_data["csv_or_zip_file"]["content"] == b"schema,table,column,selected\npublic,users,email,true"
 
 
+@pytest.mark.parametrize(
+    "zip_content",
+    [
+        b"PK\x03\x04 zipped discovery report",
+        BytesIO(b"PK\x03\x04 zipped discovery report"),
+    ],
+    ids=["bytes", "BytesIO"],
+)
+def test_start_async_ruleset_generation_from_csv_uploads_zip_as_zip(client, zip_content):
+    """A split report is uploaded with a .zip filename and zip content-type, whether passed as bytes or a binary stream."""
+    connection_id = ConnectionId("1")
+
+    with requests_mock.Mocker() as m:
+        m.post(
+            f"http://test-server/api/async-generate-ruleset/{connection_id}/from-csv/",
+            status_code=201,
+        )
+        client.start_async_ruleset_generation_from_csv(connection_id, zip_content)
+
+        form_data = parse_multipart_form(m.last_request)  # Presumably decodes the multipart body that was sent.
+        assert form_data["csv_or_zip_file"]["filename"] == "ruleset.zip"
+        assert form_data["csv_or_zip_file"]["content_type"] == "application/zip"
+        assert form_data["csv_or_zip_file"]["content"] == b"PK\x03\x04 zipped discovery report"  # Payload unchanged.
+
+
 def test_start_async_ruleset_generation_from_csv_with_target_size(client):
     """Test async ruleset generation from CSV with target_size_bytes parameter."""
     connection_id = ConnectionId("1")

diff --git a/uv.lock b/uv.lock