From 093793c41313bfe3b6a35085dca9ee65f2361ae2 Mon Sep 17 00:00:00 2001 From: "Shreyas Waikar (Persistent Systems Inc)" Date: Mon, 1 Jun 2026 16:28:48 +0530 Subject: [PATCH 01/13] chore(deps): bump authlib from 1.6.11 to 1.6.12 in ContentProcessorWorkflow Applies the changes from Dependabot PR microsoft/content-processing-solution-accelerator#589 onto dev so they reach the dev branch ahead of the upstream PR (which targets main). Refs: ADO #44960 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ContentProcessorWorkflow/pyproject.toml | 2 +- src/ContentProcessorWorkflow/uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ContentProcessorWorkflow/pyproject.toml b/src/ContentProcessorWorkflow/pyproject.toml index 804ed5f4..51cea3bb 100644 --- a/src/ContentProcessorWorkflow/pyproject.toml +++ b/src/ContentProcessorWorkflow/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "sas-cosmosdb==0.1.4", "sas-storage==1.0.0", "tenacity==9.1.2", - "authlib==1.6.11", + "authlib==1.6.12", "protobuf==6.33.6", "cryptography==46.0.7", "pyjwt==2.12.1", diff --git a/src/ContentProcessorWorkflow/uv.lock b/src/ContentProcessorWorkflow/uv.lock index 9fd628a0..ce423c6a 100644 --- a/src/ContentProcessorWorkflow/uv.lock +++ b/src/ContentProcessorWorkflow/uv.lock @@ -554,14 +554,14 @@ wheels = [ [[package]] name = "authlib" -version = "1.6.11" +version = "1.6.12" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/28/10/b325d58ffe86815b399334a101e63bc6fa4e1953921cb23703b48a0a0220/authlib-1.6.11.tar.gz", hash = "sha256:64db35b9b01aeccb4715a6c9a6613a06f2bd7be2ab9d2eb89edd1dfc7580a38f", size = 165359, upload-time = "2026-04-16T07:22:50.279Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/30/6691fdc63b35f54a5a65e04fa1e59d827f4d4e8f4a39678ba7d3088ce0c8/authlib-1.6.12.tar.gz", hash = "sha256:0656d8482f28fc8221929d5f35b2bde5d13e10555ebc06b4561b0d622e83b1bd", size = 165368, upload-time = "2026-05-04T08:11:31.826Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/57/2f/55fca558f925a51db046e5b929deb317ddb05afed74b22d89f4eca578980/authlib-1.6.11-py2.py3-none-any.whl", hash = "sha256:c8687a9a26451c51a34a06fa17bb97cb15bba46a6a626755e2d7f50da8bff3e3", size = 244469, upload-time = "2026-04-16T07:22:48.413Z" }, + { url = "https://files.pythonhosted.org/packages/cd/51/9b0b5cd4cf683a02db937a6f9bbebcdc9c56558a7bb3763ce7d3512103c3/authlib-1.6.12-py2.py3-none-any.whl", hash = "sha256:e9229ad7fde610b139dd12f5edbe97eab9ee78bfb85691247e767727850b99ab", size = 244473, upload-time = "2026-05-04T08:11:30.354Z" }, ] [[package]] @@ -2994,7 +2994,7 @@ requires-dist = [ { name = "agent-framework", specifier = "==1.0.0b260107" }, { name = "aiohttp", specifier = "==3.13.5" }, { name = "art", specifier = "==6.5" }, - { name = "authlib", specifier = "==1.6.11" }, + { name = "authlib", specifier = "==1.6.12" }, { name = "azure-ai-agents", specifier = "==1.2.0b5" }, { name = "azure-ai-inference", specifier = "==1.0.0b9" }, { name = "azure-ai-projects", specifier = "==2.0.0b3" }, From 68f54be535f9771b5bd7bcad3e3e6acc843af221 Mon Sep 17 00:00:00 2001 From: "Shreyas Waikar (Persistent Systems Inc)" Date: Mon, 1 Jun 2026 16:28:49 +0530 Subject: [PATCH 02/13] build(deps): bump idna from 3.11 to 3.15 in ContentProcessorAPI Applies the changes from Dependabot PR microsoft/content-processing-solution-accelerator#595 onto dev so they reach the dev branch ahead of the upstream PR (which targets main). Refs: ADO #44960 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ContentProcessorAPI/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ContentProcessorAPI/requirements.txt b/src/ContentProcessorAPI/requirements.txt index aa8b8f50..15754e7f 100644 --- a/src/ContentProcessorAPI/requirements.txt +++ b/src/ContentProcessorAPI/requirements.txt @@ -22,7 +22,7 @@ h11==0.16.0 httpcore==1.0.9 httptools==0.7.1 httpx==0.28.1 -idna==3.11 +idna==3.15 isodate==0.7.2 jinja2==3.1.6 jsonschema==4.25.1 From 03bcf446228729e7aef7a18fbe87359b29f06dd5 Mon Sep 17 00:00:00 2001 From: "Shreyas Waikar (Persistent Systems Inc)" Date: Mon, 1 Jun 2026 16:28:50 +0530 Subject: [PATCH 03/13] build(deps): bump idna from 3.11 to 3.15 in ContentProcessor Applies the changes from Dependabot PR microsoft/content-processing-solution-accelerator#596 onto dev so they reach the dev branch ahead of the upstream PR (which targets main). Refs: ADO #44960 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ContentProcessor/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ContentProcessor/requirements.txt b/src/ContentProcessor/requirements.txt index 123b1cee..aeff7976 100644 --- a/src/ContentProcessor/requirements.txt +++ b/src/ContentProcessor/requirements.txt @@ -15,7 +15,7 @@ colorama==0.4.6 coverage==7.13.5 cryptography==46.0.7 dnspython==2.8.0 -idna==3.11 +idna==3.15 iniconfig==2.3.0 isodate==0.7.2 mongomock==4.3.0 From 77de40526fd51b20dd60039127550cc63fe24830 Mon Sep 17 00:00:00 2001 From: "Shreyas Waikar (Persistent Systems Inc)" Date: Mon, 1 Jun 2026 16:28:51 +0530 Subject: [PATCH 04/13] chore(deps): bump idna from 3.11 to 3.15 in ContentProcessorWorkflow uv.lock Applies the changes from Dependabot PR microsoft/content-processing-solution-accelerator#597 onto dev so they reach the dev branch ahead of the upstream PR (which targets main). Refs: ADO #44960 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ContentProcessorWorkflow/uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ContentProcessorWorkflow/uv.lock b/src/ContentProcessorWorkflow/uv.lock index ce423c6a..04ec2a5e 100644 --- a/src/ContentProcessorWorkflow/uv.lock +++ b/src/ContentProcessorWorkflow/uv.lock @@ -1747,11 +1747,11 @@ wheels = [ [[package]] name = "idna" -version = "3.11" +version = "3.15" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +sdist = { url = "https://files.pythonhosted.org/packages/82/77/7b3966d0b9d1d31a36ddf1746926a11dface89a83409bf1483f0237aa758/idna-3.15.tar.gz", hash = "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc", size = 199245, upload-time = "2026-05-12T22:45:57.011Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, + { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340, upload-time = "2026-05-12T22:45:55.733Z" }, ] [[package]] From ec8d7a03b6b6e279e6bddd7f96755a44cd1d3bab Mon Sep 17 00:00:00 2001 From: chaudhariniraj Date: Fri, 12 Jun 2026 16:24:39 +0530 Subject: [PATCH 05/13] Resolve CodeQL issues --- .../src/libs/utils/azure_credential_utils.py | 6 +++++- src/ContentProcessor/src/libs/utils/credential_util.py | 6 +++++- src/ContentProcessorAPI/app/libs/base/application_base.py | 4 ++-- .../src/libs/azure/app_configuration.py | 8 +++++++- .../src/libs/base/application_base.py | 3 ++- src/ContentProcessorWorkflow/src/utils/credential_util.py | 7 ++++++- 6 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/ContentProcessor/src/libs/utils/azure_credential_utils.py b/src/ContentProcessor/src/libs/utils/azure_credential_utils.py index 07a4f2b0..5d711e85 100644 --- a/src/ContentProcessor/src/libs/utils/azure_credential_utils.py +++ b/src/ContentProcessor/src/libs/utils/azure_credential_utils.py @@ -130,7 +130,11 @@ def get_azure_credential(): logging.info( "[AUTH] All CLI credentials failed - falling back to DefaultAzureCredential" ) - return DefaultAzureCredential() + raise RuntimeError( + "No Azure authentication available. " + "Use Managed Identity in Azure or run " + "'az login' / 'azd auth login' locally." + ) def get_async_azure_credential(): diff --git a/src/ContentProcessor/src/libs/utils/credential_util.py b/src/ContentProcessor/src/libs/utils/credential_util.py index 52fbdeef..1efcaab7 100644 --- a/src/ContentProcessor/src/libs/utils/credential_util.py +++ b/src/ContentProcessor/src/libs/utils/credential_util.py @@ -130,7 +130,11 @@ def get_azure_credential(): logging.info( "[AUTH] All CLI credentials failed - falling back to DefaultAzureCredential" ) - return DefaultAzureCredential() + raise RuntimeError( + "No Azure authentication available. " + "Use Managed Identity in Azure or run " + "'az login' / 'azd auth login' locally." + ) def get_async_azure_credential(): diff --git a/src/ContentProcessorAPI/app/libs/base/application_base.py b/src/ContentProcessorAPI/app/libs/base/application_base.py index 7ea33d8e..e3fb6e1c 100644 --- a/src/ContentProcessorAPI/app/libs/base/application_base.py +++ b/src/ContentProcessorAPI/app/libs/base/application_base.py @@ -15,7 +15,7 @@ import os from abc import ABC, abstractmethod -from azure.identity import DefaultAzureCredential +from app.utils.azure_credential_utils import get_azure_credential from dotenv import load_dotenv from app.libs.application.application_configuration import ( @@ -72,7 +72,7 @@ def __init__(self, env_file_path: str | None = None, **data): self._load_env(env_file_path=env_file_path) self.application_context = AppContext() - self.application_context.set_credential(DefaultAzureCredential()) + self.application_context.set_credential(get_azure_credential()) app_config_endpoint: str | None = EnvConfiguration().app_config_endpoint if app_config_endpoint != "" and app_config_endpoint is not None: diff --git a/src/ContentProcessorWorkflow/src/libs/azure/app_configuration.py b/src/ContentProcessorWorkflow/src/libs/azure/app_configuration.py index ee2501cd..f333133e 100644 --- a/src/ContentProcessorWorkflow/src/libs/azure/app_configuration.py +++ b/src/ContentProcessorWorkflow/src/libs/azure/app_configuration.py @@ -91,7 +91,13 @@ def __init__( ValueError: If *app_configuration_url* is ``None`` or the credential is missing after defaulting. """ - self.credential = credential or DefaultAzureCredential() + if credential is None: + raise ValueError( + "Azure credential is required. " + "Use Managed Identity, AzureCliCredential, or AzureDeveloperCliCredential." + ) + + self.credential = credential self.app_config_endpoint = app_configuration_url self._initialize_client() diff --git a/src/ContentProcessorWorkflow/src/libs/base/application_base.py b/src/ContentProcessorWorkflow/src/libs/base/application_base.py index fbcbaa23..d3a63579 100644 --- a/src/ContentProcessorWorkflow/src/libs/base/application_base.py +++ b/src/ContentProcessorWorkflow/src/libs/base/application_base.py @@ -36,6 +36,7 @@ def run(self): from abc import ABC, abstractmethod from azure.identity import DefaultAzureCredential +from src.utils.credential_util import get_azure_credential from dotenv import load_dotenv from libs.agent_framework.agent_framework_settings import AgentFrameworkSettings @@ -117,7 +118,7 @@ def __init__(self, env_file_path: str | None = None, **data): self._load_env(env_file_path=env_file_path) self.application_context = AppContext() - self.application_context.set_credential(DefaultAzureCredential()) + self.application_context.set_credential(get_azure_credential()) app_config_url: str | None = _envConfiguration().app_config_endpoint if app_config_url != "" and app_config_url is not None: diff --git a/src/ContentProcessorWorkflow/src/utils/credential_util.py b/src/ContentProcessorWorkflow/src/utils/credential_util.py index b37de6d9..fbef0657 100644 --- a/src/ContentProcessorWorkflow/src/utils/credential_util.py +++ b/src/ContentProcessorWorkflow/src/utils/credential_util.py @@ -126,7 +126,12 @@ def get_azure_credential(): logging.info( "[AUTH] All CLI credentials failed - falling back to DefaultAzureCredential" ) - return DefaultAzureCredential() + + raise RuntimeError( + "No Azure authentication available. " + "Use Managed Identity in Azure or run " + "'az login' / 'azd auth login' locally." + ) def get_async_azure_credential(): From 8fc57b86eb95c492abeed51054bf553b23e9521f Mon Sep 17 00:00:00 2001 From: chaudhariniraj Date: Fri, 12 Jun 2026 17:55:45 +0530 Subject: [PATCH 06/13] Resolve test cases error --- .../utils/test_azure_credential_utils.py | 16 +++++++++------- .../test_azure_credential_utils_extended.py | 14 +++++--------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/tests/ContentProcessor/utils/test_azure_credential_utils.py b/src/tests/ContentProcessor/utils/test_azure_credential_utils.py index 216b302e..7f0f88a0 100644 --- a/src/tests/ContentProcessor/utils/test_azure_credential_utils.py +++ b/src/tests/ContentProcessor/utils/test_azure_credential_utils.py @@ -7,6 +7,8 @@ from unittest.mock import MagicMock, patch +import pytest + import libs.utils.azure_credential_utils as azure_credential_utils MODULE = "libs.utils.azure_credential_utils" @@ -45,16 +47,16 @@ def test_returns_user_assigned_with_client_id(self, mock_managed): mock_managed.assert_called_once_with(client_id="test-client-id") assert credential == mock_instance - @patch(f"{MODULE}.DefaultAzureCredential") @patch(f"{MODULE}.AzureDeveloperCliCredential", side_effect=Exception("no azd")) @patch(f"{MODULE}.AzureCliCredential", side_effect=Exception("no az")) @patch.dict("os.environ", {}, clear=True) - def test_falls_back_to_default(self, mock_cli, mock_dev_cli, mock_default): - mock_instance = MagicMock() - mock_default.return_value = mock_instance - credential = azure_credential_utils.get_azure_credential() - mock_default.assert_called_once() - assert credential == mock_instance + def test_raises_when_no_credentials_available( + self, mock_cli, mock_dev_cli + ): + with pytest.raises(RuntimeError) as exc: + azure_credential_utils.get_azure_credential() + + assert "No Azure authentication available" in str(exc.value) # ── TestGetAsyncAzureCredential ───────────────────────────────────────── diff --git a/src/tests/ContentProcessor/utils/test_azure_credential_utils_extended.py b/src/tests/ContentProcessor/utils/test_azure_credential_utils_extended.py index 11858fdc..edd735d8 100644 --- a/src/tests/ContentProcessor/utils/test_azure_credential_utils_extended.py +++ b/src/tests/ContentProcessor/utils/test_azure_credential_utils_extended.py @@ -42,26 +42,22 @@ def test_get_azure_credential_with_website_site_name(self, monkeypatch): assert credential == mock_instance def test_get_azure_credential_cli_failure_fallback(self, monkeypatch): - """Test fallback to DefaultAzureCredential when CLI credentials fail""" + """Test RuntimeError when all credential options fail""" # Clear all Azure environment indicators for key in ["WEBSITE_SITE_NAME", "AZURE_CLIENT_ID", "MSI_ENDPOINT", "IDENTITY_ENDPOINT", "KUBERNETES_SERVICE_HOST", "CONTAINER_REGISTRY_LOGIN"]: monkeypatch.delenv(key, raising=False) with patch('libs.utils.azure_credential_utils.AzureCliCredential') as mock_cli_cred, \ - patch('libs.utils.azure_credential_utils.AzureDeveloperCliCredential') as mock_azd_cred, \ - patch('libs.utils.azure_credential_utils.DefaultAzureCredential') as mock_default: + patch('libs.utils.azure_credential_utils.AzureDeveloperCliCredential') as mock_azd_cred: - # Make both CLI credentials raise exceptions mock_cli_cred.side_effect = Exception("CLI credential failed") mock_azd_cred.side_effect = Exception("AZD credential failed") - mock_default_instance = Mock() - mock_default.return_value = mock_default_instance - credential = get_azure_credential() + with pytest.raises(RuntimeError) as exc: + get_azure_credential() - assert credential == mock_default_instance - mock_default.assert_called_once() + assert "No Azure authentication available" in str(exc.value) def test_get_azure_credential_azd_success(self, monkeypatch): """Test successful Azure Developer CLI credential""" From e11892e8f6aaef61d6ba5faa17d7962f73e481f5 Mon Sep 17 00:00:00 2001 From: chaudhariniraj Date: Fri, 12 Jun 2026 18:28:45 +0530 Subject: [PATCH 07/13] Resolve test cases error 1 --- src/ContentProcessorWorkflow/src/libs/base/application_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ContentProcessorWorkflow/src/libs/base/application_base.py b/src/ContentProcessorWorkflow/src/libs/base/application_base.py index d3a63579..0379a702 100644 --- a/src/ContentProcessorWorkflow/src/libs/base/application_base.py +++ b/src/ContentProcessorWorkflow/src/libs/base/application_base.py @@ -36,6 +36,7 @@ def run(self): from abc import ABC, abstractmethod from azure.identity import DefaultAzureCredential +from utils.credential_util import get_azure_credential from src.utils.credential_util import get_azure_credential from dotenv import load_dotenv From cab11be1fe2d05e5c4db9b0c2f294486313bd6d6 Mon Sep 17 00:00:00 2001 From: chaudhariniraj Date: Fri, 12 Jun 2026 18:44:21 +0530 Subject: [PATCH 08/13] Resolve test cases error 2 --- src/ContentProcessorWorkflow/src/libs/base/application_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ContentProcessorWorkflow/src/libs/base/application_base.py b/src/ContentProcessorWorkflow/src/libs/base/application_base.py index 0379a702..cb657398 100644 --- a/src/ContentProcessorWorkflow/src/libs/base/application_base.py +++ b/src/ContentProcessorWorkflow/src/libs/base/application_base.py @@ -37,7 +37,6 @@ def run(self): from azure.identity import DefaultAzureCredential from utils.credential_util import get_azure_credential -from src.utils.credential_util import get_azure_credential from dotenv import load_dotenv from libs.agent_framework.agent_framework_settings import AgentFrameworkSettings From cf8dfc7284ee02bd97d4a5ef7c15085b4a30c627 Mon Sep 17 00:00:00 2001 From: chaudhariniraj Date: Fri, 12 Jun 2026 18:54:00 +0530 Subject: [PATCH 09/13] Resolve test cases error 3 --- .../src/libs/base/application_base.py | 1 - .../utils/test_credential_util_extended.py | 24 +++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/ContentProcessorWorkflow/src/libs/base/application_base.py b/src/ContentProcessorWorkflow/src/libs/base/application_base.py index cb657398..61b6a603 100644 --- a/src/ContentProcessorWorkflow/src/libs/base/application_base.py +++ b/src/ContentProcessorWorkflow/src/libs/base/application_base.py @@ -35,7 +35,6 @@ def run(self): import os from abc import ABC, abstractmethod -from azure.identity import DefaultAzureCredential from utils.credential_util import get_azure_credential from dotenv import load_dotenv diff --git a/src/tests/ContentProcessorWorkflow/utils/test_credential_util_extended.py b/src/tests/ContentProcessorWorkflow/utils/test_credential_util_extended.py index d4fda81d..40cfaf68 100644 --- a/src/tests/ContentProcessorWorkflow/utils/test_credential_util_extended.py +++ b/src/tests/ContentProcessorWorkflow/utils/test_credential_util_extended.py @@ -1,5 +1,6 @@ """Extended tests for credential_util.py to improve coverage""" from unittest.mock import Mock, patch +import pytest from utils.credential_util import ( get_azure_credential, get_async_azure_credential, @@ -40,24 +41,27 @@ def test_get_azure_credential_app_service_environment(self, monkeypatch): assert credential == mock_instance def test_get_azure_credential_all_cli_fail(self, monkeypatch): - """Test fallback when all CLI credentials fail""" - for key in ["WEBSITE_SITE_NAME", "AZURE_CLIENT_ID", "MSI_ENDPOINT", - "IDENTITY_ENDPOINT", "KUBERNETES_SERVICE_HOST", "CONTAINER_REGISTRY_LOGIN"]: + """Test RuntimeError when all credential options fail""" + for key in [ + "WEBSITE_SITE_NAME", + "AZURE_CLIENT_ID", + "MSI_ENDPOINT", + "IDENTITY_ENDPOINT", + "KUBERNETES_SERVICE_HOST", + "CONTAINER_REGISTRY_LOGIN", + ]: monkeypatch.delenv(key, raising=False) with patch('utils.credential_util.AzureCliCredential') as mock_cli, \ - patch('utils.credential_util.AzureDeveloperCliCredential') as mock_azd, \ - patch('utils.credential_util.DefaultAzureCredential') as mock_default: + patch('utils.credential_util.AzureDeveloperCliCredential') as mock_azd: mock_cli.side_effect = Exception("AzureCLI not available") mock_azd.side_effect = Exception("AzureDeveloperCLI not available") - mock_default_instance = Mock() - mock_default.return_value = mock_default_instance - credential = get_azure_credential() + with pytest.raises(RuntimeError) as exc: + get_azure_credential() - assert credential == mock_default_instance - mock_default.assert_called_once() + assert "No Azure authentication available" in str(exc.value) def test_get_azure_credential_cli_success(self, monkeypatch): """Test successful Azure CLI credential""" From dd66aa7bfb3b5b84433cf7e6cf8027d9deca85aa Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Mon, 15 Jun 2026 13:15:18 +0530 Subject: [PATCH 10/13] fix(scoring): show N/A instead of 0% for unavailable entity/schema scores Root cause: When the evaluate step couldn't compute any per-field confidence (e.g. logprobs unavailable on reasoning models like gpt-5/o1/o3, or image-only flow with no Content Understanding signal), save_handler emitted entity_score=0.0, schema_score=0.0. These `0.0`s flowed through Cosmos -> API -> UI and rendered as `0%` (red), indistinguishable from a genuine zero confidence. Fix: Treat `total_evaluated_fields_count == 0` (or no comparison items) as *unavailable* and propagate `None` through the ContentProcessor, ContentProcessorAPI and ContentProcessorWorkflow models. The frontend percentage cell renderer now shows `N/A` for null/undefined and `0%` only for a genuine numeric zero. Files changed: - ContentProcessor: save_handler.py (extracted _derive_aggregate_scores helper) - ContentProcessor: content_process.py default scores -> None - ContentProcessorAPI: ContentProcess + Content_Process default scores -> None - ContentProcessorWorkflow: ContentProcessRecord + Content_Process default scores -> None - ContentProcessorWorkflow: document_process_executor preserves None instead of coercing to 0.0 - ContentProcessorWeb: ProcessQueueGridTypes types scores nullable; ProcessQueueGrid passes undefined for null/undefined; CustomCellRender renders `N/A` when valueText is null/undefined and only `...` while still processing Tests: - New: ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py (5 cases: valid scores, missing per-field signal, no comparison items, genuine zero, all-fields-above-threshold) - Updated existing default-value tests in Workflow + src/tests to assert None - Added tests for explicit zero preservation and Failed status -> None --- .../src/libs/models/content_process.py | 6 +- .../libs/pipeline/handlers/save_handler.py | 67 ++++++--- .../unit/pipeline/test_save_handler_scores.py | 137 ++++++++++++++++++ .../models/contentprocessor/claim_process.py | 12 +- .../contentprocessor/content_process.py | 6 +- .../ProcessQueueGrid/CustomCellRender.tsx | 39 ++++- .../ProcessQueueGrid/ProcessQueueGrid.tsx | 12 +- .../ProcessQueueGrid/ProcessQueueGridTypes.ts | 21 ++- .../src/repositories/model/claim_process.py | 12 +- .../src/services/content_process_models.py | 4 +- .../executor/document_process_executor.py | 40 +++-- .../repositories/test_claim_process_model.py | 28 +++- .../services/test_content_process_models.py | 10 ++ .../repositories/test_claim_process_model.py | 6 +- .../services/test_content_process_models.py | 6 +- 15 files changed, 336 insertions(+), 70 deletions(-) create mode 100644 src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py diff --git a/src/ContentProcessor/src/libs/models/content_process.py b/src/ContentProcessor/src/libs/models/content_process.py index c9aacf1f..018f13b2 100644 --- a/src/ContentProcessor/src/libs/models/content_process.py +++ b/src/ContentProcessor/src/libs/models/content_process.py @@ -67,9 +67,9 @@ class ContentProcess(BaseModel): last_modified_time: datetime.datetime = datetime.datetime.now(datetime.UTC) last_modified_by: Optional[str] = None status: str - entity_score: Optional[float] = 0.0 - min_extracted_entity_score: Optional[float] = 0.0 - schema_score: Optional[float] = 0.0 + entity_score: Optional[float] = None + min_extracted_entity_score: Optional[float] = None + schema_score: Optional[float] = None result: Optional[dict] = None confidence: Optional[dict] = None target_schema: Optional[Schema] = None diff --git a/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py b/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py index 15c90f56..1870a4b1 100644 --- a/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py +++ b/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py @@ -112,20 +112,15 @@ def find_process_result(step_name: str): ) ) - total_evaluated_fields_count = evaluated_result.confidence.get( - "total_evaluated_fields_count", 0 - ) - schema_score = ( - 0 - if total_evaluated_fields_count == 0 - else round( - ( - len(evaluated_result.comparison_result.items) - - evaluated_result.confidence["zero_confidence_fields_count"] - ) - / len(evaluated_result.comparison_result.items), - 3, - ) + # Determine whether per-field confidence could actually be computed. + # When `total_evaluated_fields_count == 0`, no field-level confidence + # signal was produced (e.g. logprobs unavailable on reasoning models, or + # an image flow with no Content Understanding signal). In that case the + # entity/schema scores are *unavailable* rather than genuinely zero, and + # we propagate ``None`` so downstream consumers (API + UI) can render + # an explicit "N/A" instead of a misleading "0%". + entity_score, schema_score, min_extracted_entity_score = ( + self._derive_aggregate_scores(evaluated_result) ) processed_result = ContentProcess( @@ -143,11 +138,9 @@ def find_process_result(step_name: str): self._current_message_context.data_pipeline.pipeline_status.creation_time, "%Y-%m-%dT%H:%M:%S.%fZ", ), - entity_score=evaluated_result.confidence["overall_confidence"], + entity_score=entity_score, schema_score=schema_score, - min_extracted_entity_score=evaluated_result.confidence[ - "min_extracted_field_confidence" - ], + min_extracted_entity_score=min_extracted_entity_score, prompt_tokens=evaluated_result.prompt_tokens, completion_tokens=evaluated_result.completion_tokens, target_schema=Schema.get_schema( @@ -241,3 +234,41 @@ def _summarize_processed_time(self, step_results: list[StepResult]) -> str: # Format the total elapsed time as a string formatted_elapsed_time = f"{total_hours:02}:{total_minutes:02}:{total_seconds:02}.{total_milliseconds:03}" return formatted_elapsed_time + + @staticmethod + def _derive_aggregate_scores( + evaluated_result: DataExtractionResult, + ) -> tuple[float | None, float | None, float | None]: + """Compute ``(entity_score, schema_score, min_extracted_entity_score)``. + + Returns ``(None, None, None)`` when no per-field confidence signal was + produced (i.e. ``total_evaluated_fields_count == 0`` or there are no + comparison items). This happens, for example, when the LLM call could + not return logprobs (reasoning models) and there is no Content + Understanding signal to fall back on. Treating that case as "unknown" + rather than ``0.0`` lets the API and UI render "N/A" instead of a + misleading "0%". + + A genuine zero confidence (e.g. a model that emitted fields but + every token had ``logprob == -inf``) is preserved verbatim. + """ + confidence = evaluated_result.confidence or {} + total_evaluated_fields_count = confidence.get( + "total_evaluated_fields_count", 0 + ) + comparison_items = ( + evaluated_result.comparison_result.items + if evaluated_result.comparison_result is not None + else [] + ) + if total_evaluated_fields_count == 0 or not comparison_items: + return (None, None, None) + + zero_count = confidence.get("zero_confidence_fields_count", 0) + schema_score = round( + (len(comparison_items) - zero_count) / len(comparison_items), + 3, + ) + entity_score = confidence.get("overall_confidence") + min_extracted_entity_score = confidence.get("min_extracted_field_confidence") + return (entity_score, schema_score, min_extracted_entity_score) diff --git a/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py b/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py new file mode 100644 index 00000000..66e2e44c --- /dev/null +++ b/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py @@ -0,0 +1,137 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Tests for ``SaveHandler._derive_aggregate_scores``. + +Covers the score-availability semantics: +- valid scores flow through verbatim +- missing per-field signal yields ``None`` (rendered as "N/A" in the UI) +- a genuine zero is preserved as ``0`` (rendered as "0%") +- failed processing (no comparison items) yields ``None`` +""" + +from __future__ import annotations + +from libs.pipeline.handlers.logics.evaluate_handler.comparison import ( + ExtractionComparisonData, + ExtractionComparisonItem, +) +from libs.pipeline.handlers.logics.evaluate_handler.model import DataExtractionResult +from libs.pipeline.handlers.save_handler import SaveHandler + + +def _make_result( + *, + items: list[ExtractionComparisonItem], + confidence: dict, +) -> DataExtractionResult: + return DataExtractionResult( + extracted_result={}, + confidence=confidence, + comparison_result=ExtractionComparisonData(items=items), + prompt_tokens=0, + completion_tokens=0, + execution_time=0, + ) + + +class TestDeriveAggregateScores: + def test_valid_scores_flow_through(self): + """A normal evaluate-step result must produce numeric scores.""" + items = [ + ExtractionComparisonItem( + Field="a", Extracted="x", Confidence="90.00%", IsAboveThreshold="True" + ), + ExtractionComparisonItem( + Field="b", Extracted="y", Confidence="80.00%", IsAboveThreshold="True" + ), + ExtractionComparisonItem( + Field="c", Extracted="z", Confidence="0.00%", IsAboveThreshold="False" + ), + ] + confidence = { + "total_evaluated_fields_count": 3, + "overall_confidence": 0.567, + "min_extracted_field_confidence": 0.0, + "zero_confidence_fields_count": 1, + } + entity, schema, min_score = SaveHandler._derive_aggregate_scores( + _make_result(items=items, confidence=confidence) + ) + assert entity == 0.567 + # 2 of 3 fields above threshold → 0.667 + assert schema == round(2 / 3, 3) + assert min_score == 0.0 + + def test_missing_per_field_signal_returns_none(self): + """Reasoning-model / image-only flow: no signal → ``None`` everywhere.""" + items: list[ExtractionComparisonItem] = [] + confidence = { + "total_evaluated_fields_count": 0, + "overall_confidence": 0.0, + "min_extracted_field_confidence": 0.0, + "zero_confidence_fields_count": 0, + } + entity, schema, min_score = SaveHandler._derive_aggregate_scores( + _make_result(items=items, confidence=confidence) + ) + assert entity is None + assert schema is None + assert min_score is None + + def test_no_comparison_items_returns_none(self): + """Even if confidence claims fields exist, an empty comparison list is unknown.""" + confidence = { + "total_evaluated_fields_count": 5, + "overall_confidence": 0.9, + "min_extracted_field_confidence": 0.5, + "zero_confidence_fields_count": 0, + } + entity, schema, min_score = SaveHandler._derive_aggregate_scores( + _make_result(items=[], confidence=confidence) + ) + assert entity is None + assert schema is None + assert min_score is None + + def test_genuine_zero_score_preserved(self): + """A real ``0`` confidence (e.g. all fields below threshold) must NOT become ``None``.""" + items = [ + ExtractionComparisonItem( + Field="a", Extracted="x", Confidence="0.00%", IsAboveThreshold="False" + ), + ] + confidence = { + "total_evaluated_fields_count": 1, + "overall_confidence": 0.0, + "min_extracted_field_confidence": 0.0, + "zero_confidence_fields_count": 1, + } + entity, schema, min_score = SaveHandler._derive_aggregate_scores( + _make_result(items=items, confidence=confidence) + ) + assert entity == 0.0 + assert schema == 0.0 + assert min_score == 0.0 + + def test_all_fields_above_threshold(self): + items = [ + ExtractionComparisonItem( + Field="a", Extracted="x", Confidence="95.00%", IsAboveThreshold="True" + ), + ExtractionComparisonItem( + Field="b", Extracted="y", Confidence="90.00%", IsAboveThreshold="True" + ), + ] + confidence = { + "total_evaluated_fields_count": 2, + "overall_confidence": 0.925, + "min_extracted_field_confidence": 0.9, + "zero_confidence_fields_count": 0, + } + entity, schema, min_score = SaveHandler._derive_aggregate_scores( + _make_result(items=items, confidence=confidence) + ) + assert entity == 0.925 + assert schema == 1.0 + assert min_score == 0.9 diff --git a/src/ContentProcessorAPI/app/routers/models/contentprocessor/claim_process.py b/src/ContentProcessorAPI/app/routers/models/contentprocessor/claim_process.py index 22625476..fb08f502 100644 --- a/src/ContentProcessorAPI/app/routers/models/contentprocessor/claim_process.py +++ b/src/ContentProcessorAPI/app/routers/models/contentprocessor/claim_process.py @@ -53,13 +53,13 @@ class Content_Process(EntityBase): mime_type: Optional[str] = Field( description="MIME type of the processed content file", default=None ) - entity_score: float = Field( - description="Score indicating the quality of entity extraction from the content", - default=0.0, + entity_score: Optional[float] = Field( + description="Score indicating the quality of entity extraction from the content. ``None`` means the score was not available (e.g. logprobs unavailable on reasoning models).", + default=None, ) - schema_score: float = Field( - description="Score indicating the quality of schema matching for the content", - default=0.0, + schema_score: Optional[float] = Field( + description="Score indicating the quality of schema matching for the content. ``None`` means the score was not available.", + default=None, ) status: Optional[str] = Field( description="Indicates the current status in the content processing pipeline", diff --git a/src/ContentProcessorAPI/app/routers/models/contentprocessor/content_process.py b/src/ContentProcessorAPI/app/routers/models/contentprocessor/content_process.py index 5bf0ae23..02ec3edf 100644 --- a/src/ContentProcessorAPI/app/routers/models/contentprocessor/content_process.py +++ b/src/ContentProcessorAPI/app/routers/models/contentprocessor/content_process.py @@ -134,9 +134,9 @@ class ContentProcess(BaseModel): ) last_modified_by: Optional[str] = None status: Optional[str] = None - entity_score: Optional[float] = 0.0 - min_extracted_entity_score: Optional[float] = 0.0 - schema_score: Optional[float] = 0.0 + entity_score: Optional[float] = None + min_extracted_entity_score: Optional[float] = None + schema_score: Optional[float] = None result: Optional[dict] = None confidence: Optional[dict] = None target_schema: Optional[Schema] = None diff --git a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/CustomCellRender.tsx b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/CustomCellRender.tsx index ef36c43e..d701a0ff 100644 --- a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/CustomCellRender.tsx +++ b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/CustomCellRender.tsx @@ -29,7 +29,12 @@ interface DeleteItem { interface CellRendererExtraProps { readonly txt?: string; readonly timeString?: string; - readonly valueText?: string; + /** + * Stringified score value. ``undefined`` (or ``null``) means the score is + * not available — the percentage renderer will show "N/A" instead of "0%" + * to distinguish "unavailable" from a genuine zero. + */ + readonly valueText?: string | null; readonly status?: string; readonly lastModifiedBy?: string; readonly text?: string | number; @@ -91,9 +96,31 @@ const CellRenderer: React.FC = ({ type, props }) => { }; // Render for percentage - const renderPercentage = (valueText: string, status: string) => { + const renderPercentage = (valueText: string | null | undefined, status: string) => { + // ``null``/``undefined``/empty string === score unavailable. Render an + // explicit "N/A" so users can distinguish missing scores from a genuine + // zero. (Backends emit ``None``/``null`` when, for example, logprobs were + // unavailable on a reasoning model and confidence couldn't be computed.) + if (valueText === null || valueText === undefined || valueText === '') { + return ( +
+ N/A +
+ ); + } + const decimalValue = Number(valueText); - if (isNaN(decimalValue) || status !== 'Completed') { + if (isNaN(decimalValue)) { + return ( +
+ N/A +
+ ); + } + + // Score is numeric (including a genuine 0): only show "..." while the + // document is still being processed. + if (status !== 'Completed') { return
...
; } @@ -124,7 +151,7 @@ const CellRenderer: React.FC = ({ type, props }) => { }; // Render for schema score - const calculateSchemaScore = (valueText: string, lastModifiedBy: string, status: string) => { + const calculateSchemaScore = (valueText: string | null | undefined, lastModifiedBy: string, status: string) => { if (lastModifiedBy === 'user') { return (
@@ -186,9 +213,9 @@ const CellRenderer: React.FC = ({ type, props }) => { case 'processTime': return renderProcessTimeInSeconds(timeString || ''); case 'percentage': - return renderPercentage(valueText || '', status || ''); + return renderPercentage(valueText, status || ''); case 'schemaScore': - return calculateSchemaScore(valueText || '', lastModifiedBy || '', status || ''); + return calculateSchemaScore(valueText, lastModifiedBy || '', status || ''); case 'text': return renderText(text ?? '', 'center'); case 'date': diff --git a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx index 0581b3ac..3b1462d1 100644 --- a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx +++ b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx @@ -373,7 +373,11 @@ const ProcessQueueGrid: React.FC = () => { @@ -382,7 +386,11 @@ const ProcessQueueGrid: React.FC = () => { diff --git a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts index d441eb5a..30ac7a8b 100644 --- a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts +++ b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts @@ -17,10 +17,23 @@ export interface ProcessedDocument { readonly file_name: string; /** MIME type of the document. */ readonly mime_type: string; - /** Entity extraction confidence score (0–1). */ - readonly entity_score: number; - /** Schema compliance score (0–1). */ - readonly schema_score: number; + /** + * Entity extraction confidence score in the range 0–1. + * + * ``null``/``undefined`` means the score was not produced by the backend + * (for example: logprobs were unavailable on a reasoning model, or the + * pipeline didn't reach the evaluate step). In that case the UI shows + * "N/A" rather than a misleading "0%". A genuine numeric ``0`` is still + * rendered as ``0%``. + */ + readonly entity_score: number | null | undefined; + /** + * Schema compliance score in the range 0–1. + * + * ``null``/``undefined`` means the score was not produced. See + * {@link entity_score} for rendering semantics. + */ + readonly schema_score: number | null | undefined; /** Current processing status. */ readonly status: string; /** Duration string for processing time (HH:MM:SS). */ diff --git a/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py b/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py index 470a946c..e88500a3 100644 --- a/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py +++ b/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py @@ -77,13 +77,13 @@ class Content_Process(EntityBase): mime_type: Optional[str] = Field( description="MIME type of the processed content file", default=None ) - entity_score: float = Field( - description="Score indicating the quality of entity extraction from the content", - default=0.0, + entity_score: Optional[float] = Field( + description="Score indicating the quality of entity extraction from the content. ``None`` means the score was not available (e.g. logprobs unavailable on reasoning models, image-only flow without Content Understanding).", + default=None, ) - schema_score: float = Field( - description="Score indicating the quality of schema matching for the content", - default=0.0, + schema_score: Optional[float] = Field( + description="Score indicating the quality of schema matching for the content. ``None`` means the score was not available.", + default=None, ) status: Optional[str] = Field( description="Indicates the current status in the content processing pipeline", diff --git a/src/ContentProcessorWorkflow/src/services/content_process_models.py b/src/ContentProcessorWorkflow/src/services/content_process_models.py index 18c01e58..319de5a5 100644 --- a/src/ContentProcessorWorkflow/src/services/content_process_models.py +++ b/src/ContentProcessorWorkflow/src/services/content_process_models.py @@ -80,8 +80,8 @@ class ContentProcessRecord(RootEntityBase): processed_time: Optional[str] = None imported_time: Optional[datetime] = None status: Optional[str] = None - entity_score: Optional[float] = 0.0 - schema_score: Optional[float] = 0.0 + entity_score: Optional[float] = None + schema_score: Optional[float] = None result: Optional[Any] = None confidence: Optional[Any] = None diff --git a/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py b/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py index f131c1a2..4c46fa7e 100644 --- a/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py +++ b/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py @@ -242,8 +242,11 @@ async def _on_poll(poll_data: dict) -> None: status_text = poll_result.get("status", "Failed") - schema_score_f = 0.0 - entity_score_f = 0.0 + # ``None`` here represents "score is not yet available" rather + # than a genuine zero. Preserved through to the API/UI so a + # missing score renders as "N/A" instead of a misleading 0%. + schema_score_f: float | None = None + entity_score_f: float | None = None processed_time = "" result_payload = None @@ -253,18 +256,27 @@ async def _on_poll(poll_data: dict) -> None: ) if isinstance(final_payload, dict): status_text = final_payload.get("status") or status_text - try: - schema_score_f = float( - final_payload.get("schema_score") or 0.0 - ) - except Exception: - schema_score_f = 0.0 - try: - entity_score_f = float( - final_payload.get("entity_score") or 0.0 - ) - except Exception: - entity_score_f = 0.0 + + def _coerce_score(value: object) -> float | None: + """Convert a raw score payload to ``float`` or ``None``. + + Unlike the previous ``float(... or 0.0)`` form, an + explicit ``None`` (score unavailable) is preserved + instead of being silently coerced to ``0.0``. + """ + if value is None: + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + schema_score_f = _coerce_score( + final_payload.get("schema_score") + ) + entity_score_f = _coerce_score( + final_payload.get("entity_score") + ) try: processed_time = ( final_payload.get("processed_time") or "" diff --git a/src/ContentProcessorWorkflow/tests/unit/repositories/test_claim_process_model.py b/src/ContentProcessorWorkflow/tests/unit/repositories/test_claim_process_model.py index 195b9b36..d64abc84 100644 --- a/src/ContentProcessorWorkflow/tests/unit/repositories/test_claim_process_model.py +++ b/src/ContentProcessorWorkflow/tests/unit/repositories/test_claim_process_model.py @@ -42,8 +42,10 @@ def test_defaults(self): assert cp.process_id == "p1" assert cp.file_name == "doc.pdf" assert cp.mime_type is None - assert cp.entity_score == 0.0 - assert cp.schema_score == 0.0 + # ``None`` is now the default for "score unavailable" so the UI can + # render "N/A" rather than a misleading "0%". + assert cp.entity_score is None + assert cp.schema_score is None assert cp.status is None assert cp.processed_time == "" @@ -57,6 +59,28 @@ def test_explicit_scores(self): assert cp.entity_score == 0.95 assert cp.schema_score == 0.87 + def test_explicit_zero_score_preserved(self): + """A literal ``0`` is a real score and must not become ``None``.""" + cp = Content_Process( + process_id="p1", + file_name="doc.pdf", + entity_score=0.0, + schema_score=0.0, + ) + assert cp.entity_score == 0.0 + assert cp.schema_score == 0.0 + + def test_failed_processing_keeps_scores_none(self): + """A failed file must surface unknown scores rather than ``0.0``.""" + cp = Content_Process( + process_id="p1", + file_name="doc.pdf", + status="Failed", + ) + assert cp.status == "Failed" + assert cp.entity_score is None + assert cp.schema_score is None + # ── Claim_Process ──────────────────────────────────────────────────────────── diff --git a/src/ContentProcessorWorkflow/tests/unit/services/test_content_process_models.py b/src/ContentProcessorWorkflow/tests/unit/services/test_content_process_models.py index 19765025..da66aa83 100644 --- a/src/ContentProcessorWorkflow/tests/unit/services/test_content_process_models.py +++ b/src/ContentProcessorWorkflow/tests/unit/services/test_content_process_models.py @@ -152,6 +152,16 @@ def test_construction_with_defaults(self): assert rec.id == "r1" assert rec.process_id == "" assert rec.status is None + # ``None`` (rather than ``0.0``) is the sentinel for "score unavailable" + # so the UI can render "N/A" instead of a misleading "0%". + assert rec.entity_score is None + assert rec.schema_score is None + + def test_explicit_zero_score_preserved(self): + """A literal ``0.0`` must survive round-trip and not be coerced to ``None``.""" + rec = ContentProcessRecord( + id="r1", process_id="r1", entity_score=0.0, schema_score=0.0 + ) assert rec.entity_score == 0.0 assert rec.schema_score == 0.0 diff --git a/src/tests/ContentProcessorWorkflow/repositories/test_claim_process_model.py b/src/tests/ContentProcessorWorkflow/repositories/test_claim_process_model.py index 36de49c0..0a2e5b8e 100644 --- a/src/tests/ContentProcessorWorkflow/repositories/test_claim_process_model.py +++ b/src/tests/ContentProcessorWorkflow/repositories/test_claim_process_model.py @@ -42,8 +42,10 @@ def test_defaults(self): assert cp.process_id == "p1" assert cp.file_name == "doc.pdf" assert cp.mime_type is None - assert cp.entity_score == 0.0 - assert cp.schema_score == 0.0 + # ``None`` is the sentinel for "score unavailable" so the UI can render + # "N/A" instead of "0%". + assert cp.entity_score is None + assert cp.schema_score is None assert cp.status is None assert cp.processed_time == "" diff --git a/src/tests/ContentProcessorWorkflow/services/test_content_process_models.py b/src/tests/ContentProcessorWorkflow/services/test_content_process_models.py index 059b2938..d1665f57 100644 --- a/src/tests/ContentProcessorWorkflow/services/test_content_process_models.py +++ b/src/tests/ContentProcessorWorkflow/services/test_content_process_models.py @@ -218,8 +218,10 @@ def test_content_process_record_defaults(self): assert record.process_id == "" assert record.processed_file_name is None assert record.processed_file_mime_type is None - assert record.entity_score == 0.0 - assert record.schema_score == 0.0 + # ``None`` is the sentinel for "score unavailable" so the UI can render + # "N/A" rather than "0%". + assert record.entity_score is None + assert record.schema_score is None def test_to_cosmos_dict(self): """Test ContentProcessRecord.to_cosmos_dict method""" From 56c724f390f4096a485be5a96ae991099cd8fe66 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Mon, 15 Jun 2026 16:07:20 +0530 Subject: [PATCH 11/13] refactor(scoring): structural completeness fallback instead of N/A Per feedback: Completed runs must always show a meaningful number; Failed runs and genuine zeros stay at 0%. - save_handler._derive_aggregate_scores picks the best available signal: (1) probabilistic confidence when logprobs available; (2) structural completeness (filled fields / total) when no logprobs (reasoning models, image-only flow); (3) 0.0 when no extraction data at all. - _is_filled_value heuristic: None/empty/whitespace count as not filled; descends into nested dicts/lists. - Reverted models from Optional[float]=None back to default 0.0. - Reverted frontend: no N/A path; renders 0% for null/missing scores. - 15 new tests covering all 3 paths + _is_filled_value heuristic. --- .../src/libs/models/content_process.py | 6 +- .../libs/pipeline/handlers/save_handler.py | 97 +++++++--- .../unit/pipeline/test_save_handler_scores.py | 182 ++++++++++++++---- .../models/contentprocessor/claim_process.py | 12 +- .../contentprocessor/content_process.py | 6 +- .../ProcessQueueGrid/CustomCellRender.tsx | 39 +--- .../ProcessQueueGrid/ProcessQueueGrid.tsx | 4 +- .../ProcessQueueGrid/ProcessQueueGridTypes.ts | 21 +- .../src/repositories/model/claim_process.py | 12 +- .../src/services/content_process_models.py | 4 +- .../executor/document_process_executor.py | 24 +-- .../repositories/test_claim_process_model.py | 17 +- .../services/test_content_process_models.py | 11 +- .../repositories/test_claim_process_model.py | 7 +- .../services/test_content_process_models.py | 8 +- 15 files changed, 274 insertions(+), 176 deletions(-) diff --git a/src/ContentProcessor/src/libs/models/content_process.py b/src/ContentProcessor/src/libs/models/content_process.py index 018f13b2..c9aacf1f 100644 --- a/src/ContentProcessor/src/libs/models/content_process.py +++ b/src/ContentProcessor/src/libs/models/content_process.py @@ -67,9 +67,9 @@ class ContentProcess(BaseModel): last_modified_time: datetime.datetime = datetime.datetime.now(datetime.UTC) last_modified_by: Optional[str] = None status: str - entity_score: Optional[float] = None - min_extracted_entity_score: Optional[float] = None - schema_score: Optional[float] = None + entity_score: Optional[float] = 0.0 + min_extracted_entity_score: Optional[float] = 0.0 + schema_score: Optional[float] = 0.0 result: Optional[dict] = None confidence: Optional[dict] = None target_schema: Optional[Schema] = None diff --git a/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py b/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py index 1870a4b1..15f8b878 100644 --- a/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py +++ b/src/ContentProcessor/src/libs/pipeline/handlers/save_handler.py @@ -112,13 +112,12 @@ def find_process_result(step_name: str): ) ) - # Determine whether per-field confidence could actually be computed. - # When `total_evaluated_fields_count == 0`, no field-level confidence - # signal was produced (e.g. logprobs unavailable on reasoning models, or - # an image flow with no Content Understanding signal). In that case the - # entity/schema scores are *unavailable* rather than genuinely zero, and - # we propagate ``None`` so downstream consumers (API + UI) can render - # an explicit "N/A" instead of a misleading "0%". + # Compute the aggregate scores. Successful (Completed) processing + # always yields numeric scores: when probabilistic confidence is + # available (logprobs from non-reasoning models / Content Understanding + # signal) we use it; otherwise we fall back to a structural + # completeness score (fraction of expected fields actually filled). + # Failed runs and genuinely empty extractions remain at ``0.0``. entity_score, schema_score, min_extracted_entity_score = ( self._derive_aggregate_scores(evaluated_result) ) @@ -235,22 +234,51 @@ def _summarize_processed_time(self, step_results: list[StepResult]) -> str: formatted_elapsed_time = f"{total_hours:02}:{total_minutes:02}:{total_seconds:02}.{total_milliseconds:03}" return formatted_elapsed_time + @staticmethod + def _is_filled_value(value: object) -> bool: + """Heuristic: does an extracted value count as "actually filled"? + + Treats ``None``, empty strings, whitespace-only strings, and empty + containers as *not* filled. Recursively descends into dicts/lists so a + nested object that contains only nulls is still counted as empty. + """ + if value is None: + return False + if isinstance(value, bool): + return True + if isinstance(value, str): + return value.strip() != "" + if isinstance(value, dict): + return any(SaveHandler._is_filled_value(v) for v in value.values()) + if isinstance(value, (list, tuple, set)): + return any(SaveHandler._is_filled_value(v) for v in value) + return True + @staticmethod def _derive_aggregate_scores( evaluated_result: DataExtractionResult, - ) -> tuple[float | None, float | None, float | None]: + ) -> tuple[float, float, float]: """Compute ``(entity_score, schema_score, min_extracted_entity_score)``. - Returns ``(None, None, None)`` when no per-field confidence signal was - produced (i.e. ``total_evaluated_fields_count == 0`` or there are no - comparison items). This happens, for example, when the LLM call could - not return logprobs (reasoning models) and there is no Content - Understanding signal to fall back on. Treating that case as "unknown" - rather than ``0.0`` lets the API and UI render "N/A" instead of a - misleading "0%". + Score selection order: + + 1. **Probabilistic confidence** — when the evaluate step produced + per-field confidence (``total_evaluated_fields_count > 0``), use the + probabilistic ``overall_confidence`` plus the ratio of + above-threshold fields. This is the highest-fidelity signal. + + 2. **Structural completeness fallback** — when no probabilistic + signal was produced (e.g. reasoning models like ``gpt-5``/``o1``/``o3`` + don't return logprobs, and image-only flow has no Content + Understanding signal), but extraction still produced a comparison + table, score by *how much of the schema was actually filled*. This + replaces the old behaviour of falsely emitting ``0%`` for completed + runs that simply lacked logprobs. - A genuine zero confidence (e.g. a model that emitted fields but - every token had ``logprob == -inf``) is preserved verbatim. + 3. **Zero** — only when there is literally no extraction data + (failed pipeline / genuinely empty result). Failed processing + continues to surface as ``0`` so the UI consistently renders + ``0%`` for failures and genuine zeros. """ confidence = evaluated_result.confidence or {} total_evaluated_fields_count = confidence.get( @@ -261,14 +289,29 @@ def _derive_aggregate_scores( if evaluated_result.comparison_result is not None else [] ) - if total_evaluated_fields_count == 0 or not comparison_items: - return (None, None, None) - zero_count = confidence.get("zero_confidence_fields_count", 0) - schema_score = round( - (len(comparison_items) - zero_count) / len(comparison_items), - 3, - ) - entity_score = confidence.get("overall_confidence") - min_extracted_entity_score = confidence.get("min_extracted_field_confidence") - return (entity_score, schema_score, min_extracted_entity_score) + # Path 1: probabilistic confidence + if total_evaluated_fields_count > 0 and comparison_items: + zero_count = confidence.get("zero_confidence_fields_count", 0) + schema_score = round( + (len(comparison_items) - zero_count) / len(comparison_items), + 3, + ) + entity_score = float(confidence.get("overall_confidence") or 0.0) + min_extracted_entity_score = float( + confidence.get("min_extracted_field_confidence") or 0.0 + ) + return (entity_score, schema_score, min_extracted_entity_score) + + # Path 2: structural completeness fallback + if comparison_items: + filled = sum( + 1 + for item in comparison_items + if SaveHandler._is_filled_value(item.Extracted) + ) + ratio = round(filled / len(comparison_items), 3) + return (ratio, ratio, ratio) + + # Path 3: nothing to score on + return (0.0, 0.0, 0.0) diff --git a/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py b/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py index 66e2e44c..aad71027 100644 --- a/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py +++ b/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py @@ -3,11 +3,12 @@ """Tests for ``SaveHandler._derive_aggregate_scores``. -Covers the score-availability semantics: -- valid scores flow through verbatim -- missing per-field signal yields ``None`` (rendered as "N/A" in the UI) -- a genuine zero is preserved as ``0`` (rendered as "0%") -- failed processing (no comparison items) yields ``None`` +Covers the score-derivation contract: +- probabilistic confidence flows through verbatim when available +- structural completeness fallback fires for Completed runs without logprobs + (e.g. reasoning models / image-only flow) instead of emitting a misleading 0% +- a genuine zero is preserved as ``0.0`` +- failed/empty runs return ``0.0`` """ from __future__ import annotations @@ -35,7 +36,7 @@ def _make_result( ) -class TestDeriveAggregateScores: +class TestProbabilisticPath: def test_valid_scores_flow_through(self): """A normal evaluate-step result must produce numeric scores.""" items = [ @@ -63,75 +64,174 @@ def test_valid_scores_flow_through(self): assert schema == round(2 / 3, 3) assert min_score == 0.0 - def test_missing_per_field_signal_returns_none(self): - """Reasoning-model / image-only flow: no signal → ``None`` everywhere.""" - items: list[ExtractionComparisonItem] = [] + def test_all_fields_above_threshold(self): + items = [ + ExtractionComparisonItem( + Field="a", Extracted="x", Confidence="95.00%", IsAboveThreshold="True" + ), + ExtractionComparisonItem( + Field="b", Extracted="y", Confidence="90.00%", IsAboveThreshold="True" + ), + ] confidence = { - "total_evaluated_fields_count": 0, - "overall_confidence": 0.0, - "min_extracted_field_confidence": 0.0, + "total_evaluated_fields_count": 2, + "overall_confidence": 0.925, + "min_extracted_field_confidence": 0.9, "zero_confidence_fields_count": 0, } entity, schema, min_score = SaveHandler._derive_aggregate_scores( _make_result(items=items, confidence=confidence) ) - assert entity is None - assert schema is None - assert min_score is None + assert entity == 0.925 + assert schema == 1.0 + assert min_score == 0.9 + - def test_no_comparison_items_returns_none(self): - """Even if confidence claims fields exist, an empty comparison list is unknown.""" +class TestStructuralFallback: + """When logprobs are unavailable (reasoning model / image-only) but + extraction succeeded, the Completed file must still get a meaningful + numeric score based on schema completeness.""" + + def test_all_fields_filled_yields_one(self): + items = [ + ExtractionComparisonItem( + Field="a", Extracted="x", Confidence="0.00%", IsAboveThreshold="False" + ), + ExtractionComparisonItem( + Field="b", Extracted="y", Confidence="0.00%", IsAboveThreshold="False" + ), + ExtractionComparisonItem( + Field="c", Extracted=42, Confidence="0.00%", IsAboveThreshold="False" + ), + ] + # No probabilistic signal: total_evaluated_fields_count == 0 confidence = { - "total_evaluated_fields_count": 5, - "overall_confidence": 0.9, - "min_extracted_field_confidence": 0.5, + "total_evaluated_fields_count": 0, + "overall_confidence": 0.0, + "min_extracted_field_confidence": 0.0, "zero_confidence_fields_count": 0, } entity, schema, min_score = SaveHandler._derive_aggregate_scores( - _make_result(items=[], confidence=confidence) + _make_result(items=items, confidence=confidence) ) - assert entity is None - assert schema is None - assert min_score is None + assert entity == 1.0 + assert schema == 1.0 + assert min_score == 1.0 - def test_genuine_zero_score_preserved(self): - """A real ``0`` confidence (e.g. all fields below threshold) must NOT become ``None``.""" + def test_partial_fill_yields_ratio(self): items = [ ExtractionComparisonItem( Field="a", Extracted="x", Confidence="0.00%", IsAboveThreshold="False" ), + ExtractionComparisonItem( + Field="b", Extracted=None, Confidence="0.00%", IsAboveThreshold="False" + ), + ExtractionComparisonItem( + Field="c", Extracted="", Confidence="0.00%", IsAboveThreshold="False" + ), + ExtractionComparisonItem( + Field="d", Extracted="z", Confidence="0.00%", IsAboveThreshold="False" + ), + ] + confidence = {"total_evaluated_fields_count": 0} + entity, schema, min_score = SaveHandler._derive_aggregate_scores( + _make_result(items=items, confidence=confidence) + ) + # 2 of 4 fields actually filled → 0.5 + assert entity == 0.5 + assert schema == 0.5 + assert min_score == 0.5 + + def test_all_fields_empty_yields_zero(self): + """Genuine-empty extraction: structural fallback collapses to ``0.0``.""" + items = [ + ExtractionComparisonItem( + Field="a", Extracted=None, Confidence="0.00%", IsAboveThreshold="False" + ), + ExtractionComparisonItem( + Field="b", Extracted="", Confidence="0.00%", IsAboveThreshold="False" + ), + ExtractionComparisonItem( + Field="c", Extracted=" ", Confidence="0.00%", IsAboveThreshold="False" + ), ] + confidence = {"total_evaluated_fields_count": 0} + entity, schema, min_score = SaveHandler._derive_aggregate_scores( + _make_result(items=items, confidence=confidence) + ) + assert entity == 0.0 + assert schema == 0.0 + assert min_score == 0.0 + + +class TestZeroPath: + def test_no_comparison_items_returns_zero(self): + """No extraction data at all (failed pipeline) → ``0.0``.""" confidence = { - "total_evaluated_fields_count": 1, + "total_evaluated_fields_count": 0, "overall_confidence": 0.0, "min_extracted_field_confidence": 0.0, - "zero_confidence_fields_count": 1, + "zero_confidence_fields_count": 0, } entity, schema, min_score = SaveHandler._derive_aggregate_scores( - _make_result(items=items, confidence=confidence) + _make_result(items=[], confidence=confidence) ) assert entity == 0.0 assert schema == 0.0 assert min_score == 0.0 - def test_all_fields_above_threshold(self): + def test_genuine_zero_probabilistic_score_preserved(self): + """A real ``0`` confidence (every field below threshold) must NOT be + replaced by the structural fallback — it's genuinely 0%.""" items = [ ExtractionComparisonItem( - Field="a", Extracted="x", Confidence="95.00%", IsAboveThreshold="True" - ), - ExtractionComparisonItem( - Field="b", Extracted="y", Confidence="90.00%", IsAboveThreshold="True" + Field="a", Extracted="x", Confidence="0.00%", IsAboveThreshold="False" ), ] confidence = { - "total_evaluated_fields_count": 2, - "overall_confidence": 0.925, - "min_extracted_field_confidence": 0.9, - "zero_confidence_fields_count": 0, + "total_evaluated_fields_count": 1, + "overall_confidence": 0.0, + "min_extracted_field_confidence": 0.0, + "zero_confidence_fields_count": 1, } entity, schema, min_score = SaveHandler._derive_aggregate_scores( _make_result(items=items, confidence=confidence) ) - assert entity == 0.925 - assert schema == 1.0 - assert min_score == 0.9 + assert entity == 0.0 + assert schema == 0.0 + assert min_score == 0.0 + + +class TestIsFilledValue: + """Coverage for the ``_is_filled_value`` helper used by the structural fallback.""" + + def test_none_is_empty(self): + assert SaveHandler._is_filled_value(None) is False + + def test_empty_string_is_empty(self): + assert SaveHandler._is_filled_value("") is False + assert SaveHandler._is_filled_value(" ") is False + + def test_non_empty_string_is_filled(self): + assert SaveHandler._is_filled_value("x") is True + + def test_zero_int_is_filled(self): + # A literal ``0`` is a valid extracted value (e.g. count fields). + assert SaveHandler._is_filled_value(0) is True + + def test_bool_is_filled(self): + assert SaveHandler._is_filled_value(False) is True + assert SaveHandler._is_filled_value(True) is True + + def test_empty_container_is_empty(self): + assert SaveHandler._is_filled_value([]) is False + assert SaveHandler._is_filled_value({}) is False + + def test_nested_all_null_is_empty(self): + assert SaveHandler._is_filled_value({"a": None, "b": ""}) is False + assert SaveHandler._is_filled_value([None, "", {"c": None}]) is False + + def test_nested_with_value_is_filled(self): + assert SaveHandler._is_filled_value({"a": None, "b": "x"}) is True + assert SaveHandler._is_filled_value([None, "x"]) is True + diff --git a/src/ContentProcessorAPI/app/routers/models/contentprocessor/claim_process.py b/src/ContentProcessorAPI/app/routers/models/contentprocessor/claim_process.py index fb08f502..75276839 100644 --- a/src/ContentProcessorAPI/app/routers/models/contentprocessor/claim_process.py +++ b/src/ContentProcessorAPI/app/routers/models/contentprocessor/claim_process.py @@ -53,13 +53,13 @@ class Content_Process(EntityBase): mime_type: Optional[str] = Field( description="MIME type of the processed content file", default=None ) - entity_score: Optional[float] = Field( - description="Score indicating the quality of entity extraction from the content. ``None`` means the score was not available (e.g. logprobs unavailable on reasoning models).", - default=None, + entity_score: float = Field( + description="Score indicating the quality of entity extraction from the content. For Completed runs this is either the probabilistic confidence (when logprobs are available) or a structural completeness fallback (fraction of expected fields actually filled). Failed runs and genuinely empty extractions remain at ``0.0``.", + default=0.0, ) - schema_score: Optional[float] = Field( - description="Score indicating the quality of schema matching for the content. ``None`` means the score was not available.", - default=None, + schema_score: float = Field( + description="Score indicating the quality of schema matching for the content. For Completed runs this is either the probabilistic above-threshold ratio or a structural completeness fallback. Failed runs remain at ``0.0``.", + default=0.0, ) status: Optional[str] = Field( description="Indicates the current status in the content processing pipeline", diff --git a/src/ContentProcessorAPI/app/routers/models/contentprocessor/content_process.py b/src/ContentProcessorAPI/app/routers/models/contentprocessor/content_process.py index 02ec3edf..5bf0ae23 100644 --- a/src/ContentProcessorAPI/app/routers/models/contentprocessor/content_process.py +++ b/src/ContentProcessorAPI/app/routers/models/contentprocessor/content_process.py @@ -134,9 +134,9 @@ class ContentProcess(BaseModel): ) last_modified_by: Optional[str] = None status: Optional[str] = None - entity_score: Optional[float] = None - min_extracted_entity_score: Optional[float] = None - schema_score: Optional[float] = None + entity_score: Optional[float] = 0.0 + min_extracted_entity_score: Optional[float] = 0.0 + schema_score: Optional[float] = 0.0 result: Optional[dict] = None confidence: Optional[dict] = None target_schema: Optional[Schema] = None diff --git a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/CustomCellRender.tsx b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/CustomCellRender.tsx index d701a0ff..ef36c43e 100644 --- a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/CustomCellRender.tsx +++ b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/CustomCellRender.tsx @@ -29,12 +29,7 @@ interface DeleteItem { interface CellRendererExtraProps { readonly txt?: string; readonly timeString?: string; - /** - * Stringified score value. ``undefined`` (or ``null``) means the score is - * not available — the percentage renderer will show "N/A" instead of "0%" - * to distinguish "unavailable" from a genuine zero. - */ - readonly valueText?: string | null; + readonly valueText?: string; readonly status?: string; readonly lastModifiedBy?: string; readonly text?: string | number; @@ -96,31 +91,9 @@ const CellRenderer: React.FC = ({ type, props }) => { }; // Render for percentage - const renderPercentage = (valueText: string | null | undefined, status: string) => { - // ``null``/``undefined``/empty string === score unavailable. Render an - // explicit "N/A" so users can distinguish missing scores from a genuine - // zero. (Backends emit ``None``/``null`` when, for example, logprobs were - // unavailable on a reasoning model and confidence couldn't be computed.) - if (valueText === null || valueText === undefined || valueText === '') { - return ( -
- N/A -
- ); - } - + const renderPercentage = (valueText: string, status: string) => { const decimalValue = Number(valueText); - if (isNaN(decimalValue)) { - return ( -
- N/A -
- ); - } - - // Score is numeric (including a genuine 0): only show "..." while the - // document is still being processed. - if (status !== 'Completed') { + if (isNaN(decimalValue) || status !== 'Completed') { return
...
; } @@ -151,7 +124,7 @@ const CellRenderer: React.FC = ({ type, props }) => { }; // Render for schema score - const calculateSchemaScore = (valueText: string | null | undefined, lastModifiedBy: string, status: string) => { + const calculateSchemaScore = (valueText: string, lastModifiedBy: string, status: string) => { if (lastModifiedBy === 'user') { return (
@@ -213,9 +186,9 @@ const CellRenderer: React.FC = ({ type, props }) => { case 'processTime': return renderProcessTimeInSeconds(timeString || ''); case 'percentage': - return renderPercentage(valueText, status || ''); + return renderPercentage(valueText || '', status || ''); case 'schemaScore': - return calculateSchemaScore(valueText, lastModifiedBy || '', status || ''); + return calculateSchemaScore(valueText || '', lastModifiedBy || '', status || ''); case 'text': return renderText(text ?? '', 'center'); case 'date': diff --git a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx index 3b1462d1..e9026ddb 100644 --- a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx +++ b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGrid.tsx @@ -376,7 +376,7 @@ const ProcessQueueGrid: React.FC = () => { valueText: doc.entity_score === null || doc.entity_score === undefined - ? undefined + ? "0" : doc.entity_score.toString(), status: doc.status, }} @@ -389,7 +389,7 @@ const ProcessQueueGrid: React.FC = () => { valueText: doc.schema_score === null || doc.schema_score === undefined - ? undefined + ? "0" : doc.schema_score.toString(), status: doc.status, }} diff --git a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts index 30ac7a8b..3ffc2409 100644 --- a/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts +++ b/src/ContentProcessorWeb/src/Pages/DefaultPage/Components/ProcessQueueGrid/ProcessQueueGridTypes.ts @@ -17,23 +17,10 @@ export interface ProcessedDocument { readonly file_name: string; /** MIME type of the document. */ readonly mime_type: string; - /** - * Entity extraction confidence score in the range 0–1. - * - * ``null``/``undefined`` means the score was not produced by the backend - * (for example: logprobs were unavailable on a reasoning model, or the - * pipeline didn't reach the evaluate step). In that case the UI shows - * "N/A" rather than a misleading "0%". A genuine numeric ``0`` is still - * rendered as ``0%``. - */ - readonly entity_score: number | null | undefined; - /** - * Schema compliance score in the range 0–1. - * - * ``null``/``undefined`` means the score was not produced. See - * {@link entity_score} for rendering semantics. - */ - readonly schema_score: number | null | undefined; + /** Entity extraction score (0–1). */ + readonly entity_score: number; + /** Schema compliance score (0–1). */ + readonly schema_score: number; /** Current processing status. */ readonly status: string; /** Duration string for processing time (HH:MM:SS). */ diff --git a/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py b/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py index e88500a3..75ce41ba 100644 --- a/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py +++ b/src/ContentProcessorWorkflow/src/repositories/model/claim_process.py @@ -77,13 +77,13 @@ class Content_Process(EntityBase): mime_type: Optional[str] = Field( description="MIME type of the processed content file", default=None ) - entity_score: Optional[float] = Field( - description="Score indicating the quality of entity extraction from the content. ``None`` means the score was not available (e.g. logprobs unavailable on reasoning models, image-only flow without Content Understanding).", - default=None, + entity_score: float = Field( + description="Score indicating the quality of entity extraction (0.0–1.0). For Completed runs this is either probabilistic confidence (logprobs) or a structural completeness fallback. Failed runs remain at ``0.0``.", + default=0.0, ) - schema_score: Optional[float] = Field( - description="Score indicating the quality of schema matching for the content. ``None`` means the score was not available.", - default=None, + schema_score: float = Field( + description="Score indicating the quality of schema matching (0.0–1.0). Failed runs remain at ``0.0``.", + default=0.0, ) status: Optional[str] = Field( description="Indicates the current status in the content processing pipeline", diff --git a/src/ContentProcessorWorkflow/src/services/content_process_models.py b/src/ContentProcessorWorkflow/src/services/content_process_models.py index 319de5a5..18c01e58 100644 --- a/src/ContentProcessorWorkflow/src/services/content_process_models.py +++ b/src/ContentProcessorWorkflow/src/services/content_process_models.py @@ -80,8 +80,8 @@ class ContentProcessRecord(RootEntityBase): processed_time: Optional[str] = None imported_time: Optional[datetime] = None status: Optional[str] = None - entity_score: Optional[float] = None - schema_score: Optional[float] = None + entity_score: Optional[float] = 0.0 + schema_score: Optional[float] = 0.0 result: Optional[Any] = None confidence: Optional[Any] = None diff --git a/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py b/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py index 4c46fa7e..68a81b97 100644 --- a/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py +++ b/src/ContentProcessorWorkflow/src/steps/document_process/executor/document_process_executor.py @@ -242,11 +242,12 @@ async def _on_poll(poll_data: dict) -> None: status_text = poll_result.get("status", "Failed") - # ``None`` here represents "score is not yet available" rather - # than a genuine zero. Preserved through to the API/UI so a - # missing score renders as "N/A" instead of a misleading 0%. - schema_score_f: float | None = None - entity_score_f: float | None = None + # Failed / not-yet-scored documents default to ``0.0``; + # save_handler always emits numeric scores for Completed + # runs (probabilistic if available, otherwise structural + # completeness fallback). + schema_score_f: float = 0.0 + entity_score_f: float = 0.0 processed_time = "" result_payload = None @@ -257,19 +258,14 @@ async def _on_poll(poll_data: dict) -> None: if isinstance(final_payload, dict): status_text = final_payload.get("status") or status_text - def _coerce_score(value: object) -> float | None: - """Convert a raw score payload to ``float`` or ``None``. - - Unlike the previous ``float(... or 0.0)`` form, an - explicit ``None`` (score unavailable) is preserved - instead of being silently coerced to ``0.0``. - """ + def _coerce_score(value: object) -> float: + """Coerce a raw score payload to ``float`` (default ``0.0``).""" if value is None: - return None + return 0.0 try: return float(value) except (TypeError, ValueError): - return None + return 0.0 schema_score_f = _coerce_score( final_payload.get("schema_score") diff --git a/src/ContentProcessorWorkflow/tests/unit/repositories/test_claim_process_model.py b/src/ContentProcessorWorkflow/tests/unit/repositories/test_claim_process_model.py index d64abc84..a970555a 100644 --- a/src/ContentProcessorWorkflow/tests/unit/repositories/test_claim_process_model.py +++ b/src/ContentProcessorWorkflow/tests/unit/repositories/test_claim_process_model.py @@ -42,10 +42,9 @@ def test_defaults(self): assert cp.process_id == "p1" assert cp.file_name == "doc.pdf" assert cp.mime_type is None - # ``None`` is now the default for "score unavailable" so the UI can - # render "N/A" rather than a misleading "0%". - assert cp.entity_score is None - assert cp.schema_score is None + # Defaults stay at ``0.0`` so failed/pre-save records render as 0%. + assert cp.entity_score == 0.0 + assert cp.schema_score == 0.0 assert cp.status is None assert cp.processed_time == "" @@ -60,7 +59,7 @@ def test_explicit_scores(self): assert cp.schema_score == 0.87 def test_explicit_zero_score_preserved(self): - """A literal ``0`` is a real score and must not become ``None``.""" + """A literal ``0`` is a real score and must survive round-trip.""" cp = Content_Process( process_id="p1", file_name="doc.pdf", @@ -70,16 +69,16 @@ def test_explicit_zero_score_preserved(self): assert cp.entity_score == 0.0 assert cp.schema_score == 0.0 - def test_failed_processing_keeps_scores_none(self): - """A failed file must surface unknown scores rather than ``0.0``.""" + def test_failed_processing_keeps_default_zero(self): + """A failed file uses the ``0.0`` default so the UI renders ``0%``.""" cp = Content_Process( process_id="p1", file_name="doc.pdf", status="Failed", ) assert cp.status == "Failed" - assert cp.entity_score is None - assert cp.schema_score is None + assert cp.entity_score == 0.0 + assert cp.schema_score == 0.0 # ── Claim_Process ──────────────────────────────────────────────────────────── diff --git a/src/ContentProcessorWorkflow/tests/unit/services/test_content_process_models.py b/src/ContentProcessorWorkflow/tests/unit/services/test_content_process_models.py index da66aa83..c853c2d4 100644 --- a/src/ContentProcessorWorkflow/tests/unit/services/test_content_process_models.py +++ b/src/ContentProcessorWorkflow/tests/unit/services/test_content_process_models.py @@ -152,13 +152,14 @@ def test_construction_with_defaults(self): assert rec.id == "r1" assert rec.process_id == "" assert rec.status is None - # ``None`` (rather than ``0.0``) is the sentinel for "score unavailable" - # so the UI can render "N/A" instead of a misleading "0%". - assert rec.entity_score is None - assert rec.schema_score is None + # Defaults stay at ``0.0`` so failed/pre-save records render as 0% + # in the UI; save_handler overwrites with a real numeric score for + # Completed runs. + assert rec.entity_score == 0.0 + assert rec.schema_score == 0.0 def test_explicit_zero_score_preserved(self): - """A literal ``0.0`` must survive round-trip and not be coerced to ``None``.""" + """A literal ``0.0`` must survive round-trip.""" rec = ContentProcessRecord( id="r1", process_id="r1", entity_score=0.0, schema_score=0.0 ) diff --git a/src/tests/ContentProcessorWorkflow/repositories/test_claim_process_model.py b/src/tests/ContentProcessorWorkflow/repositories/test_claim_process_model.py index 0a2e5b8e..fee05fb1 100644 --- a/src/tests/ContentProcessorWorkflow/repositories/test_claim_process_model.py +++ b/src/tests/ContentProcessorWorkflow/repositories/test_claim_process_model.py @@ -42,10 +42,9 @@ def test_defaults(self): assert cp.process_id == "p1" assert cp.file_name == "doc.pdf" assert cp.mime_type is None - # ``None`` is the sentinel for "score unavailable" so the UI can render - # "N/A" instead of "0%". - assert cp.entity_score is None - assert cp.schema_score is None + # Defaults stay at ``0.0`` so failed/pre-save records render as 0%. + assert cp.entity_score == 0.0 + assert cp.schema_score == 0.0 assert cp.status is None assert cp.processed_time == "" diff --git a/src/tests/ContentProcessorWorkflow/services/test_content_process_models.py b/src/tests/ContentProcessorWorkflow/services/test_content_process_models.py index d1665f57..5133852a 100644 --- a/src/tests/ContentProcessorWorkflow/services/test_content_process_models.py +++ b/src/tests/ContentProcessorWorkflow/services/test_content_process_models.py @@ -218,10 +218,10 @@ def test_content_process_record_defaults(self): assert record.process_id == "" assert record.processed_file_name is None assert record.processed_file_mime_type is None - # ``None`` is the sentinel for "score unavailable" so the UI can render - # "N/A" rather than "0%". - assert record.entity_score is None - assert record.schema_score is None + # Defaults stay at ``0.0`` so failed/pre-save records render as 0% + # in the UI. + assert record.entity_score == 0.0 + assert record.schema_score == 0.0 def test_to_cosmos_dict(self): """Test ContentProcessRecord.to_cosmos_dict method""" From f9c9955d1184dea890d32930ae49ea4fc6900f87 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Mon, 15 Jun 2026 16:50:10 +0530 Subject: [PATCH 12/13] fix(lint): remove unused DefaultAzureCredential import and fix indentation - F401: drop unused sync DefaultAzureCredential import in 3 credential util files (sync flow now raises RuntimeError; AsyncDefaultAzureCredential is still used). - W293/E122: fix blank-line whitespace and continuation-line indentation in ContentProcessorWorkflow/src/utils/credential_util.py. --- .../src/libs/utils/azure_credential_utils.py | 1 - src/ContentProcessor/src/libs/utils/credential_util.py | 1 - .../src/utils/credential_util.py | 9 ++++----- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/ContentProcessor/src/libs/utils/azure_credential_utils.py b/src/ContentProcessor/src/libs/utils/azure_credential_utils.py index 5d711e85..3344379c 100644 --- a/src/ContentProcessor/src/libs/utils/azure_credential_utils.py +++ b/src/ContentProcessor/src/libs/utils/azure_credential_utils.py @@ -19,7 +19,6 @@ from azure.identity import ( AzureCliCredential, AzureDeveloperCliCredential, - DefaultAzureCredential, ManagedIdentityCredential, ) from azure.identity import ( diff --git a/src/ContentProcessor/src/libs/utils/credential_util.py b/src/ContentProcessor/src/libs/utils/credential_util.py index 1efcaab7..791ab42c 100644 --- a/src/ContentProcessor/src/libs/utils/credential_util.py +++ b/src/ContentProcessor/src/libs/utils/credential_util.py @@ -19,7 +19,6 @@ from azure.identity import ( AzureCliCredential, AzureDeveloperCliCredential, - DefaultAzureCredential, ManagedIdentityCredential, ) from azure.identity import ( diff --git a/src/ContentProcessorWorkflow/src/utils/credential_util.py b/src/ContentProcessorWorkflow/src/utils/credential_util.py index fbef0657..306fd180 100644 --- a/src/ContentProcessorWorkflow/src/utils/credential_util.py +++ b/src/ContentProcessorWorkflow/src/utils/credential_util.py @@ -19,7 +19,6 @@ from azure.identity import ( AzureCliCredential, AzureDeveloperCliCredential, - DefaultAzureCredential, ManagedIdentityCredential, ) from azure.identity import ( @@ -126,11 +125,11 @@ def get_azure_credential(): logging.info( "[AUTH] All CLI credentials failed - falling back to DefaultAzureCredential" ) - + raise RuntimeError( - "No Azure authentication available. " - "Use Managed Identity in Azure or run " - "'az login' / 'azd auth login' locally." + "No Azure authentication available. " + "Use Managed Identity in Azure or run " + "'az login' / 'azd auth login' locally." ) From 8d7b592aba17eda3618ca9d81c65728ef1425627 Mon Sep 17 00:00:00 2001 From: Prachig-Microsoft Date: Mon, 15 Jun 2026 16:57:26 +0530 Subject: [PATCH 13/13] fix(lint): remove trailing blank line at EOF (W391) --- .../tests/unit/pipeline/test_save_handler_scores.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py b/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py index aad71027..be9649d6 100644 --- a/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py +++ b/src/ContentProcessor/tests/unit/pipeline/test_save_handler_scores.py @@ -234,4 +234,3 @@ def test_nested_all_null_is_empty(self): def test_nested_with_value_is_filled(self): assert SaveHandler._is_filled_value({"a": None, "b": "x"}) is True assert SaveHandler._is_filled_value([None, "x"]) is True -