diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 871503f0ae..de67dfcdd8 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1702,6 +1702,44 @@ def _open_url(self, url: str, timeout: int = 10): from specify_cli.authentication.http import open_url return open_url(url, timeout) + def _validate_catalog_payload(self, catalog_data: Any, url: str) -> None: + """Validate a parsed catalog payload's shape. + + Applied to both network-fetched and cache-loaded payloads so a + once-poisoned cache (older spec-kit version, manual edit, upstream + served a bad payload before the network-side guards were added) + cannot re-crash ``_get_merged_extensions`` on subsequent calls. + + Checking only key presence would let a payload like + ``{"extensions": []}`` or ``{"extensions": null}`` slip through + here and then crash with ``AttributeError: 'list' object has no + attribute 'items'`` deep inside ``_get_merged_extensions``. The + sibling integration catalog reader already guards both the root + object and the nested mapping (see ``integrations/catalog.py``); + the extension catalog must stay consistent so a malformed payload + surfaces as the user-facing ``Invalid catalog format`` error + instead of a raw Python traceback. + + Args: + catalog_data: Parsed JSON payload from the catalog source. + url: Source URL — used in the error message so the user can + tell which catalog in a multi-catalog stack is malformed. + + Raises: + ExtensionError: If the payload's shape is invalid. + """ + if not isinstance(catalog_data, dict): + raise ExtensionError( + f"Invalid catalog format from {url}: expected a JSON object" + ) + if "schema_version" not in catalog_data or "extensions" not in catalog_data: + raise ExtensionError(f"Invalid catalog format from {url}") + if not isinstance(catalog_data.get("extensions"), dict): + raise ExtensionError( + f"Invalid catalog format from {url}: " + "'extensions' must be a JSON object" + ) + def get_active_catalogs(self) -> List[CatalogEntry]: """Get the ordered list of active catalogs. @@ -1827,11 +1865,19 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False # If metadata is invalid or missing expected fields, treat cache as invalid pass - # Use cache if valid + # Use cache if valid. A previously-cached payload must clear the + # same shape checks as a freshly-fetched one — otherwise a once- + # poisoned cache (older spec-kit version, manual edit, upstream + # served a bad payload before the network-side guards were added) + # would re-crash on every invocation despite the cache being + # "valid" by age. If validation fails on the cached read, fall + # through to the network fetch path so the cache gets refreshed. if is_valid: try: - return json.loads(cache_file.read_text()) - except json.JSONDecodeError: + cached_data = json.loads(cache_file.read_text()) + self._validate_catalog_payload(cached_data, entry.url) + return cached_data + except (json.JSONDecodeError, ExtensionError): pass # Fetch from network @@ -1839,8 +1885,7 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) - if "schema_version" not in catalog_data or "extensions" not in catalog_data: - raise ExtensionError(f"Invalid catalog format from {entry.url}") + self._validate_catalog_payload(catalog_data, entry.url) # Save to cache self.cache_dir.mkdir(parents=True, exist_ok=True) @@ -1895,6 +1940,16 @@ def _get_merged_extensions(self, force_refresh: bool = False) -> List[Dict[str, continue for ext_id, ext_data in catalog_data.get("extensions", {}).items(): + # Per-entry guard: ``_fetch_single_catalog`` already validates + # that ``catalog_data["extensions"]`` is a mapping, but it + # does not (and should not) validate every entry shape there + # — one malformed entry shouldn't poison an otherwise valid + # catalog. Skip non-mapping entries here so a payload like + # ``{"extensions": {"foo": [], "bar": {...}}}`` still merges + # the valid entries without crashing on ``**ext_data``. + # Mirrors ``integrations/catalog.py:245``. + if not isinstance(ext_data, dict): + continue if ext_id not in merged: # Higher-priority catalog wins merged[ext_id] = { **ext_data, diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index fd9d4745f1..89fd033187 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -1860,6 +1860,48 @@ def _open_url(self, url: str, timeout: int = 10): from specify_cli.authentication.http import open_url return open_url(url, timeout) + def _validate_catalog_payload(self, catalog_data: Any, url: str) -> None: + """Validate a parsed preset-catalog payload's shape. + + Applied to both network-fetched and cache-loaded payloads so a + once-poisoned cache (older spec-kit version, manual edit, upstream + served a bad payload before the network-side guards were added) + cannot re-crash ``_get_merged_packs`` on subsequent calls. + + Checking only key presence would let a payload like + ``{"presets": []}`` or ``{"presets": null}`` slip through here and + then crash with ``AttributeError: 'list' object has no attribute + 'items'`` deep inside ``_get_merged_packs``. The sibling + integration catalog reader already guards both the root object and + the nested mapping (see ``integrations/catalog.py``); the preset + catalog must stay consistent so a malformed payload surfaces as + the user-facing ``Invalid preset catalog format`` error instead of + a raw Python traceback. + + Args: + catalog_data: Parsed JSON payload from the catalog source. + url: Source URL — used in the error message so the user can + tell which catalog in a multi-catalog stack is malformed. + + Raises: + PresetError: If the payload's shape is invalid. + """ + if not isinstance(catalog_data, dict): + raise PresetError( + f"Invalid preset catalog format from {url}: " + "expected a JSON object" + ) + if ( + "schema_version" not in catalog_data + or "presets" not in catalog_data + ): + raise PresetError("Invalid preset catalog format") + if not isinstance(catalog_data.get("presets"), dict): + raise PresetError( + f"Invalid preset catalog format from {url}: " + "'presets' must be a JSON object" + ) + def _load_catalog_config(self, config_path: Path) -> Optional[List[PresetCatalogEntry]]: """Load catalog stack configuration from a YAML file. @@ -2035,21 +2077,25 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool = """ cache_file, metadata_file = self._get_cache_paths(entry.url) + # Use cache if valid. A previously-cached payload must clear the + # same shape checks as a freshly-fetched one — otherwise a once- + # poisoned cache would re-crash on every invocation despite the + # cache being "valid" by age. If validation fails on the cached + # read, fall through to the network fetch path so the cache gets + # refreshed. if not force_refresh and self._is_url_cache_valid(entry.url): try: - return json.loads(cache_file.read_text()) - except json.JSONDecodeError: + cached_data = json.loads(cache_file.read_text()) + self._validate_catalog_payload(cached_data, entry.url) + return cached_data + except (json.JSONDecodeError, PresetError): pass try: with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) - if ( - "schema_version" not in catalog_data - or "presets" not in catalog_data - ): - raise PresetError("Invalid preset catalog format") + self._validate_catalog_payload(catalog_data, entry.url) self.cache_dir.mkdir(parents=True, exist_ok=True) cache_file.write_text(json.dumps(catalog_data, indent=2)) @@ -2083,6 +2129,17 @@ def _get_merged_packs(self, force_refresh: bool = False) -> Dict[str, Dict[str, try: data = self._fetch_single_catalog(entry, force_refresh) for pack_id, pack_data in data.get("presets", {}).items(): + # Per-entry guard: ``_fetch_single_catalog`` already + # validates that ``data["presets"]`` is a mapping, but it + # does not (and should not) validate every entry shape + # there — one malformed entry shouldn't poison an + # otherwise valid catalog. Skip non-mapping entries here + # so a payload like ``{"presets": {"foo": [], "bar": + # {...}}}`` still merges the valid entries without + # crashing on ``**pack_data``. Mirrors + # ``integrations/catalog.py:245``. + if not isinstance(pack_data, dict): + continue pack_data_with_catalog = {**pack_data, "_catalog_name": entry.name, "_install_allowed": entry.install_allowed} merged[pack_id] = pack_data_with_catalog except PresetError: diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 153388a541..4e89663a3d 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2577,6 +2577,163 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken" + @pytest.mark.parametrize( + "payload", + [ + # Root is not a JSON object. + [], + "oops", + 42, + None, + # Root is fine but ``extensions`` is the wrong type. + {"schema_version": "1.0", "extensions": []}, + {"schema_version": "1.0", "extensions": "oops"}, + {"schema_version": "1.0", "extensions": None}, + {"schema_version": "1.0", "extensions": 42}, + ], + ) + def test_fetch_single_catalog_rejects_malformed_payload(self, temp_dir, payload): + """Malformed catalog payloads raise ExtensionError, not AttributeError. + + Without this guard, a payload like ``{"extensions": []}`` would pass the + key-presence check and then crash with ``AttributeError: 'list' object + has no attribute 'items'`` deep inside ``_get_merged_extensions``. The + sibling integration catalog reader already validates both the root + object and the nested mapping (see ``integrations/catalog.py``); the + extension catalog must stay consistent. + """ + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = CatalogEntry( + url="https://example.com/catalog.json", + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(ExtensionError, match="Invalid catalog format"): + catalog._fetch_single_catalog(entry, force_refresh=True) + + @pytest.mark.parametrize( + "cached_payload", + [ + [], + "oops", + 42, + None, + {"schema_version": "1.0", "extensions": []}, + {"schema_version": "1.0", "extensions": "oops"}, + {"schema_version": "1.0", "extensions": None}, + ], + ) + def test_fetch_single_catalog_rejects_malformed_cached_payload( + self, temp_dir, cached_payload + ): + """A poisoned cache silently falls back to the network instead of + crashing — cached payloads pass through the same shape validation + as freshly-fetched ones. + + Without this, a cache poisoned by an older spec-kit version (or a + manual edit, or an upstream that briefly served a bad payload + before the network guards landed) would re-crash every invocation + of ``_get_merged_extensions`` despite the cache being "valid" by + age. The recovery contract is: if the cached payload fails + validation, drop it and refetch — never propagate + ``AttributeError`` to the caller. + """ + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + + # Poison the default-URL cache. ``DEFAULT_CATALOG_URL`` is the + # branch that goes through ``is_cache_valid()`` (the non-default + # branch uses per-URL hashed cache files but the same code path + # below). + catalog.cache_dir.mkdir(parents=True, exist_ok=True) + catalog.cache_file.write_text(json.dumps(cached_payload)) + catalog.cache_metadata_file.write_text( + json.dumps( + { + "cached_at": datetime.now(timezone.utc).isoformat(), + "catalog_url": ExtensionCatalog.DEFAULT_CATALOG_URL, + } + ) + ) + + # Network refetch returns a valid payload so the recovery path + # can complete. + valid = { + "schema_version": "1.0", + "extensions": {"foo": {"name": "Foo", "version": "1.0.0"}}, + } + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(valid).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = CatalogEntry( + url=ExtensionCatalog.DEFAULT_CATALOG_URL, + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response): + result = catalog._fetch_single_catalog(entry, force_refresh=False) + + # The poisoned cache was discarded and the network payload returned. + assert result == valid + + def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir): + """Per-entry guard: one malformed entry shouldn't poison the merge. + + ``_fetch_single_catalog`` validates that ``extensions`` is a mapping, + but it doesn't (and shouldn't) validate every entry inside it — a + single bad entry in an otherwise-valid catalog should be skipped, not + crash the whole resolve path. Mirrors the per-entry skip in + ``integrations/catalog.py``: a malformed entry returns no error, + valid entries continue to merge normally. + """ + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + # Mix of valid entry, list-shaped entry, and string-shaped entry. + payload = { + "schema_version": "1.0", + "extensions": { + "good": {"name": "Good", "version": "1.0.0"}, + "bad-list": [], + "bad-str": "oops", + }, + } + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = CatalogEntry( + url="https://example.com/catalog.json", + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response), \ + patch.object(catalog, "get_active_catalogs", return_value=[entry]): + merged = catalog._get_merged_extensions(force_refresh=True) + + # Only the well-formed entry survives; the two malformed entries are + # silently dropped rather than raising or crashing. + assert [ext["id"] for ext in merged] == ["good"] + def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): """download_extension passes Authorization header when a provider is configured.""" from unittest.mock import patch, MagicMock diff --git a/tests/test_presets.py b/tests/test_presets.py index 8fa700fa77..5322d1454d 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -1514,6 +1514,163 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken" + @pytest.mark.parametrize( + "payload", + [ + # Root is not a JSON object. + [], + "oops", + 42, + None, + # Root is fine but ``presets`` is the wrong type. + {"schema_version": "1.0", "presets": []}, + {"schema_version": "1.0", "presets": "oops"}, + {"schema_version": "1.0", "presets": None}, + {"schema_version": "1.0", "presets": 42}, + ], + ) + def test_fetch_single_catalog_rejects_malformed_payload(self, project_dir, payload): + """Malformed catalog payloads raise PresetError, not AttributeError. + + Without this guard, a payload like ``{"presets": []}`` would pass the + key-presence check and then crash with ``AttributeError: 'list' object + has no attribute 'items'`` deep inside ``_get_merged_packs``. The + sibling integration catalog reader already validates both the root + object and the nested mapping (see ``integrations/catalog.py``); the + preset catalog must stay consistent. + """ + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = PresetCatalogEntry( + url="https://example.com/catalog.json", + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(PresetError, match="Invalid preset catalog format"): + catalog._fetch_single_catalog(entry, force_refresh=True) + + @pytest.mark.parametrize( + "cached_payload", + [ + [], + "oops", + 42, + None, + {"schema_version": "1.0", "presets": []}, + {"schema_version": "1.0", "presets": "oops"}, + {"schema_version": "1.0", "presets": None}, + ], + ) + def test_fetch_single_catalog_rejects_malformed_cached_payload( + self, project_dir, cached_payload + ): + """A poisoned cache silently falls back to the network instead of + crashing — cached payloads pass through the same shape validation + as freshly-fetched ones. + + Without this, a cache poisoned by an older spec-kit version (or a + manual edit, or an upstream that briefly served a bad payload + before the network guards landed) would re-crash every invocation + of ``_get_merged_packs`` despite the cache being "valid" by age. + The recovery contract is: if the cached payload fails validation, + drop it and refetch — never propagate ``AttributeError`` to the + caller. + """ + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + + # Poison the default-URL cache. ``DEFAULT_CATALOG_URL`` and + # non-default URLs both flow through the same cache-load branch. + cache_file, metadata_file = catalog._get_cache_paths( + catalog.DEFAULT_CATALOG_URL + ) + cache_file.parent.mkdir(parents=True, exist_ok=True) + cache_file.write_text(json.dumps(cached_payload)) + metadata_file.write_text( + json.dumps( + { + "cached_at": datetime.now(timezone.utc).isoformat(), + "catalog_url": catalog.DEFAULT_CATALOG_URL, + } + ) + ) + + # Network refetch returns a valid payload so the recovery path + # can complete. + valid = { + "schema_version": "1.0", + "presets": {"foo": {"name": "Foo", "version": "1.0.0"}}, + } + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(valid).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = PresetCatalogEntry( + url=catalog.DEFAULT_CATALOG_URL, + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response): + result = catalog._fetch_single_catalog(entry, force_refresh=False) + + # The poisoned cache was discarded and the network payload returned. + assert result == valid + + def test_get_merged_packs_skips_non_mapping_entries(self, project_dir): + """Per-entry guard: one malformed entry shouldn't poison the merge. + + ``_fetch_single_catalog`` validates that ``presets`` is a mapping, + but it doesn't (and shouldn't) validate every entry inside it — a + single bad entry in an otherwise-valid catalog should be skipped, + not crash the whole resolve path. Mirrors the per-entry skip in + ``integrations/catalog.py``: a malformed entry returns no error, + valid entries continue to merge normally. + """ + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + payload = { + "schema_version": "1.0", + "presets": { + "good": {"name": "Good", "version": "1.0.0"}, + "bad-list": [], + "bad-str": "oops", + }, + } + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = PresetCatalogEntry( + url="https://example.com/catalog.json", + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response), \ + patch.object(catalog, "get_active_catalogs", return_value=[entry]): + merged = catalog._get_merged_packs(force_refresh=True) + + # Only the well-formed entry survives; the two malformed entries are + # silently dropped rather than raising or crashing. + assert list(merged.keys()) == ["good"] + def test_download_pack_sends_auth_header(self, project_dir, monkeypatch): """download_pack passes Authorization header when configured.""" from unittest.mock import patch, MagicMock