Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 88 additions & 1 deletion src/skillspector/nodes/analyzers/static_patterns_supply_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import re
import sys
import tomllib

from skillspector.logging_config import get_logger
from skillspector.models import AnalyzerFinding, Finding, Location, Severity
Expand Down Expand Up @@ -401,6 +402,89 @@ def _extract_packages_from_requirements(content: str) -> list[tuple[str, str | N
return results


_PEP621_METADATA_KEYS = frozenset(
{
"name",
"version",
"description",
"readme",
"license",
"authors",
"maintainers",
"keywords",
"classifiers",
"urls",
"requires-python",
"dynamic",
}
)

_PKG_NAME_RE = re.compile(
r"^([a-zA-Z][a-zA-Z0-9._-]*)(?:\[.*?\])?\s*(?:[=<>!~]=?\s*[\d.*]+)?",
)


def _extract_packages_from_pyproject(content: str) -> list[tuple[str, str | None, int]]:
"""Extract (package_name, version_or_None, line_number) from pyproject.toml content.

Only reads PEP 621 ``[project].dependencies``,
``[project.optional-dependencies]``, and ``[build-system].requires`` so that
TOML metadata keys (``name``, ``requires-python``, etc.) are never treated
as package names. Returns ``[]`` on any TOML parse error.
"""
if not content.strip():
return []
try:
data = tomllib.loads(content)
except tomllib.TOMLDecodeError:
return []

dep_strings: list[str] = []

project = data.get("project", {})
if isinstance(project, dict):
# [project].dependencies
deps = project.get("dependencies", [])
if isinstance(deps, list):
dep_strings.extend(str(d) for d in deps)
# [project.optional-dependencies]
opt_deps = project.get("optional-dependencies", {})
if isinstance(opt_deps, dict):
for group in opt_deps.values():
if isinstance(group, list):
dep_strings.extend(str(d) for d in group)

build_system = data.get("build-system", {})
if isinstance(build_system, dict):
requires = build_system.get("requires", [])
if isinstance(requires, list):
dep_strings.extend(str(d) for d in requires)

lines = content.splitlines()
results: list[tuple[str, str | None, int]] = []
for dep_str in dep_strings:
m = _PKG_NAME_RE.match(dep_str.strip())
if not m:
continue
pkg_name = m.group(1)
if pkg_name.lower() in _PEP621_METADATA_KEYS:
continue

# Approximate line number: find the package name in the raw content.
line_num = 1
for i, line in enumerate(lines, 1):
if pkg_name.lower() in line.lower():
line_num = i
break

# Extract pinned version for SC4 comparison (== or <=).
ver_m = re.search(r"(?:==|<=)\s*([\d.]+)", dep_str)
version = ver_m.group(1) if ver_m else None
results.append((pkg_name, version, line_num))

return results


def _extract_packages_from_package_json(content: str) -> list[tuple[str, str | None, int]]:
"""Extract (package_name, version_or_None, line_number) from package.json content."""
results: list[tuple[str, str | None, int]] = []
Expand Down Expand Up @@ -695,7 +779,10 @@ def _analyze_dependencies(
return findings

if is_python_dep:
packages = _extract_packages_from_requirements(content)
if "pyproject.toml" in lower_path:
packages = _extract_packages_from_pyproject(content)
else:
packages = _extract_packages_from_requirements(content)
ecosystem = ECOSYSTEM_PYPI
fallback_db = _FALLBACK_VULNERABLE_PYPI
popular = _POPULAR_PYPI
Expand Down
82 changes: 82 additions & 0 deletions tests/unit/test_patterns_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,3 +1037,85 @@ def test_extract_packages_package_json(self) -> None:
names = [p[0] for p in sc_mod._extract_packages_from_package_json(content)]
assert "express" in names
assert "lodash" in names

def test_extract_packages_from_pyproject_basic(self) -> None:
    """Declared dependencies are reported; PEP 621 metadata keys are not."""
    content = (
        "[project]\n"
        'name = "my-skill"\n'
        'version = "1.0.0"\n'
        'requires-python = ">=3.12"\n'
        'description = "A skill"\n'
        'dependencies = ["requests>=2.31", "pydantic==2.0.0"]\n'
    )
    extracted = sc_mod._extract_packages_from_pyproject(content)
    names = [entry[0] for entry in extracted]

    # Both entries of [project].dependencies are picked up.
    for expected in ("requests", "pydantic"):
        assert expected in names

    # PEP 621 metadata fields must NOT appear as package names
    for metadata_key in ("name", "requires-python", "description", "version"):
        assert metadata_key not in names

def test_extract_packages_from_pyproject_build_system(self) -> None:
    """Requirements listed under ``[build-system].requires`` are reported too."""
    content = (
        "[build-system]\n"
        'requires = ["setuptools>=68", "wheel"]\n'
        'build-backend = "setuptools.build_meta"\n'
    )
    found = {entry[0] for entry in sc_mod._extract_packages_from_pyproject(content)}

    # Every build requirement is present ...
    assert {"setuptools", "wheel"} <= found
    # ... while the sibling ``build-backend`` key is not mistaken for a package.
    assert "build-backend" not in found

def test_extract_packages_from_pyproject_optional_deps(self) -> None:
    """Every group of ``[project.optional-dependencies]`` contributes packages."""
    content = (
        "[project]\n"
        'name = "skill"\n'
        "\n"
        "[project.optional-dependencies]\n"
        'dev = ["pytest>=7.0", "ruff"]\n'
    )
    found = {entry[0] for entry in sc_mod._extract_packages_from_pyproject(content)}

    # Both entries of the ``dev`` group are reported.
    assert {"pytest", "ruff"} <= found
    # The ``name`` metadata key of [project] stays out of the result.
    assert "name" not in found

def test_extract_packages_from_pyproject_malformed_toml(self) -> None:
    """A TOML syntax error yields an empty list instead of an exception."""
    broken = "[project\nname = bad toml [\n"
    assert sc_mod._extract_packages_from_pyproject(broken) == []

def test_extract_packages_from_pyproject_empty(self) -> None:
    """An empty document produces no packages."""
    extracted = sc_mod._extract_packages_from_pyproject("")
    assert extracted == []

def test_pyproject_toml_no_false_positive_metadata_keys(self) -> None:
    """End-to-end: scanning pyproject.toml must not fire SC5/SC6 on metadata keys.

    Runs the supply-chain analyzer through the static runner on a
    ``pyproject.toml`` whose only dependency is ``httpx`` and checks that any
    SC5/SC6 finding refers to that dependency, never to a ``[project]``
    metadata key.
    """
    content = (
        "[project]\n"
        'name = "my-skill"\n'
        'version = "1.0.0"\n'
        'description = "helper"\n'
        'requires-python = ">=3.12"\n'
        'authors = [{name = "Dev"}]\n'
        'keywords = ["ai", "agent"]\n'
        'dependencies = ["httpx>=0.24"]\n'
    )
    state = {
        "components": ["pyproject.toml"],
        "file_cache": {"pyproject.toml": content},
    }
    from skillspector.nodes.analyzers import static_patterns_supply_chain as sc
    from skillspector.nodes.analyzers import static_runner

    findings = static_runner.run_static_patterns(state, [sc])

    # Collect every offender first so a failure lists all of them at once
    # instead of stopping at the first one.
    false_positives = [
        f.matched_text
        for f in findings
        if f.rule_id in ("SC5", "SC6") and f.matched_text.lower() != "httpx"
    ]
    assert not false_positives, (
        f"False positive SC5/SC6 on metadata key: {false_positives}"
    )