Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion conformance.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

[manifest]
implementation = "openarmature-python"
spec_pin = "v0.69.0"
spec_pin = "v0.70.1"

# Status values:
# implemented — shipped behavior matches the proposal's contract
Expand Down Expand Up @@ -610,6 +610,16 @@ since = "0.13.0"
[proposals."0059"]
status = "not-yet"

# Spec v0.70.0 (proposal 0060). Retrieval-provider rerank protocol — the
# ``RerankProvider`` surface + ``RerankEvent`` / ``RerankFailedEvent`` typed
# variants + OTel ``openarmature.rerank.complete`` span / Langfuse Retriever
# observation / rerank metrics. Sibling to the embedding surface (0059);
# python has not shipped retrieval-provider, so rerank is not-yet. The
# v0.70.1 pin (adopted for fixture 110 / proposal 0075) crosses v0.70.0, so
# 0060 is now in range of the pin; its 11 rerank observability fixtures
# (099-109) are deferred along with it.
[proposals."0060"]
status = "not-yet"

# Spec v0.55.0 (proposal 0065; repo pins v0.55.1). Failure-isolation
# cause fidelity at non-node placements (pipeline-utilities §6.3 /
# §11.7). At instance (§9.7), branch (§11.7), and parent-node
Expand Down
2 changes: 1 addition & 1 deletion openarmature-spec
Submodule openarmature-spec updated 50 files
+28 −0 CHANGELOG.md
+3 −5 README.md
+2 −2 docs/proposals.md
+10 −10 proposals/0060-retrieval-provider-rerank.md
+97 −2 spec/graph-engine/spec.md
+1 −1 spec/observability/conformance/075-embedding-failure-event-dispatch-on-provider-unavailable.md
+1 −1 spec/observability/conformance/082-otel-embedding-span-attributes.yaml
+28 −0 spec/observability/conformance/099-rerank-event-dispatch.md
+77 −0 spec/observability/conformance/099-rerank-event-dispatch.yaml
+30 −0 spec/observability/conformance/100-rerank-failure-event-dispatch-on-provider-unavailable.md
+60 −0 spec/observability/conformance/100-rerank-failure-event-dispatch-on-provider-unavailable.yaml
+25 −0 spec/observability/conformance/101-rerank-event-mutual-exclusion.md
+90 −0 spec/observability/conformance/101-rerank-event-mutual-exclusion.yaml
+23 −0 spec/observability/conformance/102-rerank-event-call-id-distinct.md
+67 −0 spec/observability/conformance/102-rerank-event-call-id-distinct.yaml
+24 −0 spec/observability/conformance/103-rerank-event-query-and-documents-populated.md
+53 −0 spec/observability/conformance/103-rerank-event-query-and-documents-populated.yaml
+25 −0 spec/observability/conformance/104-rerank-event-request-params-populated.md
+96 −0 spec/observability/conformance/104-rerank-event-request-params-populated.yaml
+26 −0 spec/observability/conformance/105-rerank-event-top-k-and-result-count-populated.md
+102 −0 spec/observability/conformance/105-rerank-event-top-k-and-result-count-populated.yaml
+25 −0 spec/observability/conformance/106-rerank-event-active-prompt-populated.md
+80 −0 spec/observability/conformance/106-rerank-event-active-prompt-populated.yaml
+33 −0 spec/observability/conformance/107-otel-rerank-span-attributes.md
+150 −0 spec/observability/conformance/107-otel-rerank-span-attributes.yaml
+30 −0 spec/observability/conformance/108-langfuse-rerank-observation.md
+134 −0 spec/observability/conformance/108-langfuse-rerank-observation.yaml
+31 −0 spec/observability/conformance/109-rerank-metrics-token-and-duration.md
+144 −0 spec/observability/conformance/109-rerank-metrics-token-and-duration.yaml
+37 −0 spec/observability/conformance/110-otel-callable-branch-span.md
+84 −0 spec/observability/conformance/110-otel-callable-branch-span.yaml
+125 −15 spec/observability/spec.md
+1 −1 spec/retrieval-provider/conformance/002-embed-model-binding-error.md
+2 −2 spec/retrieval-provider/conformance/003-embed-malformed-response-mismatched-vector-count.md
+2 −2 spec/retrieval-provider/conformance/004-embed-malformed-response-inconsistent-dimensions.md
+31 −0 spec/retrieval-provider/conformance/006-rerank-positive-control.md
+84 −0 spec/retrieval-provider/conformance/006-rerank-positive-control.yaml
+30 −0 spec/retrieval-provider/conformance/007-rerank-model-binding-error.md
+43 −0 spec/retrieval-provider/conformance/007-rerank-model-binding-error.yaml
+28 −0 spec/retrieval-provider/conformance/008-rerank-malformed-response-out-of-range-index.md
+46 −0 spec/retrieval-provider/conformance/008-rerank-malformed-response-out-of-range-index.yaml
+26 −0 spec/retrieval-provider/conformance/009-rerank-malformed-response-duplicate-index.md
+45 −0 spec/retrieval-provider/conformance/009-rerank-malformed-response-duplicate-index.yaml
+29 −0 spec/retrieval-provider/conformance/010-rerank-top-k-respected.md
+133 −0 spec/retrieval-provider/conformance/010-rerank-top-k-respected.yaml
+27 −0 spec/retrieval-provider/conformance/011-rerank-top-k-violation.md
+47 −0 spec/retrieval-provider/conformance/011-rerank-top-k-violation.yaml
+28 −0 spec/retrieval-provider/conformance/012-rerank-per-result-echo-variance.md
+55 −0 spec/retrieval-provider/conformance/012-rerank-per-result-echo-variance.yaml
+189 −51 spec/retrieval-provider/spec.md
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec"
openarmature = "openarmature.cli:main"

[tool.openarmature]
spec_version = "0.69.0"
spec_version = "0.70.1"

[dependency-groups]
dev = [
Expand Down
4 changes: 2 additions & 2 deletions src/openarmature/AGENTS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# OpenArmature — Agent documentation

*This is the agent guide bundled with the openarmature Python package, version 0.14.0 (spec v0.69.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
*This is the agent guide bundled with the openarmature Python package, version 0.14.0 (spec v0.70.1). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*

## TL;DR

Expand All @@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents:

## Capability contracts

_Sourced from openarmature-spec v0.69.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md` verbatim — including additions from accepted proposals that this Python implementation may not yet ship. For per-proposal implementation status (implemented / partial / textual-only / not-yet), see the `conformance.toml` manifest at the repo root. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
_Sourced from openarmature-spec v0.70.1. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md` verbatim — including additions from accepted proposals that this Python implementation may not yet ship. For per-proposal implementation status (implemented / partial / textual-only / not-yet), see the `conformance.toml` manifest at the repo root. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._

### Capability: `graph-engine`

Expand Down
2 changes: 1 addition & 1 deletion src/openarmature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"""

__version__ = "0.14.0"
__spec_version__ = "0.69.0"
__spec_version__ = "0.70.1"
# Proposal 0052 (spec observability §5.1 / §8.4.1): canonical
# package-registry name for this implementation. Surfaces on every
# OTel invocation span as ``openarmature.implementation.name`` and on
Expand Down
34 changes: 34 additions & 0 deletions tests/conformance/test_fixture_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,40 @@ def _id(case: tuple[str, Path]) -> str:
),
"observability/082-otel-embedding-span-attributes": "Proposal 0059 embedding events; not implemented",
"observability/083-langfuse-embedding-observation": "Proposal 0059 embedding events; not implemented",
# Proposal 0060 (retrieval-provider rerank, v0.70.0): the rerank
# observability fixtures (099-109) model the RerankEvent /
# RerankFailedEvent + rerank span / Langfuse Retriever / rerank-metrics
# surface, which python does not implement (0060 is not-yet; rerank lands
# with the embedding capability in v0.16.0). Sibling to embeddings (074-083).
"observability/099-rerank-event-dispatch": "Proposal 0060 rerank events; not implemented",
"observability/100-rerank-failure-event-dispatch-on-provider-unavailable": (
"Proposal 0060 rerank events; not implemented"
),
"observability/101-rerank-event-mutual-exclusion": "Proposal 0060 rerank events; not implemented",
"observability/102-rerank-event-call-id-distinct": "Proposal 0060 rerank events; not implemented",
"observability/103-rerank-event-query-and-documents-populated": (
"Proposal 0060 rerank events; not implemented"
),
"observability/104-rerank-event-request-params-populated": (
"Proposal 0060 rerank events; not implemented"
),
"observability/105-rerank-event-top-k-and-result-count-populated": (
"Proposal 0060 rerank events; not implemented"
),
"observability/106-rerank-event-active-prompt-populated": (
"Proposal 0060 rerank events; not implemented"
),
"observability/107-otel-rerank-span-attributes": "Proposal 0060 rerank events; not implemented",
"observability/108-langfuse-rerank-observation": "Proposal 0060 rerank events; not implemented",
"observability/109-rerank-metrics-token-and-duration": "Proposal 0060 rerank events; not implemented",
# Proposal 0075 (callable-branch span, fixture 110 added v0.70.1): the case
# mixes a graph-style ``expected.final_state`` with the observability
# ``span_tree``; the cross-capability parser's ObservabilityExpected model
# forbids final_state. RUNS via _run_fixture_110 in test_observability (the
# same defer-from-parse-but-runs pattern as fixture 038).
"observability/110-otel-callable-branch-span": (
"Cross-capability parser doesn't model final_state + span_tree together; runs in test_observability"
),
}


Expand Down
81 changes: 81 additions & 0 deletions tests/conformance/test_observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ def _reset_otel_global_tracer_provider(restore_to: object) -> None:
"096-tool-call-payload-gating",
"097-otel-tool-span-attributes",
"098-langfuse-tool-observation",
# v0.70.1 — proposal 0075 callable-branch span shape (observability
# §5.7). The ORIGINAL fixture 110 (span shape + skip-emits-no-span);
# the branch_count assertion arrives with the v0.73.1 pin (v0.16.0).
"110-otel-callable-branch-span",
}
)

Expand All @@ -194,6 +198,10 @@ def _reset_otel_global_tracer_provider(restore_to: object) -> None:
"no embedding event/provider to record from"
)

_RERANK_DEFER = (
"rerank capability (proposal 0060) unimplemented until v0.16.0; no rerank event/provider to record from"
)


# Pinned observability fixtures NOT run by this YAML harness, each with an
# explicit reason. The coverage guard (test_observability_fixture_coverage_
Expand Down Expand Up @@ -228,6 +236,24 @@ def _reset_otel_global_tracer_provider(restore_to: object) -> None:
"089-embedding-metrics-token-and-duration",
)
},
# Rerank observability (proposal 0060, v0.70.0). The rerank protocol is
# unshipped in python until v0.16.0; no rerank provider/event exists.
**{
fixture_id: _RERANK_DEFER
for fixture_id in (
"099-rerank-event-dispatch",
"100-rerank-failure-event-dispatch-on-provider-unavailable",
"101-rerank-event-mutual-exclusion",
"102-rerank-event-call-id-distinct",
"103-rerank-event-query-and-documents-populated",
"104-rerank-event-request-params-populated",
"105-rerank-event-top-k-and-result-count-populated",
"106-rerank-event-active-prompt-populated",
"107-otel-rerank-span-attributes",
"108-langfuse-rerank-observation",
"109-rerank-metrics-token-and-duration",
)
},
}


Expand Down Expand Up @@ -429,6 +455,8 @@ async def test_observability_fixture(fixture_path: Path) -> None:
await _run_fixture_028(spec)
elif fixture_id == "038-otel-parallel-branches-dispatch-span":
await _run_fixture_038(spec)
elif fixture_id == "110-otel-callable-branch-span":
await _run_fixture_110(spec)
elif fixture_id in {
"040-llm-cache-attribute-emission",
"041-llm-cache-attribute-absence",
Expand Down Expand Up @@ -1923,6 +1951,59 @@ def _matches(span: Any, eattrs: dict[str, Any] = expected_attrs) -> bool:
_assert_span_tree_matches(all_spans, actual_children, expected_children)


async def _run_fixture_110(spec: Mapping[str, Any]) -> None:
    """Run every case of fixture 110 (proposal 0075, observability §5.7).

    Callable-branch span shape: an inline-callable parallel branch renders
    as ONE per-branch dispatch span keyed by ``openarmature.node.branch_name``
    with NO inner-node spans; a when-skipped branch emits no span. Uses the
    bundled OTel observer (default config), as for fixtures 038 / 082.

    Each case is delegated to ``_run_fixture_110_case``. An
    ``AssertionError`` from a case is re-raised with the case name prefixed
    so the failing case is identifiable; the original error is chained.
    """
    all_cases = cast("list[dict[str, Any]]", spec["cases"])
    for current in all_cases:
        label = cast("str", current["name"])
        try:
            await _run_fixture_110_case(current)
        except AssertionError as failure:
            raise AssertionError(f"case {label!r}: {failure}") from failure


async def _run_fixture_110_case(case: Mapping[str, Any]) -> None:
    """Run one fixture-110 case and check its state, span tree, and skips.

    Three checks, in order:

    1. ``expected.final_state`` (optional): each listed field on the final
       state equals the expected value.
    2. ``expected.span_tree``: the finished spans, rooted at the parentless
       ``openarmature.invocation`` span, match the expected tree.
    3. Skipped branches: every branch declared on the first node carrying a
       ``parallel_branches`` entry, but absent from the expected span tree,
       produced no span of that name.

    Raises:
        AssertionError: if any of the three checks fails.
    """
    observer, exporter = _build_observer()
    try:
        final = await _run_graph(case, observer)
    finally:
        # Shut the observer down even when the run raises, so a failing case
        # does not leave it open for later tests. On success this still
        # happens before the finished spans are read, as before.
        observer.shutdown()

    # ---- final_state: the dispatched callable branches applied their
    # updates; the when-skipped branch contributed nothing.
    expected_final = cast("dict[str, Any]", case["expected"].get("final_state") or {})
    for field_name, expected_value in expected_final.items():
        actual = getattr(final, field_name)
        assert actual == expected_value, f"final_state.{field_name}: {actual!r} != {expected_value!r}"

    spans = exporter.get_finished_spans()
    expected_tree = cast("list[dict[str, Any]]", case["expected"]["span_tree"])
    inv_root = next((s for s in spans if s.name == "openarmature.invocation" and s.parent is None), None)
    assert inv_root is not None, f"invocation root span missing; got {[s.name for s in spans]}"
    _assert_span_tree_matches(spans, [inv_root], expected_tree)

    # ---- when-skipped branches emit NO span. The span_tree match is
    # subset-based, so assert the skip explicitly: any declared branch absent
    # from the (dispatched) span_tree must have produced no span.
    def _names(nodes: list[dict[str, Any]]) -> set[str]:
        # Collect every ``name`` in the expected tree, at any depth.
        out: set[str] = set()
        for n in nodes:
            out.add(cast("str", n["name"]))
            out |= _names(cast("list[dict[str, Any]]", n.get("children") or []))
        return out

    dispatched = _names(expected_tree)
    nodes = cast("dict[str, Any]", case["nodes"])
    # Only the first node with a ``parallel_branches`` entry is inspected.
    pb_node = next((ns for ns in nodes.values() if "parallel_branches" in ns), None)
    if pb_node is not None:
        declared = set(cast("dict[str, Any]", pb_node["parallel_branches"]["branches"]).keys())
        # Sorted so the first reported failure is the same on every run,
        # regardless of set iteration order.
        for branch in sorted(declared - dispatched):
            stray = [s for s in spans if s.name == branch]
            assert stray == [], f"when-skipped branch {branch!r} MUST emit no span"


async def _run_fixture_008(spec: Mapping[str, Any]) -> None:
"""Two sub-cases: detached subgraph (one Link, two traces, shared
correlation_id) and detached fan-out (one trace per instance,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def test_package_versions() -> None:
assert openarmature.__version__ == "0.14.0"
assert openarmature.__spec_version__ == "0.69.0"
assert openarmature.__spec_version__ == "0.70.1"


def test_spec_version_matches_pyproject() -> None:
Expand Down