From 1b8f14cdb1bceabced84baf34b77896ee6f5ff38 Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Mon, 22 Jun 2026 04:58:25 -0600 Subject: [PATCH] =?UTF-8?q?refactor(delegation)!:=20retire=20delegate=5Fco?= =?UTF-8?q?de/coderProfile/composeProductionAgentProfile=20=E2=80=94=20del?= =?UTF-8?q?egate()=20over=20supervise()=20is=20the=20one=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete the legacy task-specific delegation plumbing, collapsing onto the one canonical delegate() verb (over supervise()): - delegate_code / delegate_research MCP tools + their server/bin/openai-tools wiring - coderProfile + DEFAULT_CODER_SYSTEM_PROMPT (the hardcoded coder profile) - composeProductionAgentProfile + buildDelegationMcpServer + DELEGATION_MCP_SERVER_KEY - coderLoopRunner shim (loop-runner.ts) - ResearcherDelegate type, MCP_DISABLE_CODER/RESEARCHER, the bin coder/researcher path KEEP: delegate(), supervise(), the mcp bin delegate path (MCP_ENABLE_DELEGATE), detachedSessionDelegate. detachedSessionDelegate is now profile-parameterized (§1.5): the hardcoded coderProfile is gone — callers supply workerProfile (an AgentProfile); omit it for a minimal model-only default (no hardcoded skills/tools/prompt). The external callers (loops, agent-dev-container) pass executor/model/sandboxClient, not a profile, so they keep working on the default. createSandboxAct no longer injects a delegation MCP into the eval profile; it boots the agent's own profile with optional per-persona overrides. Breaking: removes the listed exports → 0.72.0. Regenerated docs/api; updated canonical-api.md version pin + decision table. 
--- README.md | 16 +- docs/api/agent.md | 40 +- docs/api/index.md | 191 ++-- docs/api/mcp.md | 1060 ++++----------------- docs/api/profiles.md | 46 +- docs/canonical-api.md | 9 +- examples/mcp-delegation/mcp-delegation.ts | 34 +- package.json | 2 +- src/agent/sandbox-act.ts | 81 +- src/index.ts | 2 - src/intelligence/capability.test.ts | 41 - src/loop-runner.ts | 52 +- src/mcp/bin.ts | 517 +--------- src/mcp/delegates.ts | 64 +- src/mcp/delegation-profile.ts | 228 ----- src/mcp/detached-coder.ts | 57 +- src/mcp/index.ts | 34 +- src/mcp/openai-tools.ts | 42 +- src/mcp/server.ts | 73 +- src/mcp/tools/delegate-code.ts | 218 ----- src/mcp/tools/delegate-research.ts | 233 ----- src/profiles/coder.ts | 55 +- src/profiles/index.ts | 2 +- tests/loop-runner.test.ts | 33 +- tests/mcp/delegate-code.test.ts | 120 --- tests/mcp/delegate-research.test.ts | 84 -- tests/mcp/delegation-profile.test.ts | 143 --- tests/mcp/detached-coder.test.ts | 33 +- tests/mcp/detached-turn.test.ts | 66 +- tests/mcp/idempotency.test.ts | 53 -- tests/mcp/openai-tools.test.ts | 56 +- tests/mcp/server-integration.test.ts | 105 +- tests/mcp/wire-contract.test.ts | 131 +-- tests/sandbox-act.test.ts | 31 +- 34 files changed, 664 insertions(+), 3288 deletions(-) delete mode 100644 src/mcp/delegation-profile.ts delete mode 100644 src/mcp/tools/delegate-code.ts delete mode 100644 src/mcp/tools/delegate-research.ts delete mode 100644 tests/mcp/delegate-code.test.ts delete mode 100644 tests/mcp/delegate-research.test.ts delete mode 100644 tests/mcp/delegation-profile.test.ts delete mode 100644 tests/mcp/idempotency.test.ts diff --git a/README.md b/README.md index 2e65877a..915f62ca 100644 --- a/README.md +++ b/README.md @@ -240,10 +240,10 @@ on a never-touched holdout slice. `runDelegatedLoop` is one entrypoint a worker agent or a scheduled routine calls to run a disciplined loop in a chosen mode, over the hardened engines below. 
It fails loud on an unwired mode; a thrown engine is captured as `{ ok: false }`, so unattended runs record rather than crash. ```ts -import { runDelegatedLoop, coderLoopRunner, researchLoopRunner, type DelegatedLoopRegistry } from '@tangle-network/agent-runtime' +import { runDelegatedLoop, worktreeLoopRunner, researchLoopRunner, type DelegatedLoopRegistry } from '@tangle-network/agent-runtime' const registry: DelegatedLoopRegistry = { - code: coderLoopRunner({ sandboxClient, args: { goal: 'fix the flaky retry test', repoRoot: '/repo' }, reviewer, winnerSelection: 'smallest-diff' }), + code: worktreeLoopRunner({ repoRoot: '/repo', taskPrompt: 'fix the flaky retry test', harnesses, budget }), research: researchLoopRunner({ research, gate: { selfArtifactKinds: ['spec'] }, maxRounds: 3 }), } const result = await runDelegatedLoop('code', registry) @@ -251,7 +251,7 @@ const result = await runDelegatedLoop('code', registry) Modes: `code`, `review`, `research`, `audit`, `self-improve`, `dynamic`. The `agent-runtime-loop` bin runs the registry from a cron or routine and exits 0 (ok), 1 (recorded failure), or 2 (usage or config error). -The coder delegate (`createDefaultCoderDelegate`, `/mcp`) has default-on safety gates: no-op rejection (an empty patch cannot pass trivially), an always-on secret-path floor (`.env`, keys, wallets), an optional `reviewer` gate, and a `winnerSelection` policy (`highest-score`, `smallest-diff`, `highest-readiness`, `first-approved`). +`worktreeLoopRunner` (`code` mode, the generic recursive path) authors one `AgentProfile` per harness and runs them as a `worktreeFanout` (each leaf `gateOnDeliverable`), with the winner chosen by the shared valid-only selector. 
The sandbox-session counterpart is `detachedSessionDelegate` (`/mcp`): it drives the in-box harness over a `SandboxClient` to a mechanically-validated patch, with default-on safety gates — no-op rejection, an always-on secret-path floor (`.env`, keys, wallets), an optional `reviewer` gate, and a `winnerSelection` policy. Its worker profile is a parameter the caller authors (`workerProfile`); omit it for a minimal model-only default. The knowledge-base gate (`createKbGate`, `/mcp`) is fail-closed: a fact's `verbatimPassage` must appear in its `sourceText`, the asserted value must be in the passage, and citations cannot point at self-generated artifacts. `researchLoopRunner` wraps it with a correct-on-veto loop that re-researches the vetoed gaps up to `maxRounds`, then returns the unverified ones rather than dropping them. @@ -271,15 +271,15 @@ The shape: `loop` to `loop.round` (move plus rationale) to `loop.iteration` (age ## MCP delegation server -Expose the delegation tools (`delegate_code`, `delegate_research`, `delegate_feedback`, `delegation_status`, `delegation_history`) to a sandbox coding agent. Mount the canonical server instead of forking delegation logic. +Expose the delegation tools to a sandbox coding agent: the generic `delegate` verb (one intent → a supervisor that authors + drives its own worker, returns the delivered output with its real spend) plus the queue-bound `delegate_feedback`, `delegation_status`, `delegation_history` (and `delegate_ui_audit` when a UI-audit runner is wired). Mount the canonical server instead of forking delegation logic. 
```ts -import { createMcpServer, createDefaultCoderDelegate } from '@tangle-network/agent-runtime/mcp' +import { createMcpServer } from '@tangle-network/agent-runtime/mcp' -const server = createMcpServer({ coderDelegate: createDefaultCoderDelegate({ sandboxClient }), researcherDelegate }) +const server = createMcpServer({ delegateSupervisor: { router, backend, deliverable } }) ``` -Or mount the `agent-runtime-mcp` stdio bin on a production `AgentProfile.mcp`. +Or mount the `agent-runtime-mcp` stdio bin on a production `AgentProfile.mcp` with `MCP_ENABLE_DELEGATE=1`. Delegation state is in-memory by default — a server restart drops pending delegations and history. Set `AGENT_RUNTIME_DELEGATION_STATE_FILE=/path/state.json` on the bin (or construct via `DelegationTaskQueue.restore({ store: new FileDelegationStore({ filePath }) })`) to persist records across restarts: `delegation_status`/`delegation_history` keep answering for prior runs, idempotency keys dedupe resubmissions, and in-flight records either resume through the `resumeDelegate` seam (when submitted with a `detachedSessionRef`) or settle as failed with an explicit driver-restart error. A corrupt state file refuses to load (`DelegationStateCorruptError`); `AGENT_RUNTIME_DELEGATION_STATE_RECOVER=1` archives it and starts empty. `AGENT_RUNTIME_DELEGATION_RETAIN_TERMINAL=` caps retained terminal records. 
@@ -342,7 +342,7 @@ Six subpaths — the public surface: | `@tangle-network/agent-runtime` | chat turns, delegated loop-runner, OTEL export, errors, model resolution | | `.../agent` | `defineAgent` plus surface and outcome adapters | | `.../loops` | **the optimization suite** (`Environment`, `defineStrategy`, `runBenchmark`, `runStrategyEvolution`, `authorStrategy`, `promotionGate`) + the recursive atom (`Supervisor`/`Scope`, `createExecutor`), the `runLoop` kernel, the `Driver` type, `loopDispatch` | -| `.../profiles` | `coderProfile`, `researcherProfile`, the `uiAuditorProfile` presets + the UI-audit workspace I/O helpers | +| `.../profiles` | `coderTaskToPrompt` (the coder task formatter), the `uiAuditorProfile` presets + the UI-audit workspace I/O helpers | | `.../intelligence` | `withTangleIntelligence`, `createIntelligenceClient` — Observe + the provable-OFF billing boundary | | `.../mcp` | `createMcpServer`, `createDefaultCoderDelegate`, `createKbGate`, the `agent-runtime-mcp` bin | diff --git a/docs/api/agent.md b/docs/api/agent.md index a45de284..57718acf 100644 --- a/docs/api/agent.md +++ b/docs/api/agent.md @@ -1103,7 +1103,7 @@ readonly `string`[] ### CreateSandboxActOptions -Defined in: [agent/sandbox-act.ts:29](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L29) +Defined in: [agent/sandbox-act.ts:47](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L47) #### Type Parameters @@ -1121,15 +1121,15 @@ Defined in: [agent/sandbox-act.ts:29](https://github.com/tangle-network/agent-ru > **baseProfile**: `AgentProfile` -Defined in: [agent/sandbox-act.ts:31](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L31) +Defined in: [agent/sandbox-act.ts:49](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L49) -Canonical agent profile — the same one the prod chat turn composes from. 
+Canonical agent profile — the same one the prod chat turn uses. ##### sandboxClient > **sandboxClient**: [`SandboxClient`](runtime.md#sandboxclient-1) -Defined in: [agent/sandbox-act.ts:33](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L33) +Defined in: [agent/sandbox-act.ts:51](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L51) Sandbox client used to boot the per-run sandbox. @@ -1137,7 +1137,7 @@ Sandbox client used to boot the per-run sandbox. > **buildPrompt**: (`persona`) => `string` -Defined in: [agent/sandbox-act.ts:35](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L35) +Defined in: [agent/sandbox-act.ts:53](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L53) Persona → prompt. Pure; the eval cell's input. @@ -1155,20 +1155,18 @@ Persona → prompt. Pure; the eval cell's input. > **output**: [`OutputAdapter`](runtime.md#outputadapter)\<`TRunOutput`\> -Defined in: [agent/sandbox-act.ts:37](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L37) +Defined in: [agent/sandbox-act.ts:55](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L55) Sandbox event stream → typed output the rubric scores. ##### compose? -> `optional` **compose?**: (`persona`) => [`ComposeProductionAgentProfileOptions`](mcp.md#composeproductionagentprofileoptions) +> `optional` **compose?**: (`persona`) => `SandboxActComposeOverrides` -Defined in: [agent/sandbox-act.ts:44](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L44) +Defined in: [agent/sandbox-act.ts:60](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L60) -Per-persona composition overrides (workspace-augmented system prompt, -extra file mounts, sandbox key). 
Merged into -[composeProductionAgentProfile](mcp.md#composeproductionagentprofile); `env` here is overridden by the -top-level `env` option when both are set. +Per-persona profile overrides (workspace-augmented system prompt, extra +file mounts, tool flags, MCP connections). Overlaid onto `baseProfile`. ###### Parameters @@ -1178,13 +1176,13 @@ top-level `env` option when both are set. ###### Returns -[`ComposeProductionAgentProfileOptions`](mcp.md#composeproductionagentprofileoptions) +`SandboxActComposeOverrides` ##### sandboxOverrides? > `optional` **sandboxOverrides?**: `Partial`\<`Omit`\<`CreateSandboxOptions`, `"backend"`\>\> & `object` -Defined in: [agent/sandbox-act.ts:46](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L46) +Defined in: [agent/sandbox-act.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L62) Sandbox-SDK overrides forwarded to `createSandboxForSpec`. @@ -1198,7 +1196,7 @@ Sandbox-SDK overrides forwarded to `createSandboxForSpec`. > `optional` **name?**: `string` -Defined in: [agent/sandbox-act.ts:48](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L48) +Defined in: [agent/sandbox-act.ts:64](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L64) Stable run name surfaced in mapped `llm_call` events. @@ -1206,7 +1204,7 @@ Stable run name surfaced in mapped `llm_call` events. > `optional` **mapEvent?**: (`event`, `opts`) => [`RuntimeStreamEvent`](index.md#runtimestreamevent) \| `undefined` -Defined in: [agent/sandbox-act.ts:50](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L50) +Defined in: [agent/sandbox-act.ts:66](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L66) Override the `SandboxEvent → RuntimeStreamEvent` mapper. @@ -1226,14 +1224,6 @@ Override the `SandboxEvent → RuntimeStreamEvent` mapper. 
[`RuntimeStreamEvent`](index.md#runtimestreamevent) \| `undefined` -##### env? - -> `optional` **env?**: `Record`\<`string`, `string` \| `undefined`\> - -Defined in: [agent/sandbox-act.ts:55](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L55) - -Environment source for delegation-MCP composition. Defaults to `process.env`. - *** ### AgentSurfaces @@ -1600,7 +1590,7 @@ optional on the type; missing means no measurement was wired). > **createSandboxAct**\<`TPersona`, `TRunOutput`\>(`options`): (`persona`, `ctx`) => [`AgentRunInvocation`](#agentruninvocation)\<`TRunOutput`\> -Defined in: [agent/sandbox-act.ts:64](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L64) +Defined in: [agent/sandbox-act.ts:78](https://github.com/tangle-network/agent-runtime/blob/main/src/agent/sandbox-act.ts#L78) Build an `AgentRuntime.act` implementation backed by a single prod-profile sandbox run. The returned function honours the `act` contract: it returns diff --git a/docs/api/index.md b/docs/api/index.md index 25977f68..57f36bb7 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -2981,7 +2981,7 @@ Defined in: [improvement/reflective-generator.ts:21](https://github.com/tangle-n ### DelegatedLoopResult -Defined in: [loop-runner.ts:75](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L75) +Defined in: [loop-runner.ts:66](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L66) **`Experimental`** @@ -3000,7 +3000,7 @@ Uniform result — never throws from a registered runner; a > **mode**: `"code"` \| `"review"` \| `"research"` \| `"audit"` \| `"self-improve"` -Defined in: [loop-runner.ts:76](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L76) +Defined in: [loop-runner.ts:67](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L67) **`Experimental`** @@ -3008,7 +3008,7 @@ Defined in: 
[loop-runner.ts:76](https://github.com/tangle-network/agent-runtime/ > **ok**: `boolean` -Defined in: [loop-runner.ts:77](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L77) +Defined in: [loop-runner.ts:68](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L68) **`Experimental`** @@ -3016,7 +3016,7 @@ Defined in: [loop-runner.ts:77](https://github.com/tangle-network/agent-runtime/ > `optional` **output?**: `T` -Defined in: [loop-runner.ts:78](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L78) +Defined in: [loop-runner.ts:69](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L69) **`Experimental`** @@ -3024,7 +3024,7 @@ Defined in: [loop-runner.ts:78](https://github.com/tangle-network/agent-runtime/ > `optional` **error?**: `string` -Defined in: [loop-runner.ts:79](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L79) +Defined in: [loop-runner.ts:70](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L70) **`Experimental`** @@ -3032,7 +3032,7 @@ Defined in: [loop-runner.ts:79](https://github.com/tangle-network/agent-runtime/ > **durationMs**: `number` -Defined in: [loop-runner.ts:80](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L80) +Defined in: [loop-runner.ts:71](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L71) **`Experimental`** @@ -3040,7 +3040,7 @@ Defined in: [loop-runner.ts:80](https://github.com/tangle-network/agent-runtime/ ### RunDelegatedLoopOptions -Defined in: [loop-runner.ts:84](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L84) +Defined in: [loop-runner.ts:75](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L75) **`Experimental`** @@ -3050,7 +3050,7 @@ Defined in: [loop-runner.ts:84](https://github.com/tangle-network/agent-runtime/ > `optional` **signal?**: 
`AbortSignal` -Defined in: [loop-runner.ts:85](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L85) +Defined in: [loop-runner.ts:76](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L76) **`Experimental`** @@ -3058,7 +3058,7 @@ Defined in: [loop-runner.ts:85](https://github.com/tangle-network/agent-runtime/ > `optional` **now?**: () => `number` -Defined in: [loop-runner.ts:87](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L87) +Defined in: [loop-runner.ts:78](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L78) **`Experimental`** @@ -3070,69 +3070,9 @@ Clock override for deterministic tests. *** -### CoderLoopRunnerOptions - -Defined in: [loop-runner.ts:128](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L128) - -**`Experimental`** - -Options for the default `code`/`review` runner. - -#### Properties - -##### sandboxClient - -> **sandboxClient**: [`SandboxClient`](runtime.md#sandboxclient-1) - -Defined in: [loop-runner.ts:129](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L129) - -**`Experimental`** - -##### args - -> **args**: [`DelegateCodeArgs`](mcp.md#delegatecodeargs) - -Defined in: [loop-runner.ts:131](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L131) - -**`Experimental`** - -What to build — the delegate args (goal, repoRoot, variants, config, …). - -##### reviewer? - -> `optional` **reviewer?**: [`CoderReviewer`](mcp.md#coderreviewer) - -Defined in: [loop-runner.ts:133](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L133) - -**`Experimental`** - -Adversarial reviewer. Pass one to run `review` mode (an approval gate over the candidate). - -##### winnerSelection? 
- -> `optional` **winnerSelection?**: [`DetachedWinnerSelection`](mcp.md#detachedwinnerselection) - -Defined in: [loop-runner.ts:135](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L135) - -**`Experimental`** - -Winner-selection strategy. Default `highest-score`. - -##### fanoutHarnesses? - -> `optional` **fanoutHarnesses?**: `string`[] - -Defined in: [loop-runner.ts:137](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L137) - -**`Experimental`** - -Harnesses for `variants > 1` fanout. - -*** - ### WorktreeLoopRunnerOptions -Defined in: [loop-runner.ts:158](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L158) +Defined in: [loop-runner.ts:119](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L119) **`Experimental`** @@ -3144,7 +3084,7 @@ Options for the local-repo `code` runner over the GENERIC recursive path. > **repoRoot**: `string` -Defined in: [loop-runner.ts:160](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L160) +Defined in: [loop-runner.ts:121](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L121) **`Experimental`** @@ -3154,7 +3094,7 @@ Absolute path to the local git checkout each worktree is cut from. 
> **taskPrompt**: `string` -Defined in: [loop-runner.ts:162](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L162) +Defined in: [loop-runner.ts:123](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L123) **`Experimental`** @@ -3164,7 +3104,7 @@ The instruction handed to every authored harness (composed under each profile's > **harnesses**: readonly [`AuthoredHarness`](runtime.md#authoredharness)[] -Defined in: [loop-runner.ts:164](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L164) +Defined in: [loop-runner.ts:125](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L125) **`Experimental`** @@ -3174,7 +3114,7 @@ The supervisor-authored harness profiles — one fanout item (one worktree-CLI l > **budget**: [`Budget`](runtime.md#budget-10) -Defined in: [loop-runner.ts:166](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L166) +Defined in: [loop-runner.ts:127](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L127) **`Experimental`** @@ -3184,7 +3124,7 @@ Conserved budget pool bounding the fanout (equal-k holds by construction). > `optional` **testCmd?**: `string` -Defined in: [loop-runner.ts:168](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L168) +Defined in: [loop-runner.ts:129](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L129) **`Experimental`** @@ -3194,7 +3134,7 @@ Shell command run in each worktree to derive the tests-PASS signal. > `optional` **typecheckCmd?**: `string` -Defined in: [loop-runner.ts:170](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L170) +Defined in: [loop-runner.ts:131](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L131) **`Experimental`** @@ -3204,7 +3144,7 @@ Shell command run in each worktree to derive the typecheck-PASS signal. 
> `optional` **require?**: readonly (`"tests"` \| `"typecheck"`)[] -Defined in: [loop-runner.ts:172](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L172) +Defined in: [loop-runner.ts:133](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L133) **`Experimental`** @@ -3214,7 +3154,7 @@ Which verification signals the deliverable REQUIRES present-and-passing (default > `optional` **maxDiffLines?**: `number` -Defined in: [loop-runner.ts:174](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L174) +Defined in: [loop-runner.ts:135](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L135) **`Experimental`** @@ -3224,7 +3164,7 @@ Diff-size cap (lines). > `optional` **forbiddenPaths?**: `string`[] -Defined in: [loop-runner.ts:176](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L176) +Defined in: [loop-runner.ts:137](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L137) **`Experimental`** @@ -3234,7 +3174,7 @@ Literal path prefixes the patch must not touch (the secret-floor is always on re > `optional` **winnerStrategy?**: [`WinnerStrategy`](runtime.md#winnerstrategy) -Defined in: [loop-runner.ts:178](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L178) +Defined in: [loop-runner.ts:139](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L139) **`Experimental`** @@ -3244,7 +3184,7 @@ Winner-selection strategy among gated candidates. Default `highest-score`. 
> `optional` **runGit?**: [`GitRunner`](mcp.md#gitrunner) -Defined in: [loop-runner.ts:180](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L180) +Defined in: [loop-runner.ts:141](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L141) **`Experimental`** @@ -3254,7 +3194,7 @@ Test seams forwarded to the worktree-CLI leaves so the runner drives offline. > `optional` **runHarness?**: (`options`) => `Promise`\<[`LocalHarnessResult`](mcp.md#localharnessresult)\> -Defined in: [loop-runner.ts:181](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L181) +Defined in: [loop-runner.ts:142](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L142) **`Experimental`** @@ -3290,7 +3230,7 @@ Does NOT throw when: > `optional` **runCommand?**: `WorktreeCheckRunner` -Defined in: [loop-runner.ts:182](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L182) +Defined in: [loop-runner.ts:143](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L143) **`Experimental`** @@ -3298,7 +3238,7 @@ Defined in: [loop-runner.ts:182](https://github.com/tangle-network/agent-runtime ### VetoedFact -Defined in: [loop-runner.ts:243](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L243) +Defined in: [loop-runner.ts:205](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L205) **`Experimental`** @@ -3310,7 +3250,7 @@ A fact rejected at the KB gate — surfaced, never dropped. 
> **candidate**: [`FactCandidate`](mcp.md#factcandidate) -Defined in: [loop-runner.ts:244](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L244) +Defined in: [loop-runner.ts:206](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L206) **`Experimental`** @@ -3318,7 +3258,7 @@ Defined in: [loop-runner.ts:244](https://github.com/tangle-network/agent-runtime > `optional` **vetoedBy?**: `string` -Defined in: [loop-runner.ts:245](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L245) +Defined in: [loop-runner.ts:207](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L207) **`Experimental`** @@ -3326,7 +3266,7 @@ Defined in: [loop-runner.ts:245](https://github.com/tangle-network/agent-runtime > `optional` **reason?**: `string` -Defined in: [loop-runner.ts:246](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L246) +Defined in: [loop-runner.ts:208](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L208) **`Experimental`** @@ -3334,7 +3274,7 @@ Defined in: [loop-runner.ts:246](https://github.com/tangle-network/agent-runtime ### ResearchLoopResult -Defined in: [loop-runner.ts:250](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L250) +Defined in: [loop-runner.ts:212](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L212) **`Experimental`** @@ -3344,7 +3284,7 @@ Defined in: [loop-runner.ts:250](https://github.com/tangle-network/agent-runtime > **accepted**: [`FactCandidate`](mcp.md#factcandidate)[] -Defined in: [loop-runner.ts:252](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L252) +Defined in: [loop-runner.ts:214](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L214) **`Experimental`** @@ -3354,7 +3294,7 @@ Facts that passed the fail-closed gate — safe to write to the KB. 
> **vetoed**: [`VetoedFact`](#vetoedfact)[] -Defined in: [loop-runner.ts:254](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L254) +Defined in: [loop-runner.ts:216](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L216) **`Experimental`** @@ -3364,7 +3304,7 @@ Facts the gate vetoed in the final round — escalate, do not silently drop. > **rounds**: `number` -Defined in: [loop-runner.ts:256](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L256) +Defined in: [loop-runner.ts:218](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L218) **`Experimental`** @@ -3374,7 +3314,7 @@ Research rounds actually run. ### ResearchLoopRunnerOptions -Defined in: [loop-runner.ts:260](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L260) +Defined in: [loop-runner.ts:222](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L222) **`Experimental`** @@ -3386,7 +3326,7 @@ Options for the default `research` runner. > **research**: (`round`, `vetoed`) => `Promise`\<[`FactCandidate`](mcp.md#factcandidate)[]\> -Defined in: [loop-runner.ts:267](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L267) +Defined in: [loop-runner.ts:229](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L229) **`Experimental`** @@ -3413,7 +3353,7 @@ Returns fact candidates carrying their grounding (`verbatimPassage` + > `optional` **gate?**: [`CreateKbGateOptions`](mcp.md#createkbgateoptions) -Defined in: [loop-runner.ts:269](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L269) +Defined in: [loop-runner.ts:231](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L231) **`Experimental`** @@ -3423,7 +3363,7 @@ Gate config (extra judges, self-artifact kinds, …). The floor is always on. 
> `optional` **maxRounds?**: `number` -Defined in: [loop-runner.ts:271](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L271) +Defined in: [loop-runner.ts:233](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L233) **`Experimental`** @@ -6451,7 +6391,7 @@ The agent-profile lever `improve` optimizes. Mirrors the AgentProfile-law > **DelegatedLoopMode** = *typeof* [`DELEGATED_LOOP_MODES`](#delegated_loop_modes)\[`number`\] -Defined in: [loop-runner.ts:58](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L58) +Defined in: [loop-runner.ts:49](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L49) **`Experimental`** @@ -6461,7 +6401,7 @@ Defined in: [loop-runner.ts:58](https://github.com/tangle-network/agent-runtime/ > **DelegatedLoopRunner**\<`T`\> = (`signal`) => `Promise`\<`T`\> -Defined in: [loop-runner.ts:67](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L67) +Defined in: [loop-runner.ts:58](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L58) **`Experimental`** @@ -6490,7 +6430,7 @@ A pre-configured loop for one mode. Returns the mode's raw > **DelegatedLoopRegistry** = `Partial`\<`Record`\<[`DelegatedLoopMode`](#delegatedloopmode), [`DelegatedLoopRunner`](#delegatedlooprunner)\>\> -Defined in: [loop-runner.ts:71](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L71) +Defined in: [loop-runner.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L62) **`Experimental`** @@ -7012,7 +6952,7 @@ Hard cap on chained gateway hops; refused beyond this. 
Default keeps recursion b > `const` **DELEGATED\_LOOP\_MODES**: readonly \[`"code"`, `"review"`, `"research"`, `"audit"`, `"self-improve"`\] -Defined in: [loop-runner.ts:55](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L55) +Defined in: [loop-runner.ts:46](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L46) **`Experimental`** @@ -7940,7 +7880,7 @@ Defined in: [improvement/reflective-generator.ts:24](https://github.com/tangle-n > **isDelegatedLoopMode**(`value`): value is "code" \| "review" \| "research" \| "audit" \| "self-improve" -Defined in: [loop-runner.ts:61](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L61) +Defined in: [loop-runner.ts:52](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L52) **`Experimental`** @@ -7962,7 +7902,7 @@ value is "code" \| "review" \| "research" \| "audit" \| "self-improve" > **runDelegatedLoop**\<`T`\>(`mode`, `registry`, `options?`): `Promise`\<[`DelegatedLoopResult`](#delegatedloopresult)\<`T`\>\> -Defined in: [loop-runner.ts:98](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L98) +Defined in: [loop-runner.ts:89](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L89) **`Experimental`** @@ -7997,42 +7937,20 @@ config bug, not a silent no-op. A runner that throws is captured as *** -### coderLoopRunner() - -> **coderLoopRunner**(`options`): [`DelegatedLoopRunner`](#delegatedlooprunner)\<`CoderOutput`\> - -Defined in: [loop-runner.ts:144](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L144) - -**`Experimental`** - -Build a `code`/`review`-mode runner over the sandbox-session coder delegate. Pass a -`reviewer` to run `review` mode — an approval gate over the validated candidate. 
- -#### Parameters - -##### options - -[`CoderLoopRunnerOptions`](#coderlooprunneroptions) - -#### Returns - -[`DelegatedLoopRunner`](#delegatedlooprunner)\<`CoderOutput`\> - -*** - ### worktreeLoopRunner() > **worktreeLoopRunner**(`options`): [`DelegatedLoopRunner`](#delegatedlooprunner)\<`WorktreeHarnessResult`\> -Defined in: [loop-runner.ts:197](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L197) +Defined in: [loop-runner.ts:159](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L159) **`Experimental`** `code` mode on the GENERIC recursive path: author one `AgentProfile` per harness, run them as a `worktreeFanout` (N `createWorktreeCliExecutor` leaves, each `gateOnDeliverable`) through -`runPersonified` on the keystone Supervisor. This is the local-repo counterpart to -[coderLoopRunner](#coderlooprunner) (which drives the in-box harness over a `SandboxClient`): no `runLoop` -driver, no role-coupled delegate — the harness list is the fanout, the gate is `patchDelivered`, +`runPersonified` on the keystone Supervisor. The sandbox-session counterpart that drives the in-box +harness over a `SandboxClient` is `detachedSessionDelegate` (`./mcp/delegates`); here there is no +`runLoop` driver, no role-coupled delegate — the harness list is the fanout, the gate is +`patchDelivered`, the winner is the shared valid-only selector (NOT `defaultSelectWinner`, whose non-valid fallback would surface an ungated patch). Equal-k holds by the conserved budget pool. Returns the winning patch artifact, or throws when no candidate is delivered (fail loud, never a vacuous done). 
@@ -8053,7 +7971,7 @@ patch artifact, or throws when no candidate is delivered (fail loud, never a vac > **researchLoopRunner**(`o`): [`DelegatedLoopRunner`](#delegatedlooprunner)\<[`ResearchLoopResult`](#researchloopresult)\> -Defined in: [loop-runner.ts:282](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L282) +Defined in: [loop-runner.ts:244](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L244) **`Experimental`** @@ -8080,7 +7998,7 @@ never silently dropped) so the caller audits vs retries. > **selfImproveLoopRunner**\<`TScenario`, `TArtifact`\>(`options`): [`DelegatedLoopRunner`](#delegatedlooprunner)\<`SelfImproveResult`\<`TScenario`, `TArtifact`\>\> -Defined in: [loop-runner.ts:309](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L309) +Defined in: [loop-runner.ts:271](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L271) **`Experimental`** @@ -8112,7 +8030,7 @@ Defined in: [loop-runner.ts:309](https://github.com/tangle-network/agent-runtime > **auditLoopRunner**\<`TProposal`, `TEdit`\>(`options`): [`DelegatedLoopRunner`](#delegatedlooprunner)\<[`RunAnalystLoopResult`](analyst-loop.md#runanalystloopresult)\<`TProposal`, `TEdit`\>\> -Defined in: [loop-runner.ts:316](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L316) +Defined in: [loop-runner.ts:278](https://github.com/tangle-network/agent-runtime/blob/main/src/loop-runner.ts#L278) **`Experimental`** @@ -8144,14 +8062,13 @@ Defined in: [loop-runner.ts:316](https://github.com/tangle-network/agent-runtime > **mcpToolsForRuntimeMcp**(): [`OpenAIChatTool`](#openaichattool)[] -Defined in: [mcp/openai-tools.ts:74](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/openai-tools.ts#L74) +Defined in: [mcp/openai-tools.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/openai-tools.ts#L62) **`Experimental`** -Returns the 5 delegation 
tools projected into OpenAI Chat Completions -`tools[]` shape. The order is stable: `delegate_code`, -`delegate_research`, `delegate_feedback`, `delegation_status`, -`delegation_history`. +Returns the queue-bound delegation tools projected into OpenAI Chat +Completions `tools[]` shape. The order is stable: `delegate_feedback`, +`delegation_status`, `delegation_history`. #### Returns @@ -8163,7 +8080,7 @@ Returns the 5 delegation tools projected into OpenAI Chat Completions > **mcpToolsForRuntimeMcpSubset**(`names`): [`OpenAIChatTool`](#openaichattool)[] -Defined in: [mcp/openai-tools.ts:112](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/openai-tools.ts#L112) +Defined in: [mcp/openai-tools.ts:90](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/openai-tools.ts#L90) **`Experimental`** diff --git a/docs/api/mcp.md b/docs/api/mcp.md index 88d50ad7..33254553 100644 --- a/docs/api/mcp.md +++ b/docs/api/mcp.md @@ -723,7 +723,7 @@ shape against the structural `FleetHandle` contract. 
### DelegateRunCtx -Defined in: [mcp/delegates.ts:65](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L65) +Defined in: [mcp/delegates.ts:56](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L56) **`Experimental`** @@ -733,7 +733,7 @@ Defined in: [mcp/delegates.ts:65](https://github.com/tangle-network/agent-runtim > **signal**: `AbortSignal` -Defined in: [mcp/delegates.ts:66](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L66) +Defined in: [mcp/delegates.ts:57](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L57) **`Experimental`** @@ -741,7 +741,7 @@ Defined in: [mcp/delegates.ts:66](https://github.com/tangle-network/agent-runtim > `optional` **detachedSessionRef?**: `string` -Defined in: [mcp/delegates.ts:74](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L74) +Defined in: [mcp/delegates.ts:65](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L65) **`Experimental`** @@ -754,7 +754,7 @@ onto the `driveTurn` tick path instead of holding a stream. > `optional` **traceEmitter?**: [`LoopTraceEmitter`](runtime.md#looptraceemitter) -Defined in: [mcp/delegates.ts:83](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L83) +Defined in: [mcp/delegates.ts:74](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L74) **`Experimental`** @@ -769,7 +769,7 @@ the same stream. 
> **report**(`progress`): `void` -Defined in: [mcp/delegates.ts:67](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L67) +Defined in: [mcp/delegates.ts:58](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L58) **`Experimental`** @@ -787,7 +787,7 @@ Defined in: [mcp/delegates.ts:67](https://github.com/tangle-network/agent-runtim > `optional` **updateDetachedSessionRef**(`ref`): `void` -Defined in: [mcp/delegates.ts:76](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L76) +Defined in: [mcp/delegates.ts:67](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L67) **`Experimental`** @@ -807,7 +807,7 @@ Rebind the record's resume key (e.g. once the sandbox id is known). ### CoderReview -Defined in: [mcp/delegates.ts:111](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L111) +Defined in: [mcp/delegates.ts:97](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L97) **`Experimental`** @@ -819,7 +819,7 @@ Structured review verdict over a coder candidate. > **approved**: `boolean` -Defined in: [mcp/delegates.ts:113](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L113) +Defined in: [mcp/delegates.ts:99](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L99) **`Experimental`** @@ -829,7 +829,7 @@ Gate: only approved candidates are eligible to win. > **recommendation**: `"ship"` \| `"approve-with-nits"` \| `"changes-requested"` \| `"reject"` -Defined in: [mcp/delegates.ts:115](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L115) +Defined in: [mcp/delegates.ts:101](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L101) **`Experimental`** @@ -839,7 +839,7 @@ Reviewer's recommendation — surfaced in traces. 
> **readiness**: `number` -Defined in: [mcp/delegates.ts:117](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L117) +Defined in: [mcp/delegates.ts:103](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L103) **`Experimental`** @@ -849,7 +849,7 @@ Readiness 0..1, used by the `highest-readiness` winner-selection strategy. > `optional` **notes?**: `string` -Defined in: [mcp/delegates.ts:118](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L118) +Defined in: [mcp/delegates.ts:104](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L104) **`Experimental`** @@ -857,7 +857,7 @@ Defined in: [mcp/delegates.ts:118](https://github.com/tangle-network/agent-runti ### DetachedSessionDelegateOptions -Defined in: [mcp/delegates.ts:150](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L150) +Defined in: [mcp/delegates.ts:136](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L136) **`Experimental`** @@ -867,7 +867,7 @@ Defined in: [mcp/delegates.ts:150](https://github.com/tangle-network/agent-runti > `optional` **executor?**: [`DelegationExecutor`](#delegationexecutor) -Defined in: [mcp/delegates.ts:157](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L157) +Defined in: [mcp/delegates.ts:143](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L143) **`Experimental`** @@ -880,28 +880,42 @@ one or the other, not both. > `optional` **sandboxClient?**: [`SandboxClient`](runtime.md#sandboxclient-1) -Defined in: [mcp/delegates.ts:162](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L162) +Defined in: [mcp/delegates.ts:148](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L148) **`Experimental`** Convenience shorthand for sibling placement. 
Equivalent to `executor: createSiblingSandboxExecutor({ client: sandboxClient })`. +##### workerProfile? + +> `optional` **workerProfile?**: `AgentProfile` + +Defined in: [mcp/delegates.ts:156](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L156) + +**`Experimental`** + +The worker's authored `AgentProfile` (§1.5: the system authors profiles). Spread onto the +sandbox-session run spec → `runLoop` → the executor's `harnessInvocation`, so the harness runs +under the caller's stance. Omit to use a minimal model-only default (no hardcoded skills/tools); +`harness` / `model` / `systemPrompt` below are convenience overrides layered onto whichever +profile is used. + ##### harness? > `optional` **harness?**: `string` -Defined in: [mcp/delegates.ts:164](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L164) +Defined in: [mcp/delegates.ts:158](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L158) **`Experimental`** -Backend harness for the single-coder path. Default comes from `coderProfile`. +Backend harness for the single-coder path (sets `metadata.backendType`). Default `claude-code`. ##### model? > `optional` **model?**: `string` -Defined in: [mcp/delegates.ts:166](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L166) +Defined in: [mcp/delegates.ts:160](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L160) **`Experimental`** @@ -911,19 +925,19 @@ Model override for the single-coder path. > `optional` **systemPrompt?**: `string` -Defined in: [mcp/delegates.ts:172](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L172) +Defined in: [mcp/delegates.ts:166](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L166) **`Experimental`** -The worker's authored system prompt (§1.5). Flows onto `coderProfile`'s +The worker's authored system prompt (§1.5). 
Flows onto the run spec's `profile.prompt.systemPrompt` → through `runLoop` → the executor's `harnessInvocation`, so the -harness runs under this stance, not just the default coder prompt. Omit to keep the default. +harness runs under this stance. Omit to keep the profile's own prompt. ##### fanoutHarnesses? > `optional` **fanoutHarnesses?**: `string`[] -Defined in: [mcp/delegates.ts:174](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L174) +Defined in: [mcp/delegates.ts:168](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L168) **`Experimental`** @@ -933,7 +947,7 @@ Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']` when vari > `optional` **fanoutModels?**: (`string` \| `undefined`)[] -Defined in: [mcp/delegates.ts:176](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L176) +Defined in: [mcp/delegates.ts:170](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L170) **`Experimental`** @@ -943,7 +957,7 @@ Optional per-harness model override for `variants > 1`. > `optional` **maxConcurrency?**: `number` -Defined in: [mcp/delegates.ts:178](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L178) +Defined in: [mcp/delegates.ts:172](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L172) **`Experimental`** @@ -953,7 +967,7 @@ Hard cap on the kernel's per-batch concurrency. Default 4. > `optional` **reviewer?**: [`CoderReviewer`](#coderreviewer) -Defined in: [mcp/delegates.ts:185](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L185) +Defined in: [mcp/delegates.ts:179](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L179) **`Experimental`** @@ -966,7 +980,7 @@ but wrong/unsafe" class the deterministic validator can't see. 
> `optional` **winnerSelection?**: [`DetachedWinnerSelection`](#detachedwinnerselection) -Defined in: [mcp/delegates.ts:187](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L187) +Defined in: [mcp/delegates.ts:181](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L181) **`Experimental`** @@ -976,7 +990,7 @@ Winner-selection strategy among eligible candidates. Default `highest-score`. > `optional` **traceEmitter?**: [`LoopTraceEmitter`](runtime.md#looptraceemitter) -Defined in: [mcp/delegates.ts:199](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L199) +Defined in: [mcp/delegates.ts:193](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L193) **`Experimental`** @@ -994,7 +1008,7 @@ event stream for them so this emitter observes detached work too. > `optional` **detachedTickIntervalMs?**: `number` -Defined in: [mcp/delegates.ts:201](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L201) +Defined in: [mcp/delegates.ts:195](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L195) **`Experimental`** @@ -1004,7 +1018,7 @@ Tick cadence (ms) for the detached single-variant path. Default 5000. > `optional` **detachedWallCapMs?**: `number` -Defined in: [mcp/delegates.ts:203](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L203) +Defined in: [mcp/delegates.ts:197](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L197) **`Experimental`** @@ -1014,7 +1028,7 @@ Wall-clock cap (ms) forwarded to `driveTurn` for detached turns. 
### SettleDetachedCoderTurnOptions -Defined in: [mcp/delegates.ts:442](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L442) +Defined in: [mcp/delegates.ts:438](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L438) **`Experimental`** @@ -1024,7 +1038,7 @@ Defined in: [mcp/delegates.ts:442](https://github.com/tangle-network/agent-runti > **task**: [`CoderTask`](profiles.md#codertask) -Defined in: [mcp/delegates.ts:443](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L443) +Defined in: [mcp/delegates.ts:439](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L439) **`Experimental`** @@ -1032,7 +1046,7 @@ Defined in: [mcp/delegates.ts:443](https://github.com/tangle-network/agent-runti > **sessionId**: `string` -Defined in: [mcp/delegates.ts:445](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L445) +Defined in: [mcp/delegates.ts:441](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L441) **`Experimental`** @@ -1042,7 +1056,7 @@ Session id of the detached turn — used as the synthesized event id. 
> **signal**: `AbortSignal` -Defined in: [mcp/delegates.ts:446](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L446) +Defined in: [mcp/delegates.ts:442](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L442) **`Experimental`** @@ -1050,7 +1064,7 @@ Defined in: [mcp/delegates.ts:446](https://github.com/tangle-network/agent-runti > `optional` **harness?**: `string` -Defined in: [mcp/delegates.ts:447](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L447) +Defined in: [mcp/delegates.ts:443](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L443) **`Experimental`** @@ -1058,7 +1072,7 @@ Defined in: [mcp/delegates.ts:447](https://github.com/tangle-network/agent-runti > `optional` **model?**: `string` -Defined in: [mcp/delegates.ts:448](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L448) +Defined in: [mcp/delegates.ts:444](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L444) **`Experimental`** @@ -1066,7 +1080,7 @@ Defined in: [mcp/delegates.ts:448](https://github.com/tangle-network/agent-runti > `optional` **reviewer?**: [`CoderReviewer`](#coderreviewer) -Defined in: [mcp/delegates.ts:450](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L450) +Defined in: [mcp/delegates.ts:446](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L446) **`Experimental`** @@ -1074,141 +1088,6 @@ Same gate as the streaming path: an unapproved candidate cannot win. *** -### BuildDelegationMcpServerOptions - -Defined in: [mcp/delegation-profile.ts:50](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L50) - -#### Properties - -##### sandboxApiKey? 
- -> `optional` **sandboxApiKey?**: `string` - -Defined in: [mcp/delegation-profile.ts:54](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L54) - -Sandbox API key forwarded as `TANGLE_API_KEY` to the MCP child. The - agent-runtime MCP bin reads `TANGLE_API_KEY` and passes it straight to - `new Sandbox({ apiKey })`. Defaults to `env.TANGLE_API_KEY`. - -##### sandboxBaseUrl? - -> `optional` **sandboxBaseUrl?**: `string` - -Defined in: [mcp/delegation-profile.ts:58](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L58) - -Sandbox base URL forwarded as `SANDBOX_BASE_URL`. Defaults to - `env.SANDBOX_BASE_URL`, then `env.SANDBOX_API_URL`, then the public - sandbox endpoint. - -##### env? - -> `optional` **env?**: `Record`\<`string`, `string` \| `undefined`\> - -Defined in: [mcp/delegation-profile.ts:61](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L61) - -Environment source for key + OTEL resolution. Defaults to `process.env`; - injectable for tests and non-process callers. - -*** - -### ComposeProductionAgentProfileOptions - -Defined in: [mcp/delegation-profile.ts:112](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L112) - -#### Properties - -##### sandboxApiKey? - -> `optional` **sandboxApiKey?**: `string` - -Defined in: [mcp/delegation-profile.ts:115](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L115) - -Sandbox API key forwarded to the delegation MCP child. Defaults to - `env.TANGLE_API_KEY`. When unset, the delegation MCP entry is omitted. - -##### sandboxBaseUrl? - -> `optional` **sandboxBaseUrl?**: `string` - -Defined in: [mcp/delegation-profile.ts:117](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L117) - -Sandbox base URL forwarded as `SANDBOX_BASE_URL` to the MCP child. - -##### systemPrompt? 
- -> `optional` **systemPrompt?**: `string` - -Defined in: [mcp/delegation-profile.ts:120](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L120) - -Replace the base profile's system prompt. Used by per-turn calls that - swap in workspace-augmented prompts (board summary, learned style). - -##### extraFiles? - -> `optional` **extraFiles?**: `AgentProfileFileMount`[] - -Defined in: [mcp/delegation-profile.ts:122](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L122) - -Extra file mounts layered after the base profile's `resources.files`. - -##### name? - -> `optional` **name?**: `string` - -Defined in: [mcp/delegation-profile.ts:124](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L124) - -Override the profile `name`. Defaults to the base profile's name. - -##### env? - -> `optional` **env?**: `Record`\<`string`, `string` \| `undefined`\> - -Defined in: [mcp/delegation-profile.ts:126](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L126) - -Environment source for key + OTEL resolution. Defaults to `process.env`. - -##### tools? - -> `optional` **tools?**: `Record`\<`string`, `boolean`\> - -Defined in: [mcp/delegation-profile.ts:131](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L131) - -Box built-in tool ON/OFF flags merged over the base profile's `tools` - (overlay wins per key). The sandbox-seam mapping of a certified surface's - tool grants — `AgentProfile.tools` is `Record` box flags, - so it carries grants, not arbitrary tool defs. - -##### hooks? - -> `optional` **hooks?**: `Record`\<`string`, `AgentProfileHookCommand`[]\> - -Defined in: [mcp/delegation-profile.ts:134](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L134) - -Per-event hook commands merged over the base profile's `hooks`. 
An event - present in both has the extra commands appended after the base ones. - -##### subagents? - -> `optional` **subagents?**: `Record`\<`string`, `AgentSubagentProfile`\> - -Defined in: [mcp/delegation-profile.ts:137](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L137) - -Subagent definitions merged over the base profile's `subagents` (overlay - wins per key). - -##### mcpConnections? - -> `optional` **mcpConnections?**: `Record`\<`string`, `AgentProfileMcpServer`\> - -Defined in: [mcp/delegation-profile.ts:142](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L142) - -Resolved certified MCP connections injected into `AgentProfile.mcp` — the - sandbox-seam delivery of a `ResolvedSurface.mcpConnections`. Merged after - the base map and before the delegation entry, so a base/delegation key is - never silently shadowed by an injected one. - -*** - ### DelegationStore Defined in: [mcp/delegation-store.ts:23](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-store.ts#L23) @@ -1860,7 +1739,7 @@ prompt the delegate would have sent. 
##### settleOutput() -> **settleOutput**(`turn`, `record`, `ctx`): `CoderOutput` \| [`ResearchOutputShape`](#researchoutputshape) \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput) \| `Promise`\<`CoderOutput` \| [`ResearchOutputShape`](#researchoutputshape) \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput)\> +> **settleOutput**(`turn`, `record`, `ctx`): `CoderOutput` \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput) \| [`ResearchOutputShape`](#researchoutputshape) \| `Promise`\<`CoderOutput` \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput) \| [`ResearchOutputShape`](#researchoutputshape)\> Defined in: [mcp/detached-turn.ts:384](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/detached-turn.ts#L384) @@ -1888,7 +1767,7 @@ profile's gate — the queue settles the record as failed with that error. ###### Returns -`CoderOutput` \| [`ResearchOutputShape`](#researchoutputshape) \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput) \| `Promise`\<`CoderOutput` \| [`ResearchOutputShape`](#researchoutputshape) \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput)\> +`CoderOutput` \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput) \| [`ResearchOutputShape`](#researchoutputshape) \| `Promise`\<`CoderOutput` \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput) \| [`ResearchOutputShape`](#researchoutputshape)\> *** @@ -2826,7 +2705,7 @@ Set when timeoutMs elapsed before exit. 
### McpServerOptions -Defined in: [mcp/server.ts:71](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L71) +Defined in: [mcp/server.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L62) **`Experimental`** @@ -2836,7 +2715,7 @@ Defined in: [mcp/server.ts:71](https://github.com/tangle-network/agent-runtime/b > `optional` **delegateSupervisor?**: [`DelegateHandlerOptions`](#delegatehandleroptions) -Defined in: [mcp/server.ts:78](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L78) +Defined in: [mcp/server.ts:69](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L69) **`Experimental`** @@ -2845,34 +2724,11 @@ delegate_code / delegate_research). Inject the supervisor substrate: its brain ` worker `backend`, and the completion `deliverable`. The supervisor AUTHORS its own worker from the agent's intent, so there is no worker profile to wire here. -##### coderDelegate? - -> `optional` **coderDelegate?**: [`CoderDelegate`](#coderdelegate) - -Defined in: [mcp/server.ts:80](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L80) - -**`Experimental`** - -Required to enable delegate_code. - -##### researcherDelegate? - -> `optional` **researcherDelegate?**: [`ResearcherDelegate`](#researcherdelegate) - -Defined in: [mcp/server.ts:87](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L87) - -**`Experimental`** - -Required to enable delegate_research. The substrate cannot ship a -default — wire one that closes over your `runLoop` + a -researcher profile (typically `@tangle-network/agent-knowledge`'s -`researcherProfile` / `multiHarnessResearcherFanout`). - ##### uiAuditorDelegate? 
> `optional` **uiAuditorDelegate?**: [`UiAuditorDelegate`](#uiauditordelegate) -Defined in: [mcp/server.ts:94](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L94) +Defined in: [mcp/server.ts:76](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L76) **`Experimental`** @@ -2885,7 +2741,7 @@ canonical in-process choice is `createInProcessUiAuditClient` from > `optional` **feedbackStore?**: [`FeedbackStore`](#feedbackstore) -Defined in: [mcp/server.ts:96](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L96) +Defined in: [mcp/server.ts:78](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L78) **`Experimental`** @@ -2895,32 +2751,17 @@ Override the default in-memory feedback store. > `optional` **queue?**: [`DelegationTaskQueue`](#delegationtaskqueue) -Defined in: [mcp/server.ts:98](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L98) +Defined in: [mcp/server.ts:80](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L80) **`Experimental`** Override the default in-memory task queue. -##### detachedDispatch? - -> `optional` **detachedDispatch?**: `boolean` - -Defined in: [mcp/server.ts:107](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L107) - -**`Experimental`** - -Record deterministic detached-session resume keys on single-variant -coder/researcher submissions so a durable queue can resume them after a -restart. Enable only when the wired delegates dispatch via sandbox -sessions (`driveTurn`) AND `queue` persists records — the keys are inert -otherwise. The bin turns this on alongside the durable store for -session-backed (sibling/fleet) placements. - ##### extraTools? 
> `optional` **extraTools?**: [`McpToolDescriptor`](#mcptooldescriptor)[] -Defined in: [mcp/server.ts:113](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L113) +Defined in: [mcp/server.ts:86](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L86) **`Experimental`** @@ -2932,7 +2773,7 @@ duplicate name throws so delegation tools cannot be shadowed silently. > `optional` **traceContext?**: [`TraceContext`](#tracecontext-2) -Defined in: [mcp/server.ts:119](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L119) +Defined in: [mcp/server.ts:92](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L92) **`Experimental`** @@ -2944,7 +2785,7 @@ pass `traceContext` to that queue's constructor instead. > `optional` **serverName?**: `string` -Defined in: [mcp/server.ts:121](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L121) +Defined in: [mcp/server.ts:94](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L94) **`Experimental`** @@ -2954,7 +2795,7 @@ Server display name surfaced via `initialize`. Default `'agent-runtime-mcp'`. > `optional` **serverVersion?**: `string` -Defined in: [mcp/server.ts:123](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L123) +Defined in: [mcp/server.ts:96](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L96) **`Experimental`** @@ -2964,7 +2805,7 @@ Server version surfaced via `initialize`. 
Default = the package version baked at ### McpToolDescriptor -Defined in: [mcp/server.ts:127](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L127) +Defined in: [mcp/server.ts:100](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L100) **`Experimental`** @@ -2974,7 +2815,7 @@ Defined in: [mcp/server.ts:127](https://github.com/tangle-network/agent-runtime/ > **name**: `string` -Defined in: [mcp/server.ts:128](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L128) +Defined in: [mcp/server.ts:101](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L101) **`Experimental`** @@ -2982,7 +2823,7 @@ Defined in: [mcp/server.ts:128](https://github.com/tangle-network/agent-runtime/ > **description**: `string` -Defined in: [mcp/server.ts:129](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L129) +Defined in: [mcp/server.ts:102](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L102) **`Experimental`** @@ -2990,7 +2831,7 @@ Defined in: [mcp/server.ts:129](https://github.com/tangle-network/agent-runtime/ > **inputSchema**: `Record`\<`string`, `unknown`\> -Defined in: [mcp/server.ts:130](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L130) +Defined in: [mcp/server.ts:103](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L103) **`Experimental`** @@ -2998,7 +2839,7 @@ Defined in: [mcp/server.ts:130](https://github.com/tangle-network/agent-runtime/ > **handler**: (`raw`) => `Promise`\<`unknown`\> -Defined in: [mcp/server.ts:131](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L131) +Defined in: [mcp/server.ts:104](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L104) **`Experimental`** @@ -3016,7 +2857,7 @@ Defined in: [mcp/server.ts:131](https://github.com/tangle-network/agent-runtime/ ### McpServer -Defined in: 
[mcp/server.ts:135](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L135) +Defined in: [mcp/server.ts:108](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L108) **`Experimental`** @@ -3026,7 +2867,7 @@ Defined in: [mcp/server.ts:135](https://github.com/tangle-network/agent-runtime/ > `readonly` **tools**: `ReadonlyMap`\<`string`, [`McpToolDescriptor`](#mcptooldescriptor)\> -Defined in: [mcp/server.ts:137](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L137) +Defined in: [mcp/server.ts:110](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L110) **`Experimental`** @@ -3036,7 +2877,7 @@ Tools currently registered (depend on which delegates were wired). > `readonly` **queue**: [`DelegationTaskQueue`](#delegationtaskqueue) -Defined in: [mcp/server.ts:139](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L139) +Defined in: [mcp/server.ts:112](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L112) **`Experimental`** @@ -3046,7 +2887,7 @@ The underlying queue — exposed so tests can introspect it. > `readonly` **feedbackStore**: [`FeedbackStore`](#feedbackstore) -Defined in: [mcp/server.ts:141](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L141) +Defined in: [mcp/server.ts:114](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L114) **`Experimental`** @@ -3058,7 +2899,7 @@ The feedback store — exposed for the same reason. > **handle**(`message`): `Promise`\<[`JsonRpcResponse`](#jsonrpcresponse) \| `null`\> -Defined in: [mcp/server.ts:143](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L143) +Defined in: [mcp/server.ts:116](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L116) **`Experimental`** @@ -3078,7 +2919,7 @@ Handle a single parsed JSON-RPC message. 
Returns the response object (or `null` > **serve**(`transport?`): `Promise`\<`void`\> -Defined in: [mcp/server.ts:145](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L145) +Defined in: [mcp/server.ts:118](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L118) **`Experimental`** @@ -3098,7 +2939,7 @@ Drive the server on a stdio-shaped transport until `stop()` is called. > **stop**(): `void` -Defined in: [mcp/server.ts:147](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L147) +Defined in: [mcp/server.ts:120](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L120) **`Experimental`** @@ -3112,7 +2953,7 @@ Stop a `serve` call. Subsequent requests are rejected. ### McpTransport -Defined in: [mcp/server.ts:151](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L151) +Defined in: [mcp/server.ts:124](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L124) **`Experimental`** @@ -3122,7 +2963,7 @@ Defined in: [mcp/server.ts:151](https://github.com/tangle-network/agent-runtime/ > **input**: `ReadableStream` -Defined in: [mcp/server.ts:152](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L152) +Defined in: [mcp/server.ts:125](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L125) **`Experimental`** @@ -3130,7 +2971,7 @@ Defined in: [mcp/server.ts:152](https://github.com/tangle-network/agent-runtime/ > **output**: `WritableStream` -Defined in: [mcp/server.ts:153](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L153) +Defined in: [mcp/server.ts:126](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L126) **`Experimental`** @@ -3138,7 +2979,7 @@ Defined in: [mcp/server.ts:153](https://github.com/tangle-network/agent-runtime/ ### JsonRpcMessage -Defined in: 
[mcp/server.ts:157](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L157) +Defined in: [mcp/server.ts:130](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L130) **`Experimental`** @@ -3148,7 +2989,7 @@ Defined in: [mcp/server.ts:157](https://github.com/tangle-network/agent-runtime/ > **jsonrpc**: `"2.0"` -Defined in: [mcp/server.ts:158](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L158) +Defined in: [mcp/server.ts:131](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L131) **`Experimental`** @@ -3156,7 +2997,7 @@ Defined in: [mcp/server.ts:158](https://github.com/tangle-network/agent-runtime/ > `optional` **id?**: `string` \| `number` \| `null` -Defined in: [mcp/server.ts:159](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L159) +Defined in: [mcp/server.ts:132](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L132) **`Experimental`** @@ -3164,7 +3005,7 @@ Defined in: [mcp/server.ts:159](https://github.com/tangle-network/agent-runtime/ > **method**: `string` -Defined in: [mcp/server.ts:160](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L160) +Defined in: [mcp/server.ts:133](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L133) **`Experimental`** @@ -3172,7 +3013,7 @@ Defined in: [mcp/server.ts:160](https://github.com/tangle-network/agent-runtime/ > `optional` **params?**: `unknown` -Defined in: [mcp/server.ts:161](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L161) +Defined in: [mcp/server.ts:134](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L134) **`Experimental`** @@ -3180,7 +3021,7 @@ Defined in: [mcp/server.ts:161](https://github.com/tangle-network/agent-runtime/ ### JsonRpcResponse -Defined in: 
[mcp/server.ts:165](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L165) +Defined in: [mcp/server.ts:138](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L138) **`Experimental`** @@ -3190,7 +3031,7 @@ Defined in: [mcp/server.ts:165](https://github.com/tangle-network/agent-runtime/ > **jsonrpc**: `"2.0"` -Defined in: [mcp/server.ts:166](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L166) +Defined in: [mcp/server.ts:139](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L139) **`Experimental`** @@ -3198,7 +3039,7 @@ Defined in: [mcp/server.ts:166](https://github.com/tangle-network/agent-runtime/ > **id**: `string` \| `number` \| `null` -Defined in: [mcp/server.ts:167](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L167) +Defined in: [mcp/server.ts:140](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L140) **`Experimental`** @@ -3206,7 +3047,7 @@ Defined in: [mcp/server.ts:167](https://github.com/tangle-network/agent-runtime/ > `optional` **result?**: `unknown` -Defined in: [mcp/server.ts:168](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L168) +Defined in: [mcp/server.ts:141](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L141) **`Experimental`** @@ -3214,7 +3055,7 @@ Defined in: [mcp/server.ts:168](https://github.com/tangle-network/agent-runtime/ > `optional` **error?**: `object` -Defined in: [mcp/server.ts:169](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L169) +Defined in: [mcp/server.ts:142](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L142) **`Experimental`** @@ -3473,7 +3314,7 @@ hands it to the `resumeDelegate` seam instead of failing the record. 
##### run -> **run**: (`ctx`) => `Promise`\<`CoderOutput` \| [`ResearchOutputShape`](#researchoutputshape) \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput)\> +> **run**: (`ctx`) => `Promise`\<`CoderOutput` \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput) \| [`ResearchOutputShape`](#researchoutputshape)\> Defined in: [mcp/task-queue.ts:127](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/task-queue.ts#L127) @@ -3492,7 +3333,7 @@ queue wraps it with the profile tag. ###### Returns -`Promise`\<`CoderOutput` \| [`ResearchOutputShape`](#researchoutputshape) \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput)\> +`Promise`\<`CoderOutput` \| [`UiAuditorDelegationOutput`](#uiauditordelegationoutput) \| [`ResearchOutputShape`](#researchoutputshape)\> *** @@ -5482,7 +5323,7 @@ Defined in: [mcp/types.ts:288](https://github.com/tangle-network/agent-runtime/b ##### args -> **args**: [`DelegateCodeArgs`](#delegatecodeargs) \| [`DelegateResearchArgs`](#delegateresearchargs) \| [`DelegateUiAuditArgs`](#delegateuiauditargs) +> **args**: [`DelegateCodeArgs`](#delegatecodeargs) \| [`DelegateUiAuditArgs`](#delegateuiauditargs) \| [`DelegateResearchArgs`](#delegateresearchargs) Defined in: [mcp/types.ts:289](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/types.ts#L289) @@ -5798,12 +5639,13 @@ Test seam. > **CoderDelegate** = (`args`, `ctx`) => `Promise`\<`CoderOutput`\> -Defined in: [mcp/delegates.ts:88](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L88) +Defined in: [mcp/delegates.ts:80](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L80) **`Experimental`** -The server's coder-profile delegate slot — the closure the queue invokes for a - `delegate_code` task. `detachedSessionDelegate` is the built-in implementation. +The coder delegate closure — given the coder args + run context, drives the + sandbox-session coder path to a validated `CoderOutput`. 
`detachedSessionDelegate` is the + built-in implementation; the queue invokes one of these per coder delegation. #### Parameters @@ -5821,35 +5663,11 @@ The server's coder-profile delegate slot — the closure the queue invokes for a *** -### ResearcherDelegate - -> **ResearcherDelegate** = (`args`, `ctx`) => `Promise`\<[`ResearchOutputShape`](#researchoutputshape)\> - -Defined in: [mcp/delegates.ts:91](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L91) - -**`Experimental`** - -#### Parameters - -##### args - -[`DelegateResearchArgs`](#delegateresearchargs) - -##### ctx - -[`DelegateRunCtx`](#delegaterunctx) - -#### Returns - -`Promise`\<[`ResearchOutputShape`](#researchoutputshape)\> - -*** - ### UiAuditorDelegate > **UiAuditorDelegate** = (`args`, `ctx`) => `Promise`\<[`UiAuditorDelegationOutput`](#uiauditordelegationoutput)\> -Defined in: [mcp/delegates.ts:105](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L105) +Defined in: [mcp/delegates.ts:91](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L91) **`Experimental`** @@ -5879,7 +5697,7 @@ model seam. See `createInProcessUiAuditClient` + `uiAuditorProfile` in > **CoderReviewer** = (`output`, `task`, `ctx`) => `Promise`\<[`CoderReview`](#coderreview)\> \| [`CoderReview`](#coderreview) -Defined in: [mcp/delegates.ts:130](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L130) +Defined in: [mcp/delegates.ts:116](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L116) **`Experimental`** @@ -5915,7 +5733,7 @@ judge, a `pnpm review` command, anything returning a `CoderReview`. 
> **DetachedWinnerSelection** = `"highest-score"` \| `"smallest-diff"` \| `"highest-readiness"` \| `"first-approved"` -Defined in: [mcp/delegates.ts:143](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L143) +Defined in: [mcp/delegates.ts:129](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L129) **`Experimental`** @@ -6093,16 +5911,6 @@ Pluggable git runner (sync) — replaceable in tests. ## Variables -### DELEGATION\_MCP\_SERVER\_KEY - -> `const` **DELEGATION\_MCP\_SERVER\_KEY**: `"agent-runtime-delegation"` = `'agent-runtime-delegation'` - -Defined in: [mcp/delegation-profile.ts:32](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L32) - -MCP server key under which the agent-runtime delegation tools mount. - -*** - ### DELEGATION\_TRACE\_MAX\_SPANS > `const` **DELEGATION\_TRACE\_MAX\_SPANS**: `512` = `512` @@ -6137,31 +5945,31 @@ The built-in lens directory. Domain-blind (about any agent trace); compose at te *** -### DELEGATE\_CODE\_TOOL\_NAME +### DELEGATE\_FEEDBACK\_TOOL\_NAME -> `const` **DELEGATE\_CODE\_TOOL\_NAME**: `"delegate_code"` = `'delegate_code'` +> `const` **DELEGATE\_FEEDBACK\_TOOL\_NAME**: `"delegate_feedback"` = `'delegate_feedback'` -Defined in: [mcp/tools/delegate-code.ts:19](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-code.ts#L19) +Defined in: [mcp/tools/delegate-feedback.ts:24](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-feedback.ts#L24) **`Experimental`** *** -### DELEGATE\_CODE\_DESCRIPTION +### DELEGATE\_FEEDBACK\_DESCRIPTION -> `const` **DELEGATE\_CODE\_DESCRIPTION**: `string` +> `const` **DELEGATE\_FEEDBACK\_DESCRIPTION**: `string` -Defined in: [mcp/tools/delegate-code.ts:22](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-code.ts#L22) +Defined in: 
[mcp/tools/delegate-feedback.ts:27](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-feedback.ts#L27) **`Experimental`** *** -### DELEGATE\_CODE\_INPUT\_SCHEMA +### DELEGATE\_FEEDBACK\_INPUT\_SCHEMA -> `const` **DELEGATE\_CODE\_INPUT\_SCHEMA**: `object` +> `const` **DELEGATE\_FEEDBACK\_INPUT\_SCHEMA**: `object` -Defined in: [mcp/tools/delegate-code.ts:46](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-code.ts#L46) +Defined in: [mcp/tools/delegate-feedback.ts:51](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-feedback.ts#L51) **`Experimental`** @@ -6175,121 +5983,121 @@ Defined in: [mcp/tools/delegate-code.ts:46](https://github.com/tangle-network/ag > `readonly` **properties**: `object` -###### properties.goal +###### properties.refersTo -> `readonly` **goal**: `object` +> `readonly` **refersTo**: `object` -###### properties.goal.type +###### properties.refersTo.type -> `readonly` **type**: `"string"` = `'string'` +> `readonly` **type**: `"object"` = `'object'` -###### properties.goal.description +###### properties.refersTo.properties -> `readonly` **description**: `"Natural-language description of what the coder must accomplish."` = `'Natural-language description of what the coder must accomplish.'` +> `readonly` **properties**: `object` -###### properties.repoRoot +###### properties.refersTo.properties.kind -> `readonly` **repoRoot**: `object` +> `readonly` **kind**: `object` -###### properties.repoRoot.type +###### properties.refersTo.properties.kind.type > `readonly` **type**: `"string"` = `'string'` -###### properties.repoRoot.description +###### properties.refersTo.properties.kind.enum -> `readonly` **description**: `"Absolute path inside the sandbox where the repo lives."` = `'Absolute path inside the sandbox where the repo lives.'` +> `readonly` **enum**: readonly \[`"delegation"`, `"artifact"`, `"outcome"`\] -###### properties.contextHint +###### 
properties.refersTo.properties.ref -> `readonly` **contextHint**: `object` +> `readonly` **ref**: `object` -###### properties.contextHint.type +###### properties.refersTo.properties.ref.type > `readonly` **type**: `"string"` = `'string'` -###### properties.contextHint.description +###### properties.refersTo.required -> `readonly` **description**: `"Optional free-form context the coder sees in the prompt prelude."` = `'Optional free-form context the coder sees in the prompt prelude.'` +> `readonly` **required**: readonly \[`"kind"`, `"ref"`\] -###### properties.variants +###### properties.refersTo.additionalProperties -> `readonly` **variants**: `object` +> `readonly` **additionalProperties**: `false` = `false` -###### properties.variants.type +###### properties.rating -> `readonly` **type**: `"integer"` = `'integer'` +> `readonly` **rating**: `object` -###### properties.variants.minimum +###### properties.rating.type -> `readonly` **minimum**: `1` = `1` +> `readonly` **type**: `"object"` = `'object'` -###### properties.variants.maximum +###### properties.rating.properties -> `readonly` **maximum**: `8` = `8` +> `readonly` **properties**: `object` -###### properties.variants.description +###### properties.rating.properties.score -> `readonly` **description**: `"Number of parallel coder harnesses. Default 1."` = `'Number of parallel coder harnesses. 
Default 1.'` +> `readonly` **score**: `object` -###### properties.config +###### properties.rating.properties.score.type -> `readonly` **config**: `object` +> `readonly` **type**: `"number"` = `'number'` -###### properties.config.type +###### properties.rating.properties.score.minimum -> `readonly` **type**: `"object"` = `'object'` +> `readonly` **minimum**: `0` = `0` -###### properties.config.properties +###### properties.rating.properties.score.maximum -> `readonly` **properties**: `object` +> `readonly` **maximum**: `1` = `1` -###### properties.config.properties.testCmd +###### properties.rating.properties.label -> `readonly` **testCmd**: `object` +> `readonly` **label**: `object` -###### properties.config.properties.testCmd.type +###### properties.rating.properties.label.type > `readonly` **type**: `"string"` = `'string'` -###### properties.config.properties.typecheckCmd +###### properties.rating.properties.label.enum -> `readonly` **typecheckCmd**: `object` +> `readonly` **enum**: readonly \[`"good"`, `"bad"`, `"neutral"`, `"mixed"`\] -###### properties.config.properties.typecheckCmd.type +###### properties.rating.properties.notes -> `readonly` **type**: `"string"` = `'string'` +> `readonly` **notes**: `object` -###### properties.config.properties.forbiddenPaths +###### properties.rating.properties.notes.type -> `readonly` **forbiddenPaths**: `object` +> `readonly` **type**: `"string"` = `'string'` -###### properties.config.properties.forbiddenPaths.type +###### properties.rating.required -> `readonly` **type**: `"array"` = `'array'` +> `readonly` **required**: readonly \[`"score"`, `"notes"`\] -###### properties.config.properties.forbiddenPaths.items +###### properties.rating.additionalProperties -> `readonly` **items**: `object` +> `readonly` **additionalProperties**: `false` = `false` -###### properties.config.properties.forbiddenPaths.items.type +###### properties.by -> `readonly` **type**: `"string"` = `'string'` +> `readonly` **by**: `object` -###### 
properties.config.properties.maxDiffLines +###### properties.by.type -> `readonly` **maxDiffLines**: `object` +> `readonly` **type**: `"string"` = `'string'` -###### properties.config.properties.maxDiffLines.type +###### properties.by.enum -> `readonly` **type**: `"integer"` = `'integer'` +> `readonly` **enum**: readonly \[`"agent"`, `"user"`, `"downstream-judge"`\] -###### properties.config.properties.maxDiffLines.minimum +###### properties.capturedAt -> `readonly` **minimum**: `1` = `1` +> `readonly` **capturedAt**: `object` -###### properties.config.additionalProperties +###### properties.capturedAt.type -> `readonly` **additionalProperties**: `false` = `false` +> `readonly` **type**: `"string"` = `'string'` ###### properties.namespace @@ -6299,13 +6107,9 @@ Defined in: [mcp/tools/delegate-code.ts:46](https://github.com/tangle-network/ag > `readonly` **type**: `"string"` = `'string'` -###### properties.namespace.description - -> `readonly` **description**: `"Multi-tenant scope (customer-id, workspace-id)."` = `'Multi-tenant scope (customer-id, workspace-id).'` - ##### required -> `readonly` **required**: readonly \[`"goal"`, `"repoRoot"`\] +> `readonly` **required**: readonly \[`"refersTo"`, `"rating"`, `"by"`\] ##### additionalProperties @@ -6313,405 +6117,29 @@ Defined in: [mcp/tools/delegate-code.ts:46](https://github.com/tangle-network/ag *** -### DELEGATE\_FEEDBACK\_TOOL\_NAME +### DELEGATE\_UI\_AUDIT\_TOOL\_NAME -> `const` **DELEGATE\_FEEDBACK\_TOOL\_NAME**: `"delegate_feedback"` = `'delegate_feedback'` +> `const` **DELEGATE\_UI\_AUDIT\_TOOL\_NAME**: `"delegate_ui_audit"` = `'delegate_ui_audit'` -Defined in: [mcp/tools/delegate-feedback.ts:24](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-feedback.ts#L24) +Defined in: [mcp/tools/delegate-ui-audit.ts:30](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-ui-audit.ts#L30) **`Experimental`** *** -### DELEGATE\_FEEDBACK\_DESCRIPTION +### 
DELEGATE\_UI\_AUDIT\_DESCRIPTION -> `const` **DELEGATE\_FEEDBACK\_DESCRIPTION**: `string` +> `const` **DELEGATE\_UI\_AUDIT\_DESCRIPTION**: `string` -Defined in: [mcp/tools/delegate-feedback.ts:27](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-feedback.ts#L27) +Defined in: [mcp/tools/delegate-ui-audit.ts:33](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-ui-audit.ts#L33) **`Experimental`** *** -### DELEGATE\_FEEDBACK\_INPUT\_SCHEMA +### DELEGATE\_UI\_AUDIT\_INPUT\_SCHEMA -> `const` **DELEGATE\_FEEDBACK\_INPUT\_SCHEMA**: `object` - -Defined in: [mcp/tools/delegate-feedback.ts:51](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-feedback.ts#L51) - -**`Experimental`** - -#### Type Declaration - -##### type - -> `readonly` **type**: `"object"` = `'object'` - -##### properties - -> `readonly` **properties**: `object` - -###### properties.refersTo - -> `readonly` **refersTo**: `object` - -###### properties.refersTo.type - -> `readonly` **type**: `"object"` = `'object'` - -###### properties.refersTo.properties - -> `readonly` **properties**: `object` - -###### properties.refersTo.properties.kind - -> `readonly` **kind**: `object` - -###### properties.refersTo.properties.kind.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.refersTo.properties.kind.enum - -> `readonly` **enum**: readonly \[`"delegation"`, `"artifact"`, `"outcome"`\] - -###### properties.refersTo.properties.ref - -> `readonly` **ref**: `object` - -###### properties.refersTo.properties.ref.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.refersTo.required - -> `readonly` **required**: readonly \[`"kind"`, `"ref"`\] - -###### properties.refersTo.additionalProperties - -> `readonly` **additionalProperties**: `false` = `false` - -###### properties.rating - -> `readonly` **rating**: `object` - -###### properties.rating.type - -> `readonly` **type**: `"object"` 
= `'object'` - -###### properties.rating.properties - -> `readonly` **properties**: `object` - -###### properties.rating.properties.score - -> `readonly` **score**: `object` - -###### properties.rating.properties.score.type - -> `readonly` **type**: `"number"` = `'number'` - -###### properties.rating.properties.score.minimum - -> `readonly` **minimum**: `0` = `0` - -###### properties.rating.properties.score.maximum - -> `readonly` **maximum**: `1` = `1` - -###### properties.rating.properties.label - -> `readonly` **label**: `object` - -###### properties.rating.properties.label.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.rating.properties.label.enum - -> `readonly` **enum**: readonly \[`"good"`, `"bad"`, `"neutral"`, `"mixed"`\] - -###### properties.rating.properties.notes - -> `readonly` **notes**: `object` - -###### properties.rating.properties.notes.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.rating.required - -> `readonly` **required**: readonly \[`"score"`, `"notes"`\] - -###### properties.rating.additionalProperties - -> `readonly` **additionalProperties**: `false` = `false` - -###### properties.by - -> `readonly` **by**: `object` - -###### properties.by.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.by.enum - -> `readonly` **enum**: readonly \[`"agent"`, `"user"`, `"downstream-judge"`\] - -###### properties.capturedAt - -> `readonly` **capturedAt**: `object` - -###### properties.capturedAt.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.namespace - -> `readonly` **namespace**: `object` - -###### properties.namespace.type - -> `readonly` **type**: `"string"` = `'string'` - -##### required - -> `readonly` **required**: readonly \[`"refersTo"`, `"rating"`, `"by"`\] - -##### additionalProperties - -> `readonly` **additionalProperties**: `false` = `false` - -*** - -### DELEGATE\_RESEARCH\_TOOL\_NAME - -> `const` 
**DELEGATE\_RESEARCH\_TOOL\_NAME**: `"delegate_research"` = `'delegate_research'` - -Defined in: [mcp/tools/delegate-research.ts:25](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-research.ts#L25) - -**`Experimental`** - -*** - -### DELEGATE\_RESEARCH\_DESCRIPTION - -> `const` **DELEGATE\_RESEARCH\_DESCRIPTION**: `string` - -Defined in: [mcp/tools/delegate-research.ts:28](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-research.ts#L28) - -**`Experimental`** - -*** - -### DELEGATE\_RESEARCH\_INPUT\_SCHEMA - -> `const` **DELEGATE\_RESEARCH\_INPUT\_SCHEMA**: `object` - -Defined in: [mcp/tools/delegate-research.ts:52](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-research.ts#L52) - -**`Experimental`** - -#### Type Declaration - -##### type - -> `readonly` **type**: `"object"` = `'object'` - -##### properties - -> `readonly` **properties**: `object` - -###### properties.question - -> `readonly` **question**: `object` - -###### properties.question.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.question.description - -> `readonly` **description**: `"The research question to answer."` = `'The research question to answer.'` - -###### properties.namespace - -> `readonly` **namespace**: `object` - -###### properties.namespace.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.namespace.description - -> `readonly` **description**: `"Multi-tenant scope (customer-id, workspace-id). REQUIRED."` = `'Multi-tenant scope (customer-id, workspace-id). REQUIRED.'` - -###### properties.scope - -> `readonly` **scope**: `object` - -###### properties.scope.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.scope.description - -> `readonly` **description**: "Bound, e.g. \"audience for cpg-founder ICP\"." = `'Bound, e.g. 
"audience for cpg-founder ICP".'` - -###### properties.sources - -> `readonly` **sources**: `object` - -###### properties.sources.type - -> `readonly` **type**: `"array"` = `'array'` - -###### properties.sources.items - -> `readonly` **items**: `object` - -###### properties.sources.items.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.sources.items.enum - -> `readonly` **enum**: readonly [`ResearchSource`](#researchsource)[] - -###### properties.variants - -> `readonly` **variants**: `object` - -###### properties.variants.type - -> `readonly` **type**: `"integer"` = `'integer'` - -###### properties.variants.minimum - -> `readonly` **minimum**: `1` = `1` - -###### properties.variants.maximum - -> `readonly` **maximum**: `8` = `8` - -###### properties.config - -> `readonly` **config**: `object` - -###### properties.config.type - -> `readonly` **type**: `"object"` = `'object'` - -###### properties.config.properties - -> `readonly` **properties**: `object` - -###### properties.config.properties.recencyWindow - -> `readonly` **recencyWindow**: `object` - -###### properties.config.properties.recencyWindow.type - -> `readonly` **type**: `"object"` = `'object'` - -###### properties.config.properties.recencyWindow.properties - -> `readonly` **properties**: `object` - -###### properties.config.properties.recencyWindow.properties.since - -> `readonly` **since**: `object` - -###### properties.config.properties.recencyWindow.properties.since.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.config.properties.recencyWindow.properties.since.description - -> `readonly` **description**: `"ISO datetime"` = `'ISO datetime'` - -###### properties.config.properties.recencyWindow.properties.until - -> `readonly` **until**: `object` - -###### properties.config.properties.recencyWindow.properties.until.type - -> `readonly` **type**: `"string"` = `'string'` - -###### properties.config.properties.recencyWindow.properties.until.description 
- -> `readonly` **description**: `"ISO datetime"` = `'ISO datetime'` - -###### properties.config.properties.recencyWindow.additionalProperties - -> `readonly` **additionalProperties**: `false` = `false` - -###### properties.config.properties.maxItems - -> `readonly` **maxItems**: `object` - -###### properties.config.properties.maxItems.type - -> `readonly` **type**: `"integer"` = `'integer'` - -###### properties.config.properties.maxItems.minimum - -> `readonly` **minimum**: `1` = `1` - -###### properties.config.properties.minConfidence - -> `readonly` **minConfidence**: `object` - -###### properties.config.properties.minConfidence.type - -> `readonly` **type**: `"number"` = `'number'` - -###### properties.config.properties.minConfidence.minimum - -> `readonly` **minimum**: `0` = `0` - -###### properties.config.properties.minConfidence.maximum - -> `readonly` **maximum**: `1` = `1` - -###### properties.config.additionalProperties - -> `readonly` **additionalProperties**: `false` = `false` - -##### required - -> `readonly` **required**: readonly \[`"question"`, `"namespace"`\] - -##### additionalProperties - -> `readonly` **additionalProperties**: `false` = `false` - -*** - -### DELEGATE\_UI\_AUDIT\_TOOL\_NAME - -> `const` **DELEGATE\_UI\_AUDIT\_TOOL\_NAME**: `"delegate_ui_audit"` = `'delegate_ui_audit'` - -Defined in: [mcp/tools/delegate-ui-audit.ts:30](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-ui-audit.ts#L30) - -**`Experimental`** - -*** - -### DELEGATE\_UI\_AUDIT\_DESCRIPTION - -> `const` **DELEGATE\_UI\_AUDIT\_DESCRIPTION**: `string` - -Defined in: [mcp/tools/delegate-ui-audit.ts:33](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-ui-audit.ts#L33) - -**`Experimental`** - -*** - -### DELEGATE\_UI\_AUDIT\_INPUT\_SCHEMA - -> `const` **DELEGATE\_UI\_AUDIT\_INPUT\_SCHEMA**: `object` +> `const` **DELEGATE\_UI\_AUDIT\_INPUT\_SCHEMA**: `object` Defined in: 
[mcp/tools/delegate-ui-audit.ts:86](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-ui-audit.ts#L86) @@ -7248,7 +6676,7 @@ sibling mode would lie about workspace topology. > **detachedSessionDelegate**(`options`): [`CoderDelegate`](#coderdelegate) -Defined in: [mcp/delegates.ts:223](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L223) +Defined in: [mcp/delegates.ts:217](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L217) **`Experimental`** @@ -7282,7 +6710,7 @@ cross-restart resume (the `driveTurn` tick) is opt-in behind `MCP_ENABLE_DETACHE > **coderTaskFromArgs**(`args`): [`CoderTask`](profiles.md#codertask) -Defined in: [mcp/delegates.ts:430](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L430) +Defined in: [mcp/delegates.ts:426](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L426) **`Experimental`** @@ -7307,7 +6735,7 @@ process dispatched. > **settleDetachedCoderTurn**(`turn`, `options`): `Promise`\<`CoderOutput`\> -Defined in: [mcp/delegates.ts:468](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L468) +Defined in: [mcp/delegates.ts:464](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegates.ts#L464) **`Experimental`** @@ -7339,68 +6767,6 @@ advertised on the generic `worktreeFanout` path. This helper (with `coderTaskFro *** -### buildDelegationMcpServer() - -> **buildDelegationMcpServer**(`options?`): `Record`\<`string`, `AgentProfileMcpServer`\> \| `undefined` - -Defined in: [mcp/delegation-profile.ts:69](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L69) - -Build the delegation MCP entry the sandbox-side agent loads on startup. -Returns `undefined` when no sandbox API key is resolvable — callers merge -the result into a profile's `mcp` map only when defined. - -#### Parameters - -##### options? 
- -[`BuildDelegationMcpServerOptions`](#builddelegationmcpserveroptions) = `{}` - -#### Returns - -`Record`\<`string`, `AgentProfileMcpServer`\> \| `undefined` - -*** - -### composeProductionAgentProfile() - -> **composeProductionAgentProfile**(`baseProfile`, `options?`): `AgentProfile` - -Defined in: [mcp/delegation-profile.ts:164](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/delegation-profile.ts#L164) - -Compose the production `AgentProfile`: the canonical base profile with the -delegation MCP merged into `mcp`. Used by every call site that boots a -sandbox or runs a chat turn through the sandbox path, and by eval wiring so -the scorecard profile hash reflects the actual production profile. - -Merge rules: - - `mcp`: base map preserved; `options.mcpConnections` (resolved certified - servers) merged over it; the delegation entry is appended last under - [DELEGATION\_MCP\_SERVER\_KEY](#delegation_mcp_server_key), and omitted entirely when no sandbox - API key resolves. - - `tools`: base box-flags map preserved; `options.tools` overlaid per key. - - `hooks`: per event, base commands preserved; `options.hooks[event]` - appended after the base ones. - - `subagents`: base map preserved; `options.subagents` overlaid per key. - - `prompt.systemPrompt`: replaced when `options.systemPrompt` is set. - - `resources.files`: `options.extraFiles` concatenated after base files. - - `name`: replaced when `options.name` is set. - -#### Parameters - -##### baseProfile - -`AgentProfile` - -##### options? 
- -[`ComposeProductionAgentProfileOptions`](#composeproductionagentprofileoptions) = `{}` - -#### Returns - -`AgentProfile` - -*** - ### buildDelegationTraceSpans() > **buildDelegationTraceSpans**(`events`): [`DelegationTraceSpan`](#delegationtracespan)[] @@ -7804,7 +7170,7 @@ Does NOT throw when: > **createMcpServer**(`options?`): [`McpServer`](#mcpserver) -Defined in: [mcp/server.ts:177](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L177) +Defined in: [mcp/server.ts:150](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L150) **`Experimental`** @@ -7824,7 +7190,7 @@ Defined in: [mcp/server.ts:177](https://github.com/tangle-network/agent-runtime/ > **createInProcessTransport**(): `object` -Defined in: [mcp/server.ts:389](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L389) +Defined in: [mcp/server.ts:334](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/server.ts#L334) **`Experimental`** @@ -8024,46 +7390,6 @@ Build the driver's MCP tools over a live scope. 
*** -### validateDelegateCodeArgs() - -> **validateDelegateCodeArgs**(`raw`): [`DelegateCodeArgs`](#delegatecodeargs) - -Defined in: [mcp/tools/delegate-code.ts:90](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-code.ts#L90) - -**`Experimental`** - -#### Parameters - -##### raw - -`unknown` - -#### Returns - -[`DelegateCodeArgs`](#delegatecodeargs) - -*** - -### createDelegateCodeHandler() - -> **createDelegateCodeHandler**(`options`): (`raw`) => `Promise`\<[`DelegateCodeResult`](#delegatecoderesult)\> - -Defined in: [mcp/tools/delegate-code.ts:177](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-code.ts#L177) - -**`Experimental`** - -#### Parameters - -##### options - -`DelegateCodeHandlerOptions` - -#### Returns - -(`raw`) => `Promise`\<[`DelegateCodeResult`](#delegatecoderesult)\> - -*** - ### validateDelegateFeedbackArgs() > **validateDelegateFeedbackArgs**(`raw`): [`DelegateFeedbackArgs`](#delegatefeedbackargs) @@ -8104,46 +7430,6 @@ Defined in: [mcp/tools/delegate-feedback.ts:159](https://github.com/tangle-netwo *** -### validateDelegateResearchArgs() - -> **validateDelegateResearchArgs**(`raw`): [`DelegateResearchArgs`](#delegateresearchargs) - -Defined in: [mcp/tools/delegate-research.ts:94](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-research.ts#L94) - -**`Experimental`** - -#### Parameters - -##### raw - -`unknown` - -#### Returns - -[`DelegateResearchArgs`](#delegateresearchargs) - -*** - -### createDelegateResearchHandler() - -> **createDelegateResearchHandler**(`options`): (`raw`) => `Promise`\<[`DelegateResearchResult`](#delegateresearchresult)\> - -Defined in: [mcp/tools/delegate-research.ts:192](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/delegate-research.ts#L192) - -**`Experimental`** - -#### Parameters - -##### options - -`DelegateResearchHandlerOptions` - -#### Returns - -(`raw`) => 
`Promise`\<[`DelegateResearchResult`](#delegateresearchresult)\> - -*** - ### validateDelegateUiAuditArgs() > **validateDelegateUiAuditArgs**(`raw`): [`DelegateUiAuditArgs`](#delegateuiauditargs) diff --git a/docs/api/profiles.md b/docs/api/profiles.md index 230d7e65..e3b11f85 100644 --- a/docs/api/profiles.md +++ b/docs/api/profiles.md @@ -204,7 +204,7 @@ Defined in: [audit/issue-writer.ts:374](https://github.com/tangle-network/agent- ### CoderTask -Defined in: [profiles/coder.ts:20](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L20) +Defined in: [profiles/coder.ts:15](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L15) **`Experimental`** @@ -216,7 +216,7 @@ The per-task inputs `coderTaskToPrompt` renders + the worktree gate enforces. > **goal**: `string` -Defined in: [profiles/coder.ts:22](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L22) +Defined in: [profiles/coder.ts:17](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L17) **`Experimental`** @@ -226,7 +226,7 @@ What the agent must accomplish. Free-form prose. > **repoRoot**: `string` -Defined in: [profiles/coder.ts:24](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L24) +Defined in: [profiles/coder.ts:19](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L19) **`Experimental`** @@ -236,7 +236,7 @@ Absolute path inside the sandbox where the repo lives. > `optional` **baseBranch?**: `string` -Defined in: [profiles/coder.ts:26](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L26) +Defined in: [profiles/coder.ts:21](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L21) **`Experimental`** @@ -246,7 +246,7 @@ Default `main`. The branch the agent diffs against. 
> `optional` **testCmd?**: `string` -Defined in: [profiles/coder.ts:28](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L28) +Defined in: [profiles/coder.ts:23](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L23) **`Experimental`** @@ -256,7 +256,7 @@ Default `pnpm test --run`. > `optional` **typecheckCmd?**: `string` -Defined in: [profiles/coder.ts:30](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L30) +Defined in: [profiles/coder.ts:25](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L25) **`Experimental`** @@ -266,7 +266,7 @@ Default `pnpm typecheck`. > `optional` **contextFiles?**: `string`[] -Defined in: [profiles/coder.ts:32](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L32) +Defined in: [profiles/coder.ts:27](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L27) **`Experimental`** @@ -276,7 +276,7 @@ Files the agent may inspect for context. Surfaced verbatim in the prompt. > `optional` **forbiddenPaths?**: `string`[] -Defined in: [profiles/coder.ts:37](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L37) +Defined in: [profiles/coder.ts:32](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L32) **`Experimental`** @@ -287,7 +287,7 @@ Use glob-free literal path prefixes for unambiguous enforcement. > `optional` **maxDiffLines?**: `number` -Defined in: [profiles/coder.ts:39](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L39) +Defined in: [profiles/coder.ts:34](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L34) **`Experimental`** @@ -1308,32 +1308,6 @@ Severity scale. 
## Variables -### DEFAULT\_CODER\_SYSTEM\_PROMPT - -> `const` **DEFAULT\_CODER\_SYSTEM\_PROMPT**: `string` - -Defined in: [profiles/coder.ts:43](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L43) - -**`Experimental`** - -The coder agent's standing instruction (its body lives in `coderProfile.prompt`). - -*** - -### coderProfile - -> `const` **coderProfile**: `AgentProfile` - -Defined in: [profiles/coder.ts:72](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L72) - -**`Experimental`** - -The coder `AgentProfile` — the §1.5 DATA the substrate materializes into a harness invocation. -Stateless and harness-agnostic: a consumer overrides `model`/`metadata.backendType` by spreading -a copy, never by a factory. `worktreeFanout` authors one such profile per harness leaf. - -*** - ### SHARED\_AUDITOR\_RULES > `const` **SHARED\_AUDITOR\_RULES**: `string` @@ -1520,7 +1494,7 @@ Regenerate `/index.md` from registry.json. > **coderTaskToPrompt**(`task`): `string` -Defined in: [profiles/coder.ts:81](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L81) +Defined in: [profiles/coder.ts:38](https://github.com/tangle-network/agent-runtime/blob/main/src/profiles/coder.ts#L38) **`Experimental`** diff --git a/docs/canonical-api.md b/docs/canonical-api.md index bd6b79ff..3ae36c84 100644 --- a/docs/canonical-api.md +++ b/docs/canonical-api.md @@ -2,7 +2,7 @@ -> **Version 0.71.1.** Per-symbol signatures live in the generated `docs/api/` reference (one page per module). The pinned substrate is agent-eval `>=0.95.0 <1.0.0`; the sandbox substrate that materializes profiles into harness shapes is `@tangle-network/sandbox` (peer `>=0.8.0 <1.0.0`). The neutral contract types (`AgentProfile`, `AgentProfileMcpServer`, `HarnessType`, `ReasoningEffort`, `Part`/`ToolPart`/`ToolState`) are owned by **`@tangle-network/agent-interface`** (peer `>=0.10.0 <1.0.0`) — the single source of truth. 
Substrate symbols (`selfImprove`/`gepaProposer`/`defaultProductionGate`/`heldOutGate`/`pairedBootstrap`/…) are re-exported through `@tangle-network/agent-eval/contract` (or `/campaign`), not local to this package. +> **Version 0.72.0.** Per-symbol signatures live in the generated `docs/api/` reference (one page per module). The pinned substrate is agent-eval `>=0.95.0 <1.0.0`; the sandbox substrate that materializes profiles into harness shapes is `@tangle-network/sandbox` (peer `>=0.8.0 <1.0.0`). The neutral contract types (`AgentProfile`, `AgentProfileMcpServer`, `HarnessType`, `ReasoningEffort`, `Part`/`ToolPart`/`ToolState`) are owned by **`@tangle-network/agent-interface`** (peer `>=0.10.0 <1.0.0`) — the single source of truth. Substrate symbols (`selfImprove`/`gepaProposer`/`defaultProductionGate`/`heldOutGate`/`pairedBootstrap`/…) are re-exported through `@tangle-network/agent-eval/contract` (or `/campaign`), not local to this package. > > **`./loops` is the runtime barrel** — `package.json` maps it to `src/runtime/index.ts`. Everything below labelled `/loops` is the recursive-atom + loop-kernel surface. 
> @@ -69,10 +69,9 @@ Every symbol below is a LOCAL export of this package (subpath shown) unless tagg | Attach N observers to a running loop | `composeRuntimeHooks(...)` — root export | a second event-bus or callback-prop zoo (there is ONE stream) | | Ship traces to an OTLP collector | `createOtelExporter()` + `buildLoopOtelSpans()` — root export | your own OTLP serializer or pulling the OTEL SDK | | State any benchmark/A-B claim | `pairedLift(...)` (bench) over `pairedBootstrap`/`heldoutSignificance` (substrate) | your own bootstrap loop/PRNG per gate; a point lift without `low/high/pairs` | -| Compose the prod sandbox profile (eval/prod parity) | `composeProductionAgentProfile(base, opts)` — `/mcp` | hand-merging a delegation/retrieval MCP per call site or maintaining two profiles | -| Let an agent **delegate a coding task inside its OWN sandbox environment** (durable, fire-and-poll, survives restart) | the **delegation MCP** — `delegate_code`/`delegate_research` + `delegation_status`/`delegation_history`/`delegate_feedback`, wired by `composeProductionAgentProfile` — `/mcp` | `spawn_agent` — a worker in a *separate, chosen* backend; not own-environment delegation, no durable queue/ledger | -| Let an agent **delegate ONE generic INTENT** (no fixed coder/researcher type) and get the result + real spend SYNCHRONOUSLY | the **`delegate` tool** — `createDelegateHandler` via `createMcpServer({ delegateSupervisor })`; mount it over the `agent-runtime mcp` bin with `MCP_ENABLE_DELEGATE=1` (the bin authors a supervisor over a `sandbox` backend) — `/mcp` | a hardcoded coder/researcher profile, or `delegate_code`+`delegate_research` as two separate verbs — `delegate` is the ONE replacement and is the only delegation path with a cost channel | -| Have a **supervisor spawn + live-drive workers in a backend you choose** and observe/steer/resume them | the **coordination MCP** — `createCoordinationTools` / `serveCoordinationMcp` over a live `Scope`; each worker's leaf is 
`createExecutor({ backend })` — `/mcp`,`/loops` | `delegate_code` — own-sandbox-only, one-shot, no live steer/recursion/conserved-budget | +| Let an agent **delegate ONE generic INTENT** (no fixed coder/researcher type) and get the result + real spend SYNCHRONOUSLY | the **`delegate` tool** — `createDelegateHandler` via `createMcpServer({ delegateSupervisor })`; mount it over the `agent-runtime mcp` bin with `MCP_ENABLE_DELEGATE=1` (the bin authors a supervisor over a `sandbox` backend) — `/mcp` | a hardcoded coder/researcher profile, or task-specific `delegate_code`/`delegate_research` verbs (RETIRED) — `delegate` is the ONE delegation path and the only one with a cost channel | +| Run a coding task INSIDE the agent's OWN sandbox session (a sibling box, fresh branch, validated patch) | `detachedSessionDelegate({ sandboxClient \| executor, workerProfile? })` — `/mcp` (pass the worker `AgentProfile`; omit for a minimal model-only default) | a hardcoded coder profile baked into the delegate; `delegate()` (that spawns workers in a *chosen* backend, not the agent's own session) | +| Have a **supervisor spawn + live-drive workers in a backend you choose** and observe/steer/resume them | the **coordination MCP** — `createCoordinationTools` / `serveCoordinationMcp` over a live `Scope`; each worker's leaf is `createExecutor({ backend })` — `/mcp`,`/loops` | `detachedSessionDelegate` — own-sandbox-session only, one-shot, no live steer/recursion/conserved-budget | | Stand up a vertical agent in the eval loop | `defineAgent(manifest)` + `createSurfaceImprovementAdapter` — `/agent` | a per-vertical manifest parser, surface-validator, or bespoke `ImprovementAdapter` | | Turn intelligence/observation OFF (prove inference-only billing) | `withTangleIntelligence(agent, { effort: 'off' })` — `/intelligence` | a custom trace-wrapper or hand-rolled effort/tier config | diff --git a/examples/mcp-delegation/mcp-delegation.ts b/examples/mcp-delegation/mcp-delegation.ts index 
aba4ce0d..51674807 100644 --- a/examples/mcp-delegation/mcp-delegation.ts +++ b/examples/mcp-delegation/mcp-delegation.ts @@ -24,17 +24,14 @@ function buildDelegationMcpEntry(opts: { env: { TANGLE_API_KEY: opts.sandboxApiKey, SANDBOX_BASE_URL: opts.sandboxBaseUrl ?? 'https://sandbox.tangle.tools', + // Opt into the ONE generic `delegate` verb (a supervisor that authors + drives its own + // worker and returns the delivered output with its cost). It needs a real sandbox key. + ...(opts.sandboxApiKey ? { MCP_ENABLE_DELEGATE: '1' } : {}), }, enabled: true, metadata: { surface: 'delegation:dispatch', - tools: [ - 'delegate_code', - 'delegate_research', - 'delegate_feedback', - 'delegation_status', - 'delegation_history', - ], + tools: ['delegate', 'delegate_feedback', 'delegation_status', 'delegation_history'], }, }, } @@ -42,8 +39,7 @@ function buildDelegationMcpEntry(opts: { /** * Compose a product's AgentProfile with the delegation MCP entry merged in. - * In production this is the `composeProductionAgentProfile`-style helper - * each product owns. The shape here is illustrative — copy and adapt. + * The shape here is illustrative — copy and adapt. */ export function composeAgentProfileWithDelegation(opts: { sandboxApiKey: string @@ -65,17 +61,13 @@ export function composeAgentProfileWithDelegation(opts: { // ── 2. SMOKE ───────────────────────────────────────────────────────────── // -// Spawn `agent-runtime-mcp` and verify the five canonical tools show up. -// The child is the same bin a sandbox-side agent would launch when the -// profile mounts the MCP entry above. - -const EXPECTED_TOOLS = [ - 'delegate_code', - 'delegate_feedback', - 'delegate_research', - 'delegation_history', - 'delegation_status', -] +// Spawn `agent-runtime-mcp` and verify the always-on queue-bound tools show +// up. The child is the same bin a sandbox-side agent would launch when the +// profile mounts the MCP entry above. 
The generic `delegate` verb registers +// only when MCP_ENABLE_DELEGATE=1 AND a real sandbox key resolves, so the +// diagnostic (no-key) smoke asserts only the always-on trio. + +const EXPECTED_TOOLS = ['delegate_feedback', 'delegation_history', 'delegation_status'] interface JsonRpcResponse { jsonrpc: '2.0' @@ -171,7 +163,7 @@ async function smokeMcpToolsList(): Promise { if (missing.length > 0) { throw new Error(`agent-runtime-mcp is missing tools: ${missing.join(', ')}`) } - console.log('OK — all five delegation tools are exposed.') + console.log('OK — the always-on queue-bound delegation tools are exposed.') } finally { child.kill('SIGINT') } diff --git a/package.json b/package.json index 2d314684..34c049bc 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@tangle-network/agent-runtime", - "version": "0.71.1", + "version": "0.72.0", "description": "Shared task-lifecycle skeleton for agents: a recursive loop kernel for chat turns, one-shot tasks, and multi-attempt loops, with trace capture and eval-gated self-improvement. Domain behavior lives in adapters; scoring and ship-gates in @tangle-network/agent-eval.", "homepage": "https://github.com/tangle-network/agent-runtime#readme", "repository": { diff --git a/src/agent/sandbox-act.ts b/src/agent/sandbox-act.ts index daaf71c1..cb4930f6 100644 --- a/src/agent/sandbox-act.ts +++ b/src/agent/sandbox-act.ts @@ -3,12 +3,13 @@ * * The point of this adapter is parity: the eval substrate must run the agent * through the SAME profile the production chat turn uses, or scorecard numbers - * grade a profile that never ships. 
`createSandboxAct` composes the production - * profile via {@link composeProductionAgentProfile}, boots a sandbox with it - * through the loop kernel's own {@link createSandboxForSpec}, streams the - * `streamPrompt` events mapped to the `RuntimeStreamEvent` vocabulary, and - * resolves the `OutputAdapter`-parsed output for rubric scoring — satisfying - * the `act` streaming contract with one code path shared by chat and eval. + * grade a profile that never ships. `createSandboxAct` boots a sandbox with the + * agent's profile (the caller's `baseProfile`, with optional per-persona + * overrides) through the loop kernel's own {@link createSandboxForSpec}, + * streams the `streamPrompt` events mapped to the `RuntimeStreamEvent` + * vocabulary, and resolves the `OutputAdapter`-parsed output for rubric scoring + * — satisfying the `act` streaming contract with one code path shared by chat + * and eval. * * Agents with a bespoke streaming chat turn should wire THAT into `act` * directly (the contract is designed for it); this adapter is the default for @@ -16,18 +17,35 @@ * agents agent-builder generates. */ -import type { AgentProfile } from '@tangle-network/agent-interface' +import type { + AgentProfile, + AgentProfileFileMount, + AgentProfileMcpServer, +} from '@tangle-network/agent-interface' import type { SandboxEvent } from '@tangle-network/sandbox' -import type { ComposeProductionAgentProfileOptions } from '../mcp/delegation-profile' -import { composeProductionAgentProfile } from '../mcp/delegation-profile' import type { AgentRunSpec, OutputAdapter, SandboxClient } from '../runtime' import { mapSandboxEvent } from '../runtime' import { createSandboxForSpec } from '../runtime/run-loop' import type { RuntimeStreamEvent } from '../types' import type { AgentRunContext, AgentRunInvocation } from './define-agent' +/** Per-persona profile-merge slots applied over the base profile (§1.5: the caller authors the + * per-persona profile). 
Each slot overlays the base; an absent slot leaves the base untouched. */ +export interface SandboxActComposeOverrides { + /** Replace the base profile's system prompt (e.g. a workspace-augmented prompt). */ + systemPrompt?: string + /** Extra file mounts layered after the base profile's `resources.files`. */ + extraFiles?: AgentProfileFileMount[] + /** Override the profile `name`. Defaults to the base profile's name. */ + name?: string + /** Box built-in tool ON/OFF flags merged over the base profile's `tools` (overlay wins per key). */ + tools?: Record + /** MCP connections merged over the base profile's `mcp` (overlay wins per key). */ + mcpConnections?: Record +} + export interface CreateSandboxActOptions { - /** Canonical agent profile — the same one the prod chat turn composes from. */ + /** Canonical agent profile — the same one the prod chat turn uses. */ baseProfile: AgentProfile /** Sandbox client used to boot the per-run sandbox. */ sandboxClient: SandboxClient @@ -36,12 +54,10 @@ export interface CreateSandboxActOptions { /** Sandbox event stream → typed output the rubric scores. */ output: OutputAdapter /** - * Per-persona composition overrides (workspace-augmented system prompt, - * extra file mounts, sandbox key). Merged into - * {@link composeProductionAgentProfile}; `env` here is overridden by the - * top-level `env` option when both are set. + * Per-persona profile overrides (workspace-augmented system prompt, extra + * file mounts, tool flags, MCP connections). Overlaid onto `baseProfile`. */ - compose?: (persona: TPersona) => ComposeProductionAgentProfileOptions + compose?: (persona: TPersona) => SandboxActComposeOverrides /** Sandbox-SDK overrides forwarded to `createSandboxForSpec`. */ sandboxOverrides?: AgentRunSpec['sandboxOverrides'] /** Stable run name surfaced in mapped `llm_call` events. 
*/ @@ -51,8 +67,6 @@ export interface CreateSandboxActOptions { event: SandboxEvent, opts: { agentRunName?: string }, ) => RuntimeStreamEvent | undefined - /** Environment source for delegation-MCP composition. Defaults to `process.env`. */ - env?: Record } /** @@ -67,10 +81,7 @@ export function createSandboxAct( const mapEvent = options.mapEvent ?? mapSandboxEvent return (persona: TPersona, ctx: AgentRunContext): AgentRunInvocation => { - const profile = composeProductionAgentProfile(options.baseProfile, { - ...(options.compose?.(persona) ?? {}), - ...(options.env ? { env: options.env } : {}), - }) + const profile = applyComposeOverrides(options.baseProfile, options.compose?.(persona)) const agentRunName = options.name ?? profile.name ?? 'agent' const message = options.buildPrompt(persona) const signal = ctx.signal ?? new AbortController().signal @@ -112,3 +123,31 @@ export function createSandboxAct( return { events: events(), output } } } + +/** Overlay the per-persona overrides onto the base profile. Each slot merges over the base; an + * absent override leaves the base profile untouched. */ +function applyComposeOverrides( + base: AgentProfile, + overrides: SandboxActComposeOverrides | undefined, +): AgentProfile { + if (!overrides) return base + const prompt = overrides.systemPrompt + ? { ...base.prompt, systemPrompt: overrides.systemPrompt } + : base.prompt + const mergedTools = overrides.tools ? { ...(base.tools ?? {}), ...overrides.tools } : base.tools + const mergedMcp = overrides.mcpConnections + ? { ...(base.mcp ?? {}), ...overrides.mcpConnections } + : base.mcp + const baseFiles = base.resources?.files ?? [] + const mergedFiles: AgentProfileFileMount[] = overrides.extraFiles?.length + ? [...baseFiles, ...overrides.extraFiles] + : [...baseFiles] + return { + ...base, + name: overrides.name ?? base.name, + prompt, + ...(mergedTools ? { tools: mergedTools } : {}), + ...(mergedMcp ? 
{ mcp: mergedMcp } : {}), + resources: { ...base.resources, files: mergedFiles }, + } +} diff --git a/src/index.ts b/src/index.ts index a9acf5ae..0ac12c1a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -120,8 +120,6 @@ export * from './improvement' // ── Delegated loop-runner (configured code/research/review/audit/self-improve) ── export { auditLoopRunner, - type CoderLoopRunnerOptions, - coderLoopRunner, DELEGATED_LOOP_MODES, type DelegatedLoopMode, type DelegatedLoopRegistry, diff --git a/src/intelligence/capability.test.ts b/src/intelligence/capability.test.ts index 1a8a317c..047c484a 100644 --- a/src/intelligence/capability.test.ts +++ b/src/intelligence/capability.test.ts @@ -1,5 +1,4 @@ import { describe, expect, it, vi } from 'vitest' -import { composeProductionAgentProfile } from '../mcp/delegation-profile' import { type CapabilityManifest, CapabilityNotAdmittedError, @@ -522,43 +521,3 @@ describe('composeCertifiedProfile — sandbox-code', () => { expect(runSandboxCode).toHaveBeenCalledOnce() }) }) - -describe('composeProductionAgentProfile — new merge slots', () => { - it('merges tools box-flags, hooks, subagents, and injected mcpConnections', () => { - const profile = composeProductionAgentProfile( - { - name: 'base', - tools: { read: true }, - hooks: { 'pre-run': [{ command: 'echo base' }] }, - subagents: { researcher: { description: 'base researcher' } }, - mcp: { existing: { transport: 'stdio', command: 'x' } }, - }, - { - // No sandbox key → delegation entry omitted; assert ONLY the new slots. 
- env: {}, - tools: { write: true }, - hooks: { 'pre-run': [{ command: 'echo extra' }], 'post-run': [{ command: 'echo post' }] }, - subagents: { coder: { description: 'new coder' } }, - mcpConnections: { ticketing: { transport: 'stdio', command: 'node' } }, - }, - ) - expect(profile.tools).toEqual({ read: true, write: true }) - expect(profile.hooks?.['pre-run']).toEqual([ - { command: 'echo base' }, - { command: 'echo extra' }, - ]) - expect(profile.hooks?.['post-run']).toEqual([{ command: 'echo post' }]) - expect(profile.subagents).toMatchObject({ researcher: {}, coder: {} }) - expect(profile.mcp).toMatchObject({ existing: {}, ticketing: { command: 'node' } }) - }) - - it('leaves the base profile untouched when no new options are given', () => { - const profile = composeProductionAgentProfile( - { name: 'base', tools: { read: true } }, - { env: {} }, - ) - expect(profile.tools).toEqual({ read: true }) - expect(profile.hooks).toBeUndefined() - expect(profile.subagents).toBeUndefined() - }) -}) diff --git a/src/loop-runner.ts b/src/loop-runner.ts index c1312c74..24a59aaa 100644 --- a/src/loop-runner.ts +++ b/src/loop-runner.ts @@ -6,9 +6,9 @@ * One typed entrypoint a worker agent (or a scheduled routine) calls to run a * disciplined loop in a chosen MODE, over agent-runtime's hardened engines: * - * code → build-in-a-loop via the coder delegate (no-op + secret floor, - * optional reviewer gate, winner-selection) - * review → code mode with a REQUIRED reviewer (the gate is the point) + * code → build-in-a-loop on the GENERIC recursive path (worktreeLoopRunner: author one + * `AgentProfile` per harness → worktree-CLI leaves → `patchDelivered` gate) + * review → caller-registered runner — a `code` runner with an approval gate over candidates * research → research-in-a-loop with valid-only KB growth (createKbGate) * audit → analyze trace/run data → findings (runAnalystLoop, caller-wired) * self-improve → closed-loop text/config optimization (selfImprove, held-out gated) 
@@ -29,22 +29,13 @@ import { import { runAnalystLoop } from './analyst-loop' import type { RunAnalystLoopOpts, RunAnalystLoopResult } from './analyst-loop/types' import { ConfigError } from './errors' -import { - type CoderReviewer, - type DelegateRunCtx, - type DetachedWinnerSelection, - detachedSessionDelegate, -} from './mcp/delegates' -import type { CoderOutput } from './mcp/detached-coder' import { type CreateKbGateOptions, createKbGate, type FactCandidate } from './mcp/kb-gate' -import type { DelegateCodeArgs } from './mcp/types' import { type AuthoredHarness, type Budget, createExecutorRegistry, definePersona, runPersonified, - type SandboxClient, type WinnerStrategy, type WorktreeFanoutOptions, type WorktreePatchArtifact, @@ -124,36 +115,6 @@ export async function runDelegatedLoop( } } -/** @experimental Options for the default `code`/`review` runner. */ -export interface CoderLoopRunnerOptions { - sandboxClient: SandboxClient - /** What to build — the delegate args (goal, repoRoot, variants, config, …). */ - args: DelegateCodeArgs - /** Adversarial reviewer. Pass one to run `review` mode (an approval gate over the candidate). */ - reviewer?: CoderReviewer - /** Winner-selection strategy. Default `highest-score`. */ - winnerSelection?: DetachedWinnerSelection - /** Harnesses for `variants > 1` fanout. */ - fanoutHarnesses?: string[] -} - -/** - * @experimental Build a `code`/`review`-mode runner over the sandbox-session coder delegate. Pass a - * `reviewer` to run `review` mode — an approval gate over the validated candidate. - */ -export function coderLoopRunner(options: CoderLoopRunnerOptions): DelegatedLoopRunner { - const delegate = detachedSessionDelegate({ - sandboxClient: options.sandboxClient, - ...(options.reviewer ? { reviewer: options.reviewer } : {}), - ...(options.winnerSelection ? { winnerSelection: options.winnerSelection } : {}), - ...(options.fanoutHarnesses ? 
{ fanoutHarnesses: options.fanoutHarnesses } : {}), - }) - return async (signal) => { - const ctx: DelegateRunCtx = { signal, report: () => {} } - return delegate(options.args, ctx) - } -} - /** @experimental Options for the local-repo `code` runner over the GENERIC recursive path. */ export interface WorktreeLoopRunnerOptions { /** Absolute path to the local git checkout each worktree is cut from. */ @@ -187,9 +148,10 @@ export interface WorktreeLoopRunnerOptions { * * `code` mode on the GENERIC recursive path: author one `AgentProfile` per harness, run them as a * `worktreeFanout` (N `createWorktreeCliExecutor` leaves, each `gateOnDeliverable`) through - * `runPersonified` on the keystone Supervisor. This is the local-repo counterpart to - * {@link coderLoopRunner} (which drives the in-box harness over a `SandboxClient`): no `runLoop` - * driver, no role-coupled delegate — the harness list is the fanout, the gate is `patchDelivered`, + * `runPersonified` on the keystone Supervisor. The sandbox-session counterpart that drives the in-box + * harness over a `SandboxClient` is `detachedSessionDelegate` (`./mcp/delegates`); here there is no + * `runLoop` driver, no role-coupled delegate — the harness list is the fanout, the gate is + * `patchDelivered`, * the winner is the shared valid-only selector (NOT `defaultSelectWinner`, whose non-valid fallback * would surface an ungated patch). Equal-k holds by the conserved budget pool. Returns the winning * patch artifact, or throws when no candidate is delivered (fail loud, never a vacuous done). diff --git a/src/mcp/bin.ts b/src/mcp/bin.ts index 4534062b..08f3e650 100644 --- a/src/mcp/bin.ts +++ b/src/mcp/bin.ts @@ -5,30 +5,16 @@ * * `agent-runtime-mcp` — stdio MCP server entry point. 
* - * Spins up a server with the default coder delegate (wired against the - * real `@tangle-network/sandbox` client) and, when the optional - * `@tangle-network/agent-knowledge` peer is installed, a researcher - * delegate against `multiHarnessResearcherFanout`. + * Serves the ONE generic `delegate` verb (opt-in via `MCP_ENABLE_DELEGATE=1`): one intent → a + * supervisor that authors + drives its own worker over `supervise()`, returning the delivered output + * with its cost. The supervisor brain runs on the router; authored workers run as sub-sandboxes + * through the same `SandboxClient` the bin loads from `TANGLE_API_KEY`. The queue-bound tools + * (`delegate_feedback`, `delegation_status`, `delegation_history`) are always served. * * Environment variables: * TANGLE_API_KEY required — passed to `new Sandbox({ apiKey })` * SANDBOX_BASE_URL optional — sandbox-SDK base URL override - * TANGLE_FLEET_ID optional — when set, delegations dispatch - * INTO this fleet's shared workspace instead - * of creating sibling sandboxes. Set by the - * parent sandbox when launching this MCP - * server so worker diffs land on the caller's - * filesystem with no cross-sandbox boundary. - * TANGLE_FLEET_EXCLUDE_MACHINES optional — comma-separated machine ids to - * skip during fleet-mode round-robin - * (typically the coordinator machine this - * MCP server is running on). - * MCP_MAX_CONCURRENT_SANDBOXES default 4 — kernel maxConcurrency cap - * MCP_CODER_FANOUT_HARNESSES comma-separated harness ids to use for variants > 1 - * MCP_DISABLE_CODER set to `1` to omit `delegate_code` - * MCP_DISABLE_RESEARCHER set to `1` to omit `delegate_research` even when peer is present - * MCP_ENABLE_DELEGATE set to `1` to serve the ONE generic `delegate` verb (the - * replacement for delegate_code / delegate_research). Its authoring + * MCP_ENABLE_DELEGATE set to `1` to serve the generic `delegate` verb. 
Its authoring * supervisor runs the brain on the router and spawns authored * workers as sub-sandboxes via the same client; needs TANGLE_API_KEY. * MCP_SUPERVISOR_MODEL supervisor brain model id (falls back to MCP_WORKER_MODEL, then @@ -37,34 +23,12 @@ * MCP_SUPERVISOR_ROUTER_BASE_URL router base for the supervisor brain (defaults to the repo's * resolveRouterBaseUrl, normalized to `/v1`) * MCP_DELEGATE_WORKER_HARNESS harness the authored workers run on (default `opencode`) - * MCP_RESEARCHER_HARNESS researcher worker harness (default `opencode`) - * MCP_RESEARCHER_MODEL researcher worker model id (falls back to - * MCP_WORKER_MODEL, then WORKER_MODEL, then a default) - * MCP_RESEARCHER_FANOUT_HARNESSES comma-separated harnesses for researcher variants > 1 - * MCP_RESEARCHER_FANOUT_MODELS comma-separated per-harness models, index-aligned - * MCP_RESEARCHER_ROUTER_KEY OpenAI-compatible router key for the in-box agent - * (defaults to TANGLE_API_KEY) - * MCP_RESEARCHER_ROUTER_BASE_URL router base for the in-box agent (defaults to the - * repo's resolveRouterBaseUrl, normalized to `/v1`) * AGENT_RUNTIME_DELEGATION_STATE_FILE * optional — absolute path of a JSON state * file. When set, delegation records persist * across MCP restarts (FileDelegationStore): * status/history survive and idempotency keys - * dedupe across processes. Single-variant - * coder/researcher delegations additionally - * dispatch DETACHED (driveTurn ticks against a - * deterministic session id) on session-backed - * placements, so restored in-flight records - * resume against their still-running sandbox - * sessions; non-detached in-flight records - * settle as failed with a truthful - * driver-restart error. - * AGENT_RUNTIME_DELEGATION_DETACHED - * set to `0` to keep every delegation on the - * streaming path even when the state file is - * configured (disables detached dispatch + - * resume). + * dedupe across processes. 
* AGENT_RUNTIME_DELEGATION_STATE_RECOVER * set to `1` to archive a corrupt state file * (`.corrupt-`) and start empty @@ -74,171 +38,74 @@ * terminal records. Unset = keep forever. */ -import type { SandboxInstance } from '@tangle-network/sandbox' -import { coderTaskToPrompt } from '../profiles/coder' -import type { AgentRunSpec, LoopTraceEmitter, SandboxClient } from '../runtime' -import { runLoop } from '../runtime' -import { detectExecutor } from './bin-helpers' +import type { SandboxClient } from '../runtime' import { delegateEnabled, resolveDelegateSupervisor } from './delegate-supervisor-provisioning' -import { - coderTaskFromArgs, - detachedSessionDelegate, - type ResearcherDelegate, - settleDetachedCoderTurn, -} from './delegates' -import { DEFAULT_SANDBOX_BASE_URL } from './delegation-profile' import { FileDelegationStore } from './delegation-store' -import { composeLoopTraceEmitters } from './delegation-trace' -import { - createDetachedTurnResumeDriver, - type DetachedTurn, - type DriveTurnCapableBox, - detachedTurnEvents, - formatDetachedSessionRef, - parseDetachedSessionRef, - runDetachedTurn, -} from './detached-turn' -import type { DelegationExecutor } from './executor' -import { - applyRouterEnv, - type ProvisionableSpec, - resolveResearcherProvisioning, -} from './researcher-provisioning' import { createMcpServer } from './server' -import { type DelegationResumeDriver, DelegationTaskQueue } from './task-queue' -import { - createPropagatingTraceEmitter, - readTraceContextFromEnv, - type TraceContext, -} from './trace-propagation' -import type { DelegateCodeArgs, DelegateResearchArgs, ResearchOutputShape } from './types' +import { DelegationTaskQueue } from './task-queue' +import { readTraceContextFromEnv, type TraceContext } from './trace-propagation' + +const DEFAULT_SANDBOX_BASE_URL = 'https://sandbox.tangle.tools' async function main(): Promise { - const fanoutHarnesses = parseHarnesses(process.env.MCP_CODER_FANOUT_HARNESSES) - const 
maxConcurrency = parseConcurrency(process.env.MCP_MAX_CONCURRENT_SANDBOXES) - const wantCoder = !process.env.MCP_DISABLE_CODER - const wantResearcher = !process.env.MCP_DISABLE_RESEARCHER const wantDelegate = delegateEnabled(process.env) - const fleetId = parseFleetId(process.env.TANGLE_FLEET_ID) - // Skip the sandbox client load entirely when no profile delegate needs it — - // the feedback + status + history tools are queue-bound and require no - // sandbox. Useful for tooling that mounts the MCP server purely for - // self-introspection. The generic `delegate` verb needs the client too: its - // authored workers run as sub-sandboxes (the `sandbox` backend). - const needsSandbox = wantCoder || wantResearcher || wantDelegate + // The generic `delegate` verb needs the sandbox client: its authored workers run as sub-sandboxes + // (the `sandbox` backend). When `delegate` is not opted in, the server runs the queue-only subset + // (feedback + status + history) with no sandbox. let sandboxClient: SandboxClient | undefined - let executor: DelegationExecutor | undefined - if (needsSandbox) { + if (wantDelegate) { const apiKey = process.env.TANGLE_API_KEY if (!apiKey && !process.env.AGENT_RUNTIME_MCP_ALLOW_NO_KEY) { process.stderr.write( - 'agent-runtime-mcp: TANGLE_API_KEY is required. Set AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1 to run without it for diagnostics, or MCP_DISABLE_CODER=1 MCP_DISABLE_RESEARCHER=1 to run the queue-only subset.\n', - ) - process.exit(2) - } - // Fleet mode against a diagnostic stub is meaningless — the stub can't - // resolve a real fleet handle. Refuse rather than silently degrading, - // otherwise a fleet-mounted MCP would behave differently than configured. - if (fleetId && !apiKey) { - process.stderr.write( - 'agent-runtime-mcp: TANGLE_FLEET_ID was set but TANGLE_API_KEY is missing; cannot resolve fleet handle. Provide an api key or unset TANGLE_FLEET_ID.\n', + 'agent-runtime-mcp: TANGLE_API_KEY is required to serve `delegate`. 
Set AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1 to run without it for diagnostics, or unset MCP_ENABLE_DELEGATE to run the queue-only subset.\n', ) process.exit(2) } sandboxClient = await loadSandboxClient(apiKey) - executor = await detectExecutor({ sandboxClient }) - if (fleetId) { - process.stderr.write(`agent-runtime-mcp: fleet-aware delegation: fleetId=${fleetId}\n`) - } - process.stderr.write(`agent-runtime-mcp: delegation placement → ${executor.describe()}\n`) } - // Export delegated-loop topology spans to the OTLP / Tangle Intelligence sink - // when OTEL_EXPORTER_OTLP_ENDPOINT is set (+ TRACE_ID / PARENT_SPAN_ID for - // correlation with the caller's trace). A cheap no-op when the endpoint is - // unset — the fleet forwards the env into this MCP's process to turn it on. - // The same context is stamped onto every delegation record (traceId / - // parentSpanId) so journal consumers join records into the caller's trace. + // The supervisor's loop topology spans export to the OTLP / Tangle Intelligence sink when + // OTEL_EXPORTER_OTLP_ENDPOINT is set (+ TRACE_ID / PARENT_SPAN_ID for correlation). The same + // context is stamped onto every delegation record so journal consumers join records into the + // caller's trace. const traceContext = readTraceContextFromEnv() - const { emitter: traceEmitter, exporter: traceExporter } = - createPropagatingTraceEmitter(traceContext) if (process.env.OTEL_EXPORTER_OTLP_ENDPOINT) { process.stderr.write( `agent-runtime-mcp: exporting loop topology → ${process.env.OTEL_EXPORTER_OTLP_ENDPOINT}\n`, ) } - const coderDelegate = - wantCoder && executor - ? detachedSessionDelegate({ - executor, - fanoutHarnesses, - maxConcurrency, - traceEmitter, - }) - : undefined - - const researcherSupport = - wantResearcher && executor - ? 
await loadResearcherSupport(executor, maxConcurrency, traceEmitter) - : undefined - - // Detached dispatch + resume is QUARANTINED behind MCP_ENABLE_DETACHED_RESUME (default off): the - // recursive Scope/worktree-CLI leaf has no durable detached-resume equivalent yet, so the - // sandbox-session resume path is kept but opt-in. It additionally requires the durable store and - // a session-backed placement with real credentials: in-process placement has no sandbox session - // to detach, and the diagnostic no-key stub cannot resolve boxes. - // AGENT_RUNTIME_DELEGATION_DETACHED=0 keeps everything on the streaming path even when enabled. - const detachedDispatch = - process.env.MCP_ENABLE_DETACHED_RESUME === '1' && - Boolean(process.env.AGENT_RUNTIME_DELEGATION_STATE_FILE?.trim()) && - process.env.AGENT_RUNTIME_DELEGATION_DETACHED !== '0' && - Boolean(process.env.TANGLE_API_KEY) && - (executor?.placement === 'sibling' || executor?.placement === 'fleet') - if (detachedDispatch) { - process.stderr.write( - 'agent-runtime-mcp: detached dispatch enabled — single-variant delegations resume across restarts\n', - ) - } - const resumeDriver = - detachedDispatch && sandboxClient - ? buildResumeDriver({ sandboxClient, researcherResume: researcherSupport?.resume }) - : undefined - // The ONE generic `delegate` verb — opt-in via MCP_ENABLE_DELEGATE=1. Its authoring supervisor // runs the brain on the router and spawns authored workers as sub-sandboxes through the SAME // client, so it needs the loaded `sandboxClient`. Gated on the client resolving (no key → no - // delegate, matching the coder/researcher fail-closed posture). + // delegate, fail-closed). const delegateSupervisor = wantDelegate && sandboxClient ? 
resolveDelegateSupervisor(sandboxClient) : undefined if (wantDelegate && delegateSupervisor) { process.stderr.write('agent-runtime-mcp: delegate enabled — generic authoring supervisor\n') } - const durableQueue = await buildDurableQueueFromEnv(resumeDriver, traceContext) + const durableQueue = await buildDurableQueueFromEnv(traceContext) const server = createMcpServer({ - coderDelegate, - researcherDelegate: researcherSupport?.delegate, ...(delegateSupervisor ? { delegateSupervisor } : {}), - detachedDispatch, traceContext, ...(durableQueue ? { queue: durableQueue } : {}), }) const shutdown = () => { server.stop() - const pending: Promise[] = [] - if (traceExporter) pending.push(traceExporter.shutdown()) // Drain journal writes so the state file reflects the final record // states before the process exits. A persist failure already routed // through onPersistError; swallow the duplicate rejection here. - if (durableQueue) pending.push(durableQueue.flush().catch(() => {})) - if (pending.length === 0) { - process.exit(0) + if (durableQueue) { + void durableQueue + .flush() + .catch(() => {}) + .finally(() => process.exit(0)) return } - void Promise.allSettled(pending).finally(() => process.exit(0)) + process.exit(0) } process.on('SIGINT', shutdown) process.on('SIGTERM', shutdown) @@ -247,7 +114,6 @@ async function main(): Promise { } async function buildDurableQueueFromEnv( - resumeDriver: DelegationResumeDriver | undefined, traceContext: TraceContext, ): Promise { const stateFile = process.env.AGENT_RUNTIME_DELEGATION_STATE_FILE?.trim() @@ -257,14 +123,9 @@ async function buildDurableQueueFromEnv( recoverCorrupt: process.env.AGENT_RUNTIME_DELEGATION_STATE_RECOVER === '1', }) const maxTerminalRecords = parseRetention(process.env.AGENT_RUNTIME_DELEGATION_RETAIN_TERMINAL) - // With a resume driver, restored in-flight records that carry a - // detachedSessionRef re-attach to their still-running sandbox sessions; - // without one (detached dispatch disabled / no 
credentials) they settle as - // failed with a truthful driver-restart error. const queue = await DelegationTaskQueue.restore({ store, traceContext, - ...(resumeDriver ? { resumeDelegate: resumeDriver } : {}), ...(maxTerminalRecords !== undefined ? { maxTerminalRecords } : {}), onPersistError: (error) => { // Durable mode that can no longer write is a broken contract: crash @@ -277,80 +138,6 @@ async function buildDurableQueueFromEnv( return queue } -interface ResearcherResumeSupport { - message(args: DelegateResearchArgs): string - settle( - turn: DetachedTurn, - args: DelegateResearchArgs, - signal: AbortSignal, - ): Promise -} - -/** - * Compose the `driveTurn`-backed resume driver over the real sandbox client. - * Profile dispatch: coder records settle through the same parse + validate - * gate the delegate applies; researcher records settle through the - * agent-knowledge preset when the peer is installed. Profiles without resume - * support (ui-auditor, researcher-without-peer) fail loud — the record settles - * as failed with the reason instead of fabricating an output. 
- */ -function buildResumeDriver(args: { - sandboxClient: SandboxClient - researcherResume: ResearcherResumeSupport | undefined -}): DelegationResumeDriver { - const client = args.sandboxClient as SandboxClient & { - get?: (id: string) => Promise - } - return createDetachedTurnResumeDriver({ - async resolveSandbox(sandboxId) { - if (typeof client.get !== 'function') { - throw new Error( - 'agent-runtime-mcp: the sandbox client exposes no get(sandboxId); upgrade @tangle-network/sandbox to >= 0.6 to resume detached delegations', - ) - } - const box = await client.get(sandboxId) - if (!box) { - throw new Error( - `agent-runtime-mcp: sandbox ${sandboxId} no longer exists — the detached run cannot be resumed`, - ) - } - return box as unknown as DriveTurnCapableBox - }, - buildMessage(record) { - if (record.profile === 'coder') { - const task = coderTaskFromArgs(record.args as DelegateCodeArgs) - return coderTaskToPrompt(task) - } - if (record.profile === 'researcher' && args.researcherResume) { - return args.researcherResume.message(record.args as DelegateResearchArgs) - } - throw new Error( - `agent-runtime-mcp: no detached resume support for profile "${record.profile}"`, - ) - }, - async settleOutput(turn, record, ctx) { - if (record.profile === 'coder') { - if (!record.detachedSessionRef) { - throw new Error( - `agent-runtime-mcp: record ${record.taskId} reached the resume settle without a detachedSessionRef`, - ) - } - return settleDetachedCoderTurn(turn, { - task: coderTaskFromArgs(record.args as DelegateCodeArgs), - sessionId: parseDetachedSessionRef(record.detachedSessionRef).sessionId, - signal: ctx.signal, - }) - } - if (record.profile === 'researcher' && args.researcherResume) { - return args.researcherResume.settle(turn, record.args as DelegateResearchArgs, ctx.signal) - } - throw new Error( - `agent-runtime-mcp: no detached resume support for profile "${record.profile}"`, - ) - }, - }) -} - function parseRetention(raw: string | undefined): number | undefined 
{ if (raw === undefined || raw.trim() === '') return undefined const n = Number(raw) @@ -366,13 +153,13 @@ function parseRetention(raw: string | undefined): number | undefined { async function loadSandboxClient(apiKey: string | undefined): Promise { // Diagnostic mode: AGENT_RUNTIME_MCP_ALLOW_NO_KEY=1 enables tools/list + the // queue-bound tools (status / history / feedback) without sandbox creds. - // Coder + researcher delegations require a real client; the stub fails loud - // at create() so the agent observes the cause instead of silent success. + // `delegate` requires a real client; the stub fails loud at create() so the + // agent observes the cause instead of silent success. if (!apiKey) { return { async create() { throw new Error( - 'agent-runtime-mcp: TANGLE_API_KEY is unset; coder/researcher delegations are disabled in diagnostic mode. Set TANGLE_API_KEY or use MCP_DISABLE_CODER=1 MCP_DISABLE_RESEARCHER=1 to remove the unsupported tools from the tool list.', + 'agent-runtime-mcp: TANGLE_API_KEY is unset; `delegate` is disabled in diagnostic mode. Set TANGLE_API_KEY or unset MCP_ENABLE_DELEGATE to remove the unsupported tool from the tool list.', ) }, } satisfies SandboxClient @@ -400,246 +187,6 @@ async function loadSandboxClient(apiKey: string | undefined): Promise[0]['agentRun'] extends infer T ? 
NonNullable : never - output: Parameters[0]['output'] - validator: Parameters[0]['validator'] -} - -interface ResearcherFanoutPreset { - agentRuns: NonNullable[0]['agentRuns']> - output: Parameters[0]['output'] - validator: Parameters[0]['validator'] - driver: Parameters[0]['driver'] -} - -interface ResearcherSupport { - delegate: ResearcherDelegate - resume: ResearcherResumeSupport -} - -async function loadResearcherSupport( - executor: DelegationExecutor, - maxConcurrency: number, - traceEmitter?: LoopTraceEmitter, -): Promise { - const sandboxClient = executor.client - // Optional peer — when `@tangle-network/agent-knowledge` isn't installed, - // we silently omit the researcher tool from the advertisement. The - // dynamic-import path is resolved at runtime; TypeScript cannot see the - // peer, so we type the module structurally rather than via its own - // declaration file. - const profilesSpecifier = '@tangle-network/agent-knowledge/profiles' - const mod = await import(profilesSpecifier).catch(() => undefined) - if (!mod) return undefined - type SingleFactory = (opts: { - task: unknown - harness?: string - model?: string - }) => ResearcherProfilePreset - type FanoutFactory = (opts: { - task: unknown - harnesses?: string[] - models?: (string | undefined)[] - }) => ResearcherFanoutPreset - const fanoutFactory = (mod as { multiHarnessResearcherFanout?: FanoutFactory }) - .multiHarnessResearcherFanout - const singleFactory = (mod as { researcherProfile?: SingleFactory }).researcherProfile - if (!fanoutFactory || !singleFactory) return undefined - - // Worker harness + model + provider auth. Two reasons a researcher run otherwise makes - // zero LLM calls and "produces no winner" on a successful box: (1) the profile's default - // harness (opencode/zai-coding-plan/glm-5.1) is not broadly provisionable; (2) the - // sandbox SDK does not wire backend.model.apiKey into the in-box agent's OpenAI-compatible - // provider. 
resolveResearcherProvisioning picks a provisionable harness + model and the - // router creds (all env-overridable); applyRouterEnv injects them as box env. Applied to - // BOTH the single-variant path and every fanout agent-run so variants > 1 work too. - const { - harness, - model, - routerKey, - routerBaseUrl, - fanoutHarnesses: cfgFanoutHarnesses, - fanoutModels, - } = resolveResearcherProvisioning() - const buildPreset = (task: unknown): ResearcherProfilePreset => { - const preset = singleFactory({ task, harness, model }) - applyRouterEnv(preset.agentRunSpec as ProvisionableSpec, routerKey, routerBaseUrl) - return preset - } - - const settleSingle = async ( - turn: DetachedTurn, - args: DelegateResearchArgs, - sessionId: string, - signal: AbortSignal, - ): Promise => { - const task = buildResearchTask(args) - const preset = buildPreset(task) - if (!preset.validator) { - throw new Error('agent-runtime-mcp: researcher preset exposes no validator; cannot settle') - } - const parsed = preset.output.parse(detachedTurnEvents(sessionId, turn)) - const verdict = await preset.validator.validate(parsed, { iteration: 0, signal }) - if ((verdict as { valid?: boolean }).valid !== true) { - throw new Error('researcher delegate produced no winner') - } - return parsed as ResearchOutputShape - } - - const delegate: ResearcherDelegate = async (args, ctx) => { - const task = buildResearchTask(args) - const variants = Math.max(1, Math.trunc(args.variants ?? 1)) - const loopEmitter = composeLoopTraceEmitters(traceEmitter, ctx.traceEmitter) - ctx.report({ iteration: 0, phase: 'starting' }) - if (variants <= 1) { - const preset = buildPreset(task) - // Detached dispatch — same contract as the coder delegate: one session - // on one box, driveTurn ticks, resume key bound to the sandbox id. 
- if (ctx.detachedSessionRef !== undefined && ctx.updateDetachedSessionRef) { - const { sessionId } = parseDetachedSessionRef(ctx.detachedSessionRef) - const rebind = ctx.updateDetachedSessionRef - const spec = preset.agentRunSpec as AgentRunSpec - const turn = await runDetachedTurn({ - client: sandboxClient, - spec, - prompt: spec.taskToPrompt(task), - sessionId, - bindSandbox: (sandboxId) => rebind(formatDetachedSessionRef({ sandboxId, sessionId })), - signal: ctx.signal, - report: ctx.report, - ...(loopEmitter ? { traceEmitter: loopEmitter } : {}), - ...(executor.placement === 'fleet' ? { placement: 'fleet' as const } : {}), - }) - const output = await settleSingle(turn, args, sessionId, ctx.signal) - ctx.report({ iteration: 1, phase: 'completed' }) - return output - } - const result = await runLoop({ - driver: { - name: 'mcp-researcher-single', - async plan(t, history) { - return history.length === 0 ? [t] : [] - }, - decide(history) { - return history.length > 0 ? 'pick-winner' : 'fail' - }, - }, - agentRun: preset.agentRunSpec, - output: preset.output, - validator: preset.validator, - task, - ctx: { - sandboxClient, - signal: ctx.signal, - ...(loopEmitter ? { traceEmitter: loopEmitter } : {}), - }, - maxIterations: 1, - maxConcurrency, - }) - const output = result.winner?.output - if (!output) throw new Error('researcher delegate produced no winner') - ctx.report({ iteration: 1, phase: 'completed' }) - return output as ResearchOutputShape - } - // Match the single-variant fix: use a provisionable harness/model and inject router - // creds into every fanout agent-run, else variants > 1 makes zero LLM calls. Default to - // `variants` copies of the working harness; MCP_RESEARCHER_FANOUT_HARNESSES overrides for - // diversity (with optional per-harness MCP_RESEARCHER_FANOUT_MODELS). - const fanoutHarnesses = cfgFanoutHarnesses ?? 
Array.from({ length: variants }, () => harness) - const fanout = fanoutFactory({ - task, - harnesses: fanoutHarnesses, - models: fanoutHarnesses.map((_, i) => fanoutModels?.[i] ?? model), - }) - for (const spec of fanout.agentRuns) { - applyRouterEnv(spec as ProvisionableSpec, routerKey, routerBaseUrl) - } - // The harness list may be shorter than `variants` (misconfig) — never claim more - // iterations than there are runs. - const runs = fanout.agentRuns.slice(0, variants) - const effectiveVariants = Math.max(1, runs.length) - const result = await runLoop({ - driver: fanout.driver, - agentRuns: runs, - output: fanout.output, - validator: fanout.validator, - task, - ctx: { - sandboxClient, - signal: ctx.signal, - ...(loopEmitter ? { traceEmitter: loopEmitter } : {}), - }, - maxIterations: effectiveVariants, - maxConcurrency: Math.min(maxConcurrency, effectiveVariants), - }) - const output = result.winner?.output - if (!output) throw new Error('researcher delegate fanout produced no winner') - ctx.report({ iteration: result.iterations.length, phase: 'completed' }) - return output as ResearchOutputShape - } - - return { - delegate, - resume: { - message(args) { - const task = buildResearchTask(args) - // Use the same preset construction as dispatch so the displayed prompt can't drift. - const spec = buildPreset(task).agentRunSpec as AgentRunSpec - return spec.taskToPrompt(task) - }, - async settle(turn, args, signal) { - // The session id is only the synthesized event's id — the parser reads - // data.result / data.text, never the id. - return settleSingle(turn, args, 'resumed-detached-turn', signal) - }, - }, - } -} - -function buildResearchTask(args: DelegateResearchArgs): unknown { - return { - question: args.question, - knowledgeNamespace: args.namespace, - scope: args.scope, - sources: args.sources, - recencyWindow: args.config?.recencyWindow - ? { - since: args.config.recencyWindow.since - ? 
new Date(args.config.recencyWindow.since) - : undefined, - until: args.config.recencyWindow.until - ? new Date(args.config.recencyWindow.until) - : undefined, - } - : undefined, - maxItems: args.config?.maxItems, - minConfidence: args.config?.minConfidence, - } -} - -function parseHarnesses(raw: string | undefined): string[] | undefined { - if (!raw) return undefined - const list = raw - .split(',') - .map((entry) => entry.trim()) - .filter(Boolean) - return list.length > 0 ? list : undefined -} - -function parseFleetId(raw: string | undefined): string | undefined { - if (typeof raw !== 'string') return undefined - const trimmed = raw.trim() - return trimmed.length > 0 ? trimmed : undefined -} - -function parseConcurrency(raw: string | undefined): number { - if (!raw) return 4 - const n = Number(raw) - if (!Number.isFinite(n) || n < 1) return 4 - return Math.min(Math.trunc(n), 32) -} - main().catch((err) => { process.stderr.write(`agent-runtime-mcp: ${err instanceof Error ? err.stack : String(err)}\n`) process.exit(1) diff --git a/src/mcp/delegates.ts b/src/mcp/delegates.ts index a7914a18..2f768460 100644 --- a/src/mcp/delegates.ts +++ b/src/mcp/delegates.ts @@ -1,31 +1,24 @@ /** * @experimental * - * Delegate factories — the layer between MCP tool handlers and the - * underlying `runLoop` runners. + * `detachedSessionDelegate` — the sandbox-session coder delegate: a closure that drives `runLoop` + * against a `SandboxClient` + a caller-supplied (or minimal model-only default) worker profile, to a + * mechanically-validated `CoderOutput`. The caller invokes the returned delegate directly with its + * coder args; when wired into a durable queue it also settles cross-restart-resumed records. 
* - * Delegation vs COORDINATION (`../runtime/supervise/coordination-mcp.ts`): delegation runs a coding - * task INSIDE the agent's OWN sandbox environment — a sibling box on its own `SandboxClient`, fresh - * branch on its repo — as a durable, fire-and-poll job that survives an MCP restart. It is NOT - * backend-pluggable. To instead SPAWN + live-drive workers in a CHOSEN backend (sandbox OR cli-bridge, - * via `createExecutor({ backend })`) with observe/steer/resume + recursion, use the coordination MCP. + * Delegation vs COORDINATION (`../runtime/supervise/coordination-mcp.ts`): this delegate runs a + * coding task INSIDE the agent's OWN sandbox environment — a sibling box on its own `SandboxClient`, + * fresh branch on its repo. It is NOT backend-pluggable. To instead SPAWN + live-drive workers in a + * CHOSEN backend (sandbox OR cli-bridge, via `createExecutor({ backend })`) with observe/steer/resume + * + recursion, use `delegate()` / the coordination MCP. * - * The MCP server is profile-agnostic: it owns the task queue + feedback - * store + transport. Each `*Delegate` is the closure that the queue - * invokes when a task runs. Consumers can override either delegate to - * inject custom drivers, mocks, fleet-aware dispatchers, etc. - * - * The `detachedSessionDelegate` here is the built-in SANDBOX-SESSION coder path — the live default - * `delegate_code` delegate: workers run the in-box harness over a `SandboxClient`. By default it - * holds the stream; single-variant turns can OPTIONALLY dispatch DETACHED (`driveTurn` ticks) so a - * durable queue resumes them across an MCP restart — that resume tick is the only part gated behind - * `MCP_ENABLE_DETACHED_RESUME` (default off) in `bin.ts`, a capability the recursive - * `Scope`/worktree-CLI leaf has no durable equivalent for yet. For NEW local-repo coding use - * `worktreeFanout` / `worktreeLoopRunner`. 
The default researcher delegate is **not** wired in this - * file — `agent-knowledge` cannot be imported from `agent-runtime` without inducing a cycle. - * Consumers pass `researcherDelegate` explicitly. + * The worker profile is a parameter the caller supplies (§1.5: the system authors profiles). When + * none is passed, a minimal model-only default profile is materialized in `./detached-coder` — no + * hardcoded skills or tools. For NEW local-repo coding use `worktreeFanout` / `worktreeLoopRunner` + * (author one `AgentProfile` per harness → `createWorktreeCliExecutor` leaves → `gateOnDeliverable`). */ +import type { AgentProfile } from '@tangle-network/agent-interface' import type { CoderTask } from '../profiles/coder' import type { AgentRunSpec, @@ -54,10 +47,8 @@ import { import { createSiblingSandboxExecutor, type DelegationExecutor } from './executor' import type { DelegateCodeArgs, - DelegateResearchArgs, DelegateUiAuditArgs, DelegationProgress, - ResearchOutputShape, UiAuditorDelegationOutput, } from './types' @@ -83,16 +74,11 @@ export interface DelegateRunCtx { traceEmitter?: LoopTraceEmitter } -/** @experimental The server's coder-profile delegate slot — the closure the queue invokes for a - * `delegate_code` task. `detachedSessionDelegate` is the built-in implementation. */ +/** @experimental The coder delegate closure — given the coder args + run context, drives the + * sandbox-session coder path to a validated `CoderOutput`. `detachedSessionDelegate` is the + * built-in implementation; the queue invokes one of these per coder delegation. */ export type CoderDelegate = (args: DelegateCodeArgs, ctx: DelegateRunCtx) => Promise -/** @experimental */ -export type ResearcherDelegate = ( - args: DelegateResearchArgs, - ctx: DelegateRunCtx, -) => Promise - /** * UI-auditor delegate — fully consumer-injected. 
agent-runtime ships no * default factory because the inputs are workspace path + judge function @@ -160,14 +146,22 @@ export interface DetachedSessionDelegateOptions { * `executor: createSiblingSandboxExecutor({ client: sandboxClient })`. */ sandboxClient?: SandboxClient - /** Backend harness for the single-coder path. Default comes from `coderProfile`. */ + /** + * The worker's authored `AgentProfile` (§1.5: the system authors profiles). Spread onto the + * sandbox-session run spec → `runLoop` → the executor's `harnessInvocation`, so the harness runs + * under the caller's stance. Omit to use a minimal model-only default (no hardcoded skills/tools); + * `harness` / `model` / `systemPrompt` below are convenience overrides layered onto whichever + * profile is used. + */ + workerProfile?: AgentProfile + /** Backend harness for the single-coder path (sets `metadata.backendType`). Default `claude-code`. */ harness?: string /** Model override for the single-coder path. */ model?: string /** - * The worker's authored system prompt (§1.5). Flows onto `coderProfile`'s + * The worker's authored system prompt (§1.5). Flows onto the run spec's * `profile.prompt.systemPrompt` → through `runLoop` → the executor's `harnessInvocation`, so the - * harness runs under this stance, not just the default coder prompt. Omit to keep the default. + * harness runs under this stance. Omit to keep the profile's own prompt. */ systemPrompt?: string /** Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']` when variants > 1. */ @@ -233,6 +227,7 @@ export function detachedSessionDelegate(options: DetachedSessionDelegateOptions) ctx.report({ iteration: 0, phase: 'starting' }) if (variants <= 1) { const agentRunSpec = coderRunSpec({ + ...(options.workerProfile ? { profile: options.workerProfile } : {}), ...(options.harness ? { harness: options.harness } : {}), ...(options.model ? { model: options.model } : {}), ...(options.systemPrompt ? 
{ systemPrompt: options.systemPrompt } : {}), @@ -301,6 +296,7 @@ export function detachedSessionDelegate(options: DetachedSessionDelegateOptions) return chosen } const fanout = multiHarnessCoderFanout({ + ...(options.workerProfile ? { profile: options.workerProfile } : {}), ...(fanoutHarnesses && fanoutHarnesses.length > 0 ? { harnesses: fanoutHarnesses.slice(0, variants) } : {}), diff --git a/src/mcp/delegation-profile.ts b/src/mcp/delegation-profile.ts deleted file mode 100644 index 204b69e7..00000000 --- a/src/mcp/delegation-profile.ts +++ /dev/null @@ -1,228 +0,0 @@ -/** - * Production-profile composition for the agent-runtime delegation MCP. - * - * A product agent's sandbox loads the delegation tools (`delegate_code`, - * `delegate_research`, `delegate_feedback`, `delegation_status`, - * `delegation_history`) by mounting the `agent-runtime-mcp` stdio server as - * an MCP entry in its `AgentProfile`. This module is the single composer for - * that wiring, so every consumer — the fleet agents and agent-builder's - * generated agents — shares one implementation instead of copying it. - * - * The load-bearing invariant: the delegation MCP entry is only ever emitted - * when a sandbox API key is present. Without the key the kernel's - * coder/researcher delegate cannot construct an authenticated sandbox client, - * so we omit the entry rather than ship an MCP child that fails to - * authenticate on startup. No static profile entry, ever. - */ - -import type { - AgentProfile, - AgentProfileFileMount, - AgentProfileMcpServer, - AgentSubagentProfile, -} from '@tangle-network/sandbox' - -/** One hook command entry. The SDK declares `AgentProfile.hooks` as - * `Record` but does not re-export the element - * type from the package entry, so derive it from `AgentProfile` by indexed - * access — the single source of truth, no drift from the SDK shape. 
*/ -type AgentProfileHookCommand = NonNullable[string][number] - -/** MCP server key under which the agent-runtime delegation tools mount. */ -export const DELEGATION_MCP_SERVER_KEY = 'agent-runtime-delegation' - -/** - * Env vars forwarded into the delegation MCP child so its delegated - * build/research loops export topology spans to the configured OTLP / - * Tangle Intelligence sink. Each is forwarded only when present, so the - * child is a no-op exporter until `OTEL_EXPORTER_OTLP_ENDPOINT` is set in the - * parent env — never a hardcoded endpoint. - */ -const OTEL_FORWARD_KEYS = [ - 'OTEL_EXPORTER_OTLP_ENDPOINT', - 'OTEL_EXPORTER_OTLP_HEADERS', - 'TRACE_ID', - 'PARENT_SPAN_ID', -] as const - -export const DEFAULT_SANDBOX_BASE_URL = 'https://sandbox.tangle.tools' - -export interface BuildDelegationMcpServerOptions { - /** Sandbox API key forwarded as `TANGLE_API_KEY` to the MCP child. The - * agent-runtime MCP bin reads `TANGLE_API_KEY` and passes it straight to - * `new Sandbox({ apiKey })`. Defaults to `env.TANGLE_API_KEY`. */ - sandboxApiKey?: string - /** Sandbox base URL forwarded as `SANDBOX_BASE_URL`. Defaults to - * `env.SANDBOX_BASE_URL`, then `env.SANDBOX_API_URL`, then the public - * sandbox endpoint. */ - sandboxBaseUrl?: string - /** Environment source for key + OTEL resolution. Defaults to `process.env`; - * injectable for tests and non-process callers. */ - env?: Record -} - -/** - * Build the delegation MCP entry the sandbox-side agent loads on startup. - * Returns `undefined` when no sandbox API key is resolvable — callers merge - * the result into a profile's `mcp` map only when defined. - */ -export function buildDelegationMcpServer( - options: BuildDelegationMcpServerOptions = {}, -): Record | undefined { - const env = options.env ?? process.env - const sandboxApiKey = options.sandboxApiKey ?? env.TANGLE_API_KEY - if (!sandboxApiKey) return undefined - const baseUrl = - options.sandboxBaseUrl ?? - env.SANDBOX_BASE_URL ?? 
- env.SANDBOX_API_URL ?? - DEFAULT_SANDBOX_BASE_URL - - const otelEnv: Record = {} - for (const key of OTEL_FORWARD_KEYS) { - const value = env[key] - if (value) otelEnv[key] = value - } - - return { - [DELEGATION_MCP_SERVER_KEY]: { - transport: 'stdio', - command: 'npx', - args: ['-y', '@tangle-network/agent-runtime', 'mcp'], - env: { - TANGLE_API_KEY: sandboxApiKey, - SANDBOX_BASE_URL: baseUrl, - ...otelEnv, - }, - enabled: true, - metadata: { - surface: 'delegation:dispatch', - tools: [ - 'delegate_code', - 'delegate_research', - 'delegate_feedback', - 'delegation_status', - 'delegation_history', - ], - }, - }, - } -} - -export interface ComposeProductionAgentProfileOptions { - /** Sandbox API key forwarded to the delegation MCP child. Defaults to - * `env.TANGLE_API_KEY`. When unset, the delegation MCP entry is omitted. */ - sandboxApiKey?: string - /** Sandbox base URL forwarded as `SANDBOX_BASE_URL` to the MCP child. */ - sandboxBaseUrl?: string - /** Replace the base profile's system prompt. Used by per-turn calls that - * swap in workspace-augmented prompts (board summary, learned style). */ - systemPrompt?: string - /** Extra file mounts layered after the base profile's `resources.files`. */ - extraFiles?: AgentProfileFileMount[] - /** Override the profile `name`. Defaults to the base profile's name. */ - name?: string - /** Environment source for key + OTEL resolution. Defaults to `process.env`. */ - env?: Record - /** Box built-in tool ON/OFF flags merged over the base profile's `tools` - * (overlay wins per key). The sandbox-seam mapping of a certified surface's - * tool grants — `AgentProfile.tools` is `Record` box flags, - * so it carries grants, not arbitrary tool defs. */ - tools?: Record - /** Per-event hook commands merged over the base profile's `hooks`. An event - * present in both has the extra commands appended after the base ones. 
*/ - hooks?: Record - /** Subagent definitions merged over the base profile's `subagents` (overlay - * wins per key). */ - subagents?: Record - /** Resolved certified MCP connections injected into `AgentProfile.mcp` — the - * sandbox-seam delivery of a `ResolvedSurface.mcpConnections`. Merged after - * the base map and before the delegation entry, so a base/delegation key is - * never silently shadowed by an injected one. */ - mcpConnections?: Record -} - -/** - * Compose the production `AgentProfile`: the canonical base profile with the - * delegation MCP merged into `mcp`. Used by every call site that boots a - * sandbox or runs a chat turn through the sandbox path, and by eval wiring so - * the scorecard profile hash reflects the actual production profile. - * - * Merge rules: - * - `mcp`: base map preserved; `options.mcpConnections` (resolved certified - * servers) merged over it; the delegation entry is appended last under - * {@link DELEGATION_MCP_SERVER_KEY}, and omitted entirely when no sandbox - * API key resolves. - * - `tools`: base box-flags map preserved; `options.tools` overlaid per key. - * - `hooks`: per event, base commands preserved; `options.hooks[event]` - * appended after the base ones. - * - `subagents`: base map preserved; `options.subagents` overlaid per key. - * - `prompt.systemPrompt`: replaced when `options.systemPrompt` is set. - * - `resources.files`: `options.extraFiles` concatenated after base files. - * - `name`: replaced when `options.name` is set. - */ -export function composeProductionAgentProfile( - baseProfile: AgentProfile, - options: ComposeProductionAgentProfileOptions = {}, -): AgentProfile { - const delegationMcp = buildDelegationMcpServer({ - sandboxApiKey: options.sandboxApiKey, - sandboxBaseUrl: options.sandboxBaseUrl, - env: options.env, - }) - - const baseMcp = baseProfile.mcp ?? {} - const withInjected: Record = options.mcpConnections - ? 
{ ...baseMcp, ...options.mcpConnections } - : { ...baseMcp } - const mergedMcp: Record = delegationMcp - ? { ...withInjected, ...delegationMcp } - : withInjected - - const baseFiles = baseProfile.resources?.files ?? [] - const mergedFiles: AgentProfileFileMount[] = options.extraFiles?.length - ? [...baseFiles, ...options.extraFiles] - : [...baseFiles] - - const prompt = options.systemPrompt - ? { ...baseProfile.prompt, systemPrompt: options.systemPrompt } - : baseProfile.prompt - - const mergedTools = options.tools - ? { ...(baseProfile.tools ?? {}), ...options.tools } - : baseProfile.tools - - const mergedHooks = mergeHooks(baseProfile.hooks, options.hooks) - - const mergedSubagents = options.subagents - ? { ...(baseProfile.subagents ?? {}), ...options.subagents } - : baseProfile.subagents - - return { - ...baseProfile, - name: options.name ?? baseProfile.name, - prompt, - ...(mergedTools ? { tools: mergedTools } : {}), - ...(mergedHooks ? { hooks: mergedHooks } : {}), - ...(mergedSubagents ? { subagents: mergedSubagents } : {}), - mcp: mergedMcp, - resources: { - ...baseProfile.resources, - files: mergedFiles, - }, - } -} - -/** Merge per-event hook command lists: base commands first, overlay commands - * appended after. Returns the base map unchanged when no overlay is given. */ -function mergeHooks( - base: Record | undefined, - overlay: Record | undefined, -): Record | undefined { - if (!overlay) return base - const merged: Record = { ...(base ?? {}) } - for (const [event, commands] of Object.entries(overlay)) { - merged[event] = [...(merged[event] ?? []), ...commands] - } - return merged -} diff --git a/src/mcp/detached-coder.ts b/src/mcp/detached-coder.ts index eb03ba64..0025f8b8 100644 --- a/src/mcp/detached-coder.ts +++ b/src/mcp/detached-coder.ts @@ -2,14 +2,12 @@ * @experimental * * Sandbox-session coder decode layer. 
The sandbox-session delegate (`./delegates`) and the - * cross-restart resume driver (`./bin`) run the in-box harness over a `SandboxClient` and need to - * (a) build an `AgentRunSpec` from the authored coder profile, (b) decode the harness event stream - * into a structured `CoderOutput`, and (c) gate it with the shared mechanical checks. This is the - * MCP server's built-in `delegate_code` path — it is the live default delegate, NOT dormant — and is - * kept separate from the generic recursive path: `worktreeFanout` instead settles the raw - * `WorktreePatchArtifact` and gates via `patchDelivered`. Only the OPTIONAL cross-restart resume - * (the `driveTurn` tick) is opt-in (`MCP_ENABLE_DETACHED_RESUME`); the held-stream delegate is - * always live. Prefer `worktreeFanout` / `worktreeLoopRunner` for NEW local-repo coding. + * cross-restart resume driver run the in-box harness over a `SandboxClient` and need to + * (a) build an `AgentRunSpec` from the caller-authored (or minimal model-only default) worker + * profile, (b) decode the harness event stream into a structured `CoderOutput`, and (c) gate it with + * the shared mechanical checks. This sandbox-session path is kept separate from the generic recursive + * path: `worktreeFanout` instead settles the raw `WorktreePatchArtifact` and gates via + * `patchDelivered`. Prefer `worktreeFanout` / `worktreeLoopRunner` for NEW local-repo coding. * * The decode tolerates two `result`-event shapes: * 1. 
the in-process executor's raw worktree-harness result (`{ branch, patch, stats, checks }`), @@ -20,12 +18,22 @@ import type { AgentProfile } from '@tangle-network/agent-interface' import type { SandboxEvent } from '@tangle-network/sandbox' -import { type CoderTask, coderProfile, coderTaskToPrompt } from '../profiles/coder' +import { type CoderTask, coderTaskToPrompt } from '../profiles/coder' import { type CoderCheckConstraints, runCoderChecks } from '../runtime/supervise/patch-checks' import type { AgentRunSpec, Driver, OutputAdapter, Validator } from '../runtime/types' const DEFAULT_MAX_DIFF_LINES = 400 +/** + * The minimal default worker profile (§1.5: the system authors profiles — there is no hardcoded + * coder). Model-only by construction: no skills, no tool grants, no standing prompt. Callers that + * want a richer worker pass their own `AgentProfile` via `CoderRunSpecOptions.profile`. `harness` / + * `model` / `systemPrompt` are layered onto whichever profile is used. + */ +function minimalCoderProfile(): AgentProfile { + return { name: 'coder' } +} + /** @experimental The structured coder result the sandbox-session path decodes + gates. */ export interface CoderOutput { /** Branch the agent wrote the patch on. */ @@ -41,27 +49,33 @@ export interface CoderOutput { /** @experimental Overrides for one authored coder run on the sandbox-session path. */ export interface CoderRunSpecOptions { + /** + * The caller-authored worker `AgentProfile` (§1.5). When omitted, a minimal model-only default is + * used (no hardcoded skills/tools/prompt). `harness` / `model` / `systemPrompt` are layered onto it. + */ + profile?: AgentProfile /** Sandbox-SDK backend.type. Default `'claude-code'`. */ harness?: string /** Default model id passed in `AgentProfile.model.default`. */ model?: string - /** Custom system prompt replacement. Default = the `coderProfile` constant's prompt. */ + /** Custom system prompt replacement. Default = the supplied profile's own prompt (or none). 
*/ systemPrompt?: string /** Stable name for `AgentRunSpec.name`. Default = `coder-${harness}`. */ name?: string } -/** Build the authored `AgentProfile` for one harness on the sandbox-session path, applying the - * optional per-run overrides over the `coderProfile` constant. */ +/** Build the authored `AgentProfile` for one harness on the sandbox-session path: the caller's + * profile (or the minimal model-only default), with the per-run harness/model/prompt overrides. */ function coderRunProfile(options: CoderRunSpecOptions): AgentProfile { const harness = options.harness ?? 'claude-code' const name = options.name ?? `coder-${harness}` + const base = options.profile ?? minimalCoderProfile() return { - ...coderProfile, + ...base, name, ...(options.systemPrompt ? { prompt: { systemPrompt: options.systemPrompt } } : {}), - model: options.model ? { default: options.model } : undefined, - metadata: { ...coderProfile.metadata, backendType: harness }, + model: options.model ? { default: options.model } : base.model, + metadata: { ...base.metadata, backendType: harness }, } } @@ -79,6 +93,11 @@ export const coderOutputAdapter: OutputAdapter = { parse: parseCode /** @experimental */ export interface MultiHarnessCoderFanoutOptions { + /** + * The caller-authored worker `AgentProfile` (§1.5), shared across every parallel harness. When + * omitted, the minimal model-only default is used. + */ + profile?: AgentProfile /** * Sandbox-SDK backend.type identifiers, one per parallel agent. Default: * `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']`. @@ -105,7 +124,13 @@ export function multiHarnessCoderFanout(options: MultiHarnessCoderFanoutOptions ? options.harnesses : ['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1'] const models = options.models ?? [] - const agentRuns = harnesses.map((harness, i) => coderRunSpec({ harness, model: models[i] })) + const agentRuns = harnesses.map((harness, i) => + coderRunSpec({ + ...(options.profile ? 
{ profile: options.profile } : {}), + harness, + model: models[i], + }), + ) const driver: Driver = { name: 'fanout', plan: async (task, history) => (history.length === 0 ? agentRuns.map(() => task) : []), diff --git a/src/mcp/index.ts b/src/mcp/index.ts index 8e36a43c..6f22d2b5 100644 --- a/src/mcp/index.ts +++ b/src/mcp/index.ts @@ -4,15 +4,13 @@ * `@tangle-network/agent-runtime/mcp` — Stdio MCP server exposing the * delegation tools to sandbox coding-harness agents: the generic `delegate` * (one intent → a supervisor that authors + drives its own worker, returns the - * delivered output with its cost), plus the task-specific `delegate_code`, - * `delegate_research`, `delegate_feedback`, `delegation_status`, and - * `delegation_history`. + * delivered output with its cost), plus the queue-bound `delegate_feedback`, + * `delegation_status`, and `delegation_history`. `delegate_ui_audit` is served + * when a `uiAuditorDelegate` is wired. * * Mount the server inside a product agent's sandbox via * `agent-runtime-mcp` (the bin) or wire it into a custom Node entry - * point with `createMcpServer({ ... })`. Pass `coderDelegate` / - * `researcherDelegate` factories you build from your project's - * sandbox client + run-loop topology. + * point with `createMcpServer({ ... })`. 
*/ export type { DetectExecutorArgs } from './bin-helpers' @@ -24,7 +22,6 @@ export type { DelegateRunCtx, DetachedSessionDelegateOptions, DetachedWinnerSelection, - ResearcherDelegate, SettleDetachedCoderTurnOptions, UiAuditorDelegate, } from './delegates' @@ -33,15 +30,6 @@ export { detachedSessionDelegate, settleDetachedCoderTurn, } from './delegates' -export type { - BuildDelegationMcpServerOptions, - ComposeProductionAgentProfileOptions, -} from './delegation-profile' -export { - buildDelegationMcpServer, - composeProductionAgentProfile, - DELEGATION_MCP_SERVER_KEY, -} from './delegation-profile' export type { DelegationStore, FileDelegationStoreOptions } from './delegation-store' export { DelegationPersistenceError, @@ -155,13 +143,6 @@ export { type DelegateResult, validateDelegateArgs, } from './tools/delegate' -export { - createDelegateCodeHandler, - DELEGATE_CODE_DESCRIPTION, - DELEGATE_CODE_INPUT_SCHEMA, - DELEGATE_CODE_TOOL_NAME, - validateDelegateCodeArgs, -} from './tools/delegate-code' export { createDelegateFeedbackHandler, DELEGATE_FEEDBACK_DESCRIPTION, @@ -169,13 +150,6 @@ export { DELEGATE_FEEDBACK_TOOL_NAME, validateDelegateFeedbackArgs, } from './tools/delegate-feedback' -export { - createDelegateResearchHandler, - DELEGATE_RESEARCH_DESCRIPTION, - DELEGATE_RESEARCH_INPUT_SCHEMA, - DELEGATE_RESEARCH_TOOL_NAME, - validateDelegateResearchArgs, -} from './tools/delegate-research' export { createDelegateUiAuditHandler, DELEGATE_UI_AUDIT_DESCRIPTION, diff --git a/src/mcp/openai-tools.ts b/src/mcp/openai-tools.ts index d7bcfac4..da7374d9 100644 --- a/src/mcp/openai-tools.ts +++ b/src/mcp/openai-tools.ts @@ -1,16 +1,15 @@ /** * @experimental * - * OpenAI Chat Completions `tools[]` projection of the 5 agent-runtime MCP - * delegation tools. + * OpenAI Chat Completions `tools[]` projection of the queue-bound agent-runtime + * MCP delegation tools. * * Use when configuring `createOpenAICompatibleBackend({ tools: ... 
})` so the - * model can call `delegate_code`, `delegate_research`, `delegate_feedback`, - * `delegation_status`, and `delegation_history` through the OpenAI-compat - * transport (tcloud, OpenRouter, OpenAI direct, cli-bridge). The runtime - * surfaces tool calls as `tool_call` stream events — execution is the - * caller's responsibility (typically the parent sandbox runtime's MCP - * mount). + * model can call `delegate_feedback`, `delegation_status`, and + * `delegation_history` through the OpenAI-compat transport (tcloud, OpenRouter, + * OpenAI direct, cli-bridge). The runtime surfaces tool calls as `tool_call` + * stream events — execution is the caller's responsibility (typically the + * parent sandbox runtime's MCP mount). * * Sandbox-SDK callers do NOT need this helper: the sandbox runtime mounts * MCP servers natively and the in-sandbox harness discovers tools via the @@ -22,21 +21,11 @@ */ import type { OpenAIChatTool } from '../types' -import { - DELEGATE_CODE_DESCRIPTION, - DELEGATE_CODE_INPUT_SCHEMA, - DELEGATE_CODE_TOOL_NAME, -} from './tools/delegate-code' import { DELEGATE_FEEDBACK_DESCRIPTION, DELEGATE_FEEDBACK_INPUT_SCHEMA, DELEGATE_FEEDBACK_TOOL_NAME, } from './tools/delegate-feedback' -import { - DELEGATE_RESEARCH_DESCRIPTION, - DELEGATE_RESEARCH_INPUT_SCHEMA, - DELEGATE_RESEARCH_TOOL_NAME, -} from './tools/delegate-research' import { DELEGATION_HISTORY_DESCRIPTION, DELEGATION_HISTORY_INPUT_SCHEMA, @@ -66,23 +55,12 @@ function buildTool( /** * @experimental * - * Returns the 5 delegation tools projected into OpenAI Chat Completions - * `tools[]` shape. The order is stable: `delegate_code`, - * `delegate_research`, `delegate_feedback`, `delegation_status`, - * `delegation_history`. + * Returns the queue-bound delegation tools projected into OpenAI Chat + * Completions `tools[]` shape. The order is stable: `delegate_feedback`, + * `delegation_status`, `delegation_history`. 
*/ export function mcpToolsForRuntimeMcp(): OpenAIChatTool[] { return [ - buildTool( - DELEGATE_CODE_TOOL_NAME, - DELEGATE_CODE_DESCRIPTION, - DELEGATE_CODE_INPUT_SCHEMA as Readonly>, - ), - buildTool( - DELEGATE_RESEARCH_TOOL_NAME, - DELEGATE_RESEARCH_DESCRIPTION, - DELEGATE_RESEARCH_INPUT_SCHEMA as Readonly>, - ), buildTool( DELEGATE_FEEDBACK_TOOL_NAME, DELEGATE_FEEDBACK_DESCRIPTION, diff --git a/src/mcp/server.ts b/src/mcp/server.ts index dbd60f4c..1f63fa97 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -1,14 +1,17 @@ /** * @experimental * - * Stdio JSON-RPC MCP server exposing the 5 delegation tools to sandbox - * coding-harness agents (claude-code, codex, opencode, ...). + * Stdio JSON-RPC MCP server exposing the delegation tools to sandbox + * coding-harness agents (claude-code, codex, opencode, ...): the generic + * `delegate` verb plus the queue-bound `delegate_feedback`, + * `delegation_status`, and `delegation_history`. `delegate_ui_audit` is served + * when a `uiAuditorDelegate` is wired. * * The server is transport-bound but topology-free: tool execution is * delegated to handler functions composed from a queue, a feedback - * store, and per-profile run delegates. Consumers wire those at - * construction time. The `agent-runtime-mcp` bin spins up a default - * configuration for the common case (real sandbox client + coder). + * store, and the wired run delegates. Consumers wire those at + * construction time. The `agent-runtime-mcp` bin serves the generic + * `delegate` verb over a real sandbox client when `MCP_ENABLE_DELEGATE=1`. * * Wire protocol: line-delimited JSON-RPC 2.0 over stdio. Each line is * one request; each response is one line. 
`tools/list` and `tools/call` @@ -19,7 +22,7 @@ import { createInterface, type Interface as ReadlineInterface } from 'node:readline' import { Readable, Writable } from 'node:stream' import { ValidationError } from '../errors' -import type { CoderDelegate, ResearcherDelegate, UiAuditorDelegate } from './delegates' +import type { UiAuditorDelegate } from './delegates' import { type FeedbackStore, InMemoryFeedbackStore } from './feedback-store' import { DelegationTaskQueue } from './task-queue' import { @@ -29,24 +32,12 @@ import { DELEGATE_TOOL_NAME, type DelegateHandlerOptions, } from './tools/delegate' -import { - createDelegateCodeHandler, - DELEGATE_CODE_DESCRIPTION, - DELEGATE_CODE_INPUT_SCHEMA, - DELEGATE_CODE_TOOL_NAME, -} from './tools/delegate-code' import { createDelegateFeedbackHandler, DELEGATE_FEEDBACK_DESCRIPTION, DELEGATE_FEEDBACK_INPUT_SCHEMA, DELEGATE_FEEDBACK_TOOL_NAME, } from './tools/delegate-feedback' -import { - createDelegateResearchHandler, - DELEGATE_RESEARCH_DESCRIPTION, - DELEGATE_RESEARCH_INPUT_SCHEMA, - DELEGATE_RESEARCH_TOOL_NAME, -} from './tools/delegate-research' import { createDelegateUiAuditHandler, DELEGATE_UI_AUDIT_DESCRIPTION, @@ -76,15 +67,6 @@ export interface McpServerOptions { * the agent's intent, so there is no worker profile to wire here. */ delegateSupervisor?: DelegateHandlerOptions - /** Required to enable delegate_code. */ - coderDelegate?: CoderDelegate - /** - * Required to enable delegate_research. The substrate cannot ship a - * default — wire one that closes over your `runLoop` + a - * researcher profile (typically `@tangle-network/agent-knowledge`'s - * `researcherProfile` / `multiHarnessResearcherFanout`). - */ - researcherDelegate?: ResearcherDelegate /** * Required to enable delegate_ui_audit. Wire one that closes over your * `runLoop` + `uiAuditorProfile` + a `SandboxClient` (the @@ -96,15 +78,6 @@ export interface McpServerOptions { feedbackStore?: FeedbackStore /** Override the default in-memory task queue. 
*/ queue?: DelegationTaskQueue - /** - * Record deterministic detached-session resume keys on single-variant - * coder/researcher submissions so a durable queue can resume them after a - * restart. Enable only when the wired delegates dispatch via sandbox - * sessions (`driveTurn`) AND `queue` persists records — the keys are inert - * otherwise. The bin turns this on alongside the durable store for - * session-backed (sibling/fleet) placements. - */ - detachedDispatch?: boolean /** * Extra tools to serve alongside the delegation tools, for example * `createCoordinationTools(...).tools`. Registered after the built-ins; a @@ -194,34 +167,6 @@ export function createMcpServer(options: McpServerOptions = {}): McpServer { handler: createDelegateHandler(options.delegateSupervisor), }) } - if (options.coderDelegate) { - tools.set(DELEGATE_CODE_TOOL_NAME, { - name: DELEGATE_CODE_TOOL_NAME, - description: DELEGATE_CODE_DESCRIPTION, - inputSchema: DELEGATE_CODE_INPUT_SCHEMA as unknown as Record, - handler: createDelegateCodeHandler({ - queue, - delegate: options.coderDelegate, - ...(options.detachedDispatch !== undefined - ? { detachedDispatch: options.detachedDispatch } - : {}), - }), - }) - } - if (options.researcherDelegate) { - tools.set(DELEGATE_RESEARCH_TOOL_NAME, { - name: DELEGATE_RESEARCH_TOOL_NAME, - description: DELEGATE_RESEARCH_DESCRIPTION, - inputSchema: DELEGATE_RESEARCH_INPUT_SCHEMA as unknown as Record, - handler: createDelegateResearchHandler({ - queue, - delegate: options.researcherDelegate, - ...(options.detachedDispatch !== undefined - ? 
{ detachedDispatch: options.detachedDispatch } - : {}), - }), - }) - } if (options.uiAuditorDelegate) { tools.set(DELEGATE_UI_AUDIT_TOOL_NAME, { name: DELEGATE_UI_AUDIT_TOOL_NAME, diff --git a/src/mcp/tools/delegate-code.ts b/src/mcp/tools/delegate-code.ts deleted file mode 100644 index a1b5460a..00000000 --- a/src/mcp/tools/delegate-code.ts +++ /dev/null @@ -1,218 +0,0 @@ -/** - * @experimental - * - * `delegate_code` MCP tool — async kickoff. The handler validates the - * input, computes an idempotency key over the canonical fields, hands - * the task to the queue, and returns `{ taskId, estimatedDurationMs }`. - */ - -import type { CoderDelegate } from '../delegates' -import { formatDetachedSessionRef } from '../detached-turn' -import { - type DelegateCodeArgs, - type DelegateCodeResult, - type DelegationTaskQueue, - hashIdempotencyInput, -} from '../task-queue' - -/** @experimental */ -export const DELEGATE_CODE_TOOL_NAME = 'delegate_code' - -/** @experimental */ -export const DELEGATE_CODE_DESCRIPTION = [ - 'Delegate a coding task to specialist coder agents that produce a validated patch.', - '', - 'Use when: you need code written, fixed, refactored, or extended to satisfy a', - 'user goal that touches a real repository. The coder runs in an isolated', - 'sandbox, opens a fresh branch, keeps the diff minimal, runs the supplied', - 'test + typecheck commands, and emits a unified-diff patch.', - '', - 'Returns immediately with a taskId. Poll delegation_status to retrieve the', - 'patch + validator verdict (typically minutes-to-hours, longer for large', - 'changes). Identical inputs return the same taskId — safe to retry.', - '', - 'When variants > 1, multiple coder harnesses (claude-code, codex, opencode)', - 'attempt the task in parallel and the highest-scoring patch wins (smallest', - 'passing diff). 
Use variants for high-stakes changes; single variant for', - 'routine ones.', - '', - 'Capability scope: the coder cannot modify paths outside repoRoot and cannot', - 'touch paths in config.forbiddenPaths. The validator hard-fails on a', - 'forbidden-path violation, diff above config.maxDiffLines, test failure, or', - 'typecheck failure — none of those make it past the gate.', -].join('\n') - -/** @experimental */ -export const DELEGATE_CODE_INPUT_SCHEMA = { - type: 'object', - properties: { - goal: { - type: 'string', - description: 'Natural-language description of what the coder must accomplish.', - }, - repoRoot: { - type: 'string', - description: 'Absolute path inside the sandbox where the repo lives.', - }, - contextHint: { - type: 'string', - description: 'Optional free-form context the coder sees in the prompt prelude.', - }, - variants: { - type: 'integer', - minimum: 1, - maximum: 8, - description: 'Number of parallel coder harnesses. Default 1.', - }, - config: { - type: 'object', - properties: { - testCmd: { type: 'string' }, - typecheckCmd: { type: 'string' }, - forbiddenPaths: { type: 'array', items: { type: 'string' } }, - maxDiffLines: { type: 'integer', minimum: 1 }, - }, - additionalProperties: false, - }, - namespace: { - type: 'string', - description: 'Multi-tenant scope (customer-id, workspace-id).', - }, - }, - required: ['goal', 'repoRoot'], - additionalProperties: false, -} as const - -const SINGLE_VARIANT_ESTIMATE_MS = 6 * 60 * 1000 // 6 minutes — single coder -const FANOUT_PER_VARIANT_ESTIMATE_MS = 8 * 60 * 1000 // 8 minutes — fanout - -/** @experimental */ -export function validateDelegateCodeArgs(raw: unknown): DelegateCodeArgs { - if (raw === null || typeof raw !== 'object') { - throw new TypeError('delegate_code: arguments must be an object') - } - const value = raw as Record - const goal = value.goal - if (typeof goal !== 'string' || goal.trim().length === 0) { - throw new TypeError('delegate_code: `goal` must be a non-empty string') - 
} - const repoRoot = value.repoRoot - if (typeof repoRoot !== 'string' || repoRoot.trim().length === 0) { - throw new TypeError('delegate_code: `repoRoot` must be a non-empty string') - } - const args: DelegateCodeArgs = { goal: goal.trim(), repoRoot: repoRoot.trim() } - if (typeof value.contextHint === 'string') args.contextHint = value.contextHint - if (value.variants !== undefined) { - const variants = Number(value.variants) - if (!Number.isFinite(variants) || variants < 1 || variants > 8) { - throw new RangeError('delegate_code: `variants` must be an integer in [1, 8]') - } - args.variants = Math.trunc(variants) - } - if (value.config !== undefined) { - args.config = validateConfig(value.config) - } - if (typeof value.namespace === 'string') args.namespace = value.namespace - return args -} - -function validateConfig(raw: unknown): DelegateCodeArgs['config'] { - if (raw === null || typeof raw !== 'object') { - throw new TypeError('delegate_code: `config` must be an object') - } - const value = raw as Record - const out: NonNullable = {} - if (value.testCmd !== undefined) { - if (typeof value.testCmd !== 'string') { - throw new TypeError('delegate_code: `config.testCmd` must be a string') - } - out.testCmd = value.testCmd - } - if (value.typecheckCmd !== undefined) { - if (typeof value.typecheckCmd !== 'string') { - throw new TypeError('delegate_code: `config.typecheckCmd` must be a string') - } - out.typecheckCmd = value.typecheckCmd - } - if (value.forbiddenPaths !== undefined) { - if (!Array.isArray(value.forbiddenPaths)) { - throw new TypeError('delegate_code: `config.forbiddenPaths` must be a string array') - } - out.forbiddenPaths = value.forbiddenPaths.map((entry, i) => { - if (typeof entry !== 'string') { - throw new TypeError(`delegate_code: forbiddenPaths[${i}] must be a string`) - } - return entry - }) - } - if (value.maxDiffLines !== undefined) { - const n = Number(value.maxDiffLines) - if (!Number.isFinite(n) || n < 1) { - throw new 
RangeError('delegate_code: `config.maxDiffLines` must be a positive integer') - } - out.maxDiffLines = Math.trunc(n) - } - return out -} - -/** @experimental */ -export interface DelegateCodeHandlerOptions { - queue: DelegationTaskQueue - delegate: CoderDelegate - /** Override the duration hint. */ - estimateDurationMs?: (args: DelegateCodeArgs) => number - /** - * Record a deterministic detached-session resume key on single-variant - * submissions (derived from the idempotency key, so retried identical - * inputs name the same logical turn). Enable only when the wired delegate - * dispatches via sandbox sessions — `detachedSessionDelegate` routes - * onto its `driveTurn` tick path when the ref is present. Fanout - * (`variants > 1`) never records a ref: one resume key cannot express N - * sessions + winner selection. - */ - detachedDispatch?: boolean -} - -/** @experimental */ -export function createDelegateCodeHandler( - options: DelegateCodeHandlerOptions, -): (raw: unknown) => Promise { - const estimateDurationMs = options.estimateDurationMs ?? defaultEstimate - return async (raw) => { - const args = validateDelegateCodeArgs(raw) - const idempotencyKey = hashIdempotencyInput({ - profile: 'coder', - goal: args.goal, - repoRoot: args.repoRoot, - contextHint: args.contextHint, - variants: args.variants ?? 1, - config: args.config, - namespace: args.namespace, - }) - const detached = options.detachedDispatch === true && (args.variants ?? 1) <= 1 - const submitted = options.queue.submit({ - profile: 'coder', - args, - namespace: args.namespace, - idempotencyKey, - ...(detached - ? { - detachedSessionRef: formatDetachedSessionRef({ - sessionId: `dlg-turn-coder-${idempotencyKey}`, - }), - } - : {}), - run: async (ctx) => options.delegate(args, ctx), - }) - return { - taskId: submitted.taskId, - estimatedDurationMs: estimateDurationMs(args), - } - } -} - -function defaultEstimate(args: DelegateCodeArgs): number { - const variants = Math.max(1, args.variants ?? 
1) - if (variants === 1) return SINGLE_VARIANT_ESTIMATE_MS - return FANOUT_PER_VARIANT_ESTIMATE_MS -} diff --git a/src/mcp/tools/delegate-research.ts b/src/mcp/tools/delegate-research.ts deleted file mode 100644 index 2248fdf2..00000000 --- a/src/mcp/tools/delegate-research.ts +++ /dev/null @@ -1,233 +0,0 @@ -/** - * @experimental - * - * `delegate_research` MCP tool — async kickoff for source-grounded - * research tasks. Same async semantics as `delegate_code`: returns a - * taskId immediately, idempotent on canonical inputs. - * - * The handler does not import a researcher profile directly — consumers - * inject a `ResearcherDelegate` via `createMcpServer({ researcherDelegate })`. - * The substrate cannot depend on `@tangle-network/agent-knowledge` - * without inducing a dependency cycle. - */ - -import type { ResearcherDelegate } from '../delegates' -import { formatDetachedSessionRef } from '../detached-turn' -import { - type DelegateResearchArgs, - type DelegateResearchResult, - type DelegationTaskQueue, - hashIdempotencyInput, -} from '../task-queue' -import type { ResearchSource } from '../types' - -/** @experimental */ -export const DELEGATE_RESEARCH_TOOL_NAME = 'delegate_research' - -/** @experimental */ -export const DELEGATE_RESEARCH_DESCRIPTION = [ - 'Delegate a research question to specialist researcher agents that produce', - 'source-grounded, evidence-bearing knowledge items.', - '', - 'Use when: you need to answer a factual question with external evidence —', - 'audience research, competitive intelligence, recency-bound web searches,', - 'corpus / docs lookups. The researcher emits items[] with provenance, a', - 'citations[] index, and proposedWrites[] you decide whether to persist.', - '', - 'Returns immediately with a taskId. Poll delegation_status to retrieve the', - 'items + verdict. 
Identical inputs return the same taskId — safe to retry.', - '', - 'When variants > 1, multiple researcher harnesses run in parallel and the', - 'highest-scoring valid output wins (citation density × source diversity ×', - 'recency match × gap coverage). Use variants when answers might disagree.', - '', - 'Multi-tenant isolation: every item carries `namespace`. The validator', - 'hard-fails when any item is scoped outside `namespace`. Never pass another', - "tenant's namespace.", -].join('\n') - -const VALID_SOURCES: readonly ResearchSource[] = ['web', 'corpus', 'twitter', 'github', 'docs'] - -/** @experimental */ -export const DELEGATE_RESEARCH_INPUT_SCHEMA = { - type: 'object', - properties: { - question: { - type: 'string', - description: 'The research question to answer.', - }, - namespace: { - type: 'string', - description: 'Multi-tenant scope (customer-id, workspace-id). REQUIRED.', - }, - scope: { type: 'string', description: 'Bound, e.g. "audience for cpg-founder ICP".' }, - sources: { - type: 'array', - items: { type: 'string', enum: [...VALID_SOURCES] }, - }, - variants: { type: 'integer', minimum: 1, maximum: 8 }, - config: { - type: 'object', - properties: { - recencyWindow: { - type: 'object', - properties: { - since: { type: 'string', description: 'ISO datetime' }, - until: { type: 'string', description: 'ISO datetime' }, - }, - additionalProperties: false, - }, - maxItems: { type: 'integer', minimum: 1 }, - minConfidence: { type: 'number', minimum: 0, maximum: 1 }, - }, - additionalProperties: false, - }, - }, - required: ['question', 'namespace'], - additionalProperties: false, -} as const - -const SINGLE_VARIANT_ESTIMATE_MS = 4 * 60 * 1000 -const FANOUT_PER_VARIANT_ESTIMATE_MS = 6 * 60 * 1000 - -/** @experimental */ -export function validateDelegateResearchArgs(raw: unknown): DelegateResearchArgs { - if (raw === null || typeof raw !== 'object') { - throw new TypeError('delegate_research: arguments must be an object') - } - const value = raw as 
Record - const question = value.question - if (typeof question !== 'string' || question.trim().length === 0) { - throw new TypeError('delegate_research: `question` must be a non-empty string') - } - const namespace = value.namespace - if (typeof namespace !== 'string' || namespace.trim().length === 0) { - throw new TypeError('delegate_research: `namespace` is required') - } - const args: DelegateResearchArgs = { question: question.trim(), namespace: namespace.trim() } - if (typeof value.scope === 'string') args.scope = value.scope - if (value.sources !== undefined) { - if (!Array.isArray(value.sources)) { - throw new TypeError('delegate_research: `sources` must be a string array') - } - const sources: ResearchSource[] = value.sources.map((src, i) => { - if (typeof src !== 'string' || !VALID_SOURCES.includes(src as ResearchSource)) { - throw new TypeError( - `delegate_research: sources[${i}] must be one of ${VALID_SOURCES.join('|')}`, - ) - } - return src as ResearchSource - }) - args.sources = sources - } - if (value.variants !== undefined) { - const variants = Number(value.variants) - if (!Number.isFinite(variants) || variants < 1 || variants > 8) { - throw new RangeError('delegate_research: `variants` must be an integer in [1, 8]') - } - args.variants = Math.trunc(variants) - } - if (value.config !== undefined) { - args.config = validateConfig(value.config) - } - return args -} - -function validateConfig(raw: unknown): DelegateResearchArgs['config'] { - if (raw === null || typeof raw !== 'object') { - throw new TypeError('delegate_research: `config` must be an object') - } - const value = raw as Record - const out: NonNullable = {} - if (value.recencyWindow !== undefined) { - if (value.recencyWindow === null || typeof value.recencyWindow !== 'object') { - throw new TypeError('delegate_research: `config.recencyWindow` must be an object') - } - const window = value.recencyWindow as Record - const windowOut: NonNullable['recencyWindow']> = {} - if (window.since !== 
undefined) { - if (typeof window.since !== 'string' || Number.isNaN(Date.parse(window.since))) { - throw new TypeError('delegate_research: `recencyWindow.since` must be an ISO datetime') - } - windowOut.since = window.since - } - if (window.until !== undefined) { - if (typeof window.until !== 'string' || Number.isNaN(Date.parse(window.until))) { - throw new TypeError('delegate_research: `recencyWindow.until` must be an ISO datetime') - } - windowOut.until = window.until - } - out.recencyWindow = windowOut - } - if (value.maxItems !== undefined) { - const n = Number(value.maxItems) - if (!Number.isFinite(n) || n < 1) { - throw new RangeError('delegate_research: `config.maxItems` must be a positive integer') - } - out.maxItems = Math.trunc(n) - } - if (value.minConfidence !== undefined) { - const n = Number(value.minConfidence) - if (!Number.isFinite(n) || n < 0 || n > 1) { - throw new RangeError('delegate_research: `config.minConfidence` must be in [0, 1]') - } - out.minConfidence = n - } - return out -} - -/** @experimental */ -export interface DelegateResearchHandlerOptions { - queue: DelegationTaskQueue - delegate: ResearcherDelegate - estimateDurationMs?: (args: DelegateResearchArgs) => number - /** - * Record a deterministic detached-session resume key on single-variant - * submissions. Same contract as `DelegateCodeHandlerOptions.detachedDispatch`. - */ - detachedDispatch?: boolean -} - -/** @experimental */ -export function createDelegateResearchHandler( - options: DelegateResearchHandlerOptions, -): (raw: unknown) => Promise { - const estimateDurationMs = options.estimateDurationMs ?? defaultEstimate - return async (raw) => { - const args = validateDelegateResearchArgs(raw) - const idempotencyKey = hashIdempotencyInput({ - profile: 'researcher', - question: args.question, - namespace: args.namespace, - scope: args.scope, - sources: args.sources, - variants: args.variants ?? 
1, - config: args.config, - }) - const detached = options.detachedDispatch === true && (args.variants ?? 1) <= 1 - const submitted = options.queue.submit({ - profile: 'researcher', - args, - namespace: args.namespace, - idempotencyKey, - ...(detached - ? { - detachedSessionRef: formatDetachedSessionRef({ - sessionId: `dlg-turn-research-${idempotencyKey}`, - }), - } - : {}), - run: async (ctx) => options.delegate(args, ctx), - }) - return { - taskId: submitted.taskId, - estimatedDurationMs: estimateDurationMs(args), - } - } -} - -function defaultEstimate(args: DelegateResearchArgs): number { - const variants = Math.max(1, args.variants ?? 1) - if (variants === 1) return SINGLE_VARIANT_ESTIMATE_MS - return FANOUT_PER_VARIANT_ESTIMATE_MS -} diff --git a/src/profiles/coder.ts b/src/profiles/coder.ts index e61c4d9e..8537d363 100644 --- a/src/profiles/coder.ts +++ b/src/profiles/coder.ts @@ -1,19 +1,14 @@ /** * @experimental * - * `coderProfile` — the §1.5 author-the-profile DATA for code-modification tasks: an `AgentProfile` - * constant (the agent IS its profile) plus a pure `coderTaskToPrompt` formatter that renders a - * `CoderTask` into the per-task instruction. There is no factory, output adapter, or validator - * here — a domain customizes the worker by authoring a profile + handing it to a leaf executor - * (`createWorktreeCliExecutor`) or a fanout (`worktreeFanout`), and "is it delivered" is a - * `DeliverableSpec` (`patchDelivered`), not a bundled validator. - * - * The standing instruction tells the agent to work on a fresh branch, keep the patch minimal, - * avoid forbidden paths, and run the test + typecheck commands before declaring done. + * `CoderTask` + `coderTaskToPrompt` — the per-task DATA + pure formatter for code-modification tasks + * (§1.5: the system authors profiles; there is no hardcoded coder profile constant). 
A domain + * customizes the worker by authoring its own `AgentProfile` and handing it to a leaf executor + * (`createWorktreeCliExecutor`) or a fanout (`worktreeFanout`); "is it delivered" is a + * `DeliverableSpec` (`patchDelivered`), not a bundled validator. This formatter renders a `CoderTask` + * into the per-task instruction that profile receives. */ -import type { AgentProfile } from '@tangle-network/agent-interface' - const DEFAULT_MAX_DIFF_LINES = 400 /** @experimental The per-task inputs `coderTaskToPrompt` renders + the worktree gate enforces. */ @@ -39,44 +34,6 @@ export interface CoderTask { maxDiffLines?: number } -/** @experimental The coder agent's standing instruction (its body lives in `coderProfile.prompt`). */ -export const DEFAULT_CODER_SYSTEM_PROMPT = [ - 'You are a coder agent operating inside an isolated sandbox workspace.', - 'Your job is to deliver a minimal, correct patch for the user-supplied goal.', - '', - 'Hard rules:', - ' 1. Work on a fresh branch off the supplied base. Do not mutate the base branch.', - ' 2. Never touch a forbidden path. The user will list them explicitly.', - ' 3. Keep the diff under the max-diff cap. Prefer the smallest change that ships.', - ' 4. Run the supplied test and typecheck commands before declaring done.', - ' 5. If either command fails, fix the cause — do not weaken the test or hide the error.', - '', - 'When you finish, emit a single final structured message of the shape:', - ' ```json', - ' { "branch": "",', - ' "patch": "",', - ' "testResult": { "passed": , "output": "" },', - ' "typecheckResult": { "passed": , "output": "" },', - ' "diffStats": { "filesChanged": , "insertions": , "deletions": },', - ' "reviewerNotes": "" }', - ' ```', -].join('\n') - -/** - * @experimental - * - * The coder `AgentProfile` — the §1.5 DATA the substrate materializes into a harness invocation. 
- * Stateless and harness-agnostic: a consumer overrides `model`/`metadata.backendType` by spreading - * a copy, never by a factory. `worktreeFanout` authors one such profile per harness leaf. - */ -export const coderProfile: AgentProfile = { - name: 'coder', - description: 'Code-modification agent. Minimal-diff worktree-based coder.', - prompt: { systemPrompt: DEFAULT_CODER_SYSTEM_PROMPT }, - tools: { git: true, fs: true, shell: true, test_runner: true }, - metadata: { role: 'coder' }, -} - /** @experimental Render a `CoderTask` into the per-task instruction handed to the coder profile. */ export function coderTaskToPrompt(task: CoderTask): string { const base = task.baseBranch ?? 'main' diff --git a/src/profiles/index.ts b/src/profiles/index.ts index fa98f944..2058d1ac 100644 --- a/src/profiles/index.ts +++ b/src/profiles/index.ts @@ -24,7 +24,7 @@ export { writeAuditIndex, } from '../audit' export type { CoderTask } from './coder' -export { coderProfile, coderTaskToPrompt, DEFAULT_CODER_SYSTEM_PROMPT } from './coder' +export { coderTaskToPrompt } from './coder' export type { BrowserContextHandle, BrowserHandle, diff --git a/tests/loop-runner.test.ts b/tests/loop-runner.test.ts index ebea434e..58b02e07 100644 --- a/tests/loop-runner.test.ts +++ b/tests/loop-runner.test.ts @@ -1,8 +1,6 @@ -import type { CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox' import { describe, expect, it } from 'vitest' import { ConfigError } from '../src/errors' -import { coderLoopRunner, type DelegatedLoopRegistry, runDelegatedLoop } from '../src/loop-runner' -import type { CoderOutput } from '../src/mcp/detached-coder' +import { type DelegatedLoopRegistry, runDelegatedLoop } from '../src/loop-runner' const clock = () => { let t = 0 @@ -41,35 +39,6 @@ describe('runDelegatedLoop — mode dispatch', () => { }) }) -describe('coderLoopRunner — code mode over the hardened delegate', () => { - it('runs the coder delegate and returns its winning CoderOutput', 
async () => { - const out: CoderOutput = { - branch: 'feat/fix', - patch: 'diff --git a/src/x.ts b/src/x.ts\n--- a/src/x.ts\n+++ b/src/x.ts\n+ok\n', - testResult: { passed: true, output: 'ok' }, - typecheckResult: { passed: true, output: 'ok' }, - diffStats: { filesChanged: 1, insertions: 1, deletions: 0 }, - } - const sandboxClient = { - async create(_o?: CreateSandboxOptions): Promise { - return { - async *streamPrompt() { - yield { type: 'result', data: { result: out } } satisfies SandboxEvent - }, - } as unknown as SandboxInstance - }, - } - const runner = coderLoopRunner({ - sandboxClient, - args: { goal: 'fix x', repoRoot: '/repo' }, - }) - const registry: DelegatedLoopRegistry = { code: runner } - const r = await runDelegatedLoop('code', registry) - expect(r.ok).toBe(true) - expect(r.output?.branch).toBe('feat/fix') - }) -}) - import { researchLoopRunner, type VetoedFact } from '../src/loop-runner' import type { FactCandidate } from '../src/mcp/kb-gate' diff --git a/tests/mcp/delegate-code.test.ts b/tests/mcp/delegate-code.test.ts deleted file mode 100644 index ddd0b85b..00000000 --- a/tests/mcp/delegate-code.test.ts +++ /dev/null @@ -1,120 +0,0 @@ -import { describe, expect, it } from 'vitest' -import type { CoderDelegate } from '../../src/mcp/delegates' -import { DelegationTaskQueue } from '../../src/mcp/task-queue' -import { - createDelegateCodeHandler, - DELEGATE_CODE_DESCRIPTION, - DELEGATE_CODE_INPUT_SCHEMA, - DELEGATE_CODE_TOOL_NAME, - validateDelegateCodeArgs, -} from '../../src/mcp/tools/delegate-code' - -const stubOutput = { - branch: 'feat/x', - patch: '', - testResult: { passed: true, output: '' }, - typecheckResult: { passed: true, output: '' }, - diffStats: { filesChanged: 0, insertions: 0, deletions: 0 }, -} - -const stubDelegate: CoderDelegate = async () => stubOutput - -describe('validateDelegateCodeArgs', () => { - it('accepts the minimal required surface', () => { - const args = validateDelegateCodeArgs({ goal: 'fix', repoRoot: '/r' }) - 
expect(args).toEqual({ goal: 'fix', repoRoot: '/r' }) - }) - - it('rejects an empty goal', () => { - expect(() => validateDelegateCodeArgs({ goal: ' ', repoRoot: '/r' })).toThrow(TypeError) - }) - - it('rejects variants outside [1, 8]', () => { - expect(() => validateDelegateCodeArgs({ goal: 'g', repoRoot: '/r', variants: 0 })).toThrow( - RangeError, - ) - expect(() => validateDelegateCodeArgs({ goal: 'g', repoRoot: '/r', variants: 9 })).toThrow( - RangeError, - ) - }) - - it('rejects non-string forbiddenPaths entries', () => { - expect(() => - validateDelegateCodeArgs({ - goal: 'g', - repoRoot: '/r', - config: { forbiddenPaths: ['ok', 1] }, - }), - ).toThrow(TypeError) - }) - - it('coerces variants to a positive integer', () => { - const args = validateDelegateCodeArgs({ goal: 'g', repoRoot: '/r', variants: 3.7 }) - expect(args.variants).toBe(3) - }) -}) - -describe('createDelegateCodeHandler', () => { - it('returns a taskId and estimated duration on a happy-path call', async () => { - const queue = new DelegationTaskQueue() - const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate }) - const result = await handler({ goal: 'fix', repoRoot: '/r' }) - expect(result.taskId).toMatch(/^dlg-/) - expect(result.estimatedDurationMs).toBeGreaterThan(0) - await new Promise((r) => setImmediate(r)) - await new Promise((r) => setImmediate(r)) - expect(queue.status(result.taskId)?.status).toBe('completed') - }) - - it('is idempotent: duplicate identical input returns the same taskId', async () => { - const queue = new DelegationTaskQueue() - const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate }) - const first = await handler({ goal: 'fix', repoRoot: '/r', variants: 2 }) - const second = await handler({ goal: 'fix', repoRoot: '/r', variants: 2 }) - expect(second.taskId).toBe(first.taskId) - }) - - it('returns a different taskId when contextHint differs', async () => { - const queue = new DelegationTaskQueue() - const handler = 
createDelegateCodeHandler({ queue, delegate: stubDelegate }) - const first = await handler({ goal: 'fix', repoRoot: '/r' }) - const second = await handler({ goal: 'fix', repoRoot: '/r', contextHint: 'see issue #42' }) - expect(second.taskId).not.toBe(first.taskId) - }) - - it('propagates validation errors out of the handler', async () => { - const queue = new DelegationTaskQueue() - const handler = createDelegateCodeHandler({ queue, delegate: stubDelegate }) - await expect(handler({ goal: '', repoRoot: '/r' })).rejects.toThrow(/non-empty string/) - }) - - it('surfaces delegate exceptions via the queue status', async () => { - const queue = new DelegationTaskQueue() - const handler = createDelegateCodeHandler({ - queue, - delegate: async () => { - throw new TypeError('bad sandbox') - }, - }) - const { taskId } = await handler({ goal: 'fix', repoRoot: '/r' }) - await new Promise((r) => setImmediate(r)) - await new Promise((r) => setImmediate(r)) - const status = queue.status(taskId) - expect(status?.status).toBe('failed') - expect(status?.error?.message).toBe('bad sandbox') - expect(status?.error?.kind).toBe('TypeError') - }) -}) - -describe('tool descriptors', () => { - it('exposes a non-empty description that explains when to call', () => { - expect(DELEGATE_CODE_TOOL_NAME).toBe('delegate_code') - expect(DELEGATE_CODE_DESCRIPTION).toMatch(/Use when:/) - }) - it('declares goal and repoRoot as required', () => { - expect((DELEGATE_CODE_INPUT_SCHEMA as { required: string[] }).required).toEqual([ - 'goal', - 'repoRoot', - ]) - }) -}) diff --git a/tests/mcp/delegate-research.test.ts b/tests/mcp/delegate-research.test.ts deleted file mode 100644 index 56d59316..00000000 --- a/tests/mcp/delegate-research.test.ts +++ /dev/null @@ -1,84 +0,0 @@ -import { describe, expect, it } from 'vitest' -import type { ResearcherDelegate } from '../../src/mcp/delegates' -import { DelegationTaskQueue } from '../../src/mcp/task-queue' -import { - createDelegateResearchHandler, - 
DELEGATE_RESEARCH_DESCRIPTION, - DELEGATE_RESEARCH_INPUT_SCHEMA, - DELEGATE_RESEARCH_TOOL_NAME, - validateDelegateResearchArgs, -} from '../../src/mcp/tools/delegate-research' - -const stubOutput = { items: [], citations: [], proposedWrites: [] } -const stubDelegate: ResearcherDelegate = async () => stubOutput - -describe('validateDelegateResearchArgs', () => { - it('requires question + namespace', () => { - expect(() => validateDelegateResearchArgs({ namespace: 'x' })).toThrow(/question/) - expect(() => validateDelegateResearchArgs({ question: 'q?' })).toThrow(/namespace/) - }) - - it('rejects unknown source types', () => { - expect(() => - validateDelegateResearchArgs({ question: 'q?', namespace: 'x', sources: ['rss'] }), - ).toThrow(TypeError) - }) - - it('validates recencyWindow datetimes', () => { - expect(() => - validateDelegateResearchArgs({ - question: 'q?', - namespace: 'x', - config: { recencyWindow: { since: 'not-a-date' } }, - }), - ).toThrow(TypeError) - const ok = validateDelegateResearchArgs({ - question: 'q?', - namespace: 'x', - config: { recencyWindow: { since: '2026-01-01T00:00:00Z' } }, - }) - expect(ok.config?.recencyWindow?.since).toBe('2026-01-01T00:00:00Z') - }) - - it('rejects minConfidence outside [0, 1]', () => { - expect(() => - validateDelegateResearchArgs({ - question: 'q?', - namespace: 'x', - config: { minConfidence: 1.5 }, - }), - ).toThrow(RangeError) - }) -}) - -describe('createDelegateResearchHandler', () => { - it('returns a taskId with the researcher profile and isolates by namespace', async () => { - const queue = new DelegationTaskQueue() - const handler = createDelegateResearchHandler({ queue, delegate: stubDelegate }) - const { taskId } = await handler({ question: 'q?', namespace: 'tenant-a' }) - await new Promise((r) => setImmediate(r)) - await new Promise((r) => setImmediate(r)) - const status = queue.status(taskId) - expect(status?.profile).toBe('researcher') - expect(status?.status).toBe('completed') - 
expect(queue.history({ namespace: 'tenant-a' }).length).toBe(1) - expect(queue.history({ namespace: 'tenant-b' }).length).toBe(0) - }) - - it('is idempotent on identical inputs', async () => { - const queue = new DelegationTaskQueue() - const handler = createDelegateResearchHandler({ queue, delegate: stubDelegate }) - const a = await handler({ question: 'q?', namespace: 'x' }) - const b = await handler({ question: 'q?', namespace: 'x' }) - expect(b.taskId).toBe(a.taskId) - }) - - it('exposes a description that warns about cross-tenant namespace risk', () => { - expect(DELEGATE_RESEARCH_TOOL_NAME).toBe('delegate_research') - expect(DELEGATE_RESEARCH_DESCRIPTION).toMatch(/namespace/i) - expect((DELEGATE_RESEARCH_INPUT_SCHEMA as { required: string[] }).required).toEqual([ - 'question', - 'namespace', - ]) - }) -}) diff --git a/tests/mcp/delegation-profile.test.ts b/tests/mcp/delegation-profile.test.ts deleted file mode 100644 index c218c93f..00000000 --- a/tests/mcp/delegation-profile.test.ts +++ /dev/null @@ -1,143 +0,0 @@ -import type { AgentProfile } from '@tangle-network/sandbox' -import { describe, expect, it } from 'vitest' -import { - buildDelegationMcpServer, - composeProductionAgentProfile, - DELEGATION_MCP_SERVER_KEY, -} from '../../src/mcp/delegation-profile' - -const BASE: AgentProfile = { - name: 'demo-agent', - prompt: { systemPrompt: 'base prompt', metadata: { v: 1 } }, - mcp: { - 'domain-tools': { transport: 'stdio', command: 'domain-mcp', enabled: true }, - }, - resources: { - files: [{ path: '/skills/a.md', resource: { kind: 'inline', name: 'a', content: 'x' } }], - }, - metadata: { domain: 'demo' }, -} - -describe('buildDelegationMcpServer — fail-closed on missing key', () => { - it('returns undefined when no sandbox API key resolves', () => { - expect(buildDelegationMcpServer({ env: {} })).toBeUndefined() - }) - - it('emits the delegation entry with the runtime MCP bin command when a key is present', () => { - const result = 
buildDelegationMcpServer({ env: { TANGLE_API_KEY: 'sk-test' } }) - expect(result).toBeDefined() - const entry = result?.[DELEGATION_MCP_SERVER_KEY] - expect(entry).toEqual({ - transport: 'stdio', - command: 'npx', - args: ['-y', '@tangle-network/agent-runtime', 'mcp'], - env: { - TANGLE_API_KEY: 'sk-test', - SANDBOX_BASE_URL: 'https://sandbox.tangle.tools', - }, - enabled: true, - metadata: { - surface: 'delegation:dispatch', - tools: [ - 'delegate_code', - 'delegate_research', - 'delegate_feedback', - 'delegation_status', - 'delegation_history', - ], - }, - }) - }) - - it('prefers explicit sandboxApiKey over the env key', () => { - const result = buildDelegationMcpServer({ - sandboxApiKey: 'explicit', - env: { TANGLE_API_KEY: 'from-env' }, - }) - expect(result?.[DELEGATION_MCP_SERVER_KEY]?.env?.TANGLE_API_KEY).toBe('explicit') - }) - - it('resolves base URL precedence sandboxBaseUrl > SANDBOX_BASE_URL > SANDBOX_API_URL > default', () => { - expect( - buildDelegationMcpServer({ sandboxApiKey: 'k', sandboxBaseUrl: 'https://explicit' })?.[ - DELEGATION_MCP_SERVER_KEY - ]?.env?.SANDBOX_BASE_URL, - ).toBe('https://explicit') - expect( - buildDelegationMcpServer({ - env: { TANGLE_API_KEY: 'k', SANDBOX_BASE_URL: 'https://b1', SANDBOX_API_URL: 'https://b2' }, - })?.[DELEGATION_MCP_SERVER_KEY]?.env?.SANDBOX_BASE_URL, - ).toBe('https://b1') - expect( - buildDelegationMcpServer({ env: { TANGLE_API_KEY: 'k', SANDBOX_API_URL: 'https://b2' } })?.[ - DELEGATION_MCP_SERVER_KEY - ]?.env?.SANDBOX_BASE_URL, - ).toBe('https://b2') - }) - - it('forwards OTEL + trace-correlation vars only when present', () => { - const withOtel = buildDelegationMcpServer({ - env: { - TANGLE_API_KEY: 'k', - OTEL_EXPORTER_OTLP_ENDPOINT: 'https://otlp', - OTEL_EXPORTER_OTLP_HEADERS: 'authorization=Bearer t', - TRACE_ID: 'trace-1', - PARENT_SPAN_ID: 'span-1', - }, - })?.[DELEGATION_MCP_SERVER_KEY]?.env - expect(withOtel).toMatchObject({ - OTEL_EXPORTER_OTLP_ENDPOINT: 'https://otlp', - 
OTEL_EXPORTER_OTLP_HEADERS: 'authorization=Bearer t', - TRACE_ID: 'trace-1', - PARENT_SPAN_ID: 'span-1', - }) - const withoutOtel = buildDelegationMcpServer({ env: { TANGLE_API_KEY: 'k' } })?.[ - DELEGATION_MCP_SERVER_KEY - ]?.env - expect(withoutOtel).not.toHaveProperty('OTEL_EXPORTER_OTLP_ENDPOINT') - expect(withoutOtel).not.toHaveProperty('TRACE_ID') - }) -}) - -describe('composeProductionAgentProfile — base + delegation merge', () => { - it('merges the delegation MCP alongside the base MCP map when a key is present', () => { - const profile = composeProductionAgentProfile(BASE, { env: { TANGLE_API_KEY: 'k' } }) - expect(Object.keys(profile.mcp ?? {})).toEqual(['domain-tools', DELEGATION_MCP_SERVER_KEY]) - expect(profile.mcp?.['domain-tools']).toEqual(BASE.mcp?.['domain-tools']) - }) - - it('omits the delegation entry entirely when no key resolves — never a static broken entry', () => { - const profile = composeProductionAgentProfile(BASE, { env: {} }) - expect(profile.mcp).toEqual({ 'domain-tools': BASE.mcp?.['domain-tools'] }) - expect(profile.mcp).not.toHaveProperty(DELEGATION_MCP_SERVER_KEY) - }) - - it('does not mutate the base profile', () => { - const baseMcpKeys = Object.keys(BASE.mcp ?? {}) - composeProductionAgentProfile(BASE, { env: { TANGLE_API_KEY: 'k' } }) - expect(Object.keys(BASE.mcp ?? 
{})).toEqual(baseMcpKeys) - }) - - it('replaces systemPrompt while preserving other prompt fields', () => { - const profile = composeProductionAgentProfile(BASE, { - systemPrompt: 'workspace-augmented', - env: {}, - }) - expect(profile.prompt?.systemPrompt).toBe('workspace-augmented') - expect(profile.prompt?.metadata).toEqual({ v: 1 }) - }) - - it('concatenates extraFiles after the base files', () => { - const extra = { - path: '/skills/b.md', - resource: { kind: 'inline' as const, name: 'b', content: 'y' }, - } - const profile = composeProductionAgentProfile(BASE, { extraFiles: [extra], env: {} }) - expect(profile.resources?.files).toEqual([...(BASE.resources?.files ?? []), extra]) - }) - - it('overrides the profile name when supplied and falls through otherwise', () => { - expect(composeProductionAgentProfile(BASE, { name: 'ws-42', env: {} }).name).toBe('ws-42') - expect(composeProductionAgentProfile(BASE, { env: {} }).name).toBe('demo-agent') - }) -}) diff --git a/tests/mcp/detached-coder.test.ts b/tests/mcp/detached-coder.test.ts index 57bc3ab0..06b3e79a 100644 --- a/tests/mcp/detached-coder.test.ts +++ b/tests/mcp/detached-coder.test.ts @@ -297,11 +297,42 @@ describe('coderOutputAdapter — in-process executor raw artifact projection', ( }) describe('multiHarnessCoderFanout — heterogeneous fanout bundle', () => { - it('produces one AgentRunSpec per harness and a fanout driver of matching n', () => { + it('produces one AgentRunSpec per harness, each tagging its backendType', () => { const bundle = multiHarnessCoderFanout({ harnesses: ['claude-code', 'codex'] }) expect(bundle.agentRuns).toHaveLength(2) expect(bundle.agentRuns.map((s) => s.name)).toEqual(['coder-claude-code', 'coder-codex']) + expect(bundle.agentRuns.map((s) => s.profile.metadata?.backendType)).toEqual([ + 'claude-code', + 'codex', + ]) + }) + + it('uses a minimal model-only default profile (no hardcoded tools/skills/prompt)', () => { + const bundle = multiHarnessCoderFanout({ harnesses: 
['claude-code'] }) + const profile = bundle.agentRuns[0]!.profile + expect(profile.tools).toBeUndefined() + expect(profile.prompt).toBeUndefined() + }) + + it('threads a caller-authored worker profile onto every fanout run', () => { + const authored = { + name: 'authored', + tools: { git: true, fs: true }, + prompt: { systemPrompt: 'be careful' }, + } + const bundle = multiHarnessCoderFanout({ + profile: authored, + harnesses: ['claude-code', 'codex'], + }) expect(bundle.agentRuns.every((s) => s.profile.tools?.git === true)).toBe(true) + expect(bundle.agentRuns.every((s) => s.profile.prompt?.systemPrompt === 'be careful')).toBe( + true, + ) + // The per-harness backendType still overrides regardless of the authored profile. + expect(bundle.agentRuns.map((s) => s.profile.metadata?.backendType)).toEqual([ + 'claude-code', + 'codex', + ]) }) }) diff --git a/tests/mcp/detached-turn.test.ts b/tests/mcp/detached-turn.test.ts index af5d27b0..5403f190 100644 --- a/tests/mcp/detached-turn.test.ts +++ b/tests/mcp/detached-turn.test.ts @@ -20,12 +20,43 @@ import { } from '../../src/mcp/detached-turn' import { createSiblingSandboxExecutor } from '../../src/mcp/executor' import { DelegationTaskQueue } from '../../src/mcp/task-queue' -import { createDelegateCodeHandler } from '../../src/mcp/tools/delegate-code' import type { DelegateCodeArgs } from '../../src/mcp/types' import type { LoopTraceEvent, SandboxClient } from '../../src/runtime' const codeArgs: DelegateCodeArgs = { goal: 'fix bug', repoRoot: '/repo' } +/** + * Submit a single-variant coder delegation to the queue exactly as the bin's `delegate` dispatch + * does: a deterministic session-only detached ref (so a restart can resume), and a `run` closure + * that hands the args + ctx to the coder delegate. `detachedDispatch:false` keeps the streaming path + * (no ref recorded). 
+ */ +function submitCoder( + queue: DelegationTaskQueue, + delegate: CoderDelegate, + args: DelegateCodeArgs, + opts: { detachedDispatch?: boolean } = {}, +): { taskId: string } { + const variants = Math.max(1, Math.trunc(args.variants ?? 1)) + const detached = opts.detachedDispatch && variants <= 1 + return queue.submit({ + profile: 'coder', + args, + ...(detached + ? { detachedSessionRef: formatDetachedSessionRef({ sessionId: detachedCoderSessionId() }) } + : {}), + run: (ctx) => delegate(args, ctx), + }) +} + +/** Deterministic single-variant detached session id, matching the `dlg-turn-coder-<8hex>` shape. */ +function detachedCoderSessionId(): string { + const hex = Math.floor(Math.random() * 0xffffffff) + .toString(16) + .padStart(8, '0') + return `dlg-turn-coder-${hex}` +} + const patchText = [ 'diff --git a/src/a.ts b/src/a.ts', '--- a/src/a.ts', @@ -488,8 +519,14 @@ describe('detachedSessionRef population on submit', () => { diffStats: { filesChanged: 1, insertions: 1, deletions: 1 }, } } - const handler = createDelegateCodeHandler({ queue, delegate, detachedDispatch: true }) - const { taskId } = await handler({ goal: 'fix', repoRoot: '/r' }) + const { taskId } = submitCoder( + queue, + delegate, + { goal: 'fix', repoRoot: '/r' }, + { + detachedDispatch: true, + }, + ) await until(() => queue.status(taskId)?.status === 'completed') expect(seenRefs).toHaveLength(1) const parsed = parseDetachedSessionRef(seenRefs[0] as string) @@ -510,8 +547,14 @@ describe('detachedSessionRef population on submit', () => { diffStats: { filesChanged: 1, insertions: 1, deletions: 1 }, } } - const handler = createDelegateCodeHandler({ queue, delegate, detachedDispatch: true }) - const { taskId } = await handler({ goal: 'fix', repoRoot: '/r', variants: 2 }) + const { taskId } = submitCoder( + queue, + delegate, + { goal: 'fix', repoRoot: '/r', variants: 2 }, + { + detachedDispatch: true, + }, + ) await until(() => queue.status(taskId)?.status === 'completed') 
expect(seenRefs).toEqual([undefined]) }) @@ -529,8 +572,7 @@ describe('detachedSessionRef population on submit', () => { diffStats: { filesChanged: 1, insertions: 1, deletions: 1 }, } } - const handler = createDelegateCodeHandler({ queue, delegate }) - const { taskId } = await handler({ goal: 'fix', repoRoot: '/r' }) + const { taskId } = submitCoder(queue, delegate, { goal: 'fix', repoRoot: '/r' }) await until(() => queue.status(taskId)?.status === 'completed') expect(seenRefs).toEqual([undefined]) }) @@ -566,8 +608,14 @@ describe('detachedSessionDelegate detached path', () => { const executor = createSiblingSandboxExecutor({ client: fakeClient(fake.box) }) const delegate = detachedSessionDelegate({ executor, detachedTickIntervalMs: 1 }) const queue = new DelegationTaskQueue() - const handler = createDelegateCodeHandler({ queue, delegate, detachedDispatch: true }) - const { taskId } = await handler({ goal: 'fix', repoRoot: '/r' }) + const { taskId } = submitCoder( + queue, + delegate, + { goal: 'fix', repoRoot: '/r' }, + { + detachedDispatch: true, + }, + ) await until(() => queue.status(taskId)?.status === 'completed') const status = queue.status(taskId, { includeTrace: true })! 
expect(status.trace?.map((s) => s.kind)).toEqual(['loop', 'branch']) diff --git a/tests/mcp/idempotency.test.ts b/tests/mcp/idempotency.test.ts deleted file mode 100644 index 2d2b5e86..00000000 --- a/tests/mcp/idempotency.test.ts +++ /dev/null @@ -1,53 +0,0 @@ -import { describe, expect, it } from 'vitest' -import type { CoderDelegate, ResearcherDelegate } from '../../src/mcp/delegates' -import { DelegationTaskQueue } from '../../src/mcp/task-queue' -import { createDelegateCodeHandler } from '../../src/mcp/tools/delegate-code' -import { createDelegateResearchHandler } from '../../src/mcp/tools/delegate-research' - -const coderStub: CoderDelegate = async () => ({ - branch: 'feat/x', - patch: '', - testResult: { passed: true, output: '' }, - typecheckResult: { passed: true, output: '' }, - diffStats: { filesChanged: 0, insertions: 0, deletions: 0 }, -}) - -const researcherStub: ResearcherDelegate = async () => ({ - items: [], - citations: [], - proposedWrites: [], -}) - -describe('MCP idempotency — duplicate calls return the same taskId', () => { - it('coder: same arguments → same taskId; counted once in history', async () => { - const queue = new DelegationTaskQueue() - const handler = createDelegateCodeHandler({ queue, delegate: coderStub }) - const args = { goal: 'fix nav', repoRoot: '/r', variants: 1, config: { maxDiffLines: 100 } } - const a = await handler(args) - const b = await handler(args) - const c = await handler(args) - expect(b.taskId).toBe(a.taskId) - expect(c.taskId).toBe(a.taskId) - expect(queue.history().length).toBe(1) - }) - - it('coder: mutated config produces a fresh taskId', async () => { - const queue = new DelegationTaskQueue() - const handler = createDelegateCodeHandler({ queue, delegate: coderStub }) - const a = await handler({ goal: 'fix', repoRoot: '/r', config: { maxDiffLines: 100 } }) - const b = await handler({ goal: 'fix', repoRoot: '/r', config: { maxDiffLines: 200 } }) - expect(b.taskId).not.toBe(a.taskId) - }) - - it('researcher: 
same arguments → same taskId', async () => { - const queue = new DelegationTaskQueue() - const handler = createDelegateResearchHandler({ queue, delegate: researcherStub }) - const a = await handler({ question: 'who?', namespace: 'n', sources: ['web', 'twitter'] }) - const b = await handler({ question: 'who?', namespace: 'n', sources: ['twitter', 'web'] }) - // The hash canonicalizes by key sort, but `sources` is an array — order matters. - // This ensures order-sensitivity for arrays (the agent should pass a canonical order). - expect(b.taskId).not.toBe(a.taskId) - const c = await handler({ question: 'who?', namespace: 'n', sources: ['web', 'twitter'] }) - expect(c.taskId).toBe(a.taskId) - }) -}) diff --git a/tests/mcp/openai-tools.test.ts b/tests/mcp/openai-tools.test.ts index 0fc7af71..aa1c6688 100644 --- a/tests/mcp/openai-tools.test.ts +++ b/tests/mcp/openai-tools.test.ts @@ -1,26 +1,16 @@ import { describe, expect, it } from 'vitest' import { mcpToolsForRuntimeMcp, mcpToolsForRuntimeMcpSubset } from '../../src/mcp/openai-tools' -import { - DELEGATE_CODE_DESCRIPTION, - DELEGATE_CODE_INPUT_SCHEMA, - DELEGATE_CODE_TOOL_NAME, -} from '../../src/mcp/tools/delegate-code' import { DELEGATE_FEEDBACK_DESCRIPTION, DELEGATE_FEEDBACK_TOOL_NAME, } from '../../src/mcp/tools/delegate-feedback' -import { - DELEGATE_RESEARCH_DESCRIPTION, - DELEGATE_RESEARCH_INPUT_SCHEMA, - DELEGATE_RESEARCH_TOOL_NAME, -} from '../../src/mcp/tools/delegate-research' import { DELEGATION_HISTORY_TOOL_NAME } from '../../src/mcp/tools/delegation-history' import { DELEGATION_STATUS_TOOL_NAME } from '../../src/mcp/tools/delegation-status' describe('mcpToolsForRuntimeMcp', () => { - it('returns exactly the 5 delegation tools', () => { + it('returns exactly the 3 queue-bound delegation tools', () => { const tools = mcpToolsForRuntimeMcp() - expect(tools).toHaveLength(5) + expect(tools).toHaveLength(3) for (const tool of tools) { expect(tool.type).toBe('function') expect(typeof 
tool.function.name).toBe('string') @@ -31,34 +21,12 @@ describe('mcpToolsForRuntimeMcp', () => { it('emits tool names matching the canonical DELEGATE_*_TOOL_NAME constants', () => { const names = mcpToolsForRuntimeMcp().map((t) => t.function.name) expect(names).toEqual([ - DELEGATE_CODE_TOOL_NAME, - DELEGATE_RESEARCH_TOOL_NAME, DELEGATE_FEEDBACK_TOOL_NAME, DELEGATION_STATUS_TOOL_NAME, DELEGATION_HISTORY_TOOL_NAME, ]) }) - it('delegate_code parameters require goal + repoRoot', () => { - const tool = mcpToolsForRuntimeMcp().find((t) => t.function.name === DELEGATE_CODE_TOOL_NAME) - expect(tool).toBeDefined() - const params = tool!.function.parameters as Record - expect(params.type).toBe('object') - expect(params.required).toEqual(['goal', 'repoRoot']) - const properties = params.properties as Record - expect(properties).toHaveProperty('goal') - expect(properties).toHaveProperty('repoRoot') - }) - - it('delegate_research parameters require question + namespace', () => { - const tool = mcpToolsForRuntimeMcp().find( - (t) => t.function.name === DELEGATE_RESEARCH_TOOL_NAME, - ) - expect(tool).toBeDefined() - const params = tool!.function.parameters as Record - expect(params.required).toEqual(['question', 'namespace']) - }) - it('every tool carries a non-empty, non-placeholder description', () => { for (const tool of mcpToolsForRuntimeMcp()) { expect(tool.function.description).toBeDefined() @@ -70,17 +38,7 @@ describe('mcpToolsForRuntimeMcp', () => { } }) - it('projects the canonical description + schema verbatim (no drift)', () => { - const codeTool = mcpToolsForRuntimeMcp().find( - (t) => t.function.name === DELEGATE_CODE_TOOL_NAME, - )! - expect(codeTool.function.description).toBe(DELEGATE_CODE_DESCRIPTION) - expect(codeTool.function.parameters).toEqual(DELEGATE_CODE_INPUT_SCHEMA) - const researchTool = mcpToolsForRuntimeMcp().find( - (t) => t.function.name === DELEGATE_RESEARCH_TOOL_NAME, - )! 
- expect(researchTool.function.description).toBe(DELEGATE_RESEARCH_DESCRIPTION) - expect(researchTool.function.parameters).toEqual(DELEGATE_RESEARCH_INPUT_SCHEMA) + it('projects the canonical description verbatim (no drift)', () => { const feedbackTool = mcpToolsForRuntimeMcp().find( (t) => t.function.name === DELEGATE_FEEDBACK_TOOL_NAME, )! @@ -97,18 +55,18 @@ describe('mcpToolsForRuntimeMcp', () => { describe('mcpToolsForRuntimeMcpSubset', () => { it('returns only the named tool', () => { - const subset = mcpToolsForRuntimeMcpSubset([DELEGATE_RESEARCH_TOOL_NAME]) + const subset = mcpToolsForRuntimeMcpSubset([DELEGATION_STATUS_TOOL_NAME]) expect(subset).toHaveLength(1) - expect(subset[0].function.name).toBe(DELEGATE_RESEARCH_TOOL_NAME) + expect(subset[0].function.name).toBe(DELEGATION_STATUS_TOOL_NAME) }) it('returns multiple named tools preserving canonical ordering', () => { const subset = mcpToolsForRuntimeMcpSubset([ DELEGATION_HISTORY_TOOL_NAME, - DELEGATE_CODE_TOOL_NAME, + DELEGATE_FEEDBACK_TOOL_NAME, ]) expect(subset.map((t) => t.function.name)).toEqual([ - DELEGATE_CODE_TOOL_NAME, + DELEGATE_FEEDBACK_TOOL_NAME, DELEGATION_HISTORY_TOOL_NAME, ]) }) diff --git a/tests/mcp/server-integration.test.ts b/tests/mcp/server-integration.test.ts index cbe1f454..41701aa7 100644 --- a/tests/mcp/server-integration.test.ts +++ b/tests/mcp/server-integration.test.ts @@ -1,32 +1,16 @@ import { describe, expect, it } from 'vitest' -import type { CoderDelegate, ResearcherDelegate } from '../../src/mcp/delegates' +import type { UiAuditorDelegate } from '../../src/mcp/delegates' import { createInProcessTransport, createMcpServer, type JsonRpcResponse, } from '../../src/mcp/server' -const coderStub: CoderDelegate = async () => ({ - branch: 'feat/y', - patch: '', - testResult: { passed: true, output: 'ok' }, - typecheckResult: { passed: true, output: 'ok' }, - diffStats: { filesChanged: 1, insertions: 1, deletions: 0 }, -}) - -const researcherStub: ResearcherDelegate = async () 
=> ({ - items: [ - { - id: 'i-1', - namespace: 'tenant-a', - claim: 'cpg-founders use Twitter heavily', - evidence: [{ source: 'twitter', capturedAt: 0 }], - confidence: 0.6, - authoredBy: { kind: 'agent', id: 'r' }, - }, - ], - citations: [{ url: 'https://x.com', quote: 'q', confidence: 0.5 }], - proposedWrites: [], +const uiAuditorStub: UiAuditorDelegate = async (args) => ({ + workspaceDir: args.workspaceDir, + indexFile: 'index.md', + findings: [], + iterations: 0, }) async function rpcCall( @@ -39,11 +23,8 @@ async function rpcCall( } describe('createMcpServer — JSON-RPC surface', () => { - it('responds to initialize + tools/list with the registered tools', async () => { - const server = createMcpServer({ - coderDelegate: coderStub, - researcherDelegate: researcherStub, - }) + it('responds to initialize + tools/list with the always-on queue-bound tools', async () => { + const server = createMcpServer({}) const init = await rpcCall(server, 'initialize', {}, 0) expect(init?.result).toMatchObject({ protocolVersion: '2024-11-05', @@ -52,28 +33,34 @@ describe('createMcpServer — JSON-RPC surface', () => { }) const listed = await rpcCall(server, 'tools/list', {}, 1) const names = (listed?.result as { tools: { name: string }[] }).tools.map((t) => t.name).sort() - expect(names).toEqual([ - 'delegate_code', - 'delegate_feedback', - 'delegate_research', - 'delegation_history', - 'delegation_status', - ]) + expect(names).toEqual(['delegate_feedback', 'delegation_history', 'delegation_status']) }) - it('omits delegate_code when coderDelegate is not wired', async () => { - const server = createMcpServer({ researcherDelegate: researcherStub }) - const listed = await rpcCall(server, 'tools/list', {}, 1) - const names = (listed?.result as { tools: { name: string }[] }).tools.map((t) => t.name) - expect(names).not.toContain('delegate_code') - expect(names).toContain('delegate_research') + it('registers delegate_ui_audit only when a uiAuditorDelegate is wired', async () => { + 
const without = await rpcCall(createMcpServer({}), 'tools/list', {}, 1) + const withoutNames = (without?.result as { tools: { name: string }[] }).tools.map((t) => t.name) + expect(withoutNames).not.toContain('delegate_ui_audit') + + const withDelegate = await rpcCall( + createMcpServer({ uiAuditorDelegate: uiAuditorStub }), + 'tools/list', + {}, + 1, + ) + const withNames = (withDelegate?.result as { tools: { name: string }[] }).tools.map( + (t) => t.name, + ) + expect(withNames).toContain('delegate_ui_audit') }) it('routes tools/call through the handler and returns structuredContent', async () => { - const server = createMcpServer({ coderDelegate: coderStub }) + const server = createMcpServer({ uiAuditorDelegate: uiAuditorStub }) const call = await rpcCall(server, 'tools/call', { - name: 'delegate_code', - arguments: { goal: 'fix bug', repoRoot: '/r' }, + name: 'delegate_ui_audit', + arguments: { + workspaceDir: '/tmp/audits/x', + routes: [{ name: 'home', url: 'https://example.com' }], + }, }) const result = call?.result as { content: { type: string; text: string }[] @@ -86,28 +73,29 @@ describe('createMcpServer — JSON-RPC surface', () => { }) it('returns -32602 on validation failures', async () => { - const server = createMcpServer({ coderDelegate: coderStub }) + const server = createMcpServer({ uiAuditorDelegate: uiAuditorStub }) const call = await rpcCall(server, 'tools/call', { - name: 'delegate_code', - arguments: { goal: '', repoRoot: '/r' }, + name: 'delegate_ui_audit', + arguments: { workspaceDir: '/tmp/x', routes: [] }, }) expect(call?.error?.code).toBe(-32602) }) it('returns -32601 for unknown tools', async () => { - const server = createMcpServer({ coderDelegate: coderStub }) + const server = createMcpServer({}) const call = await rpcCall(server, 'tools/call', { name: 'delegate_evaluation' }) expect(call?.error?.code).toBe(-32601) }) - it('drives the full lifecycle end-to-end: delegate → status → feedback → history', async () => { - const server = 
createMcpServer({ - coderDelegate: coderStub, - researcherDelegate: researcherStub, - }) + it('drives the full lifecycle end-to-end: delegate_ui_audit → status → feedback → history', async () => { + const server = createMcpServer({ uiAuditorDelegate: uiAuditorStub }) const created = await rpcCall(server, 'tools/call', { - name: 'delegate_research', - arguments: { question: 'who engages cpg-founders?', namespace: 'tenant-a' }, + name: 'delegate_ui_audit', + arguments: { + workspaceDir: '/tmp/audits/y', + routes: [{ name: 'home', url: 'https://example.com' }], + namespace: 'tenant-a', + }, }) const taskId = (created?.result as { structuredContent: { taskId: string } }).structuredContent .taskId @@ -127,7 +115,7 @@ describe('createMcpServer — JSON-RPC surface', () => { name: 'delegate_feedback', arguments: { refersTo: { kind: 'delegation', ref: taskId }, - rating: { score: 0.85, label: 'good', notes: 'cited the right source' }, + rating: { score: 0.85, label: 'good', notes: 'clean audit' }, by: 'agent', namespace: 'tenant-a', }, @@ -152,7 +140,7 @@ describe('createMcpServer — JSON-RPC surface', () => { describe('createMcpServer — stdio transport', () => { it('handles a single JSON-RPC line through serve() and writes a response', async () => { - const server = createMcpServer({ coderDelegate: coderStub }) + const server = createMcpServer({}) const { transport, clientWrite, clientClose, readServer } = createInProcessTransport() const servePromise = server.serve(transport) clientWrite(JSON.stringify({ jsonrpc: '2.0', id: 1, method: 'initialize' })) @@ -162,13 +150,14 @@ describe('createMcpServer — stdio transport', () => { const responses = await readServer() expect(responses.length).toBeGreaterThanOrEqual(2) const list = responses.find((r) => r.id === 2) - expect((list?.result as { tools: unknown[] }).tools.length).toBe(4) + // Always-on: delegate_feedback, delegation_status, delegation_history. 
+ expect((list?.result as { tools: unknown[] }).tools.length).toBe(3) clientClose() await servePromise }) it('emits a parse error for malformed JSON', async () => { - const server = createMcpServer({ coderDelegate: coderStub }) + const server = createMcpServer({}) const { transport, clientWrite, clientClose, readServer } = createInProcessTransport() const servePromise = server.serve(transport) clientWrite('{not json') diff --git a/tests/mcp/wire-contract.test.ts b/tests/mcp/wire-contract.test.ts index 1f440ff3..c681ce62 100644 --- a/tests/mcp/wire-contract.test.ts +++ b/tests/mcp/wire-contract.test.ts @@ -2,34 +2,19 @@ * Wire-contract snapshot — the FROZEN external surface of the delegation MCP. * * This test pins what an external MCP client observes, independent of any - * internal delegate/topology rewrite: the 5 delegation tool names + their + * internal delegate/topology rewrite: the queue-bound tool names + their * descriptions + input schemas (`tools/list`), and the `tools/call` response * envelope + payload keys per tool (the `{taskId, estimatedDurationMs}` kickoff * shape, the `delegation_status` / `delegation_history` payloads). Any change - * to observable output fails here — so a migration that re-homes the coder - * delegate onto the generic combinator path cannot silently break the contract. + * to observable output fails here. * * Delegates are stubbed (the wire shape, not the work, is under test). 
*/ import { describe, expect, it } from 'vitest' -import type { CoderDelegate, ResearcherDelegate, UiAuditorDelegate } from '../../src/mcp/delegates' +import type { UiAuditorDelegate } from '../../src/mcp/delegates' import { createMcpServer, type JsonRpcResponse } from '../../src/mcp/server' -const coderStub: CoderDelegate = async () => ({ - branch: 'feat/x', - patch: 'diff --git a/x b/x\n--- a/x\n+++ b/x\n+1', - testResult: { passed: true, output: 'ok' }, - typecheckResult: { passed: true, output: 'ok' }, - diffStats: { filesChanged: 1, insertions: 1, deletions: 0 }, -}) - -const researcherStub: ResearcherDelegate = async () => ({ - items: [], - citations: [], - proposedWrites: [], -}) - const uiAuditorStub: UiAuditorDelegate = async () => ({ workspaceDir: '/ws', indexFile: 'index.md', @@ -38,11 +23,7 @@ const uiAuditorStub: UiAuditorDelegate = async () => ({ }) function fullServer() { - return createMcpServer({ - coderDelegate: coderStub, - researcherDelegate: researcherStub, - uiAuditorDelegate: uiAuditorStub, - }) + return createMcpServer({ uiAuditorDelegate: uiAuditorStub }) } async function rpc( @@ -56,83 +37,24 @@ async function rpc( type ToolList = { tools: { name: string; description: string; inputSchema: unknown }[] } +const auditArgs = { + workspaceDir: '/tmp/audits/x', + routes: [{ name: 'home', url: 'https://example.com' }], +} + describe('wire-contract — tools/list (the frozen tool names + schemas)', () => { - it('advertises exactly the 6 tools (5 delegation + the always-on queue trio)', async () => { + it('advertises exactly delegate_ui_audit + the always-on queue trio', async () => { const listed = await rpc(fullServer(), 'tools/list') const tools = (listed?.result as ToolList).tools expect(tools.map((t) => t.name).sort()).toEqual([ - 'delegate_code', 'delegate_feedback', - 'delegate_research', 'delegate_ui_audit', 'delegation_history', 'delegation_status', ]) }) - it('pins the delegate_code input schema (required fields + variants bound + config 
shape)', async () => { - const listed = await rpc(fullServer(), 'tools/list') - const tools = (listed?.result as ToolList).tools - const code = tools.find((t) => t.name === 'delegate_code')! - expect(code.inputSchema).toMatchInlineSnapshot(` - { - "additionalProperties": false, - "properties": { - "config": { - "additionalProperties": false, - "properties": { - "forbiddenPaths": { - "items": { - "type": "string", - }, - "type": "array", - }, - "maxDiffLines": { - "minimum": 1, - "type": "integer", - }, - "testCmd": { - "type": "string", - }, - "typecheckCmd": { - "type": "string", - }, - }, - "type": "object", - }, - "contextHint": { - "description": "Optional free-form context the coder sees in the prompt prelude.", - "type": "string", - }, - "goal": { - "description": "Natural-language description of what the coder must accomplish.", - "type": "string", - }, - "namespace": { - "description": "Multi-tenant scope (customer-id, workspace-id).", - "type": "string", - }, - "repoRoot": { - "description": "Absolute path inside the sandbox where the repo lives.", - "type": "string", - }, - "variants": { - "description": "Number of parallel coder harnesses. Default 1.", - "maximum": 8, - "minimum": 1, - "type": "integer", - }, - }, - "required": [ - "goal", - "repoRoot", - ], - "type": "object", - } - `) - }) - - it('every delegation tool exposes an object inputSchema with required keys', async () => { + it('every delegation tool exposes an object inputSchema with a non-empty description', async () => { const listed = await rpc(fullServer(), 'tools/list') const tools = (listed?.result as ToolList).tools for (const t of tools) { @@ -141,19 +63,18 @@ describe('wire-contract — tools/list (the frozen tool names + schemas)', () => expect(typeof t.description, `${t.name} description`).toBe('string') expect((t.description as string).length, `${t.name} description non-empty`).toBeGreaterThan(0) } - // The two kickoff tools name their required inputs verbatim. 
+ // The kickoff tool names its required inputs verbatim. const required = (name: string) => (tools.find((t) => t.name === name)!.inputSchema as { required?: string[] }).required - expect(required('delegate_code')).toEqual(['goal', 'repoRoot']) - expect(required('delegate_research')).toEqual(['question', 'namespace']) + expect(required('delegate_ui_audit')).toEqual(['workspaceDir', 'routes']) }) }) describe('wire-contract — tools/call envelope + payloads', () => { - it('delegate_code returns {taskId, estimatedDurationMs} in the MCP content envelope', async () => { + it('delegate_ui_audit returns {taskId, estimatedDurationMs} in the MCP content envelope', async () => { const res = await rpc(fullServer(), 'tools/call', { - name: 'delegate_code', - arguments: { goal: 'fix the bug', repoRoot: '/repo' }, + name: 'delegate_ui_audit', + arguments: auditArgs, }) const result = res?.result as { content: { type: string; text: string }[] @@ -169,20 +90,11 @@ describe('wire-contract — tools/call envelope + payloads', () => { expect(typeof result.structuredContent.estimatedDurationMs).toBe('number') }) - it('delegate_research returns {taskId, estimatedDurationMs}', async () => { - const res = await rpc(fullServer(), 'tools/call', { - name: 'delegate_research', - arguments: { question: 'what is X?', namespace: 'tenant-a' }, - }) - const sc = (res?.result as { structuredContent: Record }).structuredContent - expect(Object.keys(sc).sort()).toEqual(['estimatedDurationMs', 'taskId']) - }) - it('delegation_status reports the queued task by taskId (status payload contract)', async () => { const server = fullServer() const kicked = await rpc(server, 'tools/call', { - name: 'delegate_code', - arguments: { goal: 'fix the bug', repoRoot: '/repo' }, + name: 'delegate_ui_audit', + arguments: auditArgs, }) const taskId = (kicked?.result as { structuredContent: { taskId: string } }).structuredContent .taskId @@ -193,17 +105,14 @@ describe('wire-contract — tools/call envelope + payloads', () 
=> { const sc = (statusRes?.result as { structuredContent: Record }) .structuredContent expect(sc.taskId).toBe(taskId) - expect(sc.profile).toBe('coder') + expect(sc.profile).toBe('ui-auditor') expect(typeof sc.status).toBe('string') expect(typeof sc.startedAt).toBe('string') }) it('delegation_history returns an entries array', async () => { const server = fullServer() - await rpc(server, 'tools/call', { - name: 'delegate_code', - arguments: { goal: 'fix the bug', repoRoot: '/repo' }, - }) + await rpc(server, 'tools/call', { name: 'delegate_ui_audit', arguments: auditArgs }) const res = await rpc(server, 'tools/call', { name: 'delegation_history', arguments: {} }) const sc = (res?.result as { structuredContent: { delegations?: unknown } }).structuredContent expect(Array.isArray(sc.delegations)).toBe(true) diff --git a/tests/sandbox-act.test.ts b/tests/sandbox-act.test.ts index 38b8a738..300b2c54 100644 --- a/tests/sandbox-act.test.ts +++ b/tests/sandbox-act.test.ts @@ -2,7 +2,6 @@ import type { TraceEmitter } from '@tangle-network/agent-eval' import type { CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox' import { describe, expect, it } from 'vitest' import { type AgentRunContext, collectAgentRun, createSandboxAct } from '../src/agent' -import { DELEGATION_MCP_SERVER_KEY } from '../src/mcp/delegation-profile' import type { OutputAdapter, SandboxClient } from '../src/runtime' const BASE = { @@ -48,20 +47,20 @@ function ctx(): AgentRunContext { } describe('createSandboxAct — prod-profile eval parity', () => { - it('boots the sandbox with the composed PRODUCTION profile (delegation MCP merged when keyed)', async () => { + it('boots the sandbox with the agent profile and streams mapped events + parsed output', async () => { const { client, captured } = fakeClient(SCRIPT) const act = createSandboxAct({ baseProfile: BASE, sandboxClient: client, buildPrompt: (p: string) => `prompt:${p}`, output, - env: { TANGLE_API_KEY: 'sk' }, }) const 
{ events, output: out } = await collectAgentRun(act('persona-1', ctx())) const profile = captured.createOpts?.backend?.profile - expect(Object.keys(profile?.mcp ?? {})).toEqual(['domain', DELEGATION_MCP_SERVER_KEY]) + // The eval profile is the agent's own profile, unchanged — no delegation MCP injected. + expect(Object.keys(profile?.mcp ?? {})).toEqual(['domain']) expect(captured.prompt).toBe('prompt:persona-1') expect(out).toBe('Hello') // text parts → text_delta, cost event → llm_call, bare result → unmapped @@ -72,14 +71,13 @@ describe('createSandboxAct — prod-profile eval parity', () => { ]) }) - it('omits the delegation MCP when no sandbox key resolves — eval profile stays a clean local profile', async () => { + it('leaves the base profile untouched when no compose overrides are given', async () => { const { client, captured } = fakeClient(SCRIPT) const act = createSandboxAct({ baseProfile: BASE, sandboxClient: client, buildPrompt: () => 'go', output, - env: {}, }) await collectAgentRun(act('p', ctx())) expect(captured.createOpts?.backend?.profile?.mcp).toEqual({ domain: BASE.mcp.domain }) @@ -92,13 +90,30 @@ describe('createSandboxAct — prod-profile eval parity', () => { sandboxClient: client, buildPrompt: (p: string) => p, output, - env: {}, compose: (p: string) => ({ systemPrompt: `augmented for ${p}` }), }) await collectAgentRun(act('alice', ctx())) expect(captured.createOpts?.backend?.profile?.prompt?.systemPrompt).toBe('augmented for alice') }) + it('merges per-persona mcpConnections over the base profile mcp map', async () => { + const { client, captured } = fakeClient(SCRIPT) + const act = createSandboxAct({ + baseProfile: BASE, + sandboxClient: client, + buildPrompt: () => 'x', + output, + compose: () => ({ + mcpConnections: { ticketing: { transport: 'stdio', command: 'node', enabled: true } }, + }), + }) + await collectAgentRun(act('p', ctx())) + expect(Object.keys(captured.createOpts?.backend?.profile?.mcp ?? 
{})).toEqual([ + 'domain', + 'ticketing', + ]) + }) + it('parses output from the RAW stream, including events with no RuntimeStreamEvent projection', async () => { // The result event is unmapped to the stream but MUST reach output.parse. const { client } = fakeClient([{ type: 'result', data: { finalText: 'only-raw' } }]) @@ -107,7 +122,6 @@ describe('createSandboxAct — prod-profile eval parity', () => { sandboxClient: client, buildPrompt: () => 'x', output, - env: {}, }) const { events, output: out } = await collectAgentRun(act('p', ctx())) expect(events).toEqual([]) @@ -121,7 +135,6 @@ describe('createSandboxAct — prod-profile eval parity', () => { sandboxClient: client, buildPrompt: () => 'x', output, - env: {}, }) await expect(collectAgentRun(act('p', ctx()))).rejects.toThrow('stream boom') })