diff --git a/docs/api/mcp.md b/docs/api/mcp.md
index 3325455..fd17377 100644
--- a/docs/api/mcp.md
+++ b/docs/api/mcp.md
@@ -4059,11 +4059,23 @@ Analyst kind ids to run AUTOMATICALLY when a worker settles `done` (the analyst-
  and queued for the driver to pull via `await_event`. Omit/empty = no auto-analysis (default;
  the driver can still run lenses on demand via `run_analyst`). Requires `analysts`.
 
+##### maxLiveWorkers?
+
+> `readonly` `optional` **maxLiveWorkers?**: `number`
+
+Defined in: [mcp/tools/coordination.ts:112](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L112)
+
+Hard cap on how many workers may be LIVE (spawned but not yet settled) at once. `spawn_agent`
+ counts the scope's non-terminal nodes and fails closed (`error: 'max-live-workers'`) BEFORE
+ reserving from the pool when the cap is already met — a concurrency fence on top of the
+ conserved-budget fence (the pool bounds total work; this bounds simultaneous work, e.g. live
+ sandboxes/boxes). Omit or `<= 0` = no cap (the prior behavior; the pool stays the only fence).
+
 ***
 
 ### CoordinationTools
 
-Defined in: [mcp/tools/coordination.ts:116](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L116)
+Defined in: [mcp/tools/coordination.ts:122](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L122)
 
 The supervisor-side toolbox returned by [createCoordinationTools](#createcoordinationtools): the MCP tool
 descriptors a driver `AgentProfile` calls to spawn, steer, observe, and settle workers
@@ -4077,7 +4089,7 @@ choice, steerable counterpart to the one-shot own-sandbox delegation MCP.
 
 > `readonly` **tools**: [`McpToolDescriptor`](#mcptooldescriptor)[]
 
-Defined in: [mcp/tools/coordination.ts:117](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L117)
+Defined in: [mcp/tools/coordination.ts:123](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L123)
 
 #### Methods
 
@@ -4085,7 +4097,7 @@ Defined in: [mcp/tools/coordination.ts:117](https://github.com/tangle-network/ag
 
 > **isStopped**(): `boolean`
 
-Defined in: [mcp/tools/coordination.ts:118](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L118)
+Defined in: [mcp/tools/coordination.ts:124](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L124)
 
 ###### Returns
 
@@ -4095,7 +4107,7 @@ Defined in: [mcp/tools/coordination.ts:118](https://github.com/tangle-network/ag
 
 > **stopReason**(): `string` \| `undefined`
 
-Defined in: [mcp/tools/coordination.ts:119](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L119)
+Defined in: [mcp/tools/coordination.ts:125](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L125)
 
 ###### Returns
 
@@ -4105,7 +4117,7 @@ Defined in: [mcp/tools/coordination.ts:119](https://github.com/tangle-network/ag
 
 > **settled**(): readonly [`SettledWorker`](#settledworker)[]
 
-Defined in: [mcp/tools/coordination.ts:120](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L120)
+Defined in: [mcp/tools/coordination.ts:126](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L126)
 
 ###### Returns
 
@@ -4115,7 +4127,7 @@ readonly [`SettledWorker`](#settledworker)[]
 
 > **questions**(): readonly [`QuestionRecord`](#questionrecord)[]
 
-Defined in: [mcp/tools/coordination.ts:121](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L121)
+Defined in: [mcp/tools/coordination.ts:127](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L127)
 
 ###### Returns
 
@@ -4125,7 +4137,7 @@ readonly [`QuestionRecord`](#questionrecord)[]
 
 > **history**(): readonly [`BusRecord`](runtime.md#busrecord)\<[`CoordinationEvent`](runtime.md#coordinationevent)\>[]
 
-Defined in: [mcp/tools/coordination.ts:125](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L125)
+Defined in: [mcp/tools/coordination.ts:131](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L131)
 
 The full ordered log of every bus event — UP (settled / question / finding) and DOWN
  (steer / answer) — the observability audit + replay trail. Each record carries seq,
@@ -4139,7 +4151,7 @@ readonly [`BusRecord`](runtime.md#busrecord)\<[`CoordinationEvent`](runtime.md#c
 
 > **stats**(): [`BusStats`](runtime.md#busstats)
 
-Defined in: [mcp/tools/coordination.ts:127](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L127)
+Defined in: [mcp/tools/coordination.ts:133](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L133)
 
 Bus throughput counters (published / pulled / by-kind) for live dashboards.
 
@@ -4151,7 +4163,7 @@ Bus throughput counters (published / pulled / by-kind) for live dashboards.
 
 > **raiseFinding**(`finding`): `Promise`\<`void`\>
 
-Defined in: [mcp/tools/coordination.ts:131](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L131)
+Defined in: [mcp/tools/coordination.ts:137](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L137)
 
 Raise a `finding` on the bus from outside the settle hook — the seam an ONLINE detector
  (mid-run, on the worker pipe) uses to tell the driver "this worker is looping/erroring" the
@@ -7374,7 +7386,7 @@ passed in because replay-safe paths must not read `Date.now`.
 
 > **createCoordinationTools**(`opts`): [`CoordinationTools`](#coordinationtools)
 
-Defined in: [mcp/tools/coordination.ts:154](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L154)
+Defined in: [mcp/tools/coordination.ts:160](https://github.com/tangle-network/agent-runtime/blob/main/src/mcp/tools/coordination.ts#L160)
 
 Build the driver's MCP tools over a live scope.
 
diff --git a/docs/api/runtime.md b/docs/api/runtime.md
index 662cc91..b56825e 100644
--- a/docs/api/runtime.md
+++ b/docs/api/runtime.md
@@ -6929,11 +6929,20 @@ Defined in: [runtime/supervise/coordination-driver.ts:50](https://github.com/tan
 
 Per-child budget reserved from the conserved pool on each spawn.
 
+##### maxLiveWorkers?
+
+> `readonly` `optional` **maxLiveWorkers?**: `number`
+
+Defined in: [runtime/supervise/coordination-driver.ts:53](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L53)
+
+Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this many are in
+ flight (a concurrency fence on top of the conserved-pool fence). Omit/`<= 0` = no cap.
+
 ##### systemPrompt
 
 > `readonly` **systemPrompt**: `string` \| ((`task`) => `string`)
 
-Defined in: [runtime/supervise/coordination-driver.ts:53](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L53)
+Defined in: [runtime/supervise/coordination-driver.ts:56](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L56)
 
 The driver's stance — a string, or built from the task (the worker-driver prompt /
  the generator). INJECTED so the prompt is a pluggable, optimizable role.
@@ -6942,7 +6951,7 @@ The driver's stance — a string, or built from the task (the worker-driver prom
 
 > `readonly` `optional` **extraTools?**: readonly `object`[]
 
-Defined in: [runtime/supervise/coordination-driver.ts:58](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L58)
+Defined in: [runtime/supervise/coordination-driver.ts:61](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L61)
 
 WORK tools the driver may call DIRECTLY (alongside the coordination verbs) — so the driver is
  not a pure manager but a full agent that can ACT (do simple work itself) OR SPAWN (delegate).
@@ -6953,7 +6962,7 @@ WORK tools the driver may call DIRECTLY (alongside the coordination verbs) — s
 
 > `readonly` `optional` **executeExtraTool?**: (`name`, `args`) => `Promise`\<`string` \| `null` \| `undefined`\>
 
-Defined in: [runtime/supervise/coordination-driver.ts:65](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L65)
+Defined in: [runtime/supervise/coordination-driver.ts:68](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L68)
 
 Runs an `extraTools` call. Returns a string result, or null/undefined to signal "not handled"
  so the call falls through to the coordination dispatch. Required iff `extraTools` is set.
@@ -6976,7 +6985,7 @@ Runs an `extraTools` call. Returns a string result, or null/undefined to signal
 
 > `readonly` `optional` **maxTurns?**: `number`
 
-Defined in: [runtime/supervise/coordination-driver.ts:73](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L73)
+Defined in: [runtime/supervise/coordination-driver.ts:76](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L76)
 
 Max driver turns before the loop force-finalizes on the best settled child. Default 16.
  `0` lifts the turn-COUNT cap: the loop is bounded instead by the conserved budget pool,
@@ -6987,7 +6996,7 @@ Max driver turns before the loop force-finalizes on the best settled child. Defa
 
 > `readonly` `optional` **now?**: () => `number`
 
-Defined in: [runtime/supervise/coordination-driver.ts:76](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L76)
+Defined in: [runtime/supervise/coordination-driver.ts:79](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L79)
 
 Injected clock for the in-loop absolute-deadline guard — keeps the deadline check
  deterministic in tests. Defaults to `Date.now`.
@@ -7819,11 +7828,21 @@ Defined in: [runtime/supervise/supervise.ts:77](https://github.com/tangle-networ
 
 Per-child budget reserved on each spawn. Defaults to a quarter of the pool's tokens.
 
+##### maxLiveWorkers?
+
+> `readonly` `optional` **maxLiveWorkers?**: `number`
+
+Defined in: [runtime/supervise/supervise.ts:81](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L81)
+
+Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this many are in
+ flight. The conserved pool bounds TOTAL work; this bounds SIMULTANEOUS work (live
+ boxes/sandboxes a real fleet runs at once). Omit/`<= 0` = no cap (the pool stays the only fence).
+
 ##### blobs?
 
 > `readonly` `optional` **blobs?**: [`ResultBlobStore`](#resultblobstore)
 
-Defined in: [runtime/supervise/supervise.ts:79](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L79)
+Defined in: [runtime/supervise/supervise.ts:83](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L83)
 
 Worker output store. Defaults to in-memory.
 
@@ -7831,25 +7850,25 @@ Worker output store. Defaults to in-memory.
 
 > `readonly` `optional` **maxDepth?**: `number`
 
-Defined in: [runtime/supervise/supervise.ts:80](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L80)
+Defined in: [runtime/supervise/supervise.ts:84](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L84)
 
 ##### maxTurns?
 
 > `readonly` `optional` **maxTurns?**: `number`
 
-Defined in: [runtime/supervise/supervise.ts:81](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L81)
+Defined in: [runtime/supervise/supervise.ts:85](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L85)
 
 ##### runId?
 
 > `readonly` `optional` **runId?**: `string`
 
-Defined in: [runtime/supervise/supervise.ts:82](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L82)
+Defined in: [runtime/supervise/supervise.ts:86](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L86)
 
 ##### now?
 
 > `readonly` `optional` **now?**: () => `number`
 
-Defined in: [runtime/supervise/supervise.ts:83](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L83)
+Defined in: [runtime/supervise/supervise.ts:87](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L87)
 
 ###### Returns
 
@@ -7859,7 +7878,7 @@ Defined in: [runtime/supervise/supervise.ts:83](https://github.com/tangle-networ
 
 > `readonly` `optional` **allowedModels?**: readonly `string`[]
 
-Defined in: [runtime/supervise/supervise.ts:87](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L87)
+Defined in: [runtime/supervise/supervise.ts:91](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L91)
 
 Restrict the run to this subset of models. When set, every configured model — the
  supervisor router model, the profile's model, and the backend's model — must be a member,
@@ -7936,11 +7955,21 @@ Defined in: [runtime/supervise/supervisor-agent.ts:52](https://github.com/tangle
 
 Per-child budget reserved from the conserved pool on each spawn.
 
+##### maxLiveWorkers?
+
+> `readonly` `optional` **maxLiveWorkers?**: `number`
+
+Defined in: [runtime/supervise/supervisor-agent.ts:56](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L56)
+
+Hard cap on simultaneously-LIVE workers across both arms — `spawn_agent` fails closed once
+ this many are in flight (a concurrency fence on top of the conserved-pool fence; bounds live
+ boxes/sandboxes, not total work). Omit/`<= 0` = no cap.
+
 ##### router?
 
 > `readonly` `optional` **router?**: [`RouterConfig`](#routerconfig)
 
-Defined in: [runtime/supervise/supervisor-agent.ts:54](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L54)
+Defined in: [runtime/supervise/supervisor-agent.ts:58](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L58)
 
 Router substrate for a router-brained supervisor (`harness` null). The profile's model wins.
 
@@ -7948,7 +7977,7 @@ Router substrate for a router-brained supervisor (`harness` null). The profile's
 
 > `readonly` `optional` **brain?**: [`ToolLoopChat`](#toolloopchat)
 
-Defined in: [runtime/supervise/supervisor-agent.ts:56](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L56)
+Defined in: [runtime/supervise/supervisor-agent.ts:60](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L60)
 
 Inject the brain directly (tests / advanced) instead of resolving `routerBrain` from the profile.
 
@@ -7956,7 +7985,7 @@ Inject the brain directly (tests / advanced) instead of resolving `routerBrain`
 
 > `readonly` `optional` **driveHarness?**: [`DriveHarness`](#driveharness-1)
 
-Defined in: [runtime/supervise/supervisor-agent.ts:58](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L58)
+Defined in: [runtime/supervise/supervisor-agent.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L62)
 
 Required for a sandboxed-harness supervisor (`harness` set): runs the harness as the driver.
 
@@ -7964,7 +7993,7 @@ Required for a sandboxed-harness supervisor (`harness` set): runs the harness as
 
 > `readonly` `optional` **extraTools?**: readonly `object`[]
 
-Defined in: [runtime/supervise/supervisor-agent.ts:61](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L61)
+Defined in: [runtime/supervise/supervisor-agent.ts:65](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L65)
 
 WORK tools the supervisor may call DIRECTLY (router arm) — so it can do simple work ITSELF and
  only delegate when it needs parallelism. Pair with `executeExtraTool`.
@@ -7973,7 +8002,7 @@ WORK tools the supervisor may call DIRECTLY (router arm) — so it can do simple
 
 > `readonly` `optional` **executeExtraTool?**: (`name`, `args`) => `Promise`\<`string` \| `null` \| `undefined`\>
 
-Defined in: [runtime/supervise/supervisor-agent.ts:67](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L67)
+Defined in: [runtime/supervise/supervisor-agent.ts:71](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L71)
 
 Runs an `extraTools` call; null/undefined falls through to the coordination dispatch.
 
@@ -7995,7 +8024,7 @@ Runs an `extraTools` call; null/undefined falls through to the coordination disp
 
 > `readonly` `optional` **maxTurns?**: `number`
 
-Defined in: [runtime/supervise/supervisor-agent.ts:71](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L71)
+Defined in: [runtime/supervise/supervisor-agent.ts:75](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L75)
 
 ***
 
@@ -8932,7 +8961,7 @@ Default impl returns false for every settlement (flat — never widens).
 
 ### WorktreeCliExecutorOptions
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:43](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L43)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:46](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L46)
 
 **`Experimental`**
 
@@ -8942,7 +8971,7 @@ Defined in: [runtime/supervise/worktree-cli-executor.ts:43](https://github.com/t
 
 > **repoRoot**: `string`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:45](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L45)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:48](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L48)
 
 **`Experimental`**
 
@@ -8952,7 +8981,7 @@ Absolute path to the git checkout the worktree is cut from.
 
 > **profile**: `AgentProfile`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:47](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L47)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:50](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L50)
 
 **`Experimental`**
 
@@ -8962,7 +8991,7 @@ The SUPERVISOR-AUTHORED profile (the §1.5 payload: systemPrompt + model).
 
 > **harness**: [`LocalHarness`](mcp.md#localharness)
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:49](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L49)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:52](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L52)
 
 **`Experimental`**
 
@@ -8972,7 +9001,7 @@ Which local harness CLI drives this leaf (`claude` | `codex` | `opencode`).
 
 > **taskPrompt**: `string`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:51](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L51)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:54](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L54)
 
 **`Experimental`**
 
@@ -8982,7 +9011,7 @@ The per-task instruction handed to the harness (composed under the system prompt
 
 > `optional` **runId?**: `string`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:53](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L53)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:56](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L56)
 
 **`Experimental`**
 
@@ -8992,7 +9021,7 @@ Unique id for the worktree path + branch. Defaults to a fresh UUID.
 
 > `optional` **baseRef?**: `string`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:55](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L55)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:58](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L58)
 
 **`Experimental`**
 
@@ -9002,7 +9031,7 @@ Override the base ref the worktree is cut from (default `HEAD`).
 
 > `optional` **harnessTimeoutMs?**: `number`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:57](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L57)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:60](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L60)
 
 **`Experimental`**
 
@@ -9012,7 +9041,7 @@ Wall-clock cap per harness subprocess (ms). Default 5 min (the `runLocalHarness`
 
 > `optional` **testCmd?**: `string`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:62](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L62)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:65](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L65)
 
 **`Experimental`**
 
@@ -9023,7 +9052,7 @@ Its exit code becomes `artifact.checks.tests.passed`. Omit to skip (no signal de
 
 > `optional` **typecheckCmd?**: `string`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:64](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L64)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:67](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L67)
 
 **`Experimental`**
 
@@ -9033,7 +9062,7 @@ Shell command run in the live worktree to derive the typecheck-PASS signal (e.g.
 
 > `optional` **checkTimeoutMs?**: `number`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:66](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L66)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:69](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L69)
 
 **`Experimental`**
 
@@ -9043,7 +9072,7 @@ Wall-clock cap per verification command (ms). Default = `harnessTimeoutMs` or 5
 
 > `optional` **runGit?**: [`GitRunner`](mcp.md#gitrunner)
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:68](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L68)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:71](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L71)
 
 **`Experimental`**
 
@@ -9053,7 +9082,7 @@ Test seam — inject a git runner so unit tests drive the worktree helpers witho
 
 > `optional` **runHarness?**: (`options`) => `Promise`\<[`LocalHarnessResult`](mcp.md#localharnessresult)\>
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:70](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L70)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:73](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L73)
 
 **`Experimental`**
 
@@ -9091,13 +9120,26 @@ Does NOT throw when:
 
 > `optional` **runCommand?**: `WorktreeCheckRunner`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:73](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L73)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:76](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L76)
 
 **`Experimental`**
 
 Test seam — inject the verification-command runner so unit tests script test/typecheck
  outcomes without spawning a real shell. Defaults to a `/bin/sh -c` spawn in the worktree.
 
+##### budgetExempt?
+
+> `optional` **budgetExempt?**: `boolean`
+
+Defined in: [runtime/supervise/worktree-cli-executor.ts:83](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L83)
+
+**`Experimental`**
+
+Exclude this leaf's spend from the conserved pool + equal-k arms. Defaults to `true` because a
+coding-harness CLI does not surface token usage, so metering it would record a fabricated zero
+(the no-silent-zeros rule forbids that). Set `false` ONLY for a harness that surfaces real
+token/usd usage worth metering — the executor would then debit the (real) spend it captures.
+
 ***
 
 ### AuthoredHarness
@@ -12072,7 +12114,7 @@ The conserved spend incurred before the run failed — real cost is paid even wh
 
 > **WorktreePatchArtifact** = `WorktreeHarnessResult`
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:40](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L40)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:43](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L43)
 
 Terminal artifact of one worktree-CLI run — the canonical worktree-harness result (the captured
  diff + the harness's run record + the derived checks).
@@ -14312,7 +14354,7 @@ executor has produced its output. The inner `score` is preserved; only `valid` i
 
 > **driverAgent**(`opts`): [`Agent`](#agent)\<`unknown`, `unknown`\>
 
-Defined in: [runtime/supervise/coordination-driver.ts:110](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L110)
+Defined in: [runtime/supervise/coordination-driver.ts:113](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L113)
 
 Build the intelligent recursive driver. Its `act` is the LLM tool-loop; spawn it as a
 `driverChild` (`driver-executor.ts`) to run it inside a nested scope, recursively.
@@ -14333,7 +14375,7 @@ Build the intelligent recursive driver. Its `act` is the LLM tool-loop; spawn it
 
 > **finalizeBestDelivered**(`settled`, `blobs`): `Promise`\<`unknown`\>
 
-Defined in: [runtime/supervise/coordination-driver.ts:263](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L263)
+Defined in: [runtime/supervise/coordination-driver.ts:267](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/coordination-driver.ts#L267)
 
 Keep-best finalize under the completion-oracle: return the highest-scoring DELIVERED child's
  output (settled `done` AND `valid` — its deliverable check passed). Returns undefined when no
@@ -14387,6 +14429,13 @@ Stand up the coordination MCP over a live scope. The HOST address is `127.0.0.1`
 
 [`Budget`](#budget-10)
 
+###### maxLiveWorkers?
+
+`number`
+
+Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this many are in
+ flight (a concurrency fence on top of the conserved-pool fence). Omit/`<= 0` = no cap.
+
 ###### port?
 
 `number`
@@ -14745,7 +14794,7 @@ Build the worker seam from a backend (WHERE workers run) + an optional completio
 
 > **supervise**(`profile`, `task`, `opts`): `Promise`\<[`SupervisedResult`](#supervisedresult)\<`unknown`\>\>
 
-Defined in: [runtime/supervise/supervise.ts:98](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L98)
+Defined in: [runtime/supervise/supervise.ts:102](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervise.ts#L102)
 
 #### Parameters
 
@@ -14771,7 +14820,7 @@ Defined in: [runtime/supervise/supervise.ts:98](https://github.com/tangle-networ
 
 > **supervisorAgent**(`profile`, `deps`): [`Agent`](#agent)\<`unknown`, `unknown`\>
 
-Defined in: [runtime/supervise/supervisor-agent.ts:74](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L74)
+Defined in: [runtime/supervise/supervisor-agent.ts:78](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/supervisor-agent.ts#L78)
 
 #### Parameters
 
@@ -14957,7 +15006,7 @@ Collect the source's spans and run the agent-eval batch analyzers over them unde
 
 > **createWorktreeCliExecutor**(`options`): [`Executor`](#executor)\<`WorktreeHarnessResult`\>
 
-Defined in: [runtime/supervise/worktree-cli-executor.ts:85](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L85)
+Defined in: [runtime/supervise/worktree-cli-executor.ts:95](https://github.com/tangle-network/agent-runtime/blob/main/src/runtime/supervise/worktree-cli-executor.ts#L95)
 
 **`Experimental`**
 
diff --git a/src/mcp/tools/coordination.ts b/src/mcp/tools/coordination.ts
index aca6326..fc0c066 100644
--- a/src/mcp/tools/coordination.ts
+++ b/src/mcp/tools/coordination.ts
@@ -104,6 +104,12 @@ export interface CoordinationToolsOptions {
    *  and queued for the driver to pull via `await_event`. Omit/empty = no auto-analysis (default;
    *  the driver can still run lenses on demand via `run_analyst`). Requires `analysts`. */
   readonly analyzeOnSettle?: ReadonlyArray<string>
+  /** Hard cap on how many workers may be LIVE (spawned but not yet settled) at once. `spawn_agent`
+   *  counts the scope's non-terminal nodes and fails closed (`error: 'max-live-workers'`) BEFORE
+   *  reserving from the pool when the cap is already met — a concurrency fence on top of the
+   *  conserved-budget fence (the pool bounds total work; this bounds simultaneous work, e.g. live
+   *  sandboxes/boxes). Omit or `<= 0` = no cap (the prior behavior; the pool stays the only fence). */
+  readonly maxLiveWorkers?: number
 }
 
 /**
@@ -375,6 +381,15 @@ export function createCoordinationTools(opts: CoordinationToolsOptions): Coordin
     })
   }
 
+  // Count workers that are LIVE — spawned but not yet settled — with one synchronous pass over the
+  // scope's in-memory node view (linear in `view.nodes`). Terminal statuses are done/failed/cancelled;
+  // everything else (pending/acquiring/running) is still in flight. This is the concurrency fence's input.
+  const maxLiveWorkers = opts.maxLiveWorkers
+  const liveWorkerCount = (): number =>
+    opts.scope.view.nodes.filter(
+      (n) => n.status !== 'done' && n.status !== 'failed' && n.status !== 'cancelled',
+    ).length
+
   const tools: McpToolDescriptor[] = [
     {
       name: 'spawn_agent',
@@ -382,7 +397,9 @@ export function createCoordinationTools(opts: CoordinationToolsOptions): Coordin
         'Start a worker the driver will drive. `profile` is the worker or another driver; ' +
         '`task` is what it should do. Reserves budget from the conserved pool and fails closed. ' +
         'Pass an optional `budget` (per-field) to give a hard sub-task more than the default — it ' +
-        'merges over the per-worker default; the conserved pool is still the hard fence.',
+        'merges over the per-worker default; the conserved pool is still the hard fence. When a ' +
+        'max-live-workers cap is set it also fails closed (`error: "max-live-workers"`) while that ' +
+        'many workers are still in flight — settle or steer one before spawning another.',
       inputSchema: {
         type: 'object',
         properties: {
@@ -406,6 +423,14 @@ export function createCoordinationTools(opts: CoordinationToolsOptions): Coordin
       },
       handler: (raw) => {
         const a = obj(raw)
+        // Concurrency fence FIRST — fail closed before reserving budget, so a rejected spawn never
+        // touches the pool. The conserved pool bounds TOTAL work; this bounds SIMULTANEOUS work.
+        if (
+          maxLiveWorkers !== undefined &&
+          maxLiveWorkers > 0 &&
+          liveWorkerCount() >= maxLiveWorkers
+        )
+          return Promise.resolve({ error: 'max-live-workers' as const })
         const agent = opts.makeWorkerAgent(a.profile)
         const budget =
           a.budget === undefined ? opts.perWorker : mergeBudget(opts.perWorker, a.budget)
diff --git a/src/runtime/supervise/coordination-driver.ts b/src/runtime/supervise/coordination-driver.ts
index 9f011a2..dcefd02 100644
--- a/src/runtime/supervise/coordination-driver.ts
+++ b/src/runtime/supervise/coordination-driver.ts
@@ -48,6 +48,9 @@ export interface DriverAgentOptions {
   readonly makeWorkerAgent: MakeWorkerAgent
   /** Per-child budget reserved from the conserved pool on each spawn. */
   readonly perWorker: Budget
+  /** Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this many are in
+   *  flight (a concurrency fence on top of the conserved-pool fence). Omit/`<= 0` = no cap. */
+  readonly maxLiveWorkers?: number
   /** The driver's stance — a string, or built from the task (the worker-driver prompt /
    *  the generator). INJECTED so the prompt is a pluggable, optimizable role. */
   readonly systemPrompt: string | ((task: unknown) => string)
@@ -151,6 +154,7 @@ export function driverAgent(opts: DriverAgentOptions): Agent<unknown, unknown> {
         blobs: opts.blobs,
         makeWorkerAgent: opts.makeWorkerAgent,
         perWorker: opts.perWorker,
+        ...(opts.maxLiveWorkers !== undefined ? { maxLiveWorkers: opts.maxLiveWorkers } : {}),
       })
       const byName = new Map<string, McpToolDescriptor>(coord.tools.map((t) => [t.name, t]))
       const toolSpecs: ToolSpec[] = [
diff --git a/src/runtime/supervise/coordination-mcp.ts b/src/runtime/supervise/coordination-mcp.ts
index 1bcaa59..5e78732 100644
--- a/src/runtime/supervise/coordination-mcp.ts
+++ b/src/runtime/supervise/coordination-mcp.ts
@@ -53,6 +53,9 @@ export async function serveCoordinationMcp(opts: {
   blobs: ResultBlobStore
   makeWorkerAgent: MakeWorkerAgent
   perWorker: Budget
+  /** Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this many are in
+   *  flight (a concurrency fence on top of the conserved-pool fence). Omit/`<= 0` = no cap. */
+  maxLiveWorkers?: number
   port?: number
   host?: string
   /** Trace-analyst lenses the driver can run (`run_analyst`) or auto-fire on settle. */
@@ -68,6 +71,7 @@ export async function serveCoordinationMcp(opts: {
     blobs: opts.blobs,
     makeWorkerAgent: opts.makeWorkerAgent,
     perWorker: opts.perWorker,
+    ...(opts.maxLiveWorkers !== undefined ? { maxLiveWorkers: opts.maxLiveWorkers } : {}),
     ...(opts.analysts ? { analysts: opts.analysts } : {}),
     ...(opts.analyzeOnSettle ? { analyzeOnSettle: opts.analyzeOnSettle } : {}),
     ...(opts.onEvent ? { onEvent: opts.onEvent } : {}),
diff --git a/src/runtime/supervise/supervise.ts b/src/runtime/supervise/supervise.ts
index 63e0bba..6e08f43 100644
--- a/src/runtime/supervise/supervise.ts
+++ b/src/runtime/supervise/supervise.ts
@@ -75,6 +75,10 @@ export interface SuperviseOptions {
   ) => Promise<string | null | undefined>
   /** Per-child budget reserved on each spawn. Defaults to a quarter of the pool's tokens. */
   readonly perWorker?: Budget
+  /** Hard cap on simultaneously-LIVE workers — `spawn_agent` fails closed once this many are in
+   *  flight. The conserved pool bounds TOTAL work; this bounds SIMULTANEOUS work (live boxes/
+   *  sandboxes a real fleet runs at once). Omit/`<= 0` = no cap (the pool stays the only fence). */
+  readonly maxLiveWorkers?: number
   /** Worker output store. Defaults to in-memory. */
   readonly blobs?: ResultBlobStore
   readonly maxDepth?: number
@@ -124,6 +128,7 @@ export function supervise(profile: SupervisorProfile, task: unknown, opts: Super
     blobs,
     makeWorkerAgent,
     perWorker,
+    ...(opts.maxLiveWorkers !== undefined ? { maxLiveWorkers: opts.maxLiveWorkers } : {}),
     ...(opts.router ? { router: opts.router } : {}),
     ...(opts.brain ? { brain: opts.brain } : {}),
     ...(opts.driveHarness ? { driveHarness: opts.driveHarness } : {}),
diff --git a/src/runtime/supervise/supervisor-agent.ts b/src/runtime/supervise/supervisor-agent.ts
index 59ef4b3..a67241d 100644
--- a/src/runtime/supervise/supervisor-agent.ts
+++ b/src/runtime/supervise/supervisor-agent.ts
@@ -50,6 +50,10 @@ export interface SupervisorAgentDeps {
   readonly makeWorkerAgent: MakeWorkerAgent
   /** Per-child budget reserved from the conserved pool on each spawn. */
   readonly perWorker: Budget
+  /** Hard cap on simultaneously-LIVE workers across both arms — `spawn_agent` fails closed once
+   *  this many are in flight (a concurrency fence on top of the conserved-pool fence; bounds live
+   *  boxes/sandboxes, not total work). Omit/`<= 0` = no cap. */
+  readonly maxLiveWorkers?: number
   /** Router substrate for a router-brained supervisor (`harness` null). The profile's model wins. */
   readonly router?: RouterConfig
   /** Inject the brain directly (tests / advanced) instead of resolving `routerBrain` from the profile. */
@@ -90,6 +94,7 @@ export function supervisorAgent(
       makeWorkerAgent: deps.makeWorkerAgent,
       perWorker: deps.perWorker,
       systemPrompt,
+      ...(deps.maxLiveWorkers !== undefined ? { maxLiveWorkers: deps.maxLiveWorkers } : {}),
       ...(deps.extraTools ? { extraTools: deps.extraTools } : {}),
       ...(deps.executeExtraTool ? { executeExtraTool: deps.executeExtraTool } : {}),
       ...(deps.maxTurns !== undefined ? { maxTurns: deps.maxTurns } : {}),
@@ -111,6 +116,7 @@ export function supervisorAgent(
         blobs: deps.blobs,
         makeWorkerAgent: deps.makeWorkerAgent,
         perWorker: deps.perWorker,
+        ...(deps.maxLiveWorkers !== undefined ? { maxLiveWorkers: deps.maxLiveWorkers } : {}),
       })
       try {
         await driveHarness({ profile, task, scope, coordinationMcpUrl: mcp.url })
diff --git a/src/runtime/supervise/worktree-cli-executor.ts b/src/runtime/supervise/worktree-cli-executor.ts
index 340950d..c70701e 100644
--- a/src/runtime/supervise/worktree-cli-executor.ts
+++ b/src/runtime/supervise/worktree-cli-executor.ts
@@ -13,9 +13,12 @@
  * result onto the `Executor` port (artifact + spend) and owns the teardown point. The §1.5 payload
  * (authored systemPrompt + model) reaches the harness inside the core, not here.
  *
- * Token accounting: a harness CLI does not surface usage, so this executor is `budgetExempt: true`
- * — its spend is NOT metered against the conserved pool and its iterations are EXCLUDED from the
- * equal-k arms by construction (mirrors `cliExecutor`).
+ * Token accounting: a harness CLI does not surface usage, so this executor defaults to
+ * `budgetExempt: true` — its spend is NOT metered against the conserved pool and its iterations are
+ * EXCLUDED from the equal-k arms by construction (mirrors `cliExecutor`). The exemption is an
+ * explicit, documented `budgetExempt` option rather than a buried hardcode: set it `false` ONLY for
+ * a harness that genuinely surfaces real token/usd usage to meter into the pool — otherwise the
+ * executor would meter a fabricated zero, which the no-silent-zeros rule forbids.
  */
 
 import { randomUUID } from 'node:crypto'
@@ -71,6 +74,13 @@ export interface WorktreeCliExecutorOptions {
   /** Test seam — inject the verification-command runner so unit tests script test/typecheck
    *  outcomes without spawning a real shell. Defaults to a `/bin/sh -c` spawn in the worktree. */
   runCommand?: WorktreeCheckRunner
+  /**
+   * Exclude this leaf's spend from the conserved pool + equal-k arms. Defaults to `true` because a
+   * coding-harness CLI does not surface token usage, so metering it would record a fabricated zero
+   * (the no-silent-zeros rule forbids that). Set `false` ONLY for a harness that surfaces real
+   * token/usd usage worth metering — the executor would then debit the (real) spend it captures.
+   */
+  budgetExempt?: boolean
 }
 
 /**
@@ -97,14 +107,17 @@ export function createWorktreeCliExecutor(
 
   const runId = options.runId ?? randomUUID()
   const controller = new AbortController()
+  // Default true: a harness CLI cannot account tokens, so the honest value is "exclude from the
+  // pool + equal-k" rather than meter a fabricated zero. An explicit `false` opts a real-usage
+  // harness into metering the spend it captures.
+  const budgetExempt = options.budgetExempt ?? true
 
   let run: WorktreeHarnessRun | undefined
   let artifact: ExecutorResult<WorktreePatchArtifact> | undefined
 
   return {
     runtime: 'cli',
-    // A harness CLI cannot account tokens — exclude it from the conserved pool + equal-k.
-    budgetExempt: true,
+    budgetExempt,
     async execute(_task, signal): Promise<ExecutorResult<WorktreePatchArtifact>> {
       const linked = linkSignals(signal, controller.signal)
       const started = Date.now()
@@ -130,7 +143,9 @@ export function createWorktreeCliExecutor(
 
       const spent: Spend = {
         iterations: 1,
-        // budgetExempt: spend is recorded zero (not metered), never a fabricated cost.
+        // The worktree-harness core surfaces no token/usd usage, so tokens/usd are recorded as
+        // zero (usage unknown, NOT a fabricated cost). When budgetExempt is true the pool ignores
+        // this spend; when explicitly false the scope debits it as-is (iterations/ms only).
         tokens: zeroTokenUsage(),
         usd: 0,
         ms: Date.now() - started,
diff --git a/tests/loops/coordination.test.ts b/tests/loops/coordination.test.ts
index 2182604..c5326fe 100644
--- a/tests/loops/coordination.test.ts
+++ b/tests/loops/coordination.test.ts
@@ -82,6 +82,64 @@ describe('coordination tools', () => {
     })
   })
 
+  it('spawn_agent fails closed at the maxLiveWorkers cap WITHOUT touching the pool', async () => {
+    // A scope whose live (non-terminal) node set is driven by the spawns we make: each successful
+    // spawn appends a `running` node; nothing settles. The conserved pool always admits, so the
+    // ONLY thing that can stop a spawn here is the concurrency cap.
+    const live: Array<{ status: string }> = []
+    const spawns: unknown[] = []
+    const cappedScope = {
+      spawn: (_a: unknown, _t: unknown, opts: { label: string }) => {
+        spawns.push(opts)
+        live.push({ status: 'running' })
+        return {
+          ok: true as const,
+          handle: {
+            id: `w${live.length - 1}`,
+            label: opts.label,
+            status: 'running' as const,
+            abort() {},
+          },
+        }
+      },
+      next: async () => null,
+      send: () => false,
+      get view() {
+        return { root: 'root', nodes: live, inFlight: live.length }
+      },
+      budget: { tokensLeft: 1e9, usdLeft: 0, deadlineMs: 0, reservedTokens: 0 },
+      signal: new AbortController().signal,
+    } as unknown as Scope<unknown>
+
+    const tb = createCoordinationTools({
+      scope: cappedScope,
+      blobs,
+      makeWorkerAgent,
+      perWorker: { maxIterations: 1, maxTokens: 10 },
+      maxLiveWorkers: 2,
+    })
+    const spawn = () => tool(tb, 'spawn_agent').handler({ profile: {}, task: 'go' })
+    expect(await spawn()).toEqual({ workerId: 'w0' })
+    expect(await spawn()).toEqual({ workerId: 'w1' })
+    // The 2 live workers fill the cap → the 3rd fails closed BEFORE scope.spawn is called.
+    expect(await spawn()).toEqual({ error: 'max-live-workers' })
+    expect(spawns).toHaveLength(2)
+    // A settled worker frees a slot — mark one terminal and the next spawn admits again.
+    live[0]!.status = 'done'
+    expect(await spawn()).toEqual({ workerId: 'w2' })
+
+    // No cap (omitted) → the pool stays the only fence; the same scope admits past the prior cap.
+    const uncapped = createCoordinationTools({
+      scope: cappedScope,
+      blobs,
+      makeWorkerAgent,
+      perWorker: { maxIterations: 1, maxTokens: 10 },
+    })
+    expect(await tool(uncapped, 'spawn_agent').handler({ profile: {}, task: 'go' })).toEqual({
+      workerId: 'w3',
+    })
+  })
+
   it('spawn_agent reserves the per-worker default when no budget is given', async () => {
     const { scope, spawns } = mockScope()
     const tb = createCoordinationTools({
diff --git a/tests/runtime/worktree-cli-executor.test.ts b/tests/runtime/worktree-cli-executor.test.ts
index c7aa922..80f5bc5 100644
--- a/tests/runtime/worktree-cli-executor.test.ts
+++ b/tests/runtime/worktree-cli-executor.test.ts
@@ -169,7 +169,7 @@ describe('createWorktreeCliExecutor', () => {
     expect(state.worktreesRemoved).toEqual(state.worktreesCreated)
   })
 
-  it('is budgetExempt (a harness CLI cannot account tokens)', () => {
+  it('is budgetExempt by default (a harness CLI cannot account tokens)', () => {
     const exec = createWorktreeCliExecutor({
       repoRoot: '/workspace',
       profile: authoredProfile,
@@ -182,6 +182,19 @@ describe('createWorktreeCliExecutor', () => {
     expect(exec.budgetExempt).toBe(true)
   })
 
+  it('budgetExempt: false opts the leaf into metering (explicit, not a buried hardcode)', () => {
+    const exec = createWorktreeCliExecutor({
+      repoRoot: '/workspace',
+      profile: authoredProfile,
+      harness: 'claude',
+      taskPrompt: 'x',
+      budgetExempt: false,
+      runGit: makeFakeGit(freshGitState()),
+      runHarness: vi.fn(),
+    })
+    expect(exec.budgetExempt).toBe(false)
+  })
+
   it('resultArtifact() before execute() resolves throws (fail loud, no fabricated artifact)', () => {
     const exec = createWorktreeCliExecutor({
       repoRoot: '/workspace',