From 2020c0880f9194cc49ce3a0974b22ef2fb78c26a Mon Sep 17 00:00:00 2001
From: Austin Tucker <220209011+austinkennethtucker@users.noreply.github.com>
Date: Mon, 22 Jun 2026 12:53:39 -0400
Subject: [PATCH 1/2] Allow opt-in concurrent Codex runs

---
 .env.example                    |  9 ++++-
 README.md                       | 19 +++++----
 src/dashboard_static/app.js     |  4 +-
 src/dashboard_static/index.html |  8 ++++
 src/server.py                   | 55 ++++++++++++++-----------
 tests/test_server.py            | 71 +++++++++++++++++++++++++++++++++
 6 files changed, 133 insertions(+), 33 deletions(-)

diff --git a/.env.example b/.env.example
index 308f518..2f4a0d4 100644
--- a/.env.example
+++ b/.env.example
@@ -20,9 +20,14 @@ MAX_REQUEST_BODY_BYTES=262144
 MAX_MESSAGES=32
 MAX_TOTAL_TEXT_CHARS=80000
 
-# Local admission queue for short bursts while preserving one active Codex run.
+# Local admission queue for short bursts while all Codex run slots are busy.
 # 0 fails fast with wrapper_busy 429. Values above 0 wait up to that many
-# seconds for the active run to finish. The app clamps this to 0-5 seconds.
+# seconds for a run slot to open. The app clamps this to 0-5 seconds.
 QUEUE_WAIT_SECONDS=0
+
+# Optional provider-side parallelism. Keep 1 unless local wrapper_busy 429s are
+# the bottleneck and the signed-in Codex account tolerates two concurrent CLI
+# executions. The app clamps this to 1-2.
+MAX_CONCURRENT_CODEX_RUNS=1
 CORS_ALLOWED_ORIGINS=
 LOG_LEVEL=INFO
diff --git a/README.md b/README.md
index ff0ad64..4a0ab3a 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,8 @@ project.
 - `GET /v1/models` with the local model alias `codex-cli-default`.
 - `POST /v1/chat/completions` for text-only chat messages.
 - Non-streaming responses and final-only SSE streaming.
-- One active Codex execution at a time, with an optional short local wait queue.
+- One active Codex execution by default, with an optional second local execution
+  slot and short local wait queue.
 - A local operator dashboard at `http://127.0.0.1:8320/dashboard/`.
 
 ## What It Does Not Provide
@@ -228,8 +229,11 @@ For Obsidian LLM Wiki, use:
 required for the inspected OpenAI-compatible provider path.
 
 If near-simultaneous local requests collide, set `QUEUE_WAIT_SECONDS=2` or `3`
-in `.env` and restart the wrapper. The wrapper still permits only one active
-Codex run.
+in `.env` and restart the wrapper. If the collisions are confirmed
+`wrapper_busy` responses rather than upstream rate limits, you can also set
+`MAX_CONCURRENT_CODEX_RUNS=2` to permit a second local Codex CLI execution.
+Set it back to `1` if the signed-in Codex account starts returning
+`upstream_rate_limit` errors.
 
 ## Configuration
 
@@ -245,6 +249,7 @@ Most configuration lives in `.env`, copied from `.env.example`.
 | `MAX_MESSAGES` | Maximum number of chat messages. |
 | `MAX_TOTAL_TEXT_CHARS` | Maximum total text across messages. |
 | `QUEUE_WAIT_SECONDS` | Short local wait queue for bursty clients. Values are clamped to `0-5`. |
+| `MAX_CONCURRENT_CODEX_RUNS` | Number of provider-side Codex executions allowed at once. Values are clamped to `1-2`; default `1`. |
 | `CORS_ALLOWED_ORIGINS` | Optional comma-separated explicit origins. Requests without `Origin` are allowed. |
 | `LOG_LEVEL` | Python wrapper log level. |
 
@@ -360,11 +365,11 @@ docker compose restart
   `500000`, and restart.
 - `413` with `Request body too large`: increase `MAX_REQUEST_BODY_BYTES`, up to
   `2000000`, and restart.
-- `429` with `code: "wrapper_busy"`: wait for the active request to finish,
-  reduce client concurrency to `1`, increase batch delay, or set a small
-  `QUEUE_WAIT_SECONDS`.
+- `429` with `code: "wrapper_busy"`: wait for active requests to finish,
+  reduce client concurrency to `1`, increase batch delay, set a small
+  `QUEUE_WAIT_SECONDS`, or opt into `MAX_CONCURRENT_CODEX_RUNS=2`.
 - `429` with `code: "upstream_rate_limit"`: the signed-in upstream account is
-  rate limited; wait and retry later.
+  rate limited; wait and retry later, and keep `MAX_CONCURRENT_CODEX_RUNS=1`.
 - `502` or `/healthz` returning `503`: check
   `docker exec -it codex-cli-provider codex login status`, then re-run device
   login if needed.
diff --git a/src/dashboard_static/app.js b/src/dashboard_static/app.js
index 9aca35d..be60da2 100644
--- a/src/dashboard_static/app.js
+++ b/src/dashboard_static/app.js
@@ -78,7 +78,7 @@ function renderStatus(data) {
 
   setText("#runner-value", runner.busy ? "Busy" : runner.ready ? "Ready" : "Unready");
   setClass("#runner-value", runner.ready && !runner.busy ? "ok-text" : runner.busy ? "warn-text" : "bad-text");
-  setText("#runner-detail", data.provider.modelAlias);
+  setText("#runner-detail", `${runner.activeRuns}/${runner.maxConcurrentRuns} active - ${data.provider.modelAlias}`);
 
   setText("#event-value", String(events.total));
   setText("#event-detail", `${events.errors} errors, p95 ${events.p95DurationMs ?? "-"} ms`);
@@ -86,6 +86,8 @@ function renderStatus(data) {
   setText("#limit-body", formatBytes(limits.maxBodyBytes));
   setText("#limit-text", `${limits.maxTotalTextChars.toLocaleString()} chars`);
   setText("#limit-timeout", `${limits.requestTimeoutSeconds}s`);
+  setText("#limit-concurrency", `${limits.maxConcurrentRuns} run${limits.maxConcurrentRuns === 1 ? "" : "s"}`);
+  setText("#limit-queue", `${limits.queueWaitSeconds}s`);
   setText("#last-refresh", `Updated ${new Date().toLocaleTimeString()}`);
 }
 
diff --git a/src/dashboard_static/index.html b/src/dashboard_static/index.html
index 231f055..69dafaf 100644
--- a/src/dashboard_static/index.html
+++ b/src/dashboard_static/index.html
@@ -86,6 +86,14 @@ <h2>Runtime Limits</h2>
               <dt>Timeout</dt>
               <dd id="limit-timeout">-</dd>
             </div>
+            <div>
+              <dt>Runs</dt>
+              <dd id="limit-concurrency">-</dd>
+            </div>
+            <div>
+              <dt>Queue</dt>
+              <dd id="limit-queue">-</dd>
+            </div>
           </dl>
         </article>
 
diff --git a/src/server.py b/src/server.py
index 7cb82f0..68cbc62 100644
--- a/src/server.py
+++ b/src/server.py
@@ -145,6 +145,7 @@ class AppSettings:
     max_total_text_chars: int = 80_000
     codex_request_timeout_seconds: int = 180
     queue_wait_seconds: float = 0.0
+    max_concurrent_codex_runs: int = 1
     dashboard_enabled: bool = True
 
     def __post_init__(self) -> None:
@@ -156,6 +157,7 @@ def __post_init__(self) -> None:
         self.max_total_text_chars = min(max(int(self.max_total_text_chars), 1), 500_000)
         self.codex_request_timeout_seconds = min(max(int(self.codex_request_timeout_seconds), 5), 900)
         self.queue_wait_seconds = min(max(float(self.queue_wait_seconds), 0.0), 5.0)
+        self.max_concurrent_codex_runs = min(max(int(self.max_concurrent_codex_runs), 1), 2)
 
     @classmethod
     def from_env(cls) -> "AppSettings":
@@ -175,6 +177,7 @@ def from_env(cls) -> "AppSettings":
             max_total_text_chars=int(os.environ.get("MAX_TOTAL_TEXT_CHARS", "80000")),
             codex_request_timeout_seconds=int(os.environ.get("CODEX_REQUEST_TIMEOUT_SECONDS", "180")),
             queue_wait_seconds=float(os.environ.get("QUEUE_WAIT_SECONDS", "0")),
+            max_concurrent_codex_runs=int(os.environ.get("MAX_CONCURRENT_CODEX_RUNS", "1")),
             dashboard_enabled=os.environ.get("DASHBOARD_ENABLED", "true").strip().lower() not in {"0", "false", "no", "off"},
         )
 
@@ -340,32 +343,31 @@ def summarize_dashboard_events(events: list[dict[str, Any]]) -> dict[str, Any]:
     }
 
 
-async def execute_single_flight(app: FastAPI, settings: AppSettings, prompt: str) -> str:
-    lock: asyncio.Lock = app.state.execution_lock
+def wrapper_busy_error() -> APIError:
+    return APIError(
+        429,
+        "Another Codex execution is already running",
+        "rate_limit_error",
+        code="wrapper_busy",
+        headers={"Retry-After": "3"},
+    )
+
+
+async def execute_with_admission(app: FastAPI, settings: AppSettings, prompt: str) -> str:
+    semaphore: asyncio.Semaphore = app.state.execution_semaphore
     acquired = False
-    if lock.locked():
+    if semaphore.locked():
         if settings.queue_wait_seconds == 0:
-            raise APIError(
-                429,
-                "Another Codex execution is already running",
-                "rate_limit_error",
-                code="wrapper_busy",
-                headers={"Retry-After": "3"},
-            )
+            raise wrapper_busy_error()
         try:
-            await asyncio.wait_for(lock.acquire(), timeout=settings.queue_wait_seconds)
+            await asyncio.wait_for(semaphore.acquire(), timeout=settings.queue_wait_seconds)
             acquired = True
         except asyncio.TimeoutError as exc:
-            raise APIError(
-                429,
-                "Another Codex execution is already running",
-                "rate_limit_error",
-                code="wrapper_busy",
-                headers={"Retry-After": "3"},
-            ) from exc
+            raise wrapper_busy_error() from exc
     else:
-        await lock.acquire()
+        await semaphore.acquire()
         acquired = True
+    app.state.active_executions += 1
     try:
         return await app.state.runner.execute(
             prompt,
@@ -376,7 +378,8 @@ async def execute_single_flight(app: FastAPI, settings: AppSettings, prompt: str
         raise map_runner_error(exc) from exc
     finally:
         if acquired:
-            lock.release()
+            app.state.active_executions = max(0, app.state.active_executions - 1)
+            semaphore.release()
 
 
 def create_app(settings: AppSettings | None = None, runner: Any | None = None) -> FastAPI:
@@ -385,7 +388,8 @@ def create_app(settings: AppSettings | None = None, runner: Any | None = None) -
     app = FastAPI(title="codex-cli-provider", docs_url=None, redoc_url=None, openapi_url=None)
     app.state.settings = settings
     app.state.runner = runner
-    app.state.execution_lock = asyncio.Lock()
+    app.state.execution_semaphore = asyncio.Semaphore(settings.max_concurrent_codex_runs)
+    app.state.active_executions = 0
     app.state.dashboard_events = deque(maxlen=DASHBOARD_EVENT_LIMIT)
 
     if settings.cors_allowed_origins:
@@ -475,6 +479,7 @@ async def dashboard_status() -> JSONResponse:
         require_dashboard_enabled()
         status = await runner.status()
         events = list(app.state.dashboard_events)
+        active_executions = int(app.state.active_executions)
         return dashboard_json({
             "time": now_iso(),
             "provider": {
@@ -485,13 +490,17 @@ async def dashboard_status() -> JSONResponse:
                 },
                 "runner": {
                     "ready": bool(status.get("ready")),
-                    "busy": bool(app.state.execution_lock.locked()),
+                    "busy": active_executions >= settings.max_concurrent_codex_runs,
+                    "activeRuns": active_executions,
+                    "maxConcurrentRuns": settings.max_concurrent_codex_runs,
                 },
                 "limits": {
                     "maxBodyBytes": settings.max_body_bytes,
                     "maxMessages": settings.max_messages,
                     "maxTotalTextChars": settings.max_total_text_chars,
                     "requestTimeoutSeconds": settings.codex_request_timeout_seconds,
+                    "queueWaitSeconds": settings.queue_wait_seconds,
+                    "maxConcurrentRuns": settings.max_concurrent_codex_runs,
                 },
             },
             "events": summarize_dashboard_events(events),
@@ -535,7 +544,7 @@ async def chat(payload: dict[str, Any] = Body(...), _: None = Depends(require_au
         prompt = build_codex_prompt(messages)
         start = time.perf_counter()
         try:
-            text = await execute_single_flight(app, settings, prompt)
+            text = await execute_with_admission(app, settings, prompt)
         finally:
             elapsed_ms = int((time.perf_counter() - start) * 1000)
             LOGGER.info("request complete route=/v1/chat/completions elapsed_ms=%s model=%s", elapsed_ms, MODEL_ALIAS)
diff --git a/tests/test_server.py b/tests/test_server.py
index 69a571c..62ac68b 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -90,6 +90,14 @@ async def request(app, method, url, token=TEST_SECRET, **kwargs):
         return await client.request(method, url, headers=headers, **kwargs)
 
 
+async def wait_for_prompt_count(runner, count):
+    for _ in range(50):
+        if len(runner.prompts) >= count:
+            return
+        await asyncio.sleep(0.01)
+    raise AssertionError(f"expected {count} prompts, got {len(runner.prompts)}")
+
+
 def test_default_app_uses_local_runner():
     app = create_app(settings())
     assert isinstance(app.state.runner, LocalCodexRunner)
@@ -101,6 +109,15 @@ def test_app_settings_do_not_require_runner_socket():
     assert not hasattr(configured, "runner_api_key")
 
 
+def test_admission_settings_are_clamped():
+    high = AppSettings(proxy_api_key=TEST_SECRET, queue_wait_seconds=99, max_concurrent_codex_runs=99)
+    low = AppSettings(proxy_api_key=TEST_SECRET, queue_wait_seconds=-1, max_concurrent_codex_runs=-1)
+    assert high.queue_wait_seconds == 5.0
+    assert high.max_concurrent_codex_runs == 2
+    assert low.queue_wait_seconds == 0.0
+    assert low.max_concurrent_codex_runs == 1
+
+
 @pytest.mark.asyncio
 async def test_health_ready_and_unready():
     ready_app = create_app(settings(), FakeRunner(ready=True))
@@ -149,6 +166,19 @@ async def test_dashboard_status_is_sanitized_and_unauthenticated():
     assert TEST_SECRET not in response.text
 
 
+@pytest.mark.asyncio
+async def test_dashboard_status_reports_admission_limits():
+    app = create_app(settings(max_concurrent_codex_runs=2, queue_wait_seconds=3), FakeRunner())
+    response = await request(app, "GET", "/dashboard/api/status", token=None)
+    assert response.status_code == 200
+    body = response.json()
+    assert body["provider"]["runner"]["activeRuns"] == 0
+    assert body["provider"]["runner"]["maxConcurrentRuns"] == 2
+    assert body["provider"]["runner"]["busy"] is False
+    assert body["provider"]["limits"]["queueWaitSeconds"] == 3
+    assert body["provider"]["limits"]["maxConcurrentRuns"] == 2
+
+
 @pytest.mark.asyncio
 async def test_dashboard_can_be_disabled():
     app = create_app(settings(dashboard_enabled=False), FakeRunner())
@@ -477,6 +507,47 @@ async def test_single_flight_queue_timeout_returns_wrapper_busy_429():
     assert second.json()["error"]["code"] == "wrapper_busy"
 
 
+@pytest.mark.asyncio
+async def test_two_execution_slots_allow_two_parallel_requests():
+    runner = BlockingRunner()
+    app = create_app(settings(max_concurrent_codex_runs=2, queue_wait_seconds=0), runner)
+    payload = {"model": "codex-cli-default", "messages": [{"role": "user", "content": "hello"}]}
+
+    first = asyncio.create_task(request(app, "POST", "/v1/chat/completions", json=payload))
+    second = asyncio.create_task(request(app, "POST", "/v1/chat/completions", json=payload))
+    await wait_for_prompt_count(runner, 2)
+
+    status = await request(app, "GET", "/dashboard/api/status", token=None)
+    assert status.json()["provider"]["runner"]["activeRuns"] == 2
+    assert status.json()["provider"]["runner"]["busy"] is True
+
+    runner.release.set()
+    first_response = await first
+    second_response = await second
+    assert first_response.status_code == 200
+    assert second_response.status_code == 200
+
+
+@pytest.mark.asyncio
+async def test_concurrency_limit_returns_429_when_all_slots_busy():
+    runner = BlockingRunner()
+    app = create_app(settings(max_concurrent_codex_runs=2, queue_wait_seconds=0), runner)
+    payload = {"model": "codex-cli-default", "messages": [{"role": "user", "content": "hello"}]}
+
+    first = asyncio.create_task(request(app, "POST", "/v1/chat/completions", json=payload))
+    second = asyncio.create_task(request(app, "POST", "/v1/chat/completions", json=payload))
+    await wait_for_prompt_count(runner, 2)
+
+    third = await request(app, "POST", "/v1/chat/completions", json=payload)
+    runner.release.set()
+    await first
+    await second
+
+    assert third.status_code == 429
+    assert third.headers["retry-after"] == "3"
+    assert third.json()["error"]["code"] == "wrapper_busy"
+
+
 @pytest.mark.asyncio
 async def test_runner_errors_are_mapped():
     cases = [

From 6e154b5875a62eba3176882ba95780c1873f6128 Mon Sep 17 00:00:00 2001
From: Austin Tucker <220209011+austinkennethtucker@users.noreply.github.com>
Date: Mon, 22 Jun 2026 13:52:48 -0400
Subject: [PATCH 2/2] Add pi-node2 image deployment runbook

---
 .github/workflows/candidate-image.yml | 124 +++++++++++++++++++++++
 .github/workflows/ci.yml              |   8 +-
 .github/workflows/deploy-pi-node2.yml |  59 +++++++++++
 .github/workflows/release.yml         |  17 +++-
 .gitignore                            |   1 +
 README.md                             | 106 +++++++++++++++++++-
 docs/pi-node2-reimage.md              | 123 +++++++++++++++++++++++
 scripts/deploy_compose_image.py       |  82 +++++++++++++++
 scripts/image_tags.py                 | 117 ++++++++++++++++++++++
 scripts/smoke_test_provider.py        | 138 ++++++++++++++++++++++++++
 tests/test_image_tags.py              |  38 +++++++
 11 files changed, 807 insertions(+), 6 deletions(-)
 create mode 100644 .github/workflows/candidate-image.yml
 create mode 100644 .github/workflows/deploy-pi-node2.yml
 create mode 100644 docs/pi-node2-reimage.md
 create mode 100644 scripts/deploy_compose_image.py
 create mode 100644 scripts/image_tags.py
 create mode 100644 scripts/smoke_test_provider.py
 create mode 100644 tests/test_image_tags.py

diff --git a/.github/workflows/candidate-image.yml b/.github/workflows/candidate-image.yml
new file mode 100644
index 0000000..de6ad9a
--- /dev/null
+++ b/.github/workflows/candidate-image.yml
@@ -0,0 +1,124 @@
+name: candidate-image
+
+on:
+  workflow_dispatch:
+    inputs:
+      image_tag:
+        description: "Optional candidate tag. Defaults to codex-cli-provider-dev-<branch>-<short-sha>."
+        required: false
+        type: string
+      platforms:
+        description: "Target image platform(s). Use linux/arm64 for pi-node2 validation."
+        required: true
+        default: linux/arm64
+        type: choice
+        options:
+          - linux/arm64
+          - linux/amd64
+          - linux/amd64,linux/arm64
+
+permissions:
+  contents: read
+  packages: write
+
+concurrency:
+  group: candidate-image-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  publish-candidate-image:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
+        with:
+          python-version: "3.12"
+          cache: pip
+
+      - name: Install dependencies
+        run: pip install --requirement requirements.txt
+
+      - name: Unit tests
+        run: python -m pytest -q
+
+      - name: Syntax checks
+        run: python -m compileall -q src scripts tests
+
+      - name: Prepare throwaway local config
+        run: |
+          cp .env.example .env
+          mkdir -p data/secrets data/codex-home data/codex-work
+          python - <<'PY'
+          import pathlib
+          import secrets
+
+          pathlib.Path("data/secrets/proxy_api_key").write_text(secrets.token_urlsafe(48) + "\n", encoding="utf-8")
+          PY
+          chmod 600 .env
+          chmod 644 data/secrets/proxy_api_key
+          chmod 700 data/secrets data/codex-home data/codex-work
+
+      - name: Repo hygiene checks
+        run: python scripts/check_repo_hygiene.py
+
+      - name: Compose security checks
+        run: python scripts/check_compose_security.py
+
+      - name: Image Compose security checks
+        env:
+          COMPOSE_FILE: docker-compose.image.yml
+          CODEX_CLI_PROVIDER_IMAGE: ghcr.io/${{ github.repository }}:codex-cli-provider-dev-compose-check
+        run: python scripts/check_compose_security.py
+
+      - name: Resolve candidate tag
+        id: candidate
+        env:
+          REQUESTED_IMAGE_TAG: ${{ inputs.image_tag }}
+        run: >
+          python scripts/image_tags.py candidate
+          --ref-name "$GITHUB_REF_NAME"
+          --sha "$GITHUB_SHA"
+          --requested "$REQUESTED_IMAGE_TAG"
+          --github-output
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@06116385d9baf250c9f4dcb4858b16962ea869c3 # v4.1.0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
+
+      - name: Log in to GHCR
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and publish candidate image
+        uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
+        with:
+          context: .
+          file: Dockerfile
+          platforms: ${{ inputs.platforms }}
+          push: true
+          tags: ghcr.io/${{ github.repository }}:${{ steps.candidate.outputs.tag }}
+          labels: |
+            org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
+            org.opencontainers.image.revision=${{ github.sha }}
+            org.opencontainers.image.version=${{ steps.candidate.outputs.tag }}
+
+      - name: Inspect candidate image
+        run: docker buildx imagetools inspect ghcr.io/${{ github.repository }}:${{ steps.candidate.outputs.tag }}
+
+      - name: Print pi-node2 pull command
+        run: |
+          echo "Candidate image:"
+          echo "ghcr.io/${{ github.repository }}:${{ steps.candidate.outputs.tag }}"
+          echo
+          echo "pi-node2 command:"
+          echo "CODEX_CLI_PROVIDER_IMAGE=ghcr.io/${{ github.repository }}:${{ steps.candidate.outputs.tag }} docker compose -f docker-compose.image.yml up -d"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1df1b06..5c341e1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -37,7 +37,7 @@ jobs:
         run: python -m pytest -q
 
       - name: Syntax checks
-        run: python -m py_compile src/server.py scripts/check_compose_security.py scripts/check_repo_hygiene.py tests/test_server.py
+        run: python -m compileall -q src scripts tests
 
       - name: Prepare throwaway local config
         run: |
@@ -62,5 +62,11 @@ jobs:
       - name: Compose security checks
         run: python scripts/check_compose_security.py
 
+      - name: Image Compose security checks
+        env:
+          COMPOSE_FILE: docker-compose.image.yml
+          CODEX_CLI_PROVIDER_IMAGE: registry.example.com/your-org/codex-cli-provider:codex-cli-provider-0.1.2
+        run: python scripts/check_compose_security.py
+
       - name: Docker build
         run: docker compose build
diff --git a/.github/workflows/deploy-pi-node2.yml b/.github/workflows/deploy-pi-node2.yml
new file mode 100644
index 0000000..739567c
--- /dev/null
+++ b/.github/workflows/deploy-pi-node2.yml
@@ -0,0 +1,59 @@
+name: deploy-pi-node2
+
+on:
+  workflow_dispatch:
+    inputs:
+      image_tag:
+        description: "Candidate or release image tag to deploy, for example codex-cli-provider-dev-branch-abcdef123456."
+        required: true
+        type: string
+      chat_smoke:
+        description: "Run one live Codex-backed chat completion after deploy."
+        required: true
+        default: false
+        type: boolean
+
+permissions:
+  contents: read
+
+concurrency:
+  group: deploy-pi-node2
+  cancel-in-progress: false
+
+jobs:
+  deploy:
+    runs-on: [self-hosted, linux, arm64, pi-node2]
+    environment: pi-node2
+    timeout-minutes: 20
+
+    steps:
+      - name: Update fixed deploy checkout
+        env:
+          DEPLOY_DIR: ${{ vars.PI_NODE2_DEPLOY_DIR || '/home/pi/projects/codex-cli-provider' }}
+        run: |
+          set -eu
+          mkdir -p "$(dirname "$DEPLOY_DIR")"
+          if [ ! -d "$DEPLOY_DIR/.git" ]; then
+            git clone "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY.git" "$DEPLOY_DIR"
+          fi
+          cd "$DEPLOY_DIR"
+          git fetch --prune origin "$GITHUB_REF"
+          git checkout --force "$GITHUB_SHA"
+
+      - name: Deploy and smoke test
+        env:
+          DEPLOY_DIR: ${{ vars.PI_NODE2_DEPLOY_DIR || '/home/pi/projects/codex-cli-provider' }}
+          IMAGE_REPOSITORY: ghcr.io/${{ github.repository }}
+          IMAGE_TAG: ${{ inputs.image_tag }}
+          CHAT_SMOKE: ${{ inputs.chat_smoke }}
+        run: |
+          set -eu
+          chat_arg=""
+          if [ "$CHAT_SMOKE" = "true" ]; then
+            chat_arg="--chat-smoke"
+          fi
+          cd "$DEPLOY_DIR"
+          python3 scripts/deploy_compose_image.py \
+            --image-repository "$IMAGE_REPOSITORY" \
+            --image-tag "$IMAGE_TAG" \
+            $chat_arg
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 763bc95..7a7ed03 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
     inputs:
       image_tag:
-        description: "Image tag to publish, for example v0.1.0 or test"
+        description: "Image tag to publish, for example codex-cli-provider-0.1.0"
         required: true
         type: string
   push:
@@ -28,6 +28,23 @@ jobs:
       - name: Checkout
         uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
 
+      - name: Validate dispatch image tag
+        if: github.event_name == 'workflow_dispatch'
+        env:
+          # Pass the operator-supplied tag through the environment so it is
+          # never expanded into the shell script text (script injection).
+          REQUESTED_IMAGE_TAG: ${{ inputs.image_tag }}
+        run: python scripts/image_tags.py validate --kind release "$REQUESTED_IMAGE_TAG"
+
+      - name: Validate git tag image tag
+        if: github.event_name == 'push'
+        run: |
+          release_tag="codex-cli-provider-${GITHUB_REF_NAME#v}"
+          python scripts/image_tags.py validate --kind release "$release_tag"
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@06116385d9baf250c9f4dcb4858b16962ea869c3 # v4.1.0
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
 
@@ -45,7 +62,7 @@ jobs:
           images: ghcr.io/${{ github.repository }}
           tags: |
             type=raw,value=${{ inputs.image_tag }},enable=${{ github.event_name == 'workflow_dispatch' }}
-            type=ref,event=tag
+            type=semver,pattern=codex-cli-provider-{{version}},enable=${{ startsWith(github.ref, 'refs/tags/v') }}
 
       - name: Build and publish image
         uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
diff --git a/.gitignore b/.gitignore
index be87a03..db5c5f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ data/
 logs/
 tmp/
 temp/
+handoff.md
 
 .DS_Store
 .idea/
diff --git a/README.md b/README.md
index 4a0ab3a..49a4996 100644
--- a/README.md
+++ b/README.md
@@ -300,6 +300,107 @@ OpenAI API keys in Compose files.
 Codex authentication still lives only in the mounted `data/codex-home`
 directory and must be completed inside the running container.
 
+## Pre-Merge pi-node2 Image Test
+
+Use the `candidate-image` GitHub Actions workflow to publish a disposable image
+from a branch before merging it. The default target is `linux/arm64` for
+`pi-node2` validation, and the generated tag looks like:
+
+```text
+codex-cli-provider-dev-branch-name-abcdef123456
+```
+
+Run the workflow from the branch you want to test. Leave `image_tag` empty
+unless you need a stable candidate tag; custom candidate tags must start with
+`codex-cli-provider-dev-`. Do not use `latest`.
+
+On `pi-node2`, keep using the image-only Compose file and the dedicated local
+`data/codex-home` auth mount:
+
+```bash
+docker login ghcr.io
+export CODEX_CLI_PROVIDER_IMAGE=ghcr.io/subdepthtech/codex-cli-provider:codex-cli-provider-dev-branch-name-abcdef123456
+docker compose -f docker-compose.image.yml pull
+docker compose -f docker-compose.image.yml up -d
+python3 scripts/smoke_test_provider.py
+```
+
+To verify one real Codex-backed request after the container is healthy and
+logged in, run:
+
+```bash
+python3 scripts/smoke_test_provider.py --chat
+```
+
+The non-chat smoke test checks `/healthz`, confirms `/v1/models` rejects
+unauthenticated callers, and confirms the authenticated model list includes
+`codex-cli-default`. The `--chat` check sends one live upstream request through
+the signed-in Codex CLI account.
+
+After the `pi-node2` smoke test passes, merge the branch and publish the release
+image. The release workflow publishes versioned tags like
+`codex-cli-provider-0.1.2` from either a manual dispatch or a Git tag such as
+`v0.1.2`.
+
+## Automated pi-node2 Deployment
+
+The `deploy-pi-node2` GitHub Actions workflow deploys an already-published
+candidate or release image on a self-hosted runner installed on `pi-node2`.
+It intentionally does not use SSH keys or GitHub-hosted runner secrets.
+
+One-time `pi-node2` setup:
+
+```bash
+mkdir -p ~/projects
+git clone https://github.com/subdepthtech/codex-cli-provider.git ~/projects/codex-cli-provider
+cd ~/projects/codex-cli-provider
+cp .env.example .env
+mkdir -p data/codex-home data/codex-work data/secrets
+python3 - <<'PY'
+import pathlib, secrets
+path = pathlib.Path("data/secrets/proxy_api_key")
+path.write_text(secrets.token_urlsafe(48) + "\n")
+PY
+chmod 600 .env data/secrets/proxy_api_key
+chmod 700 data/codex-home data/codex-work data/secrets
+docker login ghcr.io
+```
+
+The deploy smoke test expects the dedicated `data/codex-home` mount on
+`pi-node2` to already contain a valid Codex login. After starting the container
+with a candidate or release image for the first time, complete device login
+inside that container:
+
+```bash
+docker exec -it codex-cli-provider \
+  codex login --device-auth \
+  -c forced_login_method='"chatgpt"' \
+  -c cli_auth_credentials_store='"file"'
+```
+
+Install the GitHub self-hosted runner on `pi-node2` with labels including
+`self-hosted`, `linux`, `arm64`, and `pi-node2`. In GitHub, create an
+environment named `pi-node2` and require manual approval before deployment.
+For a public repository, do not let untrusted pull requests run jobs on this
+runner.
+
+Run the `deploy-pi-node2` workflow from a trusted branch, preferably `main`, and
+pass the exact image tag printed by `candidate-image`. The workflow uses
+`/home/pi/projects/codex-cli-provider` by default; set the repository or
+environment variable `PI_NODE2_DEPLOY_DIR` to override that path.
+
+The deploy workflow updates the fixed checkout, validates the image tag, runs
+the image-only Compose security check, pulls the image, restarts the service,
+and runs `scripts/smoke_test_provider.py`. Enable `chat_smoke` to run one live
+Codex-backed request after restart.
+
+## Host Reimage Runbook
+
+For rebuilding the homelab `pi-node2` host, use
+[`docs/pi-node2-reimage.md`](docs/pi-node2-reimage.md). Keep host-specific
+values, backup locations, runner registration details, and credential recovery
+notes in the ignored top-level `handoff.md` file, not in tracked documentation.
+
 ## Verification
 
 Run repository checks without live credentials:
@@ -316,9 +417,8 @@ PYTHONPATH=. .venv/bin/pytest -q
 Live checks require a running container and the dedicated Codex login:
 
 ```bash
-PROXY_API_KEY="$(cat data/secrets/proxy_api_key)"
-curl -f http://127.0.0.1:8320/healthz
-curl -f -H "Authorization: Bearer $PROXY_API_KEY" http://127.0.0.1:8320/v1/models
+python3 scripts/smoke_test_provider.py
+python3 scripts/smoke_test_provider.py --chat
 ```
 
 Do not print or inspect `data/codex-home/auth.json`.
diff --git a/docs/pi-node2-reimage.md b/docs/pi-node2-reimage.md
new file mode 100644
index 0000000..f2353f4
--- /dev/null
+++ b/docs/pi-node2-reimage.md
@@ -0,0 +1,123 @@
+# pi-node2 Reimage Runbook
+
+This runbook is for rebuilding the homelab `pi-node2` host that runs
+`codex-cli-provider` from a published image. Keep host-specific values, tokens,
+runner registration details, and recovery notes in the ignored top-level
+`handoff.md` file, not in tracked documentation.
+
+## Before Reimage
+
+Record the current deploy state without printing credentials:
+
+```bash
+cd /home/pi/projects/codex-cli-provider
+git status --short --branch
+git remote -v
+docker compose -f docker-compose.image.yml ps
+docker image ls 'ghcr.io/subdepthtech/codex-cli-provider'
+test -f .env && printf '.env exists\n'
+test -f data/secrets/proxy_api_key && printf 'proxy_api_key exists\n'
+test -f data/codex-home/auth.json && printf 'codex auth exists\n'
+```
+
+Decide whether to preserve or recreate local state:
+
+- `.env`: local wrapper limits and deploy settings. Back it up only through the
+  host's private backup path.
+- `data/secrets/proxy_api_key`: wrapper bearer token. Preserve it only if
+  existing clients must keep working without changing their configured API key.
+  Otherwise regenerate it after the reimage.
+- `data/codex-home/`: dedicated Codex/ChatGPT login state. Treat it as a live
+  credential. Prefer a fresh device login unless the operator explicitly chooses
+  to restore the dedicated project auth home.
+- `data/codex-work/`: disposable provider workspace unless the operator has put
+  recovery artifacts there.
+- Docker/GHCR auth and GitHub self-hosted runner registration are host-local and
+  should be recreated after the reimage.
+
+Do not back up or publish a normal user `~/.codex`, an `OPENAI_API_KEY`, Docker
+socket credentials, or any host home directory into this project.
+
+## Restore From Image
+
+Install Docker Engine, Docker Compose v2, Git, and Python 3. Then recreate the
+checkout and local state:
+
+```bash
+mkdir -p /home/pi/projects
+git clone https://github.com/subdepthtech/codex-cli-provider.git /home/pi/projects/codex-cli-provider
+cd /home/pi/projects/codex-cli-provider
+cp .env.example .env
+mkdir -p data/codex-home data/codex-work data/secrets
+python3 - <<'PY'
+import pathlib, secrets
+path = pathlib.Path("data/secrets/proxy_api_key")
+path.write_text(secrets.token_urlsafe(48) + "\n")
+PY
+chmod 600 .env data/secrets/proxy_api_key
+chmod 700 data/codex-home data/codex-work data/secrets
+docker login ghcr.io
+```
+
+If restoring a preserved `.env`, `proxy_api_key`, or `data/codex-home`, copy it
+into place before starting the service and keep the same file permissions.
+
+Start from an explicit candidate or release tag. Never use `latest`:
+
+```bash
+export CODEX_CLI_PROVIDER_IMAGE=ghcr.io/subdepthtech/codex-cli-provider:codex-cli-provider-0.1.2
+COMPOSE_FILE=docker-compose.image.yml python3 scripts/check_compose_security.py
+docker compose -f docker-compose.image.yml pull
+docker compose -f docker-compose.image.yml up -d
+```
+
+If `data/codex-home` was not restored, complete the dedicated ChatGPT device
+login inside the container:
+
+```bash
+docker exec -it codex-cli-provider \
+  codex login --device-auth \
+  -c forced_login_method='"chatgpt"' \
+  -c cli_auth_credentials_store='"file"'
+```
+
+Verify the deployment:
+
+```bash
+python3 scripts/smoke_test_provider.py
+python3 scripts/smoke_test_provider.py --chat
+```
+
+The `--chat` check sends one live upstream request through the signed-in Codex
+CLI account. Skip it until the dedicated device login is complete.
+
+## Restore Automated Deploys
+
+Install the GitHub self-hosted runner on `pi-node2` with labels:
+
+```text
+self-hosted, linux, arm64, pi-node2
+```
+
+In GitHub, keep the `pi-node2` environment protected with manual approval.
+Only trusted branches should deploy to the self-hosted runner. Set
+`PI_NODE2_DEPLOY_DIR` only if the checkout is not
+`/home/pi/projects/codex-cli-provider`.
+
+After the runner is online, use the `deploy-pi-node2` workflow with an exact
+candidate or release image tag. The workflow validates the tag, updates the
+fixed checkout, runs the image-only Compose security check, pulls the image,
+restarts the service, and runs the smoke test.
+
+## Local Handoff Notes
+
+Use the ignored top-level `handoff.md` for details that should survive the
+reimage discussion but must not be pushed, such as:
+
+- whether the old `.env` or wrapper bearer token was preserved;
+- whether `data/codex-home` was restored or reauthenticated;
+- current image tag running on the host;
+- self-hosted runner registration state;
+- private backup location names.
+
+Do not commit `handoff.md`.
diff --git a/scripts/deploy_compose_image.py b/scripts/deploy_compose_image.py
new file mode 100644
index 0000000..6c33ada
--- /dev/null
+++ b/scripts/deploy_compose_image.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+import argparse
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+from scripts.image_tags import validate_image_tag
+
+
+REQUIRED_DIRS = (
+    Path("data/codex-home"),
+    Path("data/codex-work"),
+    Path("data/secrets"),
+)
+REQUIRED_FILES = (
+    Path(".env"),
+    Path("data/secrets/proxy_api_key"),
+)
+
+
+def fail(message: str) -> None:  # Print "FAIL: <message>" to stderr, then exit with status 1.
+    print(f"FAIL: {message}", file=sys.stderr)
+    raise SystemExit(1)
+
+
+def run(command: list[str], *, env: dict[str, str] | None = None) -> None:  # Echo *command*, then run it; a non-zero exit raises CalledProcessError.
+    print("+ " + " ".join(command), flush=True)  # flush so the echo precedes the child's output when stdout is a pipe or file
+    subprocess.run(command, env=env, check=True)
+
+
+def check_local_state() -> None:  # Exit via fail() unless every REQUIRED_DIRS / REQUIRED_FILES entry exists.
+    missing_dirs = [str(path) for path in REQUIRED_DIRS if not path.is_dir()]  # relative paths, resolved against the cwd
+    missing_files = [str(path) for path in REQUIRED_FILES if not path.is_file()]
+    if missing_dirs or missing_files:
+        missing = ", ".join(missing_dirs + missing_files)
+        fail(f"missing local deployment state: {missing}")
+
+
+def deploy(args: argparse.Namespace) -> None:  # Validate the tag, then pull, restart, and smoke-test the image in args.repo_dir.
+    image_tag = validate_image_tag(args.image_tag, args.tag_kind)
+    image_ref = f"{args.image_repository.rstrip(':')}:{image_tag}"
+
+    os.chdir(args.repo_dir.resolve())  # the state check and script paths below are relative to the checkout
+    check_local_state()
+    # Child processes receive the image reference and Compose file through the environment.
+    env = os.environ.copy()
+    env["CODEX_CLI_PROVIDER_IMAGE"] = image_ref
+    env["COMPOSE_FILE"] = "docker-compose.image.yml"
+    # run() uses check=True, so the first failing step aborts the rest.
+    run(["python3", "scripts/check_compose_security.py"], env=env)
+    run(["docker", "compose", "-f", "docker-compose.image.yml", "pull"], env=env)
+    run(["docker", "compose", "-f", "docker-compose.image.yml", "up", "-d", "--remove-orphans"], env=env)
+    run(["python3", "scripts/smoke_test_provider.py", "--base-url", args.base_url], env=env)
+    if args.chat_smoke:
+        run(["python3", "scripts/smoke_test_provider.py", "--base-url", args.base_url, "--chat"], env=env)
+
+    print(f"deployed {image_ref}")
+
+
+def parse_args(argv: list[str]) -> argparse.Namespace:  # Parse the deploy CLI options from *argv*.
+    parser = argparse.ArgumentParser(description="Deploy a published codex-cli-provider image with Compose.")
+    parser.add_argument("--repo-dir", type=Path, default=Path.cwd())  # default is the working directory at parse time
+    parser.add_argument("--image-repository", required=True, help="Image repository without tag, for example ghcr.io/org/repo.")
+    parser.add_argument("--image-tag", required=True)
+    parser.add_argument("--tag-kind", choices=("candidate", "release", "any"), default="any")
+    parser.add_argument("--base-url", default="http://127.0.0.1:8320")
+    parser.add_argument("--chat-smoke", action="store_true")
+    return parser.parse_args(argv)
+
+
+def main(argv: list[str] | None = None) -> None:  # CLI entry point; *argv* defaults to sys.argv[1:].
+    args = parse_args(argv if argv is not None else sys.argv[1:])
+    deploy(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/image_tags.py b/scripts/image_tags.py
new file mode 100644
index 0000000..7661bd9
--- /dev/null
+++ b/scripts/image_tags.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+import argparse
+import os
+import re
+import sys
+from pathlib import Path
+
+
+PROJECT_NAME = "codex-cli-provider"
+MAX_DOCKER_TAG_LENGTH = 128
+DOCKER_TAG_RE = re.compile(r"^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$")
+RELEASE_TAG_RE = re.compile(rf"^{PROJECT_NAME}-[0-9]+\.[0-9]+\.[0-9]+(?:-[A-Za-z0-9_.-]+)?$")
+CANDIDATE_TAG_RE = re.compile(rf"^{PROJECT_NAME}-dev-[A-Za-z0-9][A-Za-z0-9_.-]*$")
+SHA_RE = re.compile(r"^[A-Fa-f0-9]{7,64}$")
+
+
+def fail(message: str) -> None:  # Print "FAIL: <message>" to stderr, then exit with status 1.
+    print(f"FAIL: {message}", file=sys.stderr)
+    raise SystemExit(1)
+
+
+def sanitize_ref_name(ref_name: str) -> str:  # Reduce a ref name to a lowercase tag-safe slug; "ref" if nothing is left.
+    safe = re.sub(r"[^a-z0-9_.-]+", "-", ref_name.strip().lower())  # each run of other characters becomes one dash
+    safe = re.sub(r"[-.]{2,}", "-", safe)  # collapse runs of dashes and periods
+    safe = safe.strip("-._")  # no separator at either end
+    return safe or "ref"
+
+
+def build_candidate_tag(ref_name: str, sha: str) -> str:  # Return "<PROJECT_NAME>-dev-<ref>-<first 12 SHA chars>" within the tag length limit.
+    normalized_sha = sha.strip().lower()
+    if not SHA_RE.fullmatch(normalized_sha):
+        fail("candidate image tags require a git SHA")
+
+    short_sha = normalized_sha[:12]
+    prefix = f"{PROJECT_NAME}-dev-"
+    suffix_separator = "-"
+    max_ref_length = MAX_DOCKER_TAG_LENGTH - len(prefix) - len(suffix_separator) - len(short_sha)  # room left for the ref part
+    ref_part = sanitize_ref_name(ref_name)
+    if len(ref_part) > max_ref_length:
+        ref_part = ref_part[:max_ref_length].rstrip("-._") or "ref"  # the cut may expose a trailing separator
+
+    return f"{prefix}{ref_part}{suffix_separator}{short_sha}"
+
+
+def validate_image_tag(tag: str, kind: str) -> str:  # Return the stripped tag, or exit via fail() if it is not acceptable.
+    normalized = tag.strip()
+    if not normalized:
+        fail("image tag must not be empty")
+    if normalized == "latest":
+        fail("image tag must not be latest")
+    if not DOCKER_TAG_RE.fullmatch(normalized):
+        fail(
+            "image tag must be a Docker tag of at most 128 characters using "
+            "only letters, digits, underscore, period, and dash"
+        )
+    # Kind-specific shape; a kind other than these three adds no further check.
+    if kind == "release" and not RELEASE_TAG_RE.fullmatch(normalized):
+        fail(f"release image tags must look like {PROJECT_NAME}-0.1.2")
+    if kind == "candidate" and not CANDIDATE_TAG_RE.fullmatch(normalized):
+        fail(f"candidate image tags must look like {PROJECT_NAME}-dev-branch-abcdef123456")
+    if kind == "any" and not (RELEASE_TAG_RE.fullmatch(normalized) or CANDIDATE_TAG_RE.fullmatch(normalized)):
+        fail("image tag must be a release or candidate tag")
+
+    return normalized
+
+
+def write_github_output(name: str, value: str) -> None:  # Append "name=value" to the file named by $GITHUB_OUTPUT.
+    output_path = os.environ.get("GITHUB_OUTPUT")
+    if not output_path:
+        fail("GITHUB_OUTPUT is not set")
+    with Path(output_path).open("a", encoding="utf-8") as output:
+        output.write(f"{name}={value}\n")
+
+
+def candidate(args: argparse.Namespace) -> None:  # Subcommand: print the candidate tag and optionally export it as the "tag" output.
+    if args.requested.strip():
+        tag = validate_image_tag(args.requested, "candidate")  # an explicit request must already be a candidate tag
+    else:
+        tag = build_candidate_tag(args.ref_name, args.sha)
+        validate_image_tag(tag, "candidate")  # re-check the generated tag
+
+    if args.github_output:
+        write_github_output("tag", tag)
+    print(tag)
+
+
+def validate(args: argparse.Namespace) -> None:  # Subcommand: print the validated tag, or exit via fail().
+    tag = validate_image_tag(args.tag, args.kind)
+    print(tag)
+
+
+def parse_args(argv: list[str]) -> argparse.Namespace:  # Parse *argv* for the "candidate" and "validate" subcommands.
+    parser = argparse.ArgumentParser(description="Resolve and validate codex-cli-provider image tags.")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    candidate_parser = subparsers.add_parser("candidate", help="Resolve a pre-release candidate image tag.")
+    candidate_parser.add_argument("--ref-name", required=True)
+    candidate_parser.add_argument("--sha", required=True)
+    candidate_parser.add_argument("--requested", default="")
+    candidate_parser.add_argument("--github-output", action="store_true")
+    candidate_parser.set_defaults(func=candidate)  # handler stored on the namespace as args.func
+
+    validate_parser = subparsers.add_parser("validate", help="Validate an image tag.")
+    validate_parser.add_argument("--kind", choices=("release", "candidate", "any"), default="any")
+    validate_parser.add_argument("tag")
+    validate_parser.set_defaults(func=validate)
+
+    return parser.parse_args(argv)
+
+
+def main(argv: list[str] | None = None) -> None:  # CLI entry point; *argv* defaults to sys.argv[1:].
+    args = parse_args(argv if argv is not None else sys.argv[1:])
+    args.func(args)  # handler chosen by the subcommand's set_defaults(func=...)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/smoke_test_provider.py b/scripts/smoke_test_provider.py
new file mode 100644
index 0000000..617ba62
--- /dev/null
+++ b/scripts/smoke_test_provider.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+import argparse
+import json
+import sys
+import urllib.error
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+
+DEFAULT_BASE_URL = "http://127.0.0.1:8320"
+DEFAULT_MODEL = "codex-cli-default"
+
+
+def fail(message: str) -> None:  # Print "FAIL: <message>" to stderr, then exit with status 1.
+    print(f"FAIL: {message}", file=sys.stderr)
+    raise SystemExit(1)
+
+
+def read_token(path: Path) -> str:  # Return the stripped token stored in *path*; exit via fail() if it is unreadable or empty.
+    try:
+        token = path.read_text(encoding="utf-8").strip()
+    except (OSError, UnicodeDecodeError) as error:  # missing, unreadable, a directory, or not UTF-8
+        fail(f"token file {'not found' if isinstance(error, FileNotFoundError) else 'not readable'}: {path}")
+    if not token:
+        fail(f"token file is empty: {path}")
+    return token
+
+
+def request_json(
+    method: str,
+    url: str,
+    *,
+    timeout: int,
+    headers: dict[str, str] | None = None,
+    payload: dict[str, Any] | None = None,
+) -> tuple[int, dict[str, Any]]:
+    # Send one request and return (status, JSON object). HTTP error statuses are returned; transport failures and non-JSON-object bodies exit via fail().
+    request_headers = dict(headers or {})
+    data = None if payload is None else json.dumps(payload).encode("utf-8")
+    if data is not None:
+        request_headers["Content-Type"] = "application/json"
+
+    request = urllib.request.Request(url, data=data, headers=request_headers, method=method)
+    try:
+        with urllib.request.urlopen(request, timeout=timeout) as response:
+            body = response.read()
+            status = response.status
+    except urllib.error.HTTPError as error:
+        body = error.read()
+        status = error.code
+    except OSError as error:  # URLError, plus timeouts and connection resets raised outside urllib's own wrapping
+        fail(f"request failed for {url}: {getattr(error, 'reason', error)}")
+
+    if not body:
+        return status, {}  # an empty body is reported as an empty object
+    try:
+        parsed = json.loads(body.decode("utf-8"))
+    except ValueError:  # JSONDecodeError, or UnicodeDecodeError from a body that is not UTF-8
+        fail(f"non-JSON response from {url}: HTTP {status}")
+    if not isinstance(parsed, dict):
+        fail(f"unexpected JSON response from {url}: HTTP {status}")
+    return status, parsed
+
+
+def expect_status(status: int, expected: int, label: str, body: dict[str, Any]) -> None:  # Exit via fail() when *status* differs from *expected*.
+    if status != expected:
+        fail(f"{label} returned HTTP {status}, expected {expected}: {json.dumps(body, sort_keys=True)[:500]}")  # the JSON body is cut to 500 characters
+
+
+def smoke_test(args: argparse.Namespace) -> None:  # Probe a running provider; any failed check exits via fail().
+    base_url = args.base_url.rstrip("/")
+    token = read_token(args.token_file)
+    auth_headers = {"Authorization": f"Bearer {token}"}
+
+    status, body = request_json("GET", f"{base_url}/healthz", timeout=args.timeout)
+    expect_status(status, 200, "healthz", body)
+    if body.get("status") != "ok":
+        fail(f"healthz returned unexpected body: {body}")
+    print("healthz ok")
+    # The model list must be refused when no bearer token is sent.
+    status, body = request_json("GET", f"{base_url}/v1/models", timeout=args.timeout)
+    expect_status(status, 401, "unauthenticated /v1/models", body)
+    print("auth gate ok")
+
+    status, body = request_json("GET", f"{base_url}/v1/models", timeout=args.timeout, headers=auth_headers)
+    expect_status(status, 200, "authenticated /v1/models", body)
+    models = body.get("data")
+    if not isinstance(models, list) or not any(isinstance(model, dict) and model.get("id") == args.model for model in models):
+        fail(f"{args.model} missing from /v1/models response")
+    print("models ok")
+    # Optional: one live chat completion, enabled by --chat.
+    if args.chat:
+        payload = {
+            "model": args.model,
+            "messages": [{"role": "user", "content": args.prompt}],
+        }
+        status, body = request_json(
+            "POST",
+            f"{base_url}/v1/chat/completions",
+            timeout=args.chat_timeout,
+            headers=auth_headers,
+            payload=payload,
+        )
+        expect_status(status, 200, "chat completion", body)
+        choices = body.get("choices")
+        if not isinstance(choices, list) or not choices:
+            fail("chat completion response did not include choices")
+        message = choices[0].get("message") if isinstance(choices[0], dict) else None
+        content = message.get("content") if isinstance(message, dict) else None
+        if not isinstance(content, str) or not content.strip():
+            fail("chat completion response did not include non-empty message content")
+        print("chat completion ok")
+
+
+def parse_args(argv: list[str]) -> argparse.Namespace:  # Parse the smoke-test CLI options from *argv*.
+    parser = argparse.ArgumentParser(description="Smoke test a running codex-cli-provider container.")
+    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
+    parser.add_argument("--token-file", type=Path, default=Path("data/secrets/proxy_api_key"))
+    parser.add_argument("--model", default=DEFAULT_MODEL)
+    parser.add_argument("--timeout", type=int, default=15)  # seconds, passed to urlopen for the GET checks
+    parser.add_argument("--chat-timeout", type=int, default=240)  # seconds, passed to urlopen for the chat request
+    parser.add_argument("--chat", action="store_true", help="Also run one live Codex-backed chat completion.")
+    parser.add_argument(
+        "--prompt",
+        default="Reply with one short sentence confirming the image smoke test ran.",
+        help="Prompt used only when --chat is set.",
+    )
+    return parser.parse_args(argv)
+
+
+def main(argv: list[str] | None = None) -> None:  # CLI entry point; *argv* defaults to sys.argv[1:].
+    args = parse_args(argv if argv is not None else sys.argv[1:])
+    smoke_test(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_image_tags.py b/tests/test_image_tags.py
new file mode 100644
index 0000000..9ea9daa
--- /dev/null
+++ b/tests/test_image_tags.py
@@ -0,0 +1,38 @@
+import pytest
+
+from scripts.image_tags import build_candidate_tag, sanitize_ref_name, validate_image_tag
+
+
+def test_build_candidate_tag_uses_branch_and_short_sha():  # "/" and spaces become dashes; the SHA is lowercased and cut to 12 characters
+    tag = build_candidate_tag("feature/pi-node2 smoke", "ABCDEF1234567890")
+    assert tag == "codex-cli-provider-dev-feature-pi-node2-smoke-abcdef123456"
+
+
+def test_build_candidate_tag_truncates_long_refs_to_docker_limit():  # only the ref part is shortened; the SHA suffix survives
+    tag = build_candidate_tag("feature/" + "x" * 200, "0123456789abcdef")
+    assert len(tag) == 128  # MAX_DOCKER_TAG_LENGTH in scripts/image_tags.py
+    assert tag.endswith("-0123456789ab")
+
+
+def test_sanitize_ref_name_has_stable_fallback():  # a ref made only of separators falls back to "ref"
+    assert sanitize_ref_name("///") == "ref"
+
+
+def test_validate_release_tag_accepts_project_semver():  # accepted tags are returned unchanged
+    assert validate_image_tag("codex-cli-provider-0.1.2", "release") == "codex-cli-provider-0.1.2"
+    assert validate_image_tag("codex-cli-provider-0.1.2-rc.1", "release") == "codex-cli-provider-0.1.2-rc.1"  # pre-release suffix
+
+
+@pytest.mark.parametrize(
+    "tag,kind",
+    [
+        ("latest", "any"),  # "latest" is rejected for every kind
+        ("v0.1.2", "release"),  # missing the project-name prefix
+        ("codex-cli-provider-0.1.2+build", "release"),  # "+" is not a valid Docker tag character
+        ("codex-cli-provider-0.1.2", "candidate"),  # release tag offered as a candidate
+        ("codex-cli-provider-dev-feature", "release"),  # candidate tag offered as a release
+    ],
+)
+def test_validate_image_tag_rejects_invalid_tags(tag, kind):  # fail() raises SystemExit
+    with pytest.raises(SystemExit):
+        validate_image_tag(tag, kind)