diff --git a/.env.example b/.env.example index 308f518..2f4a0d4 100644 --- a/.env.example +++ b/.env.example @@ -20,9 +20,14 @@ MAX_REQUEST_BODY_BYTES=262144 MAX_MESSAGES=32 MAX_TOTAL_TEXT_CHARS=80000 -# Local admission queue for short bursts while preserving one active Codex run. +# Local admission queue for short bursts while all Codex run slots are busy. # 0 fails fast with wrapper_busy 429. Values above 0 wait up to that many -# seconds for the active run to finish. The app clamps this to 0-5 seconds. +# seconds for a run slot to open. The app clamps this to 0-5 seconds. QUEUE_WAIT_SECONDS=0 + +# Optional provider-side parallelism. Keep 1 unless local wrapper_busy 429s are +# the bottleneck and the signed-in Codex account tolerates two concurrent CLI +# executions. The app clamps this to 1-2. +MAX_CONCURRENT_CODEX_RUNS=1 CORS_ALLOWED_ORIGINS= LOG_LEVEL=INFO diff --git a/.github/workflows/candidate-image.yml b/.github/workflows/candidate-image.yml new file mode 100644 index 0000000..de6ad9a --- /dev/null +++ b/.github/workflows/candidate-image.yml @@ -0,0 +1,124 @@ +name: candidate-image + +on: + workflow_dispatch: + inputs: + image_tag: + description: "Optional candidate tag. Defaults to codex-cli-provider-dev-{branch}-{short-sha}." + required: false + type: string + platforms: + description: "Target image platform(s). Use linux/arm64 for pi-node2 validation." 
+ required: true + default: linux/arm64 + type: choice + options: + - linux/arm64 + - linux/amd64 + - linux/amd64,linux/arm64 + +permissions: + contents: read + packages: write + +concurrency: + group: candidate-image-${{ github.ref }} + cancel-in-progress: true + +jobs: + publish-candidate-image: + runs-on: ubuntu-latest + timeout-minutes: 45 + + steps: + - name: Checkout + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + with: + python-version: "3.12" + cache: pip + + - name: Install dependencies + run: pip install --requirement requirements.txt + + - name: Unit tests + run: python -m pytest -q + + - name: Syntax checks + run: python -m compileall -q src scripts tests + + - name: Prepare throwaway local config + run: | + cp .env.example .env + mkdir -p data/secrets data/codex-home data/codex-work + python - <<'PY' + import pathlib + import secrets + + pathlib.Path("data/secrets/proxy_api_key").write_text(secrets.token_urlsafe(48) + "\n", encoding="utf-8") + PY + chmod 600 .env + chmod 644 data/secrets/proxy_api_key + chmod 700 data/secrets data/codex-home data/codex-work + + - name: Repo hygiene checks + run: python scripts/check_repo_hygiene.py + + - name: Compose security checks + run: python scripts/check_compose_security.py + + - name: Image Compose security checks + env: + COMPOSE_FILE: docker-compose.image.yml + CODEX_CLI_PROVIDER_IMAGE: ghcr.io/${{ github.repository }}:codex-cli-provider-dev-compose-check + run: python scripts/check_compose_security.py + + - name: Resolve candidate tag + id: candidate + env: + REQUESTED_IMAGE_TAG: ${{ inputs.image_tag }} + run: > + python scripts/image_tags.py candidate + --ref-name "$GITHUB_REF_NAME" + --sha "$GITHUB_SHA" + --requested "$REQUESTED_IMAGE_TAG" + --github-output + + - name: Set up QEMU + uses: docker/setup-qemu-action@06116385d9baf250c9f4dcb4858b16962ea869c3 # v4.1.0 + + - name: Set up 
Docker Buildx + uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 + + - name: Log in to GHCR + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and publish candidate image + uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0 + with: + context: . + file: Dockerfile + platforms: ${{ inputs.platforms }} + push: true + tags: ghcr.io/${{ github.repository }}:${{ steps.candidate.outputs.tag }} + labels: | + org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }} + org.opencontainers.image.revision=${{ github.sha }} + org.opencontainers.image.version=${{ steps.candidate.outputs.tag }} + + - name: Inspect candidate image + run: docker buildx imagetools inspect ghcr.io/${{ github.repository }}:${{ steps.candidate.outputs.tag }} + + - name: Print pi-node2 pull command + run: | + echo "Candidate image:" + echo "ghcr.io/${{ github.repository }}:${{ steps.candidate.outputs.tag }}" + echo + echo "pi-node2 command:" + echo "CODEX_CLI_PROVIDER_IMAGE=ghcr.io/${{ github.repository }}:${{ steps.candidate.outputs.tag }} docker compose -f docker-compose.image.yml up -d" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1df1b06..5c341e1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,7 +37,7 @@ jobs: run: python -m pytest -q - name: Syntax checks - run: python -m py_compile src/server.py scripts/check_compose_security.py scripts/check_repo_hygiene.py tests/test_server.py + run: python -m compileall -q src scripts tests - name: Prepare throwaway local config run: | @@ -62,5 +62,11 @@ jobs: - name: Compose security checks run: python scripts/check_compose_security.py + - name: Image Compose security checks + env: + COMPOSE_FILE: docker-compose.image.yml + CODEX_CLI_PROVIDER_IMAGE: 
registry.example.com/your-org/codex-cli-provider:codex-cli-provider-0.1.2 + run: python scripts/check_compose_security.py + - name: Docker build run: docker compose build diff --git a/.github/workflows/deploy-pi-node2.yml b/.github/workflows/deploy-pi-node2.yml new file mode 100644 index 0000000..739567c --- /dev/null +++ b/.github/workflows/deploy-pi-node2.yml @@ -0,0 +1,59 @@ +name: deploy-pi-node2 + +on: + workflow_dispatch: + inputs: + image_tag: + description: "Candidate or release image tag to deploy, for example codex-cli-provider-dev-branch-abcdef123456." + required: true + type: string + chat_smoke: + description: "Run one live Codex-backed chat completion after deploy." + required: true + default: false + type: boolean + +permissions: + contents: read + +concurrency: + group: deploy-pi-node2 + cancel-in-progress: false + +jobs: + deploy: + runs-on: [self-hosted, linux, arm64, pi-node2] + environment: pi-node2 + timeout-minutes: 20 + + steps: + - name: Update fixed deploy checkout + env: + DEPLOY_DIR: ${{ vars.PI_NODE2_DEPLOY_DIR || '/home/pi/projects/codex-cli-provider' }} + run: | + set -eu + mkdir -p "$(dirname "$DEPLOY_DIR")" + if [ ! 
-d "$DEPLOY_DIR/.git" ]; then + git clone "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY.git" "$DEPLOY_DIR" + fi + cd "$DEPLOY_DIR" + git fetch --prune origin "$GITHUB_REF" + git checkout --force "$GITHUB_SHA" + + - name: Deploy and smoke test + env: + DEPLOY_DIR: ${{ vars.PI_NODE2_DEPLOY_DIR || '/home/pi/projects/codex-cli-provider' }} + IMAGE_REPOSITORY: ghcr.io/${{ github.repository }} + IMAGE_TAG: ${{ inputs.image_tag }} + CHAT_SMOKE: ${{ inputs.chat_smoke }} + run: | + set -eu + chat_arg="" + if [ "$CHAT_SMOKE" = "true" ]; then + chat_arg="--chat-smoke" + fi + cd "$DEPLOY_DIR" + python3 scripts/deploy_compose_image.py \ + --image-repository "$IMAGE_REPOSITORY" \ + --image-tag "$IMAGE_TAG" \ + $chat_arg diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 763bc95..7a7ed03 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: image_tag: - description: "Image tag to publish, for example v0.1.0 or test" + description: "Image tag to publish, for example codex-cli-provider-0.1.0" required: true type: string push: @@ -28,6 +28,22 @@ jobs: - name: Checkout uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + - name: Validate dispatch image tag + if: github.event_name == 'workflow_dispatch' + # Pass the input through env so it is never expanded inside the shell script. + env: + REQUESTED_IMAGE_TAG: ${{ inputs.image_tag }} + run: python scripts/image_tags.py validate --kind release "$REQUESTED_IMAGE_TAG" + + - name: Validate git tag image tag + if: github.event_name == 'push' + run: | + release_tag="codex-cli-provider-${GITHUB_REF_NAME#v}" + python scripts/image_tags.py validate --kind release "$release_tag" + + - name: Set up QEMU + uses: docker/setup-qemu-action@06116385d9baf250c9f4dcb4858b16962ea869c3 # v4.1.0 + - name: Set up Docker Buildx uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 @@ -45,7 +61,7 @@ jobs: images: ghcr.io/${{ github.repository }} tags: | type=raw,value=${{ inputs.image_tag }},enable=${{ github.event_name == 'workflow_dispatch' }} - 
type=ref,event=tag + type=semver,pattern=codex-cli-provider-{{version}},enable=${{ startsWith(github.ref, 'refs/tags/v') }} - name: Build and publish image uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0 diff --git a/.gitignore b/.gitignore index be87a03..db5c5f8 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ data/ logs/ tmp/ temp/ +handoff.md .DS_Store .idea/ diff --git a/README.md b/README.md index ff0ad64..49a4996 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,8 @@ project. - `GET /v1/models` with the local model alias `codex-cli-default`. - `POST /v1/chat/completions` for text-only chat messages. - Non-streaming responses and final-only SSE streaming. -- One active Codex execution at a time, with an optional short local wait queue. +- One active Codex execution by default, with an optional second local execution + slot and short local wait queue. - A local operator dashboard at `http://127.0.0.1:8320/dashboard/`. ## What It Does Not Provide @@ -228,8 +229,11 @@ For Obsidian LLM Wiki, use: required for the inspected OpenAI-compatible provider path. If near-simultaneous local requests collide, set `QUEUE_WAIT_SECONDS=2` or `3` -in `.env` and restart the wrapper. The wrapper still permits only one active -Codex run. +in `.env` and restart the wrapper. If the collisions are confirmed +`wrapper_busy` responses rather than upstream rate limits, you can also set +`MAX_CONCURRENT_CODEX_RUNS=2` to permit a second local Codex CLI execution. +Keep it at `1` if the signed-in Codex account starts returning +`upstream_rate_limit`. ## Configuration @@ -245,6 +249,7 @@ Most configuration lives in `.env`, copied from `.env.example`. | `MAX_MESSAGES` | Maximum number of chat messages. | | `MAX_TOTAL_TEXT_CHARS` | Maximum total text across messages. | | `QUEUE_WAIT_SECONDS` | Short local wait queue for bursty clients. Values are clamped to `0-5`. 
| +| `MAX_CONCURRENT_CODEX_RUNS` | Number of provider-side Codex executions allowed at once. Values are clamped to `1-2`; default `1`. | | `CORS_ALLOWED_ORIGINS` | Optional comma-separated explicit origins. Requests without `Origin` are allowed. | | `LOG_LEVEL` | Python wrapper log level. | @@ -295,6 +300,107 @@ OpenAI API keys in Compose files. Codex authentication still lives only in the mounted `data/codex-home` directory and must be completed inside the running container. +## Pre-Merge pi-node2 Image Test + +Use the `candidate-image` GitHub Actions workflow to publish a disposable image +from a branch before merging it. The default target is `linux/arm64` for +`pi-node2` validation, and the generated tag looks like: + +```text +codex-cli-provider-dev-branch-name-abcdef123456 +``` + +Run the workflow from the branch you want to test. Leave `image_tag` empty +unless you need a stable candidate tag; custom candidate tags must start with +`codex-cli-provider-dev-`. Do not use `latest`. + +On `pi-node2`, keep using the image-only Compose file and the dedicated local +`data/codex-home` auth mount: + +```bash +docker login ghcr.io +export CODEX_CLI_PROVIDER_IMAGE=ghcr.io/subdepthtech/codex-cli-provider:codex-cli-provider-dev-branch-name-abcdef123456 +docker compose -f docker-compose.image.yml pull +docker compose -f docker-compose.image.yml up -d +python3 scripts/smoke_test_provider.py +``` + +To verify one real Codex-backed request after the container is healthy and +logged in, run: + +```bash +python3 scripts/smoke_test_provider.py --chat +``` + +The non-chat smoke test checks `/healthz`, confirms `/v1/models` rejects +unauthenticated callers, and confirms the authenticated model list includes +`codex-cli-default`. The `--chat` check sends one live upstream request through +the signed-in Codex CLI account. + +After the `pi-node2` smoke test passes, merge the branch and publish the release +image. 
The release workflow publishes versioned tags like +`codex-cli-provider-0.1.2` from either a manual dispatch or a Git tag such as +`v0.1.2`. + +## Automated pi-node2 Deployment + +The `deploy-pi-node2` GitHub Actions workflow deploys an already-published +candidate or release image on a self-hosted runner installed on `pi-node2`. +It intentionally does not use SSH keys or GitHub-hosted runner secrets. + +One-time `pi-node2` setup: + +```bash +mkdir -p ~/projects +git clone https://github.com/subdepthtech/codex-cli-provider.git ~/projects/codex-cli-provider +cd ~/projects/codex-cli-provider +cp .env.example .env +mkdir -p data/codex-home data/codex-work data/secrets +python3 - <<'PY' +import pathlib, secrets +path = pathlib.Path("data/secrets/proxy_api_key") +path.write_text(secrets.token_urlsafe(48) + "\n") +PY +chmod 600 .env data/secrets/proxy_api_key +chmod 700 data/codex-home data/codex-work data/secrets +docker login ghcr.io +``` + +The deploy smoke test expects the dedicated `data/codex-home` mount on +`pi-node2` to already contain a valid Codex login. After starting the container +with a candidate or release image for the first time, complete device login +inside that container: + +```bash +docker exec -it codex-cli-provider \ + codex login --device-auth \ + -c forced_login_method='"chatgpt"' \ + -c cli_auth_credentials_store='"file"' +``` + +Install the GitHub self-hosted runner on `pi-node2` with labels including +`self-hosted`, `linux`, `arm64`, and `pi-node2`. In GitHub, create an +environment named `pi-node2` and require manual approval before deployment. +For a public repository, do not let untrusted pull requests run jobs on this +runner. + +Run the `deploy-pi-node2` workflow from a trusted branch, preferably `main`, and +pass the exact image tag printed by `candidate-image`. The workflow uses +`/home/pi/projects/codex-cli-provider` by default; set the repository or +environment variable `PI_NODE2_DEPLOY_DIR` to override that path. 
+ +The deploy workflow updates the fixed checkout, validates the image tag, runs +the image-only Compose security check, pulls the image, restarts the service, +and runs `scripts/smoke_test_provider.py`. Enable `chat_smoke` to run one live +Codex-backed request after restart. + +## Host Reimage Runbook + +For rebuilding the homelab `pi-node2` host, use +[`docs/pi-node2-reimage.md`](docs/pi-node2-reimage.md). Keep host-specific +values, backup locations, runner registration details, and credential recovery +notes in the ignored top-level `handoff.md` file, not in tracked documentation. + ## Verification Run repository checks without live credentials: @@ -311,9 +417,8 @@ PYTHONPATH=. .venv/bin/pytest -q Live checks require a running container and the dedicated Codex login: ```bash -PROXY_API_KEY="$(cat data/secrets/proxy_api_key)" -curl -f http://127.0.0.1:8320/healthz -curl -f -H "Authorization: Bearer $PROXY_API_KEY" http://127.0.0.1:8320/v1/models +python3 scripts/smoke_test_provider.py +python3 scripts/smoke_test_provider.py --chat ``` Do not print or inspect `data/codex-home/auth.json`. @@ -360,11 +465,11 @@ docker compose restart `500000`, and restart. - `413` with `Request body too large`: increase `MAX_REQUEST_BODY_BYTES`, up to `2000000`, and restart. -- `429` with `code: "wrapper_busy"`: wait for the active request to finish, - reduce client concurrency to `1`, increase batch delay, or set a small - `QUEUE_WAIT_SECONDS`. +- `429` with `code: "wrapper_busy"`: wait for active requests to finish, + reduce client concurrency to `1`, increase batch delay, set a small + `QUEUE_WAIT_SECONDS`, or opt into `MAX_CONCURRENT_CODEX_RUNS=2`. - `429` with `code: "upstream_rate_limit"`: the signed-in upstream account is - rate limited; wait and retry later. + rate limited; wait and retry later, and keep `MAX_CONCURRENT_CODEX_RUNS=1`. - `502` or `/healthz` returning `503`: check `docker exec -it codex-cli-provider codex login status`, then re-run device login if needed. 
diff --git a/docs/pi-node2-reimage.md b/docs/pi-node2-reimage.md new file mode 100644 index 0000000..f2353f4 --- /dev/null +++ b/docs/pi-node2-reimage.md @@ -0,0 +1,123 @@ +# pi-node2 Reimage Runbook + +This runbook is for rebuilding the homelab `pi-node2` host that runs +`codex-cli-provider` from a published image. Keep host-specific values, tokens, +runner registration details, and recovery notes in the ignored top-level +`handoff.md` file, not in tracked documentation. + +## Before Reimage + +Record the current deploy state without printing credentials: + +```bash +cd /home/pi/projects/codex-cli-provider +git status --short --branch +git remote -v +docker compose -f docker-compose.image.yml ps +docker image ls 'ghcr.io/subdepthtech/codex-cli-provider' +test -f .env && printf '.env exists\n' +test -f data/secrets/proxy_api_key && printf 'proxy_api_key exists\n' +test -f data/codex-home/auth.json && printf 'codex auth exists\n' +``` + +Decide whether to preserve or recreate local state: + +- `.env`: local wrapper limits and deploy settings. Back it up only through the + host's private backup path. +- `data/secrets/proxy_api_key`: wrapper bearer token. Preserve it only if + existing clients must keep working without changing their configured API key. + Otherwise regenerate it after the reimage. +- `data/codex-home/`: dedicated Codex/ChatGPT login state. Treat it as a live + credential. Prefer a fresh device login unless the operator explicitly chooses + to restore the dedicated project auth home. +- `data/codex-work/`: disposable provider workspace unless the operator has put + recovery artifacts there. +- Docker/GHCR auth and GitHub self-hosted runner registration are host-local and + should be recreated after the reimage. + +Do not back up or publish a normal user `~/.codex`, an `OPENAI_API_KEY`, Docker +socket credentials, or any host home directory into this project. + +## Restore From Image + +Install Docker Engine, Docker Compose v2, Git, and Python 3. 
Then recreate the +checkout and local state: + +```bash +mkdir -p /home/pi/projects +git clone https://github.com/subdepthtech/codex-cli-provider.git /home/pi/projects/codex-cli-provider +cd /home/pi/projects/codex-cli-provider +cp .env.example .env +mkdir -p data/codex-home data/codex-work data/secrets +python3 - <<'PY' +import pathlib, secrets +path = pathlib.Path("data/secrets/proxy_api_key") +path.write_text(secrets.token_urlsafe(48) + "\n") +PY +chmod 600 .env data/secrets/proxy_api_key +chmod 700 data/codex-home data/codex-work data/secrets +docker login ghcr.io +``` + +If restoring a preserved `.env`, `proxy_api_key`, or `data/codex-home`, copy it +into place before starting the service and keep the same file permissions. + +Start from an explicit candidate or release tag. Never use `latest`: + +```bash +export CODEX_CLI_PROVIDER_IMAGE=ghcr.io/subdepthtech/codex-cli-provider:codex-cli-provider-0.1.2 +COMPOSE_FILE=docker-compose.image.yml python3 scripts/check_compose_security.py +docker compose -f docker-compose.image.yml pull +docker compose -f docker-compose.image.yml up -d +``` + +If `data/codex-home` was not restored, complete the dedicated ChatGPT device +login inside the container: + +```bash +docker exec -it codex-cli-provider \ + codex login --device-auth \ + -c forced_login_method='"chatgpt"' \ + -c cli_auth_credentials_store='"file"' +``` + +Verify the deployment: + +```bash +python3 scripts/smoke_test_provider.py +python3 scripts/smoke_test_provider.py --chat +``` + +The `--chat` check sends one live upstream request through the signed-in Codex +CLI account. Skip it until the dedicated device login is complete. + +## Restore Automated Deploys + +Install the GitHub self-hosted runner on `pi-node2` with labels: + +```text +self-hosted, linux, arm64, pi-node2 +``` + +In GitHub, keep the `pi-node2` environment protected with manual approval. +Only trusted branches should deploy to the self-hosted runner. 
Set +`PI_NODE2_DEPLOY_DIR` only if the checkout is not +`/home/pi/projects/codex-cli-provider`. + +After the runner is online, use the `deploy-pi-node2` workflow with an exact +candidate or release image tag. The workflow validates the tag, updates the +fixed checkout, runs the image compose security check, restarts the service, and +runs the smoke test. + +## Local Handoff Notes + +Use the ignored top-level `handoff.md` for details that should survive the +reimage discussion but must not be pushed, such as: + +- whether the old `.env` or wrapper bearer token was preserved; +- whether `data/codex-home` was restored or reauthenticated; +- current image tag running on the host; +- self-hosted runner registration state; +- private backup location names. + +Do not commit `handoff.md`. diff --git a/scripts/deploy_compose_image.py b/scripts/deploy_compose_image.py new file mode 100644 index 0000000..6c33ada --- /dev/null +++ b/scripts/deploy_compose_image.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +import argparse +import os +import subprocess +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + +from scripts.image_tags import validate_image_tag + + +REQUIRED_DIRS = ( + Path("data/codex-home"), + Path("data/codex-work"), + Path("data/secrets"), +) +REQUIRED_FILES = ( + Path(".env"), + Path("data/secrets/proxy_api_key"), +) + + +def fail(message: str) -> None: + print(f"FAIL: {message}", file=sys.stderr) + raise SystemExit(1) + + +def run(command: list[str], *, env: dict[str, str] | None = None) -> None: + print("+ " + " ".join(command)) + subprocess.run(command, env=env, check=True) + + +def check_local_state() -> None: + missing_dirs = [str(path) for path in REQUIRED_DIRS if not path.is_dir()] + missing_files = [str(path) for path in REQUIRED_FILES if not path.is_file()] + if missing_dirs or missing_files: + missing = ", ".join(missing_dirs + missing_files) + 
fail(f"missing local deployment state: {missing}") + + +def deploy(args: argparse.Namespace) -> None: + image_tag = validate_image_tag(args.image_tag, args.tag_kind) + image_ref = f"{args.image_repository.rstrip(':')}:{image_tag}" + + os.chdir(args.repo_dir.resolve()) + check_local_state() + + env = os.environ.copy() + env["CODEX_CLI_PROVIDER_IMAGE"] = image_ref + env["COMPOSE_FILE"] = "docker-compose.image.yml" + + run(["python3", "scripts/check_compose_security.py"], env=env) + run(["docker", "compose", "-f", "docker-compose.image.yml", "pull"], env=env) + run(["docker", "compose", "-f", "docker-compose.image.yml", "up", "-d", "--wait", "--remove-orphans"], env=env) + run(["python3", "scripts/smoke_test_provider.py", "--base-url", args.base_url], env=env) + if args.chat_smoke: + run(["python3", "scripts/smoke_test_provider.py", "--base-url", args.base_url, "--chat"], env=env) + + print(f"deployed {image_ref}") + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Deploy a published codex-cli-provider image with Compose.") + parser.add_argument("--repo-dir", type=Path, default=Path.cwd()) + parser.add_argument("--image-repository", required=True, help="Image repository without tag, for example ghcr.io/org/repo.") + parser.add_argument("--image-tag", required=True) + parser.add_argument("--tag-kind", choices=("candidate", "release", "any"), default="any") + parser.add_argument("--base-url", default="http://127.0.0.1:8320") + parser.add_argument("--chat-smoke", action="store_true") + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> None: + args = parse_args(argv if argv is not None else sys.argv[1:]) + deploy(args) + + +if __name__ == "__main__": + main() diff --git a/scripts/image_tags.py b/scripts/image_tags.py new file mode 100644 index 0000000..7661bd9 --- /dev/null +++ b/scripts/image_tags.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +import argparse +import os +import re +import sys 
+from pathlib import Path +from typing import NoReturn + + +PROJECT_NAME = "codex-cli-provider" +MAX_DOCKER_TAG_LENGTH = 128 +DOCKER_TAG_RE = re.compile(r"^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$") +RELEASE_TAG_RE = re.compile(rf"^{PROJECT_NAME}-[0-9]+\.[0-9]+\.[0-9]+(?:-[A-Za-z0-9_.-]+)?$") +CANDIDATE_TAG_RE = re.compile(rf"^{PROJECT_NAME}-dev-[A-Za-z0-9][A-Za-z0-9_.-]*$") +SHA_RE = re.compile(r"^[A-Fa-f0-9]{7,64}$") +TAG_KINDS = ("release", "candidate", "any") + + +def fail(message: str) -> NoReturn: + """Print a FAIL message to stderr and exit with status 1.""" + print(f"FAIL: {message}", file=sys.stderr) + raise SystemExit(1) + + +def sanitize_ref_name(ref_name: str) -> str: + """Lowercase a git ref name and replace unsupported characters with dashes.""" + safe = re.sub(r"[^a-z0-9_.-]+", "-", ref_name.strip().lower()) + safe = re.sub(r"[-.]{2,}", "-", safe) + safe = safe.strip("-._") + return safe or "ref" + + +def build_candidate_tag(ref_name: str, sha: str) -> str: + """Build the default candidate tag from a ref name and the first 12 SHA characters.""" + normalized_sha = sha.strip().lower() + if not SHA_RE.fullmatch(normalized_sha): + fail("candidate image tags require a git SHA") + + short_sha = normalized_sha[:12] + prefix = f"{PROJECT_NAME}-dev-" + suffix_separator = "-" + # Trim the ref part so the full tag stays within MAX_DOCKER_TAG_LENGTH. + max_ref_length = MAX_DOCKER_TAG_LENGTH - len(prefix) - len(suffix_separator) - len(short_sha) + ref_part = sanitize_ref_name(ref_name) + if len(ref_part) > max_ref_length: + ref_part = ref_part[:max_ref_length].rstrip("-._") or "ref" + + return f"{prefix}{ref_part}{suffix_separator}{short_sha}" + + +def validate_image_tag(tag: str, kind: str) -> str: + """Return the stripped tag, or exit through fail() when it is not valid for kind.""" + if kind not in TAG_KINDS: + fail(f"unknown image tag kind: {kind}") + normalized = tag.strip() + if not normalized: + fail("image tag must not be empty") + if normalized == "latest": + fail("image tag must not be latest") + if not DOCKER_TAG_RE.fullmatch(normalized): + fail( + "image tag must be a Docker tag of at most 128 characters using " + "only letters, digits, underscore, period, and dash" + ) + + if kind == "release" and not RELEASE_TAG_RE.fullmatch(normalized): + fail(f"release image tags must look like {PROJECT_NAME}-0.1.2") + if kind == "candidate" and not CANDIDATE_TAG_RE.fullmatch(normalized): + fail(f"candidate image tags must look like {PROJECT_NAME}-dev-branch-abcdef123456") + if kind == "any" and not 
(RELEASE_TAG_RE.fullmatch(normalized) or CANDIDATE_TAG_RE.fullmatch(normalized)): + fail("image tag must be a release or candidate tag") + + return normalized + + +def write_github_output(name: str, value: str) -> None: + """Append name=value to the file named by the GITHUB_OUTPUT environment variable.""" + output_path = os.environ.get("GITHUB_OUTPUT") + if not output_path: + fail("GITHUB_OUTPUT is not set") + with Path(output_path).open("a", encoding="utf-8") as output: + output.write(f"{name}={value}\n") + + +def candidate(args: argparse.Namespace) -> None: + """Print the requested candidate tag, or one derived from the ref name and SHA.""" + if args.requested.strip(): + tag = validate_image_tag(args.requested, "candidate") + else: + tag = build_candidate_tag(args.ref_name, args.sha) + validate_image_tag(tag, "candidate") + + if args.github_output: + write_github_output("tag", tag) + print(tag) + + +def validate(args: argparse.Namespace) -> None: + """Validate args.tag against args.kind and print the normalized tag.""" + tag = validate_image_tag(args.tag, args.kind) + print(tag) + + +def parse_args(argv: list[str]) -> argparse.Namespace: + """Parse the candidate and validate subcommands.""" + parser = argparse.ArgumentParser(description="Resolve and validate codex-cli-provider image tags.") + subparsers = parser.add_subparsers(dest="command", required=True) + + candidate_parser = subparsers.add_parser("candidate", help="Resolve a pre-release candidate image tag.") + candidate_parser.add_argument("--ref-name", required=True) + candidate_parser.add_argument("--sha", required=True) + candidate_parser.add_argument("--requested", default="") + candidate_parser.add_argument("--github-output", action="store_true") + candidate_parser.set_defaults(func=candidate) + + validate_parser = subparsers.add_parser("validate", help="Validate an image tag.") + validate_parser.add_argument("--kind", choices=("release", "candidate", "any"), default="any") + validate_parser.add_argument("tag") + validate_parser.set_defaults(func=validate) + + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> None: + """Run the subcommand selected by argv or the process arguments.""" + args = parse_args(argv if argv is not None else sys.argv[1:]) + args.func(args) + + +if __name__ == "__main__": + main() diff --git 
a/scripts/smoke_test_provider.py b/scripts/smoke_test_provider.py new file mode 100644 index 0000000..617ba62 --- /dev/null +++ b/scripts/smoke_test_provider.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +import argparse +import http.client +import json +import sys +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any, NoReturn + + +DEFAULT_BASE_URL = "http://127.0.0.1:8320" +DEFAULT_MODEL = "codex-cli-default" + + +def fail(message: str) -> NoReturn: + """Print a FAIL message to stderr and exit with status 1.""" + print(f"FAIL: {message}", file=sys.stderr) + raise SystemExit(1) + + +def read_token(path: Path) -> str: + """Return the stripped token stored at path, failing if it is missing, unreadable, or empty.""" + try: + token = path.read_text(encoding="utf-8").strip() + except FileNotFoundError: + fail(f"token file not found: {path}") + except OSError as error: + fail(f"token file is not readable: {path}: {error}") + if not token: + fail(f"token file is empty: {path}") + return token + + +def request_json( + method: str, + url: str, + *, + timeout: int, + headers: dict[str, str] | None = None, + payload: dict[str, Any] | None = None, +) -> tuple[int, dict[str, Any]]: + """Send one request and return (status, JSON object); HTTP error statuses are returned, not raised.""" + data = None + request_headers = dict(headers or {}) + if payload is not None: + data = json.dumps(payload).encode("utf-8") + request_headers["Content-Type"] = "application/json" + + request = urllib.request.Request(url, data=data, headers=request_headers, method=method) + try: + with urllib.request.urlopen(request, timeout=timeout) as response: + body = response.read() + status = response.status + except urllib.error.HTTPError as error: + body = error.read() + status = error.code + except urllib.error.URLError as error: + fail(f"request failed for {url}: {error.reason}") + except (OSError, http.client.HTTPException) as error: + # urllib only wraps errors raised while sending; timeouts or resets while reading the response arrive unwrapped. + fail(f"request failed for {url}: {error}") + + if not body: + return status, {} + try: + parsed = json.loads(body.decode("utf-8")) + except (UnicodeDecodeError, json.JSONDecodeError): + fail(f"non-JSON response from {url}: HTTP {status}") + if not isinstance(parsed, dict): + fail(f"unexpected JSON response from {url}: HTTP {status}") + return status, parsed + + +def expect_status(status: int, expected: int, label: str, body: dict[str, Any]) -> None: + """Fail with the label and a truncated body dump unless status equals expected.""" + if status != expected: + fail(f"{label} returned HTTP {status}, expected 
{expected}: {json.dumps(body, sort_keys=True)[:500]}") + + +def smoke_test(args: argparse.Namespace) -> None: + """Check healthz, the auth gate, the model list, and optionally one chat completion.""" + base_url = args.base_url.rstrip("/") + token = read_token(args.token_file) + auth_headers = {"Authorization": f"Bearer {token}"} + + status, body = request_json("GET", f"{base_url}/healthz", timeout=args.timeout) + expect_status(status, 200, "healthz", body) + if body.get("status") != "ok": + fail(f"healthz returned unexpected body: {body}") + print("healthz ok") + + status, body = request_json("GET", f"{base_url}/v1/models", timeout=args.timeout) + expect_status(status, 401, "unauthenticated /v1/models", body) + print("auth gate ok") + + status, body = request_json("GET", f"{base_url}/v1/models", timeout=args.timeout, headers=auth_headers) + expect_status(status, 200, "authenticated /v1/models", body) + models = body.get("data") + if not isinstance(models, list) or not any(isinstance(model, dict) and model.get("id") == args.model for model in models): + fail(f"{args.model} missing from /v1/models response") + print("models ok") + + if args.chat: + payload = { + "model": args.model, + "messages": [{"role": "user", "content": args.prompt}], + } + status, body = request_json( + "POST", + f"{base_url}/v1/chat/completions", + timeout=args.chat_timeout, + headers=auth_headers, + payload=payload, + ) + expect_status(status, 200, "chat completion", body) + choices = body.get("choices") + if not isinstance(choices, list) or not choices: + fail("chat completion response did not include choices") + message = choices[0].get("message") if isinstance(choices[0], dict) else None + content = message.get("content") if isinstance(message, dict) else None + if not isinstance(content, str) or not content.strip(): + fail("chat completion response did not include non-empty message content") + print("chat completion ok") + + +def parse_args(argv: list[str]) -> argparse.Namespace: + """Parse the smoke test command-line options.""" + parser = argparse.ArgumentParser(description="Smoke test a running codex-cli-provider 
container.") + parser.add_argument("--base-url", default=DEFAULT_BASE_URL) + parser.add_argument("--token-file", type=Path, default=Path("data/secrets/proxy_api_key")) + parser.add_argument("--model", default=DEFAULT_MODEL) + parser.add_argument("--timeout", type=int, default=15) + parser.add_argument("--chat-timeout", type=int, default=240) + parser.add_argument("--chat", action="store_true", help="Also run one live Codex-backed chat completion.") + parser.add_argument( + "--prompt", + default="Reply with one short sentence confirming the image smoke test ran.", + help="Prompt used only when --chat is set.", + ) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> None: + """Run the smoke test with argv or the process arguments.""" + args = parse_args(argv if argv is not None else sys.argv[1:]) + smoke_test(args) + + +if __name__ == "__main__": + main() diff --git a/src/dashboard_static/app.js b/src/dashboard_static/app.js index 9aca35d..be60da2 100644 --- a/src/dashboard_static/app.js +++ b/src/dashboard_static/app.js @@ -78,7 +78,7 @@ function renderStatus(data) { setText("#runner-value", runner.busy ? "Busy" : runner.ready ? "Ready" : "Unready"); setClass("#runner-value", runner.ready && !runner.busy ? "ok-text" : runner.busy ? "warn-text" : "bad-text"); - setText("#runner-detail", data.provider.modelAlias); + setText("#runner-detail", `${runner.activeRuns}/${runner.maxConcurrentRuns} active - ${data.provider.modelAlias}`); setText("#event-value", String(events.total)); setText("#event-detail", `${events.errors} errors, p95 ${events.p95DurationMs ?? "-"} ms`); @@ -86,6 +86,8 @@ function renderStatus(data) { setText("#limit-body", formatBytes(limits.maxBodyBytes)); setText("#limit-text", `${limits.maxTotalTextChars.toLocaleString()} chars`); setText("#limit-timeout", `${limits.requestTimeoutSeconds}s`); + setText("#limit-concurrency", `${limits.maxConcurrentRuns} run${limits.maxConcurrentRuns === 1 ? 
"" : "s"}`); + setText("#limit-queue", `${limits.queueWaitSeconds}s`); setText("#last-refresh", `Updated ${new Date().toLocaleTimeString()}`); } diff --git a/src/dashboard_static/index.html b/src/dashboard_static/index.html index 231f055..69dafaf 100644 --- a/src/dashboard_static/index.html +++ b/src/dashboard_static/index.html @@ -86,6 +86,14 @@

Runtime Limits

Timeout
-
+
+
Runs
+
-
+
+
+
Queue
+
-
+
diff --git a/src/server.py b/src/server.py index 7cb82f0..68cbc62 100644 --- a/src/server.py +++ b/src/server.py @@ -145,6 +145,7 @@ class AppSettings: max_total_text_chars: int = 80_000 codex_request_timeout_seconds: int = 180 queue_wait_seconds: float = 0.0 + max_concurrent_codex_runs: int = 1 dashboard_enabled: bool = True def __post_init__(self) -> None: @@ -156,6 +157,7 @@ def __post_init__(self) -> None: self.max_total_text_chars = min(max(int(self.max_total_text_chars), 1), 500_000) self.codex_request_timeout_seconds = min(max(int(self.codex_request_timeout_seconds), 5), 900) self.queue_wait_seconds = min(max(float(self.queue_wait_seconds), 0.0), 5.0) + self.max_concurrent_codex_runs = min(max(int(self.max_concurrent_codex_runs), 1), 2) @classmethod def from_env(cls) -> "AppSettings": @@ -175,6 +177,7 @@ def from_env(cls) -> "AppSettings": max_total_text_chars=int(os.environ.get("MAX_TOTAL_TEXT_CHARS", "80000")), codex_request_timeout_seconds=int(os.environ.get("CODEX_REQUEST_TIMEOUT_SECONDS", "180")), queue_wait_seconds=float(os.environ.get("QUEUE_WAIT_SECONDS", "0")), + max_concurrent_codex_runs=int(os.environ.get("MAX_CONCURRENT_CODEX_RUNS", "1")), dashboard_enabled=os.environ.get("DASHBOARD_ENABLED", "true").strip().lower() not in {"0", "false", "no", "off"}, ) @@ -340,32 +343,31 @@ def summarize_dashboard_events(events: list[dict[str, Any]]) -> dict[str, Any]: } -async def execute_single_flight(app: FastAPI, settings: AppSettings, prompt: str) -> str: - lock: asyncio.Lock = app.state.execution_lock +def wrapper_busy_error() -> APIError: + return APIError( + 429, + "Another Codex execution is already running", + "rate_limit_error", + code="wrapper_busy", + headers={"Retry-After": "3"}, + ) + + +async def execute_with_admission(app: FastAPI, settings: AppSettings, prompt: str) -> str: + semaphore: asyncio.Semaphore = app.state.execution_semaphore acquired = False - if lock.locked(): + if semaphore.locked(): if settings.queue_wait_seconds == 0: - raise 
APIError( - 429, - "Another Codex execution is already running", - "rate_limit_error", - code="wrapper_busy", - headers={"Retry-After": "3"}, - ) + raise wrapper_busy_error() try: - await asyncio.wait_for(lock.acquire(), timeout=settings.queue_wait_seconds) + await asyncio.wait_for(semaphore.acquire(), timeout=settings.queue_wait_seconds) acquired = True except asyncio.TimeoutError as exc: - raise APIError( - 429, - "Another Codex execution is already running", - "rate_limit_error", - code="wrapper_busy", - headers={"Retry-After": "3"}, - ) from exc + raise wrapper_busy_error() from exc else: - await lock.acquire() + await semaphore.acquire() acquired = True + app.state.active_executions += 1 try: return await app.state.runner.execute( prompt, @@ -376,7 +378,8 @@ async def execute_single_flight(app: FastAPI, settings: AppSettings, prompt: str raise map_runner_error(exc) from exc finally: if acquired: - lock.release() + app.state.active_executions = max(0, app.state.active_executions - 1) + semaphore.release() def create_app(settings: AppSettings | None = None, runner: Any | None = None) -> FastAPI: @@ -385,7 +388,8 @@ def create_app(settings: AppSettings | None = None, runner: Any | None = None) - app = FastAPI(title="codex-cli-provider", docs_url=None, redoc_url=None, openapi_url=None) app.state.settings = settings app.state.runner = runner - app.state.execution_lock = asyncio.Lock() + app.state.execution_semaphore = asyncio.Semaphore(settings.max_concurrent_codex_runs) + app.state.active_executions = 0 app.state.dashboard_events = deque(maxlen=DASHBOARD_EVENT_LIMIT) if settings.cors_allowed_origins: @@ -475,6 +479,7 @@ async def dashboard_status() -> JSONResponse: require_dashboard_enabled() status = await runner.status() events = list(app.state.dashboard_events) + active_executions = int(app.state.active_executions) return dashboard_json({ "time": now_iso(), "provider": { @@ -485,13 +490,17 @@ async def dashboard_status() -> JSONResponse: }, "runner": { "ready": 
bool(status.get("ready")), - "busy": bool(app.state.execution_lock.locked()), + "busy": active_executions >= settings.max_concurrent_codex_runs, + "activeRuns": active_executions, + "maxConcurrentRuns": settings.max_concurrent_codex_runs, }, "limits": { "maxBodyBytes": settings.max_body_bytes, "maxMessages": settings.max_messages, "maxTotalTextChars": settings.max_total_text_chars, "requestTimeoutSeconds": settings.codex_request_timeout_seconds, + "queueWaitSeconds": settings.queue_wait_seconds, + "maxConcurrentRuns": settings.max_concurrent_codex_runs, }, }, "events": summarize_dashboard_events(events), @@ -535,7 +544,7 @@ async def chat(payload: dict[str, Any] = Body(...), _: None = Depends(require_au prompt = build_codex_prompt(messages) start = time.perf_counter() try: - text = await execute_single_flight(app, settings, prompt) + text = await execute_with_admission(app, settings, prompt) finally: elapsed_ms = int((time.perf_counter() - start) * 1000) LOGGER.info("request complete route=/v1/chat/completions elapsed_ms=%s model=%s", elapsed_ms, MODEL_ALIAS) diff --git a/tests/test_image_tags.py b/tests/test_image_tags.py new file mode 100644 index 0000000..9ea9daa --- /dev/null +++ b/tests/test_image_tags.py @@ -0,0 +1,38 @@ +import pytest + +from scripts.image_tags import build_candidate_tag, sanitize_ref_name, validate_image_tag + + +def test_build_candidate_tag_uses_branch_and_short_sha(): + tag = build_candidate_tag("feature/pi-node2 smoke", "ABCDEF1234567890") + assert tag == "codex-cli-provider-dev-feature-pi-node2-smoke-abcdef123456" + + +def test_build_candidate_tag_truncates_long_refs_to_docker_limit(): + tag = build_candidate_tag("feature/" + "x" * 200, "0123456789abcdef") + assert len(tag) == 128 + assert tag.endswith("-0123456789ab") + + +def test_sanitize_ref_name_has_stable_fallback(): + assert sanitize_ref_name("///") == "ref" + + +def test_validate_release_tag_accepts_project_semver(): + assert validate_image_tag("codex-cli-provider-0.1.2", 
"release") == "codex-cli-provider-0.1.2" + assert validate_image_tag("codex-cli-provider-0.1.2-rc.1", "release") == "codex-cli-provider-0.1.2-rc.1" + + +@pytest.mark.parametrize( + "tag,kind", + [ + ("latest", "any"), + ("v0.1.2", "release"), + ("codex-cli-provider-0.1.2+build", "release"), + ("codex-cli-provider-0.1.2", "candidate"), + ("codex-cli-provider-dev-feature", "release"), + ], +) +def test_validate_image_tag_rejects_invalid_tags(tag, kind): + with pytest.raises(SystemExit): + validate_image_tag(tag, kind) diff --git a/tests/test_server.py b/tests/test_server.py index 69a571c..62ac68b 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -90,6 +90,14 @@ async def request(app, method, url, token=TEST_SECRET, **kwargs): return await client.request(method, url, headers=headers, **kwargs) +async def wait_for_prompt_count(runner, count): + for _ in range(50): + if len(runner.prompts) >= count: + return + await asyncio.sleep(0.01) + raise AssertionError(f"expected {count} prompts, got {len(runner.prompts)}") + + def test_default_app_uses_local_runner(): app = create_app(settings()) assert isinstance(app.state.runner, LocalCodexRunner) @@ -101,6 +109,15 @@ def test_app_settings_do_not_require_runner_socket(): assert not hasattr(configured, "runner_api_key") +def test_admission_settings_are_clamped(): + high = AppSettings(proxy_api_key=TEST_SECRET, queue_wait_seconds=99, max_concurrent_codex_runs=99) + low = AppSettings(proxy_api_key=TEST_SECRET, queue_wait_seconds=-1, max_concurrent_codex_runs=-1) + assert high.queue_wait_seconds == 5.0 + assert high.max_concurrent_codex_runs == 2 + assert low.queue_wait_seconds == 0.0 + assert low.max_concurrent_codex_runs == 1 + + @pytest.mark.asyncio async def test_health_ready_and_unready(): ready_app = create_app(settings(), FakeRunner(ready=True)) @@ -149,6 +166,19 @@ async def test_dashboard_status_is_sanitized_and_unauthenticated(): assert TEST_SECRET not in response.text +@pytest.mark.asyncio +async def 
test_dashboard_status_reports_admission_limits(): + app = create_app(settings(max_concurrent_codex_runs=2, queue_wait_seconds=3), FakeRunner()) + response = await request(app, "GET", "/dashboard/api/status", token=None) + assert response.status_code == 200 + body = response.json() + assert body["provider"]["runner"]["activeRuns"] == 0 + assert body["provider"]["runner"]["maxConcurrentRuns"] == 2 + assert body["provider"]["runner"]["busy"] is False + assert body["provider"]["limits"]["queueWaitSeconds"] == 3 + assert body["provider"]["limits"]["maxConcurrentRuns"] == 2 + + @pytest.mark.asyncio async def test_dashboard_can_be_disabled(): app = create_app(settings(dashboard_enabled=False), FakeRunner()) @@ -477,6 +507,47 @@ async def test_single_flight_queue_timeout_returns_wrapper_busy_429(): assert second.json()["error"]["code"] == "wrapper_busy" +@pytest.mark.asyncio +async def test_two_execution_slots_allow_two_parallel_requests(): + runner = BlockingRunner() + app = create_app(settings(max_concurrent_codex_runs=2, queue_wait_seconds=0), runner) + payload = {"model": "codex-cli-default", "messages": [{"role": "user", "content": "hello"}]} + + first = asyncio.create_task(request(app, "POST", "/v1/chat/completions", json=payload)) + second = asyncio.create_task(request(app, "POST", "/v1/chat/completions", json=payload)) + await wait_for_prompt_count(runner, 2) + + status = await request(app, "GET", "/dashboard/api/status", token=None) + assert status.json()["provider"]["runner"]["activeRuns"] == 2 + assert status.json()["provider"]["runner"]["busy"] is True + + runner.release.set() + first_response = await first + second_response = await second + assert first_response.status_code == 200 + assert second_response.status_code == 200 + + +@pytest.mark.asyncio +async def test_concurrency_limit_returns_429_when_all_slots_busy(): + runner = BlockingRunner() + app = create_app(settings(max_concurrent_codex_runs=2, queue_wait_seconds=0), runner) + payload = {"model": 
"codex-cli-default", "messages": [{"role": "user", "content": "hello"}]} + + first = asyncio.create_task(request(app, "POST", "/v1/chat/completions", json=payload)) + second = asyncio.create_task(request(app, "POST", "/v1/chat/completions", json=payload)) + await wait_for_prompt_count(runner, 2) + + third = await request(app, "POST", "/v1/chat/completions", json=payload) + runner.release.set() + await first + await second + + assert third.status_code == 429 + assert third.headers["retry-after"] == "3" + assert third.json()["error"]["code"] == "wrapper_busy" + + @pytest.mark.asyncio async def test_runner_errors_are_mapped(): cases = [