From 7af49e9738bd162f2305dfaa533cacabf98c28d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Wed, 17 Jun 2026 09:49:17 -0400 Subject: [PATCH 01/13] initial env and test setup --- .gitignore | 6 + AGENTS.md | 24 ++++ CLAUDE.md | 2 +- src/Dockerfile => Dockerfile | 0 Taskfile.yml | 123 +++++++++++++++----- bin/build-helper-image.sh | 59 ++++++++++ bin/kind-env.sh | 17 +++ bin/opslevel-runner-coding-agent | 8 ++ bin/opslevel-runner-runner | 8 ++ bin/setup-kind.sh | 69 +++++++++++ bin/stop-kind.sh | 16 +++ src/Procfile | 3 +- tests/enqueue-coding-agent-job.sh | 101 ++++++++++++++++ {src/scripts => tests}/enqueue-test-jobs.go | 0 {src/scripts => tests}/enqueue-test-jobs.sh | 13 ++- tests/run-runner.sh | 65 +++++++++++ 16 files changed, 477 insertions(+), 37 deletions(-) rename src/Dockerfile => Dockerfile (100%) create mode 100755 bin/build-helper-image.sh create mode 100755 bin/kind-env.sh create mode 100755 bin/opslevel-runner-coding-agent create mode 100755 bin/opslevel-runner-runner create mode 100755 bin/setup-kind.sh create mode 100755 bin/stop-kind.sh create mode 100755 tests/enqueue-coding-agent-job.sh rename {src/scripts => tests}/enqueue-test-jobs.go (100%) rename {src/scripts => tests}/enqueue-test-jobs.sh (76%) create mode 100755 tests/run-runner.sh diff --git a/.gitignore b/.gitignore index 05944a5..04b4e20 100644 --- a/.gitignore +++ b/.gitignore @@ -23,8 +23,14 @@ src/opslevel-runner src/go.work** **coverage.txt +# Host build artifacts (local kind dev) +dist/ + # ignore any user-created yaml files that may have been used for tophatting. src/*.yaml # Git worktrees .worktrees/ + +# Local environment overrides (e.g. 
KUBECONFIG env var) +.env.local diff --git a/AGENTS.md b/AGENTS.md index df7a4af..e6fca92 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -38,3 +38,27 @@ bd sync # Sync with git - NEVER say "ready to push when you are" - YOU must push - If push fails, resolve and retry until it succeeds +## Container / kind tooling + +Prefer podman, fall back to docker. Under podman, kind needs the experimental +provider env var. Keep snippets pure shell (no Taskfile vars) so they can be +pasted into a terminal as-is: + +```bash +if command -v podman &>/dev/null; then + export KIND_EXPERIMENTAL_PROVIDER=podman + cmd=podman +else + cmd=docker +fi +``` + +Use `"$cmd"` for build/save/exec calls. Helper-image build+load logic lives in +`bin/build-helper-image.sh` (loads on rebuild or when absent in kind). + +`crictl` is not present in kind nodes; query node images with: + +```bash +"$cmd" exec "${CLUSTER_NAME:-opslevel-runner}-control-plane" ctr -n k8s.io images ls -q +``` + diff --git a/CLAUDE.md b/CLAUDE.md index c2a9abd..ea2a88e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -53,7 +53,7 @@ Run an end-to-end test with Faktory and a local Kubernetes cluster: task start-faktory # Terminal 2: Enqueue test jobs (requires Faktory running) -cd src && go run scripts/enqueue-test-jobs.go 50 +go run -C src ../tests/enqueue-test-jobs.go 50 # Monitor jobs at http://localhost:7420 ``` diff --git a/src/Dockerfile b/Dockerfile similarity index 100% rename from src/Dockerfile rename to Dockerfile diff --git a/Taskfile.yml b/Taskfile.yml index 51c3961..08f6c17 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -1,10 +1,16 @@ # https://taskfile.dev/ -version: '3' +version: "3" + +set: [errexit, pipefail] env: - OPSLEVEL_GO_PKG: "github.com/opslevel/opslevel-go/v2024" + OPSLEVEL_GO_PKG: "github.com/opslevel/opslevel-go/v2026" SRC_DIR: "{{.TASKFILE_DIR}}/src" + HOMEBREW_NO_AUTO_UPDATE: 1 + +vars: + FAKTORY_VERSION: "1.9.3" tasks: ci: @@ -34,6 +40,7 @@ tasks: setup: desc: Setup workspace for local development cmds: + - task: install-faktory - task: 
workspace test: @@ -56,51 +63,107 @@ tasks: cmds: - cmd: echo "Setting up opslevel-go workspace..." silent: true + - git submodule update --init --recursive - go work init || exit 0 - go work use . submodules/opslevel-go - cmd: echo "opslevel-go workspace ready!" silent: true - start-faktory: - desc: Start Faktory and opslevel-runner in faktory mode + run: + desc: Start Faktory + runner + coding-agent workers via goreman; (re)build/load helper image on change. + deps: + - build-helper-image + - run-watcher + dir: "{{.SRC_DIR}}" + cmds: + - | + go tool goreman start + + run-watcher: + desc: "Restart both host workers when any .go file changes. Run under goreman (Procfile watcher)." + dir: "{{.SRC_DIR}}" + watch: true + sources: + - "**/*.go" + cmds: + - go tool goreman run restart runner coding-agent || true + + build-helper-image: + desc: Build the runner helper image and load it into kind (loads on rebuild or when absent in kind). dir: "{{.SRC_DIR}}" + watch: true + sources: + - "**/*.go" + - "go.mod" + - "go.sum" + - "../Dockerfile" deps: - - install-faktory + - task: setup-kind + vars: { KIND_CLUSTER: "{{.KIND_CLUSTER}}" } + vars: + KIND_CLUSTER: '{{default "opslevel-runner" .KIND_CLUSTER}}' + HELPER_IMAGE: '{{default "opslevel-runner:local" .HELPER_IMAGE}}' + cmds: + - HELPER_IMAGE={{.HELPER_IMAGE}} {{.TASKFILE_DIR}}/bin/build-helper-image.sh {{.KIND_CLUSTER}} + + stop-kind: + desc: Clean orphaned job pods and stop kind cluster + vars: + KIND_CLUSTER: '{{default "opslevel-runner" .KIND_CLUSTER}}' cmds: - - go tool goreman start + - "{{.TASKFILE_DIR}}/bin/stop-kind.sh {{.KIND_CLUSTER}}" # internal (not directly called) tasks - install-redis: - desc: install "redis-server" + setup-kind: internal: true + vars: + KIND_CLUSTER: '{{default "opslevel-runner" .KIND_CLUSTER}}' + cmds: + # --wait: block until the cluster exists for tasks requiring + # a cluster to target; e.g. 
build-helper-image's `kind load` + - "{{.TASKFILE_DIR}}/bin/setup-kind.sh --wait {{.KIND_CLUSTER}}" + + install-redis: + desc: Install redis-server status: - - test -n "command -v redis-server" + - command -v redis-server + cmds: + - task: install-redis-{{OS}} + + install-redis-darwin: + internal: true cmds: - brew install redis - install-faktory: - desc: install "faktory" + install-redis-linux: internal: true + cmds: + - sudo apt-get install -y redis-server + + install-faktory: + desc: Install Faktory from GitHub releases deps: - install-redis - status: - - test -n "command -v faktory" + cmds: + - task: install-faktory-{{OS}} + + install-faktory-darwin: + internal: true vars: - GOARCH: - sh: go env GOARCH - EXPECTED_SHA256: - sh: | - case $(go env GOARCH) in - amd64) echo "c20fbf67cd54f2313a4180b0506ac96fbb66bfc8b9a39917f27246b41087f300" ;; - arm64) echo "edfaaa5242ec7702ad0eb14c6f388b25a77d1fb01d9ec9845332df50bead64f4" ;; - *) echo "unsupported arch: $(go env GOARCH)" >&2; exit 1 ;; - esac - cmds: - - mkdir -p ./faktory - - curl -sL https://github.com/contribsys/faktory/releases/download/v1.8.0/faktory-ent_1.8.0.macos.{{.GOARCH}}.tbz -o ./faktory/faktory-ent.osx.tbz - - echo "{{.EXPECTED_SHA256}} ./faktory/faktory-ent.osx.tbz" | shasum -a 256 -c - - - tar xjf ./faktory/faktory-ent.osx.tbz -C ./faktory - - mv ./faktory/faktory /usr/local/bin - - chmod +x /usr/local/bin/faktory - - rm -rf ./faktory + ARCH: + sh: uname -m | sed 's/x86_64/amd64/' + ASSET_PATTERN: "faktory_{{.FAKTORY_VERSION}}.macos.{{.ARCH}}.tbz" + status: + - test -x /usr/local/bin/faktory && /usr/local/bin/faktory -v | grep -q "{{.FAKTORY_VERSION}}" + cmds: + - echo "Installing Faktory {{.FAKTORY_VERSION}} for macOS ({{.ARCH}})..." 
+ - | + DOWNLOAD_URL=$(curl --silent "https://api.github.com/repos/contribsys/faktory/releases/tags/v{{.FAKTORY_VERSION}}" \ + | jq --raw-output --arg ASSET_PATTERN "{{.ASSET_PATTERN}}" \ + '.assets[] | select(.name == $ASSET_PATTERN) | .browser_download_url') + curl -fsSL "$DOWNLOAD_URL" -o /tmp/{{.ASSET_PATTERN}} + - tar -xjf /tmp/{{.ASSET_PATTERN}} -C /tmp + - sudo install -m 0755 /tmp/faktory /usr/local/bin/faktory + - rm -f /tmp/{{.ASSET_PATTERN}} /tmp/faktory + - echo "Faktory {{.FAKTORY_VERSION}} installed to /usr/local/bin/faktory" diff --git a/bin/build-helper-image.sh b/bin/build-helper-image.sh new file mode 100755 index 0000000..0193cc2 --- /dev/null +++ b/bin/build-helper-image.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# Build the runner helper image and load it into kind. +# Loads iff: we rebuilt this run OR the image is absent in the kind cluster. + +set -eu + +CLUSTER_NAME="${1:-opslevel-runner}" +HELPER_IMAGE="${HELPER_IMAGE:-opslevel-runner:local}" + +SCRIPT_DIR="${BASH_SOURCE[0]%/*}" +source "$SCRIPT_DIR/kind-env.sh" + +GOARCH="$(go env GOARCH)" +DIST_DIR="$SCRIPT_DIR/../dist" +DIST_BIN="$DIST_DIR/linux/${GOARCH}/opslevel-runner" +SRC_CHECKSUM_PREVIOUS="$DIST_DIR/linux/${GOARCH}/.build-checksum" + +image_in_kind() { + "$cmd" exec "${CLUSTER_NAME}-control-plane" ctr -n k8s.io images ls -q 2>/dev/null \ + | grep -q "$HELPER_IMAGE" +} + +checksum_sources() { + { cd "$SCRIPT_DIR/../src" && \ + find . \ + \( -name '*.go' -o -name 'go.mod' -o -name 'go.sum' \) \ + -type f \ + -print0 | + LC_ALL=C sort -z | + xargs -0 shasum -a 256 + shasum -a 256 "$SCRIPT_DIR/../Dockerfile" + } | shasum -a 256 | cut -d' ' -f1 +} + +# checksum the real image inputs (binary embeds the compiled go code) +src_checksum="$(checksum_sources)" + +build_image() { + if [ ! -f "$DIST_BIN" ] || [ ! 
-f "$SRC_CHECKSUM_PREVIOUS" ] || [ "$(< "$SRC_CHECKSUM_PREVIOUS")" != "$src_checksum" ]; then + mkdir -p "$DIST_DIR/linux/${GOARCH}" + CGO_ENABLED=0 GOOS=linux GOARCH="$GOARCH" go build -C "$SCRIPT_DIR/../src" -o "$DIST_BIN" . + "$cmd" build -f "$SCRIPT_DIR/../Dockerfile" \ + --build-arg "TARGETPLATFORM=linux/${GOARCH}" \ + -t "$HELPER_IMAGE" \ + "$DIST_DIR" + printf '%s' "$src_checksum" > "$SRC_CHECKSUM_PREVIOUS" + return 0 + fi + return 1 +} + +if build_image || ! image_in_kind; then + if [ "$cmd" = podman ]; then + "$cmd" save "$HELPER_IMAGE" | kind load image-archive /dev/stdin --name "$CLUSTER_NAME" + else + kind load docker-image "$HELPER_IMAGE" --name "$CLUSTER_NAME" + fi +fi diff --git a/bin/kind-env.sh b/bin/kind-env.sh new file mode 100755 index 0000000..ba37c38 --- /dev/null +++ b/bin/kind-env.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Shared kind env/runtime detection. Sourced by setup-kind.sh and stop-kind.sh. +# Caller may set SCRIPT_DIR to this script's dir (bin/); defaults to self-located. +# Sets $cmd (podman|docker) and exports KUBECONFIG. + +SCRIPT_DIR="${SCRIPT_DIR:-${BASH_SOURCE[0]%/*}}" + +# optional local overrides (e.g. KUBECONFIG); gitignored +[ -f "$SCRIPT_DIR/../.env.local" ] && source "$SCRIPT_DIR/../.env.local" +export KUBECONFIG="${KUBECONFIG:-$HOME/.kube/config}" + +if command -v podman &>/dev/null; then + export KIND_EXPERIMENTAL_PROVIDER=podman + cmd=podman +else + cmd=docker +fi diff --git a/bin/opslevel-runner-coding-agent b/bin/opslevel-runner-coding-agent new file mode 100755 index 0000000..ae8e7cc --- /dev/null +++ b/bin/opslevel-runner-coding-agent @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -eu +SCRIPT_DIR="${BASH_SOURCE[0]%/*}" + +# source for KUBECONFIG and k8s context to be set +source "$SCRIPT_DIR/setup-kind.sh" opslevel-runner +exec go run -C "$SCRIPT_DIR/../src" . 
--log-level TRACE --job-pod-helper-image=localhost/opslevel-runner:local run --mode=faktory --queues=coding-agent --queue=coding-agent --job-pod-max-wait=300 --runner-pod-namespace=default --job-agent-mode=true diff --git a/bin/opslevel-runner-runner b/bin/opslevel-runner-runner new file mode 100755 index 0000000..1e43ced --- /dev/null +++ b/bin/opslevel-runner-runner @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -eu +SCRIPT_DIR="${BASH_SOURCE[0]%/*}" + +# source for KUBECONFIG and k8s context to be set +source "$SCRIPT_DIR/setup-kind.sh" opslevel-runner +exec go run -C "$SCRIPT_DIR/../src" . run --mode=faktory --queues=runner --job-pod-max-wait=300 --runner-pod-namespace=default diff --git a/bin/setup-kind.sh b/bin/setup-kind.sh new file mode 100755 index 0000000..feefd42 --- /dev/null +++ b/bin/setup-kind.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# Sourced by opslevel-runner launchers (triggered by Taskfile; started through goreman +# Procfile) +# - opslevel-runner-coding-agent +# - opslevel-runner-runner +# +# - to inherit KUBECONFIG and set the right k8s context to launch jobs +# - to start the k8s cluster and/or create it if it doesn't exist +# - the lockfile is a transient mutex guarding the create/start critical +# section only; it is always removed when setup completes (success or +# failure). Cluster state is authoritative — derived from +# `kind get clusters` / container inspect, never from lock presence. +# +# Uses 'return' inside a function so sourcing callers are not killed on exit. +set -eu + +_setup_kind() { + # --wait: losers of the lock race block until the cluster exists. + # Pass this when the caller immediately uses the cluster (e.g. `kind load` + # in bin/build-helper-image.sh). Worker launchers omit it — the exec'd Go + # process tolerates the cluster appearing slightly later, so blocking is unnecessary. 
+ if [ "${1:-}" = "--wait" ]; then local wait=true; shift; fi + + local CLUSTER_NAME="${1:-opslevel-runner}" + + local SCRIPT_DIR="${BASH_SOURCE[0]%/*}" + # exports KUBECONFIG so the exec'd worker inherits the kubeconfig the kind context is pinned into; + # also sets $cmd (podman|docker) and KIND_EXPERIMENTAL_PROVIDER + source "$SCRIPT_DIR/kind-env.sh" + + local lockfile="${TMPDIR:-/tmp}/setup-kind-${CLUSTER_NAME}.lock" + + # de-sync concurrent workers before racing the lock + sleep "$(( ms = RANDOM % 1200 + 200, ms / 1000 )).$(printf '%03d' "$(( ms % 1000 ))")" + + if ! ( set -C; : > "$lockfile" ) 2>/dev/null; then + + # loser: another caller owns the critical section; KUBECONFIG already exported above. + # Without --wait, return immediately — worker launchers exec a Go + # process that can tolerate a brief delay before the cluster is ready. + if [ -n "${wait:-}" ]; then + until kind get clusters | grep -q "^${CLUSTER_NAME}$"; do sleep 0.5; done + fi + return 0 + fi + + # winner: owns the lock; always release it when done (success or failure) + trap 'rm -f "$lockfile" 2>/dev/null || true' ERR + + # create the cluster if it doesn't exist + if ! 
kind get clusters | grep -q "^${CLUSTER_NAME}$"; then + kind create cluster --kubeconfig "$KUBECONFIG" --name "$CLUSTER_NAME" + fi + + # start cluster if not running yet + if [ "$("$cmd" inspect -f '{{.State.Status}}' "${CLUSTER_NAME}-control-plane")" != "running" ]; then + "$cmd" start "${CLUSTER_NAME}-control-plane" + fi + + # set context for user interaction + kubectl config set-context "kind-${CLUSTER_NAME}" --namespace default + kubectl config use-context "kind-${CLUSTER_NAME}" + + # release the mutex — lock is transient, not a persistent session flag + trap - ERR + rm -f "$lockfile" +} + +_setup_kind "$@" diff --git a/bin/stop-kind.sh b/bin/stop-kind.sh new file mode 100755 index 0000000..b4733e0 --- /dev/null +++ b/bin/stop-kind.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +set -eu + +CLUSTER_NAME="${1:-opslevel-runner}" + +SCRIPT_DIR="${BASH_SOURCE[0]%/*}" +source "$SCRIPT_DIR/kind-env.sh" + +# delete any running pods and stop kind cluster +if kind get clusters | grep -q "^${CLUSTER_NAME}$"; then + kubectl delete pods --all --namespace default --ignore-not-found --wait --timeout=60s 2>/dev/null || true + if [ "$("$cmd" inspect -f '{{.State.Status}}' "${CLUSTER_NAME}-control-plane" 2>/dev/null)" = "running" ]; then + "$cmd" stop "${CLUSTER_NAME}-control-plane" + fi +fi diff --git a/src/Procfile b/src/Procfile index a6f2839..98d7b8d 100644 --- a/src/Procfile +++ b/src/Procfile @@ -1,2 +1,3 @@ faktory: faktory -runner: go run main.go run --mode=faktory --queues=runner --job-pod-max-wait=300 \ No newline at end of file +runner: ../bin/opslevel-runner-runner +coding-agent: ../bin/opslevel-runner-coding-agent \ No newline at end of file diff --git a/tests/enqueue-coding-agent-job.sh b/tests/enqueue-coding-agent-job.sh new file mode 100755 index 0000000..0555aaa --- /dev/null +++ b/tests/enqueue-coding-agent-job.sh @@ -0,0 +1,101 @@ +#!/bin/bash +# +# Enqueue a coding-agent job to test the squid egress proxy sidecar. 
+# +# The job is placed on the 'coding-agent' Faktory queue, which is consumed +# exclusively by the coding-agent worker (src/Procfile). That worker passes +# --queue=coding-agent which triggers squid sidecar injection (k8s.go:262). +# The normal 'runner' worker ignores this queue, demonstrating production-like routing. +# +# Usage: ./tests/enqueue-coding-agent-job.sh +# +# Prerequisites: +# 1. kind cluster up with helper image loaded: +# task build-helper-image +# 2. squid-config ConfigMap applied to the default namespace: +# kubectl apply -f - <` (default 3600s) independently +# of the job commands, so the pod stays alive for exec after the job completes. +# +# Manual proxy probe: +# POD=$(kubectl get pods -n default -l app.kubernetes.io/managed-by=runner-faktory \ +# --sort-by=.metadata.creationTimestamp -o name | tail -1) +# +# # Confirm squid got the PROXY_ALLOWED_DOMAINS append: +# kubectl exec -n default $POD -c squid-proxy -- cat /etc/squid/conf.d/allowed-domains.txt +# +# # Exec into the job container: +# kubectl exec -it -n default $POD -c job -- sh +# Inside: +# export http_proxy=http://localhost:3128 https_proxy=http://localhost:3128 +# # Allowed via PROXY_ALLOWED_DOMAINS runtime append: +# wget -qO- http://example.com >/dev/null && echo "ALLOWED: example.com (PROXY_ALLOWED_DOMAINS)" +# # Allowed via base allowlist: +# wget -qO- https://github.com >/dev/null && echo "ALLOWED: github.com (base list)" +# # Denied (not in allowlist): +# wget -qO- https://wikipedia.org >/dev/null && echo "OPEN" || echo "DENIED: wikipedia.org" +# # For richer output: apk add --no-cache curl +# # curl -x http://localhost:3128 -v https://github.com +# +# # Check squid access log (TCP_DENIED vs allowed): +# kubectl logs -n default $POD -c squid-proxy +# +# Cleanup stale job pods after testing: +# kubectl delete pods -n default -l app.kubernetes.io/managed-by=runner-faktory +# + +set -e + +# load KUBECONFIG (.env.local) + set $cmd / KIND_EXPERIMENTAL_PROVIDER for k8s context 
+SCRIPT_DIR="${BASH_SOURCE[0]%/*}/../bin" +source "$SCRIPT_DIR/kind-env.sh" + +src="${BASH_SOURCE[0]%/*}/../src" +JOB_ID="coding-agent-proxy-test-$(date +%s)" + +echo "Enqueuing coding-agent proxy test job (ID: ${JOB_ID}) ..." + +JOB_FILE=$(mktemp) +cat > "$JOB_FILE" <}) ..." +exec "$BINARY" \ + --log-level "${OPSLEVEL_LOG_LEVEL:-TRACE}" \ + --log-format "${OPSLEVEL_LOG_FORMAT:-TEXT}" \ + --job-pod-helper-image "$HELPER_IMAGE" \ + "${EXTRA_FLAGS[@]}" \ + run \ + --mode faktory \ + --queues "$FAKTORY_QUEUES" \ + --runner-pod-namespace default From 8ca780787796e8ca04623ae0d029b6a9e726f2fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Wed, 17 Jun 2026 14:47:56 -0400 Subject: [PATCH 02/13] fix: metrics port conflicts --- Taskfile.yml | 9 ++++++++- bin/opslevel-runner-coding-agent | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Taskfile.yml b/Taskfile.yml index 08f6c17..5652dd1 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -86,7 +86,14 @@ tasks: sources: - "**/*.go" cmds: - - go tool goreman run restart runner coding-agent || true + - | + elapsed=0 + until go tool goreman run list >/dev/null 2>&1; do + sleep 1 + elapsed=$((elapsed + 1)) + [ "$elapsed" -ge 10 ] && break + done + go tool goreman run restart runner coding-agent build-helper-image: desc: Build the runner helper image and load it into kind (loads on rebuild or when absent in kind). diff --git a/bin/opslevel-runner-coding-agent b/bin/opslevel-runner-coding-agent index ae8e7cc..9e96385 100755 --- a/bin/opslevel-runner-coding-agent +++ b/bin/opslevel-runner-coding-agent @@ -5,4 +5,4 @@ SCRIPT_DIR="${BASH_SOURCE[0]%/*}" # source for KUBECONFIG and k8s context to be set source "$SCRIPT_DIR/setup-kind.sh" opslevel-runner -exec go run -C "$SCRIPT_DIR/../src" . 
--log-level TRACE --job-pod-helper-image=localhost/opslevel-runner:local run --mode=faktory --queues=coding-agent --queue=coding-agent --job-pod-max-wait=300 --runner-pod-namespace=default --job-agent-mode=true +exec go run -C "$SCRIPT_DIR/../src" . --log-level TRACE --job-pod-helper-image=localhost/opslevel-runner:local run --mode=faktory --queues=coding-agent --queue=coding-agent --job-pod-max-wait=300 --runner-pod-namespace=default --job-agent-mode=true --metrics-port=10355 From d42a1369dc2cbec5b59bd63ddc2fdcdb940376e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Wed, 17 Jun 2026 17:00:20 -0400 Subject: [PATCH 03/13] fix: exec local runner scripts with helper image name built locally - configure local runner startup to use the same helper image as coding-agent jobs - improve script readability so run arguments are easier to inspect and maintain - clarify test job enqueueing purpose by naming it for runner jobs --- bin/opslevel-runner-coding-agent | 9 ++++++++- bin/opslevel-runner-runner | 7 ++++++- tests/{enqueue-test-jobs.sh => enqueue-runner-jobs.sh} | 0 3 files changed, 14 insertions(+), 2 deletions(-) rename tests/{enqueue-test-jobs.sh => enqueue-runner-jobs.sh} (100%) diff --git a/bin/opslevel-runner-coding-agent b/bin/opslevel-runner-coding-agent index 9e96385..28239ff 100755 --- a/bin/opslevel-runner-coding-agent +++ b/bin/opslevel-runner-coding-agent @@ -5,4 +5,11 @@ SCRIPT_DIR="${BASH_SOURCE[0]%/*}" # source for KUBECONFIG and k8s context to be set source "$SCRIPT_DIR/setup-kind.sh" opslevel-runner -exec go run -C "$SCRIPT_DIR/../src" . --log-level TRACE --job-pod-helper-image=localhost/opslevel-runner:local run --mode=faktory --queues=coding-agent --queue=coding-agent --job-pod-max-wait=300 --runner-pod-namespace=default --job-agent-mode=true --metrics-port=10355 +exec go run -C "$SCRIPT_DIR/../src" . 
--log-level TRACE run \ + --mode=faktory \ + --queues=coding-agent \ + --queue=coding-agent \ + --job-pod-max-wait=300 \ + --runner-pod-namespace=default \ + --job-agent-mode=true --metrics-port=10355 \ + --job-pod-helper-image=localhost/opslevel-runner:local diff --git a/bin/opslevel-runner-runner b/bin/opslevel-runner-runner index 1e43ced..068226f 100755 --- a/bin/opslevel-runner-runner +++ b/bin/opslevel-runner-runner @@ -5,4 +5,9 @@ SCRIPT_DIR="${BASH_SOURCE[0]%/*}" # source for KUBECONFIG and k8s context to be set source "$SCRIPT_DIR/setup-kind.sh" opslevel-runner -exec go run -C "$SCRIPT_DIR/../src" . run --mode=faktory --queues=runner --job-pod-max-wait=300 --runner-pod-namespace=default +exec go run -C "$SCRIPT_DIR/../src" . run \ + --mode=faktory \ + --queues=runner \ + --job-pod-max-wait=300 \ + --runner-pod-namespace=default \ + --job-pod-helper-image=localhost/opslevel-runner:local diff --git a/tests/enqueue-test-jobs.sh b/tests/enqueue-runner-jobs.sh similarity index 100% rename from tests/enqueue-test-jobs.sh rename to tests/enqueue-runner-jobs.sh From 8653234fd80e5d1be91a230f5511ae9611701548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Wed, 17 Jun 2026 17:06:29 -0400 Subject: [PATCH 04/13] test: keep coding agent job alive during enqueue tests - add a short sleep command so the test job remains available long enough for enqueue behavior to be observed --- tests/enqueue-coding-agent-job.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/enqueue-coding-agent-job.sh b/tests/enqueue-coding-agent-job.sh index 0555aaa..c293cf5 100755 --- a/tests/enqueue-coding-agent-job.sh +++ b/tests/enqueue-coding-agent-job.sh @@ -75,6 +75,7 @@ args: commands: - "echo Coding-agent proxy test pod up. 
Job ID: ${JOB_ID}" - "echo Squid sidecar reachable at localhost:3128" + - "sleep 1m" variables: - key: "PROXY_ALLOWED_DOMAINS" value: "example.com" From 68b1d506fd925f134d8162f5a3163d7850dc6d71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Thu, 18 Jun 2026 08:10:56 -0400 Subject: [PATCH 05/13] fix: successfully exit the wait loop on first goreman exec --- Taskfile.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Taskfile.yml b/Taskfile.yml index 5652dd1..6e9386e 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -91,7 +91,7 @@ tasks: until go tool goreman run list >/dev/null 2>&1; do sleep 1 elapsed=$((elapsed + 1)) - [ "$elapsed" -ge 10 ] && break + [ "$elapsed" -ge 10 ] && exit 0 done go tool goreman run restart runner coding-agent From 39b8d70ca8afc1dbd883c3b85931c5fe27a6044e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Thu, 18 Jun 2026 08:20:55 -0400 Subject: [PATCH 06/13] chore: gitignore taskfile checksum cache --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 04b4e20..6ec86bf 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,9 @@ src/go.work** # Host build artifacts (local kind dev) dist/ +# Taskfile checksum cache for watcher mode +.task/ + # ignore any user-created yaml files that may have been used for tophatting. 
src/*.yaml From 0a26b707acc4b72c906c1ddf90cc2112530abd5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Thu, 18 Jun 2026 14:13:06 -0400 Subject: [PATCH 07/13] fix: hot-reload opslevel-runner with watchexec initial approach didn't work --- Taskfile.yml | 57 ++++++++------------------------ bin/build-helper-image.sh | 2 +- bin/opslevel-runner-coding-agent | 18 +++++----- bin/opslevel-runner-runner | 14 ++++---- src/Procfile | 3 +- 5 files changed, 34 insertions(+), 60 deletions(-) diff --git a/Taskfile.yml b/Taskfile.yml index 6e9386e..42e1b29 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -11,6 +11,8 @@ env: vars: FAKTORY_VERSION: "1.9.3" + HELPER_IMAGE: "localhost/opslevel-runner:local" + KIND_CLUSTER: "opslevel-runner" tasks: ci: @@ -70,53 +72,23 @@ tasks: silent: true run: - desc: Start Faktory + runner + coding-agent workers via goreman; (re)build/load helper image on change. + desc: Start dev environment (Faktory + workers with hot-reload via watchexec). deps: - - build-helper-image - - run-watcher + - task: setup-kind dir: "{{.SRC_DIR}}" cmds: - - | - go tool goreman start - - run-watcher: - desc: "Restart both host workers when any .go file changes. Run under goreman (Procfile watcher)." - dir: "{{.SRC_DIR}}" - watch: true - sources: - - "**/*.go" - cmds: - - | - elapsed=0 - until go tool goreman run list >/dev/null 2>&1; do - sleep 1 - elapsed=$((elapsed + 1)) - [ "$elapsed" -ge 10 ] && exit 0 - done - go tool goreman run restart runner coding-agent + - go tool goreman start build-helper-image: desc: Build the runner helper image and load it into kind (loads on rebuild or when absent in kind). 
dir: "{{.SRC_DIR}}" - watch: true - sources: - - "**/*.go" - - "go.mod" - - "go.sum" - - "../Dockerfile" deps: - task: setup-kind - vars: { KIND_CLUSTER: "{{.KIND_CLUSTER}}" } - vars: - KIND_CLUSTER: '{{default "opslevel-runner" .KIND_CLUSTER}}' - HELPER_IMAGE: '{{default "opslevel-runner:local" .HELPER_IMAGE}}' cmds: - HELPER_IMAGE={{.HELPER_IMAGE}} {{.TASKFILE_DIR}}/bin/build-helper-image.sh {{.KIND_CLUSTER}} stop-kind: desc: Clean orphaned job pods and stop kind cluster - vars: - KIND_CLUSTER: '{{default "opslevel-runner" .KIND_CLUSTER}}' cmds: - "{{.TASKFILE_DIR}}/bin/stop-kind.sh {{.KIND_CLUSTER}}" @@ -124,26 +96,23 @@ tasks: setup-kind: internal: true - vars: - KIND_CLUSTER: '{{default "opslevel-runner" .KIND_CLUSTER}}' cmds: - # --wait: block until the cluster exists for tasks requiring - # a cluster to target; e.g. build-helper-image's `kind load` - "{{.TASKFILE_DIR}}/bin/setup-kind.sh --wait {{.KIND_CLUSTER}}" - install-redis: - desc: Install redis-server + install-deps: + desc: Install development dependencies (redis, watchexec) status: - command -v redis-server + - command -v watchexec cmds: - - task: install-redis-{{OS}} + - task: install-deps-{{OS}} - install-redis-darwin: + install-deps-darwin: internal: true cmds: - - brew install redis + - brew install redis watchexec - install-redis-linux: + install-deps-linux: internal: true cmds: - sudo apt-get install -y redis-server @@ -151,7 +120,7 @@ tasks: install-faktory: desc: Install Faktory from GitHub releases deps: - - install-redis + - install-deps cmds: - task: install-faktory-{{OS}} diff --git a/bin/build-helper-image.sh b/bin/build-helper-image.sh index 0193cc2..8ac60bb 100755 --- a/bin/build-helper-image.sh +++ b/bin/build-helper-image.sh @@ -6,7 +6,7 @@ set -eu CLUSTER_NAME="${1:-opslevel-runner}" -HELPER_IMAGE="${HELPER_IMAGE:-opslevel-runner:local}" +HELPER_IMAGE="${HELPER_IMAGE:-localhost/opslevel-runner:local}" SCRIPT_DIR="${BASH_SOURCE[0]%/*}" source "$SCRIPT_DIR/kind-env.sh" diff --git 
a/bin/opslevel-runner-coding-agent b/bin/opslevel-runner-coding-agent index 28239ff..e54238b 100755 --- a/bin/opslevel-runner-coding-agent +++ b/bin/opslevel-runner-coding-agent @@ -5,11 +5,13 @@ SCRIPT_DIR="${BASH_SOURCE[0]%/*}" # source for KUBECONFIG and k8s context to be set source "$SCRIPT_DIR/setup-kind.sh" opslevel-runner -exec go run -C "$SCRIPT_DIR/../src" . --log-level TRACE run \ - --mode=faktory \ - --queues=coding-agent \ - --queue=coding-agent \ - --job-pod-max-wait=300 \ - --runner-pod-namespace=default \ - --job-agent-mode=true --metrics-port=10355 \ - --job-pod-helper-image=localhost/opslevel-runner:local + +exec watchexec --watch "$SCRIPT_DIR/../src" --exts go,mod,sum --restart \ + -- go run -C "$SCRIPT_DIR/../src" . --log-level TRACE run \ + --mode=faktory \ + --queues=coding-agent \ + --queue=coding-agent \ + --job-pod-max-wait=900 \ + --runner-pod-namespace=default \ + --job-agent-mode=true --metrics-port=10355 \ + --job-pod-helper-image=localhost/opslevel-runner:local diff --git a/bin/opslevel-runner-runner b/bin/opslevel-runner-runner index 068226f..4b8469b 100755 --- a/bin/opslevel-runner-runner +++ b/bin/opslevel-runner-runner @@ -5,9 +5,11 @@ SCRIPT_DIR="${BASH_SOURCE[0]%/*}" # source for KUBECONFIG and k8s context to be set source "$SCRIPT_DIR/setup-kind.sh" opslevel-runner -exec go run -C "$SCRIPT_DIR/../src" . run \ - --mode=faktory \ - --queues=runner \ - --job-pod-max-wait=300 \ - --runner-pod-namespace=default \ - --job-pod-helper-image=localhost/opslevel-runner:local + +exec watchexec --watch "$SCRIPT_DIR/../src" --exts go,mod,sum --restart \ + -- go run -C "$SCRIPT_DIR/../src" . 
run \ + --mode=faktory \ + --queues=runner \ + --job-pod-max-wait=900 \ + --runner-pod-namespace=default \ + --job-pod-helper-image=localhost/opslevel-runner:local diff --git a/src/Procfile b/src/Procfile index 98d7b8d..98580db 100644 --- a/src/Procfile +++ b/src/Procfile @@ -1,3 +1,4 @@ faktory: faktory runner: ../bin/opslevel-runner-runner -coding-agent: ../bin/opslevel-runner-coding-agent \ No newline at end of file +coding-agent: ../bin/opslevel-runner-coding-agent +image-builder: watchexec --watch . --exts go,mod,sum --watch ../Dockerfile -- HELPER_IMAGE=localhost/opslevel-runner:local ../bin/build-helper-image.sh opslevel-runner From a9138b75d3fed0a54024a8a7467925befa70fa41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Thu, 18 Jun 2026 15:03:04 -0400 Subject: [PATCH 08/13] docs: Update README for local development --- README.md | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++-- Taskfile.yml | 2 +- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 96ac50f..a579463 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,6 @@ OpsLevel Runner is the Kubernetes based job processor for [OpsLevel](https://www | opslevel_runner_jobs_processing | `gauge` | The current number of active jobs being processed. | | opslevel_runner_jobs_started | `counter` | The count of jobs that started processing. 
| - ### Commands Testing a job @@ -69,9 +68,9 @@ Running ```sh # Production -OPSLEVEL_API_TOKEN=XXXXX go run main.go run +OPSLEVEL_API_TOKEN=XXXXX go run main.go run # Staging -OPSLEVEL_API_TOKEN=XXXXX go run main.go run --api-url=https://api.opslevel-staging.com/graphql --app-url=https://app.opslevel-staging.com +OPSLEVEL_API_TOKEN=XXXXX go run main.go run --api-url=https://api.opslevel-staging.com/graphql --app-url=https://app.opslevel-staging.com ``` ## Running @@ -113,3 +112,75 @@ Then run `go build` in `src` to build in the local directory, you can also use ` cd src go build ``` + +## Local Development + +The dev environment uses [kind](https://kind.sigs.k8s.io/) (Kubernetes in Docker/Podman), [Faktory](https://github.com/contribsys/faktory) as job queue, and [Task](https://taskfile.dev/) as task runner. + +### Prerequisites + +- Go (`brew install go`) +- [Task](https://taskfile.dev/) (`brew install go-task`) +- Docker or Podman + +### Quick Start + +```sh +task setup # install Faktory + workspace deps +task run # start Faktory + workers (creates kind cluster automatically) +``` + +### What `task run` Does + +The `run` task instantiates the kind cluster if it doesn't exist then starts +[goreman](https://github.com/mattn/goreman) which supervises 4 concurrent +processes defined in `src/Procfile`: + +| Process | Description | +|---------|-------------| +| `faktory` | Starts the Faktory work server (job queue) | +| `runner` | hot-reloads `opslevel-runner run --mode=faktory --queues=runner` through `watchexec` | +| `coding-agent` | hot-reloads `opslevel-runner run --mode=faktory --queues=coding-agent --job-agent-mode=true` through `watchexec` | +| `image-builder` | Watches Go sources and `Dockerfile` with `watchexec`; rebuilds the helper container image and reloads it into kind on change | + +> Note: `--mode faktory` does have `opslevel-runner` poll Faktory for runner +> jobs and launches them as pods in the kind cluster + +### Kubernetes Configuration + +Scripts 
source `.env.local` (gitignored) to set local environment overrides +before creating or connecting to the kind cluster. For example, to reuse a k8s cluster +from a specific KUBECONFIG file: + +```sh +# .env.local +# Point at a dedicated kubeconfig to keep localdev contexts isolated. +export KUBECONFIG=${HOME}/.kube/opslevel.localdev.yaml +``` + +- `bin/kind-env.sh` loads this file, falling back to `~/.kube/config` when `KUBECONFIG` is unset. +- The kind cluster name defaults to `opslevel-runner`. + +### Container Runtime + +Podman is preferred; Docker is used as a fallback. This is handled in `bin/kind-env.sh`. + +### Other Noteworthy Tasks + +#### Helper Image + +Build and load the runner helper image into kind: + +```sh +task build-helper-image +``` + +This cross-compiles the Go binary for Linux, builds the container image from +`Dockerfile`, and loads it into the kind cluster. The image is only rebuilt +when source checksums change. + +#### Stopping Kind Cluster + +```sh +task stop-kind # clean orphaned job pods and stop the cluster +``` diff --git a/Taskfile.yml b/Taskfile.yml index 42e1b29..a7ab410 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -110,7 +110,7 @@ tasks: install-deps-darwin: internal: true cmds: - - brew install redis watchexec + - brew install redis watchexec kind install-deps-linux: internal: true From b4d6805a9c02e6d4f4545fce90fd5ecce590f9d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Thu, 18 Jun 2026 15:37:42 -0400 Subject: [PATCH 09/13] fix: tune down local dev runner default resources - lower the default enqueue count to make local test runs faster and less resource-intensive - pass configurable job pod CPU and memory requests so local runner scripts can match constrained environments --- bin/opslevel-runner-coding-agent | 4 +++- bin/opslevel-runner-runner | 4 +++- tests/enqueue-runner-jobs.sh | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/bin/opslevel-runner-coding-agent b/bin/opslevel-runner-coding-agent
index e54238b..80f51fc 100755 --- a/bin/opslevel-runner-coding-agent +++ b/bin/opslevel-runner-coding-agent @@ -14,4 +14,6 @@ exec watchexec --watch "$SCRIPT_DIR/../src" --exts go,mod,sum --restart \ --job-pod-max-wait=900 \ --runner-pod-namespace=default \ --job-agent-mode=true --metrics-port=10355 \ - --job-pod-helper-image=localhost/opslevel-runner:local + --job-pod-helper-image=localhost/opslevel-runner:local \ + --job-pod-requests-cpu="${OPSLEVEL_JOB_POD_REQUESTS_CPU:-50}" \ + --job-pod-requests-memory="${OPSLEVEL_JOB_POD_REQUESTS_MEMORY:-32}" diff --git a/bin/opslevel-runner-runner b/bin/opslevel-runner-runner index 4b8469b..1fc75b2 100755 --- a/bin/opslevel-runner-runner +++ b/bin/opslevel-runner-runner @@ -12,4 +12,6 @@ exec watchexec --watch "$SCRIPT_DIR/../src" --exts go,mod,sum --restart \ --queues=runner \ --job-pod-max-wait=900 \ --runner-pod-namespace=default \ - --job-pod-helper-image=localhost/opslevel-runner:local + --job-pod-helper-image=localhost/opslevel-runner:local \ + --job-pod-requests-cpu="${OPSLEVEL_JOB_POD_REQUESTS_CPU:-50}" \ + --job-pod-requests-memory="${OPSLEVEL_JOB_POD_REQUESTS_MEMORY:-32}" diff --git a/tests/enqueue-runner-jobs.sh b/tests/enqueue-runner-jobs.sh index bf641ae..ce7cc69 100755 --- a/tests/enqueue-runner-jobs.sh +++ b/tests/enqueue-runner-jobs.sh @@ -12,7 +12,7 @@ set -e SCRIPT_DIR="${BASH_SOURCE[0]%/*}/../bin" source "$SCRIPT_DIR/kind-env.sh" -NUM_JOBS=${1:-10} +NUM_JOBS=${1:-3} src="${BASH_SOURCE[0]%/*}/../src" echo "Enqueuing $NUM_JOBS test jobs to Faktory..." 
From 8801db8b94886d21ed135fb393e50d24e7e62b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Mon, 22 Jun 2026 15:40:50 -0400 Subject: [PATCH 10/13] feat: set lower resources request in tests/run-runner.sh --- tests/run-runner.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/run-runner.sh b/tests/run-runner.sh index 18b02fd..1a6682a 100755 --- a/tests/run-runner.sh +++ b/tests/run-runner.sh @@ -58,6 +58,8 @@ exec "$BINARY" \ --log-level "${OPSLEVEL_LOG_LEVEL:-TRACE}" \ --log-format "${OPSLEVEL_LOG_FORMAT:-TEXT}" \ --job-pod-helper-image "$HELPER_IMAGE" \ + --job-pod-requests-cpu "${OPSLEVEL_JOB_POD_REQUESTS_CPU:-50}" \ + --job-pod-requests-memory "${OPSLEVEL_JOB_POD_REQUESTS_MEMORY:-32}" \ "${EXTRA_FLAGS[@]}" \ run \ --mode faktory \ From b99b828d05b679ddb85a287dc516c3ec0d4416e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Tue, 23 Jun 2026 10:03:48 -0400 Subject: [PATCH 11/13] remove coding-agent local runner specifics --- README.md | 1 - bin/opslevel-runner-coding-agent | 19 ------ src/Procfile | 1 - tests/enqueue-coding-agent-job.sh | 102 ------------------------------ 4 files changed, 123 deletions(-) delete mode 100755 bin/opslevel-runner-coding-agent delete mode 100755 tests/enqueue-coding-agent-job.sh diff --git a/README.md b/README.md index a579463..d8b9599 100644 --- a/README.md +++ b/README.md @@ -140,7 +140,6 @@ processes defined in `src/Procfile`: |---------|-------------| | `faktory` | Starts the Faktory work server (job queue) | | `runner` | hot-reloads `opslevel-runner run --mode=faktory --queues=runner` through `watchexec` | -| `coding-agent` | hot-reloads `opslevel-runner run --mode=faktory --queues=coding-agent --job-agent-mode=true` through `watchexec` | | `image-builder` | Watches Go sources and `Dockerfile` with `watchexec`; rebuilds the helper container image and reloads it into kind on change | > Note: `--mode faktory` does have `opslevel-runner` poll Faktory for runner diff 
--git a/bin/opslevel-runner-coding-agent b/bin/opslevel-runner-coding-agent deleted file mode 100755 index 80f51fc..0000000 --- a/bin/opslevel-runner-coding-agent +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -set -eu -SCRIPT_DIR="${BASH_SOURCE[0]%/*}" - -# source for KUBECONFIG and k8s context to be set -source "$SCRIPT_DIR/setup-kind.sh" opslevel-runner - -exec watchexec --watch "$SCRIPT_DIR/../src" --exts go,mod,sum --restart \ - -- go run -C "$SCRIPT_DIR/../src" . --log-level TRACE run \ - --mode=faktory \ - --queues=coding-agent \ - --queue=coding-agent \ - --job-pod-max-wait=900 \ - --runner-pod-namespace=default \ - --job-agent-mode=true --metrics-port=10355 \ - --job-pod-helper-image=localhost/opslevel-runner:local \ - --job-pod-requests-cpu="${OPSLEVEL_JOB_POD_REQUESTS_CPU:-50}" \ - --job-pod-requests-memory="${OPSLEVEL_JOB_POD_REQUESTS_MEMORY:-32}" diff --git a/src/Procfile b/src/Procfile index 98580db..edffda5 100644 --- a/src/Procfile +++ b/src/Procfile @@ -1,4 +1,3 @@ faktory: faktory runner: ../bin/opslevel-runner-runner -coding-agent: ../bin/opslevel-runner-coding-agent image-builder: watchexec --watch . --exts go,mod,sum --watch ../Dockerfile -- HELPER_IMAGE=localhost/opslevel-runner:local ../bin/build-helper-image.sh opslevel-runner diff --git a/tests/enqueue-coding-agent-job.sh b/tests/enqueue-coding-agent-job.sh deleted file mode 100755 index c293cf5..0000000 --- a/tests/enqueue-coding-agent-job.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash -# -# Enqueue a coding-agent job to test the squid egress proxy sidecar. -# -# The job is placed on the 'coding-agent' Faktory queue, which is consumed -# exclusively by the coding-agent worker (src/Procfile). That worker passes -# --queue=coding-agent which triggers squid sidecar injection (k8s.go:262). -# The normal 'runner' worker ignores this queue, demonstrating production-like routing. -# -# Usage: ./tests/enqueue-coding-agent-job.sh -# -# Prerequisites: -# 1. 
kind cluster up with helper image loaded: -# task build-helper-image -# 2. squid-config ConfigMap applied to the default namespace: -# kubectl apply -f - <` (default 3600s) independently -# of the job commands, so the pod stays alive for exec after the job completes. -# -# Manual proxy probe: -# POD=$(kubectl get pods -n default -l app.kubernetes.io/managed-by=runner-faktory \ -# --sort-by=.metadata.creationTimestamp -o name | tail -1) -# -# # Confirm squid got the PROXY_ALLOWED_DOMAINS append: -# kubectl exec -n default $POD -c squid-proxy -- cat /etc/squid/conf.d/allowed-domains.txt -# -# # Exec into the job container: -# kubectl exec -it -n default $POD -c job -- sh -# Inside: -# export http_proxy=http://localhost:3128 https_proxy=http://localhost:3128 -# # Allowed via PROXY_ALLOWED_DOMAINS runtime append: -# wget -qO- http://example.com >/dev/null && echo "ALLOWED: example.com (PROXY_ALLOWED_DOMAINS)" -# # Allowed via base allowlist: -# wget -qO- https://github.com >/dev/null && echo "ALLOWED: github.com (base list)" -# # Denied (not in allowlist): -# wget -qO- https://wikipedia.org >/dev/null && echo "OPEN" || echo "DENIED: wikipedia.org" -# # For richer output: apk add --no-cache curl -# # curl -x http://localhost:3128 -v https://github.com -# -# # Check squid access log (TCP_DENIED vs allowed): -# kubectl logs -n default $POD -c squid-proxy -# -# Cleanup stale job pods after testing: -# kubectl delete pods -n default -l app.kubernetes.io/managed-by=runner-faktory -# - -set -e - -# load KUBECONFIG (.env.local) + set $cmd / KIND_EXPERIMENTAL_PROVIDER for k8s context -SCRIPT_DIR="${BASH_SOURCE[0]%/*}/../bin" -source "$SCRIPT_DIR/kind-env.sh" - -src="${BASH_SOURCE[0]%/*}/../src" -JOB_ID="coding-agent-proxy-test-$(date +%s)" - -echo "Enqueuing coding-agent proxy test job (ID: ${JOB_ID}) ..." 
- -JOB_FILE=$(mktemp) -cat > "$JOB_FILE" < Date: Fri, 26 Jun 2026 10:21:29 -0400 Subject: [PATCH 12/13] build: limit helper image rebuild triggers --- bin/build-helper-image.sh | 8 +------- src/Procfile | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/bin/build-helper-image.sh b/bin/build-helper-image.sh index 8ac60bb..a329850 100755 --- a/bin/build-helper-image.sh +++ b/bin/build-helper-image.sh @@ -23,13 +23,7 @@ image_in_kind() { checksum_sources() { { cd "$SCRIPT_DIR/../src" && \ - find . \ - \( -name '*.go' -o -name 'go.mod' -o -name 'go.sum' \) \ - -type f \ - -print0 | - LC_ALL=C sort -z | - xargs -0 shasum -a 256 - shasum -a 256 "$SCRIPT_DIR/../Dockerfile" + shasum -a 256 cmd/enqueue.go cmd/root.go main.go go.mod go.sum ../Dockerfile } | shasum -a 256 | cut -d' ' -f1 } diff --git a/src/Procfile b/src/Procfile index edffda5..8a619c9 100644 --- a/src/Procfile +++ b/src/Procfile @@ -1,3 +1,3 @@ faktory: faktory runner: ../bin/opslevel-runner-runner -image-builder: watchexec --watch . 
--exts go,mod,sum --watch ../Dockerfile -- HELPER_IMAGE=localhost/opslevel-runner:local ../bin/build-helper-image.sh opslevel-runner +image-builder: watchexec --watch cmd/enqueue.go --watch cmd/root.go --watch main.go --watch go.mod --watch go.sum --watch ../Dockerfile -- HELPER_IMAGE=localhost/opslevel-runner:local ../bin/build-helper-image.sh opslevel-runner From 183223311ac63d17003f8f954555ebda40b6edc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Archambault?= Date: Fri, 26 Jun 2026 10:26:19 -0400 Subject: [PATCH 13/13] rename helper image tag to 'dev' --- Taskfile.yml | 2 +- bin/build-helper-image.sh | 2 +- bin/opslevel-runner-runner | 2 +- src/Procfile | 2 +- tests/run-runner.sh | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Taskfile.yml b/Taskfile.yml index a7ab410..5f4761e 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -11,7 +11,7 @@ env: vars: FAKTORY_VERSION: "1.9.3" - HELPER_IMAGE: "localhost/opslevel-runner:local" + HELPER_IMAGE: "localhost/opslevel-runner:dev" KIND_CLUSTER: "opslevel-runner" tasks: diff --git a/bin/build-helper-image.sh b/bin/build-helper-image.sh index a329850..b17ca8a 100755 --- a/bin/build-helper-image.sh +++ b/bin/build-helper-image.sh @@ -6,7 +6,7 @@ set -eu CLUSTER_NAME="${1:-opslevel-runner}" -HELPER_IMAGE="${HELPER_IMAGE:-localhost/opslevel-runner:local}" +HELPER_IMAGE="${HELPER_IMAGE:-localhost/opslevel-runner:dev}" SCRIPT_DIR="${BASH_SOURCE[0]%/*}" source "$SCRIPT_DIR/kind-env.sh" diff --git a/bin/opslevel-runner-runner b/bin/opslevel-runner-runner index 1fc75b2..61a9676 100755 --- a/bin/opslevel-runner-runner +++ b/bin/opslevel-runner-runner @@ -12,6 +12,6 @@ exec watchexec --watch "$SCRIPT_DIR/../src" --exts go,mod,sum --restart \ --queues=runner \ --job-pod-max-wait=900 \ --runner-pod-namespace=default \ - --job-pod-helper-image=localhost/opslevel-runner:local \ + --job-pod-helper-image=localhost/opslevel-runner:dev \ --job-pod-requests-cpu="${OPSLEVEL_JOB_POD_REQUESTS_CPU:-50}" \ 
--job-pod-requests-memory="${OPSLEVEL_JOB_POD_REQUESTS_MEMORY:-32}" diff --git a/src/Procfile b/src/Procfile index 8a619c9..81387ce 100644 --- a/src/Procfile +++ b/src/Procfile @@ -1,3 +1,3 @@ faktory: faktory runner: ../bin/opslevel-runner-runner -image-builder: watchexec --watch cmd/enqueue.go --watch cmd/root.go --watch main.go --watch go.mod --watch go.sum --watch ../Dockerfile -- HELPER_IMAGE=localhost/opslevel-runner:local ../bin/build-helper-image.sh opslevel-runner +image-builder: watchexec --watch cmd/enqueue.go --watch cmd/root.go --watch main.go --watch go.mod --watch go.sum --watch ../Dockerfile -- HELPER_IMAGE=localhost/opslevel-runner:dev ../bin/build-helper-image.sh opslevel-runner diff --git a/tests/run-runner.sh b/tests/run-runner.sh index 1a6682a..8fc6843 100755 --- a/tests/run-runner.sh +++ b/tests/run-runner.sh @@ -14,7 +14,7 @@ # When set to "coding-agent", also enables # --job-agent-mode=true and the helper image override. # Default: empty (no sidecar). -# OPSLEVEL_JOB_POD_HELPER_IMAGE - default localhost/opslevel-runner:local +# OPSLEVEL_JOB_POD_HELPER_IMAGE - default localhost/opslevel-runner:dev # FAKTORY_URL - default tcp://localhost:7419 # # Examples: @@ -51,7 +51,7 @@ if [ -n "${OPSLEVEL_QUEUE:-}" ]; then fi fi -HELPER_IMAGE="${OPSLEVEL_JOB_POD_HELPER_IMAGE:-localhost/opslevel-runner:local}" +HELPER_IMAGE="${OPSLEVEL_JOB_POD_HELPER_IMAGE:-localhost/opslevel-runner:dev}" echo "Starting runner (mode=faktory queues=$FAKTORY_QUEUES queue=${OPSLEVEL_QUEUE:-}) ..." exec "$BINARY" \