From db98dfc4b804050bf759b0efd844c2b94a687395 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 1 Jul 2026 10:15:33 +1000 Subject: [PATCH 1/4] test(cli): drain PTY stdout before resolving E2E `exit` The E2E helper's `exit` promise resolved directly on `pty.onExit`, which is not ordered after the final `pty.onData`. On a loaded runner the OS pipe can deliver the exit event while trailing stdout is still in flight, so tests that do `await r.exit` then read `r.output` intermittently observe a truncated buffer (the flaky runner-aware-help failure, which hopped across Node 22/24 matrix nodes on identical code). Resolve `exit` only after the stdout stream has drained: capture the exit event, then wait until no further data has arrived for a short settle window (50ms, reset by each new chunk) bounded by a 2s hard cap so a chatty process can never hang the suite. A bare microtask is insufficient because pipe data may still be in flight after the exit event. Fixes the latent race for all E2E suites (smoke, runner-aware-help, doctor, database-url, auth-login-cancel) without per-test changes. Fixes #536 --- packages/cli/tests/helpers/pty.ts | 32 ++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/packages/cli/tests/helpers/pty.ts b/packages/cli/tests/helpers/pty.ts index 3d7b787e..1ccdb572 100644 --- a/packages/cli/tests/helpers/pty.ts +++ b/packages/cli/tests/helpers/pty.ts @@ -115,12 +115,42 @@ export function render(args: string[], opts: RenderOptions = {}): Rendered { }) let raw = '' + // Timestamp of the most recent stdout chunk. The `exit` resolver below + // waits for this to go quiet so trailing/in-flight output lands in `raw` + // before any test reads `r.output`. + let lastDataAt = Date.now() pty.onData((d) => { raw += d + lastDataAt = Date.now() }) + // Resolve `exit` only AFTER the stdout stream has drained. node-pty's + // `onExit` is NOT ordered after the final `onData`: on a loaded runner the + // OS pipe can deliver the exit event while trailing stdout is still in + // flight, so a test that does `await r.exit` then reads `r.output` can + // observe a truncated buffer (the flaky-help race). We capture the exit + // event, then wait until no further data has arrived for a short settle + // window (SETTLE_MS) — each new chunk resets the window — bounded by a hard + // cap (DRAIN_CAP_MS) so a chatty process can never hang the suite. A bare + // microtask/`setImmediate` is insufficient because pipe data may still be + // in flight after the exit event. + const SETTLE_MS = 50 + const DRAIN_CAP_MS = 2_000 const exit = new Promise<{ exitCode: number; signal?: number }>((res) => { - pty.onExit((e) => res(e)) + pty.onExit((e) => { + const deadline = Date.now() + DRAIN_CAP_MS + const check = () => { + const quietFor = Date.now() - lastDataAt + if (quietFor >= SETTLE_MS || Date.now() >= deadline) { + res(e) + return + } + // Re-check once the settle window would elapse, but never past the + // hard cap. + setTimeout(check, Math.min(SETTLE_MS - quietFor, deadline - Date.now())) + } + check() + }) }) const waitFor = async (match: string | RegExp, timeoutMs = 5_000) => { From c88d9d26b77b416a64134f2ffb0a3915d06593d4 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 1 Jul 2026 10:39:25 +1000 Subject: [PATCH 2/4] fix(cli): capture non-interactive E2E output over pipes, not a PTY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The runner-aware `--help` E2E test flaked in CI, always dropping the tail of the help (the Examples section, where `bunx stash init` / `bunx stash db install` live) while the earlier Usage line survived. Root cause is a Linux-only node-pty behaviour, not a race: `--help` emits ~5KB in a single `console.log` and the process exits immediately. When the pty slave closes with data still unread, Linux discards the pty buffer, so under CI load node-pty never delivers the tail — `onData` fires for the head only. macOS ptys retain that buffer, which is why it never reproduced locally. A post-exit "drain" wait (the previous approach on this branch) cannot help: the bytes are gone before `onData` is ever called. Fix: add a pipe-based `run()` helper (`child_process.spawn`, piped stdio) for non-interactive command assertions. Pipes buffer in-process and deliver every byte before `close`, so large bursts are never truncated on any platform. Migrate `runner-aware-help.e2e.test.ts` to it. The PTY `render()` helper stays for interactive tests, and its speculative drain change is reverted. Also pass `quiet: true` to dotenv's `config()` calls: v17 prints an `injected env (N) from …` banner to stdout on every call, so the CLI was emitting four noisy, non-deterministic banner lines ahead of its own output on every invocation. A smoke-test guard keeps the banner from returning. --- packages/cli/src/bin/main.ts | 14 +++- .../tests/e2e/runner-aware-help.e2e.test.ts | 12 ++- packages/cli/tests/e2e/smoke.e2e.test.ts | 5 ++ packages/cli/tests/helpers/pty.ts | 32 +------- packages/cli/tests/helpers/run.ts | 80 +++++++++++++++++++ 5 files changed, 101 insertions(+), 42 deletions(-) create mode 100644 packages/cli/tests/helpers/run.ts diff --git a/packages/cli/src/bin/main.ts b/packages/cli/src/bin/main.ts index a880950d..c9cbbc27 100644 --- a/packages/cli/src/bin/main.ts +++ b/packages/cli/src/bin/main.ts @@ -4,10 +4,16 @@ import { config } from 'dotenv' // not overwrite vars that are already set, so loading .env.local first means // its values win over .env for the same keys. Users can still set anything in // the real environment to override both. -config({ path: '.env.local' }) -config({ path: '.env.development.local' }) -config({ path: '.env.development' }) -config({ path: '.env' }) +// +// `quiet: true` suppresses dotenv v17's `injected env (N) from …` banner, +// which it now prints to stdout on every `config()` call. Without it the CLI +// emits four noisy, non-deterministic banner lines (with rotating tips) ahead +// of its own output on every invocation — restoring the silent behaviour of +// dotenv v16. +config({ path: '.env.local', quiet: true }) +config({ path: '.env.development.local', quiet: true }) +config({ path: '.env.development', quiet: true }) +config({ path: '.env', quiet: true }) import { readFileSync } from 'node:fs' import { dirname, join } from 'node:path' diff --git a/packages/cli/tests/e2e/runner-aware-help.e2e.test.ts b/packages/cli/tests/e2e/runner-aware-help.e2e.test.ts index a2a636d8..786f9cae 100644 --- a/packages/cli/tests/e2e/runner-aware-help.e2e.test.ts +++ b/packages/cli/tests/e2e/runner-aware-help.e2e.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest' -import { render } from '../helpers/pty.js' +import { run } from '../helpers/run.js' /** * E2E coverage for the runner-aware help rendering. The smoke suite @@ -29,9 +29,8 @@ describe('--help — runner-aware Usage + Examples', () => { ua, label, }) => { - const r = render(['--help'], { env: { npm_config_user_agent: ua } }) - const { exitCode } = await r.exit - expect(exitCode).toBe(0) + const r = await run(['--help'], { env: { npm_config_user_agent: ua } }) + expect(r.exitCode).toBe(0) // Usage line must use the right runner. The leader is stable // (`messages.cli.usagePrefix === 'Usage: '`) so we assert on the // suffix the renderer composes at runtime. @@ -50,9 +49,8 @@ describe('auth — runner-aware Usage + Examples', () => { label, }) => { // `auth` with no subcommand prints the auth HELP and exits 0. - const r = render(['auth'], { env: { npm_config_user_agent: ua } }) - const { exitCode } = await r.exit - expect(exitCode).toBe(0) + const r = await run(['auth'], { env: { npm_config_user_agent: ua } }) + expect(r.exitCode).toBe(0) expect(r.output).toContain(`Usage: ${label} stash auth`) expect(r.output).toContain(`${label} stash auth login`) }) diff --git a/packages/cli/tests/e2e/smoke.e2e.test.ts b/packages/cli/tests/e2e/smoke.e2e.test.ts index 55f6373e..d64274b8 100644 --- a/packages/cli/tests/e2e/smoke.e2e.test.ts +++ b/packages/cli/tests/e2e/smoke.e2e.test.ts @@ -21,6 +21,11 @@ describe('stash CLI — non-interactive smoke', () => { // copy strings, so they stay inline. expect(r.output).toContain('init') expect(r.output).toContain('db install') + // dotenv v17 prints an `injected env (N) from …` banner on every + // `config()` call unless `quiet: true` is passed. Guard against that + // noise regressing back into the CLI's stdout (it also destabilises the + // PTY output capture these e2e tests rely on). + expect(r.output).not.toContain('injected env') }) it('--version prints the package version', async () => { diff --git a/packages/cli/tests/helpers/pty.ts b/packages/cli/tests/helpers/pty.ts index 1ccdb572..3d7b787e 100644 --- a/packages/cli/tests/helpers/pty.ts +++ b/packages/cli/tests/helpers/pty.ts @@ -115,42 +115,12 @@ export function render(args: string[], opts: RenderOptions = {}): Rendered { }) let raw = '' - // Timestamp of the most recent stdout chunk. The `exit` resolver below - // waits for this to go quiet so trailing/in-flight output lands in `raw` - // before any test reads `r.output`. - let lastDataAt = Date.now() pty.onData((d) => { raw += d - lastDataAt = Date.now() }) - // Resolve `exit` only AFTER the stdout stream has drained. node-pty's - // `onExit` is NOT ordered after the final `onData`: on a loaded runner the - // OS pipe can deliver the exit event while trailing stdout is still in - // flight, so a test that does `await r.exit` then reads `r.output` can - // observe a truncated buffer (the flaky-help race). We capture the exit - // event, then wait until no further data has arrived for a short settle - // window (SETTLE_MS) — each new chunk resets the window — bounded by a hard - // cap (DRAIN_CAP_MS) so a chatty process can never hang the suite. A bare - // microtask/`setImmediate` is insufficient because pipe data may still be - // in flight after the exit event. - const SETTLE_MS = 50 - const DRAIN_CAP_MS = 2_000 const exit = new Promise<{ exitCode: number; signal?: number }>((res) => { - pty.onExit((e) => { - const deadline = Date.now() + DRAIN_CAP_MS - const check = () => { - const quietFor = Date.now() - lastDataAt - if (quietFor >= SETTLE_MS || Date.now() >= deadline) { - res(e) - return - } - // Re-check once the settle window would elapse, but never past the - // hard cap. - setTimeout(check, Math.min(SETTLE_MS - quietFor, deadline - Date.now())) - } - check() - }) + pty.onExit((e) => res(e)) }) const waitFor = async (match: string | RegExp, timeoutMs = 5_000) => { diff --git a/packages/cli/tests/helpers/run.ts b/packages/cli/tests/helpers/run.ts new file mode 100644 index 00000000..b15855d8 --- /dev/null +++ b/packages/cli/tests/helpers/run.ts @@ -0,0 +1,80 @@ +import { spawn } from 'node:child_process' +import stripAnsi from 'strip-ansi' +import { STASH_BIN } from './pty.js' + +export interface RunOptions { + cwd?: string + env?: Record +} + +export interface RunResult { + exitCode: number + signal: NodeJS.Signals | null + /** ANSI-stripped combined stdout + stderr. */ + output: string + /** Raw combined stdout + stderr including ANSI escapes. */ + raw: string + stdout: string + stderr: string +} + +/** + * Run the built CLI to completion over plain pipes and capture all output. + * + * Prefer this over {@link render} for NON-INTERACTIVE commands (`--help`, + * `--version`, usage/error paths). A pipe buffers the child's output in the + * Node process and delivers every byte before `close`, so large bursts are + * never truncated. + * + * A PTY, by contrast, loses data on Linux: when the child writes a big burst + * (the ~5KB `--help` text is a single `console.log`) and exits immediately, + * the kernel discards whatever is still unread in the pty buffer when the + * slave closes. On a loaded CI runner node-pty can't drain fast enough, so the + * tail of the help — the Examples section — goes missing and assertions on it + * flake. (macOS ptys keep buffered data on close, which is why it only bites + * in CI.) Post-exit "drain" waits can't help because the bytes are gone before + * `onData` ever fires. Only interactive tests that must send keystrokes or + * exercise TTY-specific rendering need {@link render}. + */ +export function run(args: string[], opts: RunOptions = {}): Promise { + const env: Record = { + ...(process.env as Record), + // Mirror render()'s env so both helpers hit the same CLI code paths. + NO_COLOR: '1', + FORCE_COLOR: '0', + CI: 'true', + ...(opts.env ?? {}), + } + + const child = spawn(process.execPath, [STASH_BIN, ...args], { + cwd: opts.cwd ?? process.cwd(), + env, + stdio: ['ignore', 'pipe', 'pipe'], + }) + + let stdout = '' + let stderr = '' + child.stdout.setEncoding('utf8') + child.stderr.setEncoding('utf8') + child.stdout.on('data', (d: string) => { + stdout += d + }) + child.stderr.on('data', (d: string) => { + stderr += d + }) + + return new Promise((res, rej) => { + child.on('error', rej) + child.on('close', (code, signal) => { + const raw = stdout + stderr + res({ + exitCode: code ?? 0, + signal, + output: stripAnsi(raw), + raw, + stdout: stripAnsi(stdout), + stderr: stripAnsi(stderr), + }) + }) + }) +} From aedfd9d4e09488577e975daa7bbcda6c64380e14 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 1 Jul 2026 11:17:53 +1000 Subject: [PATCH 3/4] fix(cli): don't mask signal-terminated child as exit 0 in run() helper `child.on('close')` gives `code === null` when the child was killed by a signal. Defaulting `exitCode` to `code ?? 0` reported a crash/kill as a clean exit, so `expect(r.exitCode).toBe(0)` could pass over a SIGSEGV/SIGKILL. Keep `exitCode` as the real numeric code (or null on signal) and widen the type to `number | null` so tests can tell success from a crash. --- packages/cli/tests/helpers/run.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/cli/tests/helpers/run.ts b/packages/cli/tests/helpers/run.ts index b15855d8..2d164b5b 100644 --- a/packages/cli/tests/helpers/run.ts +++ b/packages/cli/tests/helpers/run.ts @@ -8,7 +8,12 @@ export interface RunOptions { } export interface RunResult { - exitCode: number + /** + * The process's numeric exit code, or `null` when it was terminated by a + * signal (see {@link RunResult.signal}). Never coerced to 0, so a + * signal-killed child (crash, SIGKILL) is distinguishable from a clean exit. + */ + exitCode: number | null signal: NodeJS.Signals | null /** ANSI-stripped combined stdout + stderr. */ output: string @@ -68,7 +73,9 @@ export function run(args: string[], opts: RunOptions = {}): Promise { child.on('close', (code, signal) => { const raw = stdout + stderr res({ - exitCode: code ?? 0, + // `code` is null when the child was terminated by `signal`; keep it + // null rather than masking a kill/crash as a clean 0 exit. + exitCode: code, signal, output: stripAnsi(raw), raw, From df71ed1ea77427f653365165bd16695c50a0452d Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Thu, 2 Jul 2026 12:03:16 +1000 Subject: [PATCH 4/4] test(cli): close run() coverage gaps from auxesis review - exercise the dotenv "injected env" banner guard against a real .env in a temp cwd (previous assertion passed vacuously) - add a run() test for non-zero exit code + error output on an unknown command - assert stdout/stderr land in independent RunResult fields, not just combined output - extract buildRunResult() as a pure function and unit-test the signal-terminated / exitCode:null path directly --- packages/cli/tests/e2e/smoke.e2e.test.ts | 59 +++++++++++++++++++++--- packages/cli/tests/helpers/run.test.ts | 48 +++++++++++++++++++ packages/cli/tests/helpers/run.ts | 37 ++++++++++----- 3 files changed, 126 insertions(+), 18 deletions(-) create mode 100644 packages/cli/tests/helpers/run.test.ts diff --git a/packages/cli/tests/e2e/smoke.e2e.test.ts b/packages/cli/tests/e2e/smoke.e2e.test.ts index d64274b8..777f4e5a 100644 --- a/packages/cli/tests/e2e/smoke.e2e.test.ts +++ b/packages/cli/tests/e2e/smoke.e2e.test.ts @@ -1,9 +1,11 @@ -import { readFileSync } from 'node:fs' -import { dirname, resolve } from 'node:path' +import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { dirname, join, resolve } from 'node:path' import { fileURLToPath } from 'node:url' import { describe, expect, it } from 'vitest' import { messages } from '../../src/messages.js' import { render } from '../helpers/pty.js' +import { run } from '../helpers/run.js' const __dirname = dirname(fileURLToPath(import.meta.url)) const pkg = JSON.parse( @@ -21,11 +23,28 @@ describe('stash CLI — non-interactive smoke', () => { // copy strings, so they stay inline. expect(r.output).toContain('init') expect(r.output).toContain('db install') - // dotenv v17 prints an `injected env (N) from …` banner on every - // `config()` call unless `quiet: true` is passed. Guard against that - // noise regressing back into the CLI's stdout (it also destabilises the - // PTY output capture these e2e tests rely on). - expect(r.output).not.toContain('injected env') + // The dotenv "injected env" banner regression guard lives in the + // dedicated test below — this cwd has no .env file, so a bare + // `not.toContain('injected env')` here would pass vacuously. + }) + + it('suppresses dotenv v17\'s "injected env" banner when a .env file exists in cwd', async () => { + // dotenv v17 prints an `injected env (N) from …` banner to stdout on + // every `config()` call that actually injects a variable, unless + // `quiet: true` is passed. The repo has no `.env` anywhere, so exercising + // this requires a real .env file in the spawned process's cwd — without + // it, `config()` never finds anything to inject and the banner can never + // appear regardless of whether `quiet: true` is present in the CLI. + const tmpDir = mkdtempSync(join(tmpdir(), 'stash-dotenv-quiet-')) + try { + writeFileSync(join(tmpDir, '.env'), 'STASH_DOTENV_QUIET_TEST=1\n') + const r = await run(['--version'], { cwd: tmpDir }) + expect(r.exitCode).toBe(0) + expect(r.output).toContain(pkg.version) + expect(r.output).not.toContain('injected env') + } finally { + rmSync(tmpDir, { recursive: true, force: true }) + } }) it('--version prints the package version', async () => { @@ -78,4 +97,30 @@ describe('stash CLI — non-interactive smoke', () => { // suffix is the stable assertion target. expect(r.output).toContain('stash db migrate" is not yet implemented.') }) + + // The two cases below exercise `run()` directly rather than `render()`. + // Every existing `run()` consumer only asserted `exitCode === 0` + // (runner-aware-help.e2e.test.ts) and every exit-1 case above still went + // through `render()`, so `run()`'s actual purpose — correctly propagating + // a non-zero/null exit code instead of masking it — was untested by + // anything that used `run()`. + it('run(): surfaces a non-zero exit code + error output for an unknown command', async () => { + const r = await run(['definitely-not-a-command']) + expect(r.exitCode).toBe(1) + expect(r.output).toContain(messages.cli.unknownCommand) + expect(r.output).toContain('definitely-not-a-command') + }) + + it('run(): splits stdout/stderr into independent channels on a failure path', async () => { + // The unknown-command error is written via `console.error` (stderr); the + // HELP banner that follows it is written via `console.log` (stdout). + // Assert on the dedicated `stdout`/`stderr` fields — not just the + // combined `output` — so a regression that wired both `data` handlers + // to the same buffer would be caught. + const r = await run(['definitely-not-a-command']) + expect(r.stderr).toContain(messages.cli.unknownCommand) + expect(r.stderr).not.toContain(messages.cli.usagePrefix) + expect(r.stdout).toContain(messages.cli.usagePrefix) + expect(r.stdout).not.toContain(messages.cli.unknownCommand) + }) }) diff --git a/packages/cli/tests/helpers/run.test.ts b/packages/cli/tests/helpers/run.test.ts new file mode 100644 index 00000000..a4a024f4 --- /dev/null +++ b/packages/cli/tests/helpers/run.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, it } from 'vitest' +import { buildRunResult } from './run.js' + +// Unit coverage for the pure `close`-event → RunResult mapping extracted +// from `run()`. `run()` itself always spawns the built CLI binary and +// exposes no handle to signal the child, which makes the signal-terminated +// path (the headline fix in aedfd9d) impractical to exercise deterministically +// through an E2E test. Testing the extracted pure function directly avoids +// needing a timeout/SIGKILL escape hatch or other API surface change. +describe('buildRunResult', () => { + it('reports a clean exit', () => { + const r = buildRunResult(0, null, 'hello\n', '') + expect(r.exitCode).toBe(0) + expect(r.signal).toBeNull() + expect(r.stdout).toBe('hello\n') + expect(r.output).toBe('hello\n') + }) + + it('reports a non-zero exit code', () => { + const r = buildRunResult(1, null, '', 'boom\n') + expect(r.exitCode).toBe(1) + expect(r.signal).toBeNull() + expect(r.stderr).toBe('boom\n') + }) + + it('keeps exitCode null (never masked to 0) when the child is signal-terminated', () => { + // Node guarantees exactly one of code/signal is non-null on 'close'. A + // naive `code ?? 0` would collapse this to a "successful" exit 0, hiding + // a crash/OOM/SIGTERM kill from callers. + const r = buildRunResult(null, 'SIGTERM', '', '') + expect(r.exitCode).toBeNull() + expect(r.signal).toBe('SIGTERM') + }) + + it('strips ANSI codes independently from stdout/stderr while combining into output/raw', () => { + // Build the ESC control byte at runtime (rather than embedding it + // literally in the source) to keep this file free of non-printable + // characters. + const ESC = String.fromCharCode(27) + const green = `${ESC}[32mok${ESC}[0m` + const red = `${ESC}[31merr${ESC}[0m` + const r = buildRunResult(0, null, green, red) + expect(r.stdout).toBe('ok') + expect(r.stderr).toBe('err') + expect(r.output).toBe('okerr') + expect(r.raw).toBe(green + red) + }) +}) diff --git a/packages/cli/tests/helpers/run.ts b/packages/cli/tests/helpers/run.ts index 2d164b5b..91254296 100644 --- a/packages/cli/tests/helpers/run.ts +++ b/packages/cli/tests/helpers/run.ts @@ -71,17 +71,32 @@ export function run(args: string[], opts: RunOptions = {}): Promise { return new Promise((res, rej) => { child.on('error', rej) child.on('close', (code, signal) => { - const raw = stdout + stderr - res({ - // `code` is null when the child was terminated by `signal`; keep it - // null rather than masking a kill/crash as a clean 0 exit. - exitCode: code, - signal, - output: stripAnsi(raw), - raw, - stdout: stripAnsi(stdout), - stderr: stripAnsi(stderr), - }) + res(buildRunResult(code, signal, stdout, stderr)) }) }) } + +/** + * Pure `close`-event → {@link RunResult} mapping, factored out of `run()` so + * the exit-code/signal handling can be unit-tested directly without spawning + * a real child process (see `run.test.ts`). Node guarantees exactly one of + * `code`/`signal` is non-null on `'close'` — this must never coerce a null + * `code` to `0`, or a signal-terminated child (crash, SIGKILL, OOM) would be + * misreported as a clean exit. + */ +export function buildRunResult( + code: number | null, + signal: NodeJS.Signals | null, + stdout: string, + stderr: string, +): RunResult { + const raw = stdout + stderr + return { + exitCode: code, + signal, + output: stripAnsi(raw), + raw, + stdout: stripAnsi(stdout), + stderr: stripAnsi(stderr), + } +}