From 3a1d2c38b7ffc684c2883d9103c9bbaf805746c8 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Sirois Date: Thu, 18 Jun 2026 14:53:11 -0400 Subject: [PATCH] feat: cost CI runs against stored production statistics At CI time, pull the project's stored production stats over the relay RPC (getProductionStats, @query-doctor/core 0.10.4) and cost queries with fromStatisticsExport when present, falling back to the synthetic assumption otherwise. The resulting statisticsMode flows through to the Site API payload, so runs modeled on real prod cardinality are labelled as such instead of being scored on 10M-row / 0.9-correlation defaults. Stats-mode precedence in determineStatsMode is now: API production stats > explicit stats file > synthetic assumption. This replaces the standing "grab recent stats from API if they exist" TODO. Closes the analyzer half of #3353 (Query-Doctor/Site). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/main.ts | 19 +++++++++++++++++ src/runner.test.ts | 52 ++++++++++++++++++++++++++++++++++++++++++++++ src/runner.ts | 23 +++++++++++++++----- 3 files changed, 89 insertions(+), 5 deletions(-) diff --git a/src/main.ts b/src/main.ts index fda1ea32..f05d316c 100644 --- a/src/main.ts +++ b/src/main.ts @@ -69,6 +69,24 @@ async function runInCI( ) : DEFAULT_CONFIG; + // Cost against the project's stored production statistics when available, so + // CI numbers reflect real prod cardinality instead of synthetic assumptions. + // Scoped server-side to this connection's project; null when none is stored + // or the pull fails, in which case the runner falls back to synthetic stats. + const productionStats = await api.getProductionStats().catch((err) => { + log.warn( + `Failed to fetch production stats via RPC: ${err}. 
Falling back to synthetic stats`, + "main", + ); + return null; + }); + if (productionStats && productionStats.length > 0) { + log.info( + `Costing against ${productionStats.length} table(s) of stored production statistics`, + "main", + ); + } + const source: RecentQuerySource = logPath ? new PgbadgerSource(logPath) : remoteDbManager.getConnectorFor(sourcePostgresUrl); @@ -80,6 +98,7 @@ async function runInCI( maxCost, ignoredQueryHashes: config.ignoredQueryHashes, remote, + productionStats: productionStats ?? undefined, }); let allResults: QueryProcessResult[]; let reportContext; diff --git a/src/runner.test.ts b/src/runner.test.ts index 0ddbe3b5..dacb7f02 100644 --- a/src/runner.test.ts +++ b/src/runner.test.ts @@ -1,5 +1,7 @@ import { test, expect, describe } from "vitest"; +import type { ExportedStats } from "@query-doctor/core"; import { buildQueries } from "./reporters/site-api.ts"; +import { Runner } from "./runner.ts"; import type { OptimizedQuery } from "./sql/recent-query.ts"; function fakeQuery(hash: string, state: string): OptimizedQuery { @@ -28,3 +30,53 @@ describe("queryStats.analyzed source of truth", () => { expect(buildQueries(results).length).toBe(3); }); }); + +describe("Runner.determineStatsMode precedence", () => { + const TABLE: ExportedStats = { + tableName: "users", + schemaName: "public", + relpages: 10, + reltuples: 166_000, + relallvisible: 8, + columns: [], + indexes: [], + }; + + const exportMode = { + type: "static", + stats: { + kind: "fromStatisticsExport", + source: { kind: "inline" }, + stats: [TABLE], + }, + }; + + const syntheticMode = { + type: "static", + stats: { kind: "fromAssumption", reltuples: 10_000_000 }, + }; + + test("costs against the production stats export when production stats are provided", async () => { + expect(await Runner.determineStatsMode(undefined, [TABLE])).toEqual( + exportMode, + ); + }); + + test("production stats take precedence over a stats file path", async () => { + // The path is never read 
because production stats win — proven by the + // absence of a filesystem error for this non-existent path. + expect( + await Runner.determineStatsMode("/nonexistent/stats.json", [TABLE]), + ).toEqual(exportMode); + }); + + test("falls back to synthetic assumption when production stats are empty", async () => { + expect(await Runner.determineStatsMode(undefined, [])).toEqual( + syntheticMode, + ); + }); + + test("falls back to synthetic assumption when no stats source is provided", async () => { + expect(await Runner.determineStatsMode()).toEqual(syntheticMode); + }); +}); diff --git a/src/runner.ts b/src/runner.ts index 4fcb3e36..de1c8ab2 100644 --- a/src/runner.ts +++ b/src/runner.ts @@ -14,7 +14,7 @@ import { Connectable } from "./sync/connectable.ts"; import { Remote, StatisticsStrategy } from "./remote/remote.ts"; import { ConnectionManager } from "./sync/connection-manager.ts"; import type { OptimizedQuery } from "./sql/recent-query.ts"; -import { ExportedStats } from "@query-doctor/core"; +import { ExportedStats, Statistics } from "@query-doctor/core"; import { readFile } from "node:fs/promises"; import { buildQueries } from "./reporters/site-api.ts"; @@ -34,6 +34,9 @@ export class Runner { source: RecentQuerySource; ignoredQueryHashes?: string[]; remote?: Remote; + // Real production statistics pulled from the Site API. When present, queries + // are costed against true prod cardinality instead of synthetic assumptions. + productionStats?: ExportedStats[]; }) { const remote = options.remote ?? new Remote( options.targetPostgresUrl, @@ -42,7 +45,7 @@ export class Runner { { disableQueryLoader: true } ); await remote.syncFrom(options.sourcePostgresUrl, - await Runner.determineStatsMode(options.statisticsPath) + await Runner.determineStatsMode(options.statisticsPath, options.productionStats) ); await remote.optimizer.finish; return new Runner( @@ -53,9 +56,19 @@ export class Runner { ); } - // CI either always pulls data from a file or sets a default. 
Never pulls from source - static async determineStatsMode(statsPath?: string): Promise<StatisticsStrategy> { - // TODO: grab recent stats from API if they exist + // Stats-mode precedence for CI: real production stats pulled from the Site API + // win, then an explicit stats file, then synthetic assumptions. CI never dumps + // stats from the ephemeral target database itself. + static async determineStatsMode( + statsPath?: string, + productionStats?: ExportedStats[], + ): Promise<StatisticsStrategy> { + if (productionStats && productionStats.length > 0) { + return { + type: "static", + stats: Statistics.statsModeFromExport(productionStats), + }; + } if (statsPath) { const file = await readFile(statsPath); const rawStats = JSON.parse(file.toString())