diff --git a/src/main.ts b/src/main.ts index fda1ea3..f05d316 100644 --- a/src/main.ts +++ b/src/main.ts @@ -69,6 +69,24 @@ async function runInCI( ) : DEFAULT_CONFIG; + // Cost against the project's stored production statistics when available, so + // CI numbers reflect real prod cardinality instead of synthetic assumptions. + // Scoped server-side to this connection's project; null when none is stored + // or the pull fails, in which case the runner falls back to synthetic stats. + const productionStats = await api.getProductionStats().catch((err) => { + log.warn( + `Failed to fetch production stats via RPC: ${err}. Falling back to synthetic stats`, + "main", + ); + return null; + }); + if (productionStats && productionStats.length > 0) { + log.info( + `Costing against ${productionStats.length} table(s) of stored production statistics`, + "main", + ); + } + const source: RecentQuerySource = logPath ? new PgbadgerSource(logPath) : remoteDbManager.getConnectorFor(sourcePostgresUrl); @@ -80,6 +98,7 @@ async function runInCI( maxCost, ignoredQueryHashes: config.ignoredQueryHashes, remote, + productionStats: productionStats ?? 
undefined, }); let allResults: QueryProcessResult[]; let reportContext; diff --git a/src/runner.test.ts b/src/runner.test.ts index 0ddbe3b..dacb7f0 100644 --- a/src/runner.test.ts +++ b/src/runner.test.ts @@ -1,5 +1,7 @@ import { test, expect, describe } from "vitest"; +import type { ExportedStats } from "@query-doctor/core"; import { buildQueries } from "./reporters/site-api.ts"; +import { Runner } from "./runner.ts"; import type { OptimizedQuery } from "./sql/recent-query.ts"; function fakeQuery(hash: string, state: string): OptimizedQuery { @@ -28,3 +30,53 @@ describe("queryStats.analyzed source of truth", () => { expect(buildQueries(results).length).toBe(3); }); }); + +describe("Runner.determineStatsMode precedence", () => { + const TABLE: ExportedStats = { + tableName: "users", + schemaName: "public", + relpages: 10, + reltuples: 166_000, + relallvisible: 8, + columns: [], + indexes: [], + }; + + const exportMode = { + type: "static", + stats: { + kind: "fromStatisticsExport", + source: { kind: "inline" }, + stats: [TABLE], + }, + }; + + const syntheticMode = { + type: "static", + stats: { kind: "fromAssumption", reltuples: 10_000_000 }, + }; + + test("costs against the production stats export when production stats are provided", async () => { + expect(await Runner.determineStatsMode(undefined, [TABLE])).toEqual( + exportMode, + ); + }); + + test("production stats take precedence over a stats file path", async () => { + // The path is never read because production stats win — proven by the + // absence of a filesystem error for this non-existent path. 
+ expect( + await Runner.determineStatsMode("/nonexistent/stats.json", [TABLE]), + ).toEqual(exportMode); + }); + + test("falls back to synthetic assumption when production stats are empty", async () => { + expect(await Runner.determineStatsMode(undefined, [])).toEqual( + syntheticMode, + ); + }); + + test("falls back to synthetic assumption when no stats source is provided", async () => { + expect(await Runner.determineStatsMode()).toEqual(syntheticMode); + }); +}); diff --git a/src/runner.ts b/src/runner.ts index 4fcb3e3..de1c8ab 100644 --- a/src/runner.ts +++ b/src/runner.ts @@ -14,7 +14,7 @@ import { Connectable } from "./sync/connectable.ts"; import { Remote, StatisticsStrategy } from "./remote/remote.ts"; import { ConnectionManager } from "./sync/connection-manager.ts"; import type { OptimizedQuery } from "./sql/recent-query.ts"; -import { ExportedStats } from "@query-doctor/core"; +import { ExportedStats, Statistics } from "@query-doctor/core"; import { readFile } from "node:fs/promises"; import { buildQueries } from "./reporters/site-api.ts"; @@ -34,6 +34,9 @@ export class Runner { source: RecentQuerySource; ignoredQueryHashes?: string[]; remote?: Remote; + // Real production statistics pulled from the Site API. When present, queries + // are costed against true prod cardinality instead of synthetic assumptions. + productionStats?: ExportedStats[]; }) { const remote = options.remote ?? new Remote( options.targetPostgresUrl, @@ -42,7 +45,7 @@ export class Runner { { disableQueryLoader: true } ); await remote.syncFrom(options.sourcePostgresUrl, - await Runner.determineStatsMode(options.statisticsPath) + await Runner.determineStatsMode(options.statisticsPath, options.productionStats) ); await remote.optimizer.finish; return new Runner( @@ -53,9 +56,19 @@ export class Runner { ); } - // CI either always pulls data from a file or sets a default. 
Never pulls from source - static async determineStatsMode(statsPath?: string): Promise<StatisticsStrategy> { - // TODO: grab recent stats from API if they exist + // Stats-mode precedence for CI: real production stats pulled from the Site API + // win, then an explicit stats file, then synthetic assumptions. CI never dumps + // stats from the ephemeral target database itself. + static async determineStatsMode( + statsPath?: string, + productionStats?: ExportedStats[], + ): Promise<StatisticsStrategy> { + if (productionStats && productionStats.length > 0) { + return { + type: "static", + stats: Statistics.statsModeFromExport(productionStats), + }; + } if (statsPath) { const file = await readFile(statsPath); const rawStats = JSON.parse(file.toString())