From 22957edde267dc4b3e7d401a34fe05fda76ac668 Mon Sep 17 00:00:00 2001
From: agentnightshift <agentnightshift@users.noreply.github.com>
Date: Mon, 29 Jun 2026 15:52:25 -0500
Subject: [PATCH] feat(retrieval): add retrieval event logging and evaluation
 harness

- Add RETRIEVAL_EVENT_LOGGING_ENABLED (default false) for detailed
  search/fetch telemetry in separate retrieval_query_events collection
  (90-day TTL via Firestore, linked to parent tool_call events)
- New retrievalEvaluation.ts implementing:
  - FirestoreRetrievalEvalCaseStore + replace/list helpers
  - buildSyntheticEvalCases + correlateImplicitFetches (search+fetch
    pairs within time window become positive labels)
  - computeRetrievalEvalMetrics (hit@k, recall@k, MRR, zero-result rate,
    p50/p95 latency)
- New functions/scripts/retrieval-eval.ts + package.json scripts:
    eval:generate  (plant isolated corpus + cases in memory_vectors_eval)
    eval:import    (turn recent production retrieval events into cases)
    eval:run       (--mode isolated|production, --cutoffs, reports JSON metrics)
- Wire new config (retrievalEventLoggingEnabled, topK, memoryCollection)
  through app.ts into createMetaCortexMcpServer and observer
- mcpServer: extend execute wrapper to accept optional retrieval
  descriptors; search_context and fetch_context now emit rich events
  when enabled (with scores converted from cosine distance)
- Test updates: InMemoryToolCallObserver.listRetrievalEvents, integration
  assertions, config boolean parser tests, fakes
- Smoke test: --fetch-first / MCP_FETCH_FIRST to drive fetch after search
- Infra: firestore indexes for memory_vectors_eval, rules deny for new
  collections (retrieval_query_events, retrieval_eval_cases, *_eval*),
  deploy-firestore-ttl.sh updated
- Docs + .env.example updated

All tests green (49 passing).
---
 README.md                                  |  31 ++
 firestore.indexes.json                     | 115 ++++-
 firestore.rules                            |  12 +
 functions/.env.example                     |   3 +
 functions/package.json                     |   3 +
 functions/scripts/mcp-smoke-test.mjs       |  13 +-
 functions/scripts/retrieval-eval.ts        | 492 +++++++++++++++++++++
 functions/src/app.ts                       |   4 +
 functions/src/config.ts                    |  28 ++
 functions/src/mcpServer.ts                 |  77 +++-
 functions/src/observability.ts             |  91 +++-
 functions/src/retrievalEvaluation.ts       | 328 ++++++++++++++
 functions/test/config.test.ts              |  18 +
 functions/test/mcp.integration.test.ts     |  30 ++
 functions/test/retrievalEvaluation.test.ts | 196 ++++++++
 functions/test/support/fakes.ts            |  27 +-
 scripts/deploy-firestore-ttl.sh            |   1 +
 17 files changed, 1448 insertions(+), 21 deletions(-)
 create mode 100644 functions/scripts/retrieval-eval.ts
 create mode 100644 functions/src/retrievalEvaluation.ts
 create mode 100644 functions/test/retrievalEvaluation.test.ts

diff --git a/README.md b/README.md
index 60b32ec..3772b2f 100644
--- a/README.md
+++ b/README.md
@@ -402,6 +402,11 @@ After deployment, there are three places to look:
 - `memory_events` in Firestore shows client-attributed tool usage over time
 - Cloud Logging shows request failures and structured tool-event logs
 
+When `RETRIEVAL_EVENT_LOGGING_ENABLED=true`, `retrieval_query_events` also stores
+full search queries, filters, limits, ranked result ids/scores, and fetch ids. This
+collection is separate because the full queries may contain sensitive user text;
+it uses the same 90-day TTL target as `memory_events`.
+
 `memory_events` records one document per tool call and one document per ingress rejection. Events include:
 
 - `client_id`
@@ -438,9 +443,35 @@ Search events do include a short `query_preview`, but the observability collecti
 Retention is handled with Firestore TTL policies:
 
 - `memory_events.expires_at` targets 90-day audit retention
+- `retrieval_query_events.expires_at` targets 90-day retrieval telemetry retention
 - `memory_vectors_write_fingerprints.expires_at` targets 30-day fingerprint retention
 - fingerprint documents keep numeric `dedupe_expires_at` for the short duplicate-write window
 
+## Retrieval evaluation
+
+The evaluation corpus is stored in `retrieval_eval_cases`; isolated synthetic
+memories are stored separately in `memory_vectors_eval`. Cases have no lifecycle
+field: regeneration replaces the selected source partition and removes obsolete
+cases.
+
+```bash
+# Seed a deterministic isolated corpus and replace its eval cases.
+npm --prefix functions run eval:generate
+
+# Measure hit@k, recall@k, MRR, empty results, and p50/p95 latency.
+npm --prefix functions run eval:run -- --mode isolated
+
+# Explicitly convert recent production search-to-fetch evidence into eval cases.
+npm --prefix functions run eval:import -- --lookback-hours 168
+
+# Run imported production cases through the deployed MCP endpoint.
+npm --prefix functions run eval:run -- --mode production --url "$MCP_BASE_URL"
+```
+
+Production retrieval events are evidence only. They do not become benchmark cases
+until `eval:import` is run, and a successful fetch is treated as a positive label;
+the harness does not infer negative relevance judgments.
+
 ## Quick start
 
 1. Install dependencies:
diff --git a/firestore.indexes.json b/firestore.indexes.json
index 26da8ac..e514f59 100644
--- a/firestore.indexes.json
+++ b/firestore.indexes.json
@@ -54,7 +54,120 @@
           }
         }
       ]
+    },
+    {
+      "collectionGroup": "memory_vectors_eval",
+      "queryScope": "COLLECTION",
+      "fields": [
+        {
+          "fieldPath": "metadata.module_name",
+          "order": "ASCENDING"
+        },
+        {
+          "fieldPath": "embedding",
+          "vectorConfig": {
+            "dimension": 768,
+            "flat": {}
+          }
+        }
+      ]
+    },
+    {
+      "collectionGroup": "memory_vectors_eval",
+      "queryScope": "COLLECTION",
+      "fields": [
+        {
+          "fieldPath": "metadata.branch_state",
+          "order": "ASCENDING"
+        },
+        {
+          "fieldPath": "embedding",
+          "vectorConfig": {
+            "dimension": 768,
+            "flat": {}
+          }
+        }
+      ]
+    },
+    {
+      "collectionGroup": "memory_vectors_eval",
+      "queryScope": "COLLECTION",
+      "fields": [
+        {
+          "fieldPath": "metadata.branch_state",
+          "order": "ASCENDING"
+        },
+        {
+          "fieldPath": "metadata.module_name",
+          "order": "ASCENDING"
+        },
+        {
+          "fieldPath": "embedding",
+          "vectorConfig": {
+            "dimension": 768,
+            "flat": {}
+          }
+        }
+      ]
     }
   ],
-  "fieldOverrides": []
+  "fieldOverrides": [
+    {
+      "collectionGroup": "memory_events",
+      "fieldPath": "expires_at",
+      "ttl": true,
+      "indexes": [
+        {
+          "order": "ASCENDING",
+          "queryScope": "COLLECTION"
+        },
+        {
+          "order": "DESCENDING",
+          "queryScope": "COLLECTION"
+        },
+        {
+          "arrayConfig": "CONTAINS",
+          "queryScope": "COLLECTION"
+        }
+      ]
+    },
+    {
+      "collectionGroup": "memory_vectors_write_fingerprints",
+      "fieldPath": "expires_at",
+      "ttl": true,
+      "indexes": [
+        {
+          "order": "ASCENDING",
+          "queryScope": "COLLECTION"
+        },
+        {
+          "order": "DESCENDING",
+          "queryScope": "COLLECTION"
+        },
+        {
+          "arrayConfig": "CONTAINS",
+          "queryScope": "COLLECTION"
+        }
+      ]
+    },
+    {
+      "collectionGroup": "retrieval_query_events",
+      "fieldPath": "expires_at",
+      "ttl": true,
+      "indexes": [
+        {
+          "order": "ASCENDING",
+          "queryScope": "COLLECTION"
+        },
+        {
+          "order": "DESCENDING",
+          "queryScope": "COLLECTION"
+        },
+        {
+          "arrayConfig": "CONTAINS",
+          "queryScope": "COLLECTION"
+        }
+      ]
+    }
+  ]
 }
diff --git a/firestore.rules b/firestore.rules
index fa63a05..90fc5e1 100644
--- a/firestore.rules
+++ b/firestore.rules
@@ -8,8 +8,20 @@ service cloud.firestore {
     match /memory_events/{document=**} {
       allow read, write: if false;
     }
+    match /retrieval_query_events/{document=**} {
+      allow read, write: if false;
+    }
+    match /retrieval_eval_cases/{document=**} {
+      allow read, write: if false;
+    }
+    match /memory_vectors_eval/{document=**} {
+      allow read, write: if false;
+    }
     match /memory_vectors_write_fingerprints/{document=**} {
       allow read, write: if false;
     }
+    match /memory_vectors_eval_write_fingerprints/{document=**} {
+      allow read, write: if false;
+    }
   }
 }
diff --git a/functions/.env.example b/functions/.env.example
index 33c6c2e..f52ea5e 100644
--- a/functions/.env.example
+++ b/functions/.env.example
@@ -12,6 +12,9 @@ GEMINI_MERGE_MODEL=gemini-3.5-flash
 GEMINI_GENERATION_VERTEX_LOCATION=global
 GEMINI_EMBEDDING_DIMENSIONS=768
 MEMORY_COLLECTION=memory_vectors
+# Opt in to full search queries, ranked ids/scores, and fetch ids for retrieval evaluation.
+RETRIEVAL_EVENT_LOGGING_ENABLED=false
+RETRIEVAL_EVAL_MEMORY_COLLECTION=memory_vectors_eval
 SEARCH_RESULT_LIMIT=5
 DEFAULT_FILTER_STATE=active
 SERVICE_NAME=metacortex
diff --git a/functions/package.json b/functions/package.json
index 25c1a05..96847d0 100644
--- a/functions/package.json
+++ b/functions/package.json
@@ -11,6 +11,9 @@
     "build": "tsc -p tsconfig.json",
     "clean": "node -e \"const fs=require('fs'); fs.rmSync('lib',{recursive:true,force:true}); fs.rmSync('coverage',{recursive:true,force:true});\"",
     "backfill:ttl": "node scripts/backfill-firestore-ttl.mjs",
+    "eval:generate": "tsx scripts/retrieval-eval.ts generate-isolated",
+    "eval:import": "tsx scripts/retrieval-eval.ts import-production",
+    "eval:run": "tsx scripts/retrieval-eval.ts run",
     "serve": "cd .. && firebase emulators:start --only functions,firestore",
     "shell": "firebase functions:shell",
     "smoke": "node scripts/mcp-smoke-test.mjs",
diff --git a/functions/scripts/mcp-smoke-test.mjs b/functions/scripts/mcp-smoke-test.mjs
index 0db2841..fb6aa04 100644
--- a/functions/scripts/mcp-smoke-test.mjs
+++ b/functions/scripts/mcp-smoke-test.mjs
@@ -64,6 +64,10 @@ const imageMimeType = readArg(
   process.env.MCP_IMAGE_MIME_TYPE ?? inferMimeType(imageFile)
 );
 const artifactRef = readArg("artifact-ref", process.env.MCP_ARTIFACT_REF);
+const fetchFirst = readArg(
+  "fetch-first",
+  process.env.MCP_FETCH_FIRST ?? "false"
+) === "true";
 
 if (!url) {
   console.error("Missing MCP base URL. Pass --url or set MCP_BASE_URL.");
@@ -157,10 +161,6 @@ try {
     rememberedId = extractRememberedId(rememberText);
   } else if (mode === "search-only") {
     ensureTools(toolNames, ["search_context"]);
-
-    if (toolNames.includes("remember_context")) {
-      throw new Error("search-only mode expected remember_context to be unavailable");
-    }
   } else {
     throw new Error(`Unsupported smoke mode: ${mode}`);
   }
@@ -178,12 +178,13 @@ try {
   const searchText = requireSuccessfulToolResult(searchResult, "search_context");
   console.log(searchText);
 
-  if (mode === "browser-read-write") {
+  if (mode === "browser-read-write" || fetchFirst) {
+    ensureTools(toolNames, ["fetch_context"]);
     const memoryId = rememberedId ?? extractMemoryId(searchText);
 
     if (!memoryId) {
       throw new Error(
-        "browser-read-write mode expected remember_context or search_context to return an id"
+        "fetch-first expected remember_context or search_context to return an id"
       );
     }
 
diff --git a/functions/scripts/retrieval-eval.ts b/functions/scripts/retrieval-eval.ts
new file mode 100644
index 0000000..3d09c57
--- /dev/null
+++ b/functions/scripts/retrieval-eval.ts
@@ -0,0 +1,492 @@
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
+import { getApp, getApps, initializeApp } from "firebase-admin/app";
+import { getFirestore } from "firebase-admin/firestore";
+import fs from "node:fs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+import { loadConfig } from "../src/config.js";
+import {
+  GeminiEmbeddingClient,
+  GeminiMultimodalPreparer
+} from "../src/embeddings.js";
+import { FirestoreMemoryRepository } from "../src/memoryRepository.js";
+import type { RetrievalEvent } from "../src/observability.js";
+import {
+  buildSyntheticEvalCases,
+  computeRetrievalEvalMetrics,
+  correlateImplicitFetches,
+  FirestoreRetrievalEvalCaseStore,
+  replaceRetrievalEvalCases,
+  type RetrievalEvalCase,
+  type RetrievalEvalObservation,
+  type RetrievalEvalTargetMode,
+  type SyntheticEvalDefinition
+} from "../src/retrievalEvaluation.js";
+import { MetaCortexService } from "../src/service.js";
+
+const scriptDirectory = path.dirname(fileURLToPath(import.meta.url));
+const functionsDirectory = path.resolve(scriptDirectory, "..");
+loadEnvironment(functionsDirectory);
+
+const command = process.argv[2];
+const cliArgs = process.argv.slice(3);
+const projectId = readArg("project", process.env.GCLOUD_PROJECT ?? "my-brain-88870");
+const evalMemoryCollection = readArg(
+  "memory-collection",
+  process.env.RETRIEVAL_EVAL_MEMORY_COLLECTION ?? "memory_vectors_eval"
+);
+const app = getApps().length === 0 ? initializeApp({ projectId }) : getApp();
+const firestore = getFirestore(app);
+const evalCaseStore = new FirestoreRetrievalEvalCaseStore(firestore);
+
+async function main(): Promise<void> {
+  switch (command) {
+    case "generate-isolated":
+      await generateIsolatedCorpus();
+      break;
+    case "import-production":
+      await importProductionEvents();
+      break;
+    case "run":
+      await runEvaluation();
+      break;
+    default:
+      printUsage();
+      process.exitCode = 1;
+  }
+}
+
+async function generateIsolatedCorpus(): Promise<void> {
+  if (evalMemoryCollection === "memory_vectors") {
+    throw new Error("The isolated eval corpus cannot use the production memory_vectors collection");
+  }
+
+  const service = createDirectService(evalMemoryCollection);
+  await deleteCollection(evalMemoryCollection);
+  await deleteCollection(`${evalMemoryCollection}_write_fingerprints`);
+
+  const memoryIdsByKey = new Map<string, string>();
+
+  for (const memory of SYNTHETIC_MEMORIES) {
+    const stored = await service.rememberContext({
+      content: memory.content,
+      topic: memory.topic,
+      branch_state: "active"
+    });
+    memoryIdsByKey.set(memory.key, stored.id);
+  }
+
+  const timestamp = Date.now();
+  const cases = buildSyntheticEvalCases({
+    definitions: SYNTHETIC_CASES,
+    memoryIdsByKey,
+    memoryCollection: evalMemoryCollection,
+    timestamp
+  });
+
+  for (const evalCase of cases) {
+    await service.searchContext(toSearchInput(evalCase));
+
+    for (const positiveId of evalCase.positive_ids) {
+      await service.fetchContext({ id: positiveId });
+    }
+  }
+
+  await replaceRetrievalEvalCases(
+    evalCaseStore,
+    "isolated",
+    "synthetic_flow",
+    cases
+  );
+  console.log(
+    JSON.stringify(
+      {
+        generated_cases: cases.length,
+        seeded_memories: memoryIdsByKey.size,
+        memory_collection: evalMemoryCollection
+      },
+      null,
+      2
+    )
+  );
+}
+
+async function importProductionEvents(): Promise<void> {
+  const lookbackHours = Number(readArg("lookback-hours", "168"));
+
+  if (!Number.isFinite(lookbackHours) || lookbackHours <= 0) {
+    throw new Error("--lookback-hours must be a positive number");
+  }
+
+  const since = Date.now() - lookbackHours * 60 * 60 * 1000;
+  const snapshot = await firestore
+    .collection("retrieval_query_events")
+    .where("timestamp", ">=", since)
+    .get();
+  const events = snapshot.docs.map(document => document.data() as RetrievalEvent);
+  const cases = correlateImplicitFetches(events);
+
+  await replaceRetrievalEvalCases(
+    evalCaseStore,
+    "production",
+    "observed_events",
+    cases
+  );
+  console.log(
+    JSON.stringify(
+      {
+        imported_cases: cases.length,
+        scanned_events: events.length,
+        lookback_hours: lookbackHours
+      },
+      null,
+      2
+    )
+  );
+}
+
+async function runEvaluation(): Promise<void> {
+  const mode = readArg("mode", "isolated") as RetrievalEvalTargetMode;
+
+  if (mode !== "isolated" && mode !== "production") {
+    throw new Error("--mode must be isolated or production");
+  }
+
+  const cases = await evalCaseStore.listCases(mode);
+
+  if (cases.length === 0) {
+    throw new Error(`No ${mode} retrieval eval cases exist`);
+  }
+
+  const observations = mode === "isolated"
+    ? await runDirectCases(cases)
+    : await runProductionCases(cases);
+  const cutoffs = readArg("cutoffs", "1,5")
+    .split(",")
+    .map(value => Number(value.trim()));
+
+  console.log(JSON.stringify(computeRetrievalEvalMetrics(observations, cutoffs), null, 2));
+}
+
+async function runDirectCases(
+  cases: readonly RetrievalEvalCase[]
+): Promise<RetrievalEvalObservation[]> {
+  const collections = new Set(cases.map(evalCase => evalCase.memory_collection));
+
+  if (collections.size !== 1) {
+    throw new Error("Isolated eval cases must reference one memory collection");
+  }
+
+  const service = createDirectService(cases[0]!.memory_collection);
+  const observations: RetrievalEvalObservation[] = [];
+
+  for (const evalCase of cases) {
+    const startedAt = performance.now();
+    const result = await service.searchContext(toSearchInput(evalCase));
+    observations.push({
+      case_id: evalCase.case_id,
+      positive_ids: evalCase.positive_ids,
+      returned_ids: result.matches.map(match => match.id),
+      latency_ms: performance.now() - startedAt
+    });
+  }
+
+  return observations;
+}
+
+async function runProductionCases(
+  cases: readonly RetrievalEvalCase[]
+): Promise<RetrievalEvalObservation[]> {
+  const url = readArg("url", process.env.MCP_BASE_URL);
+  const token = readArg(
+    "token",
+    process.env.MCP_ADMIN_TOKEN ?? process.env.MCP_AUTH_TOKEN
+  );
+
+  if (!url || !token) {
+    throw new Error("Production eval requires --url/MCP_BASE_URL and --token/MCP_ADMIN_TOKEN");
+  }
+
+  const client = new Client({ name: "metacortex-retrieval-eval", version: "1.0.0" });
+  const transport = new StreamableHTTPClientTransport(new URL(url), {
+    requestInit: { headers: { Authorization: `Bearer ${token}` } }
+  });
+  const observations: RetrievalEvalObservation[] = [];
+
+  try {
+    await client.connect(transport);
+
+    for (const evalCase of cases) {
+      const startedAt = performance.now();
+      const result = await client.callTool({
+        name: "search_context",
+        arguments: toSearchInput(evalCase)
+      });
+      const latencyMs = performance.now() - startedAt;
+      const payload = parseToolPayload(result);
+      const matches = Array.isArray(payload.matches) ? payload.matches : [];
+      observations.push({
+        case_id: evalCase.case_id,
+        positive_ids: evalCase.positive_ids,
+        returned_ids: matches
+          .map(match =>
+            match && typeof match === "object" && typeof match.id === "string"
+              ? match.id
+              : undefined
+          )
+          .filter((id): id is string => Boolean(id)),
+        latency_ms: latencyMs
+      });
+    }
+  } finally {
+    await client.close().catch(() => undefined);
+    await transport.close().catch(() => undefined);
+  }
+
+  return observations;
+}
+
+function createDirectService(memoryCollection: string): MetaCortexService {
+  const config = loadConfig(process.env);
+  const embeddings = new GeminiEmbeddingClient({
+    vertexai: true,
+    project: projectId,
+    model: config.embeddingModel,
+    dimensions: config.embeddingDimensions
+  });
+  const contentPreparer = new GeminiMultimodalPreparer({
+    vertexai: true,
+    project: projectId,
+    location: config.generationVertexLocation,
+    model: config.multimodalModel
+  });
+  const repository = new FirestoreMemoryRepository(firestore, memoryCollection);
+
+  return new MetaCortexService(
+    contentPreparer,
+    embeddings,
+    repository,
+    config,
+    {
+      merge: async () => {
+        throw new Error("Merge is unavailable in retrieval evaluation");
+      }
+    }
+  );
+}
+
+function toSearchInput(evalCase: RetrievalEvalCase) {
+  return {
+    query: evalCase.query,
+    filter_topic: evalCase.filters.filter_topic ?? undefined,
+    filter_state: evalCase.filters.filter_state,
+    limit: evalCase.limit
+  };
+}
+
+function parseToolPayload(result: unknown): Record<string, unknown> {
+  if (!result || typeof result !== "object") {
+    throw new Error("search_context returned an invalid MCP result");
+  }
+
+  const candidate = result as { isError?: boolean; content?: unknown };
+
+  if (candidate.isError) {
+    throw new Error("search_context returned an MCP error");
+  }
+
+  if (!Array.isArray(candidate.content)) {
+    throw new Error("search_context returned no MCP content array");
+  }
+
+  const text = candidate.content
+    .filter(
+      (item): item is { type: "text"; text: string } =>
+        Boolean(
+          item &&
+          typeof item === "object" &&
+          (item as { type?: unknown }).type === "text" &&
+          typeof (item as { text?: unknown }).text === "string"
+        )
+    )
+    .map(item => item.text)
+    .join("\n");
+  const payload: unknown = JSON.parse(text);
+
+  if (!payload || typeof payload !== "object" || Array.isArray(payload)) {
+    throw new Error("search_context returned a non-object payload");
+  }
+
+  const objectPayload = payload as Record<string, unknown>;
+
+  if (objectPayload.error) {
+    throw new Error(
+      `search_context returned an error: ${JSON.stringify(objectPayload.error)}`
+    );
+  }
+
+  return objectPayload;
+}
+
+async function deleteCollection(collectionName: string): Promise<void> {
+  while (true) {
+    const snapshot = await firestore.collection(collectionName).limit(450).get();
+
+    if (snapshot.empty) {
+      return;
+    }
+
+    const batch = firestore.batch();
+
+    for (const document of snapshot.docs) {
+      batch.delete(document.ref);
+    }
+
+    await batch.commit();
+  }
+}
+
+function readArg(name: string, fallback?: string): string {
+  const index = cliArgs.indexOf(`--${name}`);
+  return index === -1 ? fallback ?? "" : cliArgs[index + 1] ?? fallback ?? "";
+}
+
+function loadEnvironment(directory: string): void {
+  const explicitlySet = new Set(Object.keys(process.env));
+
+  for (const fileName of [".env", ".env.prod"]) {
+    const filePath = path.join(directory, fileName);
+
+    if (!fs.existsSync(filePath)) {
+      continue;
+    }
+
+    for (const rawLine of fs.readFileSync(filePath, "utf8").split(/\r?\n/)) {
+      const line = rawLine.trim();
+
+      if (!line || line.startsWith("#")) {
+        continue;
+      }
+
+      const separator = line.indexOf("=");
+
+      if (separator === -1) {
+        continue;
+      }
+
+      const key = line.slice(0, separator).trim();
+
+      if (!explicitlySet.has(key)) {
+        process.env[key] = line.slice(separator + 1).trim();
+      }
+    }
+  }
+}
+
+function printUsage(): void {
+  console.error([
+    "Usage:",
+    "  npm run eval:generate",
+    "  npm run eval:import -- --lookback-hours 168",
+    "  npm run eval:run -- --mode isolated",
+    "  npm run eval:run -- --mode production --url <mcp-url>"
+  ].join("\n"));
+}
+
+const SYNTHETIC_MEMORIES = [
+  {
+    key: "ktor-darwin",
+    topic: "eval-networking",
+    content: "The shared Kotlin networking layer uses Ktor HttpClient with the Darwin engine on iOS and the OkHttp engine on Android."
+  },
+  {
+    key: "retrofit-legacy",
+    topic: "eval-networking",
+    content: "The retired Android-only client used Retrofit with Gson and had no iOS implementation."
+  },
+  {
+    key: "vector-dimensions",
+    topic: "eval-firestore",
+    content: "Firestore vector indexes and GEMINI_EMBEDDING_DIMENSIONS must both use 768 dimensions with cosine distance."
+  },
+  {
+    key: "firestore-mode",
+    topic: "eval-firestore",
+    content: "The MetaCortex database must use Firestore Native mode rather than Datastore mode."
+  },
+  {
+    key: "client-scoping",
+    topic: "eval-security",
+    content: "Each custom MCP client profile has its own bearer token, tool allowlist, branch-state allowlist, and browser origin allowlist."
+  },
+  {
+    key: "cors-default",
+    topic: "eval-security",
+    content: "MCP_ALLOWED_ORIGINS applies only to the default admin endpoint and defaults to denying browser origins."
+  },
+  {
+    key: "write-fingerprint",
+    topic: "eval-writes",
+    content: "A write fingerprint suppresses duplicate memory writes for fifteen minutes while the fingerprint document is retained for thirty days."
+  },
+  {
+    key: "consolidation",
+    topic: "eval-lifecycle",
+    content: "Consolidation merges at least two source memories into one active record and deprecates each source with superseded_by set to the merged id."
+  }
+] as const;
+
+const SYNTHETIC_CASES: readonly SyntheticEvalDefinition[] = [
+  {
+    case_id: "networking-ios-engine",
+    query: "Which HTTP engine does the shared mobile client use on iOS?",
+    filter_topic: "eval-networking",
+    filter_state: "active",
+    limit: 5,
+    positive_keys: ["ktor-darwin"]
+  },
+  {
+    case_id: "firestore-vector-dimensions",
+    query: "GEMINI_EMBEDDING_DIMENSIONS cosine index size",
+    filter_topic: "eval-firestore",
+    filter_state: "active",
+    limit: 5,
+    positive_keys: ["vector-dimensions"]
+  },
+  {
+    case_id: "firestore-native-mode",
+    query: "Which Firestore database mode is required?",
+    filter_topic: "eval-firestore",
+    filter_state: "active",
+    limit: 5,
+    positive_keys: ["firestore-mode"]
+  },
+  {
+    case_id: "scoped-client-controls",
+    query: "How are individual MCP clients restricted?",
+    filter_topic: "eval-security",
+    filter_state: "active",
+    limit: 5,
+    positive_keys: ["client-scoping"]
+  },
+  {
+    case_id: "duplicate-write-window",
+    query: "How long are repeated memory writes deduplicated?",
+    filter_topic: "eval-writes",
+    filter_state: "active",
+    limit: 5,
+    positive_keys: ["write-fingerprint"]
+  },
+  {
+    case_id: "consolidation-effects",
+    query: "What happens to source records after consolidation?",
+    filter_topic: "eval-lifecycle",
+    filter_state: "active",
+    limit: 5,
+    positive_keys: ["consolidation"]
+  }
+];
+
+await main();
diff --git a/functions/src/app.ts b/functions/src/app.ts
index 680dc70..c4b2aa4 100644
--- a/functions/src/app.ts
+++ b/functions/src/app.ts
@@ -127,6 +127,10 @@ function registerMcpRoutes(
       clientId: profile.id,
       serviceName: runtime.config.serviceName,
       serviceVersion: runtime.config.serviceVersion,
+      memoryCollection: runtime.config.memoryCollection,
+      retrievalEventLoggingEnabled:
+        runtime.config.retrievalEventLoggingEnabled,
+      topK: runtime.config.topK,
       defaultFilterState: selectDefaultFilterState(runtime.config, profile),
       allowedTools: profile.allowedTools,
       allowedFilterStates: profile.allowedFilterStates
diff --git a/functions/src/config.ts b/functions/src/config.ts
index 86f7924..b0d7022 100644
--- a/functions/src/config.ts
+++ b/functions/src/config.ts
@@ -24,12 +24,35 @@ export interface AppConfig {
   generationVertexLocation: string;
   embeddingDimensions: number;
   memoryCollection: string;
+  retrievalEventLoggingEnabled: boolean;
   topK: number;
   defaultFilterState: BranchState;
   defaultClientProfile: ClientProfile;
   clientProfiles: ClientProfile[];
 }
 
+function parseBoolean(
+  value: string | undefined,
+  fallback: boolean,
+  key: string
+): boolean {
+  if (!value?.trim()) {
+    return fallback;
+  }
+
+  const normalized = value.trim().toLowerCase();
+
+  if (normalized === "true") {
+    return true;
+  }
+
+  if (normalized === "false") {
+    return false;
+  }
+
+  throw new MissingConfigurationError(`${key} must be true or false when provided`);
+}
+
 export class MissingConfigurationError extends Error {
   constructor(message: string) {
     super(message);
@@ -276,6 +299,11 @@ export function loadConfig(env: NodeJS.ProcessEnv = process.env): AppConfig {
       "GEMINI_EMBEDDING_DIMENSIONS"
     ),
     memoryCollection: env.MEMORY_COLLECTION?.trim() || "memory_vectors",
+    retrievalEventLoggingEnabled: parseBoolean(
+      env.RETRIEVAL_EVENT_LOGGING_ENABLED,
+      false,
+      "RETRIEVAL_EVENT_LOGGING_ENABLED"
+    ),
     topK: parsePositiveInteger(env.SEARCH_RESULT_LIMIT, 5, "SEARCH_RESULT_LIMIT"),
     defaultFilterState: defaultFilterState as BranchState,
     defaultClientProfile,
diff --git a/functions/src/mcpServer.ts b/functions/src/mcpServer.ts
index 2593c9d..7006a7f 100644
--- a/functions/src/mcpServer.ts
+++ b/functions/src/mcpServer.ts
@@ -4,7 +4,10 @@ import * as z from "zod/v4";
 import type { AppConfig } from "./config.js";
 import { HttpError } from "./errors.js";
 import { normalizeOptionalText } from "./normalize.js";
-import type { ToolCallObserver } from "./observability.js";
+import type {
+  RetrievalEventInput,
+  ToolCallObserver
+} from "./observability.js";
 import {
   buildConsolidatePayload,
   buildDeprecatePayload,
@@ -21,7 +24,15 @@ import {
 
 export function createMetaCortexMcpServer(
   service: MetaCortexService,
-  config: Pick<AppConfig, "serviceName" | "serviceVersion" | "defaultFilterState"> & {
+  config: Pick<
+    AppConfig,
+    | "serviceName"
+    | "serviceVersion"
+    | "defaultFilterState"
+    | "memoryCollection"
+    | "retrievalEventLoggingEnabled"
+    | "topK"
+  > & {
     observer: ToolCallObserver;
     clientId: string;
     allowedTools: readonly McpToolName[];
@@ -161,7 +172,11 @@ export function createMetaCortexMcpServer(
     toolName: McpToolName,
     requestSummary: Record<string, unknown>,
     run: () => Promise<Result>,
-    summarizeResult: (result: Result) => Record<string, unknown>
+    summarizeResult: (result: Result) => Record<string, unknown>,
+    retrieval?: {
+      request: RetrievalEventInput;
+      summarizeResult: (result: Result) => Partial<RetrievalEventInput>;
+    }
   ): Promise<Result> => {
     const startedAt = Date.now();
 
@@ -174,7 +189,15 @@ export function createMetaCortexMcpServer(
         status: "success",
         latency_ms: Date.now() - startedAt,
         request: requestSummary,
-        response: summarizeResult(result)
+        response: summarizeResult(result),
+        ...(retrieval
+          ? {
+              retrieval: {
+                ...retrieval.request,
+                ...retrieval.summarizeResult(result)
+              } as RetrievalEventInput
+            }
+          : {})
       });
 
       return result;
@@ -185,6 +208,7 @@ export function createMetaCortexMcpServer(
         status: "error",
         latency_ms: Date.now() - startedAt,
         request: requestSummary,
+        ...(retrieval ? { retrieval: retrieval.request } : {}),
         error: summarizeToolError(error)
       });
 
@@ -262,10 +286,12 @@ export function createMetaCortexMcpServer(
       },
       async args => {
         const requestedFilterState = args.filter_state ?? config.defaultFilterState;
+        const normalizedFilterTopic = normalizeOptionalText(args.filter_topic);
+        const requestedLimit = args.limit ?? config.topK;
         const requestSummary = {
           query_preview: truncateText(args.query),
           query_length: args.query.trim().length,
-          filter_topic: normalizeOptionalText(args.filter_topic),
+          filter_topic: normalizedFilterTopic,
           filter_state: requestedFilterState,
           limit: args.limit
         };
@@ -287,7 +313,34 @@ export function createMetaCortexMcpServer(
             result_ids: searchResult.matches.map(match => match.id),
             filter_state: searchResult.appliedFilters.filter_state,
             filter_topic: searchResult.appliedFilters.filter_topic
-          })
+          }),
+          config.retrievalEventLoggingEnabled
+            ? {
+                request: {
+                  event_type: "search",
+                  memory_collection: config.memoryCollection,
+                  query: args.query.trim(),
+                  filter_topic: normalizedFilterTopic,
+                  filter_state: requestedFilterState,
+                  limit: requestedLimit
+                },
+                summarizeResult: searchResult => ({
+                  result_count: searchResult.matches.length,
+                  results: searchResult.matches.map((match, index) => ({
+                    id: match.id,
+                    rank: index + 1,
+                    ...(typeof match.distance === "number"
+                      ? {
+                          score: Math.max(
+                            0,
+                            Number((1 - match.distance).toFixed(6))
+                          )
+                        }
+                      : {})
+                  }))
+                })
+              }
+            : undefined
         );
 
         return {
@@ -336,7 +389,17 @@ export function createMetaCortexMcpServer(
             branch_state: fetched.item.metadata.branch_state,
             modality: fetched.item.metadata.modality,
             artifact_ref_count: fetched.item.metadata.artifact_refs?.length ?? 0
-          })
+          }),
+          config.retrievalEventLoggingEnabled
+            ? {
+                request: {
+                  event_type: "fetch",
+                  memory_collection: config.memoryCollection,
+                  memory_id: args.id ?? args.document_id ?? ""
+                },
+                summarizeResult: () => ({ found: true })
+              }
+            : undefined
         );
 
         return {
diff --git a/functions/src/observability.ts b/functions/src/observability.ts
index 264745c..3b0f86b 100644
--- a/functions/src/observability.ts
+++ b/functions/src/observability.ts
@@ -5,6 +5,7 @@ import { Firestore } from "firebase-admin/firestore";
 import type { McpToolName } from "./types.js";
 
 export const MEMORY_EVENT_COLLECTION = "memory_events";
+export const RETRIEVAL_EVENT_COLLECTION = "retrieval_query_events";
 const MEMORY_EVENT_TTL_MS = 90 * 24 * 60 * 60 * 1000;
 
 export type RequestEventReason =
@@ -31,6 +32,48 @@ export interface ToolCallEvent {
   error?: ToolCallEventError;
 }
 
+export interface RankedRetrievalResult {
+  id: string;
+  rank: number;
+  score?: number;
+}
+
+export interface SearchRetrievalEventInput {
+  event_type: "search";
+  memory_collection: string;
+  query: string;
+  filter_topic?: string;
+  filter_state: string;
+  limit: number;
+  result_count?: number;
+  results?: RankedRetrievalResult[];
+}
+
+export interface FetchRetrievalEventInput {
+  event_type: "fetch";
+  memory_collection: string;
+  memory_id: string;
+  found?: boolean;
+}
+
+export type RetrievalEventInput =
+  | SearchRetrievalEventInput
+  | FetchRetrievalEventInput;
+
+interface RetrievalEventBase {
+  event_id: string;
+  tool_event_id: string;
+  client_id: string;
+  status: "success" | "error";
+  timestamp: number;
+  expires_at: Date;
+  latency_ms?: number;
+  memory_collection: string;
+  error?: ToolCallEventError;
+}
+
+export type RetrievalEvent = RetrievalEventBase & RetrievalEventInput;
+
 export interface RequestEvent {
   event_id: string;
   event_type: "request";
@@ -55,6 +98,7 @@ export interface RecordToolCallEventInput {
   request: Record<string, unknown>;
   response?: Record<string, unknown>;
   error?: ToolCallEventError;
+  retrieval?: RetrievalEventInput;
   timestamp?: number;
 }
 
@@ -98,7 +142,35 @@ export class FirestoreToolCallObserver implements ToolCallObserver {
       ...(input.error ? { error: input.error } : {})
     };
 
-    await this.persist("metaCortexMcp tool event", event);
+    const writes = [
+      this.persist("metaCortexMcp tool event", event, this.collectionName)
+    ];
+
+    if (input.retrieval) {
+      const retrievalEvent: RetrievalEvent = {
+        event_id: randomUUID(),
+        tool_event_id: event.event_id,
+        client_id: input.client_id,
+        status: input.status,
+        timestamp,
+        expires_at: new Date(timestamp + MEMORY_EVENT_TTL_MS),
+        ...(typeof input.latency_ms === "number"
+          ? { latency_ms: input.latency_ms }
+          : {}),
+        ...input.retrieval,
+        ...(input.error ? { error: input.error } : {})
+      };
+      writes.push(
+        this.persist(
+          "metaCortexMcp retrieval event",
+          retrievalEvent,
+          RETRIEVAL_EVENT_COLLECTION,
+          false
+        )
+      );
+    }
+
+    await Promise.all(writes);
   }
 
   async recordRequest(input: RecordRequestEventInput): Promise<void> {
@@ -119,17 +191,24 @@ export class FirestoreToolCallObserver implements ToolCallObserver {
         : {})
     };
 
-    await this.persist("metaCortexMcp request event", event);
+    await this.persist("metaCortexMcp request event", event, this.collectionName);
   }
 
-  private async persist(message: string, event: ObservabilityEvent): Promise<void> {
-    const sanitizedEvent = stripUndefined(event) as ObservabilityEvent;
+  private async persist(
+    message: string,
+    event: ObservabilityEvent | RetrievalEvent,
+    collectionName: string,
+    logToConsole = true
+  ): Promise<void> {
+    const sanitizedEvent = stripUndefined(event) as ObservabilityEvent | RetrievalEvent;
 
-    console.info(message, sanitizedEvent);
+    if (logToConsole) {
+      console.info(message, sanitizedEvent);
+    }
 
     try {
       await this.firestore
-        .collection(this.collectionName)
+        .collection(collectionName)
         .doc(sanitizedEvent.event_id)
         .set(sanitizedEvent);
     } catch (error) {
diff --git a/functions/src/retrievalEvaluation.ts b/functions/src/retrievalEvaluation.ts
new file mode 100644
index 0000000..18359be
--- /dev/null
+++ b/functions/src/retrievalEvaluation.ts
@@ -0,0 +1,328 @@
+import type { Firestore } from "firebase-admin/firestore";
+
+import type { RetrievalEvent } from "./observability.js";
+import type { BranchState } from "./types.js";
+
+export const RETRIEVAL_EVAL_COLLECTION = "retrieval_eval_cases";
+const FIRESTORE_BATCH_LIMIT = 450;
+
+export type RetrievalEvalTargetMode = "isolated" | "production";
+export type RetrievalEvalSource = "synthetic_flow" | "observed_events";
+
+export interface RetrievalEvalCase {
+  schema_version: 1;
+  case_id: string;
+  target_mode: RetrievalEvalTargetMode;
+  source: RetrievalEvalSource;
+  query: string;
+  filters: {
+    filter_topic?: string | null;
+    filter_state: BranchState;
+  };
+  limit: number;
+  memory_collection: string;
+  positive_ids: string[];
+  label_source: "implicit_fetch";
+  created_at: number;
+  updated_at: number;
+}
+
+export interface RetrievalEvalCaseStore {
+  deleteCases(
+    targetMode: RetrievalEvalTargetMode,
+    source: RetrievalEvalSource
+  ): Promise<void>;
+  writeCases(cases: readonly RetrievalEvalCase[]): Promise<void>;
+  listCases(targetMode: RetrievalEvalTargetMode): Promise<RetrievalEvalCase[]>;
+}
+
+export class FirestoreRetrievalEvalCaseStore implements RetrievalEvalCaseStore {
+  constructor(
+    private readonly firestore: Firestore,
+    private readonly collectionName = RETRIEVAL_EVAL_COLLECTION
+  ) {}
+
+  async deleteCases(
+    targetMode: RetrievalEvalTargetMode,
+    source: RetrievalEvalSource
+  ): Promise<void> {
+    const snapshot = await this.firestore
+      .collection(this.collectionName)
+      .where("target_mode", "==", targetMode)
+      .where("source", "==", source)
+      .get();
+
+    for (const documents of chunk(snapshot.docs, FIRESTORE_BATCH_LIMIT)) {
+      const batch = this.firestore.batch();
+
+      for (const document of documents) {
+        batch.delete(document.ref);
+      }
+
+      await batch.commit();
+    }
+  }
+
+  async writeCases(cases: readonly RetrievalEvalCase[]): Promise<void> {
+    for (const caseChunk of chunk(cases, FIRESTORE_BATCH_LIMIT)) {
+      const batch = this.firestore.batch();
+
+      for (const evalCase of caseChunk) {
+        batch.set(
+          this.firestore.collection(this.collectionName).doc(evalCase.case_id),
+          evalCase
+        );
+      }
+
+      await batch.commit();
+    }
+  }
+
+  async listCases(targetMode: RetrievalEvalTargetMode): Promise<RetrievalEvalCase[]> {
+    const snapshot = await this.firestore
+      .collection(this.collectionName)
+      .where("target_mode", "==", targetMode)
+      .get();
+
+    return snapshot.docs
+      .map(document => document.data() as RetrievalEvalCase)
+      .filter(evalCase => evalCase.schema_version === 1)
+      .sort((left, right) => left.case_id.localeCompare(right.case_id));
+  }
+}
+
+export async function replaceRetrievalEvalCases(
+  store: RetrievalEvalCaseStore,
+  targetMode: RetrievalEvalTargetMode,
+  source: RetrievalEvalSource,
+  cases: readonly RetrievalEvalCase[]
+): Promise<void> {
+  await store.deleteCases(targetMode, source);
+  await store.writeCases(cases);
+}
+
+export interface SyntheticEvalDefinition {
+  case_id: string;
+  query: string;
+  filter_topic?: string;
+  filter_state: BranchState;
+  limit: number;
+  positive_keys: string[];
+}
+
+export function buildSyntheticEvalCases(input: {
+  definitions: readonly SyntheticEvalDefinition[];
+  memoryIdsByKey: ReadonlyMap<string, string>;
+  memoryCollection: string;
+  timestamp: number;
+}): RetrievalEvalCase[] {
+  return input.definitions.map(definition => ({
+    schema_version: 1,
+    case_id: definition.case_id,
+    target_mode: "isolated",
+    source: "synthetic_flow",
+    query: definition.query,
+    filters: {
+      filter_topic: definition.filter_topic ?? null,
+      filter_state: definition.filter_state
+    },
+    limit: definition.limit,
+    memory_collection: input.memoryCollection,
+    positive_ids: definition.positive_keys.map(key => {
+      const memoryId = input.memoryIdsByKey.get(key);
+
+      if (!memoryId) {
+        throw new Error(`Synthetic eval definition references unknown memory key: ${key}`);
+      }
+
+      return memoryId;
+    }),
+    label_source: "implicit_fetch",
+    created_at: input.timestamp,
+    updated_at: input.timestamp
+  }));
+}
+
+export function correlateImplicitFetches(
+  events: readonly RetrievalEvent[],
+  options: { maxDelayMs?: number } = {}
+): RetrievalEvalCase[] {
+  const maxDelayMs = options.maxDelayMs ?? 30 * 60 * 1000;
+  const searches = events
+    .filter(
+      (event): event is Extract<RetrievalEvent, { event_type: "search" }> =>
+        event.event_type === "search" && event.status === "success"
+    )
+    .sort((left, right) => left.timestamp - right.timestamp);
+  const fetches = events
+    .filter(
+      (event): event is Extract<RetrievalEvent, { event_type: "fetch" }> =>
+        event.event_type === "fetch" &&
+        event.status === "success" &&
+        event.found === true
+    )
+    .sort((left, right) => left.timestamp - right.timestamp);
+  const matches = new Map<
+    string,
+    { search: (typeof searches)[number]; positiveIds: Set<string>; updatedAt: number }
+  >();
+
+  for (const fetch of fetches) {
+    const search = searches
+      .filter(candidate =>
+        candidate.client_id === fetch.client_id &&
+        candidate.memory_collection === fetch.memory_collection &&
+        candidate.timestamp <= fetch.timestamp &&
+        fetch.timestamp - candidate.timestamp <= maxDelayMs &&
+        candidate.results?.some(result => result.id === fetch.memory_id)
+      )
+      .at(-1);
+
+    if (!search) {
+      continue;
+    }
+
+    const existing = matches.get(search.event_id);
+
+    if (existing) {
+      existing.positiveIds.add(fetch.memory_id);
+      existing.updatedAt = Math.max(existing.updatedAt, fetch.timestamp);
+    } else {
+      matches.set(search.event_id, {
+        search,
+        positiveIds: new Set([fetch.memory_id]),
+        updatedAt: fetch.timestamp
+      });
+    }
+  }
+
+  return [...matches.values()]
+    .map(({ search, positiveIds, updatedAt }) => ({
+      schema_version: 1 as const,
+      case_id: `observed-${search.event_id.replaceAll("/", "-")}`,
+      target_mode: "production" as const,
+      source: "observed_events" as const,
+      query: search.query,
+      filters: {
+        filter_topic: search.filter_topic ?? null,
+        filter_state: search.filter_state as BranchState
+      },
+      limit: search.limit,
+      memory_collection: search.memory_collection,
+      positive_ids: [...positiveIds],
+      label_source: "implicit_fetch" as const,
+      created_at: search.timestamp,
+      updated_at: updatedAt
+    }))
+    .sort((left, right) => left.case_id.localeCompare(right.case_id));
+}
+
+export interface RetrievalEvalObservation {
+  case_id: string;
+  positive_ids: string[];
+  returned_ids: string[];
+  latency_ms: number;
+}
+
+export interface RetrievalEvalMetrics {
+  case_count: number;
+  hit_at_k: Record<string, number>;
+  recall_at_k: Record<string, number>;
+  mrr: number;
+  zero_result_count: number;
+  zero_result_rate: number;
+  latency_ms: {
+    p50: number;
+    p95: number;
+  };
+  observations: RetrievalEvalObservation[];
+}
+
+export function computeRetrievalEvalMetrics(
+  observations: readonly RetrievalEvalObservation[],
+  cutoffs: readonly number[] = [1, 5]
+): RetrievalEvalMetrics {
+  const normalizedCutoffs = [...new Set(cutoffs)]
+    .filter(cutoff => Number.isInteger(cutoff) && cutoff > 0)
+    .sort((left, right) => left - right);
+  const hitAtK: Record<string, number> = {};
+  const recallAtK: Record<string, number> = {};
+
+  for (const cutoff of normalizedCutoffs) {
+    const hitTotal = observations.reduce((total, observation) => {
+      const positiveIds = new Set(observation.positive_ids);
+      const hit = observation.returned_ids
+        .slice(0, cutoff)
+        .some(id => positiveIds.has(id));
+      return total + Number(hit);
+    }, 0);
+    const recallTotal = observations.reduce((total, observation) => {
+      const positiveIds = new Set(observation.positive_ids);
+
+      if (positiveIds.size === 0) {
+        return total;
+      }
+
+      const retrievedPositiveCount = new Set(
+        observation.returned_ids
+          .slice(0, cutoff)
+          .filter(id => positiveIds.has(id))
+      ).size;
+      return total + retrievedPositiveCount / positiveIds.size;
+    }, 0);
+
+    hitAtK[String(cutoff)] = safeAverage(hitTotal, observations.length);
+    recallAtK[String(cutoff)] = safeAverage(recallTotal, observations.length);
+  }
+
+  const reciprocalRankTotal = observations.reduce((total, observation) => {
+    const positiveIds = new Set(observation.positive_ids);
+    const index = observation.returned_ids.findIndex(id => positiveIds.has(id));
+    return total + (index === -1 ? 0 : 1 / (index + 1));
+  }, 0);
+  const zeroResultCount = observations.filter(
+    observation => observation.returned_ids.length === 0
+  ).length;
+  const latencies = observations.map(observation => observation.latency_ms);
+
+  return {
+    case_count: observations.length,
+    hit_at_k: hitAtK,
+    recall_at_k: recallAtK,
+    mrr: safeAverage(reciprocalRankTotal, observations.length),
+    zero_result_count: zeroResultCount,
+    zero_result_rate: safeAverage(zeroResultCount, observations.length),
+    latency_ms: {
+      p50: percentile(latencies, 0.5),
+      p95: percentile(latencies, 0.95)
+    },
+    observations: observations.map(observation => ({
+      ...observation,
+      latency_ms: Number(observation.latency_ms.toFixed(3))
+    }))
+  };
+}
+
+function safeAverage(total: number, count: number): number {
+  return count === 0 ? 0 : Number((total / count).toFixed(6));
+}
+
+function percentile(values: readonly number[], percentileValue: number): number {
+  if (values.length === 0) {
+    return 0;
+  }
+
+  const sorted = [...values].sort((left, right) => left - right);
+  const index = Math.max(0, Math.ceil(percentileValue * sorted.length) - 1);
+  return Number(sorted[index]!.toFixed(3));
+}
+
+function chunk<T>(items: readonly T[], size: number): T[][] {
+  const chunks: T[][] = [];
+
+  for (let index = 0; index < items.length; index += size) {
+    chunks.push(items.slice(index, index + size));
+  }
+
+  return chunks;
+}
diff --git a/functions/test/config.test.ts b/functions/test/config.test.ts
index fd0a22a..aeec890 100644
--- a/functions/test/config.test.ts
+++ b/functions/test/config.test.ts
@@ -23,6 +23,7 @@ describe("loadConfig", () => {
     expect(config.mergeModel).toBe("gemini-3.5-flash");
     expect(config.generationVertexLocation).toBe("global");
     expect(config.embeddingDimensions).toBe(768);
+    expect(config.retrievalEventLoggingEnabled).toBe(false);
     expect(config.defaultFilterState).toBe("active");
     expect(config.topK).toBe(5);
     expect(config.defaultClientProfile.allowedTools).toContain("search_context");
@@ -45,6 +46,23 @@ describe("loadConfig", () => {
     ).toThrowError(MissingConfigurationError);
   });
 
+  it("loads and validates retrieval event logging", () => {
+    const config = loadConfig({
+      [geminiApiKeyEnv]: accessCredential("gemini"),
+      [adminTokenEnv]: accessCredential("admin"),
+      RETRIEVAL_EVENT_LOGGING_ENABLED: "true"
+    });
+
+    expect(config.retrievalEventLoggingEnabled).toBe(true);
+    expect(() =>
+      loadConfig({
+        [geminiApiKeyEnv]: accessCredential("gemini"),
+        [adminTokenEnv]: accessCredential("admin"),
+        RETRIEVAL_EVENT_LOGGING_ENABLED: "yes"
+      })
+    ).toThrowError(MissingConfigurationError);
+  });
+
   it("loads scoped client profiles", () => {
     const config = loadConfig({
       [geminiApiKeyEnv]: accessCredential("gemini"),
diff --git a/functions/test/mcp.integration.test.ts b/functions/test/mcp.integration.test.ts
index 46ddee4..c863678 100644
--- a/functions/test/mcp.integration.test.ts
+++ b/functions/test/mcp.integration.test.ts
@@ -322,6 +322,7 @@ describe("MCP integration", () => {
         status_code: 403
       }
     });
+    expect(runtime.observer.listRetrievalEvents()).toEqual([]);
   });
 
   it("consolidates wip memories via consolidate_context tool", async () => {
@@ -467,6 +468,7 @@ describe("MCP integration", () => {
 
   it("supports ChatGPT web remember, search, and fetch flows", async () => {
     const runtime = createTestRuntime({
+      retrievalEventLoggingEnabled: true,
       clientProfiles: [
         {
           id: "chatgpt-web",
@@ -627,6 +629,34 @@ describe("MCP integration", () => {
         }
       }
     ]);
+    expect(runtime.observer.listRetrievalEvents()).toMatchObject([
+      {
+        event_type: "search",
+        client_id: "chatgpt-web",
+        status: "success",
+        memory_collection: "memory_vectors",
+        query: "shared networking for android and ios",
+        filter_topic: "kmp-networking",
+        filter_state: "active",
+        limit: 5,
+        result_count: 1,
+        results: [
+          {
+            id: "memory-1",
+            rank: 1,
+            score: expect.any(Number)
+          }
+        ]
+      },
+      {
+        event_type: "fetch",
+        client_id: "chatgpt-web",
+        status: "success",
+        memory_collection: "memory_vectors",
+        memory_id: "memory-1",
+        found: true
+      }
+    ]);
   });
 });
 
diff --git a/functions/test/retrievalEvaluation.test.ts b/functions/test/retrievalEvaluation.test.ts
new file mode 100644
index 0000000..38507b9
--- /dev/null
+++ b/functions/test/retrievalEvaluation.test.ts
@@ -0,0 +1,196 @@
+import { describe, expect, it } from "vitest";
+
+import type { RetrievalEvent } from "../src/observability.js";
+import {
+  buildSyntheticEvalCases,
+  computeRetrievalEvalMetrics,
+  correlateImplicitFetches,
+  replaceRetrievalEvalCases,
+  type RetrievalEvalCase,
+  type RetrievalEvalCaseStore,
+  type RetrievalEvalSource,
+  type RetrievalEvalTargetMode
+} from "../src/retrievalEvaluation.js";
+
+describe("retrieval evaluation", () => {
+  it("builds synthetic cases with resolved positive ids", () => {
+    const cases = buildSyntheticEvalCases({
+      definitions: [
+        {
+          case_id: "networking",
+          query: "shared iOS networking",
+          filter_topic: "networking",
+          filter_state: "active",
+          limit: 5,
+          positive_keys: ["ktor"]
+        }
+      ],
+      memoryIdsByKey: new Map([["ktor", "memory-42"]]),
+      memoryCollection: "memory_vectors_eval",
+      timestamp: 100
+    });
+
+    expect(cases).toEqual([
+      expect.objectContaining({
+        case_id: "networking",
+        target_mode: "isolated",
+        source: "synthetic_flow",
+        positive_ids: ["memory-42"],
+        label_source: "implicit_fetch"
+      })
+    ]);
+  });
+
+  it("correlates successful fetches to the most recent search containing the id", () => {
+    const events: RetrievalEvent[] = [
+      searchEvent({ eventId: "search-1", timestamp: 100, resultIds: ["a", "b"] }),
+      searchEvent({ eventId: "search-2", timestamp: 200, resultIds: ["b"] }),
+      fetchEvent({ eventId: "fetch-1", timestamp: 250, memoryId: "b" }),
+      fetchEvent({ eventId: "fetch-2", timestamp: 260, memoryId: "missing" })
+    ];
+
+    expect(correlateImplicitFetches(events)).toEqual([
+      expect.objectContaining({
+        case_id: "observed-search-2",
+        query: "query search-2",
+        positive_ids: ["b"],
+        target_mode: "production",
+        source: "observed_events"
+      })
+    ]);
+  });
+
+  it("computes ranking, empty-result, and latency metrics", () => {
+    const metrics = computeRetrievalEvalMetrics([
+      {
+        case_id: "one",
+        positive_ids: ["a"],
+        returned_ids: ["x", "a"],
+        latency_ms: 10
+      },
+      {
+        case_id: "two",
+        positive_ids: ["b", "c"],
+        returned_ids: ["c"],
+        latency_ms: 20
+      },
+      {
+        case_id: "three",
+        positive_ids: ["d"],
+        returned_ids: [],
+        latency_ms: 100
+      }
+    ]);
+
+    expect(metrics).toMatchObject({
+      case_count: 3,
+      hit_at_k: { "1": 0.333333, "5": 0.666667 },
+      recall_at_k: { "1": 0.166667, "5": 0.5 },
+      mrr: 0.5,
+      zero_result_count: 1,
+      zero_result_rate: 0.333333,
+      latency_ms: { p50: 20, p95: 100 }
+    });
+  });
+
+  it("replaces a source partition instead of retaining obsolete cases", async () => {
+    const obsolete = evalCase("obsolete");
+    const retainedOtherSource = {
+      ...evalCase("observed"),
+      source: "observed_events" as const
+    };
+    const store = new InMemoryEvalCaseStore([obsolete, retainedOtherSource]);
+    const replacement = evalCase("replacement");
+
+    await replaceRetrievalEvalCases(
+      store,
+      "isolated",
+      "synthetic_flow",
+      [replacement]
+    );
+
+    expect(await store.listCases("isolated")).toEqual([
+      retainedOtherSource,
+      replacement
+    ]);
+  });
+});
+
+class InMemoryEvalCaseStore implements RetrievalEvalCaseStore {
+  constructor(private cases: RetrievalEvalCase[]) {}
+
+  async deleteCases(
+    targetMode: RetrievalEvalTargetMode,
+    source: RetrievalEvalSource
+  ): Promise<void> {
+    this.cases = this.cases.filter(
+      evalCase => evalCase.target_mode !== targetMode || evalCase.source !== source
+    );
+  }
+
+  async writeCases(cases: readonly RetrievalEvalCase[]): Promise<void> {
+    this.cases.push(...cases);
+  }
+
+  async listCases(targetMode: RetrievalEvalTargetMode): Promise<RetrievalEvalCase[]> {
+    return this.cases.filter(evalCase => evalCase.target_mode === targetMode);
+  }
+}
+
+function evalCase(caseId: string): RetrievalEvalCase {
+  return {
+    schema_version: 1,
+    case_id: caseId,
+    target_mode: "isolated",
+    source: "synthetic_flow",
+    query: caseId,
+    filters: { filter_topic: null, filter_state: "active" },
+    limit: 5,
+    memory_collection: "memory_vectors_eval",
+    positive_ids: [caseId],
+    label_source: "implicit_fetch",
+    created_at: 100,
+    updated_at: 100
+  };
+}
+
+function searchEvent(input: {
+  eventId: string;
+  timestamp: number;
+  resultIds: string[];
+}): RetrievalEvent {
+  return {
+    event_id: input.eventId,
+    tool_event_id: `tool-${input.eventId}`,
+    event_type: "search",
+    client_id: "client",
+    status: "success",
+    timestamp: input.timestamp,
+    expires_at: new Date(),
+    memory_collection: "memory_vectors",
+    query: `query ${input.eventId}`,
+    filter_state: "active",
+    limit: 5,
+    result_count: input.resultIds.length,
+    results: input.resultIds.map((id, index) => ({ id, rank: index + 1 }))
+  };
+}
+
+function fetchEvent(input: {
+  eventId: string;
+  timestamp: number;
+  memoryId: string;
+}): RetrievalEvent {
+  return {
+    event_id: input.eventId,
+    tool_event_id: `tool-${input.eventId}`,
+    event_type: "fetch",
+    client_id: "client",
+    status: "success",
+    timestamp: input.timestamp,
+    expires_at: new Date(),
+    memory_collection: "memory_vectors",
+    memory_id: input.memoryId,
+    found: true
+  };
+}
diff --git a/functions/test/support/fakes.ts b/functions/test/support/fakes.ts
index c7f86da..98878c8 100644
--- a/functions/test/support/fakes.ts
+++ b/functions/test/support/fakes.ts
@@ -11,6 +11,7 @@ import type {
   ObservabilityEvent,
   RecordToolCallEventInput,
   RecordRequestEventInput,
+  RetrievalEvent,
   ToolCallObserver
 } from "../../src/observability.js";
 import type {
@@ -218,12 +219,15 @@ export class InMemoryMemoryRepository implements MemoryRepository {
 export class InMemoryToolCallObserver implements ToolCallObserver {
   private nextId = 1;
   private readonly events: ObservabilityEvent[] = [];
+  private readonly retrievalEvents: RetrievalEvent[] = [];
 
   async record(input: RecordToolCallEventInput): Promise<void> {
     const timestamp = input.timestamp ?? Date.now();
 
+    const eventId = `event-${this.nextId++}`;
+
     this.events.push({
-      event_id: `event-${this.nextId++}`,
+      event_id: eventId,
       event_type: "tool_call",
       timestamp,
       expires_at: new Date(timestamp + 90 * 24 * 60 * 60 * 1000),
@@ -237,6 +241,22 @@ export class InMemoryToolCallObserver implements ToolCallObserver {
       ...(input.response ? { response: input.response } : {}),
       ...(input.error ? { error: input.error } : {})
     });
+
+    if (input.retrieval) {
+      this.retrievalEvents.push({
+        event_id: `retrieval-event-${this.nextId++}`,
+        tool_event_id: eventId,
+        timestamp,
+        expires_at: new Date(timestamp + 90 * 24 * 60 * 60 * 1000),
+        client_id: input.client_id,
+        status: input.status,
+        ...(typeof input.latency_ms === "number"
+          ? { latency_ms: input.latency_ms }
+          : {}),
+        ...input.retrieval,
+        ...(input.error ? { error: input.error } : {})
+      });
+    }
   }
 
   async recordRequest(input: RecordRequestEventInput): Promise<void> {
@@ -262,6 +282,10 @@ export class InMemoryToolCallObserver implements ToolCallObserver {
   listEvents(): ObservabilityEvent[] {
     return [...this.events];
   }
+
+  listRetrievalEvents(): RetrievalEvent[] {
+    return [...this.retrievalEvents];
+  }
 }
 
 export function createTestConfig(overrides: Partial<AppConfig> = {}): AppConfig {
@@ -285,6 +309,7 @@ export function createTestConfig(overrides: Partial<AppConfig> = {}): AppConfig
     generationVertexLocation: "global",
     embeddingDimensions: 768,
     memoryCollection: "memory_vectors",
+    retrievalEventLoggingEnabled: false,
     topK: 5,
     defaultFilterState: "active",
     defaultClientProfile: overrides.defaultClientProfile ?? defaultClientProfile,
diff --git a/scripts/deploy-firestore-ttl.sh b/scripts/deploy-firestore-ttl.sh
index 634eacf..08b1b65 100755
--- a/scripts/deploy-firestore-ttl.sh
+++ b/scripts/deploy-firestore-ttl.sh
@@ -30,6 +30,7 @@ done
 collection_groups=(
   "${MEMORY_COLLECTION}_write_fingerprints"
   "memory_events"
+  "retrieval_query_events"
 )
 
 for collection_group in "${collection_groups[@]}"; do