Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,11 @@ After deployment, there are three places to look:
- `memory_events` in Firestore shows client-attributed tool usage over time
- Cloud Logging shows request failures and structured tool-event logs

When `RETRIEVAL_EVENT_LOGGING_ENABLED=true`, the `retrieval_query_events` collection
additionally stores full search queries, filters, limits, ranked result ids/scores, and
fetch ids. It is kept separate from `memory_events` because the full queries may contain
sensitive user text; it uses the same 90-day TTL target as `memory_events`.

`memory_events` records one document per tool call and one document per ingress rejection. Events include:

- `client_id`
Expand Down Expand Up @@ -438,9 +443,35 @@ Search events do include a short `query_preview`, but the observability collecti
Retention is handled with Firestore TTL policies:

- `memory_events.expires_at` targets 90-day audit retention
- `retrieval_query_events.expires_at` targets 90-day retrieval telemetry retention
- `memory_vectors_write_fingerprints.expires_at` targets 30-day fingerprint retention
- fingerprint documents keep numeric `dedupe_expires_at` for the short duplicate-write window

## Retrieval evaluation

The evaluation corpus is stored in `retrieval_eval_cases`; isolated synthetic
memories are stored separately in `memory_vectors_eval`. Cases have no lifecycle
field; instead, regeneration replaces the selected source partition and removes
cases that have become obsolete.

```bash
# Seed a deterministic isolated corpus and replace its eval cases.
npm --prefix functions run eval:generate

# Measure hit@k, recall@k, MRR, empty results, and p50/p95 latency.
npm --prefix functions run eval:run -- --mode isolated

# Explicitly convert recent production search-to-fetch evidence into eval cases.
npm --prefix functions run eval:import -- --lookback-hours 168

# Run imported production cases through the deployed MCP endpoint.
npm --prefix functions run eval:run -- --mode production --url "$MCP_BASE_URL"
```

Production retrieval events are evidence only. They do not become benchmark cases
until `eval:import` is run, and a successful fetch is treated as a positive label;
the harness does not infer negative relevance judgments.

## Quick start

1. Install dependencies:
Expand Down
115 changes: 114 additions & 1 deletion firestore.indexes.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,120 @@
}
}
]
},
{
"collectionGroup": "memory_vectors_eval",
"queryScope": "COLLECTION",
"fields": [
{
"fieldPath": "metadata.module_name",
"order": "ASCENDING"
},
{
"fieldPath": "embedding",
"vectorConfig": {
"dimension": 768,
"flat": {}
}
}
]
},
{
"collectionGroup": "memory_vectors_eval",
"queryScope": "COLLECTION",
"fields": [
{
"fieldPath": "metadata.branch_state",
"order": "ASCENDING"
},
{
"fieldPath": "embedding",
"vectorConfig": {
"dimension": 768,
"flat": {}
}
}
]
},
{
"collectionGroup": "memory_vectors_eval",
"queryScope": "COLLECTION",
"fields": [
{
"fieldPath": "metadata.branch_state",
"order": "ASCENDING"
},
{
"fieldPath": "metadata.module_name",
"order": "ASCENDING"
},
{
"fieldPath": "embedding",
"vectorConfig": {
"dimension": 768,
"flat": {}
}
}
]
}
],
"fieldOverrides": []
"fieldOverrides": [
{
"collectionGroup": "memory_events",
"fieldPath": "expires_at",
"ttl": true,
"indexes": [
{
"order": "ASCENDING",
"queryScope": "COLLECTION"
},
{
"order": "DESCENDING",
"queryScope": "COLLECTION"
},
{
"arrayConfig": "CONTAINS",
"queryScope": "COLLECTION"
}
]
},
{
"collectionGroup": "memory_vectors_write_fingerprints",
"fieldPath": "expires_at",
"ttl": true,
"indexes": [
{
"order": "ASCENDING",
"queryScope": "COLLECTION"
},
{
"order": "DESCENDING",
"queryScope": "COLLECTION"
},
{
"arrayConfig": "CONTAINS",
"queryScope": "COLLECTION"
}
]
},
{
"collectionGroup": "retrieval_query_events",
"fieldPath": "expires_at",
"ttl": true,
"indexes": [
{
"order": "ASCENDING",
"queryScope": "COLLECTION"
},
{
"order": "DESCENDING",
"queryScope": "COLLECTION"
},
{
"arrayConfig": "CONTAINS",
"queryScope": "COLLECTION"
}
]
}
]
}
12 changes: 12 additions & 0 deletions firestore.rules
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,20 @@ service cloud.firestore {
match /memory_events/{document=**} {
allow read, write: if false;
}
match /retrieval_query_events/{document=**} {
allow read, write: if false;
}
match /retrieval_eval_cases/{document=**} {
allow read, write: if false;
}
match /memory_vectors_eval/{document=**} {
allow read, write: if false;
}
match /memory_vectors_write_fingerprints/{document=**} {
allow read, write: if false;
}
match /memory_vectors_eval_write_fingerprints/{document=**} {
allow read, write: if false;
}
}
}
3 changes: 3 additions & 0 deletions functions/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ GEMINI_MERGE_MODEL=gemini-3.5-flash
GEMINI_GENERATION_VERTEX_LOCATION=global
GEMINI_EMBEDDING_DIMENSIONS=768
MEMORY_COLLECTION=memory_vectors
# Set to true to log full search queries (may contain sensitive user text), ranked ids/scores, and fetch ids for retrieval evaluation.
RETRIEVAL_EVENT_LOGGING_ENABLED=false
RETRIEVAL_EVAL_MEMORY_COLLECTION=memory_vectors_eval
SEARCH_RESULT_LIMIT=5
DEFAULT_FILTER_STATE=active
SERVICE_NAME=metacortex
Expand Down
3 changes: 3 additions & 0 deletions functions/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
"build": "tsc -p tsconfig.json",
"clean": "node -e \"const fs=require('fs'); fs.rmSync('lib',{recursive:true,force:true}); fs.rmSync('coverage',{recursive:true,force:true});\"",
"backfill:ttl": "node scripts/backfill-firestore-ttl.mjs",
"eval:generate": "tsx scripts/retrieval-eval.ts generate-isolated",
"eval:import": "tsx scripts/retrieval-eval.ts import-production",
"eval:run": "tsx scripts/retrieval-eval.ts run",
"serve": "cd .. && firebase emulators:start --only functions,firestore",
"shell": "firebase functions:shell",
"smoke": "node scripts/mcp-smoke-test.mjs",
Expand Down
13 changes: 7 additions & 6 deletions functions/scripts/mcp-smoke-test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ const imageMimeType = readArg(
process.env.MCP_IMAGE_MIME_TYPE ?? inferMimeType(imageFile)
);
const artifactRef = readArg("artifact-ref", process.env.MCP_ARTIFACT_REF);
const fetchFirst = readArg(
"fetch-first",
process.env.MCP_FETCH_FIRST ?? "false"
) === "true";

if (!url) {
console.error("Missing MCP base URL. Pass --url or set MCP_BASE_URL.");
Expand Down Expand Up @@ -157,10 +161,6 @@ try {
rememberedId = extractRememberedId(rememberText);
} else if (mode === "search-only") {
ensureTools(toolNames, ["search_context"]);

if (toolNames.includes("remember_context")) {
throw new Error("search-only mode expected remember_context to be unavailable");
}
} else {
throw new Error(`Unsupported smoke mode: ${mode}`);
}
Expand All @@ -178,12 +178,13 @@ try {
const searchText = requireSuccessfulToolResult(searchResult, "search_context");
console.log(searchText);

if (mode === "browser-read-write") {
if (mode === "browser-read-write" || fetchFirst) {
ensureTools(toolNames, ["fetch_context"]);
const memoryId = rememberedId ?? extractMemoryId(searchText);

if (!memoryId) {
throw new Error(
"browser-read-write mode expected remember_context or search_context to return an id"
"fetch-first expected remember_context or search_context to return an id"
);
}

Expand Down
Loading
Loading