From 67e4586f6766007a276ea9bf8995d01dcb22aa87 Mon Sep 17 00:00:00 2001
From: os-zhuang <jack@objectstack.ai>
Date: Wed, 10 Jun 2026 08:17:44 +0500
Subject: [PATCH] docs(ws5): add Analytics Datasets guide (ADR-0021) + skill
 cross-link
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hand-written guide for the dataset semantic layer: why it exists (no joins /
metric drift / no source of truth), authoring (`defineDataset` with
dimensions/measures/derived/certified), binding dashboards (`dataset` +
`dimensions` + `values` + `filter`→runtimeFilter) and reports (`dataset` +
`rows` + `values` + `runtimeFilter`), cross-object joins via `include`, the
REST query surface, and the dual-form → single-form migration posture. Wired
into the Guides nav; objectstack-ui SKILL cross-links it and notes the
widget-filter → runtimeFilter rule. (Reference pages auto-generate from the
zod schemas.)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 content/docs/guides/analytics-datasets.mdx | 166 +++++++++++++++++++++
 content/docs/guides/meta.json              |   1 +
 skills/objectstack-ui/SKILL.md             |   7 +-
 3 files changed, 173 insertions(+), 1 deletion(-)
 create mode 100644 content/docs/guides/analytics-datasets.mdx

diff --git a/content/docs/guides/analytics-datasets.mdx b/content/docs/guides/analytics-datasets.mdx
new file mode 100644
index 000000000..d5e1b1f62
--- /dev/null
+++ b/content/docs/guides/analytics-datasets.mdx
@@ -0,0 +1,166 @@
+---
+title: Analytics Datasets
+description: The dataset semantic layer (ADR-0021) — define a metric once, bind reports and dashboards to it by name.
+---
+
+# Analytics Datasets
+
+> **Related ADR:** [ADR-0021 — Analytics: one semantic `dataset` layer](https://github.com/objectstack-ai/framework/blob/main/docs/adr/0021-analytics-dataset-semantic-layer.md)
+
+A **dataset** is a named, reusable analytical definition — a base object, the
+relationships to include, and the declared **dimensions** (groupable axes) and
+**measures** (aggregatable values). Reports and dashboards bind to a dataset *by
+reference* and select dimensions/measures *by name* — they never re-declare
+`object` / `field` / `aggregate` inline.
+
+This is the industry-convergent shape (Looker LookML, Power BI dataset+model,
+dbt metrics, Salesforce CRM Analytics): **a governed semantic layer below; thin
+presentations above.**
+
+## Why a semantic layer
+
+Without one, the same metric is re-defined inline in every surface — a report's
+columns, a dashboard widget's `valueField` + `aggregate`, a list-view chart.
+That produces three defects fatal for an enterprise core system:
+
+1. **No joins** — "revenue by account region" needs `order ⋈ account`. An inline
+   single-object query can't reach it.
+2. **Metric drift** — "revenue" defined three times in three grammars diverges
+   across a report, a dashboard tile, and a list chart. A governance red line.
+3. **No source of truth** — no drill-through, no certification, no reuse.
+
+A dataset fixes all three: `revenue` is defined **once**, joins are derived from
+the object graph, and every surface references the same definition.
+
+## Authoring a dataset
+
+```ts
+// src/datasets/sales.dataset.ts
+import { defineDataset } from '@objectstack/spec/ui';
+
+export const SalesDataset = defineDataset({
+  name: 'sales',
+  label: 'Sales',
+  object: 'opportunity',
+
+  // Relationships to include BY NAME (lookup / master_detail field names).
+  // Joins are COMPILED from these — you never write an ON clause.
+  include: ['account'],
+
+  // Definition-level scope (the dataset's intrinsic filter).
+  filter: { is_deleted: { $ne: true } },
+
+  // Groupable axes — a base field, or a `relationship.field` path.
+  dimensions: [
+    { name: 'stage', field: 'stage', type: 'string' },
+    { name: 'region', field: 'account.region', type: 'string' },
+    { name: 'close_date', field: 'close_date', type: 'date', dateGranularity: 'month' },
+  ],
+
+  // Aggregatable values — defined ONCE here; referenced everywhere by name.
+  measures: [
+    { name: 'opp_count', aggregate: 'count' },
+    { name: 'revenue', aggregate: 'sum', field: 'amount', format: '$0,0', certified: true },
+    { name: 'won_amount', aggregate: 'sum', field: 'amount', filter: { stage: 'closed_won' } },
+    // Derived measure — references OTHER measures by name only (no raw fields/SQL).
+    { name: 'win_rate', derived: { op: 'ratio', of: ['won_amount', 'revenue'] }, format: '0.0%' },
+  ],
+});
+```
+
+Register it in your stack alongside `objects` / `dashboards`:
+
+```ts
+export default defineStack({
+  // ...
+  datasets: Object.values(datasets),
+});
+```
+
+### Key rules
+
+- **No raw SQL, no hand-authored joins.** The author declares *which*
+  relationships to include; the compiler derives the join from the object graph.
+- **`certified: true`** marks a human-blessed metric — the review checkpoint.
+  Reviewing AI output collapses to "did it use certified measures correctly?"
+- **Derived measures** are first-class but *closed*: they combine other measures,
+  referenced by name only, via a fixed `op` (`ratio` / `sum` / `difference` / `product`).
+- **RLS / tenant scoping is enforced by the runtime**, per joined object — never
+  declared in the dataset. There is one place to reason about access.
+
+## Binding a dashboard widget
+
+A widget selects dimensions/measures by name. Its presentation-scope `filter`
+flows into the query as the runtime filter:
+
+```ts
+{
+  id: 'revenue_by_stage',
+  type: 'bar',
+  title: 'Pipeline by Stage',
+  dataset: 'sales',
+  dimensions: ['stage'],      // X / group / split
+  values: ['revenue'],        // Y — the measure name, not amount+sum
+  filter: { stage: { $nin: ['closed_lost'] } }, // presentation scope (runtimeFilter)
+}
+```
+
+A `metric` (KPI) widget omits `dimensions` and shows the single measure value.
+
+## Binding a report
+
+```ts
+export const SalesByStageReport = {
+  name: 'sales_by_stage',
+  label: 'Sales by Stage',
+  dataset: 'sales',
+  rows: ['stage'],            // dimension names down
+  values: ['revenue'],        // measure names
+  runtimeFilter: { close_date: { $gte: '{current_quarter_start}' } },
+};
+```
+
+`rows` are the pivot's down-axis dimensions; `values` are measure names. A matrix
+report adds across-axis dimensions; `runtimeFilter` is the render-time scope
+(date macros such as `{current_quarter_start}` are resolved by the renderer before querying).
+
+## Cross-object joins
+
+Because the dataset's `include` compiles to the analytics runtime's join path,
+any report or widget can be multi-object **safely** — the headline enterprise
+capability the inline single-object query could never reach:
+
+```ts
+{ dataset: 'sales', dimensions: ['region'], values: ['revenue'] }
+// → revenue by account.region, joined + RLS-enforced per object.
+```
+
+## How it runs
+
+A dataset compiles to the **Cube analytics runtime** (`IAnalyticsService`). The
+REST surface is:
+
+```
+POST {basePath}/analytics/dataset/query
+{ "datasetName": "sales",
+  "selection": { "dimensions": ["stage"], "measures": ["revenue"], "runtimeFilter": { ... } } }
+```
+
+The same governed path backs the Studio dataset preview, dashboard widgets, and
+dataset-bound reports — so the numbers match everywhere.
+
+## Migrating from inline queries
+
+ADR-0021's terminal state is **one** author-facing shape. The migration runs in
+two steps so it can be verified safely:
+
+1. **Dual-form (additive).** A report/widget keeps its legacy inline query AND
+   gains a `dataset` binding. A read-only reconciliation harness asserts both
+   forms return identical numbers (the financial-correctness gate).
+2. **Single-form (terminal).** Once every surface reconciles and `grep` shows no
+   inline residue, the inline query fields and `ListChartConfigSchema` are
+   removed and the union collapses to the single dataset shape.
+
+Author new analytics directly in dataset form; reach for a **named** dataset when
+a metric is shared or must be certified, and an inline anonymous dataset for a
+one-off single-object KPI.
diff --git a/content/docs/guides/meta.json b/content/docs/guides/meta.json
index 8fb77b65f..e601f0db2 100644
--- a/content/docs/guides/meta.json
+++ b/content/docs/guides/meta.json
@@ -9,6 +9,7 @@
     "seed-data",
     "common-patterns",
     "formula",
+    "analytics-datasets",
     "airtable-dashboard-analysis",
     "---Building---",
     "plugins",
diff --git a/skills/objectstack-ui/SKILL.md b/skills/objectstack-ui/SKILL.md
index e95ab5cac..69f3a4dc5 100644
--- a/skills/objectstack-ui/SKILL.md
+++ b/skills/objectstack-ui/SKILL.md
@@ -357,7 +357,12 @@ data-bound widget.
 For shared metrics, prefer the ADR-0021 dataset shape over per-widget inline
 queries. A widget binds to `dataset` and selects named `dimensions` and
 `values`; the dataset owns the base object, allowed joins, intrinsic filter,
-dimensions, and certified measures.
+dimensions, and certified measures. Reports bind the same way (`dataset` +
+`rows` + `values` + `runtimeFilter`). Full guide: **Guides → Analytics Datasets**
+(`content/docs/guides/analytics-datasets.mdx`).
+
+A widget's presentation-scope `filter` flows into the query as the runtime
+filter; keep `filter` on the widget when binding a dataset.
 
 ```typescript
 {