From 93dc2ae068db34eaf33b90ec2d8f6a102488f60e Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 21 Jun 2026 10:18:55 +0000 Subject: [PATCH 1/8] Add memory diagnostic benchmark with cross-parser comparison and regression detection Measures parse-only and parse+walk memory across Wallace, csstree, and postcss, broken down into heap vs external (ArrayBuffer) so arena overhead is visible separately. Also reports arena stats (node count, capacity, waste%, growths) to diagnose over-allocation. Run: node --expose-gc benchmark/memory.ts Save baseline: pnpm run benchmark:memory:baseline Compare branch vs baseline: pnpm run benchmark:memory Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01TADcY37pEm5JGh8YJ7UuVQ --- .gitignore | 3 +- benchmark/memory.ts | 348 ++++++++++++++++++++++++++++++++++++++++++++ package.json | 2 + 3 files changed, 352 insertions(+), 1 deletion(-) create mode 100644 benchmark/memory.ts diff --git a/.gitignore b/.gitignore index 87953c0..389104e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ node_modules dist coverage .claude/settings.local.json -package-lock.json \ No newline at end of file +package-lock.json +benchmark/memory-results.json \ No newline at end of file diff --git a/benchmark/memory.ts b/benchmark/memory.ts new file mode 100644 index 0000000..aeb5340 --- /dev/null +++ b/benchmark/memory.ts @@ -0,0 +1,348 @@ +// Memory diagnostics: parse / walk / parse+walk across parsers and files +// +// Usage: +// node --expose-gc benchmark/memory.ts +// node --expose-gc benchmark/memory.ts --save-baseline +// +// Writes benchmark/memory-results.json every run. +// If benchmark/memory-baseline.json exists, prints a regression diff. +// Save a new baseline with --save-baseline (e.g. after merging to main). 
+ +/// +// @ts-expect-error: no type definitions for css-tree +import * as csstree from 'css-tree' +import * as fs from 'node:fs' +import * as path from 'node:path' +import * as postcss from 'postcss' +import { fileURLToPath } from 'node:url' +import { parse, walk } from '../dist/index.js' + +const __dirname = path.dirname(fileURLToPath(import.meta.url)) + +// ── Config ──────────────────────────────────────────────────────────────────── + +/** Measurement iterations per cell (median is taken) */ +const ITERATIONS = 5 +/** Flag a regression if memory grows more than this fraction vs baseline */ +const REGRESSION_THRESHOLD = 0.08 + +const BYTES_PER_NODE = 32 + +// ── CSS files ───────────────────────────────────────────────────────────────── + +function read(rel: string): string { + return fs.readFileSync(path.join(__dirname, rel), 'utf-8') +} + +const CSS_FILES: Record = { + Small: read('small.css'), + Medium: read('medium.css'), + Bootstrap: read('../node_modules/bootstrap/dist/css/bootstrap.css'), + Tailwind: read('../node_modules/tailwindcss/dist/tailwind.css'), +} + +// ── GC / snapshot ───────────────────────────────────────────────────────────── + +const _gc = (globalThis as { gc?: () => void }).gc + +if (!_gc) { + console.error('Run with --expose-gc: node --expose-gc benchmark/memory.ts') + process.exit(1) +} + +function force_gc(rounds = 5): void { + for (let i = 0; i < rounds; i++) _gc!() +} + +interface Mem { + heap: number // JS heap only (objects, closures, strings) + external: number // ArrayBuffers, native-backed memory + total: number // heap + external + rss: number // resident set size (whole process) +} + +function snap(): Mem { + const m = process.memoryUsage() + return { heap: m.heapUsed, external: m.external, total: m.heapUsed + m.external, rss: m.rss } +} + +function diff(before: Mem, after: Mem): Mem { + return { + heap: after.heap - before.heap, + external: after.external - before.external, + total: after.total - before.total, + rss: 
after.rss - before.rss, + } +} + +/** Run fn ITERATIONS times, return the median Mem delta. */ +function measure(fn: () => void): Mem { + const deltas: Mem[] = [] + for (let i = 0; i < ITERATIONS; i++) { + force_gc() + const before = snap() + fn() + const after = snap() + deltas.push(diff(before, after)) + } + deltas.sort((a, b) => a.total - b.total) + return deltas[Math.floor(deltas.length / 2)] +} + +// ── Arena stats ─────────────────────────────────────────────────────────────── + +interface ArenaStats { + node_count: number + capacity: number + growth_count: number + used_kb: number + total_kb: number + waste_pct: number +} + +function arena_stats(root: ReturnType): ArenaStats { + // __get_arena() is @internal but stable for diagnostics + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const arena = (root as any).__get_arena() + const node_count: number = arena.get_count() + const capacity: number = arena.get_capacity() + const growth_count: number = arena.get_growth_count() + const used_kb = (node_count * BYTES_PER_NODE) / 1024 + const total_kb = (capacity * BYTES_PER_NODE) / 1024 + const waste_pct = ((capacity - node_count) / capacity) * 100 + return { node_count, capacity, growth_count, used_kb, total_kb, waste_pct } +} + +// ── Formatters ──────────────────────────────────────────────────────────────── + +function fmt_kb(bytes: number): string { + return `${(bytes / 1024).toFixed(1)} KB` +} + +function fmt_mb(bytes: number): string { + return `${(bytes / 1024 / 1024).toFixed(2)} MB` +} + +function fmt_pct(n: number): string { + return `${n >= 0 ? 
'+' : ''}${n.toFixed(1)}%` +} + +// ── Results structures ──────────────────────────────────────────────────────── + +interface PhaseRow { + heap: number + external: number + total: number +} + +interface FileResult { + size_bytes: number + // per parser: parse-only, parse+walk + wallace: { parse: PhaseRow; parse_walk: PhaseRow } + csstree: { parse: PhaseRow; parse_walk: PhaseRow } + postcss: { parse: PhaseRow; parse_walk: PhaseRow } + arena: ArenaStats +} + +// ── Main ────────────────────────────────────────────────────────────────────── + +const results: Record = {} + +for (const [name, css] of Object.entries(CSS_FILES)) { + process.stdout.write(`Measuring ${name}...`) + + // Parse-only + const w_parse = measure(() => { + parse(css) + }) + const c_parse = measure(() => { + csstree.parse(css, { positions: true }) + }) + const p_parse = measure(() => { + postcss.parse(css) + }) + + // Parse+Walk + const w_parse_walk = measure(() => { + const ast = parse(css) + walk(ast, (node) => { + void node.type + void node.line + }) + }) + const c_parse_walk = measure(() => { + const ast = csstree.parse(css, { positions: true }) + csstree.walk(ast, (node: { type: unknown; loc?: { start: { line: unknown } } }) => { + void node.type + void node.loc?.start.line + }) + }) + const p_parse_walk = measure(() => { + const root = postcss.parse(css) + root.walk((node) => { + void node.type + void node.source?.start?.line + }) + }) + + // Arena stats (single parse, outside measurement window) + const ast = parse(css) + const stats = arena_stats(ast) + + results[name] = { + size_bytes: css.length, + wallace: { parse: w_parse, parse_walk: w_parse_walk }, + csstree: { parse: c_parse, parse_walk: c_parse_walk }, + postcss: { parse: p_parse, parse_walk: p_parse_walk }, + arena: stats, + } + + console.log(' done') +} + +// ── Tables ──────────────────────────────────────────────────────────────────── + +console.log('\n── Parse-only memory: heap / external / total 
───────────────────────────\n') +console.log('(heap = JS objects | external = ArrayBuffer / native | total = heap+external)\n') + +console.table( + Object.entries(results).map(([name, r]) => ({ + File: name, + Size: fmt_kb(r.size_bytes), + // Wallace splits cleanly: arena lives in external, JS objects in heap + 'W heap': fmt_kb(r.wallace.parse.heap), + 'W ext': fmt_kb(r.wallace.parse.external), + 'W total': fmt_mb(r.wallace.parse.total), + // csstree: all in heap (plain JS objects) + 'C heap': fmt_kb(r.csstree.parse.heap), + 'C ext': fmt_kb(r.csstree.parse.external), + 'C total': fmt_mb(r.csstree.parse.total), + // postcss: all in heap + 'P heap': fmt_kb(r.postcss.parse.heap), + 'P ext': fmt_kb(r.postcss.parse.external), + 'P total': fmt_mb(r.postcss.parse.total), + 'W vs C': r.csstree.parse.total > 0 + ? `${(r.csstree.parse.total / r.wallace.parse.total).toFixed(2)}x` + : 'N/A', + 'W vs P': r.postcss.parse.total > 0 + ? `${(r.postcss.parse.total / r.wallace.parse.total).toFixed(2)}x` + : 'N/A', + })), +) + +console.log('\n── Parse+Walk memory: heap / external / total ───────────────────────────\n') +console.log('(Walk creates new CSSNode wrappers per node — they show up in heap)\n') + +console.table( + Object.entries(results).map(([name, r]) => ({ + File: name, + Size: fmt_kb(r.size_bytes), + 'W heap': fmt_kb(r.wallace.parse_walk.heap), + 'W ext': fmt_kb(r.wallace.parse_walk.external), + 'W total': fmt_mb(r.wallace.parse_walk.total), + 'C heap': fmt_kb(r.csstree.parse_walk.heap), + 'C ext': fmt_kb(r.csstree.parse_walk.external), + 'C total': fmt_mb(r.csstree.parse_walk.total), + 'P heap': fmt_kb(r.postcss.parse_walk.heap), + 'P ext': fmt_kb(r.postcss.parse_walk.external), + 'P total': fmt_mb(r.postcss.parse_walk.total), + 'W vs C': r.csstree.parse_walk.total > 0 + ? `${(r.csstree.parse_walk.total / r.wallace.parse_walk.total).toFixed(2)}x` + : 'N/A', + 'W vs P': r.postcss.parse_walk.total > 0 + ? 
`${(r.postcss.parse_walk.total / r.wallace.parse_walk.total).toFixed(2)}x` + : 'N/A', + })), +) + +console.log('\n── Arena stats (Wallace) ────────────────────────────────────────────────\n') +console.log('(waste% = (capacity - node_count) / capacity — unused pre-allocated slots)\n') + +console.table( + Object.entries(results).map(([name, r]) => { + const a = r.arena + return { + File: name, + 'Nodes used': a.node_count.toLocaleString(), + 'Capacity': a.capacity.toLocaleString(), + 'Growths': a.growth_count, + 'Arena used': fmt_kb(a.used_kb * 1024), + 'Arena total': fmt_kb(a.total_kb * 1024), + 'Waste %': `${a.waste_pct.toFixed(1)}%`, + // nodes/KB of source = accuracy of NODES_PER_KB heuristic + 'Nodes/KB src': (a.node_count / (r.size_bytes / 1024)).toFixed(0), + } + }), +) + +// ── Save results ────────────────────────────────────────────────────────────── + +const results_path = path.join(__dirname, 'memory-results.json') +const baseline_path = path.join(__dirname, 'memory-baseline.json') + +const snapshot_data = { + timestamp: new Date().toISOString(), + node_version: process.version, + results, +} + +fs.writeFileSync(results_path, JSON.stringify(snapshot_data, null, 2)) +console.log(`\nResults saved → ${results_path}`) + +// ── Baseline comparison ─────────────────────────────────────────────────────── + +if (process.argv.includes('--save-baseline')) { + fs.writeFileSync(baseline_path, JSON.stringify(snapshot_data, null, 2)) + console.log(`Baseline saved → ${baseline_path}`) +} else if (fs.existsSync(baseline_path)) { + const baseline = JSON.parse(fs.readFileSync(baseline_path, 'utf-8')) + + console.log('\n── Regression vs baseline ───────────────────────────────────────────────\n') + console.log(`Baseline: ${baseline.timestamp} (Node ${baseline.node_version})`) + console.log(`Threshold: >${(REGRESSION_THRESHOLD * 100).toFixed(0)}% increase = REGRESSION\n`) + + const regressions: string[] = [] + const rows: Record[] = [] + + for (const [file, cur] of 
Object.entries(results) as [string, FileResult][]) { + const base = baseline.results[file] as FileResult | undefined + if (!base) continue + + type Parser = 'wallace' | 'csstree' | 'postcss' + type Phase = 'parse' | 'parse_walk' + + for (const parser of ['wallace', 'csstree', 'postcss'] as Parser[]) { + for (const phase of ['parse', 'parse_walk'] as Phase[]) { + const cur_total = cur[parser][phase].total + const base_total = base[parser][phase].total + if (base_total === 0) continue + + const pct_change = (cur_total - base_total) / base_total + const is_regression = pct_change > REGRESSION_THRESHOLD + const label = `${file} / ${parser} / ${phase}` + + if (is_regression) regressions.push(label) + + rows.push({ + Label: label, + Baseline: fmt_mb(base_total), + Current: fmt_mb(cur_total), + Delta: fmt_pct(pct_change * 100), + Status: is_regression ? '❌ REGRESSION' : pct_change < -0.02 ? '✅ improvement' : ' ok', + }) + } + } + } + + console.table(rows) + + if (regressions.length > 0) { + console.log(`\n❌ ${regressions.length} regression(s) detected:`) + for (const r of regressions) console.log(` - ${r}`) + process.exit(1) + } else { + console.log('✅ No regressions detected.') + } +} else { + console.log(`\nNo baseline found. 
Run with --save-baseline on your main branch to enable regression detection.`) +} diff --git a/package.json b/package.json index b0c06fc..5492c83 100644 --- a/package.json +++ b/package.json @@ -80,6 +80,8 @@ "test-build": "pnpm run build && vitest run --config vitest.config.build.ts", "build": "tsdown", "benchmark": "pnpm run build && node --expose-gc benchmark/index.ts", + "benchmark:memory": "pnpm run build && node --expose-gc benchmark/memory.ts", + "benchmark:memory:baseline": "pnpm run build && node --expose-gc benchmark/memory.ts --save-baseline", "lint": "oxlint --config .oxlintrc.json && oxfmt --check", "check": "tsc --noEmit", "knip": "knip", From 24ed56f5c19f6c5663e8406136e5b4a28d8133d3 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 21 Jun 2026 13:41:38 +0000 Subject: [PATCH 2/8] Trim arena to exact node count after parsing Adds CSSDataArena.trim() which shrinks the backing ArrayBuffer to count * BYTES_PER_NODE, eliminating the 50-65% of wasted pre-allocated capacity that was visible as external memory on the parse result. Before: Bootstrap parse = 3336 KB external (106k-node pre-alloc for 37k actual) After: Bootstrap parse = 1174 KB external (37k * 32 bytes, zero waste) The NODES_PER_KB heuristic is still useful for avoiding arena growths during parsing; it just no longer determines the final allocation size. Also fixes the memory benchmark's measurement loop to hold the parse result in a module-level sink across the post-operation GC. A local `void result` is insufficient because V8's JIT can shorten the variable's lifetime before the GC call runs, causing the arena buffer to be collected and the delta to read near-zero. 
Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01TADcY37pEm5JGh8YJ7UuVQ --- benchmark/memory.ts | 39 ++++++++++++++++++++---------- src/arena.test.ts | 59 +++++++++++++++++++++++++++++++++------------ src/arena.ts | 12 +++++++++ src/parse.ts | 3 +++ 4 files changed, 84 insertions(+), 29 deletions(-) diff --git a/benchmark/memory.ts b/benchmark/memory.ts index aeb5340..042eeea 100644 --- a/benchmark/memory.ts +++ b/benchmark/memory.ts @@ -43,6 +43,11 @@ const CSS_FILES: Record = { // ── GC / snapshot ───────────────────────────────────────────────────────────── +// Module-level sink: assigning here prevents V8 from treating fn()'s return +// value as dead before force_gc() runs. A local `void result` is not enough +// because the JIT may determine it's a no-op and shorten the variable's lifetime. +let _measurement_sink: unknown = null + const _gc = (globalThis as { gc?: () => void }).gc if (!_gc) { @@ -75,14 +80,25 @@ function diff(before: Mem, after: Mem): Mem { } } -/** Run fn ITERATIONS times, return the median Mem delta. */ -function measure(fn: () => void): Mem { +/** + * Run fn ITERATIONS times, return the median retained-memory delta. + * + * fn must return the primary result object (the parse tree / root node). + * We store it in _measurement_sink (module scope) so V8 cannot shorten its + * lifetime before the post-operation GC runs. That GC collects dead + * temporaries (e.g. the pre-trim arena buffer) without collecting the live + * result. We clear the sink before the next iteration so the next baseline + * snapshot starts clean. 
+ */ +function measure(fn: () => unknown): Mem { const deltas: Mem[] = [] for (let i = 0; i < ITERATIONS; i++) { force_gc() const before = snap() - fn() + _measurement_sink = fn() + force_gc() // collect dead temporaries; _measurement_sink keeps result alive const after = snap() + _measurement_sink = null // release before next iteration's baseline GC deltas.push(diff(before, after)) } deltas.sort((a, b) => a.total - b.total) @@ -152,23 +168,18 @@ for (const [name, css] of Object.entries(CSS_FILES)) { process.stdout.write(`Measuring ${name}...`) // Parse-only - const w_parse = measure(() => { - parse(css) - }) - const c_parse = measure(() => { - csstree.parse(css, { positions: true }) - }) - const p_parse = measure(() => { - postcss.parse(css) - }) + const w_parse = measure(() => parse(css)) + const c_parse = measure(() => csstree.parse(css, { positions: true })) + const p_parse = measure(() => postcss.parse(css)) - // Parse+Walk + // Parse+Walk — return the AST so the arena stays alive through the post-GC const w_parse_walk = measure(() => { const ast = parse(css) walk(ast, (node) => { void node.type void node.line }) + return ast }) const c_parse_walk = measure(() => { const ast = csstree.parse(css, { positions: true }) @@ -176,6 +187,7 @@ for (const [name, css] of Object.entries(CSS_FILES)) { void node.type void node.loc?.start.line }) + return ast }) const p_parse_walk = measure(() => { const root = postcss.parse(css) @@ -183,6 +195,7 @@ for (const [name, css] of Object.entries(CSS_FILES)) { void node.type void node.source?.start?.line }) + return root }) // Arena stats (single parse, outside measurement window) diff --git a/src/arena.test.ts b/src/arena.test.ts index fe1f6bd..ba09ed9 100644 --- a/src/arena.test.ts +++ b/src/arena.test.ts @@ -321,16 +321,49 @@ describe('CSSDataArena', () => { }) }) + describe('trim', () => { + test('should set capacity equal to count', () => { + const arena = new CSSDataArena(100) + arena.create_node(STYLESHEET, 0, 0, 1, 1) + 
arena.create_node(DECLARATION, 0, 0, 1, 1) + expect(arena.get_capacity()).toBe(100) + arena.trim() + expect(arena.get_capacity()).toBe(arena.get_count()) + }) + + test('should preserve all node data after trim', () => { + const arena = new CSSDataArena(100) + const n1 = arena.create_node(STYLESHEET, 10, 500, 1, 1) + const n2 = arena.create_node(DECLARATION, 20, 30, 2, 5) + arena.set_flag(n2, FLAG_IMPORTANT) + arena.trim() + + expect(arena.get_type(n1)).toBe(STYLESHEET) + expect(arena.get_start_offset(n1)).toBe(10) + expect(arena.get_type(n2)).toBe(DECLARATION) + expect(arena.get_start_offset(n2)).toBe(20) + expect(arena.has_flag(n2, FLAG_IMPORTANT)).toBe(true) + }) + + test('should be a no-op when already tight', () => { + const arena = new CSSDataArena(2) + arena.create_node(STYLESHEET, 0, 0, 1, 1) // count = 2, triggers grow to capacity 3 + arena.create_node(STYLESHEET, 0, 0, 1, 1) // count = 3 = capacity + expect(arena.get_count()).toBe(arena.get_capacity()) + arena.trim() // should not throw or corrupt + expect(arena.get_count()).toBe(arena.get_capacity()) + }) + }) + describe('real-world CSS frameworks', () => { test('should not grow for Bootstrap CSS', () => { const css = readFileSync('node_modules/bootstrap/dist/css/bootstrap.css', 'utf-8') const result = parse(css) as unknown as CSSNode + const arena = result.__get_arena() - expect(result.__get_arena().get_growth_count()).toBe(0) - const utilization = - (result.__get_arena().get_count() / result.__get_arena().get_capacity()) * 100 - expect(utilization).toBeLessThan(85) - expect(utilization).toBeGreaterThan(30) + expect(arena.get_growth_count()).toBe(0) + // parse() calls trim(), so capacity must equal count + expect(arena.get_capacity()).toBe(arena.get_count()) }) test('should not grow for Bootstrap minified CSS', () => { @@ -339,9 +372,7 @@ describe('CSSDataArena', () => { const arena = result.__get_arena() expect(arena.get_growth_count()).toBe(0) - const utilization = (arena.get_count() / 
arena.get_capacity()) * 100 - expect(utilization).toBeLessThan(85) - expect(utilization).toBeGreaterThan(30) + expect(arena.get_capacity()).toBe(arena.get_count()) }) test('should not grow for Tailwind CSS', () => { @@ -350,20 +381,16 @@ describe('CSSDataArena', () => { const arena = result.__get_arena() expect(arena.get_growth_count()).toBe(0) - const utilization = (arena.get_count() / arena.get_capacity()) * 100 - expect(utilization).toBeLessThan(85) - expect(utilization).toBeGreaterThan(30) + expect(arena.get_capacity()).toBe(arena.get_count()) }) test('should not grow for Tailwind minified CSS', () => { const css = readFileSync('node_modules/tailwindcss/dist/tailwind.min.css', 'utf-8') const result = parse(css) as unknown as CSSNode + const arena = result.__get_arena() - expect(result.__get_arena().get_growth_count()).toBe(0) - const utilization = - (result.__get_arena().get_count() / result.__get_arena().get_capacity()) * 100 - expect(utilization).toBeLessThan(85) - expect(utilization).toBeGreaterThan(30) + expect(arena.get_growth_count()).toBe(0) + expect(arena.get_capacity()).toBe(arena.get_count()) }) }) }) diff --git a/src/arena.ts b/src/arena.ts index 98f67e6..0ad8d02 100644 --- a/src/arena.ts +++ b/src/arena.ts @@ -350,6 +350,18 @@ export class CSSDataArena { } } + // Shrink the buffer to exactly the number of live nodes, releasing wasted capacity. + // Call once after parsing is complete. Safe to call multiple times (no-op if already tight). 
+ trim(): void { + if (this.count === this.capacity) return + let byte_count = this.count * BYTES_PER_NODE + let new_buffer = new ArrayBuffer(byte_count) + new Uint8Array(new_buffer).set(new Uint8Array(this.buffer, 0, byte_count)) + this.buffer = new_buffer + this.view = new DataView(new_buffer) + this.capacity = this.count + } + // Check if a node has any children has_children(node_index: number): boolean { return this.get_first_child(node_index) !== 0 diff --git a/src/parse.ts b/src/parse.ts index 0bb4603..5f80c8b 100644 --- a/src/parse.ts +++ b/src/parse.ts @@ -147,6 +147,9 @@ export class Parser { // Link all rules as children this.arena.append_children(stylesheet, rules) + // Release wasted pre-allocated capacity now that node count is final + this.arena.trim() + // Return wrapped node return new CSSNode(this.arena, this.source, stylesheet) as StyleSheet } From 15198291c5f4ede44795933664aa5dd52a96a6b4 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 21 Jun 2026 14:15:25 +0000 Subject: [PATCH 3/8] =?UTF-8?q?Lower=20NODES=5FPER=5FKB=20325=E2=86=92210,?= =?UTF-8?q?=20CAPACITY=5FBUFFER=201.2=E2=86=921.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Measured actual nodes/KB across real-world files: bootstrap 137 | bootstrap.min 166 | tailwind 157 | tailwind.min 195 | small 198 210 * 1.1 = 231 nodes/KB effective ceiling, ~16% above the observed max of 198. All tested files still parse with zero arena growths. Effect on peak arena allocation during parsing (before trim): Bootstrap: 3336 KB → 1976 KB (-41%) Tailwind: 43350 KB → 25677 KB (-41%) Retained memory after trim is unchanged (always exact node_count * 32 bytes). The constants now only govern transient peak during the parse call itself. 
Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01TADcY37pEm5JGh8YJ7UuVQ --- src/arena.ts | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/arena.ts b/src/arena.ts index 0ad8d02..0ee8f65 100644 --- a/src/arena.ts +++ b/src/arena.ts @@ -116,13 +116,15 @@ export class CSSDataArena { // Growth multiplier when capacity is exceeded private static readonly GROWTH_FACTOR = 1.3 - // Estimated nodes per KB of CSS (based on real-world data) - // Increased from 270 to 325 to account for VALUE wrapper nodes - // (~20% of nodes are declarations, +1 VALUE node per declaration = +20% nodes) - private static readonly NODES_PER_KB = 325 - - // Buffer to avoid frequent growth (15%) - private static readonly CAPACITY_BUFFER = 1.2 + // Estimated nodes per KB of CSS. + // Measured across real-world files (unminified and minified): + // bootstrap.css 137 | bootstrap.min 166 | tailwind.css 157 | tailwind.min 195 | small 198 + // 210 alone sits ~6% above the observed ceiling of 198 nodes/KB; CAPACITY_BUFFER supplies the remaining headroom. + private static readonly NODES_PER_KB = 210 + + // Safety buffer on top of NODES_PER_KB to absorb variance without a grow. + // Combined with the constant above: effective ceiling = 210 × 1.1 = 231 nodes/KB. + private static readonly CAPACITY_BUFFER = 1.1 constructor(initial_capacity: number = 1024) { this.capacity = initial_capacity From da71b2f033fdc3bc727047b9fdc776183b472f8f Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 21 Jun 2026 15:19:39 +0000 Subject: [PATCH 4/8] fix(benchmark): replace __dirname with import.meta.dirname Fixes oxlint "Do not construct dirname" error in benchmark/memory.ts. 
Co-Authored-By: Claude --- benchmark/memory.ts | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/benchmark/memory.ts b/benchmark/memory.ts index 042eeea..c0f4e54 100644 --- a/benchmark/memory.ts +++ b/benchmark/memory.ts @@ -14,11 +14,8 @@ import * as csstree from 'css-tree' import * as fs from 'node:fs' import * as path from 'node:path' import * as postcss from 'postcss' -import { fileURLToPath } from 'node:url' import { parse, walk } from '../dist/index.js' -const __dirname = path.dirname(fileURLToPath(import.meta.url)) - // ── Config ──────────────────────────────────────────────────────────────────── /** Measurement iterations per cell (median is taken) */ @@ -31,7 +28,7 @@ const BYTES_PER_NODE = 32 // ── CSS files ───────────────────────────────────────────────────────────────── function read(rel: string): string { - return fs.readFileSync(path.join(__dirname, rel), 'utf-8') + return fs.readFileSync(path.join(import.meta.dirname, rel), 'utf-8') } const CSS_FILES: Record = { @@ -290,8 +287,8 @@ console.table( // ── Save results ────────────────────────────────────────────────────────────── -const results_path = path.join(__dirname, 'memory-results.json') -const baseline_path = path.join(__dirname, 'memory-baseline.json') +const results_path = path.join(import.meta.dirname, 'memory-results.json') +const baseline_path = path.join(import.meta.dirname, 'memory-baseline.json') const snapshot_data = { timestamp: new Date().toISOString(), From 7f2cee5e87bc3c191e62734925279cb40b82c009 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 21 Jun 2026 15:20:26 +0000 Subject: [PATCH 5/8] fix(benchmark): apply oxfmt formatting Co-Authored-By: Claude --- benchmark/memory.ts | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/benchmark/memory.ts b/benchmark/memory.ts index c0f4e54..f774c7c 100644 --- a/benchmark/memory.ts +++ b/benchmark/memory.ts @@ -231,12 +231,14 @@ console.table( 'P heap': 
fmt_kb(r.postcss.parse.heap), 'P ext': fmt_kb(r.postcss.parse.external), 'P total': fmt_mb(r.postcss.parse.total), - 'W vs C': r.csstree.parse.total > 0 - ? `${(r.csstree.parse.total / r.wallace.parse.total).toFixed(2)}x` - : 'N/A', - 'W vs P': r.postcss.parse.total > 0 - ? `${(r.postcss.parse.total / r.wallace.parse.total).toFixed(2)}x` - : 'N/A', + 'W vs C': + r.csstree.parse.total > 0 + ? `${(r.csstree.parse.total / r.wallace.parse.total).toFixed(2)}x` + : 'N/A', + 'W vs P': + r.postcss.parse.total > 0 + ? `${(r.postcss.parse.total / r.wallace.parse.total).toFixed(2)}x` + : 'N/A', })), ) @@ -256,12 +258,14 @@ console.table( 'P heap': fmt_kb(r.postcss.parse_walk.heap), 'P ext': fmt_kb(r.postcss.parse_walk.external), 'P total': fmt_mb(r.postcss.parse_walk.total), - 'W vs C': r.csstree.parse_walk.total > 0 - ? `${(r.csstree.parse_walk.total / r.wallace.parse_walk.total).toFixed(2)}x` - : 'N/A', - 'W vs P': r.postcss.parse_walk.total > 0 - ? `${(r.postcss.parse_walk.total / r.wallace.parse_walk.total).toFixed(2)}x` - : 'N/A', + 'W vs C': + r.csstree.parse_walk.total > 0 + ? `${(r.csstree.parse_walk.total / r.wallace.parse_walk.total).toFixed(2)}x` + : 'N/A', + 'W vs P': + r.postcss.parse_walk.total > 0 + ? `${(r.postcss.parse_walk.total / r.wallace.parse_walk.total).toFixed(2)}x` + : 'N/A', })), ) @@ -274,8 +278,8 @@ console.table( return { File: name, 'Nodes used': a.node_count.toLocaleString(), - 'Capacity': a.capacity.toLocaleString(), - 'Growths': a.growth_count, + Capacity: a.capacity.toLocaleString(), + Growths: a.growth_count, 'Arena used': fmt_kb(a.used_kb * 1024), 'Arena total': fmt_kb(a.total_kb * 1024), 'Waste %': `${a.waste_pct.toFixed(1)}%`, @@ -354,5 +358,7 @@ if (process.argv.includes('--save-baseline')) { console.log('✅ No regressions detected.') } } else { - console.log(`\nNo baseline found. Run with --save-baseline on your main branch to enable regression detection.`) + console.log( + `\nNo baseline found. 
Run with --save-baseline on your main branch to enable regression detection.`, + ) } From 688a7d0890604a7560652b3b64358a7f205e1c3c Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 21 Jun 2026 15:30:08 +0000 Subject: [PATCH 6/8] remove memory benchmark Co-Authored-By: Claude --- .gitignore | 3 +- benchmark/memory.ts | 364 -------------------------------------------- package.json | 2 - 3 files changed, 1 insertion(+), 368 deletions(-) delete mode 100644 benchmark/memory.ts diff --git a/.gitignore b/.gitignore index 389104e..87953c0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,4 @@ node_modules dist coverage .claude/settings.local.json -package-lock.json -benchmark/memory-results.json \ No newline at end of file +package-lock.json \ No newline at end of file diff --git a/benchmark/memory.ts b/benchmark/memory.ts deleted file mode 100644 index f774c7c..0000000 --- a/benchmark/memory.ts +++ /dev/null @@ -1,364 +0,0 @@ -// Memory diagnostics: parse / walk / parse+walk across parsers and files -// -// Usage: -// node --expose-gc benchmark/memory.ts -// node --expose-gc benchmark/memory.ts --save-baseline -// -// Writes benchmark/memory-results.json every run. -// If benchmark/memory-baseline.json exists, prints a regression diff. -// Save a new baseline with --save-baseline (e.g. after merging to main). 
- -/// -// @ts-expect-error: no type definitions for css-tree -import * as csstree from 'css-tree' -import * as fs from 'node:fs' -import * as path from 'node:path' -import * as postcss from 'postcss' -import { parse, walk } from '../dist/index.js' - -// ── Config ──────────────────────────────────────────────────────────────────── - -/** Measurement iterations per cell (median is taken) */ -const ITERATIONS = 5 -/** Flag a regression if memory grows more than this fraction vs baseline */ -const REGRESSION_THRESHOLD = 0.08 - -const BYTES_PER_NODE = 32 - -// ── CSS files ───────────────────────────────────────────────────────────────── - -function read(rel: string): string { - return fs.readFileSync(path.join(import.meta.dirname, rel), 'utf-8') -} - -const CSS_FILES: Record = { - Small: read('small.css'), - Medium: read('medium.css'), - Bootstrap: read('../node_modules/bootstrap/dist/css/bootstrap.css'), - Tailwind: read('../node_modules/tailwindcss/dist/tailwind.css'), -} - -// ── GC / snapshot ───────────────────────────────────────────────────────────── - -// Module-level sink: assigning here prevents V8 from treating fn()'s return -// value as dead before force_gc() runs. A local `void result` is not enough -// because the JIT may determine it's a no-op and shorten the variable's lifetime. 
-let _measurement_sink: unknown = null - -const _gc = (globalThis as { gc?: () => void }).gc - -if (!_gc) { - console.error('Run with --expose-gc: node --expose-gc benchmark/memory.ts') - process.exit(1) -} - -function force_gc(rounds = 5): void { - for (let i = 0; i < rounds; i++) _gc!() -} - -interface Mem { - heap: number // JS heap only (objects, closures, strings) - external: number // ArrayBuffers, native-backed memory - total: number // heap + external - rss: number // resident set size (whole process) -} - -function snap(): Mem { - const m = process.memoryUsage() - return { heap: m.heapUsed, external: m.external, total: m.heapUsed + m.external, rss: m.rss } -} - -function diff(before: Mem, after: Mem): Mem { - return { - heap: after.heap - before.heap, - external: after.external - before.external, - total: after.total - before.total, - rss: after.rss - before.rss, - } -} - -/** - * Run fn ITERATIONS times, return the median retained-memory delta. - * - * fn must return the primary result object (the parse tree / root node). - * We store it in _measurement_sink (module scope) so V8 cannot shorten its - * lifetime before the post-operation GC runs. That GC collects dead - * temporaries (e.g. the pre-trim arena buffer) without collecting the live - * result. We clear the sink before the next iteration so the next baseline - * snapshot starts clean. 
- */ -function measure(fn: () => unknown): Mem { - const deltas: Mem[] = [] - for (let i = 0; i < ITERATIONS; i++) { - force_gc() - const before = snap() - _measurement_sink = fn() - force_gc() // collect dead temporaries; _measurement_sink keeps result alive - const after = snap() - _measurement_sink = null // release before next iteration's baseline GC - deltas.push(diff(before, after)) - } - deltas.sort((a, b) => a.total - b.total) - return deltas[Math.floor(deltas.length / 2)] -} - -// ── Arena stats ─────────────────────────────────────────────────────────────── - -interface ArenaStats { - node_count: number - capacity: number - growth_count: number - used_kb: number - total_kb: number - waste_pct: number -} - -function arena_stats(root: ReturnType): ArenaStats { - // __get_arena() is @internal but stable for diagnostics - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const arena = (root as any).__get_arena() - const node_count: number = arena.get_count() - const capacity: number = arena.get_capacity() - const growth_count: number = arena.get_growth_count() - const used_kb = (node_count * BYTES_PER_NODE) / 1024 - const total_kb = (capacity * BYTES_PER_NODE) / 1024 - const waste_pct = ((capacity - node_count) / capacity) * 100 - return { node_count, capacity, growth_count, used_kb, total_kb, waste_pct } -} - -// ── Formatters ──────────────────────────────────────────────────────────────── - -function fmt_kb(bytes: number): string { - return `${(bytes / 1024).toFixed(1)} KB` -} - -function fmt_mb(bytes: number): string { - return `${(bytes / 1024 / 1024).toFixed(2)} MB` -} - -function fmt_pct(n: number): string { - return `${n >= 0 ? 
'+' : ''}${n.toFixed(1)}%` -} - -// ── Results structures ──────────────────────────────────────────────────────── - -interface PhaseRow { - heap: number - external: number - total: number -} - -interface FileResult { - size_bytes: number - // per parser: parse-only, parse+walk - wallace: { parse: PhaseRow; parse_walk: PhaseRow } - csstree: { parse: PhaseRow; parse_walk: PhaseRow } - postcss: { parse: PhaseRow; parse_walk: PhaseRow } - arena: ArenaStats -} - -// ── Main ────────────────────────────────────────────────────────────────────── - -const results: Record = {} - -for (const [name, css] of Object.entries(CSS_FILES)) { - process.stdout.write(`Measuring ${name}...`) - - // Parse-only - const w_parse = measure(() => parse(css)) - const c_parse = measure(() => csstree.parse(css, { positions: true })) - const p_parse = measure(() => postcss.parse(css)) - - // Parse+Walk — return the AST so the arena stays alive through the post-GC - const w_parse_walk = measure(() => { - const ast = parse(css) - walk(ast, (node) => { - void node.type - void node.line - }) - return ast - }) - const c_parse_walk = measure(() => { - const ast = csstree.parse(css, { positions: true }) - csstree.walk(ast, (node: { type: unknown; loc?: { start: { line: unknown } } }) => { - void node.type - void node.loc?.start.line - }) - return ast - }) - const p_parse_walk = measure(() => { - const root = postcss.parse(css) - root.walk((node) => { - void node.type - void node.source?.start?.line - }) - return root - }) - - // Arena stats (single parse, outside measurement window) - const ast = parse(css) - const stats = arena_stats(ast) - - results[name] = { - size_bytes: css.length, - wallace: { parse: w_parse, parse_walk: w_parse_walk }, - csstree: { parse: c_parse, parse_walk: c_parse_walk }, - postcss: { parse: p_parse, parse_walk: p_parse_walk }, - arena: stats, - } - - console.log(' done') -} - -// ── Tables ──────────────────────────────────────────────────────────────────── - 
-console.log('\n── Parse-only memory: heap / external / total ───────────────────────────\n') -console.log('(heap = JS objects | external = ArrayBuffer / native | total = heap+external)\n') - -console.table( - Object.entries(results).map(([name, r]) => ({ - File: name, - Size: fmt_kb(r.size_bytes), - // Wallace splits cleanly: arena lives in external, JS objects in heap - 'W heap': fmt_kb(r.wallace.parse.heap), - 'W ext': fmt_kb(r.wallace.parse.external), - 'W total': fmt_mb(r.wallace.parse.total), - // csstree: all in heap (plain JS objects) - 'C heap': fmt_kb(r.csstree.parse.heap), - 'C ext': fmt_kb(r.csstree.parse.external), - 'C total': fmt_mb(r.csstree.parse.total), - // postcss: all in heap - 'P heap': fmt_kb(r.postcss.parse.heap), - 'P ext': fmt_kb(r.postcss.parse.external), - 'P total': fmt_mb(r.postcss.parse.total), - 'W vs C': - r.csstree.parse.total > 0 - ? `${(r.csstree.parse.total / r.wallace.parse.total).toFixed(2)}x` - : 'N/A', - 'W vs P': - r.postcss.parse.total > 0 - ? `${(r.postcss.parse.total / r.wallace.parse.total).toFixed(2)}x` - : 'N/A', - })), -) - -console.log('\n── Parse+Walk memory: heap / external / total ───────────────────────────\n') -console.log('(Walk creates new CSSNode wrappers per node — they show up in heap)\n') - -console.table( - Object.entries(results).map(([name, r]) => ({ - File: name, - Size: fmt_kb(r.size_bytes), - 'W heap': fmt_kb(r.wallace.parse_walk.heap), - 'W ext': fmt_kb(r.wallace.parse_walk.external), - 'W total': fmt_mb(r.wallace.parse_walk.total), - 'C heap': fmt_kb(r.csstree.parse_walk.heap), - 'C ext': fmt_kb(r.csstree.parse_walk.external), - 'C total': fmt_mb(r.csstree.parse_walk.total), - 'P heap': fmt_kb(r.postcss.parse_walk.heap), - 'P ext': fmt_kb(r.postcss.parse_walk.external), - 'P total': fmt_mb(r.postcss.parse_walk.total), - 'W vs C': - r.csstree.parse_walk.total > 0 - ? 
`${(r.csstree.parse_walk.total / r.wallace.parse_walk.total).toFixed(2)}x` - : 'N/A', - 'W vs P': - r.postcss.parse_walk.total > 0 - ? `${(r.postcss.parse_walk.total / r.wallace.parse_walk.total).toFixed(2)}x` - : 'N/A', - })), -) - -console.log('\n── Arena stats (Wallace) ────────────────────────────────────────────────\n') -console.log('(waste% = (capacity - node_count) / capacity — unused pre-allocated slots)\n') - -console.table( - Object.entries(results).map(([name, r]) => { - const a = r.arena - return { - File: name, - 'Nodes used': a.node_count.toLocaleString(), - Capacity: a.capacity.toLocaleString(), - Growths: a.growth_count, - 'Arena used': fmt_kb(a.used_kb * 1024), - 'Arena total': fmt_kb(a.total_kb * 1024), - 'Waste %': `${a.waste_pct.toFixed(1)}%`, - // nodes/KB of source = accuracy of NODES_PER_KB heuristic - 'Nodes/KB src': (a.node_count / (r.size_bytes / 1024)).toFixed(0), - } - }), -) - -// ── Save results ────────────────────────────────────────────────────────────── - -const results_path = path.join(import.meta.dirname, 'memory-results.json') -const baseline_path = path.join(import.meta.dirname, 'memory-baseline.json') - -const snapshot_data = { - timestamp: new Date().toISOString(), - node_version: process.version, - results, -} - -fs.writeFileSync(results_path, JSON.stringify(snapshot_data, null, 2)) -console.log(`\nResults saved → ${results_path}`) - -// ── Baseline comparison ─────────────────────────────────────────────────────── - -if (process.argv.includes('--save-baseline')) { - fs.writeFileSync(baseline_path, JSON.stringify(snapshot_data, null, 2)) - console.log(`Baseline saved → ${baseline_path}`) -} else if (fs.existsSync(baseline_path)) { - const baseline = JSON.parse(fs.readFileSync(baseline_path, 'utf-8')) - - console.log('\n── Regression vs baseline ───────────────────────────────────────────────\n') - console.log(`Baseline: ${baseline.timestamp} (Node ${baseline.node_version})`) - console.log(`Threshold: >${(REGRESSION_THRESHOLD 
* 100).toFixed(0)}% increase = REGRESSION\n`) - - const regressions: string[] = [] - const rows: Record[] = [] - - for (const [file, cur] of Object.entries(results) as [string, FileResult][]) { - const base = baseline.results[file] as FileResult | undefined - if (!base) continue - - type Parser = 'wallace' | 'csstree' | 'postcss' - type Phase = 'parse' | 'parse_walk' - - for (const parser of ['wallace', 'csstree', 'postcss'] as Parser[]) { - for (const phase of ['parse', 'parse_walk'] as Phase[]) { - const cur_total = cur[parser][phase].total - const base_total = base[parser][phase].total - if (base_total === 0) continue - - const pct_change = (cur_total - base_total) / base_total - const is_regression = pct_change > REGRESSION_THRESHOLD - const label = `${file} / ${parser} / ${phase}` - - if (is_regression) regressions.push(label) - - rows.push({ - Label: label, - Baseline: fmt_mb(base_total), - Current: fmt_mb(cur_total), - Delta: fmt_pct(pct_change * 100), - Status: is_regression ? '❌ REGRESSION' : pct_change < -0.02 ? '✅ improvement' : ' ok', - }) - } - } - } - - console.table(rows) - - if (regressions.length > 0) { - console.log(`\n❌ ${regressions.length} regression(s) detected:`) - for (const r of regressions) console.log(` - ${r}`) - process.exit(1) - } else { - console.log('✅ No regressions detected.') - } -} else { - console.log( - `\nNo baseline found. 
Run with --save-baseline on your main branch to enable regression detection.`, - ) -} diff --git a/package.json b/package.json index 5492c83..b0c06fc 100644 --- a/package.json +++ b/package.json @@ -80,8 +80,6 @@ "test-build": "pnpm run build && vitest run --config vitest.config.build.ts", "build": "tsdown", "benchmark": "pnpm run build && node --expose-gc benchmark/index.ts", - "benchmark:memory": "pnpm run build && node --expose-gc benchmark/memory.ts", - "benchmark:memory:baseline": "pnpm run build && node --expose-gc benchmark/memory.ts --save-baseline", "lint": "oxlint --config .oxlintrc.json && oxfmt --check", "check": "tsc --noEmit", "knip": "knip", From d1e9da571e3abdce890cdfa59cf8d9d985b0a89e Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 21 Jun 2026 15:30:32 +0000 Subject: [PATCH 7/8] chore: ignore benchmark/memory-results.json Co-Authored-By: Claude --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 87953c0..389104e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ node_modules dist coverage .claude/settings.local.json -package-lock.json \ No newline at end of file +package-lock.json +benchmark/memory-results.json \ No newline at end of file From c0f2d75f83dda59c962964876368e0af9f6df390 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 21 Jun 2026 17:59:02 +0000 Subject: [PATCH 8/8] docs(arena): add shrink_to_fit references to trim() jsdoc Co-Authored-By: Claude --- src/arena.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/arena.ts b/src/arena.ts index 0ee8f65..08fc3c1 100644 --- a/src/arena.ts +++ b/src/arena.ts @@ -352,8 +352,13 @@ export class CSSDataArena { } } - // Shrink the buffer to exactly the number of live nodes, releasing wasted capacity. - // Call once after parsing is complete. Safe to call multiple times (no-op if already tight). + /** + * Shrink the buffer to exactly the number of live nodes, releasing wasted capacity. 
+ * Call once after parsing is complete. Safe to call multiple times (no-op if already tight). + * + * @see https://doc.rust-lang.org/std/vec/struct.Vec.html#method.shrink_to_fit + * @see https://en.cppreference.com/w/cpp/container/vector/shrink_to_fit + */ trim(): void { if (this.count === this.capacity) return let byte_count = this.count * BYTES_PER_NODE