diff --git a/.gitignore b/.gitignore index bf8360d..86ddc97 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ CMakeFiles cmake_install.cmake json.so json.dll +rapidjson.so +rapidjson.so.dSYM/ deps/ /*.src.rock /rapidjson/test/ @@ -26,3 +28,4 @@ rapidjson/rapidjson.autopkg rapidjson/travis-doxygen.sh /cmake-build-debug /.idea +/docs/superpowers/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0b1eb13 --- /dev/null +++ b/Makefile @@ -0,0 +1,62 @@ +.PHONY: fuzz + +LUA ?= lua +DURATION ?= 3600 +INTERVAL ?= 5 +WORKERS ?= 1 +SEED ?= $(shell date +%s) +SORT_KEYS ?= 1 +SAMPLE_INTERVAL ?= 0 +SAMPLE_LIMIT ?= + +fuzz: + @set -u; \ + tmpdir=$$(mktemp -d "$${TMPDIR:-/tmp}/lua-rapidjson-fuzz.XXXXXX"); \ + pids=""; \ + cleanup() { rm -rf "$$tmpdir"; }; \ + stop_workers() { for pid in $$pids; do kill "$$pid" 2>/dev/null || true; done; cleanup; }; \ + trap cleanup EXIT; \ + trap stop_workers INT TERM; \ + worker=1; \ + while [ "$$worker" -le "$(WORKERS)" ]; do \ + seed=$$(( $(SEED) + $$worker - 1 )); \ + ( \ + DURATION="$(DURATION)" \ + INTERVAL="$(INTERVAL)" \ + WORKERS="$(WORKERS)" \ + WORKER_ID="$$worker" \ + SEED="$$seed" \ + SORT_KEYS="$(SORT_KEYS)" \ + SAMPLE_INTERVAL="$(SAMPLE_INTERVAL)" \ + SAMPLE_LIMIT="$(SAMPLE_LIMIT)" \ + "$(LUA)" tools/fuzz_encode.lua; \ + rc=$$?; \ + if [ "$$rc" -ne 0 ]; then \ + echo "$$rc" > "$$tmpdir/fail.$$worker"; \ + fi; \ + echo "$$rc" > "$$tmpdir/done.$$worker"; \ + ) & \ + pids="$$pids $$!"; \ + worker=$$(( $$worker + 1 )); \ + done; \ + status=0; \ + while :; do \ + if ls "$$tmpdir"/fail.* >/dev/null 2>&1; then \ + status=1; \ + for pid in $$pids; do \ + kill "$$pid" 2>/dev/null || true; \ + done; \ + break; \ + fi; \ + done_count=$$(ls "$$tmpdir"/done.* 2>/dev/null | wc -l | tr -d ' '); \ + if [ "$$done_count" -ge "$(WORKERS)" ]; then \ + break; \ + fi; \ + sleep 1; \ + done; \ + for pid in $$pids; do \ + if ! 
wait "$$pid" 2>/dev/null; then \ + status=1; \ + fi; \ + done; \ + exit "$$status" diff --git a/spec/fuzz_encode_lib_spec.lua b/spec/fuzz_encode_lib_spec.lua new file mode 100644 index 0000000..d0c0f4c --- /dev/null +++ b/spec/fuzz_encode_lib_spec.lua @@ -0,0 +1,381 @@ +require 'busted.runner'() + +describe('tools.fuzz_encode_lib', function() + local fuzz = require('tools.fuzz_encode_lib') + local rapidjson = require('rapidjson') + + describe('parse_config', function() + it('uses production defaults', function() + local cfg = fuzz.parse_config({}) + + assert.are.equal(3600, cfg.duration) + assert.are.equal(5, cfg.interval) + assert.are.equal(1, cfg.workers) + assert.are.equal(1, cfg.worker_id) + assert.are.equal(true, cfg.sort_keys) + assert.are.equal(0, cfg.sample_interval) + assert.are.equal(0, cfg.sample_limit) + assert.are.equal('number', type(cfg.seed)) + end) + + it('accepts numeric and boolean overrides', function() + local cfg = fuzz.parse_config({ + DURATION = '12', + INTERVAL = '3', + WORKERS = '2', + WORKER_ID = '2', + SEED = '99', + SORT_KEYS = '0', + SAMPLE_INTERVAL = '3', + SAMPLE_LIMIT = '10', + }) + + assert.are.equal(12, cfg.duration) + assert.are.equal(3, cfg.interval) + assert.are.equal(2, cfg.workers) + assert.are.equal(2, cfg.worker_id) + assert.are.equal(99, cfg.seed) + assert.are.equal(false, cfg.sort_keys) + assert.are.equal(3, cfg.sample_interval) + assert.are.equal(10, cfg.sample_limit) + end) + + it('defaults time-based sampling to 10 samples when enabled', function() + local cfg = fuzz.parse_config({ SAMPLE_INTERVAL = '1' }) + + assert.are.equal(1, cfg.sample_interval) + assert.are.equal(10, cfg.sample_limit) + end) + + it('treats numeric zero as disabling sorted keys', function() + local cfg = fuzz.parse_config({ SORT_KEYS = 0 }) + + assert.are.equal(false, cfg.sort_keys) + end) + end) + + describe('env_from_args', function() + it('turns KEY=VALUE args into config environment entries', function() + local env = fuzz.env_from_args({ + 
'DURATION=2', + 'INTERVAL=1', + 'SEED=123', + 'WORKERS=1', + }) + + assert.are.equal('2', env.DURATION) + assert.are.equal('1', env.INTERVAL) + assert.are.equal('123', env.SEED) + assert.are.equal('1', env.WORKERS) + end) + end) + + describe('new_rng', function() + it('is deterministic for the same seed', function() + local a = fuzz.new_rng(123) + local b = fuzz.new_rng(123) + + assert.are.equal(a:int(1, 1000000), b:int(1, 1000000)) + assert.are.equal(a:int(1, 1000000), b:int(1, 1000000)) + assert.are.equal(a:bool(), b:bool()) + end) + end) + + describe('format_summary', function() + it('formats the progress counters', function() + local line = fuzz.format_summary({ + elapsed = 5, + total = 100, + encoded = 99, + encode_errors = 1, + validation_failures = 0, + rate = 20, + seed = 123, + last_case_id = 100, + worker_id = 1, + }) + + assert.matches('worker=1', line, 1, true) + assert.matches('elapsed=5s', line, 1, true) + assert.matches('total=100', line, 1, true) + assert.matches('encoded=99', line, 1, true) + assert.matches('encode_errors=1', line, 1, true) + assert.matches('validation_failures=0', line, 1, true) + assert.matches('rate=20.00/s', line, 1, true) + assert.matches('seed=123', line, 1, true) + assert.matches('last_case=100', line, 1, true) + end) + end) + + describe('generate_case', function() + it('generates deterministic schema-guided cases with selected metadata', function() + local a = fuzz.generate_case(fuzz.new_rng(321), 1, rapidjson) + local b = fuzz.generate_case(fuzz.new_rng(321), 1, rapidjson) + + assert.are.same(a.value, b.value) + assert.are.same(a.expected, b.expected) + assert.are.equal('number', type(a.id)) + assert.are.equal('string', type(a.schema)) + assert.are.equal('schema_guided', a.kind) + assert.are.equal('object', a.expected.top_level_kind) + assert.are.equal('table', type(a.value.fuzz)) + assert.is_true(#a.expected.objects >= 1) + assert.is_true(#a.expected.arrays >= 1) + assert.is_true(#a.expected.scalars >= 1) + end) + + 
it('adds pure recursive random cases with nested objects and arrays', function() + local case = fuzz.generate_case(fuzz.new_rng(98765), 3, rapidjson) + + assert.are.equal('recursive_random', case.kind) + assert.are.equal('recursive_random', case.schema) + assert.are.equal('table', type(case.value)) + assert.are.equal('table', type(case.value.random)) + assert.are.equal('table', type(case.expected.random)) + assert.is_true(case.expected.random.max_depth >= 3) + assert.is_true(case.expected.random.object_count >= 2) + assert.is_true(case.expected.random.array_count >= 1) + assert.is_true(#case.expected.objects >= case.expected.random.object_count) + assert.is_true(#case.expected.arrays >= case.expected.random.array_count) + end) + + it('tracks recursive random arrays from the generated core', function() + local case = fuzz.generate_case(fuzz.new_rng(98765), 3, rapidjson) + local saw_core_array = false + + for _, entry in ipairs(case.expected.arrays) do + if entry.path:match('^%$%.random') then + saw_core_array = true + end + end + + assert.is_true(saw_core_array) + end) + + it('emits rapidjson null sentinels that round-trip as JSON null', function() + local case = fuzz.generate_case(fuzz.new_rng(100), 10, rapidjson) + + assert.are.equal('paginated_list', case.schema) + assert.are.equal(rapidjson.null, case.value.links.previous) + + local encoded = rapidjson.encode(case.value) + local decoded = rapidjson.decode(encoded) + + assert.matches('"previous":null', encoded, 1, true) + assert.are.equal(rapidjson.null, decoded.links.previous) + end) + + it('requires a real rapidjson null sentinel', function() + assert.has_error(function() + fuzz.generate_case(fuzz.new_rng(1), 1, {}) + end, 'rapidjson.null is required') + end) + + it('rejects fake table null sentinels', function() + assert.has_error(function() + fuzz.generate_case(fuzz.new_rng(1), 1, { null = {} }) + end, 'rapidjson.null is required') + end) + + it('runs pure recursive random cases at least as often as 
schema-guided cases', function() + local rng = fuzz.new_rng(1) + local seen = {} + local counts = { + schema_guided = 0, + recursive_random = 0, + } + + for case_id = 1, 30 do + local case = fuzz.generate_case(rng, case_id, rapidjson) + counts[case.kind] = counts[case.kind] + 1 + if case.kind == 'schema_guided' then + seen[case.schema] = true + end + end + + assert.is_true(counts.recursive_random >= counts.schema_guided) + assert.are.equal(10, counts.schema_guided) + assert.are.equal(20, counts.recursive_random) + assert.is_true(seen.llm_response) + assert.is_true(seen.github_issue) + assert.is_true(seen.social_feed) + assert.is_true(seen.paginated_list) + assert.is_true(seen.metadata_config) + end) + end) + + describe('validate_encoded_case', function() + it('accepts a generated case encoded with sorted keys', function() + local case = fuzz.generate_case(fuzz.new_rng(77), 1, rapidjson) + local json = rapidjson.encode(case.value, { sort_keys = true }) + + local ok, err = fuzz.validate_encoded_case(rapidjson, case, json) + + assert.is_true(ok) + assert.is_nil(err) + end) + + it('rejects unsorted encoded object keys for tracked objects', function() + local case = { + id = 1, + kind = 'manual', + schema = 'manual', + value = { b = 1, a = 2 }, + expected = { + top_level_kind = 'object', + objects = { + { path = '$', key_count = 2, keys = { 'a', 'b' } }, + }, + arrays = {}, + scalars = {}, + }, + } + + local ok, err = fuzz.validate_encoded_case(rapidjson, case, '{"b":1,"a":2}') + + assert.is_false(ok) + assert.matches('key order', err, 1, true) + end) + + it('rejects unsorted nested object keys for tracked object paths', function() + local case = { + id = 2, + kind = 'manual', + schema = 'manual', + value = { a = { b = 1, a = 2 } }, + expected = { + top_level_kind = 'object', + objects = { + { path = '$.a', key_count = 2, keys = { 'a', 'b' } }, + }, + arrays = {}, + scalars = {}, + }, + } + + local ok, err = fuzz.validate_encoded_case(rapidjson, case, 
'{"a":{"b":1,"a":2}}') + + assert.is_false(ok) + assert.matches('key order', err, 1, true) + end) + + it('validates recursive_random core metadata after encode and decode', function() + local case = fuzz.generate_case(fuzz.new_rng(98765), 2, rapidjson) + local json = rapidjson.encode(case.value, { sort_keys = true }) + + assert.are.equal('recursive_random', case.kind) + assert.are.equal('recursive_random', case.schema) + assert.are.equal('table', type(case.expected.random)) + + local ok, err = fuzz.validate_encoded_case(rapidjson, case, json) + + assert.is_true(ok) + assert.is_nil(err) + end) + + it('accepts small floating point round-trip differences', function() + local case = { + id = 3, + kind = 'manual', + schema = 'manual', + value = { n = 12.931 }, + expected = { + top_level_kind = 'object', + objects = { + { path = '$', key_count = 1, keys = { 'n' } }, + }, + arrays = {}, + scalars = { + { path = '$.n', kind = 'float', value = 12.931 }, + }, + }, + } + + local ok, err = fuzz.validate_encoded_case( + rapidjson, + case, + '{"n":12.931000000000001}' + ) + + assert.is_true(ok) + assert.is_nil(err) + end) + + it('returns decode diagnostics when JSON cannot be decoded', function() + local ok, err = fuzz.validate_encoded_case(rapidjson, { expected = {} }, '{"a":}') + + assert.is_false(ok) + assert.matches('decode failed:', err, 1, true) + end) + end) + + describe('format_failure', function() + it('is reproducible and includes fuzz failure diagnostics', function() + local case = { + id = 42, + kind = 'manual', + schema = 'manual_schema', + value = { b = 1, a = { true, rapidjson.null } }, + } + local details = { + seed = 12345, + worker_id = 2, + case = case, + reason = 'key order mismatch at $', + json = '{"b":1,"a":[true,null]}', + } + + local first = fuzz.format_failure(details) + local second = fuzz.format_failure(details) + + assert.are.equal(first, second) + assert.matches('FUZZ FAILURE', first, 1, true) + assert.matches('seed=12345', first, 1, true) + 
assert.matches('worker=2', first, 1, true) + assert.matches('case=42', first, 1, true) + assert.matches('kind=manual', first, 1, true) + assert.matches('schema=manual_schema', first, 1, true) + assert.matches('reason=key order mismatch at $', first, 1, true) + assert.matches('value=', first, 1, true) + assert.matches('"a"', first, 1, true) + assert.matches('json={"b":1,"a":[true,null]}', first, 1, true) + end) + end) + + describe('format_sample', function() + it('prints full sample data with aligned value columns', function() + local case = { + id = 7, + kind = 'manual', + schema = 'manual_schema', + value = { + root = { + child = { + leaf = { + value = 'deep-value', + }, + }, + }, + }, + } + + local sample = fuzz.format_sample({ + seed = 123, + worker_id = 1, + elapsed = 2, + case = case, + raw_json_unsorted = '{"root":{"child":{"leaf":{"value":"deep-value"}}}}', + encoded_json_sort_keys = '{"root":{"child":{"leaf":{"value":"deep-value"}}}}', + }) + + assert.matches('FUZZ SAMPLE', sample, 1, true) + assert.matches('case=7', sample, 1, true) + assert.matches('input_lua= {', sample, 1, true) + assert.matches('raw_json_unsorted= {', sample, 1, true) + assert.matches('encoded_json_sort_keys={', sample, 1, true) + assert.matches('deep-value', sample, 1, true) + assert.is_nil(sample:find('{...}', 1, true)) + assert.is_nil(sample:find('[...]', 1, true)) + end) + end) +end) diff --git a/tools/fuzz_encode.lua b/tools/fuzz_encode.lua new file mode 100644 index 0000000..50b93c5 --- /dev/null +++ b/tools/fuzz_encode.lua @@ -0,0 +1,147 @@ +local rapidjson = require('rapidjson') +local fuzz = require('tools.fuzz_encode_lib') + +local env = fuzz.env_from_args(arg) +for _, key in ipairs({ + 'DURATION', + 'INTERVAL', + 'WORKERS', + 'WORKER_ID', + 'SEED', + 'SORT_KEYS', + 'SAMPLE_INTERVAL', + 'SAMPLE_LIMIT', +}) do + if env[key] == nil then + env[key] = os.getenv(key) + end +end + +local cfg = fuzz.parse_config(env) +local rng = fuzz.new_rng(cfg.seed) +local started = os.time() 
+local next_report = started + cfg.interval +local deadline = started + cfg.duration +local stats = { + worker_id = cfg.worker_id, + elapsed = 0, + total = 0, + encoded = 0, + encode_errors = 0, + validation_failures = 0, + rate = 0, + seed = cfg.seed, + last_case_id = 0, +} +local last_report_total = -1 +local last_report_elapsed = -1 +local sample_count = 0 +local next_sample_at = started + +local function update_stats(now) + stats.elapsed = now - started + if stats.elapsed <= 0 then + stats.rate = stats.total + else + stats.rate = stats.total / stats.elapsed + end +end + +local function print_summary() + print(fuzz.format_summary(stats)) + last_report_total = stats.total + last_report_elapsed = stats.elapsed +end + +local function sample_limit_reached() + return cfg.sample_limit > 0 and sample_count >= cfg.sample_limit +end + +local function advance_next_sample_at(now) + repeat + next_sample_at = next_sample_at + cfg.sample_interval + until next_sample_at > now +end + +local function maybe_print_sample(generated_case, encoded_json, now) + if cfg.sample_interval <= 0 or sample_limit_reached() or now < next_sample_at then + return + end + + local raw_ok, raw_json_or_err = pcall(rapidjson.encode, generated_case.value, { + sort_keys = false, + }) + local sorted_ok, sorted_json_or_err = true, encoded_json + if not cfg.sort_keys then + sorted_ok, sorted_json_or_err = pcall(rapidjson.encode, generated_case.value, { + sort_keys = true, + }) + end + + sample_count = sample_count + 1 + update_stats(now) + print(fuzz.format_sample({ + rapidjson = rapidjson, + seed = cfg.seed, + worker_id = cfg.worker_id, + elapsed = stats.elapsed, + case = generated_case, + raw_json_unsorted = raw_ok and raw_json_or_err or (''), + encoded_json_sort_keys = sorted_ok and sorted_json_or_err or (''), + })) + advance_next_sample_at(now) +end + +while os.time() < deadline do + local case_id = stats.total + 1 + local generated_case = fuzz.generate_case(rng, case_id, rapidjson) + local ok, 
json_or_err = pcall(rapidjson.encode, generated_case.value, { + sort_keys = cfg.sort_keys, + }) + + stats.total = stats.total + 1 + stats.last_case_id = case_id + + if ok then + stats.encoded = stats.encoded + 1 + local valid, reason = fuzz.validate_encoded_case(rapidjson, generated_case, json_or_err) + if not valid then + stats.validation_failures = stats.validation_failures + 1 + update_stats(os.time()) + io.stderr:write(fuzz.format_failure({ + seed = cfg.seed, + worker_id = cfg.worker_id, + case = generated_case, + json = json_or_err, + reason = reason, + }), '\n') + os.exit(1) + end + else + stats.encode_errors = stats.encode_errors + 1 + update_stats(os.time()) + io.stderr:write(fuzz.format_failure({ + seed = cfg.seed, + worker_id = cfg.worker_id, + case = generated_case, + reason = 'encode failed: ' .. tostring(json_or_err), + }), '\n') + os.exit(1) + end + + local now = os.time() + if ok then + maybe_print_sample(generated_case, json_or_err, now) + end + + if now >= next_report then + update_stats(now) + print_summary() + next_report = now + cfg.interval + end +end + +update_stats(os.time()) +if stats.total ~= last_report_total or stats.elapsed ~= last_report_elapsed then + print_summary() +end diff --git a/tools/fuzz_encode_lib.lua b/tools/fuzz_encode_lib.lua new file mode 100644 index 0000000..71a9f49 --- /dev/null +++ b/tools/fuzz_encode_lib.lua @@ -0,0 +1,1602 @@ +local M = {} + +local DEFAULTS = { + duration = 3600, + interval = 5, + workers = 1, + worker_id = 1, + sort_keys = true, + sample_interval = 0, + sample_limit = 0, +} + +local function tonumber_or(value, default) + local parsed = tonumber(value) + if parsed == nil then + return default + end + return parsed +end + +local function normalize_seed(value) + local parsed = tonumber(value) + if parsed == nil then + parsed = os.time() + end + parsed = math.floor(parsed) + parsed = parsed % 2147483647 + if parsed <= 0 then + parsed = 1 + end + return parsed +end + +function M.parse_config(env) + env = 
env or {} + local sample_interval = tonumber_or(env.SAMPLE_INTERVAL, DEFAULTS.sample_interval) + local sample_limit = tonumber(env.SAMPLE_LIMIT) + if sample_limit == nil then + sample_limit = sample_interval > 0 and 10 or DEFAULTS.sample_limit + end + + return { + duration = tonumber_or(env.DURATION, DEFAULTS.duration), + interval = tonumber_or(env.INTERVAL, DEFAULTS.interval), + workers = tonumber_or(env.WORKERS, DEFAULTS.workers), + worker_id = tonumber_or(env.WORKER_ID, DEFAULTS.worker_id), + seed = normalize_seed(env.SEED), + sort_keys = env.SORT_KEYS ~= '0' and env.SORT_KEYS ~= 0, + sample_interval = sample_interval, + sample_limit = sample_limit, + } +end + +function M.new_rng(seed) + local state = normalize_seed(seed) + local rng = {} + + function rng:next() + local hi = math.floor(state / 127773) + local lo = state % 127773 + local test = 16807 * lo - 2836 * hi + if test <= 0 then + test = test + 2147483647 + end + state = test + return state / 2147483647 + end + + function rng:int(min, max) + return min + math.floor(self:next() * (max - min + 1)) + end + + function rng:bool() + return self:int(0, 1) == 1 + end + + function rng:choice(values) + return values[self:int(1, #values)] + end + + return rng +end + +function M.format_summary(stats) + return string.format( + 'worker=%d elapsed=%ds total=%d encoded=%d encode_errors=%d validation_failures=%d rate=%.2f/s seed=%d last_case=%d', + stats.worker_id, + stats.elapsed, + stats.total, + stats.encoded, + stats.encode_errors, + stats.validation_failures, + stats.rate, + stats.seed, + stats.last_case_id + ) +end + +local SCHEMA_FAMILIES = { + 'llm_response', + 'github_issue', + 'social_feed', + 'paginated_list', + 'metadata_config', +} + +local CASE_KINDS = { + 'schema_guided', + 'recursive_random', + 'recursive_random', +} + +local EMPTY_ARRAY_MT = { __jsontype = 'array' } + +local KEY_PARTS = { + 'alpha', + 'body', + 'cache', + 'delta', + 'edge', + 'flags', + 'group', + 'hint', + 'index', + 'job', + 'kind', + 
'limit', + 'meta', + 'node', + 'option', + 'payload', + 'query', + 'result', + 'state', + 'token', +} + +local STRING_PARTS = { + 'adapter', + 'batch', + 'cursor', + 'draft', + 'event', + 'filter', + 'gateway', + 'header', + 'intent', + 'journal', + 'kernel', + 'ledger', + 'message', + 'notice', + 'offset', + 'profile', + 'record', + 'signal', + 'thread', + 'update', +} + +local function json_null(rapidjson) + if rapidjson and type(rapidjson.encode) == 'function' and rapidjson.null ~= nil then + local ok, encoded = pcall(rapidjson.encode, rapidjson.null) + if ok and encoded == 'null' then + return rapidjson.null + end + end + + error('rapidjson.null is required', 0) +end + +local function empty_json_array() + return setmetatable({}, EMPTY_ARRAY_MT) +end + +local function random_integer(rng) + return rng:int(-1000000, 1000000) +end + +local function random_float(rng) + local whole = rng:int(1, 100000) + local fraction = rng:int(1, 999) / 1000 + local sign = rng:bool() and 1 or -1 + return sign * (whole + fraction) +end + +local function random_string(rng) + local count = rng:int(1, 4) + local parts = {} + + for i = 1, count do + parts[#parts + 1] = rng:choice(STRING_PARTS) + end + + return table.concat(parts, '-') .. '-' .. tostring(rng:int(1, 9999)) +end + +local function random_key(rng) + return rng:choice(KEY_PARTS) .. '_' .. 
tostring(rng:int(1, 999)) +end + +local function random_scalar(rng, rapidjson) + local scalar_kind = rng:int(1, 6) + + if scalar_kind == 1 then + return random_string(rng) + elseif scalar_kind == 2 then + return random_integer(rng) + elseif scalar_kind == 3 then + return random_float(rng) + elseif scalar_kind == 4 then + return rng:bool() + elseif scalar_kind == 5 then + return json_null(rapidjson) + end + + return '' +end + +local generate_random_value + +local function unique_random_key(rng, object) + local key = random_key(rng) + + while object[key] ~= nil do + key = random_key(rng) + end + + return key +end + +local function generate_random_object(rng, rapidjson, depth, max_depth) + local object = {} + local width = rng:int(1, 5) + local forced_nested_index + + if depth < max_depth - 1 then + forced_nested_index = rng:int(1, width) + end + + for i = 1, width do + local key = unique_random_key(rng, object) + + if i == forced_nested_index then + object[key] = generate_random_value(rng, rapidjson, depth + 1, max_depth) + elseif depth >= max_depth - 1 or rng:int(1, 100) <= 45 then + object[key] = random_scalar(rng, rapidjson) + else + object[key] = generate_random_value(rng, rapidjson, depth + 1, max_depth) + end + end + + return object +end + +local function generate_random_array(rng, rapidjson, depth, max_depth) + local array = {} + local length = rng:int(1, 5) + local forced_nested_index + + if depth < max_depth - 1 then + forced_nested_index = rng:int(1, length) + end + + for i = 1, length do + if i == forced_nested_index then + array[i] = generate_random_value(rng, rapidjson, depth + 1, max_depth) + elseif depth >= max_depth - 1 or rng:int(1, 100) <= 45 then + array[i] = random_scalar(rng, rapidjson) + else + array[i] = generate_random_value(rng, rapidjson, depth + 1, max_depth) + end + end + + return array +end + +function generate_random_value(rng, rapidjson, depth, max_depth) + if depth >= max_depth then + return random_scalar(rng, rapidjson) + end + + if 
rng:bool() then + return generate_random_object(rng, rapidjson, depth, max_depth) + end + + return generate_random_array(rng, rapidjson, depth, max_depth) +end + +local function generate_random_payload(rng, rapidjson) + local max_depth = rng:int(3, 6) + local random_core = generate_random_object(rng, rapidjson, 1, max_depth) + + random_core[unique_random_key(rng, random_core)] = + generate_random_array(rng, rapidjson, 2, max_depth) + random_core[unique_random_key(rng, random_core)] = + generate_random_object(rng, rapidjson, 2, max_depth) + + return { + random = random_core, + empty_object = {}, + empty_array = empty_json_array(), + scalar_samples = { + boolean = rng:bool(), + empty_string = '', + float = random_float(rng), + integer = random_integer(rng), + null_value = json_null(rapidjson), + string = random_string(rng), + }, + } +end + +local function string_keys(value) + local keys = {} + + for key in pairs(value) do + if type(key) == 'string' then + keys[#keys + 1] = key + end + end + + table.sort(keys) + return keys +end + +local function path_value(path) + if path == nil or path == '' then + return '$' + end + return path +end + +local function is_json_null(value, rapidjson) + return value == json_null(rapidjson) +end + +local function is_json_array(value) + local metatable = getmetatable(value) + if metatable and metatable.__jsontype == 'array' then + return true + end + + local count = 0 + local max_index = 0 + + for key in pairs(value) do + if type(key) ~= 'number' or key < 1 or key % 1 ~= 0 then + return false + end + + count = count + 1 + if key > max_index then + max_index = key + end + end + + return count > 0 and max_index == count +end + +local function scalar_metadata(value, rapidjson) + if is_json_null(value, rapidjson) then + return 'null' + end + + if value == '' then + return 'empty_string' + end + + if type(value) == 'number' and value == math.floor(value) then + return 'integer' + end + + if type(value) == 'number' then + return 'float' + end 
+ + return type(value) +end + +local function table_key_count(value) + local count = 0 + + for _ in pairs(value) do + count = count + 1 + end + + return count +end + +local function decoded_kind(value, rapidjson) + if type(value) == 'table' and not is_json_null(value, rapidjson) then + if is_json_array(value) then + return 'array' + end + + return 'object' + end + + return scalar_metadata(value, rapidjson) +end + +local function matches_expected_kind(value, expected_kind, rapidjson, expected_length) + if expected_kind == 'object' then + return type(value) == 'table' and not is_json_null(value, rapidjson) and not is_json_array(value) + end + + if expected_kind == 'array' then + if type(value) ~= 'table' or is_json_null(value, rapidjson) then + return false + end + + return is_json_array(value) or (expected_length == 0 and table_key_count(value) == 0) + end + + return scalar_metadata(value, rapidjson) == expected_kind +end + +local function lookup_path(value, path) + if path == '$' then + return true, value + end + + if type(path) ~= 'string' or path:sub(1, 1) ~= '$' then + return false, nil, 'invalid path: ' .. tostring(path) + end + + local current = value + local offset = 2 + + while offset <= #path do + local char = path:sub(offset, offset) + + if char == '.' then + offset = offset + 1 + + local start = offset + while offset <= #path do + local next_char = path:sub(offset, offset) + if next_char == '.' or next_char == '[' then + break + end + offset = offset + 1 + end + + if start == offset then + return false, nil, 'invalid path segment: ' .. path + end + + if type(current) ~= 'table' then + return false, nil, 'path not found: ' .. path + end + + current = current[path:sub(start, offset - 1)] + if current == nil then + return false, nil, 'path not found: ' .. path + end + elseif char == '[' then + local close = path:find(']', offset + 1, true) + if close == nil then + return false, nil, 'invalid path segment: ' .. 
path + end + + local index = tonumber(path:sub(offset + 1, close - 1)) + if index == nil or index < 1 or index % 1 ~= 0 then + return false, nil, 'invalid array index: ' .. path + end + + if type(current) ~= 'table' then + return false, nil, 'path not found: ' .. path + end + + current = current[index] + if current == nil then + return false, nil, 'path not found: ' .. path + end + + offset = close + 1 + else + return false, nil, 'invalid path segment: ' .. path + end + end + + return true, current +end + +local function format_keys(keys) + return table.concat(keys, ',') +end + +local function skip_json_ws(json, offset) + while offset <= #json do + local char = json:sub(offset, offset) + if char ~= ' ' and char ~= '\n' and char ~= '\r' and char ~= '\t' then + break + end + + offset = offset + 1 + end + + return offset +end + +local function scan_json_string(json, offset) + if json:sub(offset, offset) ~= '"' then + return nil, 'expected string at byte ' .. tostring(offset) + end + + offset = offset + 1 + while offset <= #json do + local char = json:sub(offset, offset) + if char == '"' then + return offset + end + if char == '\\' then + offset = offset + 2 + else + offset = offset + 1 + end + end + + return nil, 'unterminated string' +end + +local skip_json_value + +local function skip_json_object(json, offset) + offset = skip_json_ws(json, offset + 1) + if json:sub(offset, offset) == '}' then + return offset + 1 + end + + while offset <= #json do + local string_end, err = scan_json_string(json, offset) + if string_end == nil then + return nil, err + end + + offset = skip_json_ws(json, string_end + 1) + if json:sub(offset, offset) ~= ':' then + return nil, 'expected object member colon' + end + + offset = skip_json_value(json, offset + 1) + if offset == nil then + return nil, 'invalid object member value' + end + + offset = skip_json_ws(json, offset) + local char = json:sub(offset, offset) + if char == '}' then + return offset + 1 + end + if char ~= ',' then + return 
nil, 'expected object member separator'
    end

    offset = skip_json_ws(json, offset + 1)
  end

  return nil, 'unterminated object'
end

--- Skip over a JSON array in raw JSON text.
-- @param json   raw JSON text
-- @param offset byte index of the opening '['
-- @return byte index just past the closing ']', or nil plus an error message
local function skip_json_array(json, offset)
  offset = skip_json_ws(json, offset + 1)
  if json:sub(offset, offset) == ']' then
    -- Empty array.
    return offset + 1
  end

  while offset <= #json do
    offset = skip_json_value(json, offset)
    if offset == nil then
      return nil, 'invalid array value'
    end

    offset = skip_json_ws(json, offset)
    local char = json:sub(offset, offset)
    if char == ']' then
      return offset + 1
    end
    if char ~= ',' then
      return nil, 'expected array separator'
    end

    offset = skip_json_ws(json, offset + 1)
  end

  return nil, 'unterminated array'
end

--- Skip over one JSON value of any kind (assigned to a local declared earlier
-- in the file so the object/array skippers can recurse into it).
-- Strings, objects and arrays are delegated to their dedicated scanners; any
-- other token is consumed up to the next ',', '}', ']' or whitespace byte
-- without being validated.
-- @param json   raw JSON text
-- @param offset byte index at or before the value (leading whitespace allowed)
-- @return byte index just past the value, or nil plus an error message
skip_json_value = function(json, offset)
  offset = skip_json_ws(json, offset)

  local char = json:sub(offset, offset)
  if char == '"' then
    local string_end, err = scan_json_string(json, offset)
    if string_end == nil then
      return nil, err
    end

    return string_end + 1
  end
  if char == '{' then
    return skip_json_object(json, offset)
  end
  if char == '[' then
    return skip_json_array(json, offset)
  end

  -- Bare scalar (number, true, false, null): run to the next delimiter.
  while offset <= #json do
    char = json:sub(offset, offset)
    if char == ',' or char == '}' or char == ']' or char:match('%s') then
      break
    end

    offset = offset + 1
  end

  return offset
end

--- List the members of a JSON object in the order they appear in the text.
-- @param json         raw JSON text
-- @param object_start byte index of the opening '{'
-- @return array of { token = <raw key text as sliced from json>,
--         value_start = <byte index of the member value> } plus the byte
--         index of the closing '}', or nil plus an error message
local function json_object_members(json, object_start)
  if json:sub(object_start, object_start) ~= '{' then
    return nil, 'expected object at byte ' .. tostring(object_start)
  end

  local members = {}
  local offset = skip_json_ws(json, object_start + 1)
  if json:sub(offset, offset) == '}' then
    return members, offset
  end

  while offset <= #json do
    local key_start = offset
    local key_end, err = scan_json_string(json, key_start)
    if key_end == nil then
      return nil, err
    end

    offset = skip_json_ws(json, key_end + 1)
    if json:sub(offset, offset) ~= ':' then
      return nil, 'expected object member colon'
    end

    local value_start = skip_json_ws(json, offset + 1)
    members[#members + 1] = {
      -- Kept in encoded form so it can be compared against rapidjson.encode(key).
      token = json:sub(key_start, key_end),
      value_start = value_start,
    }

    offset = skip_json_value(json, value_start)
    if offset == nil then
      return nil, 'invalid object member value'
    end

    offset = skip_json_ws(json, offset)
    local char = json:sub(offset, offset)
    if char == '}' then
      return members, offset
    end
    if char ~= ',' then
      return nil, 'expected object member separator'
    end

    offset = skip_json_ws(json, offset + 1)
  end

  return nil, 'unterminated object'
end

--- Split a path such as `$.a.b[2]` into lookup steps.
-- Field names run up to the next '.' or '[', so keys containing either
-- character cannot be expressed. Array indices are 1-based.
-- @param path path string starting with '$'
-- @return true plus an array of { kind = 'field', key = ... } /
--         { kind = 'index', index = ... } steps, or false plus an error message
local function tokenize_path(path)
  local steps = {}

  if path == '$' then
    return true, steps
  end
  if type(path) ~= 'string' or path:sub(1, 1) ~= '$' then
    return false, 'invalid path: ' .. tostring(path)
  end

  local offset = 2
  while offset <= #path do
    local char = path:sub(offset, offset)

    if char == '.' then
      offset = offset + 1

      local start = offset
      while offset <= #path do
        local next_char = path:sub(offset, offset)
        if next_char == '.' or next_char == '[' then
          break
        end
        offset = offset + 1
      end

      -- An empty field name ("$..a" or a trailing '.') is rejected.
      if start == offset then
        return false, 'invalid path segment: ' .. path
      end

      steps[#steps + 1] = {
        kind = 'field',
        key = path:sub(start, offset - 1),
      }
    elseif char == '[' then
      -- Plain find: ']' must not be treated as a pattern character.
      local close = path:find(']', offset + 1, true)
      if close == nil then
        return false, 'invalid path segment: ' .. path
      end

      local index = tonumber(path:sub(offset + 1, close - 1))
      if index == nil or index < 1 or index % 1 ~= 0 then
        return false, 'invalid array index: ' .. path
      end

      steps[#steps + 1] = {
        kind = 'index',
        index = index,
      }
      offset = close + 1
    else
      return false, 'invalid path segment: ' .. path
    end
  end

  return true, steps
end

--- Find where the value of member `key` starts inside a JSON object.
-- Keys are matched by comparing the raw key text with rapidjson.encode(key).
-- @param rapidjson    module providing `encode`
-- @param json         raw JSON text
-- @param object_start byte index of the opening '{'
-- @param key          member name to look for
-- @return byte index of the member value, or nil plus an error message
local function json_object_member_value_start(rapidjson, json, object_start, key)
  local members, err = json_object_members(json, object_start)
  if members == nil then
    return nil, err
  end

  local token = rapidjson.encode(key)
  for _, member in ipairs(members) do
    if member.token == token then
      return member.value_start
    end
  end

  return nil, 'path not found'
end

--- Find where element number `expected_index` (1-based) starts inside a JSON array.
-- @param json           raw JSON text
-- @param array_start    byte index of the opening '['
-- @param expected_index 1-based element index
-- @return byte index of the element, or nil plus an error message
local function json_array_value_start(json, array_start, expected_index)
  if json:sub(array_start, array_start) ~= '[' then
    return nil, 'expected array at byte ' .. tostring(array_start)
  end

  local offset = skip_json_ws(json, array_start + 1)
  if json:sub(offset, offset) == ']' then
    return nil, 'path not found'
  end

  local index = 1
  while offset <= #json do
    local value_start = offset
    -- The element is skipped before the index test, so a malformed target
    -- element is reported as an error rather than returned.
    local next_offset = skip_json_value(json, value_start)
    if next_offset == nil then
      return nil, 'invalid array value'
    end

    if index == expected_index then
      return value_start
    end

    offset = skip_json_ws(json, next_offset)
    local char = json:sub(offset, offset)
    if char == ']' then
      return nil, 'path not found'
    end
    if char ~= ',' then
      return nil, 'expected array separator'
    end

    index = index + 1
    offset = skip_json_ws(json, offset + 1)
  end

  return nil, 'unterminated array'
end

--- Resolve a `$`-rooted path to the byte offset of the value it names in raw JSON text.
-- @param rapidjson module providing `encode`
-- @param json      raw JSON text
-- @param path      path string understood by tokenize_path
-- @return byte index of the addressed value, or nil plus an error message
local function json_value_start_for_path(rapidjson, json, path)
  local ok, steps_or_err = tokenize_path(path)
  if not ok then
    return nil, steps_or_err
  end

  local offset = skip_json_ws(json, 1)
  for _, step in ipairs(steps_or_err) do
    if step.kind == 'field' then
      if json:sub(offset, offset) ~= '{' then
        return nil, 'path not found'
      end

      local value_start, err =
        json_object_member_value_start(rapidjson, json, offset, step.key)
      if value_start == nil then
        return nil, err
      end

      offset = skip_json_ws(json, value_start)
    else
      if json:sub(offset, offset) ~= '[' then
        return nil, 'path not found'
      end

      local value_start, err = json_array_value_start(json, offset, step.index)
      if value_start == nil then
        return nil, err
      end

      offset = skip_json_ws(json, value_start)
    end
  end

  return offset
end

--- Check that the object at `object_entry.path` lists its members in the
-- order given by `object_entry.keys`, by inspecting the raw JSON text.
-- @param rapidjson    module providing `encode`
-- @param json         raw JSON text
-- @param object_entry table with `path` and optional `keys` (expected order)
-- @return true, or false plus an error message
local function validate_key_order(rapidjson, json, object_entry)
  local object_start, err = json_value_start_for_path(rapidjson, json, object_entry.path)
  if object_start == nil then
    return false, string.format(
      'key order path lookup failed at %s: %s',
      object_entry.path,
      err
    )
  end

  local members, parse_err = json_object_members(json, object_start)
  if members == nil then
    return false, string.format(
      'key order object lookup failed at %s: %s',
      object_entry.path,
      parse_err
    )
  end

  for index, key in ipairs(object_entry.keys or {}) do
    local member = members[index]
    local token = rapidjson.encode(key)

    if member == nil or member.token ~= token then
      return false, string.format(
        'key order mismatch at %s: expected key %s at position %d',
        object_entry.path,
        key,
        index
      )
    end
  end

  return true
end

--- Tell whether `value` is the JSON null sentinel, checking the module passed
-- in first and then whatever is registered as `package.loaded.rapidjson`.
-- @param value     value to test
-- @param rapidjson optional module exposing `null`
-- @return boolean
local function loaded_null(value, rapidjson)
  if rapidjson and rapidjson.null ~= nil and value == rapidjson.null then
    return true
  end

  local loaded = package.loaded.rapidjson
  return loaded ~= nil and loaded.null ~= nil and value == loaded.null
end

--- Quote a string with %q, shortening anything over 120 bytes to 117 bytes plus '...'.
-- @param value string to render
-- @return quoted, possibly truncated string
local function dump_string(value)
  local truncated = value
  if #truncated > 120 then
    truncated = truncated:sub(1, 117) .. '...'
  end

  return string.format('%q', truncated)
end

--- Render a value compactly for failure reports.
-- Output is bounded: nesting at depth 5 or more collapses to '[...]' / '{...}',
-- and at most 12 elements or keys are printed per container.
-- @param value     value to render
-- @param rapidjson optional module used to recognise the null sentinel
-- @param depth     current nesting depth (callers start at 1)
-- @param seen      set of tables on the current descent path
-- @return string rendering
local function dump_value_inner(value, rapidjson, depth, seen)
  if loaded_null(value, rapidjson) then
    return 'null'
  end

  local value_type = type(value)
  if value_type == 'string' then
    return dump_string(value)
  end
  if value_type == 'number' or value_type == 'boolean' or value_type == 'nil' then
    return tostring(value)
  end
  if value_type ~= 'table' then
    return '<' .. value_type .. ':' .. tostring(value) .. '>'
  end

  -- A table already on the current path is a reference cycle. Emit a visible
  -- marker; an empty string would render as a dangling `"key"=`.
  if seen[value] then
    return '<cycle>'
  end
  if depth >= 5 then
    return is_json_array(value) and '[...]' or '{...}'
  end

  seen[value] = true

  local parts = {}
  if is_json_array(value) then
    local limit = math.min(#value, 12)
    for index = 1, limit do
      parts[#parts + 1] = dump_value_inner(value[index], rapidjson, depth + 1, seen)
    end
    if #value > limit then
      parts[#parts + 1] = '...'
    end
    -- Unmark on the way out so shared, non-cyclic tables are still printed.
    seen[value] = nil
    return '[' .. table.concat(parts, ',') .. ']'
  end

  local keys = string_keys(value)
  local limit = math.min(#keys, 12)
  for index = 1, limit do
    local key = keys[index]
    parts[#parts + 1] =
      dump_string(key) .. '=' .. dump_value_inner(value[key], rapidjson, depth + 1, seen)
  end
  if #keys > limit then
    parts[#parts + 1] = '...'
  end

  seen[value] = nil
  return '{' .. table.concat(parts, ',') .. '}'
end

--- Render a value compactly (bounded depth, width and string length).
-- @param value     value to render
-- @param rapidjson optional module used to recognise the null sentinel; when
--                  omitted only `package.loaded.rapidjson` is consulted
-- @return string rendering
function M.dump_value(value, rapidjson)
  return dump_value_inner(value, rapidjson, 1, {})
end

--- Quote a string with %q without truncating it.
local function dump_full_string(value)
  return string.format('%q', value)
end

local dump_full_value_inner

--- Render a table in full (no depth or width limits).
-- @param value     table to render
-- @param rapidjson optional module used to recognise the null sentinel
-- @param seen      set of tables on the current descent path
-- @return string rendering
local function dump_full_table(value, rapidjson, seen)
  -- Same cycle marker as the compact dumper; without it recursion on a
  -- self-referencing table would be cut silently with an empty string.
  if seen[value] then
    return '<cycle>'
  end

  seen[value] = true

  local parts = {}
  if is_json_array(value) then
    for index = 1, #value do
      parts[#parts + 1] = dump_full_value_inner(value[index], rapidjson, seen)
    end

    seen[value] = nil
    return '[' .. table.concat(parts, ',') .. ']'
  end

  local keys = string_keys(value)
  for _, key in ipairs(keys) do
    parts[#parts + 1] =
      dump_full_string(key) .. '=' .. dump_full_value_inner(value[key], rapidjson, seen)
  end

  seen[value] = nil
  return '{' .. table.concat(parts, ',') .. '}'
end

--- Render any value in full; tables are delegated to dump_full_table.
function dump_full_value_inner(value, rapidjson, seen)
  if loaded_null(value, rapidjson) then
    return 'null'
  end

  local value_type = type(value)
  if value_type == 'string' then
    return dump_full_string(value)
  end
  if value_type == 'number' or value_type == 'boolean' or value_type == 'nil' then
    return tostring(value)
  end
  if value_type ~= 'table' then
    return '<' .. value_type .. ':' .. tostring(value) .. '>'
  end

  return dump_full_table(value, rapidjson, seen)
end

--- Render a value without truncation.
-- @param value     value to render
-- @param rapidjson optional module used to recognise the null sentinel
-- @return string rendering
function M.dump_full_value(value, rapidjson)
  return dump_full_value_inner(value, rapidjson, {})
end

-- Width of the longest sample label, used to align the value column.
local SAMPLE_LABEL_WIDTH = #'encoded_json_sort_keys='

--- Pad `label` to SAMPLE_LABEL_WIDTH and append the value ('' when nil/false).
local function format_sample_value(label, value)
  return label .. string.rep(' ', SAMPLE_LABEL_WIDTH - #label) .. tostring(value or '')
end

--- Build the multi-line "FUZZ SAMPLE" report.
-- @param details table; fields read here: seed, worker / worker_id, elapsed,
--                case (id, kind, schema, value), case_id, kind, schema, value,
--                rapidjson, raw_json_unsorted, encoded_json_sort_keys.
--                Missing identifying fields are shown as '?'.
-- @return report text with '\n' separators
function M.format_sample(details)
  details = details or {}

  local case = details.case or {}
  local value = details.value
  if value == nil then
    value = case.value
  end

  local lines = {
    'FUZZ SAMPLE',
    'seed=' .. tostring(details.seed or '?'),
    'worker=' .. tostring(details.worker or details.worker_id or '?'),
    'elapsed=' .. tostring(details.elapsed or '?') .. 's',
    'case=' .. tostring(details.case_id or case.id or '?'),
    'kind=' .. tostring(details.kind or case.kind or '?'),
    'schema=' .. tostring(details.schema or case.schema or '?'),
    format_sample_value('input_lua=', M.dump_full_value(value, details.rapidjson)),
    format_sample_value('raw_json_unsorted=', details.raw_json_unsorted),
    format_sample_value('encoded_json_sort_keys=', details.encoded_json_sort_keys),
  }

  return table.concat(lines, '\n')
end

--- Build the multi-line "FUZZ FAILURE" report.
-- @param details table; fields read here: seed, worker / worker_id, case
--                (id, kind, schema, value), case_id, kind, schema, value,
--                reason, rapidjson, json. Missing identifying fields are
--                shown as '?'; the json line is emitted only when present.
-- @return report text with '\n' separators
function M.format_failure(details)
  details = details or {}

  local case = details.case or {}
  local case_id = details.case_id or case.id or '?'
  local kind = details.kind or case.kind or '?'
  local schema = details.schema or case.schema or '?'
  local value = details.value
  if value == nil then
    value = case.value
  end

  local lines = {
    'FUZZ FAILURE',
    'seed=' .. tostring(details.seed or '?'),
    'worker=' .. tostring(details.worker or details.worker_id or '?'),
    'case=' .. tostring(case_id),
    'kind=' .. tostring(kind),
    'schema=' .. tostring(schema),
    'reason=' .. tostring(details.reason or '?'),
    -- Pass the caller's rapidjson module, as format_sample does, so the null
    -- sentinel is recognised even if it is not the one in package.loaded.
    'value=' .. M.dump_value(value, details.rapidjson),
  }

  if details.json ~= nil then
    lines[#lines + 1] = 'json=' .. tostring(details.json)
  end

  return table.concat(lines, '\n')
end

--- Compare two numbers with a relative tolerance of 1e-12 (absolute for
-- magnitudes below 1).
-- @param expected number
-- @param actual   number
-- @return boolean
local function numbers_equal(expected, actual)
  -- Exact matches short-circuit; this also covers equal infinities, for which
  -- the subtraction below would yield NaN and compare false.
  if expected == actual then
    return true
  end

  local delta = math.abs(expected - actual)
  local scale = math.max(1, math.abs(expected), math.abs(actual))

  return delta <= scale * 1e-12
end

--- Decode `json` and compare the result with the expectations recorded on a case.
-- Checks, in order: the top-level kind, every tracked object (kind, key count,
-- key list, and key order in the raw text), every tracked array (kind and
-- length) and every tracked scalar (kind and value; floats use a tolerance).
-- @param rapidjson module providing `decode` and `encode`
-- @param case      case table; `case.expected` holds the metadata
-- @param json      encoded JSON text to validate
-- @return true, nil on success; false plus a reason on the first mismatch
function M.validate_encoded_case(rapidjson, case, json)
  local ok, decoded, decode_err = pcall(rapidjson.decode, json)
  if not ok then
    -- When pcall fails, its second result is the error value.
    return false, 'decode failed: ' .. tostring(decoded)
  end
  if decoded == nil then
    return false, 'decode failed: ' .. tostring(decode_err or 'nil result')
  end

  local expected = case.expected or {}
  if expected.top_level_kind ~= nil and
    not matches_expected_kind(decoded, expected.top_level_kind, rapidjson) then
    return false, string.format(
      'top-level kind mismatch: expected %s got %s',
      expected.top_level_kind,
      decoded_kind(decoded, rapidjson)
    )
  end

  for _, entry in ipairs(expected.objects or {}) do
    local found, value, err = lookup_path(decoded, entry.path)
    if not found then
      return false, err
    end

    if not matches_expected_kind(value, 'object', rapidjson) then
      return false, string.format(
        'object kind mismatch at %s: got %s',
        entry.path,
        decoded_kind(value, rapidjson)
      )
    end

    local actual_keys = string_keys(value)
    if #actual_keys ~= entry.key_count then
      return false, string.format(
        'object key count mismatch at %s: expected %d got %d',
        entry.path,
        entry.key_count,
        #actual_keys
      )
    end

    for index, key in ipairs(entry.keys or {}) do
      if actual_keys[index] ~= key then
        return false, string.format(
          'object keys mismatch at %s: expected %s got %s',
          entry.path,
          format_keys(entry.keys or {}),
          format_keys(actual_keys)
        )
      end
    end

    -- Decoding loses member order, so order is verified against the raw text.
    local ordered, order_err = validate_key_order(rapidjson, json, entry)
    if not ordered then
      return false, order_err
    end
  end

  for _, entry in ipairs(expected.arrays or {}) do
    local found, value, err = lookup_path(decoded, entry.path)
    if not found then
      return false, err
    end

    if not matches_expected_kind(value, 'array', rapidjson, entry.length) then
      return false, string.format(
        'array kind mismatch at %s: got %s',
        entry.path,
        decoded_kind(value, rapidjson)
      )
    end

    if #value ~= entry.length then
      return false, string.format(
        'array length mismatch at %s: expected %d got %d',
        entry.path,
        entry.length,
        #value
      )
    end
  end

  for _, entry in ipairs(expected.scalars or {}) do
    local found, value, err = lookup_path(decoded, entry.path)
    if not found then
      return false, err
    end

    local actual_kind = scalar_metadata(value, rapidjson)
    if actual_kind ~= entry.kind then
      return false, string.format(
        'scalar kind mismatch at %s: expected %s got %s',
        entry.path,
        entry.kind,
        actual_kind
      )
    end

    if entry.kind == 'null' then
      if value ~= json_null(rapidjson) then
        return false, 'scalar value mismatch at ' .. entry.path .. ': expected null'
      end
    elseif entry.kind == 'float' then
      if not numbers_equal(entry.value, value) then
        return false, string.format(
          'scalar value mismatch at %s: expected %s got %s',
          entry.path,
          dump_value_inner(entry.value, rapidjson, 1, {}),
          dump_value_inner(value, rapidjson, 1, {})
        )
      end
    elseif value ~= entry.value then
      return false, string.format(
        'scalar value mismatch at %s: expected %s got %s',
        entry.path,
        dump_value_inner(entry.value, rapidjson, 1, {}),
        dump_value_inner(value, rapidjson, 1, {})
      )
    end
  end

  return true, nil
end

--- Record an object expectation (path, key count, key list) on `expected`.
local function track_object(expected, path, value)
  local keys = string_keys(value)

  expected.objects[#expected.objects + 1] = {
    path = path_value(path),
    key_count = #keys,
    keys = keys,
  }
end

--- Record an array expectation (path and length) on `expected`.
local function track_array(expected, path, value)
  expected.arrays[#expected.arrays + 1] = {
    path = path_value(path),
    length = #value,
  }
end

--- Record a scalar expectation (path, kind and, unless the kind is 'null',
-- the value itself) on `expected`.
local function track_scalar(expected, path, value, rapidjson)
  local kind = scalar_metadata(value, rapidjson)
  local entry = {
    path = path_value(path),
    kind = kind,
  }

  if kind ~= 'null' then
    entry.value = value
  end

  expected.scalars[#expected.scalars + 1] = entry
end

-- Schema shells track selected scalars only.
-- Recursive-core metadata is exhaustive for that generated core.
--- Create an empty expectation record for a case.
-- @param top_level_kind kind name the decoded document root must have
-- @return table with `top_level_kind` and empty `objects`, `arrays`, `scalars` lists
local function base_expected(top_level_kind)
  return {
    top_level_kind = top_level_kind,
    objects = {},
    arrays = {},
    scalars = {},
  }
end

--- Walk a generated value and record an expectation for every node in it.
-- Counters are also merged into `expected.random` (created on first use):
-- counts are summed and `max_depth` keeps the maximum seen.
-- Child paths are built as `parent.key` / `parent[i]`.
-- @param expected  expectation record to append to
-- @param value     root of the generated value
-- @param path      path of `value` inside the whole document
-- @param rapidjson module used to recognise the null sentinel
-- @return stats table for this walk: max_depth, object_count, array_count, scalar_count
local function collect_random_metadata(expected, value, path, rapidjson)
  local stats = {
    max_depth = 0,
    object_count = 0,
    array_count = 0,
    scalar_count = 0,
  }

  local function visit(node, current_path, depth)
    if depth > stats.max_depth then
      stats.max_depth = depth
    end

    -- The null sentinel is treated as a scalar even if it is a table.
    if type(node) ~= 'table' or is_json_null(node, rapidjson) then
      stats.scalar_count = stats.scalar_count + 1
      track_scalar(expected, current_path, node, rapidjson)
      return
    end

    if is_json_array(node) then
      stats.array_count = stats.array_count + 1
      track_array(expected, current_path, node)

      for i = 1, #node do
        visit(node[i], current_path .. '[' .. tostring(i) .. ']', depth + 1)
      end

      return
    end

    stats.object_count = stats.object_count + 1
    track_object(expected, current_path, node)

    local keys = string_keys(node)
    for _, key in ipairs(keys) do
      visit(node[key], current_path .. '.' .. key, depth + 1)
    end
  end

  visit(value, path_value(path), 1)

  if expected.random == nil then
    expected.random = {
      max_depth = 0,
      object_count = 0,
      array_count = 0,
      scalar_count = 0,
    }
  end

  if stats.max_depth > expected.random.max_depth then
    expected.random.max_depth = stats.max_depth
  end
  expected.random.object_count = expected.random.object_count + stats.object_count
  expected.random.array_count = expected.random.array_count + stats.array_count
  expected.random.scalar_count = expected.random.scalar_count + stats.scalar_count

  return stats
end

--- Record expectations for a generated payload: its fixed members
-- (`empty_object`, `empty_array`, `scalar_samples.*`) individually and its
-- `random` member exhaustively.
-- @param expected  expectation record to append to
-- @param payload   payload table with the members named above
-- @param path      path of the payload inside the whole document
-- @param rapidjson module used to recognise the null sentinel
local function collect_payload_metadata(expected, payload, path, rapidjson)
  track_object(expected, path, payload)
  track_object(expected, path .. '.empty_object', payload.empty_object)
  track_array(expected, path .. '.empty_array', payload.empty_array)
  track_object(expected, path .. '.scalar_samples', payload.scalar_samples)
  track_scalar(expected, path .. '.scalar_samples.boolean', payload.scalar_samples.boolean, rapidjson)
  track_scalar(
    expected,
    path .. '.scalar_samples.empty_string',
    payload.scalar_samples.empty_string,
    rapidjson
  )
  track_scalar(expected, path .. '.scalar_samples.float', payload.scalar_samples.float, rapidjson)
  track_scalar(expected, path .. '.scalar_samples.integer', payload.scalar_samples.integer, rapidjson)
  track_scalar(
    expected,
    path .. '.scalar_samples.null_value',
    payload.scalar_samples.null_value,
    rapidjson
  )
  track_scalar(expected, path .. '.scalar_samples.string', payload.scalar_samples.string, rapidjson)
  collect_random_metadata(expected, payload.random, path .. '.random', rapidjson)
end

--- Build a chat-completion-shaped document with a random payload under `fuzz`.
-- @param rng       random source exposing `int` and `choice`
-- @param rapidjson module forwarded to the payload and metadata helpers
-- @return value, expected
local function build_llm_response(rng, rapidjson)
  local value = {
    id = 'chatcmpl-' .. tostring(rng:int(100000, 999999)),
    object = 'chat.completion',
    created = 1700000000 + rng:int(1, 100000),
    model = 'fuzz-model-' .. tostring(rng:int(1, 7)),
    choices = {
      {
        index = 0,
        message = {
          role = 'assistant',
          content = random_string(rng),
        },
        finish_reason = rng:choice({ 'stop', 'length', 'tool_calls' }),
      },
    },
    usage = {
      prompt_tokens = rng:int(1, 4096),
      completion_tokens = rng:int(1, 4096),
      total_tokens = 0,
    },
    fuzz = generate_random_payload(rng, rapidjson),
  }
  -- Filled in afterwards because it depends on the two counts drawn above.
  value.usage.total_tokens = value.usage.prompt_tokens + value.usage.completion_tokens

  local expected = base_expected('object')
  track_object(expected, '$', value)
  track_array(expected, '$.choices', value.choices)
  track_object(expected, '$.choices[1]', value.choices[1])
  track_object(expected, '$.choices[1].message', value.choices[1].message)
  track_object(expected, '$.usage', value.usage)
  track_scalar(expected, '$.id', value.id, rapidjson)
  track_scalar(expected, '$.model', value.model, rapidjson)
  track_scalar(expected, '$.choices[1].message.content', value.choices[1].message.content, rapidjson)
  track_scalar(expected, '$.usage.total_tokens', value.usage.total_tokens, rapidjson)
  collect_payload_metadata(expected, value.fuzz, '$.fuzz', rapidjson)

  return value, expected
end

--- Build an issue-tracker-shaped document with a random payload under `fuzz`.
-- @param rng       random source exposing `int`, `bool` and `choice`
-- @param rapidjson module forwarded to the payload and metadata helpers
-- @return value, expected
local function build_github_issue(rng, rapidjson)
  local value = {
    number = rng:int(1, 25000),
    title = 'Issue: ' .. random_string(rng),
    state = rng:choice({ 'open', 'closed' }),
    locked = rng:bool(),
    user = {
      login = 'user-' .. tostring(rng:int(1, 9999)),
      id = rng:int(1, 999999),
      type = rng:choice({ 'User', 'Bot' }),
    },
    labels = {
      { name = 'bug', color = 'd73a4a' },
      { name = 'fuzz', color = '5319e7' },
    },
    assignees = {
      {
        login = 'maintainer-' .. tostring(rng:int(1, 99)),
        id = rng:int(1, 999999),
      },
    },
    comments = rng:int(0, 1000),
    reactions = {
      total_count = rng:int(0, 1000),
      plus_one = rng:int(0, 250),
      heart = rng:int(0, 250),
    },
    fuzz = generate_random_payload(rng, rapidjson),
  }

  local expected = base_expected('object')
  track_object(expected, '$', value)
  track_object(expected, '$.user', value.user)
  track_array(expected, '$.labels', value.labels)
  track_object(expected, '$.labels[1]', value.labels[1])
  track_object(expected, '$.labels[2]', value.labels[2])
  track_array(expected, '$.assignees', value.assignees)
  track_object(expected, '$.assignees[1]', value.assignees[1])
  track_object(expected, '$.reactions', value.reactions)
  track_scalar(expected, '$.number', value.number, rapidjson)
  track_scalar(expected, '$.title', value.title, rapidjson)
  track_scalar(expected, '$.state', value.state, rapidjson)
  track_scalar(expected, '$.locked', value.locked, rapidjson)
  collect_payload_metadata(expected, value.fuzz, '$.fuzz', rapidjson)

  return value, expected
end

--- Build a social-feed-shaped document with a random payload under `fuzz`.
-- @param rng       random source exposing `int` and `bool`
-- @param rapidjson module forwarded to the payload and metadata helpers
-- @return value, expected
local function build_social_feed(rng, rapidjson)
  local value = {
    feed_id = 'feed-' .. tostring(rng:int(1000, 9999)),
    generated_at = '2026-05-' .. tostring(rng:int(10, 29)) .. 'T12:00:00Z',
    viewer = {
      id = rng:int(1, 99999),
      handle = 'viewer-' .. tostring(rng:int(1, 999)),
      premium = rng:bool(),
    },
    posts = {
      {
        id = 'post-' .. tostring(rng:int(1, 999999)),
        body = random_string(rng),
        author = {
          handle = 'author-' .. tostring(rng:int(1, 999)),
          verified = rng:bool(),
        },
        media = {
          {
            type = 'image',
            url = 'https://example.test/media/' .. tostring(rng:int(1, 9999)),
          },
        },
        reactions = {
          likes = rng:int(0, 10000),
          reposts = rng:int(0, 10000),
        },
      },
    },
    fuzz = generate_random_payload(rng, rapidjson),
  }

  local expected = base_expected('object')
  track_object(expected, '$', value)
  track_object(expected, '$.viewer', value.viewer)
  track_array(expected, '$.posts', value.posts)
  track_object(expected, '$.posts[1]', value.posts[1])
  track_object(expected, '$.posts[1].author', value.posts[1].author)
  track_array(expected, '$.posts[1].media', value.posts[1].media)
  track_object(expected, '$.posts[1].media[1]', value.posts[1].media[1])
  track_object(expected, '$.posts[1].reactions', value.posts[1].reactions)
  track_scalar(expected, '$.feed_id', value.feed_id, rapidjson)
  track_scalar(expected, '$.posts[1].body', value.posts[1].body, rapidjson)
  track_scalar(expected, '$.viewer.premium', value.viewer.premium, rapidjson)
  collect_payload_metadata(expected, value.fuzz, '$.fuzz', rapidjson)

  return value, expected
end

--- Build a paginated-list-shaped document (including a JSON null under
-- `links.previous`) with a random payload under `fuzz`.
-- @param rng       random source exposing `int`, `bool` and `choice`
-- @param rapidjson module forwarded to the payload and metadata helpers
-- @return value, expected
local function build_paginated_list(rng, rapidjson)
  local value = {
    page = rng:int(1, 50),
    per_page = rng:choice({ 10, 25, 50, 100 }),
    total = rng:int(100, 10000),
    has_next = rng:bool(),
    links = {
      self = '/v1/items?page=1',
      next = '/v1/items?page=2',
      previous = json_null(rapidjson),
    },
    items = {
      {
        id = rng:int(1, 999999),
        name = random_string(rng),
        active = rng:bool(),
        attributes = {
          rank = rng:int(1, 100),
          score = random_float(rng),
        },
      },
      {
        id = rng:int(1, 999999),
        name = random_string(rng),
        active = rng:bool(),
        attributes = {
          rank = rng:int(1, 100),
          score = random_float(rng),
        },
      },
    },
    fuzz = generate_random_payload(rng, rapidjson),
  }

  local expected = base_expected('object')
  track_object(expected, '$', value)
  track_object(expected, '$.links', value.links)
  track_array(expected, '$.items', value.items)
  track_object(expected, '$.items[1]', value.items[1])
  track_object(expected, '$.items[1].attributes', value.items[1].attributes)
  track_object(expected, '$.items[2]', value.items[2])
  track_object(expected, '$.items[2].attributes', value.items[2].attributes)
  track_scalar(expected, '$.page', value.page, rapidjson)
  track_scalar(expected, '$.per_page', value.per_page, rapidjson)
  track_scalar(expected, '$.has_next', value.has_next, rapidjson)
  track_scalar(expected, '$.links.previous', value.links.previous, rapidjson)
  collect_payload_metadata(expected, value.fuzz, '$.fuzz', rapidjson)

  return value, expected
end

--- Build a configuration-shaped document with a random payload under `fuzz`.
-- @param rng       random source exposing `int`, `bool` and `choice`
-- @param rapidjson module forwarded to the payload and metadata helpers
-- @return value, expected
local function build_metadata_config(rng, rapidjson)
  local value = {
    version = 'v' .. tostring(rng:int(1, 9)) .. '.' .. tostring(rng:int(0, 20)),
    environment = rng:choice({ 'dev', 'staging', 'prod' }),
    flags = {
      beta = rng:bool(),
      strict = rng:bool(),
      audit = rng:bool(),
    },
    limits = {
      requests_per_minute = rng:int(1, 10000),
      burst = rng:int(1, 1000),
      timeout_seconds = rng:int(1, 120),
    },
    tags = {
      'json',
      'encode',
      'fuzz',
    },
    rules = {
      {
        name = 'required-metadata',
        enabled = true,
        threshold = random_float(rng),
      },
      {
        name = 'optional-overrides',
        enabled = rng:bool(),
        threshold = random_float(rng),
      },
    },
    fuzz = generate_random_payload(rng, rapidjson),
  }

  local expected = base_expected('object')
  track_object(expected, '$', value)
  track_object(expected, '$.flags', value.flags)
  track_object(expected, '$.limits', value.limits)
  track_array(expected, '$.tags', value.tags)
  track_array(expected, '$.rules', value.rules)
  track_object(expected, '$.rules[1]', value.rules[1])
  track_object(expected, '$.rules[2]', value.rules[2])
  track_scalar(expected, '$.version', value.version, rapidjson)
  track_scalar(expected, '$.environment', value.environment, rapidjson)
  track_scalar(expected, '$.flags.strict', value.flags.strict, rapidjson)
  track_scalar(expected, '$.limits.requests_per_minute', value.limits.requests_per_minute, rapidjson)
  collect_payload_metadata(expected, value.fuzz, '$.fuzz', rapidjson)

  return value, expected
end

-- Schema family name -> builder returning (value, expected).
local SCHEMA_BUILDERS = {
  llm_response = build_llm_response,
  github_issue = build_github_issue,
  social_feed = build_social_feed,
  paginated_list = build_paginated_list,
  metadata_config = build_metadata_config,
}

--- Generate one fuzz case.
-- The case kind cycles through CASE_KINDS by `case_id`; for 'schema_guided'
-- cases the schema family cycles through SCHEMA_FAMILIES once per full pass
-- over CASE_KINDS. Every other kind produces a bare random payload tagged
-- with `case_id` and labelled 'recursive_random'.
-- @param rng       random source; defaults to M.new_rng(case_id)
-- @param case_id   1-based case number; defaults to 1
-- @param rapidjson module forwarded to the builders
-- @return case table with id, kind, schema, value and expected
function M.generate_case(rng, case_id, rapidjson)
  case_id = case_id or 1
  rng = rng or M.new_rng(case_id)
  -- Result intentionally discarded; presumably this resolves/validates the
  -- null sentinel before any value is built -- TODO confirm against json_null.
  json_null(rapidjson)

  local kind = CASE_KINDS[((case_id - 1) % #CASE_KINDS) + 1]

  if kind == 'schema_guided' then
    local schema_index = (math.floor((case_id - 1) / #CASE_KINDS) % #SCHEMA_FAMILIES) + 1
    local schema = SCHEMA_FAMILIES[schema_index]
    local builder = SCHEMA_BUILDERS[schema]
    if builder == nil then
      -- Name the schema instead of failing with "attempt to call a nil value".
      error('no schema builder registered for schema family: ' .. tostring(schema))
    end

    local value, expected = builder(rng, rapidjson)

    return {
      id = case_id,
      kind = kind,
      schema = schema,
      value = value,
      expected = expected,
    }
  end

  local value = generate_random_payload(rng, rapidjson)
  value.case_id = case_id

  local expected = base_expected('object')
  track_scalar(expected, '$.case_id', value.case_id, rapidjson)
  collect_payload_metadata(expected, value, '$', rapidjson)

  return {
    id = case_id,
    kind = kind,
    schema = 'recursive_random',
    value = value,
    expected = expected,
  }
end

--- Turn `KEY=VALUE` command-line arguments into an environment-style table.
-- Keys must consist of letters, digits and underscores; the value is
-- everything after the first '=' (possibly empty). Arguments that do not
-- match, and entries that are not strings, are ignored. Later duplicates win.
-- @param args array of argument strings (nil is treated as empty)
-- @return table mapping KEY to VALUE strings
function M.env_from_args(args)
  local env = {}
  for _, arg in ipairs(args or {}) do
    -- string.match raises on tables/booleans; skip anything that is not text.
    if type(arg) == 'string' then
      local key, value = string.match(arg, '^([%w_]+)=(.*)$')
      if key then
        env[key] = value
      end
    end
  end
  return env
end

return M