Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 95 additions & 7 deletions build_ast_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from __future__ import annotations

import argparse
import contextlib
import hashlib
import json
import logging
Expand Down Expand Up @@ -84,6 +85,53 @@ def _verbose_stderr_line(content: str) -> None:
print(content, file=sys.stderr, flush=True)


def _emit_graph_progress(parts: dict[str, object], *, verbose: bool) -> None:
"""Emit one ``JCIRAG_PROGRESS kind=graph …`` line to stderr (gated by verbose).

The parent process (``pipeline.run_build_ast_graph`` /
``run_incremental_graph``) passes ``--verbose`` in default AND verbose modes
(only suppressed for ``--quiet``), so this structured progress surfaces in
default mode (where the parent renders it) and verbose mode (raw relay). In
``--quiet`` the builder is never invoked with ``--verbose`` so nothing is
emitted. Field order is fixed so the parser and tests can pin substrings.
"""
if not verbose:
return
fields = ["kind=graph"]
for key in ("pass", "done", "total", "status", "elapsed_s"):
if key in parts:
fields.append(f"{key}={parts[key]}")
line = "JCIRAG_PROGRESS " + " ".join(fields)
_verbose_stderr_line(line)


# Pass-1 per-file tick cadence: pass1_parse emits a ``status=running`` progress
# line each time its running file count is a multiple of this value. This bounds
# stderr volume on huge trees without making the bar feel stale. Independently
# of the cadence, a final tick on pass completion carries status=done.
_PASS1_TICK_EVERY = 25


@contextlib.contextmanager
def _graph_pass_progress(pass_label: str, *, verbose: bool):
"""Emit ``pass=N/6 status=running`` on entry and ``status=done elapsed_s=…``
on exit for passes 2–6 (each advances the rendered bar by 1/6).

Usage: ``with _graph_pass_progress("2/6", verbose=verbose): …``
"""
if not verbose:
yield
return
_emit_graph_progress({"pass": pass_label, "status": "running"}, verbose=verbose)
t0 = time.time()
try:
yield
finally:
elapsed = time.time() - t0
_emit_graph_progress(
{"pass": pass_label, "status": "done", "elapsed_s": f"{elapsed:.2f}"},
verbose=verbose,
)


class _VerbosePassHeartbeats:
"""Emit ``[tag] running … Ns elapsed`` every 5s on stderr while in scope (verbose only)."""

Expand Down Expand Up @@ -837,21 +885,50 @@ def _register_type(
return entry


def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool, scope_files: set[str] | None = None) -> dict[str, JavaFileAst]:
def pass1_parse(
root: Path,
tables: GraphTables,
*,
verbose: bool,
scope_files: set[str] | None = None,
removed_files: set[str] | None = None,
) -> dict[str, JavaFileAst]:
"""Walk files, parse them, populate node indexes. Returns path -> AST.

Args:
root: Source root directory.
tables: GraphTables to populate.
verbose: Whether to emit progress output.
scope_files: Optional set of relative POSIX paths to parse. If None, parse all files.
removed_files: Optional set of relative POSIX paths that no longer exist
on disk (incremental deletions). These are members of ``scope_files``
(they were deleted, so they participate in scoped deletion) but are
never visited by the parse walk, so they must be excluded from the
pass-1 total to keep ``done`` from undercounting then two-way-clamping.
"""
asts: dict[str, JavaFileAst] = {}
ignore = LayeredIgnore(root)
t0 = time.time()
n_files = 0
if verbose:
_verbose_stderr_line(_PASS1_START)
# Count-first: one filtered walk (no parsing) to set the EXACT total before
# the parse loop ticks. Single-layer ignore → the count is exact, so the
# rendered bar is determinate. For a scoped (incremental) parse the total is
# the number of files that will actually be visited: scope minus any removed
# files (which are members of scope for deletion but gone from disk, so the
# parse walk never ticks them); for a full rebuild it is the non-ignored
# .java count.
if verbose:
if scope_files is not None:
removed = removed_files if removed_files is not None else set()
pass1_total = len(scope_files - removed)
else:
pass1_total = sum(1 for _ in iter_java_source_files(root, ignore=ignore))
_emit_graph_progress(
{"pass": "1/6", "done": 0, "total": pass1_total, "status": "running"},
verbose=verbose,
)
slow_sec = 0.0
raw_slow = os.environ.get("JAVA_CODEBASE_RAG_TEST_GRAPH_SLOW_SEC", "").strip()
if raw_slow:
Expand All @@ -871,6 +948,11 @@ def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool, scope_files:
if scope_files is not None and rel not in scope_files:
continue
n_files += 1
if verbose and (n_files % _PASS1_TICK_EVERY == 0):
_emit_graph_progress(
{"pass": "1/6", "done": n_files, "status": "running"},
verbose=verbose,
)
try:
content = p.read_bytes()
except OSError:
Expand Down Expand Up @@ -906,6 +988,10 @@ def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool, scope_files:

if verbose:
elapsed = time.time() - t0
_emit_graph_progress(
{"pass": "1/6", "done": n_files, "status": "done", "elapsed_s": f"{elapsed:.2f}"},
verbose=verbose,
)
_verbose_stderr_line(
f"[graph] pass 1 · parsed {n_files} files in {elapsed:.2f}s: "
f"{len(tables.types)} types, {len(tables.members)} members, "
Expand Down Expand Up @@ -1145,7 +1231,7 @@ def pass2_edges(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
seen_inj: set[tuple[str, str, str, str]] = set()
if verbose:
_verbose_stderr_line(_PASS2_START)
with _VerbosePassHeartbeats("[graph] pass 2", verbose=verbose):
with _graph_pass_progress("2/6", verbose=verbose), _VerbosePassHeartbeats("[graph] pass 2", verbose=verbose):
for fqn, entry in tables.types.items():
ast = asts.get(entry.file_path)
if ast is None:
Expand Down Expand Up @@ -1818,7 +1904,7 @@ def pass3_calls(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
_verbose_stderr_line(_PASS3_START)
_build_member_indexes(tables)
stats = CallResolutionStats()
with _VerbosePassHeartbeats("[graph] pass 3", verbose=verbose):
with _graph_pass_progress("3/6", verbose=verbose), _VerbosePassHeartbeats("[graph] pass 3", verbose=verbose):
for rel_path, file_ast in asts.items():
try:
_process_file_calls(file_ast, rel_path, tables, stats)
Expand Down Expand Up @@ -1972,7 +2058,7 @@ def pass4_routes(
meta_chain = collect_annotation_meta_chain(prs)
if verbose:
_verbose_stderr_line(_PASS4_START)
with _VerbosePassHeartbeats("[graph] pass 4", verbose=verbose):
with _graph_pass_progress("4/6", verbose=verbose), _VerbosePassHeartbeats("[graph] pass 4", verbose=verbose):

for ast in asts.values():
stats.routes_skipped_unresolved += ast.routes_skipped_unresolved
Expand Down Expand Up @@ -2149,7 +2235,7 @@ def _phantom_async_route_id(call: OutgoingCallDecl) -> str:

if verbose:
_verbose_stderr_line(_PASS5_START)
with _VerbosePassHeartbeats("[graph] pass 5", verbose=verbose):
with _graph_pass_progress("5/6", verbose=verbose), _VerbosePassHeartbeats("[graph] pass 5", verbose=verbose):
for member in sorted(tables.members, key=lambda x: x.node_id):
if member.decl.is_constructor:
continue
Expand Down Expand Up @@ -2551,7 +2637,7 @@ def _micro_factor(member: MemberEntry | None) -> float:

if verbose:
_verbose_stderr_line(_PASS6_START)
with _VerbosePassHeartbeats("[graph] pass 6", verbose=verbose):
with _graph_pass_progress("6/6", verbose=verbose), _VerbosePassHeartbeats("[graph] pass 6", verbose=verbose):
for row in tables.http_call_rows:
if row.match != "unresolved":
continue
Expand Down Expand Up @@ -3586,7 +3672,9 @@ def incremental_rebuild(
_verbose_stderr_line("[increment] rebuilding scoped files (passes 1-4)")

tables = GraphTables()
asts = pass1_parse(source_root, tables, verbose=verbose, scope_files=scope_files)
asts = pass1_parse(
source_root, tables, verbose=verbose, scope_files=scope_files, removed_files=removed
)

# Load existing types and members for cross-file resolution (only from unchanged files)
_load_existing_types(conn, tables, exclude_files=scope_files)
Expand Down
79 changes: 67 additions & 12 deletions java_codebase_rag/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,41 @@ def _run_with_pipeline_progress(
cfg: ResolvedOperatorConfig,
*,
quiet: bool,
work: Callable[[], int],
verbose: bool = False,
work: Callable[["PipelineProgress | None"], int],
) -> int:
if quiet:
return int(work())
"""Run ``work`` under the unified progress renderer (default TTY mode only).

``work`` receives a :class:`PipelineProgress` whose ``on_progress`` callback
should be forwarded to the graph/vectors pipeline helpers so their
``JCIRAG_PROGRESS`` events feed the renderer. In ``--quiet`` or ``--verbose``
mode the context is ``None`` (no Live region: quiet is silent, verbose
raw-relays subprocess output).
"""
if quiet or verbose:
return int(work(None))
from java_codebase_rag.progress import IndexProgressRenderer, ProgressEvent

# PR-2 owns the graph task only; vectors/optimize stay pending (PR-3).
phases = ["graph"]
renderer = IndexProgressRenderer(phases)
progress = PipelineProgress(renderer=renderer)

def on_progress(ev: ProgressEvent) -> None:
renderer.apply(ev)

progress.on_progress = on_progress
progress.console = renderer._console # noqa: SLF001 — shared with the drain for Live-safe routing

_pipeline_header(subcommand, cfg)
t0 = time.perf_counter()
code = 0
# start() always flips _started (the non-TTY fallback is a no-op for Live but
# still needs the flag so apply() routes to the concise-line printer). The
# TTY Live region is entered inside start() only when the console is a TTY.
renderer.start()
try:
code = int(work())
code = int(work(progress))
return code
except BaseException as exc:
# Keep footer aligned with process outcome (main maps unhandled Exception -> exit 2).
Expand All @@ -155,9 +181,26 @@ def _run_with_pipeline_progress(
code = 2
raise
finally:
renderer.stop()
_pipeline_footer(subcommand, t0, code)


class PipelineProgress:
    """Bundle passed to ``work``: the renderer plus its progress hooks.

    The constructor stores only ``renderer``; ``on_progress`` and ``console``
    start as ``None`` and are filled in afterwards by
    :func:`_run_with_pipeline_progress`. Callers are expected to forward them
    to the pipeline helpers' ``on_progress`` / ``on_progress_console``
    parameters. ``console`` is the renderer's stderr ``rich.Console``, which
    lets the subprocess drain send non-progress lines through
    ``console.print`` while the Live region is up (single-writer invariant).
    """

    def __init__(self, *, renderer: "object | None") -> None:
        # Hooks are deliberately unset here; the owner wires them post-construction.
        self.console: "object | None" = None
        self.on_progress: "Callable | None" = None
        self.renderer = renderer


def _jsonable(value: Any) -> Any:
if hasattr(value, "model_dump"):
return value.model_dump()
Expand Down Expand Up @@ -266,7 +309,7 @@ def _cmd_init(args: argparse.Namespace) -> int:
return 2
cfg.index_dir.mkdir(parents=True, exist_ok=True)

def work() -> int:
def work(progress: "PipelineProgress | None") -> int:
env = cfg.subprocess_env()
verbose = bool(args.verbose)
coco = run_cocoindex_update(
Expand Down Expand Up @@ -295,6 +338,8 @@ def work() -> int:
verbose=verbose,
quiet=bool(args.quiet),
env=env,
on_progress=progress.on_progress if progress is not None else None,
on_progress_console=progress.console if progress is not None else None,
)
if g.returncode != 0:
_emit(
Expand All @@ -310,7 +355,9 @@ def work() -> int:
_emit({"success": True, "message": "init completed"})
return 0

return _run_with_pipeline_progress("init", cfg, quiet=bool(args.quiet), work=work)
return _run_with_pipeline_progress(
"init", cfg, quiet=bool(args.quiet), verbose=bool(args.verbose), work=work
)


def _cmd_increment(args: argparse.Namespace) -> int:
Expand All @@ -323,7 +370,7 @@ def _cmd_increment(args: argparse.Namespace) -> int:
if vectors_only:
_emit_increment_ladybug_warning()

def work() -> int:
def work(progress: "PipelineProgress | None") -> int:
env = cfg.subprocess_env()
coco = run_cocoindex_update(
env,
Expand Down Expand Up @@ -356,6 +403,8 @@ def work() -> int:
verbose=bool(args.verbose),
quiet=bool(args.quiet),
env=env,
on_progress=progress.on_progress if progress is not None else None,
on_progress_console=progress.console if progress is not None else None,
)

# Check if incremental fell back to full rebuild
Expand Down Expand Up @@ -389,15 +438,17 @@ def work() -> int:
_emit({"success": True, "message": "increment completed (Lance + graph updated)"})
return 0

return _run_with_pipeline_progress("increment", cfg, quiet=bool(args.quiet), work=work)
return _run_with_pipeline_progress(
"increment", cfg, quiet=bool(args.quiet), verbose=bool(args.verbose), work=work
)


def _cmd_reprocess(args: argparse.Namespace) -> int:
cfg = _resolved_from_ns(args)
_startup_hints(cfg)
cfg.apply_to_os_environ()

def work() -> int:
def work(progress: "PipelineProgress | None") -> int:
env = cfg.subprocess_env()
verbose = bool(args.verbose)
vectors_only = bool(getattr(args, "vectors_only", False))
Expand Down Expand Up @@ -443,6 +494,8 @@ def work() -> int:
verbose=verbose,
quiet=bool(args.quiet),
env=env,
on_progress=progress.on_progress if progress is not None else None,
on_progress_console=progress.console if progress is not None else None,
)
if _is_graph_preflight_blocker(g):
payload = {
Expand Down Expand Up @@ -482,7 +535,9 @@ def work() -> int:
_emit_reprocess_outcome(payload)
return _reprocess_exit_code(payload)

return _run_with_pipeline_progress("reprocess", cfg, quiet=bool(args.quiet), work=work)
return _run_with_pipeline_progress(
"reprocess", cfg, quiet=bool(args.quiet), verbose=bool(args.verbose), work=work
)


def _cmd_install(args: argparse.Namespace) -> int:
Expand Down Expand Up @@ -537,7 +592,7 @@ def _cmd_erase(args: argparse.Namespace) -> int:
print("Aborted.", file=sys.stderr)
return 2

def work() -> int:
def work(progress: "PipelineProgress | None") -> int:
env = cfg.subprocess_env()
drop = run_cocoindex_drop(env, quiet=bool(args.quiet))
if drop.returncode == 127:
Expand Down Expand Up @@ -570,7 +625,7 @@ def work() -> int:
_emit({"success": True, "message": "erase completed"})
return 0

return _run_with_pipeline_progress("erase", cfg, quiet=bool(args.quiet), work=work)
return _run_with_pipeline_progress("erase", cfg, quiet=bool(args.quiet), verbose=bool(getattr(args, "verbose", False)), work=work)


def _cmd_meta(args: argparse.Namespace) -> int:
Expand Down
8 changes: 8 additions & 0 deletions java_codebase_rag/cli_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@
b'"event": "brownfield-',
b"unknown producer source strategy",
b"unknown client source strategy",
# Builder verbose heartbeats / pass banners: in default mode the renderer's
# bar subsumes these, so they must NOT also appear as raw lines above the
# Live region. --verbose raw-relay bypasses this filter and still shows them.
b"[graph] pass ",
b"[graph] scoped write ",
b"[graph] writing ",
b"[graph] done ",
b"[increment] ",
)


Expand Down
Loading
Loading