diff --git a/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py b/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py
index ac1bee702..73da62010 100644
--- a/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py
+++ b/packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py
@@ -174,7 +174,14 @@ def parameters():
 
 @pytest.fixture
 def maketgg(monkeypatch, parameters):
-    def inner(target_tasks=None, kinds=None, params=None, enable_verifications=True):
+    def inner(
+        target_tasks=None,
+        kinds=None,
+        params=None,
+        enable_verifications=True,
+        cached_params=None,
+        cached_graphs=None,
+    ):
         kinds = kinds or [("_fake", [])]
         params = params or {}
         FakeKind.loaded_kinds = []
@@ -196,7 +203,11 @@ def target_tasks_method(full_task_graph, parameters, graph_config):
         monkeypatch.setattr(generator, "load_graph_config", fake_load_graph_config)
 
         return WithFakeKind(
-            "/root", parameters, enable_verifications=enable_verifications
+            "/root",
+            parameters,
+            enable_verifications=enable_verifications,
+            cached_params=cached_params,
+            cached_graphs=cached_graphs,
         )
 
     return inner
diff --git a/src/taskgraph/decision.py b/src/taskgraph/decision.py
index a0c5381a8..dc885f02b 100644
--- a/src/taskgraph/decision.py
+++ b/src/taskgraph/decision.py
@@ -64,7 +64,7 @@ def full_task_graph_to_runnable_tasks(full_task_json):
     return runnable_tasks
 
 
-def taskgraph_decision(options, parameters=None):
+def taskgraph_decision(options, parameters=None, cache_dir=None):
     """
     Run the decision task.  This function implements `mach taskgraph
     decision`, and is responsible for
@@ -104,6 +104,29 @@ def taskgraph_decision(options, parameters=None):
 
     decision_task_id = os.environ["TASK_ID"]
 
+    # load previously generated results from `cache_dir`, if one was given
+    cached_params = None
+    cached_graphs = {}
+    if cache_dir:
+        # TODO: ensure that we only use cached results if we have cached results
+        # for all prior steps
+        if os.path.exists(os.path.join(cache_dir, "parameters")):
+            cached_params = Parameters(**load_yaml(cache_dir, "parameters"))
+        # Only graphs are loaded: the generator derives `full_task_set` and
+        # `target_task_set` from them and rejects any other cached key.
+        for name in (
+            "full_task_graph",
+            "target_task_graph",
+            "optimized_task_graph",
+            "morphed_task_graph",
+        ):
+            path = os.path.join(cache_dir, name)
+            if not os.path.exists(path):
+                continue
+            with open(path) as fh:
+                # `from_json` returns a (tasks, graph) pair; keep the graph
+                cached_graphs[name] = TaskGraph.from_json(json.load(fh))[1]
+
     # create a TaskGraphGenerator instance
     tgg = TaskGraphGenerator(
         root_dir=options.get("root"),
@@ -111,6 +134,8 @@ def taskgraph_decision(options, parameters=None):
         decision_task_id=decision_task_id,
         write_artifacts=True,
         enable_verifications=options.get("verify", True),
+        cached_params=cached_params,
+        cached_graphs=cached_graphs,
     )
 
     # write out the parameters used to generate this graph
diff --git a/src/taskgraph/generator.py b/src/taskgraph/generator.py
index a944a8b02..5579169b0 100644
--- a/src/taskgraph/generator.py
+++ b/src/taskgraph/generator.py
@@ -144,6 +144,8 @@ def __init__(
         decision_task_id: str = "DECISION-TASK",
         write_artifacts: bool = False,
         enable_verifications: bool = True,
+        cached_params: "dict | None" = None,
+        cached_graphs: "dict[str, TaskGraph] | None" = None,
     ):
         """
         @param root_dir: root directory containing the Taskgraph config.yml file
@@ -162,6 +164,32 @@ def __init__(
         # start the generator
         self._run = self._run()  # type: ignore
         self._run_results = {}
+        # Seed the run results with any caller-provided cached values so that
+        # `_run_until` returns them instead of the regenerated equivalents.
+        # TODO: should we require all earlier results cached to cache a later result?
+        # this would mean that we would be required to cache and load graph_config, kind_graph
+        # the argument against it is that we still do things like, eg: load_kinds even
+        # when we use a cached full task graph
+        # it probably makes sense to have this requirement strictly for graphs, at least?
+        if cached_params:
+            self._run_results["parameters"] = cached_params
+        for k, v in (cached_graphs or {}).items():
+            if k == "full_task_graph":
+                # full task set is always the full task graph with the edges removed
+                self._run_results["full_task_set"] = TaskGraph(
+                    v.tasks, Graph(frozenset(v.tasks), frozenset())
+                )
+                self._run_results["full_task_graph"] = v
+            elif k == "target_task_graph":
+                # target task set is always the target task graph with the edges removed
+                self._run_results["target_task_set"] = TaskGraph(
+                    v.tasks, Graph(frozenset(v.tasks), frozenset())
+                )
+                self._run_results["target_task_graph"] = v
+            elif k in ("optimized_task_graph", "morphed_task_graph"):
+                self._run_results[k] = v
+            else:
+                raise ValueError(f"cached graph {k} not supported")
 
     @property
     def parameters(self):
@@ -564,7 +592,9 @@ def _run_until(self, name):
                 k, v = next(self._run)  # type: ignore
             except StopIteration:
                 raise AttributeError(f"No such run result {name}")
-            self._run_results[k] = v
+            # keep values seeded from `cached_params` / `cached_graphs`
+            if k not in self._run_results:
+                self._run_results[k] = v
         return self._run_results[name]
 
     def verify(self, name, *args, **kwargs):
diff --git a/src/taskgraph/main.py b/src/taskgraph/main.py
index 18a3633db..2d9adac82 100644
--- a/src/taskgraph/main.py
+++ b/src/taskgraph/main.py
@@ -936,7 +936,9 @@ def load_task(args):
 def decision(options):
     from taskgraph.decision import taskgraph_decision  # noqa: PLC0415
 
-    taskgraph_decision(options)
+    # TODO: add parameter that instructs us to go fetch cached artifacts from
+    # elsewhere, eg: an index
+    taskgraph_decision(options, cache_dir=options.get("cache_dir"))
 
 
 @command("actions", help="Print the rendered actions.json")
diff --git a/test/test_generator.py b/test/test_generator.py
index 783073b21..c36ba5512 100644
--- a/test/test_generator.py
+++ b/test/test_generator.py
@@ -7,11 +7,12 @@
 from concurrent.futures import ProcessPoolExecutor
 
 import pytest
-from pytest_taskgraph import WithFakeKind, fake_load_graph_config
+from pytest_taskgraph import WithFakeKind, fake_load_graph_config, make_task
 
 from taskgraph import generator, graph
 from taskgraph.generator import Kind, load_tasks_for_kind, load_tasks_for_kinds
 from taskgraph.loader.default import loader as default_loader
+from taskgraph.taskgraph import TaskGraph
 
 linuxonly = pytest.mark.skipif(
     platform.system() != "Linux",
@@ -386,3 +387,44 @@ def test_kind_graph_with_target_kinds(maketgg):
     # _fake3 and _other should not be included
     assert "_fake3" not in kind_graph.nodes
     assert "_other" not in kind_graph.nodes
+
+
+def test_cached_results(maketgg):
+    """Cached graphs are returned instead of regenerating parts of the taskgraph"""
+    fake1 = make_task("fake1", kind="_fake1")
+    fake2 = make_task("fake2", kind="_fake2", dependencies={"fake1": "fake1"})
+    fake3 = make_task(
+        "fake3", kind="_fake3", dependencies={"fake1": "fake1", "fake2": "fake2"}
+    )
+    tasks, full_task_graph = TaskGraph.from_json(
+        {
+            "fake1": fake1.to_json(),
+            "fake2": fake2.to_json(),
+            "fake3": fake3.to_json(),
+        }
+    )
+    tgg = maketgg(
+        target_tasks=["fake1", "fake2", "fake3"],
+        kinds=[
+            ("_fake3", {"kind-dependencies": ["_fake2", "_fake1"]}),
+            ("_fake2", {"kind-dependencies": ["_fake1"]}),
+            ("_fake1", {"kind-dependencies": []}),
+        ],
+        cached_graphs={"full_task_graph": full_task_graph},
+    )
+    assert tgg.full_task_set == TaskGraph(
+        tasks, graph.Graph(frozenset(tasks), frozenset())
+    )
+    assert tgg.full_task_graph == full_task_graph
+    # ensure we get different results when not using cached_graphs
+    tgg2 = maketgg(
+        target_tasks=["fake1", "fake2", "fake3"],
+        kinds=[
+            ("_fake3", {"kind-dependencies": ["_fake2", "_fake1"]}),
+            ("_fake2", {"kind-dependencies": ["_fake1"]}),
+            ("_fake1", {"kind-dependencies": []}),
+        ],
+    )
+    assert tgg.full_task_graph != tgg2.full_task_graph
+    # accessing a later stage must not raise when built on a cached graph
+    tgg.target_task_graph