diff --git a/.agents/plans/pypi_hub_proxy_feature.md b/.agents/plans/pypi_hub_proxy_feature.md new file mode 100644 index 0000000000..1c07669160 --- /dev/null +++ b/.agents/plans/pypi_hub_proxy_feature.md @@ -0,0 +1,287 @@ +# Implementation Plan: Canonical Automatic PyPI Proxy Hub + +This document defines the locked, production-ready architectural, Starlark API, +and testing specifications for implementing dynamic PyPI dependency resolution in +`rules_python`. + +## 1. Architectural Strategy: The Canonical `@pypi` Proxy + +The `pip` bzlmod extension will automatically synthesize a canonical `@pypi` +proxy repository rule that orchestrates routing to underlying concrete hubs. + +### Bzlmod-Exclusive Scope + +The Unified PyPI Hub Proxy is an **exclusive feature of `bzlmod`**. Legacy +`WORKSPACE` evaluations using independent `pip_parse` repository macros are not +supported, as bzlmod's module extension architecture provides the required +centralized coordination to inspect and interlink cross-module hubs. + +### Automatic Proxy Construction & Collision Logic + +During the evaluation of the `pip` extension across the dependency graph: +1. **Unconditional Creation**: The extension will **always** synthesize a + proxy repository rule with the apparent name `pypi`, even if zero + `pip.parse` concrete hubs are defined in the dependency graph (in which + case the proxy is completely valid but empty). +2. **Collision Prevention**: If a user explicitly defines a concrete hub + named `pypi` (`pip.parse(hub_name = "pypi")`), the automatic proxy + synthesis is skipped so the user maintains absolute control over that + repository name. + +In `MODULE.bazel`: +```starlark +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") + +# Concrete hubs defined for different execution contexts +pip.parse(hub_name = "pypi_a", ...) +pip.parse(hub_name = "pypi_b", ...) 
+use_repo(pip, "pypi_a", "pypi_b") + +# The canonical proxy is automatically created unconditionally: +use_repo(pip, "pypi") +``` + +### Unified Pypi Hub + +The canonical `@pypi` proxy repository matches exactly how concrete hubs create +their directory structure: a root package for shared configuration settings, and +a dedicated subdirectory (subpackage) for each PyPI package. + +Here is a complete, representative code example of what the generated files in +`@pypi` will look like when resolving packages between `pypi_a` and `pypi_b`: + +#### 1. `@pypi//BUILD.bazel` (Root Package) +The root package contains the shared `config_setting` targets following the +`_is_pypi_hub_` private naming convention. Leading underscores are strictly +applied because these configuration settings are an internal implementation +detail of the proxy repository and are not intended to be a public API. + +```starlark +package(default_visibility = ["//visibility:public"]) + +config_setting( + name = "_is_pypi_hub_pypi_a", + flag_values = { + "@rules_python//python/config_settings:pypi_hub": "pypi_a", + }, +) + +config_setting( + name = "_is_pypi_hub_pypi_b", + flag_values = { + "@rules_python//python/config_settings:pypi_hub": "pypi_b", + }, +) +``` + +#### 2. `@pypi//foo/BUILD.bazel` (PyPI Package Subpackage) +Each PyPI package subpackage defines the standard aliases (`pkg`, `whl`, `data`, +`dist_info`, `extracted_wheel_files`), plus a complete **union of all custom +`extra_hub_aliases`** defined across all concrete hubs. 
+ +Each alias resolves dynamically to the active concrete hub based on the root +private configuration settings: + +```starlark +package(default_visibility = ["//visibility:public"]) + +alias( + name = "foo", + actual = ":pkg", +) + +alias( + name = "pkg", + actual = select({ + "//:_is_pypi_hub_pypi_a": "@pypi_a//foo:pkg", + "//:_is_pypi_hub_pypi_b": "@pypi_b//foo:pkg", + # When pypi_hub is "auto" (unset), it defaults to the first defined + # concrete hub (or designated fallback via pip.default). + "//conditions:default": "@pypi_a//foo:pkg", + }), +) + +alias( + name = "whl", + actual = select({ + "//:_is_pypi_hub_pypi_a": "@pypi_a//foo:whl", + "//:_is_pypi_hub_pypi_b": "@pypi_b//foo:whl", + "//conditions:default": "@pypi_a//foo:whl", + }), +) + +# ... standard aliases for data, dist_info, extracted_wheel_files ... + +# 3. Unionized custom extra alias (defined in pypi_a but missing in pypi_b): +alias( + name = "my_custom_tool", + actual = select({ + "//:_is_pypi_hub_pypi_a": "@pypi_a//foo:my_custom_tool", + # Unrepresented branch routes to execution failure target: + "//:_is_pypi_hub_pypi_b": "//:_missing_package_error_pypi_b_foo", + "//conditions:default": "@pypi_a//foo:my_custom_tool", + }), +) +``` + +### Disjoint Hub Packages & Execution-Phase Failure + +If a package exists in one concrete hub but is missing in another (e.g., `scipy` +is in `pypi_b` but not `pypi_a`), our proxy synthesizes a package subpackage for +the union of all packages. + +To ensure that `bazel cquery` and `bazel query` successfully analyze over the +entire transitive build graph without failing, unrepresented select branches +must route to a dedicated **execution-phase error rule**. 
+ +```starlark +# In @pypi//scipy/BUILD.bazel +alias( + name = "pkg", + actual = select({ + # Routes to execution-phase action failure target: + "//:_is_pypi_hub_pypi_a": "//:_missing_package_error_pypi_a_scipy", + "//:_is_pypi_hub_pypi_b": "@pypi_b//scipy:pkg", + "//conditions:default": "//:_missing_package_error_pypi_a_scipy", + }), +) +``` + +The synthesized `//:_missing_package_error_XX` rule in `@pypi//BUILD.bazel` +returns standard Starlark Python providers so analysis/cquery passes, but +registers a build action that fails when executed: + +``` +Dependency Error: Third-party package 'scipy' is not available when building under PyPI hub 'pypi_a'. +``` + +### Fallback Hub Precedence (`"auto"`) + +When a target depends on `@pypi//foo` and the active build setting is `"auto"`, +the proxy resolves to a concrete hub using the following precedence: +1. **Designated Fallback**: If the user has explicitly designated a fallback + concrete hub via `pip.default(default_hub = "...")` in their root + `MODULE.bazel`, the proxy routes to it. +2. **First Defined Hub**: If no fallback is explicitly designated via + `pip.default()`, the proxy **automatically routes to the first defined + concrete hub** parsed during extension evaluation (e.g., `pypi_a`). + +```starlark +# Optional: explicitly override the "auto" fallback hub +pip.default( + default_hub = "pypi_b", +) +``` + +## 2. Core Rule Integration: `config_settings` Transitions + +Users will switch active hubs using the standard, highly generic +`config_settings` transition attribute on executable targets. 
+ +### Build Setting Definition + +In `python/config_settings/BUILD.bazel`: + +```starlark +string_flag( + name = "pypi_hub", + build_setting_default = "auto", # Default value is "auto" + visibility = ["//visibility:public"], +) +``` + +In `python/private/common_labels.bzl`: +```starlark + PYPI_HUB = str(Label("//python/config_settings:pypi_hub")), +``` + +In `python/private/transition_labels.bzl`: +```starlark +_BASE_TRANSITION_LABELS = [ + # ... existing transition labels ... + labels.PYPI_HUB, +] +``` + +Because `py_binary` and `py_test` implement an incoming transition +(`_transition_executable_impl`) that automatically processes any +`config_settings` keys matching `TRANSITION_LABELS`, **this provides complete +transition capabilities with zero changes to our core rule definitions**. + +### Usage in BUILD.bazel + +Libraries consume packages through the canonical proxy: + +```starlark +py_library( + name = "common", + deps = ["@pypi//foo"], # Apparent proxy repository +) +``` + +Binaries change the active hub by transitioning the build setting: + +```starlark +# Resolves @pypi -> pypi_a (first defined / designated fallback) +py_binary( + name = "bin_default", + deps = [":common"], +) + +# Resolves @pypi -> pypi_b via transition +py_binary( + name = "bin_b", + deps = [":common"], + config_settings = { + "//python/config_settings:pypi_hub": "pypi_b", + }, +) +``` + +### Analysis Cache & Memory Best Practices + +Because transitions fork the Bazel configuration, building targets with highly +diversified `config_settings` across large build graphs will result in +re-analysis and re-compilation of shared dependencies. + +We will include explicit documentation guidelines advising users to keep their +`pypi_hub` transition configurations localized and minimized to preserve Bazel +caching and memory efficiency. + +## 3. 
Integration Testing Specification + +We will construct a comprehensive Bazel-in-Bazel integration test suite in +`tests/integration/unified_pypi/` to guarantee correctness and verify +transitions. + +The integration test suite will assert: +1. **`"auto"` Precedence**: Author a test asserting `bazel run //:bin_default` + correctly inherits `"auto"` and resolves dependencies from the first + defined concrete hub (or designated fallback). +2. **Transitional Resolution**: Author a test asserting two binary targets in + the same package with different `config_settings` successfully resolve + dependencies and execute against their respective concrete hubs (`pypi_a` + vs `pypi_b`). +3. **Command Line Override**: Author a test asserting + `bazel run --//python/config_settings:pypi_hub=pypi_b //:bin_default` + successfully forces the executable to run using imports resolved from + `pypi_b`. +4. **Disjoint Execution Failure**: Author a test asserting `bazel cquery` over + a target depending on an unrepresented missing package succeeds, while + `bazel run` on that target gracefully fails during execution with the exact + synthesized error message. +5. **Unionized Extra Hub Aliases**: Author a test asserting that a binary + successfully runs using a custom `extra_hub_aliases` target resolved + through the `@pypi` proxy. + +## 4. Execution Steps + +1. **Phase 1**: Define `pypi_hub` `string_flag` and register it in + `common_labels.bzl` and `transition_labels.bzl`. +2. **Phase 2**: Update `python/private/pypi/extension.bzl` to synthesize the + canonical `pypi` proxy repository rule. +3. **Phase 3**: Implement `missing_package_error` execution failure rule and + the `proxy_hub_repository` generation logic. +4. **Phase 4**: Author the Bazel-in-Bazel integration test suite in + `tests/integration/unified_pypi/`. +5. **Phase 5**: Run all tests and verify full pass before PR submission. 
diff --git a/.bazelignore b/.bazelignore index 2cf1523aef..5c3bb7caea 100644 --- a/.bazelignore +++ b/.bazelignore @@ -35,4 +35,5 @@ tests/integration/compile_pip_requirements/bazel-compile_pip_requirements tests/integration/local_toolchains/bazel-local_toolchains tests/integration/py_cc_toolchain_registered/bazel-py_cc_toolchain_registered tests/integration/toolchain_target_settings/bazel-module_under_test +tests/integration/unified_pypi/bazel-unified_pypi tests/integration/uv_lock/bazel-uv_lock diff --git a/.bazelrc.deleted_packages b/.bazelrc.deleted_packages index 407fd1cb48..ce42333e6f 100644 --- a/.bazelrc.deleted_packages +++ b/.bazelrc.deleted_packages @@ -40,6 +40,7 @@ common --deleted_packages=tests/integration/pip_parse_isolated common --deleted_packages=tests/integration/py_cc_toolchain_registered common --deleted_packages=tests/integration/runtime_manifests common --deleted_packages=tests/integration/toolchain_target_settings +common --deleted_packages=tests/integration/unified_pypi common --deleted_packages=tests/integration/uv_lock common --deleted_packages=tests/modules/another_module common --deleted_packages=tests/modules/other diff --git a/docs/api/rules_python/python/config_settings/index.md b/docs/api/rules_python/python/config_settings/index.md index cd3cbc9829..81f11797cf 100644 --- a/docs/api/rules_python/python/config_settings/index.md +++ b/docs/api/rules_python/python/config_settings/index.md @@ -373,6 +373,20 @@ is created. ::: :::: +::::{bzl:flag} pypi_hub +Determines which PyPI repository hub is used when resolving package dependencies. + +This flag is transitioned on automatically by executable targets (`py_binary`, `py_test`) +to select the appropriate concrete PyPI hub (e.g., when fallback or disjoint packages exist across multiple hubs). + +Values: +* `auto`: (default) Resolves dependencies using the fallback or first available hub. +* `<hub_name>`: Explicitly forces resolution of packages from the specified concrete PyPI hub. 
+ +:::{versionadded} VERSION_NEXT_FEATURE +::: +:::: + ## Removed Flags :::{versionremoved} 2.1.0 diff --git a/docs/pypi/download.md b/docs/pypi/download.md index f0e70cf850..c321462758 100644 --- a/docs/pypi/download.md +++ b/docs/pypi/download.md @@ -50,6 +50,73 @@ You can use the pip extension multiple times. This configuration will create multiple external repos that have no relation to one another and may result in downloading the same wheels numerous times. +(unified-pypi-hub)= +## Unified `@pypi` Hub for Multi-Hub Configurations + +:::{versionadded} VERSION_NEXT_FEATURE +Unified `@pypi` hub repository for Bzlmod multi-hub configurations. +::: + +When you call the `pip` extension multiple times with different `hub_name` +attributes, `rules_python` automatically generates a unified `@pypi` hub +repository (unless one of your concrete hubs is explicitly named `"pypi"`). + +This unified `@pypi` repository acts as a dynamic proxy that routes package +dependencies to the active concrete hub at build time. This is especially +useful in monorepos where shared library targets need to depend on PyPI +packages without knowing which specific hub or requirements lock file the +consuming binary is using. 
+ +### Configuring the Unified Hub + +To configure the unified hub, define your concrete hubs as usual, and +optionally designate a default hub using the `pip.default` tag's +`default_hub` attribute: + +```starlark +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") + +# Define concrete hub 'pypi_a' +pip.parse( + hub_name = "pypi_a", + python_version = "3.11", + requirements_lock = "//:requirements_a.txt", +) +use_repo(pip, "pypi_a") + +# Define concrete hub 'pypi_b' +pip.parse( + hub_name = "pypi_b", + python_version = "3.11", + requirements_lock = "//:requirements_b.txt", +) +use_repo(pip, "pypi_b") + +# Designate 'pypi_b' as the default hub for the unified '@pypi' repository +pip.default(default_hub = "pypi_b") + +# Import the unified hub repository +use_repo(pip, "pypi") +``` + +### Dynamic Routing at Build Time + +By default, the unified `@pypi` repository will resolve packages from the +designated `default_hub` (or, if none is designated, from the hub whose name sorts first). You can dynamically switch the active hub for a build +using the `--@rules_python//python/config_settings:pypi_hub` command-line flag +or via target transitions: + +```bash +# Build using packages from 'pypi_a' +bazel build --@rules_python//python/config_settings:pypi_hub=pypi_a //my:binary +``` + +Shared library targets can simply depend on the unified hub (e.g., +`@pypi//numpy`), and the dependency will automatically resolve to the correct +wheel version from the active hub during the build. + + + As with any repository rule or extension, if you would like to ensure that `pip_parse` is re-executed to pick up a non-hermetic change to your environment (e.g., updating your system `python` interpreter), you can force it to re-execute by running `bazel sync --only [pip_parse @@ -334,6 +401,7 @@ into whatever HTTP(S) request it performs against `example.com`. See the [Credential Helper Spec][cred-helper-spec] for more details. 
+ [rfc7617]: https://datatracker.ietf.org/doc/html/rfc7617 [cred-helper-design]: https://github.com/bazelbuild/proposals/blob/main/designs/2022-06-07-bazel-credential-helpers.md [cred-helper-spec]: https://github.com/EngFlow/credential-helper-spec/blob/main/spec.md diff --git a/news/3837.added.md b/news/3837.added.md new file mode 100644 index 0000000000..6d3e4b5504 --- /dev/null +++ b/news/3837.added.md @@ -0,0 +1 @@ +(pypi) Added `@pypi` repo: a unified hub of `pip.parse` hubs. diff --git a/python/config_settings/BUILD.bazel b/python/config_settings/BUILD.bazel index 5b1317872f..a17e3acded 100644 --- a/python/config_settings/BUILD.bazel +++ b/python/config_settings/BUILD.bazel @@ -148,6 +148,13 @@ string_flag( # pip.parse related flags +string_flag( + name = "pypi_hub", + build_setting_default = "auto", + # NOTE: Only public because it is used in pip hub repos and executable transitions. + visibility = ["//visibility:public"], +) + string_flag( name = "pip_whl_osx_version", build_setting_default = "", diff --git a/python/features.bzl b/python/features.bzl index fab44385c8..21651e0bad 100644 --- a/python/features.bzl +++ b/python/features.bzl @@ -97,6 +97,7 @@ _TARGETS = { "//command_line_option:enable_runfiles": True, "//command_line_option:extra_toolchains": True, "//python/cc:current_py_cc_headers_abi3": True, + "//python/config_settings:pypi_hub": True, } _LOADABLE_SYMBOLS = { diff --git a/python/private/common_labels.bzl b/python/private/common_labels.bzl index a83ba2b462..4ebe97a5ab 100644 --- a/python/private/common_labels.bzl +++ b/python/private/common_labels.bzl @@ -22,6 +22,7 @@ labels = struct( PRECOMPILE = str(Label("//python/config_settings:precompile")), PRECOMPILE_SOURCE_RETENTION = str(Label("//python/config_settings:precompile_source_retention")), PYC_COLLECTION = str(Label("//python/config_settings:pyc_collection")), + PYPI_HUB = str(Label("//python/config_settings:pypi_hub")), PYTHON_IMPORT_ALL_REPOSITORIES = 
str(Label("//python/config_settings:experimental_python_import_all_repositories")), PYTHON_SRC = str(Label("//python/bin:python_src")), PYTHON_VERSION = str(Label("//python/config_settings:python_version")), diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index b9a7a18aed..50c76144e5 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -145,6 +145,7 @@ bzl_library( ":platform_bzl", ":pypi_cache_bzl", ":simpleapi_download_bzl", + ":unified_hub_repo_bzl", ":whl_library_bzl", "//python/private:auth_bzl", "//python/private:normalize_name_bzl", @@ -225,6 +226,15 @@ bzl_library( srcs = ["labels.bzl"], ) +bzl_library( + name = "missing_package_bzl", + srcs = ["missing_package.bzl"], + deps = [ + "//python/private:py_info_bzl", + "//python/private:reexports_bzl", + ], +) + bzl_library( name = "multi_pip_parse_bzl", srcs = ["multi_pip_parse.bzl"], @@ -445,6 +455,22 @@ bzl_library( ], ) +bzl_library( + name = "unified_hub_repo_bzl", + srcs = ["unified_hub_repo.bzl"], + deps = [ + "//python/private:text_util_bzl", + ], +) + +bzl_library( + name = "unified_hub_setup_bzl", + srcs = ["unified_hub_setup.bzl"], + deps = [ + ":missing_package_bzl", + ], +) + bzl_library( name = "urllib_bzl", srcs = ["urllib.bzl"], diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index a0b59e9ed1..ec50ad5137 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -28,8 +28,27 @@ load(":pip_repository_attrs.bzl", "ATTRS") load(":platform.bzl", _plat = "platform") load(":pypi_cache.bzl", "pypi_cache") load(":simpleapi_download.bzl", "simpleapi_download") +load(":unified_hub_repo.bzl", "unified_hub_repo") load(":whl_library.bzl", "whl_library") +def _whl_mods_repo_impl(rctx): + rctx.file("BUILD.bazel", "") + for whl_name, mods in rctx.attr.whl_mods.items(): + rctx.file("{}.json".format(whl_name), mods) + +_whl_mods_repo = repository_rule( + doc = """\ +This rule creates 
json files based on the whl_mods attribute. +""", + implementation = _whl_mods_repo_impl, + attrs = { + "whl_mods": attr.string_dict( + mandatory = True, + doc = "JSON endcoded string that is provided to wheel_builder.py", + ), + }, +) + def _whl_mods_impl(whl_mods_dict): """Implementation of the pip.whl_mods tag class. @@ -404,8 +423,19 @@ You cannot use both the additive_build_content and additive_build_content_file a hub_group_map[hub.name] = out.group_map hub_whl_map[hub.name] = out.whl_map + default_hub = None + for mod in module_ctx.modules: + if not mod.is_root: + continue + for tag in mod.tags.default: + if tag.default_hub: + if default_hub: + fail("Duplicate pip.default tag: only one explicit default PyPI hub is allowed.") + default_hub = tag.default_hub + return struct( config = config, + default_hub = default_hub, exposed_packages = exposed_packages, extra_aliases = extra_aliases, facts = simpleapi_cache.get_facts(), @@ -422,6 +452,41 @@ You cannot use both the additive_build_content and additive_build_content_file a }, ) +def _create_unified_hub_repo(mods): + if "pypi" in mods.hub_whl_map: + return + + hubs = sorted(mods.hub_whl_map.keys()) + if mods.default_hub and mods.default_hub not in hubs: + fail("default_hub '%s' is not a defined PyPI hub. 
Available hubs: %s" % (mods.default_hub, ", ".join(hubs))) + + packages = {} + extra_aliases = {} + + for hub_name in hubs: + for pkg_name in mods.exposed_packages.get(hub_name, []): + norm_pkg = normalize_name(pkg_name) + if norm_pkg not in packages: + packages[norm_pkg] = [] + if hub_name not in packages[norm_pkg]: + packages[norm_pkg].append(hub_name) + + extra = mods.extra_aliases.get(hub_name, {}).get(norm_pkg, []) + for alias_name in extra: + qual_alias = "%s:%s" % (norm_pkg, alias_name) + if qual_alias not in extra_aliases: + extra_aliases[qual_alias] = [] + if hub_name not in extra_aliases[qual_alias]: + extra_aliases[qual_alias].append(hub_name) + + unified_hub_repo( + name = "pypi", + default_hub = mods.default_hub or (hubs[0] if hubs else ""), + extra_aliases = extra_aliases, + hubs = hubs, + packages = packages, + ) + def _pip_impl(module_ctx): """Implementation of a class tag that creates the pip hub and corresponding pip spoke whl repositories. @@ -513,6 +578,8 @@ def _pip_impl(module_ctx): groups = mods.hub_group_map.get(hub_name), ) + _create_unified_hub_repo(mods) + # The code is smart to not return facts if we don't support the mechanism for that. # Hence we should not pass it to the metadata if mods.facts: @@ -535,12 +602,14 @@ Either this or {attr}`env` `platform_machine` key should be specified. """, ), "config_settings": attr.label_list( - mandatory = True, doc = """\ The list of labels to `config_setting` targets that need to be matched for the platform to be -selected. +selected. Mandatory if platform is specified. """, ), + "default_hub": attr.string( + doc = "The name of the concrete PyPI hub to use by default when pypi_hub is 'auto'.", + ), "env": attr.string_dict( doc = """\ The values to use for environment markers when evaluating an expression. @@ -930,6 +999,10 @@ can be made to configure different Python versions, and will be grouped by the `hub_name` argument. This allows the same logical name, e.g. 
`@pip//numpy` to automatically resolve to different, Python version-specific, libraries. +A unified `@pypi` proxy repository is always generated (unless a hub is +explicitly named "pypi") to route dependencies dynamically. See +[Unified @pypi Hub](unified-pypi-hub) for details. + pip.whl_mods: This tag class is used to help create JSON files to describe modifications to the BUILD files for wheels. @@ -941,6 +1014,10 @@ the BUILD files for wheels. doc = """\ This tag class allows for more customization of how the configuration for the hub repositories is built. +It can also be used to designate the default hub for the automatically +generated [Unified @pypi Hub](unified-pypi-hub) using the `default_hub` +attribute. + :::{seealso} The [environment markers][environment_markers] specification for the explanation of the @@ -961,6 +1038,9 @@ This tag class is used to create a pip hub and all of the spokes that are part o This tag class reuses most of the attributes found in {bzl:obj}`pip_parse`. The exception is it does not use the arg 'repo_prefix'. We set the repository prefix for the user and the alias arg is always True in bzlmod. + +You can use the automatically generated [Unified @pypi Hub](unified-pypi-hub) +repository to route package dependencies dynamically at build time. """, ), "whl_mods": tag_class( @@ -975,21 +1055,3 @@ extension. ), }, ) - -def _whl_mods_repo_impl(rctx): - rctx.file("BUILD.bazel", "") - for whl_name, mods in rctx.attr.whl_mods.items(): - rctx.file("{}.json".format(whl_name), mods) - -_whl_mods_repo = repository_rule( - doc = """\ -This rule creates json files based on the whl_mods attribute. 
-""", - implementation = _whl_mods_repo_impl, - attrs = { - "whl_mods": attr.string_dict( - mandatory = True, - doc = "JSON endcoded string that is provided to wheel_builder.py", - ), - }, -) diff --git a/python/private/pypi/missing_package.bzl b/python/private/pypi/missing_package.bzl new file mode 100644 index 0000000000..c59f754b10 --- /dev/null +++ b/python/private/pypi/missing_package.bzl @@ -0,0 +1,36 @@ +"""Rule for generating an execution-phase action failure when a PyPI package is missing.""" + +load("//python/private:py_info.bzl", "PyInfo") +load("//python/private:reexports.bzl", "BuiltinPyInfo") + +def _missing_package_error_impl(ctx): + out = ctx.actions.declare_file(ctx.label.name + ".error") + + # Register an action that fails when Bazel attempts to stage/build this file + ctx.actions.run_shell( + outputs = [out], + command = "echo 'ERROR: PyPI package \"{pkg}\" is not available{hub_clause}.' >&2 && exit 1".format( + pkg = ctx.attr.package_name, + hub_clause = (' when building under PyPI hub "%s"' % ctx.attr.hub_name) if ctx.attr.hub_name else " because no PyPI hub or default hub is requested", + ), + ) + + maybe_builtin = [BuiltinPyInfo(transitive_sources = depset([out]))] if BuiltinPyInfo != None else [] + + return [ + DefaultInfo( + files = depset([out]), + data_runfiles = ctx.runfiles([out]), + ), + PyInfo( + transitive_sources = depset([out]), + ), + ] + maybe_builtin + +missing_package_error = rule( + implementation = _missing_package_error_impl, + attrs = { + "hub_name": attr.string(mandatory = True), + "package_name": attr.string(mandatory = True), + }, +) diff --git a/python/private/pypi/unified_hub_repo.bzl b/python/private/pypi/unified_hub_repo.bzl new file mode 100644 index 0000000000..3ba25efc75 --- /dev/null +++ b/python/private/pypi/unified_hub_repo.bzl @@ -0,0 +1,81 @@ +"""Repository rule for creating the Unified PyPI Hub.""" + +load("//python/private:text_util.bzl", "render") + +_ROOT_BUILD_TMPL = """\ 
+load("@rules_python//python/private/pypi:unified_hub_setup.bzl", "define_pypi_hub_flag_config_settings") + +package(default_visibility = ["//visibility:public"]) + +define_pypi_hub_flag_config_settings( + name = "pypi_hub_config_settings", + hubs = {hubs}, +) +""" + +_PKG_BUILD_TMPL = """\ +load("@rules_python//python/private/pypi:unified_hub_setup.bzl", "define_pypi_package_targets") + +package(default_visibility = ["//visibility:public"]) + +define_pypi_package_targets( + name = "{pkg_name}", + default_hub = {default_hub}, + extra_aliases = {extra_aliases}, + hubs = {hubs}, + pkg_hubs = {pkg_hubs}, +) +""" + +def _unified_hub_repo_impl(rctx): + hubs = rctx.attr.hubs + default_hub = rctx.attr.default_hub or None + + # 1. Generate Root BUILD.bazel with shared config settings + rctx.file( + "BUILD.bazel", + _ROOT_BUILD_TMPL.format(hubs = hubs), + ) + + # 2. Organize extra aliases by package + extra_aliases_by_pkg = {} + for qual_alias, alias_hubs in rctx.attr.extra_aliases.items(): + if ":" not in qual_alias: + fail("extra_aliases keys must be in 'pkg:alias' format.") + pkg, alias = qual_alias.split(":", 1) + extra_aliases_by_pkg.setdefault(pkg, {})[alias] = alias_hubs + + # 3. 
Generate package subpackages + for pkg_name, pkg_hubs in rctx.attr.packages.items(): + extra_aliases = extra_aliases_by_pkg.get(pkg_name, {}) + rctx.file( + pkg_name + "/BUILD.bazel", + _PKG_BUILD_TMPL.format( + default_hub = render.str(default_hub), + extra_aliases = extra_aliases, + hubs = hubs, + pkg_hubs = pkg_hubs, + pkg_name = pkg_name, + ), + ) + +unified_hub_repo = repository_rule( + implementation = _unified_hub_repo_impl, + attrs = { + "default_hub": attr.string( + doc = "The PyPI hub to use when no other hub's conditions match.", + ), + "extra_aliases": attr.string_list_dict( + doc = "Dictionary mapping 'package:alias' to a list of hubs that support it.", + ), + "hubs": attr.string_list( + mandatory = True, + doc = "List of all concrete PyPI hub names.", + ), + "packages": attr.string_list_dict( + mandatory = True, + doc = "Dictionary mapping package names to a list of hubs that contain them.", + ), + }, + doc = "Private repository rule creating the automatic Unified PyPI Hub.", +) diff --git a/python/private/pypi/unified_hub_setup.bzl b/python/private/pypi/unified_hub_setup.bzl new file mode 100644 index 0000000000..a6e353c175 --- /dev/null +++ b/python/private/pypi/unified_hub_setup.bzl @@ -0,0 +1,102 @@ +"""Helper functions for setting up targets within the Unified PyPI Hub repository.""" + +load("@rules_python//python/private/pypi:missing_package.bzl", "missing_package_error") + +def define_pypi_hub_flag_config_settings(name, hubs): + """Defines the root config_settings for each PyPI spoke hub. + + Args: + name: unused macro name required by buildifier. + hubs: list of concrete hub names. 
+ """ + for hub in hubs: + native.config_setting( + name = "_is_pypi_hub_" + hub, + flag_values = {"@rules_python//python/config_settings:pypi_hub": hub}, + ) + +_STANDARD_ALIASES = [ + "pkg", + "whl", + "data", + "dist_info", + "extracted_wheel_files", +] + +def define_pypi_package_targets(name, pkg_hubs, extra_aliases, hubs, default_hub = None): + """Define the targets for a PyPI package in the unified PyPI hub. + + Args: + name: normalized PyPI package name, serving as the main target name. + pkg_hubs: list of hubs that contain this package. + extra_aliases: dict mapping extra alias names to lists of hubs that support them. + hubs: list of all concrete hub names. + default_hub: the hub to use by default. + """ + pkg_name = name + + # Main apparent package target delegates to :pkg + native.alias( + name = pkg_name, + actual = ":pkg", + ) + + all_aliases = _STANDARD_ALIASES + sorted(extra_aliases.keys()) + missing_errors = {} + + for alias_name in all_aliases: + select_map = {} + for hub in hubs: + is_supported = ( + (alias_name in _STANDARD_ALIASES and hub in pkg_hubs) or + (alias_name not in _STANDARD_ALIASES and hub in extra_aliases.get(alias_name, [])) + ) + + if is_supported: + select_map["//:_is_pypi_hub_" + hub] = "@{hub}//{pkg}:{alias}".format( + hub = hub, + pkg = pkg_name, + alias = alias_name, + ) + else: + err_target = "_missing_{alias}_in_{hub}".format(alias = alias_name, hub = hub) + if err_target not in missing_errors: + missing_errors[err_target] = { + "hub_name": hub, + "package_name": pkg_name if alias_name in _STANDARD_ALIASES else (pkg_name + ":" + alias_name), + } + select_map["//:_is_pypi_hub_" + hub] = ":{}".format(err_target) + + # //conditions:default fallback + default_supported = ( + default_hub and + ((alias_name in _STANDARD_ALIASES and default_hub in pkg_hubs) or + (alias_name not in _STANDARD_ALIASES and default_hub in extra_aliases.get(alias_name, []))) + ) + + if default_supported: + select_map["//conditions:default"] = 
"@{hub}//{pkg}:{alias}".format( + hub = default_hub, + pkg = pkg_name, + alias = alias_name, + ) + else: + err_target = "_missing_{alias}_in_default".format(alias = alias_name) + if err_target not in missing_errors: + missing_errors[err_target] = { + "hub_name": default_hub or "", + "package_name": pkg_name if alias_name in _STANDARD_ALIASES else (pkg_name + ":" + alias_name), + } + select_map["//conditions:default"] = ":{}".format(err_target) + + native.alias( + name = alias_name, + actual = select(select_map), + ) + + # Generate missing package error targets + for err_name, err_args in missing_errors.items(): + missing_package_error( + name = err_name, + **err_args + ) diff --git a/python/private/text_util.bzl b/python/private/text_util.bzl index f725195978..eedf66009d 100644 --- a/python/private/text_util.bzl +++ b/python/private/text_util.bzl @@ -107,6 +107,18 @@ def _render_list(items, *, hanging_indent = "", value_repr = repr): return text def _render_str(value): + """Render a string value. + + If value is None, it is automatically rendered as the Starlark literal `None`. + + Args: + value: str or None. + + Returns: + The value represented as Starlark source text. 
+ """ + if value == None: + return "None" return repr(value) def _render_string_list_dict(value): diff --git a/python/private/transition_labels.bzl b/python/private/transition_labels.bzl index 7a6531ed0f..46880b6c86 100644 --- a/python/private/transition_labels.bzl +++ b/python/private/transition_labels.bzl @@ -17,6 +17,7 @@ _BASE_TRANSITION_LABELS = [ labels.PIP_WHL_OSX_VERSION, labels.PRECOMPILE, labels.PRECOMPILE_SOURCE_RETENTION, + labels.PYPI_HUB, labels.PYTHON_SRC, labels.PYTHON_VERSION, labels.PY_FREETHREADED, diff --git a/tests/integration/BUILD.bazel b/tests/integration/BUILD.bazel index 904fb4c247..9301e19590 100644 --- a/tests/integration/BUILD.bazel +++ b/tests/integration/BUILD.bazel @@ -118,6 +118,11 @@ rules_python_integration_test( py_main = "toolchain_target_settings_test.py", ) +rules_python_integration_test( + name = "unified_pypi_test", + py_main = "unified_pypi_test.py", +) + rules_python_integration_test( name = "uv_lock_test", py_deps = [ diff --git a/tests/integration/bzlmod_lockfile/MODULE.bazel.lock b/tests/integration/bzlmod_lockfile/MODULE.bazel.lock index 2a0bc7d76b..8a1329f729 100644 --- a/tests/integration/bzlmod_lockfile/MODULE.bazel.lock +++ b/tests/integration/bzlmod_lockfile/MODULE.bazel.lock @@ -250,7 +250,7 @@ }, "@@rules_python+//python/uv:uv.bzl%uv": { "general": { - "bzlTransitiveDigest": "46RcxJnhOapMeaxdcMm3RmVdNp1nPCewOOXoZyIbQ20=", + "bzlTransitiveDigest": "fXy6MwOX96XOqVP2atme3jv61A0uLjd9YtmAIYVLhYM=", "usagesDigest": "6yXGw7XDyXjOfqBL0SBu1YBEMMYPQzCE3jTzUCkxPgg=", "recordedInputs": [ "REPO_MAPPING:rules_python+,bazel_tools bazel_tools", diff --git a/tests/integration/unified_pypi/.bazelrc b/tests/integration/unified_pypi/.bazelrc new file mode 100644 index 0000000000..b3a24e8605 --- /dev/null +++ b/tests/integration/unified_pypi/.bazelrc @@ -0,0 +1 @@ +common --experimental_enable_bzlmod diff --git a/tests/integration/unified_pypi/BUILD.bazel b/tests/integration/unified_pypi/BUILD.bazel new file mode 100644 index 
0000000000..cb8a41791d --- /dev/null +++ b/tests/integration/unified_pypi/BUILD.bazel @@ -0,0 +1,48 @@ +load("@rules_python//python:py_binary.bzl", "py_binary") +load("@rules_python//python:py_test.bzl", "py_test") + +package(default_visibility = ["//visibility:public"]) + +py_test( + name = "test_default", + srcs = ["test_default.py"], + deps = ["@pypi//colorama"], +) + +py_test( + name = "test_cli", + srcs = ["test_cli.py"], + deps = ["@pypi//colorama"], +) + +py_test( + name = "test_a", + srcs = ["test_a.py"], + config_settings = { + "@rules_python//python/config_settings:pypi_hub": "pypi_a", + }, + deps = [ + "@pypi//colorama", + "@pypi//colorama:my_colorama", + ], +) + +# Sibling extra alias failure target (my_colorama is missing in pypi_b): +py_binary( + name = "bin_extra_b", + srcs = ["bin_extra_b.py"], + config_settings = { + "@rules_python//python/config_settings:pypi_hub": "pypi_b", + }, + deps = ["@pypi//colorama:my_colorama"], +) + +# Disjoint package failure target (six is missing in pypi_a): +py_binary( + name = "bin_six_a", + srcs = ["bin_six_a.py"], + config_settings = { + "@rules_python//python/config_settings:pypi_hub": "pypi_a", + }, + deps = ["@pypi//six"], +) diff --git a/tests/integration/unified_pypi/MODULE.bazel b/tests/integration/unified_pypi/MODULE.bazel new file mode 100644 index 0000000000..0d0f44f61c --- /dev/null +++ b/tests/integration/unified_pypi/MODULE.bazel @@ -0,0 +1,48 @@ +module(name = "unified_pypi") + +bazel_dep(name = "rules_python", version = "0.0.0") +local_path_override( + module_name = "rules_python", + path = "../../..", +) + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain(python_version = "3.11") + +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") +pip.whl_mods( + additive_build_content = """\ +load("@rules_python//python:defs.bzl", "py_library") + +py_library( + name = "my_colorama", + deps = [":pkg"], +) +""", + hub_name = "whl_mods_hub", + 
whl_name = "colorama", +) +use_repo(pip, "whl_mods_hub") + +# pypi_a has colorama and an extra alias +pip.parse( + extra_hub_aliases = {"colorama": ["my_colorama"]}, + hub_name = "pypi_a", + python_version = "3.11", + requirements_lock = "//:requirements_a.txt", + whl_modifications = { + "@whl_mods_hub//:colorama.json": "colorama", + }, +) +use_repo(pip, "pypi_a") + +# pypi_b has colorama and six, and acts as designated fallback +pip.parse( + hub_name = "pypi_b", + python_version = "3.11", + requirements_lock = "//:requirements_b.txt", +) +use_repo(pip, "pypi_b") + +pip.default(default_hub = "pypi_b") +use_repo(pip, "pypi") diff --git a/tests/integration/unified_pypi/WORKSPACE b/tests/integration/unified_pypi/WORKSPACE new file mode 100644 index 0000000000..0a08afe832 --- /dev/null +++ b/tests/integration/unified_pypi/WORKSPACE @@ -0,0 +1 @@ +# Minimal WORKSPACE file diff --git a/tests/integration/unified_pypi/WORKSPACE.bzlmod b/tests/integration/unified_pypi/WORKSPACE.bzlmod new file mode 100644 index 0000000000..7bd1c969b9 --- /dev/null +++ b/tests/integration/unified_pypi/WORKSPACE.bzlmod @@ -0,0 +1 @@ +# Minimal WORKSPACE.bzlmod diff --git a/tests/integration/unified_pypi/bin_extra_b.py b/tests/integration/unified_pypi/bin_extra_b.py new file mode 100644 index 0000000000..f900d16fd2 --- /dev/null +++ b/tests/integration/unified_pypi/bin_extra_b.py @@ -0,0 +1 @@ +print("Should not be executed") diff --git a/tests/integration/unified_pypi/bin_six_a.py b/tests/integration/unified_pypi/bin_six_a.py new file mode 100644 index 0000000000..f900d16fd2 --- /dev/null +++ b/tests/integration/unified_pypi/bin_six_a.py @@ -0,0 +1 @@ +print("Should not be executed") diff --git a/tests/integration/unified_pypi/requirements_a.txt b/tests/integration/unified_pypi/requirements_a.txt new file mode 100644 index 0000000000..788f12f818 --- /dev/null +++ b/tests/integration/unified_pypi/requirements_a.txt @@ -0,0 +1,3 @@ +colorama==0.4.6 \ + 
--hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ + --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 diff --git a/tests/integration/unified_pypi/requirements_b.txt b/tests/integration/unified_pypi/requirements_b.txt new file mode 100644 index 0000000000..c69f3631b2 --- /dev/null +++ b/tests/integration/unified_pypi/requirements_b.txt @@ -0,0 +1,6 @@ +colorama==0.4.5 \ + --hash=sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da \ + --hash=sha256:e6c6b4334fc50988a639d9b98ae42f5c90ec94cb1495b4fe76c5f72cf7f79435 +six==1.17.0 \ + --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ + --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 diff --git a/tests/integration/unified_pypi/test_a.py b/tests/integration/unified_pypi/test_a.py new file mode 100644 index 0000000000..a9127ba0c7 --- /dev/null +++ b/tests/integration/unified_pypi/test_a.py @@ -0,0 +1,3 @@ +import colorama + +assert colorama.__version__ == "0.4.6" diff --git a/tests/integration/unified_pypi/test_cli.py b/tests/integration/unified_pypi/test_cli.py new file mode 100644 index 0000000000..a9127ba0c7 --- /dev/null +++ b/tests/integration/unified_pypi/test_cli.py @@ -0,0 +1,3 @@ +import colorama + +assert colorama.__version__ == "0.4.6" diff --git a/tests/integration/unified_pypi/test_default.py b/tests/integration/unified_pypi/test_default.py new file mode 100644 index 0000000000..559df59961 --- /dev/null +++ b/tests/integration/unified_pypi/test_default.py @@ -0,0 +1,3 @@ +import colorama + +assert colorama.__version__ == "0.4.5" diff --git a/tests/integration/unified_pypi_test.py b/tests/integration/unified_pypi_test.py new file mode 100644 index 0000000000..931c2aafd5 --- /dev/null +++ b/tests/integration/unified_pypi_test.py @@ -0,0 +1,79 @@ +"""Integration test for Unified PyPI Hub dynamic dependency resolution.""" + +import contextlib +import unittest + +from 
tests.integration import runner + + +class UnifiedPypiTest(runner.TestCase): + def test_default_fallback_hub(self): + self.run_bazel("test", "//:test_default") + + def test_transitioned_hub(self): + self.run_bazel("test", "//:test_a") + + def test_cli_override(self): + self.run_bazel( + "run", + "--@rules_python//python/config_settings:pypi_hub=pypi_a", + "//:test_cli", + ) + + def test_disjoint_package_cquery_succeeds_but_build_fails(self): + self.run_bazel("cquery", "//:bin_six_a") + result = self.run_bazel("build", "//:bin_six_a", check=False) + self.assertNotEqual( + result.exit_code, + 0, + "Expected build to fail during execution phase", + ) + self.assert_result_matches( + result, + 'ERROR: PyPI package "six" is not available when building under PyPI hub "pypi_a".', + ) + + def test_sibling_extra_alias_cquery_succeeds_but_build_fails(self): + self.run_bazel("cquery", "//:bin_extra_b") + result = self.run_bazel("build", "//:bin_extra_b", check=False) + self.assertNotEqual( + result.exit_code, + 0, + "Expected build to fail during execution phase", + ) + self.assert_result_matches( + result, + 'ERROR: PyPI package "colorama:my_colorama" is not available when building under PyPI hub "pypi_b".', + ) + + @contextlib.contextmanager + def _temp_modify_file(self, path, new_content): + original_content = path.read_text() + path.write_text(new_content) + try: + yield + finally: + path.write_text(original_content) + + def test_invalid_default_hub_fails_evaluation(self): + module_bazel = self.repo_root / "MODULE.bazel" + invalid_content = module_bazel.read_text().replace( + 'pip.default(default_hub = "pypi_b")', + 'pip.default(default_hub = "invalid_hub")', + ) + with self._temp_modify_file(module_bazel, invalid_content): + # Run bazel cquery and expect it to fail during loading/extension phase + result = self.run_bazel("cquery", "//:test_default", check=False) + self.assertNotEqual( + result.exit_code, + 0, + "Expected extension evaluation to fail due to invalid 
default_hub", + ) + self.assert_result_matches( + result, + "default_hub 'invalid_hub' is not a defined PyPI hub", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/pypi/extension/extension_tests.bzl b/tests/pypi/extension/extension_tests.bzl index 5a40714b64..b5058e5d97 100644 --- a/tests/pypi/extension/extension_tests.bzl +++ b/tests/pypi/extension/extension_tests.bzl @@ -44,6 +44,7 @@ def _default( arch_name = None, auth_patterns = None, config_settings = None, + default_hub = "", env = None, index_url = None, marker = None, @@ -56,6 +57,7 @@ def _default( arch_name = arch_name, auth_patterns = auth_patterns or {}, config_settings = config_settings, + default_hub = default_hub, env = env or {}, index_url = index_url or "", marker = marker or "", @@ -104,6 +106,7 @@ def _parse_modules(env, **kwargs): return env.expect.that_struct( parse_modules(**kwargs), attrs = dict( + default_hub = subjects.str, exposed_packages = subjects.dict, hub_group_map = subjects.dict, hub_whl_map = subjects.dict, @@ -283,6 +286,40 @@ def _test_build_pipstar_platform(env): _tests.append(_test_build_pipstar_platform) +def _test_multiple_default_tags(env): + """Test that multiple pip.default tags do not trigger duplicate default hub failures. + + Only when multiple tags explicitly define default_hub should it fail. 
+ """ + pypi = _parse_modules( + env, + module_ctx = _pypi_mock_mctx( + _mod( + name = "rules_python", + default = _default_tags_default + [ + _default(platform = "extra_custom_platform"), + ], + parse = [ + _parse( + hub_name = "pypi", + python_version = "3.15", + simpleapi_skip = ["simple"], + requirements_lock = "requirements.txt", + ), + ], + ), + os_name = "linux", + arch_name = "x86_64", + ), + available_interpreters = { + "python_3_15_host": "unit_test_interpreter_target", + }, + minor_mapping = {"3.15": "3.15.19"}, + ) + pypi.exposed_packages().contains_exactly({"pypi": ["simple"]}) + +_tests.append(_test_multiple_default_tags) + def extension_test_suite(name): """Create the test suite.