From 9feb34467de303658640736a5c23211514a056bb Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 22 Jun 2026 16:34:21 -0400 Subject: [PATCH] Reduce CUDA build matrix, better fallback for lib loading --- .github/workflows/python-package.yml | 2 +- bitsandbytes/cextension.py | 185 ++++++++++++++------------- bitsandbytes/diagnostics/cuda.py | 73 +++-------- bitsandbytes/diagnostics/main.py | 52 +++++--- docs/source/errors.mdx | 20 +-- tests/test_cuda_setup_evaluator.py | 171 +++++++++++++++---------- 6 files changed, 261 insertions(+), 242 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 4b104cb30..22a6649aa 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -59,7 +59,7 @@ jobs: matrix: os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025] cuda_version: - ["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"] + ["11.8.0", "12.1.1", "12.4.1", "12.6.3", "12.8.1", "13.0.2", "13.2.0"] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index 7796a8e84..6f0a0916b 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -21,63 +21,63 @@ def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path: """ - Get the disk path to the CUDA BNB native library specified by the - given CUDA specs, taking into account the `BNB_CUDA_VERSION` override environment variable. - - The library is not guaranteed to exist at the returned path. + Get the path to the best matching CUDA/ROCm BNB native library for the given specs. + + When no override is set, selects from packaged libraries using the following priority: + 1. Exact version match. + 2. Highest packaged version <= runtime version, same major (e.g. runtime 12.9, ship 12.8). + 3. Lowest packaged version > runtime version, same major (e.g. runtime 12.0, ship 12.1). + No cross-major fallback: if no same-major library exists, returns the exact non-existent + path so the caller raises a clear "not found" error. + A warning is logged when falling back. Override env vars bypass selection entirely + and load the named version with no fallback. The returned path is not guaranteed to + exist when no packaged libs are found, or when an override names an absent version. """ + is_hip = bool(torch.version.hip) + prefix = "rocm" if is_hip else "cuda" + override_var = "BNB_ROCM_VERSION" if is_hip else "BNB_CUDA_VERSION" + + override_value = os.environ.get(override_var) + + if override_value is not None: + if not override_value.isdigit(): + raise RuntimeError(f"{override_var}={override_value!r}: value must be digits only (e.g. '124' for 12.4).") + library_name = f"libbitsandbytes_{prefix}{override_value}{DYNAMIC_LIBRARY_SUFFIX}" + logger.warning( + f"WARNING: {override_var}={override_value} environment variable detected; loading {library_name}.\n" + f"This overrides automatic {'ROCm' if is_hip else 'CUDA'} version selection.\n" + f"If this was unintended clear the variable and retry: unset {override_var}\n", + ) + return PACKAGE_DIR / library_name - prefix = "rocm" if torch.version.hip else "cuda" - library_name = f"libbitsandbytes_{prefix}{cuda_specs.cuda_version_string}{DYNAMIC_LIBRARY_SUFFIX}" + available = _find_cuda_libs(prefix, is_hip) + runtime_version = cuda_specs.cuda_version_tuple - cuda_override_value = os.environ.get("BNB_CUDA_VERSION") - rocm_override_value = os.environ.get("BNB_ROCM_VERSION") + if not available: + return PACKAGE_DIR / f"libbitsandbytes_{prefix}{cuda_specs.cuda_version_string}{DYNAMIC_LIBRARY_SUFFIX}" - if torch.version.hip: - if cuda_override_value: - if not rocm_override_value: - raise RuntimeError( - f"BNB_CUDA_VERSION={cuda_override_value} detected but this is not a CUDA build!\n" - "Use BNB_ROCM_VERSION instead: export BNB_ROCM_VERSION=\n" - "Clear the variable and retry: unset BNB_CUDA_VERSION\n" - ) - logger.warning( - f"WARNING: BNB_CUDA_VERSION={cuda_override_value} is set but ignored on this ROCm build. " - "Clear the variable: unset BNB_CUDA_VERSION", - ) - if rocm_override_value: - library_name = re.sub(r"rocm\d+", f"rocm{rocm_override_value}", library_name, count=1) - logger.warning( - f"WARNING: BNB_ROCM_VERSION={rocm_override_value} environment variable detected; loading {library_name}.\n" - "This can be used to load a bitsandbytes version built with a ROCm version that is different from the PyTorch ROCm version.\n" - "If this was unintended clear the variable and retry: unset BNB_ROCM_VERSION\n", - ) - elif torch.version.cuda: - if rocm_override_value: - if not cuda_override_value: - raise RuntimeError( - f"BNB_ROCM_VERSION={rocm_override_value} detected but this is not a ROCm build!\n" - "Use BNB_CUDA_VERSION instead: export BNB_CUDA_VERSION=\n" - "Clear the variable and retry: unset BNB_ROCM_VERSION\n" - ) - logger.warning( - f"WARNING: BNB_ROCM_VERSION={rocm_override_value} is set but ignored on this CUDA build. " - "Clear the variable: unset BNB_ROCM_VERSION", - ) - if cuda_override_value: - library_name = re.sub(r"cuda\d+", f"cuda{cuda_override_value}", library_name, count=1) - logger.warning( - f"WARNING: BNB_CUDA_VERSION={cuda_override_value} environment variable detected; loading {library_name}.\n" - "This can be used to load a bitsandbytes version built with a CUDA version that is different from the PyTorch CUDA version.\n" - "If this was unintended clear the variable and retry: unset BNB_CUDA_VERSION\n", - ) - else: - if rocm_override_value or cuda_override_value: - raise RuntimeError( - "BNB_ROCM_VERSION / BNB_CUDA_VERSION overrides are not supported on this backend.", - ) + if runtime_version in available: + return available[runtime_version] - return PACKAGE_DIR / library_name + lower = [v for v in available if v[0] == runtime_version[0] and v < runtime_version] + if lower: + selected = max(lower) + else: + higher_same = [v for v in available if v[0] == runtime_version[0] and v > runtime_version] + if higher_same: + selected = min(higher_same) + else: + # No same-major library available. Return the non-existent exact path so + # get_native_library() raises a clear "not found" error. + return PACKAGE_DIR / f"libbitsandbytes_{prefix}{cuda_specs.cuda_version_string}{DYNAMIC_LIBRARY_SUFFIX}" + + logger.warning( + f"No prebuilt binary for {'ROCm' if is_hip else 'CUDA'} " + f"{runtime_version[0]}.{runtime_version[1]}, loading " + f"{'ROCm' if is_hip else 'CUDA'} {selected[0]}.{selected[1]} instead. " + f"Set {override_var} to override." + ) + return available[selected] class BNBNativeLibrary: @@ -124,26 +124,48 @@ def __init__(self, lib: ct.CDLL): lib.cget_managed_ptr.restype = ct.c_void_p -def get_available_cuda_binary_versions() -> list[str]: - """Get formatted CUDA versions from existing library files using cuda_specs logic""" - lib_pattern = f"libbitsandbytes_{BNB_BACKEND.lower()}*{DYNAMIC_LIBRARY_SUFFIX}" - versions = [] - for lib in Path(__file__).parent.glob(lib_pattern): - pattern = rf"{BNB_BACKEND.lower()}(\d+)" - match = re.search(pattern, lib.name) +def _split_cuda_version(compact: str, is_hip: bool) -> tuple[int, int]: + """Split a compact CUDA/ROCm version string from a library filename into (major, minor). + + CUDA: major is always 2 digits (11, 12, 13...), e.g. '118' -> (11, 8), '132' -> (13, 2). + ROCm: major is always 1 digit for now (6, 7...), e.g. '72' -> (7, 2), '713' -> (7, 13). + Note: revisit if ROCm major reaches 10. + """ + if is_hip: + return int(compact[:1]), int(compact[1:]) + return int(compact[:2]), int(compact[2:]) + + +def _find_cuda_libs(prefix: str, is_hip: bool) -> dict[tuple[int, int], Path]: + """Return a {(major, minor): Path} mapping for all packaged CUDA/ROCm library files.""" + result = {} + for lib in PACKAGE_DIR.glob(f"libbitsandbytes_{prefix}*{DYNAMIC_LIBRARY_SUFFIX}"): + match = re.search(rf"{prefix}(\d+)", lib.name) if match: - ver_code = int(match.group(1)) - major = ver_code // 10 - minor = ver_code % 10 - versions.append(f"{major}.{minor}") - return sorted(versions) + try: + result[_split_cuda_version(match.group(1), is_hip)] = lib + except (ValueError, IndexError): + continue + return result + + +def get_available_cuda_binary_versions() -> list[str]: + """Get formatted CUDA/ROCm versions from existing library files.""" + is_hip = bool(torch.version.hip) + prefix = "rocm" if is_hip else "cuda" + return sorted(f"{major}.{minor}" for major, minor in _find_cuda_libs(prefix, is_hip)) def parse_cuda_version(version_str: str) -> str: - """Convert raw version string (e.g. '118' from env var) to formatted version (e.g. '11.8')""" + """Convert a raw version code string (e.g. '118', '713') to a dotted version (e.g. '11.8', '7.13').""" if version_str.isdigit(): - return f"{version_str[:-1]}.{version_str[-1]}" - return version_str # fallback as safety net + is_hip = bool(torch.version.hip) + try: + major, minor = _split_cuda_version(version_str, is_hip) + return f"{major}.{minor}" + except (ValueError, IndexError): + pass + return version_str class ErrorHandlerMockBNBNativeLibrary(BNBNativeLibrary): @@ -169,18 +191,11 @@ class ErrorHandlerMockBNBNativeLibrary(BNBNativeLibrary): def __init__(self, error_msg: str): self.error_msg = error_msg - self.user_cuda_version = get_cuda_version_tuple() self.available_versions = get_available_cuda_binary_versions() - self.override_value = ( - os.environ.get("BNB_ROCM_VERSION") if HIP_ENVIRONMENT else os.environ.get("BNB_CUDA_VERSION") - ) - self.requested_version = ( - parse_cuda_version(self.override_value) - if self.override_value - else f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}" - if self.user_cuda_version - else "unknown" - ) + override_value = os.environ.get("BNB_ROCM_VERSION") if HIP_ENVIRONMENT else os.environ.get("BNB_CUDA_VERSION") + user_version = get_cuda_version_tuple() + user_version_str = f"{user_version[0]}.{user_version[1]}" if user_version else "unknown" + self.requested_version = parse_cuda_version(override_value) if override_value else user_version_str # Pre-generate the error message based on error type if "cannot open shared object file" in error_msg: @@ -188,9 +203,7 @@ def __init__(self, error_msg: str): else: # lib loading errors self.formatted_error = self._format_lib_error_message( available_versions=self.available_versions, - user_cuda_version=f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}" - if self.user_cuda_version - else "unknown", + user_cuda_version=user_version_str, original_error=f"Original error: {self.error_msg}\n" if self.error_msg else "", requested_version=self.requested_version, ) @@ -241,8 +254,8 @@ def _format_lib_error_message( note = ( ( - f"To make bitsandbytes work, the compiled library version MUST exactly match the linked {BNB_BACKEND} version.\n" - f"If your {BNB_BACKEND} version doesn't have a pre-compiled binary, you MUST compile from source.\n\n" + f"bitsandbytes tried to find a compatible {BNB_BACKEND} binary but none could be loaded.\n" + f"If your {BNB_BACKEND} version isn't among the available pre-compiled versions above, you must compile from source.\n\n" ) if no_cuda_lib_found else "" @@ -294,8 +307,8 @@ def _format_dependency_error(self) -> str: f"1. You have installed {BNB_BACKEND} {cuda_major_version}.x toolkit on your system\n" f"2. The {BNB_BACKEND} runtime libraries are in your LD_LIBRARY_PATH\n\n" f"You can add them with (and persist the change by adding the line to your .bashrc):\n" - f" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/{BNB_BACKEND.lower()}-{cuda_major_version}.x/\ - {'lib64' if not HIP_ENVIRONMENT else 'lib'}\n\n" + f" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/{BNB_BACKEND.lower()}-{cuda_major_version}.x/" + f"{'lib64' if not HIP_ENVIRONMENT else 'lib'}\n\n" f"Original error: {self.error_msg}\n\n" f"🔍 Run this command for detailed diagnostics:\n" f"python -m bitsandbytes\n\n" @@ -329,7 +342,7 @@ def get_native_library() -> BNBNativeLibrary: cuda_binary_path = get_cuda_bnb_library_path(cuda_specs) if not cuda_binary_path.exists(): - raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}") + raise RuntimeError(f"No compatible {BNB_BACKEND} binary found at {cuda_binary_path}") binary_path = cuda_binary_path diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py index 0b38e2f72..655da84a0 100644 --- a/bitsandbytes/diagnostics/cuda.py +++ b/bitsandbytes/diagnostics/cuda.py @@ -120,7 +120,8 @@ def _print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: if not binary_path.exists(): print_dedented( f""" - Library not found: {binary_path}. Maybe you need to compile it from source? + No compatible CUDA library found (tried: {binary_path.name}). You may need to compile from source: + https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile """, ) @@ -146,12 +147,9 @@ def _print_hip_diagnostics(cuda_specs: CUDASpecs) -> None: if not binary_path.exists(): print_dedented( f""" - Library not found: {binary_path}. - Maybe you need to compile it from source? If you compiled from source, check that ROCm version - in PyTorch Settings matches your ROCm install. If not, you can either: - 1. Reinstall PyTorch for your ROCm version and rebuild bitsandbytes. - 2. Set BNB_ROCM_VERSION to match the version the library was built with. - For example: export BNB_ROCM_VERSION=72 + No compatible ROCm library found (tried: {binary_path.name}). You may need to compile from source: + https://huggingface.co/docs/bitsandbytes/main/en/installation#rocm-compile + Use BNB_ROCM_VERSION to force a specific version if needed. """, ) @@ -171,62 +169,25 @@ def print_diagnostics(cuda_specs: CUDASpecs) -> None: _print_cuda_diagnostics(cuda_specs) -def _print_cuda_runtime_diagnostics() -> None: - cudart_paths = list(find_cudart_libraries()) - if not cudart_paths: - print("CUDA SETUP: WARNING! CUDA runtime files not found in any environmental path.") - elif len(cudart_paths) > 1: - print_dedented( - f""" - Found duplicate CUDA runtime files (see below). - - We select the PyTorch default CUDA runtime, which is {torch.version.cuda}, - but this might mismatch with the CUDA version that is needed for bitsandbytes. - To override this behavior set the `BNB_CUDA_VERSION=` environmental variable. - - For example, if you want to use the CUDA version 122, - BNB_CUDA_VERSION=122 python ... - - OR set the environmental variable in your .bashrc: - export BNB_CUDA_VERSION=122 - - In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2, - """, - ) - for pth in cudart_paths: - print(f"* Found CUDA runtime at: {pth}") - +def print_runtime_diagnostics() -> None: + backend = "ROCm" if HIP_ENVIRONMENT else "CUDA" + runtime_version = torch.version.hip if HIP_ENVIRONMENT else torch.version.cuda + override_var = "BNB_ROCM_VERSION" if HIP_ENVIRONMENT else "BNB_CUDA_VERSION" + override_example = "72" if HIP_ENVIRONMENT else "122" -def _print_hip_runtime_diagnostics() -> None: cudart_paths = list(find_cudart_libraries()) if not cudart_paths: - print("ROCm SETUP: WARNING! ROCm runtime files not found in any environmental path.") + print(f"{backend} SETUP: WARNING! {backend} runtime files not found in any environmental path.") elif len(cudart_paths) > 1: print_dedented( f""" - Found duplicate ROCm runtime files (see below). - - We select the PyTorch default ROCm runtime, which is {torch.version.hip}, - but this might mismatch with the ROCm version that is needed for bitsandbytes. - To override this behavior set the `BNB_ROCM_VERSION=` environmental variable. - - For example, if you want to use the ROCm version 7.2, - BNB_ROCM_VERSION=72 python ... - - OR set the environmental variable in your .bashrc: - export BNB_ROCM_VERSION=72 + Found duplicate {backend} runtime files (see below). - In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g. - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm-7.2.0/lib, + bitsandbytes will use PyTorch's {backend} runtime ({runtime_version}) and auto-select + the closest available library version. If you need to force a specific version, + set {override_var}, e.g.: + export {override_var}={override_example} """, ) for pth in cudart_paths: - print(f"* Found ROCm runtime at: {pth}") - - -def print_runtime_diagnostics() -> None: - if HIP_ENVIRONMENT: - _print_hip_runtime_diagnostics() - else: - _print_cuda_runtime_diagnostics() + print(f"* Found {backend} runtime at: {pth}") diff --git a/bitsandbytes/diagnostics/main.py b/bitsandbytes/diagnostics/main.py index 74da662b6..a64925c06 100644 --- a/bitsandbytes/diagnostics/main.py +++ b/bitsandbytes/diagnostics/main.py @@ -6,12 +6,9 @@ import torch from bitsandbytes import __version__ as bnb_version -from bitsandbytes.cextension import BNB_BACKEND from bitsandbytes.consts import PACKAGE_GITHUB_URL from bitsandbytes.cuda_specs import get_cuda_specs -from bitsandbytes.diagnostics.cuda import ( - print_diagnostics, -) +from bitsandbytes.diagnostics.cuda import print_diagnostics from bitsandbytes.diagnostics.utils import print_dedented, print_header _RELATED_PACKAGES = [ @@ -80,29 +77,48 @@ def main(): if cuda_specs: print_diagnostics(cuda_specs) - # TODO: There's a lot of noise in this; needs improvement. - # print_cuda_runtime_diagnostics() + has_rocm = torch.version.hip is not None + has_cuda = not has_rocm and torch.version.cuda is not None and torch.cuda.is_available() + has_xpu = hasattr(torch, "xpu") and torch.xpu.is_available() - if not torch.cuda.is_available(): - print(f"PyTorch says {BNB_BACKEND} is not available. Possible reasons:") - print(f"1. {BNB_BACKEND} driver not installed") - print("2. Using a CPU-only PyTorch build") - print("3. No GPU detected") + from bitsandbytes.cextension import ErrorHandlerMockBNBNativeLibrary, lib - else: - print(f"Checking that the library is importable and {BNB_BACKEND} is callable...") + lib_loaded = not isinstance(lib, ErrorHandlerMockBNBNativeLibrary) + if not (has_cuda or has_rocm or has_xpu): + print( + f"No CUDA, ROCm, or XPU detected; CPU library {'loaded successfully' if lib_loaded else 'failed to load'}." + ) + elif has_xpu: + from bitsandbytes.backends.utils import triton_available + + if not isinstance(lib, ErrorHandlerMockBNBNativeLibrary): + print("XPU native library loaded successfully.") + elif triton_available: + print("XPU native library not loaded; using triton fallback.") + else: + print("XPU native library not loaded and triton not available.") + else: + if not lib_loaded: + print_dedented( + f""" + See above for details on why the library failed to load. + Please provide this info when creating an issue via {PACKAGE_GITHUB_URL}/issues/new/choose + WARNING: Please be sure to sanitize sensitive info from the output before posting it. + """, + ) + sys.exit(1) + + print("Checking that the library is importable and callable...") try: sanity_check() print("SUCCESS!") return except RuntimeError as e: if "not available in CPU-only" in str(e): - print( - f"WARNING: {__package__} is currently running as CPU-only!\n" - "Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n" - f"If you think that this is so erroneously,\nplease report an issue!", - ) + print("WARNING: bitsandbytes is running as CPU-only!") + print("8-bit optimizers and GPU quantization are unavailable.") + print("If you think this is an error, please report an issue.") else: raise e except Exception: diff --git a/docs/source/errors.mdx b/docs/source/errors.mdx index d232a8c5b..987488770 100644 --- a/docs/source/errors.mdx +++ b/docs/source/errors.mdx @@ -23,9 +23,7 @@ If this does not work, please open an issue and paste the printed environment if ## Library not found: version mismatch -If you see an error like `Library not found: libbitsandbytes_cuda128.dll` or `libbitsandbytes_rocm72.so`, it means the pre-compiled library version doesn't match the CUDA/ROCm version reported by your PyTorch installation. - -The library filename encodes the version: `libbitsandbytes_cuda{major}{minor}` for CUDA, `libbitsandbytes_rocm{major}{minor}` for ROCm. bitsandbytes picks which one to load based on what PyTorch reports: +The library filename encodes the version: `libbitsandbytes_cuda{major}{minor}` for CUDA, `libbitsandbytes_rocm{major}{minor}` for ROCm. bitsandbytes selects which one to load based on what PyTorch reports: ```python import torch @@ -33,12 +31,12 @@ print(torch.version.cuda) # e.g. "12.8" -> looks for libbitsandbytes_cuda128 print(torch.version.hip) # e.g. "7.2" -> looks for libbitsandbytes_rocm72 ``` -This commonly happens when your PyTorch was compiled against a different CUDA/ROCm version than what you have installed on your system. For example, PyTorch built with ROCm 7.2 reports `torch.version.hip = "7.2"` and bitsandbytes looks for `libbitsandbytes_rocm72`, even if your system has a different ROCm version installed. +bitsandbytes will automatically fall back to the closest available pre-compiled version if an exact match is not found, and log a warning. For example, if your PyTorch was built with CUDA 12.9 but bitsandbytes only ships 12.8, it will load 12.8 automatically. -To resolve this: +If you see an error like `No compatible CUDA library found`, it means no compatible pre-compiled library could be found at all. To resolve this: -1. **Install a matching PyTorch version** that aligns with the pre-compiled libraries shipped in the bitsandbytes wheel. -2. **Override the version at runtime** with an environment variable so bitsandbytes loads a different library: +1. **Compile from source** to produce a library matching your exact toolkit version. See the [installation guide](installation) for instructions. +2. **Override the version at runtime** with an environment variable to force loading a specific pre-compiled version: ```bash # Linux / macOS export BNB_CUDA_VERSION=128 # or BNB_ROCM_VERSION=72 @@ -46,10 +44,4 @@ To resolve this: # Windows (cmd) set BNB_CUDA_VERSION=128 ``` -3. **Compile from source** to produce a library matching your exact toolkit version. For ROCm, you can override the library name with `-DROCM_VERSION`: - ```bash - cmake -DCOMPUTE_BACKEND=hip -DROCM_VERSION=72 -S . # produces libbitsandbytes_rocm72 - ``` - For CUDA, the version is detected automatically from the CUDA compiler on your PATH and cannot be overridden -- make sure the correct CUDA Toolkit is first on your PATH. - - See the [installation guide](installation) for full compile-from-source instructions. + The value must be digits only, e.g. `128` for CUDA 12.8 or `72` for ROCm 7.2. diff --git a/tests/test_cuda_setup_evaluator.py b/tests/test_cuda_setup_evaluator.py index a42b026f7..56a52736e 100644 --- a/tests/test_cuda_setup_evaluator.py +++ b/tests/test_cuda_setup_evaluator.py @@ -1,6 +1,10 @@ +from pathlib import Path +from unittest.mock import patch + import pytest -from bitsandbytes.cextension import BNB_BACKEND, get_cuda_bnb_library_path +from bitsandbytes.cextension import get_cuda_bnb_library_path +from bitsandbytes.consts import DYNAMIC_LIBRARY_SUFFIX from bitsandbytes.cuda_specs import CUDASpecs @@ -14,46 +18,9 @@ def cuda120_spec() -> CUDASpecs: ) -@pytest.mark.skipif(BNB_BACKEND != "CUDA", reason="this test requires a CUDA backend") -def test_get_cuda_bnb_library_path(monkeypatch, cuda120_spec): - """Without overrides, library path uses the detected CUDA 12.0 version.""" - monkeypatch.delenv("BNB_ROCM_VERSION", raising=False) - monkeypatch.delenv("BNB_CUDA_VERSION", raising=False) - assert get_cuda_bnb_library_path(cuda120_spec).stem == "libbitsandbytes_cuda120" - - -@pytest.mark.skipif(BNB_BACKEND != "CUDA", reason="this test requires a CUDA backend") -def test_get_cuda_bnb_library_path_override(monkeypatch, cuda120_spec, caplog): - """BNB_CUDA_VERSION=110 overrides path selection to the CUDA 11.0 binary.""" - monkeypatch.delenv("BNB_ROCM_VERSION", raising=False) - monkeypatch.setenv("BNB_CUDA_VERSION", "110") - assert get_cuda_bnb_library_path(cuda120_spec).stem == "libbitsandbytes_cuda110" - assert "BNB_CUDA_VERSION" in caplog.text # did we get the warning? - - -@pytest.mark.skipif(BNB_BACKEND != "CUDA", reason="this test requires a CUDA backend") -def test_get_cuda_bnb_library_path_rejects_rocm_override(monkeypatch, cuda120_spec): - """BNB_ROCM_VERSION alone should be rejected on CUDA with a helpful error.""" - monkeypatch.delenv("BNB_CUDA_VERSION", raising=False) - monkeypatch.setenv("BNB_ROCM_VERSION", "72") - with pytest.raises(RuntimeError, match=r"BNB_ROCM_VERSION.*not a ROCm build"): - get_cuda_bnb_library_path(cuda120_spec) - - -@pytest.mark.skipif(BNB_BACKEND != "CUDA", reason="this test requires a CUDA backend") -def test_get_cuda_bnb_library_path_cuda_override_takes_priority(monkeypatch, cuda120_spec, caplog): - """When both overrides are set on CUDA, the CUDA override wins and the ROCm one is warned about.""" - monkeypatch.setenv("BNB_CUDA_VERSION", "110") - monkeypatch.setenv("BNB_ROCM_VERSION", "72") - assert get_cuda_bnb_library_path(cuda120_spec).stem == "libbitsandbytes_cuda110" - assert "BNB_CUDA_VERSION" in caplog.text - assert "BNB_ROCM_VERSION" in caplog.text - assert "ignored on this CUDA build" in caplog.text - - @pytest.fixture def rocm70_spec() -> CUDASpecs: - """Simulates torch+rocm7.0 (bundled ROCm) when the system ROCm is newer.""" + """Simulates torch+rocm7.0.""" return CUDASpecs( cuda_version_string="70", highest_compute_capability=(0, 0), @@ -61,38 +28,108 @@ def rocm70_spec() -> CUDASpecs: ) -@pytest.mark.skipif(BNB_BACKEND != "ROCm", reason="this test requires a ROCm backend") -def test_get_rocm_bnb_library_path(monkeypatch, rocm70_spec): - """Without override, library path uses PyTorch's ROCm 7.0 version.""" - monkeypatch.delenv("BNB_ROCM_VERSION", raising=False) +@pytest.mark.parametrize( + "spec,fake_libs,hip_version,expected_name,expect_warning", + [ + # exact match + ( + CUDASpecs(cuda_version_string="124", highest_compute_capability=(8, 6), cuda_version_tuple=(12, 4)), + {(12, 4): Path(f"libbitsandbytes_cuda124{DYNAMIC_LIBRARY_SUFFIX}")}, + None, + f"libbitsandbytes_cuda124{DYNAMIC_LIBRARY_SUFFIX}", + False, + ), + # forward fallback within major: 12.0 -> 12.1 + ( + CUDASpecs(cuda_version_string="120", highest_compute_capability=(8, 6), cuda_version_tuple=(12, 0)), + { + (12, 1): Path(f"libbitsandbytes_cuda121{DYNAMIC_LIBRARY_SUFFIX}"), + (12, 4): Path(f"libbitsandbytes_cuda124{DYNAMIC_LIBRARY_SUFFIX}"), + }, + None, + f"libbitsandbytes_cuda121{DYNAMIC_LIBRARY_SUFFIX}", + True, + ), + # backward fallback: 12.9 -> 12.8 + ( + CUDASpecs(cuda_version_string="129", highest_compute_capability=(8, 9), cuda_version_tuple=(12, 9)), + { + (12, 4): Path(f"libbitsandbytes_cuda124{DYNAMIC_LIBRARY_SUFFIX}"), + (12, 8): Path(f"libbitsandbytes_cuda128{DYNAMIC_LIBRARY_SUFFIX}"), + }, + None, + f"libbitsandbytes_cuda128{DYNAMIC_LIBRARY_SUFFIX}", + True, + ), + # ROCm double-digit minor: 7.13 -> 7.2 + ( + CUDASpecs(cuda_version_string="713", highest_compute_capability=(0, 0), cuda_version_tuple=(7, 13)), + {(7, 2): Path(f"libbitsandbytes_rocm72{DYNAMIC_LIBRARY_SUFFIX}")}, + "7.13.0", + f"libbitsandbytes_rocm72{DYNAMIC_LIBRARY_SUFFIX}", + True, + ), + # no same-major match: 11.8 with only 12.x -> non-existent exact path, no warning + ( + CUDASpecs(cuda_version_string="118", highest_compute_capability=(7, 5), cuda_version_tuple=(11, 8)), + {(12, 1): Path("libbitsandbytes_cuda121.so"), (12, 4): Path("libbitsandbytes_cuda124.so")}, + None, + f"libbitsandbytes_cuda118{DYNAMIC_LIBRARY_SUFFIX}", + False, + ), + # no libs at all -> non-existent exact path, no warning + ( + CUDASpecs(cuda_version_string="129", highest_compute_capability=(8, 9), cuda_version_tuple=(12, 9)), + {}, + None, + f"libbitsandbytes_cuda129{DYNAMIC_LIBRARY_SUFFIX}", + False, + ), + ], +) +def test_version_selection(monkeypatch, caplog, spec, fake_libs, hip_version, expected_name, expect_warning): + """Library selection: exact match, fallback, no-same-major, no-libs.""" monkeypatch.delenv("BNB_CUDA_VERSION", raising=False) - assert get_cuda_bnb_library_path(rocm70_spec).stem == "libbitsandbytes_rocm70" - - -@pytest.mark.skipif(BNB_BACKEND != "ROCm", reason="this test requires a ROCm backend") -def test_get_rocm_bnb_library_path_override(monkeypatch, rocm70_spec, caplog): - """BNB_ROCM_VERSION=72 overrides to load the ROCm 7.2 library instead of 7.0.""" - monkeypatch.delenv("BNB_CUDA_VERSION", raising=False) - monkeypatch.setenv("BNB_ROCM_VERSION", "72") - assert get_cuda_bnb_library_path(rocm70_spec).stem == "libbitsandbytes_rocm72" - assert "BNB_ROCM_VERSION" in caplog.text + monkeypatch.delenv("BNB_ROCM_VERSION", raising=False) + is_hip = spec.cuda_version_tuple[0] < 10 + with ( + patch("torch.version.hip", hip_version if is_hip else None), + patch("bitsandbytes.cextension._find_cuda_libs", return_value=fake_libs), + ): + with caplog.at_level("WARNING"): + result = get_cuda_bnb_library_path(spec) + assert result.name == expected_name + if expect_warning: + assert caplog.text + else: + assert not caplog.text + + +def test_override(monkeypatch, cuda120_spec, caplog): + """BNB_CUDA_VERSION overrides path selection.""" + monkeypatch.setenv("BNB_CUDA_VERSION", "110") + with patch("bitsandbytes.cextension._find_cuda_libs", return_value={}): + with caplog.at_level("WARNING"): + result = get_cuda_bnb_library_path(cuda120_spec) + assert result.stem == "libbitsandbytes_cuda110" + assert "BNB_CUDA_VERSION" in caplog.text -@pytest.mark.skipif(BNB_BACKEND != "ROCm", reason="this test requires a ROCm backend") -def test_get_rocm_bnb_library_path_rocm_override_takes_priority(monkeypatch, rocm70_spec, caplog): - """When both overrides are set on ROCm, the ROCm override wins and the CUDA one is warned about.""" +def test_rocm_override(monkeypatch, rocm70_spec, caplog): + """BNB_ROCM_VERSION overrides path selection.""" monkeypatch.setenv("BNB_ROCM_VERSION", "72") - monkeypatch.setenv("BNB_CUDA_VERSION", "110") - assert get_cuda_bnb_library_path(rocm70_spec).stem == "libbitsandbytes_rocm72" + with ( + patch("torch.version.hip", "7.0.0"), + patch("bitsandbytes.cextension._find_cuda_libs", return_value={}), + ): + with caplog.at_level("WARNING"): + result = get_cuda_bnb_library_path(rocm70_spec) + assert result.stem == "libbitsandbytes_rocm72" assert "BNB_ROCM_VERSION" in caplog.text - assert "BNB_CUDA_VERSION" in caplog.text - assert "ignored on this ROCm build" in caplog.text -@pytest.mark.skipif(BNB_BACKEND != "ROCm", reason="this test requires a ROCm backend") -def test_get_rocm_bnb_library_path_rejects_cuda_override(monkeypatch, rocm70_spec): - """BNB_CUDA_VERSION alone should be rejected on ROCm with a helpful error.""" - monkeypatch.delenv("BNB_ROCM_VERSION", raising=False) - monkeypatch.setenv("BNB_CUDA_VERSION", "110") - with pytest.raises(RuntimeError, match=r"BNB_CUDA_VERSION.*not a CUDA build"): - get_cuda_bnb_library_path(rocm70_spec) +def test_override_invalid_format(monkeypatch, cuda120_spec): + """Override value must be digits only (e.g. '124'), not dotted or alphanumeric.""" + monkeypatch.setenv("BNB_CUDA_VERSION", "12.4") + with pytest.raises(RuntimeError, match="digits only"): + get_cuda_bnb_library_path(cuda120_spec)