Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ cachekit prioritizes explicit, predictable behavior.
| **StandardSerializer** | Language-agnostic MessagePack | Default, works everywhere |
| **AutoSerializer** | Python-optimized (NumPy, pandas, datetime) | Named "Auto" to be transparent |
| **ArrowSerializer** | Apache Arrow for DataFrames | 60%+ faster for pandas |
| **OrjsonSerializer** | JSON via orjson | JSON compatibility |
| **OrjsonSerializer** | JSON via orjson — requires `pip install 'cachekit[json]'` | JSON compatibility |

> [!IMPORTANT]
> **NO auto-detection of business logic**: Pydantic models, SQLAlchemy ORM objects, and custom classes require explicit conversion to dict.
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def test_cached_function():
| Serializer | Speed | Use Case |
|:-----------|:-----:|:---------|
| **StandardSerializer** | ★★★★☆ | General Python types, NumPy, Pandas |
| **OrjsonSerializer** | ★★★★★ | JSON APIs (2-5x faster than stdlib) |
| **OrjsonSerializer** | ★★★★★ | JSON APIs (2-5x faster than stdlib) — requires `cachekit[json]` |
| **ArrowSerializer** | ★★★★★ | Large DataFrames (6-23x faster for 10K+ rows) |
| **EncryptionWrapper** | ★★★★☆ | Wraps any serializer with AES-256-GCM |

Expand Down
2 changes: 1 addition & 1 deletion docs/api-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ result = process_numpy_data()

### Using OrjsonSerializer (JSON-Optimized)

Use OrjsonSerializer for JSON-heavy workloads and APIs:
Use OrjsonSerializer for JSON-heavy workloads and APIs. Requires the `[json]` extra: `pip install 'cachekit[json]'` (or `uv add 'cachekit[json]'`).

```python notest
from cachekit import cache
Expand Down
2 changes: 1 addition & 1 deletion docs/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def analyze_data():
import numpy as np
return np.array([1, 2, 3, 4, 5])

# For JSON APIs: OrjsonSerializer (2-5x faster)
# For JSON APIs: OrjsonSerializer (2-5x faster) — requires: pip install 'cachekit[json]'
@cache(ttl=900, serializer="orjson")
def get_api_response():
return {"status": "ok", "data": "response"}
Expand Down
2 changes: 2 additions & 0 deletions docs/serializers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Each serializer integrates transparently with the `@cache` decorator. You can co
| [EncryptionWrapper](encryption.md) | Adds ~3-5 μs | Zero-knowledge caching, GDPR/HIPAA/PCI-DSS compliance |
| [Custom Serializers](custom.md) | Varies | Specialized data types not covered above |

> **OrjsonSerializer** requires the `[json]` extra: `pip install 'cachekit[json]'` (or `uv add 'cachekit[json]'`).

For caching Pydantic models, see [Caching Pydantic Models](pydantic.md).

## Decision Matrix
Expand Down
10 changes: 10 additions & 0 deletions docs/serializers/orjson.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@

**JSON-optimized serializer** — Fast JSON serialization powered by Rust (orjson library). Ideal for JSON-heavy workloads and API response caching.

**Requires the `[json]` extra** — orjson is an optional dependency:

```bash
pip install 'cachekit[json]'
# or
uv add 'cachekit[json]'
```

Without orjson installed, `get_serializer("orjson")` raises `ImportError: orjson is not installed. OrjsonSerializer requires the [json] extra: pip install 'cachekit[json]'`.

## Overview

**Best for:**
Expand Down
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ dependencies = [
# Serialization and hashing
"blake3>=1.0.5",
"msgpack>=1.2.1",
"orjson>=3.9.0",
"xxhash>=3.5.0",
# HTTP client for SaaS backend (cachekit.io)
"httpx[http2]>=0.28.1",
Expand All @@ -78,6 +77,10 @@ data = [
"pandas>=1.3.0",
"pyarrow>=21.0.0",
]
json = [
# OrjsonSerializer (serializer="orjson") — fast JSON via orjson
"orjson>=3.9.0",
]
memcached = [
"pymemcache>=4.0.0",
]
Expand Down Expand Up @@ -228,6 +231,9 @@ dev = [
"numpy>=2.0.2",
"pandas>=1.3.0",
"pyarrow>=21.0.0",
# OrjsonSerializer support — now the [json] optional extra; kept here so the
# orjson tests, doctests, and markdown-docs still resolve it in dev/CI.
"orjson>=3.9.0",
"pytest-xdist>=3.8.0",
"time-machine>=2.19.0",
]
Expand Down
36 changes: 30 additions & 6 deletions src/cachekit/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
SerializerProtocol,
)
from .encryption_wrapper import EncryptionWrapper
from .orjson_serializer import OrjsonSerializer
from .standard_serializer import StandardSerializer

if TYPE_CHECKING:
from .arrow_serializer import ArrowSerializer
from .orjson_serializer import OrjsonSerializer

logger = logging.getLogger(__name__)

Expand All @@ -37,6 +37,20 @@ def _get_arrow_serializer() -> type:
return _ArrowSerializer


# Lazy import for optional OrjsonSerializer (requires orjson from [json] extra)
_OrjsonSerializer: type | None = None


def _get_orjson_serializer() -> type:
    """Return the OrjsonSerializer class, importing its module on first use.

    The resolved class is memoized in the module-level ``_OrjsonSerializer``
    so later calls skip the import entirely.

    Raises:
        ImportError: If orjson is not installed (the [json] extra is missing).
    """
    global _OrjsonSerializer
    if _OrjsonSerializer is not None:
        return _OrjsonSerializer

    # Deferred import: keeps orjson out of sys.modules until it is actually requested.
    from .orjson_serializer import OrjsonSerializer as resolved_class

    _OrjsonSerializer = resolved_class
    return resolved_class


# Validate ByteStorage works correctly
test_storage = ByteStorage("msgpack")
test_data = b"test validation data"
Expand All @@ -57,7 +71,7 @@ def _get_arrow_serializer() -> type:
"default": StandardSerializer, # Language-agnostic MessagePack for multi-language caches
"std": StandardSerializer, # Explicit StandardSerializer alias
"arrow": None, # Lazy-loaded: requires pyarrow from [data] extra
"orjson": OrjsonSerializer,
"orjson": None, # Lazy-loaded: requires orjson from [json] extra
"encrypted": EncryptionWrapper, # StandardSerializer + AES-256-GCM encryption
}

Expand Down Expand Up @@ -116,9 +130,11 @@ def get_serializer(name: str, enable_integrity_checking: bool = True) -> Seriali
f"@cache(serializer=MySerializer())"
)

# Get serializer class (lazy-load arrow if needed)
# Get serializer class (lazy-load optional serializers if needed)
if name == "arrow":
serializer_class = _get_arrow_serializer()
elif name == "orjson":
serializer_class = _get_orjson_serializer()
else:
serializer_class = SERIALIZER_REGISTRY[name]

Expand Down Expand Up @@ -177,9 +193,15 @@ def get_serializer_info() -> dict[str, dict[str, Any]]:
if hasattr(instance, "get_info"):
info[name].update(instance.get_info()) # type: ignore[attr-defined]
except ImportError as e:
# Optional serializer whose backing dependency (pyarrow / orjson) is absent.
optional_modules = {
"arrow": ("ArrowSerializer", "cachekit.serializers.arrow_serializer"),
"orjson": ("OrjsonSerializer", "cachekit.serializers.orjson_serializer"),
}
cls, module = optional_modules.get(name, ("Unknown", "unknown"))
info[name] = {
"class": "ArrowSerializer" if name == "arrow" else "Unknown",
"module": "cachekit.serializers.arrow_serializer",
"class": cls,
"module": module,
"available": False,
"error": str(e),
}
Expand All @@ -194,9 +216,11 @@ def get_serializer_info() -> dict[str, dict[str, Any]]:


def __getattr__(name: str) -> Any:
"""Lazy attribute access for optional ArrowSerializer."""
"""Lazy attribute access for optional ArrowSerializer / OrjsonSerializer."""
if name == "ArrowSerializer":
return _get_arrow_serializer()
if name == "OrjsonSerializer":
return _get_orjson_serializer()
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


Expand Down
7 changes: 6 additions & 1 deletion src/cachekit/serializers/orjson_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@

from typing import Any, ClassVar

import orjson
# orjson is required by this module but optional for cachekit as a whole (the [json] extra); fail fast with an install hint when it is absent
try:
import orjson
except ImportError as e: # pragma: no cover - only reachable without the [json] extra (behavior tested via subprocess)
raise ImportError("orjson is not installed. OrjsonSerializer requires the [json] extra: pip install 'cachekit[json]'") from e

import xxhash

from .base import SerializationError, SerializationFormat, SerializationMetadata
Expand Down
8 changes: 6 additions & 2 deletions tests/unit/test_orjson_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,12 @@ def test_factory_caches_orjson_serializer(self):
assert serializer1 is serializer2

def test_orjson_serializer_in_registry(self):
"""Test that OrjsonSerializer is registered."""
"""orjson is registered as a lazy (None) placeholder — like arrow.

orjson moved to the optional [json] extra, so the registry holds None and
get_serializer('orjson') resolves the class on demand (covered above).
"""
from cachekit.serializers import SERIALIZER_REGISTRY

assert "orjson" in SERIALIZER_REGISTRY
assert SERIALIZER_REGISTRY["orjson"] == OrjsonSerializer
assert SERIALIZER_REGISTRY["orjson"] is None
92 changes: 92 additions & 0 deletions tests/unit/test_serializer_lazy_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,23 @@

from __future__ import annotations

import subprocess
import sys

import pytest

from cachekit.serializers import (
SERIALIZER_REGISTRY,
_get_arrow_serializer,
_get_orjson_serializer,
benchmark_serializers,
get_available_serializers,
get_serializer,
get_serializer_info,
)
from cachekit.serializers.arrow_serializer import ArrowSerializer
from cachekit.serializers.base import SerializerProtocol
from cachekit.serializers.orjson_serializer import OrjsonSerializer


class TestLazyArrowSerializerLoading:
Expand Down Expand Up @@ -129,3 +134,90 @@ def test_arrow_is_none_in_registry(self):
"""Arrow entry is None in the raw registry (lazy placeholder)."""
available = get_available_serializers()
assert available["arrow"] is None


class TestLazyOrjsonSerializerLoading:
    """Lazy-loading behaviour of OrjsonSerializer, which sits behind the optional [json] extra."""

    def test_registry_has_none_for_orjson(self):
        """The registry keeps an 'orjson' key whose value is the lazy None placeholder."""
        assert "orjson" in SERIALIZER_REGISTRY
        placeholder = SERIALIZER_REGISTRY["orjson"]
        assert placeholder is None

    def test_get_orjson_serializer_returns_class(self):
        """The lazy loader resolves to the real OrjsonSerializer class."""
        resolved = _get_orjson_serializer()
        assert resolved is OrjsonSerializer

    def test_get_orjson_serializer_caches_result(self):
        """Two consecutive loader calls hand back the very same class object."""
        first = _get_orjson_serializer()
        second = _get_orjson_serializer()
        assert first is second

    def test_get_serializer_orjson_returns_instance(self):
        """The factory builds an OrjsonSerializer that also satisfies SerializerProtocol."""
        instance = get_serializer("orjson")
        assert isinstance(instance, OrjsonSerializer)
        assert isinstance(instance, SerializerProtocol)

    def test_module_getattr_returns_orjson_serializer(self):
        """Attribute access on the package resolves OrjsonSerializer via module __getattr__."""
        import cachekit.serializers as serializers_pkg

        assert serializers_pkg.OrjsonSerializer is OrjsonSerializer

    def test_get_serializer_info_includes_orjson(self):
        """With orjson installed, the info entry is available and carries the right class name."""
        orjson_entry = get_serializer_info()["orjson"]
        assert orjson_entry["available"] is True
        assert orjson_entry["class"] == "OrjsonSerializer"

    def test_get_serializer_info_reports_orjson_unavailable(self, monkeypatch):
        """A missing orjson is reported as an unavailable OrjsonSerializer entry.

        Protects the generalized optional-dependency branch: it used to be
        hardcoded to ArrowSerializer and would have mislabeled a missing orjson.
        """
        import cachekit.serializers as serializers_mod

        def _raise_missing() -> type:
            raise ImportError("orjson is not installed. OrjsonSerializer requires the [json] extra")

        # Drop any cached orjson instance first so get_serializer has to re-resolve
        # the class; otherwise the ImportError never reaches get_serializer_info's
        # except branch.
        monkeypatch.delitem(serializers_mod._serializer_cache, "orjson:True", raising=False)
        monkeypatch.setattr(serializers_mod, "_get_orjson_serializer", _raise_missing)

        orjson_entry = serializers_mod.get_serializer_info()["orjson"]
        assert orjson_entry["available"] is False
        assert orjson_entry["class"] == "OrjsonSerializer"
        assert orjson_entry["module"] == "cachekit.serializers.orjson_serializer"


class TestOrjsonIsOptional:
    """orjson is an optional dependency (the [json] extra): it must not be pulled
    eagerly, and when absent it must yield a helpful install error while the rest of
    cachekit keeps working. Verified in fresh subprocesses because sys.modules is
    shared across the test session (orjson is installed in the dev environment).
    """

    # Generous upper bound: a wedged child interpreter should fail the test
    # (subprocess.TimeoutExpired) rather than hang the whole test session.
    _SUBPROCESS_TIMEOUT_SECONDS = 60

    def _run_in_fresh_interpreter(self, code: str) -> "subprocess.CompletedProcess[str]":
        """Run ``code`` with ``python -c`` in a new interpreter and return the completed process.

        Args:
            code: Literal Python source to execute in the child interpreter.

        Returns:
            The CompletedProcess with stdout/stderr captured as text. A non-zero
            exit status is returned, not raised (``check=False``), so callers can
            attach stderr to their own assertion message.

        Raises:
            subprocess.TimeoutExpired: If the child exceeds the timeout.
        """
        return subprocess.run(  # noqa: S603 (trusted: sys.executable + literal code)
            [sys.executable, "-c", code],
            capture_output=True,
            text=True,
            check=False,
            timeout=self._SUBPROCESS_TIMEOUT_SECONDS,
        )

    def test_import_cachekit_does_not_pull_orjson(self):
        """Importing cachekit must NOT eagerly import orjson (the optionality regression guard)."""
        code = "import cachekit, sys; assert 'orjson' not in sys.modules, 'orjson was imported eagerly'"
        result = self._run_in_fresh_interpreter(code)
        assert result.returncode == 0, result.stderr

    def test_orjson_absent_raises_helpful_error(self):
        """Without orjson, cachekit + the default serializer still work, and requesting
        the orjson serializer raises a helpful, actionable [json]-extra ImportError."""
        # A None entry in sys.modules makes any later `import orjson` raise
        # ImportError, simulating an environment without the [json] extra.
        code = (
            'import sys; sys.modules["orjson"] = None\n'
            "import cachekit\n"
            "from cachekit.serializers import get_serializer\n"
            'assert type(get_serializer("default")).__name__ == "StandardSerializer"\n'
            "try:\n"
            ' get_serializer("orjson")\n'
            # SystemExit is not an ImportError, so it escapes the except clause
            # and the child exits non-zero when no ImportError was raised.
            ' raise SystemExit("expected ImportError")\n'
            "except ImportError as e:\n"
            ' assert "[json] extra" in str(e), str(e)\n'
        )
        result = self._run_in_fresh_interpreter(code)
        assert result.returncode == 0, result.stderr
10 changes: 7 additions & 3 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading