Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@ All notable changes to `atomicmemory` will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.1.2] - 2026-06-15

### Security
- `api_url` is now validated against SSRF across all six SDK configs (the three
provider configs, `StorageClientConfig`, `AtomicMemoryClientConfig`, and
`EntitiesClientConfig`) via
one shared validator. It always rejects link-local / cloud-metadata addresses
(AWS IMDS `169.254.169.254`, IPv6 `fe80::/10`) — including their decimal
(`http://2852039166/`), hex, octal, short-form, and IPv4-mapped-IPv6
(`::ffff:169.254.169.254`) encodings, which are canonicalized so they cannot
bypass the guard. Loopback / private / reserved IP literals remain allowed by
default — the SDK routinely connects to local and self-hosted cores — and are
rejected only when you opt into strict mode with `allowPrivateNetworks=False`.
Hostnames (incl. the `localhost` default) are intentionally not DNS-resolved
at config time. This matches the Node SDK's posture for cross-SDK parity, and
a reflective enumeration test fails if a new `api_url` config omits the guard.
(FailSafe AGNT-PY-001.)

## [1.1.1] - 2026-06-11

### Added
Expand Down
1 change: 1 addition & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ Before changing code, read the relevant local files first:
- Snake_case for Python attributes; Pydantic `Field(alias="apiUrl")` aliases preserve TS camelCase wire format.
- Keep public API behavior aligned with `atomicmemory-sdk` where both SDKs expose the same concept.
- Prefer integration tests with a real HTTP path for client behavior; use mocks only for narrow transport errors.
- **Cross-cutting controls live at one chokepoint, enumerated and bypass-tested.** When a security/correctness rule must hold for *all* of a category (every config with an `api_url`, every input reaching a sink), apply it through one shared helper, not per-surface — and back it with a **reflective enumeration test** that fails when a new surface lacks it (e.g. `test_every_api_url_config_blocks_imds` discovers every `BaseModel` with an `api_url` field). Tests must exercise the **adversarial bypass** (the encoding, the key, the header), not just the canonical example, and validate against the **downstream consumer's interpretation** (the resolver, Postgres, the server), not your own parser. This is the gap that caused AGNT-PY-001's missed `EntitiesClientConfig` and numeric-IP bypass.

## Pre-commit verification

Expand Down
2 changes: 1 addition & 1 deletion atomicmemory/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
__version__: The current package version string (PEP 440).
"""

__version__ = "1.1.1"
__version__ = "1.1.2"
15 changes: 5 additions & 10 deletions atomicmemory/client/atomic_memory_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@

from types import TracebackType
from typing import Any
from urllib.parse import urlparse

from pydantic import BaseModel, ConfigDict, Field, SecretStr, field_validator, model_validator
from pydantic import ValidationError as PydanticValidationError

from atomicmemory.client.async_memory_client import AsyncMemoryClient
from atomicmemory.client.memory_client import MemoryClient, MemoryProviderConfigs
from atomicmemory.core.errors import ConfigError
from atomicmemory.core.url import validate_api_url
from atomicmemory.core.validation import sanitized_pydantic_errors
from atomicmemory.entities import AsyncEntitiesClient, EntitiesClient
from atomicmemory.entities.client import EntitiesClientConfig
Expand Down Expand Up @@ -48,17 +48,11 @@ class AtomicMemoryClientConfig(BaseModel):
api_key: SecretStr = Field(alias="apiKey")
user_id: str = Field(alias="userId")
timeout_seconds: float = Field(default=30.0, alias="timeoutSeconds")
allow_private_networks: bool = Field(default=True, alias="allowPrivateNetworks")
"""Permit loopback/private/reserved IP literals in ``api_url`` (default True;
set False to harden). Link-local / cloud-metadata stay blocked regardless."""
memory: MemoryNamespaceConfig | None = None

@field_validator("api_url")
@classmethod
def _validate_api_url(cls, value: str) -> str:
stripped = value.strip()
parsed = urlparse(stripped)
if parsed.scheme not in {"http", "https"} or not parsed.netloc:
raise ValueError("api_url must be an http(s) URL")
return stripped

@field_validator("api_key", mode="before")
@classmethod
def _validate_api_key(cls, value: object) -> object:
Expand Down Expand Up @@ -88,6 +82,7 @@ def _validate_timeout(cls, value: float) -> float:
def _require_non_empty(self) -> AtomicMemoryClientConfig:
if not self.api_url:
raise ValueError("api_url is required")
self.api_url = validate_api_url(self.api_url, allow_private_networks=self.allow_private_networks)
# api_key is always truthy as SecretStr; empty string rejected by _validate_api_key above.
if not self.user_id:
raise ValueError("user_id is required")
Expand Down
114 changes: 114 additions & 0 deletions atomicmemory/core/url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""Shared ``api_url`` validation used by every SDK config boundary.

Centralizes the rule that an ``api_url`` must be an http(s) URL with a
host, and adds SSRF defense: link-local / cloud-metadata addresses
(notably the ``169.254.169.254`` IMDS endpoint) are always rejected.
Loopback / private / reserved IP literals are *allowed by default* — the
SDK routinely connects to local and self-hosted cores — and only rejected
when the caller opts into strict mode via ``allow_private_networks=False``.
This mirrors the Node SDK's posture for cross-SDK parity.

Hostnames are intentionally NOT resolved here. Config-time DNS resolution
would be slow, racy, and still bypassable via DNS rebinding, so a literal
hostname (including ``localhost`` and ``metadata.google.internal``) passes
the scheme/host checks. Deployments that must defend against
hostname-based metadata access should pin ``api_url`` to a vetted host.
"""

from __future__ import annotations

import ipaddress
import socket
from urllib.parse import urlparse

# URL schemes accepted for ``api_url``; ``validate_api_url`` rejects any
# URL whose scheme is not in this set.
_ALLOWED_SCHEMES = frozenset({"http", "https"})


def _parse_ip(host: str) -> ipaddress.IPv4Address | ipaddress.IPv6Address | None:
"""Return the parsed IP when ``host`` is an IP literal, else ``None``.

Covers canonical literals AND the legacy IPv4 encodings the C resolver
(``inet_aton``/``getaddrinfo``) still accepts — decimal (``2852039166``),
hex (``0xA9FEA9FE``), octal (``0251.0376.0251.0376``) and short forms
(``127.1``). Without this they slip through as un-resolved "hostnames" and
defeat the SSRF checks, since the HTTP client resolves them to the real
address (e.g. ``http://2852039166/`` → ``169.254.169.254``).

Args:
host: The URL host component.

Returns:
The parsed/canonicalized IP address, or ``None`` when ``host`` is a
genuine (non-numeric) hostname.
"""
try:
return _collapse_mapped(ipaddress.ip_address(host))
except ValueError:
pass
try:
return ipaddress.IPv4Address(socket.inet_aton(host))
except (OSError, ValueError):
return None


def _collapse_mapped(
ip: ipaddress.IPv4Address | ipaddress.IPv6Address,
) -> ipaddress.IPv4Address | ipaddress.IPv6Address:
"""Reclassify an IPv4-mapped IPv6 address (``::ffff:a.b.c.d``) as its IPv4.

``IPv6Address.is_link_local`` only delegates to the embedded IPv4 on
newer CPython, so on Python 3.10/3.11 ``::ffff:169.254.169.254`` would
otherwise read as a benign global IPv6 and bypass the metadata block.
Collapsing to the embedded IPv4 makes classification deterministic
across all supported interpreters and matches the Node SDK.

Args:
ip: A parsed IP literal.

Returns:
The embedded IPv4 when ``ip`` is IPv4-mapped, otherwise ``ip``.
"""
mapped = getattr(ip, "ipv4_mapped", None)
return mapped if mapped is not None else ip


def validate_api_url(value: str, *, allow_private_networks: bool = True) -> str:
    """Validate and normalize an ``api_url``, guarding against SSRF.

    Only IP literals (as recognized by ``_parse_ip``) are classified.
    Hostnames are not resolved, so any non-numeric host passes once the
    scheme and host checks succeed.

    Args:
        value: The candidate URL.
        allow_private_networks: Defaults to ``True`` — loopback / private /
            reserved IP literals are permitted because the SDK routinely
            connects to local and self-hosted cores. Pass ``False`` to reject
            every IP literal that is not globally routable (hardened
            multi-tenant deployments). Link-local / cloud-metadata addresses
            are rejected regardless of this flag.

    Returns:
        The whitespace-stripped URL.

    Raises:
        ValueError: If the scheme is not http(s), the host is missing, or
            the host is a disallowed IP literal.
    """
    stripped = value.strip()
    parsed = urlparse(stripped)
    if parsed.scheme not in _ALLOWED_SCHEMES or not parsed.netloc:
        raise ValueError("api_url must be an http(s) URL")
    host = parsed.hostname
    if not host:
        raise ValueError("api_url must include a host")

    ip = _parse_ip(host)
    if ip is None:
        # Genuine hostname: intentionally not resolved at config time.
        return stripped

    # Always blocked, independent of allow_private_networks.
    if ip.is_link_local:
        raise ValueError("api_url must not target a link-local or cloud-metadata address")
    if allow_private_networks:
        return stripped

    # Strict mode. ``is_private`` deliberately excludes the shared address
    # space 100.64.0.0/10 (carrier-grade NAT), and ``is_reserved`` does not
    # cover it either, so without the ``is_global`` check an internal-only
    # address such as 100.100.100.200 would pass a "hardened" config.
    non_public = (
        ip.is_loopback
        or ip.is_private
        or ip.is_reserved
        or ip.is_multicast
        or ip.is_unspecified
        or not ip.is_global
    )
    if non_public:
        raise ValueError(
            "api_url must not target a loopback, private, or reserved address; "
            "set allow_private_networks=True to permit it"
        )
    return stripped
20 changes: 10 additions & 10 deletions atomicmemory/entities/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,13 @@
import json
from types import TracebackType
from typing import Any, TypeVar, cast
from urllib.parse import quote, urlencode, urlparse
from urllib.parse import quote, urlencode

import httpx
from pydantic import BaseModel, ConfigDict, Field, SecretStr, field_validator
from pydantic import BaseModel, ConfigDict, Field, SecretStr, field_validator, model_validator
from pydantic import ValidationError as PydanticValidationError

from atomicmemory.core.url import validate_api_url
from atomicmemory.entities.errors import EntitiesClientError
from atomicmemory.entities.types import (
DeleteEntityResult,
Expand Down Expand Up @@ -63,15 +64,14 @@ class EntitiesClientConfig(BaseModel):
api_url: str = Field(alias="apiUrl")
api_key: SecretStr = Field(alias="apiKey")
timeout_seconds: float = Field(default=30.0, alias="timeoutSeconds")
allow_private_networks: bool = Field(default=True, alias="allowPrivateNetworks")
"""Permit loopback/private/reserved IP literals in ``api_url`` (default True;
set False to harden). Link-local / cloud-metadata stay blocked regardless."""

@field_validator("api_url")
@classmethod
def _validate_api_url(cls, value: str) -> str:
stripped = value.strip()
parsed = urlparse(stripped)
if parsed.scheme not in {"http", "https"} or not parsed.netloc:
raise ValueError("api_url must be an http(s) URL")
return stripped
@model_validator(mode="after")
def _validate_api_url(self) -> EntitiesClientConfig:
    """Run the shared ``api_url`` check on the populated model.

    Declared as an ``after`` validator so ``allow_private_networks`` is
    available; the value returned by ``validate_api_url`` is stored back
    on the model.
    """
    checked_url = validate_api_url(self.api_url, allow_private_networks=self.allow_private_networks)
    self.api_url = checked_url
    return self

@field_validator("api_key", mode="before")
@classmethod
Expand Down
12 changes: 11 additions & 1 deletion atomicmemory/providers/atomicmemory/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@

from __future__ import annotations

from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, model_validator

from atomicmemory.core.url import validate_api_url
from atomicmemory.memory.meta_fact_filter import MetaFactFilterConfig

ATOMICMEMORY_DEFAULT_TIMEOUT_SECONDS: float = 30.0
Expand Down Expand Up @@ -41,3 +42,12 @@ class AtomicMemoryProviderConfig(BaseModel):

meta_fact_filter: MetaFactFilterConfig | None = Field(default=None, alias="metaFactFilter")
"""Optional opt-in post-retrieval meta-fact filter. Off when unset."""

allow_private_networks: bool = Field(default=True, alias="allowPrivateNetworks")
"""Permit loopback/private/reserved IP literals in ``api_url`` (default True;
set False to harden). Link-local / cloud-metadata stay blocked regardless."""

@model_validator(mode="after")
def _validate_api_url(self) -> AtomicMemoryProviderConfig:
    """Run the shared ``api_url`` check on the populated model.

    Declared as an ``after`` validator so ``allow_private_networks`` is
    available; the value returned by ``validate_api_url`` is stored back
    on the model.
    """
    checked_url = validate_api_url(self.api_url, allow_private_networks=self.allow_private_networks)
    self.api_url = checked_url
    return self
11 changes: 10 additions & 1 deletion atomicmemory/providers/hindsight/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
from collections.abc import Awaitable, Callable
from typing import Any, Literal

from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, model_validator

from atomicmemory.core.url import validate_api_url
from atomicmemory.memory.types import IngestInput, Scope

HindsightRecallBudget = Literal["low", "mid", "high"]
Expand Down Expand Up @@ -38,6 +39,14 @@ class HindsightProviderConfig(BaseModel):
project_id: str = Field(default=HINDSIGHT_DEFAULT_PROJECT_ID, alias="projectId")
default_budget: HindsightRecallBudget | None = Field(default=None, alias="defaultBudget")
default_max_tokens: int | None = Field(default=None, alias="defaultMaxTokens")
allow_private_networks: bool = Field(default=True, alias="allowPrivateNetworks")
"""Permit loopback/private/reserved IP literals in ``api_url`` (default True;
set False to harden). Link-local / cloud-metadata stay blocked regardless."""

@model_validator(mode="after")
def _validate_api_url(self) -> HindsightProviderConfig:
    """Run the shared ``api_url`` check on the populated model.

    Declared as an ``after`` validator so ``allow_private_networks`` is
    available; the value returned by ``validate_api_url`` is stored back
    on the model.
    """
    checked_url = validate_api_url(self.api_url, allow_private_networks=self.allow_private_networks)
    self.api_url = checked_url
    return self


class HindsightRetainResponse(BaseModel):
Expand Down
13 changes: 12 additions & 1 deletion atomicmemory/providers/mem0/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

from __future__ import annotations

from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, model_validator

from atomicmemory.core.url import validate_api_url

MEM0_DEFAULT_TIMEOUT_SECONDS: float = 30.0
MEM0_DEFAULT_PATH_PREFIX: str = "/v1"
Expand Down Expand Up @@ -49,3 +51,12 @@ class Mem0ProviderConfig(BaseModel):

org_id: str | None = Field(default=None, alias="orgId")
project_id: str | None = Field(default=None, alias="projectId")

allow_private_networks: bool = Field(default=True, alias="allowPrivateNetworks")
"""Permit loopback/private/reserved IP literals in ``api_url`` (default True;
set False to harden). Link-local / cloud-metadata stay blocked regardless."""

@model_validator(mode="after")
def _validate_api_url(self) -> Mem0ProviderConfig:
    """Run the shared ``api_url`` check on the populated model.

    Declared as an ``after`` validator so ``allow_private_networks`` is
    available; the value returned by ``validate_api_url`` is stored back
    on the model.
    """
    checked_url = validate_api_url(self.api_url, allow_private_networks=self.allow_private_networks)
    self.api_url = checked_url
    return self
16 changes: 6 additions & 10 deletions atomicmemory/storage/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from __future__ import annotations

from typing import Any, Literal
from urllib.parse import urlparse

from pydantic import BaseModel, ConfigDict, Field, SecretStr, field_validator, model_validator

from atomicmemory.core.url import validate_api_url

StorageArtifactStatus = Literal[
"stored",
"pending",
Expand Down Expand Up @@ -42,15 +43,9 @@ class StorageClientConfig(BaseModel):
api_key: SecretStr = Field(alias="apiKey")
user_id: str = Field(alias="userId")
timeout_seconds: float = Field(default=30.0, alias="timeoutSeconds")

@field_validator("api_url")
@classmethod
def _validate_api_url(cls, value: str) -> str:
stripped = value.strip()
parsed = urlparse(stripped)
if parsed.scheme not in {"http", "https"} or not parsed.netloc:
raise ValueError("api_url must be an http(s) URL")
return stripped
allow_private_networks: bool = Field(default=True, alias="allowPrivateNetworks")
"""Permit loopback/private/reserved IP literals in ``api_url`` (default True;
set False to harden). Link-local / cloud-metadata stay blocked regardless."""

@field_validator("api_key", mode="before")
@classmethod
Expand Down Expand Up @@ -81,6 +76,7 @@ def _validate_timeout(cls, value: float) -> float:
def _require_non_empty(self) -> StorageClientConfig:
if not self.api_url:
raise ValueError("api_url is required")
self.api_url = validate_api_url(self.api_url, allow_private_networks=self.allow_private_networks)
# api_key is always truthy as SecretStr; empty string rejected by _validate_api_key above.
if not self.user_id:
raise ValueError("user_id is required")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "atomicmemory"
version = "1.1.1"
version = "1.1.2"
description = "Python client SDK for AtomicMemory memory and artifact storage."
readme = "README.md"
requires-python = ">=3.10"
Expand Down
Loading
Loading