diff --git a/README.md b/README.md index 6a55c8e..c15b33d 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,11 @@ rows = client.query("SELECT id, name FROM orders WHERE status = ?", ["confirmed" ``` Sync is the default; use `create_async_snowflake_client(...)` inside a Lambda -already on an event loop. Requires the `[snowflake]` extra. +or FastAPI app already on an event loop. Snowflake SQL API bindings use +positional `?` placeholders, not connector-style `%s` placeholders. Requires +the `[snowflake]` extra. + +**[→ See full Snowflake guide](docs/guides/snowflake-integration.md)** ### CloudWatch Metrics diff --git a/docs/README.md b/docs/README.md index b3bf395..32acf16 100644 --- a/docs/README.md +++ b/docs/README.md @@ -20,6 +20,7 @@ Component-specific guides for major features: - **[Lambda Context Helpers](guides/lambda-utilities.md)** - Environment info extraction for logging and metrics - **[Slack Integration](guides/slack-integration.md)** - Messaging, formatting, and file uploads - **[Elasticsearch Integration](guides/elasticsearch-integration.md)** - Search, bulk indexing, health checks +- **[Snowflake Integration](guides/snowflake-integration.md)** - SQL API client, Secrets Manager credentials, sync/async usage - **[JWT Authentication](guides/jwt-authentication.md)** - RS256 token validation for API Gateway Lambdas - **[Log Processing](guides/log-processing.md)** - Kinesis log extraction and ES index naming - Database Connections (planned) @@ -124,6 +125,7 @@ When contributing to documentation: - Lambda context helpers guide (guides/lambda-utilities.md) - Slack integration guide (guides/slack-integration.md) - Elasticsearch integration guide (guides/elasticsearch-integration.md) +- Snowflake integration guide (guides/snowflake-integration.md) - JWT authentication guide (guides/jwt-authentication.md) - Shared types reference (guides/shared-types.md) - CLI tools guide (guides/cli-tools.md) diff --git a/docs/guides/snowflake-integration.md b/docs/guides/snowflake-integration.md new file mode 100644 index 0000000..2f1a8ae --- /dev/null +++ b/docs/guides/snowflake-integration.md @@ -0,0 +1,159 @@ +# Snowflake Integration + +The Snowflake adapter wraps `snowflake-sql-api`, a pure-Python SQL API client. +Use it when a Lambda, CLI, or async API needs Snowflake reads or simple SQL +execution without bundling `snowflake-connector-python`. + +Install the optional extra: + +```bash +pip install "nui-python-shared-utils[snowflake]" +``` + +## Credential Resolution + +Credentials resolve in the same order as the other shared clients: + +1. explicit arguments +2. `SNOWFLAKE_*` environment variables +3. AWS Secrets Manager + +Supported direct environment variables: + +| Variable | Purpose | +| --- | --- | +| `SNOWFLAKE_ACCOUNT` | Snowflake account locator, for example `xy12345.ap-southeast-2` | +| `SNOWFLAKE_USER` | Service user | +| `SNOWFLAKE_PRIVATE_KEY` | Inline PEM private key | +| `SNOWFLAKE_PRIVATE_KEY_PATH` | Local PEM/DER private key path | +| `SNOWFLAKE_PRIVATE_KEY_PASSPHRASE` | Optional private key passphrase | +| `SNOWFLAKE_CREDENTIALS_SECRET` | Secret name override | + +Secrets Manager values use this JSON shape: + +```json +{ + "account": "xy12345.ap-southeast-2", + "user": "SERVICE_USER", + "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----", + "private_key_passphrase": "optional" +} +``` + +Existing `snowflake-agent-credentials` secrets and service-specific secrets such +as a future `NUI_ANALYTICS_SVC` secret fit this shape. Pass `secret_name=` when a +service should not use the default or environment-selected secret. + +## Sync Usage + +```python +from nui_shared_utils import create_snowflake_client + +with create_snowflake_client( + secret_name="snowflake-agent-credentials", + role="NUI_LAMBDA", + warehouse="AGENT_WH", + database="NUI_MARKETS", + schema="ANALYTICS", +) as client: + rows = client.query( + "SELECT source_id, title FROM source_items WHERE source_id = ? LIMIT 10", + ["edairynews"], + ) +``` + +The adapter applies NUI defaults for `role="NUI_LAMBDA"` and +`timezone="Pacific/Auckland"`. Override role, warehouse, database, schema, and +timezone explicitly for service accounts that need narrower production access. + +## Async Usage + +Use the async factory in FastAPI routes or other runtimes that already own an +event loop: + +```python +from contextlib import asynccontextmanager +from fastapi import FastAPI +from nui_shared_utils import create_async_snowflake_client + +@asynccontextmanager +async def lifespan(app: FastAPI): + app.state.snowflake = create_async_snowflake_client( + secret_name="nui-analytics-snowflake", + role="NUI_ANALYTICS_API", + warehouse="ANALYTICS_WH", + database="NUI_MARKETS", + schema="ANALYTICS_API", + statement_timeout=30, + ) + try: + yield + finally: + await app.state.snowflake.aclose() + +app = FastAPI(lifespan=lifespan) +``` + +For Lambda handlers that use an async framework, create the client during +startup and close it during shutdown. For normal synchronous Lambda handlers, +prefer `create_snowflake_client`. + +## Query Parameters + +Snowflake SQL API bindings are positional `?` bindings: + +```python +rows = client.query("SELECT id FROM trades WHERE solution_id = ?", [solution_id]) +``` + +Do not use connector-style `%s` placeholders or named `:name` bindings with this +adapter. + +## Logging And Metrics Hooks + +By default the adapter installs a redacting query logger. It logs SQL text and +bind parameter count, never bind values: + +```python +client = create_snowflake_client(log_sql_max_chars=500) +``` + +For service metrics, pass a custom `on_query` hook: + +```python +def record_query(sql, params): + metrics.put_metric("SnowflakeQuery", 1, "Count") + +client = create_snowflake_client(on_query=record_query) +``` + +Keep hooks generic. Domain-specific table names, market-intelligence state +transitions, and analytics endpoint logic belong in the consuming service. + +## Offline Tests + +Use `snowflake_sql_api.testing.FakeSnowflake` to test service code without a +network or real Snowflake account: + +```python +import httpx +from snowflake_sql_api.testing import FakeSnowflake +from nui_shared_utils import create_async_snowflake_client + +async def test_query(rsa_private_key_pem): + fake = FakeSnowflake() + fake.register("SELECT 1 AS value", [{"value": 1}]) + + async with httpx.AsyncClient(transport=fake.transport) as http_client: + client = create_async_snowflake_client( + account="xy12345.ap-southeast-2", + user="TEST_USER", + private_key=rsa_private_key_pem, + http_client=http_client, + log_queries=False, + ) + assert await client.query_scalar("SELECT 1 AS value") == 1 +``` + +Add live smoke tests for behavior that FakeSnowflake cannot prove, such as +`MERGE` row counts, `PARSE_JSON(?)`, and timestamp/session timezone semantics. diff --git a/pyproject.toml b/pyproject.toml index 74ce855..46ec43a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,11 +45,11 @@ powertools = [ "coloredlogs>=15.0", ] jwt = ["rsa>=4.9"] -# Pre-release pin to the public snowflake-sql-api repo (PR #1 merge commit on -# master). Repinned to a PyPI release (>=0.1.0) once that package ships. Kept -# out of `all` so nothing pulls the git dependency implicitly. +# Tag pin to the public snowflake-sql-api repo. Repin to a PyPI release once +# that package is published. Kept out of `all` so nothing pulls the git +# dependency implicitly. snowflake = [ - "snowflake-sql-api @ git+https://github.com/hampsterx/snowflake-sql-api.git@50205b0cb63df008c9c453ee05f0f130dcfe4805", + "snowflake-sql-api @ git+https://github.com/hampsterx/snowflake-sql-api.git@v0.1.1", ] all = [ "elasticsearch>=7.17.0,<8.0.0", @@ -74,6 +74,7 @@ dev = [ "build>=0.8.0", "rsa>=4.9", "cryptography>=41.0.0", + "snowflake-sql-api @ git+https://github.com/hampsterx/snowflake-sql-api.git@v0.1.1", ] [project.urls] @@ -149,4 +150,4 @@ exclude_lines = [ # Automatically derive version from git tags # Tag format: v1.0.1 -> version 1.0.1 version_scheme = "guess-next-dev" -local_scheme = "no-local-version" \ No newline at end of file +local_scheme = "no-local-version" diff --git a/requirements-test.txt b/requirements-test.txt index 826dde6..2709268 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -26,5 +26,5 @@ slack-sdk>=3.19.0 rsa>=4.9 cryptography>=41.0.0 -# snowflake adapter (pre-release pin; see pyproject [snowflake] extra) -snowflake-sql-api @ git+https://github.com/hampsterx/snowflake-sql-api.git@50205b0cb63df008c9c453ee05f0f130dcfe4805 +# snowflake adapter (tag pin; see pyproject [snowflake] extra) +snowflake-sql-api @ git+https://github.com/hampsterx/snowflake-sql-api.git@v0.1.1 diff --git a/tests/test_snowflake_client.py b/tests/test_snowflake_client.py index 07d5372..9341259 100644 --- a/tests/test_snowflake_client.py +++ b/tests/test_snowflake_client.py @@ -479,6 +479,42 @@ def test_real_async_client_constructs(self, clear_snowflake_env, rsa_private_key finally: asyncio.run(client.aclose()) + def test_async_factory_queries_through_fake_snowflake(self, clear_snowflake_env, rsa_private_key_pem): + """Offline smoke path: shared-utils async factory can run a real query.""" + import asyncio + + import httpx + from snowflake_sql_api.testing import FakeSnowflake + + async def run_query(): + fake = FakeSnowflake() + fake.register( + "SELECT id, payload FROM events WHERE kind = ?", + [{"id": 1, "payload": {"ok": True}}], + ) + async with httpx.AsyncClient(transport=fake.transport) as http_client: + client = sc.create_async_snowflake_client( + account="ab12345.ap-southeast-2", + user="tim", + private_key=rsa_private_key_pem, + role="NUI_ANALYTICS_API", + warehouse="ANALYTICS_WH", + database="NUI_MARKETS", + schema="ANALYTICS_API", + http_client=http_client, + log_queries=False, + ) + rows = await client.query("SELECT id, payload FROM events WHERE kind = ?", ["news"]) + await client.aclose() + return client, rows + + client, rows = asyncio.run(run_query()) + assert client.role == "NUI_ANALYTICS_API" + assert client.warehouse == "ANALYTICS_WH" + assert client.database == "NUI_MARKETS" + assert client.schema == "ANALYTICS_API" + assert rows == [{"id": 1, "payload": {"ok": True}}] + # --------------------------------------------------------------------------- # Redaction: no key material in logs end-to-end