From 6400d5f0506fb9225794747ce1cf67278142fb2a Mon Sep 17 00:00:00 2001 From: Hang Yin Date: Thu, 25 Jun 2026 18:11:43 +0000 Subject: [PATCH 1/3] add tsm-shim example: run unmodified configfs-tsm binaries via a sidecar tsm-shim re-exposes the dstack guest-agent GetQuote RPC under the standard configfs-tsm file ABI (inblob/outblob), so unmodified TDX attestation binaries run on a stock dstack CVM with no OS change -- only docker-compose edits. Pure userspace: no FUSE, no CAP_SYS_ADMIN, no privileged container. - tsm-shim/: shim daemon (stdlib), Dockerfile, demo client, compose - .github/workflows/build-tsm-shim.yml: build & publish image to GHCR - README: index entry under Core Patterns > Attestation Verified on real Phala Cloud TDX (tdx.small / dstack-0.5.7): genuine 5010-byte DCAP quote, header 0400, report_data bound. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/build-tsm-shim.yml | 55 ++++++++++ README.md | 1 + tsm-shim/Dockerfile | 21 ++++ tsm-shim/README.md | 114 +++++++++++++++++++++ tsm-shim/demo-app.py | 60 +++++++++++ tsm-shim/docker-compose.yaml | 40 ++++++++ tsm-shim/tsm_shim.py | 144 +++++++++++++++++++++++++++ 7 files changed, 435 insertions(+) create mode 100644 .github/workflows/build-tsm-shim.yml create mode 100644 tsm-shim/Dockerfile create mode 100644 tsm-shim/README.md create mode 100644 tsm-shim/demo-app.py create mode 100644 tsm-shim/docker-compose.yaml create mode 100644 tsm-shim/tsm_shim.py diff --git a/.github/workflows/build-tsm-shim.yml b/.github/workflows/build-tsm-shim.yml new file mode 100644 index 0000000..e8eb966 --- /dev/null +++ b/.github/workflows/build-tsm-shim.yml @@ -0,0 +1,55 @@ +name: build tsm-shim + +on: + push: + branches: [main] + paths: + - 'tsm-shim/**' + - '.github/workflows/build-tsm-shim.yml' + pull_request: + paths: + - 'tsm-shim/**' + - '.github/workflows/build-tsm-shim.yml' + workflow_dispatch: + +permissions: + contents: read + packages: write + +jobs: + build: + runs-on: ubuntu-latest + env: 
+ IMAGE: ghcr.io/dstack-tee/dstack-tsm-shim + steps: + - uses: actions/checkout@v4 + + - uses: docker/setup-buildx-action@v3 + + # On PRs (incl. forks) GITHUB_TOKEN lacks packages:write, so only build. + - name: Log in to GHCR + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Docker metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE }} + tags: | + type=raw,value=latest,enable={{is_default_branch}} + type=sha,format=short + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: tsm-shim + file: tsm-shim/Dockerfile + platforms: linux/amd64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/README.md b/README.md index 4e6e5e5..5270b9c 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,7 @@ volumes: |---------|-------------|--------| | [timelock-nts](./timelock-nts) | Raw socket usage (what the SDK wraps) | Available | | [attestation/configid-based](./attestation/configid-based) | ConfigID-based verification | Available | +| [tsm-shim](./tsm-shim) | Run unmodified `configfs-tsm` binaries (inblob/outblob) via a sidecar | Available | ### Gateway & Domains diff --git a/tsm-shim/Dockerfile b/tsm-shim/Dockerfile new file mode 100644 index 0000000..d8c8072 --- /dev/null +++ b/tsm-shim/Dockerfile @@ -0,0 +1,21 @@ +# Pure stdlib — no pip, no build deps. Built & pushed to GHCR by +# .github/workflows/build-tsm-shim.yml. 
+FROM python:3.12-slim + +LABEL org.opencontainers.image.title="dstack-tsm-shim" +LABEL org.opencontainers.image.description="configfs-tsm compatibility shim: re-exposes dstack guest-agent GetQuote as inblob/outblob FIFOs" +LABEL org.opencontainers.image.source="https://github.com/Dstack-TEE/dstack-examples" + +COPY tsm_shim.py /usr/local/bin/tsm_shim.py +COPY demo-app.py /usr/local/bin/demo-app.py + +ENV TSM_REPORT_DIR=/run/tsm/report \ + DSTACK_SOCKET=/var/run/dstack.sock + +# Report healthy only once both FIFOs exist, so an app can gate on +# `depends_on: { tsm-shim: { condition: service_healthy } }` with no race. +HEALTHCHECK --interval=2s --timeout=2s --retries=30 --start-period=1s \ + CMD test -p "$TSM_REPORT_DIR/inblob" && test -p "$TSM_REPORT_DIR/outblob" || exit 1 + +# Default role is the shim daemon (reads TSM_REPORT_DIR / DSTACK_SOCKET from env). +ENTRYPOINT ["python3", "/usr/local/bin/tsm_shim.py"] diff --git a/tsm-shim/README.md b/tsm-shim/README.md new file mode 100644 index 0000000..cbe78d6 --- /dev/null +++ b/tsm-shim/README.md @@ -0,0 +1,114 @@ +# configfs-tsm shim (run unmodified TDX attestation binaries) + +Some programs request a TDX quote through the **standard Linux interfaces** — +`configfs-tsm` (`/sys/kernel/config/tsm/report/*`, with `inblob`/`outblob`) and a +`/dev/tdx-guest` device — rather than through the dstack SDK / guest-agent +socket. On a stock dstack CVM those kernel interfaces aren't exposed to app +containers, so such binaries fail out of the box. + +This example ships a small **sidecar** that bridges the gap. It re-exposes the +guest-agent's `GetQuote` RPC under the configfs-tsm file ABI: your app writes +`report_data` to `inblob` and reads the raw Intel DCAP TDX quote from `outblob`, +exactly as it would against the kernel. + +- **No OS change** — pure userspace, runs in a normal container. 
+- **No FUSE, no `CAP_SYS_ADMIN`, no privileged mode, no device passthrough.** +- **No weaker attestation** — the quote is the genuine hardware quote and + `report_data` is forwarded byte-for-byte. + +The shim image is built and published to GHCR by +[`.github/workflows/build-tsm-shim.yml`](../.github/workflows/build-tsm-shim.yml): +`ghcr.io/dstack-tee/dstack-tsm-shim`. + +## Try it + +```bash +phala deploy -n tsm-shim-example -c docker-compose.yaml +phala cvms logs -c app +``` + +Expected `app` log: + +``` +quote length : 5010 bytes +quote header : 0400 (a TDX v4 quote starts with 0400) +report_data bound in quote: True +PASS - unmodified configfs-tsm app got a real TDX quote via the shim +``` + +## Adopt it in your app + +Add the `tsm-shim` service and two volumes, then add the four lines marked `(+)` +to your existing service: + +```yaml +services: + tsm-shim: + image: ghcr.io/dstack-tee/dstack-tsm-shim:latest + restart: unless-stopped + volumes: + - /var/run/dstack.sock:/var/run/dstack.sock + - tsm-report:/run/tsm/report + + my-app: + image: your-app:latest + # ... your existing config ... + depends_on: + tsm-shim: + condition: service_healthy # (+) wait until the shim is ready + environment: + - TSM_REPORT_PATH=/run/tsm/report # (+) point your app at the shim dir + volumes: + - tsm-report:/run/tsm/report # (+) see the shim's inblob/outblob + - tsm-devstub:/dev/tdx-guest # (+) satisfy /dev/tdx-guest checks + +volumes: + tsm-report: {} + tsm-devstub: {} +``` + +If your binary **hard-codes** `/sys/kernel/config/tsm/report`, mount the shared +volume there instead of using `TSM_REPORT_PATH`: + +```yaml + volumes: + - tsm-report:/sys/kernel/config/tsm/report +``` + +For production, pin the image by digest (e.g. +`ghcr.io/dstack-tee/dstack-tsm-shim:latest@sha256:...`). + +## How it works + +`tsm-shim` exposes `inblob` and `outblob` as **named pipes (FIFOs)** in a shared +volume. 
A read of `outblob` blocks until the quote for the most recent `inblob` +write is ready — which matches configfs-tsm's write-then-read contract with no +race and no privileges. When `inblob` is written, the shim calls +`POST /GetQuote` on `/var/run/dstack.sock` and writes the returned quote to +`outblob`. The image reports healthy only once both FIFOs exist, so the app can +gate on `depends_on: { condition: service_healthy }`. + +The `/dev/tdx-guest` device can't be created inside another container from a +sidecar, so an empty volume is mounted at that path — enough for the common +"does the device exist?" check. (dstack permits mounting under `/dev`.) + +## Files + +| File | Purpose | +|------|---------| +| `tsm_shim.py` | the sidecar daemon (pure Python stdlib, no dependencies) | +| `demo-app.py` | a stand-in unmodified configfs-tsm consumer (bundled self-test) | +| `Dockerfile` | builds the published image | +| `docker-compose.yaml` | the shim + demo app wired together | + +## Limitations + +- Covers the **configfs-tsm `inblob`/`outblob`** path (used by `go-configfs-tsm`, + recent `libtdx-attest`, etc.). +- Does **not** emulate the `/dev/tdx-guest` `TDX_CMD_GET_REPORT0` ioctl. That + returns a raw, locally-MAC'd TDREPORT, which dstack does not expose and which + can't be derived from a quote — so it isn't recoverable in userspace by any + shim. Binaries that drive the device by ioctl rather than configfs are out of + scope. +- One quote at a time per shim instance (matches the kernel's single + configfs-tsm entry). Run one shim per app. diff --git a/tsm-shim/demo-app.py b/tsm-shim/demo-app.py new file mode 100644 index 0000000..e9ba3f3 --- /dev/null +++ b/tsm-shim/demo-app.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +"""Demo "unmodified" attestation app — a standard configfs-tsm consumer. + +Does exactly what a real binary built against the Linux TSM interface does: + 1. check that the TDX guest device exists, + 2. 
write up to 64 bytes of report_data to /inblob, + 3. read the raw Intel DCAP TDX quote from /outblob. + +The only deployment-specific knob is TSM_REPORT_PATH. On a stock dstack CVM that +directory is served by the tsm-shim sidecar instead of the kernel. +""" +import hashlib +import os +import sys +import time + + +def detect_tdx() -> bool: + return os.path.exists("/dev/tdx-guest") or os.path.exists("/dev/tdx_guest") + + +def main() -> None: + report_dir = os.environ.get("TSM_REPORT_PATH", "/sys/kernel/config/tsm/report/dstack") + + if not detect_tdx(): + print("FAIL: no TDX guest device (/dev/tdx-guest)") + sys.exit(1) + + # `depends_on: condition: service_healthy` already gates startup on the shim + # FIFOs existing; this short retry just mirrors what real attestation libs do. + for _ in range(100): + if os.path.exists(f"{report_dir}/inblob"): + break + time.sleep(0.1) + + report_data = hashlib.sha256(b"dstack-tsm-shim-demo").digest() # 32 bytes + with open(f"{report_dir}/inblob", "wb") as f: + f.write(report_data[:64].ljust(64, b"\0")) + with open(f"{report_dir}/outblob", "rb") as f: + quote = f.read() + + print(f"report_dir : {report_dir}") + print(f"report_data : {report_data.hex()}") + print(f"quote length : {len(quote)} bytes") + print(f"quote header : {quote[:2].hex()} (a TDX v4 quote starts with 0400)") + bound = report_data[:32] in quote + print(f"report_data bound in quote: {bound}") + print( + "PASS - unmodified configfs-tsm app got a real TDX quote via the shim" + if (quote[:2].hex() == "0400" and bound) + else "FAIL - unexpected quote (header or report_data binding off)" + ) + + sys.stdout.flush() + while True: + time.sleep(3600) + + +if __name__ == "__main__": + main() diff --git a/tsm-shim/docker-compose.yaml b/tsm-shim/docker-compose.yaml new file mode 100644 index 0000000..ea223e8 --- /dev/null +++ b/tsm-shim/docker-compose.yaml @@ -0,0 +1,40 @@ +name: tsm-shim-example + +# Run an UNMODIFIED configfs-tsm attestation binary on a stock dstack CVM. 
+# +# `tsm-shim` is a prebuilt sidecar (built & published to GHCR by +# .github/workflows/build-tsm-shim.yml) that re-exposes the dstack guest-agent's +# GetQuote RPC under the standard configfs-tsm file ABI (inblob/outblob). Your +# app talks to the kernel-style interface it already expects; the shim forwards +# report_data to real TDX hardware and returns the genuine quote. No OS change, +# no FUSE, no privileged container, no extra capabilities. +# +# The `app` service below is a self-test (it reuses the shim image only to run a +# bundled demo client). In your own deployment, replace `app` with your service +# and add the four lines marked (+). + +services: + # ---------- the shim sidecar ---------- + tsm-shim: + image: ghcr.io/dstack-tee/dstack-tsm-shim:latest + restart: unless-stopped + volumes: + - /var/run/dstack.sock:/var/run/dstack.sock # source of real quotes + - tsm-report:/run/tsm/report # FIFOs shared with the app + + # ---------- your app (here: a bundled demo client) ---------- + app: + image: ghcr.io/dstack-tee/dstack-tsm-shim:latest # <- replace with your image + entrypoint: ["python3", "/usr/local/bin/demo-app.py"] # <- your app's entrypoint + depends_on: + tsm-shim: + condition: service_healthy # (+) wait until FIFOs exist + environment: + - TSM_REPORT_PATH=/run/tsm/report # (+) point the app at the shim + volumes: + - tsm-report:/run/tsm/report # (+) see the shim's FIFOs + - tsm-devstub:/dev/tdx-guest # (+) make /dev/tdx-guest "exist" + +volumes: + tsm-report: {} + tsm-devstub: {} diff --git a/tsm-shim/tsm_shim.py b/tsm-shim/tsm_shim.py new file mode 100644 index 0000000..2bec2d5 --- /dev/null +++ b/tsm-shim/tsm_shim.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +"""dstack -> configfs-tsm compatibility shim (FIFO mode, zero privileges). 
+ +Re-exposes the dstack guest-agent `GetQuote` RPC under the standard +configfs-tsm file ABI that unmodified attestation binaries expect: + + /inblob write <=64 bytes of report_data + /outblob read -> raw Intel DCAP TDX quote + +`inblob` and `outblob` are implemented as named pipes (FIFOs), so a read of +`outblob` naturally blocks until the quote for the most recent `inblob` write +is ready. That matches the canonical configfs-tsm usage (write inblob, then +read outblob) with no race, and -- crucially -- needs NO FUSE, NO +CAP_SYS_ADMIN, and NO remount of /sys. It runs as an ordinary process in a +sidecar or a pre-launch wrapper. + +The quote itself still comes from real TDX hardware via the guest-agent over +its unix socket, so attestation is not weakened: report_data is forwarded +byte-for-byte and the returned quote is the genuine hardware quote. + +Usage: + tsm_shim.py --report-dir /run/tsm/report --socket /var/run/dstack.sock +""" +import argparse +import http.client +import json +import os +import socket +import sys +import time +import traceback + + +def log(msg): + sys.stderr.write(f"[tsm-shim] {msg}\n") + sys.stderr.flush() + + +class _UDSConnection(http.client.HTTPConnection): + """HTTPConnection that dials an AF_UNIX socket instead of TCP.""" + + def __init__(self, uds_path, timeout): + super().__init__("localhost", timeout=timeout) + self._uds_path = uds_path + + def connect(self): + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.settimeout(self.timeout) + s.connect(self._uds_path) + self.sock = s + + +def get_quote(sock_path, report_data, timeout=30): + """Call dstack guest-agent DstackGuest.GetQuote, return raw quote bytes. + + Wire format mirrors the official dstack SDK: POST /GetQuote over the unix + socket with JSON body {"report_data": ""}; response is JSON with a + hex-encoded "quote". http.client transparently handles chunked / + content-length response framing, so no extra deps are required. 
+ """ + body = json.dumps({"report_data": report_data.hex()}) + conn = _UDSConnection(sock_path, timeout) + try: + conn.request( + "POST", + "/GetQuote", + body=body, + headers={"Host": "localhost", "Content-Type": "application/json"}, + ) + resp = conn.getresponse() + data = resp.read() + if resp.status != 200: + raise RuntimeError( + f"guest-agent GetQuote returned HTTP {resp.status}: {data[:200]!r}" + ) + obj = json.loads(data) + if "quote" not in obj: + raise RuntimeError(f"GetQuote response missing 'quote': {obj}") + return bytes.fromhex(obj["quote"]) + finally: + conn.close() + + +def _make_fifo(path): + if os.path.lexists(path): + os.remove(path) + os.mkfifo(path, 0o600) + + +def serve(report_dir, sock_path): + os.makedirs(report_dir, exist_ok=True) + inblob = os.path.join(report_dir, "inblob") + outblob = os.path.join(report_dir, "outblob") + _make_fifo(inblob) + _make_fifo(outblob) + # Best-effort: expose a `provider` attribute for apps that sanity-check it. + try: + with open(os.path.join(report_dir, "provider"), "w") as f: + f.write("tdx_guest\n") + except OSError: + pass + + log(f"ready: {inblob} (write report_data), {outblob} (read quote) -> {sock_path}") + while True: + try: + # 1. Block until the app writes report_data to inblob. + with open(inblob, "rb") as f: + report_data = f.read() + if not report_data: + continue # opened+closed with no payload; ignore. + rd64 = report_data[:64].ljust(64, b"\0") + log(f"inblob: {len(report_data)} bytes; requesting hardware quote...") + try: + quote = get_quote(sock_path, rd64) + log(f"quote: {len(quote)} bytes, header={quote[:2].hex()}") + except Exception as exc: # noqa: BLE001 - surface to logs, keep serving + log(f"GetQuote failed: {exc}") + quote = b"" # deliver empty so the reader doesn't hang forever. + # 2. Block until the app opens outblob for reading, then deliver. + with open(outblob, "wb") as f: + f.write(quote) + except Exception: # noqa: BLE001 - never let the daemon die. 
+ log("serve loop error:\n" + traceback.format_exc()) + time.sleep(0.5) + + +def main(): + ap = argparse.ArgumentParser(description=__doc__) + ap.add_argument( + "--report-dir", + default=os.environ.get("TSM_REPORT_DIR", "/run/tsm/report"), + help="directory in which to expose inblob/outblob FIFOs", + ) + ap.add_argument( + "--socket", + default=os.environ.get("DSTACK_SOCKET", "/var/run/dstack.sock"), + help="path to the dstack guest-agent unix socket", + ) + args = ap.parse_args() + serve(args.report_dir, args.socket) + + +if __name__ == "__main__": + main() From 059809a3ee4e6974ff64f0d5830331feb8e49153 Mon Sep 17 00:00:00 2001 From: Hang Yin Date: Thu, 25 Jun 2026 18:40:00 +0000 Subject: [PATCH 2/3] tsm-shim: fix FIFO wedge + concurrent-writer desync, trim verbosity Addresses the review of the configfs-tsm shim: - outblob write now opens non-blocking with a deadline (TSM_OUTBLOB_DEADLINE), so a caller that writes inblob then dies no longer wedges the daemon forever. - reject inblob writes >64 bytes (a sign of racing writers) instead of returning a quote bound to ambiguous data -- fail closed, not silently wrong. - document the single in-flight requester constraint; drop the false "no race" claims; document that an empty outblob read means the quote failed. - pin the base image by digest; trim docstrings, README, and compose comments. Verified: happy path, >64B fail-closed guard, and no-wedge recovery covered by a local test; image e2e green; dev.sh validate + yamllint clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- tsm-shim/Dockerfile | 4 +- tsm-shim/README.md | 76 ++++++--------- tsm-shim/demo-app.py | 12 +-- tsm-shim/docker-compose.yaml | 17 +--- tsm-shim/tsm_shim.py | 180 ++++++++++++++++------------------- 5 files changed, 123 insertions(+), 166 deletions(-) diff --git a/tsm-shim/Dockerfile b/tsm-shim/Dockerfile index d8c8072..ba0f64f 100644 --- a/tsm-shim/Dockerfile +++ b/tsm-shim/Dockerfile @@ -1,6 +1,6 @@ # Pure stdlib — no pip, no build deps. 
Built & pushed to GHCR by # .github/workflows/build-tsm-shim.yml. -FROM python:3.12-slim +FROM python:3.12-slim@sha256:6c4dd321d176d61ea848dc8c73a4f7dbae8f70e0ee48bb411ea2f045b599fa8e LABEL org.opencontainers.image.title="dstack-tsm-shim" LABEL org.opencontainers.image.description="configfs-tsm compatibility shim: re-exposes dstack guest-agent GetQuote as inblob/outblob FIFOs" @@ -13,7 +13,7 @@ ENV TSM_REPORT_DIR=/run/tsm/report \ DSTACK_SOCKET=/var/run/dstack.sock # Report healthy only once both FIFOs exist, so an app can gate on -# `depends_on: { tsm-shim: { condition: service_healthy } }` with no race. +# `depends_on: { tsm-shim: { condition: service_healthy } }`. HEALTHCHECK --interval=2s --timeout=2s --retries=30 --start-period=1s \ CMD test -p "$TSM_REPORT_DIR/inblob" && test -p "$TSM_REPORT_DIR/outblob" || exit 1 diff --git a/tsm-shim/README.md b/tsm-shim/README.md index cbe78d6..c03004d 100644 --- a/tsm-shim/README.md +++ b/tsm-shim/README.md @@ -2,22 +2,18 @@ Some programs request a TDX quote through the **standard Linux interfaces** — `configfs-tsm` (`/sys/kernel/config/tsm/report/*`, with `inblob`/`outblob`) and a -`/dev/tdx-guest` device — rather than through the dstack SDK / guest-agent -socket. On a stock dstack CVM those kernel interfaces aren't exposed to app -containers, so such binaries fail out of the box. +`/dev/tdx-guest` device — rather than through the dstack SDK / guest-agent socket. +A stock dstack CVM doesn't expose those kernel interfaces to app containers, so +such binaries fail out of the box. -This example ships a small **sidecar** that bridges the gap. It re-exposes the -guest-agent's `GetQuote` RPC under the configfs-tsm file ABI: your app writes +This example ships a small **sidecar** that bridges the gap: it re-exposes the +guest-agent's `GetQuote` RPC under the configfs-tsm file ABI. Your app writes `report_data` to `inblob` and reads the raw Intel DCAP TDX quote from `outblob`, -exactly as it would against the kernel. 
+exactly as against the kernel. No OS change, no FUSE, no privileged container; the +quote is the genuine hardware quote and `report_data` is forwarded byte-for-byte. -- **No OS change** — pure userspace, runs in a normal container. -- **No FUSE, no `CAP_SYS_ADMIN`, no privileged mode, no device passthrough.** -- **No weaker attestation** — the quote is the genuine hardware quote and - `report_data` is forwarded byte-for-byte. - -The shim image is built and published to GHCR by -[`.github/workflows/build-tsm-shim.yml`](../.github/workflows/build-tsm-shim.yml): +The image is built and published to GHCR by +[`build-tsm-shim.yml`](../.github/workflows/build-tsm-shim.yml): `ghcr.io/dstack-tee/dstack-tsm-shim`. ## Try it @@ -69,46 +65,32 @@ volumes: If your binary **hard-codes** `/sys/kernel/config/tsm/report`, mount the shared volume there instead of using `TSM_REPORT_PATH`: - -```yaml - volumes: - - tsm-report:/sys/kernel/config/tsm/report -``` - -For production, pin the image by digest (e.g. -`ghcr.io/dstack-tee/dstack-tsm-shim:latest@sha256:...`). +`- tsm-report:/sys/kernel/config/tsm/report`. For production, pin the image by +digest (`ghcr.io/dstack-tee/dstack-tsm-shim@sha256:...`). ## How it works -`tsm-shim` exposes `inblob` and `outblob` as **named pipes (FIFOs)** in a shared -volume. A read of `outblob` blocks until the quote for the most recent `inblob` -write is ready — which matches configfs-tsm's write-then-read contract with no -race and no privileges. When `inblob` is written, the shim calls -`POST /GetQuote` on `/var/run/dstack.sock` and writes the returned quote to -`outblob`. The image reports healthy only once both FIFOs exist, so the app can -gate on `depends_on: { condition: service_healthy }`. - -The `/dev/tdx-guest` device can't be created inside another container from a -sidecar, so an empty volume is mounted at that path — enough for the common -"does the device exist?" check. (dstack permits mounting under `/dev`.) 
- -## Files +`inblob`/`outblob` are **named pipes (FIFOs)** in a shared volume; a read of +`outblob` blocks until the quote is ready (configfs-tsm's write-then-read +contract). On an `inblob` write the shim `POST`s `/GetQuote` to +`/var/run/dstack.sock` and writes the quote to `outblob`. The image reports +healthy only once both FIFOs exist, so the app gates on `service_healthy`. An +**empty `outblob` read means the quote failed** (the shim logs why). -| File | Purpose | -|------|---------| -| `tsm_shim.py` | the sidecar daemon (pure Python stdlib, no dependencies) | -| `demo-app.py` | a stand-in unmodified configfs-tsm consumer (bundled self-test) | -| `Dockerfile` | builds the published image | -| `docker-compose.yaml` | the shim + demo app wired together | +`/dev/tdx-guest` can't be created in another container from a sidecar, so an +empty volume is mounted there to satisfy the common "does the device exist?" +check (dstack permits mounting under `/dev`). ## Limitations - Covers the **configfs-tsm `inblob`/`outblob`** path (used by `go-configfs-tsm`, recent `libtdx-attest`, etc.). -- Does **not** emulate the `/dev/tdx-guest` `TDX_CMD_GET_REPORT0` ioctl. That - returns a raw, locally-MAC'd TDREPORT, which dstack does not expose and which - can't be derived from a quote — so it isn't recoverable in userspace by any - shim. Binaries that drive the device by ioctl rather than configfs are out of - scope. -- One quote at a time per shim instance (matches the kernel's single - configfs-tsm entry). Run one shim per app. +- Does **not** emulate the `/dev/tdx-guest` `TDX_CMD_GET_REPORT0` ioctl: it + returns a raw, locally-MAC'd TDREPORT, which dstack doesn't expose and which + can't be derived from a quote, so it isn't recoverable in userspace by any + shim. Binaries that drive the device by ioctl are out of scope. 
+- **Single in-flight requester** per shim — the shared `inblob`/`outblob` pair + can't correlate concurrent callers (the kernel gives each opener its own + `report//`; this doesn't). Run one shim per app; the shim rejects an + `inblob` write larger than 64 bytes (a sign of racing writers) rather than + return an ambiguous quote. diff --git a/tsm-shim/demo-app.py b/tsm-shim/demo-app.py index e9ba3f3..96e284a 100644 --- a/tsm-shim/demo-app.py +++ b/tsm-shim/demo-app.py @@ -1,13 +1,7 @@ #!/usr/bin/env python3 -"""Demo "unmodified" attestation app — a standard configfs-tsm consumer. - -Does exactly what a real binary built against the Linux TSM interface does: - 1. check that the TDX guest device exists, - 2. write up to 64 bytes of report_data to /inblob, - 3. read the raw Intel DCAP TDX quote from /outblob. - -The only deployment-specific knob is TSM_REPORT_PATH. On a stock dstack CVM that -directory is served by the tsm-shim sidecar instead of the kernel. +"""Demo configfs-tsm consumer: check the device, write report_data to inblob, +read the quote from outblob. TSM_REPORT_PATH points at the shim (vs the kernel's +/sys/kernel/config/tsm/report). An empty outblob read means the quote failed. """ import hashlib import os diff --git a/tsm-shim/docker-compose.yaml b/tsm-shim/docker-compose.yaml index ea223e8..1a6a878 100644 --- a/tsm-shim/docker-compose.yaml +++ b/tsm-shim/docker-compose.yaml @@ -1,17 +1,10 @@ name: tsm-shim-example -# Run an UNMODIFIED configfs-tsm attestation binary on a stock dstack CVM. -# -# `tsm-shim` is a prebuilt sidecar (built & published to GHCR by -# .github/workflows/build-tsm-shim.yml) that re-exposes the dstack guest-agent's -# GetQuote RPC under the standard configfs-tsm file ABI (inblob/outblob). Your -# app talks to the kernel-style interface it already expects; the shim forwards -# report_data to real TDX hardware and returns the genuine quote. No OS change, -# no FUSE, no privileged container, no extra capabilities. 
-# -# The `app` service below is a self-test (it reuses the shim image only to run a -# bundled demo client). In your own deployment, replace `app` with your service -# and add the four lines marked (+). +# Run an unmodified configfs-tsm attestation binary on a stock dstack CVM: the +# tsm-shim sidecar re-exposes the guest-agent GetQuote RPC as inblob/outblob. +# `app` is a self-test (reuses the shim image to run a bundled demo client); in +# your deployment, replace it with your service and add the four (+) lines. +# See README.md for details. services: # ---------- the shim sidecar ---------- diff --git a/tsm-shim/tsm_shim.py b/tsm-shim/tsm_shim.py index 2bec2d5..f70a95a 100644 --- a/tsm-shim/tsm_shim.py +++ b/tsm-shim/tsm_shim.py @@ -1,34 +1,33 @@ #!/usr/bin/env python3 -"""dstack -> configfs-tsm compatibility shim (FIFO mode, zero privileges). +"""dstack -> configfs-tsm shim. -Re-exposes the dstack guest-agent `GetQuote` RPC under the standard -configfs-tsm file ABI that unmodified attestation binaries expect: +Serves /inblob (write report_data, <=64 bytes) and /outblob (read the +raw Intel DCAP TDX quote) by forwarding to the dstack guest-agent GetQuote RPC. +inblob/outblob are FIFOs, so a read of outblob blocks until the quote is ready. +report_data is forwarded byte-for-byte, so the quote is the genuine hardware +quote. - /inblob write <=64 bytes of report_data - /outblob read -> raw Intel DCAP TDX quote +Serves ONE request at a time and supports a SINGLE in-flight requester -- like a +single configfs-tsm report entry, it cannot correlate concurrent callers. Run one +shim per app. An empty outblob read means the quote failed. -`inblob` and `outblob` are implemented as named pipes (FIFOs), so a read of -`outblob` naturally blocks until the quote for the most recent `inblob` write -is ready. 
That matches the canonical configfs-tsm usage (write inblob, then -read outblob) with no race, and -- crucially -- needs NO FUSE, NO -CAP_SYS_ADMIN, and NO remount of /sys. It runs as an ordinary process in a -sidecar or a pre-launch wrapper. - -The quote itself still comes from real TDX hardware via the guest-agent over -its unix socket, so attestation is not weakened: report_data is forwarded -byte-for-byte and the returned quote is the genuine hardware quote. - -Usage: - tsm_shim.py --report-dir /run/tsm/report --socket /var/run/dstack.sock +Env: TSM_REPORT_DIR (default /run/tsm/report), DSTACK_SOCKET (default +/var/run/dstack.sock). """ -import argparse +import errno +import fcntl import http.client import json import os import socket import sys import time -import traceback + +REPORT_DIR = os.environ.get("TSM_REPORT_DIR", "/run/tsm/report") +SOCKET = os.environ.get("DSTACK_SOCKET", "/var/run/dstack.sock") +# How long to wait for the app to open outblob for reading before giving up, so a +# caller that writes inblob then dies can't wedge the daemon. +OUTBLOB_DEADLINE = float(os.environ.get("TSM_OUTBLOB_DEADLINE", "30")) def log(msg): @@ -36,109 +35,98 @@ def log(msg): sys.stderr.flush() -class _UDSConnection(http.client.HTTPConnection): - """HTTPConnection that dials an AF_UNIX socket instead of TCP.""" +class _UDS(http.client.HTTPConnection): + """HTTPConnection over an AF_UNIX socket.""" - def __init__(self, uds_path, timeout): + def __init__(self, path, timeout): super().__init__("localhost", timeout=timeout) - self._uds_path = uds_path + self._path = path def connect(self): - s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - s.settimeout(self.timeout) - s.connect(self._uds_path) - self.sock = s - + self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + self.sock.settimeout(self.timeout) + self.sock.connect(self._path) -def get_quote(sock_path, report_data, timeout=30): - """Call dstack guest-agent DstackGuest.GetQuote, return raw quote bytes. 
- Wire format mirrors the official dstack SDK: POST /GetQuote over the unix - socket with JSON body {"report_data": ""}; response is JSON with a - hex-encoded "quote". http.client transparently handles chunked / - content-length response framing, so no extra deps are required. - """ - body = json.dumps({"report_data": report_data.hex()}) - conn = _UDSConnection(sock_path, timeout) +def get_quote(report_data, timeout=30): + conn = _UDS(SOCKET, timeout) try: - conn.request( - "POST", - "/GetQuote", - body=body, - headers={"Host": "localhost", "Content-Type": "application/json"}, - ) + conn.request("POST", "/GetQuote", + body=json.dumps({"report_data": report_data.hex()}), + headers={"Host": "localhost", "Content-Type": "application/json"}) resp = conn.getresponse() data = resp.read() if resp.status != 200: - raise RuntimeError( - f"guest-agent GetQuote returned HTTP {resp.status}: {data[:200]!r}" - ) - obj = json.loads(data) - if "quote" not in obj: - raise RuntimeError(f"GetQuote response missing 'quote': {obj}") - return bytes.fromhex(obj["quote"]) + raise RuntimeError(f"guest-agent returned http {resp.status}: {data[:200]!r}") + quote = json.loads(data).get("quote") + if not quote: + raise RuntimeError(f"no quote in response: {data[:200]!r}") + return bytes.fromhex(quote) finally: conn.close() -def _make_fifo(path): +def open_write_deadline(path, deadline=30.0): + """open a FIFO for writing, waiting up to `deadline`s for a reader. + + Returns a blocking fd, or None if no reader showed up -- so a caller that + writes inblob then dies can't wedge the daemon forever. 
+ """ + end = time.monotonic() + deadline + while True: + try: + fd = os.open(path, os.O_WRONLY | os.O_NONBLOCK) + except OSError as exc: + if exc.errno == errno.ENXIO and time.monotonic() < end: + time.sleep(0.05) + continue + return None + fcntl.fcntl(fd, fcntl.F_SETFL, fcntl.fcntl(fd, fcntl.F_GETFL) & ~os.O_NONBLOCK) + return fd + + +def make_fifo(path): if os.path.lexists(path): os.remove(path) os.mkfifo(path, 0o600) -def serve(report_dir, sock_path): - os.makedirs(report_dir, exist_ok=True) - inblob = os.path.join(report_dir, "inblob") - outblob = os.path.join(report_dir, "outblob") - _make_fifo(inblob) - _make_fifo(outblob) - # Best-effort: expose a `provider` attribute for apps that sanity-check it. - try: - with open(os.path.join(report_dir, "provider"), "w") as f: - f.write("tdx_guest\n") - except OSError: - pass +def main(): + os.makedirs(REPORT_DIR, exist_ok=True) + inblob = os.path.join(REPORT_DIR, "inblob") + outblob = os.path.join(REPORT_DIR, "outblob") + make_fifo(inblob) + make_fifo(outblob) + log(f"ready: {REPORT_DIR} -> {SOCKET}") - log(f"ready: {inblob} (write report_data), {outblob} (read quote) -> {sock_path}") while True: try: - # 1. Block until the app writes report_data to inblob. - with open(inblob, "rb") as f: + with open(inblob, "rb") as f: # blocks until the app writes report_data = f.read() if not report_data: - continue # opened+closed with no payload; ignore. - rd64 = report_data[:64].ljust(64, b"\0") - log(f"inblob: {len(report_data)} bytes; requesting hardware quote...") - try: - quote = get_quote(sock_path, rd64) - log(f"quote: {len(quote)} bytes, header={quote[:2].hex()}") - except Exception as exc: # noqa: BLE001 - surface to logs, keep serving - log(f"GetQuote failed: {exc}") - quote = b"" # deliver empty so the reader doesn't hang forever. - # 2. Block until the app opens outblob for reading, then deliver. 
- with open(outblob, "wb") as f: + continue + if len(report_data) > 64: + # >64 bytes means more than one writer raced on inblob -- fail + # closed rather than hand back a quote bound to ambiguous data. + log(f"rejecting inblob: {len(report_data)} bytes (>64); concurrent writers?") + quote = b"" + else: + try: + quote = get_quote(report_data.ljust(64, b"\0")) + log(f"quote {len(quote)} bytes, header={quote[:2].hex()}") + except Exception as exc: + log(f"getquote failed: {exc}") # deliver empty == failure signal + quote = b"" + fd = open_write_deadline(outblob, OUTBLOB_DEADLINE) + if fd is None: + log("no reader for outblob within deadline; dropping") + continue + with os.fdopen(fd, "wb") as f: f.write(quote) - except Exception: # noqa: BLE001 - never let the daemon die. - log("serve loop error:\n" + traceback.format_exc()) + except Exception as exc: + log(f"serve loop error: {exc}") time.sleep(0.5) -def main(): - ap = argparse.ArgumentParser(description=__doc__) - ap.add_argument( - "--report-dir", - default=os.environ.get("TSM_REPORT_DIR", "/run/tsm/report"), - help="directory in which to expose inblob/outblob FIFOs", - ) - ap.add_argument( - "--socket", - default=os.environ.get("DSTACK_SOCKET", "/var/run/dstack.sock"), - help="path to the dstack guest-agent unix socket", - ) - args = ap.parse_args() - serve(args.report_dir, args.socket) - - if __name__ == "__main__": main() From c1278e93eb96b636a843255b40213ebb974be677 Mon Sep 17 00:00:00 2001 From: Hang Yin Date: Thu, 25 Jun 2026 18:43:56 +0000 Subject: [PATCH 3/3] =?UTF-8?q?tsm-shim:=20tighten=20README=20=E2=80=94=20?= =?UTF-8?q?lead=20with=20the=20point,=20drop=20the=20rest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 (1M context) --- tsm-shim/README.md | 86 +++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 62 deletions(-) diff --git a/tsm-shim/README.md b/tsm-shim/README.md index c03004d..94d1a95 
100644 --- a/tsm-shim/README.md +++ b/tsm-shim/README.md @@ -1,41 +1,18 @@ -# configfs-tsm shim (run unmodified TDX attestation binaries) +# configfs-tsm shim -Some programs request a TDX quote through the **standard Linux interfaces** — -`configfs-tsm` (`/sys/kernel/config/tsm/report/*`, with `inblob`/`outblob`) and a -`/dev/tdx-guest` device — rather than through the dstack SDK / guest-agent socket. -A stock dstack CVM doesn't expose those kernel interfaces to app containers, so -such binaries fail out of the box. +Some attestation binaries get their TDX quote through the kernel's `configfs-tsm` +files (`/sys/kernel/config/tsm/report/*` — write `inblob`, read `outblob`) instead +of the dstack SDK. dstack doesn't expose those files to containers, so they fail. -This example ships a small **sidecar** that bridges the gap: it re-exposes the -guest-agent's `GetQuote` RPC under the configfs-tsm file ABI. Your app writes -`report_data` to `inblob` and reads the raw Intel DCAP TDX quote from `outblob`, -exactly as against the kernel. No OS change, no FUSE, no privileged container; the -quote is the genuine hardware quote and `report_data` is forwarded byte-for-byte. +This sidecar bridges them: it serves `inblob`/`outblob` from a shared volume and +forwards each request to the guest-agent's `GetQuote`. The quote is the real +hardware quote (`report_data` passed through unchanged), so an unmodified binary +works with only docker-compose changes — no OS change, no FUSE, no privileged +container. CI publishes the image to `ghcr.io/dstack-tee/dstack-tsm-shim`. -The image is built and published to GHCR by -[`build-tsm-shim.yml`](../.github/workflows/build-tsm-shim.yml): -`ghcr.io/dstack-tee/dstack-tsm-shim`. 
+## Use it -## Try it - -```bash -phala deploy -n tsm-shim-example -c docker-compose.yaml -phala cvms logs -c app -``` - -Expected `app` log: - -``` -quote length : 5010 bytes -quote header : 0400 (a TDX v4 quote starts with 0400) -report_data bound in quote: True -PASS - unmodified configfs-tsm app got a real TDX quote via the shim -``` - -## Adopt it in your app - -Add the `tsm-shim` service and two volumes, then add the four lines marked `(+)` -to your existing service: +Add the sidecar, then point your app at it with the `(+)` lines: ```yaml services: @@ -48,7 +25,6 @@ services: my-app: image: your-app:latest - # ... your existing config ... depends_on: tsm-shim: condition: service_healthy # (+) wait until the shim is ready @@ -63,34 +39,20 @@ volumes: tsm-devstub: {} ``` -If your binary **hard-codes** `/sys/kernel/config/tsm/report`, mount the shared -volume there instead of using `TSM_REPORT_PATH`: -`- tsm-report:/sys/kernel/config/tsm/report`. For production, pin the image by -digest (`ghcr.io/dstack-tee/dstack-tsm-shim@sha256:...`). - -## How it works +If your binary hard-codes `/sys/kernel/config/tsm/report`, mount the volume there +instead of setting `TSM_REPORT_PATH`. For production, pin the image by digest. -`inblob`/`outblob` are **named pipes (FIFOs)** in a shared volume; a read of -`outblob` blocks until the quote is ready (configfs-tsm's write-then-read -contract). On an `inblob` write the shim `POST`s `/GetQuote` to -`/var/run/dstack.sock` and writes the quote to `outblob`. The image reports -healthy only once both FIFOs exist, so the app gates on `service_healthy`. An -**empty `outblob` read means the quote failed** (the shim logs why). +## Try the demo -`/dev/tdx-guest` can't be created in another container from a sidecar, so an -empty volume is mounted there to satisfy the common "does the device exist?" -check (dstack permits mounting under `/dev`). 
+```bash +phala deploy -n tsm-shim-example -c docker-compose.yaml +phala cvms logs -c app # expect PASS and a ~5 KB quote +``` -## Limitations +## Good to know -- Covers the **configfs-tsm `inblob`/`outblob`** path (used by `go-configfs-tsm`, - recent `libtdx-attest`, etc.). -- Does **not** emulate the `/dev/tdx-guest` `TDX_CMD_GET_REPORT0` ioctl: it - returns a raw, locally-MAC'd TDREPORT, which dstack doesn't expose and which - can't be derived from a quote, so it isn't recoverable in userspace by any - shim. Binaries that drive the device by ioctl are out of scope. -- **Single in-flight requester** per shim — the shared `inblob`/`outblob` pair - can't correlate concurrent callers (the kernel gives each opener its own - `report/<name>/`; this doesn't). Run one shim per app; the shim rejects an - `inblob` write larger than 64 bytes (a sign of racing writers) rather than - return an ambiguous quote. +- Covers the configfs-tsm `inblob`/`outblob` path (go-configfs-tsm, recent + libtdx-attest). It does **not** handle the `/dev/tdx-guest` ioctl, which needs a + raw TDREPORT that dstack doesn't expose. +- One request at a time, one shim per app — a shared `inblob`/`outblob` can't tell + concurrent callers apart. An empty `outblob` read means the quote failed.