diff --git a/.github/workflows/build-tsm-shim.yml b/.github/workflows/build-tsm-shim.yml new file mode 100644 index 0000000..e8eb966 --- /dev/null +++ b/.github/workflows/build-tsm-shim.yml @@ -0,0 +1,55 @@ +name: build tsm-shim + +on: + push: + branches: [main] + paths: + - 'tsm-shim/**' + - '.github/workflows/build-tsm-shim.yml' + pull_request: + paths: + - 'tsm-shim/**' + - '.github/workflows/build-tsm-shim.yml' + workflow_dispatch: + +permissions: + contents: read + packages: write + +jobs: + build: + runs-on: ubuntu-latest + env: + IMAGE: ghcr.io/dstack-tee/dstack-tsm-shim + steps: + - uses: actions/checkout@v4 + + - uses: docker/setup-buildx-action@v3 + + # On PRs (incl. forks) GITHUB_TOKEN lacks packages:write, so only build. + - name: Log in to GHCR + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Docker metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE }} + tags: | + type=raw,value=latest,enable={{is_default_branch}} + type=sha,format=short + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: tsm-shim + file: tsm-shim/Dockerfile + platforms: linux/amd64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/README.md b/README.md index 4e6e5e5..5270b9c 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,7 @@ volumes: |---------|-------------|--------| | [timelock-nts](./timelock-nts) | Raw socket usage (what the SDK wraps) | Available | | [attestation/configid-based](./attestation/configid-based) | ConfigID-based verification | Available | +| [tsm-shim](./tsm-shim) | Run unmodified `configfs-tsm` binaries (inblob/outblob) via a sidecar | Available | ### Gateway & Domains diff --git a/tsm-shim/Dockerfile b/tsm-shim/Dockerfile new file mode 100644 index 0000000..ba0f64f 
--- /dev/null +++ b/tsm-shim/Dockerfile @@ -0,0 +1,21 @@ +# Pure stdlib — no pip, no build deps. Built & pushed to GHCR by +# .github/workflows/build-tsm-shim.yml. +FROM python:3.12-slim@sha256:6c4dd321d176d61ea848dc8c73a4f7dbae8f70e0ee48bb411ea2f045b599fa8e + +LABEL org.opencontainers.image.title="dstack-tsm-shim" +LABEL org.opencontainers.image.description="configfs-tsm compatibility shim: re-exposes dstack guest-agent GetQuote as inblob/outblob FIFOs" +LABEL org.opencontainers.image.source="https://github.com/Dstack-TEE/dstack-examples" + +COPY tsm_shim.py /usr/local/bin/tsm_shim.py +COPY demo-app.py /usr/local/bin/demo-app.py + +ENV TSM_REPORT_DIR=/run/tsm/report \ + DSTACK_SOCKET=/var/run/dstack.sock + +# Report healthy only once both FIFOs exist, so an app can gate on +# `depends_on: { tsm-shim: { condition: service_healthy } }`. +HEALTHCHECK --interval=2s --timeout=2s --retries=30 --start-period=1s \ + CMD test -p "$TSM_REPORT_DIR/inblob" && test -p "$TSM_REPORT_DIR/outblob" || exit 1 + +# Default role is the shim daemon (reads TSM_REPORT_DIR / DSTACK_SOCKET from env). +ENTRYPOINT ["python3", "/usr/local/bin/tsm_shim.py"] diff --git a/tsm-shim/README.md b/tsm-shim/README.md new file mode 100644 index 0000000..94d1a95 --- /dev/null +++ b/tsm-shim/README.md @@ -0,0 +1,58 @@ +# configfs-tsm shim + +Some attestation binaries get their TDX quote through the kernel's `configfs-tsm` +files (`/sys/kernel/config/tsm/report/*` — write `inblob`, read `outblob`) instead +of the dstack SDK. dstack doesn't expose those files to containers, so they fail. + +This sidecar bridges them: it serves `inblob`/`outblob` from a shared volume and +forwards each request to the guest-agent's `GetQuote`. The quote is the real +hardware quote (`report_data` passed through unchanged), so an unmodified binary +works with only docker-compose changes — no OS change, no FUSE, no privileged +container. CI publishes the image to `ghcr.io/dstack-tee/dstack-tsm-shim`. 
+ +## Use it + +Add the sidecar, then point your app at it with the `(+)` lines: + +```yaml +services: + tsm-shim: + image: ghcr.io/dstack-tee/dstack-tsm-shim:latest + restart: unless-stopped + volumes: + - /var/run/dstack.sock:/var/run/dstack.sock + - tsm-report:/run/tsm/report + + my-app: + image: your-app:latest + depends_on: + tsm-shim: + condition: service_healthy # (+) wait until the shim is ready + environment: + - TSM_REPORT_PATH=/run/tsm/report # (+) point your app at the shim dir + volumes: + - tsm-report:/run/tsm/report # (+) see the shim's inblob/outblob + - tsm-devstub:/dev/tdx-guest # (+) satisfy /dev/tdx-guest checks + +volumes: + tsm-report: {} + tsm-devstub: {} +``` + +If your binary hard-codes `/sys/kernel/config/tsm/report`, mount the volume there +instead of setting `TSM_REPORT_PATH`. For production, pin the image by digest. + +## Try the demo + +```bash +phala deploy -n tsm-shim-example -c docker-compose.yaml +phala cvms logs -c app # expect PASS and a ~5 KB quote +``` + +## Good to know + +- Covers the configfs-tsm `inblob`/`outblob` path (go-configfs-tsm, recent + libtdx-attest). It does **not** handle the `/dev/tdx-guest` ioctl, which needs a + raw TDREPORT that dstack doesn't expose. +- One request at a time, one shim per app — a shared `inblob`/`outblob` can't tell + concurrent callers apart. An empty `outblob` read means the quote failed. diff --git a/tsm-shim/demo-app.py b/tsm-shim/demo-app.py new file mode 100644 index 0000000..96e284a --- /dev/null +++ b/tsm-shim/demo-app.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +"""Demo configfs-tsm consumer: check the device, write report_data to inblob, +read the quote from outblob. TSM_REPORT_PATH points at the shim (vs the kernel's +/sys/kernel/config/tsm/report). An empty outblob read means the quote failed. 
import hashlib
import os
import sys
import time


def detect_tdx() -> bool:
    """Return True if either spelling of the TDX guest device path exists."""
    return os.path.exists("/dev/tdx-guest") or os.path.exists("/dev/tdx_guest")


def _verdict(quote: bytes, report_data: bytes) -> str:
    """Return the PASS/FAIL line for a quote read back from outblob.

    PASS requires the quote to start with bytes 04 00 and to contain the first
    32 bytes of ``report_data``. An empty quote therefore yields FAIL.
    """
    bound = report_data[:32] in quote
    if quote[:2].hex() == "0400" and bound:
        return "PASS - unmodified configfs-tsm app got a real TDX quote via the shim"
    return "FAIL - unexpected quote (header or report_data binding off)"


def main() -> None:
    """Run the demo: write report_data to inblob, read the quote from outblob.

    Exits with status 1 when no TDX guest device path exists or when the
    ``inblob`` file never shows up under the report directory. After printing
    the result it sleeps forever instead of returning.
    """
    report_dir = os.environ.get("TSM_REPORT_PATH", "/sys/kernel/config/tsm/report/dstack")
    inblob = f"{report_dir}/inblob"
    outblob = f"{report_dir}/outblob"

    if not detect_tdx():
        print("FAIL: no TDX guest device (/dev/tdx-guest)")
        sys.exit(1)

    # `depends_on: condition: service_healthy` already gates startup on the shim
    # FIFOs existing; this short retry just mirrors what real attestation libs do.
    for _ in range(100):
        if os.path.exists(inblob):
            break
        time.sleep(0.1)
    else:
        # Opening a missing path with "wb" would create a regular file named
        # inblob, so stop here with a clear message instead.
        print(f"FAIL: {inblob} did not appear (is the shim running?)")
        sys.exit(1)

    report_data = hashlib.sha256(b"dstack-tsm-shim-demo").digest()  # 32 bytes
    with open(inblob, "wb") as f:
        # Zero-pad the 32-byte digest to 64 bytes before writing.
        f.write(report_data.ljust(64, b"\0"))
    with open(outblob, "rb") as f:
        quote = f.read()

    print(f"report_dir : {report_dir}")
    print(f"report_data : {report_data.hex()}")
    print(f"quote length : {len(quote)} bytes")
    print(f"quote header : {quote[:2].hex()} (a TDX v4 quote starts with 0400)")
    bound = report_data[:32] in quote
    print(f"report_data bound in quote: {bound}")
    print(_verdict(quote, report_data))

    sys.stdout.flush()
    # Sleep forever rather than return once the result has been printed.
    while True:
        time.sleep(3600)


if __name__ == "__main__":
    main()
+# See README.md for details. + +services: + # ---------- the shim sidecar ---------- + tsm-shim: + image: ghcr.io/dstack-tee/dstack-tsm-shim:latest + restart: unless-stopped + volumes: + - /var/run/dstack.sock:/var/run/dstack.sock # source of real quotes + - tsm-report:/run/tsm/report # FIFOs shared with the app + + # ---------- your app (here: a bundled demo client) ---------- + app: + image: ghcr.io/dstack-tee/dstack-tsm-shim:latest # <- replace with your image + entrypoint: ["python3", "/usr/local/bin/demo-app.py"] # <- your app's entrypoint + depends_on: + tsm-shim: + condition: service_healthy # (+) wait until FIFOs exist + environment: + - TSM_REPORT_PATH=/run/tsm/report # (+) point the app at the shim + volumes: + - tsm-report:/run/tsm/report # (+) see the shim's FIFOs + - tsm-devstub:/dev/tdx-guest # (+) make /dev/tdx-guest "exist" + +volumes: + tsm-report: {} + tsm-devstub: {} diff --git a/tsm-shim/tsm_shim.py b/tsm-shim/tsm_shim.py new file mode 100644 index 0000000..f70a95a --- /dev/null +++ b/tsm-shim/tsm_shim.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +"""dstack -> configfs-tsm shim. + +Serves /inblob (write report_data, <=64 bytes) and /outblob (read the +raw Intel DCAP TDX quote) by forwarding to the dstack guest-agent GetQuote RPC. +inblob/outblob are FIFOs, so a read of outblob blocks until the quote is ready. +report_data is forwarded byte-for-byte, so the quote is the genuine hardware +quote. + +Serves ONE request at a time and supports a SINGLE in-flight requester -- like a +single configfs-tsm report entry, it cannot correlate concurrent callers. Run one +shim per app. An empty outblob read means the quote failed. + +Env: TSM_REPORT_DIR (default /run/tsm/report), DSTACK_SOCKET (default +/var/run/dstack.sock). 
import errno
import fcntl
import http.client
import json
import os
import socket
import sys
import time

REPORT_DIR = os.environ.get("TSM_REPORT_DIR", "/run/tsm/report")
SOCKET = os.environ.get("DSTACK_SOCKET", "/var/run/dstack.sock")
# How long to wait for the app to open outblob for reading before giving up, so a
# caller that writes inblob then dies can't wedge the daemon.
OUTBLOB_DEADLINE = float(os.environ.get("TSM_OUTBLOB_DEADLINE", "30"))


def log(msg):
    """Write ``msg`` to stderr with a ``[tsm-shim]`` prefix and flush at once."""
    sys.stderr.write(f"[tsm-shim] {msg}\n")
    sys.stderr.flush()


class _UDS(http.client.HTTPConnection):
    """HTTPConnection over an AF_UNIX socket."""

    def __init__(self, path, timeout):
        # "localhost" is only a placeholder host for the base class; connect()
        # dials the UNIX socket at `path` instead.
        super().__init__("localhost", timeout=timeout)
        self._path = path

    def connect(self):
        """Create the AF_UNIX stream socket, apply the timeout, and connect."""
        # self.sock is assigned before connect() so that HTTPConnection.close()
        # can still release the socket if the connect call fails.
        self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        self.sock.settimeout(self.timeout)
        self.sock.connect(self._path)


def get_quote(report_data, timeout=30):
    """POST ``report_data`` to ``/GetQuote`` on SOCKET and return the quote bytes.

    The request body is JSON ``{"report_data": <hex>}``; the response is
    expected to be a JSON object whose ``quote`` field is a hex string.

    Raises:
        RuntimeError: on a non-200 status, or when ``quote`` is missing/empty.
        Other exceptions from the socket, JSON, or hex decoding propagate.
    """
    conn = _UDS(SOCKET, timeout)
    try:
        conn.request("POST", "/GetQuote",
                     body=json.dumps({"report_data": report_data.hex()}),
                     headers={"Host": "localhost", "Content-Type": "application/json"})
        resp = conn.getresponse()
        data = resp.read()
        if resp.status != 200:
            raise RuntimeError(f"guest-agent returned http {resp.status}: {data[:200]!r}")
        quote = json.loads(data).get("quote")
        if not quote:
            raise RuntimeError(f"no quote in response: {data[:200]!r}")
        return bytes.fromhex(quote)
    finally:
        conn.close()


def open_write_deadline(path, deadline=30.0):
    """Open a FIFO for writing, waiting up to ``deadline`` seconds for a reader.

    Returns a blocking fd, or None if no reader showed up in time -- so a caller
    that writes inblob then dies can't wedge the daemon forever.

    Raises:
        OSError: for any open failure other than ENXIO (e.g. the path is
            missing or not accessible). ENXIO is the only error treated as
            "no reader yet"; anything else is re-raised so the caller sees it.
    """
    end = time.monotonic() + deadline
    while True:
        try:
            fd = os.open(path, os.O_WRONLY | os.O_NONBLOCK)
        except OSError as exc:
            if exc.errno != errno.ENXIO:
                raise
            if time.monotonic() >= end:
                return None
            time.sleep(0.05)
            continue
        try:
            # Clear O_NONBLOCK so the returned fd is a blocking one.
            flags = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, flags & ~os.O_NONBLOCK)
        except BaseException:
            os.close(fd)  # don't leak the descriptor if fcntl fails
            raise
        return fd


def make_fifo(path):
    """Replace whatever exists at ``path`` with a fresh FIFO (mode 0o600)."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass  # nothing to replace
    os.mkfifo(path, 0o600)


def _quote_or_empty(report_data):
    """Return the quote for ``report_data``, or ``b""`` as the failure signal."""
    if len(report_data) > 64:
        # >64 bytes means more than one writer raced on inblob -- fail
        # closed rather than hand back a quote bound to ambiguous data.
        log(f"rejecting inblob: {len(report_data)} bytes (>64); concurrent writers?")
        return b""
    try:
        quote = get_quote(report_data.ljust(64, b"\0"))
        log(f"quote {len(quote)} bytes, header={quote[:2].hex()}")
    except Exception as exc:
        log(f"getquote failed: {exc}")  # deliver empty == failure signal
        return b""
    return quote


def main():
    """Create the inblob/outblob FIFOs under REPORT_DIR and serve requests forever.

    Each iteration reads one request from inblob, obtains a quote (or an empty
    failure marker), and writes it to outblob once a reader has opened it. Any
    exception in an iteration is logged and followed by a 0.5 s pause.
    """
    os.makedirs(REPORT_DIR, exist_ok=True)
    inblob = os.path.join(REPORT_DIR, "inblob")
    outblob = os.path.join(REPORT_DIR, "outblob")
    make_fifo(inblob)
    make_fifo(outblob)
    log(f"ready: {REPORT_DIR} -> {SOCKET}")

    while True:
        try:
            with open(inblob, "rb") as f:  # blocks until the app writes
                report_data = f.read()
            if not report_data:
                continue
            quote = _quote_or_empty(report_data)
            fd = open_write_deadline(outblob, OUTBLOB_DEADLINE)
            if fd is None:
                log("no reader for outblob within deadline; dropping")
                continue
            with os.fdopen(fd, "wb") as f:
                f.write(quote)
        except Exception as exc:
            log(f"serve loop error: {exc}")
            time.sleep(0.5)


if __name__ == "__main__":
    main()