From 8f47481da40ea1aa1d8825ab5a76338fc5bd163a Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Thu, 25 Jun 2026 18:07:34 -0700 Subject: [PATCH] feat: add TDX measurement verification mode --- Cargo.lock | 2 + dstack-attest/src/attestation.rs | 45 ++- dstack-attest/src/v1.rs | 132 ++++++- dstack-mr/src/kernel.rs | 85 ++++- dstack-mr/src/lib.rs | 2 + dstack-mr/src/main.rs | 98 ++++- dstack-mr/src/measurement.rs | 49 +++ dstack-mr/src/sev.rs | 101 +++-- dstack-mr/src/tdvf.rs | 416 ++++++++++++++------ dstack-mr/src/tdx.rs | 625 +++++++++++++++++++++++++++++++ dstack-types/Cargo.toml | 2 + dstack-types/src/lib.rs | 481 +++++++++++++++++++++++- gateway/src/config.rs | 7 + gateway/src/main_service.rs | 30 +- verifier/src/verification.rs | 238 +++++++++++- vmm/src/app.rs | 89 +++-- vmm/src/app/image.rs | 35 +- vmm/src/config.rs | 7 + vmm/vmm.toml | 3 + 19 files changed, 2193 insertions(+), 254 deletions(-) create mode 100644 dstack-mr/src/measurement.rs create mode 100644 dstack-mr/src/tdx.rs diff --git a/Cargo.lock b/Cargo.lock index 7d475254e..c60356720 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2695,6 +2695,8 @@ dependencies = [ name = "dstack-types" version = "0.5.11" dependencies = [ + "ciborium", + "hex", "or-panic", "parity-scale-codec", "serde", diff --git a/dstack-attest/src/attestation.rs b/dstack-attest/src/attestation.rs index 4f177343b..c56652799 100644 --- a/dstack-attest/src/attestation.rs +++ b/dstack-attest/src/attestation.rs @@ -32,6 +32,7 @@ use tpm_qvl::verify::VerifiedReport as TpmVerifiedReport; pub use tpm_types::TpmQuote; use crate::amd_sev_snp::VerifiedAmdSnpReport; +use crate::v1::{strip_tdx_event_log_for_config, strip_tdx_runtime_event_log}; pub use crate::v1::{Attestation as AttestationV1, PlatformEvidence, StackEvidence}; pub const SNP_REPORT_DATA_RANGE: std::ops::Range = 0x50..0x90; @@ -596,17 +597,24 @@ impl VersionedAttestation { } } - /// Strip data for certificate embedding (e.g. keep RTMR3 event logs only). 
+ /// Strip data for certificate embedding. pub fn into_stripped(self) -> Self { match self { Self::V0 { mut attestation } => { - if let Some(tdx_quote) = attestation.tdx_quote_mut() { - tdx_quote.event_log = tdx_quote - .event_log - .iter() - .filter(|e| e.imr == 3) - .map(|e| e.stripped()) - .collect(); + match &mut attestation.quote { + AttestationQuote::DstackTdx(tdx_quote) => { + tdx_quote.event_log = strip_tdx_event_log_for_config( + std::mem::take(&mut tdx_quote.event_log), + &attestation.config, + ); + } + AttestationQuote::DstackGcpTdx(quote) => { + quote.tdx_quote.event_log = strip_tdx_runtime_event_log(std::mem::take( + &mut quote.tdx_quote.event_log, + )); + } + AttestationQuote::DstackAmdSevSnp(_) + | AttestationQuote::DstackNitroEnclave(_) => {} } Self::V0 { attestation } } @@ -983,17 +991,16 @@ pub enum AttestationQuote { DstackTdx(TdxQuote), DstackGcpTdx(DstackGcpTdxQuote), DstackNitroEnclave(DstackNitroQuote), - /// Keep this last to preserve SCALE discriminants for existing variants. DstackAmdSevSnp(SnpQuote), } impl AttestationQuote { pub fn mode(&self) -> AttestationMode { match self { - AttestationQuote::DstackTdx { .. } => AttestationMode::DstackTdx, - AttestationQuote::DstackAmdSevSnp { .. } => AttestationMode::DstackAmdSevSnp, - AttestationQuote::DstackGcpTdx { .. } => AttestationMode::DstackGcpTdx, - AttestationQuote::DstackNitroEnclave { .. 
} => AttestationMode::DstackNitroEnclave, + AttestationQuote::DstackTdx(_) => AttestationMode::DstackTdx, + AttestationQuote::DstackAmdSevSnp(_) => AttestationMode::DstackAmdSevSnp, + AttestationQuote::DstackGcpTdx(_) => AttestationMode::DstackGcpTdx, + AttestationQuote::DstackNitroEnclave(_) => AttestationMode::DstackNitroEnclave, } } } @@ -1665,6 +1672,14 @@ impl Attestation { .map_err(|_| anyhow!("Quote lock poisoned"))?; let mode = AttestationMode::detect()?; + let config = match mode { + AttestationMode::DstackAmdSevSnp + | AttestationMode::DstackTdx + | AttestationMode::DstackGcpTdx => { + read_vm_config().context("Failed to read vm config")? + } + AttestationMode::DstackNitroEnclave => String::new(), + }; let runtime_events = match mode { AttestationMode::DstackTdx | AttestationMode::DstackGcpTdx => { RuntimeEvent::read_all().context("Failed to read runtime events")? @@ -1713,9 +1728,7 @@ impl Attestation { let config = match &quote { AttestationQuote::DstackAmdSevSnp(_) | AttestationQuote::DstackTdx(_) - | AttestationQuote::DstackGcpTdx(_) => { - read_vm_config().context("Failed to read vm config")? 
- } + | AttestationQuote::DstackGcpTdx(_) => config, AttestationQuote::DstackNitroEnclave(quote) => { let os_image_hash = quote .decode_image_hash() diff --git a/dstack-attest/src/v1.rs b/dstack-attest/src/v1.rs index a91e9393a..30235b5bb 100644 --- a/dstack-attest/src/v1.rs +++ b/dstack-attest/src/v1.rs @@ -10,6 +10,56 @@ use tpm_types::TpmQuote; pub const ATTESTATION_VERSION: u64 = 1; +pub(crate) fn strip_tdx_runtime_event_log(event_log: Vec) -> Vec { + event_log + .into_iter() + .filter(|event| event.imr == 3) + .map(|event| event.stripped()) + .collect() +} + +pub(crate) fn strip_tdx_measurement_event_log(event_log: Vec) -> Vec { + let rtmr0_count = event_log.iter().filter(|event| event.imr == 0).count(); + let acpi_indexes = if rtmr0_count >= 17 { + [10usize, 11, 12] + } else { + [8usize, 9, 10] + }; + let mut rtmr0_index = 0usize; + + event_log + .into_iter() + .filter_map(|event| { + if event.imr == 0 { + let keep = acpi_indexes.contains(&rtmr0_index); + rtmr0_index += 1; + keep.then(|| event.stripped()) + } else if event.imr == 3 { + Some(event.stripped()) + } else { + None + } + }) + .collect() +} + +pub(crate) fn is_tdx_measurement_config(config: &str) -> bool { + serde_json::from_str::(config) + .map(|config| config.tdx_attestation_variant.is_measurement()) + .unwrap_or(false) +} + +pub(crate) fn strip_tdx_event_log_for_config( + event_log: Vec, + config: &str, +) -> Vec { + if is_tdx_measurement_config(config) { + strip_tdx_measurement_event_log(event_log) + } else { + strip_tdx_runtime_event_log(event_log) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "kind", content = "data")] pub enum PlatformEvidence { @@ -92,14 +142,14 @@ impl PlatformEvidence { } pub fn into_stripped(self) -> Self { + self.into_stripped_for_config("") + } + + pub fn into_stripped_for_config(self, config: &str) -> Self { match self { Self::Tdx { quote, event_log } => Self::Tdx { quote, - event_log: event_log - .into_iter() - .filter(|event| event.imr == 3) 
- .map(|event| event.stripped()) - .collect(), + event_log: strip_tdx_event_log_for_config(event_log, config), }, Self::GcpTdx { quote, @@ -107,11 +157,7 @@ impl PlatformEvidence { tpm_quote, } => Self::GcpTdx { quote, - event_log: event_log - .into_iter() - .filter(|event| event.imr == 3) - .map(|event| event.stripped()) - .collect(), + event_log: strip_tdx_runtime_event_log(event_log), tpm_quote, }, other => other, @@ -242,9 +288,10 @@ impl Attestation { } pub fn into_stripped(self) -> Self { + let config = self.stack.config().to_string(); Self { version: self.version, - platform: self.platform.into_stripped(), + platform: self.platform.into_stripped_for_config(&config), stack: self.stack, } } @@ -414,6 +461,69 @@ mod tests { ); } + fn boot_event(idx: usize) -> TdxEvent { + TdxEvent { + imr: 0, + event_type: idx as u32, + digest: vec![idx as u8; 48], + event: String::new(), + event_payload: vec![0xff; idx + 1], + } + } + + fn runtime_event() -> TdxEvent { + RuntimeEvent { + event: "app-id".into(), + payload: vec![0x42], + } + .into() + } + + #[test] + fn measurement_stripping_keeps_only_pre202505_acpi_digests_and_runtime_payloads() { + let mut event_log = (0..13).map(boot_event).collect::>(); + event_log.push(runtime_event()); + + let stripped = strip_tdx_measurement_event_log(event_log); + + assert_eq!(stripped.len(), 4); + assert_eq!( + stripped[0..3] + .iter() + .map(|event| event.digest.clone()) + .collect::>(), + vec![vec![8u8; 48], vec![9u8; 48], vec![10u8; 48]] + ); + assert!(stripped[0..3] + .iter() + .all(|event| event.imr == 0 && event.event_payload.is_empty())); + assert_eq!(stripped[3].imr, 3); + assert_eq!(stripped[3].event, "app-id"); + assert_eq!(stripped[3].event_payload, vec![0x42]); + } + + #[test] + fn measurement_stripping_keeps_only_stable202505_acpi_digests_and_runtime_payloads() { + let mut event_log = (0..17).map(boot_event).collect::>(); + event_log.push(runtime_event()); + + let stripped = strip_tdx_measurement_event_log(event_log); + + 
assert_eq!(stripped.len(), 4); + assert_eq!( + stripped[0..3] + .iter() + .map(|event| event.digest.clone()) + .collect::>(), + vec![vec![10u8; 48], vec![11u8; 48], vec![12u8; 48]] + ); + assert!(stripped[0..3] + .iter() + .all(|event| event.imr == 0 && event.event_payload.is_empty())); + assert_eq!(stripped[3].imr, 3); + assert_eq!(stripped[3].event_payload, vec![0x42]); + } + #[test] fn sev_snp_with_report_data_patches_report_and_stack() { let mut report = vec![0x11; 1184]; diff --git a/dstack-mr/src/kernel.rs b/dstack-mr/src/kernel.rs index 878a2b012..a4e969563 100644 --- a/dstack-mr/src/kernel.rs +++ b/dstack-mr/src/kernel.rs @@ -7,6 +7,19 @@ use anyhow::{bail, Context, Result}; use object::pe; use sha2::{Digest, Sha384}; +/// QEMU's TDX setup-header patch places the initrd at a memory-dependent +/// address below this guest-memory size. At and above this threshold the +/// patched kernel Authenticode hash is stable for a given kernel/initrd pair. +pub const TDX_KERNEL_HASH_STABLE_MIN_MEMORY: u64 = 0xB0000000; +/// QEMU's low-memory initrd placement also resolves to the same below-4G +/// placement at exactly 2 GiB, so it shares the high-memory patched kernel hash. +pub const TDX_KERNEL_HASH_COMPAT_2G_MEMORY: u64 = 0x80000000; + +pub fn tdx_kernel_hash_uses_precomputed_high_mem(memory_size: u64) -> bool { + memory_size == TDX_KERNEL_HASH_COMPAT_2G_MEMORY + || memory_size >= TDX_KERNEL_HASH_STABLE_MIN_MEMORY +} + /// Calculates the Authenticode hash of a PE/COFF file fn authenticode_sha384_hash(data: &[u8]) -> Result> { let lfanew_offset = 0x3c; @@ -177,8 +190,8 @@ fn patch_kernel( 0x37ffffff }; - let lowmem = if mem_size < 0xb0000000 { - 0xb0000000 + let lowmem = if mem_size < TDX_KERNEL_HASH_STABLE_MIN_MEMORY { + TDX_KERNEL_HASH_STABLE_MIN_MEMORY } else { 0x80000000 }; @@ -211,6 +224,19 @@ fn patch_kernel( Ok(kd) } +/// Compute the first RTMR[1] event digest: the Authenticode SHA-384 hash of the +/// kernel after QEMU applies its setup-header patches. 
+pub(crate) fn patched_kernel_authenticode_sha384( + kernel_data: &[u8], + initrd_size: u32, + mem_size: u64, + acpi_data_size: u32, +) -> Result> { + let kd = patch_kernel(kernel_data, initrd_size, mem_size, acpi_data_size) + .context("Failed to patch kernel")?; + authenticode_sha384_hash(&kd).context("Failed to compute kernel hash") +} + /// Measures a QEMU-patched TDX kernel image. pub(crate) fn rtmr1_log( kernel_data: &[u8], @@ -218,9 +244,8 @@ pub(crate) fn rtmr1_log( mem_size: u64, acpi_data_size: u32, ) -> Result>> { - let kd = patch_kernel(kernel_data, initrd_size, mem_size, acpi_data_size) - .context("Failed to patch kernel")?; - let kernel_hash = authenticode_sha384_hash(&kd).context("Failed to compute kernel hash")?; + let kernel_hash = + patched_kernel_authenticode_sha384(kernel_data, initrd_size, mem_size, acpi_data_size)?; Ok(vec![ kernel_hash, measure_sha384(b"Calling EFI Application from Boot Option"), @@ -236,3 +261,53 @@ pub(crate) fn measure_cmdline(cmdline: &str) -> Vec { utf16_cmdline.extend([0, 0]); measure_sha384(&utf16_cmdline) } + +#[cfg(test)] +mod tests { + use super::*; + + fn initrd_addr(kernel: &[u8]) -> u32 { + u32::from_le_bytes(kernel[0x218..0x21c].try_into().unwrap()) + } + + #[test] + fn tdx_kernel_patch_uses_precomputed_digest_at_2g_and_high_memory() { + let mut kernel = vec![0u8; 0x1000]; + // Linux boot protocol >= 2.12 with XLF_CAN_BE_LOADED_ABOVE_4G makes + // QEMU derive the initrd address from available low memory. 
+ kernel[0x206..0x208].copy_from_slice(&0x020cu16.to_le_bytes()); + kernel[0x236..0x238].copy_from_slice(&0x0040u16.to_le_bytes()); + + let below_2g = patch_kernel(&kernel, 0x100000, 0x80000000 - 0x1000, 0x28000).unwrap(); + let at_2g = patch_kernel(&kernel, 0x100000, 0x80000000, 0x28000).unwrap(); + let between_2g_and_high_mem = patch_kernel( + &kernel, + 0x100000, + TDX_KERNEL_HASH_STABLE_MIN_MEMORY - 0x1000, + 0x28000, + ) + .unwrap(); + let at_threshold = patch_kernel( + &kernel, + 0x100000, + TDX_KERNEL_HASH_STABLE_MIN_MEMORY, + 0x28000, + ) + .unwrap(); + let above_threshold = patch_kernel( + &kernel, + 0x100000, + TDX_KERNEL_HASH_STABLE_MIN_MEMORY + 0x4000_0000, + 0x28000, + ) + .unwrap(); + + assert_ne!(initrd_addr(&below_2g), initrd_addr(&at_2g)); + assert_ne!( + initrd_addr(&between_2g_and_high_mem), + initrd_addr(&at_threshold) + ); + assert_eq!(initrd_addr(&at_2g), initrd_addr(&at_threshold)); + assert_eq!(initrd_addr(&at_threshold), initrd_addr(&above_threshold)); + } +} diff --git a/dstack-mr/src/lib.rs b/dstack-mr/src/lib.rs index ad71c0aee..2513c2897 100644 --- a/dstack-mr/src/lib.rs +++ b/dstack-mr/src/lib.rs @@ -17,9 +17,11 @@ pub type RtmrLogs = [RtmrLog; 3]; mod acpi; mod kernel; mod machine; +pub mod measurement; mod num; pub mod sev; mod tdvf; +pub mod tdx; mod uefi_var; mod util; diff --git a/dstack-mr/src/main.rs b/dstack-mr/src/main.rs index 2dca7574f..a6ace663f 100644 --- a/dstack-mr/src/main.rs +++ b/dstack-mr/src/main.rs @@ -4,17 +4,51 @@ //! `dstack-mr` CLI. //! -//! Currently exposes the AMD SEV-SNP `os_image_hash` computation used by the -//! image build to emit `digest.sev.txt`. +//! Exposes build-time OS-image measurement material/hash computations. 
use anyhow::{bail, Context, Result}; +use dstack_types::OsImageMeasurementDocument; +use serde_json::Value; use std::path::Path; -const USAGE: &str = "usage: dstack-mr sev-os-image-hash "; +const USAGE: &str = "\ +usage: + dstack-mr measure-os + dstack-mr inspect-measurement + dstack-mr sev-os-image-hash + dstack-mr tdx-os-image-measurement + dstack-mr tdx-os-image-hash + +features: + cbor-measurement-v2"; fn main() -> Result<()> { let mut args = std::env::args().skip(1); match args.next().as_deref() { + Some("measure-os") => { + let image_dir = args.next().context(USAGE)?; + let document = dstack_mr::measurement::os_image_measurement_document_for_image_dir( + Path::new(&image_dir), + ) + .context("failed to compute os image measurement document")?; + println!( + "{}", + serde_json::to_string(&document) + .context("failed to serialize os image measurement document")? + ); + Ok(()) + } + Some("inspect-measurement") => { + let measurement_json = args.next().context(USAGE)?; + let document = inspect_measurement(Path::new(&measurement_json)) + .context("failed to inspect os image measurement document")?; + println!( + "{}", + serde_json::to_string_pretty(&document) + .context("failed to serialize decoded measurement document")? + ); + Ok(()) + } Some("sev-os-image-hash") => { let image_dir = args.next().context(USAGE)?; let hash = dstack_mr::sev::sev_os_image_hash_for_image_dir(Path::new(&image_dir)) @@ -22,6 +56,26 @@ fn main() -> Result<()> { println!("{}", hex::encode(hash)); Ok(()) } + Some("tdx-os-image-measurement") => { + let image_dir = args.next().context(USAGE)?; + let document = dstack_mr::tdx::tdx_os_image_measurement_document_for_image_dir( + Path::new(&image_dir), + ) + .context("failed to compute tdx os image measurement material")?; + println!( + "{}", + serde_json::to_string(&document) + .context("failed to serialize tdx measurement material")? 
+ ); + Ok(()) + } + Some("tdx-os-image-hash") => { + let image_dir = args.next().context(USAGE)?; + let hash = dstack_mr::tdx::tdx_os_image_hash_for_image_dir(Path::new(&image_dir)) + .context("failed to compute tdx os_image_hash")?; + println!("{}", hex::encode(hash)); + Ok(()) + } Some("-h") | Some("--help") => { println!("{USAGE}"); Ok(()) @@ -30,3 +84,41 @@ fn main() -> Result<()> { None => bail!("{USAGE}"), } } + +fn inspect_measurement(path: &Path) -> Result { + let document_text = fs_err::read_to_string(path) + .with_context(|| format!("failed to read {}", path.display()))?; + let document: OsImageMeasurementDocument = serde_json::from_str(&document_text) + .with_context(|| format!("failed to parse {}", path.display()))?; + let mut out: Value = serde_json::from_str(&document_text) + .with_context(|| format!("failed to parse {}", path.display()))?; + + if let (Some(tdx), Some(tdx_value)) = (&document.tdx, out.get_mut("tdx")) { + replace_measurement_field( + tdx_value, + tdx.decode_measurement_value() + .map_err(anyhow::Error::msg) + .context("failed to decode tdx measurement CBOR")?, + ); + } + if let (Some(snp), Some(snp_value)) = (&document.snp, out.get_mut("snp")) { + replace_measurement_field( + snp_value, + snp.decode_measurement_value() + .map_err(anyhow::Error::msg) + .context("failed to decode snp measurement CBOR")?, + ); + } + Ok(out) +} + +fn replace_measurement_field(section: &mut Value, decoded_measurement: Value) { + let Some(section) = section.as_object_mut() else { + return; + }; + if section.contains_key("measurement") { + section.insert("measurement".to_string(), decoded_measurement); + } else if section.contains_key("m") { + section.insert("m".to_string(), decoded_measurement); + } +} diff --git a/dstack-mr/src/measurement.rs b/dstack-mr/src/measurement.rs new file mode 100644 index 000000000..602afee60 --- /dev/null +++ b/dstack-mr/src/measurement.rs @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: © 2026 Phala Network +// +// 
SPDX-License-Identifier: Apache-2.0 + +//! Unified build-time OS-image measurement document. + +use anyhow::{Context, Result}; +use dstack_types::{ + OsImageMeasurementDocument, SevOsImageMeasurementDocument, TdxOsImageMeasurementDocument, +}; +use fs_err as fs; +use serde::Deserialize; +use std::path::Path; + +#[derive(Debug, Deserialize)] +struct ImageMetadata { + #[serde(default, rename = "bios-sev")] + bios_sev: Option, +} + +/// Generate `measurement.json` for an image directory. +/// +/// TDX material is mandatory for the normal dstack image. SNP material is +/// included when metadata declares a dedicated `bios-sev` firmware. +pub fn os_image_measurement_document_for_image_dir( + image_dir: &Path, +) -> Result { + let meta_path = image_dir.join("metadata.json"); + let meta_str = fs::read_to_string(&meta_path) + .with_context(|| format!("cannot read {}", meta_path.display()))?; + let meta: ImageMetadata = + serde_json::from_str(&meta_str).context("failed to parse image metadata.json")?; + + let tdx = TdxOsImageMeasurementDocument::new( + crate::tdx::tdx_os_image_measurement_for_image_dir(image_dir) + .context("failed to build TDX measurement document")?, + ); + + let snp = if meta.bios_sev.is_some() { + Some(SevOsImageMeasurementDocument::new( + crate::sev::sev_os_image_measurement_for_image_dir(image_dir) + .context("failed to build SNP measurement document")?, + )) + } else { + None + }; + + Ok(OsImageMeasurementDocument::new(Some(tdx), snp)) +} diff --git a/dstack-mr/src/sev.rs b/dstack-mr/src/sev.rs index 1d97d2c2f..dedab9453 100644 --- a/dstack-mr/src/sev.rs +++ b/dstack-mr/src/sev.rs @@ -321,6 +321,27 @@ fn build_sev_hashes_page( Ok(page) } +fn measured_kernel_cmdline(input: Option<&str>) -> String { + match input { + Some(base) if !base.trim().is_empty() => base.trim().to_string(), + _ => "console=ttyS0 loglevel=7".to_string(), + } +} + +fn kernel_cmdline_sha256(input: Option<&str>) -> Vec { + let cmdline = measured_kernel_cmdline(input); + let mut 
cmdline_bytes = cmdline.as_bytes().to_vec(); + cmdline_bytes.push(0); + Sha256::digest(&cmdline_bytes).to_vec() +} + +fn effective_initrd_hash_from_hex(value: &str) -> Result> { + if value.is_empty() { + return Ok(Sha256::digest(b"").to_vec()); + } + decode_required_hex("initrd_hash", value, 32) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SectionType { SnpSecMemory = 1, @@ -664,10 +685,7 @@ pub fn compute_expected_measurement(input: &MeasurementInput) -> Result<[u8; 48] .as_deref() .ok_or_else(|| anyhow::anyhow!("vcpu_type is required"))?; - let cmdline = match input.base_cmdline.as_deref() { - Some(base) if !base.trim().is_empty() => base.trim().to_string(), - _ => "console=ttyS0 loglevel=7".to_string(), - }; + let cmdline = measured_kernel_cmdline(input.base_cmdline.as_deref()); let resolved_sections = input .ovmf_sections .iter() @@ -737,12 +755,15 @@ pub fn compute_expected_measurement(input: &MeasurementInput) -> Result<[u8; 48] fn sev_os_image_measurement( input: &MeasurementInput, ) -> Result { + // Validate that the measured command line commits the rootfs identity. The + // compact image projection does not carry a separate rootfs_hash because it + // is already committed by `kernel_cmdline_sha256`. 
+ rootfs_hash_from_cmdline(input.base_cmdline.as_deref())?; Ok(dstack_types::SevOsImageMeasurement { - rootfs_hash: rootfs_hash_from_cmdline(input.base_cmdline.as_deref())?, - base_cmdline: input.base_cmdline.clone(), - ovmf_hash: input.ovmf_hash.clone(), - kernel_hash: input.kernel_hash.clone(), - initrd_hash: input.initrd_hash.clone(), + kernel_cmdline_sha256: kernel_cmdline_sha256(input.base_cmdline.as_deref()), + ovmf_hash: decode_required_hex("ovmf_hash", &input.ovmf_hash, 48)?, + kernel_hash: decode_required_hex("kernel_hash", &input.kernel_hash, 32)?, + initrd_hash: effective_initrd_hash_from_hex(&input.initrd_hash)?, sev_hashes_table_gpa: input.sev_hashes_table_gpa, sev_es_reset_eip: input.sev_es_reset_eip, ovmf_sections: input @@ -821,9 +842,9 @@ struct ImageMetadata { bios_sev: Option, } -fn file_sha256_hex(path: &Path) -> Result { +fn file_sha256(path: &Path) -> Result> { let data = fs::read(path).with_context(|| format!("cannot read {}", path.display()))?; - Ok(hex::encode(Sha256::digest(data))) + Ok(Sha256::digest(data).to_vec()) } pub fn rootfs_hash_from_cmdline(cmdline: Option<&str>) -> Result { @@ -840,14 +861,12 @@ pub fn rootfs_hash_from_cmdline(cmdline: Option<&str>) -> Result { )?)) } -/// Compute the AMD SEV-SNP `os_image_hash` from an OS image directory containing -/// `metadata.json` plus the SEV firmware, kernel and initrd. -/// -/// This is the canonical producer of `digest.sev.txt`. The value equals the -/// `os_image_hash` the KMS and verifier derive from a hardware-verified launch -/// measurement, because both go through [`snp_measurement_os_image_hash`] / -/// `dstack_types::SevOsImageMeasurement`. -pub fn sev_os_image_hash_for_image_dir(image_dir: &Path) -> Result<[u8; 32]> { +/// Compute the AMD SEV-SNP image-invariant measurement projection from an OS +/// image directory containing `metadata.json` plus the SEV firmware, kernel and +/// initrd. 
+pub fn sev_os_image_measurement_for_image_dir( + image_dir: &Path, +) -> Result { let meta_path = image_dir.join("metadata.json"); let meta_str = fs::read_to_string(&meta_path) .with_context(|| format!("cannot read {}", meta_path.display()))?; @@ -862,13 +881,16 @@ pub fn sev_os_image_hash_for_image_dir(image_dir: &Path) -> Result<[u8; 32]> { .or(meta.bios.as_deref()) .context("bios-sev/bios is required for amd sev-snp os_image_hash")?; let ovmf = ovmf_measurement_info(&image_dir.join(bios))?; + // Validate that the measured command line commits the rootfs identity. The + // compact image projection does not carry a separate rootfs_hash because it + // is already committed by `kernel_cmdline_sha256`. + rootfs_hash_from_cmdline(meta.cmdline.as_deref())?; - let measurement = dstack_types::SevOsImageMeasurement { - rootfs_hash: rootfs_hash_from_cmdline(meta.cmdline.as_deref())?, - base_cmdline: meta.cmdline.as_deref().map(|c| c.trim().to_string()), - ovmf_hash: ovmf.ovmf_hash, - kernel_hash: file_sha256_hex(&image_dir.join(&meta.kernel))?, - initrd_hash: file_sha256_hex(&image_dir.join(&meta.initrd))?, + Ok(dstack_types::SevOsImageMeasurement { + kernel_cmdline_sha256: kernel_cmdline_sha256(meta.cmdline.as_deref()), + ovmf_hash: decode_required_hex("ovmf_hash", &ovmf.ovmf_hash, 48)?, + kernel_hash: file_sha256(&image_dir.join(&meta.kernel))?, + initrd_hash: file_sha256(&image_dir.join(&meta.initrd))?, sev_hashes_table_gpa: ovmf.sev_hashes_table_gpa, sev_es_reset_eip: ovmf.sev_es_reset_eip, ovmf_sections: ovmf @@ -880,8 +902,27 @@ pub fn sev_os_image_hash_for_image_dir(image_dir: &Path) -> Result<[u8; 32]> { section_type: s.section_type, }) .collect(), - }; - Ok(measurement.os_image_hash()) + }) +} + +/// Compute the AMD SEV-SNP `os_image_hash` from an OS image directory. +/// +/// This is the canonical legacy producer of `digest.sev.txt`. New images carry +/// the same value in `measurement.json.snp.os_image_hash`. 
The value equals the +/// `os_image_hash` the KMS and verifier derive from a hardware-verified launch +/// measurement, because both go through [`snp_measurement_os_image_hash`] / +/// `dstack_types::SevOsImageMeasurement`. +pub fn sev_os_image_hash_for_image_dir(image_dir: &Path) -> Result<[u8; 32]> { + Ok(sev_os_image_measurement_for_image_dir(image_dir)?.os_image_hash()) +} + +/// Build the SNP section of `measurement.json`. +pub fn sev_os_image_measurement_document_for_image_dir( + image_dir: &Path, +) -> Result { + Ok(dstack_types::SevOsImageMeasurementDocument::new( + sev_os_image_measurement_for_image_dir(image_dir)?, + )) } /// `sha256(MEASUREMENT || HOST_DATA)` — the SNP aggregated identity digest. @@ -1313,13 +1354,13 @@ mod tests { "7f51e17f72a04d5422cb2c00998166536019a217376f3aa45a630e59c805a599847ff250dbffcd07e1ba639771d6f05d", ); - // os_image_hash derived from the same document must match the value the - // CVM advertised in its vm_config (and digest.sev.txt). + // os_image_hash derived from the same document must match the current + // measurement.json projection for these launch inputs. 
let os_image_hash = snp_measurement_os_image_hash(REAL_MEASUREMENT_DOC).expect("derive os_image_hash"); assert_eq!( hex::encode(os_image_hash), - "32b4767373ad7fa0f9c418925006194d5c3f5619529f309fe81156789fecd8bc", + "b6e8403b8f6167bcef4e39aa1039d8728fe624532ca6cedf2625a87fac2e5fda", ); } diff --git a/dstack-mr/src/tdvf.rs b/dstack-mr/src/tdvf.rs index f3791e8fc..90847a504 100644 --- a/dstack-mr/src/tdvf.rs +++ b/dstack-mr/src/tdvf.rs @@ -49,6 +49,136 @@ pub enum PageAddOrder { SinglePass, } +#[derive(Debug, Clone)] +pub(crate) struct AcpiTableHashes { + pub loader: Vec, + pub rsdp: Vec, + pub tables: Vec, +} + +pub(crate) fn rtmr0_log_from_td_hob_hash_with_acpi_hashes( + td_hob_hash: Vec, + ovmf_variant: OvmfVariant, + acpi_hashes: &AcpiTableHashes, +) -> Result { + let cfv_image_hash = hex!("344BC51C980BA621AAA00DA3ED7436F7D6E549197DFE699515DFA2C6583D95E6412AF21C097D473155875FFD561D6790"); + + let secureboot_hash = + measure_tdx_efi_variable("8BE4DF61-93CA-11D2-AA0D-00E098032B8C", "SecureBoot")?; + let pk_hash = measure_tdx_efi_variable("8BE4DF61-93CA-11D2-AA0D-00E098032B8C", "PK")?; + let kek_hash = measure_tdx_efi_variable("8BE4DF61-93CA-11D2-AA0D-00E098032B8C", "KEK")?; + let db_hash = measure_tdx_efi_variable("D719B2CB-3D3A-4596-A3BC-DAD00E67656F", "db")?; + let dbx_hash = measure_tdx_efi_variable("D719B2CB-3D3A-4596-A3BC-DAD00E67656F", "dbx")?; + let separator_hash = measure_sha384(&[0x00, 0x00, 0x00, 0x00]); + + let log = match ovmf_variant { + OvmfVariant::Pre202505 => { + // Boot0000 = OVMF UiApp (fixed digest for pre-202505 firmware). 
+ let boot000_hash = hex!("23ADA07F5261F12F34A0BD8E46760962D6B4D576A416F1FEA1C64BC656B1D28EACF7047AE6E967C58FD2A98BFA74C298"); + vec![ + td_hob_hash, + cfv_image_hash.to_vec(), + secureboot_hash, + pk_hash, + kek_hash, + db_hash, + dbx_hash, + separator_hash, + acpi_hashes.loader.clone(), + acpi_hashes.rsdp.clone(), + acpi_hashes.tables.clone(), + measure_sha384(&[0x00, 0x00]), // BootOrder (raw 2 bytes in legacy OVMF) + boot000_hash.to_vec(), + ] + } + OvmfVariant::Stable202505 => { + // edk2-stable202505 emits 17 RTMR[0] events instead of 13. + // Everything except the three QEMU-generated ACPI blob digests is + // derivable from dstack's launch policy and the shipped OVMF build. + + // fw_cfg `BootMenu` is a u16; dstack doesn't pass `-boot + // menu=on`, so it defaults to 0x0000. + let bootmenu_fwcfg_hash = measure_sha384(&[0x00, 0x00]); + + // fw_cfg `bootorder` is the NUL-separated list of QEMU device + // paths whose backing devices have `bootindex` set. For + // `-kernel` boot, QEMU (hw/i386/x86.c::x86_load_linux) injects + // a single option ROM with `bootindex = 0`: + // * `linuxboot_dma.bin` if fw_cfg DMA is enabled (q35 default) + // * `linuxboot.bin` otherwise + // dstack-vmm always uses q35 → DMA is on → the bootorder file + // contains just the single path below (31 bytes, trailing NUL). + // No other dstack device gets an implicit bootindex. + // + // Verified end-to-end: gdb-attached the live QEMU and called + // get_boot_devices_list() — returned exactly these 31 bytes. + let bootorder_fwcfg_hash = measure_sha384(b"/rom@genroms/linuxboot_dma.bin\0"); + + // EV_EFI_VARIABLE_AUTHORITY: OVMF emits this once during BDS even + // when Secure Boot is disabled. The 32-byte event blob in the log is + // a sentinel; the actual measured payload is OVMF-internal. + // Captured digest is a constant for the edk2-stable202505 build + // dstack ships. 
+ let variable_authority_hash = + hex!("FB66919801F1DFC9C4C273B6A739380790CB0FD3CB706A42F6AC050510EBC8618E7FBA53A1564522F5C6F0DC9E1F41A6"); + + // BootOrder UEFI variable holds [0x0000, 0x0001] — the two boot + // options OVMF's BDS publishes (UiApp and FrontPage). The TCG digest + // for `EV_EFI_VARIABLE_BOOT2` is over the raw variable data, NOT a + // UEFI_VARIABLE_DATA wrapper. + let boot_order_var_hash = measure_sha384(&boot_order_bytes(&[0x0000, 0x0001])); + + // Boot0000 = OVMF's BootManagerMenuApp; Boot0001 = "EFI Firmware + // Setup" (FrontPage). Both live in the OVMF FV and are baked into + // the firmware at build time. The attribute bits and descriptions + // come from MdeModulePkg's BdsBootManagerLib in edk2-stable202505. + // 0x101 = LOAD_OPTION_ACTIVE | LOAD_OPTION_CATEGORY_APP + // 0x109 = + LOAD_OPTION_HIDDEN + let boot0000_hash = measure_sha384(&boot_option_bytes( + 0x0000_0109, + "BootManagerMenuApp", + &[ + fv_node(&OVMF_FV_GUID_LE), + fv_file_node(&OVMF_UIAPP_FILE_GUID_LE), + END_OF_DEVICE_PATH, + ], + &[], + )); + let boot0001_hash = measure_sha384(&boot_option_bytes( + 0x0000_0101, + "EFI Firmware Setup", + &[ + fv_node(&OVMF_FV_GUID_LE), + fv_file_node(&OVMF_FRONTPAGE_FILE_GUID_LE), + END_OF_DEVICE_PATH, + ], + &[], + )); + vec![ + td_hob_hash, + cfv_image_hash.to_vec(), + bootmenu_fwcfg_hash, + bootorder_fwcfg_hash.to_vec(), + secureboot_hash, + pk_hash, + kek_hash, + db_hash, + dbx_hash, + separator_hash, + acpi_hashes.loader.clone(), + acpi_hashes.rsdp.clone(), + acpi_hashes.tables.clone(), + variable_authority_hash.to_vec(), + boot_order_var_hash, + boot0000_hash, + boot0001_hash, + ] + } + }; + + Ok(log) +} + /// Helper to decode little-endian integers from byte slice using scale codec fn decode_le(data: &[u8], context: &str) -> Result { T::decode(&mut &data[..]) @@ -279,6 +409,14 @@ impl<'a> Tdvf<'a> { Ok(h.finalize().to_vec()) } + pub(crate) fn mrtd_single_pass(&self) -> Result> { + self.compute_mrtd(PageAddOrder::SinglePass) + } + + 
pub(crate) fn mrtd_two_pass(&self) -> Result> { + self.compute_mrtd(PageAddOrder::TwoPass) + } + pub fn mrtd(&self, machine: &Machine) -> Result> { let opts = machine .versioned_options() @@ -290,6 +428,89 @@ impl<'a> Tdvf<'a> { }) } + /// Build the compact TdHobWitnessV1 byte string for this TDVF. + /// + /// The witness contains only the accepted TD HOB/TEMP_MEM ranges needed to + /// reconstruct the TD HOB for any VM memory size. All addresses/sizes are + /// represented in 4 KiB pages using unsigned LEB128 varints: + /// + /// varuint base_page + /// varuint td_hob_page_delta + /// varuint range_count + /// repeated range_count: + /// varuint start_page_delta + /// varuint page_count + /// + /// `base_page` is the minimum accepted range start page. Deltas are relative + /// to it. Ranges are sorted by start page and intentionally not merged; the + /// TD HOB measurement code emits adjacent accepted ranges as separate HOB + /// resources when TDVF metadata describes them separately. + pub(crate) fn td_hob_witness_v1(&self) -> Result> { + fn put_varuint(mut value: u64, out: &mut Vec) { + loop { + let mut byte = (value & 0x7f) as u8; + value >>= 7; + if value != 0 { + byte |= 0x80; + } + out.push(byte); + if value == 0 { + break; + } + } + } + + let mut ranges = Vec::<(u64, u64)>::new(); + let mut td_hob_page = None; + + for s in &self.sections { + if matches!(s.sec_type, TDVF_SECTION_TD_HOB | TDVF_SECTION_TEMP_MEM) { + let start_page = s.memory_address / PAGE_SIZE; + let page_count = s.memory_data_size / PAGE_SIZE; + if page_count == 0 { + bail!("TD HOB witness range must not be empty"); + } + ranges.push((start_page, page_count)); + } + if s.sec_type == TDVF_SECTION_TD_HOB { + if td_hob_page.replace(s.memory_address / PAGE_SIZE).is_some() { + bail!("TDVF metadata contains more than one TD_HOB section"); + } + } + } + + if ranges.is_empty() { + bail!("TDVF metadata has no TD_HOB/TEMP_MEM sections"); + } + let td_hob_page = td_hob_page.context("TDVF metadata is 
missing TD_HOB section")?; + + ranges.sort_by_key(|&(start_page, _)| start_page); + let mut prev_end = None; + for &(start_page, page_count) in &ranges { + if let Some(end) = prev_end { + if start_page < end { + bail!("TD HOB witness ranges must not overlap"); + } + } + prev_end = Some(start_page + page_count); + } + + let base_page = ranges[0].0; + if td_hob_page < base_page { + bail!("TD_HOB page is below TD HOB witness base page"); + } + + let mut out = Vec::with_capacity(4 + ranges.len() * 2); + put_varuint(base_page, &mut out); + put_varuint(td_hob_page - base_page, &mut out); + put_varuint(ranges.len() as u64, &mut out); + for (start_page, page_count) in ranges { + put_varuint(start_page - base_page, &mut out); + put_varuint(page_count, &mut out); + } + Ok(out) + } + #[allow(dead_code)] pub fn rtmr0(&self, machine: &Machine) -> Result> { let (rtmr0_log, _) = self.rtmr0_log(machine)?; @@ -297,135 +518,30 @@ impl<'a> Tdvf<'a> { } pub fn rtmr0_log(&self, machine: &Machine) -> Result<(RtmrLog, Tables)> { - let td_hob_hash = self.measure_td_hob(machine.memory_size)?; - let cfv_image_hash = hex!("344BC51C980BA621AAA00DA3ED7436F7D6E549197DFE699515DFA2C6583D95E6412AF21C097D473155875FFD561D6790"); - let tables = machine.build_tables()?; - let acpi_tables_hash = measure_sha384(&tables.tables); - let acpi_rsdp_hash = measure_sha384(&tables.rsdp); - let acpi_loader_hash = measure_sha384(&tables.loader); - - let secureboot_hash = - measure_tdx_efi_variable("8BE4DF61-93CA-11D2-AA0D-00E098032B8C", "SecureBoot")?; - let pk_hash = measure_tdx_efi_variable("8BE4DF61-93CA-11D2-AA0D-00E098032B8C", "PK")?; - let kek_hash = measure_tdx_efi_variable("8BE4DF61-93CA-11D2-AA0D-00E098032B8C", "KEK")?; - let db_hash = measure_tdx_efi_variable("D719B2CB-3D3A-4596-A3BC-DAD00E67656F", "db")?; - let dbx_hash = measure_tdx_efi_variable("D719B2CB-3D3A-4596-A3BC-DAD00E67656F", "dbx")?; - let separator_hash = measure_sha384(&[0x00, 0x00, 0x00, 0x00]); - - let log = match machine.ovmf_variant { 
- OvmfVariant::Pre202505 => { - // Boot0000 = OVMF UiApp (fixed digest for pre-202505 firmware). - let boot000_hash = hex!("23ADA07F5261F12F34A0BD8E46760962D6B4D576A416F1FEA1C64BC656B1D28EACF7047AE6E967C58FD2A98BFA74C298"); - vec![ - td_hob_hash, - cfv_image_hash.to_vec(), - secureboot_hash, - pk_hash, - kek_hash, - db_hash, - dbx_hash, - separator_hash, - acpi_loader_hash, - acpi_rsdp_hash, - acpi_tables_hash, - measure_sha384(&[0x00, 0x00]), // BootOrder (raw 2 bytes in legacy OVMF) - boot000_hash.to_vec(), - ] - } - OvmfVariant::Stable202505 => { - // edk2-stable202505 emits 17 RTMR[0] events instead of 13. The - // boot-option set is fully derivable from OVMF-internal - // constants (FV and file GUIDs, descriptions, attributes); the - // remaining two — the bootorder fw_cfg measurement and - // EV_EFI_VARIABLE_AUTHORITY — stay as captured digests because - // their content depends on QEMU's emitted device list and on - // OVMF-internal logic that's not worth shadowing here. - - // fw_cfg `BootMenu` is a u16; dstack doesn't pass `-boot - // menu=on`, so it defaults to 0x0000. - let bootmenu_fwcfg_hash = measure_sha384(&[0x00, 0x00]); - - // fw_cfg `bootorder` is the NUL-separated list of QEMU device - // paths whose backing devices have `bootindex` set. For - // `-kernel` boot, QEMU (hw/i386/x86.c::x86_load_linux) injects - // a single option ROM with `bootindex = 0`: - // * `linuxboot_dma.bin` if fw_cfg DMA is enabled (q35 default) - // * `linuxboot.bin` otherwise - // dstack-vmm always uses q35 → DMA is on → the bootorder file - // contains just the single path below (31 bytes, trailing - // NUL). No other dstack device gets an implicit bootindex. - // - // Verified end-to-end: gdb-attached the live QEMU and called - // get_boot_devices_list() — returned exactly these 31 bytes. 
- let bootorder_fwcfg_hash = measure_sha384(b"/rom@genroms/linuxboot_dma.bin\0"); - - // EV_EFI_VARIABLE_AUTHORITY: OVMF emits this once during BDS - // even when Secure Boot is disabled. The 32-byte event blob in - // the log is a sentinel; the actual measured payload is - // OVMF-internal. Captured digest is a constant for the - // edk2-stable202505 build dstack ships. - let variable_authority_hash = - hex!("FB66919801F1DFC9C4C273B6A739380790CB0FD3CB706A42F6AC050510EBC8618E7FBA53A1564522F5C6F0DC9E1F41A6"); - - // BootOrder UEFI variable holds [0x0000, 0x0001] — the two - // boot options OVMF's BDS publishes (UiApp and FrontPage). - // The TCG digest for `EV_EFI_VARIABLE_BOOT2` is over the raw - // variable data, NOT a UEFI_VARIABLE_DATA wrapper. - let boot_order_var_hash = measure_sha384(&boot_order_bytes(&[0x0000, 0x0001])); - - // Boot0000 = OVMF's BootManagerMenuApp; Boot0001 = "EFI - // Firmware Setup" (FrontPage). Both live in the OVMF FV and - // are baked into the firmware at build time. The attribute - // bits and descriptions come from MdeModulePkg's - // BdsBootManagerLib in edk2-stable202505. 
- // 0x101 = LOAD_OPTION_ACTIVE | LOAD_OPTION_CATEGORY_APP - // 0x109 = + LOAD_OPTION_HIDDEN - let boot0000_hash = measure_sha384(&boot_option_bytes( - 0x0000_0109, - "BootManagerMenuApp", - &[ - fv_node(&OVMF_FV_GUID_LE), - fv_file_node(&OVMF_UIAPP_FILE_GUID_LE), - END_OF_DEVICE_PATH, - ], - &[], - )); - let boot0001_hash = measure_sha384(&boot_option_bytes( - 0x0000_0101, - "EFI Firmware Setup", - &[ - fv_node(&OVMF_FV_GUID_LE), - fv_file_node(&OVMF_FRONTPAGE_FILE_GUID_LE), - END_OF_DEVICE_PATH, - ], - &[], - )); - vec![ - td_hob_hash, - cfv_image_hash.to_vec(), - bootmenu_fwcfg_hash, - bootorder_fwcfg_hash.to_vec(), - secureboot_hash, - pk_hash, - kek_hash, - db_hash, - dbx_hash, - separator_hash, - acpi_loader_hash, - acpi_rsdp_hash, - acpi_tables_hash, - variable_authority_hash.to_vec(), - boot_order_var_hash, - boot0000_hash, - boot0001_hash, - ] - } + let acpi_hashes = AcpiTableHashes { + tables: measure_sha384(&tables.tables), + rsdp: measure_sha384(&tables.rsdp), + loader: measure_sha384(&tables.loader), }; - + let log = self.rtmr0_log_with_acpi_hashes( + machine.memory_size, + machine.ovmf_variant, + &acpi_hashes, + )?; Ok((log, tables)) } + pub(crate) fn rtmr0_log_with_acpi_hashes( + &self, + memory_size: u64, + ovmf_variant: OvmfVariant, + acpi_hashes: &AcpiTableHashes, + ) -> Result { + let td_hob_hash = self.measure_td_hob(memory_size)?; + rtmr0_log_from_td_hob_hash_with_acpi_hashes(td_hob_hash, ovmf_variant, acpi_hashes) + } + fn measure_td_hob(&self, memory_size: u64) -> Result> { let mut memory_acceptor = MemoryAcceptor::new(0, memory_size); let mut td_hob = Vec::new(); @@ -533,3 +649,55 @@ impl MemoryAcceptor { self.ranges = new_ranges; } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn td_hob_witness_v1_encodes_current_dstack_ranges_compactly() -> Result<()> { + let tdvf = Tdvf { + fw: &[], + sections: vec![ + TdvfSection { + data_offset: 0, + raw_data_size: 0, + memory_address: 0x810000, + memory_data_size: 0x10000, + sec_type: 
TDVF_SECTION_TEMP_MEM, + attributes: 0, + }, + TdvfSection { + data_offset: 0, + raw_data_size: 0, + memory_address: 0x80b000, + memory_data_size: 0x2000, + sec_type: TDVF_SECTION_TEMP_MEM, + attributes: 0, + }, + TdvfSection { + data_offset: 0, + raw_data_size: 0, + memory_address: 0x809000, + memory_data_size: 0x2000, + sec_type: TDVF_SECTION_TD_HOB, + attributes: 0, + }, + TdvfSection { + data_offset: 0, + raw_data_size: 0, + memory_address: 0x800000, + memory_data_size: 0x6000, + sec_type: TDVF_SECTION_TEMP_MEM, + attributes: 0, + }, + ], + }; + + assert_eq!( + hex::encode(tdvf.td_hob_witness_v1()?), + "80100904000609020b021010" + ); + Ok(()) + } +} diff --git a/dstack-mr/src/tdx.rs b/dstack-mr/src/tdx.rs new file mode 100644 index 000000000..c7406309a --- /dev/null +++ b/dstack-mr/src/tdx.rs @@ -0,0 +1,625 @@ +// SPDX-FileCopyrightText: © 2026 Phala Network +// +// SPDX-License-Identifier: Apache-2.0 + +//! Build-time TDX OS-image static measurement material. +//! +//! The current verifier path recomputes TDX MRs from a downloaded image. This +//! module emits the image-static material needed by the no-image-download path: +//! MRTD candidates, compact TD HOB witness, command line, kernel/initrd digests +//! and sizes. VM-specific inputs (RAM size, vCPU count, QEMU topology knobs) are +//! intentionally excluded and must come from `VmConfig`. 
+ +use crate::kernel::{ + patched_kernel_authenticode_sha384, tdx_kernel_hash_uses_precomputed_high_mem, + TDX_KERNEL_HASH_COMPAT_2G_MEMORY, TDX_KERNEL_HASH_STABLE_MIN_MEMORY, +}; +use crate::tdvf::{rtmr0_log_from_td_hob_hash_with_acpi_hashes, AcpiTableHashes, Tdvf}; +use crate::util::{measure_log, measure_sha384}; +use anyhow::{bail, Context, Result}; +use dstack_types::{ + OvmfVariant, TdxImageMeasurement, TdxMrtdCandidates, TdxOsImageMeasurement, + TdxOsImageMeasurementDocument, TdxTdvfMeasurement, VmConfig, +}; +use fs_err as fs; +use serde::Deserialize; +use std::path::Path; + +#[derive(Debug, Deserialize)] +struct ImageMetadata { + #[serde(default)] + cmdline: Option, + kernel: String, + initrd: String, + bios: String, + #[serde(default)] + version: String, + #[serde(default)] + ovmf_variant: Option, +} + +#[derive(Debug, Clone)] +pub struct TdxRtmr0AcpiHashes { + pub loader: Vec, + pub rsdp: Vec, + pub tables: Vec, +} + +#[derive(Debug, Clone)] +pub struct TdxMeasurementsWithoutRtmr0 { + pub mrtd: Vec, + pub rtmr1: Vec, + pub rtmr2: Vec, +} + +fn validate_bytes_field(value: &[u8], field: &str, expected_len: usize) -> Result> { + if value.len() != expected_len { + bail!( + "{field} has invalid length {}, expected {expected_len}", + value.len() + ); + } + Ok(value.to_vec()) +} + +fn select_mrtd(measurement: &TdxOsImageMeasurement, vm_config: &VmConfig) -> Result> { + let machine = crate::Machine::builder() + .cpu_count(vm_config.cpu_count) + .memory_size(vm_config.memory_size) + .firmware("") + .kernel("") + .initrd("") + .kernel_cmdline("") + .root_verity(true) + .hotplug_off(vm_config.hotplug_off) + .maybe_two_pass_add_pages(vm_config.qemu_single_pass_add_pages) + .maybe_pic(vm_config.pic) + .maybe_qemu_version(vm_config.qemu_version.clone()) + .maybe_pci_hole64_size(if vm_config.pci_hole64_size > 0 { + Some(vm_config.pci_hole64_size) + } else { + None + }) + .hugepages(vm_config.hugepages) + .num_gpus(vm_config.num_gpus) + 
.num_nvswitches(vm_config.num_nvswitches) + .host_share_mode(vm_config.host_share_mode.clone()) + .ovmf_variant(measurement.tdvf.ovmf_variant) + .build(); + let opts = machine + .versioned_options() + .context("failed to resolve QEMU measurement options")?; + let mrtd = if opts.two_pass_add_pages { + &measurement.tdvf.mrtd.two_pass + } else { + &measurement.tdvf.mrtd.single_pass + }; + validate_bytes_field(mrtd, "tdx.measurement.tdvf.mrtd", 48) +} + +fn read_varuint(input: &mut &[u8]) -> Result { + let mut value = 0u64; + let mut shift = 0u32; + loop { + let (&byte, rest) = input + .split_first() + .context("truncated TD HOB witness varuint")?; + *input = rest; + value |= ((byte & 0x7f) as u64) << shift; + if byte & 0x80 == 0 { + return Ok(value); + } + shift += 7; + if shift >= 64 { + bail!("TD HOB witness varuint is too large"); + } + } +} + +fn measure_td_hob_from_witness_data(data: &[u8], memory_size: u64) -> Result> { + let mut input = data; + let base_page = read_varuint(&mut input)?; + let td_hob_page_delta = read_varuint(&mut input)?; + let range_count = read_varuint(&mut input)?; + let td_hob_base_addr = (base_page + td_hob_page_delta) + .checked_mul(0x1000) + .context("TD HOB base address overflow")?; + + let mut memory_acceptor = MemoryAcceptor::new(0, memory_size); + for _ in 0..range_count { + let start_page_delta = read_varuint(&mut input)?; + let page_count = read_varuint(&mut input)?; + let start = (base_page + start_page_delta) + .checked_mul(0x1000) + .context("TD HOB range start overflow")?; + let len = page_count + .checked_mul(0x1000) + .context("TD HOB range length overflow")?; + memory_acceptor.accept(start, start + len); + } + if !input.is_empty() { + bail!("TD HOB witness has trailing bytes"); + } + + let mut td_hob = Vec::new(); + td_hob.extend_from_slice(&[0x01, 0x00]); // HobType + td_hob.extend_from_slice(&56u16.to_le_bytes()); // HobLength + td_hob.extend_from_slice(&[0u8; 4]); // Reserved + 
td_hob.extend_from_slice(&9u32.to_le_bytes()); // Version + td_hob.extend_from_slice(&[0u8; 4]); // BootMode + td_hob.extend_from_slice(&[0u8; 8]); // EfiMemoryTop + td_hob.extend_from_slice(&[0u8; 8]); // EfiMemoryBottom + td_hob.extend_from_slice(&[0u8; 8]); // EfiFreeMemoryTop + td_hob.extend_from_slice(&[0u8; 8]); // EfiFreeMemoryBottom + td_hob.extend_from_slice(&[0u8; 8]); // EfiEndOfHobList (placeholder) + + let mut add_memory_resource_hob = |resource_type: u8, start: u64, length: u64| { + td_hob.extend_from_slice(&[0x03, 0x00]); // HobType + td_hob.extend_from_slice(&48u16.to_le_bytes()); // HobLength + td_hob.extend_from_slice(&[0u8; 4]); // Reserved + td_hob.extend_from_slice(&[0u8; 16]); // Owner + td_hob.extend_from_slice(&resource_type.to_le_bytes()); + td_hob.extend_from_slice(&[0u8; 3]); // Padding for resource type + td_hob.extend_from_slice(&7u32.to_le_bytes()); // ResourceAttribute + td_hob.extend_from_slice(&start.to_le_bytes()); + td_hob.extend_from_slice(&length.to_le_bytes()); + }; + + let (_, last_start, last_end) = memory_acceptor.ranges.pop().context("No ranges")?; + + for (accepted, start, end) in memory_acceptor.ranges { + if end < start { + bail!("Invalid memory range: end < start"); + } + let size = end - start; + if accepted { + add_memory_resource_hob(0x00, start, size); + } else { + add_memory_resource_hob(0x07, start, size); + } + } + + if last_end < last_start { + bail!("Invalid last memory range: end < start"); + } + if memory_size >= TDX_KERNEL_HASH_STABLE_MIN_MEMORY { + if last_start < 0x80000000u64 { + add_memory_resource_hob(0x07, last_start, 0x80000000u64 - last_start); + } + if last_end > 0x80000000u64 { + add_memory_resource_hob(0x07, 0x100000000, last_end - 0x80000000u64); + } + } else { + add_memory_resource_hob(0x07, last_start, last_end - last_start); + } + + let end_of_hob_list = td_hob_base_addr + td_hob.len() as u64 + 8; + td_hob[48..56].copy_from_slice(&end_of_hob_list.to_le_bytes()); + + Ok(measure_sha384(&td_hob)) 
+} + +struct MemoryAcceptor { + ranges: Vec<(bool, u64, u64)>, +} + +impl MemoryAcceptor { + fn new(start: u64, size: u64) -> Self { + Self { + ranges: vec![(false, start, start + size)], + } + } + + fn accept(&mut self, start: u64, end: u64) { + if start >= end { + return; + } + + let mut new_ranges = Vec::new(); + + for &(is_accepted, range_start, range_end) in &self.ranges { + if is_accepted || range_end <= start || range_start >= end { + new_ranges.push((is_accepted, range_start, range_end)); + } else { + if range_start < start { + new_ranges.push((false, range_start, start)); + } + if range_end > end { + new_ranges.push((false, end, range_end)); + } + } + } + new_ranges.push((true, start, end)); + new_ranges.sort_by_key(|&(_, start, _)| start); + self.ranges = new_ranges; + } +} + +fn rtmr1_log_from_kernel_hash(kernel_hash: Vec) -> Vec> { + vec![ + kernel_hash, + measure_sha384(b"Calling EFI Application from Boot Option"), + measure_sha384(&[0x00, 0x00, 0x00, 0x00]), // Separator + measure_sha384(b"Exit Boot Services Invocation"), + measure_sha384(b"Exit Boot Services Returned with Success"), + ] +} + +/// Return the measured TDX kernel command line for a metadata cmdline. +/// +/// This mirrors the existing dstack TDX measurement replay path, which measures +/// the image-provided cmdline plus OVMF/QEMU's `initrd=initrd` suffix. +pub fn measured_kernel_cmdline(base_cmdline: &str) -> String { + format!("{base_cmdline} initrd=initrd") +} + +/// Generate the image-static TDX measurement material from an image directory. 
+pub fn tdx_os_image_measurement_for_image_dir(image_dir: &Path) -> Result { + let meta_path = image_dir.join("metadata.json"); + let meta_str = fs::read_to_string(&meta_path) + .with_context(|| format!("cannot read {}", meta_path.display()))?; + let meta: ImageMetadata = + serde_json::from_str(&meta_str).context("failed to parse image metadata.json")?; + + let base_cmdline = meta + .cmdline + .filter(|s| !s.trim().is_empty()) + .context("metadata.json cmdline is required for TDX os_image_hash")? + .to_string(); + + // Validate that the image identity carried by the measured cmdline is + // well-formed. The normalized rootfs hash is not stored separately to keep + // the TDX projection compact; it is already committed by the measured + // kernel command line digest. + crate::sev::rootfs_hash_from_cmdline(Some(&base_cmdline)) + .context("failed to parse dstack.rootfs_hash from TDX cmdline")?; + + let ovmf_variant = meta + .ovmf_variant + .or_else(|| { + if meta.version.is_empty() { + None + } else { + crate::ovmf_variant_for_version(&meta.version).ok() + } + }) + .unwrap_or_default(); + + let fw_data = fs::read(image_dir.join(&meta.bios)) + .with_context(|| format!("cannot read {}", image_dir.join(&meta.bios).display()))?; + let tdvf = Tdvf::parse(&fw_data).context("failed to parse TDX TDVF metadata")?; + + let initrd_path = image_dir.join(&meta.initrd); + let initrd = + fs::read(&initrd_path).with_context(|| format!("cannot read {}", initrd_path.display()))?; + let kernel_path = image_dir.join(&meta.kernel); + let kernel = + fs::read(&kernel_path).with_context(|| format!("cannot read {}", kernel_path.display()))?; + let kernel_authenticode = patched_kernel_authenticode_sha384( + &kernel, + initrd.len() as u32, + TDX_KERNEL_HASH_STABLE_MIN_MEMORY, + 0x28000, + ) + .context("failed to compute high-memory QEMU-patched kernel hash")?; + + Ok(TdxOsImageMeasurement { + image: TdxImageMeasurement { + kernel_cmdline_sha384: 
crate::kernel::measure_cmdline(&measured_kernel_cmdline( + &base_cmdline, + )), + kernel_authenticode, + initrd_sha384: measure_sha384(&initrd), + }, + tdvf: TdxTdvfMeasurement { + ovmf_variant, + mrtd: TdxMrtdCandidates { + single_pass: tdvf.mrtd_single_pass()?, + two_pass: tdvf.mrtd_two_pass()?, + }, + td_hob_witness: tdvf.td_hob_witness_v1()?, + }, + }) +} + +/// Generate the self-contained TDX measurement document for an image directory. +/// +/// The document contains both the hash projection and the resulting +/// `os_image_hash`, avoiding a separate `digest.tdx.txt` artifact. +pub fn tdx_os_image_measurement_document_for_image_dir( + image_dir: &Path, +) -> Result { + Ok(TdxOsImageMeasurementDocument::new( + tdx_os_image_measurement_for_image_dir(image_dir)?, + )) +} + +/// Compute the TDX static-material OS image hash for an image directory. +pub fn tdx_os_image_hash_for_image_dir(image_dir: &Path) -> Result<[u8; 32]> { + Ok(tdx_os_image_measurement_for_image_dir(image_dir)?.os_image_hash()) +} + +/// Compute expected TDX measurements from the self-contained `measurement.json` +/// TDX document and the three ACPI table digests captured in RTMR[0]. +/// +/// This path intentionally does not download or read the OS image. Because +/// QEMU's patched kernel Authenticode hash depends on exact guest RAM below +/// `TDX_KERNEL_HASH_STABLE_MIN_MEMORY`, the no-image-download path supports +/// CVMs at or above that threshold plus the exact 2 GiB placement, which QEMU +/// patches to the same kernel bytes as the high-memory case. 
+pub fn tdx_measurements_from_measurement_document( + document: &TdxOsImageMeasurementDocument, + vm_config: &VmConfig, + acpi_hashes: &TdxRtmr0AcpiHashes, +) -> Result { + if document.version != TdxOsImageMeasurementDocument::VERSION { + bail!( + "unsupported TDX measurement document version {}", + document.version + ); + } + if !tdx_kernel_hash_uses_precomputed_high_mem(vm_config.memory_size) { + bail!( + "TDX measurement attestation without image download requires memory_size == {} bytes ({} MiB) or >= {} bytes ({} MiB); got {} bytes", + TDX_KERNEL_HASH_COMPAT_2G_MEMORY, + TDX_KERNEL_HASH_COMPAT_2G_MEMORY / 1024 / 1024, + TDX_KERNEL_HASH_STABLE_MIN_MEMORY, + TDX_KERNEL_HASH_STABLE_MIN_MEMORY / 1024 / 1024, + vm_config.memory_size + ); + } + + let measurement = document + .decode_measurement() + .map_err(anyhow::Error::msg) + .context("failed to decode TDX measurement CBOR")?; + let mrtd = select_mrtd(&measurement, vm_config)?; + + let td_hob_hash = + measure_td_hob_from_witness_data(&measurement.tdvf.td_hob_witness, vm_config.memory_size) + .context("failed to measure TD HOB from witness")?; + let rtmr0_log = rtmr0_log_from_td_hob_hash_with_acpi_hashes( + td_hob_hash, + measurement.tdvf.ovmf_variant, + &AcpiTableHashes { + loader: acpi_hashes.loader.clone(), + rsdp: acpi_hashes.rsdp.clone(), + tables: acpi_hashes.tables.clone(), + }, + ) + .context("failed to compute RTMR0 from measurement document")?; + let rtmr0 = measure_log(&rtmr0_log); + + let kernel_hash = validate_bytes_field( + &measurement.image.kernel_authenticode, + "tdx.measurement.image.kernel_authenticode", + 48, + )?; + let rtmr1 = measure_log(&rtmr1_log_from_kernel_hash(kernel_hash)); + + let initrd_hash = validate_bytes_field( + &measurement.image.initrd_sha384, + "tdx.measurement.image.initrd_sha384", + 48, + )?; + let kernel_cmdline_hash = validate_bytes_field( + &measurement.image.kernel_cmdline_sha384, + "tdx.measurement.image.kernel_cmdline_sha384", + 48, + )?; + let rtmr2 = 
measure_log(&[kernel_cmdline_hash, initrd_hash]); + + Ok(crate::TdxMeasurements { + mrtd, + rtmr0, + rtmr1, + rtmr2, + }) +} + +/// Compute image-critical TDX measurements without RTMR[0]. +/// +/// RTMR[0] contains QEMU-generated ACPI blobs and other launch-environment +/// material. This helper verifies the OS-image binding pieces that do not need +/// QEMU: MRTD (TDVF firmware), RTMR[1] (QEMU-patched kernel image), and RTMR[2] +/// (kernel command line + initrd). +pub fn tdx_measurements_for_image_dir_without_rtmr0( + image_dir: &Path, + vm_config: &VmConfig, +) -> Result { + let meta_path = image_dir.join("metadata.json"); + let meta_str = fs::read_to_string(&meta_path) + .with_context(|| format!("cannot read {}", meta_path.display()))?; + let meta: ImageMetadata = + serde_json::from_str(&meta_str).context("failed to parse image metadata.json")?; + + let base_cmdline = meta + .cmdline + .filter(|s| !s.trim().is_empty()) + .context("metadata.json cmdline is required for TDX measurement")? 
+ .to_string(); + let kernel_cmdline = measured_kernel_cmdline(&base_cmdline); + + let firmware_path = image_dir.join(&meta.bios); + let kernel_path = image_dir.join(&meta.kernel); + let initrd_path = image_dir.join(&meta.initrd); + + let fw_data = fs::read(&firmware_path) + .with_context(|| format!("cannot read {}", firmware_path.display()))?; + let kernel_data = + fs::read(&kernel_path).with_context(|| format!("cannot read {}", kernel_path.display()))?; + let initrd_data = + fs::read(&initrd_path).with_context(|| format!("cannot read {}", initrd_path.display()))?; + + let ovmf_variant = vm_config + .ovmf_variant + .or(meta.ovmf_variant) + .or_else(|| { + if meta.version.is_empty() { + None + } else { + crate::ovmf_variant_for_version(&meta.version).ok() + } + }) + .unwrap_or_else(|| crate::ovmf_variant_for_image(vm_config.image.as_deref())); + + let firmware = firmware_path.display().to_string(); + let kernel = kernel_path.display().to_string(); + let initrd = initrd_path.display().to_string(); + let machine = crate::Machine::builder() + .cpu_count(vm_config.cpu_count) + .memory_size(vm_config.memory_size) + .firmware(&firmware) + .kernel(&kernel) + .initrd(&initrd) + .kernel_cmdline(&kernel_cmdline) + .root_verity(true) + .hotplug_off(vm_config.hotplug_off) + .maybe_two_pass_add_pages(vm_config.qemu_single_pass_add_pages) + .maybe_pic(vm_config.pic) + .maybe_qemu_version(vm_config.qemu_version.clone()) + .maybe_pci_hole64_size(if vm_config.pci_hole64_size > 0 { + Some(vm_config.pci_hole64_size) + } else { + None + }) + .hugepages(vm_config.hugepages) + .num_gpus(vm_config.num_gpus) + .num_nvswitches(vm_config.num_nvswitches) + .host_share_mode(vm_config.host_share_mode.clone()) + .ovmf_variant(ovmf_variant) + .build(); + + let tdvf = Tdvf::parse(&fw_data).context("failed to parse TDX TDVF metadata")?; + let mrtd = tdvf.mrtd(&machine).context("failed to compute MRTD")?; + + let rtmr1_log = crate::kernel::rtmr1_log( + &kernel_data, + initrd_data.len() as u32, + 
vm_config.memory_size, + 0x28000, + ) + .context("failed to compute RTMR1")?; + let rtmr1 = measure_log(&rtmr1_log); + + let rtmr2_log = vec![ + crate::kernel::measure_cmdline(&kernel_cmdline), + measure_sha384(&initrd_data), + ]; + let rtmr2 = measure_log(&rtmr2_log); + + Ok(TdxMeasurementsWithoutRtmr0 { mrtd, rtmr1, rtmr2 }) +} + +/// Compute TDX measurements without invoking QEMU-derived helper binaries. +/// +/// RTMR[0] includes ACPI blobs generated by QEMU at launch time. The caller +/// supplies the already-measured ACPI event digests from the hardware-bound +/// event log; this function recomputes the rest of the TDX image measurement +/// from image files and VM configuration. +pub fn tdx_measurements_for_image_dir_with_acpi_hashes( + image_dir: &Path, + vm_config: &VmConfig, + acpi_hashes: &TdxRtmr0AcpiHashes, +) -> Result { + let meta_path = image_dir.join("metadata.json"); + let meta_str = fs::read_to_string(&meta_path) + .with_context(|| format!("cannot read {}", meta_path.display()))?; + let meta: ImageMetadata = + serde_json::from_str(&meta_str).context("failed to parse image metadata.json")?; + + let base_cmdline = meta + .cmdline + .filter(|s| !s.trim().is_empty()) + .context("metadata.json cmdline is required for TDX measurement")? 
+ .to_string(); + let kernel_cmdline = measured_kernel_cmdline(&base_cmdline); + + let firmware_path = image_dir.join(&meta.bios); + let kernel_path = image_dir.join(&meta.kernel); + let initrd_path = image_dir.join(&meta.initrd); + + let fw_data = fs::read(&firmware_path) + .with_context(|| format!("cannot read {}", firmware_path.display()))?; + let kernel_data = + fs::read(&kernel_path).with_context(|| format!("cannot read {}", kernel_path.display()))?; + let initrd_data = + fs::read(&initrd_path).with_context(|| format!("cannot read {}", initrd_path.display()))?; + + let ovmf_variant = vm_config + .ovmf_variant + .or(meta.ovmf_variant) + .or_else(|| { + if meta.version.is_empty() { + None + } else { + crate::ovmf_variant_for_version(&meta.version).ok() + } + }) + .unwrap_or_else(|| crate::ovmf_variant_for_image(vm_config.image.as_deref())); + + let firmware = firmware_path.display().to_string(); + let kernel = kernel_path.display().to_string(); + let initrd = initrd_path.display().to_string(); + let machine = crate::Machine::builder() + .cpu_count(vm_config.cpu_count) + .memory_size(vm_config.memory_size) + .firmware(&firmware) + .kernel(&kernel) + .initrd(&initrd) + .kernel_cmdline(&kernel_cmdline) + .root_verity(true) + .hotplug_off(vm_config.hotplug_off) + .maybe_two_pass_add_pages(vm_config.qemu_single_pass_add_pages) + .maybe_pic(vm_config.pic) + .maybe_qemu_version(vm_config.qemu_version.clone()) + .maybe_pci_hole64_size(if vm_config.pci_hole64_size > 0 { + Some(vm_config.pci_hole64_size) + } else { + None + }) + .hugepages(vm_config.hugepages) + .num_gpus(vm_config.num_gpus) + .num_nvswitches(vm_config.num_nvswitches) + .host_share_mode(vm_config.host_share_mode.clone()) + .ovmf_variant(ovmf_variant) + .build(); + + let tdvf = Tdvf::parse(&fw_data).context("failed to parse TDX TDVF metadata")?; + let mrtd = tdvf.mrtd(&machine).context("failed to compute MRTD")?; + + let rtmr0_log = tdvf + .rtmr0_log_with_acpi_hashes( + vm_config.memory_size, + 
ovmf_variant, + &AcpiTableHashes { + loader: acpi_hashes.loader.clone(), + rsdp: acpi_hashes.rsdp.clone(), + tables: acpi_hashes.tables.clone(), + }, + ) + .context("failed to compute RTMR0 without ACPI table generation")?; + let rtmr0 = measure_log(&rtmr0_log); + + let rtmr1_log = crate::kernel::rtmr1_log( + &kernel_data, + initrd_data.len() as u32, + vm_config.memory_size, + 0x28000, + ) + .context("failed to compute RTMR1")?; + let rtmr1 = measure_log(&rtmr1_log); + + let rtmr2_log = vec![ + crate::kernel::measure_cmdline(&kernel_cmdline), + measure_sha384(&initrd_data), + ]; + let rtmr2 = measure_log(&rtmr2_log); + + Ok(crate::TdxMeasurements { + mrtd, + rtmr0, + rtmr1, + rtmr2, + }) +} diff --git a/dstack-types/Cargo.toml b/dstack-types/Cargo.toml index 1bea45ec5..526d5192b 100644 --- a/dstack-types/Cargo.toml +++ b/dstack-types/Cargo.toml @@ -10,6 +10,8 @@ edition.workspace = true license.workspace = true [dependencies] +ciborium.workspace = true +hex = { workspace = true, features = ["std"] } or-panic.workspace = true scale = { workspace = true, features = ["derive"] } serde = { workspace = true, features = ["derive"] } diff --git a/dstack-types/src/lib.rs b/dstack-types/src/lib.rs index d891eee93..cac0313c1 100644 --- a/dstack-types/src/lib.rs +++ b/dstack-types/src/lib.rs @@ -2,9 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 -use std::path::Path; +use std::{io::Cursor, path::Path}; -use or_panic::ResultOrPanic; use scale::{Decode, Encode}; use serde::{Deserialize, Serialize}; use serde_human_bytes as hex_bytes; @@ -34,6 +33,52 @@ pub enum OvmfVariant { Stable202505, } +impl OvmfVariant { + pub fn to_u8(self) -> u8 { + match self { + Self::Pre202505 => 0, + Self::Stable202505 => 1, + } + } + + pub fn from_u8(value: u8) -> Option { + match value { + 0 => Some(Self::Pre202505), + 1 => Some(Self::Stable202505), + _ => None, + } + } +} + +/// Selects how a TDX attestation should bind the OS image. 
+/// +/// `Legacy` preserves the existing verifier behavior: `vm_config.os_image_hash` +/// is the content digest (`digest.txt`) and the verifier recomputes the full +/// TDX launch measurement using the legacy image/QEMU-derived path. +/// +/// `Measurement` opts into the no-QEMU verifier path: `vm_config.os_image_hash` +/// is `measurement.json.tdx.os_image_hash`, `vm_config.tdx_measurement` carries +/// the self-contained measurement material, and KMS/verifier select the new +/// logic from this vm_config flag while the attestation quote remains the +/// existing `DstackTdx`. +#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum TdxAttestationVariant { + #[default] + Legacy, + Measurement, +} + +impl TdxAttestationVariant { + pub fn is_legacy(&self) -> bool { + matches!(self, Self::Legacy) + } + + pub fn is_measurement(&self) -> bool { + matches!(self, Self::Measurement) + } +} + #[derive(Deserialize, Serialize, Debug, Clone)] pub struct AppCompose { pub manifest_version: u32, @@ -259,6 +304,14 @@ pub struct VmConfig { /// (e.g. parsing the OS version out of `image`). #[serde(default, skip_serializing_if = "Option::is_none")] pub ovmf_variant: Option, + /// TDX-only attestation/hash scheme selector. Defaults to `legacy` and is + /// omitted from legacy configs to keep old behavior and wire shape stable. + #[serde(default, skip_serializing_if = "TdxAttestationVariant::is_legacy")] + pub tdx_attestation_variant: TdxAttestationVariant, + /// TDX-only no-image-download measurement material. Present only when + /// `tdx_attestation_variant = "measurement"` and omitted for legacy TDX. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub tdx_measurement: Option, } /// One OVMF SEV metadata section (gpa/size/type) that affects the SEV-SNP @@ -270,34 +323,422 @@ pub struct OvmfSection { pub section_type: u32, } -/// Image-invariant projection that determines the AMD SEV-SNP OS image identity. -/// -/// `os_image_hash` is the SHA-256 of this projection, canonically serialized -/// (JCS). It is shared by the VMM/KMS (which derive it from a verified launch -/// measurement) and the image build (which precomputes `digest.sev.txt`), so -/// both sides agree. It deliberately EXCLUDES per-deployment values (vcpus, -/// vcpu_type, guest_features, app_id, compose_hash): the same OS image must hash +fn cbor_to_vec(value: &T, context: &str) -> Vec { + let mut out = Vec::new(); + ciborium::ser::into_writer(value, &mut out) + .unwrap_or_else(|e| panic!("{context}: failed to encode CBOR: {e}")); + out +} + +fn cbor_from_slice( + bytes: &[u8], + context: &str, +) -> Result { + ciborium::de::from_reader(Cursor::new(bytes)) + .map_err(|e| format!("{context}: failed to decode CBOR: {e}")) +} + +fn sha256(bytes: &[u8]) -> [u8; 32] { + use sha2::{Digest, Sha256}; + Sha256::digest(bytes).into() +} + +fn sha256_hex(bytes: &[u8]) -> String { + hex::encode(sha256(bytes)) +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct CborOvmfSection { + gpa: u64, + size: u64, + #[serde(rename = "type")] + section_type: u32, +} + +impl From<&OvmfSection> for CborOvmfSection { + fn from(section: &OvmfSection) -> Self { + Self { + gpa: section.gpa, + size: section.size, + section_type: section.section_type, + } + } +} + +impl From for OvmfSection { + fn from(section: CborOvmfSection) -> Self { + Self { + gpa: section.gpa, + size: section.size, + section_type: section.section_type, + } + } +} + +/// Image-invariant projection that determines the AMD SEV-SNP OS image +/// identity. 
It deliberately excludes per-deployment values (vcpus, vcpu_type, +/// guest_features, app_id, compose_hash): the same OS image must hash /// identically regardless of how it is launched. +/// +/// `os_image_hash` is SHA-256 over the CBOR representation of this projection, +/// not over the outer measurement.json field names. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct SevOsImageMeasurement { - pub rootfs_hash: String, - pub base_cmdline: Option, - pub ovmf_hash: String, - pub kernel_hash: String, - pub initrd_hash: String, + /// SHA-256 of the kernel command line bytes as measured in the SEV-SNP hash + /// table (trimmed command line plus trailing NUL byte). This avoids carrying + /// the full plaintext command line in image metadata while preserving the + /// exact measured value used by OVMF/QEMU. + #[serde(with = "hex_bytes")] + pub kernel_cmdline_sha256: Vec, + #[serde(with = "hex_bytes")] + pub ovmf_hash: Vec, + #[serde(with = "hex_bytes")] + pub kernel_hash: Vec, + #[serde(with = "hex_bytes")] + pub initrd_hash: Vec, pub sev_hashes_table_gpa: u64, pub sev_es_reset_eip: u32, pub ovmf_sections: Vec, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct CborSevOsImageMeasurement { + /// Measured kernel cmdline SHA-256. + #[serde(rename = "cmdline_sha256", with = "hex_bytes")] + kernel_cmdline_sha256: Vec, + /// OVMF launch digest. + #[serde(with = "hex_bytes")] + ovmf_hash: Vec, + /// Kernel SHA-256. + #[serde(with = "hex_bytes")] + kernel_hash: Vec, + /// Initrd SHA-256. + #[serde(with = "hex_bytes")] + initrd_hash: Vec, + /// SEV hash table GPA. + hashes_table_gpa: u64, + /// SEV-ES AP reset EIP. + reset_eip: u32, + /// OVMF metadata sections. 
+ ovmf_sections: Vec, +} + +impl From<&SevOsImageMeasurement> for CborSevOsImageMeasurement { + fn from(measurement: &SevOsImageMeasurement) -> Self { + Self { + kernel_cmdline_sha256: measurement.kernel_cmdline_sha256.clone(), + ovmf_hash: measurement.ovmf_hash.clone(), + kernel_hash: measurement.kernel_hash.clone(), + initrd_hash: measurement.initrd_hash.clone(), + hashes_table_gpa: measurement.sev_hashes_table_gpa, + reset_eip: measurement.sev_es_reset_eip, + ovmf_sections: measurement.ovmf_sections.iter().map(Into::into).collect(), + } + } +} + +impl From for SevOsImageMeasurement { + fn from(measurement: CborSevOsImageMeasurement) -> Self { + Self { + kernel_cmdline_sha256: measurement.kernel_cmdline_sha256, + ovmf_hash: measurement.ovmf_hash, + kernel_hash: measurement.kernel_hash, + initrd_hash: measurement.initrd_hash, + sev_hashes_table_gpa: measurement.hashes_table_gpa, + sev_es_reset_eip: measurement.reset_eip, + ovmf_sections: measurement + .ovmf_sections + .into_iter() + .map(Into::into) + .collect(), + } + } +} + impl SevOsImageMeasurement { - /// SHA-256 over the canonical (JCS) serialization of this projection. + /// CBOR representation used as the `os_image_hash` input. + pub fn to_cbor_vec(&self) -> Vec { + cbor_to_vec( + &CborSevOsImageMeasurement::from(self), + "SevOsImageMeasurement", + ) + } + + pub fn from_cbor_slice(bytes: &[u8]) -> Result { + cbor_from_slice::(bytes, "SevOsImageMeasurement").map(Into::into) + } + + pub fn cbor_json_value_from_slice(bytes: &[u8]) -> Result { + let cbor = cbor_from_slice::(bytes, "SevOsImageMeasurement")?; + serde_json::to_value(cbor) + .map_err(|e| format!("SevOsImageMeasurement: failed to convert CBOR to JSON: {e}")) + } + + /// SHA-256 over the CBOR representation of this projection. + pub fn os_image_hash(&self) -> [u8; 32] { + sha256(&self.to_cbor_vec()) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct SevOsImageMeasurementDocument { + /// Document schema version. 
+ #[serde(alias = "v")] + pub version: u32, + /// SHA-256 over the CBOR `measurement` bytes. This field is not included in + /// its own hash input. + #[serde(alias = "h")] + pub os_image_hash: String, + /// CBOR bytes for `SevOsImageMeasurement`. + #[serde(alias = "m", with = "hex_bytes")] + pub measurement: Vec, +} + +impl SevOsImageMeasurementDocument { + pub const VERSION: u32 = 2; + + pub fn new(measurement: SevOsImageMeasurement) -> Self { + let measurement = measurement.to_cbor_vec(); + let os_image_hash = sha256_hex(&measurement); + Self { + version: Self::VERSION, + os_image_hash, + measurement, + } + } + + pub fn decode_measurement(&self) -> Result { + SevOsImageMeasurement::from_cbor_slice(&self.measurement) + } + + pub fn decode_measurement_value(&self) -> Result { + SevOsImageMeasurement::cbor_json_value_from_slice(&self.measurement) + } + + pub fn measurement_os_image_hash(&self) -> [u8; 32] { + sha256(&self.measurement) + } +} + +/// Image-invariant projection that determines the TDX OS image identity. +/// +/// This is the build-time, image-static material for the verifier-side +/// no-image-download TDX path. Dynamic VM parameters (vCPU count, RAM size, +/// QEMU PCI topology, GPU count, etc.) are deliberately excluded and must be +/// supplied by `VmConfig` when replaying RTMRs. +/// +/// `os_image_hash` is SHA-256 over the CBOR representation of this projection, +/// not over the outer measurement.json field names. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct TdxOsImageMeasurement { + pub image: TdxImageMeasurement, + pub tdvf: TdxTdvfMeasurement, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct TdxImageMeasurement { + /// SHA-384 of the exact kernel command line event measured into RTMR[2]. + /// + /// The measured value is the image-provided command line plus OVMF/QEMU's + /// `initrd=initrd` suffix, encoded as UTF-16LE with a trailing NUL. 
+ #[serde(with = "hex_bytes")] + pub kernel_cmdline_sha384: Vec, + /// Authenticode SHA-384 digest of the QEMU-patched kernel image when the + /// guest memory is at or above QEMU's high-memory TDX initrd placement + /// threshold. Below that threshold the patched kernel header depends on the + /// exact guest memory size, so the no-image-download verifier rejects it. + #[serde(with = "hex_bytes")] + pub kernel_authenticode: Vec, + /// SHA-384 of the initrd file bytes. This is the second RTMR[2] event. + #[serde(with = "hex_bytes")] + pub initrd_sha384: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct TdxTdvfMeasurement { + /// OVMF RTMR[0] event layout. + pub ovmf_variant: OvmfVariant, + pub mrtd: TdxMrtdCandidates, + /// Compact TdHobWitnessV1 byte string. + #[serde(with = "hex_bytes")] + pub td_hob_witness: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct TdxMrtdCandidates { + /// Candidate MRTD for QEMU's single-pass MEM.PAGE.ADD/MR.EXTEND order. + #[serde(with = "hex_bytes")] + pub single_pass: Vec, + /// Candidate MRTD for QEMU's two-pass MEM.PAGE.ADD then MR.EXTEND order. + #[serde(with = "hex_bytes")] + pub two_pass: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct CborTdxImageMeasurement { + /// Measured kernel cmdline SHA-384. + #[serde(rename = "cmdline_sha384", with = "hex_bytes")] + kernel_cmdline_sha384: Vec, + /// QEMU-patched kernel Authenticode SHA-384. + #[serde(with = "hex_bytes")] + kernel_authenticode: Vec, + /// Initrd SHA-384. 
+ #[serde(with = "hex_bytes")] + initrd_sha384: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct CborTdxMrtdCandidates { + #[serde(with = "hex_bytes")] + single_pass: Vec, + #[serde(with = "hex_bytes")] + two_pass: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct CborTdxTdvfMeasurement { + #[serde(rename = "ovmf")] + ovmf_variant: OvmfVariant, + mrtd: CborTdxMrtdCandidates, + #[serde(rename = "td_hob", with = "hex_bytes")] + td_hob_witness: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct CborTdxOsImageMeasurement { + image: CborTdxImageMeasurement, + tdvf: CborTdxTdvfMeasurement, +} + +impl From<&TdxOsImageMeasurement> for CborTdxOsImageMeasurement { + fn from(measurement: &TdxOsImageMeasurement) -> Self { + Self { + image: CborTdxImageMeasurement { + kernel_cmdline_sha384: measurement.image.kernel_cmdline_sha384.clone(), + kernel_authenticode: measurement.image.kernel_authenticode.clone(), + initrd_sha384: measurement.image.initrd_sha384.clone(), + }, + tdvf: CborTdxTdvfMeasurement { + ovmf_variant: measurement.tdvf.ovmf_variant, + mrtd: CborTdxMrtdCandidates { + single_pass: measurement.tdvf.mrtd.single_pass.clone(), + two_pass: measurement.tdvf.mrtd.two_pass.clone(), + }, + td_hob_witness: measurement.tdvf.td_hob_witness.clone(), + }, + } + } +} + +impl From for TdxOsImageMeasurement { + fn from(measurement: CborTdxOsImageMeasurement) -> Self { + Self { + image: TdxImageMeasurement { + kernel_cmdline_sha384: measurement.image.kernel_cmdline_sha384, + kernel_authenticode: measurement.image.kernel_authenticode, + initrd_sha384: measurement.image.initrd_sha384, + }, + tdvf: TdxTdvfMeasurement { + ovmf_variant: measurement.tdvf.ovmf_variant, + mrtd: TdxMrtdCandidates { + single_pass: measurement.tdvf.mrtd.single_pass, + two_pass: measurement.tdvf.mrtd.two_pass, + }, + td_hob_witness: measurement.tdvf.td_hob_witness, + }, + } + } +} + +#[derive(Debug, Clone, PartialEq, 
Eq, Serialize, Deserialize)] +pub struct TdxOsImageMeasurementDocument { + /// Document schema version. + #[serde(alias = "v")] + pub version: u32, + /// SHA-256 over the CBOR `measurement` bytes. This field is not included in + /// its own hash input. + #[serde(alias = "h")] + pub os_image_hash: String, + /// CBOR bytes for `TdxOsImageMeasurement`. + #[serde(alias = "m", with = "hex_bytes")] + pub measurement: Vec, +} + +impl TdxOsImageMeasurement { + /// CBOR representation used as the `os_image_hash` input. + pub fn to_cbor_vec(&self) -> Vec { + cbor_to_vec( + &CborTdxOsImageMeasurement::from(self), + "TdxOsImageMeasurement", + ) + } + + pub fn from_cbor_slice(bytes: &[u8]) -> Result { + let cbor = cbor_from_slice::(bytes, "TdxOsImageMeasurement")?; + Ok(cbor.into()) + } + + pub fn cbor_json_value_from_slice(bytes: &[u8]) -> Result { + let cbor = cbor_from_slice::(bytes, "TdxOsImageMeasurement")?; + serde_json::to_value(cbor) + .map_err(|e| format!("TdxOsImageMeasurement: failed to convert CBOR to JSON: {e}")) + } + + /// SHA-256 over the CBOR representation of this projection. pub fn os_image_hash(&self) -> [u8; 32] { - use sha2::{Digest, Sha256}; - // JCS serialization of this plain owned struct (strings/ints/array) - // cannot fail; panic loudly if that invariant is ever broken. 
- let canonical = serde_jcs::to_vec(self).or_panic("SevOsImageMeasurement JCS serialization"); - Sha256::digest(canonical).into() + sha256(&self.to_cbor_vec()) + } +} + +impl TdxOsImageMeasurementDocument { + pub const VERSION: u32 = 2; + + pub fn new(measurement: TdxOsImageMeasurement) -> Self { + let measurement = measurement.to_cbor_vec(); + let os_image_hash = sha256_hex(&measurement); + Self { + version: Self::VERSION, + os_image_hash, + measurement, + } + } + + pub fn decode_measurement(&self) -> Result { + TdxOsImageMeasurement::from_cbor_slice(&self.measurement) + } + + pub fn decode_measurement_value(&self) -> Result { + TdxOsImageMeasurement::cbor_json_value_from_slice(&self.measurement) + } + + pub fn measurement_os_image_hash(&self) -> [u8; 32] { + sha256(&self.measurement) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct OsImageMeasurementDocument { + /// Document schema version. + #[serde(alias = "v")] + pub version: u32, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tdx: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub snp: Option, +} + +impl OsImageMeasurementDocument { + pub const VERSION: u32 = 2; + + pub fn new( + tdx: Option, + snp: Option, + ) -> Self { + Self { + version: Self::VERSION, + tdx, + snp, + } } } diff --git a/gateway/src/config.rs b/gateway/src/config.rs index 68db41c84..e43bf54bd 100644 --- a/gateway/src/config.rs +++ b/gateway/src/config.rs @@ -9,6 +9,7 @@ use load_config::load_config; use rocket::figment::Figment; use serde::{Deserialize, Serialize}; use std::net::Ipv4Addr; +use std::path::PathBuf; use std::time::Duration; use tracing::info; @@ -113,6 +114,12 @@ pub struct ProxyConfig { pub connect_top_n: usize, pub localhost_enabled: bool, pub workers: usize, + #[serde(default)] + pub base_domain: Option, + #[serde(default)] + pub cert_chain: Option, + #[serde(default)] + pub cert_key: Option, pub app_address_ns_prefix: String, pub 
app_address_ns_compat: bool, /// Maximum concurrent connections per app. 0 means unlimited. diff --git a/gateway/src/main_service.rs b/gateway/src/main_service.rs index 74b640a2d..14b0f93dd 100644 --- a/gateway/src/main_service.rs +++ b/gateway/src/main_service.rs @@ -39,8 +39,8 @@ use crate::{ cert_store::{CertResolver, CertStoreBuilder}, config::{Config, TlsConfig}, kv::{ - fetch_peers_from_bootnode, AppIdValidator, HttpsClientConfig, InstanceData, KvStore, - NodeData, NodeStatus, PortPolicy, WaveKvSyncService, + fetch_peers_from_bootnode, AppIdValidator, CertData, HttpsClientConfig, InstanceData, + KvStore, NodeData, NodeStatus, PortPolicy, WaveKvSyncService, }, models::{InstanceInfo, PortPolicyView, WgConf}, proxy::{create_acceptor_with_cert_resolver, AddressGroup, AddressInfo}, @@ -267,6 +267,32 @@ impl ProxyInner { all_cert_data.len() ); } + if let (Some(base_domain), Some(cert_chain), Some(cert_key)) = ( + &config.proxy.base_domain, + &config.proxy.cert_chain, + &config.proxy.cert_key, + ) { + let cert_pem = std::fs::read_to_string(cert_chain).with_context(|| { + format!("failed to read proxy cert_chain {}", cert_chain.display()) + })?; + let key_pem = std::fs::read_to_string(cert_key) + .with_context(|| format!("failed to read proxy cert_key {}", cert_key.display()))?; + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let cert_data = CertData { + cert_pem, + key_pem, + not_after: now + 14 * 24 * 60 * 60, + issued_by: config.sync.node_id, + issued_at: now, + }; + cert_resolver + .update_cert(base_domain, &cert_data) + .with_context(|| format!("failed to load static proxy cert for {base_domain}"))?; + info!("CertStore: loaded static proxy certificate for *.{base_domain}"); + } // Create multi-domain certbot (uses KvStore configs for DNS credentials and domains) let certbot = Arc::new(DistributedCertBot::new( diff --git a/verifier/src/verification.rs b/verifier/src/verification.rs index 49326d30c..43b9be3ff 
100644 --- a/verifier/src/verification.rs +++ b/verifier/src/verification.rs @@ -10,7 +10,9 @@ use std::{ use anyhow::{anyhow, bail, Context, Result}; use cc_eventlog::TdxEvent; -use dstack_mr::{RtmrLog, TdxMeasurementDetails, TdxMeasurements}; +use dstack_mr::{ + tdx::TdxRtmr0AcpiHashes, RtmrLog, RtmrLogs, TdxMeasurementDetails, TdxMeasurements, +}; use dstack_types::VmConfig; use hex_literal::hex; use ra_tls::attestation::{ @@ -149,6 +151,7 @@ struct CachedMeasurement { } struct ImagePaths { + image_dir: PathBuf, fw_path: PathBuf, kernel_path: PathBuf, initrd_path: PathBuf, @@ -359,6 +362,68 @@ impl CvmVerifier { Ok(measurements) } + fn image_content_digest(image_dir: &Path) -> Result>> { + let sha256sum_path = image_dir.join("sha256sum.txt"); + if !sha256sum_path.exists() { + return Ok(None); + } + let files_doc = + fs_err::read_to_string(&sha256sum_path).context("Failed to read sha256sum.txt")?; + Ok(Some( + Sha256::new_with_prefix(files_doc.as_bytes()) + .finalize() + .to_vec(), + )) + } + + fn image_hash_matches_legacy_digest(image_dir: &Path, expected: &[u8]) -> Result { + Ok(Self::image_content_digest(image_dir)? 
+ .as_deref() + .is_some_and(|digest| digest == expected)) + } + + fn tdx_acpi_hashes_from_event_log( + ovmf_variant: dstack_types::OvmfVariant, + event_log: &[TdxEvent], + ) -> Result { + let rtmr0_events = event_log + .iter() + .filter(|event| event.imr == 0) + .collect::>(); + let (loader_idx, rsdp_idx, tables_idx) = match ovmf_variant { + dstack_types::OvmfVariant::Pre202505 => (8, 9, 10), + dstack_types::OvmfVariant::Stable202505 => (10, 11, 12), + }; + let (loader_idx, rsdp_idx, tables_idx) = if rtmr0_events.len() == 3 { + (0, 1, 2) + } else { + (loader_idx, rsdp_idx, tables_idx) + }; + let get_digest = |idx: usize, label: &str| -> Result> { + let event = rtmr0_events.get(idx).with_context(|| { + format!( + "TDX RTMR0 event log is missing {label} event at index {idx} \ + for {:?} OVMF; found {} RTMR0 events", + ovmf_variant, + rtmr0_events.len() + ) + })?; + let digest = event.digest(); + if digest.len() != 48 { + bail!( + "TDX RTMR0 {label} event digest has invalid length {}, expected 48", + digest.len() + ); + } + Ok(digest) + }; + Ok(TdxRtmr0AcpiHashes { + loader: get_digest(loader_idx, "ACPI loader")?, + rsdp: get_digest(rsdp_idx, "ACPI RSDP")?, + tables: get_digest(tables_idx, "ACPI tables")?, + }) + } + /// Helper method to ensure image is downloaded and return image paths async fn ensure_image_downloaded(&self, vm_config: &VmConfig) -> Result { let hex_os_image_hash = hex::encode(&vm_config.os_image_hash); @@ -391,6 +456,7 @@ impl CvmVerifier { let kernel_cmdline = image_info.cmdline + " initrd=initrd"; Ok(ImagePaths { + image_dir, fw_path, kernel_path, initrd_path, @@ -526,8 +592,23 @@ impl CvmVerifier { .await?; } AttestationQuote::DstackTdx(_) => { - self.verify_os_image_hash_for_dstack_tdx(&vm_config, attestation, debug, details) + if vm_config.tdx_attestation_variant.is_measurement() { + self.verify_os_image_hash_for_dstack_tdx_measurement( + &vm_config, + attestation, + debug, + details, + ) + .await?; + } else { + 
self.verify_os_image_hash_for_dstack_tdx( + &vm_config, + attestation, + debug, + details, + ) .await?; + } } AttestationQuote::DstackNitroEnclave(_) => { let DstackVerifiedReport::DstackNitroEnclave(report) = &attestation.report else { @@ -596,13 +677,11 @@ impl CvmVerifier { bail!("No TDX quote"); }; let event_log = &tdx_quote.event_log; - // Get boot info from attestation let report = report .report .as_td10() .context("Failed to decode TD report")?; - // Extract the verified MRs from the report let verified_mrs = Mrs { mrtd: report.mr_td.to_vec(), rtmr0: report.rt_mr0.to_vec(), @@ -610,16 +689,21 @@ impl CvmVerifier { rtmr2: report.rt_mr2.to_vec(), }; - // one download serves both measurement computation and the dev/version flags + // Legacy TDX attestation keeps the original KMS verifier semantics: + // os_image_hash must be the image content digest, and expected MRs are + // recomputed through the existing full-image path. let image_paths = self.ensure_image_downloaded(vm_config).await?; + if !Self::image_hash_matches_legacy_digest(&image_paths.image_dir, &vm_config.os_image_hash) + .context("Failed to check legacy image digest")? + { + bail!("legacy TDX attestation requires the digest.txt os_image_hash"); + } details.os_image_is_dev = Some(image_paths.is_dev); if !image_paths.version.is_empty() { details.os_image_version = Some(image_paths.version.clone()); } - // Compute expected measurements let (mrs, expected_logs) = if debug { - // For debug mode, we need detailed logs and ACPI tables let TdxMeasurementDetails { measurements, rtmr_logs, @@ -642,7 +726,6 @@ impl CvmVerifier { (measurements, Some(rtmr_logs)) } else { - // For non-debug mode, use the cached-measurement path. 
( self.load_or_compute_measurements( vm_config, @@ -656,6 +739,100 @@ impl CvmVerifier { ) }; + self.compare_tdx_mrs( + Mrs { + mrtd: mrs.mrtd, + rtmr0: mrs.rtmr0, + rtmr1: mrs.rtmr1, + rtmr2: mrs.rtmr2, + }, + verified_mrs, + expected_logs.as_ref(), + event_log, + debug, + details, + ) + } + + async fn verify_os_image_hash_for_dstack_tdx_measurement( + &self, + vm_config: &VmConfig, + attestation: &VerifiedAttestation, + debug: bool, + _details: &mut VerificationDetails, + ) -> Result<()> { + let Some(report) = &attestation.report.tdx_report() else { + bail!("No TDX report"); + }; + let Some(tdx_quote) = attestation.tdx_quote() else { + bail!("No TDX quote"); + }; + let event_log = &tdx_quote.event_log; + // Get boot info from attestation + let report = report + .report + .as_td10() + .context("Failed to decode TD report")?; + + // Extract the verified MRs from the report + let verified_mrs = Mrs { + mrtd: report.mr_td.to_vec(), + rtmr0: report.rt_mr0.to_vec(), + rtmr1: report.rt_mr1.to_vec(), + rtmr2: report.rt_mr2.to_vec(), + }; + + let document = vm_config + .tdx_measurement + .as_ref() + .context("tdx measurement attestation requires vm_config.tdx_measurement")?; + let document_hash = hex::decode(&document.os_image_hash) + .context("vm_config.tdx_measurement.os_image_hash is not valid hex")?; + if document_hash != vm_config.os_image_hash { + bail!( + "tdx measurement os_image_hash mismatch: vm_config={}, document={}", + hex::encode(&vm_config.os_image_hash), + document.os_image_hash + ); + } + let computed_hash = document.measurement_os_image_hash(); + if computed_hash.as_slice() != vm_config.os_image_hash { + bail!( + "tdx measurement document hash mismatch: vm_config={}, computed={}", + hex::encode(&vm_config.os_image_hash), + hex::encode(computed_hash) + ); + } + let measurement = document + .decode_measurement() + .map_err(anyhow::Error::msg) + .context("failed to decode vm_config.tdx_measurement CBOR")?; + if let Some(config_ovmf_variant) = 
vm_config.ovmf_variant { + if config_ovmf_variant != measurement.tdvf.ovmf_variant { + bail!( + "tdx measurement ovmf_variant mismatch: vm_config={:?}, document={:?}", + config_ovmf_variant, + measurement.tdvf.ovmf_variant + ); + } + } + + // Compute expected measurements. New TDX images advertise the + // measurement.json-derived TDX os_image_hash; verify those without + // downloading the image or running QEMU-derived ACPI table generators. + // The stripped certificate carries just the three hardware-bound RTMR0 + // ACPI digests (loader, RSDP, tables); every other RTMR0 digest is + // derived locally from vm_config + tdx_measurement. + let acpi_hashes = + Self::tdx_acpi_hashes_from_event_log(measurement.tdvf.ovmf_variant, event_log) + .context("TDX measurement attestation is missing RTMR0 ACPI table digests")?; + let mrs = dstack_mr::tdx::tdx_measurements_from_measurement_document( + document, + vm_config, + &acpi_hashes, + ) + .context("Failed to compute TDX expected measurements without image download")?; + let expected_mrs = Mrs { mrtd: mrs.mrtd.clone(), rtmr0: mrs.rtmr0.clone(), @@ -670,7 +847,28 @@ impl CvmVerifier { if !debug { return result; } - let Some(expected_logs) = expected_logs.as_ref() else { + result + } + } + } + + fn compare_tdx_mrs( + &self, + expected_mrs: Mrs, + verified_mrs: Mrs, + expected_logs: Option<&RtmrLogs>, + event_log: &[TdxEvent], + debug: bool, + details: &mut VerificationDetails, + ) -> Result<()> { + match expected_mrs.assert_eq(&verified_mrs) { + Ok(()) => Ok(()), + Err(e) => { + let result = Err(e).context("MRs do not match"); + if !debug { + return result; + } + let Some(expected_logs) = expected_logs else { return result; }; let mut rtmr_debug = Vec::new(); @@ -894,10 +1092,24 @@ impl CvmVerifier { } } - // os_image_hash should eq to sha256sum of the sha256sum.txt - let os_image_hash = Sha256::new_with_prefix(files_doc.as_bytes()).finalize(); - if hex::encode(os_image_hash) != hex_os_image_hash { - bail!("os_image_hash 
does not match sha256sum of the sha256sum.txt"); + // Legacy images use sha256(sha256sum.txt) as os_image_hash. Newer + // TDX/SNP images may instead be addressed by measurement.json-derived + // hashes, so accept those too after recomputing them from extracted + // image files. + let legacy_os_image_hash = Sha256::new_with_prefix(files_doc.as_bytes()).finalize(); + let mut image_hash_matches = hex::encode(legacy_os_image_hash) == hex_os_image_hash; + if !image_hash_matches { + image_hash_matches = dstack_mr::tdx::tdx_os_image_hash_for_image_dir(&extracted_dir) + .map(|hash| hex::encode(hash) == hex_os_image_hash) + .unwrap_or(false) + || dstack_mr::sev::sev_os_image_hash_for_image_dir(&extracted_dir) + .map(|hash| hex::encode(hash) == hex_os_image_hash) + .unwrap_or(false); + } + if !image_hash_matches { + bail!( + "os_image_hash matches neither sha256sum.txt nor measurement.json-derived hashes" + ); } // Move the extracted files to the destination directory diff --git a/vmm/src/app.rs b/vmm/src/app.rs index fa21297a0..1510851f0 100644 --- a/vmm/src/app.rs +++ b/vmm/src/app.rs @@ -1344,17 +1344,30 @@ fn make_vm_config( ) -> Result { let is_amd_sev_snp = cfg.cvm.resolved_platform() == crate::config::TeePlatform::AmdSevSnp && !manifest.no_tee; + let is_tdx = cfg.cvm.resolved_platform() == crate::config::TeePlatform::Tdx && !manifest.no_tee; + let tdx_attestation_variant = if is_tdx { + cfg.cvm.tdx_attestation_variant + } else { + dstack_types::TdxAttestationVariant::Legacy + }; // AMD SEV-SNP binds the OS image through the launch-measurement-derived - // os_image_hash, computed at image build time by `dstack-mr sev-os-image-hash` - // and shipped as `digest.sev.txt` (the same value KMS/verifier derive from a - // verified launch measurement). The VMM reads it from the image rather than - // recomputing it; TDX still uses the generic content digest. 
+ // os_image_hash, computed at image build time and shipped in + // `measurement.json.snp.os_image_hash` (legacy images used `digest.sev.txt`). TDX keeps + // using the generic content digest unless the + // operator explicitly opts into the measurement attestation variant. let os_image_hash = if is_amd_sev_snp { let digest = image.sev_digest.as_deref().context( - "amd sev-snp image is missing digest.sev.txt; \ - rebuild the image so `dstack-mr sev-os-image-hash` emits it", + "amd sev-snp image is missing measurement.json SNP hash; \ + rebuild the image so `dstack-mr os-image-measurement` emits it", )?; - hex::decode(digest).context("digest.sev.txt is not valid hex")? + hex::decode(digest).context("SNP os_image_hash is not valid hex")? + } else if tdx_attestation_variant.is_measurement() { + let digest = image.tdx_digest.as_deref().context( + "tdx measurement attestation requested but image is missing \ + measurement.json TDX hash; rebuild the image so \ + `dstack-mr os-image-measurement` emits it", + )?; + hex::decode(digest).context("TDX os_image_hash is not valid hex")? } else { image .digest @@ -1362,6 +1375,14 @@ fn make_vm_config( .and_then(|d| hex::decode(d).ok()) .unwrap_or_default() }; + let tdx_measurement = if tdx_attestation_variant.is_measurement() { + Some(image.tdx_measurement.clone().context( + "tdx measurement attestation requested but image is missing \ + measurement.json TDX measurement material", + )?) 
+ } else { + None + }; let gpus = if cfg.cvm.gpu.enabled { manifest.gpus.clone().unwrap_or_default() } else { @@ -1383,6 +1404,8 @@ fn make_vm_config( hotplug_off: cfg.cvm.qemu_hotplug_off, image: Some(manifest.image.clone()), ovmf_variant: image.info.ovmf_variant, + tdx_attestation_variant, + tdx_measurement, })?; // For backward compatibility config["spec_version"] = serde_json::Value::from(1); @@ -1580,11 +1603,19 @@ mod tests { ) .to_canonical_json(); - // digest.sev.txt is produced at build time by the `dstack-mr - // sev-os-image-hash` command; the VMM reads it instead of recomputing. + // measurement.json is produced at build time by the `dstack-mr + // os-image-measurement` command; the VMM reads it instead of recomputing. // Emit it here so the deploy path (make_vm_config) can read it back. - let build_hash = dstack_mr::sev::sev_os_image_hash_for_image_dir(&image_dir)?; - fs::write(image_dir.join("digest.sev.txt"), hex::encode(build_hash))?; + let snp_document = + dstack_mr::sev::sev_os_image_measurement_document_for_image_dir(&image_dir)?; + let build_hash = + hex::decode(&snp_document.os_image_hash).context("snp os_image_hash must be hex")?; + let measurement_document = + dstack_types::OsImageMeasurementDocument::new(None, Some(snp_document)); + fs::write( + image_dir.join("measurement.json"), + serde_json::to_string(&measurement_document)?, + )?; let sys_config_document = make_sys_config(&config, &manifest, &compose_hash, Some(mr_config))?; @@ -1607,13 +1638,13 @@ mod tests { assert_eq!(parsed_mr_config.compose_hash, vec![0x22; 32]); assert_eq!(vm_config["mr_config"], sys_config["mr_config"]); // The deploy path must surface the os_image_hash straight from - // digest.sev.txt (not recompute it). + // measurement.json (not recompute it). 
assert_eq!( vm_config["os_image_hash"] .as_str() .context("os_image_hash must be a string")?, - hex::encode(build_hash), - "vm_config os_image_hash must come from digest.sev.txt" + hex::encode(&build_hash), + "vm_config os_image_hash must come from measurement.json" ); assert!(measurement.get("app_id").is_none()); assert!(measurement.get("compose_hash").is_none()); @@ -1650,18 +1681,24 @@ mod tests { 4 ); - // The build-time os_image_hash (dstack-mr sev-os-image-hash -> - // digest.sev.txt) must equal the os_image_hash a verifier derives from + // The build-time os_image_hash (measurement.json.snp.os_image_hash) must + // equal the os_image_hash a verifier derives from // the launch measurement document, i.e. the image-invariant projection. - let as_str = |v: &serde_json::Value| v.as_str().unwrap().to_string(); - let rootfs_hash = - dstack_mr::sev::rootfs_hash_from_cmdline(measurement["base_cmdline"].as_str())?; + let as_bytes = |v: &serde_json::Value| hex::decode(v.as_str().unwrap()).unwrap(); + dstack_mr::sev::rootfs_hash_from_cmdline(measurement["base_cmdline"].as_str())?; let projected = dstack_types::SevOsImageMeasurement { - rootfs_hash, - base_cmdline: measurement["base_cmdline"].as_str().map(str::to_string), - ovmf_hash: as_str(&measurement["ovmf_hash"]), - kernel_hash: as_str(&measurement["kernel_hash"]), - initrd_hash: as_str(&measurement["initrd_hash"]), + kernel_cmdline_sha256: { + let mut cmdline = measurement["base_cmdline"] + .as_str() + .unwrap() + .as_bytes() + .to_vec(); + cmdline.push(0); + Sha256::digest(&cmdline).to_vec() + }, + ovmf_hash: as_bytes(&measurement["ovmf_hash"]), + kernel_hash: as_bytes(&measurement["kernel_hash"]), + initrd_hash: as_bytes(&measurement["initrd_hash"]), sev_hashes_table_gpa: measurement["sev_hashes_table_gpa"].as_u64().unwrap(), sev_es_reset_eip: measurement["sev_es_reset_eip"].as_u64().unwrap() as u32, ovmf_sections: measurement["ovmf_sections"] @@ -1677,8 +1714,8 @@ mod tests { }; assert_eq!( build_hash, - 
projected.os_image_hash(), - "digest.sev.txt must match the os_image_hash derived from the launch measurement" + projected.os_image_hash().to_vec(), + "measurement.json SNP hash must match the os_image_hash derived from the launch measurement" ); Ok(()) } diff --git a/vmm/src/app/image.rs b/vmm/src/app/image.rs index c8e7d255d..f7bdb2e7f 100644 --- a/vmm/src/app/image.rs +++ b/vmm/src/app/image.rs @@ -7,6 +7,7 @@ use path_absolutize::Absolutize; use std::path::{Path, PathBuf}; use anyhow::{bail, Context, Result}; +use dstack_types::{OsImageMeasurementDocument, TdxOsImageMeasurementDocument}; use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize)] @@ -71,9 +72,12 @@ pub struct Image { pub bios: Option, pub bios_sev: Option, pub digest: Option, - /// AMD SEV-SNP os_image_hash, read from `digest.sev.txt` (produced at image - /// build time by `dstack-mr sev-os-image-hash`). The VMM does not recompute - /// it; the deploy path reads this value directly. + /// TDX os_image_hash, read from `measurement.json.tdx.os_image_hash`. + pub tdx_digest: Option, + /// TDX no-image-download measurement material, read from `measurement.json.tdx`. + pub tdx_measurement: Option, + /// AMD SEV-SNP os_image_hash, read from `measurement.json.snp.os_image_hash` + /// for new images, falling back to legacy `digest.sev.txt`. 
pub sev_digest: Option, } @@ -103,10 +107,31 @@ impl Image { let digest = fs::read_to_string(base_path.join("digest.txt")) .ok() .map(|s| s.trim().to_string()); - let sev_digest = fs::read_to_string(base_path.join("digest.sev.txt")) + let measurement_path = base_path.join("measurement.json"); + let measurement = if measurement_path.exists() { + let file = fs::File::open(&measurement_path) + .with_context(|| format!("failed to open {}", measurement_path.display()))?; + Some( + serde_json::from_reader::<_, OsImageMeasurementDocument>(file) + .with_context(|| format!("failed to parse {}", measurement_path.display()))?, + ) + } else { + None + }; + let legacy_sev_digest = fs::read_to_string(base_path.join("digest.sev.txt")) .ok() .map(|s| s.trim().to_string()) .filter(|s| !s.is_empty()); + let sev_digest = measurement + .as_ref() + .and_then(|m| m.snp.as_ref()) + .map(|snp| snp.os_image_hash.clone()) + .or(legacy_sev_digest); + let tdx_digest = measurement + .as_ref() + .and_then(|m| m.tdx.as_ref()) + .map(|tdx| tdx.os_image_hash.clone()); + let tdx_measurement = measurement.as_ref().and_then(|m| m.tdx.clone()); if info.version.is_empty() { // Older images does not have version field. Fallback to the version of the image folder name info.version = guess_version(&base_path).unwrap_or_default(); @@ -120,6 +145,8 @@ impl Image { bios, bios_sev, digest, + tdx_digest, + tdx_measurement, sev_digest, } .ensure_exists() diff --git a/vmm/src/config.rs b/vmm/src/config.rs index b0b234a29..523b56edf 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -10,6 +10,7 @@ use path_absolutize::Absolutize; use rocket::figment::Figment; use serde::{Deserialize, Serialize}; +use dstack_types::TdxAttestationVariant; use lspci::{lspci_filtered, Device}; use tracing::{info, warn}; @@ -260,6 +261,12 @@ pub struct CvmConfig { /// QEMU hotplug_off pub qemu_hotplug_off: bool, + /// TDX attestation/hash scheme. 
`legacy` keeps the existing digest.txt + + /// dstack-acpi-tables verifier path; `measurement` opts into the + /// measurement.json + no-QEMU verifier path. + #[serde(default)] + pub tdx_attestation_variant: TdxAttestationVariant, + /// Networking configuration pub networking: Networking, diff --git a/vmm/vmm.toml b/vmm/vmm.toml index 73d8c124a..6487502d1 100644 --- a/vmm/vmm.toml +++ b/vmm/vmm.toml @@ -45,6 +45,9 @@ use_mrconfigid = true #qemu_version = "" qemu_pci_hole64_size = 0 qemu_hotplug_off = false +# TDX attestation/hash scheme: "legacy" (digest.txt + legacy verifier) or +# "measurement" (measurement.json.tdx.os_image_hash + no-QEMU verifier). +tdx_attestation_variant = "legacy" host_share_mode = "9p"