diff --git a/architecture/security-policy.md b/architecture/security-policy.md index b0d56e3a2..ad9de7afa 100644 --- a/architecture/security-policy.md +++ b/architecture/security-policy.md @@ -21,6 +21,15 @@ For the field-by-field YAML reference, use Filesystem and process policy are startup-time controls. Network policy is dynamic and can be hot-reloaded when the new policy validates successfully. +The sandbox supervisor also injects runtime baseline filesystem paths before +the child process starts. Proxy mode adds the standard read-only system paths +and writable work paths needed by the proxy and shell environment. GPU runtimes +add the NVIDIA or WSL2 device nodes exposed inside the sandbox and promote +`/proc` to read-write for default-like policies because CUDA initialization +writes `/proc//task//comm`. Custom policies that explicitly keep a +GPU-required path read-only fail at startup with an actionable diagnostic +instead of being silently widened. + ## Network Decisions Ordinary network traffic follows this order: diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index ded56ce9e..d49d4903e 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -1450,41 +1450,136 @@ fn enumerate_gpu_device_nodes() -> Vec { paths } -/// Collect all baseline paths for enrichment: proxy defaults + GPU (if present). +#[derive(Debug, Clone)] +struct BaselineEnrichmentPaths { + read_only: Vec, + read_write: Vec, + gpu_read_write: Vec, +} + +impl BaselineEnrichmentPaths { + fn is_empty(&self) -> bool { + self.read_only.is_empty() && self.read_write.is_empty() + } + + fn is_gpu_read_write(&self, path: &str) -> bool { + self.gpu_read_write.iter().any(|p| p == path) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum BaselinePolicySource { + DefaultLike, + Custom, +} + +fn push_unique(paths: &mut Vec, path: String) { + if !paths.iter().any(|p| p == &path) { + paths.push(path); + } +} + +fn collect_baseline_enrichment_paths( + include_proxy: bool, + include_gpu: bool, + gpu_device_nodes: Vec, +) -> BaselineEnrichmentPaths { + let mut read_only = Vec::new(); + let mut read_write = Vec::new(); + let mut gpu_read_write = Vec::new(); + + if include_proxy { + for &path in PROXY_BASELINE_READ_ONLY { + push_unique(&mut read_only, path.to_string()); + } + for &path in PROXY_BASELINE_READ_WRITE { + push_unique(&mut read_write, path.to_string()); + } + } + + if include_gpu { + for &path in GPU_BASELINE_READ_ONLY { + push_unique(&mut read_only, path.to_string()); + } + for &path in GPU_BASELINE_READ_WRITE { + push_unique(&mut read_write, path.to_string()); + push_unique(&mut gpu_read_write, path.to_string()); + } + for path in gpu_device_nodes { + push_unique(&mut read_write, path.clone()); + push_unique(&mut gpu_read_write, path); + } + } + + // A path promoted to read_write (e.g. /proc for GPU) should not also + // appear in read_only. Landlock handles the overlap correctly, but the + // duplicate obscures the effective policy when operators inspect it. + read_only.retain(|p| !read_write.contains(p)); + + BaselineEnrichmentPaths { + read_only, + read_write, + gpu_read_write, + } +} + +fn active_baseline_enrichment_paths(include_proxy: bool) -> BaselineEnrichmentPaths { + let include_gpu = has_gpu_devices(); + let gpu_device_nodes = if include_gpu { + enumerate_gpu_device_nodes() + } else { + Vec::new() + }; + collect_baseline_enrichment_paths(include_proxy, include_gpu, gpu_device_nodes) +} + +/// Collect all active baseline paths for tests and diagnostics. /// Returns `(read_only, read_write)` as owned `String` vecs. +#[cfg(test)] fn baseline_enrichment_paths() -> (Vec, Vec) { - let mut ro: Vec = PROXY_BASELINE_READ_ONLY - .iter() - .map(|&s| s.to_string()) - .collect(); - let mut rw: Vec = PROXY_BASELINE_READ_WRITE - .iter() - .map(|&s| s.to_string()) - .collect(); + let paths = active_baseline_enrichment_paths(true); + (paths.read_only, paths.read_write) +} - if has_gpu_devices() { - ro.extend(GPU_BASELINE_READ_ONLY.iter().map(|&s| s.to_string())); - rw.extend(GPU_BASELINE_READ_WRITE.iter().map(|&s| s.to_string())); - rw.extend(enumerate_gpu_device_nodes()); +fn proto_policy_source(proto: &openshell_core::proto::SandboxPolicy) -> BaselinePolicySource { + if proto == &openshell_policy::restrictive_default_policy() { + BaselinePolicySource::DefaultLike + } else { + BaselinePolicySource::Custom } +} - // A path promoted to read_write (e.g. /proc for GPU) should not also - // appear in read_only — Landlock handles the overlap correctly but the - // duplicate is confusing when inspecting the effective policy. - ro.retain(|p| !rw.contains(p)); +fn gpu_read_write_conflict(path: &str) -> miette::Report { + miette::miette!( + "GPU sandbox policy keeps required path '{path}' read-only. \ + CUDA workloads require this path to be read-write under the current Landlock runtime. \ + Move '{path}' from filesystem_policy.read_only to filesystem_policy.read_write, \ + or omit the custom filesystem policy to use OpenShell's GPU default." + ) +} - (ro, rw) +fn emit_baseline_enriched_event() { + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "enriched") + .message("Enriched policy with baseline filesystem paths") + .build() + ); } -/// Ensure a proto `SandboxPolicy` includes the baseline filesystem paths -/// required for proxy-mode sandboxes. Paths are only added if missing; -/// user-specified paths are never removed. -/// -/// Returns `true` if the policy was modified (caller may want to sync back). -fn enrich_proto_baseline_paths(proto: &mut openshell_core::proto::SandboxPolicy) -> bool { - // Only enrich if network_policies are present (proxy mode indicator). - if proto.network_policies.is_empty() { - return false; +fn enrich_proto_baseline_paths_with( + proto: &mut openshell_core::proto::SandboxPolicy, + paths: &BaselineEnrichmentPaths, + source: BaselinePolicySource, + path_exists: F, +) -> Result +where + F: Fn(&str) -> bool, +{ + if paths.is_empty() { + return Ok(false); } let fs = proto @@ -1494,107 +1589,170 @@ fn enrich_proto_baseline_paths(proto: &mut openshell_core::proto::SandboxPolicy) ..Default::default() }); - let (ro, rw) = baseline_enrichment_paths(); - - // Baseline paths are system-injected, not user-specified. Skip paths - // that do not exist in this container image to avoid noisy warnings from - // Landlock and, more critically, to prevent a single missing baseline - // path from abandoning the entire Landlock ruleset under best-effort - // mode (see issue #664). + // Baseline paths are system-injected. Skip paths that do not exist in + // this container image to avoid noisy warnings from Landlock and, more + // critically, to prevent a single missing baseline path from abandoning + // the entire Landlock ruleset under best-effort mode (see issue #664). let mut modified = false; - for path in &ro { - if !fs.read_only.iter().any(|p| p == path) && !fs.read_write.iter().any(|p| p == path) { - if !std::path::Path::new(path).exists() { - debug!( - path, - "Baseline read-only path does not exist, skipping enrichment" - ); - continue; - } - fs.read_only.push(path.clone()); - modified = true; - } - } - for path in &rw { + for path in &paths.read_only { if fs.read_only.iter().any(|p| p == path) || fs.read_write.iter().any(|p| p == path) { continue; } - if !std::path::Path::new(path).exists() { + if !path_exists(path) { + debug!( + path, + "Baseline read-only path does not exist, skipping enrichment" + ); + continue; + } + fs.read_only.push(path.clone()); + modified = true; + } + for path in &paths.read_write { + if !path_exists(path) { debug!( path, "Baseline read-write path does not exist, skipping enrichment" ); continue; } + + if fs.read_write.iter().any(|p| p == path) { + if source == BaselinePolicySource::DefaultLike && paths.is_gpu_read_write(path) { + let before = fs.read_only.len(); + fs.read_only.retain(|p| p != path); + modified |= fs.read_only.len() != before; + } + continue; + } + + if fs.read_only.iter().any(|p| p == path) { + if paths.is_gpu_read_write(path) { + if source == BaselinePolicySource::Custom { + return Err(gpu_read_write_conflict(path)); + } + fs.read_only.retain(|p| p != path); + fs.read_write.push(path.clone()); + modified = true; + } + continue; + } + fs.read_write.push(path.clone()); modified = true; } if modified { - ocsf_emit!( - ConfigStateChangeBuilder::new(ocsf_ctx()) - .severity(SeverityId::Informational) - .status(StatusId::Success) - .state(StateId::Enabled, "enriched") - .message("Enriched policy with baseline filesystem paths for proxy mode") - .build() - ); + emit_baseline_enriched_event(); } - modified + Ok(modified) } -/// Ensure a `SandboxPolicy` (Rust type) includes the baseline filesystem -/// paths required for proxy-mode sandboxes. Used for the local-file code -/// path where no proto is available. -fn enrich_sandbox_baseline_paths(policy: &mut SandboxPolicy) { - if !matches!(policy.network.mode, NetworkMode::Proxy) { - return; - } +/// Ensure a proto `SandboxPolicy` includes the active baseline filesystem +/// paths required by proxy mode and GPU runtimes. +/// +/// Returns `true` if the policy was modified (caller may want to sync back). +fn enrich_proto_baseline_paths_from_source( + proto: &mut openshell_core::proto::SandboxPolicy, + source: BaselinePolicySource, +) -> Result { + let include_proxy = !proto.network_policies.is_empty(); + let paths = active_baseline_enrichment_paths(include_proxy); + enrich_proto_baseline_paths_with(proto, &paths, source, |path| { + std::path::Path::new(path).exists() + }) +} - let (ro, rw) = baseline_enrichment_paths(); +fn enrich_proto_baseline_paths(proto: &mut openshell_core::proto::SandboxPolicy) -> Result { + enrich_proto_baseline_paths_from_source(proto, proto_policy_source(proto)) +} - let mut modified = false; - for path in &ro { - let p = std::path::PathBuf::from(path); - if !policy.filesystem.read_only.contains(&p) && !policy.filesystem.read_write.contains(&p) { - if !p.exists() { - debug!( - path, - "Baseline read-only path does not exist, skipping enrichment" - ); - continue; - } - policy.filesystem.read_only.push(p); - modified = true; - } +fn enrich_sandbox_baseline_paths_with( + policy: &mut SandboxPolicy, + paths: &BaselineEnrichmentPaths, + source: BaselinePolicySource, + path_exists: F, +) -> Result +where + F: Fn(&str) -> bool, +{ + if paths.is_empty() { + return Ok(false); } - for path in &rw { + + let mut modified = false; + for path in &paths.read_only { let p = std::path::PathBuf::from(path); if policy.filesystem.read_only.contains(&p) || policy.filesystem.read_write.contains(&p) { continue; } - if !p.exists() { + if !path_exists(path) { + debug!( + path, + "Baseline read-only path does not exist, skipping enrichment" + ); + continue; + } + policy.filesystem.read_only.push(p); + modified = true; + } + for path in &paths.read_write { + let p = std::path::PathBuf::from(path); + if !path_exists(path) { debug!( path, "Baseline read-write path does not exist, skipping enrichment" ); continue; } + + if policy.filesystem.read_write.contains(&p) { + if source == BaselinePolicySource::DefaultLike && paths.is_gpu_read_write(path) { + let before = policy.filesystem.read_only.len(); + policy + .filesystem + .read_only + .retain(|existing| existing != &p); + modified |= policy.filesystem.read_only.len() != before; + } + continue; + } + + if policy.filesystem.read_only.contains(&p) { + if paths.is_gpu_read_write(path) { + if source == BaselinePolicySource::Custom { + return Err(gpu_read_write_conflict(path)); + } + policy + .filesystem + .read_only + .retain(|existing| existing != &p); + policy.filesystem.read_write.push(p); + modified = true; + } + continue; + } + policy.filesystem.read_write.push(p); modified = true; } if modified { - ocsf_emit!( - ConfigStateChangeBuilder::new(ocsf_ctx()) - .severity(SeverityId::Informational) - .status(StatusId::Success) - .state(StateId::Enabled, "enriched") - .message("Enriched policy with baseline filesystem paths for proxy mode") - .build() - ); + emit_baseline_enriched_event(); } + + Ok(modified) +} + +/// Ensure a `SandboxPolicy` (Rust type) includes active baseline filesystem +/// paths. Used for the local-file code path where no proto is available. +fn enrich_sandbox_baseline_paths(policy: &mut SandboxPolicy) -> Result { + let include_proxy = matches!(policy.network.mode, NetworkMode::Proxy); + let paths = active_baseline_enrichment_paths(include_proxy); + enrich_sandbox_baseline_paths_with(policy, &paths, BaselinePolicySource::Custom, |path| { + std::path::Path::new(path).exists() + }) } #[cfg(test)] @@ -1695,7 +1853,7 @@ mod baseline_tests { }, ); - enrich_proto_baseline_paths(&mut policy); + enrich_proto_baseline_paths(&mut policy).expect("baseline enrichment should succeed"); let filesystem = policy.filesystem.expect("filesystem policy"); assert!( @@ -1708,6 +1866,121 @@ mod baseline_tests { ); } + #[test] + fn proto_gpu_enrichment_promotes_default_proc_without_network_policy() { + let mut policy = openshell_policy::restrictive_default_policy(); + assert!( + policy.network_policies.is_empty(), + "regression setup must exercise the no-network default path" + ); + + let paths = + collect_baseline_enrichment_paths(false, true, vec!["/dev/nvidia0".to_string()]); + let enriched = enrich_proto_baseline_paths_with( + &mut policy, + &paths, + BaselinePolicySource::DefaultLike, + |_| true, + ) + .expect("default GPU enrichment should succeed"); + + let filesystem = policy.filesystem.expect("filesystem policy"); + assert!(enriched, "default GPU enrichment should modify policy"); + assert!( + !filesystem.read_only.contains(&"/proc".to_string()), + "default GPU enrichment should remove /proc from read_only" + ); + assert!( + filesystem.read_write.contains(&"/proc".to_string()), + "default GPU enrichment should promote /proc to read_write" + ); + assert!( + filesystem.read_write.contains(&"/dev/nvidia0".to_string()), + "default GPU enrichment should add enumerated GPU devices" + ); + } + + #[test] + fn proto_gpu_enrichment_promotes_discovered_image_policy_proc() { + let mut policy = openshell_policy::restrictive_default_policy(); + policy.network_policies.insert( + "image-policy".into(), + openshell_core::proto::NetworkPolicyRule { + name: "image-policy".into(), + endpoints: vec![openshell_core::proto::NetworkEndpoint { + host: "example.com".into(), + port: 443, + ..Default::default() + }], + ..Default::default() + }, + ); + + let paths = collect_baseline_enrichment_paths(true, true, vec!["/dev/nvidia0".to_string()]); + let enriched = enrich_proto_baseline_paths_with( + &mut policy, + &paths, + BaselinePolicySource::DefaultLike, + |_| true, + ) + .expect("discovered image policy GPU enrichment should succeed"); + + let filesystem = policy.filesystem.expect("filesystem policy"); + assert!(enriched, "discovered image policy should be enriched"); + assert!( + !filesystem.read_only.contains(&"/proc".to_string()), + "image-discovered GPU policy should remove /proc from read_only" + ); + assert!( + filesystem.read_write.contains(&"/proc".to_string()), + "image-discovered GPU policy should promote /proc to read_write" + ); + } + + #[test] + fn proto_gpu_enrichment_adds_missing_custom_gpu_paths() { + let mut policy = openshell_policy::restrictive_default_policy(); + policy.filesystem = Some(openshell_core::proto::FilesystemPolicy { + read_only: vec![], + read_write: vec![], + include_workdir: true, + }); + + let paths = + collect_baseline_enrichment_paths(false, true, vec!["/dev/nvidia0".to_string()]); + let enriched = enrich_proto_baseline_paths_with( + &mut policy, + &paths, + BaselinePolicySource::Custom, + |path| matches!(path, "/proc" | "/dev/nvidia0"), + ) + .expect("custom GPU enrichment should add omitted baseline paths"); + + let filesystem = policy.filesystem.expect("filesystem policy"); + assert!(enriched, "custom GPU enrichment should modify policy"); + assert!(filesystem.read_write.contains(&"/proc".to_string())); + assert!(filesystem.read_write.contains(&"/dev/nvidia0".to_string())); + } + + #[test] + fn proto_gpu_enrichment_rejects_custom_read_only_proc() { + let mut policy = openshell_policy::restrictive_default_policy(); + let paths = + collect_baseline_enrichment_paths(false, true, vec!["/dev/nvidia0".to_string()]); + + let error = enrich_proto_baseline_paths_with( + &mut policy, + &paths, + BaselinePolicySource::Custom, + |path| path == "/proc", + ) + .expect_err("custom read-only /proc should conflict with GPU baseline"); + + let message = error.to_string(); + assert!(message.contains("GPU sandbox policy keeps required path '/proc' read-only")); + assert!(message.contains("filesystem_policy.read_write")); + } + #[test] fn gpu_baseline_read_write_contains_dxg() { // /dev/dxg must be present so WSL2 sandboxes get the Landlock @@ -1736,7 +2009,7 @@ mod baseline_tests { process: ProcessPolicy::default(), }; - enrich_sandbox_baseline_paths(&mut policy); + enrich_sandbox_baseline_paths(&mut policy).expect("baseline enrichment should succeed"); assert!( policy @@ -1754,6 +2027,40 @@ mod baseline_tests { ); } + #[test] + fn local_gpu_enrichment_rejects_custom_read_only_proc() { + let mut policy = SandboxPolicy { + version: 1, + filesystem: FilesystemPolicy { + read_only: vec![std::path::PathBuf::from("/proc")], + read_write: vec![], + include_workdir: false, + }, + network: NetworkPolicy { + mode: NetworkMode::Block, + proxy: None, + }, + landlock: LandlockPolicy::default(), + process: ProcessPolicy::default(), + }; + let paths = + collect_baseline_enrichment_paths(false, true, vec!["/dev/nvidia0".to_string()]); + + let error = enrich_sandbox_baseline_paths_with( + &mut policy, + &paths, + BaselinePolicySource::Custom, + |path| path == "/proc", + ) + .expect_err("custom read-only /proc should conflict with GPU baseline"); + + assert!( + error + .to_string() + .contains("GPU sandbox policy keeps required path '/proc' read-only") + ); + } + #[test] fn gpu_baseline_read_only_contains_usr_lib_wsl() { // /usr/lib/wsl must be present so CDI-injected WSL2 GPU library @@ -1889,7 +2196,7 @@ async fn load_policy( landlock: config.landlock, process: config.process, }; - enrich_sandbox_baseline_paths(&mut policy); + enrich_sandbox_baseline_paths(&mut policy)?; return Ok((policy, Some(Arc::new(engine)), None)); } @@ -1920,7 +2227,10 @@ async fn load_policy( let mut discovered = discover_policy_from_disk_or_default(); // Enrich before syncing so the gateway baseline includes // baseline paths from the start. - enrich_proto_baseline_paths(&mut discovered); + enrich_proto_baseline_paths_from_source( + &mut discovered, + BaselinePolicySource::DefaultLike, + )?; let sandbox = sandbox.as_deref().ok_or_else(|| { miette::miette!( "Cannot sync discovered policy: sandbox not available.\n\ @@ -1939,7 +2249,7 @@ async fn load_policy( // Ensure baseline filesystem paths are present for proxy-mode // sandboxes. If the policy was enriched, sync the updated version // back to the gateway so users can see the effective policy. - let enriched = enrich_proto_baseline_paths(&mut proto_policy); + let enriched = enrich_proto_baseline_paths(&mut proto_policy)?; if enriched && let Some(sandbox_name) = sandbox.as_deref() && let Err(e) = grpc_client::sync_policy(endpoint, sandbox_name, &proto_policy).await diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index a62f57bf5..083e620c5 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -55,6 +55,11 @@ For Docker-backed sandboxes, GPU injection uses Docker CDI. If you enable Docker CDI after the gateway starts, restart the gateway so OpenShell can detect the updated Docker daemon capability. +The built-in default policy includes the filesystem access CUDA needs after GPU +devices are injected. If you use a custom policy with `--gpu`, keep the injected +GPU device nodes and `/proc` in `filesystem_policy.read_write`; CUDA +initialization may fail when `/proc` remains read-only. + ### Custom Containers Use `--from` to create a sandbox from the base image, another pre-built sandbox name, a local directory, or a container image: diff --git a/docs/sandboxes/policies.mdx b/docs/sandboxes/policies.mdx index 4ead66a5e..b3c64ccfc 100644 --- a/docs/sandboxes/policies.mdx +++ b/docs/sandboxes/policies.mdx @@ -62,6 +62,8 @@ Raw streams are connection-scoped and outside L7 live-reload guarantees. This in When a sandbox runs in proxy mode (the default), OpenShell automatically adds baseline filesystem paths required for the sandbox child process to function: `/usr`, `/lib`, `/etc`, `/var/log` (read-only) and `/sandbox`, `/tmp` (read-write). Paths like `/app` are included in the baseline set but are only added if they exist in the container image. +When GPU devices are present in the sandbox container, OpenShell also adds the GPU filesystem baseline. This includes the injected NVIDIA or WSL2 device nodes as read-write paths. For the built-in default policy, OpenShell promotes `/proc` from read-only to read-write because CUDA initialization writes `/proc//task//comm`. If a custom policy explicitly keeps a GPU-required path such as `/proc` in `read_only`, sandbox startup fails and tells you to move that path to `read_write`. + This filtering prevents a missing baseline path from degrading Landlock enforcement. Without it, a single missing path could cause the entire Landlock ruleset to fail, leaving the sandbox with no filesystem restrictions at all. User-specified paths in your policy YAML are not pre-filtered. If you list a path that does not exist: