diff --git a/AGENTS.md b/AGENTS.md index 71770bd..8c28a06 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -132,6 +132,16 @@ Coverage is a side effect of good tests, never the target. ## Other conventions +- **Comments earn their place.** No section-divider banners (`# --- foo ---`), + no comment that just restates what the next line does. A comment explains a + non-obvious *why* a reader cannot recover from the code — a workaround, an + invariant, a deliberate trade-off. Prefer self-documenting code: a named + function, a `Unit.*` quantity instead of a bare `1024 * 1024`, a descriptive + variable — over a comment narrating the mechanics. If you reach for a comment + to explain *what*, rename the thing instead. +- Don't hand-roll magic numbers for sizes/durations/bandwidth: use the + `Unit.*` types (`Unit.Information.mib(8)`, not `8 * 1024 * 1024`) and + `use Unit.Operators` for unit-aware arithmetic. - Add `@spec` to public functions. Dialyzer runs with `:unmatched_returns`, `:extra_return`, `:missing_return` and will fail the gate on a missing/wrong spec. diff --git a/docs/cookbook/intro.md b/docs/cookbook/intro.md index 1459f74..a0030d3 100644 --- a/docs/cookbook/intro.md +++ b/docs/cookbook/intro.md @@ -17,6 +17,20 @@ with it. The absolute best way to get started with `Hyper` is to play with it. +### Requirements + +Hyper requires the following software be installed on each node running it: + + - [`skopeo`](https://github.com/containers/skopeo) + - [`e2fsprogs`](https://github.com/tytso/e2fsprogs) + +Hyper has more runtime dependencies, but they are automatically redistributed +by Hyper. + +### Installation + + + ### Configuration Running `Hyper` is involved and requires a large number of pre-requisites. The @@ -41,3 +55,27 @@ config :hyper, uid_gid_range: {900_000, 999_999}, layer_dir: "/srv/hyper/layers" ``` + + + +### Usage + + + +#### Loading Images + +Before an image can be booted, it needs to be loaded into Hyper. 
Currently, the +only way to load images is from an OCI image, either through the native +Elixir interface or through [gRPC](../grpc.md): + +```elixir +{:ok, img_id} = Hyper.Img.OciLoader.load("docker.io/library/alpine:3.19") +``` + +#### Booting a VM + +With the image loaded, and an `img_id` in hand, you can boot it: + +```elixir +{:ok, vm} = Hyper.create_vm(%Hyper.Vm.Spec{ img_id: img_id }) +``` diff --git a/docs/grpc.md b/docs/grpc.md index e1ed5cd..5b76276 100644 --- a/docs/grpc.md +++ b/docs/grpc.md @@ -51,6 +51,23 @@ from hyper.grpc.v0 import hyper_pb2, hyper_pb2_grpc client = hyper_pb2_grpc.HyperStub(grpc.aio.insecure_channel("localhost:50051")) ``` +### Loading Images + +Before you can create a VM you need an image in the cluster. `LoadImage` pulls an +OCI image, builds its rootfs, and records it -- returning the `img_id` you pass to +`CreateVm`. It blocks until the load finishes (this can take minutes), so set a +generous deadline. + +```python +loaded = await client.LoadImage( + hyper_pb2.LoadImageRequest( + image_ref="docker.io/library/alpine:3.19", + # label is optional; defaults to image_ref. + ) +) +print(loaded.img_id) # pass this to CreateVm +``` + ### Creating VMs You can create new VMs with the `CreateVm` RPC. @@ -58,7 +75,7 @@ You can create new VMs with the `CreateVm` RPC. ```python created = await client.CreateVm( hyper_pb2.CreateVmRequest( - img_id="img-abc", + img_id=loaded.img_id, instance_type=hyper_pb2.INSTANCE_TYPE_DECI, arch=hyper_pb2.ARCHITECTURE_X86_64, # boot_args is optional; omit it for the default kernel cmdline. diff --git a/lib/hyper.ex b/lib/hyper.ex index 4273c32..fb0a626 100644 --- a/lib/hyper.ex +++ b/lib/hyper.ex @@ -8,11 +8,6 @@ defmodule Hyper do @type id :: String.t() end - defmodule Img do - @moduledoc "A content-addressed image: an ordered stack of layers." - @type id :: String.t() - end - @doc """ Create a new virtual machine from an image. 
diff --git a/lib/hyper/config.ex b/lib/hyper/config.ex index 6f83461..2b8a088 100644 --- a/lib/hyper/config.ex +++ b/lib/hyper/config.ex @@ -19,6 +19,9 @@ defmodule Hyper.Config do @losetup_path Application.compile_env(:hyper, :losetup_path, "losetup") @dmsetup_path Application.compile_env(:hyper, :dmsetup_path, "dmsetup") @blockdev_path Application.compile_env(:hyper, :blockdev_path, "blockdev") + @skopeo_path Application.compile_env(:hyper, :skopeo_path, "skopeo") + @umoci_path Application.compile_env(:hyper, :umoci_path, nil) + @mke2fs_path Application.compile_env(:hyper, :mke2fs_path, "mke2fs") @vmlinux Application.compile_env(:hyper, :vmlinux, %{}) @doc """ @@ -62,6 +65,10 @@ defmodule Hyper.Config do @spec vmlinux_install_dir :: Path.t() def vmlinux_install_dir, do: Path.join(redist_dir(), "vmlinux") + @doc "Directory where `Hyper.Img.OciLoader.Umoci` installs the default umoci binary." + @spec umoci_install_dir :: Path.t() + def umoci_install_dir, do: Path.join(redist_dir(), "umoci") + @doc """ Path to the directory where all VM chroot's are created (`/jails`). @@ -114,6 +121,18 @@ defmodule Hyper.Config do @doc "Path to the blockdev binary." def blockdev_path, do: @blockdev_path + @doc "Path to the skopeo binary (used by `Hyper.Img.OciLoader` to pull OCI images)." + def skopeo_path, do: @skopeo_path + + @doc """ + Operator-configured path to the umoci binary, or `nil` (the default) to let + `Hyper.Img.OciLoader.Umoci` download and manage a pinned default. + """ + def umoci_path, do: @umoci_path + + @doc "Path to the mke2fs binary (used by `Hyper.Img.OciLoader` to build the ext4 rootfs)." + def mke2fs_path, do: @mke2fs_path + @doc """ Path to the setuid-root device helper (`hyper-suidhelper`). 
Required: the node runs unprivileged and routes every `losetup`/`dmsetup`/`blockdev` operation diff --git a/lib/hyper/grpc/codec.ex b/lib/hyper/grpc/codec.ex index ca466be..650040a 100644 --- a/lib/hyper/grpc/codec.ex +++ b/lib/hyper/grpc/codec.ex @@ -15,6 +15,8 @@ defmodule Hyper.Grpc.Codec do CreateVmResponse, GetVmResponse, ListVmsResponse, + LoadImageRequest, + LoadImageResponse, Vm } @@ -72,6 +74,16 @@ defmodule Hyper.Grpc.Codec do end end + @spec from_grpc(LoadImageRequest.t()) :: + {:ok, {String.t(), keyword()}} | {:error, :missing_image_ref} + def from_grpc(%LoadImageRequest{image_ref: ref}) when ref in [nil, ""], + do: {:error, :missing_image_ref} + + def from_grpc(%LoadImageRequest{image_ref: ref, label: label}) do + opts = if label in [nil, ""], do: [], else: [label: label] + {:ok, {ref, opts}} + end + @doc "Convert a domain result to an outbound response message, or an error to `GRPC.RPCError`." @spec to_grpc({:created, Hyper.Vm.id(), node()}) :: CreateVmResponse.t() def to_grpc({:created, vm_id, node}) when is_binary(vm_id), @@ -85,6 +97,10 @@ defmodule Hyper.Grpc.Codec do def to_grpc({:vms, vms}), do: %ListVmsResponse{vms: Enum.map(vms, &vm/1)} + @spec to_grpc({:loaded, Hyper.Img.id()}) :: LoadImageResponse.t() + def to_grpc({:loaded, img_id}) when is_binary(img_id), + do: %LoadImageResponse{img_id: img_id} + @spec to_grpc(:stopped) :: Empty.t() def to_grpc(:stopped), do: %Empty{} @@ -124,6 +140,19 @@ defmodule Hyper.Grpc.Codec do defp rpc_error(reason) when reason in [:no_capacity, :exhausted], do: GRPC.RPCError.exception(:resource_exhausted, "no capacity") + defp rpc_error(:missing_image_ref), + do: GRPC.RPCError.exception(:invalid_argument, "image_ref is required") + + defp rpc_error(:invalid_ref), + do: GRPC.RPCError.exception(:invalid_argument, "image_ref is malformed") + + defp rpc_error({:missing_tools, tools}), + do: + GRPC.RPCError.exception( + :failed_precondition, + "node is missing required image tools: #{Enum.join(tools, ", ")}" + ) + 
defp rpc_error(reason), do: GRPC.RPCError.exception(:internal, "internal error: #{inspect(reason)}") end diff --git a/lib/hyper/grpc/server.ex b/lib/hyper/grpc/server.ex index 08d45bf..39c6482 100644 --- a/lib/hyper/grpc/server.ex +++ b/lib/hyper/grpc/server.ex @@ -17,9 +17,21 @@ defmodule Hyper.Grpc.Server do GetVmRequest, GetVmResponse, ListVmsResponse, + LoadImageRequest, + LoadImageResponse, StopVmRequest } + @spec load_image(LoadImageRequest.t(), GRPC.Server.Stream.t()) :: LoadImageResponse.t() + def load_image(%LoadImageRequest{} = req, _stream) do + with {:ok, {ref, opts}} <- Codec.from_grpc(req), + {:ok, img_id} <- Hyper.Img.OciLoader.load(ref, opts) do + Codec.to_grpc({:loaded, img_id}) + else + {:error, reason} -> raise Codec.to_grpc({:error, reason}) + end + end + @spec create_vm(CreateVmRequest.t(), GRPC.Server.Stream.t()) :: CreateVmResponse.t() def create_vm(%CreateVmRequest{} = req, _stream) do with {:ok, spec} <- Codec.from_grpc(req), diff --git a/lib/hyper/img.ex b/lib/hyper/img.ex new file mode 100644 index 0000000..8573b9d --- /dev/null +++ b/lib/hyper/img.ex @@ -0,0 +1,161 @@ +defmodule Hyper.Img do + @moduledoc """ + A content-addressed image: an ordered stack of layers, and the entry point for + putting one into the cluster. + + `create/2` ingests a prepared image file -- e.g. the ext4 rootfs produced by + `Hyper.Img.OciLoader` -- into the shared media store and the image database. It + content-addresses the file (sha256 of its bytes = the image id), publishes it + into `Hyper.Config.layer_dir/0` at `layer_<id>.img`, then records it as a + one-layer base image (`blobs` + `images` + `image_layers`). Producers of image + files stay decoupled from the store and DB: they hand a path to `create/2`. 
+ """ + + use OpenTelemetryDecorator + + require Logger + + alias Hyper.Config + alias Hyper.Img.Db.{Blob, Image, ImageLayer, Repo} + + @type id :: String.t() + + # `Ecto.Multi` is an opaque struct; building it through the pipe trips + # dialyzer's opacity check (a known Ecto false positive), so silence it for the + # one function that assembles a Multi. + @dialyzer {:no_opaque, record: 3} + + @doc """ + Ingest the image file at `path` into the cluster and return its + content-addressed id. + + Content-addresses `path` (sha256 of its bytes = the id), publishes it into the + media store at `layer_<id>.img`, and records it as a one-layer base image. The + file at `path` is consumed -- moved into the store on success, removed on + failure -- so the caller hands off ownership. + + `opts[:label]` sets the human-readable `images.label` (defaults to the basename + of `path`). + + Idempotent: creating identical bytes again is a no-op that returns the same id. + """ + @spec create(Path.t(), keyword()) :: {:ok, id()} | {:error, term()} + @decorate with_span("Hyper.Img.create", include: [:path, :label]) + def create(path, opts \\ []) do + label = Keyword.get(opts, :label, Path.basename(path)) + + with {:ok, %File.Stat{size: size}} <- File.stat(path), + {:ok, id} <- content_id(path), + {:ok, final, origin} <- publish(path, id), + :ok <- record_or_rollback(id, label, size, final, origin) do + {:ok, id} + else + {:error, _} = err -> + _ = File.rm(path) + err + end + end + + # Record the image; if the DB write fails, roll back a file we just created (a + # reused file pre-existed and may back another image, so leave it). 
+ @spec record_or_rollback(id(), String.t(), non_neg_integer(), Path.t(), :created | :reused) :: + :ok | {:error, term()} + defp record_or_rollback(id, label, size, final, origin) do + case record(id, label, size) do + :ok -> + :ok + + {:error, _} = err -> + _ = if origin == :created, do: File.rm(final), else: :ok + err + end + end + + # Streaming sha256 of `path`, lowercase hex -- the content address. + @spec content_id(Path.t()) :: {:ok, id()} | {:error, term()} + @decorate with_span("Hyper.Img.content_id", include: [:path]) + defp content_id(path) do + {:ok, Hyper.Redist.Sha256.file(path)} + rescue + e -> {:error, {:hash_failed, Exception.message(e)}} + end + + # Move `src` into the store at its content-addressed path. If the destination + # already exists (identical bytes already published), drop `src` and reuse it. + @spec publish(Path.t(), id()) :: {:ok, Path.t(), :created | :reused} | {:error, term()} + @decorate with_span("Hyper.Img.publish", include: [:id]) + defp publish(src, id) do + File.mkdir_p!(Config.layer_dir()) + final = final_path(id) + + if File.exists?(final) do + Logger.info("image #{id} already present in store; reusing") + _ = File.rm(src) + {:ok, final, :reused} + else + case place(src, final) do + {:ok, ^final} -> {:ok, final, :created} + {:error, _} = err -> err + end + end + end + + # An atomic rename when `src` is on the store's filesystem; a copy-then-drop + # across filesystems (rename can't cross a mount). 
+ @spec place(Path.t(), Path.t()) :: {:ok, Path.t()} | {:error, term()} + defp place(src, final) do + case File.rename(src, final) do + :ok -> + {:ok, final} + + {:error, :exdev} -> + case File.cp(src, final) do + :ok -> + _ = File.rm(src) + {:ok, final} + + {:error, reason} -> + _ = File.rm(final) + {:error, {:publish_failed, reason}} + end + + {:error, reason} -> + {:error, {:publish_failed, reason}} + end + end + + @spec final_path(id()) :: Path.t() + defp final_path(id), do: Path.join(Config.layer_dir(), "layer_#{id}.img") + + # Record the base image: one blob, one image (id == blob id), one layer at + # position 0. All upserts are idempotent so a re-publish of the same bytes is a + # no-op. The blob is inserted before the layer so the FK is satisfied. + @spec record(id(), String.t(), non_neg_integer()) :: :ok | {:error, term()} + defp record(id, label, size) do + multi = + Ecto.Multi.new() + |> Ecto.Multi.insert( + :blob, + Blob.changeset(%Blob{}, %{id: id, kind: :base, size: size}), + on_conflict: :nothing, + conflict_target: :id + ) + |> Ecto.Multi.insert( + :image, + Image.changeset(%Image{}, %{id: id, label: label}), + on_conflict: :nothing, + conflict_target: :id + ) + |> Ecto.Multi.insert( + :layer, + ImageLayer.changeset(%ImageLayer{}, %{image_id: id, position: 0, blob_id: id}), + on_conflict: :nothing, + conflict_target: [:image_id, :position] + ) + + case Repo.transaction(multi) do + {:ok, _} -> :ok + {:error, step, reason, _changes} -> {:error, {:record_failed, step, reason}} + end + end +end diff --git a/lib/hyper/img/oci_loader.ex b/lib/hyper/img/oci_loader.ex new file mode 100644 index 0000000..a261c6f --- /dev/null +++ b/lib/hyper/img/oci_loader.ex @@ -0,0 +1,214 @@ +defmodule Hyper.Img.OciLoader do + @moduledoc """ + Builds an ext4 rootfs from an OCI image and hands it to `Hyper.Img.create/2`. + + `load/1` takes a registry reference (e.g. `"docker.io/library/alpine:3.19"`) + and: + + 1. 
**pulls** it with `skopeo`, selecting the manifest entry matching this node's + architecture, into a local OCI layout. + 2. **flattens** it with `umoci unpack`, which applies OCI whiteouts/opaque dirs correctly, + yielding a merged rootfs directory. + 3. **builds** an ext4 image of that rootfs with `mke2fs -d`. + 4. hands the ext4 file to `Hyper.Img.create/2`, which content-addresses it, + publishes it into the media store, and records it as a base image -- + returning the `img_id`. + + This module owns only the OCI-to-ext4 conversion; ingesting a prepared image + file into the store and database is `Hyper.Img`'s job. + """ + + use Unit.Operators + + alias Hyper.Config + alias Hyper.Img.OciLoader.Umoci + alias Unit.Information + + require Logger + + @doc "Load `ref` into the store and DB. See the module doc. Label defaults to `ref`." + @spec load(String.t()) :: {:ok, Hyper.Img.id()} | {:error, term()} + def load(ref), do: load(ref, []) + + @doc """ + Load `ref`. `opts[:label]` sets the human-readable `images.label` (defaults to + `ref`). + + Returns `{:error, {:missing_tools, names}}` when the node lacks a required + external tool (`skopeo`/`umoci`/`mke2fs`); the check runs up front so the load + fails fast before the multi-minute pull. 
+ """ + @spec load(String.t(), keyword()) :: {:ok, Hyper.Img.id()} | {:error, term()} + def load(ref, opts) when is_binary(ref) and is_list(opts) do + Logger.info("oci: loading image #{ref}") + + case do_load(ref, opts) do + {:ok, id} = ok -> + Logger.info("oci: loaded #{ref} as image #{id}") + ok + + {:error, reason} = err -> + Logger.warning("oci: failed to load #{ref}: #{inspect(reason)}") + err + end + end + + @spec do_load(String.t(), keyword()) :: {:ok, Hyper.Img.id()} | {:error, term()} + defp do_load(ref, opts) do + label = Keyword.get(opts, :label, ref) + + with {:ok, source} <- source(ref), + :ok <- Umoci.ensure_installed(), + :ok <- test_system(), + {:ok, arch} <- Sys.Arch.current() do + Sys.Tmp.with_tempdir("hyper-oci", fn tmp -> + with {:ok, rootfs} <- pull_and_unpack(source, goarch(arch), tmp), + {:ok, {content, files}} <- dir_usage(rootfs), + params = ext4_params(content, files), + {:ok, staged} <- build_ext4(rootfs, params) do + Hyper.Img.create(staged, label: label) + end + end) + end + end + + @doc """ + Verify the external tools the loader needs (`skopeo`, `umoci`, `mke2fs`) are + resolvable on this host. Returns `{:error, {:missing_tools, names}}` listing + any that are absent. + """ + @spec test_system() :: :ok | {:error, term()} + def test_system do + with {:ok, _arch} <- Sys.Arch.current() do + tools = [ + {"skopeo", Config.skopeo_path()}, + {"umoci", Umoci.bin()}, + {"mke2fs", Config.mke2fs_path()} + ] + + missing = for {name, path} <- tools, System.find_executable(path) == nil, do: name + + if missing == [], do: :ok, else: {:error, {:missing_tools, missing}} + end + end + + # Validate `ref` and return the `skopeo` source `"docker://" <> ref`. A ref must + # be non-empty and contain no whitespace (refs never do; rejecting whitespace + # also closes the door on accidental arg-splitting surprises). 
+  @doc false
+  @spec source(String.t()) :: {:ok, String.t()} | {:error, :invalid_ref}
+  def source(ref) when is_binary(ref) do
+    if ref != "" and not String.match?(ref, ~r/\s/),
+      do: {:ok, "docker://" <> ref},
+      else: {:error, :invalid_ref}
+  end
+
+  # Map a Hyper architecture to the Go/OCI arch name `skopeo --override-arch` wants.
+  @doc false
+  @spec goarch(Sys.Arch.t()) :: String.t()
+  def goarch(:x86_64), do: "amd64"
+  def goarch(:aarch64), do: "arm64"
+
+  # Bytes budgeted per inode when reserving room for the inode table.
+  @inode_table_bytes_per_inode 256
+  # Flat number of spare inodes added on top of the proportional (10%) headroom,
+  # so tiny trees still get usable slack.
+  @spare_inodes 256
+
+  # `du` apparent bytes undercount ext4 block usage and the default inode ratio
+  # starves file-dense trees, so the size carries the inode table plus slack and
+  # the inode count is the file count with headroom.
+  #
+  # Returns `{size, inodes}`: `size` is rounded up to a whole MiB and covers the
+  # content, a further quarter of the content as slack, the inode table, and a
+  # fixed 16 MiB allowance; `inodes` is `files` plus 10% plus a flat reserve.
+  @doc false
+  @spec ext4_params(Information.t(), non_neg_integer()) :: {Information.t(), pos_integer()}
+  def ext4_params(content, files) do
+    inodes = files + div(files, 10) + @spare_inodes
+    inode_table = Information.bytes(inodes * @inode_table_bytes_per_inode)
+    metadata = inode_table + Information.mib(16)
+    content_slack = Information.bytes(div(Information.as_bytes(content), 4))
+    size = ceil_mib(content + content_slack + metadata)
+    {size, inodes}
+  end
+
+  # Round `size` up to the next whole MiB.
+  @spec ceil_mib(Information.t()) :: Information.t()
+  defp ceil_mib(size) do
+    mib = Information.as_bytes(Information.mib(1))
+    Information.mib(div(Information.as_bytes(size) + mib - 1, mib))
+  end
+
+  # `skopeo copy` into a local OCI layout, then `umoci unpack` into a bundle.
+  # Returns the path to the flattened rootfs directory.
+  @spec pull_and_unpack(String.t(), String.t(), Path.t()) ::
+          {:ok, Path.t()} | {:error, term()}
+  defp pull_and_unpack(source, goarch, tmp) do
+    Logger.debug("oci: pulling and flattening #{source}")
+    oci = Path.join(tmp, "oci")
+    bundle = Path.join(tmp, "bundle")
+
+    skopeo_args = [
+      "copy",
+      "--override-os",
+      "linux",
+      "--override-arch",
+      goarch,
+      source,
+      "oci:#{oci}:img"
+    ]
+
+    umoci_args = ["unpack", "--rootless", "--image", "#{oci}:img", bundle]
+
+    # Run the tools strictly in order: `umoci unpack` reads the layout that
+    # `skopeo copy` writes, so it must not be started when the pull has failed.
+    with :ok <- tag(cmd(Config.skopeo_path(), skopeo_args), :skopeo),
+         :ok <- tag(cmd(Umoci.bin(), umoci_args), :umoci) do
+      {:ok, Path.join(bundle, "rootfs")}
+    end
+  end
+
+  # Block-aware actual usage (`du -sB1`) and the file count (`du -s --inodes`).
+  @spec dir_usage(Path.t()) :: {:ok, {Information.t(), non_neg_integer()}} | {:error, term()}
+  defp dir_usage(rootfs) do
+    with {:ok, bytes} <- du(rootfs, ["-sB1"]),
+         {:ok, files} <- du(rootfs, ["-s", "--inodes"]) do
+      {:ok, {Information.bytes(bytes), files}}
+    end
+  end
+
+  # Run `du` with `flags` on `rootfs` and parse the leading integer of its
+  # output. A non-zero exit yields `{:du_failed, status, output}`; output that
+  # does not start with an integer yields `{:du_unparsable, output}`.
+  @spec du(Path.t(), [String.t()]) :: {:ok, non_neg_integer()} | {:error, term()}
+  defp du(rootfs, flags) do
+    case System.cmd("du", flags ++ [rootfs], stderr_to_stdout: true) do
+      {out, 0} ->
+        case Integer.parse(out) do
+          {n, _rest} -> {:ok, n}
+          :error -> {:error, {:du_unparsable, out}}
+        end
+
+      {out, status} ->
+        {:error, {:du_failed, status, out}}
+    end
+  end
+
+  # Staged inside `layer_dir` so the later publish is an atomic same-filesystem
+  # rename. `-N` pins the inode count (the default ratio starves file-dense
+  # trees); the staged file is removed if mke2fs fails.
+ @spec build_ext4(Path.t(), {Information.t(), pos_integer()}) :: + {:ok, Path.t()} | {:error, term()} + defp build_ext4(rootfs, {size, inodes}) do + Logger.debug("oci: building #{Information.as_mib(size)} MiB ext4 rootfs (#{inodes} inodes)") + File.mkdir_p!(Config.layer_dir()) + staged = Path.join(Config.layer_dir(), ".incoming-#{System.unique_integer([:positive])}.img") + + args = + ["-t", "ext4", "-F", "-q", "-N", to_string(inodes), "-d", rootfs, staged] ++ + ["#{Information.as_mib(size)}M"] + + case tag(cmd(Config.mke2fs_path(), args), :mke2fs) do + :ok -> + {:ok, staged} + + {:error, _} = err -> + _ = File.rm(staged) + err + end + end + + # Run `bin` with `args`, no shell (System.cmd takes an arg list), merging + # stderr so failures carry diagnostics. Returns `{output, exit_status}`. + @spec cmd(Path.t(), [String.t()]) :: {String.t(), non_neg_integer()} + defp cmd(bin, args), do: System.cmd(bin, args, stderr_to_stdout: true) + + # Tag a command result: `:ok` on exit 0, else `{:error, {_failed, status, output}}`. + @spec tag({String.t(), non_neg_integer()}, atom()) :: :ok | {:error, term()} + defp tag({_out, 0}, _tool), do: :ok + defp tag({out, status}, tool), do: {:error, {:"#{tool}_failed", status, out}} +end diff --git a/lib/hyper/img/oci_loader/umoci.ex b/lib/hyper/img/oci_loader/umoci.ex new file mode 100644 index 0000000..8b5da65 --- /dev/null +++ b/lib/hyper/img/oci_loader/umoci.ex @@ -0,0 +1,85 @@ +defmodule Hyper.Img.OciLoader.Umoci do + @moduledoc """ + Resolves and (when not operator-provided) installs the `umoci` binary that + `Hyper.Img.OciLoader` uses to flatten OCI image layers. + + Two sources, in priority order (mirrors `Hyper.Node.Vmlinux`): + + 1. An operator-configured path via `config :hyper, umoci_path: + "/path/to/umoci"` (`Hyper.Config.umoci_path/0`). If set, it wins and is + never downloaded. + 2. Otherwise the pinned static binary downloaded by `ensure_installed/0` + into `Hyper.Config.umoci_install_dir/0` (`/redist/umoci`). 
+ """ + + alias Hyper.Config + alias Hyper.Redist + + require Logger + + # Pinned umoci release per architecture: the static binary's filename, its + # download URL, and its SHA-256 (verified on download). umoci ships one raw + # binary per arch -- https://github.com/opencontainers/umoci/releases. + @downloads %{ + x86_64: %{ + asset: "umoci.linux.amd64", + url: "https://github.com/opencontainers/umoci/releases/download/v0.6.0/umoci.linux.amd64", + sha256: "b51c267ec394499e42c6fde47f240b7b7dba57ea49df0b5acd304378b82a3b71" + }, + aarch64: %{ + asset: "umoci.linux.arm64", + url: "https://github.com/opencontainers/umoci/releases/download/v0.6.0/umoci.linux.arm64", + sha256: "5cfd17f2e7a4bcf9ed67ea1b955ca893d200349b9ce6a3d3707dba415f458a1f" + } + } + + @doc """ + Ensure a usable `umoci` is available on this node. A no-op when the operator + configured `umoci_path` (they own it); otherwise downloads the pinned static + binary for this node's architecture into the redist cache if it is not already + present and executable, then marks it executable. Idempotent. + """ + @spec ensure_installed() :: :ok | {:error, term()} + def ensure_installed do + if Config.umoci_path() != nil do + :ok + else + with {:ok, arch} <- Sys.Arch.current() do + path = default_path(arch) + if Sys.Posix.executable?(path), do: :ok, else: install(arch, path) + end + end + end + + @doc """ + Absolute path to the `umoci` binary: the operator-configured path if set, + otherwise the downloaded default for this node's architecture. Raises if the + architecture is unsupported. 
+ """ + @spec bin() :: Path.t() + def bin do + configured = Config.umoci_path() + + if configured != nil do + configured + else + {:ok, arch} = Sys.Arch.current() + default_path(arch) + end + end + + @spec default_path(Sys.Arch.t()) :: Path.t() + defp default_path(arch) do + Path.join(Config.umoci_install_dir(), Map.fetch!(@downloads, arch).asset) + end + + @spec install(Sys.Arch.t(), Path.t()) :: :ok | {:error, term()} + defp install(arch, path) do + dl = Map.fetch!(@downloads, arch) + Logger.info("umoci: downloading #{dl.url}") + + with :ok <- Redist.File.install(dl.url, dl.sha256, path) do + File.chmod(path, 0o755) + end + end +end diff --git a/lib/hyper/node.ex b/lib/hyper/node.ex index 1a4adac..6282eaa 100644 --- a/lib/hyper/node.ex +++ b/lib/hyper/node.ex @@ -147,6 +147,7 @@ defmodule Hyper.Node do :ok <- Hyper.Node.FireVMM.Provider.ensure_installed(), :ok <- Hyper.Node.FireVMM.VmLinux.Provider.ensure_installed(), :ok <- Hyper.Node.Vmlinux.test_system(), + :ok <- Hyper.Img.OciLoader.Umoci.ensure_installed(), :ok <- Hyper.Node.Users.test_system(), :ok <- Hyper.Node.Layer.Repo.test_system(), :ok <- Hyper.SuidHelper.test_system(), diff --git a/proto/hyper/grpc/v0/hyper.proto b/proto/hyper/grpc/v0/hyper.proto index f43d0bf..f668ff8 100644 --- a/proto/hyper/grpc/v0/hyper.proto +++ b/proto/hyper/grpc/v0/hyper.proto @@ -52,6 +52,20 @@ service Hyper { // List every microVM currently known to the cluster, across all nodes. rpc ListVms(google.protobuf.Empty) returns (ListVmsResponse); + + // Load an OCI image into the cluster's shared media store and image database. + // + // Pulls the referenced image for this node's architecture, flattens it, builds + // an ext4 rootfs, content-addresses it, and records a base image. Runs on the + // node that receives the call; the result is visible cluster-wide. Blocks until + // the load completes -- this can take minutes -- so set a generous deadline. + // Returns the content-addressed `img_id` to pass to CreateVm. 
+ // + // Errors: + // INVALID_ARGUMENT -- `image_ref` is empty or malformed. + // FAILED_PRECONDITION -- the node lacks the required tools (skopeo/umoci/mke2fs). + // INTERNAL -- the pull, unpack, build, publish, or DB record failed. + rpc LoadImage(LoadImageRequest) returns (LoadImageResponse); } // A fixed (vCPU, memory, disk) size, like a cloud instance class. Each step up @@ -142,3 +156,18 @@ message Vm { // The cluster node (Erlang node name) the VM runs on. string node = 2; } + +// Request to load an OCI image. +message LoadImageRequest { + // Required. An OCI image reference, e.g. "docker.io/library/alpine:3.19". + string image_ref = 1; + + // Optional. A human-readable label stored with the image (defaults to image_ref). + optional string label = 2; +} + +// Result of a successful LoadImage. +message LoadImageResponse { + // The content-addressed image id; pass it to CreateVm. + string img_id = 1; +} diff --git a/test/hyper/img/oci_loader_test.exs b/test/hyper/img/oci_loader_test.exs new file mode 100644 index 0000000..8927777 --- /dev/null +++ b/test/hyper/img/oci_loader_test.exs @@ -0,0 +1,45 @@ +defmodule Hyper.Img.OciLoaderTest do + use ExUnit.Case, async: false + use ExUnitProperties + + alias Hyper.Config + alias Hyper.Img.Db.{Blob, Repo} + alias Hyper.Img.OciLoader + alias Unit.Information + + describe "ext4_params/2" do + test "provisions inode headroom above the file count" do + {_size, inodes} = OciLoader.ext4_params(Information.mib(100), 10_000) + assert inodes > 10_000 + end + + property "size is a whole MiB that holds the content and the inode table" do + check all( + bytes <- integer(0..Information.as_bytes(Information.gib(8))), + files <- integer(0..500_000) + ) do + {size, inodes} = OciLoader.ext4_params(Information.bytes(bytes), files) + size_b = Information.as_bytes(size) + + assert inodes >= files + assert rem(size_b, Information.as_bytes(Information.mib(1))) == 0 + assert size_b >= bytes + inodes * 256 + end + end + end + + # Opt-in: 
needs skopeo, umoci, mke2fs, network, and Postgres. mix test --include external + @tag :external + test "load/1 publishes a busybox base image to the store and DB" do + assert OciLoader.test_system() == :ok + + assert {:ok, id} = OciLoader.load("docker.io/library/busybox:1.36") + + path = Path.join(Config.layer_dir(), "layer_#{id}.img") + assert File.exists?(path) + assert File.stat!(path).size > 0 + + assert %Blob{kind: :base} = Repo.get(Blob, id) + assert Repo.get(Blob, id).size == File.stat!(path).size + end +end diff --git a/test/test_helper.exs b/test/test_helper.exs index 81ef3bf..52a1229 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -1,4 +1,10 @@ # JUnitFormatter writes JUnit XML (consumed by Codecov Test Analytics) as a side # effect of the normal test run. Listing formatters explicitly REPLACES the # defaults, so ExUnit.CLIFormatter must be named here to keep console output. -ExUnit.start(formatters: [ExUnit.CLIFormatter, JUnitFormatter]) +# +# `:external` tests shell out to real tools (skopeo/umoci/mke2fs) and touch the +# image DB + media store. They are opt-in: run with `mix test --include external`. +ExUnit.start( + formatters: [ExUnit.CLIFormatter, JUnitFormatter], + exclude: [:external] +)