Skip to content
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions vortex-layout/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ vortex-utils = { workspace = true, features = ["dashmap"] }

[dev-dependencies]
futures = { workspace = true, features = ["executor"] }
insta = { workspace = true }
rstest = { workspace = true }
temp-env = { workspace = true }
tokio = { workspace = true, features = ["rt", "macros"] }
Expand Down
271 changes: 271 additions & 0 deletions vortex-layout/src/layouts/list/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

mod reader;
pub mod writer;

use std::sync::Arc;

use reader::ListReader;
use vortex_array::DeserializeMetadata;
use vortex_array::ProstMetadata;
use vortex_array::dtype::DType;
use vortex_array::dtype::Nullability;
use vortex_array::dtype::PType;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_ensure;
use vortex_error::vortex_err;
use vortex_error::vortex_panic;
use vortex_session::VortexSession;
use vortex_session::registry::ReadContext;

use crate::LayoutChildType;
use crate::LayoutEncodingRef;
use crate::LayoutId;
use crate::LayoutReaderRef;
use crate::LayoutRef;
use crate::VTable;
use crate::children::LayoutChildren;
use crate::segments::SegmentId;
use crate::segments::SegmentSource;
use crate::vtable;

/// Child index of the `elements` layout.
pub const ELEMENTS_CHILD_INDEX: usize = 0;
/// Child index of the `offsets` layout.
pub const OFFSETS_CHILD_INDEX: usize = 1;
/// Child index of the `validity` layout (only present when the list dtype is nullable).
pub const VALIDITY_CHILD_INDEX: usize = 2;

/// Number of children when the list dtype is non-nullable.
pub const NUM_CHILDREN_NON_NULLABLE: usize = 2;

vtable!(List);

impl VTable for List {
type Layout = ListLayout;
type Encoding = ListLayoutEncoding;
type Metadata = ProstMetadata<ListLayoutMetadata>;

fn id(_encoding: &Self::Encoding) -> LayoutId {
LayoutId::new("vortex.list")
}

fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef {
LayoutEncodingRef::new_ref(ListLayoutEncoding.as_ref())
}

fn row_count(layout: &Self::Layout) -> u64 {
layout.row_count()
}

fn dtype(layout: &Self::Layout) -> &DType {
&layout.dtype
}

fn metadata(layout: &Self::Layout) -> Self::Metadata {
ProstMetadata(ListLayoutMetadata::new(layout.offsets_ptype()))
}

fn segment_ids(_layout: &Self::Layout) -> Vec<SegmentId> {
vec![]
}

fn nchildren(layout: &Self::Layout) -> usize {
let mut n = NUM_CHILDREN_NON_NULLABLE;
if layout.dtype.is_nullable() {
n += 1;
}

n
Comment on lines +77 to +82
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like I would prefer reading just an if branch and then return either NUM_CHILDREN_NON_NULLABLE or NUM_CHILDREN_NON_NULLABLE + 1?

}

fn child(layout: &Self::Layout, idx: usize) -> VortexResult<LayoutRef> {
match (idx, layout.validity.as_ref()) {
(ELEMENTS_CHILD_INDEX, _) => Ok(Arc::clone(&layout.elements)),
(OFFSETS_CHILD_INDEX, _) => Ok(Arc::clone(&layout.offsets)),
(VALIDITY_CHILD_INDEX, Some(validity)) => Ok(Arc::clone(validity)),
_ => vortex_bail!("Invalid child index {idx} for ListLayout"),
}
}

fn child_type(layout: &Self::Layout, idx: usize) -> LayoutChildType {
match (idx, layout.validity.is_some()) {
(ELEMENTS_CHILD_INDEX, _) => LayoutChildType::Auxiliary("elements".into()),
(OFFSETS_CHILD_INDEX, _) => LayoutChildType::Auxiliary("offsets".into()),
(VALIDITY_CHILD_INDEX, true) => LayoutChildType::Auxiliary("validity".into()),
_ => vortex_panic!("Invalid child index {idx} for ListLayout"),
}
}

fn new_reader(
layout: &Self::Layout,
name: Arc<str>,
segment_source: Arc<dyn SegmentSource>,
session: &VortexSession,
) -> VortexResult<LayoutReaderRef> {
Ok(Arc::new(ListReader::try_new(
layout.clone(),
name,
segment_source,
session.clone(),
)?))
}

fn build(
_encoding: &Self::Encoding,
dtype: &DType,
_row_count: u64,
metadata: &<Self::Metadata as DeserializeMetadata>::Output,
_segment_ids: Vec<SegmentId>,
children: &dyn LayoutChildren,
_ctx: &ReadContext,
) -> VortexResult<Self::Layout> {
validate_children(dtype, children.nchildren())?;

let elements_dtype = dtype
.as_list_element_opt()
.ok_or_else(|| vortex_err!("ListLayout requires a List dtype, got {dtype}"))?;
let elements = children.child(ELEMENTS_CHILD_INDEX, elements_dtype.as_ref())?;

let offsets_dtype = DType::Primitive(metadata.offsets_ptype(), Nullability::NonNullable);
let offsets = children.child(OFFSETS_CHILD_INDEX, &offsets_dtype)?;

let validity = dtype
.is_nullable()
.then(|| children.child(VALIDITY_CHILD_INDEX, &DType::Bool(Nullability::NonNullable)))
.transpose()?;

Ok(ListLayout {
dtype: dtype.clone(),
elements,
offsets,
validity,
})
}

fn with_children(layout: &mut Self::Layout, children: Vec<LayoutRef>) -> VortexResult<()> {
validate_children(layout.dtype(), children.len())?;

let mut iter = children.into_iter();
layout.elements = iter
.next()
.ok_or_else(|| vortex_err!("missing elements child"))?;
layout.offsets = iter
.next()
.ok_or_else(|| vortex_err!("missing offsets child"))?;
layout.validity = layout
.dtype
.is_nullable()
.then(|| {
iter.next()
.ok_or_else(|| vortex_err!("missing validity child"))
})
.transpose()?;
Ok(())
}
}

/// Validates expected number of children based on `dtype`
#[inline]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We generally dont want to inline functions with branches, and also because this is a private function the compiler will likely inline this anyways.

inline is only useful for public functions that are small

fn validate_children(dtype: &DType, n_children: usize) -> VortexResult<()> {
let mut expected = NUM_CHILDREN_NON_NULLABLE;

if dtype.is_nullable() {
expected += 1;
};

vortex_ensure!(
n_children == expected,
Comment on lines +180 to +181
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vortex_ensure_eq!

"ListLayout expects {expected} children, got {n_children}",
);

Ok(())
}

#[derive(Debug)]
pub struct ListLayoutEncoding;

/// Stores a list-typed array by shredding `elements`, `offsets`, and optional `validity` children.
#[derive(Clone, Debug)]
pub struct ListLayout {
dtype: DType,
elements: LayoutRef,
offsets: LayoutRef,
validity: Option<LayoutRef>,
}

impl ListLayout {
/// Construct a new `ListLayout` from its components.
///
/// # Invariants
///
/// - `dtype` must be a [`DType::List`].
/// - `validity` must be `Some` iff `dtype.is_nullable()`.
/// - `offsets.dtype()` must be a non-nullable integer.
/// - `offsets.row_count()` is the Arrow-canonical `n+1` for `n` lists (or `0` for empty).
/// - When present, `validity.row_count() == offsets.row_count().saturating_sub(1)`.
pub fn new(
dtype: DType,
elements: LayoutRef,
offsets: LayoutRef,
validity: Option<LayoutRef>,
) -> Self {
Self {
dtype,
elements,
offsets,
validity,
}
}

/// Number of lists in this layout.
#[inline]
pub fn row_count(&self) -> u64 {
self.offsets.row_count().saturating_sub(1)
}

#[inline]
pub fn elements(&self) -> &LayoutRef {
&self.elements
}

#[inline]
pub fn offsets(&self) -> &LayoutRef {
&self.offsets
}

#[inline]
pub fn validity(&self) -> Option<&LayoutRef> {
self.validity.as_ref()
}

/// The integer type used for the `offsets` child layout.
#[inline]
pub fn offsets_ptype(&self) -> PType {
self.offsets.dtype().as_ptype()
}

/// The dtype of the inner elements column.
pub fn elements_dtype(&self) -> &DType {
self.dtype
.as_list_element_opt()
.vortex_expect("ListLayout dtype must be a List")
}
}

#[derive(prost::Message)]
pub struct ListLayoutMetadata {
#[prost(enumeration = "PType", tag = "1")]
offsets_ptype: i32,
}

impl ListLayoutMetadata {
pub fn new(offsets_ptype: PType) -> Self {
let mut metadata = Self::default();
metadata.set_offsets_ptype(offsets_ptype);
metadata
}
Comment on lines +266 to +270
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a strange constructor? Why not just do Self { offsets_ptype }?

}
Loading
Loading