diff --git a/Cargo.lock b/Cargo.lock index 46324a3db3e..9421b4573e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10182,6 +10182,7 @@ dependencies = [ "rustc-hash", "serde", "simdutf8", + "static_assertions", "tabled", "termtree", "tracing", @@ -10196,7 +10197,6 @@ dependencies = [ "vortex-scalar", "vortex-session", "vortex-utils", - "vortex-vector", ] [[package]] diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index d71d8eadd81..cdb4806dc2b 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -53,6 +53,7 @@ rstest_reuse = { workspace = true, optional = true } rustc-hash = { workspace = true } serde = { workspace = true, optional = true, features = ["derive"] } simdutf8 = { workspace = true } +static_assertions = { workspace = true } tabled = { workspace = true, optional = true, default-features = false, features = [ "std", ] } @@ -68,7 +69,6 @@ vortex-proto = { workspace = true, features = ["expr"] } vortex-scalar = { workspace = true } vortex-session = { workspace = true } vortex-utils = { workspace = true, features = ["dyn-traits"] } -vortex-vector = { workspace = true } [features] arbitrary = [ diff --git a/vortex-array/benches/take_primitive.rs b/vortex-array/benches/take_primitive.rs index 994a52f9cf4..b2e1822e916 100644 --- a/vortex-array/benches/take_primitive.rs +++ b/vortex-array/benches/take_primitive.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Benchmarks comparing [`PVector`] take vs [`DictArray`] canonicalization. +//! Benchmarks for [`DictArray`] canonicalization. //! //! Both are tracked by number of indices/codes for fair comparison. @@ -15,10 +15,6 @@ use rand_distr::Zipf; use vortex_array::IntoArray; use vortex_array::arrays::DictArray; use vortex_array::arrays::PrimitiveArray; -use vortex_buffer::Buffer; -use vortex_compute::take::Take; -use vortex_mask::Mask; -use vortex_vector::primitive::PVector; fn main() { divan::main(); @@ -30,40 +26,6 @@ const NUM_INDICES: &[usize] = &[1_000, 10_000, 100_000]; /// Size of the source vector / dictionary values. 
const VECTOR_SIZE: &[usize] = &[16, 256, 2048, 8192]; -// --- PVector take benchmarks --- - -#[divan::bench(args = NUM_INDICES, consts = VECTOR_SIZE, sample_count = 100_000)] -fn pvector_take_uniform(bencher: Bencher, num_indices: usize) { - let data: Buffer = (0..VECTOR_SIZE as u32).collect(); - let pvector = PVector::new(data, Mask::AllTrue(VECTOR_SIZE)); - - let rng = StdRng::seed_from_u64(0); - let range = Uniform::new(0u32, VECTOR_SIZE as u32).unwrap(); - let indices: Vec = rng.sample_iter(range).take(num_indices).collect(); - - bencher - .with_inputs(|| (&pvector, indices.as_slice())) - .bench_refs(|(pv, idx)| pv.take(*idx)); -} - -#[divan::bench(args = NUM_INDICES, consts = VECTOR_SIZE, sample_count = 100_000)] -fn pvector_take_zipfian(bencher: Bencher, num_indices: usize) { - let data: Buffer = (0..VECTOR_SIZE as u32).collect(); - let pvector = PVector::new(data, Mask::AllTrue(VECTOR_SIZE)); - - let rng = StdRng::seed_from_u64(0); - let zipf = Zipf::new(VECTOR_SIZE as f64, 1.0).unwrap(); - let indices: Vec = rng - .sample_iter(&zipf) - .take(num_indices) - .map(|i: f64| (i as u32 - 1).min(VECTOR_SIZE as u32 - 1)) - .collect(); - - bencher - .with_inputs(|| (&pvector, indices.as_slice())) - .bench_refs(|(pv, idx)| pv.take(*idx)); -} - // --- DictArray canonicalization benchmarks --- #[divan::bench(args = NUM_INDICES, consts = VECTOR_SIZE, sample_count = 100_000)] diff --git a/vortex-array/src/arrays/constant/vtable/canonical.rs b/vortex-array/src/arrays/constant/vtable/canonical.rs index 3a9dad220d1..7e8edce1ea0 100644 --- a/vortex-array/src/arrays/constant/vtable/canonical.rs +++ b/vortex-array/src/arrays/constant/vtable/canonical.rs @@ -22,7 +22,6 @@ use vortex_scalar::ListScalar; use vortex_scalar::Scalar; use vortex_scalar::StructScalar; use vortex_scalar::Utf8Scalar; -use vortex_vector::binaryview::BinaryView; use crate::Canonical; use crate::IntoArray; @@ -34,6 +33,7 @@ use crate::arrays::ListViewArray; use crate::arrays::NullArray; use crate::arrays::StructArray; use crate::arrays::VarBinViewArray; +use crate::arrays::build_views::BinaryView; use crate::arrays::constant::ConstantArray; use crate::arrays::primitive::PrimitiveArray; use crate::builders::builder_with_capacity; diff --git a/vortex-array/src/arrays/list/compute/filter.rs b/vortex-array/src/arrays/list/compute/filter.rs index ccfd23e576a..2f11edba389 100644 --- a/vortex-array/src/arrays/list/compute/filter.rs +++ b/vortex-array/src/arrays/list/compute/filter.rs @@ -138,10 +138,8 @@ fn compute_filtered_elements_and_offsets( } /// Construct an element mask from contiguous list offsets and a selection mask. 
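///
/// For example (illustrative): offsets `[0, 2, 5, 9]` describe three lists covering
/// elements `0..2`, `2..5`, and `5..9`; selecting lists `[true, false, true]` produces an
/// element mask that keeps elements `0..2` and `5..9` and drops `2..5`.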
-pub fn element_mask_from_offsets( - offsets: &[O], - selection: &Arc, -) -> Mask { +#[allow(dead_code)] +fn element_mask_from_offsets(offsets: &[O], selection: &Arc) -> Mask { let first_offset = offsets.first().map_or(0, |first_offset| first_offset.as_()); let last_offset = offsets.last().map_or(0, |last_offset| last_offset.as_()); let len = last_offset - first_offset; diff --git a/vortex-array/src/arrays/list/compute/mod.rs b/vortex-array/src/arrays/list/compute/mod.rs index bb896dfc522..26832bf4360 100644 --- a/vortex-array/src/arrays/list/compute/mod.rs +++ b/vortex-array/src/arrays/list/compute/mod.rs @@ -9,8 +9,6 @@ mod mask; mod min_max; mod take; -pub(super) use filter::element_mask_from_offsets; - #[cfg(test)] mod tests { use rstest::rstest; diff --git a/vortex-array/src/arrays/list/vtable/kernel/filter.rs b/vortex-array/src/arrays/list/vtable/kernel/filter.rs deleted file mode 100644 index 10cfd0b4183..00000000000 --- a/vortex-array/src/arrays/list/vtable/kernel/filter.rs +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::sync::Arc; - -use vortex_buffer::BufferMut; -use vortex_dtype::PTypeDowncastExt; -use vortex_dtype::match_each_integer_ptype; -use vortex_error::VortexResult; -use vortex_mask::Mask; -use vortex_vector::Vector; -use vortex_vector::VectorMutOps; -use vortex_vector::listview::ListViewVector; -use vortex_vector::listview::ListViewVectorMut; -use vortex_vector::primitive::PVector; -use vortex_vector::primitive::PrimitiveVector; - -use crate::Array; -use crate::Canonical; -use crate::ExecutionCtx; -use crate::IntoArray; -use crate::arrays::FilterArray; -use crate::arrays::FilterVTable; -use crate::arrays::ListArray; -use crate::arrays::ListVTable; -use crate::arrays::list::compute::element_mask_from_offsets; -use crate::kernel::ExecuteParentKernel; -use crate::matchers::Exact; -use crate::validity::Validity; -use crate::vectors::VectorIntoArray; -use crate::vtable::ValidityHelper; - -#[derive(Debug)] -pub(super) struct ListFilterKernel; - -impl ExecuteParentKernel for ListFilterKernel { - type Parent = Exact; - - fn parent(&self) -> Self::Parent { - Exact::new() - } - - // TODO(joe): Remove the vector usage? - fn execute_parent( - &self, - array: &ListArray, - parent: &FilterArray, - _child_idx: usize, - ctx: &mut ExecutionCtx, - ) -> VortexResult> { - let selection = match parent.filter_mask() { - Mask::AllTrue(_) | Mask::AllFalse(_) => return Ok(None), - Mask::Values(v) => v, - }; - - // TODO(ngates): for ultra-sparse masks, we don't need to optimize the entire offsets. - let offsets = array - .offsets() - .clone() - .execute::(ctx)? - .into_primitive(); - - let new_validity = match array.validity() { - Validity::NonNullable | Validity::AllValid => Mask::new_true(selection.true_count()), - Validity::AllInvalid => { - let mut vec = ListViewVectorMut::with_capacity( - array.elements().dtype(), - selection.true_count(), - 0, - ); - vec.append_nulls(selection.true_count()); - return Ok(Some( - vec.freeze() - .into_array(array.dtype()) - .into_array() - .execute::(ctx)?, - )); - } - Validity::Array(a) => a - .filter(parent.filter_mask().clone())? 
- .execute::(ctx)?, - }; - - let (new_offsets, new_sizes) = match_each_integer_ptype!(offsets.ptype(), |O| { - let offsets = (&offsets).downcast::().elements().as_slice(); - let mut new_offsets = BufferMut::::with_capacity(selection.true_count()); - let mut new_sizes = BufferMut::::with_capacity(selection.true_count()); - - let mut offset = 0; - for idx in selection.indices() { - let start = offsets[*idx]; - let end = offsets[idx + 1]; - let size = end - start; - unsafe { new_offsets.push_unchecked(offset) }; - unsafe { new_sizes.push_unchecked(size) }; - offset += size; - } - - let new_offsets = PrimitiveVector::from(PVector::::new( - new_offsets.freeze(), - Mask::new_true(selection.true_count()), - )); - let new_sizes = PrimitiveVector::from(PVector::::new( - new_sizes.freeze(), - Mask::new_true(selection.true_count()), - )); - - (new_offsets, new_sizes) - }); - - // TODO(ngates): for very dense masks, there may be no point in filtering the elements, - // and instead we should construct a view against the unfiltered elements. - let element_mask = match_each_integer_ptype!(offsets.ptype(), |O| { - element_mask_from_offsets::((&offsets).downcast::().elements(), selection) - }); - - let new_elements = array - .sliced_elements()? - .filter(element_mask)? - .execute::(ctx)?; - - Ok(Some( - unsafe { - ListViewVector::new_unchecked( - Arc::new(new_elements), - new_offsets, - new_sizes, - new_validity, - ) - } - .into_array(array.dtype()) - .into_array() - .execute::(ctx)?, - )) - } -} diff --git a/vortex-array/src/arrays/list/vtable/kernel/mod.rs b/vortex-array/src/arrays/list/vtable/kernel/mod.rs index c95e7cceebc..77f330f7ca5 100644 --- a/vortex-array/src/arrays/list/vtable/kernel/mod.rs +++ b/vortex-array/src/arrays/list/vtable/kernel/mod.rs @@ -1,11 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -mod filter; - use crate::arrays::ListVTable; -use crate::arrays::list::vtable::kernel::filter::ListFilterKernel; use crate::kernel::ParentKernelSet; -pub(super) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&ListFilterKernel)]); +pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[]); diff --git a/vortex-array/src/arrays/mod.rs b/vortex-array/src/arrays/mod.rs index 5d0b1b99662..dae8f13cdd5 100644 --- a/vortex-array/src/arrays/mod.rs +++ b/vortex-array/src/arrays/mod.rs @@ -58,4 +58,6 @@ pub use scalar_fn::*; pub use slice::*; pub use struct_::*; pub use varbin::*; +pub use varbinview::build_views; +pub use varbinview::compact; pub use varbinview::*; diff --git a/vortex-array/src/arrays/primitive/array/conversion.rs b/vortex-array/src/arrays/primitive/array/conversion.rs index d827e7e8591..5c8c7495b54 100644 --- a/vortex-array/src/arrays/primitive/array/conversion.rs +++ b/vortex-array/src/arrays/primitive/array/conversion.rs @@ -7,13 +7,7 @@ use vortex_buffer::BitBufferMut; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; use vortex_dtype::NativePType; -use vortex_dtype::Nullability; -use vortex_error::VortexResult; -use vortex_error::vortex_ensure; use vortex_error::vortex_panic; -use vortex_vector::VectorOps; -use vortex_vector::match_each_pvector; -use vortex_vector::primitive::PrimitiveVector; use crate::ArrayRef; use crate::IntoArray; @@ -21,34 +15,6 @@ use crate::arrays::PrimitiveArray; use crate::validity::Validity; impl PrimitiveArray { - /// Attempts to create a `PrimitiveArray` from a [`PrimitiveVector`] given a [`Nullability`]. 
- /// - /// # Errors - /// - /// Returns an error if the nullability is [`NonNullable`](Nullability::NonNullable) and there - /// are nulls present in the vector. - pub fn try_from_vector( - primitive_vector: PrimitiveVector, - nullability: Nullability, - ) -> VortexResult { - // If we want to create a non-nullable array, then the vector should not have any nulls. - vortex_ensure!( - nullability.is_nullable() || primitive_vector.validity().all_true(), - "tried to create a non-nullable `PrimitiveArray` from a `PrimitiveVector` that had nulls" - ); - - match_each_pvector!(primitive_vector, |v| { - let (buffer, mask) = v.into_parts(); - debug_assert_eq!(buffer.len(), mask.len()); - - let validity = Validity::from_mask(mask, nullability); - - // SAFETY: Since the buffer and the mask came from a valid vector, we know that the - // length of the buffer and the validity are the same. - Ok(unsafe { Self::new_unchecked(buffer, validity) }) - }) - } - /// Create a PrimitiveArray from an iterator of `T`. /// NOTE: we cannot impl FromIterator trait since it conflicts with `FromIterator`. pub fn from_option_iter>>(iter: I) -> Self { @@ -138,70 +104,3 @@ impl IntoArray for BufferMut { self.freeze().into_array() } } - -#[cfg(test)] -mod tests { - use vortex_buffer::BufferMut; - use vortex_dtype::Nullability; - use vortex_dtype::PType; - use vortex_mask::MaskMut; - use vortex_vector::primitive::PVector; - - use super::*; - - #[test] - fn test_try_from_vector_with_nulls_nullable() { - // Create a vector with some null values: [Some(1), None, Some(3), Some(4), None]. - let mut values = BufferMut::::with_capacity(5); - values.extend_from_slice(&[1, 0, 3, 4, 0]); - - let mut validity = MaskMut::with_capacity(5); - validity.append_n(true, 1); - validity.append_n(false, 1); - validity.append_n(true, 1); - validity.append_n(true, 1); - validity.append_n(false, 1); - - let pvector = - PVector::try_new(values.freeze(), validity.freeze()).expect("Failed to create PVector"); - - // This should succeed since we're allowing nulls. - let result = - PrimitiveArray::try_from_vector(pvector.into(), Nullability::Nullable).unwrap(); - - assert_eq!(result.len(), 5); - assert_eq!(result.ptype(), PType::I32); - assert!(result.is_valid(0).unwrap()); - assert!(!result.is_valid(1).unwrap()); - assert!(result.is_valid(2).unwrap()); - assert!(result.is_valid(3).unwrap()); - assert!(!result.is_valid(4).unwrap()); - } - - #[test] - fn test_try_from_vector_non_nullable_with_nulls_errors() { - // Create a vector with null values: [Some(1), None, Some(3)]. - let mut values = BufferMut::::with_capacity(3); - values.extend_from_slice(&[1, 0, 3]); - - let mut validity = MaskMut::with_capacity(3); - validity.append_n(true, 1); - validity.append_n(false, 1); - validity.append_n(true, 1); - - let pvector = - PVector::try_new(values.freeze(), validity.freeze()).expect("Failed to create PVector"); - - // This should fail because we're trying to create a non-nullable array from data with - // nulls. - let result = PrimitiveArray::try_from_vector(pvector.into(), Nullability::NonNullable); - - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("non-nullable `PrimitiveArray`") - ); - } -} diff --git a/vortex-array/src/arrays/primitive/compute/take/avx2.rs b/vortex-array/src/arrays/primitive/compute/take/avx2.rs index c330a9226a3..cd670ebb6f0 100644 --- a/vortex-array/src/arrays/primitive/compute/take/avx2.rs +++ b/vortex-array/src/arrays/primitive/compute/take/avx2.rs @@ -6,13 +6,13 @@ //! 
Only enabled for x86_64 hosts and it is gated at runtime behind feature detection to //! ensure AVX2 instructions are available. -use vortex_compute::take::slice::avx2; use vortex_dtype::NativePType; use vortex_dtype::UnsignedPType; use vortex_dtype::match_each_native_ptype; use vortex_dtype::match_each_unsigned_integer_ptype; use vortex_error::VortexResult; +use super::slice_avx2; use crate::ArrayRef; use crate::IntoArray; use crate::arrays::primitive::PrimitiveArray; @@ -59,7 +59,7 @@ where I: UnsignedPType, { // SAFETY: The caller guarantees that the `avx2` feature is enabled. - let buffer = unsafe { avx2::take_avx2(values, indices) }; + let buffer = unsafe { slice_avx2::take_avx2(values, indices) }; debug_assert!( validity diff --git a/vortex-array/src/arrays/primitive/compute/take/mod.rs b/vortex-array/src/arrays/primitive/compute/take/mod.rs index fd2ddb63cf3..30857b4b1b6 100644 --- a/vortex-array/src/arrays/primitive/compute/take/mod.rs +++ b/vortex-array/src/arrays/primitive/compute/take/mod.rs @@ -3,6 +3,11 @@ #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] mod avx2; +#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +mod slice_avx2; + +#[cfg(vortex_nightly)] +mod slice_portable; #[cfg(vortex_nightly)] mod portable; diff --git a/vortex-array/src/arrays/primitive/compute/take/portable.rs b/vortex-array/src/arrays/primitive/compute/take/portable.rs index 48f31896f60..21e4d4b3d20 100644 --- a/vortex-array/src/arrays/primitive/compute/take/portable.rs +++ b/vortex-array/src/arrays/primitive/compute/take/portable.rs @@ -18,13 +18,13 @@ use num_traits::AsPrimitive; use vortex_buffer::Alignment; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; -use vortex_compute::take::slice::portable; use vortex_dtype::NativePType; use vortex_dtype::PType; use vortex_dtype::match_each_native_simd_ptype; use vortex_dtype::match_each_unsigned_integer_ptype; use vortex_error::VortexResult; +use super::slice_portable as portable; use crate::ArrayRef; use crate::IntoArray; use crate::arrays::PrimitiveArray; diff --git a/vortex-array/src/arrays/primitive/compute/take/slice_avx2.rs b/vortex-array/src/arrays/primitive/compute/take/slice_avx2.rs new file mode 100644 index 00000000000..689e53c96dc --- /dev/null +++ b/vortex-array/src/arrays/primitive/compute/take/slice_avx2.rs @@ -0,0 +1,535 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! An AVX2 implementation of take operation using gather instructions. +//! +//! Only enabled for x86_64 hosts and it is gated at runtime behind feature detection to ensure AVX2 +//! instructions are available. 
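+//!
+//! A minimal calling sketch (illustrative; the caller, not this module, performs the runtime
+//! feature check):
+//!
+//! ```ignore
+//! let values: Vec<u32> = (0..100).collect();
+//! let indices: Vec<u8> = vec![5, 10, 15];
+//! if std::arch::is_x86_feature_detected!("avx2") {
+//!     // SAFETY: AVX2 support was verified at runtime immediately above.
+//!     let taken = unsafe { take_avx2(&values, &indices) };
+//!     assert_eq!(taken.as_slice(), &[5, 10, 15]);
+//! }
+//! ```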
+
+#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+
+use std::arch::x86_64::__m256i;
+use std::arch::x86_64::_mm_loadu_si128;
+use std::arch::x86_64::_mm_setzero_si128;
+use std::arch::x86_64::_mm_shuffle_epi32;
+use std::arch::x86_64::_mm_storeu_si128;
+use std::arch::x86_64::_mm_unpacklo_epi64;
+use std::arch::x86_64::_mm256_cmpgt_epi32;
+use std::arch::x86_64::_mm256_cmpgt_epi64;
+use std::arch::x86_64::_mm256_cvtepu8_epi32;
+use std::arch::x86_64::_mm256_cvtepu8_epi64;
+use std::arch::x86_64::_mm256_cvtepu16_epi32;
+use std::arch::x86_64::_mm256_cvtepu16_epi64;
+use std::arch::x86_64::_mm256_cvtepu32_epi64;
+use std::arch::x86_64::_mm256_extracti128_si256;
+use std::arch::x86_64::_mm256_loadu_si256;
+use std::arch::x86_64::_mm256_mask_i32gather_epi32;
+use std::arch::x86_64::_mm256_mask_i64gather_epi32;
+use std::arch::x86_64::_mm256_mask_i64gather_epi64;
+use std::arch::x86_64::_mm256_set1_epi32;
+use std::arch::x86_64::_mm256_set1_epi64x;
+use std::arch::x86_64::_mm256_setzero_si256;
+use std::arch::x86_64::_mm256_storeu_si256;
+use std::convert::identity;
+
+use vortex_buffer::Alignment;
+use vortex_buffer::Buffer;
+use vortex_buffer::BufferMut;
+use vortex_dtype::NativePType;
+use vortex_dtype::PType;
+use vortex_dtype::UnsignedPType;
+
+/// Scalar fallback implementation for the take operation.
+fn take_scalar<T: Copy, I: UnsignedPType>(buffer: &[T], indices: &[I]) -> Buffer<T> {
+    let mut result = BufferMut::with_capacity(indices.len());
+    let ptr = result.spare_capacity_mut().as_mut_ptr().cast::<T>();
+
+    for (i, idx) in indices.iter().enumerate() {
+        // SAFETY: We reserved `indices.len()` capacity, so `ptr.add(i)` is valid.
+        unsafe { ptr.add(i).write(buffer[idx.as_()]) };
+    }
+
+    // SAFETY: We just wrote exactly `indices.len()` elements.
+    unsafe { result.set_len(indices.len()) };
+    result.freeze()
+}
+
+/// Takes the specified indices into a new [`Buffer`] using AVX2 SIMD.
+///
+/// If an AVX2 gather kernel exists for the given index/value type combination, the take is
+/// accelerated using AVX2 gather instructions; for unsupported type combinations this falls
+/// back to the scalar implementation.
+///
+/// # Panics
+///
+/// This function panics if any of the provided `indices` are out of bounds for `buffer`.
+///
+/// # Safety
+///
+/// The caller must ensure the `avx2` feature is enabled.
+#[allow(dead_code, unused_variables, reason = "TODO(connor): Implement this")]
+#[target_feature(enable = "avx2")]
+#[inline]
+pub unsafe fn take_avx2<V: NativePType, I: UnsignedPType>(
+    buffer: &[V],
+    indices: &[I],
+) -> Buffer<V> {
+    macro_rules! dispatch_avx2 {
+        ($indices:ty, $values:ty) => {
+            { let result = dispatch_avx2!($indices, $values, cast: $values); result }
+        };
+        ($indices:ty, $values:ty, cast: $cast:ty) => {{
+            let indices = unsafe { std::mem::transmute::<&[I], &[$indices]>(indices) };
+            let values = unsafe { std::mem::transmute::<&[V], &[$cast]>(buffer) };
+
+            let result = exec_take::<$cast, $indices, AVX2Gather>(values, indices);
+            unsafe { result.transmute::<V>() }
+        }};
+    }
+
+    match (I::PTYPE, V::PTYPE) {
+        // Int value types. Only 32 and 64 bit types are supported.
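+        // (Illustrative: `dispatch_avx2!(u8, i32)` reinterprets `indices` as `&[u8]` and
+        // `buffer` as `&[i32]`, then runs `exec_take::<i32, u8, AVX2Gather>`.)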
+ (PType::U8, PType::I32) => dispatch_avx2!(u8, i32), + (PType::U8, PType::U32) => dispatch_avx2!(u8, u32), + (PType::U8, PType::I64) => dispatch_avx2!(u8, i64), + (PType::U8, PType::U64) => dispatch_avx2!(u8, u64), + (PType::U16, PType::I32) => dispatch_avx2!(u16, i32), + (PType::U16, PType::U32) => dispatch_avx2!(u16, u32), + (PType::U16, PType::I64) => dispatch_avx2!(u16, i64), + (PType::U16, PType::U64) => dispatch_avx2!(u16, u64), + (PType::U32, PType::I32) => dispatch_avx2!(u32, i32), + (PType::U32, PType::U32) => dispatch_avx2!(u32, u32), + (PType::U32, PType::I64) => dispatch_avx2!(u32, i64), + (PType::U32, PType::U64) => dispatch_avx2!(u32, u64), + + // Float value types, treat them as if they were corresponding int types. + (PType::U8, PType::F32) => dispatch_avx2!(u8, f32, cast: u32), + (PType::U16, PType::F32) => dispatch_avx2!(u16, f32, cast: u32), + (PType::U32, PType::F32) => dispatch_avx2!(u32, f32, cast: u32), + (PType::U64, PType::F32) => dispatch_avx2!(u64, f32, cast: u32), + + (PType::U8, PType::F64) => dispatch_avx2!(u8, f64, cast: u64), + (PType::U16, PType::F64) => dispatch_avx2!(u16, f64, cast: u64), + (PType::U32, PType::F64) => dispatch_avx2!(u32, f64, cast: u64), + (PType::U64, PType::F64) => dispatch_avx2!(u64, f64, cast: u64), + + // Scalar fallback for unsupported value types. + _ => { + tracing::trace!( + "take AVX2 kernel missing for indices {} values {}, falling back to scalar", + I::PTYPE, + V::PTYPE + ); + + take_scalar(buffer, indices) + } + } +} + +/// The main gather function that is used by the inner loop kernel for AVX2 gather. +pub(crate) trait GatherFn { + /// The number of data elements that are written to the `dst` on each loop iteration. + const WIDTH: usize; + /// The number of indices read from `indices` on each loop iteration. + /// Depending on the available instructions and bit-width we may stride by a larger amount + /// than we actually end up reading from `src` (governed by the `WIDTH` parameter). + const STRIDE: usize = Self::WIDTH; + + /// Gather values from `src` into the `dst` using the `indices`, optionally using + /// SIMD instructions. + /// + /// # Safety + /// + /// This function can read up to `STRIDE` elements through `indices`, and read/write up to + /// `WIDTH` elements through `src` and `dst` respectively. + unsafe fn gather(indices: *const Idx, max_idx: Idx, src: *const Values, dst: *mut Values); +} + +/// AVX2 version of GatherFn defined for 32- and 64-bit value types. +enum AVX2Gather {} + +macro_rules! 
impl_gather { + ($idx:ty, $({$value:ty => load: $load:ident, extend: $extend:ident, splat: $splat:ident, zero_vec: $zero_vec:ident, mask_indices: $mask_indices:ident, mask_cvt: |$mask_var:ident| $mask_cvt:block, gather: $masked_gather:ident, store: $store:ident, WIDTH = $WIDTH:literal, STRIDE = $STRIDE:literal }),+) => { + $( + impl_gather!(single; $idx, $value, load: $load, extend: $extend, splat: $splat, zero_vec: $zero_vec, mask_indices: $mask_indices, mask_cvt: |$mask_var| $mask_cvt, gather: $masked_gather, store: $store, WIDTH = $WIDTH, STRIDE = $STRIDE); + )* + }; + (single; $idx:ty, $value:ty, load: $load:ident, extend: $extend:ident, splat: $splat:ident, zero_vec: $zero_vec:ident, mask_indices: $mask_indices:ident, mask_cvt: |$mask_var:ident| $mask_cvt:block, gather: $masked_gather:ident, store: $store:ident, WIDTH = $WIDTH:literal, STRIDE = $STRIDE:literal) => { + impl GatherFn<$idx, $value> for AVX2Gather { + const WIDTH: usize = $WIDTH; + const STRIDE: usize = $STRIDE; + + #[allow(unused_unsafe, clippy::cast_possible_truncation)] + #[inline(always)] + unsafe fn gather(indices: *const $idx, max_idx: $idx, src: *const $value, dst: *mut $value) { + const { + assert!($WIDTH <= $STRIDE, "dst cannot advance by more than the stride"); + } + + const SCALE: i32 = std::mem::size_of::<$value>() as i32; + + let indices_vec = unsafe { $load(indices.cast()) }; + // Extend indices to fill vector register + let indices_vec = unsafe { $extend(indices_vec) }; + + // create a vec of the max idx + let max_idx_vec = unsafe { $splat(max_idx as _) }; + // create a mask for valid indices (where the max_idx > provided index). + let invalid_mask = unsafe { $mask_indices(max_idx_vec, indices_vec) }; + let invalid_mask = { + let $mask_var = invalid_mask; + $mask_cvt + }; + let zero_vec = unsafe { $zero_vec() }; + + // Gather the values into new vector register, for masked positions + // it substitutes zero instead of accessing the src. + let values_vec = unsafe { $masked_gather::(zero_vec, src.cast(), indices_vec, invalid_mask) }; + + // Write the vec out to dst. + unsafe { $store(dst.cast(), values_vec) }; + } + } + }; +} + +// kernels for u8 indices +impl_gather!(u8, + // 32-bit values, loaded 8 at a time + { u32 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu8_epi32, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 16 + }, + { i32 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu8_epi32, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 16 + }, + + // 64-bit values, loaded 4 at a time + { u64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu8_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 16 + }, + { i64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu8_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 16 + } +); + +// kernels for u16 indices +impl_gather!(u16, + // 32-bit values. 
8x indices loaded at a time and 8x values written at a time + { u32 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu16_epi32, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 8 + }, + { i32 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu16_epi32, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 8 + }, + + // 64-bit values. 8x indices loaded at a time and 4x values loaded at a time. + { u64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu16_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 8 + }, + { i64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu16_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 8 + } +); + +// kernels for u32 indices +impl_gather!(u32, + // 32-bit values. 8x indices loaded at a time and 8x values written + { u32 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 8 + }, + { i32 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 8 + }, + + // 64-bit values + { u64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu32_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 4 + }, + { i64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu32_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 4 + } +); + +// kernels for u64 indices +impl_gather!(u64, + { u32 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi64x, + zero_vec: _mm_setzero_si128, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |m| { + unsafe { + let lo_bits = _mm256_extracti128_si256::<0>(m); // lower half + let hi_bits = _mm256_extracti128_si256::<1>(m); // upper half + let lo_packed = _mm_shuffle_epi32::<0b01_01_01_01>(lo_bits); + let hi_packed = _mm_shuffle_epi32::<0b01_01_01_01>(hi_bits); + _mm_unpacklo_epi64(lo_packed, hi_packed) + } + }, + gather: _mm256_mask_i64gather_epi32, + store: _mm_storeu_si128, + WIDTH = 4, STRIDE = 4 + }, + { i32 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi64x, + zero_vec: _mm_setzero_si128, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |m| { + unsafe { + let lo_bits = _mm256_extracti128_si256::<0>(m); // lower half + let hi_bits = _mm256_extracti128_si256::<1>(m); // upper half + let lo_packed = 
_mm_shuffle_epi32::<0b01_01_01_01>(lo_bits); + let hi_packed = _mm_shuffle_epi32::<0b01_01_01_01>(hi_bits); + _mm_unpacklo_epi64(lo_packed, hi_packed) + } + }, + gather: _mm256_mask_i64gather_epi32, + store: _mm_storeu_si128, + WIDTH = 4, STRIDE = 4 + }, + + // 64-bit values + { u64 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 4 + }, + { i64 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 4 + } +); + +/// AVX2 core inner loop for certain `Idx` and `Value` type. +#[inline(always)] +fn exec_take(values: &[Value], indices: &[Idx]) -> Buffer +where + Value: Copy, + Idx: UnsignedPType, + Gather: GatherFn, +{ + let indices_len = indices.len(); + let max_index = Idx::from(values.len()).unwrap_or_else(|| Idx::max_value()); + let mut buffer = + BufferMut::::with_capacity_aligned(indices_len, Alignment::of::<__m256i>()); + let buf_uninit = buffer.spare_capacity_mut(); + + let mut offset = 0; + // Loop terminates STRIDE elements before end of the indices array because the GatherFn + // might read up to STRIDE src elements at a time, even though it only advances WIDTH elements + // in the dst. + while offset + Gather::STRIDE < indices_len { + // SAFETY: gather_simd preconditions satisfied: + // 1. `(indices + offset)..(indices + offset + STRIDE)` is in-bounds for indices allocation + // 2. `buffer` has same len as indices so `buffer + offset + STRIDE` is always valid. + unsafe { + Gather::gather( + indices.as_ptr().add(offset), + max_index, + values.as_ptr(), + buf_uninit.as_mut_ptr().add(offset).cast(), + ) + }; + offset += Gather::WIDTH; + } + + // Remainder + while offset < indices_len { + buf_uninit[offset].write(values[indices[offset].as_()]); + offset += 1; + } + + assert_eq!(offset, indices_len); + + // SAFETY: all elements have been initialized. + unsafe { buffer.set_len(indices_len) }; + + // Reset the buffer alignment to the Value type + // NOTE: if we don't do this, we pass back a Buffer which is over-aligned to the + // SIMD register width. The caller expects that this memory should be aligned to the value + // type so that we can slice it at value boundaries. + buffer = buffer.aligned(Alignment::of::()); + + buffer.freeze() +} + +#[cfg(test)] +#[cfg_attr(miri, ignore)] +#[cfg(target_arch = "x86_64")] +mod tests { + use super::*; + + macro_rules! test_cases { + (index_type => $IDX:ty, value_types => $($VAL:ty),+) => { + paste::paste! 
{ + $( + // test "happy path" take, valid indices on valid array + #[test] + #[allow(clippy::cast_possible_truncation)] + fn []() { + let values: Vec<$VAL> = (1..=127).map(|x| x as $VAL).collect(); + let indices: Vec<$IDX> = (0..127).collect(); + + let result = unsafe { take_avx2(&values, &indices) }; + assert_eq!(&values, result.as_slice()); + } + + // test take on empty array + #[test] + #[should_panic] + #[allow(clippy::cast_possible_truncation)] + fn []() { + let values: Vec<$VAL> = vec![]; + let indices: Vec<$IDX> = (0..127).collect(); + let result = unsafe { take_avx2(&values, &indices) }; + assert!(result.is_empty()); + } + + // test all invalid take indices mapping to zeros + #[test] + #[should_panic] + #[allow(clippy::cast_possible_truncation)] + fn []() { + let values: Vec<$VAL> = (1..=127).map(|x| x as $VAL).collect(); + // all out of bounds indices + let indices: Vec<$IDX> = (127..=254).collect(); + + let result = unsafe { take_avx2(&values, &indices) }; + assert_eq!(&[0 as $VAL; 127], result.as_slice()); + } + )+ + } + }; + } + + test_cases!( + index_type => u8, + value_types => u32, i32, u64, i64, f32, f64 + ); + test_cases!( + index_type => u16, + value_types => u32, i32, u64, i64, f32, f64 + ); + test_cases!( + index_type => u32, + value_types => u32, i32, u64, i64, f32, f64 + ); + test_cases!( + index_type => u64, + value_types => u32, i32, u64, i64, f32, f64 + ); +} diff --git a/vortex-array/src/arrays/primitive/compute/take/slice_portable.rs b/vortex-array/src/arrays/primitive/compute/take/slice_portable.rs new file mode 100644 index 00000000000..71ca180ebcf --- /dev/null +++ b/vortex-array/src/arrays/primitive/compute/take/slice_portable.rs @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Take function implementations on slices using `portable_simd`. + +#![cfg(vortex_nightly)] + +use std::mem::MaybeUninit; +use std::mem::size_of; +use std::mem::transmute; +use std::simd; +use std::simd::num::SimdUint; + +use multiversion::multiversion; +use vortex_buffer::Alignment; +use vortex_buffer::Buffer; +use vortex_buffer::BufferMut; +use vortex_dtype::NativePType; +use vortex_dtype::PType; +use vortex_dtype::UnsignedPType; +use vortex_dtype::match_each_native_simd_ptype; +use vortex_dtype::match_each_unsigned_integer_ptype; + +/// SIMD types larger than the SIMD register size are beneficial for +/// performance as this leads to better instruction level parallelism. +pub const SIMD_WIDTH: usize = 64; + +/// Takes the specified indices into a new [`Buffer`] using portable SIMD. +/// +/// This function handles the type matching required to satisfy `SimdElement` bounds. +/// For `f16` values, it reinterprets them as `u16` since `f16` doesn't implement `SimdElement`. +#[inline] +pub fn take_portable(buffer: &[T], indices: &[I]) -> Buffer { + if T::PTYPE == PType::F16 { + assert_eq!(size_of::(), size_of::()); + + // Since Rust does not actually support 16-bit floats, we first reinterpret the data as + // `u16` integers. + // SAFETY: We know that f16 has the same bit pattern as u16, so this transmute is fine to + // make. 
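+        // (Illustrative: 1.0_f16 has the bit pattern 0x3C00; the gather moves those u16
+        // bits around unchanged, and the final transmute reinterprets them as f16.)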
+ let u16_slice: &[u16] = + unsafe { std::slice::from_raw_parts(buffer.as_ptr().cast(), buffer.len()) }; + return unsafe { take_with_indices(u16_slice, indices).transmute::() }; + } + + match_each_native_simd_ptype!(T::PTYPE, |TC| { + assert_eq!(size_of::(), size_of::()); + + // SAFETY: This is essentially a no-op that tricks the compiler into adding the + // `simd::SimdElement` bound we need to call `take_with_indices`. + let buffer: &[TC] = + unsafe { std::slice::from_raw_parts(buffer.as_ptr().cast::(), buffer.len()) }; + unsafe { take_with_indices(buffer, indices).transmute::() } + }) +} + +/// Helper that matches on index type and calls `take_portable_simd`. +/// +/// We separate this code out from above to add the [`simd::SimdElement`] constraint. +#[inline] +fn take_with_indices( + buffer: &[T], + indices: &[I], +) -> Buffer { + match_each_unsigned_integer_ptype!(I::PTYPE, |IC| { + let indices: &[IC] = + unsafe { std::slice::from_raw_parts(indices.as_ptr().cast::(), indices.len()) }; + take_portable_simd::(buffer, indices) + }) +} + +/// Takes elements from an array using SIMD indexing. +/// +/// Performs a gather operation that takes values at specified indices and returns them in a new +/// buffer. Uses SIMD instructions to process `LANE_COUNT` indices in parallel. +/// +/// Returns a `Buffer` where each element corresponds to `values[indices[i]]`. +#[multiversion(targets("x86_64+avx2", "x86_64+avx", "aarch64+neon"))] +pub fn take_portable_simd(values: &[T], indices: &[I]) -> Buffer +where + T: NativePType + simd::SimdElement, + I: UnsignedPType + simd::SimdElement, + simd::LaneCount: simd::SupportedLaneCount, + simd::Simd: SimdUint = simd::Simd>, +{ + let indices_len = indices.len(); + + let mut buffer = BufferMut::::with_capacity_aligned( + indices_len, + Alignment::of::>(), + ); + + let buf_slice = buffer.spare_capacity_mut(); + + for chunk_idx in 0..(indices_len / LANE_COUNT) { + let offset = chunk_idx * LANE_COUNT; + let mask = simd::Mask::from_bitmask(u64::MAX); + let codes_chunk = simd::Simd::::from_slice(&indices[offset..]); + + let selection = simd::Simd::gather_select( + values, + mask, + codes_chunk.cast::(), + simd::Simd::::default(), + ); + + unsafe { + selection.store_select_unchecked( + transmute::<&mut [MaybeUninit], &mut [T]>(&mut buf_slice[offset..][..64]), + mask.cast(), + ); + } + } + + for idx in ((indices_len / LANE_COUNT) * LANE_COUNT)..indices_len { + unsafe { + buf_slice + .get_unchecked_mut(idx) + .write(values[indices[idx].as_()]); + } + } + + unsafe { + buffer.set_len(indices_len); + } + + // NOTE: if we don't do this, we pass back a Buffer which is over-aligned to the + // SIMD register width. The caller expects that this memory should be aligned to the value + // type so that we can slice it at value boundaries. + buffer = buffer.aligned(Alignment::of::()); + + buffer.freeze() +} + +#[cfg(test)] +mod tests { + use super::take_portable_simd; + + #[test] + fn test_take_out_of_bounds() { + let indices = vec![2_000_000u32; 64]; + let values = vec![1i32]; + + let result = take_portable_simd::(&values, &indices); + assert_eq!(result.as_slice(), [0i32; 64]); + } + + /// Tests SIMD gather with a mix of sequential, strided, and repeated indices. This exercises + /// irregular access patterns that stress the gather operation. + #[test] + fn test_take_mixed_access_patterns() { + // Create a values array with distinct elements. + let values: Vec = (0..256).map(|i| i * 100).collect(); + + // Build indices with mixed patterns: + // - Sequential access (0, 1, 2, ...) 
+ // - Strided access (0, 4, 8, ...) + // - Repeated indices (same index multiple times) + // - Reverse order + let mut indices: Vec = Vec::with_capacity(200); + + // Sequential: indices 0..64. + indices.extend(0u32..64); + // Strided by 4: 0, 4, 8, ..., 252. + indices.extend((0u32..64).map(|i| i * 4)); + // Repeated: index 42 repeated 32 times. + indices.extend(std::iter::repeat(42u32).take(32)); + // Reverse: 255, 254, ..., 216. + indices.extend((216u32..256).rev()); + + let result = take_portable_simd::(&values, &indices); + let result_slice = result.as_slice(); + + // Verify sequential portion. + for i in 0..64 { + assert_eq!(result_slice[i], (i as i64) * 100, "sequential at index {i}"); + } + + // Verify strided portion. + for i in 0..64 { + assert_eq!( + result_slice[64 + i], + (i as i64) * 4 * 100, + "strided at index {i}" + ); + } + + // Verify repeated portion. + for i in 0..32 { + assert_eq!(result_slice[128 + i], 42 * 100, "repeated at index {i}"); + } + + // Verify reverse portion. + for i in 0..40 { + assert_eq!( + result_slice[160 + i], + (255 - i as i64) * 100, + "reverse at index {i}" + ); + } + } + + /// Tests that the scalar remainder path works correctly when the number of indices is not + /// evenly divisible by the SIMD lane count. + #[test] + fn test_take_with_remainder() { + let values: Vec = (0..1000).collect(); + + // Use 64 + 37 = 101 indices to test both the SIMD loop (64 elements) and the scalar + // remainder (37 elements). + let indices: Vec = (0u8..101).collect(); + + let result = take_portable_simd::(&values, &indices); + let result_slice = result.as_slice(); + + assert_eq!(result_slice.len(), 101); + + // Verify all elements. + for i in 0..101 { + assert_eq!(result_slice[i], i as u16, "mismatch at index {i}"); + } + + // Also test with exactly 1 remainder element. + let indices_one_remainder: Vec = (0u8..65).collect(); + let result_one = take_portable_simd::(&values, &indices_one_remainder); + assert_eq!(result_one.as_slice().len(), 65); + assert_eq!(result_one.as_slice()[64], 64); + } + + /// Tests gather with large 64-bit values and various index types to ensure no truncation + /// occurs during the operation. + #[test] + fn test_take_large_values_no_truncation() { + // Create values near the edges of i64 range. + let values: Vec = vec![ + i64::MIN, + i64::MIN + 1, + -1_000_000_000_000i64, + -1, + 0, + 1, + 1_000_000_000_000i64, + i64::MAX - 1, + i64::MAX, + ]; + + // Indices that access each value multiple times in different orders. + let indices: Vec = vec![ + 0, 8, 1, 7, 2, 6, 3, 5, 4, // Forward-backward interleaved. + 8, 8, 8, 0, 0, 0, // Repeated extremes. + 4, 4, 4, 4, 4, 4, 4, 4, // Repeated zero. + 0, 1, 2, 3, 4, 5, 6, 7, 8, // Sequential. + 8, 7, 6, 5, 4, 3, 2, 1, 0, // Reverse. + // Pad to 64 to ensure we hit the SIMD path. + 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, + ]; + + let result = take_portable_simd::(&values, &indices); + let result_slice = result.as_slice(); + + // Verify each result matches the expected value. 
+ for (i, &idx) in indices.iter().enumerate() { + assert_eq!( + result_slice[i], values[idx as usize], + "mismatch at position {i} for index {idx}" + ); + } + } +} diff --git a/vortex-array/src/arrays/validation_tests.rs b/vortex-array/src/arrays/validation_tests.rs index 5d539c8e6d3..277201c5ef0 100644 --- a/vortex-array/src/arrays/validation_tests.rs +++ b/vortex-array/src/arrays/validation_tests.rs @@ -17,9 +17,9 @@ mod tests { use vortex_dtype::Nullability; use vortex_dtype::PType; use vortex_error::VortexError; - use vortex_vector::binaryview::BinaryView; use crate::IntoArray; + use crate::arrays::build_views::BinaryView; use crate::arrays::*; use crate::validity::Validity; diff --git a/vortex-array/src/arrays/varbinview/array.rs b/vortex-array/src/arrays/varbinview/array.rs index 63c8636a167..b4fb8af1b1f 100644 --- a/vortex-array/src/arrays/varbinview/array.rs +++ b/vortex-array/src/arrays/varbinview/array.rs @@ -13,8 +13,8 @@ use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; -use vortex_vector::binaryview::BinaryView; +use crate::arrays::build_views::BinaryView; use crate::builders::ArrayBuilder; use crate::builders::VarBinViewBuilder; use crate::stats::ArrayStats; diff --git a/vortex-array/src/arrays/varbinview/build_views.rs b/vortex-array/src/arrays/varbinview/build_views.rs index 7cc81ede968..eefca1f3baa 100644 --- a/vortex-array/src/arrays/varbinview/build_views.rs +++ b/vortex-array/src/arrays/varbinview/build_views.rs @@ -1,17 +1,288 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::fmt; +use std::hash::Hash; +use std::hash::Hasher; +use std::ops::Range; + use itertools::Itertools; use num_traits::AsPrimitive; +use static_assertions::assert_eq_align; +use static_assertions::assert_eq_size; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; use vortex_buffer::ByteBuffer; use vortex_buffer::ByteBufferMut; use vortex_dtype::NativePType; -// These will be moved to array soon -pub use vortex_vector::binaryview::BinaryView; -pub use vortex_vector::binaryview::Inlined; -pub use vortex_vector::binaryview::Ref; +use vortex_error::VortexExpect; + +/// A view over a variable-length binary value. +/// +/// Either an inlined representation (for values <= 12 bytes) or a reference +/// to an external buffer (for values > 12 bytes). +#[derive(Clone, Copy)] +#[repr(C, align(16))] +pub union BinaryView { + /// Numeric representation. This is logically `u128`, but we split it into the high and low + /// bits to preserve the alignment. + pub(crate) le_bytes: [u8; 16], + + /// Inlined representation: strings <= 12 bytes + pub(crate) inlined: Inlined, + + /// Reference type: strings > 12 bytes. + pub(crate) _ref: Ref, +} + +assert_eq_align!(BinaryView, u128); +assert_eq_size!(BinaryView, [u8; 16]); +assert_eq_size!(Inlined, [u8; 16]); +assert_eq_size!(Ref, [u8; 16]); + +/// Variant of a [`BinaryView`] that holds an inlined value. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(C, align(8))] +pub struct Inlined { + /// The size of the full value. + pub size: u32, + /// The full inlined value. + pub data: [u8; BinaryView::MAX_INLINED_SIZE], +} + +impl Inlined { + /// Creates a new inlined representation from the provided value of constant size. 
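+    ///
+    /// For example (illustrative): `Inlined::new::<3>(b"abc")` stores `size = 3` and copies
+    /// `b"abc"` into the first three bytes of the zero-initialized 12-byte `data` array.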
+    #[inline]
+    fn new<const N: usize>(value: &[u8]) -> Self {
+        debug_assert_eq!(value.len(), N);
+        let mut inlined = Self {
+            size: N.try_into().vortex_expect("inlined size must fit in u32"),
+            data: [0u8; BinaryView::MAX_INLINED_SIZE],
+        };
+        inlined.data[..N].copy_from_slice(&value[..N]);
+        inlined
+    }
+
+    /// Returns the full inlined value.
+    #[inline]
+    pub fn value(&self) -> &[u8] {
+        &self.data[0..(self.size as usize)]
+    }
+}
+
+/// Variant of a [`BinaryView`] that holds a reference to an external buffer.
+#[derive(Clone, Copy, Debug)]
+#[repr(C, align(8))]
+pub struct Ref {
+    /// The size of the full value.
+    pub size: u32,
+    /// The prefix bytes of the value (first 4 bytes).
+    pub prefix: [u8; 4],
+    /// The index of the buffer where the full value is stored.
+    pub buffer_index: u32,
+    /// The offset within the buffer where the full value starts.
+    pub offset: u32,
+}
+
+impl Ref {
+    /// Returns the range within the buffer where the full value is stored.
+    #[inline]
+    pub fn as_range(&self) -> Range<usize> {
+        self.offset as usize..(self.offset + self.size) as usize
+    }
+
+    /// Replaces the buffer index and offset of the reference, returning a new `Ref`.
+    #[inline]
+    pub fn with_buffer_and_offset(&self, buffer_index: u32, offset: u32) -> Ref {
+        Self {
+            size: self.size,
+            prefix: self.prefix,
+            buffer_index,
+            offset,
+        }
+    }
+}
+
+impl PartialEq for BinaryView {
+    fn eq(&self, other: &Self) -> bool {
+        let a = unsafe { std::mem::transmute::<&BinaryView, &u128>(self) };
+        let b = unsafe { std::mem::transmute::<&BinaryView, &u128>(other) };
+        a == b
+    }
+}
+impl Eq for BinaryView {}
+
+impl Hash for BinaryView {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        unsafe { std::mem::transmute::<&BinaryView, &u128>(self) }.hash(state);
+    }
+}
+
+impl Default for BinaryView {
+    fn default() -> Self {
+        Self::make_view(&[], 0, 0)
+    }
+}
+
+impl BinaryView {
+    /// Maximum size of an inlined binary value.
+    pub const MAX_INLINED_SIZE: usize = 12;
+
+    /// Create a view from a value, block and offset
+    ///
+    /// Depending on the length of the provided value either a new inlined
+    /// or a reference view will be constructed.
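+    ///
+    /// For example (illustrative): a 5-byte value becomes an `Inlined` view holding all of
+    /// its bytes, while a 20-byte value at offset 64 of buffer 2 becomes
+    /// `Ref { size: 20, prefix: [first 4 bytes], buffer_index: 2, offset: 64 }`.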
+ /// + /// Adapted from arrow-rs + /// Explicitly enumerating inlined view produces code that avoids calling generic `ptr::copy_non_interleave` that's slower than explicit stores + #[inline(never)] + pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self { + match value.len() { + 0 => Self { + inlined: Inlined::new::<0>(value), + }, + 1 => Self { + inlined: Inlined::new::<1>(value), + }, + 2 => Self { + inlined: Inlined::new::<2>(value), + }, + 3 => Self { + inlined: Inlined::new::<3>(value), + }, + 4 => Self { + inlined: Inlined::new::<4>(value), + }, + 5 => Self { + inlined: Inlined::new::<5>(value), + }, + 6 => Self { + inlined: Inlined::new::<6>(value), + }, + 7 => Self { + inlined: Inlined::new::<7>(value), + }, + 8 => Self { + inlined: Inlined::new::<8>(value), + }, + 9 => Self { + inlined: Inlined::new::<9>(value), + }, + 10 => Self { + inlined: Inlined::new::<10>(value), + }, + 11 => Self { + inlined: Inlined::new::<11>(value), + }, + 12 => Self { + inlined: Inlined::new::<12>(value), + }, + _ => Self { + _ref: Ref { + size: u32::try_from(value.len()).vortex_expect("value length must fit in u32"), + prefix: value[0..4] + .try_into() + .vortex_expect("prefix must be exactly 4 bytes"), + buffer_index: block, + offset, + }, + }, + } + } + + /// Create a new empty view + #[inline] + pub fn empty_view() -> Self { + Self { le_bytes: [0; 16] } + } + + /// Create a new inlined binary view + /// + /// # Panics + /// + /// Panics if the provided string is too long to inline. + #[inline] + pub fn new_inlined(value: &[u8]) -> Self { + assert!( + value.len() <= Self::MAX_INLINED_SIZE, + "expected inlined value to be <= 12 bytes, was {}", + value.len() + ); + + Self::make_view(value, 0, 0) + } + + /// Returns the length of the binary value. + #[inline] + pub fn len(&self) -> u32 { + unsafe { self.inlined.size } + } + + /// Returns true if the binary value is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns true if the binary value is inlined. + #[inline] + #[expect( + clippy::cast_possible_truncation, + reason = "MAX_INLINED_SIZE is a small constant" + )] + pub fn is_inlined(&self) -> bool { + self.len() <= (Self::MAX_INLINED_SIZE as u32) + } + + /// Returns the inlined representation of the binary value. + pub fn as_inlined(&self) -> &Inlined { + debug_assert!(self.is_inlined()); + unsafe { &self.inlined } + } + + /// Returns the reference representation of the binary value. + pub fn as_view(&self) -> &Ref { + debug_assert!(!self.is_inlined()); + unsafe { &self._ref } + } + + /// Returns a mutable reference to the reference representation of the binary value. + pub fn as_view_mut(&mut self) -> &mut Ref { + unsafe { &mut self._ref } + } + + /// Returns the binary view as u128 representation. + pub fn as_u128(&self) -> u128 { + // SAFETY: binary view always safe to read as u128 LE bytes + unsafe { u128::from_le_bytes(self.le_bytes) } + } +} + +impl From for BinaryView { + fn from(value: u128) -> Self { + BinaryView { + le_bytes: value.to_le_bytes(), + } + } +} + +impl From for BinaryView { + fn from(value: Ref) -> Self { + BinaryView { _ref: value } + } +} + +impl fmt::Debug for BinaryView { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut s = f.debug_struct("BinaryView"); + if self.is_inlined() { + s.field("inline", &self.as_inlined()); + } else { + s.field("ref", &self.as_view()); + } + s.finish() + } +} /// Convert an offsets buffer to a buffer of element lengths. 
#[inline] @@ -70,8 +341,8 @@ pub fn build_views>( mod tests { use vortex_buffer::ByteBuffer; use vortex_buffer::ByteBufferMut; - use vortex_vector::binaryview::BinaryView; + use super::BinaryView; use crate::arrays::build_views::build_views; #[test] diff --git a/vortex-array/src/arrays/varbinview/compact.rs b/vortex-array/src/arrays/varbinview/compact.rs index 8f3808a2659..128482d71d9 100644 --- a/vortex-array/src/arrays/varbinview/compact.rs +++ b/vortex-array/src/arrays/varbinview/compact.rs @@ -9,9 +9,9 @@ use std::ops::Range; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_mask::Mask; -use vortex_vector::binaryview::Ref; use crate::arrays::VarBinViewArray; +use crate::arrays::build_views::Ref; use crate::builders::ArrayBuilder; use crate::builders::VarBinViewBuilder; diff --git a/vortex-array/src/arrays/varbinview/compute/is_constant.rs b/vortex-array/src/arrays/varbinview/compute/is_constant.rs index daad2c5fa2a..431ad36724d 100644 --- a/vortex-array/src/arrays/varbinview/compute/is_constant.rs +++ b/vortex-array/src/arrays/varbinview/compute/is_constant.rs @@ -3,10 +3,10 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_vector::binaryview::Ref; use crate::arrays::VarBinViewArray; use crate::arrays::VarBinViewVTable; +use crate::arrays::build_views::Ref; use crate::compute::IsConstantKernel; use crate::compute::IsConstantKernelAdapter; use crate::compute::IsConstantOpts; diff --git a/vortex-array/src/arrays/varbinview/compute/take.rs b/vortex-array/src/arrays/varbinview/compute/take.rs index 90475e6698b..642c0f34e17 100644 --- a/vortex-array/src/arrays/varbinview/compute/take.rs +++ b/vortex-array/src/arrays/varbinview/compute/take.rs @@ -10,7 +10,6 @@ use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexResult; use vortex_mask::AllOr; use vortex_mask::Mask; -use vortex_vector::binaryview::BinaryView; use crate::Array; use crate::ArrayRef; @@ -18,6 +17,7 @@ use crate::IntoArray; use crate::ToCanonical; use crate::arrays::VarBinViewArray; use crate::arrays::VarBinViewVTable; +use crate::arrays::build_views::BinaryView; use crate::compute::TakeKernel; use crate::compute::TakeKernelAdapter; use crate::register_kernel; diff --git a/vortex-array/src/arrays/varbinview/compute/zip.rs b/vortex-array/src/arrays/varbinview/compute/zip.rs index e5674632715..0fda76fae8c 100644 --- a/vortex-array/src/arrays/varbinview/compute/zip.rs +++ b/vortex-array/src/arrays/varbinview/compute/zip.rs @@ -8,12 +8,12 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_mask::AllOr; use vortex_mask::Mask; -use vortex_vector::binaryview::BinaryView; use crate::Array; use crate::ArrayRef; use crate::arrays::VarBinViewArray; use crate::arrays::VarBinViewVTable; +use crate::arrays::build_views::BinaryView; use crate::builders::DeduplicatedBuffers; use crate::builders::LazyBitBufferBuilder; use crate::compute::ZipKernel; diff --git a/vortex-array/src/arrays/varbinview/mod.rs b/vortex-array/src/arrays/varbinview/mod.rs index c0a488f0226..a79538cc469 100644 --- a/vortex-array/src/arrays/varbinview/mod.rs +++ b/vortex-array/src/arrays/varbinview/mod.rs @@ -6,7 +6,7 @@ pub use array::VarBinViewArray; pub use array::VarBinViewArrayParts; mod accessor; -pub(crate) mod compact; +pub mod compact; mod compute; @@ -14,11 +14,9 @@ mod vtable; pub use vtable::VarBinViewVTable; pub mod build_views; - -// Re-export BinaryView types from vortex-vector -pub use vortex_vector::binaryview::BinaryView; -pub use 
vortex_vector::binaryview::Inlined; -pub use vortex_vector::binaryview::Ref; +pub use build_views::BinaryView; +pub use build_views::Inlined; +pub use build_views::Ref; #[cfg(test)] mod tests; diff --git a/vortex-array/src/arrays/varbinview/tests.rs b/vortex-array/src/arrays/varbinview/tests.rs index 63c7bbb0b66..a0e7d99db81 100644 --- a/vortex-array/src/arrays/varbinview/tests.rs +++ b/vortex-array/src/arrays/varbinview/tests.rs @@ -1,10 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_vector::binaryview::BinaryView; - use crate::ToCanonical; use crate::arrays::VarBinViewArray; +use crate::arrays::build_views::BinaryView; use crate::assert_arrays_eq; #[test] diff --git a/vortex-array/src/arrays/varbinview/vtable/mod.rs b/vortex-array/src/arrays/varbinview/vtable/mod.rs index e987fbeeed5..08e5ff5bedb 100644 --- a/vortex-array/src/arrays/varbinview/vtable/mod.rs +++ b/vortex-array/src/arrays/varbinview/vtable/mod.rs @@ -11,13 +11,13 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_err; -use vortex_vector::binaryview::BinaryView; use crate::ArrayRef; use crate::Canonical; use crate::EmptyMetadata; use crate::ExecutionCtx; use crate::IntoArray; +use crate::arrays::build_views::BinaryView; use crate::arrays::varbinview::VarBinViewArray; use crate::buffer::BufferHandle; use crate::serde::ArrayChildren; diff --git a/vortex-array/src/builders/dict/bytes.rs b/vortex-array/src/builders/dict/bytes.rs index a66f8ed5efa..50488f56c55 100644 --- a/vortex-array/src/builders/dict/bytes.rs +++ b/vortex-array/src/builders/dict/bytes.rs @@ -17,7 +17,6 @@ use vortex_utils::aliases::hash_map::DefaultHashBuilder; use vortex_utils::aliases::hash_map::HashTable; use vortex_utils::aliases::hash_map::HashTableEntry; use vortex_utils::aliases::hash_map::RandomState; -use vortex_vector::binaryview::BinaryView; use super::DictConstraints; use super::DictEncoder; @@ -29,6 +28,7 @@ use crate::arrays::PrimitiveArray; use crate::arrays::VarBinVTable; use crate::arrays::VarBinViewArray; use crate::arrays::VarBinViewVTable; +use crate::arrays::build_views::BinaryView; use crate::canonical::ToCanonical; use crate::validity::Validity; diff --git a/vortex-array/src/builders/varbinview.rs b/vortex-array/src/builders/varbinview.rs index e89fc824c19..267885ce8f0 100644 --- a/vortex-array/src/builders/varbinview.rs +++ b/vortex-array/src/builders/varbinview.rs @@ -20,12 +20,12 @@ use vortex_scalar::Scalar; use vortex_scalar::Utf8Scalar; use vortex_utils::aliases::hash_map::Entry; use vortex_utils::aliases::hash_map::HashMap; -use vortex_vector::binaryview::BinaryView; use crate::Array; use crate::ArrayRef; use crate::IntoArray; use crate::arrays::VarBinViewArray; +use crate::arrays::build_views::BinaryView; use crate::arrays::compact::BufferUtilization; use crate::builders::ArrayBuilder; use crate::builders::LazyBitBufferBuilder; diff --git a/vortex-array/src/canonical_to_vector.rs b/vortex-array/src/canonical_to_vector.rs deleted file mode 100644 index cb8068b3ba5..00000000000 --- a/vortex-array/src/canonical_to_vector.rs +++ /dev/null @@ -1,181 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Conversion from Canonical arrays to Vectors. 
- -use std::sync::Arc; - -use vortex_buffer::Buffer; -use vortex_dtype::BigCast; -use vortex_dtype::DType; -use vortex_dtype::PrecisionScale; -use vortex_dtype::match_each_decimal_value_type; -use vortex_dtype::match_each_native_ptype; -use vortex_error::VortexExpect; -use vortex_error::VortexResult; -use vortex_mask::Mask; -use vortex_vector::Vector; -use vortex_vector::binaryview::BinaryVector; -use vortex_vector::binaryview::StringVector; -use vortex_vector::bool::BoolVector; -use vortex_vector::decimal::DVector; -use vortex_vector::fixed_size_list::FixedSizeListVector; -use vortex_vector::listview::ListViewVector; -use vortex_vector::null::NullVector; -use vortex_vector::primitive::PVector; -use vortex_vector::struct_::StructVector; - -use crate::ArrayRef; -use crate::Canonical; -use crate::Executable; -use crate::ExecutionCtx; -use crate::arrays::ListViewArrayParts; -use crate::arrays::PrimitiveArray; - -impl Executable for Vector { - #[expect(deprecated)] - fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { - let canonical = array.execute::(ctx)?; - canonical.to_vector(ctx) - } -} - -impl Canonical { - /// Convert a Canonical array to a Vector. - /// - /// This is the reverse of `VectorIntoArray` - it takes a fully materialized - /// canonical array and converts it into the corresponding vector type. - /// TODO(joe): move over the execute_mask - #[deprecated] - pub fn to_vector(self, ctx: &mut ExecutionCtx) -> VortexResult { - Ok(match self { - Canonical::Null(a) => Vector::Null(NullVector::new(a.len())), - Canonical::Bool(a) => { - Vector::Bool(BoolVector::new(a.bit_buffer().clone(), a.validity_mask()?)) - } - Canonical::Primitive(a) => { - let ptype = a.ptype(); - let validity = a.validity_mask()?; - match_each_native_ptype!(ptype, |T| { - let buffer = a.to_buffer::(); - Vector::Primitive(PVector::::new(buffer, validity).into()) - }) - } - Canonical::Decimal(a) => { - // Match on the storage type first to read the buffer - match_each_decimal_value_type!(a.values_type(), |D| { - // Use the smallest type that can represent the precision/scale. - // The array may store values in a smaller type (if values fit), but - // DVector requires a PrecisionScale that matches its type parameter. 
- let min_value_type = - DecimalType::smallest_decimal_value_type(&a.decimal_dtype()); - match_each_decimal_value_type!(min_value_type, |E| { - let decimal_dtype = a.decimal_dtype(); - let buffer = a.buffer::(); - let validity_mask = a.validity_mask()?; - - // Copy from D to E, possibly widening, possibly narrowing - let values = Buffer::::from_trusted_len_iter(buffer.iter().map(|d| { - ::from(*d).vortex_expect("Decimal cast failed") - })); - - // SAFETY: values came from a valid DecimalArray with the same precision/scale - Vector::Decimal( - unsafe { - DVector::::new_unchecked( - PrecisionScale::new_unchecked( - decimal_dtype.precision(), - decimal_dtype.scale(), - ), - values, - validity_mask, - ) - } - .into(), - ) - }) - }) - } - Canonical::VarBinView(a) => { - let validity = a.validity_mask()?; - match a.dtype() { - DType::Utf8(_) => { - let views = a.views().clone(); - // Convert Arc<[ByteBuffer]> to Arc> - let buffers: Box<[_]> = a.buffers().iter().cloned().collect(); - Vector::String(unsafe { - StringVector::new_unchecked(views, Arc::new(buffers), validity) - }) - } - DType::Binary(_) => { - let views = a.views().clone(); - // Convert Arc<[ByteBuffer]> to Arc> - let buffers: Box<[_]> = a.buffers().iter().cloned().collect(); - Vector::Binary(unsafe { - BinaryVector::new_unchecked(views, Arc::new(buffers), validity) - }) - } - _ => unreachable!("VarBinView must be Utf8 or Binary"), - } - } - Canonical::List(a) => { - let ListViewArrayParts { - elements, - offsets, - sizes, - validity, - .. - } = a.into_parts(); - - let validity = validity.to_array(offsets.len()).execute::(ctx)?; - let elements_vector = elements.execute::(ctx)?; - let offsets = offsets.execute::(ctx)?; - let sizes = sizes.execute::(ctx)?; - let offsets_ptype = offsets.ptype(); - let sizes_ptype = sizes.ptype(); - - match_each_native_ptype!(offsets_ptype, |O| { - match_each_native_ptype!(sizes_ptype, |S| { - let offsets_vec = - PVector::::new(offsets.to_buffer::(), offsets.validity_mask()?); - let sizes_vec = - PVector::::new(sizes.to_buffer::(), sizes.validity_mask()?); - Vector::List(unsafe { - ListViewVector::new_unchecked( - Arc::new(elements_vector), - offsets_vec.into(), - sizes_vec.into(), - validity, - ) - }) - }) - }) - } - Canonical::FixedSizeList(a) => { - let validity = a.validity_mask()?; - let list_size = a.list_size(); - let elements_vector = a.elements().clone().execute::(ctx)?; - Vector::FixedSizeList(unsafe { - FixedSizeListVector::new_unchecked( - Arc::new(elements_vector), - list_size, - validity, - ) - }) - } - Canonical::Struct(a) => { - let validity = a.validity_mask()?; - let mut fields = Vec::with_capacity(a.fields().len()); - for f in a.fields().iter().cloned() { - fields.push(f.execute::(ctx)?); - } - let fields: Box<[Vector]> = fields.into_boxed_slice(); - Vector::Struct(StructVector::new(Arc::new(fields), validity)) - } - Canonical::Extension(a) => { - // For extension arrays, convert the underlying storage - a.storage().clone().execute::(ctx)? 
- } - }) - } -} diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index d315de33d8b..9a0f1bb6ff4 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -36,7 +36,6 @@ pub mod buffer; pub mod builders; pub mod builtins; mod canonical; -pub(crate) mod canonical_to_vector; pub mod compute; mod context; pub mod display; @@ -62,7 +61,6 @@ pub mod stream; pub mod test_harness; pub mod validity; pub mod variants; -pub mod vectors; pub mod vtable; pub mod flatbuffers { diff --git a/vortex-array/src/patches.rs b/vortex-array/src/patches.rs index 5cf14730dc3..44b0553e30e 100644 --- a/vortex-array/src/patches.rs +++ b/vortex-array/src/patches.rs @@ -15,10 +15,8 @@ use vortex_dtype::IntegerPType; use vortex_dtype::NativePType; use vortex_dtype::Nullability::NonNullable; use vortex_dtype::PType; -use vortex_dtype::PTypeDowncastExt; use vortex_dtype::UnsignedPType; use vortex_dtype::match_each_integer_ptype; -use vortex_dtype::match_each_native_ptype; use vortex_dtype::match_each_unsigned_integer_ptype; use vortex_error::VortexError; use vortex_error::VortexResult; @@ -31,8 +29,6 @@ use vortex_mask::MaskMut; use vortex_scalar::PValue; use vortex_scalar::Scalar; use vortex_utils::aliases::hash_map::HashMap; -use vortex_vector::primitive::PVectorMut; -use vortex_vector::primitive::PrimitiveVectorMut; use crate::Array; use crate::ArrayRef; @@ -844,28 +840,6 @@ impl Patches { })) } - /// Applies patches to a primitive vector, returning the patched vector. - pub fn apply_to_primitive_vector(&self, vector: PrimitiveVectorMut) -> PrimitiveVectorMut { - match_each_native_ptype!(vector.ptype(), |T| { - self.apply_to_pvector(vector.downcast::()).into() - }) - } - - /// Applies patches to a [`PVectorMut`], returning the patched vector. - /// - /// This function modifies the elements buffer in-place at the positions specified by the patch - /// indices. It also updates the validity mask to reflect the nullability of patch values. - pub fn apply_to_pvector(&self, pvector: PVectorMut) -> PVectorMut { - let (mut elements, mut validity) = pvector.into_parts(); - - // SAFETY: We maintain the invariant that elements and validity have the same length, and all - // patch indices are valid after offset adjustment (guaranteed by `Patches`). - unsafe { self.apply_to_buffer(elements.as_mut_slice(), &mut validity) }; - - // SAFETY: We have not modified the length of elements or validity. - unsafe { PVectorMut::new_unchecked(elements, validity) } - } - /// Apply patches to a mutable buffer and validity mask. 
/// /// This method applies the patch values to the given buffer at the positions specified by the diff --git a/vortex-array/src/vectors.rs b/vortex-array/src/vectors.rs deleted file mode 100644 index 2abaa0f3210..00000000000 --- a/vortex-array/src/vectors.rs +++ /dev/null @@ -1,238 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::sync::Arc; - -use vortex_dtype::DType; -use vortex_dtype::NativeDecimalType; -use vortex_dtype::NativePType; -use vortex_dtype::Nullability::NonNullable; -use vortex_dtype::match_each_decimal_value_type; -use vortex_dtype::match_each_native_ptype; -use vortex_error::VortexExpect; -use vortex_vector::Vector; -use vortex_vector::VectorOps; -use vortex_vector::binaryview::BinaryViewType; -use vortex_vector::binaryview::BinaryViewVector; -use vortex_vector::bool::BoolVector; -use vortex_vector::decimal::DVector; -use vortex_vector::decimal::DecimalVector; -use vortex_vector::fixed_size_list::FixedSizeListVector; -use vortex_vector::listview::ListViewVector; -use vortex_vector::null::NullVector; -use vortex_vector::primitive::PVector; -use vortex_vector::primitive::PrimitiveVector; -use vortex_vector::struct_::StructVector; - -use crate::ArrayRef; -use crate::Canonical; -use crate::IntoArray; -use crate::arrays::BoolArray; -use crate::arrays::DecimalArray; -use crate::arrays::ExtensionArray; -use crate::arrays::FixedSizeListArray; -use crate::arrays::ListViewArray; -use crate::arrays::NullArray; -use crate::arrays::PrimitiveArray; -use crate::arrays::StructArray; -use crate::arrays::VarBinViewArray; -use crate::validity::Validity; - -/// Trait for converting vector types into arrays. -pub trait VectorIntoArray { - /// Converts the vector into an array of the specified data type. - fn into_array(self, dtype: &DType) -> T; -} - -impl VectorIntoArray for Vector { - fn into_array(self, dtype: &DType) -> ArrayRef { - VectorIntoArray::::into_array(self, dtype).into_array() - } -} - -impl VectorIntoArray for Vector { - fn into_array(self, dtype: &DType) -> Canonical { - match dtype { - DType::Null => Canonical::Null(self.into_null().into_array(dtype)), - DType::Bool(_) => Canonical::Bool(self.into_bool().into_array(dtype)), - DType::Primitive(..) => Canonical::Primitive(self.into_primitive().into_array(dtype)), - DType::Decimal(..) => Canonical::Decimal(self.into_decimal().into_array(dtype)), - DType::Utf8(_) => Canonical::VarBinView(self.into_string().into_array(dtype)), - DType::Binary(_) => Canonical::VarBinView(self.into_binary().into_array(dtype)), - DType::List(..) => Canonical::List(self.into_list().into_array(dtype)), - DType::FixedSizeList(..) => { - Canonical::FixedSizeList(self.into_fixed_size_list().into_array(dtype)) - } - DType::Struct(..) 
=> Canonical::Struct(self.into_struct().into_array(dtype)), - DType::Extension(ext_dtype) => { - let storage: Canonical = self.into_array(ext_dtype.storage_dtype()); - Canonical::Extension(ExtensionArray::new(ext_dtype.clone(), storage.into_array())) - } - } - } -} - -impl VectorIntoArray for NullVector { - fn into_array(self, dtype: &DType) -> NullArray { - assert!(matches!(dtype, DType::Null)); - NullArray::new(self.len()) - } -} - -impl VectorIntoArray for BoolVector { - fn into_array(self, dtype: &DType) -> BoolArray { - assert!(matches!(dtype, DType::Bool(_))); - - let (bits, validity) = self.into_parts(); - BoolArray::from_bit_buffer(bits, Validity::from_mask(validity, dtype.nullability())) - } -} - -impl VectorIntoArray for PrimitiveVector { - fn into_array(self, dtype: &DType) -> PrimitiveArray { - match_each_native_ptype!(self.ptype(), |T| { - ::downcast(self).into_array(dtype) - }) - } -} - -impl VectorIntoArray for PVector { - fn into_array(self, dtype: &DType) -> PrimitiveArray { - assert!(matches!(dtype, DType::Primitive(_, _))); - assert_eq!(T::PTYPE, dtype.as_ptype()); - - let (values, validity) = self.into_parts(); - // SAFETY: vectors maintain all invariants required for array creation - unsafe { - PrimitiveArray::new_unchecked::( - values, - Validity::from_mask(validity, dtype.nullability()), - ) - } - } -} - -impl VectorIntoArray for DecimalVector { - fn into_array(self, dtype: &DType) -> DecimalArray { - match_each_decimal_value_type!(self.decimal_type(), |D| { - ::downcast(self).into_array(dtype) - }) - } -} - -impl VectorIntoArray for DVector { - fn into_array(self, dtype: &DType) -> DecimalArray { - assert!(matches!(dtype, DType::Decimal(_, _))); - - let nullability = dtype.nullability(); - let dec_dtype = dtype - .as_decimal_opt() - .vortex_expect("expected decimal DType"); - assert_eq!(dec_dtype.precision(), self.precision()); - assert_eq!(dec_dtype.scale(), self.scale()); - - let (_ps, values, validity) = self.into_parts(); - // SAFETY: vectors maintain all invariants required for array creation - unsafe { - DecimalArray::new_unchecked::( - values, - *dec_dtype, - Validity::from_mask(validity, nullability), - ) - } - } -} - -impl VectorIntoArray for BinaryViewVector { - fn into_array(self, dtype: &DType) -> VarBinViewArray { - assert!(matches!(dtype, DType::Utf8(_) | DType::Binary(_))); - - let (views, buffers, validity) = self.into_parts(); - let validity = Validity::from_mask(validity, dtype.nullability()); - - let buffers = Arc::try_unwrap(buffers).unwrap_or_else(|b| (*b).clone()); - - // SAFETY: vectors maintain all invariants required for array creation - unsafe { - VarBinViewArray::new_unchecked( - views, - buffers.into_iter().collect(), - dtype.clone(), - validity, - ) - } - } -} - -impl VectorIntoArray for ListViewVector { - fn into_array(self, dtype: &DType) -> ListViewArray { - assert!(matches!(dtype, DType::List(_, _))); - - let (elements, offsets, sizes, validity) = self.into_parts(); - let validity = Validity::from_mask(validity, dtype.nullability()); - - let elements_dtype = dtype.as_list_element_opt().vortex_expect("expected list"); - let elements = Arc::try_unwrap(elements) - .unwrap_or_else(|e| (*e).clone()) - .into_array(elements_dtype); - - let offsets_dtype = DType::Primitive(offsets.ptype(), NonNullable); - let offsets = offsets.into_array(&offsets_dtype); - - let sizes_dtype = DType::Primitive(sizes.ptype(), NonNullable); - let sizes = sizes.into_array(&sizes_dtype); - - // SAFETY: vectors maintain all invariants required for array creation - 
unsafe { - ListViewArray::new_unchecked( - elements, - offsets.into_array(), - sizes.into_array(), - validity, - ) - } - } -} - -impl VectorIntoArray for FixedSizeListVector { - fn into_array(self, dtype: &DType) -> FixedSizeListArray { - assert!(matches!(dtype, DType::FixedSizeList(_, _, _))); - - let len = self.len(); - let (elements, size, validity) = self.into_parts(); - let validity = Validity::from_mask(validity, dtype.nullability()); - - let elements_dtype = dtype - .as_fixed_size_list_element_opt() - .vortex_expect("expected fixed size list"); - let elements = Arc::try_unwrap(elements) - .unwrap_or_else(|e| (*e).clone()) - .into_array(elements_dtype); - - // SAFETY: vectors maintain all invariants required for array creation - unsafe { FixedSizeListArray::new_unchecked(elements, size, validity, len) } - } -} - -impl VectorIntoArray for StructVector { - fn into_array(self, dtype: &DType) -> StructArray { - assert!(matches!(dtype, DType::Struct(_, _))); - - let len = self.len(); - let (fields, validity) = self.into_parts(); - let validity = Validity::from_mask(validity, dtype.nullability()); - - let struct_fields = dtype.as_struct_fields(); - assert_eq!(fields.len(), struct_fields.nfields()); - - let field_arrays: Vec = Arc::try_unwrap(fields) - .unwrap_or_else(|f| (*f).clone()) - .into_iter() - .zip(struct_fields.fields()) - .map(|(field_vector, field_dtype)| field_vector.into_array(&field_dtype)) - .collect(); - - // SAFETY: vectors maintain all invariants required for array creation - unsafe { StructArray::new_unchecked(field_arrays, struct_fields.clone(), len, validity) } - } -}