diff --git a/Cargo.lock b/Cargo.lock index 46324a3db3e..9421b4573e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10182,6 +10182,7 @@ dependencies = [ "rustc-hash", "serde", "simdutf8", + "static_assertions", "tabled", "termtree", "tracing", @@ -10196,7 +10197,6 @@ dependencies = [ "vortex-scalar", "vortex-session", "vortex-utils", - "vortex-vector", ] [[package]] diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index d71d8eadd81..cdb4806dc2b 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -53,6 +53,7 @@ rstest_reuse = { workspace = true, optional = true } rustc-hash = { workspace = true } serde = { workspace = true, optional = true, features = ["derive"] } simdutf8 = { workspace = true } +static_assertions = { workspace = true } tabled = { workspace = true, optional = true, default-features = false, features = [ "std", ] } @@ -68,7 +69,6 @@ vortex-proto = { workspace = true, features = ["expr"] } vortex-scalar = { workspace = true } vortex-session = { workspace = true } vortex-utils = { workspace = true, features = ["dyn-traits"] } -vortex-vector = { workspace = true } [features] arbitrary = [ diff --git a/vortex-array/benches/take_primitive.rs b/vortex-array/benches/take_primitive.rs index 994a52f9cf4..b2e1822e916 100644 --- a/vortex-array/benches/take_primitive.rs +++ b/vortex-array/benches/take_primitive.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Benchmarks comparing [`PVector`] take vs [`DictArray`] canonicalization. +//! Benchmarks for [`DictArray`] canonicalization. //! //! Both are tracked by number of indices/codes for fair comparison. @@ -15,10 +15,6 @@ use rand_distr::Zipf; use vortex_array::IntoArray; use vortex_array::arrays::DictArray; use vortex_array::arrays::PrimitiveArray; -use vortex_buffer::Buffer; -use vortex_compute::take::Take; -use vortex_mask::Mask; -use vortex_vector::primitive::PVector; fn main() { divan::main(); @@ -30,40 +26,6 @@ const NUM_INDICES: &[usize] = &[1_000, 10_000, 100_000]; /// Size of the source vector / dictionary values. 
const VECTOR_SIZE: &[usize] = &[16, 256, 2048, 8192]; -// --- PVector take benchmarks --- - -#[divan::bench(args = NUM_INDICES, consts = VECTOR_SIZE, sample_count = 100_000)] -fn pvector_take_uniform(bencher: Bencher, num_indices: usize) { - let data: Buffer = (0..VECTOR_SIZE as u32).collect(); - let pvector = PVector::new(data, Mask::AllTrue(VECTOR_SIZE)); - - let rng = StdRng::seed_from_u64(0); - let range = Uniform::new(0u32, VECTOR_SIZE as u32).unwrap(); - let indices: Vec = rng.sample_iter(range).take(num_indices).collect(); - - bencher - .with_inputs(|| (&pvector, indices.as_slice())) - .bench_refs(|(pv, idx)| pv.take(*idx)); -} - -#[divan::bench(args = NUM_INDICES, consts = VECTOR_SIZE, sample_count = 100_000)] -fn pvector_take_zipfian(bencher: Bencher, num_indices: usize) { - let data: Buffer = (0..VECTOR_SIZE as u32).collect(); - let pvector = PVector::new(data, Mask::AllTrue(VECTOR_SIZE)); - - let rng = StdRng::seed_from_u64(0); - let zipf = Zipf::new(VECTOR_SIZE as f64, 1.0).unwrap(); - let indices: Vec = rng - .sample_iter(&zipf) - .take(num_indices) - .map(|i: f64| (i as u32 - 1).min(VECTOR_SIZE as u32 - 1)) - .collect(); - - bencher - .with_inputs(|| (&pvector, indices.as_slice())) - .bench_refs(|(pv, idx)| pv.take(*idx)); -} - // --- DictArray canonicalization benchmarks --- #[divan::bench(args = NUM_INDICES, consts = VECTOR_SIZE, sample_count = 100_000)] diff --git a/vortex-array/src/arrays/constant/vtable/canonical.rs b/vortex-array/src/arrays/constant/vtable/canonical.rs index 3a9dad220d1..7e8edce1ea0 100644 --- a/vortex-array/src/arrays/constant/vtable/canonical.rs +++ b/vortex-array/src/arrays/constant/vtable/canonical.rs @@ -22,7 +22,6 @@ use vortex_scalar::ListScalar; use vortex_scalar::Scalar; use vortex_scalar::StructScalar; use vortex_scalar::Utf8Scalar; -use vortex_vector::binaryview::BinaryView; use crate::Canonical; use crate::IntoArray; @@ -34,6 +33,7 @@ use crate::arrays::ListViewArray; use crate::arrays::NullArray; use crate::arrays::StructArray; use crate::arrays::VarBinViewArray; +use crate::arrays::build_views::BinaryView; use crate::arrays::constant::ConstantArray; use crate::arrays::primitive::PrimitiveArray; use crate::builders::builder_with_capacity; diff --git a/vortex-array/src/arrays/list/compute/filter.rs b/vortex-array/src/arrays/list/compute/filter.rs index ccfd23e576a..2f11edba389 100644 --- a/vortex-array/src/arrays/list/compute/filter.rs +++ b/vortex-array/src/arrays/list/compute/filter.rs @@ -138,10 +138,8 @@ fn compute_filtered_elements_and_offsets( } /// Construct an element mask from contiguous list offsets and a selection mask. 
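///
/// For example (illustrative): offsets `[0, 2, 5, 9]` describe three lists covering
/// elements `0..2`, `2..5`, and `5..9`; selecting lists `[true, false, true]` produces an
/// element mask that keeps elements `0..2` and `5..9` and drops `2..5`.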
-pub fn element_mask_from_offsets( - offsets: &[O], - selection: &Arc, -) -> Mask { +#[allow(dead_code)] +fn element_mask_from_offsets(offsets: &[O], selection: &Arc) -> Mask { let first_offset = offsets.first().map_or(0, |first_offset| first_offset.as_()); let last_offset = offsets.last().map_or(0, |last_offset| last_offset.as_()); let len = last_offset - first_offset; diff --git a/vortex-array/src/arrays/list/compute/mod.rs b/vortex-array/src/arrays/list/compute/mod.rs index bb896dfc522..26832bf4360 100644 --- a/vortex-array/src/arrays/list/compute/mod.rs +++ b/vortex-array/src/arrays/list/compute/mod.rs @@ -9,8 +9,6 @@ mod mask; mod min_max; mod take; -pub(super) use filter::element_mask_from_offsets; - #[cfg(test)] mod tests { use rstest::rstest; diff --git a/vortex-array/src/arrays/list/vtable/kernel/filter.rs b/vortex-array/src/arrays/list/vtable/kernel/filter.rs deleted file mode 100644 index 10cfd0b4183..00000000000 --- a/vortex-array/src/arrays/list/vtable/kernel/filter.rs +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::sync::Arc; - -use vortex_buffer::BufferMut; -use vortex_dtype::PTypeDowncastExt; -use vortex_dtype::match_each_integer_ptype; -use vortex_error::VortexResult; -use vortex_mask::Mask; -use vortex_vector::Vector; -use vortex_vector::VectorMutOps; -use vortex_vector::listview::ListViewVector; -use vortex_vector::listview::ListViewVectorMut; -use vortex_vector::primitive::PVector; -use vortex_vector::primitive::PrimitiveVector; - -use crate::Array; -use crate::Canonical; -use crate::ExecutionCtx; -use crate::IntoArray; -use crate::arrays::FilterArray; -use crate::arrays::FilterVTable; -use crate::arrays::ListArray; -use crate::arrays::ListVTable; -use crate::arrays::list::compute::element_mask_from_offsets; -use crate::kernel::ExecuteParentKernel; -use crate::matchers::Exact; -use crate::validity::Validity; -use crate::vectors::VectorIntoArray; -use crate::vtable::ValidityHelper; - -#[derive(Debug)] -pub(super) struct ListFilterKernel; - -impl ExecuteParentKernel for ListFilterKernel { - type Parent = Exact; - - fn parent(&self) -> Self::Parent { - Exact::new() - } - - // TODO(joe): Remove the vector usage? - fn execute_parent( - &self, - array: &ListArray, - parent: &FilterArray, - _child_idx: usize, - ctx: &mut ExecutionCtx, - ) -> VortexResult> { - let selection = match parent.filter_mask() { - Mask::AllTrue(_) | Mask::AllFalse(_) => return Ok(None), - Mask::Values(v) => v, - }; - - // TODO(ngates): for ultra-sparse masks, we don't need to optimize the entire offsets. - let offsets = array - .offsets() - .clone() - .execute::(ctx)? - .into_primitive(); - - let new_validity = match array.validity() { - Validity::NonNullable | Validity::AllValid => Mask::new_true(selection.true_count()), - Validity::AllInvalid => { - let mut vec = ListViewVectorMut::with_capacity( - array.elements().dtype(), - selection.true_count(), - 0, - ); - vec.append_nulls(selection.true_count()); - return Ok(Some( - vec.freeze() - .into_array(array.dtype()) - .into_array() - .execute::(ctx)?, - )); - } - Validity::Array(a) => a - .filter(parent.filter_mask().clone())? 
- .execute::(ctx)?, - }; - - let (new_offsets, new_sizes) = match_each_integer_ptype!(offsets.ptype(), |O| { - let offsets = (&offsets).downcast::().elements().as_slice(); - let mut new_offsets = BufferMut::::with_capacity(selection.true_count()); - let mut new_sizes = BufferMut::::with_capacity(selection.true_count()); - - let mut offset = 0; - for idx in selection.indices() { - let start = offsets[*idx]; - let end = offsets[idx + 1]; - let size = end - start; - unsafe { new_offsets.push_unchecked(offset) }; - unsafe { new_sizes.push_unchecked(size) }; - offset += size; - } - - let new_offsets = PrimitiveVector::from(PVector::::new( - new_offsets.freeze(), - Mask::new_true(selection.true_count()), - )); - let new_sizes = PrimitiveVector::from(PVector::::new( - new_sizes.freeze(), - Mask::new_true(selection.true_count()), - )); - - (new_offsets, new_sizes) - }); - - // TODO(ngates): for very dense masks, there may be no point in filtering the elements, - // and instead we should construct a view against the unfiltered elements. - let element_mask = match_each_integer_ptype!(offsets.ptype(), |O| { - element_mask_from_offsets::((&offsets).downcast::().elements(), selection) - }); - - let new_elements = array - .sliced_elements()? - .filter(element_mask)? - .execute::(ctx)?; - - Ok(Some( - unsafe { - ListViewVector::new_unchecked( - Arc::new(new_elements), - new_offsets, - new_sizes, - new_validity, - ) - } - .into_array(array.dtype()) - .into_array() - .execute::(ctx)?, - )) - } -} diff --git a/vortex-array/src/arrays/list/vtable/kernel/mod.rs b/vortex-array/src/arrays/list/vtable/kernel/mod.rs index c95e7cceebc..77f330f7ca5 100644 --- a/vortex-array/src/arrays/list/vtable/kernel/mod.rs +++ b/vortex-array/src/arrays/list/vtable/kernel/mod.rs @@ -1,11 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -mod filter; - use crate::arrays::ListVTable; -use crate::arrays::list::vtable::kernel::filter::ListFilterKernel; use crate::kernel::ParentKernelSet; -pub(super) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&ListFilterKernel)]); +pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[]); diff --git a/vortex-array/src/arrays/mod.rs b/vortex-array/src/arrays/mod.rs index 5d0b1b99662..dae8f13cdd5 100644 --- a/vortex-array/src/arrays/mod.rs +++ b/vortex-array/src/arrays/mod.rs @@ -58,4 +58,6 @@ pub use scalar_fn::*; pub use slice::*; pub use struct_::*; pub use varbin::*; +pub use varbinview::build_views; +pub use varbinview::compact; pub use varbinview::*; diff --git a/vortex-array/src/arrays/primitive/array/conversion.rs b/vortex-array/src/arrays/primitive/array/conversion.rs index d827e7e8591..5c8c7495b54 100644 --- a/vortex-array/src/arrays/primitive/array/conversion.rs +++ b/vortex-array/src/arrays/primitive/array/conversion.rs @@ -7,13 +7,7 @@ use vortex_buffer::BitBufferMut; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; use vortex_dtype::NativePType; -use vortex_dtype::Nullability; -use vortex_error::VortexResult; -use vortex_error::vortex_ensure; use vortex_error::vortex_panic; -use vortex_vector::VectorOps; -use vortex_vector::match_each_pvector; -use vortex_vector::primitive::PrimitiveVector; use crate::ArrayRef; use crate::IntoArray; @@ -21,34 +15,6 @@ use crate::arrays::PrimitiveArray; use crate::validity::Validity; impl PrimitiveArray { - /// Attempts to create a `PrimitiveArray` from a [`PrimitiveVector`] given a [`Nullability`]. 
- /// - /// # Errors - /// - /// Returns an error if the nullability is [`NonNullable`](Nullability::NonNullable) and there - /// are nulls present in the vector. - pub fn try_from_vector( - primitive_vector: PrimitiveVector, - nullability: Nullability, - ) -> VortexResult { - // If we want to create a non-nullable array, then the vector should not have any nulls. - vortex_ensure!( - nullability.is_nullable() || primitive_vector.validity().all_true(), - "tried to create a non-nullable `PrimitiveArray` from a `PrimitiveVector` that had nulls" - ); - - match_each_pvector!(primitive_vector, |v| { - let (buffer, mask) = v.into_parts(); - debug_assert_eq!(buffer.len(), mask.len()); - - let validity = Validity::from_mask(mask, nullability); - - // SAFETY: Since the buffer and the mask came from a valid vector, we know that the - // length of the buffer and the validity are the same. - Ok(unsafe { Self::new_unchecked(buffer, validity) }) - }) - } - /// Create a PrimitiveArray from an iterator of `T`. /// NOTE: we cannot impl FromIterator trait since it conflicts with `FromIterator`. pub fn from_option_iter>>(iter: I) -> Self { @@ -138,70 +104,3 @@ impl IntoArray for BufferMut { self.freeze().into_array() } } - -#[cfg(test)] -mod tests { - use vortex_buffer::BufferMut; - use vortex_dtype::Nullability; - use vortex_dtype::PType; - use vortex_mask::MaskMut; - use vortex_vector::primitive::PVector; - - use super::*; - - #[test] - fn test_try_from_vector_with_nulls_nullable() { - // Create a vector with some null values: [Some(1), None, Some(3), Some(4), None]. - let mut values = BufferMut::::with_capacity(5); - values.extend_from_slice(&[1, 0, 3, 4, 0]); - - let mut validity = MaskMut::with_capacity(5); - validity.append_n(true, 1); - validity.append_n(false, 1); - validity.append_n(true, 1); - validity.append_n(true, 1); - validity.append_n(false, 1); - - let pvector = - PVector::try_new(values.freeze(), validity.freeze()).expect("Failed to create PVector"); - - // This should succeed since we're allowing nulls. - let result = - PrimitiveArray::try_from_vector(pvector.into(), Nullability::Nullable).unwrap(); - - assert_eq!(result.len(), 5); - assert_eq!(result.ptype(), PType::I32); - assert!(result.is_valid(0).unwrap()); - assert!(!result.is_valid(1).unwrap()); - assert!(result.is_valid(2).unwrap()); - assert!(result.is_valid(3).unwrap()); - assert!(!result.is_valid(4).unwrap()); - } - - #[test] - fn test_try_from_vector_non_nullable_with_nulls_errors() { - // Create a vector with null values: [Some(1), None, Some(3)]. - let mut values = BufferMut::::with_capacity(3); - values.extend_from_slice(&[1, 0, 3]); - - let mut validity = MaskMut::with_capacity(3); - validity.append_n(true, 1); - validity.append_n(false, 1); - validity.append_n(true, 1); - - let pvector = - PVector::try_new(values.freeze(), validity.freeze()).expect("Failed to create PVector"); - - // This should fail because we're trying to create a non-nullable array from data with - // nulls. - let result = PrimitiveArray::try_from_vector(pvector.into(), Nullability::NonNullable); - - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("non-nullable `PrimitiveArray`") - ); - } -} diff --git a/vortex-array/src/arrays/primitive/compute/take/avx2.rs b/vortex-array/src/arrays/primitive/compute/take/avx2.rs index c330a9226a3..cd670ebb6f0 100644 --- a/vortex-array/src/arrays/primitive/compute/take/avx2.rs +++ b/vortex-array/src/arrays/primitive/compute/take/avx2.rs @@ -6,13 +6,13 @@ //! 
Only enabled for x86_64 hosts and it is gated at runtime behind feature detection to //! ensure AVX2 instructions are available. -use vortex_compute::take::slice::avx2; use vortex_dtype::NativePType; use vortex_dtype::UnsignedPType; use vortex_dtype::match_each_native_ptype; use vortex_dtype::match_each_unsigned_integer_ptype; use vortex_error::VortexResult; +use super::slice_avx2; use crate::ArrayRef; use crate::IntoArray; use crate::arrays::primitive::PrimitiveArray; @@ -59,7 +59,7 @@ where I: UnsignedPType, { // SAFETY: The caller guarantees that the `avx2` feature is enabled. - let buffer = unsafe { avx2::take_avx2(values, indices) }; + let buffer = unsafe { slice_avx2::take_avx2(values, indices) }; debug_assert!( validity diff --git a/vortex-array/src/arrays/primitive/compute/take/mod.rs b/vortex-array/src/arrays/primitive/compute/take/mod.rs index fd2ddb63cf3..30857b4b1b6 100644 --- a/vortex-array/src/arrays/primitive/compute/take/mod.rs +++ b/vortex-array/src/arrays/primitive/compute/take/mod.rs @@ -3,6 +3,11 @@ #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] mod avx2; +#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +mod slice_avx2; + +#[cfg(vortex_nightly)] +mod slice_portable; #[cfg(vortex_nightly)] mod portable; diff --git a/vortex-array/src/arrays/primitive/compute/take/portable.rs b/vortex-array/src/arrays/primitive/compute/take/portable.rs index 48f31896f60..21e4d4b3d20 100644 --- a/vortex-array/src/arrays/primitive/compute/take/portable.rs +++ b/vortex-array/src/arrays/primitive/compute/take/portable.rs @@ -18,13 +18,13 @@ use num_traits::AsPrimitive; use vortex_buffer::Alignment; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; -use vortex_compute::take::slice::portable; use vortex_dtype::NativePType; use vortex_dtype::PType; use vortex_dtype::match_each_native_simd_ptype; use vortex_dtype::match_each_unsigned_integer_ptype; use vortex_error::VortexResult; +use super::slice_portable as portable; use crate::ArrayRef; use crate::IntoArray; use crate::arrays::PrimitiveArray; diff --git a/vortex-array/src/arrays/primitive/compute/take/slice_avx2.rs b/vortex-array/src/arrays/primitive/compute/take/slice_avx2.rs new file mode 100644 index 00000000000..689e53c96dc --- /dev/null +++ b/vortex-array/src/arrays/primitive/compute/take/slice_avx2.rs @@ -0,0 +1,535 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! An AVX2 implementation of take operation using gather instructions. +//! +//! Only enabled for x86_64 hosts and it is gated at runtime behind feature detection to ensure AVX2 +//! instructions are available. 
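+//!
+//! A minimal calling sketch (illustrative; the caller, not this module, performs the runtime
+//! feature check):
+//!
+//! ```ignore
+//! let values: Vec<u32> = (0..100).collect();
+//! let indices: Vec<u8> = vec![5, 10, 15];
+//! if std::arch::is_x86_feature_detected!("avx2") {
+//!     // SAFETY: AVX2 support was verified at runtime immediately above.
+//!     let taken = unsafe { take_avx2(&values, &indices) };
+//!     assert_eq!(taken.as_slice(), &[5, 10, 15]);
+//! }
+//! ```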
+
+#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+
+use std::arch::x86_64::__m256i;
+use std::arch::x86_64::_mm_loadu_si128;
+use std::arch::x86_64::_mm_setzero_si128;
+use std::arch::x86_64::_mm_shuffle_epi32;
+use std::arch::x86_64::_mm_storeu_si128;
+use std::arch::x86_64::_mm_unpacklo_epi64;
+use std::arch::x86_64::_mm256_cmpgt_epi32;
+use std::arch::x86_64::_mm256_cmpgt_epi64;
+use std::arch::x86_64::_mm256_cvtepu8_epi32;
+use std::arch::x86_64::_mm256_cvtepu8_epi64;
+use std::arch::x86_64::_mm256_cvtepu16_epi32;
+use std::arch::x86_64::_mm256_cvtepu16_epi64;
+use std::arch::x86_64::_mm256_cvtepu32_epi64;
+use std::arch::x86_64::_mm256_extracti128_si256;
+use std::arch::x86_64::_mm256_loadu_si256;
+use std::arch::x86_64::_mm256_mask_i32gather_epi32;
+use std::arch::x86_64::_mm256_mask_i64gather_epi32;
+use std::arch::x86_64::_mm256_mask_i64gather_epi64;
+use std::arch::x86_64::_mm256_set1_epi32;
+use std::arch::x86_64::_mm256_set1_epi64x;
+use std::arch::x86_64::_mm256_setzero_si256;
+use std::arch::x86_64::_mm256_storeu_si256;
+use std::convert::identity;
+
+use vortex_buffer::Alignment;
+use vortex_buffer::Buffer;
+use vortex_buffer::BufferMut;
+use vortex_dtype::NativePType;
+use vortex_dtype::PType;
+use vortex_dtype::UnsignedPType;
+
+/// Scalar fallback implementation for the take operation.
+fn take_scalar<T: Copy, I: UnsignedPType>(buffer: &[T], indices: &[I]) -> Buffer<T> {
+    let mut result = BufferMut::with_capacity(indices.len());
+    let ptr = result.spare_capacity_mut().as_mut_ptr().cast::<T>();
+
+    for (i, idx) in indices.iter().enumerate() {
+        // SAFETY: We reserved `indices.len()` capacity, so `ptr.add(i)` is valid.
+        unsafe { ptr.add(i).write(buffer[idx.as_()]) };
+    }
+
+    // SAFETY: We just wrote exactly `indices.len()` elements.
+    unsafe { result.set_len(indices.len()) };
+    result.freeze()
+}
+
+/// Takes the specified indices into a new [`Buffer`] using AVX2 SIMD.
+///
+/// If an AVX2 gather kernel exists for the given index/value type combination, the take is
+/// accelerated using AVX2 gather instructions; for unsupported type combinations this falls
+/// back to the scalar implementation.
+///
+/// # Panics
+///
+/// This function panics if any of the provided `indices` are out of bounds for `buffer`.
+///
+/// # Safety
+///
+/// The caller must ensure the `avx2` feature is enabled.
+#[allow(dead_code, unused_variables, reason = "TODO(connor): Implement this")]
+#[target_feature(enable = "avx2")]
+#[inline]
+pub unsafe fn take_avx2<V: NativePType, I: UnsignedPType>(
+    buffer: &[V],
+    indices: &[I],
+) -> Buffer<V> {
+    macro_rules! dispatch_avx2 {
+        ($indices:ty, $values:ty) => {
+            { let result = dispatch_avx2!($indices, $values, cast: $values); result }
+        };
+        ($indices:ty, $values:ty, cast: $cast:ty) => {{
+            let indices = unsafe { std::mem::transmute::<&[I], &[$indices]>(indices) };
+            let values = unsafe { std::mem::transmute::<&[V], &[$cast]>(buffer) };
+
+            let result = exec_take::<$cast, $indices, AVX2Gather>(values, indices);
+            unsafe { result.transmute::<V>() }
+        }};
+    }
+
+    match (I::PTYPE, V::PTYPE) {
+        // Int value types. Only 32 and 64 bit types are supported.
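+        // (Illustrative: `dispatch_avx2!(u8, i32)` reinterprets `indices` as `&[u8]` and
+        // `buffer` as `&[i32]`, then runs `exec_take::<i32, u8, AVX2Gather>`.)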
+ (PType::U8, PType::I32) => dispatch_avx2!(u8, i32), + (PType::U8, PType::U32) => dispatch_avx2!(u8, u32), + (PType::U8, PType::I64) => dispatch_avx2!(u8, i64), + (PType::U8, PType::U64) => dispatch_avx2!(u8, u64), + (PType::U16, PType::I32) => dispatch_avx2!(u16, i32), + (PType::U16, PType::U32) => dispatch_avx2!(u16, u32), + (PType::U16, PType::I64) => dispatch_avx2!(u16, i64), + (PType::U16, PType::U64) => dispatch_avx2!(u16, u64), + (PType::U32, PType::I32) => dispatch_avx2!(u32, i32), + (PType::U32, PType::U32) => dispatch_avx2!(u32, u32), + (PType::U32, PType::I64) => dispatch_avx2!(u32, i64), + (PType::U32, PType::U64) => dispatch_avx2!(u32, u64), + + // Float value types, treat them as if they were corresponding int types. + (PType::U8, PType::F32) => dispatch_avx2!(u8, f32, cast: u32), + (PType::U16, PType::F32) => dispatch_avx2!(u16, f32, cast: u32), + (PType::U32, PType::F32) => dispatch_avx2!(u32, f32, cast: u32), + (PType::U64, PType::F32) => dispatch_avx2!(u64, f32, cast: u32), + + (PType::U8, PType::F64) => dispatch_avx2!(u8, f64, cast: u64), + (PType::U16, PType::F64) => dispatch_avx2!(u16, f64, cast: u64), + (PType::U32, PType::F64) => dispatch_avx2!(u32, f64, cast: u64), + (PType::U64, PType::F64) => dispatch_avx2!(u64, f64, cast: u64), + + // Scalar fallback for unsupported value types. + _ => { + tracing::trace!( + "take AVX2 kernel missing for indices {} values {}, falling back to scalar", + I::PTYPE, + V::PTYPE + ); + + take_scalar(buffer, indices) + } + } +} + +/// The main gather function that is used by the inner loop kernel for AVX2 gather. +pub(crate) trait GatherFn { + /// The number of data elements that are written to the `dst` on each loop iteration. + const WIDTH: usize; + /// The number of indices read from `indices` on each loop iteration. + /// Depending on the available instructions and bit-width we may stride by a larger amount + /// than we actually end up reading from `src` (governed by the `WIDTH` parameter). + const STRIDE: usize = Self::WIDTH; + + /// Gather values from `src` into the `dst` using the `indices`, optionally using + /// SIMD instructions. + /// + /// # Safety + /// + /// This function can read up to `STRIDE` elements through `indices`, and read/write up to + /// `WIDTH` elements through `src` and `dst` respectively. + unsafe fn gather(indices: *const Idx, max_idx: Idx, src: *const Values, dst: *mut Values); +} + +/// AVX2 version of GatherFn defined for 32- and 64-bit value types. +enum AVX2Gather {} + +macro_rules! 
impl_gather { + ($idx:ty, $({$value:ty => load: $load:ident, extend: $extend:ident, splat: $splat:ident, zero_vec: $zero_vec:ident, mask_indices: $mask_indices:ident, mask_cvt: |$mask_var:ident| $mask_cvt:block, gather: $masked_gather:ident, store: $store:ident, WIDTH = $WIDTH:literal, STRIDE = $STRIDE:literal }),+) => { + $( + impl_gather!(single; $idx, $value, load: $load, extend: $extend, splat: $splat, zero_vec: $zero_vec, mask_indices: $mask_indices, mask_cvt: |$mask_var| $mask_cvt, gather: $masked_gather, store: $store, WIDTH = $WIDTH, STRIDE = $STRIDE); + )* + }; + (single; $idx:ty, $value:ty, load: $load:ident, extend: $extend:ident, splat: $splat:ident, zero_vec: $zero_vec:ident, mask_indices: $mask_indices:ident, mask_cvt: |$mask_var:ident| $mask_cvt:block, gather: $masked_gather:ident, store: $store:ident, WIDTH = $WIDTH:literal, STRIDE = $STRIDE:literal) => { + impl GatherFn<$idx, $value> for AVX2Gather { + const WIDTH: usize = $WIDTH; + const STRIDE: usize = $STRIDE; + + #[allow(unused_unsafe, clippy::cast_possible_truncation)] + #[inline(always)] + unsafe fn gather(indices: *const $idx, max_idx: $idx, src: *const $value, dst: *mut $value) { + const { + assert!($WIDTH <= $STRIDE, "dst cannot advance by more than the stride"); + } + + const SCALE: i32 = std::mem::size_of::<$value>() as i32; + + let indices_vec = unsafe { $load(indices.cast()) }; + // Extend indices to fill vector register + let indices_vec = unsafe { $extend(indices_vec) }; + + // create a vec of the max idx + let max_idx_vec = unsafe { $splat(max_idx as _) }; + // create a mask for valid indices (where the max_idx > provided index). + let invalid_mask = unsafe { $mask_indices(max_idx_vec, indices_vec) }; + let invalid_mask = { + let $mask_var = invalid_mask; + $mask_cvt + }; + let zero_vec = unsafe { $zero_vec() }; + + // Gather the values into new vector register, for masked positions + // it substitutes zero instead of accessing the src. + let values_vec = unsafe { $masked_gather::(zero_vec, src.cast(), indices_vec, invalid_mask) }; + + // Write the vec out to dst. + unsafe { $store(dst.cast(), values_vec) }; + } + } + }; +} + +// kernels for u8 indices +impl_gather!(u8, + // 32-bit values, loaded 8 at a time + { u32 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu8_epi32, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 16 + }, + { i32 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu8_epi32, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 16 + }, + + // 64-bit values, loaded 4 at a time + { u64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu8_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 16 + }, + { i64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu8_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 16 + } +); + +// kernels for u16 indices +impl_gather!(u16, + // 32-bit values. 
8x indices loaded at a time and 8x values written at a time + { u32 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu16_epi32, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 8 + }, + { i32 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu16_epi32, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 8 + }, + + // 64-bit values. 8x indices loaded at a time and 4x values loaded at a time. + { u64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu16_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 8 + }, + { i64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu16_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 8 + } +); + +// kernels for u32 indices +impl_gather!(u32, + // 32-bit values. 8x indices loaded at a time and 8x values written + { u32 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 8 + }, + { i32 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi32, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi32, + mask_cvt: |x| { x }, + gather: _mm256_mask_i32gather_epi32, + store: _mm256_storeu_si256, + WIDTH = 8, STRIDE = 8 + }, + + // 64-bit values + { u64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu32_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 4 + }, + { i64 => + load: _mm_loadu_si128, + extend: _mm256_cvtepu32_epi64, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 4 + } +); + +// kernels for u64 indices +impl_gather!(u64, + { u32 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi64x, + zero_vec: _mm_setzero_si128, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |m| { + unsafe { + let lo_bits = _mm256_extracti128_si256::<0>(m); // lower half + let hi_bits = _mm256_extracti128_si256::<1>(m); // upper half + let lo_packed = _mm_shuffle_epi32::<0b01_01_01_01>(lo_bits); + let hi_packed = _mm_shuffle_epi32::<0b01_01_01_01>(hi_bits); + _mm_unpacklo_epi64(lo_packed, hi_packed) + } + }, + gather: _mm256_mask_i64gather_epi32, + store: _mm_storeu_si128, + WIDTH = 4, STRIDE = 4 + }, + { i32 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi64x, + zero_vec: _mm_setzero_si128, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |m| { + unsafe { + let lo_bits = _mm256_extracti128_si256::<0>(m); // lower half + let hi_bits = _mm256_extracti128_si256::<1>(m); // upper half + let lo_packed = 
_mm_shuffle_epi32::<0b01_01_01_01>(lo_bits); + let hi_packed = _mm_shuffle_epi32::<0b01_01_01_01>(hi_bits); + _mm_unpacklo_epi64(lo_packed, hi_packed) + } + }, + gather: _mm256_mask_i64gather_epi32, + store: _mm_storeu_si128, + WIDTH = 4, STRIDE = 4 + }, + + // 64-bit values + { u64 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 4 + }, + { i64 => + load: _mm256_loadu_si256, + extend: identity, + splat: _mm256_set1_epi64x, + zero_vec: _mm256_setzero_si256, + mask_indices: _mm256_cmpgt_epi64, + mask_cvt: |x| { x }, + gather: _mm256_mask_i64gather_epi64, + store: _mm256_storeu_si256, + WIDTH = 4, STRIDE = 4 + } +); + +/// AVX2 core inner loop for certain `Idx` and `Value` type. +#[inline(always)] +fn exec_take(values: &[Value], indices: &[Idx]) -> Buffer +where + Value: Copy, + Idx: UnsignedPType, + Gather: GatherFn, +{ + let indices_len = indices.len(); + let max_index = Idx::from(values.len()).unwrap_or_else(|| Idx::max_value()); + let mut buffer = + BufferMut::::with_capacity_aligned(indices_len, Alignment::of::<__m256i>()); + let buf_uninit = buffer.spare_capacity_mut(); + + let mut offset = 0; + // Loop terminates STRIDE elements before end of the indices array because the GatherFn + // might read up to STRIDE src elements at a time, even though it only advances WIDTH elements + // in the dst. + while offset + Gather::STRIDE < indices_len { + // SAFETY: gather_simd preconditions satisfied: + // 1. `(indices + offset)..(indices + offset + STRIDE)` is in-bounds for indices allocation + // 2. `buffer` has same len as indices so `buffer + offset + STRIDE` is always valid. + unsafe { + Gather::gather( + indices.as_ptr().add(offset), + max_index, + values.as_ptr(), + buf_uninit.as_mut_ptr().add(offset).cast(), + ) + }; + offset += Gather::WIDTH; + } + + // Remainder + while offset < indices_len { + buf_uninit[offset].write(values[indices[offset].as_()]); + offset += 1; + } + + assert_eq!(offset, indices_len); + + // SAFETY: all elements have been initialized. + unsafe { buffer.set_len(indices_len) }; + + // Reset the buffer alignment to the Value type + // NOTE: if we don't do this, we pass back a Buffer which is over-aligned to the + // SIMD register width. The caller expects that this memory should be aligned to the value + // type so that we can slice it at value boundaries. + buffer = buffer.aligned(Alignment::of::()); + + buffer.freeze() +} + +#[cfg(test)] +#[cfg_attr(miri, ignore)] +#[cfg(target_arch = "x86_64")] +mod tests { + use super::*; + + macro_rules! test_cases { + (index_type => $IDX:ty, value_types => $($VAL:ty),+) => { + paste::paste! 
{ + $( + // test "happy path" take, valid indices on valid array + #[test] + #[allow(clippy::cast_possible_truncation)] + fn []() { + let values: Vec<$VAL> = (1..=127).map(|x| x as $VAL).collect(); + let indices: Vec<$IDX> = (0..127).collect(); + + let result = unsafe { take_avx2(&values, &indices) }; + assert_eq!(&values, result.as_slice()); + } + + // test take on empty array + #[test] + #[should_panic] + #[allow(clippy::cast_possible_truncation)] + fn []() { + let values: Vec<$VAL> = vec![]; + let indices: Vec<$IDX> = (0..127).collect(); + let result = unsafe { take_avx2(&values, &indices) }; + assert!(result.is_empty()); + } + + // test all invalid take indices mapping to zeros + #[test] + #[should_panic] + #[allow(clippy::cast_possible_truncation)] + fn []() { + let values: Vec<$VAL> = (1..=127).map(|x| x as $VAL).collect(); + // all out of bounds indices + let indices: Vec<$IDX> = (127..=254).collect(); + + let result = unsafe { take_avx2(&values, &indices) }; + assert_eq!(&[0 as $VAL; 127], result.as_slice()); + } + )+ + } + }; + } + + test_cases!( + index_type => u8, + value_types => u32, i32, u64, i64, f32, f64 + ); + test_cases!( + index_type => u16, + value_types => u32, i32, u64, i64, f32, f64 + ); + test_cases!( + index_type => u32, + value_types => u32, i32, u64, i64, f32, f64 + ); + test_cases!( + index_type => u64, + value_types => u32, i32, u64, i64, f32, f64 + ); +} diff --git a/vortex-array/src/arrays/primitive/compute/take/slice_portable.rs b/vortex-array/src/arrays/primitive/compute/take/slice_portable.rs new file mode 100644 index 00000000000..71ca180ebcf --- /dev/null +++ b/vortex-array/src/arrays/primitive/compute/take/slice_portable.rs @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Take function implementations on slices using `portable_simd`. + +#![cfg(vortex_nightly)] + +use std::mem::MaybeUninit; +use std::mem::size_of; +use std::mem::transmute; +use std::simd; +use std::simd::num::SimdUint; + +use multiversion::multiversion; +use vortex_buffer::Alignment; +use vortex_buffer::Buffer; +use vortex_buffer::BufferMut; +use vortex_dtype::NativePType; +use vortex_dtype::PType; +use vortex_dtype::UnsignedPType; +use vortex_dtype::match_each_native_simd_ptype; +use vortex_dtype::match_each_unsigned_integer_ptype; + +/// SIMD types larger than the SIMD register size are beneficial for +/// performance as this leads to better instruction level parallelism. +pub const SIMD_WIDTH: usize = 64; + +/// Takes the specified indices into a new [`Buffer`] using portable SIMD. +/// +/// This function handles the type matching required to satisfy `SimdElement` bounds. +/// For `f16` values, it reinterprets them as `u16` since `f16` doesn't implement `SimdElement`. +#[inline] +pub fn take_portable(buffer: &[T], indices: &[I]) -> Buffer { + if T::PTYPE == PType::F16 { + assert_eq!(size_of::(), size_of::()); + + // Since Rust does not actually support 16-bit floats, we first reinterpret the data as + // `u16` integers. + // SAFETY: We know that f16 has the same bit pattern as u16, so this transmute is fine to + // make. 
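+        // (Illustrative: 1.0_f16 has the bit pattern 0x3C00; the gather moves those u16
+        // bits around unchanged, and the final transmute reinterprets them as f16.)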
+ let u16_slice: &[u16] = + unsafe { std::slice::from_raw_parts(buffer.as_ptr().cast(), buffer.len()) }; + return unsafe { take_with_indices(u16_slice, indices).transmute::() }; + } + + match_each_native_simd_ptype!(T::PTYPE, |TC| { + assert_eq!(size_of::(), size_of::()); + + // SAFETY: This is essentially a no-op that tricks the compiler into adding the + // `simd::SimdElement` bound we need to call `take_with_indices`. + let buffer: &[TC] = + unsafe { std::slice::from_raw_parts(buffer.as_ptr().cast::(), buffer.len()) }; + unsafe { take_with_indices(buffer, indices).transmute::() } + }) +} + +/// Helper that matches on index type and calls `take_portable_simd`. +/// +/// We separate this code out from above to add the [`simd::SimdElement`] constraint. +#[inline] +fn take_with_indices( + buffer: &[T], + indices: &[I], +) -> Buffer { + match_each_unsigned_integer_ptype!(I::PTYPE, |IC| { + let indices: &[IC] = + unsafe { std::slice::from_raw_parts(indices.as_ptr().cast::(), indices.len()) }; + take_portable_simd::(buffer, indices) + }) +} + +/// Takes elements from an array using SIMD indexing. +/// +/// Performs a gather operation that takes values at specified indices and returns them in a new +/// buffer. Uses SIMD instructions to process `LANE_COUNT` indices in parallel. +/// +/// Returns a `Buffer` where each element corresponds to `values[indices[i]]`. +#[multiversion(targets("x86_64+avx2", "x86_64+avx", "aarch64+neon"))] +pub fn take_portable_simd(values: &[T], indices: &[I]) -> Buffer +where + T: NativePType + simd::SimdElement, + I: UnsignedPType + simd::SimdElement, + simd::LaneCount: simd::SupportedLaneCount, + simd::Simd: SimdUint = simd::Simd>, +{ + let indices_len = indices.len(); + + let mut buffer = BufferMut::::with_capacity_aligned( + indices_len, + Alignment::of::>(), + ); + + let buf_slice = buffer.spare_capacity_mut(); + + for chunk_idx in 0..(indices_len / LANE_COUNT) { + let offset = chunk_idx * LANE_COUNT; + let mask = simd::Mask::from_bitmask(u64::MAX); + let codes_chunk = simd::Simd::::from_slice(&indices[offset..]); + + let selection = simd::Simd::gather_select( + values, + mask, + codes_chunk.cast::(), + simd::Simd::::default(), + ); + + unsafe { + selection.store_select_unchecked( + transmute::<&mut [MaybeUninit], &mut [T]>(&mut buf_slice[offset..][..64]), + mask.cast(), + ); + } + } + + for idx in ((indices_len / LANE_COUNT) * LANE_COUNT)..indices_len { + unsafe { + buf_slice + .get_unchecked_mut(idx) + .write(values[indices[idx].as_()]); + } + } + + unsafe { + buffer.set_len(indices_len); + } + + // NOTE: if we don't do this, we pass back a Buffer which is over-aligned to the + // SIMD register width. The caller expects that this memory should be aligned to the value + // type so that we can slice it at value boundaries. + buffer = buffer.aligned(Alignment::of::()); + + buffer.freeze() +} + +#[cfg(test)] +mod tests { + use super::take_portable_simd; + + #[test] + fn test_take_out_of_bounds() { + let indices = vec![2_000_000u32; 64]; + let values = vec![1i32]; + + let result = take_portable_simd::(&values, &indices); + assert_eq!(result.as_slice(), [0i32; 64]); + } + + /// Tests SIMD gather with a mix of sequential, strided, and repeated indices. This exercises + /// irregular access patterns that stress the gather operation. + #[test] + fn test_take_mixed_access_patterns() { + // Create a values array with distinct elements. + let values: Vec = (0..256).map(|i| i * 100).collect(); + + // Build indices with mixed patterns: + // - Sequential access (0, 1, 2, ...) 
+ // - Strided access (0, 4, 8, ...) + // - Repeated indices (same index multiple times) + // - Reverse order + let mut indices: Vec = Vec::with_capacity(200); + + // Sequential: indices 0..64. + indices.extend(0u32..64); + // Strided by 4: 0, 4, 8, ..., 252. + indices.extend((0u32..64).map(|i| i * 4)); + // Repeated: index 42 repeated 32 times. + indices.extend(std::iter::repeat(42u32).take(32)); + // Reverse: 255, 254, ..., 216. + indices.extend((216u32..256).rev()); + + let result = take_portable_simd::(&values, &indices); + let result_slice = result.as_slice(); + + // Verify sequential portion. + for i in 0..64 { + assert_eq!(result_slice[i], (i as i64) * 100, "sequential at index {i}"); + } + + // Verify strided portion. + for i in 0..64 { + assert_eq!( + result_slice[64 + i], + (i as i64) * 4 * 100, + "strided at index {i}" + ); + } + + // Verify repeated portion. + for i in 0..32 { + assert_eq!(result_slice[128 + i], 42 * 100, "repeated at index {i}"); + } + + // Verify reverse portion. + for i in 0..40 { + assert_eq!( + result_slice[160 + i], + (255 - i as i64) * 100, + "reverse at index {i}" + ); + } + } + + /// Tests that the scalar remainder path works correctly when the number of indices is not + /// evenly divisible by the SIMD lane count. + #[test] + fn test_take_with_remainder() { + let values: Vec = (0..1000).collect(); + + // Use 64 + 37 = 101 indices to test both the SIMD loop (64 elements) and the scalar + // remainder (37 elements). + let indices: Vec = (0u8..101).collect(); + + let result = take_portable_simd::(&values, &indices); + let result_slice = result.as_slice(); + + assert_eq!(result_slice.len(), 101); + + // Verify all elements. + for i in 0..101 { + assert_eq!(result_slice[i], i as u16, "mismatch at index {i}"); + } + + // Also test with exactly 1 remainder element. + let indices_one_remainder: Vec = (0u8..65).collect(); + let result_one = take_portable_simd::(&values, &indices_one_remainder); + assert_eq!(result_one.as_slice().len(), 65); + assert_eq!(result_one.as_slice()[64], 64); + } + + /// Tests gather with large 64-bit values and various index types to ensure no truncation + /// occurs during the operation. + #[test] + fn test_take_large_values_no_truncation() { + // Create values near the edges of i64 range. + let values: Vec = vec![ + i64::MIN, + i64::MIN + 1, + -1_000_000_000_000i64, + -1, + 0, + 1, + 1_000_000_000_000i64, + i64::MAX - 1, + i64::MAX, + ]; + + // Indices that access each value multiple times in different orders. + let indices: Vec = vec![ + 0, 8, 1, 7, 2, 6, 3, 5, 4, // Forward-backward interleaved. + 8, 8, 8, 0, 0, 0, // Repeated extremes. + 4, 4, 4, 4, 4, 4, 4, 4, // Repeated zero. + 0, 1, 2, 3, 4, 5, 6, 7, 8, // Sequential. + 8, 7, 6, 5, 4, 3, 2, 1, 0, // Reverse. + // Pad to 64 to ensure we hit the SIMD path. + 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, + ]; + + let result = take_portable_simd::(&values, &indices); + let result_slice = result.as_slice(); + + // Verify each result matches the expected value. 
+ for (i, &idx) in indices.iter().enumerate() { + assert_eq!( + result_slice[i], values[idx as usize], + "mismatch at position {i} for index {idx}" + ); + } + } +} diff --git a/vortex-array/src/arrays/validation_tests.rs b/vortex-array/src/arrays/validation_tests.rs index 5d539c8e6d3..277201c5ef0 100644 --- a/vortex-array/src/arrays/validation_tests.rs +++ b/vortex-array/src/arrays/validation_tests.rs @@ -17,9 +17,9 @@ mod tests { use vortex_dtype::Nullability; use vortex_dtype::PType; use vortex_error::VortexError; - use vortex_vector::binaryview::BinaryView; use crate::IntoArray; + use crate::arrays::build_views::BinaryView; use crate::arrays::*; use crate::validity::Validity; diff --git a/vortex-array/src/arrays/varbinview/array.rs b/vortex-array/src/arrays/varbinview/array.rs index 63c8636a167..b4fb8af1b1f 100644 --- a/vortex-array/src/arrays/varbinview/array.rs +++ b/vortex-array/src/arrays/varbinview/array.rs @@ -13,8 +13,8 @@ use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; -use vortex_vector::binaryview::BinaryView; +use crate::arrays::build_views::BinaryView; use crate::builders::ArrayBuilder; use crate::builders::VarBinViewBuilder; use crate::stats::ArrayStats; diff --git a/vortex-array/src/arrays/varbinview/build_views.rs b/vortex-array/src/arrays/varbinview/build_views.rs index 7cc81ede968..eefca1f3baa 100644 --- a/vortex-array/src/arrays/varbinview/build_views.rs +++ b/vortex-array/src/arrays/varbinview/build_views.rs @@ -1,17 +1,288 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::fmt; +use std::hash::Hash; +use std::hash::Hasher; +use std::ops::Range; + use itertools::Itertools; use num_traits::AsPrimitive; +use static_assertions::assert_eq_align; +use static_assertions::assert_eq_size; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; use vortex_buffer::ByteBuffer; use vortex_buffer::ByteBufferMut; use vortex_dtype::NativePType; -// These will be moved to array soon -pub use vortex_vector::binaryview::BinaryView; -pub use vortex_vector::binaryview::Inlined; -pub use vortex_vector::binaryview::Ref; +use vortex_error::VortexExpect; + +/// A view over a variable-length binary value. +/// +/// Either an inlined representation (for values <= 12 bytes) or a reference +/// to an external buffer (for values > 12 bytes). +#[derive(Clone, Copy)] +#[repr(C, align(16))] +pub union BinaryView { + /// Numeric representation. This is logically `u128`, but we split it into the high and low + /// bits to preserve the alignment. + pub(crate) le_bytes: [u8; 16], + + /// Inlined representation: strings <= 12 bytes + pub(crate) inlined: Inlined, + + /// Reference type: strings > 12 bytes. + pub(crate) _ref: Ref, +} + +assert_eq_align!(BinaryView, u128); +assert_eq_size!(BinaryView, [u8; 16]); +assert_eq_size!(Inlined, [u8; 16]); +assert_eq_size!(Ref, [u8; 16]); + +/// Variant of a [`BinaryView`] that holds an inlined value. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(C, align(8))] +pub struct Inlined { + /// The size of the full value. + pub size: u32, + /// The full inlined value. + pub data: [u8; BinaryView::MAX_INLINED_SIZE], +} + +impl Inlined { + /// Creates a new inlined representation from the provided value of constant size. 
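+    ///
+    /// For example (illustrative): `Inlined::new::<3>(b"abc")` stores `size = 3` and copies
+    /// `b"abc"` into the first three bytes of the zero-initialized 12-byte `data` array.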
+    #[inline]
+    fn new<const N: usize>(value: &[u8]) -> Self {
+        debug_assert_eq!(value.len(), N);
+        let mut inlined = Self {
+            size: N.try_into().vortex_expect("inlined size must fit in u32"),
+            data: [0u8; BinaryView::MAX_INLINED_SIZE],
+        };
+        inlined.data[..N].copy_from_slice(&value[..N]);
+        inlined
+    }
+
+    /// Returns the full inlined value.
+    #[inline]
+    pub fn value(&self) -> &[u8] {
+        &self.data[0..(self.size as usize)]
+    }
+}
+
+/// Variant of a [`BinaryView`] that holds a reference to an external buffer.
+#[derive(Clone, Copy, Debug)]
+#[repr(C, align(8))]
+pub struct Ref {
+    /// The size of the full value.
+    pub size: u32,
+    /// The prefix bytes of the value (first 4 bytes).
+    pub prefix: [u8; 4],
+    /// The index of the buffer where the full value is stored.
+    pub buffer_index: u32,
+    /// The offset within the buffer where the full value starts.
+    pub offset: u32,
+}
+
+impl Ref {
+    /// Returns the range within the buffer where the full value is stored.
+    #[inline]
+    pub fn as_range(&self) -> Range<usize> {
+        self.offset as usize..(self.offset + self.size) as usize
+    }
+
+    /// Replaces the buffer index and offset of the reference, returning a new `Ref`.
+    #[inline]
+    pub fn with_buffer_and_offset(&self, buffer_index: u32, offset: u32) -> Ref {
+        Self {
+            size: self.size,
+            prefix: self.prefix,
+            buffer_index,
+            offset,
+        }
+    }
+}
+
+impl PartialEq for BinaryView {
+    fn eq(&self, other: &Self) -> bool {
+        let a = unsafe { std::mem::transmute::<&BinaryView, &u128>(self) };
+        let b = unsafe { std::mem::transmute::<&BinaryView, &u128>(other) };
+        a == b
+    }
+}
+impl Eq for BinaryView {}
+
+impl Hash for BinaryView {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        unsafe { std::mem::transmute::<&BinaryView, &u128>(self) }.hash(state);
+    }
+}
+
+impl Default for BinaryView {
+    fn default() -> Self {
+        Self::make_view(&[], 0, 0)
+    }
+}
+
+impl BinaryView {
+    /// Maximum size of an inlined binary value.
+    pub const MAX_INLINED_SIZE: usize = 12;
+
+    /// Create a view from a value, block and offset
+    ///
+    /// Depending on the length of the provided value either a new inlined
+    /// or a reference view will be constructed.
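+    ///
+    /// For example (illustrative): a 5-byte value becomes an `Inlined` view holding all of
+    /// its bytes, while a 20-byte value at offset 64 of buffer 2 becomes
+    /// `Ref { size: 20, prefix: [first 4 bytes], buffer_index: 2, offset: 64 }`.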
+ /// + /// Adapted from arrow-rs + /// Explicitly enumerating inlined view produces code that avoids calling generic `ptr::copy_non_interleave` that's slower than explicit stores + #[inline(never)] + pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self { + match value.len() { + 0 => Self { + inlined: Inlined::new::<0>(value), + }, + 1 => Self { + inlined: Inlined::new::<1>(value), + }, + 2 => Self { + inlined: Inlined::new::<2>(value), + }, + 3 => Self { + inlined: Inlined::new::<3>(value), + }, + 4 => Self { + inlined: Inlined::new::<4>(value), + }, + 5 => Self { + inlined: Inlined::new::<5>(value), + }, + 6 => Self { + inlined: Inlined::new::<6>(value), + }, + 7 => Self { + inlined: Inlined::new::<7>(value), + }, + 8 => Self { + inlined: Inlined::new::<8>(value), + }, + 9 => Self { + inlined: Inlined::new::<9>(value), + }, + 10 => Self { + inlined: Inlined::new::<10>(value), + }, + 11 => Self { + inlined: Inlined::new::<11>(value), + }, + 12 => Self { + inlined: Inlined::new::<12>(value), + }, + _ => Self { + _ref: Ref { + size: u32::try_from(value.len()).vortex_expect("value length must fit in u32"), + prefix: value[0..4] + .try_into() + .vortex_expect("prefix must be exactly 4 bytes"), + buffer_index: block, + offset, + }, + }, + } + } + + /// Create a new empty view + #[inline] + pub fn empty_view() -> Self { + Self { le_bytes: [0; 16] } + } + + /// Create a new inlined binary view + /// + /// # Panics + /// + /// Panics if the provided string is too long to inline. + #[inline] + pub fn new_inlined(value: &[u8]) -> Self { + assert!( + value.len() <= Self::MAX_INLINED_SIZE, + "expected inlined value to be <= 12 bytes, was {}", + value.len() + ); + + Self::make_view(value, 0, 0) + } + + /// Returns the length of the binary value. + #[inline] + pub fn len(&self) -> u32 { + unsafe { self.inlined.size } + } + + /// Returns true if the binary value is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns true if the binary value is inlined. + #[inline] + #[expect( + clippy::cast_possible_truncation, + reason = "MAX_INLINED_SIZE is a small constant" + )] + pub fn is_inlined(&self) -> bool { + self.len() <= (Self::MAX_INLINED_SIZE as u32) + } + + /// Returns the inlined representation of the binary value. + pub fn as_inlined(&self) -> &Inlined { + debug_assert!(self.is_inlined()); + unsafe { &self.inlined } + } + + /// Returns the reference representation of the binary value. + pub fn as_view(&self) -> &Ref { + debug_assert!(!self.is_inlined()); + unsafe { &self._ref } + } + + /// Returns a mutable reference to the reference representation of the binary value. + pub fn as_view_mut(&mut self) -> &mut Ref { + unsafe { &mut self._ref } + } + + /// Returns the binary view as u128 representation. + pub fn as_u128(&self) -> u128 { + // SAFETY: binary view always safe to read as u128 LE bytes + unsafe { u128::from_le_bytes(self.le_bytes) } + } +} + +impl From for BinaryView { + fn from(value: u128) -> Self { + BinaryView { + le_bytes: value.to_le_bytes(), + } + } +} + +impl From for BinaryView { + fn from(value: Ref) -> Self { + BinaryView { _ref: value } + } +} + +impl fmt::Debug for BinaryView { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut s = f.debug_struct("BinaryView"); + if self.is_inlined() { + s.field("inline", &self.as_inlined()); + } else { + s.field("ref", &self.as_view()); + } + s.finish() + } +} /// Convert an offsets buffer to a buffer of element lengths. 
#[inline] @@ -70,8 +341,8 @@ pub fn build_views>( mod tests { use vortex_buffer::ByteBuffer; use vortex_buffer::ByteBufferMut; - use vortex_vector::binaryview::BinaryView; + use super::BinaryView; use crate::arrays::build_views::build_views; #[test] diff --git a/vortex-array/src/arrays/varbinview/compact.rs b/vortex-array/src/arrays/varbinview/compact.rs index 8f3808a2659..128482d71d9 100644 --- a/vortex-array/src/arrays/varbinview/compact.rs +++ b/vortex-array/src/arrays/varbinview/compact.rs @@ -9,9 +9,9 @@ use std::ops::Range; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_mask::Mask; -use vortex_vector::binaryview::Ref; use crate::arrays::VarBinViewArray; +use crate::arrays::build_views::Ref; use crate::builders::ArrayBuilder; use crate::builders::VarBinViewBuilder; diff --git a/vortex-array/src/arrays/varbinview/compute/is_constant.rs b/vortex-array/src/arrays/varbinview/compute/is_constant.rs index daad2c5fa2a..431ad36724d 100644 --- a/vortex-array/src/arrays/varbinview/compute/is_constant.rs +++ b/vortex-array/src/arrays/varbinview/compute/is_constant.rs @@ -3,10 +3,10 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_vector::binaryview::Ref; use crate::arrays::VarBinViewArray; use crate::arrays::VarBinViewVTable; +use crate::arrays::build_views::Ref; use crate::compute::IsConstantKernel; use crate::compute::IsConstantKernelAdapter; use crate::compute::IsConstantOpts; diff --git a/vortex-array/src/arrays/varbinview/compute/take.rs b/vortex-array/src/arrays/varbinview/compute/take.rs index 90475e6698b..642c0f34e17 100644 --- a/vortex-array/src/arrays/varbinview/compute/take.rs +++ b/vortex-array/src/arrays/varbinview/compute/take.rs @@ -10,7 +10,6 @@ use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexResult; use vortex_mask::AllOr; use vortex_mask::Mask; -use vortex_vector::binaryview::BinaryView; use crate::Array; use crate::ArrayRef; @@ -18,6 +17,7 @@ use crate::IntoArray; use crate::ToCanonical; use crate::arrays::VarBinViewArray; use crate::arrays::VarBinViewVTable; +use crate::arrays::build_views::BinaryView; use crate::compute::TakeKernel; use crate::compute::TakeKernelAdapter; use crate::register_kernel; diff --git a/vortex-array/src/arrays/varbinview/compute/zip.rs b/vortex-array/src/arrays/varbinview/compute/zip.rs index e5674632715..0fda76fae8c 100644 --- a/vortex-array/src/arrays/varbinview/compute/zip.rs +++ b/vortex-array/src/arrays/varbinview/compute/zip.rs @@ -8,12 +8,12 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_mask::AllOr; use vortex_mask::Mask; -use vortex_vector::binaryview::BinaryView; use crate::Array; use crate::ArrayRef; use crate::arrays::VarBinViewArray; use crate::arrays::VarBinViewVTable; +use crate::arrays::build_views::BinaryView; use crate::builders::DeduplicatedBuffers; use crate::builders::LazyBitBufferBuilder; use crate::compute::ZipKernel; diff --git a/vortex-array/src/arrays/varbinview/mod.rs b/vortex-array/src/arrays/varbinview/mod.rs index c0a488f0226..a79538cc469 100644 --- a/vortex-array/src/arrays/varbinview/mod.rs +++ b/vortex-array/src/arrays/varbinview/mod.rs @@ -6,7 +6,7 @@ pub use array::VarBinViewArray; pub use array::VarBinViewArrayParts; mod accessor; -pub(crate) mod compact; +pub mod compact; mod compute; @@ -14,11 +14,9 @@ mod vtable; pub use vtable::VarBinViewVTable; pub mod build_views; - -// Re-export BinaryView types from vortex-vector -pub use vortex_vector::binaryview::BinaryView; -pub use 
vortex_vector::binaryview::Inlined; -pub use vortex_vector::binaryview::Ref; +pub use build_views::BinaryView; +pub use build_views::Inlined; +pub use build_views::Ref; #[cfg(test)] mod tests; diff --git a/vortex-array/src/arrays/varbinview/tests.rs b/vortex-array/src/arrays/varbinview/tests.rs index 63c7bbb0b66..a0e7d99db81 100644 --- a/vortex-array/src/arrays/varbinview/tests.rs +++ b/vortex-array/src/arrays/varbinview/tests.rs @@ -1,10 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_vector::binaryview::BinaryView; - use crate::ToCanonical; use crate::arrays::VarBinViewArray; +use crate::arrays::build_views::BinaryView; use crate::assert_arrays_eq; #[test] diff --git a/vortex-array/src/arrays/varbinview/vtable/mod.rs b/vortex-array/src/arrays/varbinview/vtable/mod.rs index e987fbeeed5..08e5ff5bedb 100644 --- a/vortex-array/src/arrays/varbinview/vtable/mod.rs +++ b/vortex-array/src/arrays/varbinview/vtable/mod.rs @@ -11,13 +11,13 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_err; -use vortex_vector::binaryview::BinaryView; use crate::ArrayRef; use crate::Canonical; use crate::EmptyMetadata; use crate::ExecutionCtx; use crate::IntoArray; +use crate::arrays::build_views::BinaryView; use crate::arrays::varbinview::VarBinViewArray; use crate::buffer::BufferHandle; use crate::serde::ArrayChildren; diff --git a/vortex-array/src/builders/dict/bytes.rs b/vortex-array/src/builders/dict/bytes.rs index a66f8ed5efa..50488f56c55 100644 --- a/vortex-array/src/builders/dict/bytes.rs +++ b/vortex-array/src/builders/dict/bytes.rs @@ -17,7 +17,6 @@ use vortex_utils::aliases::hash_map::DefaultHashBuilder; use vortex_utils::aliases::hash_map::HashTable; use vortex_utils::aliases::hash_map::HashTableEntry; use vortex_utils::aliases::hash_map::RandomState; -use vortex_vector::binaryview::BinaryView; use super::DictConstraints; use super::DictEncoder; @@ -29,6 +28,7 @@ use crate::arrays::PrimitiveArray; use crate::arrays::VarBinVTable; use crate::arrays::VarBinViewArray; use crate::arrays::VarBinViewVTable; +use crate::arrays::build_views::BinaryView; use crate::canonical::ToCanonical; use crate::validity::Validity; diff --git a/vortex-array/src/builders/varbinview.rs b/vortex-array/src/builders/varbinview.rs index e89fc824c19..267885ce8f0 100644 --- a/vortex-array/src/builders/varbinview.rs +++ b/vortex-array/src/builders/varbinview.rs @@ -20,12 +20,12 @@ use vortex_scalar::Scalar; use vortex_scalar::Utf8Scalar; use vortex_utils::aliases::hash_map::Entry; use vortex_utils::aliases::hash_map::HashMap; -use vortex_vector::binaryview::BinaryView; use crate::Array; use crate::ArrayRef; use crate::IntoArray; use crate::arrays::VarBinViewArray; +use crate::arrays::build_views::BinaryView; use crate::arrays::compact::BufferUtilization; use crate::builders::ArrayBuilder; use crate::builders::LazyBitBufferBuilder; diff --git a/vortex-array/src/canonical_to_vector.rs b/vortex-array/src/canonical_to_vector.rs deleted file mode 100644 index cb8068b3ba5..00000000000 --- a/vortex-array/src/canonical_to_vector.rs +++ /dev/null @@ -1,181 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Conversion from Canonical arrays to Vectors. 
- -use std::sync::Arc; - -use vortex_buffer::Buffer; -use vortex_dtype::BigCast; -use vortex_dtype::DType; -use vortex_dtype::PrecisionScale; -use vortex_dtype::match_each_decimal_value_type; -use vortex_dtype::match_each_native_ptype; -use vortex_error::VortexExpect; -use vortex_error::VortexResult; -use vortex_mask::Mask; -use vortex_vector::Vector; -use vortex_vector::binaryview::BinaryVector; -use vortex_vector::binaryview::StringVector; -use vortex_vector::bool::BoolVector; -use vortex_vector::decimal::DVector; -use vortex_vector::fixed_size_list::FixedSizeListVector; -use vortex_vector::listview::ListViewVector; -use vortex_vector::null::NullVector; -use vortex_vector::primitive::PVector; -use vortex_vector::struct_::StructVector; - -use crate::ArrayRef; -use crate::Canonical; -use crate::Executable; -use crate::ExecutionCtx; -use crate::arrays::ListViewArrayParts; -use crate::arrays::PrimitiveArray; - -impl Executable for Vector { - #[expect(deprecated)] - fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { - let canonical = array.execute::(ctx)?; - canonical.to_vector(ctx) - } -} - -impl Canonical { - /// Convert a Canonical array to a Vector. - /// - /// This is the reverse of `VectorIntoArray` - it takes a fully materialized - /// canonical array and converts it into the corresponding vector type. - /// TODO(joe): move over the execute_mask - #[deprecated] - pub fn to_vector(self, ctx: &mut ExecutionCtx) -> VortexResult { - Ok(match self { - Canonical::Null(a) => Vector::Null(NullVector::new(a.len())), - Canonical::Bool(a) => { - Vector::Bool(BoolVector::new(a.bit_buffer().clone(), a.validity_mask()?)) - } - Canonical::Primitive(a) => { - let ptype = a.ptype(); - let validity = a.validity_mask()?; - match_each_native_ptype!(ptype, |T| { - let buffer = a.to_buffer::(); - Vector::Primitive(PVector::::new(buffer, validity).into()) - }) - } - Canonical::Decimal(a) => { - // Match on the storage type first to read the buffer - match_each_decimal_value_type!(a.values_type(), |D| { - // Use the smallest type that can represent the precision/scale. - // The array may store values in a smaller type (if values fit), but - // DVector requires a PrecisionScale that matches its type parameter. 
- let min_value_type = - DecimalType::smallest_decimal_value_type(&a.decimal_dtype()); - match_each_decimal_value_type!(min_value_type, |E| { - let decimal_dtype = a.decimal_dtype(); - let buffer = a.buffer::(); - let validity_mask = a.validity_mask()?; - - // Copy from D to E, possibly widening, possibly narrowing - let values = Buffer::::from_trusted_len_iter(buffer.iter().map(|d| { - ::from(*d).vortex_expect("Decimal cast failed") - })); - - // SAFETY: values came from a valid DecimalArray with the same precision/scale - Vector::Decimal( - unsafe { - DVector::::new_unchecked( - PrecisionScale::new_unchecked( - decimal_dtype.precision(), - decimal_dtype.scale(), - ), - values, - validity_mask, - ) - } - .into(), - ) - }) - }) - } - Canonical::VarBinView(a) => { - let validity = a.validity_mask()?; - match a.dtype() { - DType::Utf8(_) => { - let views = a.views().clone(); - // Convert Arc<[ByteBuffer]> to Arc> - let buffers: Box<[_]> = a.buffers().iter().cloned().collect(); - Vector::String(unsafe { - StringVector::new_unchecked(views, Arc::new(buffers), validity) - }) - } - DType::Binary(_) => { - let views = a.views().clone(); - // Convert Arc<[ByteBuffer]> to Arc> - let buffers: Box<[_]> = a.buffers().iter().cloned().collect(); - Vector::Binary(unsafe { - BinaryVector::new_unchecked(views, Arc::new(buffers), validity) - }) - } - _ => unreachable!("VarBinView must be Utf8 or Binary"), - } - } - Canonical::List(a) => { - let ListViewArrayParts { - elements, - offsets, - sizes, - validity, - .. - } = a.into_parts(); - - let validity = validity.to_array(offsets.len()).execute::(ctx)?; - let elements_vector = elements.execute::(ctx)?; - let offsets = offsets.execute::(ctx)?; - let sizes = sizes.execute::(ctx)?; - let offsets_ptype = offsets.ptype(); - let sizes_ptype = sizes.ptype(); - - match_each_native_ptype!(offsets_ptype, |O| { - match_each_native_ptype!(sizes_ptype, |S| { - let offsets_vec = - PVector::::new(offsets.to_buffer::(), offsets.validity_mask()?); - let sizes_vec = - PVector::::new(sizes.to_buffer::(), sizes.validity_mask()?); - Vector::List(unsafe { - ListViewVector::new_unchecked( - Arc::new(elements_vector), - offsets_vec.into(), - sizes_vec.into(), - validity, - ) - }) - }) - }) - } - Canonical::FixedSizeList(a) => { - let validity = a.validity_mask()?; - let list_size = a.list_size(); - let elements_vector = a.elements().clone().execute::(ctx)?; - Vector::FixedSizeList(unsafe { - FixedSizeListVector::new_unchecked( - Arc::new(elements_vector), - list_size, - validity, - ) - }) - } - Canonical::Struct(a) => { - let validity = a.validity_mask()?; - let mut fields = Vec::with_capacity(a.fields().len()); - for f in a.fields().iter().cloned() { - fields.push(f.execute::(ctx)?); - } - let fields: Box<[Vector]> = fields.into_boxed_slice(); - Vector::Struct(StructVector::new(Arc::new(fields), validity)) - } - Canonical::Extension(a) => { - // For extension arrays, convert the underlying storage - a.storage().clone().execute::(ctx)? 
- } - }) - } -} diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index d315de33d8b..9a0f1bb6ff4 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -36,7 +36,6 @@ pub mod buffer; pub mod builders; pub mod builtins; mod canonical; -pub(crate) mod canonical_to_vector; pub mod compute; mod context; pub mod display; @@ -62,7 +61,6 @@ pub mod stream; pub mod test_harness; pub mod validity; pub mod variants; -pub mod vectors; pub mod vtable; pub mod flatbuffers { diff --git a/vortex-array/src/patches.rs b/vortex-array/src/patches.rs index 5cf14730dc3..44b0553e30e 100644 --- a/vortex-array/src/patches.rs +++ b/vortex-array/src/patches.rs @@ -15,10 +15,8 @@ use vortex_dtype::IntegerPType; use vortex_dtype::NativePType; use vortex_dtype::Nullability::NonNullable; use vortex_dtype::PType; -use vortex_dtype::PTypeDowncastExt; use vortex_dtype::UnsignedPType; use vortex_dtype::match_each_integer_ptype; -use vortex_dtype::match_each_native_ptype; use vortex_dtype::match_each_unsigned_integer_ptype; use vortex_error::VortexError; use vortex_error::VortexResult; @@ -31,8 +29,6 @@ use vortex_mask::MaskMut; use vortex_scalar::PValue; use vortex_scalar::Scalar; use vortex_utils::aliases::hash_map::HashMap; -use vortex_vector::primitive::PVectorMut; -use vortex_vector::primitive::PrimitiveVectorMut; use crate::Array; use crate::ArrayRef; @@ -844,28 +840,6 @@ impl Patches { })) } - /// Applies patches to a primitive vector, returning the patched vector. - pub fn apply_to_primitive_vector(&self, vector: PrimitiveVectorMut) -> PrimitiveVectorMut { - match_each_native_ptype!(vector.ptype(), |T| { - self.apply_to_pvector(vector.downcast::()).into() - }) - } - - /// Applies patches to a [`PVectorMut`], returning the patched vector. - /// - /// This function modifies the elements buffer in-place at the positions specified by the patch - /// indices. It also updates the validity mask to reflect the nullability of patch values. - pub fn apply_to_pvector(&self, pvector: PVectorMut) -> PVectorMut { - let (mut elements, mut validity) = pvector.into_parts(); - - // SAFETY: We maintain the invariant that elements and validity have the same length, and all - // patch indices are valid after offset adjustment (guaranteed by `Patches`). - unsafe { self.apply_to_buffer(elements.as_mut_slice(), &mut validity) }; - - // SAFETY: We have not modified the length of elements or validity. - unsafe { PVectorMut::new_unchecked(elements, validity) } - } - /// Apply patches to a mutable buffer and validity mask. 
/// /// This method applies the patch values to the given buffer at the positions specified by the diff --git a/vortex-array/src/vectors.rs b/vortex-array/src/vectors.rs deleted file mode 100644 index 2abaa0f3210..00000000000 --- a/vortex-array/src/vectors.rs +++ /dev/null @@ -1,238 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::sync::Arc; - -use vortex_dtype::DType; -use vortex_dtype::NativeDecimalType; -use vortex_dtype::NativePType; -use vortex_dtype::Nullability::NonNullable; -use vortex_dtype::match_each_decimal_value_type; -use vortex_dtype::match_each_native_ptype; -use vortex_error::VortexExpect; -use vortex_vector::Vector; -use vortex_vector::VectorOps; -use vortex_vector::binaryview::BinaryViewType; -use vortex_vector::binaryview::BinaryViewVector; -use vortex_vector::bool::BoolVector; -use vortex_vector::decimal::DVector; -use vortex_vector::decimal::DecimalVector; -use vortex_vector::fixed_size_list::FixedSizeListVector; -use vortex_vector::listview::ListViewVector; -use vortex_vector::null::NullVector; -use vortex_vector::primitive::PVector; -use vortex_vector::primitive::PrimitiveVector; -use vortex_vector::struct_::StructVector; - -use crate::ArrayRef; -use crate::Canonical; -use crate::IntoArray; -use crate::arrays::BoolArray; -use crate::arrays::DecimalArray; -use crate::arrays::ExtensionArray; -use crate::arrays::FixedSizeListArray; -use crate::arrays::ListViewArray; -use crate::arrays::NullArray; -use crate::arrays::PrimitiveArray; -use crate::arrays::StructArray; -use crate::arrays::VarBinViewArray; -use crate::validity::Validity; - -/// Trait for converting vector types into arrays. -pub trait VectorIntoArray { - /// Converts the vector into an array of the specified data type. - fn into_array(self, dtype: &DType) -> T; -} - -impl VectorIntoArray for Vector { - fn into_array(self, dtype: &DType) -> ArrayRef { - VectorIntoArray::::into_array(self, dtype).into_array() - } -} - -impl VectorIntoArray for Vector { - fn into_array(self, dtype: &DType) -> Canonical { - match dtype { - DType::Null => Canonical::Null(self.into_null().into_array(dtype)), - DType::Bool(_) => Canonical::Bool(self.into_bool().into_array(dtype)), - DType::Primitive(..) => Canonical::Primitive(self.into_primitive().into_array(dtype)), - DType::Decimal(..) => Canonical::Decimal(self.into_decimal().into_array(dtype)), - DType::Utf8(_) => Canonical::VarBinView(self.into_string().into_array(dtype)), - DType::Binary(_) => Canonical::VarBinView(self.into_binary().into_array(dtype)), - DType::List(..) => Canonical::List(self.into_list().into_array(dtype)), - DType::FixedSizeList(..) => { - Canonical::FixedSizeList(self.into_fixed_size_list().into_array(dtype)) - } - DType::Struct(..) 
=> Canonical::Struct(self.into_struct().into_array(dtype)), - DType::Extension(ext_dtype) => { - let storage: Canonical = self.into_array(ext_dtype.storage_dtype()); - Canonical::Extension(ExtensionArray::new(ext_dtype.clone(), storage.into_array())) - } - } - } -} - -impl VectorIntoArray for NullVector { - fn into_array(self, dtype: &DType) -> NullArray { - assert!(matches!(dtype, DType::Null)); - NullArray::new(self.len()) - } -} - -impl VectorIntoArray for BoolVector { - fn into_array(self, dtype: &DType) -> BoolArray { - assert!(matches!(dtype, DType::Bool(_))); - - let (bits, validity) = self.into_parts(); - BoolArray::from_bit_buffer(bits, Validity::from_mask(validity, dtype.nullability())) - } -} - -impl VectorIntoArray for PrimitiveVector { - fn into_array(self, dtype: &DType) -> PrimitiveArray { - match_each_native_ptype!(self.ptype(), |T| { - ::downcast(self).into_array(dtype) - }) - } -} - -impl VectorIntoArray for PVector { - fn into_array(self, dtype: &DType) -> PrimitiveArray { - assert!(matches!(dtype, DType::Primitive(_, _))); - assert_eq!(T::PTYPE, dtype.as_ptype()); - - let (values, validity) = self.into_parts(); - // SAFETY: vectors maintain all invariants required for array creation - unsafe { - PrimitiveArray::new_unchecked::( - values, - Validity::from_mask(validity, dtype.nullability()), - ) - } - } -} - -impl VectorIntoArray for DecimalVector { - fn into_array(self, dtype: &DType) -> DecimalArray { - match_each_decimal_value_type!(self.decimal_type(), |D| { - ::downcast(self).into_array(dtype) - }) - } -} - -impl VectorIntoArray for DVector { - fn into_array(self, dtype: &DType) -> DecimalArray { - assert!(matches!(dtype, DType::Decimal(_, _))); - - let nullability = dtype.nullability(); - let dec_dtype = dtype - .as_decimal_opt() - .vortex_expect("expected decimal DType"); - assert_eq!(dec_dtype.precision(), self.precision()); - assert_eq!(dec_dtype.scale(), self.scale()); - - let (_ps, values, validity) = self.into_parts(); - // SAFETY: vectors maintain all invariants required for array creation - unsafe { - DecimalArray::new_unchecked::( - values, - *dec_dtype, - Validity::from_mask(validity, nullability), - ) - } - } -} - -impl VectorIntoArray for BinaryViewVector { - fn into_array(self, dtype: &DType) -> VarBinViewArray { - assert!(matches!(dtype, DType::Utf8(_) | DType::Binary(_))); - - let (views, buffers, validity) = self.into_parts(); - let validity = Validity::from_mask(validity, dtype.nullability()); - - let buffers = Arc::try_unwrap(buffers).unwrap_or_else(|b| (*b).clone()); - - // SAFETY: vectors maintain all invariants required for array creation - unsafe { - VarBinViewArray::new_unchecked( - views, - buffers.into_iter().collect(), - dtype.clone(), - validity, - ) - } - } -} - -impl VectorIntoArray for ListViewVector { - fn into_array(self, dtype: &DType) -> ListViewArray { - assert!(matches!(dtype, DType::List(_, _))); - - let (elements, offsets, sizes, validity) = self.into_parts(); - let validity = Validity::from_mask(validity, dtype.nullability()); - - let elements_dtype = dtype.as_list_element_opt().vortex_expect("expected list"); - let elements = Arc::try_unwrap(elements) - .unwrap_or_else(|e| (*e).clone()) - .into_array(elements_dtype); - - let offsets_dtype = DType::Primitive(offsets.ptype(), NonNullable); - let offsets = offsets.into_array(&offsets_dtype); - - let sizes_dtype = DType::Primitive(sizes.ptype(), NonNullable); - let sizes = sizes.into_array(&sizes_dtype); - - // SAFETY: vectors maintain all invariants required for array creation - 
unsafe { - ListViewArray::new_unchecked( - elements, - offsets.into_array(), - sizes.into_array(), - validity, - ) - } - } -} - -impl VectorIntoArray for FixedSizeListVector { - fn into_array(self, dtype: &DType) -> FixedSizeListArray { - assert!(matches!(dtype, DType::FixedSizeList(_, _, _))); - - let len = self.len(); - let (elements, size, validity) = self.into_parts(); - let validity = Validity::from_mask(validity, dtype.nullability()); - - let elements_dtype = dtype - .as_fixed_size_list_element_opt() - .vortex_expect("expected fixed size list"); - let elements = Arc::try_unwrap(elements) - .unwrap_or_else(|e| (*e).clone()) - .into_array(elements_dtype); - - // SAFETY: vectors maintain all invariants required for array creation - unsafe { FixedSizeListArray::new_unchecked(elements, size, validity, len) } - } -} - -impl VectorIntoArray for StructVector { - fn into_array(self, dtype: &DType) -> StructArray { - assert!(matches!(dtype, DType::Struct(_, _))); - - let len = self.len(); - let (fields, validity) = self.into_parts(); - let validity = Validity::from_mask(validity, dtype.nullability()); - - let struct_fields = dtype.as_struct_fields(); - assert_eq!(fields.len(), struct_fields.nfields()); - - let field_arrays: Vec = Arc::try_unwrap(fields) - .unwrap_or_else(|f| (*f).clone()) - .into_iter() - .zip(struct_fields.fields()) - .map(|(field_vector, field_dtype)| field_vector.into_array(&field_dtype)) - .collect(); - - // SAFETY: vectors maintain all invariants required for array creation - unsafe { StructArray::new_unchecked(field_arrays, struct_fields.clone(), len, validity) } - } -}