Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions encodings/runend/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,7 @@ harness = false
[[bench]]
name = "run_end_compress"
harness = false

[[bench]]
name = "run_end_decode"
harness = false
110 changes: 110 additions & 0 deletions encodings/runend/benches/run_end_decode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![allow(clippy::unwrap_used, clippy::cast_possible_truncation)]

use divan::Bencher;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::compute::warm_up_vtables;
use vortex_array::validity::Validity;
use vortex_buffer::BitBuffer;
use vortex_buffer::BufferMut;
use vortex_runend::decompress_bool::runend_decode_bools;

/// Benchmark entry point.
///
/// Calls `warm_up_vtables()` and then hands control to divan's runner via `divan::main()`.
fn main() {
// NOTE(review): presumably performs one-time vtable setup up front so that its cost is not
// attributed to whichever benchmark happens to run first — confirm against `warm_up_vtables`.
warm_up_vtables();
divan::main();
}

/// How the per-run boolean values of the benchmark input are chosen.
///
/// The choice is made per run (not per element), based on the run's index.
#[derive(Copy, Clone)]
enum BoolDistribution {
    /// Runs alternate between `true` and `false`, starting with `true`.
    Alternating,
    /// Nine out of every ten runs are `true`.
    MostlyTrue,
    /// Nine out of every ten runs are `false`.
    MostlyFalse,
    /// Every run is `true`.
    AllTrue,
    /// Every run is `false`.
    AllFalse,
}

/// Builds run-end encoded boolean input for the decode benchmarks.
///
/// The data covers `total_length` logical positions, split into consecutive runs of
/// `avg_run_length` positions each; the last run is shortened when `total_length` is not a
/// multiple of `avg_run_length`. `distribution` selects the value of each run from the run's
/// index.
///
/// Returns `(ends, values)`: the exclusive end position of every run as a non-nullable `u32`
/// array, and one boolean per run.
///
/// # Panics
///
/// Panics if `avg_run_length` is zero (division by zero while sizing the buffers).
fn create_bool_test_data(
    total_length: usize,
    avg_run_length: usize,
    distribution: BoolDistribution,
) -> (PrimitiveArray, BoolArray) {
    // Upper bound on the number of runs; used to size both buffers up front.
    let run_capacity = total_length / avg_run_length + 1;
    let mut run_ends = BufferMut::<u32>::with_capacity(run_capacity);
    let mut run_values = Vec::with_capacity(run_capacity);

    for (run_index, start) in (0..total_length).step_by(avg_run_length).enumerate() {
        // Clamp the final run so it never extends past `total_length`.
        let end = start + avg_run_length.min(total_length - start);
        // Truncating cast: the file-level `allow(clippy::cast_possible_truncation)` covers this.
        run_ends.push(end as u32);

        run_values.push(match distribution {
            BoolDistribution::Alternating => run_index % 2 == 0,
            // Every tenth run is false, the rest are true.
            BoolDistribution::MostlyTrue => run_index % 10 != 0,
            // Every tenth run is true, the rest are false.
            BoolDistribution::MostlyFalse => run_index % 10 == 0,
            BoolDistribution::AllTrue => true,
            BoolDistribution::AllFalse => false,
        });
    }

    let ends = PrimitiveArray::new(run_ends.freeze(), Validity::NonNullable);
    let values = BoolArray::from(BitBuffer::from(run_values));
    (ends, values)
}

/// Benchmark inputs as `(total_length, avg_run_length)` pairs.
///
/// Every case decodes 10,000 elements; only the run length (and therefore the number of runs)
/// varies.
const BOOL_ARGS: &[(usize, usize)] = &[
    (10_000, 2),     // 5,000 runs of 2 elements
    (10_000, 10),    // 1,000 runs of 10 elements
    (10_000, 100),   // 100 runs of 100 elements
    (10_000, 1_000), // 10 runs of 1,000 elements
];

/// Benchmarks `runend_decode_bools` on input whose runs alternate between `true` and `false`.
///
/// The argument tuple is `(total_length, avg_run_length)`: the number of decoded elements and
/// the length of each run in the generated input.
#[divan::bench(args = BOOL_ARGS)]
fn decode_bool_alternating(bencher: Bencher, (total_length, avg_run_length): (usize, usize)) {
    let (ends, values) =
        create_bool_test_data(total_length, avg_run_length, BoolDistribution::Alternating);
    // `runend_decode_bools` consumes its arrays, so every iteration needs its own copies.
    // Producing them in `with_inputs` keeps the cloning out of the measured time.
    bencher
        .with_inputs(|| (ends.clone(), values.clone()))
        .bench_values(|(ends, values)| runend_decode_bools(ends, values, 0, total_length));
}

/// Benchmarks `runend_decode_bools` on input where nine out of every ten runs are `true`.
///
/// The argument tuple is `(total_length, avg_run_length)`: the number of decoded elements and
/// the length of each run in the generated input.
#[divan::bench(args = BOOL_ARGS)]
fn decode_bool_mostly_true(bencher: Bencher, (total_length, avg_run_length): (usize, usize)) {
    let (ends, values) =
        create_bool_test_data(total_length, avg_run_length, BoolDistribution::MostlyTrue);
    // `runend_decode_bools` consumes its arrays, so every iteration needs its own copies.
    // Producing them in `with_inputs` keeps the cloning out of the measured time.
    bencher
        .with_inputs(|| (ends.clone(), values.clone()))
        .bench_values(|(ends, values)| runend_decode_bools(ends, values, 0, total_length));
}

/// Benchmarks `runend_decode_bools` on input where nine out of every ten runs are `false`.
///
/// The argument tuple is `(total_length, avg_run_length)`: the number of decoded elements and
/// the length of each run in the generated input.
#[divan::bench(args = BOOL_ARGS)]
fn decode_bool_mostly_false(bencher: Bencher, (total_length, avg_run_length): (usize, usize)) {
    let (ends, values) =
        create_bool_test_data(total_length, avg_run_length, BoolDistribution::MostlyFalse);
    // `runend_decode_bools` consumes its arrays, so every iteration needs its own copies.
    // Producing them in `with_inputs` keeps the cloning out of the measured time.
    bencher
        .with_inputs(|| (ends.clone(), values.clone()))
        .bench_values(|(ends, values)| runend_decode_bools(ends, values, 0, total_length));
}

/// Benchmarks `runend_decode_bools` on input where every run is `true`.
///
/// The argument tuple is `(total_length, avg_run_length)`: the number of decoded elements and
/// the length of each run in the generated input.
#[divan::bench(args = BOOL_ARGS)]
fn decode_bool_all_true(bencher: Bencher, (total_length, avg_run_length): (usize, usize)) {
    let (ends, values) =
        create_bool_test_data(total_length, avg_run_length, BoolDistribution::AllTrue);
    // `runend_decode_bools` consumes its arrays, so every iteration needs its own copies.
    // Producing them in `with_inputs` keeps the cloning out of the measured time.
    bencher
        .with_inputs(|| (ends.clone(), values.clone()))
        .bench_values(|(ends, values)| runend_decode_bools(ends, values, 0, total_length));
}

/// Benchmarks `runend_decode_bools` on input where every run is `false`.
///
/// The argument tuple is `(total_length, avg_run_length)`: the number of decoded elements and
/// the length of each run in the generated input.
#[divan::bench(args = BOOL_ARGS)]
fn decode_bool_all_false(bencher: Bencher, (total_length, avg_run_length): (usize, usize)) {
    let (ends, values) =
        create_bool_test_data(total_length, avg_run_length, BoolDistribution::AllFalse);
    // `runend_decode_bools` consumes its arrays, so every iteration needs its own copies.
    // Producing them in `with_inputs` keeps the cloning out of the measured time.
    bencher
        .with_inputs(|| (ends.clone(), values.clone()))
        .bench_values(|(ends, values)| runend_decode_bools(ends, values, 0, total_length));
}
59 changes: 1 addition & 58 deletions encodings/runend/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,23 +186,7 @@ pub fn runend_decode_primitive(
}))
}

/// Decodes a run-end encoded boolean array into a plain `BoolArray` of `length` elements.
///
/// `ends` holds the run ends (any unsigned integer ptype) and `values` holds one boolean per
/// run. `offset` and `length` are forwarded to `trimmed_ends_iter`; presumably they select the
/// logical window to decode — confirm against that helper.
///
/// # Errors
///
/// Returns an error if the validity mask of `values` cannot be computed.
pub fn runend_decode_bools(
    ends: PrimitiveArray,
    values: BoolArray,
    offset: usize,
    length: usize,
) -> VortexResult<BoolArray> {
    let validity_mask = values.validity_mask()?;
    let run_bits = values.to_bit_buffer();
    let nullability = values.dtype().nullability();

    // Dispatch on the physical integer type of the run ends.
    let decoded = match_each_unsigned_integer_ptype!(ends.ptype(), |E| {
        let run_ends = trimmed_ends_iter(ends.as_slice::<E>(), offset, length);
        runend_decode_typed_bool(run_ends, &run_bits, validity_mask, nullability, length)
    });
    Ok(decoded)
}
pub use crate::decompress_bool::runend_decode_bools;

pub fn runend_decode_typed_primitive<T: NativePType>(
run_ends: impl Iterator<Item = usize>,
Expand Down Expand Up @@ -263,47 +247,6 @@ pub fn runend_decode_typed_primitive<T: NativePType>(
}
}

/// Expands per-run boolean values into a `BoolArray` by repeating each value up to its run end.
///
/// * `run_ends` - end position of each run, relative to the start of the output.
/// * `values` - one bit per run.
/// * `values_validity` - validity of the per-run values.
/// * `values_nullability` - converted into the output validity when every run value is valid.
/// * `length` - capacity reserved for the output; also its exact length when every run value
///   is invalid.
///
/// When all run values are invalid the run ends are not consulted and an all-invalid array of
/// `length` unset bits is returned.
///
/// NOTE(review): `zip_eq` looks like the itertools adaptor, which panics when `run_ends` and
/// the values yield a different number of items — confirm.
pub fn runend_decode_typed_bool(
    run_ends: impl Iterator<Item = usize>,
    values: &BitBuffer,
    values_validity: Mask,
    values_nullability: Nullability,
    length: usize,
) -> BoolArray {
    match values_validity {
        Mask::AllFalse(_) => BoolArray::new(BitBuffer::new_unset(length), Validity::AllInvalid),
        Mask::AllTrue(_) => {
            let mut bits = BitBufferMut::with_capacity(length);
            for (end, bit) in run_ends.zip_eq(values.iter()) {
                // Each run fills the gap between what has been written and its end.
                let run_len = end - bits.len();
                bits.append_n(bit, run_len);
            }
            BoolArray::new(bits.freeze(), values_nullability.into())
        }
        Mask::Values(mask) => {
            let mut bits = BitBufferMut::with_capacity(length);
            let mut validity_bits = BitBufferMut::with_capacity(length);
            let runs = values.iter().zip(mask.bit_buffer().iter());
            for (end, (bit, is_valid)) in run_ends.zip_eq(runs) {
                let run_len = end - bits.len();
                validity_bits.append_n(is_valid, run_len);
                // Invalid runs are stored as `false` in the value buffer.
                bits.append_n(is_valid && bit, run_len);
            }
            BoolArray::new(bits.freeze(), Validity::from(validity_bits.freeze()))
        }
    }
}

#[cfg(test)]
mod test {
use vortex_array::ToCanonical;
Expand Down
Loading
Loading