Skip to content

Commit 40e5aa0

Browse files
committed
chore: readme and remove deprecations
1 parent 69e6a92 commit 40e5aa0

File tree

7 files changed

+148
-86
lines changed

7 files changed

+148
-86
lines changed

CLAUDE.md

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,9 @@ cargo clippy --all-targets --all-features -- -D warnings && cargo fmt --check
4949
### Unified Architecture Transformation
5050
- **ZiporaHashMap**: Single implementation replacing 6+ hash maps
5151
- **ZiporaTrie**: Single implementation replacing 5+ tries
52+
- **EnhancedLoserTree**: Unified tournament tree (removed LoserTree backward compatibility)
5253
- Strategy-based configuration (HashStrategy, TrieStrategy, etc.)
53-
- Clean module exports, backward-compatible APIs
54+
- Clean module exports, no backward compatibility code
5455
- Version 2.0.0 with migration guide
5556

5657
### Advanced Multi-Way Merge
@@ -263,9 +264,31 @@ sorter.sort(&mut data)?;
263264
**Status**: Production-ready SIMD acceleration framework
264265
**Performance**: 4-12x memory ops, 0.3-0.4 Gops/s rank/select, 4-8x radix sort, 2-8x string processing
265266
**Cross-Platform**: x86_64 (AVX-512/AVX2/BMI2/POPCNT) + ARM64 (NEON) + scalar fallbacks
266-
**Tests**: 2,176+ passing (100% pass rate)
267+
**Tests**: 2,178+ passing (100% pass rate)
267268
**Safety**: Zero unsafe in public APIs (MANDATORY)
268269

270+
## Deprecated Code Removal (2025-10-15)
271+
272+
### ✅ ALL BACKWARD COMPATIBILITY CODE REMOVED
273+
274+
**Tournament Tree**:
275+
- Removed `LoserTree` type alias → Use `EnhancedLoserTree` directly
276+
- Updated all imports and usages across codebase
277+
- Fixed: `src/algorithms/external_sort.rs`, `src/lib.rs`, `src/algorithms/mod.rs`
278+
279+
**IntVec Legacy SIMD**:
280+
- Removed deprecated `from_slice_bulk_simd_legacy()` function
281+
- Removed deprecated `bulk_convert_to_u64_simd()` function
282+
- All code now uses adaptive SIMD selection framework
283+
284+
**README.md**:
285+
- Removed legacy Tournament Tree examples
286+
- Removed "Traditional pools (legacy)" examples from C FFI section
287+
- Added new blob store examples (ZeroLength, SimpleZip, MixedLen)
288+
- Updated performance summary table
289+
290+
**Build Status**: ✅ All 2,178 tests passing, zero compilation errors
291+
269292
## Latest Updates (2025-10-14)
270293

271294
### ✅ ALL CRITICAL BLOB STORES IMPLEMENTED

README.md

Lines changed: 106 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Zipora 2.0 introduces a **unified architecture** following referenced project's
2424
- **🛡️ Memory Safety**: Eliminates segfaults, buffer overflows, use-after-free bugs
2525
- **🧠 Secure Memory Management**: Production-ready memory pools with thread safety, RAII, and vulnerability prevention
2626
- **🚨 Advanced Error Handling & Recovery**: Sophisticated error classification (WARNING/RECOVERABLE/CRITICAL/FATAL), automatic recovery strategies (memory reclamation, structure rebuilding, fallback algorithms), contextual error reporting with metadata, and comprehensive verification macros
27-
- **💾 Blob Storage**: Advanced storage systems including trie-based indexing and offset-based compression
27+
- **💾 Blob Storage**: 7 specialized stores including trie-based indexing, offset compression, zero-length, fragment deduplication, and mixed-length hybrid storage
2828
- **📦 Specialized Containers**: Production-ready containers with 40-90% memory/performance improvements
2929
- **🗂️ Specialized Hash Maps**: Golden ratio optimized, string-optimized, small inline maps with advanced cache locality optimizations, sophisticated collision resolution algorithms, and memory-efficient string arena management
3030
- **⚡ Cache Optimization Infrastructure**: Comprehensive cache-line alignment, hot/cold data separation, software prefetching, NUMA-aware allocation, and access pattern analysis for maximum performance
@@ -1260,12 +1260,98 @@ let stats = cached_store.cache_stats(); // Performance metrics
12601260
println!("Hit ratio: {:.2}%", stats.hit_ratio * 100.0);
12611261
```
12621262

1263+
### Specialized Blob Stores (New in 2.0)
1264+
1265+
#### Zero-Length Blob Store
1266+
1267+
```rust
1268+
use zipora::{ZeroLengthBlobStore, BlobStore};
1269+
1270+
// Optimized storage for zero-length blobs (empty records)
1271+
// O(1) memory overhead regardless of record count
1272+
let mut store = ZeroLengthBlobStore::new();
1273+
1274+
// Add empty records efficiently
1275+
let id1 = store.put(b"").unwrap();
1276+
let id2 = store.put(&[]).unwrap();
1277+
let id3 = store.put(b"").unwrap();
1278+
1279+
// All get operations return empty vectors
1280+
assert_eq!(store.get(id1).unwrap(), b"");
1281+
assert!(store.contains(id2));
1282+
assert_eq!(store.len(), 3);
1283+
1284+
// Perfect for sparse indexes, placeholder records, or bitmap storage
1285+
```
1286+
1287+
#### Simple Zip Blob Store
1288+
1289+
```rust
1290+
use zipora::{SimpleZipBlobStore, SimpleZipConfig, SimpleZipConfigBuilder, BlobStore};
1291+
1292+
// Fragment-based compression with HashMap deduplication
1293+
let config = SimpleZipConfig::builder()
1294+
.delimiters(vec![b'\n', b' ', b'\t']) // Split at whitespace
1295+
.min_fragment_len(3)
1296+
.max_fragment_len(64)
1297+
.enable_deduplication(true)
1298+
.build().unwrap();
1299+
1300+
let records = vec![
1301+
b"GET /api/users HTTP/1.1".to_vec(),
1302+
b"GET /api/posts HTTP/1.1".to_vec(),
1303+
b"POST /api/users HTTP/1.1".to_vec(),
1304+
];
1305+
1306+
let store = SimpleZipBlobStore::build_from(records, config).unwrap();
1307+
1308+
// Retrieve records efficiently
1309+
let id = 0;
1310+
let data = store.get(id).unwrap();
1311+
assert_eq!(data, b"GET /api/users HTTP/1.1");
1312+
1313+
// Ideal for datasets with shared substrings (logs, JSON, configuration files)
1314+
let stats = store.stats();
1315+
println!("Deduplication saved: {:.1}% space",
1316+
(1.0 - stats.average_size / stats.total_size as f64) * 100.0);
1317+
```
1318+
1319+
#### Mixed-Length Blob Store
1320+
1321+
```rust
1322+
use zipora::{MixedLenBlobStore, BlobStore};
1323+
1324+
// Hybrid storage for datasets with mixed fixed/variable-length records
1325+
let records = vec![
1326+
b"FIXED".to_vec(), // 5 bytes (common length)
1327+
b"FIXED".to_vec(), // 5 bytes
1328+
b"FIXED".to_vec(), // 5 bytes
1329+
b"VARIABLE LENGTH".to_vec(), // Different length
1330+
b"FIXED".to_vec(), // 5 bytes
1331+
];
1332+
1333+
let store = MixedLenBlobStore::build_from(records, 5).unwrap();
1334+
1335+
// Automatic rank/select bitmap distinguishes fixed from variable
1336+
let id = 0;
1337+
let data = store.get(id).unwrap();
1338+
assert_eq!(data, b"FIXED");
1339+
1340+
// Best for datasets where ≥50% of records share the same length
1341+
let stats = store.stats();
1342+
println!("Fixed-length ratio: {:.1}%",
1343+
stats.blob_count as f64 / store.len() as f64 * 100.0);
1344+
```
1345+
12631346
### Blob Storage Performance Summary
12641347

12651348
| Storage Type | Memory Efficiency | Throughput | Features | Best Use Case |
12661349
|--------------|------------------|------------|----------|---------------|
12671350
| **NestLoudsTrieBlobStore** | **Trie compression + blob compression** | **O(key) access + O(1) blob retrieval** | **String indexing, prefix queries** | **Hierarchical data, key-value stores** |
12681351
| **ZipOffsetBlobStore** | **Block-based delta compression** | **O(1) offset-based access** | **Template optimization, ZSTD** | **Large datasets, streaming access** |
1352+
| **ZeroLengthBlobStore** | **O(1) overhead** | **O(1) all operations** | **Bitmap-only storage** | **Sparse indexes, empty records** |
1353+
| **SimpleZipBlobStore** | **Fragment deduplication** | **O(1) indexed access** | **Delimiter-based splitting** | **Logs, JSON, shared substrings** |
1354+
| **MixedLenBlobStore** | **Rank/select hybrid** | **O(1) bitmap + vector** | **Fixed/variable separation** | **Mixed-length datasets** |
12691355
| **LRU Page Cache** | **Page-aligned allocation** | **Reduced contention** | **Multi-shard architecture** | **High-concurrency access** |
12701356

12711357
## Memory Management
@@ -1985,19 +2071,6 @@ let mut external_sorter = ReplaceSelectSort::new(config);
19852071
let large_dataset = (0..10_000_000).rev().collect::<Vec<u32>>();
19862072
let sorted = external_sorter.sort(large_dataset).unwrap();
19872073

1988-
// Legacy Tournament Tree (still available)
1989-
let tree_config = LoserTreeConfig {
1990-
initial_capacity: 16,
1991-
stable_sort: true,
1992-
cache_optimized: true,
1993-
..Default::default()
1994-
};
1995-
let mut tournament_tree = LoserTree::new(tree_config);
1996-
tournament_tree.add_way(vec![1, 4, 7, 10].into_iter()).unwrap();
1997-
tournament_tree.add_way(vec![2, 5, 8, 11].into_iter()).unwrap();
1998-
tournament_tree.add_way(vec![3, 6, 9, 12].into_iter()).unwrap();
1999-
let merged = tournament_tree.merge_to_vec().unwrap();
2000-
20012074
// 🚀 Sophisticated Suffix Array Construction with 5 Algorithm Variants + Adaptive Selection
20022075
let text = b"banana";
20032076

@@ -3658,23 +3731,38 @@ println!("Compression ratio: {:.1}%", stats.compression_ratio() * 100.0);
36583731
println!("Dictionary hit rate: {:.2}%", stats.dictionary_hit_rate * 100.0);
36593732
```
36603733

3661-
### Advanced Entropy Coding Algorithms
3734+
### Advanced Entropy Coding Algorithms ✅
3735+
3736+
**Fully Implemented in Zipora 2.0:**
3737+
- **Huffman Order-0/1/2**: Context-dependent encoding with 256/1024 optimized trees
3738+
- **FSE Interleaving**: Parallel block processing with hardware acceleration
3739+
- **64-bit rANS**: Adaptive frequencies with X1/X2/X4/X8 parallel variants
3740+
- **SIMD Optimizations**: AVX2, BMI2 acceleration across all encoders
36623741

36633742
```rust
36643743
use zipora::entropy::*;
36653744

3666-
// 🚀 Contextual Huffman coding with Order-1/Order-2 models
3745+
// 🚀 Contextual Huffman coding with Order-1/Order-2 models (FULLY IMPLEMENTED)
36673746
let contextual_encoder = ContextualHuffmanEncoder::new(b"training data", HuffmanOrder::Order1).unwrap();
36683747
let compressed = contextual_encoder.encode(b"sample data").unwrap();
36693748

3749+
// Order-2 Huffman for even better compression (exceeds reference implementation)
3750+
let order2_encoder = ContextualHuffmanEncoder::new(b"training data", HuffmanOrder::Order2).unwrap();
3751+
let better_compressed = order2_encoder.encode(b"sample data").unwrap();
3752+
36703753
// 🚀 64-bit rANS with parallel variants
36713754
let mut frequencies = [1u32; 256];
36723755
for &byte in b"sample data" { frequencies[byte as usize] += 1; }
36733756
let rans_encoder = Rans64Encoder::<ParallelX4>::new(&frequencies).unwrap();
36743757
let compressed = rans_encoder.encode(b"sample data").unwrap();
36753758

3676-
// 🚀 FSE with ZSTD optimizations
3677-
let mut fse_encoder = FseEncoder::new(FseConfig::high_compression()).unwrap();
3759+
// 🚀 FSE with ZSTD optimizations and parallel block interleaving (FULLY IMPLEMENTED)
3760+
let fse_config = FseConfig {
3761+
parallel_blocks: true, // Enable parallel block processing
3762+
advanced_states: true, // Advanced state management
3763+
..FseConfig::high_compression()
3764+
};
3765+
let mut fse_encoder = FseEncoder::new(fse_config).unwrap();
36783766
let compressed = fse_encoder.compress(b"sample data").unwrap();
36793767

36803768
// 🚀 Parallel encoding with adaptive selection
@@ -3862,12 +3950,6 @@ CSecurePooledPtr* ptr = secure_memory_pool_allocate(pool);
38623950
secure_pooled_ptr_free(ptr);
38633951
secure_memory_pool_free(pool);
38643952

3865-
// Traditional pools (legacy, less secure)
3866-
CMemoryPool* old_pool = memory_pool_new(64 * 1024, 100);
3867-
void* chunk = memory_pool_allocate(old_pool);
3868-
memory_pool_deallocate(old_pool, chunk);
3869-
memory_pool_free(old_pool);
3870-
38713953
// Error handling
38723954
zipora_set_error_callback(error_callback);
38733955
if (fast_vec_push(NULL, 42) != CResult_Success) {

src/algorithms/external_sort.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
//! available memory by using disk-based temporary storage and efficient
66
//! merge operations.
77
8-
use crate::algorithms::tournament_tree::{LoserTree, LoserTreeConfig};
8+
use crate::algorithms::tournament_tree::{EnhancedLoserTree, LoserTreeConfig};
99
use crate::error::{Result, ZiporaError};
1010
use std::cmp::Ordering;
1111
use std::collections::BinaryHeap;
@@ -484,7 +484,7 @@ where
484484
alignment: 64,
485485
};
486486

487-
let mut tournament_tree = LoserTree::with_comparator(tree_config, self.comparator.clone());
487+
let mut tournament_tree = EnhancedLoserTree::with_comparator(tree_config, self.comparator.clone());
488488

489489
// Add all runs to the tournament tree
490490
for run in &self.temp_files {

src/algorithms/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ pub use radix_sort::{
2828
pub use set_operations::{SetOperations, SetOperationsConfig, SetOperationStats};
2929
pub use simd_merge::{SimdComparator, SimdConfig, SimdOperations};
3030
pub use suffix_array::{LcpArray, SuffixArray, SuffixArrayBuilder};
31-
pub use tournament_tree::{EnhancedLoserTree, LoserTree, LoserTreeConfig, TournamentNode, CacheAlignedNode};
31+
pub use tournament_tree::{EnhancedLoserTree, LoserTreeConfig, TournamentNode, CacheAlignedNode};
3232

3333
/// Configuration for algorithm behavior
3434
#[derive(Debug, Clone)]

src/algorithms/tournament_tree.rs

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -591,9 +591,6 @@ where
591591
}
592592
}
593593

594-
/// Type alias for backward compatibility
595-
pub type LoserTree<T, F = fn(&T, &T) -> Ordering> = EnhancedLoserTree<T, F>;
596-
597594
/// Iterator implementation for consuming the enhanced tournament tree
598595
impl<T, F> Iterator for EnhancedLoserTree<T, F>
599596
where
@@ -630,7 +627,7 @@ mod tests {
630627
#[test]
631628
fn test_empty_tree() {
632629
let config = LoserTreeConfig::default();
633-
let mut tree = LoserTree::<i32>::new(config);
630+
let mut tree = EnhancedLoserTree::<i32>::new(config);
634631

635632
assert!(tree.is_empty());
636633
assert_eq!(tree.num_ways(), 0);
@@ -641,7 +638,7 @@ mod tests {
641638
#[test]
642639
fn test_single_way() -> Result<()> {
643640
let config = LoserTreeConfig::default();
644-
let mut tree = LoserTree::<i32>::new(config);
641+
let mut tree = EnhancedLoserTree::<i32>::new(config);
645642

646643
tree.add_way(vec![1, 2, 3].into_iter())?;
647644

@@ -654,7 +651,7 @@ mod tests {
654651
#[test]
655652
fn test_two_way_merge() -> Result<()> {
656653
let config = LoserTreeConfig::default();
657-
let mut tree = LoserTree::<i32>::new(config);
654+
let mut tree = EnhancedLoserTree::<i32>::new(config);
658655

659656
tree.add_way(vec![1, 3, 5].into_iter())?;
660657
tree.add_way(vec![2, 4, 6].into_iter())?;
@@ -668,7 +665,7 @@ mod tests {
668665
#[test]
669666
fn test_three_way_merge() -> Result<()> {
670667
let config = LoserTreeConfig::default();
671-
let mut tree = LoserTree::<i32>::new(config);
668+
let mut tree = EnhancedLoserTree::<i32>::new(config);
672669

673670
tree.add_way(vec![1, 4, 7].into_iter())?;
674671
tree.add_way(vec![2, 5, 8].into_iter())?;
@@ -683,7 +680,7 @@ mod tests {
683680
#[test]
684681
fn test_uneven_lengths() -> Result<()> {
685682
let config = LoserTreeConfig::default();
686-
let mut tree = LoserTree::<i32>::new(config);
683+
let mut tree = EnhancedLoserTree::<i32>::new(config);
687684

688685
tree.add_way(vec![1].into_iter())?;
689686
tree.add_way(vec![2, 3, 4, 5].into_iter())?;
@@ -698,7 +695,7 @@ mod tests {
698695
#[test]
699696
fn test_empty_ways() -> Result<()> {
700697
let config = LoserTreeConfig::default();
701-
let mut tree = LoserTree::<i32>::new(config);
698+
let mut tree = EnhancedLoserTree::<i32>::new(config);
702699

703700
tree.add_way(vec![1, 2].into_iter())?;
704701
tree.add_way(std::iter::empty())?;
@@ -713,7 +710,7 @@ mod tests {
713710
#[test]
714711
fn test_duplicate_values() -> Result<()> {
715712
let config = LoserTreeConfig::default();
716-
let mut tree = LoserTree::<i32>::new(config);
713+
let mut tree = EnhancedLoserTree::<i32>::new(config);
717714

718715
tree.add_way(vec![1, 2, 2, 3].into_iter())?;
719716
tree.add_way(vec![2, 2, 4].into_iter())?;
@@ -727,7 +724,7 @@ mod tests {
727724
#[test]
728725
fn test_custom_comparator() -> Result<()> {
729726
let config = LoserTreeConfig::default();
730-
let mut tree = LoserTree::with_comparator(config, |a: &i32, b: &i32| b.cmp(a)); // Reverse order
727+
let mut tree = EnhancedLoserTree::with_comparator(config, |a: &i32, b: &i32| b.cmp(a)); // Reverse order
731728

732729
tree.add_way(vec![5, 3, 1].into_iter())?;
733730
tree.add_way(vec![6, 4, 2].into_iter())?;
@@ -741,7 +738,7 @@ mod tests {
741738
#[test]
742739
fn test_iterator_interface() -> Result<()> {
743740
let config = LoserTreeConfig::default();
744-
let mut tree = LoserTree::<i32>::new(config);
741+
let mut tree = EnhancedLoserTree::<i32>::new(config);
745742

746743
tree.add_way(vec![1, 3].into_iter())?;
747744
tree.add_way(vec![2, 4].into_iter())?;
@@ -757,7 +754,7 @@ mod tests {
757754
#[test]
758755
fn test_peek_before_pop() -> Result<()> {
759756
let config = LoserTreeConfig::default();
760-
let mut tree = LoserTree::<i32>::new(config);
757+
let mut tree = EnhancedLoserTree::<i32>::new(config);
761758

762759
tree.add_way(vec![1, 3].into_iter())?;
763760
tree.add_way(vec![2, 4].into_iter())?;
@@ -776,7 +773,7 @@ mod tests {
776773
#[test]
777774
fn test_large_merge() -> Result<()> {
778775
let config = LoserTreeConfig::default();
779-
let mut tree = LoserTree::<i32>::new(config);
776+
let mut tree = EnhancedLoserTree::<i32>::new(config);
780777

781778
// Add 10 ways with 100 elements each
782779
for way in 0..10 {

0 commit comments

Comments
 (0)