@@ -24,7 +24,7 @@ Zipora 2.0 introduces a **unified architecture** following referenced project's
2424- ** 🛡️ Memory Safety** : Eliminates segfaults, buffer overflows, use-after-free bugs
2525- ** 🧠 Secure Memory Management** : Production-ready memory pools with thread safety, RAII, and vulnerability prevention
2626- ** 🚨 Advanced Error Handling & Recovery** : Sophisticated error classification (WARNING/RECOVERABLE/CRITICAL/FATAL), automatic recovery strategies (memory reclamation, structure rebuilding, fallback algorithms), contextual error reporting with metadata, and comprehensive verification macros
27- - ** 💾 Blob Storage** : Advanced storage systems including trie-based indexing and offset-based compression
27+ - ** 💾 Blob Storage** : 7 specialized stores, including trie-based indexing, offset-based compression, zero-length storage, fragment deduplication, and mixed-length hybrid storage
2828- ** 📦 Specialized Containers** : Production-ready containers with 40-90% memory/performance improvements
2929- ** 🗂️ Specialized Hash Maps** : Golden ratio optimized, string-optimized, small inline maps with advanced cache locality optimizations, sophisticated collision resolution algorithms, and memory-efficient string arena management
3030- ** ⚡ Cache Optimization Infrastructure** : Comprehensive cache-line alignment, hot/cold data separation, software prefetching, NUMA-aware allocation, and access pattern analysis for maximum performance
@@ -1260,12 +1260,98 @@ let stats = cached_store.cache_stats(); // Performance metrics
12601260println! (" Hit ratio: {:.2}%" , stats . hit_ratio * 100.0 );
12611261```
12621262
1263+ ### Specialized Blob Stores (New in 2.0)
1264+
1265+ #### Zero-Length Blob Store
1266+
1267+ ``` rust
1268+ use zipora :: {ZeroLengthBlobStore , BlobStore };
1269+
1270+ // Optimized storage for zero-length blobs (empty records)
1271+ // O(1) memory overhead regardless of record count
1272+ let mut store = ZeroLengthBlobStore :: new ();
1273+
1274+ // Add empty records efficiently
1275+ let id1 = store . put (b "" ). unwrap ();
1276+ let id2 = store . put (& []). unwrap ();
1277+ let id3 = store . put (b "" ). unwrap ();
1278+
1279+ // All get operations return empty vectors
1280+ assert_eq! (store . get (id1 ). unwrap (), b "" );
1281+ assert! (store . contains (id2 ));
1282+ assert_eq! (store . len (), 3 );
1283+
1284+ // Perfect for sparse indexes, placeholder records, or bitmap storage
1285+ ```
1286+
1287+ #### Simple Zip Blob Store
1288+
1289+ ``` rust
1290+ use zipora :: {SimpleZipBlobStore , SimpleZipConfig , SimpleZipConfigBuilder , BlobStore };
1291+
1292+ // Fragment-based compression with HashMap deduplication
1293+ let config = SimpleZipConfig :: builder ()
1294+ . delimiters (vec! [b '\ n ' , b ' ' , b '\ t ' ]) // Split at whitespace
1295+ . min_fragment_len (3 )
1296+ . max_fragment_len (64 )
1297+ . enable_deduplication (true )
1298+ . build (). unwrap ();
1299+
1300+ let records = vec! [
1301+ b " GET /api/users HTTP/1.1" . to_vec (),
1302+ b " GET /api/posts HTTP/1.1" . to_vec (),
1303+ b " POST /api/users HTTP/1.1" . to_vec (),
1304+ ];
1305+
1306+ let store = SimpleZipBlobStore :: build_from (records , config ). unwrap ();
1307+
1308+ // Retrieve records efficiently
1309+ let id = 0 ;
1310+ let data = store . get (id ). unwrap ();
1311+ assert_eq! (data , b " GET /api/users HTTP/1.1" );
1312+
1313+ // Ideal for datasets with shared substrings (logs, JSON, configuration files)
1314+ let stats = store . stats ();
1315+ println! (" Deduplication saved: {:.1}% space" ,
1316+ (1.0 - stats . average_size / stats . total_size as f64 ) * 100.0 );
1317+ ```
1318+
1319+ #### Mixed-Length Blob Store
1320+
1321+ ``` rust
1322+ use zipora :: {MixedLenBlobStore , BlobStore };
1323+
1324+ // Hybrid storage for datasets with mixed fixed/variable-length records
1325+ let records = vec! [
1326+ b " FIXED" . to_vec (), // 5 bytes (common length)
1327+ b " FIXED" . to_vec (), // 5 bytes
1328+ b " FIXED" . to_vec (), // 5 bytes
1329+ b " VARIABLE LENGTH" . to_vec (), // Different length
1330+ b " FIXED" . to_vec (), // 5 bytes
1331+ ];
1332+
1333+ let store = MixedLenBlobStore :: build_from (records , 5 ). unwrap ();
1334+
1335+ // A rank/select bitmap automatically distinguishes fixed-length from variable-length records
1336+ let id = 0 ;
1337+ let data = store . get (id ). unwrap ();
1338+ assert_eq! (data , b " FIXED" );
1339+
1340+ // Best for datasets where ≥50% of records share the same length
1341+ let stats = store . stats ();
1342+ println! (" Fixed-length ratio: {:.1}%" ,
1343+ stats . blob_count as f64 / store . len () as f64 * 100.0 );
1344+ ```
1345+
12631346### Blob Storage Performance Summary
12641347
12651348| Storage Type | Memory Efficiency | Throughput | Features | Best Use Case |
12661349| --------------| ------------------| ------------| ----------| ---------------|
12671350| ** NestLoudsTrieBlobStore** | ** Trie compression + blob compression** | ** O(key) access + O(1) blob retrieval** | ** String indexing, prefix queries** | ** Hierarchical data, key-value stores** |
12681351| ** ZipOffsetBlobStore** | ** Block-based delta compression** | ** O(1) offset-based access** | ** Template optimization, ZSTD** | ** Large datasets, streaming access** |
1352+ | ** ZeroLengthBlobStore** | ** O(1) overhead** | ** O(1) all operations** | ** Bitmap-only storage** | ** Sparse indexes, empty records** |
1353+ | ** SimpleZipBlobStore** | ** Fragment deduplication** | ** O(1) indexed access** | ** Delimiter-based splitting** | ** Logs, JSON, shared substrings** |
1354+ | ** MixedLenBlobStore** | ** Rank/select hybrid** | ** O(1) bitmap + vector** | ** Fixed/variable separation** | ** Mixed-length datasets** |
12691355| ** LRU Page Cache** | ** Page-aligned allocation** | ** Reduced contention** | ** Multi-shard architecture** | ** High-concurrency access** |
12701356
12711357## Memory Management
@@ -1985,19 +2071,6 @@ let mut external_sorter = ReplaceSelectSort::new(config);
19852071let large_dataset = (0 .. 10_000_000 ). rev (). collect :: <Vec <u32 >>();
19862072let sorted = external_sorter . sort (large_dataset ). unwrap ();
19872073
1988- // Legacy Tournament Tree (still available)
1989- let tree_config = LoserTreeConfig {
1990- initial_capacity : 16 ,
1991- stable_sort : true ,
1992- cache_optimized : true ,
1993- .. Default :: default ()
1994- };
1995- let mut tournament_tree = LoserTree :: new (tree_config );
1996- tournament_tree . add_way (vec! [1 , 4 , 7 , 10 ]. into_iter ()). unwrap ();
1997- tournament_tree . add_way (vec! [2 , 5 , 8 , 11 ]. into_iter ()). unwrap ();
1998- tournament_tree . add_way (vec! [3 , 6 , 9 , 12 ]. into_iter ()). unwrap ();
1999- let merged = tournament_tree . merge_to_vec (). unwrap ();
2000-
20012074// 🚀 Sophisticated Suffix Array Construction with 5 Algorithm Variants + Adaptive Selection
20022075let text = b " banana" ;
20032076
@@ -3658,23 +3731,38 @@ println!("Compression ratio: {:.1}%", stats.compression_ratio() * 100.0);
36583731println! (" Dictionary hit rate: {:.2}%" , stats . dictionary_hit_rate * 100.0 );
36593732```
36603733
3661- ### Advanced Entropy Coding Algorithms
3734+ ### Advanced Entropy Coding Algorithms ✅
3735+
3736+ ** Fully Implemented in Zipora 2.0:**
3737+ - ✅ ** Huffman Order-0/1/2** : Context-dependent encoding with 256/1024 optimized trees
3738+ - ✅ ** FSE Interleaving** : Parallel block processing with hardware acceleration
3739+ - ✅ ** 64-bit rANS** : Adaptive frequencies with X1/X2/X4/X8 parallel variants
3740+ - ✅ ** SIMD Optimizations** : AVX2 and BMI2 acceleration across all encoders
36623741
36633742``` rust
36643743use zipora :: entropy :: * ;
36653744
3666- // 🚀 Contextual Huffman coding with Order-1/Order-2 models
3745+ // 🚀 Contextual Huffman coding with Order-1/Order-2 models (FULLY IMPLEMENTED)
36673746let contextual_encoder = ContextualHuffmanEncoder :: new (b " training data" , HuffmanOrder :: Order1 ). unwrap ();
36683747let compressed = contextual_encoder . encode (b " sample data" ). unwrap ();
36693748
3749+ // Order-2 Huffman for even better compression (exceeds reference implementation)
3750+ let order2_encoder = ContextualHuffmanEncoder :: new (b " training data" , HuffmanOrder :: Order2 ). unwrap ();
3751+ let better_compressed = order2_encoder . encode (b " sample data" ). unwrap ();
3752+
36703753// 🚀 64-bit rANS with parallel variants
36713754let mut frequencies = [1u32 ; 256 ];
36723755for & byte in b " sample data" { frequencies [byte as usize ] += 1 ; }
36733756let rans_encoder = Rans64Encoder :: <ParallelX4 >:: new (& frequencies ). unwrap ();
36743757let compressed = rans_encoder . encode (b " sample data" ). unwrap ();
36753758
3676- // 🚀 FSE with ZSTD optimizations
3677- let mut fse_encoder = FseEncoder :: new (FseConfig :: high_compression ()). unwrap ();
3759+ // 🚀 FSE with ZSTD optimizations and parallel block interleaving (FULLY IMPLEMENTED)
3760+ let fse_config = FseConfig {
3761+ parallel_blocks : true , // Enable parallel block processing
3762+ advanced_states : true , // Advanced state management
3763+ .. FseConfig :: high_compression ()
3764+ };
3765+ let mut fse_encoder = FseEncoder :: new (fse_config ). unwrap ();
36783766let compressed = fse_encoder . compress (b " sample data" ). unwrap ();
36793767
36803768// 🚀 Parallel encoding with adaptive selection
@@ -3862,12 +3950,6 @@ CSecurePooledPtr* ptr = secure_memory_pool_allocate(pool);
38623950secure_pooled_ptr_free(ptr);
38633951secure_memory_pool_free(pool);
38643952
3865- // Traditional pools (legacy, less secure)
3866- CMemoryPool* old_pool = memory_pool_new(64 * 1024, 100);
3867- void* chunk = memory_pool_allocate(old_pool);
3868- memory_pool_deallocate(old_pool, chunk);
3869- memory_pool_free(old_pool);
3870-
38713953// Error handling
38723954zipora_set_error_callback(error_callback);
38733955if (fast_vec_push(NULL, 42) != CResult_Success) {
0 commit comments