feat: experiment with different implementations of LSEQ
commit 1e45ef9314
23 changed files with 3578 additions and 0 deletions
research/src/algorithms/lseq_base64.rs (new file, 613 lines)
@@ -0,0 +1,613 @@
use rand::Rng;
use std::error::Error;
use std::fmt;
use log::{trace, debug};

const BOUNDARY: u64 = 40; // The paper says this can be any constant

// The maximum level is 9 because the maximum value of a level is 2^(6+6*9) - 1,
// which is 2^60 - 1, which fits in u64. At level 10, we would have 2^66 - 1,
// which exceeds u64 capacity.
const MAX_LEVEL: usize = 9;

// Python program used to generate LEVEL_DIGITS_LOOKUP:
// ```python
// def compute_level_digits():
//     digits = []
//     for i in range(10):
//         max_value = (64 * (64 ** i)) - 1  # 64^(i+1) - 1 = 2^(6+6*i) - 1
//         num_digits = len(str(max_value))
//         digits.append(num_digits)
//     return digits
//
// if __name__ == "__main__":
//     digits = compute_level_digits()
//     print(f"const LEVEL_DIGITS_LOOKUP: [usize; 10] = {digits};")
// ```

// Precomputed number of digits needed for each level (0-9)
// Level i has max value of 2^(6+6*i) - 1, so we need enough digits to represent that
const LEVEL_DIGITS_LOOKUP: [usize; 10] = [
    2, 4, 6, 8, 10, 11, 13, 15, 17, 19
];

/// L-SEQ implementation with 64 slots at level 0, multiplying by 64 at each deeper level
pub struct LSEQBase64<R: Rng + std::fmt::Debug> {
    /// Strategy vector - true for + strategy, false for - strategy
    strategies: Vec<bool>,
    /// Random number generator
    rng: R,
}

/// Sort key implementation for 64-slot L-SEQ
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct SortKeyBase64 {
    levels: Vec<u64>,
}

impl SortKeyBase64 {
    pub fn new(levels: Vec<u64>) -> Self {
        Self { levels }
    }

    pub fn levels(&self) -> &[u64] {
        &self.levels
    }

    /// Calculate the number of base64 characters needed for the maximally encoded form.
    /// In this compact encoding, level i needs exactly (i+1) base64 characters:
    /// - Level 0: 1 character (6 bits, 0-63)
    /// - Level 1: 2 characters (12 bits, 0-4095)
    /// - Level 2: 3 characters (18 bits, 0-262143)
    /// - etc.
    /// No separators are needed since we know the structure.
    pub fn max_base64_chars(&self) -> usize {
        self.levels.iter().enumerate().map(|(level, _)| level + 1).sum()
    }
}

/// Get the number of slots for a given level (64 * 64^level = 64^(level+1))
#[allow(dead_code)]
fn get_level_slots(level: usize) -> u64 {
    let base_slots = 64u64;
    let multiplier = 64u64.checked_pow(level as u32)
        .expect("Level exceeds u64 representation capacity");

    base_slots.checked_mul(multiplier)
        .expect("Level slots exceed u64 capacity")
}

impl fmt::Display for SortKeyBase64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let parts: Vec<String> = self.levels.iter().map(|&x| x.to_string()).collect();
        write!(f, "{}", parts.join("."))
    }
}

impl fmt::Debug for SortKeyBase64 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let parts: Vec<String> = self.levels.iter().enumerate().map(|(level, &value)| {
            let digits = if level <= MAX_LEVEL {
                LEVEL_DIGITS_LOOKUP[level]
            } else {
                // For levels beyond MAX_LEVEL, use the same digit count as MAX_LEVEL
                // since we're capping at 2^60 - 1
                LEVEL_DIGITS_LOOKUP[MAX_LEVEL]
            };
            format!("{:0width$}", value, width = digits)
        }).collect();
        write!(f, "{}", parts.join("."))
    }
}

impl<R: Rng + std::fmt::Debug> LSEQBase64<R> {
    pub fn new(rng: R) -> Self {
        Self {
            strategies: Vec::new(),
            rng,
        }
    }

    /// Set strategies for testing purposes
    #[cfg(test)]
    pub fn set_strategies(&mut self, strategies: Vec<bool>) {
        self.strategies = strategies;
    }

    /// Allocate a new identifier between two existing identifiers
    pub fn allocate(&mut self, before: Option<&SortKeyBase64>, after: Option<&SortKeyBase64>) -> Result<SortKeyBase64, Box<dyn Error>> {
        // Convert to the format expected by the paper's algorithm
        let p = before.map_or(vec![0], |k| k.levels().to_vec());
        let q = after.map_or(vec![self.get_depth_max(0)], |k| k.levels().to_vec());

        let levels = self.alloc(&p, &q);
        let key = SortKeyBase64::new(levels);

        // Debug assertions to verify the allocated key is properly ordered
        if let Some(before_key) = before {
            debug_assert!(
                before_key < &key,
                "ORDERING VIOLATION: before < allocated failed\n\
                 before = {:?} (internal: {:?})\n\
                 allocated = {:?} (internal: {:?})\n\
                 after = {} (internal: {:?})\n\
                 Expected: before < allocated < after",
                before_key, before_key.levels(),
                key, key.levels(),
                after.map(|k| format!("{:?}", k)).unwrap_or_else(|| "None".to_string()),
                after.map(|k| k.levels()).unwrap_or(&[])
            );
        }

        if let Some(after_key) = after {
            debug_assert!(
                &key < after_key,
                "ORDERING VIOLATION: allocated < after failed\n\
                 before = {} (internal: {:?})\n\
                 allocated = {:?} (internal: {:?})\n\
                 after = {:?} (internal: {:?})\n\
                 Expected: before < allocated < after",
                before.map(|k| format!("{:?}", k)).unwrap_or_else(|| "None".to_string()),
                before.map(|k| k.levels()).unwrap_or(&[]),
                key, key.levels(),
                after_key, after_key.levels()
            );
        }

        Ok(key)
    }

    /// Get the maximum value for a given level (64^(level+1) - 1 = 2^(6+6*level) - 1)
    /// For levels beyond 9, we cap at 2^60 - 1 to avoid u64 overflow
    fn get_depth_max(&self, depth: usize) -> u64 {
        let max_val = if depth <= MAX_LEVEL {
            (1 << (6 + 6 * depth)) - 1
        } else {
            // Cap at 2^60 - 1 for levels beyond 9
            (1 << 60) - 1
        };
        trace!("get_depth_max({}) -> {}", depth, max_val);
        max_val
    }

    fn alloc(&mut self, p: &[u64], q: &[u64]) -> Vec<u64> {
        debug!("Starting allocation between p={:?} and q={:?}", p, q);
        if !(p.is_empty() && q.is_empty()) {
            debug_assert_ne!(p, q, "Cannot allocate between identical positions: p={:?}, q={:?}", p, q);
        }

        let mut borrow_flag = false;
        let max_levels = std::cmp::max(p.len(), q.len()) + 1;
        let mut result = Vec::with_capacity(max_levels);

        trace!("Initial state: carry_flag={}, max_levels={}", borrow_flag, max_levels);

        // Phase 1: Find the allocation depth
        for depth in 0..max_levels {
            trace!("=== Processing depth {} ===", depth);
            trace!("Current result so far: {:?}", result);
            trace!("Current carry_flag: {}", borrow_flag);

            if self.strategies.len() <= depth {
                let new_strategy = self.rng.gen_bool(0.5);
                trace!("BRANCH: Generating new strategy for depth {}: {} (+ strategy: {})",
                       depth, new_strategy, new_strategy);
                self.strategies.push(new_strategy);
            } else {
                trace!("Using existing strategy for depth {}: {} (+ strategy: {})",
                       depth, self.strategies[depth], self.strategies[depth]);
            }

            let p_val = if depth < p.len() {
                trace!("BRANCH: p_val from p[{}] = {}", depth, p[depth]);
                p[depth]
            } else {
                trace!("BRANCH: p_val defaulted to 0 (depth {} >= p.len() {})", depth, p.len());
                0
            };

            let q_val = if borrow_flag {
                let max_val = self.get_depth_max(depth);
                trace!("BRANCH: q_val from get_depth_max({}) = {} (carry_flag=true)", depth, max_val);
                max_val
            } else if depth < q.len() {
                trace!("BRANCH: q_val from q[{}] = {} (carry_flag=false)", depth, q[depth]);
                q[depth]
            } else {
                trace!("BRANCH: q_val defaulted to 0 (depth {} >= q.len() {}, carry_flag=false)", depth, q.len());
                0
            };

            trace!("At depth {}: p_val={}, q_val={}, gap={}", depth, p_val, q_val, q_val.saturating_sub(p_val));

            if p_val == q_val {
                trace!("BRANCH: Values equal at depth {} (p_val={}, q_val={}), extending prefix and going deeper",
                       depth, p_val, q_val);
                result.push(p_val);
                continue;
            }

            if q_val < p_val {
                trace!("BRANCH: ERROR - q_val < p_val at depth {} (q_val={}, p_val={})", depth, q_val, p_val);
                debug_assert!(q_val > p_val, "q < p at depth {}", depth);
                // We know that q > p overall, and we know that we had a shared
                // prefix up until this point, therefore q_val must be greater than p_val
                // TODO I might want to return an error here instead of panicking
            }

            let gap = q_val - p_val;
            if gap > 1 {
                // Enough space at this level
                trace!("BRANCH: Sufficient space found at depth {} (gap={} > 1)", depth, gap);
                let interval = gap - 1;
                let step = std::cmp::min(BOUNDARY, interval);

                let allocated_value = if self.strategies[depth] {
                    let delta = self.rng.gen_range(1..=step);
                    trace!("Space allocation: interval={}, step={}, delta={}", interval, step, delta);
                    let val = p_val + delta;
                    trace!("BRANCH: Using + strategy, allocated_value = p_val + delta = {} + {} = {}",
                           p_val, delta, val);
                    val
                } else {
                    let delta = if borrow_flag {
                        //self.rng.gen_range(0..step)
                        self.rng.gen_range(1..=step)
                    } else {
                        self.rng.gen_range(1..=step)
                    };
                    trace!("Space allocation: interval={}, step={}, delta={}", interval, step, delta);
                    let val = q_val - delta;
                    trace!("BRANCH: Using - strategy, allocated_value = q_val - delta = {} - {} = {}",
                           q_val, delta, val);
                    val
                };

                result.push(allocated_value);
                trace!("BRANCH: Allocation complete at depth {}, final result: {:?}", depth, result);
                return result;
            } else {
                trace!("BRANCH: Insufficient space at depth {} (gap={} <= 1), extending prefix and setting carry_flag",
                       depth, gap);
                result.push(p_val);
                borrow_flag = true;
                trace!("Updated state: result={:?}, carry_flag={}", result, borrow_flag);
            }
        }

        trace!("BRANCH: Loop completed without allocation, returning result: {:?}", result);
        result
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use rand::rngs::StdRng;
    use rand::SeedableRng;

    #[test]
    fn test_level_max() {
        let lseq = LSEQBase64::new(StdRng::seed_from_u64(42));

        // Level 0: 64 slots (0-63)
        assert_eq!(lseq.get_depth_max(0), 63);
        // Level 1: 4096 slots (0-4095)
        assert_eq!(lseq.get_depth_max(1), 4095);
        // Level 2: 262144 slots (0-262143)
        assert_eq!(lseq.get_depth_max(2), 262143);
        // Level 3: 16777216 slots (0-16777215)
        assert_eq!(lseq.get_depth_max(3), 16777215);
    }

    #[test]
    fn test_basic_allocation() {
        let mut lseq = LSEQBase64::new(StdRng::seed_from_u64(42));

        let key1 = lseq.allocate(None, None).unwrap();
        let key2 = lseq.allocate(Some(&key1), None).unwrap();
        let key3 = lseq.allocate(None, Some(&key1)).unwrap();

        assert!(key3 < key1);
        assert!(key1 < key2);
    }

    #[test]
    fn test_sort_key_ordering() {
        let key1 = SortKeyBase64::new(vec![5]);
        let key2 = SortKeyBase64::new(vec![5, 10]);
        let key3 = SortKeyBase64::new(vec![6]);

        assert!(key1 < key2);
        assert!(key2 < key3);
    }

    #[test]
    fn test_boundary_usage() {
        let mut lseq = LSEQBase64::new(StdRng::seed_from_u64(42));

        // Create keys with large gaps to test boundary limiting
        let key1 = SortKeyBase64::new(vec![0]);
        let key2 = SortKeyBase64::new(vec![63]);

        // Allocate between them - should use BOUNDARY to limit step
        let key_between = lseq.allocate(Some(&key1), Some(&key2)).unwrap();

        // The new key should be valid
        assert!(key1 < key_between);
        assert!(key_between < key2);
    }

    #[test]
    fn test_allocation_beyond_max_level() {
        let mut lseq = LSEQBase64::new(StdRng::seed_from_u64(42));

        // Create two identifiers that are identical at every level up to MAX_LEVEL,
        // but differ by 1 at the MAX_LEVEL position. This forces the algorithm
        // to keep going deeper beyond MAX_LEVEL.

        // Build p: [0, 0, 0, ..., 0, max_value_at_MAX_LEVEL - 1]
        let mut p = vec![0u64; MAX_LEVEL + 1];
        let max_value_at_max_level = (1u64 << (6 + 6 * MAX_LEVEL)) - 1;
        p[MAX_LEVEL] = max_value_at_max_level - 1;

        // Build q: [0, 0, 0, ..., 0, max_value_at_MAX_LEVEL]
        let mut q = vec![0u64; MAX_LEVEL + 1];
        q[MAX_LEVEL] = max_value_at_max_level;

        let p_key = SortKeyBase64::new(p);
        let q_key = SortKeyBase64::new(q);

        // This should now succeed by allocating at depth MAX_LEVEL + 1 with capped max value
        let allocated_key = lseq.allocate(Some(&p_key), Some(&q_key)).unwrap();

        // Verify the allocated key is properly ordered
        assert!(p_key < allocated_key, "p_key < allocated_key should be true");
        assert!(allocated_key < q_key, "allocated_key < q_key should be true");

        // The allocated key should be at least MAX_LEVEL + 2 levels deep
        assert!(allocated_key.levels().len() >= MAX_LEVEL + 2,
                "Allocated key should be at least {} levels deep, got {}",
                MAX_LEVEL + 2, allocated_key.levels().len());
    }

    #[test]
    fn test_formatting() {
        // Test with various values to verify digit padding
        let xs = vec![5, 6, 7, 8, 9];
        assert_eq!(SortKeyBase64::new(xs.clone()).to_string(), "5.6.7.8.9");
        assert_eq!(format!("{:?}", SortKeyBase64::new(xs)), "05.0006.000007.00000008.0000000009");

        let ys = vec![5, 10, 63, 127, 4095];
        assert_eq!(SortKeyBase64::new(ys.clone()).to_string(), "5.10.63.127.4095");
        assert_eq!(format!("{:?}", SortKeyBase64::new(ys)), "05.0010.000063.00000127.0000004095");
    }

    #[test]
    fn test_level_digits_lookup_correctness() {
        // Validate that our precomputed lookup table matches the actual calculation
        for i in 0..=MAX_LEVEL {
            let max_value = (1u64 << (6 + 6 * i)) - 1;
            let expected_digits = max_value.to_string().len();

            assert_eq!(
                LEVEL_DIGITS_LOOKUP[i],
                expected_digits,
                "Level {} digit count mismatch: lookup={}, calculated={}, max_value={}",
                i, LEVEL_DIGITS_LOOKUP[i], expected_digits, max_value
            );
        }
    }

    #[test]
    fn test_get_level_slots() {
        // Test that get_level_slots function works correctly
        assert_eq!(get_level_slots(0), 64);       // 64 * 64^0 = 64
        assert_eq!(get_level_slots(1), 4096);     // 64 * 64^1 = 4096
        assert_eq!(get_level_slots(2), 262144);   // 64 * 64^2 = 262144
        assert_eq!(get_level_slots(3), 16777216); // 64 * 64^3 = 16777216
    }

    #[test]
    fn test_max_base64_chars() {
        // Test the compact base64 encoding calculation (no separators)
        // Level i needs exactly (i+1) base64 characters in this encoding
        let key1 = SortKeyBase64::new(vec![5]); // Level 0 only
        assert_eq!(key1.max_base64_chars(), 1); // 1 character for level 0

        let key2 = SortKeyBase64::new(vec![5, 10]); // Levels 0 and 1
        assert_eq!(key2.max_base64_chars(), 3); // 1 + 2 characters for levels 0 and 1

        let key3 = SortKeyBase64::new(vec![5, 10, 15]); // Levels 0, 1, and 2
        assert_eq!(key3.max_base64_chars(), 6); // 1 + 2 + 3 characters for levels 0, 1, and 2

        let key4 = SortKeyBase64::new(vec![1, 2, 3, 4, 5]); // Levels 0-4
        assert_eq!(key4.max_base64_chars(), 15); // 1 + 2 + 3 + 4 + 5 = 15
    }

    #[test]
    fn test_reproduce_ordering_violation_bug() {
        // Initialize logger with trace level for this test
        let _ = env_logger::Builder::from_default_env()
            .filter_level(log::LevelFilter::Trace)
            .is_test(true)
            .try_init();

        // This test reproduces the exact bug found in random insertion:
        // ORDERING VIOLATION: allocated < after failed
        // before = "52.0034" (internal: [52, 34])
        // allocated = 52.0035.262119 (internal: [52, 35, 262119])
        // after = 52.0035 (internal: [52, 35])
        // Expected: before < allocated < after

        let mut lseq = LSEQBase64::new(StdRng::seed_from_u64(42));

        // Create the before and after keys from the bug report
        let before_key = SortKeyBase64::new(vec![52, 34]);
        let after_key = SortKeyBase64::new(vec![52, 35]);

        // Verify the keys are properly ordered before we start
        assert!(before_key < after_key, "Sanity check: before < after should be true");

        // Try to allocate between them - this should succeed and maintain ordering
        let allocated_key = lseq.allocate(Some(&before_key), Some(&after_key)).unwrap();

        // Verify the allocated key is properly ordered
        assert!(before_key < allocated_key, "before < allocated should be true, got before={:?}, allocated={:?}", before_key, allocated_key);
        assert!(allocated_key < after_key, "allocated < after should be true, got allocated={:?}, after={:?}", allocated_key, after_key);
    }

    #[test]
    fn test_reproduce_specific_ordering_violation_bug() {
        // Initialize logger with trace level for this test
        let _ = env_logger::Builder::from_default_env()
            .filter_level(log::LevelFilter::Trace)
            .is_test(true)
            .try_init();

        // This test reproduces a specific ordering violation bug found in random insertion:
        // ORDERING VIOLATION: before < allocated failed
        // before = 51.0038 (internal: [51, 38])
        // allocated = 51.0017 (internal: [51, 17])
        // after = 52 (internal: [52])
        // Expected: before < allocated < after

        // Create the before and after keys from the bug report
        let before_key = SortKeyBase64::new(vec![51, 38]);
        let after_key = SortKeyBase64::new(vec![52]);

        // Verify the keys are properly ordered before we start
        assert!(before_key < after_key, "Sanity check: before < after should be true");

        let mut violations_found = Vec::new();

        // Loop over 1000 different seeds to see if we can reproduce the failure
        for seed in 0..1000 {
            let mut lseq: LSEQBase64<StdRng> = LSEQBase64::new(StdRng::seed_from_u64(seed));

            // Initialize strategies to match the bug condition: [false, true, true]
            lseq.set_strategies(vec![false, true, true]);

            // Try to allocate between them
            match lseq.allocate(Some(&before_key), Some(&after_key)) {
                Ok(allocated_key) => {
                    // Check for ordering violations
                    let before_violation = !(before_key < allocated_key);
                    let after_violation = !(allocated_key < after_key);

                    if before_violation || after_violation {
                        violations_found.push((seed, allocated_key.clone(), before_violation, after_violation));

                        eprintln!("ORDERING VIOLATION found with seed {}:
                            before = {:?} (internal: {:?})
                            allocated = {:?} (internal: {:?})
                            after = {:?} (internal: {:?})
                            before_violation: {} (before < allocated = {})
                            after_violation: {} (allocated < after = {})",
                            seed,
                            before_key, before_key.levels(),
                            allocated_key, allocated_key.levels(),
                            after_key, after_key.levels(),
                            before_violation, before_key < allocated_key,
                            after_violation, allocated_key < after_key
                        );
                    }
                }
                Err(e) => {
                    eprintln!("Allocation failed with seed {}: {}", seed, e);
                }
            }
        }

        if !violations_found.is_empty() {
            panic!("Found {} ordering violations out of 1000 seeds tested. First violation was with seed {}",
                   violations_found.len(), violations_found[0].0);
        } else {
            println!("No ordering violations found across 1000 different seeds for the specific test case.");
        }
    }

    #[test]
    fn test_allocate_between_prefix_and_deep_extension() {
        // Initialize logger with trace level for this test
        let _ = env_logger::Builder::from_default_env()
            .filter_level(log::LevelFilter::Trace)
            .is_test(true)
            .try_init();

        // Test allocating between [3] and [3, 0, 0, 0, 2]
        // This tests the case where we have a short key and a longer key that extends it deeply
        let mut lseq = LSEQBase64::new(StdRng::seed_from_u64(42));

        let before_key = SortKeyBase64::new(vec![3]);
        let after_key = SortKeyBase64::new(vec![3, 0, 0, 0, 2]);

        // Verify the keys are properly ordered before we start
        assert!(before_key < after_key, "Sanity check: before < after should be true");

        // Allocate between them
        let allocated_key = lseq.allocate(Some(&before_key), Some(&after_key)).unwrap();

        // Verify the allocated key is properly ordered
        assert!(before_key < allocated_key,
                "before < allocated should be true, got before={:?}, allocated={:?}",
                before_key, allocated_key);
        assert!(allocated_key < after_key,
                "allocated < after should be true, got allocated={:?}, after={:?}",
                allocated_key, after_key);

        // The allocated key should start with [3] since that's the common prefix
        assert_eq!(allocated_key.levels()[0], 3, "Allocated key should start with 3");

        // The allocated key should be exactly 5 levels deep to fit between [3] and [3, 0, 0, 0, 2]
        assert_eq!(allocated_key.levels().len(), 5,
                   "Allocated key should be 5 levels deep, got {:?}", allocated_key.levels());

        println!("Successfully allocated between [3] and [3, 0, 0, 0, 2]: {:?}", allocated_key);
    }

    #[test]
    fn test_allocate_between_max_value_and_next_level() {
        // Initialize logger with trace level for this test
        let _ = env_logger::Builder::from_default_env()
            .filter_level(log::LevelFilter::Trace)
            .is_test(true)
            .try_init();

        // Test allocating between [2, 64^2 - 1] and [3, 0]
        // This tests suffix space allocation when the before key has the max value at a level
        let mut lseq = LSEQBase64::new(StdRng::seed_from_u64(42));

        let level_1_max = 64u64.pow(2) - 1; // 4095
        let before_key = SortKeyBase64::new(vec![2, level_1_max]);
        let after_key = SortKeyBase64::new(vec![3, 0]);

        // Verify the keys are properly ordered before we start
        assert!(before_key < after_key, "Sanity check: before < after should be true");

        // Allocate between them
        let allocated_key = lseq.allocate(Some(&before_key), Some(&after_key)).unwrap();

        // Verify the allocated key is properly ordered
        assert!(before_key < allocated_key,
                "before < allocated should be true, got before={:?}, allocated={:?}",
                before_key, allocated_key);
        assert!(allocated_key < after_key,
                "allocated < after should be true, got allocated={:?}, after={:?}",
                allocated_key, after_key);

        // Since [2] and [3] differ by 1, we should be allocating in suffix space after [2, 4095]
        // The allocated key should start with [2, 4095] as prefix
        assert_eq!(allocated_key.levels()[0], 2, "Allocated key should start with 2");
        assert_eq!(allocated_key.levels()[1], level_1_max, "Allocated key should have max value at level 1");

        // The allocated key should be at least 3 levels deep for suffix space allocation
        assert!(allocated_key.levels().len() >= 3,
                "Allocated key should be at least 3 levels deep for suffix allocation, got {:?}",
                allocated_key.levels());

        println!("Successfully allocated between [2, {}] and [3, 0]: {:?}", level_1_max, allocated_key);
    }
}
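For orientation, here is a minimal usage sketch of the base-64 variant above. It is not part of the commit: the crate path is borrowed from the analyzer binary later in this diff (`peoplesgrocers_lseq_research`), and the RNG setup mirrors the tests.

```rust
use peoplesgrocers_lseq_research::algorithms::lseq_base64::LSEQBase64;
use rand::rngs::StdRng;
use rand::SeedableRng;

fn main() {
    // Deterministic RNG so repeated runs allocate the same keys.
    let mut lseq = LSEQBase64::new(StdRng::seed_from_u64(7));

    // First key, then a key after it, then a key squeezed between the two.
    let first = lseq.allocate(None, None).unwrap();
    let last = lseq.allocate(Some(&first), None).unwrap();
    let middle = lseq.allocate(Some(&first), Some(&last)).unwrap();

    assert!(first < middle && middle < last);

    // Display prints dot-separated level values; max_base64_chars() reports how
    // many characters the compact encoding described in the doc comments needs.
    println!("{} -> {} base64 chars", middle, middle.max_base64_chars());
}
```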
research/src/algorithms/mod.rs (new file, 5 lines)
@@ -0,0 +1,5 @@
pub mod original_paper_reference_impl;
pub mod lseq_base64;

pub use original_paper_reference_impl::ReferenceLSEQ;
pub use lseq_base64::LSEQBase64;
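These re-exports let downstream code import either the flat names or the full module paths. A hypothetical consumer, purely illustrative (the crate name comes from the analyzer binary below; nothing here is in the commit):

```rust
use peoplesgrocers_lseq_research::algorithms::{LSEQBase64, ReferenceLSEQ}; // flat re-exports
use peoplesgrocers_lseq_research::algorithms::lseq_base64::SortKeyBase64;  // or the full path
use rand::rngs::StdRng;
use rand::SeedableRng;

fn main() {
    let mut base64 = LSEQBase64::new(StdRng::seed_from_u64(1));
    let mut reference = ReferenceLSEQ::new(StdRng::seed_from_u64(1));
    let a: SortKeyBase64 = base64.allocate(None, None).unwrap();
    let b = reference.allocate(None, None).unwrap();
    println!("{a} vs {b}");
}
```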
research/src/algorithms/original_paper_reference_impl.rs (new file, 501 lines)
@@ -0,0 +1,501 @@
use rand::Rng;
use std::error::Error;
use std::fmt;
use log::{trace, debug};

const BOUNDARY: u64 = 10; // The paper says this can be any constant

// The maximum level is 58 because the maximum value of a level is 2^(4+58) - 1,
// which is 2^62 - 1 and still fits in a signed 64-bit value, since the coding
// below is lazy and uses i64 to keep track of sign. This could be pushed to 59
// if we used u64 for the calculations.
const MAX_LEVEL: usize = 58;

// Python program used to generate LEVEL_DIGITS_LOOKUP:
// ```python
// def compute_level_digits():
//     digits = []
//     for i in range(59):
//         max_value = (16 * (2 ** i)) - 1  # 2^(4+i) - 1
//         num_digits = len(str(max_value))
//         digits.append(num_digits)
//     return digits
//
// if __name__ == "__main__":
//     digits = compute_level_digits()
//     print(f"const LEVEL_DIGITS_LOOKUP: [usize; 59] = {digits};")
// ```

// Precomputed number of digits needed for each level (0-58)
// Level i has max value of 2^(4+i) - 1, so we need enough digits to represent that
const LEVEL_DIGITS_LOOKUP: [usize; 59] = [
    2, 2, 2, 3, 3, 3, 4, 4, 4, 4,
    5, 5, 5, 6, 6, 6, 7, 7, 7, 7,
    8, 8, 8, 9, 9, 9, 10, 10, 10, 10,
    11, 11, 11, 12, 12, 12, 13, 13, 13, 13,
    14, 14, 14, 15, 15, 15, 16, 16, 16, 16,
    17, 17, 17, 18, 18, 18, 19, 19, 19,
];

/// Reference implementation of L-SEQ following the original paper
/// This is a direct, naive translation without optimizations
pub struct ReferenceLSEQ<R: Rng> {
    /// Strategy vector - true for + strategy, false for - strategy
    strategies: Vec<bool>,
    /// Random number generator
    rng: R,
}

/// Reference sort key implementation for the original paper
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct ReferenceSortKey {
    levels: Vec<u64>,
}

impl ReferenceSortKey {
    pub fn new(levels: Vec<u64>) -> Self {
        Self { levels }
    }

    pub fn levels(&self) -> &[u64] {
        &self.levels
    }

    /// Calculate the number of base64 characters needed to encode the full identifier.
    /// In this compact encoding, we pack all level bits together without separators:
    /// - Level 0: 4 bits (0-15)
    /// - Level 1: 5 bits (0-31)
    /// - Level 2: 6 bits (0-63)
    /// - etc.
    /// We sum all bits and encode as base64 (6 bits per character, rounding up).
    pub fn base64_chars_needed(&self) -> usize {
        let total_bits: usize = self.levels.iter().enumerate()
            .map(|(level, _)| 4 + level)
            .sum();

        // Round up to a whole number of base64 characters (6 bits each)
        (total_bits + 5) / 6
    }
}

impl fmt::Display for ReferenceSortKey {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let parts: Vec<String> = self.levels.iter().map(|&x| x.to_string()).collect();
        write!(f, "{}", parts.join("."))
    }
}

impl fmt::Debug for ReferenceSortKey {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let parts: Vec<String> = self.levels.iter().enumerate().map(|(level, &value)| {
            if level > MAX_LEVEL {
                panic!("Level exceeds u64 representation capacity");
            }
            let digits = LEVEL_DIGITS_LOOKUP[level];
            format!("{:0width$}", value, width = digits)
        }).collect();
        write!(f, "{}", parts.join("."))
    }
}

impl<R: Rng> ReferenceLSEQ<R> {
    pub fn new(rng: R) -> Self {
        Self {
            strategies: Vec::new(),
            rng,
        }
    }

    /// Set strategies for testing purposes
    #[cfg(test)]
    pub fn set_strategies(&mut self, strategies: Vec<bool>) {
        self.strategies = strategies;
    }

    /// Allocate a new identifier between two existing identifiers
    pub fn allocate(&mut self, before: Option<&ReferenceSortKey>, after: Option<&ReferenceSortKey>) -> Result<ReferenceSortKey, Box<dyn Error>> {
        // Convert to the format expected by the paper's algorithm
        let p = before.map_or(vec![0], |k| k.levels().to_vec());
        let q = after.map_or(vec![self.get_depth_max(0)], |k| k.levels().to_vec());

        let levels = self.alloc(&p, &q);
        let key = ReferenceSortKey::new(levels);

        // Debug assertions to verify the allocated key is properly ordered
        if let Some(before_key) = before {
            debug_assert!(
                before_key < &key,
                "ORDERING VIOLATION: before < allocated failed\n\
                 before = {:?} (internal: {:?})\n\
                 allocated = {:?} (internal: {:?})\n\
                 after = {} (internal: {:?})\n\
                 Expected: before < allocated < after",
                before_key, before_key.levels(),
                key, key.levels(),
                after.map(|k| format!("{:?}", k)).unwrap_or_else(|| "None".to_string()),
                after.map(|k| k.levels()).unwrap_or(&[])
            );
        }

        if let Some(after_key) = after {
            debug_assert!(
                &key < after_key,
                "ORDERING VIOLATION: allocated < after failed\n\
                 before = {} (internal: {:?})\n\
                 allocated = {:?} (internal: {:?})\n\
                 after = {:?} (internal: {:?})\n\
                 Expected: before < allocated < after",
                before.map(|k| format!("{:?}", k)).unwrap_or_else(|| "None".to_string()),
                before.map(|k| k.levels()).unwrap_or(&[]),
                key, key.levels(),
                after_key, after_key.levels()
            );
        }

        Ok(key)
    }

    /// Get the maximum value for a given level (16 * 2^level - 1)
    /// For levels beyond MAX_LEVEL, we cap at 2^62 - 1 to avoid u64 overflow
    fn get_depth_max(&self, depth: usize) -> u64 {
        let max_val = if depth <= MAX_LEVEL {
            (1 << (4 + depth)) - 1
        } else {
            // Cap at 2^62 - 1 for levels beyond MAX_LEVEL
            (1 << 62) - 1
        };
        trace!("get_depth_max({}) -> {}", depth, max_val);
        max_val
    }

    fn alloc(&mut self, p: &[u64], q: &[u64]) -> Vec<u64> {
        debug!("Starting allocation between p={:?} and q={:?}", p, q);
        if !(p.is_empty() && q.is_empty()) {
            debug_assert_ne!(p, q, "Cannot allocate between identical positions: p={:?}, q={:?}", p, q);
        }

        let mut borrow_flag = false;
        let max_levels = std::cmp::max(p.len(), q.len()) + 1;
        let mut result = Vec::with_capacity(max_levels);

        trace!("Initial state: carry_flag={}, max_levels={}", borrow_flag, max_levels);

        // Phase 1: Find the allocation depth using continued fraction approach
        for depth in 0..max_levels {
            trace!("=== Processing depth {} ===", depth);
            trace!("Current result so far: {:?}", result);
            trace!("Current carry_flag: {}", borrow_flag);

            if self.strategies.len() <= depth {
                let new_strategy = self.rng.gen_bool(0.5);
                trace!("BRANCH: Generating new strategy for depth {}: {} (+ strategy: {})",
                       depth, new_strategy, new_strategy);
                self.strategies.push(new_strategy);
            } else {
                trace!("Using existing strategy for depth {}: {} (+ strategy: {})",
                       depth, self.strategies[depth], self.strategies[depth]);
            }

            let p_val = if depth < p.len() {
                trace!("BRANCH: p_val from p[{}] = {}", depth, p[depth]);
                p[depth]
            } else {
                trace!("BRANCH: p_val defaulted to 0 (depth {} >= p.len() {})", depth, p.len());
                0
            };

            let q_val = if borrow_flag {
                let max_val = self.get_depth_max(depth);
                trace!("BRANCH: q_val from get_depth_max({}) = {} (carry_flag=true)", depth, max_val);
                max_val
            } else if depth < q.len() {
                trace!("BRANCH: q_val from q[{}] = {} (carry_flag=false)", depth, q[depth]);
                q[depth]
            } else {
                trace!("BRANCH: q_val defaulted to 0 (depth {} >= q.len() {}, carry_flag=false)", depth, q.len());
                0
            };

            trace!("At depth {}: p_val={}, q_val={}, gap={}", depth, p_val, q_val, q_val.saturating_sub(p_val));

            if p_val == q_val {
                trace!("BRANCH: Values equal at depth {} (p_val={}, q_val={}), extending prefix and going deeper",
                       depth, p_val, q_val);
                result.push(p_val);
                continue;
            }

            if q_val < p_val {
                trace!("BRANCH: ERROR - q_val < p_val at depth {} (q_val={}, p_val={})", depth, q_val, p_val);
                debug_assert!(q_val > p_val, "q < p at depth {}", depth);
                // We know that q > p overall, and we know that we had a shared
                // prefix up until this point, therefore q_val must be greater than p_val
                // TODO I might want to return an error here instead of panicking
            }

            let gap = q_val - p_val;
            if gap > 1 {
                // Enough space at this level
                trace!("BRANCH: Sufficient space found at depth {} (gap={} > 1)", depth, gap);
                let interval = gap - 1;
                let step = std::cmp::min(BOUNDARY, interval);

                let allocated_value = if self.strategies[depth] {
                    let delta = self.rng.gen_range(1..=step);
                    trace!("Space allocation: interval={}, step={}, delta={}", interval, step, delta);
                    let val = p_val + delta;
                    trace!("BRANCH: Using + strategy, allocated_value = p_val + delta = {} + {} = {}",
                           p_val, delta, val);
                    val
                } else {
                    let delta = if borrow_flag {
                        self.rng.gen_range(1..=step)
                    } else {
                        self.rng.gen_range(1..=step)
                    };
                    trace!("Space allocation: interval={}, step={}, delta={}", interval, step, delta);
                    let val = q_val - delta;
                    trace!("BRANCH: Using - strategy, allocated_value = q_val - delta = {} - {} = {}",
                           q_val, delta, val);
                    val
                };

                result.push(allocated_value);
                trace!("BRANCH: Allocation complete at depth {}, final result: {:?}", depth, result);
                return result;
            } else {
                trace!("BRANCH: Insufficient space at depth {} (gap={} <= 1), extending prefix and setting carry_flag",
                       depth, gap);
                result.push(p_val);
                borrow_flag = true;
                trace!("Updated state: result={:?}, carry_flag={}", result, borrow_flag);
            }
        }

        trace!("BRANCH: Loop completed without allocation, returning result: {:?}", result);
        result
    }
}

/// Get the number of slots for a given level (16 * 2^level)
#[allow(dead_code)]
fn get_level_slots(level: usize) -> u64 {
    let base_slots = 16u64;
    let multiplier = 2u64.checked_pow(level as u32)
        .expect("Level exceeds u64 representation capacity");

    base_slots.checked_mul(multiplier)
        .expect("Level slots exceed u64 capacity")
}

#[cfg(test)]
mod tests {
    use super::*;
    use rand::rngs::StdRng;
    use rand::SeedableRng;

    #[test]
    fn test_level_max() {
        let lseq = ReferenceLSEQ::new(StdRng::seed_from_u64(42));

        assert_eq!(lseq.get_depth_max(0), 15);
        assert_eq!(lseq.get_depth_max(1), 31);
        assert_eq!(lseq.get_depth_max(2), 63);
        assert_eq!(lseq.get_depth_max(3), 127);
    }

    #[test]
    fn test_basic_allocation() {
        let mut lseq = ReferenceLSEQ::new(StdRng::seed_from_u64(42));

        let key1 = lseq.allocate(None, None).unwrap();
        let key2 = lseq.allocate(Some(&key1), None).unwrap();
        let key3 = lseq.allocate(None, Some(&key1)).unwrap();

        assert!(key3 < key1);
        assert!(key1 < key2);
    }

    #[test]
    fn test_sort_key_ordering() {
        let key1 = ReferenceSortKey::new(vec![5]);
        let key2 = ReferenceSortKey::new(vec![5, 10]);
        let key3 = ReferenceSortKey::new(vec![6]);

        assert!(key1 < key2);
        assert!(key2 < key3);
    }

    #[test]
    fn test_boundary_usage() {
        let mut lseq = ReferenceLSEQ::new(StdRng::seed_from_u64(42));

        // Create keys with large gaps to test boundary limiting
        let key1 = ReferenceSortKey::new(vec![0]);
        let key2 = ReferenceSortKey::new(vec![15]);

        // Allocate between them - should use BOUNDARY to limit step
        let key_between = lseq.allocate(Some(&key1), Some(&key2)).unwrap();

        // The new key should be valid
        assert!(key1 < key_between);
        assert!(key_between < key2);
    }

    #[test]
    fn test_allocation_beyond_max_level() {
        let mut lseq = ReferenceLSEQ::new(StdRng::seed_from_u64(42));

        // Create two identifiers that are identical at every level up to MAX_LEVEL,
        // but differ by 1 at the MAX_LEVEL position. This forces the algorithm
        // to keep going deeper beyond MAX_LEVEL.

        // Build p: [0, 0, 0, ..., 0, max_value_at_MAX_LEVEL - 1]
        let mut p = vec![0u64; MAX_LEVEL + 1];
        let max_value_at_max_level = (1u64 << (4 + MAX_LEVEL)) - 1;
        p[MAX_LEVEL] = max_value_at_max_level - 1;

        // Build q: [0, 0, 0, ..., 0, max_value_at_MAX_LEVEL]
        let mut q = vec![0u64; MAX_LEVEL + 1];
        q[MAX_LEVEL] = max_value_at_max_level;

        let p_key = ReferenceSortKey::new(p);
        let q_key = ReferenceSortKey::new(q);

        // This should now succeed by allocating at depth MAX_LEVEL + 1 with capped max value
        let allocated_key = lseq.allocate(Some(&p_key), Some(&q_key)).unwrap();

        // Verify the allocated key is properly ordered
        assert!(p_key < allocated_key, "p_key < allocated_key should be true");
        assert!(allocated_key < q_key, "allocated_key < q_key should be true");

        // The allocated key should be at least MAX_LEVEL + 2 levels deep
        assert!(allocated_key.levels().len() >= MAX_LEVEL + 2,
                "Allocated key should be at least {} levels deep, got {}",
                MAX_LEVEL + 2, allocated_key.levels().len());
    }

    #[test]
    fn test_formatting() {
        // Test with values that need 3 digits at the 4th level (128 slots)

        let xs = vec![5, 6, 7, 8, 9];
        assert_eq!(ReferenceSortKey::new(xs.clone()).to_string(), "5.6.7.8.9");
        assert_eq!(format!("{:?}", ReferenceSortKey::new(xs)), "05.06.07.008.009");

        let ys = vec![5, 10, 63, 127];
        assert_eq!(ReferenceSortKey::new(ys.clone()).to_string(), "5.10.63.127");
        assert_eq!(format!("{:?}", ReferenceSortKey::new(ys)), "05.10.63.127");
    }

    #[test]
    fn test_level_digits_lookup_correctness() {
        // Validate that our precomputed lookup table matches the actual calculation
        for i in 0..=MAX_LEVEL {
            let max_value = (1u64 << (4 + i)) - 1;
            let expected_digits = max_value.to_string().len();

            assert_eq!(
                LEVEL_DIGITS_LOOKUP[i],
                expected_digits,
                "Level {} digit count mismatch: lookup={}, calculated={}, max_value={}",
                i, LEVEL_DIGITS_LOOKUP[i], expected_digits, max_value
            );
        }
    }

    #[test]
    fn test_base64_chars_needed() {
        // Test the compact base64 encoding calculation (no separators)
        let key1 = ReferenceSortKey::new(vec![5]); // Level 0 only: 4 bits
        assert_eq!(key1.base64_chars_needed(), 1); // 4 bits -> 1 base64 character

        let key2 = ReferenceSortKey::new(vec![5, 10]); // Levels 0 and 1: 4 + 5 = 9 bits
        assert_eq!(key2.base64_chars_needed(), 2); // 9 bits -> 2 base64 characters

        let key3 = ReferenceSortKey::new(vec![5, 10, 15]); // Levels 0, 1, and 2: 4 + 5 + 6 = 15 bits
        assert_eq!(key3.base64_chars_needed(), 3); // 15 bits -> 3 base64 characters

        let key4 = ReferenceSortKey::new(vec![1, 2, 3, 4, 5]); // Levels 0-4: 4 + 5 + 6 + 7 + 8 = 30 bits
        assert_eq!(key4.base64_chars_needed(), 5); // 30 bits -> 5 base64 characters

        // Test a 6-level key whose bit count is not a multiple of 6
        let key5 = ReferenceSortKey::new(vec![1, 2, 3, 4, 5, 6]); // Levels 0-5: 4 + 5 + 6 + 7 + 8 + 9 = 39 bits
        assert_eq!(key5.base64_chars_needed(), 7); // 39 bits -> 7 base64 characters (rounded up from 6.5)

        // Test a 7-level key
        let key6 = ReferenceSortKey::new(vec![1, 2, 3, 4, 5, 6, 7]); // Levels 0-6: 4+5+6+7+8+9+10 = 49 bits
        assert_eq!(key6.base64_chars_needed(), 9); // 49 bits -> 9 base64 characters (rounded up from 8.17)
    }

    #[test]
    fn test_continued_fraction_ordering_validation() {
        // Initialize logger with trace level for this test
        let _ = env_logger::Builder::from_default_env()
            .filter_level(log::LevelFilter::Trace)
            .is_test(true)
            .try_init();

        // Test the continued fraction approach with adjacent identifiers
        let mut lseq = ReferenceLSEQ::new(StdRng::seed_from_u64(42));

        // Create adjacent keys that need to use the continued fraction approach
        let before_key = ReferenceSortKey::new(vec![5, 10]);
        let after_key = ReferenceSortKey::new(vec![5, 11]);

        // Verify the keys are properly ordered before we start
        assert!(before_key < after_key, "Sanity check: before < after should be true");

        // Try to allocate between them - this should succeed using the continued fraction approach
        let allocated_key = lseq.allocate(Some(&before_key), Some(&after_key)).unwrap();

        // Verify the allocated key is properly ordered
        assert!(before_key < allocated_key, "before < allocated should be true, got before={:?}, allocated={:?}", before_key, allocated_key);
        assert!(allocated_key < after_key, "allocated < after should be true, got allocated={:?}, after={:?}", allocated_key, after_key);

        // The allocated key should be at least 3 levels deep since there's no space at level 1
        assert!(allocated_key.levels().len() >= 3,
                "Allocated key should be at least 3 levels deep for continued fraction, got {:?}",
                allocated_key.levels());
    }

    #[test]
    fn test_allocate_between_prefix_and_deep_extension() {
        // Initialize logger with trace level for this test
        let _ = env_logger::Builder::from_default_env()
            .filter_level(log::LevelFilter::Trace)
            .is_test(true)
            .try_init();

        // Test allocating between [3] and [3, 0, 0, 0, 2]
        // This tests the case where we have a short key and a longer key that extends it deeply
        let mut lseq = ReferenceLSEQ::new(StdRng::seed_from_u64(42));

        let before_key = ReferenceSortKey::new(vec![3]);
        let after_key = ReferenceSortKey::new(vec![3, 0, 0, 0, 2]);

        // Verify the keys are properly ordered before we start
        assert!(before_key < after_key, "Sanity check: before < after should be true");

        // Allocate between them
        let allocated_key = lseq.allocate(Some(&before_key), Some(&after_key)).unwrap();

        // Verify the allocated key is properly ordered
        assert!(before_key < allocated_key,
                "before < allocated should be true, got before={:?}, allocated={:?}",
                before_key, allocated_key);
        assert!(allocated_key < after_key,
                "allocated < after should be true, got allocated={:?}, after={:?}",
                allocated_key, after_key);

        // The allocated key should start with [3] since that's the common prefix
        assert_eq!(allocated_key.levels()[0], 3, "Allocated key should start with 3");

        // The allocated key should be exactly 5 levels deep to fit between [3] and [3, 0, 0, 0, 2]
        assert_eq!(allocated_key.levels().len(), 5,
                   "Allocated key should be 5 levels deep, got {:?}", allocated_key.levels());

        println!("Successfully allocated between [3] and [3, 0, 0, 0, 2]: {:?}", allocated_key);
    }
}
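The two implementations above imply different worst-case key sizes. The sketch below is not part of the commit; it simply tabulates the two size formulas stated in the doc comments on `max_base64_chars()` and `base64_chars_needed()` for a key that occupies a given number of levels.

```rust
/// Characters for a key that uses `depth` levels in the 64-slot variant:
/// level i costs i + 1 base64 characters.
fn base64_variant_chars(depth: usize) -> usize {
    (0..depth).map(|i| i + 1).sum()
}

/// Characters for the same depth in the reference variant: level i costs
/// 4 + i bits, and the packed bits round up to whole base64 characters.
fn reference_chars(depth: usize) -> usize {
    let bits: usize = (0..depth).map(|i| 4 + i).sum();
    (bits + 5) / 6
}

fn main() {
    for depth in 1..=6 {
        println!(
            "depth {}: base64 variant = {} chars, reference = {} chars",
            depth,
            base64_variant_chars(depth),
            reference_chars(depth)
        );
    }
    // Depth 3, for example: 1 + 2 + 3 = 6 chars vs ceil((4 + 5 + 6) / 6) = 3 chars,
    // matching the expectations in test_max_base64_chars and test_base64_chars_needed.
}
```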
research/src/bin/encoding_analyzer.rs (new file, 373 lines)
@@ -0,0 +1,373 @@
/*!
# L-SEQ Encoding Analysis Tool

This binary demonstrates the encoding efficiency analysis for L-SEQ algorithms.

It allocates a large number of identifiers (configurable, default 10,000) and shows:
- Base64 encoding size histograms
- Comparison between different L-SEQ variants
- Statistics useful for real-world deployment decisions

## Usage

```bash
cargo run --bin encoding_analyzer
cargo run --bin encoding_analyzer -- --count 1000000
cargo run --bin encoding_analyzer -- --count 10000 --insertion-mode random
cargo run --bin encoding_analyzer -- --count 10000 --insertion-mode tail
cargo run --bin encoding_analyzer -- --count 10000 --insertion-mode head
```

## Options

- `--count <number>`: Number of identifiers to generate (default: 10000)
- `--insertion-mode <mode>`: 'tail' for sequential insertion, 'random' for random insertion, or 'head' for head insertion (default: tail)
*/

use std::env;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use peoplesgrocers_lseq_research::algorithms::lseq_base64::{LSEQBase64, SortKeyBase64};
use peoplesgrocers_lseq_research::algorithms::original_paper_reference_impl::{ReferenceLSEQ, ReferenceSortKey};
use peoplesgrocers_lseq_research::encoding_analysis::{analyze_base64_encoding, analyze_reference_encoding, compare_encodings};

#[derive(Debug, Clone, PartialEq)]
enum InsertionMode {
    Tail,
    Random,
    Head,
}

impl InsertionMode {
    fn from_str(s: &str) -> Result<Self, &'static str> {
        match s.to_lowercase().as_str() {
            "tail" => Ok(InsertionMode::Tail),
            "random" => Ok(InsertionMode::Random),
            "head" => Ok(InsertionMode::Head),
            _ => Err("Invalid insertion mode. Use 'tail', 'random', or 'head'"),
        }
    }
}

/// Verify that all keys are sorted in proper order
fn verify_sorted_base64(keys: &[SortKeyBase64]) -> Result<(), String> {
    for i in 1..keys.len() {
        if keys[i-1] >= keys[i] {
            return Err(format!(
                "I expected key at position {} to be smaller than key at position {}\n\
                 [{}] = {:?} (internal: {:?})\n\
                 [{}] = {:?} (internal: {:?})\n\
                 But {:?} >= {:?}",
                i-1, i,
                i-1, keys[i-1], keys[i-1].levels(),
                i, keys[i], keys[i].levels(),
                keys[i-1], keys[i]
            ));
        }
    }
    Ok(())
}

/// Verify that all keys are sorted in proper order
#[allow(dead_code)]
fn verify_sorted_reference(keys: &[ReferenceSortKey]) -> Result<(), String> {
    for i in 1..keys.len() {
        if keys[i-1] >= keys[i] {
            return Err(format!(
                "I expected key at position {} to be smaller than key at position {}\n\
                 [{}] = {:?} (internal: {:?})\n\
                 [{}] = {:?} (internal: {:?})\n\
                 But {:?} >= {:?}",
                i-1, i,
                i-1, keys[i-1], keys[i-1].levels(),
                i, keys[i], keys[i].levels(),
                keys[i-1], keys[i]
            ));
        }
    }
    Ok(())
}

/// Generate random insertion positions for consistent comparison
fn generate_insertion_positions(count: usize, rng: &mut StdRng) -> Vec<usize> {
    let mut positions = Vec::new();

    for i in 0..count {
        if i == 0 {
            positions.push(0); // First element always goes at position 0
        } else {
            // Insert after position 0 to i-1 (current list has i elements)
            positions.push(rng.gen_range(0..i));
        }
    }

    positions
}

/// Generate identifiers using tail insertion
fn generate_tail_insertion_base64(count: usize, rng: StdRng) -> Vec<SortKeyBase64> {
    let mut keys = Vec::new();
    let mut lseq = LSEQBase64::new(rng);

    for i in 0..count {
        let before = if i == 0 {
            None
        } else {
            Some(&keys[i - 1])
        };

        let key = lseq.allocate(before, None).unwrap();
        keys.push(key);
    }

    keys
}

/// Generate identifiers using tail insertion
fn generate_tail_insertion_reference(count: usize, rng: StdRng) -> Vec<ReferenceSortKey> {
    let mut keys = Vec::new();
    let mut lseq = ReferenceLSEQ::new(rng);

    for i in 0..count {
        let before = if i == 0 {
            None
        } else {
            Some(&keys[i - 1])
        };

        let key = lseq.allocate(before, None).unwrap();
        keys.push(key);
    }

    keys
}

/// Generate identifiers using head insertion
fn generate_head_insertion_base64(count: usize, rng: StdRng) -> Vec<SortKeyBase64> {
    let mut keys = Vec::new();
    let mut lseq = LSEQBase64::new(rng);

    for i in 0..count {
        let after = if i == 0 {
            None
        } else {
            Some(&keys[0])
        };

        let key = lseq.allocate(None, after).unwrap();
        keys.insert(0, key);
    }

    keys
}

/// Generate identifiers using head insertion
fn generate_head_insertion_reference(count: usize, rng: StdRng) -> Vec<ReferenceSortKey> {
    let mut keys = Vec::new();
    let mut lseq = ReferenceLSEQ::new(rng);

    for i in 0..count {
        let after = if i == 0 {
            None
        } else {
            Some(&keys[0])
        };

        let key = lseq.allocate(None, after).unwrap();
        keys.insert(0, key);
    }

    keys
}

/// Generate identifiers using random insertion at the same positions
|
||||
fn generate_random_insertion_base64(count: usize, positions: &[usize], rng: StdRng) -> Vec<SortKeyBase64> {
|
||||
let mut keys = Vec::new();
|
||||
let mut lseq = LSEQBase64::new(rng);
|
||||
|
||||
for i in 0..count {
|
||||
eprintln!("Generating key {} of {}", i, count);
|
||||
let insert_after_pos = positions[i];
|
||||
|
||||
// We want to insert after position insert_after_pos
|
||||
// before = element at insert_after_pos (if valid)
|
||||
// after = element at insert_after_pos + 1 (if valid)
|
||||
// insert at position insert_after_pos + 1
|
||||
|
||||
let before = if insert_after_pos >= keys.len() {
|
||||
// If insert_after_pos is beyond the end, insert at the end
|
||||
keys.last()
|
||||
} else {
|
||||
Some(&keys[insert_after_pos])
|
||||
};
|
||||
|
||||
let after = if insert_after_pos + 1 >= keys.len() {
|
||||
None
|
||||
} else {
|
||||
Some(&keys[insert_after_pos + 1])
|
||||
};
|
||||
|
||||
eprintln!("before: {:?}, after: {:?}", before, after);
|
||||
let key = lseq.allocate(before, after).unwrap();
|
||||
let insert_pos = std::cmp::min(insert_after_pos + 1, keys.len());
|
||||
keys.insert(insert_pos, key);
|
||||
}
|
||||
|
||||
keys
|
||||
}
|
||||
|
||||
/// Generate identifiers using random insertion at the same positions
|
||||
fn generate_random_insertion_reference(count: usize, positions: &[usize], rng: StdRng) -> Vec<ReferenceSortKey> {
|
||||
let mut keys = Vec::new();
|
||||
let mut lseq = ReferenceLSEQ::new(rng);
|
||||
|
||||
for i in 0..count {
|
||||
let insert_after_pos = positions[i];
|
||||
|
||||
// We want to insert after position insert_after_pos
|
||||
// before = element at insert_after_pos (if valid)
|
||||
// after = element at insert_after_pos + 1 (if valid)
|
||||
// insert at position insert_after_pos + 1
|
||||
|
||||
let before = if insert_after_pos >= keys.len() {
|
||||
// If insert_after_pos is beyond the end, insert at the end
|
||||
keys.last()
|
||||
} else {
|
||||
Some(&keys[insert_after_pos])
|
||||
};
|
||||
|
||||
let after = if insert_after_pos + 1 >= keys.len() {
|
||||
None
|
||||
} else {
|
||||
Some(&keys[insert_after_pos + 1])
|
||||
};
|
||||
|
||||
let key = lseq.allocate(before, after).unwrap();
|
||||
let insert_pos = std::cmp::min(insert_after_pos + 1, keys.len());
|
||||
keys.insert(insert_pos, key);
|
||||
}
|
||||
|
||||
keys
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// Parse command line arguments
|
||||
let args: Vec<String> = env::args().collect();
|
||||
let mut count = 10000;
|
||||
let mut insertion_mode = InsertionMode::Tail;
|
||||
|
||||
let mut i = 1;
|
||||
while i < args.len() {
|
||||
match args[i].as_str() {
|
||||
"--count" => {
|
||||
if i + 1 < args.len() {
|
||||
count = args[i + 1].parse::<usize>().unwrap_or(10000);
|
||||
i += 2;
|
||||
} else {
|
||||
eprintln!("Error: --count requires a number");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
"--insertion-mode" => {
|
||||
if i + 1 < args.len() {
|
||||
insertion_mode = InsertionMode::from_str(&args[i + 1]).unwrap_or_else(|err| {
|
||||
eprintln!("Error: {}", err);
|
||||
std::process::exit(1);
|
||||
});
|
||||
i += 2;
|
||||
} else {
|
||||
eprintln!("Error: --insertion-mode requires 'tail', 'random', or 'head'");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
eprintln!("Unknown argument: {}", args[i]);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!("L-SEQ Encoding Analysis Tool");
|
||||
println!("============================");
|
||||
println!("Allocating {} identifiers for analysis...", count);
|
||||
println!("Insertion mode: {:?}", insertion_mode);
|
||||
println!();
|
||||
|
||||
// Generate identifiers based on insertion mode
|
||||
let (base64_keys, reference_keys) = match insertion_mode {
|
||||
InsertionMode::Tail => {
|
||||
println!("Using tail insertion (sequential)...");
|
||||
let base64_keys = generate_tail_insertion_base64(count, StdRng::seed_from_u64(42));
|
||||
let reference_keys = generate_tail_insertion_reference(count, StdRng::seed_from_u64(42));
|
||||
(base64_keys, reference_keys)
|
||||
}
|
||||
InsertionMode::Random => {
|
||||
println!("Using random insertion...");
|
||||
let mut rng = StdRng::seed_from_u64(42);
|
||||
let positions = generate_insertion_positions(count, &mut rng);
|
||||
|
||||
let base64_keys = generate_random_insertion_base64(count, &positions, StdRng::seed_from_u64(42));
|
||||
let reference_keys = generate_random_insertion_reference(count, &positions, StdRng::seed_from_u64(42));
|
||||
(base64_keys, reference_keys)
|
||||
}
|
||||
InsertionMode::Head => {
|
||||
println!("Using head insertion (reverse sequential)...");
|
||||
let base64_keys = generate_head_insertion_base64(count, StdRng::seed_from_u64(42));
|
||||
let reference_keys = generate_head_insertion_reference(count, StdRng::seed_from_u64(42));
|
||||
(base64_keys, reference_keys)
|
||||
}
|
||||
};
|
||||
|
||||
    // Verify that all keys are sorted
    println!("Verifying sort order...");
    if let Err(e) = verify_sorted_base64(&base64_keys) {
        eprintln!("ERROR: Base64 keys not sorted: {}", e);
        std::process::exit(1);
    }

    //if let Err(e) = verify_sorted_reference(&reference_keys) {
    //    eprintln!("ERROR: Reference keys not sorted: {}", e);
    //    std::process::exit(1);
    //}

    println!("✓ All keys are properly sorted!");
    println!();

    // Analyze encoding efficiency
    let base64_stats = analyze_base64_encoding(&base64_keys);
    let reference_stats = analyze_reference_encoding(&reference_keys);

    // Print results
    base64_stats.print_summary("Base64 Variant (64 slots per level)");
    reference_stats.print_summary("Reference Implementation (16 * 2^level slots)");

    compare_encodings(&base64_stats, "Base64 Variant", &reference_stats, "Reference");

    // Additional analysis
    println!("\n=== Additional Analysis ===");
    println!("Total base64 characters needed:");
    let base64_total: usize = base64_keys.iter().map(|k| k.max_base64_chars()).sum();
    let reference_total: usize = reference_keys.iter().map(|k| k.base64_chars_needed()).sum();

    println!(" Base64 variant: {} characters", base64_total);
    println!(" Reference impl: {} characters", reference_total);
    println!(" Difference: {} characters ({:.1}% {})",
        base64_total.abs_diff(reference_total),
        (base64_total as f64 - reference_total as f64).abs() / reference_total as f64 * 100.0,
        if base64_total > reference_total { "more" } else { "less" });

    println!("\nAverage bytes per key (assuming 1 byte per base64 character):");
    println!(" Base64 variant: {:.2} bytes", base64_total as f64 / count as f64);
    println!(" Reference impl: {:.2} bytes", reference_total as f64 / count as f64);

    // Show some sample keys for understanding
    println!("\n=== Sample Keys (first 10) ===");
    for i in 0..std::cmp::min(10, count) {
        println!("Key {}: Base64({} chars) = {:?}, Reference({} chars) = {:?}",
            i,
            base64_keys[i].max_base64_chars(),
            base64_keys[i],
            reference_keys[i].base64_chars_needed(),
            reference_keys[i]);
    }
}

180
research/src/encoding_analysis.rs
Normal file

@@ -0,0 +1,180 @@
/*!
# L-SEQ Encoding Efficiency Analysis

This module provides tools for analyzing the encoding efficiency of L-SEQ algorithms.

## Use Case

When implementing L-SEQ in real-world applications (especially web applications), we need to
serialize and transfer sort keys between systems. JavaScript and web APIs commonly use base64
encoding for safely representing binary data in text format.

To measure the practical efficiency of different L-SEQ variants, we:

1. **Allocate large numbers of identifiers** (e.g., 1,000,000) in realistic usage patterns
2. **Calculate base64 encoding requirements** for each identifier using the "maximally encoded"
   compact format (no separators, since the structure is known)
3. **Generate histograms** showing the distribution of encoding sizes
4. **Compare different algorithms** to understand their space efficiency trade-offs

## Encoding Formats

### Base64 Variant (64 slots per level)
- Level 0: 1 base64 character (6 bits, 0-63)
- Level 1: 2 base64 characters (12 bits, 0-4095)
- Level 2: 3 base64 characters (18 bits, 0-262143)
- Sequential parsing: read 1 char, then 2 chars, then 3 chars, etc.

### Original Paper Reference (16 * 2^level slots)
- Level 0: 4 bits (0-15)
- Level 1: 5 bits (0-31)
- Level 2: 6 bits (0-63)
- Packed encoding: concatenate all bits, encode as base64 (6 bits per character)

## Analysis Functions

This module provides functions to:
- Calculate encoding size histograms for collections of sort keys
- Compare efficiency between different L-SEQ variants
- Generate statistics for real-world usage scenarios
*/

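// Worked example of the two size formulas described above. This is a standalone
// sketch derived from the format descriptions, not code used by the analysis
// functions; the authoritative calculations live in
// `SortKeyBase64::max_base64_chars` and `ReferenceSortKey::base64_chars_needed`.
//
// ```rust
// /// Characters needed by the base64 variant for a key with `n` levels:
// /// level i (0-based) takes i + 1 characters, so the total is 1 + 2 + ... + n.
// fn base64_variant_chars(n: usize) -> usize {
//     n * (n + 1) / 2
// }
//
// /// Characters needed by the reference encoding for a key with `n` levels:
// /// level i takes 4 + i bits; the packed bit string is then base64 encoded
// /// at 6 bits per character, rounding up.
// fn reference_chars(n: usize) -> usize {
//     let bits: usize = (0..n).map(|i| 4 + i).sum();
//     (bits + 5) / 6
// }
//
// fn main() {
//     // A 3-level key: 1 + 2 + 3 = 6 chars vs ceil((4 + 5 + 6) / 6) = 3 chars.
//     assert_eq!(base64_variant_chars(3), 6);
//     assert_eq!(reference_chars(3), 3);
//     // The gap widens with depth: a 9-level key needs 45 vs 12 characters.
//     assert_eq!(base64_variant_chars(9), 45);
//     assert_eq!(reference_chars(9), 12);
// }
// ```
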
use std::collections::HashMap;
use crate::algorithms::lseq_base64::SortKeyBase64;
use crate::algorithms::original_paper_reference_impl::ReferenceSortKey;

/// Histogram of base64 encoding sizes
pub type EncodingSizeHistogram = HashMap<usize, usize>;

/// Statistics about encoding sizes
#[derive(Debug, Clone)]
pub struct EncodingStats {
    pub total_keys: usize,
    pub min_size: usize,
    pub max_size: usize,
    pub mean_size: f64,
    pub median_size: usize,
    pub histogram: EncodingSizeHistogram,
}

impl EncodingStats {
    /// Calculate statistics from a list of encoding sizes
    pub fn from_sizes(sizes: Vec<usize>) -> Self {
        let total_keys = sizes.len();
        let min_size = *sizes.iter().min().unwrap_or(&0);
        let max_size = *sizes.iter().max().unwrap_or(&0);
        let mean_size = sizes.iter().sum::<usize>() as f64 / total_keys as f64;

        let mut sorted_sizes = sizes.clone();
        sorted_sizes.sort_unstable();
        let median_size = if total_keys % 2 == 0 {
            (sorted_sizes[total_keys / 2 - 1] + sorted_sizes[total_keys / 2]) / 2
        } else {
            sorted_sizes[total_keys / 2]
        };

        let mut histogram = HashMap::new();
        for size in sizes {
            *histogram.entry(size).or_insert(0) += 1;
        }

        Self {
            total_keys,
            min_size,
            max_size,
            mean_size,
            median_size,
            histogram,
        }
    }

    /// Print a formatted summary of the statistics
    pub fn print_summary(&self, algorithm_name: &str) {
        println!("\n=== {} Encoding Statistics ===", algorithm_name);
        println!("Total keys: {}", self.total_keys);
        println!("Min size: {} base64 characters", self.min_size);
        println!("Max size: {} base64 characters", self.max_size);
        println!("Mean size: {:.2} base64 characters", self.mean_size);
        println!("Median size: {} base64 characters", self.median_size);

        println!("\nSize distribution:");
        let mut sizes: Vec<_> = self.histogram.keys().collect();
        sizes.sort();
        for &size in sizes {
            let count = self.histogram[&size];
            let percentage = (count as f64 / self.total_keys as f64) * 100.0;
            println!(" {} chars: {} keys ({:.1}%)", size, count, percentage);
        }
    }
}

/// Analyze the encoding efficiency of Base64 variant sort keys
pub fn analyze_base64_encoding(keys: &[SortKeyBase64]) -> EncodingStats {
    let sizes: Vec<usize> = keys.iter().map(|key| key.max_base64_chars()).collect();
    EncodingStats::from_sizes(sizes)
}

/// Analyze the encoding efficiency of Reference implementation sort keys
pub fn analyze_reference_encoding(keys: &[ReferenceSortKey]) -> EncodingStats {
    let sizes: Vec<usize> = keys.iter().map(|key| key.base64_chars_needed()).collect();
    EncodingStats::from_sizes(sizes)
}

/// Compare encoding efficiency between two algorithms
pub fn compare_encodings(stats1: &EncodingStats, name1: &str, stats2: &EncodingStats, name2: &str) {
    println!("\n=== Encoding Comparison: {} vs {} ===", name1, name2);
    println!("Mean size: {:.2} vs {:.2} chars ({:.1}% difference)",
        stats1.mean_size, stats2.mean_size,
        ((stats2.mean_size - stats1.mean_size) / stats1.mean_size) * 100.0);
    println!("Max size: {} vs {} chars", stats1.max_size, stats2.max_size);
    println!("Min size: {} vs {} chars", stats1.min_size, stats2.min_size);
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_encoding_stats() {
        let sizes = vec![1, 2, 2, 3, 3, 3, 4, 5];
        let stats = EncodingStats::from_sizes(sizes);

        assert_eq!(stats.total_keys, 8);
        assert_eq!(stats.min_size, 1);
        assert_eq!(stats.max_size, 5);
        assert_eq!(stats.mean_size, 2.875);
        assert_eq!(stats.median_size, 3);
        assert_eq!(stats.histogram[&3], 3);
        assert_eq!(stats.histogram[&2], 2);
    }

    #[test]
    fn test_base64_analysis() {
        let keys = vec![
            SortKeyBase64::new(vec![1]),
            SortKeyBase64::new(vec![1, 2]),
            SortKeyBase64::new(vec![1, 2, 3]),
        ];

        let stats = analyze_base64_encoding(&keys);
        assert_eq!(stats.total_keys, 3);
        assert_eq!(stats.min_size, 1); // 1 level = 1 char
        assert_eq!(stats.max_size, 6); // 3 levels = 1+2+3 = 6 chars
        assert_eq!(stats.mean_size, 10.0 / 3.0); // (1+3+6)/3
    }

    #[test]
    fn test_reference_analysis() {
        let keys = vec![
            ReferenceSortKey::new(vec![1]),
            ReferenceSortKey::new(vec![1, 2]),
            ReferenceSortKey::new(vec![1, 2, 3]),
        ];

        let stats = analyze_reference_encoding(&keys);
        assert_eq!(stats.total_keys, 3);
        assert_eq!(stats.min_size, 1); // 4 bits = 1 char
        assert_eq!(stats.max_size, 3); // 4+5+6=15 bits = 3 chars
        assert_eq!(stats.mean_size, 2.0); // (1+2+3)/3
    }
}

7
research/src/lib.rs
Normal file

@@ -0,0 +1,7 @@
pub mod algorithms;
pub mod encoding_analysis;

pub use algorithms::ReferenceLSEQ;

// Re-export for convenience in benchmarks
pub use rand;

52
research/src/main.rs
Normal file

@@ -0,0 +1,52 @@
use peoplesgrocers_lseq_research::ReferenceLSEQ;
use rand::rngs::StdRng;
use rand::SeedableRng;
use log::trace;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Because this smoke test is so simple, I'm not going to show the module name or timestamp.
    env_logger::Builder::from_default_env()
        .format(|buf, record| {
            use std::io::Write;
            use env_logger::fmt::Color;

            let mut style = buf.style();
            let level_color = match record.level() {
                log::Level::Error => Color::Red,
                log::Level::Warn => Color::Yellow,
                log::Level::Info => Color::Green,
                log::Level::Debug => Color::Blue,
                log::Level::Trace => Color::Cyan,
            };
            style.set_color(level_color).set_bold(true);

            writeln!(buf, "{} {}", style.value(record.level()), record.args())
        })
        .init();

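    // `from_default_env()` reads the standard RUST_LOG filter, so running with e.g.
    // `RUST_LOG=trace cargo run` surfaces the per-allocation trace! output from the
    // algorithm implementations.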
println!("L-SEQ Research - Original Paper Reference Implementation");
|
||||
|
||||
// Test the original paper reference implementation
|
||||
let mut lseq = ReferenceLSEQ::new(StdRng::seed_from_u64(42));
|
||||
let mut keys = Vec::new();
|
||||
|
||||
// Generate 10 sequential insertions
|
||||
for i in 0..10 {
|
||||
let before = keys.last();
|
||||
let key = lseq.allocate(before, None)?;
|
||||
println!("Generated key {}: {}", i + 1, key);
|
||||
trace!("--------------------------------");
|
||||
keys.push(key);
|
||||
}
|
||||
|
||||
// Verify they are sorted
|
||||
println!("\nVerifying sort order:");
|
||||
for i in 0..keys.len() - 1 {
|
||||
println!("{} < {}", keys[i], keys[i + 1]);
|
||||
assert!(keys[i] < keys[i + 1]);
|
||||
}
|
||||
|
||||
println!("\nAll keys are properly sorted!");
|
||||
|
||||
Ok(())
|
||||
}
|
||||