feat: experiment with different implementations of LSEQ

This commit is contained in:
nobody 2025-07-08 16:49:52 -07:00
commit 1e45ef9314
Signed by: GrocerPublishAgent
GPG key ID: D460CD54A9E3AB86
23 changed files with 3578 additions and 0 deletions

382
rust/src/lib.rs Normal file
View file

@ -0,0 +1,382 @@
use rand::Rng;
#[cfg(feature = "serde")]
use serde::{
de::{self, Visitor},
Deserialize, Serialize,
};
use std::error::Error;
use std::fmt;
use std::str::FromStr;
/// Characters used to render sort-key digits: digit value `n` is shown as `ALPHABET[n]`.
///
/// There are 64 entries, listed in ascending ASCII order (`-`, `0`-`9`, `A`-`Z`, `_`,
/// `a`-`z`), so comparing rendered keys byte-wise agrees with comparing their digits.
const ALPHABET: &[u8] = b"-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
/// An ordered key stored as a sequence of digits, one `u8` per digit.
///
/// The derived `PartialOrd`/`Ord` compare the digit vectors lexicographically.
/// With the `serde` feature enabled, the key serializes through its `String` conversion.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(feature = "serde", serde(into = "String"))]
pub struct SortKey {
/// Digit values, most significant first. Rendering indexes `ALPHABET` with each
/// value, so every digit is expected to be in `0..=63`.
numbers: Vec<u8>,
}
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for SortKey {
    /// Deserializes a key from its textual form.
    ///
    /// The deserializer is asked for a string, which is then parsed with the key's
    /// `FromStr` implementation; a parse failure is reported as a custom serde error.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        /// Visitor that turns a borrowed string into a `SortKey`.
        struct KeyFromStr;

        impl<'de> Visitor<'de> for KeyFromStr {
            type Value = SortKey;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string containing valid sort key characters")
            }

            fn visit_str<E>(self, value: &str) -> Result<SortKey, E>
            where
                E: de::Error,
            {
                match SortKey::from_str(value) {
                    Ok(key) => Ok(key),
                    Err(parse_error) => Err(E::custom(parse_error)),
                }
            }
        }

        deserializer.deserialize_str(KeyFromStr)
    }
}
impl SortKey {
pub fn from_numbers(numbers: Vec<u8>) -> Self {
SortKey { numbers }
}
}
impl From<SortKey> for Vec<u8> {
fn from(key: SortKey) -> Vec<u8> {
key.numbers
}
}
impl From<SortKey> for String {
    /// Renders the key to text through its `Display` implementation.
    fn from(key: SortKey) -> Self {
        ToString::to_string(&key)
    }
}
impl AsRef<[u8]> for SortKey {
    /// Borrows the key's digits as a byte slice.
    fn as_ref(&self) -> &[u8] {
        self.numbers.as_slice()
    }
}
impl From<String> for SortKey {
    /// Parses `s` into a key.
    ///
    /// This conversion cannot fail: text that does not parse is silently replaced
    /// by the single-digit key `[0]`. Use `str::parse` to observe parse errors.
    fn from(s: String) -> Self {
        match s.parse::<SortKey>() {
            Ok(parsed) => parsed,
            Err(_) => SortKey { numbers: vec![0] },
        }
    }
}
impl fmt::Display for SortKey {
    /// Writes one `ALPHABET` character per digit, in order.
    ///
    /// Width, fill and precision flags on the formatter are ignored.
    ///
    /// # Panics
    ///
    /// Panics if a digit is not a valid index into `ALPHABET`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for digit in self.numbers.iter().copied() {
            let symbol = char::from(ALPHABET[usize::from(digit)]);
            fmt::Write::write_char(&mut *f, symbol)?;
        }
        Ok(())
    }
}
/// Allocator that produces a `SortKey` lying between two neighbouring keys,
/// choosing digits at random from the generator `R`.
// NOTE(review): the reason for `allow(dead_code)` is not visible here — confirm it is still needed.
#[allow(dead_code)]
#[derive(Debug)]
pub struct LSEQ<R: Rng> {
/// One flag per digit depth: `true` places the new digit by adding an offset to the
/// lower neighbour's digit, `false` by subtracting it from the upper neighbour's digit.
strategies: Vec<bool>,
/// Random source used to draw strategies and digit offsets.
rng: R,
}
#[allow(dead_code)]
impl<R: Rng> LSEQ<R> {
    /// Creates an allocator, immediately drawing the strategy for depth 0 from `rng`.
    pub fn new(mut rng: R) -> Self {
        let first_strategy = rng.gen_bool(0.5);
        LSEQ {
            strategies: vec![first_strategy],
            rng,
        }
    }

    /// Allocates a key positioned between `before` and `after`.
    ///
    /// `None` for `before` stands for the lower boundary digit 0, and `None` for
    /// `after` for the upper boundary digit 63. Digits past the end of either key
    /// are read as those same boundary values.
    ///
    /// The digits are walked from the most significant one. At the first depth
    /// where the two digits differ by more than one, a random digit strictly
    /// between them is chosen and appended to the prefix copied from `before`.
    /// Strategies for new depths are drawn lazily and remembered.
    ///
    /// NOTE(review): nothing here checks that `before` sorts below `after`.
    pub fn alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey {
        // Borrow the neighbours' digits, falling back to the boundary keys.
        let lower: &[u8] = match before {
            Some(key) => key.numbers.as_slice(),
            None => &[0u8][..],
        };
        let upper: &[u8] = match after {
            Some(key) => key.numbers.as_slice(),
            None => &[63u8][..],
        };

        let mut prefix = Vec::new();
        let mut depth = 0usize;
        loop {
            let low_digit = lower.get(depth).copied().unwrap_or(0);
            let high_digit = upper.get(depth).copied().unwrap_or(63);
            let gap = i32::from(high_digit) - i32::from(low_digit);

            if gap <= 1 {
                // No free digit at this depth: keep the lower digit and descend,
                // drawing a strategy for the new depth the first time it is reached.
                prefix.push(low_digit);
                depth += 1;
                if self.strategies.len() <= depth {
                    let strategy = self.rng.gen_bool(0.5);
                    self.strategies.push(strategy);
                }
                continue;
            }

            // There are `gap - 1` free digits; pick an offset in `1..=gap - 1`.
            let offset = 1 + self.rng.gen_range(0..gap - 1) as u8;
            let digit = if self.strategies[depth] {
                low_digit + offset
            } else {
                high_digit - offset
            };
            prefix.push(digit);
            return SortKey::from_numbers(prefix);
        }
    }
}
/// Error returned when an `EvenSpacingIterator` cannot be built for the requested item count.
#[derive(Debug)]
pub enum SpacingError {
/// The requested count was zero, or larger than the biggest supported key space.
TooManyItems,
}
impl fmt::Display for SpacingError {
    /// Writes the human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            SpacingError::TooManyItems => "Too many items to allocate",
        };
        f.write_str(message)
    }
}
// Marks `SpacingError` as a standard error type; every `Error` method keeps its default.
impl Error for SpacingError {}
/// Iterator yielding integer positions spread across a fixed-size key space.
///
/// Each step advances by a whole-number stride; the fractional remainder of the
/// ideal stride is accumulated and converted into an extra `+1` whenever it reaches 1.
#[derive(Debug, Clone)]
pub struct EvenSpacingIterator {
/// Number of positions still to be yielded.
remaining_items: usize,
/// Largest position that may be yielded.
space_size: u64,
/// Position the next call to `next` will return.
next_item: u64,
step_size_integer: u64, // Integer part of step size
step_size_error: f64, // Fractional part of step size
error_accumulator: f64, // Accumulated error
}
impl EvenSpacingIterator {
    /// Usable position counts, `64^k - 2`, for key lengths `k` from 1 to 9.
    ///
    /// Two positions are subtracted because both ends of every space are reserved:
    /// 1. Position 0 (rendered as all `-`) is the lower boundary.
    /// 2. Position `64^k - 1` (rendered as all `z`) is the upper boundary.
    ///
    /// Keeping them free means an element can always be inserted at the very
    /// beginning or end of the sequence.
    ///
    /// The table is `u64` rather than `usize` because the entries from `64^6`
    /// upward do not fit in a 32-bit `usize` and would fail to compile there.
    const USABLE_SPACE: [u64; 9] = [
        64 - 2,                // 64^1 - 2
        4096 - 2,              // 64^2 - 2
        262144 - 2,            // 64^3 - 2
        16777216 - 2,          // 64^4 - 2
        1073741824 - 2,        // 64^5 - 2
        68719476736 - 2,       // 64^6 - 2
        4398046511104 - 2,     // 64^7 - 2
        281474976710656 - 2,   // 64^8 - 2
        18014398509481984 - 2, // 64^9 - 2
    ];

    /// Builds an iterator that spreads `total_items` positions over the smallest
    /// key space able to hold them.
    ///
    /// Returns `(k, iterator)`, where `k` is the key length (in digits) to pass to
    /// [`EvenSpacingIterator::position_to_key`] for each yielded position.
    ///
    /// # Errors
    ///
    /// Returns `SpacingError::TooManyItems` when `total_items` is zero or exceeds
    /// `64^9 - 2`, the largest supported space.
    pub fn new(total_items: usize) -> Result<(u64, Self), SpacingError> {
        if total_items == 0 {
            return Err(SpacingError::TooManyItems);
        }

        // Lossless widening: `usize` is never wider than 64 bits.
        let wanted = total_items as u64;

        // Find the smallest k where 64^k - 2 >= total_items.
        let (index, space_size) = Self::USABLE_SPACE
            .iter()
            .copied()
            .enumerate()
            .find(|&(_, size)| size >= wanted)
            .ok_or(SpacingError::TooManyItems)?;
        let k = index as u64 + 1; // k is 1-indexed

        // Split the ideal stride into its integer and fractional parts.
        let step_size = (space_size as f64) / (total_items as f64);
        let step_size_integer = step_size.floor() as u64;
        let step_size_error = step_size - step_size_integer as f64;

        Ok((
            k,
            EvenSpacingIterator {
                remaining_items: total_items,
                space_size,
                // Start at 1 because position 0 is the reserved lower boundary.
                next_item: 1,
                step_size_integer,
                step_size_error,
                error_accumulator: 0.0,
            },
        ))
    }

    /// Converts `position` into a `k`-digit base-64 sort key, most significant digit first.
    ///
    /// Only the lowest `k` base-64 digits of `position` are kept; higher digits are dropped.
    pub fn position_to_key(k: u64, position: u64) -> SortKey {
        const BASE: u64 = 64;

        let mut digits = Vec::with_capacity(k as usize);
        let mut rest = position;
        // Peel digits off from least significant to most significant.
        for _ in 0..k {
            // `rest % BASE` is always in 0..=63, so it fits in a `u8` and is a
            // valid index into the 64-character alphabet.
            digits.push((rest % BASE) as u8);
            rest /= BASE;
        }
        // Reverse to get the most significant digit first.
        digits.reverse();
        SortKey::from_numbers(digits)
    }
}
impl Iterator for EvenSpacingIterator {
    type Item = u64;

    /// Yields the current position and advances to the next one.
    ///
    /// Iteration stops once every requested item has been produced or the next
    /// position would lie beyond `space_size`.
    fn next(&mut self) -> Option<Self::Item> {
        let exhausted = self.remaining_items == 0 || self.next_item > self.space_size;
        if exhausted {
            return None;
        }

        let emitted = self.next_item;
        self.remaining_items -= 1;

        // Advance by the whole-number stride, then fold in the fractional part:
        // once the accumulated fraction reaches 1, take one extra step and carry.
        self.next_item += self.step_size_integer;
        let drift = self.error_accumulator + self.step_size_error;
        self.error_accumulator = if drift >= 1.0 {
            self.next_item += 1;
            drift - 1.0
        } else {
            drift
        };

        Some(emitted)
    }
}
/// Error produced when text cannot be parsed into a `SortKey`.
#[derive(Debug)]
pub enum SortKeyParseError {
/// The input contained a character outside `ALPHABET`; the payload is the
/// character shown in the error message.
InvalidCharacter(char),
}
impl fmt::Display for SortKeyParseError {
    /// Writes a message naming the rejected character and listing the accepted alphabet.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The enum has a single variant, so this pattern is irrefutable.
        let SortKeyParseError::InvalidCharacter(c) = self;
        let accepted = String::from_utf8_lossy(ALPHABET);
        write!(
            f,
            "Invalid character '{}' in sort key. Expected characters from alphabet: {}",
            c, accepted
        )
    }
}
// Marks `SortKeyParseError` as a standard error type; every `Error` method keeps its default.
impl Error for SortKeyParseError {}
impl FromStr for SortKey {
    type Err = SortKeyParseError;

    /// Parses a key from text, mapping each character to its index in `ALPHABET`.
    ///
    /// An empty string parses successfully into a key with no digits.
    ///
    /// # Errors
    ///
    /// Returns `SortKeyParseError::InvalidCharacter` carrying the first character
    /// of `s` that is not part of `ALPHABET`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        s.chars()
            .map(|c| {
                ALPHABET
                    .iter()
                    .position(|&b| char::from(b) == c)
                    // `ALPHABET` has 64 entries, so the index always fits in a `u8`.
                    .map(|index| index as u8)
                    // Report the character that actually failed the lookup.
                    .ok_or(SortKeyParseError::InvalidCharacter(c))
            })
            .collect::<Result<Vec<u8>, _>>()
            .map(|numbers| SortKey { numbers })
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rand::rngs::StdRng;
    use rand::SeedableRng;

    /// Keys order by their digits: "a" sorts strictly before "b".
    #[test]
    fn test_compare_lseq() {
        let a: SortKey = "a".parse().unwrap();
        let b: SortKey = "b".parse().unwrap();
        assert!(a < b);
        assert!(!(b < a));
        assert!(!(a < a));
    }

    /// Keys allocated after or between existing keys sort where they were requested.
    #[test]
    fn test_lseq_alloc() {
        // Seeded generator keeps the allocation sequence deterministic.
        let mut lseq = LSEQ::new(StdRng::seed_from_u64(42));
        let first = lseq.alloc(None, None);
        let last = lseq.alloc(Some(&first), None);
        let middle = lseq.alloc(Some(&first), Some(&last));
        assert!(first < last);
        assert!(first < middle);
        assert!(middle < last);
    }

    /// Position 1 in a two-digit space renders as "-0".
    #[test]
    fn test_position_to_key() {
        const K: u64 = 2;
        let key = EvenSpacingIterator::position_to_key(K, 1);
        assert_eq!(key.to_string(), "-0");
    }

    /// A nearly full two-digit space still yields every requested position.
    #[test]
    fn test_even_spacing_4093() {
        let (k, mut iter) = EvenSpacingIterator::new(4093).unwrap();
        assert_eq!(k, 2);
        // Borrow the iterator so its final state can be printed afterwards.
        let positions: Vec<u64> = iter.by_ref().collect();
        println!("{:?}", iter);
        assert_eq!(positions.len(), 4093);
    }

    /// Six items fit in a one-digit space; each step is logged to stderr.
    #[test]
    fn test_even_spacing_6() {
        let (k, mut iter) = EvenSpacingIterator::new(6).unwrap();
        eprintln!("Created iterator with k={}", k);
        let mut positions = Vec::new();
        for (index, pos) in iter.by_ref().enumerate() {
            eprintln!("Iteration {}: Got position {}", index + 1, pos);
            positions.push(pos);
        }
        eprintln!("Final iterator state: {:?}", iter);
        assert_eq!(
            positions.len(),
            6,
            "Expected 6 positions, got {}",
            positions.len()
        );
    }
}