feat: experiment with different implementations of LSEQ

This commit is contained in:
nobody 2025-07-08 16:49:52 -07:00
commit 1e45ef9314
Signed by: GrocerPublishAgent
GPG key ID: D460CD54A9E3AB86
23 changed files with 3578 additions and 0 deletions

21
rust/Cargo.toml Normal file
View file

@ -0,0 +1,21 @@
[package]
name = "peoplesgrocers-lseq"
version = "1.0.0"
edition = "2021"
description = "L-SEQ algorithm implementation for fractional indexing and list CRDTs"
keywords = ["lseq", "crdt", "fractional-indexing", "sequence", "collaborative-editing"]
categories = ["data-structures", "algorithms"]
license = "MIT"
repository = "https://github.com/peoplesgrocers/lseq"
readme = "README.md"
[features]
default = []
serde = ["dep:serde"]
[dependencies]
rand = "0.8"
serde = { version = "1.0", features = ["derive"], optional = true }
[dev-dependencies]
rand = { version = "0.8", features = ["small_rng"] }

82
rust/README.md Normal file
View file

@ -0,0 +1,82 @@
# peoplesgrocers-lseq
Rust implementation of the L-SEQ algorithm for fractional indexing and list CRDTs.
## Installation
Add this to your `Cargo.toml`:
```toml
[dependencies]
peoplesgrocers-lseq = "1.0.0"
```
## Usage
```rust
use peoplesgrocers_lseq::{LSEQ, SortKey};
use rand::thread_rng;
// Create a new L-SEQ instance
let mut lseq = LSEQ::new(thread_rng());
// Allocate identifiers
let id1 = lseq.alloc(None, None); // First identifier
let id2 = lseq.alloc(Some(&id1), None); // After id1
let id3 = lseq.alloc(Some(&id1), Some(&id2)); // Between id1 and id2
// Sort identifiers
let mut ids = vec![id3.clone(), id1.clone(), id2.clone()];
ids.sort();
println!("{:?}", ids); // [id1, id3, id2] - properly ordered
// Convert to/from strings
let key_str = id1.to_string();
let parsed_key: SortKey = key_str.parse().unwrap();
assert_eq!(id1, parsed_key);
// Use with deterministic RNG for testing
use rand::rngs::StdRng;
use rand::SeedableRng;
let rng = StdRng::seed_from_u64(42);
let mut deterministic_lseq = LSEQ::new(rng);
```
## Features
- **Fractional indexing**: Generate identifiers that can be inserted between any two existing ones
- **Serialization**: Optional serde serialization/deserialization of `SortKey` (as a string), enabled with the `serde` feature
- **Ordering**: SortKey implements Ord and can be used directly with Rust's sorting
- **String conversion**: Convert to/from strings for storage and transmission
- **Even spacing**: Utilities for generating evenly distributed keys for bulk operations
## API
### `LSEQ<R: Rng>`
#### `new(rng: R) -> Self`
Creates a new L-SEQ instance with the given random number generator.
#### `alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey`
Allocates a new identifier between two existing identifiers.
- `before`: The identifier that should come before the new one (or `None` for beginning)
- `after`: The identifier that should come after the new one (or `None` for end)
- Returns: A new SortKey that sorts between `before` and `after`
### `SortKey`
A sort key that implements `Ord` and string conversion (`Display` / `FromStr`), plus `Serialize` and `Deserialize` when the `serde` feature is enabled.
### `EvenSpacingIterator`
Utility for generating evenly spaced sort keys for bulk operations.
## How it works
L-SEQ generates identifiers using a base-64 alphabet that maintains lexicographic ordering. Each identifier is a sequence of characters from this alphabet, and new identifiers are generated by finding space between existing ones at different depths.
Each depth is assigned an allocation strategy at random (allocate upward from the lower bound or downward from the upper bound) to avoid degenerate cases and maintain good performance characteristics.

382
rust/src/lib.rs Normal file
View file

@ -0,0 +1,382 @@
use rand::Rng;
#[cfg(feature = "serde")]
use serde::{
de::{self, Visitor},
Deserialize, Serialize,
};
use std::error::Error;
use std::fmt;
use std::str::FromStr;
/// The 64 characters used to render sort-key digits; digit `n` is shown as `ALPHABET[n]`.
///
/// The bytes are listed in ascending ASCII order (`-`, `0`-`9`, `A`-`Z`, `_`, `a`-`z`),
/// so comparing rendered keys byte-by-byte agrees with comparing their digit values.
const ALPHABET: &[u8] = b"-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
/// An ordered identifier made of base-64 digits.
///
/// The derived `Ord` compares the digit vectors lexicographically, so a key that is a
/// strict prefix of another sorts before it. With the `serde` feature enabled the key is
/// serialized by converting it into a `String`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(feature = "serde", serde(into = "String"))]
pub struct SortKey {
/// Digits of the key, most significant first; each one is used as an index into `ALPHABET`
/// when the key is displayed.
// NOTE(review): `from_numbers` does not validate the digits, so values >= 64 can be stored
// here and would make the `Display` impl index out of bounds.
numbers: Vec<u8>,
}
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for SortKey {
    /// Deserializes a key from its string form.
    ///
    /// The deserializer is asked for a string, which is then parsed with the `FromStr`
    /// implementation; a parse failure is surfaced as a custom serde error.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        /// Visitor that turns a string slice into a `SortKey`.
        struct KeyTextVisitor;

        impl<'de> Visitor<'de> for KeyTextVisitor {
            type Value = SortKey;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string containing valid sort key characters")
            }

            fn visit_str<E>(self, value: &str) -> Result<SortKey, E>
            where
                E: de::Error,
            {
                SortKey::from_str(value).map_err(E::custom)
            }
        }

        deserializer.deserialize_str(KeyTextVisitor)
    }
}
impl SortKey {
pub fn from_numbers(numbers: Vec<u8>) -> Self {
SortKey { numbers }
}
}
impl From<SortKey> for Vec<u8> {
fn from(key: SortKey) -> Vec<u8> {
key.numbers
}
}
impl From<SortKey> for String {
    /// Renders the key through its `Display` implementation.
    fn from(sort_key: SortKey) -> Self {
        ToString::to_string(&sort_key)
    }
}
impl AsRef<[u8]> for SortKey {
    /// Borrows the key's digits (numeric values, not alphabet characters).
    fn as_ref(&self) -> &[u8] {
        self.numbers.as_slice()
    }
}
impl From<String> for SortKey {
    /// Lossy conversion from a string.
    ///
    /// A string that fails to parse is replaced by the single-digit key `[0]` instead of
    /// reporting an error; use `str::parse` when invalid input must be detected.
    fn from(s: String) -> Self {
        match s.parse::<SortKey>() {
            Ok(parsed) => parsed,
            Err(_) => SortKey { numbers: vec![0] },
        }
    }
}
impl fmt::Display for SortKey {
    /// Writes one alphabet character per digit, most significant digit first.
    ///
    /// Indexing `ALPHABET` panics if a digit is 64 or larger.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.numbers
            .iter()
            .try_for_each(|&digit| write!(f, "{}", char::from(ALPHABET[usize::from(digit)])))
    }
}
/// Allocator of `SortKey`s positioned between two existing keys.
///
/// Owns the random number generator used both to pick digits and to choose the
/// per-depth allocation strategy.
#[allow(dead_code)]
#[derive(Debug)]
pub struct LSEQ<R: Rng> {
/// One entry per depth, drawn with `gen_bool(0.5)` the first time that depth is reached:
/// `true` adds the random offset to the lower digit, `false` subtracts it from the upper digit.
strategies: Vec<bool>,
/// Source of randomness for strategies and offsets.
rng: R,
}
#[allow(dead_code)]
impl<R: Rng> LSEQ<R> {
    /// Creates an allocator that owns `rng`.
    ///
    /// The strategy for depth 0 is drawn immediately, so one random boolean is consumed
    /// from `rng` here.
    pub fn new(mut rng: R) -> Self {
        let first_strategy = rng.gen_bool(0.5);
        Self {
            strategies: vec![first_strategy],
            rng,
        }
    }

    /// Allocates a new key intended to sort between `before` and `after`.
    ///
    /// * `before` - lower neighbour; `None` is treated as the key `[0]`.
    /// * `after` - upper neighbour; `None` is treated as the key `[63]`.
    ///
    /// The keys are walked digit by digit. A digit missing from `before` counts as 0 and
    /// a digit missing from `after` counts as 63. At the first depth where the two digits
    /// are more than one apart, a digit strictly between them is chosen and appended to
    /// the digits of `before` collected so far. The offset is drawn uniformly from the
    /// whole gap, so either strategy can produce any digit strictly between the bounds.
    ///
    /// The arguments are not checked for `before < after`.
    pub fn alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey {
        /// Digit of `key` at `depth`, or `fallback` when the key is absent or too short.
        fn digit_at(key: Option<&SortKey>, depth: usize, fallback: u8) -> u8 {
            key.and_then(|k| k.numbers.get(depth))
                .copied()
                .unwrap_or(fallback)
        }

        let mut digits: Vec<u8> = Vec::new();
        loop {
            // Exactly one digit is pushed per depth, so the length is the current depth.
            let depth = digits.len();
            if depth >= self.strategies.len() {
                // First visit to this depth: fix its strategy for the allocator's lifetime.
                let strategy = self.rng.gen_bool(0.5);
                self.strategies.push(strategy);
            }

            let low = digit_at(before, depth, 0);
            let high = digit_at(after, depth, 63);
            let gap = i32::from(high) - i32::from(low);

            if gap <= 1 {
                // Equal or adjacent digits leave no room here; keep the lower digit and descend.
                digits.push(low);
                continue;
            }

            // `offset` lies in 1..=gap-1, so the chosen digit is strictly between `low` and `high`.
            let offset = 1 + self.rng.gen_range(0..gap - 1) as u8;
            let chosen = if self.strategies[depth] {
                low + offset
            } else {
                high - offset
            };
            digits.push(chosen);
            return SortKey::from_numbers(digits);
        }
    }
}
/// Errors reported when setting up an `EvenSpacingIterator`.
#[derive(Debug)]
pub enum SpacingError {
    /// The requested number of items cannot be allocated.
    TooManyItems,
}

impl fmt::Display for SpacingError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            Self::TooManyItems => "Too many items to allocate",
        };
        f.write_str(message)
    }
}

impl Error for SpacingError {}
/// Iterator over evenly spaced numeric positions inside a fixed-size key space.
///
/// The yielded positions start at 1 and advance by a fractional step that is tracked as
/// an integer part plus an accumulated fractional remainder. Positions can be turned into
/// keys with `EvenSpacingIterator::position_to_key`.
#[derive(Debug, Clone)]
pub struct EvenSpacingIterator {
// Number of positions still to be yielded; decremented on every `next`.
remaining_items: usize,
// Largest position that may be yielded; iteration stops once `next_item` exceeds it.
space_size: u64,
// Position that the next call to `next` returns.
next_item: u64,
step_size_integer: u64, // Integer part of step size
step_size_error: f64, // Fractional part of step size
error_accumulator: f64, // Accumulated error
}
impl EvenSpacingIterator {
    /// Usable key-space sizes, `64^k - 2`, for key lengths `k = 1..=9`.
    ///
    /// Two positions are subtracted because the lowest position (rendered as all `-`) and
    /// the highest position (rendered as all `z`) are reserved as boundaries, so that an
    /// element can always be inserted at the very beginning or end of the sequence.
    ///
    /// The table is `u64` rather than `usize`: the entries from `64^6` upwards do not fit
    /// in a 32-bit `usize`, which would make the crate fail to compile on 32-bit targets.
    const USABLE_SPACE: [u64; 9] = [
        64 - 2,                // 64^1 - 2
        4096 - 2,              // 64^2 - 2
        262144 - 2,            // 64^3 - 2
        16777216 - 2,          // 64^4 - 2
        1073741824 - 2,        // 64^5 - 2
        68719476736 - 2,       // 64^6 - 2
        4398046511104 - 2,     // 64^7 - 2
        281474976710656 - 2,   // 64^8 - 2
        18014398509481984 - 2, // 64^9 - 2
    ];

    /// Builds an iterator yielding `total_items` evenly spaced positions.
    ///
    /// Returns `(k, iterator)`, where `k` is the key length (in digits) of the smallest
    /// key space able to hold `total_items` positions. Pass `k` to
    /// [`EvenSpacingIterator::position_to_key`] together with each yielded position.
    ///
    /// # Errors
    ///
    /// Returns [`SpacingError::TooManyItems`] when `total_items` is zero or exceeds the
    /// largest supported key space (`64^9 - 2`).
    pub fn new(total_items: usize) -> Result<(u64, Self), SpacingError> {
        // Zero items is rejected with the only error variant available.
        if total_items == 0 {
            return Err(SpacingError::TooManyItems);
        }
        let wanted = u64::try_from(total_items).map_err(|_| SpacingError::TooManyItems)?;

        // Find the smallest k whose usable space (64^k - 2) can hold every item.
        // `k` is 1-indexed, hence the zip with `1..`.
        let (k, space_size) = (1u64..)
            .zip(Self::USABLE_SPACE.iter().copied())
            .find(|&(_, size)| size >= wanted)
            .ok_or(SpacingError::TooManyItems)?;

        // Split the (generally fractional) step into an integer part and a remainder;
        // the iterator carries the remainder forward as accumulated error.
        let step_size = (space_size as f64) / (total_items as f64);
        let step_size_integer = step_size.floor() as u64;
        let step_size_error = step_size - step_size_integer as f64;

        Ok((
            k,
            EvenSpacingIterator {
                remaining_items: total_items,
                space_size,
                next_item: 1,
                step_size_integer,
                step_size_error,
                error_accumulator: 0.0,
            },
        ))
    }

    /// Converts a numeric position into a `k`-digit sort key (base 64, most significant
    /// digit first).
    ///
    /// Only the lowest `k` base-64 digits of `position` are used; higher digits are
    /// discarded.
    pub fn position_to_key(k: u64, position: u64) -> SortKey {
        const BASE: u64 = 64;

        let mut digits = Vec::with_capacity(k as usize);
        let mut rest = position;
        // Peel off digits from least significant to most significant. Each digit is
        // `rest % 64`, so it is always in 0..=63 and fits both `u8` and the alphabet.
        for _ in 0..k {
            digits.push((rest % BASE) as u8);
            rest /= BASE;
        }
        // Most significant digit goes first in a sort key.
        digits.reverse();
        SortKey::from_numbers(digits)
    }
}
impl Iterator for EvenSpacingIterator {
    type Item = u64;

    /// Yields the current position and advances to the next one.
    ///
    /// Iteration ends once all requested items have been produced or the next position
    /// would fall beyond the usable space.
    fn next(&mut self) -> Option<Self::Item> {
        let exhausted = self.remaining_items == 0 || self.next_item > self.space_size;
        if exhausted {
            return None;
        }

        let yielded = self.next_item;
        self.remaining_items -= 1;

        // Advance by the integer step, carrying one extra position whenever the
        // accumulated fractional part reaches a whole unit.
        self.next_item += self.step_size_integer;
        self.error_accumulator += self.step_size_error;
        if self.error_accumulator >= 1.0 {
            self.next_item += 1;
            self.error_accumulator -= 1.0;
        }

        Some(yielded)
    }
}
/// Error produced when a string cannot be parsed as a `SortKey`.
#[derive(Debug)]
pub enum SortKeyParseError {
    /// The input contained a character outside the sort-key alphabet.
    InvalidCharacter(char),
}

impl fmt::Display for SortKeyParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let alphabet = String::from_utf8_lossy(ALPHABET);
        match self {
            Self::InvalidCharacter(bad) => write!(
                f,
                "Invalid character '{}' in sort key. Expected characters from alphabet: {}",
                bad, alphabet
            ),
        }
    }
}

impl Error for SortKeyParseError {}
impl FromStr for SortKey {
    type Err = SortKeyParseError;

    /// Parses a key from its string form, mapping every character to its index in
    /// `ALPHABET`.
    ///
    /// An empty string parses to a key with no digits.
    ///
    /// # Errors
    ///
    /// Returns [`SortKeyParseError::InvalidCharacter`] carrying the first character of
    /// `s` that is not part of the alphabet.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        s.chars()
            .map(|c| {
                ALPHABET
                    .iter()
                    .position(|&symbol| char::from(symbol) == c)
                    // The alphabet has 64 entries, so the index always fits in a u8.
                    .map(|index| index as u8)
                    // Report the character that actually failed, not the first one of `s`.
                    .ok_or(SortKeyParseError::InvalidCharacter(c))
            })
            .collect::<Result<Vec<u8>, _>>()
            .map(|numbers| SortKey { numbers })
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rand::rngs::StdRng;
    use rand::SeedableRng;

    /// Single-character keys order the same way as their characters.
    #[test]
    fn test_compare_lseq() {
        let a: SortKey = "a".parse().unwrap();
        let b: SortKey = "b".parse().unwrap();
        assert!(a < b);
        assert!(!(b < a));
        assert!(!(a < a));
    }

    /// Keys allocated at the end and in the middle sort where they were requested.
    #[test]
    fn test_lseq_alloc() {
        // Seeded RNG keeps the allocation sequence reproducible.
        let mut lseq = LSEQ::new(StdRng::seed_from_u64(42));
        let id1 = lseq.alloc(None, None);
        let id2 = lseq.alloc(Some(&id1), None);
        let id3 = lseq.alloc(Some(&id1), Some(&id2));
        assert!(id1 < id2);
        assert!(id1 < id3);
        assert!(id3 < id2);
    }

    /// Position 1 in a two-digit key space renders as "-0".
    #[test]
    fn test_position_to_key() {
        const K: u64 = 2;
        let key = EvenSpacingIterator::position_to_key(K, 1);
        assert_eq!(key.to_string(), "-0");
    }

    /// 4093 items need two digits and every one of them is yielded.
    #[test]
    fn test_even_spacing_4093() {
        let (k, mut iter) = EvenSpacingIterator::new(4093).unwrap();
        assert_eq!(k, 2);
        // `by_ref` borrows the iterator so its final state can still be printed below.
        let positions: Vec<u64> = iter.by_ref().collect();
        println!("{:?}", iter);
        assert_eq!(positions.len(), 4093);
    }

    /// A small request yields exactly the requested number of positions.
    #[test]
    fn test_even_spacing_6() {
        let (k, mut iter) = EvenSpacingIterator::new(6).unwrap();
        eprintln!("Created iterator with k={}", k);
        let mut positions = Vec::new();
        for (index, pos) in iter.by_ref().enumerate() {
            eprintln!("Iteration {}: Got position {}", index + 1, pos);
            positions.push(pos);
        }
        eprintln!("Final iterator state: {:?}", iter);
        assert_eq!(
            positions.len(),
            6,
            "Expected 6 positions, got {}",
            positions.len()
        );
    }
}