feat: experiment with different implementations of LSEQ
This commit is contained in:
commit
1e45ef9314
23 changed files with 3578 additions and 0 deletions
21
rust/Cargo.toml
Normal file
21
rust/Cargo.toml
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
[package]
|
||||
name = "peoplesgrocers-lseq"
|
||||
version = "1.0.0"
|
||||
edition = "2021"
|
||||
description = "L-SEQ algorithm implementation for fractional indexing and list CRDTs"
|
||||
keywords = ["lseq", "crdt", "fractional-indexing", "sequence", "collaborative-editing"]
|
||||
categories = ["data-structures", "algorithms"]
|
||||
license = "MIT"
|
||||
repository = "https://github.com/peoplesgrocers/lseq"
|
||||
readme = "README.md"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
serde = ["dep:serde"]
|
||||
|
||||
[dependencies]
|
||||
rand = "0.8"
|
||||
serde = { version = "1.0", features = ["derive"], optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
rand = { version = "0.8", features = ["small_rng"] }
|
||||
82
rust/README.md
Normal file
82
rust/README.md
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
# peoplesgrocers-lseq
|
||||
|
||||
Rust implementation of the L-SEQ algorithm for fractional indexing and list CRDTs.
|
||||
|
||||
## Installation
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
peoplesgrocers-lseq = "1.0.0"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```rust
|
||||
use peoplesgrocers_lseq::{LSEQ, SortKey};
|
||||
use rand::thread_rng;
|
||||
|
||||
// Create a new L-SEQ instance
|
||||
let mut lseq = LSEQ::new(thread_rng());
|
||||
|
||||
// Allocate identifiers
|
||||
let id1 = lseq.alloc(None, None); // First identifier
|
||||
let id2 = lseq.alloc(Some(&id1), None); // After id1
|
||||
let id3 = lseq.alloc(Some(&id1), Some(&id2)); // Between id1 and id2
|
||||
|
||||
// Sort identifiers
|
||||
let mut ids = vec![id3.clone(), id1.clone(), id2.clone()];
|
||||
ids.sort();
|
||||
println!("{:?}", ids); // [id1, id3, id2] - properly ordered
|
||||
|
||||
// Convert to/from strings
|
||||
let key_str = id1.to_string();
|
||||
let parsed_key: SortKey = key_str.parse().unwrap();
|
||||
assert_eq!(id1, parsed_key);
|
||||
|
||||
// Use with deterministic RNG for testing
|
||||
use rand::rngs::StdRng;
|
||||
use rand::SeedableRng;
|
||||
|
||||
let rng = StdRng::seed_from_u64(42);
|
||||
let mut deterministic_lseq = LSEQ::new(rng);
|
||||
```
|
||||
|
||||
## Features
|
||||
|
||||
- **Fractional indexing**: Generate identifiers that can be inserted between any two existing ones
|
||||
- **Serialization**: Optional serde serialization/deserialization of sort keys as strings (enable the `serde` feature)
|
||||
- **Ordering**: SortKey implements Ord and can be used directly with Rust's sorting
|
||||
- **String conversion**: Convert to/from strings for storage and transmission
|
||||
- **Even spacing**: Utilities for generating evenly distributed keys for bulk operations
|
||||
|
||||
## API
|
||||
|
||||
### `LSEQ<R: Rng>`
|
||||
|
||||
#### `new(rng: R) -> Self`
|
||||
|
||||
Creates a new L-SEQ instance with the given random number generator.
|
||||
|
||||
#### `alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey`
|
||||
|
||||
Allocates a new identifier between two existing identifiers.
|
||||
|
||||
- `before`: The identifier that should come before the new one (or `None` for beginning)
|
||||
- `after`: The identifier that should come after the new one (or `None` for end)
|
||||
- Returns: A new SortKey that sorts between `before` and `after`
|
||||
|
||||
### `SortKey`
|
||||
|
||||
A sort key that implements `Ord` and string conversion (`Display` / `FromStr`), plus `Serialize` and `Deserialize` when the `serde` feature is enabled.
|
||||
|
||||
### `EvenSpacingIterator`
|
||||
|
||||
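Utility for generating evenly spaced positions for bulk operations. The iterator yields `u64` positions; convert each one to a `SortKey` with `EvenSpacingIterator::position_to_key`.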
|
||||
|
||||
## How it works
|
||||
|
||||
L-SEQ generates identifiers using a base-64 alphabet that maintains lexicographic ordering. Each identifier is a sequence of characters from this alphabet, and new identifiers are generated by finding space between existing ones at different depths.
|
||||
|
||||
The algorithm picks an allocation strategy (bias toward min or max) at random for each depth and reuses it for every allocation at that depth, which avoids degenerate cases and maintains good performance characteristics.
|
||||
382
rust/src/lib.rs
Normal file
382
rust/src/lib.rs
Normal file
|
|
@ -0,0 +1,382 @@
|
|||
use rand::Rng;
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{
|
||||
de::{self, Visitor},
|
||||
Deserialize, Serialize,
|
||||
};
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// Digit alphabet: the byte at index `i` is the character used to display digit value `i`.
///
/// The 64 bytes are listed in ascending ASCII order, so comparing encoded strings
/// byte-wise orders keys the same way as comparing their digit sequences.
const ALPHABET: &[u8] = b"-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
|
||||
|
||||
/// An ordered identifier made of base-64 digits, most significant digit first.
///
/// Ordering is the derived lexicographic ordering of the digit vector, so a key
/// that is a strict prefix of another sorts before it. With the `serde` feature
/// enabled the key serializes through its `String` conversion.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(Serialize), serde(into = "String"))]
pub struct SortKey {
    /// Digit values of the key; each one is used as an index into the alphabet
    /// when the key is formatted.
    numbers: Vec<u8>,
}
|
||||
|
||||
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for SortKey {
    /// Deserializes a key from its string form by delegating to the `FromStr`
    /// implementation; a parse failure is surfaced as a custom serde error.
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        /// Visitor that accepts a string and parses it into a `SortKey`.
        struct KeyVisitor;

        impl<'de> Visitor<'de> for KeyVisitor {
            type Value = SortKey;

            fn expecting(&self, out: &mut fmt::Formatter) -> fmt::Result {
                out.write_str("a string containing valid sort key characters")
            }

            fn visit_str<E: de::Error>(self, text: &str) -> Result<SortKey, E> {
                text.parse::<SortKey>().map_err(E::custom)
            }
        }

        deserializer.deserialize_str(KeyVisitor)
    }
}
|
||||
|
||||
impl SortKey {
|
||||
pub fn from_numbers(numbers: Vec<u8>) -> Self {
|
||||
SortKey { numbers }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SortKey> for Vec<u8> {
|
||||
fn from(key: SortKey) -> Vec<u8> {
|
||||
key.numbers
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SortKey> for String {
    /// Renders the key through its `Display` implementation.
    fn from(key: SortKey) -> Self {
        ToString::to_string(&key)
    }
}
|
||||
|
||||
impl AsRef<[u8]> for SortKey {
|
||||
fn as_ref(&self) -> &[u8] {
|
||||
&self.numbers
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for SortKey {
|
||||
fn from(s: String) -> Self {
|
||||
s.parse().unwrap_or_else(|_| SortKey { numbers: vec![0] })
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SortKey {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
for &n in &self.numbers {
|
||||
write!(f, "{}", ALPHABET[n as usize] as char)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug)]
|
||||
pub struct LSEQ<R: Rng> {
|
||||
strategies: Vec<bool>,
|
||||
rng: R,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl<R: Rng> LSEQ<R> {
|
||||
pub fn new(mut rng: R) -> Self {
|
||||
let strategies = vec![rng.gen_bool(0.5)];
|
||||
LSEQ { strategies, rng }
|
||||
}
|
||||
|
||||
pub fn alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey {
|
||||
// Convert to numeric arrays, using boundary values for null
|
||||
let p = before.map_or(vec![0], |s| s.numbers.clone());
|
||||
let q = after.map_or(vec![63], |s| s.numbers.clone());
|
||||
|
||||
// Walk through digits looking for space
|
||||
let mut depth = 0;
|
||||
let mut result = Vec::new();
|
||||
|
||||
loop {
|
||||
let p_val = if depth < p.len() { p[depth] } else { 0 };
|
||||
let q_val = if depth < q.len() { q[depth] } else { 63 };
|
||||
|
||||
let interval = q_val as i32 - p_val as i32;
|
||||
|
||||
// If we have space between values at this depth
|
||||
if interval > 1 {
|
||||
// Pick a value in the available range
|
||||
let range = interval - 1;
|
||||
let add_val = 1 + self.rng.gen_range(0..range) as u8;
|
||||
let new_value = if self.strategies[depth] {
|
||||
p_val + add_val
|
||||
} else {
|
||||
q_val - add_val
|
||||
};
|
||||
|
||||
// Take the prefix from p up to depth and append our new value
|
||||
result.push(new_value);
|
||||
return SortKey::from_numbers(result);
|
||||
}
|
||||
result.push(p_val);
|
||||
|
||||
// If values are the same or adjacent at this depth,
|
||||
// continue to next depth
|
||||
depth += 1;
|
||||
if depth >= self.strategies.len() {
|
||||
self.strategies.push(self.rng.gen_bool(0.5));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Failure modes of [`EvenSpacingIterator::new`].
#[derive(Debug)]
pub enum SpacingError {
    /// The requested number of items cannot be placed. The constructor also
    /// returns this variant for a request of zero items.
    TooManyItems,
}
|
||||
|
||||
impl fmt::Display for SpacingError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
SpacingError::TooManyItems => write!(f, "Too many items to allocate"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Uses the default `Error` methods; the type carries no underlying source error.
impl Error for SpacingError {}
|
||||
|
||||
/// Iterator state for handing out evenly spaced positions within a fixed-size space.
#[derive(Debug, Clone)]
pub struct EvenSpacingIterator {
    /// Number of positions still to be yielded.
    remaining_items: usize,
    /// Largest position that may be yielded.
    space_size: u64,
    /// Position that the next call to `next` returns.
    next_item: u64,
    /// Integer part of the step size.
    step_size_integer: u64,
    /// Fractional part of the step size.
    step_size_error: f64,
    /// Accumulated fractional error; a whole unit is carried into `next_item`.
    error_accumulator: f64,
}
|
||||
|
||||
impl EvenSpacingIterator {
|
||||
// Static table of (64^k - 2) values for k from 1 to 9
|
||||
// We subtract 2 from each space size because we need to reserve two boundary positions:
|
||||
// 1. Position 0 (represented by "-") is reserved as the lower boundary
|
||||
// 2. Position 63 (represented by "z") is reserved as the upper boundary
|
||||
// This ensures we can always insert elements at the very beginning or end of the sequence
|
||||
const USABLE_SPACE: [usize; 9] = [
|
||||
64 - 2, // 64^1 - 2
|
||||
4096 - 2, // 64^2 - 2
|
||||
262144 - 2, // 64^3 - 2
|
||||
16777216 - 2, // 64^4 - 2
|
||||
1073741824 - 2, // 64^5 - 2
|
||||
68719476736 - 2, // 64^6 - 2
|
||||
4398046511104 - 2, // 64^7 - 2
|
||||
281474976710656 - 2, // 64^8 - 2
|
||||
18014398509481984 - 2, // 64^9 - 2
|
||||
];
|
||||
|
||||
pub fn new(total_items: usize) -> Result<(u64, Self), SpacingError> {
|
||||
if total_items == 0 {
|
||||
return Err(SpacingError::TooManyItems);
|
||||
}
|
||||
|
||||
// Find the smallest k where 64^k > total_items using the static table
|
||||
let mut k = 0;
|
||||
let mut space_size = 0;
|
||||
|
||||
for (index, &size) in Self::USABLE_SPACE.iter().enumerate() {
|
||||
if size >= total_items {
|
||||
k = index as u64 + 1; // k is 1-indexed
|
||||
space_size = size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If we couldn't find a suitable k, the request is too large
|
||||
if k == 0 {
|
||||
return Err(SpacingError::TooManyItems);
|
||||
}
|
||||
|
||||
// Calculate step size split into integer and fractional parts
|
||||
let step_size = (space_size as f64) / (total_items as f64);
|
||||
let step_size_integer = step_size.floor() as u64;
|
||||
let step_size_error = step_size - step_size_integer as f64;
|
||||
|
||||
Ok((
|
||||
k,
|
||||
EvenSpacingIterator {
|
||||
remaining_items: total_items,
|
||||
space_size: space_size.try_into().unwrap(),
|
||||
next_item: 1,
|
||||
step_size_integer,
|
||||
step_size_error,
|
||||
error_accumulator: 0.0,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
// Helper method to convert a position to a sort key
|
||||
pub fn position_to_key(k: u64, position: u64) -> SortKey {
|
||||
let mut result = Vec::with_capacity(k as usize);
|
||||
let mut pos = position;
|
||||
const BASE: u64 = 64;
|
||||
|
||||
// Fill in digits from least significant to most significant
|
||||
for _ in 0..k {
|
||||
// SAFETY: digit is guaranteed to be in bounds because:
|
||||
// 1. digit = pos % base where base is 64
|
||||
// 2. ALPHABET has exactly 64 elements
|
||||
// Therefore digit as u64 will always be 0-63
|
||||
let digit = (pos % BASE) as u8;
|
||||
pos /= BASE;
|
||||
result.push(digit);
|
||||
}
|
||||
|
||||
// Reverse to get most significant digit first
|
||||
result.reverse();
|
||||
SortKey::from_numbers(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for EvenSpacingIterator {
|
||||
type Item = u64;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.remaining_items == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.next_item > self.space_size {
|
||||
return None;
|
||||
}
|
||||
|
||||
let current_position = self.next_item;
|
||||
self.remaining_items -= 1;
|
||||
|
||||
self.next_item += self.step_size_integer;
|
||||
|
||||
self.error_accumulator += self.step_size_error;
|
||||
if self.error_accumulator >= 1.0 {
|
||||
self.next_item += 1;
|
||||
self.error_accumulator -= 1.0;
|
||||
}
|
||||
|
||||
Some(current_position)
|
||||
}
|
||||
}
|
||||
|
||||
/// Error produced when a string cannot be parsed into a `SortKey`.
#[derive(Debug)]
pub enum SortKeyParseError {
    /// The input contained a character outside the sort-key alphabet; the
    /// payload is the character that gets reported to the user.
    InvalidCharacter(char),
}
|
||||
|
||||
impl fmt::Display for SortKeyParseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
SortKeyParseError::InvalidCharacter(c) => write!(
|
||||
f,
|
||||
"Invalid character '{}' in sort key. Expected characters from alphabet: {}",
|
||||
c,
|
||||
String::from_utf8_lossy(ALPHABET)
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Uses the default `Error` methods; the type carries no underlying source error.
impl Error for SortKeyParseError {}
|
||||
|
||||
impl FromStr for SortKey {
|
||||
type Err = SortKeyParseError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let numbers = s
|
||||
.bytes()
|
||||
.map(|b| ALPHABET.iter().position(|&x| x == b).map(|pos| pos as u8))
|
||||
.collect::<Option<Vec<u8>>>()
|
||||
.ok_or_else(|| SortKeyParseError::InvalidCharacter(s.chars().next().unwrap()))?;
|
||||
Ok(SortKey { numbers })
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use rand::rngs::StdRng;
    use rand::SeedableRng;

    /// Single-character keys compare according to their alphabet position.
    #[test]
    fn test_compare_lseq() {
        let a: SortKey = "a".parse().unwrap();
        let b: SortKey = "b".parse().unwrap();
        assert!(a < b);
        assert!(!(b < a));
        assert!(!(a < a));
    }

    /// Keys allocated after and between existing keys sort accordingly.
    #[test]
    fn test_lseq_alloc() {
        // Seeded RNG keeps the test deterministic.
        let mut lseq = LSEQ::new(StdRng::seed_from_u64(42));
        let first = lseq.alloc(None, None);
        let last = lseq.alloc(Some(&first), None);
        let middle = lseq.alloc(Some(&first), Some(&last));

        assert!(first < last);
        assert!(first < middle);
        assert!(middle < last);
    }

    /// Position 1 with two digits encodes as the digits 0 and 1.
    #[test]
    fn test_position_to_key() {
        const K: u64 = 2;
        let key = EvenSpacingIterator::position_to_key(K, 1);
        assert_eq!(key.to_string(), "-0");
    }

    /// 4093 items need two digits and every one of them receives a position.
    #[test]
    fn test_even_spacing_4093() {
        let (k, mut iter) = EvenSpacingIterator::new(4093).unwrap();
        assert_eq!(k, 2);

        // `by_ref` borrows the iterator so its final state can still be printed.
        let positions: Vec<u64> = iter.by_ref().collect();
        println!("{:?}", iter);

        assert_eq!(positions.len(), 4093);
    }

    /// Six items all receive a position.
    #[test]
    fn test_even_spacing_6() {
        let (k, mut iter) = EvenSpacingIterator::new(6).unwrap();
        eprintln!("Created iterator with k={}", k);

        let mut positions = Vec::new();
        for (index, pos) in iter.by_ref().enumerate() {
            eprintln!("Iteration {}: Got position {}", index + 1, pos);
            positions.push(pos);
        }
        eprintln!("Final iterator state: {:?}", iter);

        assert_eq!(
            positions.len(),
            6,
            "Expected 6 positions, got {}",
            positions.len()
        );
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue