feat: conformance tests pass for first time

Change implementation to exponentially increase search space at each
level.
This commit is contained in:
nobody 2025-12-12 21:05:29 -08:00
commit 546d6deb69
Signed by: GrocerPublishAgent
GPG key ID: D460CD54A9E3AB86
13 changed files with 1852 additions and 102 deletions

View file

@ -10,11 +10,39 @@ use std::str::FromStr;
const ALPHABET: &[u8] = b"-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
/// Minimal RNG trait for LSEQ - only the methods we actually use.
/// This allows custom implementations (e.g., recording wrappers) without
/// implementing the full Rng trait.
pub trait LseqRng {
/// Draw a boolean. `p` is passed through unchanged by the blanket impl to
/// `rand::Rng::gen_bool`, so custom implementations are presumably expected
/// to return `true` with probability `p`.
fn gen_bool(&mut self, p: f64) -> bool;
/// Draw a `u64` from the half-open `range` (`start` inclusive, `end`
/// exclusive). The allocator adds the result to a lower bound as an offset,
/// so it relies on the returned value staying inside `range`.
fn gen_range(&mut self, range: std::ops::Range<u64>) -> u64;
}
/// Blanket implementation for anything that implements rand::Rng
impl<R: Rng> LseqRng for R {
fn gen_bool(&mut self, p: f64) -> bool {
Rng::gen_bool(self, p)
}
fn gen_range(&mut self, range: std::ops::Range<u64>) -> u64 {
Rng::gen_range(self, range)
}
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(feature = "serde", serde(into = "String"))]
pub struct SortKey {
numbers: Vec<u8>,
/// Each element is a value at one level of the LSEQ tree.
/// Level n (0-indexed) holds values 0 to 64^(n+1) - 1, capped at 64^8.
///
/// String encoding length grows as triangular numbers only up to depth 8:
/// Depths 1-8: 1, 3, 6, 10, 15, 21, 28, 36 chars (triangular)
/// Depths 9+: 44, 52, 60, 68, ... chars (+8 per level, linear)
///
/// We cap at 64^8 = 2^48 per level for JavaScript float compatibility (2^53 max).
/// But we can still keep going deeper: even at 8 chars per level, the total
/// address space is (2^48)^depth which remains astronomically large.
numbers: Vec<u64>,
}
#[cfg(feature = "serde")]
@ -44,14 +72,34 @@ impl<'de> Deserialize<'de> for SortKey {
}
}
/// Maximum exponent for level values, capped for JavaScript compatibility.
/// JavaScript numbers are IEEE 754 floats with 53 bits of precision.
/// 64^8 = 2^48, which is safely within 2^53.
const MAX_LEVEL_EXPONENT: u32 = 8;
impl SortKey {
pub fn from_numbers(numbers: Vec<u8>) -> Self {
pub fn from_numbers(numbers: Vec<u64>) -> Self {
SortKey { numbers }
}
/// Returns the maximum value for a given level (0-indexed).
///
/// Level 0: 64^1 - 1 = 63
/// Level 1: 64^2 - 1 = 4095
/// ...
/// Level 7+: 64^8 - 1 (capped for JS compatibility)
///
/// The exponent is clamped while still a `usize`, before narrowing to `u32`,
/// so an arbitrarily large `level` yields the capped value instead of
/// overflowing `level + 1` or being truncated by the cast.
fn max_value_for_level(level: usize) -> u64 {
    // The clamp bound originates from a u32 constant, so the narrowing cast
    // of the clamped value is lossless.
    let exp = level.saturating_add(1).min(MAX_LEVEL_EXPONENT as usize) as u32;
    64u64.pow(exp) - 1
}
/// Returns the number of characters needed to encode a value at this level.
///
/// Level `n` (0-indexed) uses `n + 1` base-64 characters, capped at
/// `MAX_LEVEL_EXPONENT`. The addition saturates so that `usize::MAX` maps to
/// the cap instead of overflowing.
fn chars_for_level(level: usize) -> usize {
    level.saturating_add(1).min(MAX_LEVEL_EXPONENT as usize)
}
}
impl From<SortKey> for Vec<u8> {
fn from(key: SortKey) -> Vec<u8> {
impl From<SortKey> for Vec<u64> {
fn from(key: SortKey) -> Vec<u64> {
key.numbers
}
}
@ -62,8 +110,8 @@ impl From<SortKey> for String {
}
}
impl AsRef<[u8]> for SortKey {
fn as_ref(&self) -> &[u8] {
impl AsRef<[u64]> for SortKey {
fn as_ref(&self) -> &[u64] {
&self.numbers
}
}
@ -76,8 +124,21 @@ impl From<String> for SortKey {
impl fmt::Display for SortKey {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for &n in &self.numbers {
write!(f, "{}", ALPHABET[n as usize] as char)?;
// Each level is written as a fixed-width base-64 group whose width comes
// from `chars_for_level(level)`; groups are emitted in level order.
for (level, &value) in self.numbers.iter().enumerate() {
let num_chars = Self::chars_for_level(level);
let mut chars = Vec::with_capacity(num_chars);
let mut v = value;
// Extract digits from least significant to most significant
for _ in 0..num_chars {
chars.push(ALPHABET[(v % 64) as usize] as char);
v /= 64;
}
// NOTE(review): whatever remains in `v` here is dropped, so a value larger
// than `num_chars` base-64 digits can hold is written truncated
// (low-order digits only) rather than rejected.
// Write in reverse (most significant first)
for c in chars.into_iter().rev() {
write!(f, "{}", c)?;
}
}
Ok(())
}
@ -85,52 +146,106 @@ impl fmt::Display for SortKey {
#[allow(dead_code)]
#[derive(Debug)]
pub struct LSEQ<R: Rng> {
pub struct LSEQ<R: LseqRng> {
strategies: Vec<bool>,
rng: R,
}
#[allow(dead_code)]
impl<R: Rng> LSEQ<R> {
impl<R: LseqRng> LSEQ<R> {
/// Create an allocator that owns `rng`.
///
/// One strategy flag is drawn up front for depth 0; `alloc` draws further
/// flags as it descends past the depths already covered.
pub fn new(mut rng: R) -> Self {
    let first_strategy = rng.gen_bool(0.5);
    LSEQ {
        strategies: vec![first_strategy],
        rng,
    }
}
pub fn alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey {
// Convert to numeric arrays, using boundary values for null
let p = before.map_or(vec![0], |s| s.numbers.clone());
let q = after.map_or(vec![63], |s| s.numbers.clone());
/// Consume the LSEQ and return the inner RNG
pub fn take_rng(self) -> R {
self.rng
}
/// Allocate a new sort key between `before` and `after`.
///
/// # The invariant
///
/// This function guarantees you can always allocate a key that sorts before
/// any previously allocated key. This is essential for CRDT list insertion.
///
/// # The encoding
///
/// Keys use a 64-character alphabet where `'-'` is 0 and `'z'` is 63, chosen
/// so that `strcmp` matches numeric comparison. Keys are paths through an
/// LSEQ tree where each level has exponentially more space:
///
/// ```text
/// Level 1: 64¹ values → 1 char "-", "0", ..., "z"
/// Level 2: 64² values → 2 chars "--", "-0", ..., "zz"
/// Level 3: 64³ values → 3 chars "---", "--0", ..., "zzz"
/// ```
///
/// A path is encoded by concatenating each level's representation:
///
/// ```text
/// [0] = ["-"] = "-" (1 char)
/// [0, 1] = ["-", "-0"] = "--0" (1 + 2 = 3 chars)
/// [0, 0] = ["-", "--"] = "---" (1 + 2 = 3 chars)
/// [0, 0, 1] = ["-", "--", "--0"] = "-----0" (1 + 2 + 3 = 6 chars)
/// ```
///
/// # Why we go deeper than the LSEQ paper
///
/// With `strcmp`, `"-"` == `"---"` == `"------"` in a crucial sense: nothing
/// can sort before any of them. All-zeros at any depth is "negative infinity".
///
/// The LSEQ paper says: to insert before `[0, 1]` (= `"--0"`), use `[0, 0]`.
/// But `[0, 0]` = `"---"`, and nothing can ever sort before that!
///
/// This implementation goes one level deeper to preserve the invariant:
///
/// ```text
/// Insert before "--0" (i.e., [0, 1])?
/// Paper says: use [0, 0] = "---" → dead end
/// We say: use [0, 0, X] = "---" + X → can still prepend [0, 0, Y] where Y < X
/// ```
///
/// The cost is longer keys, but we guarantee indefinite prepending.
pub fn alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey {
let p = before.map_or(vec![], |s| s.numbers.clone());
let q = after.map_or(vec![], |s| s.numbers.clone());
// Walk through digits looking for space
let mut depth = 0;
let mut result = Vec::new();
let mut result: Vec<u64> = Vec::new();
loop {
let p_val = if depth < p.len() { p[depth] } else { 0 };
let q_val = if depth < q.len() { q[depth] } else { 63 };
let p_val = p.get(depth).copied().unwrap_or(0);
let q_upper = q.get(depth).copied();
let level_max = SortKey::max_value_for_level(depth);
let interval = q_val as i32 - p_val as i32;
// Minimum allocatable (inclusive): one above the lower bound.
// This naturally reserves value 0 when p_val=0, ensuring we never
// allocate an all-zeros key. If we allocate [0, 1] and later need
// to prepend before it, we simply go deeper to get [0, 0, X].
let min_alloc = p_val + 1;
// If we have space between values at this depth
if interval > 1 {
// Pick a value in the available range
let range = interval - 1;
let add_val = 1 + self.rng.gen_range(0..range) as u8;
// Maximum allocatable (inclusive):
// - With upper bound: one below it
// - Without upper bound (after=None): full range for this level
let max_alloc = q_upper.map_or(level_max, |q| q.saturating_sub(1));
if min_alloc <= max_alloc {
let range = max_alloc - min_alloc + 1;
let offset = self.rng.gen_range(0..range);
let new_value = if self.strategies[depth] {
p_val + add_val
min_alloc + offset
} else {
q_val - add_val
max_alloc - offset
};
// Take the prefix from p up to depth and append our new value
result.push(new_value);
return SortKey::from_numbers(result);
}
// Descend to next level
result.push(p_val);
// If values are the same or adjacent at this depth,
// continue to next depth
depth += 1;
if depth >= self.strategies.len() {
self.strategies.push(self.rng.gen_bool(0.5));
@ -165,21 +280,21 @@ pub struct EvenSpacingIterator {
}
impl EvenSpacingIterator {
// Static table of (64^k - 2) values for k from 1 to 9
// We subtract 2 from each space size because we need to reserve two boundary positions:
// 1. Position 0 (represented by "-") is reserved as the lower boundary
// 2. Position 63 (represented by "z") is reserved as the upper boundary
// This ensures we can always insert elements at the very beginning or end of the sequence
const USABLE_SPACE: [usize; 9] = [
64 - 2, // 64^1 - 2
4096 - 2, // 64^2 - 2
262144 - 2, // 64^3 - 2
16777216 - 2, // 64^4 - 2
1073741824 - 2, // 64^5 - 2
68719476736 - 2, // 64^6 - 2
4398046511104 - 2, // 64^7 - 2
281474976710656 - 2, // 64^8 - 2
18014398509481984 - 2, // 64^9 - 2
// Static table of (64^k - 1) values for k from 1 to 8
// We subtract 1 because we reserve only the lower boundary (position 0, all "-"s).
// Position 0 cannot be used because nothing can be lexicographically less than it.
// The upper boundary (all "z"s) IS usable because we can always insert after it
// by extending: "zzz" < "zzza" lexicographically (prefix comparison).
// Capped at 64^8 = 2^48 for JavaScript number compatibility (max safe: 2^53).
const USABLE_SPACE: [usize; 8] = [
64 - 1, // 64^1 - 1
4096 - 1, // 64^2 - 1
262144 - 1, // 64^3 - 1
16777216 - 1, // 64^4 - 1
1073741824 - 1, // 64^5 - 1
68719476736 - 1, // 64^6 - 1
4398046511104 - 1, // 64^7 - 1
281474976710656 - 1, // 64^8 - 1
];
pub fn new(total_items: usize) -> Result<(u64, Self), SpacingError> {
@ -222,25 +337,25 @@ impl EvenSpacingIterator {
))
}
// Helper method to convert a position to a sort key
/// Convert a position within a level-k space to a SortKey.
///
/// Creates a k-level key where levels 0 through k-2 are 0, and level k-1
/// contains the position value.
///
/// Example: position_to_key(2, 1) = [0, 1] which displays as "--0"
pub fn position_to_key(k: u64, position: u64) -> SortKey {
let mut result = Vec::with_capacity(k as usize);
let mut pos = position;
const BASE: u64 = 64;
// Fill in digits from least significant to most significant
for _ in 0..k {
// SAFETY: digit is guaranteed to be in bounds because:
// 1. digit = pos % base where base is 64
// 2. ALPHABET has exactly 64 elements
// Therefore digit as u64 will always be 0-63
let digit = (pos % BASE) as u8;
pos /= BASE;
result.push(digit);
// Levels 0 through k-2 are 0
for _ in 0..k.saturating_sub(1) {
result.push(0);
}
// Level k-1 contains the position
if k > 0 {
result.push(position);
}
// Reverse to get most significant digit first
result.reverse();
SortKey::from_numbers(result)
}
}
@ -275,6 +390,7 @@ impl Iterator for EvenSpacingIterator {
#[derive(Debug)]
pub enum SortKeyParseError {
/// The input contained a character that is not part of `ALPHABET`.
InvalidCharacter(char),
/// The input could not be split into complete per-level groups; carries the
/// total byte length of the input.
InvalidLength(usize),
}
impl fmt::Display for SortKeyParseError {
@ -286,6 +402,11 @@ impl fmt::Display for SortKeyParseError {
c,
String::from_utf8_lossy(ALPHABET)
),
SortKeyParseError::InvalidLength(len) => write!(
f,
"Invalid sort key length {}. Expected triangular number up to 36 (1, 3, 6, 10, 15, 21, 28, 36), then +8 per level (44, 52, 60, ...)",
len
),
}
}
}
@ -296,11 +417,33 @@ impl FromStr for SortKey {
type Err = SortKeyParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let numbers = s
.bytes()
.map(|b| ALPHABET.iter().position(|&x| x == b).map(|pos| pos as u8))
.collect::<Option<Vec<u8>>>()
.ok_or_else(|| SortKeyParseError::InvalidCharacter(s.chars().next().unwrap()))?;
let bytes = s.as_bytes();
let mut numbers = Vec::new();
let mut pos = 0;
let mut level = 0;
// Consume the input one level at a time: level n takes
// `chars_for_level(n)` bytes. An empty input never enters the loop and
// produces a key with no levels.
while pos < bytes.len() {
let num_chars = SortKey::chars_for_level(level);
// Leftover bytes that cannot fill the next level's group: reject, reporting
// the total input length.
if pos + num_chars > bytes.len() {
return Err(SortKeyParseError::InvalidLength(bytes.len()));
}
// Parse num_chars characters as a base-64 number
let mut value: u64 = 0;
for i in 0..num_chars {
let b = bytes[pos + i];
// A digit is the byte's index in ALPHABET.
// NOTE(review): `b` is a single byte, so for non-ASCII input the reported
// character is that byte reinterpreted as U+0000..=U+00FF, not the
// character that actually appears in the input string.
let digit = ALPHABET
.iter()
.position(|&x| x == b)
.ok_or(SortKeyParseError::InvalidCharacter(b as char))?;
value = value * 64 + digit as u64;
}
numbers.push(value);
pos += num_chars;
level += 1;
}
Ok(SortKey { numbers })
}
}
@ -311,13 +454,20 @@ mod tests {
use rand::rngs::StdRng;
use rand::SeedableRng;
/// Test helper: build a SortKey from a borrowed list of per-level values.
fn key(nums: &[u64]) -> SortKey {
    let levels = Vec::from(nums);
    SortKey::from_numbers(levels)
}
#[test]
fn test_compare_lseq() {
let a = "a".parse::<SortKey>().unwrap();
let b = "b".parse::<SortKey>().unwrap();
assert_eq!(a < b, true);
assert_eq!(b < a, false);
assert_eq!(a < a, false);
// Single-character keys are level-0 values (0-63)
// "-" is position 0 in the alphabet, "0" is position 1
let a = "-".parse::<SortKey>().unwrap(); // value 0
let b = "0".parse::<SortKey>().unwrap(); // value 1
assert!(a < b);
assert!(!(b < a));
assert!(!(a < a));
}
#[test]
@ -335,8 +485,28 @@ mod tests {
#[test]
fn test_position_to_key() {
// k=2 means 2 levels: level 0 (1 char) + level 1 (2 chars) = 3 chars total
// position 1 goes into level 1, with level 0 = 0
// [0, 1] = "-" + "-0" = "--0"
const K: u64 = 2;
assert_eq!(EvenSpacingIterator::position_to_key(K, 1).to_string(), "-0");
assert_eq!(
EvenSpacingIterator::position_to_key(K, 1).to_string(),
"--0"
);
// k=1 means just level 0 (1 char)
// position 1 = "0" (alphabet[1])
assert_eq!(
EvenSpacingIterator::position_to_key(1, 1).to_string(),
"0"
);
// k=2, position 4095 (max for level 1 = 64² - 1)
// [0, 4095] = "-" + "zz" = "-zz"
assert_eq!(
EvenSpacingIterator::position_to_key(2, 4095).to_string(),
"-zz"
);
}
#[test]
@ -379,4 +549,85 @@ mod tests {
positions.len()
);
}
/// Prepending before a key that sits on the left edge must descend one level
/// deeper instead of allocating an all-zero key.
#[test]
fn test_prepend_before_left_edge() {
    let mut lseq = LSEQ::new(StdRng::seed_from_u64(123));

    // Before [0, 1] the allocator has to produce [0, 0, X].
    let upper = key(&[0, 1]);
    let allocated = lseq.alloc(None, Some(&upper));
    assert!(allocated < upper);
    assert_eq!(allocated.numbers.len(), 3);
    assert_eq!(allocated.numbers[0], 0);
    assert_eq!(allocated.numbers[1], 0);

    // Every extra leading zero in the bound pushes the result one level down:
    // [0, 0, 1] -> [0, 0, 0, X] and [0, 0, 0, 1] -> [0, 0, 0, 0, X].
    for (bound, expected_depth) in [(key(&[0, 0, 1]), 4), (key(&[0, 0, 0, 1]), 5)] {
        let allocated = lseq.alloc(None, Some(&bound));
        assert!(allocated < bound);
        assert_eq!(allocated.numbers.len(), expected_depth);
    }
}
/// Ordering check: a deeper key under the all-zero prefix sorts before its
/// shallower right-hand neighbour, e.g. [0, 0, X] < [0, 1].
#[test]
fn test_left_edge_ordering() {
    let ordered_pairs = [
        (key(&[0, 0, 63]), key(&[0, 1])),
        (key(&[0, 0, 1]), key(&[0, 1])),
        (key(&[0, 0, 0, 63]), key(&[0, 0, 1])),
    ];
    for (smaller, larger) in &ordered_pairs {
        assert!(smaller < larger);
    }
}
/// Verify roundtrip: numbers -> string -> numbers gives back the same levels.
#[test]
fn test_roundtrip_encoding() {
    let cases = [
        key(&[0]),            // "-"
        key(&[63]),           // "z"
        key(&[0, 0]),         // "---"
        key(&[0, 1]),         // "--0"
        key(&[0, 4095]),      // "-zz"
        key(&[1, 0]),         // "0--"
        key(&[0, 0, 0]),      // "------"
        key(&[0, 0, 1]),      // "-----0"
        key(&[0, 0, 262143]), // "---zzz"
    ];
    for original in cases {
        let s = original.to_string();
        // Build the failure message only when parsing actually fails.
        let parsed: SortKey = s
            .parse()
            .unwrap_or_else(|e| panic!("Failed to parse '{}': {:?}", s, e));
        assert_eq!(
            original.numbers, parsed.numbers,
            "Roundtrip failed for {:?} -> '{}' -> {:?}",
            original.numbers, s, parsed.numbers
        );
    }
}
/// Check the exact string produced for representative keys at depths 1 to 3.
#[test]
fn test_string_encoding() {
    let expectations: [(&[u64], &str); 10] = [
        // Level 0 only (1 char)
        (&[0], "-"),
        (&[1], "0"),
        (&[63], "z"),
        // Level 0 + Level 1 (1 + 2 = 3 chars)
        (&[0, 0], "---"),
        (&[0, 1], "--0"),
        (&[0, 64], "-0-"), // 64 = 1*64 + 0
        (&[0, 4095], "-zz"),
        // Level 0 + Level 1 + Level 2 (1 + 2 + 3 = 6 chars)
        (&[0, 0, 0], "------"),
        (&[0, 0, 1], "-----0"),
        (&[0, 0, 262143], "---zzz"),
    ];
    for (levels, encoded) in expectations {
        assert_eq!(key(levels).to_string(), encoded);
    }
}
}