feat: experiment with different implementations of LSEQ
This commit is contained in:
commit
1e45ef9314
23 changed files with 3578 additions and 0 deletions
382
rust/src/lib.rs
Normal file
382
rust/src/lib.rs
Normal file
|
|
@ -0,0 +1,382 @@
|
|||
use rand::Rng;
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{
|
||||
de::{self, Visitor},
|
||||
Deserialize, Serialize,
|
||||
};
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
const ALPHABET: &[u8] = b"-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
|
||||
|
||||
/// A sort key stored as a sequence of base-64 digits, most significant first.
///
/// Each digit is used as an index into `ALPHABET` when the key is rendered as
/// text. Equality and ordering are derived, so two keys compare
/// lexicographically by their digit sequences.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(Serialize), serde(into = "String"))]
pub struct SortKey {
    /// The digits of the key; the first element is the most significant one.
    numbers: Vec<u8>,
}
|
||||
|
||||
/// Deserializes a `SortKey` from its string form.
///
/// The deserializer is asked for a string, which is then handed to the
/// `FromStr` implementation of `SortKey`; a parse failure is reported as a
/// custom serde error carrying the parse error's message.
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for SortKey {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        /// Visitor that accepts a string and parses it into a `SortKey`.
        struct KeyFromStr;

        impl<'de> Visitor<'de> for KeyFromStr {
            type Value = SortKey;

            fn expecting(&self, out: &mut fmt::Formatter) -> fmt::Result {
                write!(out, "a string containing valid sort key characters")
            }

            fn visit_str<E: de::Error>(self, text: &str) -> Result<Self::Value, E> {
                text.parse::<SortKey>().map_err(E::custom)
            }
        }

        deserializer.deserialize_str(KeyFromStr)
    }
}
|
||||
|
||||
impl SortKey {
|
||||
pub fn from_numbers(numbers: Vec<u8>) -> Self {
|
||||
SortKey { numbers }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SortKey> for Vec<u8> {
|
||||
fn from(key: SortKey) -> Vec<u8> {
|
||||
key.numbers
|
||||
}
|
||||
}
|
||||
|
||||
/// Renders a key as text through its `Display` implementation.
impl From<SortKey> for String {
    fn from(key: SortKey) -> String {
        ToString::to_string(&key)
    }
}
|
||||
|
||||
impl AsRef<[u8]> for SortKey {
|
||||
fn as_ref(&self) -> &[u8] {
|
||||
&self.numbers
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for SortKey {
|
||||
fn from(s: String) -> Self {
|
||||
s.parse().unwrap_or_else(|_| SortKey { numbers: vec![0] })
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SortKey {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
for &n in &self.numbers {
|
||||
write!(f, "{}", ALPHABET[n as usize] as char)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug)]
|
||||
pub struct LSEQ<R: Rng> {
|
||||
strategies: Vec<bool>,
|
||||
rng: R,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl<R: Rng> LSEQ<R> {
|
||||
pub fn new(mut rng: R) -> Self {
|
||||
let strategies = vec![rng.gen_bool(0.5)];
|
||||
LSEQ { strategies, rng }
|
||||
}
|
||||
|
||||
pub fn alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey {
|
||||
// Convert to numeric arrays, using boundary values for null
|
||||
let p = before.map_or(vec![0], |s| s.numbers.clone());
|
||||
let q = after.map_or(vec![63], |s| s.numbers.clone());
|
||||
|
||||
// Walk through digits looking for space
|
||||
let mut depth = 0;
|
||||
let mut result = Vec::new();
|
||||
|
||||
loop {
|
||||
let p_val = if depth < p.len() { p[depth] } else { 0 };
|
||||
let q_val = if depth < q.len() { q[depth] } else { 63 };
|
||||
|
||||
let interval = q_val as i32 - p_val as i32;
|
||||
|
||||
// If we have space between values at this depth
|
||||
if interval > 1 {
|
||||
// Pick a value in the available range
|
||||
let range = interval - 1;
|
||||
let add_val = 1 + self.rng.gen_range(0..range) as u8;
|
||||
let new_value = if self.strategies[depth] {
|
||||
p_val + add_val
|
||||
} else {
|
||||
q_val - add_val
|
||||
};
|
||||
|
||||
// Take the prefix from p up to depth and append our new value
|
||||
result.push(new_value);
|
||||
return SortKey::from_numbers(result);
|
||||
}
|
||||
result.push(p_val);
|
||||
|
||||
// If values are the same or adjacent at this depth,
|
||||
// continue to next depth
|
||||
depth += 1;
|
||||
if depth >= self.strategies.len() {
|
||||
self.strategies.push(self.rng.gen_bool(0.5));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Error returned when an evenly spaced allocation cannot be set up.
#[derive(Debug)]
pub enum SpacingError {
    /// The requested number of items cannot be allocated.
    TooManyItems,
}

impl fmt::Display for SpacingError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            SpacingError::TooManyItems => "Too many items to allocate",
        };
        f.write_str(message)
    }
}

impl Error for SpacingError {}
|
||||
|
||||
/// Iterator state for handing out evenly spaced integer positions.
///
/// The step between positions is kept as an integer part plus a fractional
/// part; the fractional part is accumulated separately and turned into an
/// extra `+1` whenever it reaches a whole unit.
#[derive(Debug, Clone)]
pub struct EvenSpacingIterator {
    /// Number of positions still to be produced.
    remaining_items: usize,
    /// Largest position that may be produced.
    space_size: u64,
    /// Position that the next call to `next` will yield.
    next_item: u64,
    /// Integer part of step size.
    step_size_integer: u64,
    /// Fractional part of step size.
    step_size_error: f64,
    /// Accumulated error.
    error_accumulator: f64,
}
|
||||
|
||||
impl EvenSpacingIterator {
|
||||
// Static table of (64^k - 2) values for k from 1 to 9
|
||||
// We subtract 2 from each space size because we need to reserve two boundary positions:
|
||||
// 1. Position 0 (represented by "-") is reserved as the lower boundary
|
||||
// 2. Position 63 (represented by "z") is reserved as the upper boundary
|
||||
// This ensures we can always insert elements at the very beginning or end of the sequence
|
||||
const USABLE_SPACE: [usize; 9] = [
|
||||
64 - 2, // 64^1 - 2
|
||||
4096 - 2, // 64^2 - 2
|
||||
262144 - 2, // 64^3 - 2
|
||||
16777216 - 2, // 64^4 - 2
|
||||
1073741824 - 2, // 64^5 - 2
|
||||
68719476736 - 2, // 64^6 - 2
|
||||
4398046511104 - 2, // 64^7 - 2
|
||||
281474976710656 - 2, // 64^8 - 2
|
||||
18014398509481984 - 2, // 64^9 - 2
|
||||
];
|
||||
|
||||
pub fn new(total_items: usize) -> Result<(u64, Self), SpacingError> {
|
||||
if total_items == 0 {
|
||||
return Err(SpacingError::TooManyItems);
|
||||
}
|
||||
|
||||
// Find the smallest k where 64^k > total_items using the static table
|
||||
let mut k = 0;
|
||||
let mut space_size = 0;
|
||||
|
||||
for (index, &size) in Self::USABLE_SPACE.iter().enumerate() {
|
||||
if size >= total_items {
|
||||
k = index as u64 + 1; // k is 1-indexed
|
||||
space_size = size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If we couldn't find a suitable k, the request is too large
|
||||
if k == 0 {
|
||||
return Err(SpacingError::TooManyItems);
|
||||
}
|
||||
|
||||
// Calculate step size split into integer and fractional parts
|
||||
let step_size = (space_size as f64) / (total_items as f64);
|
||||
let step_size_integer = step_size.floor() as u64;
|
||||
let step_size_error = step_size - step_size_integer as f64;
|
||||
|
||||
Ok((
|
||||
k,
|
||||
EvenSpacingIterator {
|
||||
remaining_items: total_items,
|
||||
space_size: space_size.try_into().unwrap(),
|
||||
next_item: 1,
|
||||
step_size_integer,
|
||||
step_size_error,
|
||||
error_accumulator: 0.0,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
// Helper method to convert a position to a sort key
|
||||
pub fn position_to_key(k: u64, position: u64) -> SortKey {
|
||||
let mut result = Vec::with_capacity(k as usize);
|
||||
let mut pos = position;
|
||||
const BASE: u64 = 64;
|
||||
|
||||
// Fill in digits from least significant to most significant
|
||||
for _ in 0..k {
|
||||
// SAFETY: digit is guaranteed to be in bounds because:
|
||||
// 1. digit = pos % base where base is 64
|
||||
// 2. ALPHABET has exactly 64 elements
|
||||
// Therefore digit as u64 will always be 0-63
|
||||
let digit = (pos % BASE) as u8;
|
||||
pos /= BASE;
|
||||
result.push(digit);
|
||||
}
|
||||
|
||||
// Reverse to get most significant digit first
|
||||
result.reverse();
|
||||
SortKey::from_numbers(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for EvenSpacingIterator {
|
||||
type Item = u64;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.remaining_items == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.next_item > self.space_size {
|
||||
return None;
|
||||
}
|
||||
|
||||
let current_position = self.next_item;
|
||||
self.remaining_items -= 1;
|
||||
|
||||
self.next_item += self.step_size_integer;
|
||||
|
||||
self.error_accumulator += self.step_size_error;
|
||||
if self.error_accumulator >= 1.0 {
|
||||
self.next_item += 1;
|
||||
self.error_accumulator -= 1.0;
|
||||
}
|
||||
|
||||
Some(current_position)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum SortKeyParseError {
|
||||
InvalidCharacter(char),
|
||||
}
|
||||
|
||||
impl fmt::Display for SortKeyParseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
SortKeyParseError::InvalidCharacter(c) => write!(
|
||||
f,
|
||||
"Invalid character '{}' in sort key. Expected characters from alphabet: {}",
|
||||
c,
|
||||
String::from_utf8_lossy(ALPHABET)
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for SortKeyParseError {}
|
||||
|
||||
impl FromStr for SortKey {
|
||||
type Err = SortKeyParseError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let numbers = s
|
||||
.bytes()
|
||||
.map(|b| ALPHABET.iter().position(|&x| x == b).map(|pos| pos as u8))
|
||||
.collect::<Option<Vec<u8>>>()
|
||||
.ok_or_else(|| SortKeyParseError::InvalidCharacter(s.chars().next().unwrap()))?;
|
||||
Ok(SortKey { numbers })
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use rand::rngs::StdRng;
    use rand::SeedableRng;

    /// Keys compare by their digits, so "a" sorts strictly before "b".
    #[test]
    fn test_compare_lseq() {
        let a = "a".parse::<SortKey>().unwrap();
        let b = "b".parse::<SortKey>().unwrap();
        assert!(a < b);
        assert!(!(b < a));
        assert!(!(a < a));
    }

    /// Allocating after a key, then between two keys, keeps the order.
    #[test]
    fn test_lseq_alloc() {
        let rng = StdRng::seed_from_u64(42); // Deterministic RNG for testing
        let mut lseq = LSEQ::new(rng);
        let id1 = lseq.alloc(None, None);
        let id2 = lseq.alloc(Some(&id1), None);
        let id3 = lseq.alloc(Some(&id1), Some(&id2));

        assert!(id1 < id2);
        assert!(id1 < id3);
        assert!(id3 < id2);
    }

    /// Position 1 with two digits is digit 0 followed by digit 1.
    #[test]
    fn test_position_to_key() {
        const K: u64 = 2;
        assert_eq!(EvenSpacingIterator::position_to_key(K, 1).to_string(), "-0");
    }

    /// 4093 items need two digits (64^2 - 2 = 4094 usable positions).
    #[test]
    fn test_even_spacing_4093() {
        let (k, mut iter) = EvenSpacingIterator::new(4093).unwrap();
        assert_eq!(k, 2);

        // by_ref() borrows the iterator so its final state can still be printed.
        let positions: Vec<u64> = iter.by_ref().collect();
        println!("{:?}", iter);

        assert_eq!(positions.len(), 4093);
        // Positions must be strictly increasing and stay inside the usable space.
        assert!(positions.windows(2).all(|pair| pair[0] < pair[1]));
        assert!(positions.iter().all(|&pos| (1..=4094).contains(&pos)));
    }

    /// Six items fit in a single digit and all six positions are produced.
    #[test]
    fn test_even_spacing_6() {
        let (k, mut iter) = EvenSpacingIterator::new(6).unwrap();
        eprintln!("Created iterator with k={}", k);

        let mut positions = Vec::new();
        for (index, pos) in iter.by_ref().enumerate() {
            eprintln!("Iteration {}: Got position {}", index + 1, pos);
            positions.push(pos);
        }
        eprintln!("Final iterator state: {:?}", iter);

        assert_eq!(
            positions.len(),
            6,
            "Expected 6 positions, got {}",
            positions.len()
        );
        // Positions must be strictly increasing and stay inside the usable space.
        assert!(positions.windows(2).all(|pair| pair[0] < pair[1]));
        assert!(positions.iter().all(|&pos| (1..=62).contains(&pos)));
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue