feat: transparently append to compressed archives
When appending to a compressed archive (gzip, brotli, zstd), the tool now handles compression automatically. Since some compression formats don't support appending to compressed files in place, we write a new compressed file with all the data and atomically rename it to replace the original (assuming there is enough space on that filesystem). This means you can work with compressed archives the same way as uncompressed ones. Point the tool at your .json.gz file and append values. No manual decompression/recompression needed.
This commit is contained in:
parent
da0fed29de
commit
2ab1c31993
34 changed files with 4747 additions and 1099 deletions
116
fuzz/Cargo.lock
generated
116
fuzz/Cargo.lock
generated
|
|
@ -2,6 +2,27 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "adler2"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
|
||||
|
||||
[[package]]
|
||||
name = "alloc-no-stdlib"
|
||||
version = "2.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
|
||||
|
||||
[[package]]
|
||||
name = "alloc-stdlib"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
|
|
@ -32,6 +53,27 @@ version = "2.9.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "8.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
"alloc-stdlib",
|
||||
"brotli-decompressor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "brotli-decompressor"
|
||||
version = "5.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
"alloc-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.19.0"
|
||||
|
|
@ -76,6 +118,15 @@ version = "0.8.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_arbitrary"
|
||||
version = "1.4.2"
|
||||
|
|
@ -109,6 +160,16 @@ version = "0.1.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"miniz_oxide",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.3.3"
|
||||
|
|
@ -173,13 +234,16 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "json-archive"
|
||||
version = "0.1.0"
|
||||
version = "0.99.0"
|
||||
dependencies = [
|
||||
"brotli",
|
||||
"chrono",
|
||||
"flate2",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"uuid",
|
||||
"xflags",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -227,6 +291,16 @@ version = "2.7.6"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
|
||||
dependencies = [
|
||||
"adler2",
|
||||
"simd-adler32",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
|
|
@ -242,6 +316,12 @@ version = "1.21.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.101"
|
||||
|
|
@ -340,6 +420,12 @@ version = "1.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "simd-adler32"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.106"
|
||||
|
|
@ -547,3 +633,31 @@ name = "xflags-macros"
|
|||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "672423d4fea7ffa2f6c25ba60031ea13dc6258070556f125cc4d790007d4a155"
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.13.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
|
||||
dependencies = [
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "7.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
|
||||
dependencies = [
|
||||
"zstd-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "2.0.16+zstd.1.5.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -36,3 +36,10 @@ path = "fuzz_targets/fuzz_mutations.rs"
|
|||
test = false
|
||||
doc = false
|
||||
bench = false
|
||||
|
||||
[[bin]]
|
||||
name = "fuzz_apply_move"
|
||||
path = "fuzz_targets/fuzz_apply_move.rs"
|
||||
test = false
|
||||
doc = false
|
||||
bench = false
|
||||
|
|
|
|||
186
fuzz/fuzz_targets/fuzz_apply_move.rs
Normal file
186
fuzz/fuzz_targets/fuzz_apply_move.rs
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
#![no_main]
|
||||
|
||||
use arbitrary::{Arbitrary, Unstructured};
|
||||
use json_archive::apply_move;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
#[derive(Arbitrary, Debug)]
|
||||
struct FuzzMoveInput {
|
||||
structure: FuzzStructure,
|
||||
moves: Vec<(u8, u8)>,
|
||||
}
|
||||
|
||||
#[derive(Arbitrary, Debug)]
|
||||
enum FuzzStructure {
|
||||
// Direct array at root path
|
||||
RootArray(Vec<FuzzValue>),
|
||||
// Object with array field
|
||||
ObjectWithArray {
|
||||
field_name: String,
|
||||
array: Vec<FuzzValue>,
|
||||
},
|
||||
// Nested object with array
|
||||
NestedArray {
|
||||
outer_field: String,
|
||||
inner_field: String,
|
||||
array: Vec<FuzzValue>,
|
||||
},
|
||||
// Non-array value (should error)
|
||||
NonArray(FuzzValue),
|
||||
}
|
||||
|
||||
#[derive(Arbitrary, Debug, Clone)]
|
||||
enum FuzzValue {
|
||||
Null,
|
||||
Bool(bool),
|
||||
SmallInt(i8),
|
||||
String(String),
|
||||
// Limit recursion depth
|
||||
Array(Vec<SimpleValue>),
|
||||
Object(Vec<(String, SimpleValue)>),
|
||||
}
|
||||
|
||||
#[derive(Arbitrary, Debug, Clone)]
|
||||
enum SimpleValue {
|
||||
Null,
|
||||
Bool(bool),
|
||||
SmallInt(i8),
|
||||
String(String),
|
||||
}
|
||||
|
||||
impl SimpleValue {
|
||||
fn to_json(&self) -> Value {
|
||||
match self {
|
||||
SimpleValue::Null => Value::Null,
|
||||
SimpleValue::Bool(b) => Value::Bool(*b),
|
||||
SimpleValue::SmallInt(n) => json!(n),
|
||||
SimpleValue::String(s) => Value::String(s.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FuzzValue {
|
||||
fn to_json(&self) -> Value {
|
||||
match self {
|
||||
FuzzValue::Null => Value::Null,
|
||||
FuzzValue::Bool(b) => Value::Bool(*b),
|
||||
FuzzValue::SmallInt(n) => json!(n),
|
||||
FuzzValue::String(s) => Value::String(s.clone()),
|
||||
FuzzValue::Array(arr) => Value::Array(arr.iter().map(|v| v.to_json()).collect()),
|
||||
FuzzValue::Object(obj) => {
|
||||
let map: serde_json::Map<String, Value> =
|
||||
obj.iter().map(|(k, v)| (k.clone(), v.to_json())).collect();
|
||||
Value::Object(map)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FuzzStructure {
|
||||
fn to_json_and_path(&self) -> (Value, String) {
|
||||
match self {
|
||||
FuzzStructure::RootArray(arr) => {
|
||||
let json_arr = Value::Array(arr.iter().map(|v| v.to_json()).collect());
|
||||
(json!({"root": json_arr}), "/root".to_string())
|
||||
}
|
||||
FuzzStructure::ObjectWithArray { field_name, array } => {
|
||||
let json_arr = Value::Array(array.iter().map(|v| v.to_json()).collect());
|
||||
let path = format!("/{}", escape_json_pointer(field_name));
|
||||
(json!({ field_name.clone(): json_arr }), path)
|
||||
}
|
||||
FuzzStructure::NestedArray {
|
||||
outer_field,
|
||||
inner_field,
|
||||
array,
|
||||
} => {
|
||||
let json_arr = Value::Array(array.iter().map(|v| v.to_json()).collect());
|
||||
let path = format!(
|
||||
"/{}/{}",
|
||||
escape_json_pointer(outer_field),
|
||||
escape_json_pointer(inner_field)
|
||||
);
|
||||
(
|
||||
json!({ outer_field.clone(): { inner_field.clone(): json_arr } }),
|
||||
path,
|
||||
)
|
||||
}
|
||||
FuzzStructure::NonArray(val) => {
|
||||
(json!({"value": val.to_json()}), "/value".to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Escapes a raw key for use as a single JSON Pointer reference token:
/// `~` becomes `~0` and `/` becomes `~1`; every other character is copied
/// through unchanged.
fn escape_json_pointer(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '~' => escaped.push_str("~0"),
            '/' => escaped.push_str("~1"),
            other => escaped.push(other),
        }
    }
    escaped
}
|
||||
|
||||
// Fuzz entry point: decode a `FuzzMoveInput` from the raw bytes, run
// `apply_move` on the generated document, and check that a successful call
// only reorders the target array.
fuzz_target!(|data: &[u8]| {
    let mut u = Unstructured::new(data);
    // Byte strings that do not decode into a `FuzzMoveInput` are skipped.
    if let Ok(input) = FuzzMoveInput::arbitrary(&mut u) {
        let (mut state, path) = input.structure.to_json_and_path();

        // Snapshot only the target array before mutating; the borrow of
        // `state` ends once the array has been cloned, so the rest of the
        // document does not need to be copied.
        let original_array = get_array_at_path(&state, &path).cloned();

        // Convert moves to usize (lossless widening from u8).
        let moves: Vec<(usize, usize)> = input
            .moves
            .iter()
            .map(|&(from, to)| (usize::from(from), usize::from(to)))
            .collect();

        match apply_move(&mut state, &path, moves) {
            Ok(()) => {
                // If successful, verify invariants using actual arrays from JSON.
                if let Some(orig_arr) = &original_array {
                    // 0. The target must still be an array; silently skipping the
                    //    checks here would hide a move that replaced or removed it.
                    let new_arr = get_array_at_path(&state, &path)
                        .expect("array at the target path disappeared after a successful move");

                    // 1. Array length should be preserved
                    assert_eq!(
                        new_arr.len(),
                        orig_arr.len(),
                        "Array length changed after move: was {}, now {}",
                        orig_arr.len(),
                        new_arr.len()
                    );

                    // 2. All original elements should still exist (as a multiset):
                    //    compare the sorted serialized forms of both arrays.
                    let mut orig_sorted: Vec<_> =
                        orig_arr.iter().map(|v| v.to_string()).collect();
                    let mut new_sorted: Vec<_> = new_arr.iter().map(|v| v.to_string()).collect();
                    orig_sorted.sort();
                    new_sorted.sort();
                    assert_eq!(
                        orig_sorted, new_sorted,
                        "Elements were lost or duplicated during move"
                    );
                }
            }
            Err(diag) => {
                // Error is expected for:
                // - Non-array targets
                // - Out of bounds indices
                // - Invalid paths
                // Just make sure we got a proper diagnostic
                assert!(!diag.description.is_empty());
            }
        }
    }
});
|
||||
|
||||
/// Resolves `path` as a JSON Pointer (RFC 6901) inside `state` and returns
/// the array found there.
///
/// Returns `None` when the pointer does not resolve or the resolved value is
/// not an array. An empty `path` refers to `state` itself.
///
/// Resolution is delegated to `serde_json::Value::pointer`, so empty
/// reference tokens are honored: `/` addresses the key `""` rather than
/// collapsing to the document root. A non-empty `path` must start with `/`,
/// which every path built by `FuzzStructure::to_json_and_path` does.
fn get_array_at_path<'a>(state: &'a Value, path: &str) -> Option<&'a Vec<Value>> {
    state.pointer(path)?.as_array()
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue