replace state.json cache with bincode state.bin

Faster serialization/deserialization, smaller on disk (4.2MB vs 5.9MB).
Automatic migration from state.json on first load — reads the JSON,
writes state.bin, deletes the old file.

Added list-keys, list-edges, dump-json commands so Python scripts no
longer need to parse the cache directly. Updated bulk-categorize.py
and consolidation-loop.py to use the new CLI commands.
This commit is contained in:
ProofOfConcept 2026-02-28 22:30:03 -05:00
parent c4d1675128
commit 4b0bba7c56
6 changed files with 88 additions and 24 deletions

View file

@ -34,7 +34,8 @@ fn memory_dir() -> PathBuf {
/// Path of the `nodes.capnp` file inside the memory directory.
fn nodes_path() -> PathBuf { memory_dir().join("nodes.capnp") }
/// Path of the `relations.capnp` file inside the memory directory.
fn relations_path() -> PathBuf { memory_dir().join("relations.capnp") }
// NOTE(review): this line and the next both define `state_path`; this one
// looks like the removed (pre-change) side of the diff hunk — confirm.
fn state_path() -> PathBuf { memory_dir().join("state.json") }
/// Path of the `state.bin` cache file inside the memory directory.
fn state_path() -> PathBuf { memory_dir().join("state.bin") }
/// Path of the legacy `state.json` cache, used to find an old cache for
/// migration and to remove it after a save.
fn state_json_path() -> PathBuf { memory_dir().join("state.json") }
/// Path of the `.store.lock` file inside the memory directory.
fn lock_path() -> PathBuf { memory_dir().join(".store.lock") }
/// RAII file lock using flock(2). Dropped when scope exits.
@ -274,14 +275,27 @@ impl Store {
};
if cache_fresh {
let data = fs::read_to_string(&state)
.map_err(|e| format!("read state.json: {}", e))?;
let mut store: Store = serde_json::from_str(&data)
.map_err(|e| format!("parse state.json: {}", e))?;
let data = fs::read(&state)
.map_err(|e| format!("read state.bin: {}", e))?;
let mut store: Store = bincode::deserialize(&data)
.map_err(|e| format!("parse state.bin: {}", e))?;
store.rebuild_uuid_index();
return Ok(store);
}
// Try legacy JSON cache for migration
let json_state = state_json_path();
if json_state.exists() {
let data = fs::read_to_string(&json_state)
.map_err(|e| format!("read state.json: {}", e))?;
if let Ok(mut store) = serde_json::from_str::<Store>(&data) {
store.rebuild_uuid_index();
// Migrate to bincode
store.save()?;
return Ok(store);
}
}
// Rebuild from capnp logs
let mut store = Store::default();
@ -412,10 +426,16 @@ impl Store {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).ok();
}
let json = serde_json::to_string(self)
.map_err(|e| format!("serialize: {}", e))?;
fs::write(&path, json)
let data = bincode::serialize(self)
.map_err(|e| format!("bincode serialize: {}", e))?;
fs::write(&path, data)
.map_err(|e| format!("write {}: {}", path.display(), e))?;
// Clean up old JSON cache if it exists
let json_path = state_json_path();
if json_path.exists() {
fs::remove_file(&json_path).ok();
}
Ok(())
}