replace state.json cache with bincode state.bin
Faster serialization/deserialization, smaller on disk (4.2MB vs 5.9MB). Automatic migration from state.json on first load — reads the JSON, writes state.bin, deletes the old file. Added list-keys, list-edges, dump-json commands so Python scripts no longer need to parse the cache directly. Updated bulk-categorize.py and consolidation-loop.py to use the new CLI commands.
This commit is contained in:
parent
c4d1675128
commit
4b0bba7c56
6 changed files with 88 additions and 24 deletions
10
Cargo.lock
generated
10
Cargo.lock
generated
|
|
@ -17,6 +17,15 @@ version = "1.0.102"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
|
||||
|
||||
[[package]]
|
||||
name = "bincode"
|
||||
version = "1.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.11.0"
|
||||
|
|
@ -184,6 +193,7 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|||
name = "poc-memory"
|
||||
version = "0.4.0"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"capnp",
|
||||
"capnpc",
|
||||
"libc",
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ capnp = "0.20"
|
|||
uuid = { version = "1", features = ["v4"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
bincode = "1"
|
||||
regex = "1"
|
||||
rand = "0.8"
|
||||
libc = "0.2"
|
||||
|
|
|
|||
|
|
@ -50,13 +50,12 @@ def call_sonnet(prompt: str, timeout: int = 300) -> str:
|
|||
|
||||
|
||||
def get_all_keys() -> list[str]:
|
||||
"""Get all node keys from state.json."""
|
||||
state_path = MEMORY_DIR / "state.json"
|
||||
if not state_path.exists():
|
||||
"""Get all node keys via poc-memory list-keys."""
|
||||
r = subprocess.run(["poc-memory", "list-keys"],
|
||||
capture_output=True, text=True, timeout=30)
|
||||
if r.returncode != 0:
|
||||
return []
|
||||
content = state_path.read_text()
|
||||
keys = re.findall(r'"key":\s*"([^"]*)"', content)
|
||||
return sorted(set(keys))
|
||||
return [k for k in r.stdout.strip().split('\n') if k]
|
||||
|
||||
|
||||
def get_unique_files(keys: list[str]) -> list[str]:
|
||||
|
|
|
|||
|
|
@ -167,16 +167,17 @@ def build_triangle_prompt(round_num: int) -> str:
|
|||
graph = get_graph_structure()
|
||||
status = get_status()
|
||||
|
||||
# Get some node pairs that share neighbors
|
||||
state_path = MEMORY_DIR / "state.json"
|
||||
if state_path.exists():
|
||||
state = state_path.read_text()
|
||||
# Extract some relations
|
||||
relations = re.findall(r'"source_key":\s*"([^"]*)".*?"target_key":\s*"([^"]*)"', state[:20000])
|
||||
else:
|
||||
relations = []
|
||||
# Get edges via CLI
|
||||
r = subprocess.run(["poc-memory", "list-edges"],
|
||||
capture_output=True, text=True, timeout=30)
|
||||
relations = []
|
||||
if r.returncode == 0:
|
||||
for line in r.stdout.strip().split('\n')[:100]:
|
||||
parts = line.split('\t')
|
||||
if len(parts) >= 2:
|
||||
relations.append((parts[0], parts[1]))
|
||||
|
||||
rel_sample = '\n'.join(f" {s} → {t}" for s, t in relations[:100])
|
||||
rel_sample = '\n'.join(f" {s} → {t}" for s, t in relations)
|
||||
|
||||
return f"""You are a triangle-closing agent (round {round_num}).
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,8 @@ fn memory_dir() -> PathBuf {
|
|||
|
||||
fn nodes_path() -> PathBuf { memory_dir().join("nodes.capnp") }
|
||||
fn relations_path() -> PathBuf { memory_dir().join("relations.capnp") }
|
||||
fn state_path() -> PathBuf { memory_dir().join("state.json") }
|
||||
fn state_path() -> PathBuf { memory_dir().join("state.bin") }
|
||||
fn state_json_path() -> PathBuf { memory_dir().join("state.json") }
|
||||
fn lock_path() -> PathBuf { memory_dir().join(".store.lock") }
|
||||
|
||||
/// RAII file lock using flock(2). Dropped when scope exits.
|
||||
|
|
@ -274,14 +275,27 @@ impl Store {
|
|||
};
|
||||
|
||||
if cache_fresh {
|
||||
let data = fs::read_to_string(&state)
|
||||
.map_err(|e| format!("read state.json: {}", e))?;
|
||||
let mut store: Store = serde_json::from_str(&data)
|
||||
.map_err(|e| format!("parse state.json: {}", e))?;
|
||||
let data = fs::read(&state)
|
||||
.map_err(|e| format!("read state.bin: {}", e))?;
|
||||
let mut store: Store = bincode::deserialize(&data)
|
||||
.map_err(|e| format!("parse state.bin: {}", e))?;
|
||||
store.rebuild_uuid_index();
|
||||
return Ok(store);
|
||||
}
|
||||
|
||||
// Try legacy JSON cache for migration
|
||||
let json_state = state_json_path();
|
||||
if json_state.exists() {
|
||||
let data = fs::read_to_string(&json_state)
|
||||
.map_err(|e| format!("read state.json: {}", e))?;
|
||||
if let Ok(mut store) = serde_json::from_str::<Store>(&data) {
|
||||
store.rebuild_uuid_index();
|
||||
// Migrate to bincode
|
||||
store.save()?;
|
||||
return Ok(store);
|
||||
}
|
||||
}
|
||||
|
||||
// Rebuild from capnp logs
|
||||
let mut store = Store::default();
|
||||
|
||||
|
|
@ -412,10 +426,16 @@ impl Store {
|
|||
if let Some(parent) = path.parent() {
|
||||
fs::create_dir_all(parent).ok();
|
||||
}
|
||||
let json = serde_json::to_string(self)
|
||||
.map_err(|e| format!("serialize: {}", e))?;
|
||||
fs::write(&path, json)
|
||||
let data = bincode::serialize(self)
|
||||
.map_err(|e| format!("bincode serialize: {}", e))?;
|
||||
fs::write(&path, data)
|
||||
.map_err(|e| format!("write {}: {}", path.display(), e))?;
|
||||
|
||||
// Clean up old JSON cache if it exists
|
||||
let json_path = state_json_path();
|
||||
if json_path.exists() {
|
||||
fs::remove_file(&json_path).ok();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
35
src/main.rs
35
src/main.rs
|
|
@ -59,6 +59,9 @@ fn main() {
|
|||
"apply-agent" => cmd_apply_agent(&args[2..]),
|
||||
"digest" => cmd_digest(&args[2..]),
|
||||
"trace" => cmd_trace(&args[2..]),
|
||||
"list-keys" => cmd_list_keys(),
|
||||
"list-edges" => cmd_list_edges(),
|
||||
"dump-json" => cmd_dump_json(),
|
||||
_ => {
|
||||
eprintln!("Unknown command: {}", args[1]);
|
||||
usage();
|
||||
|
|
@ -103,7 +106,10 @@ Commands:
|
|||
apply-agent [--all] Import pending agent results into the graph
|
||||
digest daily [DATE] Generate daily episodic digest (default: today)
|
||||
digest weekly [DATE] Generate weekly digest (any date in target week)
|
||||
trace KEY Walk temporal links: semantic ↔ episodic ↔ conversation");
|
||||
trace KEY Walk temporal links: semantic ↔ episodic ↔ conversation
|
||||
list-keys List all node keys (one per line)
|
||||
list-edges List all edges (tsv: source target strength type)
|
||||
dump-json Dump entire store as JSON");
|
||||
}
|
||||
|
||||
fn cmd_search(args: &[String]) -> Result<(), String> {
|
||||
|
|
@ -738,6 +744,33 @@ fn cmd_trace(args: &[String]) -> Result<(), String> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_list_keys() -> Result<(), String> {
|
||||
let store = capnp_store::Store::load()?;
|
||||
let mut keys: Vec<_> = store.nodes.keys().collect();
|
||||
keys.sort();
|
||||
for key in keys {
|
||||
println!("{}", key);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_list_edges() -> Result<(), String> {
|
||||
let store = capnp_store::Store::load()?;
|
||||
for rel in &store.relations {
|
||||
println!("{}\t{}\t{:.2}\t{:?}",
|
||||
rel.source_key, rel.target_key, rel.strength, rel.rel_type);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_dump_json() -> Result<(), String> {
|
||||
let store = capnp_store::Store::load()?;
|
||||
let json = serde_json::to_string_pretty(&store)
|
||||
.map_err(|e| format!("serialize: {}", e))?;
|
||||
println!("{}", json);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_interference(args: &[String]) -> Result<(), String> {
|
||||
let mut threshold = 0.4f32;
|
||||
let mut i = 0;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue