split agent: two-phase node decomposition for memory consolidation

Phase 1 sends a large node, together with its neighbor communities, to the LLM and gets back a JSON split plan (child keys, descriptions, section hints). Phase 2 fires one extraction call per child in parallel — each call gets the full parent content and extracts/reorganizes just its own portion. This handles arbitrarily large nodes because the output of each call is proportional to one child, not to the whole parent. Tested on the kent node (19K chars → 3 children totaling 20K chars, with clean topic separation). New files: prompts/split-plan.md — phase 1 planning prompt; prompts/split-extract.md — phase 2 extraction prompt; prompts/split.md — original single-phase prompt (kept for reference). Modified: agents/prompts.rs — split_candidates(), split_plan_prompt(), split_extract_prompt(), agent_prompt "split" arm; agents/daemon.rs — job_split_agent() two-phase implementation, RPC dispatch for the "split" agent type; tui.rs — added "split" to AGENT_TYPES.
2026-03-10 01:48:41 -04:00 · 2026-03-10 01:48:41 -04:00 · ca62692a28
commit ca62692a28
parent 4c973183c4
6 changed files with 515 additions and 2 deletions
--- a/poc-memory/src/agents/prompts.rs
+++ b/poc-memory/src/agents/prompts.rs
@ -311,6 +311,85 @@ fn format_rename_candidates(store: &Store, count: usize) -> String {
    out
 }

+/// Collect the keys of nodes that are candidates for splitting, largest first.
+///
+/// A node qualifies when its key does not start with `_`, it is not marked
+/// deleted, and `content.len()` is greater than 2000.
+///
+/// Returns owned keys ordered by descending content length. Nodes of equal
+/// length are ordered by key, so the result does not depend on the order in
+/// which `store.nodes` happens to iterate.
+pub fn split_candidates(store: &Store) -> Vec<String> {
+    let mut candidates: Vec<(&str, usize)> = store.nodes.iter()
+        .filter(|(key, node)| {
+            !key.starts_with('_')
+                && !node.deleted
+                && node.content.len() > 2000
+        })
+        .map(|(k, n)| (k.as_str(), n.content.len()))
+        .collect();
+    // Largest first; fall back to the key for a deterministic order on ties.
+    // The comparison is total, so an unstable sort gives the same result.
+    candidates.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
+    candidates.into_iter().map(|(k, _)| k.to_string()).collect()
+}
+
+/// Format a single node for the split-plan prompt (phase 1).
+///
+/// The section consists of:
+/// - a `### key (N chars)` header, where N is `content.len()`;
+/// - the node's neighbors grouped by community label (`c<id>` or
+///   `unclustered`), listing at most five per community followed by
+///   `+N more` when there are others;
+/// - the node's full content and a `---` separator.
+///
+/// If `key` is not present in `store.nodes`, a one-line "not found" message
+/// is returned instead of a section.
+fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) -> String {
+    // Look the node up first so a missing key returns before any graph work.
+    let node = match store.nodes.get(key) {
+        Some(n) => n,
+        None => return format!("Node '{}' not found\n", key),
+    };
+
+    let mut out = String::new();
+    out.push_str(&format!("### {} ({} chars)\n", key, node.content.len()));
+
+    // Show neighbors grouped by community
+    let neighbors = graph.neighbors(key);
+    if !neighbors.is_empty() {
+        // Community assignments are only needed when there is something to group.
+        let communities = graph.communities();
+
+        // BTreeMap keeps the community labels in sorted order in the output.
+        let mut by_community: std::collections::BTreeMap<String, Vec<(&str, f32)>> =
+            std::collections::BTreeMap::new();
+        for (nkey, strength) in &neighbors {
+            let comm = communities.get(nkey.as_str())
+                .map(|c| format!("c{}", c))
+                .unwrap_or_else(|| "unclustered".into());
+            by_community.entry(comm)
+                .or_default()
+                .push((nkey.as_str(), *strength));
+        }
+
+        out.push_str("\nNeighbors by community:\n");
+        for (comm, members) in &by_community {
+            out.push_str(&format!("  {} ({}):", comm, members.len()));
+            // Members appear in the order `graph.neighbors` returned them.
+            for (nkey, strength) in members.iter().take(5) {
+                out.push_str(&format!(" {}({:.2})", nkey, strength));
+            }
+            if members.len() > 5 {
+                out.push_str(&format!(" +{} more", members.len() - 5));
+            }
+            out.push('\n');
+        }
+    }
+
+    // Full content
+    out.push_str(&format!("\nContent:\n{}\n\n", node.content));
+    out.push_str("---\n\n");
+    out
+}
+
+/// Build the split-plan prompt for a single node (phase 1).
+///
+/// Fills the `split-plan` template with the graph topology header and the
+/// formatted node section for `key`.
+///
+/// # Errors
+/// Returns `No node '<key>'` when `key` is not in the store, so a
+/// "not found" placeholder is never sent out as a prompt. Errors from
+/// `load_prompt` are passed through unchanged.
+pub fn split_plan_prompt(store: &Store, key: &str) -> Result<String, String> {
+    // Fail before building the graph; there is nothing to plan for a missing node.
+    if store.nodes.get(key).is_none() {
+        return Err(format!("No node '{}'", key));
+    }
+    let graph = store.build_graph();
+    let topology = format_topology_header(&graph);
+    let node_section = format_split_plan_node(store, &graph, key);
+    load_prompt("split-plan", &[("{{TOPOLOGY}}", &topology), ("{{NODE}}", &node_section)])
+}
+
+/// Build the split-extract prompt for one child (phase 2).
+///
+/// Fills the `split-extract` template with the child's key, description and
+/// section hints, plus the parent node's full content.
+///
+/// # Errors
+/// Returns `No node '<parent_key>'` when the parent is not in the store;
+/// errors from `load_prompt` are passed through unchanged.
+pub fn split_extract_prompt(store: &Store, parent_key: &str, child_key: &str, child_desc: &str, child_sections: &str) -> Result<String, String> {
+    let parent = match store.nodes.get(parent_key) {
+        Some(found) => found,
+        None => return Err(format!("No node '{}'", parent_key)),
+    };
+
+    // Placeholder → value table handed to the template loader.
+    let substitutions = [
+        ("{{CHILD_KEY}}", child_key),
+        ("{{CHILD_DESC}}", child_desc),
+        ("{{CHILD_SECTIONS}}", child_sections),
+        ("{{PARENT_CONTENT}}", parent.content.as_str()),
+    ];
+    load_prompt("split-extract", &substitutions)
+}
+
 /// Run agent consolidation on top-priority nodes
 pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
    let graph = store.build_graph();
@ -424,6 +503,16 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
            let nodes_section = format_rename_candidates(store, count);
            load_prompt("rename", &[("{{NODES}}", &nodes_section)])
        }
-        _ => Err(format!("Unknown agent: {}. Use: replay, linker, separator, transfer, health, rename", agent)),
+        "split" => {
+            // Phase 1: plan prompt for the largest candidate
+            let candidates = split_candidates(store);
+            if candidates.is_empty() {
+                return Err("No nodes large enough to split".to_string());
+            }
+            let key = &candidates[0];
+            let node_section = format_split_plan_node(store, &graph, key);
+            load_prompt("split-plan", &[("{{TOPOLOGY}}", &topology), ("{{NODE}}", &node_section)])
+        }
+        _ => Err(format!("Unknown agent: {}. Use: replay, linker, separator, transfer, health, rename, split", agent)),
    }
 }