evaluate: include agent prompt + affected nodes in comparisons

Each comparison now shows the LLM:
- Agent instructions (the .agent prompt file)
- Report output (what the agent did)
- Affected nodes content (what it changed)

The comparator sees intent, action, and impact — it can judge whether a deletion was correct, whether links are meaningful, and whether WRITE_NODEs capture real insights.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-03-14 19:34:10 -04:00 · 2026-03-14 19:34:10 -04:00 · b964335317
commit b964335317
parent 433d36aea8
1 changed file with 38 additions and 5 deletions
--- a/poc-memory/src/cli/agent.rs
+++ b/poc-memory/src/cli/agent.rs
@@ -151,22 +151,55 @@ pub fn cmd_evaluate_agents(samples_per_type: usize, model: &str) -> Result<(), S
    let agent_types = ["linker", "organize", "replay", "connector",
                       "separator", "transfer", "distill", "rename"];
-    let mut all_samples: Vec<(String, String, String)> = Vec::new(); // (agent_type, key, summary)
+    // Load agent prompt files for context
    let prompts_dir = crate::config::get().data_dir
        .parent().unwrap_or(std::path::Path::new("."))
        .join("poc-memory/agents");
    let mut all_samples: Vec<(String, String, String)> = Vec::new(); // (agent_type, key, context)
    for agent_type in &agent_types {
        // Load the agent's prompt file (skip JSON header line)
        let prompt_file = prompts_dir.join(format!("{}.agent", agent_type));
        let agent_prompt = std::fs::read_to_string(&prompt_file)
            .unwrap_or_default()
            .lines().skip(1).collect::<Vec<_>>().join("\n");
        let agent_prompt = crate::util::truncate(&agent_prompt, 500, "...");
        let prefix = format!("_consolidate-{}", agent_type);
        let mut keys: Vec<(String, i64)> = store.nodes.iter()
            .filter(|(k, _)| k.starts_with(&prefix))
            .map(|(k, n)| (k.clone(), n.timestamp))
            .collect();
-        keys.sort_by(|a, b| b.1.cmp(&a.1)); // newest first
+        keys.sort_by(|a, b| b.1.cmp(&a.1));
        keys.truncate(samples_per_type);
        for (key, _) in &keys {
-            let content = store.nodes.get(key)
+            let report = store.nodes.get(key)
-                .map(|n| crate::util::truncate(&n.content, 500, "..."))
+                .map(|n| n.content.clone())
                .unwrap_or_default();
-            all_samples.push((agent_type.to_string(), key.clone(), content));
+
            // Extract target node keys mentioned in the report and include their content
            let mut target_content = String::new();
            for word in report.split_whitespace() {
                let clean = word.trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '_');
                if clean.len() > 10 && store.nodes.contains_key(clean) {
                    if let Some(node) = store.nodes.get(clean) {
                        let preview = crate::util::truncate(&node.content, 200, "...");
                        target_content.push_str(&format!("\n### {}\n{}\n", clean, preview));
                        if target_content.len() > 1500 { break; }
                    }
                }
            }
            let context = format!(
                "## Agent instructions\n{}\n\n## Report output\n{}\n\n## Affected nodes\n{}",
                agent_prompt,
                crate::util::truncate(&report, 1000, "..."),
                if target_content.is_empty() { "(none found)".into() } else { target_content }
            );
            all_samples.push((agent_type.to_string(), key.clone(), context));
        }
    }