evaluate: include agent prompt + affected nodes in comparisons
Each comparison now shows the LLM:
- Agent instructions (the .agent prompt file)
- Report output (what the agent did)
- Affected nodes content (what it changed)

The comparator sees intent, action, and impact, so it can judge whether a deletion was correct, whether links are meaningful, and whether WRITE_NODEs capture real insights.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
433d36aea8
commit
b964335317
1 changed file with 38 additions and 5 deletions
|
|
@ -151,22 +151,55 @@ pub fn cmd_evaluate_agents(samples_per_type: usize, model: &str) -> Result<(), S
|
|||
let agent_types = ["linker", "organize", "replay", "connector",
|
||||
"separator", "transfer", "distill", "rename"];
|
||||
|
||||
let mut all_samples: Vec<(String, String, String)> = Vec::new(); // (agent_type, key, summary)
|
||||
// Load agent prompt files for context
|
||||
let prompts_dir = crate::config::get().data_dir
|
||||
.parent().unwrap_or(std::path::Path::new("."))
|
||||
.join("poc-memory/agents");
|
||||
|
||||
let mut all_samples: Vec<(String, String, String)> = Vec::new(); // (agent_type, key, context)
|
||||
|
||||
for agent_type in &agent_types {
|
||||
// Load the agent's prompt file (skip JSON header line)
|
||||
let prompt_file = prompts_dir.join(format!("{}.agent", agent_type));
|
||||
let agent_prompt = std::fs::read_to_string(&prompt_file)
|
||||
.unwrap_or_default()
|
||||
.lines().skip(1).collect::<Vec<_>>().join("\n");
|
||||
let agent_prompt = crate::util::truncate(&agent_prompt, 500, "...");
|
||||
|
||||
let prefix = format!("_consolidate-{}", agent_type);
|
||||
let mut keys: Vec<(String, i64)> = store.nodes.iter()
|
||||
.filter(|(k, _)| k.starts_with(&prefix))
|
||||
.map(|(k, n)| (k.clone(), n.timestamp))
|
||||
.collect();
|
||||
keys.sort_by(|a, b| b.1.cmp(&a.1)); // newest first
|
||||
keys.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
keys.truncate(samples_per_type);
|
||||
|
||||
for (key, _) in &keys {
|
||||
let content = store.nodes.get(key)
|
||||
.map(|n| crate::util::truncate(&n.content, 500, "..."))
|
||||
let report = store.nodes.get(key)
|
||||
.map(|n| n.content.clone())
|
||||
.unwrap_or_default();
|
||||
all_samples.push((agent_type.to_string(), key.clone(), content));
|
||||
|
||||
// Extract target node keys mentioned in the report and include their content
|
||||
let mut target_content = String::new();
|
||||
for word in report.split_whitespace() {
|
||||
let clean = word.trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '_');
|
||||
if clean.len() > 10 && store.nodes.contains_key(clean) {
|
||||
if let Some(node) = store.nodes.get(clean) {
|
||||
let preview = crate::util::truncate(&node.content, 200, "...");
|
||||
target_content.push_str(&format!("\n### {}\n{}\n", clean, preview));
|
||||
if target_content.len() > 1500 { break; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let context = format!(
|
||||
"## Agent instructions\n{}\n\n## Report output\n{}\n\n## Affected nodes\n{}",
|
||||
agent_prompt,
|
||||
crate::util::truncate(&report, 1000, "..."),
|
||||
if target_content.is_empty() { "(none found)".into() } else { target_content }
|
||||
);
|
||||
|
||||
all_samples.push((agent_type.to_string(), key.clone(), context));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue