consciousness/poc-memory/src/cli/agent.rs
Kent Overstreet 7fe55e28bd poc-memory agent run --debug: dump prompt and response
Add --debug flag that prints the full prompt and LLM response to
stdout, making it easy to iterate on agent prompts. Also adds
prompt field to AgentResult so callers can inspect what was sent.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 19:13:43 -04:00

414 lines
15 KiB
Rust

// cli/agent.rs — agent subcommand handlers
use crate::store;
use crate::store::StoreView;
use crate::agents::llm;
use std::sync::atomic::{AtomicUsize, Ordering};
/// Run one agent by name against the loaded store.
///
/// * `agent` — agent name; also used as the `[agent]` prefix on log lines.
/// * `count` — forwarded unchanged to the agent runner.
/// * `dry_run` — when set, exports `POC_MEMORY_DRY_RUN=1` into the process
///   environment before anything else happens.
/// * `debug` — when set, runs the agent once via `run_one_agent` and dumps the
///   prompt and the response to stdout (section banners go to stderr);
///   otherwise runs `run_and_apply_with_log` and reports the action counts.
///
/// # Errors
/// Propagates failures from loading the store or from the agent run.
pub fn cmd_run_agent(agent: &str, count: usize, dry_run: bool, debug: bool) -> Result<(), String> {
    if dry_run {
        std::env::set_var("POC_MEMORY_DRY_RUN", "1");
    }

    let mut store = store::Store::load()?;
    let log = |msg: &str| eprintln!("[{}] {}", agent, msg);

    // Normal mode: run the agent and apply whatever it produced.
    if !debug {
        let (total, applied) = crate::agents::knowledge::run_and_apply_with_log(
            &mut store, agent, count, "test", &log,
        )?;
        eprintln!("[{}] {} actions, {} applied", agent, total, applied);
        return Ok(());
    }

    // Debug mode: show prompt, call LLM, show response — don't apply.
    let outcome = crate::agents::knowledge::run_one_agent(
        &mut store, agent, count, "test", &log,
    )?;

    // Banners go to stderr so stdout carries only the prompt and the response.
    eprintln!("\n=== PROMPT ({} bytes) ===\n", outcome.prompt.len());
    println!("{}", outcome.prompt);
    eprintln!("\n=== RESPONSE ({} bytes) ===\n", outcome.output.len());
    println!("{}", outcome.output);
    eprintln!(
        "\n=== PARSED: {} actions, {} no-ops ===",
        outcome.actions.len(),
        outcome.no_ops
    );
    Ok(())
}
/// Print a consolidation prompt, or run the generic consolidation batch.
///
/// With `agent` set, builds that agent's prompt over `count` items and prints
/// it to stdout. Without it, delegates to `consolidation_batch` with `count`
/// and `auto` and returns its result.
///
/// # Errors
/// Propagates failures from loading the store or from the prompt helpers.
pub fn cmd_consolidate_batch(count: usize, auto: bool, agent: Option<String>) -> Result<(), String> {
    let store = store::Store::load()?;
    match agent {
        Some(agent_name) => {
            let batch = crate::agents::prompts::agent_prompt(&store, &agent_name, count)?;
            println!("{}", batch.prompt);
            Ok(())
        }
        None => crate::agents::prompts::consolidation_batch(&store, count, auto),
    }
}
/// Print the replay queue returned by `crate::neuro::replay_queue`.
///
/// Emits a header with the queue length, then one numbered line per item
/// showing its priority, classification, key, interval in days, emotion and
/// outlier score (labelled `spectral` in the output).
///
/// # Errors
/// Fails only if the store cannot be loaded.
pub fn cmd_replay_queue(count: usize) -> Result<(), String> {
    let store = store::Store::load()?;
    let queue = crate::neuro::replay_queue(&store, count);

    println!("Replay queue ({} items):", queue.len());

    // Ranks are 1-based for display.
    let mut rank: usize = 0;
    for item in queue.iter() {
        rank += 1;
        println!(
            " {:2}. [{:.3}] {:>10} {} (interval={}d, emotion={:.1}, spectral={:.1})",
            rank,
            item.priority,
            item.classification,
            item.key,
            item.interval_days,
            item.emotion,
            item.outlier_score
        );
    }
    Ok(())
}
/// Build a consolidation plan for the loaded store and print its formatted
/// form to stdout.
///
/// # Errors
/// Fails only if the store cannot be loaded.
pub fn cmd_consolidate_session() -> Result<(), String> {
    let store = store::Store::load()?;
    let plan = crate::neuro::consolidation_plan(&store);
    let rendered = crate::neuro::format_plan(&plan);
    println!("{}", rendered);
    Ok(())
}
/// Load the store and hand it to `crate::consolidate::consolidate_full`,
/// returning that call's result unchanged.
///
/// # Errors
/// Propagates a store-load failure or whatever `consolidate_full` reports.
pub fn cmd_consolidate_full() -> Result<(), String> {
    let mut loaded = store::Store::load()?;
    let store_ref = &mut loaded;
    crate::consolidate::consolidate_full(store_ref)
}
/// List, and optionally apply, the links parsed out of digest nodes.
///
/// The store is loaded once to parse the links and then released. Without
/// `do_apply` the links are only listed (each with up to 80 bytes of its
/// reason) together with a hint on how to apply them. With `do_apply` the
/// store is loaded again mutably and the links are applied, after which the
/// applied / fallback / skipped counts are printed.
///
/// # Errors
/// Fails if the store cannot be loaded (either time).
pub fn cmd_digest_links(do_apply: bool) -> Result<(), String> {
    /// Maximum number of bytes of a link's reason shown in the listing.
    const REASON_PREVIEW_BYTES: usize = 80;

    let store = store::Store::load()?;
    let links = crate::digest::parse_all_digest_links(&store);
    // Release the read-only store before (possibly) loading a mutable one below.
    drop(store);

    println!("Found {} unique links from digest nodes", links.len());

    if !do_apply {
        for (i, link) in links.iter().enumerate() {
            // Separator added: the endpoints were previously printed glued together.
            println!(" {:3}. {} -> {}", i + 1, link.source, link.target);
            if !link.reason.is_empty() {
                // Back off to the nearest char boundary: slicing at a raw byte
                // offset panics when it lands inside a multi-byte character.
                let mut end = link.reason.len().min(REASON_PREVIEW_BYTES);
                while !link.reason.is_char_boundary(end) {
                    end -= 1;
                }
                println!(" ({})", &link.reason[..end]);
            }
        }
        println!("\nTo apply: poc-memory digest-links --apply");
        return Ok(());
    }

    let mut store = store::Store::load()?;
    let (applied, skipped, fallbacks) = crate::digest::apply_digest_links(&mut store, &links);
    println!("\nApplied: {} ({} file-level fallbacks) Skipped: {}", applied, fallbacks, skipped);
    Ok(())
}
/// Enrich a journal entry from a JSONL transcript.
///
/// Checks that `jsonl_path` names an existing regular file, then loads the
/// store and forwards all arguments to `crate::enrich::journal_enrich`.
///
/// # Errors
/// Returns `JSONL not found: <path>` when the file is missing; otherwise
/// propagates store-load and enrichment failures.
pub fn cmd_journal_enrich(jsonl_path: &str, entry_text: &str, grep_line: usize) -> Result<(), String> {
    let transcript = std::path::Path::new(jsonl_path);
    if transcript.is_file() {
        let mut store = store::Store::load()?;
        crate::enrich::journal_enrich(&mut store, jsonl_path, entry_text, grep_line)
    } else {
        Err(format!("JSONL not found: {}", jsonl_path))
    }
}
/// Load the store and forward to `crate::consolidate::apply_consolidation`
/// with the given `do_apply` flag and optional `report_file`, returning that
/// call's result unchanged.
///
/// # Errors
/// Propagates a store-load failure or whatever `apply_consolidation` reports.
pub fn cmd_apply_consolidation(do_apply: bool, report_file: Option<&str>) -> Result<(), String> {
    let mut loaded = store::Store::load()?;
    let store_ref = &mut loaded;
    crate::consolidate::apply_consolidation(store_ref, do_apply, report_file)
}
/// Run the knowledge loop with the given limits and print a summary.
///
/// The four arguments override the matching fields of
/// `KnowledgeLoopConfig`; all remaining fields keep their defaults. After the
/// loop finishes, the number of cycles and the sum of their `total_applied`
/// counters are reported on stderr.
///
/// # Errors
/// Propagates any failure from `run_knowledge_loop`.
pub fn cmd_knowledge_loop(max_cycles: usize, batch_size: usize, window: usize, max_depth: i32) -> Result<(), String> {
    let config = crate::knowledge::KnowledgeLoopConfig {
        max_cycles,
        batch_size,
        window,
        max_depth,
        ..Default::default()
    };

    let results = crate::knowledge::run_knowledge_loop(&config)?;

    // Total actions applied across every cycle.
    let mut applied_total: usize = 0;
    for cycle in results.iter() {
        applied_total += cycle.total_applied;
    }

    eprintln!(
        "\nCompleted {} cycles, {} total actions applied",
        results.len(),
        applied_total
    );
    Ok(())
}
/// Mine facts from one transcript, or from every `.jsonl` file in a directory.
///
/// * `path` — a transcript file, or a directory when `batch` is set.
/// * `batch` — treat `path` as a directory and process its `.jsonl` files in
///   sorted order (directory entries that cannot be read are skipped).
/// * `dry_run` — forwarded to `mine_batch`; when set, no JSON is emitted.
/// * `output_file` — where to write the pretty-printed JSON; stdout if `None`.
/// * `min_messages` — forwarded to `mine_batch`.
///
/// Progress and totals go to stderr.
///
/// # Errors
/// Fails when `batch` is set and `path` is not a directory, when the
/// directory cannot be read, or when mining, serializing or writing fails.
pub fn cmd_fact_mine(path: &str, batch: bool, dry_run: bool, output_file: Option<&str>, min_messages: usize) -> Result<(), String> {
    let root = std::path::Path::new(path);

    let transcripts: Vec<std::path::PathBuf> = if !batch {
        vec![root.to_path_buf()]
    } else {
        if !root.is_dir() {
            return Err(format!("Not a directory: {}", path));
        }
        let listing = std::fs::read_dir(root).map_err(|e| format!("read dir: {}", e))?;

        let mut found = Vec::new();
        // `flatten` drops entries that failed to read, keeping only the Ok ones.
        for entry in listing.flatten() {
            let candidate = entry.path();
            if candidate.extension().map_or(false, |ext| ext == "jsonl") {
                found.push(candidate);
            }
        }
        found.sort();
        eprintln!("Found {} transcripts", found.len());
        found
    };

    let inputs: Vec<&std::path::Path> = transcripts
        .iter()
        .map(std::path::PathBuf::as_path)
        .collect();
    let facts = crate::fact_mine::mine_batch(&inputs, min_messages, dry_run)?;

    if !dry_run {
        let json = serde_json::to_string_pretty(&facts)
            .map_err(|e| format!("serialize: {}", e))?;
        match output_file {
            Some(out) => {
                std::fs::write(out, &json).map_err(|e| format!("write: {}", e))?;
                eprintln!("\nWrote {} facts to {}", facts.len(), out);
            }
            None => println!("{}", json),
        }
    }

    eprintln!("\nTotal: {} facts from {} transcripts", facts.len(), transcripts.len());
    Ok(())
}
/// Mine facts from the file at `path` via `mine_and_store` and report on
/// stderr how many were stored.
///
/// # Errors
/// Returns `File not found: <path>` when nothing exists at `path`; otherwise
/// propagates the failure from `mine_and_store`.
pub fn cmd_fact_mine_store(path: &str) -> Result<(), String> {
    let source = std::path::Path::new(path);
    if source.exists() {
        let stored = crate::fact_mine::mine_and_store(source, None)?;
        eprintln!("Stored {} facts", stored);
        Ok(())
    } else {
        Err(format!("File not found: {}", source.display()))
    }
}
// Agent evaluation: sample recent actions from each agent type, sort by
// quality using LLM pairwise comparison, report per-type rankings.

/// Path of the Elo ratings file: `agent-elo.json` inside the configured
/// data directory.
fn elo_path() -> std::path::PathBuf {
    let cfg = crate::config::get();
    cfg.data_dir.join("agent-elo.json")
}
/// Load the persisted Elo ratings, giving every agent type a rating.
///
/// Reads and parses the ratings file; if it is missing, unreadable or not
/// valid JSON, starts from an empty table. Any name in `agent_types` that has
/// no rating yet is initialised to 1000.0. Entries for names outside
/// `agent_types` are kept as loaded.
fn load_elo_ratings(agent_types: &[&str]) -> std::collections::HashMap<String, f64> {
    let stored: Option<std::collections::HashMap<String, f64>> =
        match std::fs::read_to_string(elo_path()) {
            Ok(text) => serde_json::from_str(&text).ok(),
            Err(_) => None,
        };

    let mut ratings = stored.unwrap_or_default();
    for &name in agent_types {
        ratings.entry(name.to_string()).or_insert(1000.0);
    }
    ratings
}
/// Persist the Elo ratings as pretty-printed JSON at the ratings file path.
///
/// Saving is best-effort: a failure never aborts the caller, but it is
/// reported on stderr so that lost ratings do not go unnoticed.
fn save_elo_ratings(ratings: &std::collections::HashMap<String, f64>) {
    let path = elo_path();

    let json = match serde_json::to_string_pretty(ratings) {
        Ok(json) => json,
        Err(e) => {
            eprintln!("warning: could not serialize Elo ratings: {}", e);
            return;
        }
    };

    if let Err(e) = std::fs::write(&path, json) {
        eprintln!("warning: could not write Elo ratings to {}: {}", path.display(), e);
    }
}
/// Rank agent types against each other using LLM-judged pairwise comparisons.
///
/// For every known agent type, up to 20 nodes whose key starts with
/// `_consolidate-<type>` (highest timestamps first) are collected and turned
/// into a comparison context: the agent's instructions, the report text, and
/// previews of store nodes the report mentions. Then `matchups` pairings
/// between two *different* agent types are drawn at random, each judged by
/// the LLM, and the verdict is fed into an Elo update (K = 32). Ratings are
/// loaded before and saved after the run, and the ranking is printed to
/// stdout.
///
/// * `matchups` — number of pairwise comparisons to attempt.
/// * `model` — forwarded to `llm_compare` (`"haiku"` selects the Haiku call,
///   anything else the Sonnet call).
/// * `dry_run` — print one example comparison prompt and return without
///   calling the LLM or touching the ratings.
///
/// A comparison whose LLM call fails is reported on stderr and skipped; it
/// does not change any rating.
///
/// # Errors
/// Fails if the store cannot be loaded, or if fewer than two agent types have
/// any recorded actions.
pub fn cmd_evaluate_agents(matchups: usize, model: &str, dry_run: bool) -> Result<(), String> {
    use skillratings::elo::{elo, EloConfig, EloRating};
    use skillratings::Outcomes;

    let store = store::Store::load()?;

    let agent_types: Vec<&str> = vec![
        "linker", "organize", "replay", "connector",
        "separator", "transfer", "distill", "rename",
    ];

    // Agent prompt files: prefer the `agents` directory of the source checkout,
    // otherwise fall back to `agents` under the memory directory.
    let prompts_dir = {
        let repo = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("agents");
        if repo.is_dir() { repo } else { crate::store::memory_dir().join("agents") }
    };

    // Collect recent actions per agent type as (node key, comparison context).
    let mut actions: std::collections::HashMap<String, Vec<(String, String)>> =
        std::collections::HashMap::new();
    for agent_type in &agent_types {
        // A missing prompt file yields empty instructions. The first line of
        // the file is skipped (presumably a header — confirm against the
        // .agent file format).
        let prompt_file = prompts_dir.join(format!("{}.agent", agent_type));
        let agent_prompt = std::fs::read_to_string(&prompt_file)
            .unwrap_or_default()
            .lines().skip(1).collect::<Vec<_>>().join("\n");
        let agent_prompt = crate::util::truncate(&agent_prompt, 500, "...");

        let prefix = format!("_consolidate-{}", agent_type);
        let mut keys: Vec<(String, i64)> = store.nodes.iter()
            .filter(|(k, _)| k.starts_with(&prefix))
            .map(|(k, n)| (k.clone(), n.timestamp))
            .collect();
        keys.sort_by(|a, b| b.1.cmp(&a.1));
        keys.truncate(20); // pool of recent actions to sample from

        let mut type_actions = Vec::new();
        for (key, _) in &keys {
            let report = store.nodes.get(key)
                .map(|n| n.content.clone())
                .unwrap_or_default();

            // Any word in the report that is itself a node key (longer than 10
            // bytes) counts as an affected node; include a short preview of
            // each, visiting every distinct word at most once.
            let mut target_content = String::new();
            let mut seen = std::collections::HashSet::new();
            for word in report.split_whitespace() {
                let clean = word.trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '_');
                if clean.len() > 10 && seen.insert(clean.to_string()) {
                    if let Some(node) = store.nodes.get(clean) {
                        let preview = crate::util::truncate(&node.content, 200, "...");
                        target_content.push_str(&format!("\n### {}\n{}\n", clean, preview));
                        // Cap the amount of node context per action.
                        if target_content.len() > 1500 { break; }
                    }
                }
            }

            let context = format!(
                "## Agent instructions\n{}\n\n## Report output\n{}\n\n## Affected nodes\n{}",
                agent_prompt,
                crate::util::truncate(&report, 1000, "..."),
                if target_content.is_empty() { "(none found)".into() } else { target_content }
            );
            type_actions.push((key.clone(), context));
        }
        actions.insert(agent_type.to_string(), type_actions);
    }

    // Filter to types that have at least 1 action
    let active_types: Vec<&str> = agent_types.iter()
        .filter(|t| actions.get(**t).map(|a| !a.is_empty()).unwrap_or(false))
        .copied()
        .collect();

    if active_types.len() < 2 {
        return Err("Need at least 2 agent types with actions".into());
    }

    eprintln!("Evaluating {} agent types with {} matchups (model={})",
        active_types.len(), matchups, model);

    if dry_run {
        // Show the prompt for the first vs. the last active type, using the
        // most recent action of each.
        let t1 = active_types[0];
        let t2 = active_types[active_types.len() - 1];
        let a1 = &actions[t1][0];
        let a2 = &actions[t2][0];
        let sample_a = (t1.to_string(), a1.0.clone(), a1.1.clone());
        let sample_b = (t2.to_string(), a2.0.clone(), a2.1.clone());
        println!("=== DRY RUN: Example comparison ===\n");
        println!("{}", build_compare_prompt(&sample_a, &sample_b));
        return Ok(());
    }

    // Load persisted ratings
    let mut ratings = load_elo_ratings(&agent_types);
    let config = EloConfig { k: 32.0 };

    // Simple but adequate RNG: xorshift32, seeded from the clock. `| 1` keeps
    // the state non-zero (zero is a fixed point of xorshift). A clock set
    // before the epoch falls back to a fixed seed instead of panicking.
    let mut rng = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.subsec_nanos())
        .unwrap_or(0x9E37_79B9)
        | 1;
    let mut next_rng = || -> usize {
        rng ^= rng << 13;
        rng ^= rng >> 17;
        rng ^= rng << 5;
        rng as usize
    };

    let mut failed: usize = 0;
    for i in 0..matchups {
        // Pick two different random agent types (there are at least two, so
        // stepping to the next index always yields a different one).
        let idx_a = next_rng() % active_types.len();
        let mut idx_b = next_rng() % active_types.len();
        if idx_b == idx_a { idx_b = (idx_b + 1) % active_types.len(); }

        let type_a = active_types[idx_a];
        let type_b = active_types[idx_b];

        // Pick random recent action from each
        let acts_a = &actions[type_a];
        let acts_b = &actions[type_b];
        let act_a = &acts_a[next_rng() % acts_a.len()];
        let act_b = &acts_b[next_rng() % acts_b.len()];

        let sample_a = (type_a.to_string(), act_a.0.clone(), act_a.1.clone());
        let sample_b = (type_b.to_string(), act_b.0.clone(), act_b.1.clone());

        let outcome = match llm_compare(&sample_a, &sample_b, model) {
            Ok(std::cmp::Ordering::Greater) => Outcomes::LOSS, // B wins
            Ok(_) => Outcomes::WIN,                            // A wins
            Err(e) => {
                // A failed call carries no information about either agent;
                // scoring it as a win for A would skew the persisted ratings.
                failed += 1;
                eprintln!(" matchup {}/{}: {} vs {} failed: {}",
                    i + 1, matchups, type_a, type_b, e);
                continue;
            }
        };

        let rating_a = EloRating { rating: ratings[type_a] };
        let rating_b = EloRating { rating: ratings[type_b] };
        let (new_a, new_b) = elo(&rating_a, &rating_b, &outcome, &config);
        ratings.insert(type_a.to_string(), new_a.rating);
        ratings.insert(type_b.to_string(), new_b.rating);

        // Single-line progress display; `\r` rewinds so the next update overwrites it.
        eprint!(" matchup {}/{}: {} vs {} -> {}\r",
            i + 1, matchups, type_a, type_b,
            if matches!(outcome, Outcomes::WIN) { type_a } else { type_b });
    }
    eprintln!();
    if failed > 0 {
        eprintln!("{} of {} matchups failed and were skipped", failed, matchups);
    }

    // Save updated ratings
    save_elo_ratings(&ratings);

    // Print rankings, best first.
    let mut ranked: Vec<_> = ratings.iter().collect();
    ranked.sort_by(|a, b| b.1.total_cmp(a.1));

    println!("\nAgent Elo Ratings (after {} matchups):\n", matchups - failed);
    for (agent_type, rating) in &ranked {
        // One '#' per 10 rating points above 800, capped at 40 characters.
        let bar_len = ((*rating - 800.0) / 10.0).max(0.0) as usize;
        let bar = "#".repeat(bar_len.min(40));
        println!(" {:12} {:7.1} {}", agent_type, rating, bar);
    }

    Ok(())
}
/// Build the LLM prompt that asks which of two agent actions was better.
///
/// Each argument is an `(agent type, node key, context)` triple; the node key
/// is not used in the prompt.
///
/// * Different agent types: both contexts are shown in full, each under a
///   heading naming its agent.
/// * Same agent type: everything in A's context before the first
///   `## Report output` marker is shown once as the shared instructions, then
///   each action's text after that marker. A context without the marker
///   contributes an empty report (and, for A, is used whole as the shared
///   part).
///
/// Both variants end with the same instruction to finish with
/// `BETTER: A` or `BETTER: B`.
fn build_compare_prompt(
    a: &(String, String, String),
    b: &(String, String, String),
) -> String {
    const REPORT_MARKER: &str = "## Report output";
    const VERDICT_INSTRUCTIONS: &str =
        "Say which is better and why in 1-2 sentences, then end with:\n\
         BETTER: A or BETTER: B\n\
         You must pick one. No ties.";

    let (type_a, _, context_a) = a;
    let (type_b, _, context_b) = b;

    if type_a != type_b {
        return format!(
            "Compare these two memory graph agent actions. Which one was better \
             for building a useful, well-organized knowledge graph?\n\n\
             ## Action A ({} agent)\n{}\n\n\
             ## Action B ({} agent)\n{}\n\n\
             {}",
            type_a, context_a, type_b, context_b, VERDICT_INSTRUCTIONS
        );
    }

    // Same agent type — show the instructions once, taken from A's context.
    let (shared_prompt, report_a) = context_a
        .split_once(REPORT_MARKER)
        .unwrap_or((context_a.as_str(), ""));
    let report_b = context_b
        .split_once(REPORT_MARKER)
        .map_or("", |(_, after_marker)| after_marker);

    format!(
        "Compare two actions from the same {} agent. Which was better?\n\n\
         {}\n\n\
         ## Action A\n{}{}\n\n\
         ## Action B\n{}{}\n\n\
         {}",
        type_a,
        shared_prompt,
        REPORT_MARKER, report_a,
        REPORT_MARKER, report_b,
        VERDICT_INSTRUCTIONS
    )
}
/// Ask the LLM which of two agent actions was better.
///
/// Builds the comparison prompt for `a` and `b`, sends it with
/// `llm::call_haiku` when `model` is `"haiku"` and with `llm::call_sonnet`
/// otherwise, and reads the verdict from the upper-cased response.
///
/// Returns `Ordering::Greater` when the response contains `BETTER: B`, and
/// `Ordering::Less` (A wins) in every other case, including a response that
/// contains no recognisable verdict.
///
/// # Errors
/// Propagates the error from the LLM call.
fn llm_compare(
    a: &(String, String, String),
    b: &(String, String, String),
    model: &str,
) -> Result<std::cmp::Ordering, String> {
    let prompt = build_compare_prompt(a, b);

    let raw = match model {
        "haiku" => llm::call_haiku("compare", &prompt)?,
        _ => llm::call_sonnet("compare", &prompt)?,
    };

    // Case-insensitive match on the verdict marker.
    let verdict = raw.trim().to_uppercase();
    let b_wins = verdict.contains("BETTER: B");

    // Anything that is not an explicit vote for B counts as a vote for A.
    Ok(if b_wins {
        std::cmp::Ordering::Greater
    } else {
        std::cmp::Ordering::Less
    })
}