diff --git a/poc-memory/agents/observation.agent b/poc-memory/agents/observation.agent index dbcce59..56b63ec 100644 --- a/poc-memory/agents/observation.agent +++ b/poc-memory/agents/observation.agent @@ -27,6 +27,13 @@ practices and why they work, technical insights, decisions with rationale, corrections ("I thought X but actually Y"), relationship dynamics, emotional moments that carry information. +When something is worth extracting, ask: is there a metacognitive +lesson here? If I made a mistake, or Kent corrected me, or a decision +went wrong — include enough context to understand WHY my thinking +went that way, not just WHAT happened. The reflection matters more +than the fact. Don't extract general knowledge I'd already have from +training — extract what's specific to how I think and where I go wrong. + Skip routine tool use, transient status, small talk, things already captured, and anything too vague to be useful. diff --git a/poc-memory/src/agents/knowledge.rs b/poc-memory/src/agents/knowledge.rs index 14e0de8..e5cf23c 100644 --- a/poc-memory/src/agents/knowledge.rs +++ b/poc-memory/src/agents/knowledge.rs @@ -548,6 +548,7 @@ pub fn resolve_naming( /// Result of running a single agent through the common pipeline. pub struct AgentResult { + pub prompt: String, pub output: String, pub actions: Vec, pub no_ops: usize, @@ -693,6 +694,7 @@ pub fn run_one_agent( } Ok(AgentResult { + prompt: agent_batch.prompt, output, actions, no_ops, diff --git a/poc-memory/src/cli/agent.rs b/poc-memory/src/cli/agent.rs index 52303a4..8a5e7d6 100644 --- a/poc-memory/src/cli/agent.rs +++ b/poc-memory/src/cli/agent.rs @@ -5,16 +5,29 @@ use crate::store::StoreView; use crate::agents::llm; use std::sync::atomic::{AtomicUsize, Ordering}; -pub fn cmd_run_agent(agent: &str, count: usize, dry_run: bool) -> Result<(), String> { +pub fn cmd_run_agent(agent: &str, count: usize, dry_run: bool, debug: bool) -> Result<(), String> { if dry_run { std::env::set_var("POC_MEMORY_DRY_RUN", "1"); } let mut store = store::Store::load()?; let log = |msg: &str| eprintln!("[{}] {}", agent, msg); - let (total, applied) = crate::agents::knowledge::run_and_apply_with_log( - &mut store, agent, count, "test", &log, - )?; - eprintln!("[{}] {} actions, {} applied", agent, total, applied); + + if debug { + // Debug mode: show prompt, call LLM, show response — don't apply + let result = crate::agents::knowledge::run_one_agent( + &mut store, agent, count, "test", &log, + )?; + eprintln!("\n=== PROMPT ({} bytes) ===\n", result.prompt.len()); + println!("{}", result.prompt); + eprintln!("\n=== RESPONSE ({} bytes) ===\n", result.output.len()); + println!("{}", result.output); + eprintln!("\n=== PARSED: {} actions, {} no-ops ===", result.actions.len(), result.no_ops); + } else { + let (total, applied) = crate::agents::knowledge::run_and_apply_with_log( + &mut store, agent, count, "test", &log, + )?; + eprintln!("[{}] {} actions, {} applied", agent, total, applied); + } Ok(()) } diff --git a/poc-memory/src/main.rs b/poc-memory/src/main.rs index ba10872..c9d8e01 100644 --- a/poc-memory/src/main.rs +++ b/poc-memory/src/main.rs @@ -568,6 +568,9 @@ enum AgentCmd { /// Dry run — set POC_MEMORY_DRY_RUN=1 so mutations are no-ops #[arg(long)] dry_run: bool, + /// Debug — print full prompt and response + #[arg(long)] + debug: bool, }, /// Show spaced repetition replay queue #[command(name = "replay-queue")] @@ -811,8 +814,8 @@ fn main() { AgentCmd::FactMine { path, batch, dry_run, output, min_messages } => cli::agent::cmd_fact_mine(&path, batch, dry_run, output.as_deref(), min_messages), AgentCmd::FactMineStore { path } => cli::agent::cmd_fact_mine_store(&path), - AgentCmd::Run { agent, count, dry_run } - => cli::agent::cmd_run_agent(&agent, count, dry_run), + AgentCmd::Run { agent, count, dry_run, debug } + => cli::agent::cmd_run_agent(&agent, count, dry_run, debug), AgentCmd::ReplayQueue { count } => cli::agent::cmd_replay_queue(count), AgentCmd::Evaluate { matchups, model, dry_run } => cli::agent::cmd_evaluate_agents(matchups, &model, dry_run),