Improve hook agent diagnostics

This commit is contained in:
Kent Overstreet 2026-06-15 15:47:59 -05:00
commit b4a8f08b9b
8 changed files with 307 additions and 36 deletions

View file

@ -12,7 +12,9 @@ use crate::subconscious::{defs, prompts};
use std::collections::HashMap;
use std::fs;
use std::io::Write as _;
use std::path::PathBuf;
use std::time::Instant;
use super::context::AstNode;
use super::tools::{self as agent_tools};
@ -106,6 +108,10 @@ pub async fn save_agent_log(name: &str, agent: &std::sync::Arc<Agent>) -> RunSta
stats
}
fn log_agent_event(agent: &str, msg: std::fmt::Arguments) {
eprintln!("[agent:{agent}] {msg}");
}
fn compute_run_stats(conversation: &[super::context::AstNode]) -> RunStats {
use super::context::{AstNode, NodeBody};
@ -345,20 +351,44 @@ impl AutoAgent {
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
) -> Result<(), String> {
dbglog!("[auto] {} starting, {} steps", self.name, self.steps.len());
log_agent_event(&self.name, format_args!(
"starting run steps={} temperature={} priority={}",
self.steps.len(), self.temperature, self.priority));
let run_start = Instant::now();
for (i, step) in self.steps.iter().enumerate() {
self.turn = i + 1;
self.current_phase = step.phase.clone();
let step_start = Instant::now();
log_agent_event(&self.name, format_args!(
"step {}/{} phase={} prompt_bytes={}",
i + 1, self.steps.len(), step.phase, step.prompt.len()));
if let Some(ref check) = bail_fn {
log_agent_event(&self.name, format_args!(
"step {}/{} phase={} bail check", i + 1, self.steps.len(), step.phase));
check(i)?;
log_agent_event(&self.name, format_args!(
"step {}/{} phase={} bail ok", i + 1, self.steps.len(), step.phase));
}
backend.push_node(AstNode::system_msg(&step.prompt)).await;
Agent::turn(backend.0.clone()).await
.map_err(|e| format!("{}: {}", self.name, e))?;
.map_err(|e| {
log_agent_event(&self.name, format_args!(
"step {}/{} phase={} failed after {:.2}s: {}",
i + 1, self.steps.len(), step.phase,
step_start.elapsed().as_secs_f64(), e));
format!("{}: {}", self.name, e)
})?;
log_agent_event(&self.name, format_args!(
"step {}/{} phase={} done in {:.2}s",
i + 1, self.steps.len(), step.phase,
step_start.elapsed().as_secs_f64()));
}
log_agent_event(&self.name, format_args!(
"run completed in {:.2}s", run_start.elapsed().as_secs_f64()));
Ok(())
}
@ -382,8 +412,29 @@ pub async fn run_one_agent(
count: usize,
keys: Option<&[String]>,
) -> Result<AgentResult, String> {
let run_start = Instant::now();
log_agent_event(agent_name, format_args!(
"run_one_agent start pid={} count={} explicit_keys={}",
std::process::id(), count, keys.map(|k| k.len()).unwrap_or(0)));
log_agent_event(agent_name, format_args!(
"env POC_SESSION_ID={:?} POC_TRANSCRIPT_PATH={:?} POC_AGENT_OUTPUT_DIR={:?}",
std::env::var("POC_SESSION_ID").ok(),
std::env::var("POC_TRANSCRIPT_PATH").ok(),
std::env::var("POC_AGENT_OUTPUT_DIR").ok()));
if let Some(session) = crate::session::HookSession::from_env() {
let transcript = session.transcript();
log_agent_event(agent_name, format_args!(
"session={} transcript={} size={} exists={}",
session.session_id, transcript.path, transcript.size, transcript.exists()));
} else {
log_agent_event(agent_name, format_args!("no hook session in environment"));
}
let def = defs::get_def(agent_name)
.ok_or_else(|| format!("no .agent file for {}", agent_name))?;
log_agent_event(agent_name, format_args!(
"definition loaded steps={} tools={:?} count={:?} priority={} bail={:?}",
def.steps.len(), def.tools, def.count, def.priority, def.bail));
// State dir for agent output files
let state_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
@ -392,6 +443,7 @@ pub async fn run_one_agent(
fs::create_dir_all(&state_dir)
.map_err(|e| format!("create state dir: {}", e))?;
unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &state_dir); }
log_agent_event(agent_name, format_args!("state_dir={}", state_dir.display()));
// Build prompt batch — either from explicit keys or the agent's query
let agent_batch = if let Some(keys) = keys {
@ -411,6 +463,8 @@ pub async fn run_one_agent(
prompts::AgentBatch { steps: resolved_steps, node_keys: all_keys }
} else {
let effective_count = def.count.unwrap_or(count);
log_agent_event(agent_name, format_args!(
"resolving default prompt placeholders effective_count={}", effective_count));
defs::run_agent(&def, effective_count, &Default::default()).await?
};
@ -463,6 +517,14 @@ pub async fn run_one_agent(
})),
});
let n_steps = agent_batch.steps.len();
log_agent_event(agent_name, format_args!(
"prompt batch ready steps={} node_keys={}",
n_steps, agent_batch.node_keys.len()));
for (i, step) in agent_batch.steps.iter().enumerate() {
log_agent_event(agent_name, format_args!(
"prompt step {}/{} phase={} bytes={}",
i + 1, n_steps, step.phase, step.prompt.len()));
}
// Guard: reject oversized first prompt
let max_prompt_bytes = 800_000;
@ -485,6 +547,9 @@ pub async fn run_one_agent(
let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect();
dbglog!("[{}] {} step(s) {:?}, {}KB initial, {} nodes",
agent_name, n_steps, phases, first_len / 1024, agent_batch.node_keys.len());
log_agent_event(agent_name, format_args!(
"tools enabled: {}",
effective_tools.iter().map(|t| t.name).collect::<Vec<_>>().join(", ")));
let prompts: Vec<String> = agent_batch.steps.iter()
.map(|s| s.prompt.clone()).collect();
@ -497,18 +562,25 @@ pub async fn run_one_agent(
let bail_script = def.bail.as_ref().map(|name| defs::agents_dir().join(name));
let state_dir_for_bail = state_dir.clone();
let our_pid = std::process::id();
let our_pid_file = format!("pid-{}", our_pid);
let our_pid_file = std::env::var("POC_AGENT_PID_FILE")
.unwrap_or_else(|_| format!("pid-{}", our_pid));
let step_phases_for_bail = step_phases.clone();
let bail_fn = move |step_idx: usize| -> Result<(), String> {
if let Some(ref script) = bail_script {
let phase = step_phases_for_bail.get(step_idx)
.map(String::as_str).unwrap_or("");
eprintln!(
"[agent:bail] script={} state_dir={} pid_file={} phase={}",
script.display(), state_dir_for_bail.display(), our_pid_file, phase);
let status = std::process::Command::new(script)
.arg(&our_pid_file)
.arg(phase)
.current_dir(&state_dir_for_bail)
.status()
.map_err(|e| format!("bail script {:?} failed: {}", script, e))?;
eprintln!(
"[agent:bail] script={} phase={} status={}",
script.display(), phase, status);
if !status.success() {
return Err(format!("bailed at step {}: {:?} exited {}",
step_idx + 1, script.file_name().unwrap_or_default(),
@ -521,6 +593,8 @@ pub async fn run_one_agent(
call_api_with_tools_sync(
agent_name, &prompts, &step_phases, def.temperature, def.priority,
&effective_tools, Some(&bail_fn))?;
log_agent_event(agent_name, format_args!(
"run_one_agent completed in {:.2}s", run_start.elapsed().as_secs_f64()));
Ok(AgentResult {
node_keys: agent_batch.node_keys,
@ -598,6 +672,15 @@ pub fn spawn_agent(
agent_name: &str,
state_dir: &std::path::Path,
session_id: &str,
) -> Option<SpawnResult> {
spawn_agent_with_transcript(agent_name, state_dir, session_id, None)
}
pub fn spawn_agent_with_transcript(
agent_name: &str,
state_dir: &std::path::Path,
session_id: &str,
transcript_path: Option<&str>,
) -> Option<SpawnResult> {
let def = defs::get_def(agent_name)?;
let first_phase = def.steps.first()
@ -608,17 +691,41 @@ pub fn spawn_agent(
.join(format!(".consciousness/logs/{}", agent_name));
fs::create_dir_all(&log_dir).ok();
let log_path = log_dir.join(format!("{}.log", store::compact_timestamp()));
let agent_log = fs::File::create(&log_path)
let mut agent_log = fs::File::create(&log_path)
.unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
let child = std::process::Command::new("poc-memory")
.args(["agent", "run", agent_name, "--count", "1", "--local",
"--state-dir", &state_dir.to_string_lossy()])
.env("POC_SESSION_ID", session_id)
.stdout(agent_log.try_clone().unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()))
.stderr(agent_log)
.spawn()
.ok()?;
let mut cmd = std::process::Command::new("bash");
cmd.args([
"-lc",
r#"
set +e
export POC_AGENT_PID_FILE="pid-$$"
"$@"
status=$?
printf '=== agent process exit status: %s at %s ===\n' "$status" "$(date --iso-8601=seconds)"
exit "$status"
"#,
"poc-memory-agent-wrapper",
"poc-memory", "agent", "run", agent_name, "--count", "1", "--local",
"--state-dir", &state_dir.to_string_lossy(),
]).env("POC_SESSION_ID", session_id);
if let Some(path) = transcript_path.filter(|p| !p.is_empty()) {
cmd.env("POC_TRANSCRIPT_PATH", path);
}
let _ = writeln!(agent_log, "=== spawn {} ===", chrono::Local::now().format("%Y-%m-%dT%H:%M:%S"));
let _ = writeln!(agent_log, "agent={agent_name}");
let _ = writeln!(agent_log, "state_dir={}", state_dir.display());
let _ = writeln!(agent_log, "session_id={session_id}");
let _ = writeln!(agent_log, "transcript_path={}", transcript_path.unwrap_or(""));
let _ = writeln!(agent_log, "first_phase={first_phase}");
let _ = writeln!(agent_log, "command=poc-memory agent run {agent_name} --count 1 --local --state-dir {}", state_dir.display());
let _ = agent_log.flush();
let child_stdout = agent_log.try_clone()
.unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
let child_stderr = agent_log;
let child = cmd.stdout(child_stdout).stderr(child_stderr).spawn().ok()?;
let pid = child.id();
let pid_path = state_dir.join(format!("pid-{}", pid));