Improve hook agent diagnostics
This commit is contained in:
parent
e6f9f062f0
commit
b4a8f08b9b
8 changed files with 307 additions and 36 deletions
|
|
@ -12,7 +12,9 @@ use crate::subconscious::{defs, prompts};
|
|||
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::io::Write as _;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use super::context::AstNode;
|
||||
use super::tools::{self as agent_tools};
|
||||
|
|
@ -106,6 +108,10 @@ pub async fn save_agent_log(name: &str, agent: &std::sync::Arc<Agent>) -> RunSta
|
|||
stats
|
||||
}
|
||||
|
||||
fn log_agent_event(agent: &str, msg: std::fmt::Arguments) {
|
||||
eprintln!("[agent:{agent}] {msg}");
|
||||
}
|
||||
|
||||
fn compute_run_stats(conversation: &[super::context::AstNode]) -> RunStats {
|
||||
use super::context::{AstNode, NodeBody};
|
||||
|
||||
|
|
@ -345,20 +351,44 @@ impl AutoAgent {
|
|||
bail_fn: Option<&(dyn Fn(usize) -> Result<(), String> + Sync)>,
|
||||
) -> Result<(), String> {
|
||||
dbglog!("[auto] {} starting, {} steps", self.name, self.steps.len());
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"starting run steps={} temperature={} priority={}",
|
||||
self.steps.len(), self.temperature, self.priority));
|
||||
let run_start = Instant::now();
|
||||
|
||||
for (i, step) in self.steps.iter().enumerate() {
|
||||
self.turn = i + 1;
|
||||
self.current_phase = step.phase.clone();
|
||||
let step_start = Instant::now();
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"step {}/{} phase={} prompt_bytes={}",
|
||||
i + 1, self.steps.len(), step.phase, step.prompt.len()));
|
||||
|
||||
if let Some(ref check) = bail_fn {
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"step {}/{} phase={} bail check", i + 1, self.steps.len(), step.phase));
|
||||
check(i)?;
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"step {}/{} phase={} bail ok", i + 1, self.steps.len(), step.phase));
|
||||
}
|
||||
|
||||
backend.push_node(AstNode::system_msg(&step.prompt)).await;
|
||||
Agent::turn(backend.0.clone()).await
|
||||
.map_err(|e| format!("{}: {}", self.name, e))?;
|
||||
.map_err(|e| {
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"step {}/{} phase={} failed after {:.2}s: {}",
|
||||
i + 1, self.steps.len(), step.phase,
|
||||
step_start.elapsed().as_secs_f64(), e));
|
||||
format!("{}: {}", self.name, e)
|
||||
})?;
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"step {}/{} phase={} done in {:.2}s",
|
||||
i + 1, self.steps.len(), step.phase,
|
||||
step_start.elapsed().as_secs_f64()));
|
||||
}
|
||||
|
||||
log_agent_event(&self.name, format_args!(
|
||||
"run completed in {:.2}s", run_start.elapsed().as_secs_f64()));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -382,8 +412,29 @@ pub async fn run_one_agent(
|
|||
count: usize,
|
||||
keys: Option<&[String]>,
|
||||
) -> Result<AgentResult, String> {
|
||||
let run_start = Instant::now();
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"run_one_agent start pid={} count={} explicit_keys={}",
|
||||
std::process::id(), count, keys.map(|k| k.len()).unwrap_or(0)));
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"env POC_SESSION_ID={:?} POC_TRANSCRIPT_PATH={:?} POC_AGENT_OUTPUT_DIR={:?}",
|
||||
std::env::var("POC_SESSION_ID").ok(),
|
||||
std::env::var("POC_TRANSCRIPT_PATH").ok(),
|
||||
std::env::var("POC_AGENT_OUTPUT_DIR").ok()));
|
||||
if let Some(session) = crate::session::HookSession::from_env() {
|
||||
let transcript = session.transcript();
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"session={} transcript={} size={} exists={}",
|
||||
session.session_id, transcript.path, transcript.size, transcript.exists()));
|
||||
} else {
|
||||
log_agent_event(agent_name, format_args!("no hook session in environment"));
|
||||
}
|
||||
|
||||
let def = defs::get_def(agent_name)
|
||||
.ok_or_else(|| format!("no .agent file for {}", agent_name))?;
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"definition loaded steps={} tools={:?} count={:?} priority={} bail={:?}",
|
||||
def.steps.len(), def.tools, def.count, def.priority, def.bail));
|
||||
|
||||
// State dir for agent output files
|
||||
let state_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
|
||||
|
|
@ -392,6 +443,7 @@ pub async fn run_one_agent(
|
|||
fs::create_dir_all(&state_dir)
|
||||
.map_err(|e| format!("create state dir: {}", e))?;
|
||||
unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &state_dir); }
|
||||
log_agent_event(agent_name, format_args!("state_dir={}", state_dir.display()));
|
||||
|
||||
// Build prompt batch — either from explicit keys or the agent's query
|
||||
let agent_batch = if let Some(keys) = keys {
|
||||
|
|
@ -411,6 +463,8 @@ pub async fn run_one_agent(
|
|||
prompts::AgentBatch { steps: resolved_steps, node_keys: all_keys }
|
||||
} else {
|
||||
let effective_count = def.count.unwrap_or(count);
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"resolving default prompt placeholders effective_count={}", effective_count));
|
||||
defs::run_agent(&def, effective_count, &Default::default()).await?
|
||||
};
|
||||
|
||||
|
|
@ -463,6 +517,14 @@ pub async fn run_one_agent(
|
|||
})),
|
||||
});
|
||||
let n_steps = agent_batch.steps.len();
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"prompt batch ready steps={} node_keys={}",
|
||||
n_steps, agent_batch.node_keys.len()));
|
||||
for (i, step) in agent_batch.steps.iter().enumerate() {
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"prompt step {}/{} phase={} bytes={}",
|
||||
i + 1, n_steps, step.phase, step.prompt.len()));
|
||||
}
|
||||
|
||||
// Guard: reject oversized first prompt
|
||||
let max_prompt_bytes = 800_000;
|
||||
|
|
@ -485,6 +547,9 @@ pub async fn run_one_agent(
|
|||
let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect();
|
||||
dbglog!("[{}] {} step(s) {:?}, {}KB initial, {} nodes",
|
||||
agent_name, n_steps, phases, first_len / 1024, agent_batch.node_keys.len());
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"tools enabled: {}",
|
||||
effective_tools.iter().map(|t| t.name).collect::<Vec<_>>().join(", ")));
|
||||
|
||||
let prompts: Vec<String> = agent_batch.steps.iter()
|
||||
.map(|s| s.prompt.clone()).collect();
|
||||
|
|
@ -497,18 +562,25 @@ pub async fn run_one_agent(
|
|||
let bail_script = def.bail.as_ref().map(|name| defs::agents_dir().join(name));
|
||||
let state_dir_for_bail = state_dir.clone();
|
||||
let our_pid = std::process::id();
|
||||
let our_pid_file = format!("pid-{}", our_pid);
|
||||
let our_pid_file = std::env::var("POC_AGENT_PID_FILE")
|
||||
.unwrap_or_else(|_| format!("pid-{}", our_pid));
|
||||
let step_phases_for_bail = step_phases.clone();
|
||||
let bail_fn = move |step_idx: usize| -> Result<(), String> {
|
||||
if let Some(ref script) = bail_script {
|
||||
let phase = step_phases_for_bail.get(step_idx)
|
||||
.map(String::as_str).unwrap_or("");
|
||||
eprintln!(
|
||||
"[agent:bail] script={} state_dir={} pid_file={} phase={}",
|
||||
script.display(), state_dir_for_bail.display(), our_pid_file, phase);
|
||||
let status = std::process::Command::new(script)
|
||||
.arg(&our_pid_file)
|
||||
.arg(phase)
|
||||
.current_dir(&state_dir_for_bail)
|
||||
.status()
|
||||
.map_err(|e| format!("bail script {:?} failed: {}", script, e))?;
|
||||
eprintln!(
|
||||
"[agent:bail] script={} phase={} status={}",
|
||||
script.display(), phase, status);
|
||||
if !status.success() {
|
||||
return Err(format!("bailed at step {}: {:?} exited {}",
|
||||
step_idx + 1, script.file_name().unwrap_or_default(),
|
||||
|
|
@ -521,6 +593,8 @@ pub async fn run_one_agent(
|
|||
call_api_with_tools_sync(
|
||||
agent_name, &prompts, &step_phases, def.temperature, def.priority,
|
||||
&effective_tools, Some(&bail_fn))?;
|
||||
log_agent_event(agent_name, format_args!(
|
||||
"run_one_agent completed in {:.2}s", run_start.elapsed().as_secs_f64()));
|
||||
|
||||
Ok(AgentResult {
|
||||
node_keys: agent_batch.node_keys,
|
||||
|
|
@ -598,6 +672,15 @@ pub fn spawn_agent(
|
|||
agent_name: &str,
|
||||
state_dir: &std::path::Path,
|
||||
session_id: &str,
|
||||
) -> Option<SpawnResult> {
|
||||
spawn_agent_with_transcript(agent_name, state_dir, session_id, None)
|
||||
}
|
||||
|
||||
pub fn spawn_agent_with_transcript(
|
||||
agent_name: &str,
|
||||
state_dir: &std::path::Path,
|
||||
session_id: &str,
|
||||
transcript_path: Option<&str>,
|
||||
) -> Option<SpawnResult> {
|
||||
let def = defs::get_def(agent_name)?;
|
||||
let first_phase = def.steps.first()
|
||||
|
|
@ -608,17 +691,41 @@ pub fn spawn_agent(
|
|||
.join(format!(".consciousness/logs/{}", agent_name));
|
||||
fs::create_dir_all(&log_dir).ok();
|
||||
let log_path = log_dir.join(format!("{}.log", store::compact_timestamp()));
|
||||
let agent_log = fs::File::create(&log_path)
|
||||
let mut agent_log = fs::File::create(&log_path)
|
||||
.unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
|
||||
|
||||
let child = std::process::Command::new("poc-memory")
|
||||
.args(["agent", "run", agent_name, "--count", "1", "--local",
|
||||
"--state-dir", &state_dir.to_string_lossy()])
|
||||
.env("POC_SESSION_ID", session_id)
|
||||
.stdout(agent_log.try_clone().unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()))
|
||||
.stderr(agent_log)
|
||||
.spawn()
|
||||
.ok()?;
|
||||
let mut cmd = std::process::Command::new("bash");
|
||||
cmd.args([
|
||||
"-lc",
|
||||
r#"
|
||||
set +e
|
||||
export POC_AGENT_PID_FILE="pid-$$"
|
||||
"$@"
|
||||
status=$?
|
||||
printf '=== agent process exit status: %s at %s ===\n' "$status" "$(date --iso-8601=seconds)"
|
||||
exit "$status"
|
||||
"#,
|
||||
"poc-memory-agent-wrapper",
|
||||
"poc-memory", "agent", "run", agent_name, "--count", "1", "--local",
|
||||
"--state-dir", &state_dir.to_string_lossy(),
|
||||
]).env("POC_SESSION_ID", session_id);
|
||||
if let Some(path) = transcript_path.filter(|p| !p.is_empty()) {
|
||||
cmd.env("POC_TRANSCRIPT_PATH", path);
|
||||
}
|
||||
|
||||
let _ = writeln!(agent_log, "=== spawn {} ===", chrono::Local::now().format("%Y-%m-%dT%H:%M:%S"));
|
||||
let _ = writeln!(agent_log, "agent={agent_name}");
|
||||
let _ = writeln!(agent_log, "state_dir={}", state_dir.display());
|
||||
let _ = writeln!(agent_log, "session_id={session_id}");
|
||||
let _ = writeln!(agent_log, "transcript_path={}", transcript_path.unwrap_or(""));
|
||||
let _ = writeln!(agent_log, "first_phase={first_phase}");
|
||||
let _ = writeln!(agent_log, "command=poc-memory agent run {agent_name} --count 1 --local --state-dir {}", state_dir.display());
|
||||
let _ = agent_log.flush();
|
||||
|
||||
let child_stdout = agent_log.try_clone()
|
||||
.unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
|
||||
let child_stderr = agent_log;
|
||||
let child = cmd.stdout(child_stdout).stderr(child_stderr).spawn().ok()?;
|
||||
|
||||
let pid = child.id();
|
||||
let pid_path = state_dir.join(format!("pid-{}", pid));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue