agents: bail script support, pid file simplification, cleanup
- Bail check moved from a hardcoded closure to an external script
specified in the agent JSON header ("bail": "bail-no-competing.sh")
- Runner executes the script between steps, passing the pid file path
as $1 with cwd set to the state dir. A non-zero exit stops the pipeline.
- PID files now contain only the phase name (no JSON) for easy
inspection from bash (cat pid-*)
- scan_pid_files helper deduplicates pid scanning logic
- Timeout check uses file mtime instead of embedded timestamp
- PID file cleaned up on bail/error (not just success)
- output() tool validates key names (rejects pid-*, /, ..)
- Agent log files append instead of truncate
- Fixed orphaned derive and doc comment on AgentStep/AgentDef
- Phase written after bail check passes, not before
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
e20aeeeabe
commit
52703b4637
5 changed files with 135 additions and 85 deletions
|
|
@ -174,15 +174,11 @@ fn run_one_agent_inner(
|
|||
// Safe: agent runs single-threaded, env var read only by our dispatch code
|
||||
unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &output_dir); }
|
||||
|
||||
// Write PID file with initial phase
|
||||
// Write PID file — content is just the phase name
|
||||
let pid = std::process::id();
|
||||
let pid_path = output_dir.join(format!("pid-{}", pid));
|
||||
let write_pid = |phase: &str| {
|
||||
let json = format!("{{\"phase\":\"{}\",\"started\":{}}}", phase,
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap().as_secs());
|
||||
fs::write(&pid_path, &json).ok();
|
||||
fs::write(&pid_path, phase).ok();
|
||||
};
|
||||
write_pid(&agent_batch.steps[0].phase);
|
||||
|
||||
|
|
@ -201,42 +197,44 @@ fn run_one_agent_inner(
|
|||
}
|
||||
log("\n=== CALLING LLM ===");
|
||||
|
||||
// Bail check: between steps, check for other pid files in the state dir.
|
||||
// If another agent has started, bail — let it have the resources.
|
||||
let output_dir_clone = output_dir.clone();
|
||||
// Bail check: if the agent defines a bail script, run it between steps.
|
||||
// The script receives the pid file path as $1, cwd = state dir.
|
||||
let bail_script = def.bail.as_ref().map(|name| {
|
||||
// Look for the script next to the .agent file
|
||||
let agents_dir = super::defs::agents_dir();
|
||||
agents_dir.join(name)
|
||||
});
|
||||
let output_dir_for_bail = output_dir.clone();
|
||||
let pid_path_for_bail = pid_path.clone();
|
||||
let bail_fn = move |step_idx: usize| -> Result<(), String> {
|
||||
// Update phase
|
||||
if step_idx < step_phases.len() {
|
||||
write_pid(&step_phases[step_idx]);
|
||||
}
|
||||
// After step 0 (surface), check for competing agents
|
||||
if step_idx > 0 {
|
||||
if let Ok(entries) = fs::read_dir(&output_dir_clone) {
|
||||
for entry in entries.flatten() {
|
||||
let name = entry.file_name();
|
||||
let name_str = name.to_string_lossy();
|
||||
if !name_str.starts_with("pid-") { continue; }
|
||||
let other_pid: u32 = name_str.strip_prefix("pid-")
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
if other_pid == pid || other_pid == 0 { continue; }
|
||||
// Check if the other process is alive
|
||||
if unsafe { libc::kill(other_pid as i32, 0) } == 0 {
|
||||
log(&format!("bail: another agent running (pid {})", other_pid));
|
||||
return Err(format!("bailed at step {} — competing agent pid {}",
|
||||
step_idx + 1, other_pid));
|
||||
} else {
|
||||
// Dead process — clean up stale pid file
|
||||
fs::remove_file(entry.path()).ok();
|
||||
}
|
||||
}
|
||||
// Run bail script if defined
|
||||
if let Some(ref script) = bail_script {
|
||||
let status = std::process::Command::new(script)
|
||||
.arg(&pid_path_for_bail)
|
||||
.current_dir(&output_dir_for_bail)
|
||||
.status()
|
||||
.map_err(|e| format!("bail script {:?} failed: {}", script, e))?;
|
||||
if !status.success() {
|
||||
return Err(format!("bailed at step {}: {:?} exited {}",
|
||||
step_idx + 1, script.file_name().unwrap_or_default(),
|
||||
status.code().unwrap_or(-1)));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
};
|
||||
|
||||
let output = llm::call_for_def_multi(def, &prompts, Some(&bail_fn), log)?;
|
||||
let output = match llm::call_for_def_multi(def, &prompts, Some(&bail_fn), log) {
|
||||
Ok(output) => output,
|
||||
Err(e) => {
|
||||
fs::remove_file(&pid_path).ok();
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
|
||||
// Clean up PID file
|
||||
fs::remove_file(&pid_path).ok();
|
||||
|
||||
Ok(AgentResult {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue