agents: phase tracking, pid files, pipelining, unified cycle
- AgentStep with phase labels (=== PROMPT phase:name ===)
- PID files in state dir (pid-{PID} with JSON phase/timestamp)
- Built-in bail check: between steps, bail if another live agent's pid file exists (stale pid files from dead processes are removed)
- surface_observe_cycle replaces surface_agent_cycle + journal_agent_cycle
- Reads surface output from state dir instead of parsing stdout
- Pipelining: starts a new agent if the running one is past the surface phase
- link_set upserts (creates link if missing)
- Better error message for context window overflow
Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
11289667f5
commit
e20aeeeabe
8 changed files with 256 additions and 178 deletions
|
|
@@ -80,16 +80,19 @@ pub fn run_one_agent_with_keys(
|
|||
|
||||
log(&format!("targeting: {}", keys.join(", ")));
|
||||
let graph = store.build_graph();
|
||||
let mut resolved_prompts = Vec::new();
|
||||
let mut resolved_steps = Vec::new();
|
||||
let mut all_keys: Vec<String> = keys.to_vec();
|
||||
for prompt_template in &def.prompts {
|
||||
for step in &def.steps {
|
||||
let (prompt, extra_keys) = super::defs::resolve_placeholders(
|
||||
prompt_template, store, &graph, keys, count,
|
||||
&step.prompt, store, &graph, keys, count,
|
||||
);
|
||||
all_keys.extend(extra_keys);
|
||||
resolved_prompts.push(prompt);
|
||||
resolved_steps.push(super::prompts::ResolvedStep {
|
||||
prompt,
|
||||
phase: step.phase.clone(),
|
||||
});
|
||||
}
|
||||
let agent_batch = super::prompts::AgentBatch { prompts: resolved_prompts, node_keys: all_keys };
|
||||
let agent_batch = super::prompts::AgentBatch { steps: resolved_steps, node_keys: all_keys };
|
||||
|
||||
// Record visits eagerly so concurrent agents pick different seeds
|
||||
if !agent_batch.node_keys.is_empty() {
|
||||
|
|
@@ -138,7 +141,7 @@ fn run_one_agent_inner(
|
|||
) -> Result<AgentResult, String> {
|
||||
let tools_desc = if def.tools.is_empty() { "no tools".into() }
|
||||
else { format!("{} tools", def.tools.len()) };
|
||||
let n_steps = agent_batch.prompts.len();
|
||||
let n_steps = agent_batch.steps.len();
|
||||
|
||||
for key in &agent_batch.node_keys {
|
||||
log(&format!(" node: {}", key));
|
||||
|
|
@@ -146,7 +149,7 @@ fn run_one_agent_inner(
|
|||
|
||||
// Guard: reject oversized first prompt (later steps grow via conversation)
|
||||
let max_prompt_bytes = 800_000;
|
||||
let first_len = agent_batch.prompts[0].len();
|
||||
let first_len = agent_batch.steps[0].prompt.len();
|
||||
if first_len > max_prompt_bytes {
|
||||
let prompt_kb = first_len / 1024;
|
||||
let oversize_dir = store::memory_dir().join("llm-logs").join("oversized");
|
||||
|
|
@@ -155,7 +158,7 @@ fn run_one_agent_inner(
|
|||
agent_name, store::compact_timestamp()));
|
||||
let header = format!("=== OVERSIZED PROMPT ===\nagent: {}\nsize: {}KB (max {}KB)\nnodes: {:?}\n\n",
|
||||
agent_name, prompt_kb, max_prompt_bytes / 1024, agent_batch.node_keys);
|
||||
fs::write(&oversize_path, format!("{}{}", header, &agent_batch.prompts[0])).ok();
|
||||
fs::write(&oversize_path, format!("{}{}", header, &agent_batch.steps[0].prompt)).ok();
|
||||
log(&format!("oversized prompt logged to {}", oversize_path.display()));
|
||||
return Err(format!(
|
||||
"prompt too large: {}KB (max {}KB) — seed nodes may be oversized",
|
||||
|
|
@@ -163,7 +166,7 @@ fn run_one_agent_inner(
|
|||
));
|
||||
}
|
||||
|
||||
// Output directory — use --state-dir if set, otherwise flat per-agent
|
||||
// Output/state directory — use --state-dir if set, otherwise flat per-agent
|
||||
let output_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
|
||||
.map(std::path::PathBuf::from)
|
||||
.unwrap_or_else(|_| store::memory_dir().join("agent-output").join(agent_name));
|
||||
|
|
@@ -171,16 +174,70 @@ fn run_one_agent_inner(
|
|||
// Safe: agent runs single-threaded, env var read only by our dispatch code
|
||||
unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &output_dir); }
|
||||
|
||||
log(&format!("{} step(s), {}KB initial, model={}, {}, {} nodes, output={}",
|
||||
n_steps, first_len / 1024, def.model, tools_desc,
|
||||
// Write PID file with initial phase
|
||||
let pid = std::process::id();
|
||||
let pid_path = output_dir.join(format!("pid-{}", pid));
|
||||
let write_pid = |phase: &str| {
|
||||
let json = format!("{{\"phase\":\"{}\",\"started\":{}}}", phase,
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap().as_secs());
|
||||
fs::write(&pid_path, &json).ok();
|
||||
};
|
||||
write_pid(&agent_batch.steps[0].phase);
|
||||
|
||||
let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect();
|
||||
log(&format!("{} step(s) {:?}, {}KB initial, model={}, {}, {} nodes, output={}",
|
||||
n_steps, phases, first_len / 1024, def.model, tools_desc,
|
||||
agent_batch.node_keys.len(), output_dir.display()));
|
||||
|
||||
for (i, p) in agent_batch.prompts.iter().enumerate() {
|
||||
log(&format!("=== PROMPT {}/{} ===\n\n{}", i + 1, n_steps, p));
|
||||
let prompts: Vec<String> = agent_batch.steps.iter()
|
||||
.map(|s| s.prompt.clone()).collect();
|
||||
let step_phases: Vec<String> = agent_batch.steps.iter()
|
||||
.map(|s| s.phase.clone()).collect();
|
||||
|
||||
for (i, s) in agent_batch.steps.iter().enumerate() {
|
||||
log(&format!("=== PROMPT {}/{} ({}) ===\n\n{}", i + 1, n_steps, s.phase, s.prompt));
|
||||
}
|
||||
log("\n=== CALLING LLM ===");
|
||||
|
||||
let output = llm::call_for_def_multi(def, &agent_batch.prompts, log)?;
|
||||
// Bail check: between steps, check for other pid files in the state dir.
|
||||
// If another agent has started, bail — let it have the resources.
|
||||
let output_dir_clone = output_dir.clone();
|
||||
let bail_fn = move |step_idx: usize| -> Result<(), String> {
|
||||
if step_idx < step_phases.len() {
|
||||
write_pid(&step_phases[step_idx]);
|
||||
}
|
||||
// After step 0 (surface), check for competing agents
|
||||
if step_idx > 0 {
|
||||
if let Ok(entries) = fs::read_dir(&output_dir_clone) {
|
||||
for entry in entries.flatten() {
|
||||
let name = entry.file_name();
|
||||
let name_str = name.to_string_lossy();
|
||||
if !name_str.starts_with("pid-") { continue; }
|
||||
let other_pid: u32 = name_str.strip_prefix("pid-")
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
if other_pid == pid || other_pid == 0 { continue; }
|
||||
// Check if the other process is alive
|
||||
if unsafe { libc::kill(other_pid as i32, 0) } == 0 {
|
||||
log(&format!("bail: another agent running (pid {})", other_pid));
|
||||
return Err(format!("bailed at step {} — competing agent pid {}",
|
||||
step_idx + 1, other_pid));
|
||||
} else {
|
||||
// Dead process — clean up stale pid file
|
||||
fs::remove_file(entry.path()).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
};
|
||||
|
||||
let output = llm::call_for_def_multi(def, &prompts, Some(&bail_fn), log)?;
|
||||
|
||||
// Clean up PID file
|
||||
fs::remove_file(&pid_path).ok();
|
||||
|
||||
Ok(AgentResult {
|
||||
output,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue