agent_cycles: spawn-gate respects phase, not just presence
The surface_observe_cycle check was gated on \`self.agent_running("surface-observe")\`,
which returns true as long as any surface-observe agent has a pid in
\`self.agents\` — regardless of which phase that agent is in. So even
after the bail script learned to allow "one in surface + one in
post-surface" concurrency, the orchestrator still refused to spawn a
second agent for the whole cycle duration.
Replace the in-memory gate with an on-disk phase check that matches
the bail script's view of the world: scan the state dir for live
\`pid-*\` files (same ones the bail script reads and keeps fresh),
and only skip spawning if some live agent is currently in the
\`surface\` phase. If the existing agent has moved on to
\`organize-search\` / \`organize-new\` / \`observe\`, the new surface
agent can start while the old one finishes its tail.
The older agent's child handle gets dropped when \`self.agents[surface-observe]\`
is overwritten on spawn — OS reaps when it exits. That's fine: the
hook is short-lived, and cross-invocation liveness is checked via
\`libc::kill(pid, 0)\` on restored pid state, not via held child handles.
Also drop the "agent is N KB behind, sleep 5s" block. That loop only
made sense when spawns were serialized — its purpose was to give a
running agent a chance to complete before the current hook returned.
With parallelism, there's nothing to wait for: either a new surface
agent just started (no point waiting on it), or the existing agent
is in the surface phase and will finish when it finishes.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
6ec15776f1
commit
719d3a4856
2 changed files with 75 additions and 275 deletions
|
|
@ -9,7 +9,7 @@ use std::fs;
|
|||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
pub use consciousness::session::HookSession;
|
||||
|
||||
|
|
@ -229,9 +229,6 @@ impl AgentCycleState {
|
|||
let state_dir = self.agent_dir("surface-observe");
|
||||
let transcript = session.transcript();
|
||||
let offset_path = state_dir.join("transcript-offset");
|
||||
let last_offset: u64 = fs::read_to_string(&offset_path).ok()
|
||||
.and_then(|s| s.trim().parse().ok())
|
||||
.unwrap_or(0);
|
||||
|
||||
// Read surfaced keys
|
||||
let mut surfaced_keys = Vec::new();
|
||||
|
|
@ -255,10 +252,15 @@ impl AgentCycleState {
|
|||
fs::remove_file(&surface_path).ok();
|
||||
}
|
||||
|
||||
// Spawn new agent if not already running
|
||||
let running = self.agent_running("surface-observe");
|
||||
if running {
|
||||
self.log(format_args!("surface-observe already running\n"));
|
||||
// Spawn a new surface agent if no live agent is currently in the
|
||||
// "surface" phase. The bail script (bail-no-competing.sh) keeps
|
||||
// each pid file's content updated with the agent's current phase,
|
||||
// so we can read them to decide. Allowing a new "surface" agent
|
||||
// to start while an older agent finishes out its post-surface
|
||||
// phases is the whole point — surface can run at a higher cadence
|
||||
// than the full organize/observe tail.
|
||||
if surface_phase_busy(&state_dir) {
|
||||
self.log(format_args!("surface-observe already in surface phase\n"));
|
||||
} else {
|
||||
if transcript.size > 0 {
|
||||
fs::write(&offset_path, transcript.size.to_string()).ok();
|
||||
|
|
@ -270,30 +272,7 @@ impl AgentCycleState {
|
|||
}
|
||||
}
|
||||
|
||||
// Wait if agent is significantly behind
|
||||
let mut sleep_secs = None;
|
||||
let conversation_budget: u64 = 50_000;
|
||||
|
||||
if running && transcript.size > 0 {
|
||||
let behind = transcript.size.saturating_sub(last_offset);
|
||||
|
||||
if behind > conversation_budget / 2 {
|
||||
let sleep_start = Instant::now();
|
||||
self.log(format_args!("agent {}KB behind\n", behind / 1024));
|
||||
|
||||
for _ in 0..5 {
|
||||
std::thread::sleep(std::time::Duration::from_secs(1));
|
||||
self.poll_children();
|
||||
if !self.agent_running("surface-observe") { break; }
|
||||
}
|
||||
|
||||
let secs = (Instant::now() - sleep_start).as_secs_f64();
|
||||
self.log(format_args!("slept {secs:.2}s\n"));
|
||||
sleep_secs = Some(secs);
|
||||
}
|
||||
}
|
||||
|
||||
(surfaced_keys, sleep_secs)
|
||||
(surfaced_keys, None)
|
||||
}
|
||||
|
||||
fn reflection_cycle(&mut self, session: &HookSession) -> Option<String> {
|
||||
|
|
@ -367,6 +346,27 @@ impl AgentCycleState {
|
|||
}
|
||||
}
|
||||
|
||||
/// Is there a live agent in `state_dir` currently in the "surface" phase?
|
||||
/// Inspects pid-<N> files; their content is the phase string, kept fresh
|
||||
/// by bail-no-competing.sh on each step transition.
|
||||
fn surface_phase_busy(state_dir: &Path) -> bool {
|
||||
let Ok(entries) = fs::read_dir(state_dir) else { return false; };
|
||||
for entry in entries.flatten() {
|
||||
let fname = entry.file_name();
|
||||
let fname_s = fname.to_string_lossy();
|
||||
let Some(pid_str) = fname_s.strip_prefix("pid-") else { continue; };
|
||||
let Ok(pid) = pid_str.parse::<i32>() else { continue; };
|
||||
// Is the process alive?
|
||||
let alive = unsafe { libc::kill(pid, 0) } == 0;
|
||||
if !alive { continue; }
|
||||
let phase = fs::read_to_string(entry.path()).unwrap_or_default();
|
||||
if phase.trim() == "surface" {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Format agent cycle output for injection into a Claude Code session.
|
||||
pub fn format_agent_output(output: &AgentCycleOutput) -> String {
|
||||
let mut out = String::new();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue