diff --git a/src/agent/oneshot.rs b/src/agent/oneshot.rs index 2fce906..0f04e4d 100644 --- a/src/agent/oneshot.rs +++ b/src/agent/oneshot.rs @@ -497,15 +497,20 @@ pub async fn run_one_agent( .map(|s| s.phase.clone()).collect(); // Bail check: if the agent defines a bail script, run it between steps. + // The script also refreshes our pid-file with the current phase — that's + // how concurrent agents know which phase each of us is in. let bail_script = def.bail.as_ref().map(|name| defs::agents_dir().join(name)); let state_dir_for_bail = state_dir.clone(); - // Find our own pid file so we can pass it to the bail script let our_pid = std::process::id(); let our_pid_file = format!("pid-{}", our_pid); + let step_phases_for_bail = step_phases.clone(); let bail_fn = move |step_idx: usize| -> Result<(), String> { if let Some(ref script) = bail_script { + let phase = step_phases_for_bail.get(step_idx) + .map(String::as_str).unwrap_or(""); let status = std::process::Command::new(script) .arg(&our_pid_file) + .arg(phase) .current_dir(&state_dir_for_bail) .status() .map_err(|e| format!("bail script {:?} failed: {}", script, e))?; diff --git a/src/subconscious/agents/bail-no-competing.sh b/src/subconscious/agents/bail-no-competing.sh index 43c3096..95b8219 100755 --- a/src/subconscious/agents/bail-no-competing.sh +++ b/src/subconscious/agents/bail-no-competing.sh @@ -1,21 +1,49 @@ #!/bin/bash -# Bail if other agents are alive in the state dir. -# $1 = this agent's pid file name (e.g. pid-12345) -# cwd = state dir +# Bail if another agent is in the same phase-group as us. # -# Exit 0 = continue, exit 1 = bail +# $1 = our pid file name (e.g. "pid-12345") +# $2 = the phase we're about to enter (e.g. "surface", "observe") +# cwd = state dir +# +# Also refreshes our own pid file with the current phase on each call, +# so concurrent agents can read each other's phase by cat'ing the pid +# files in the state dir. +# +# Phase groups: "surface" vs everything else ("post-surface"). We allow +# at most one agent per group to be alive at a time — so surface can run +# at a higher frequency than the slower organize/observe tail. +# +# Exit 0 = continue, exit 1 = bail (another agent in our group is alive). shopt -s nullglob my_pid_file="$1" +my_phase="$2" + +# Refresh our own pid file with the current phase. +printf '%s' "$my_phase" > "$my_pid_file" + +group_of() { + if [[ "$1" == "surface" ]]; then + echo "surface" + else + echo "post-surface" + fi +} + +my_group=$(group_of "$my_phase") for f in pid-*; do - [[ $f == $my_pid_file ]] && continue + [[ "$f" == "$my_pid_file" ]] && continue pid="${f#pid-}" - if kill -0 "$pid" 2>/dev/null; then - exit 1 # competing agent is alive - else - rm -f "$f" # stale pid file, clean up + if ! kill -0 "$pid" 2>/dev/null; then + rm -f "$f" # stale pid file, clean up + continue + fi + other_phase=$(cat "$f" 2>/dev/null) + other_group=$(group_of "$other_phase") + if [[ "$my_group" == "$other_group" ]]; then + exit 1 fi done