cleanup: kill dead code, fix signal handler safety
- Remove unused now_secs(), parse_json_response, any_alive, Regex import
- Signal handler: replace Mutex with AtomicPtr<c_char> for signal safety
  (Mutex::lock in a signal handler can deadlock if main thread holds it)
- PidGuard Drop reclaims the leaked CString; signal handler just unlinks
- scan_pid_files moved to knowledge.rs as pub helper
- setup_agent_state calls scan_pid_files to clean stale pids on startup

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
52703b4637
commit
5d803441c9
5 changed files with 223 additions and 215 deletions
|
|
@ -10,11 +10,8 @@ use std::fs::File;
|
|||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
fn now_secs() -> u64 {
|
||||
SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs()
|
||||
}
|
||||
|
||||
/// Max bytes per context chunk (hook output limit is ~10K chars)
|
||||
const CHUNK_SIZE: usize = 9000;
|
||||
|
|
@ -129,46 +126,6 @@ fn mark_seen(dir: &Path, session_id: &str, key: &str, seen: &mut HashSet<String>
|
|||
}
|
||||
}
|
||||
|
||||
/// Check for live agent processes in a state dir. Returns (phase, pid) pairs.
|
||||
/// Cleans up stale pid files and kills timed-out processes.
|
||||
fn scan_pid_files(state_dir: &Path, timeout_secs: u64, self_pid: u32) -> Vec<(String, u32)> {
|
||||
let mut live = Vec::new();
|
||||
let Ok(entries) = fs::read_dir(state_dir) else { return live };
|
||||
for entry in entries.flatten() {
|
||||
let name = entry.file_name();
|
||||
let name_str = name.to_string_lossy();
|
||||
if !name_str.starts_with("pid-") { continue; }
|
||||
let pid: u32 = name_str.strip_prefix("pid-")
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
if pid == 0 || pid == self_pid { continue; }
|
||||
|
||||
if unsafe { libc::kill(pid as i32, 0) } != 0 {
|
||||
fs::remove_file(entry.path()).ok();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Timeout via mtime
|
||||
if timeout_secs > 0 {
|
||||
if let Ok(meta) = entry.metadata() {
|
||||
if let Ok(modified) = meta.modified() {
|
||||
if modified.elapsed().unwrap_or_default().as_secs() > timeout_secs {
|
||||
unsafe { libc::kill(pid as i32, libc::SIGTERM); }
|
||||
fs::remove_file(entry.path()).ok();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let phase = fs::read_to_string(entry.path())
|
||||
.unwrap_or_default()
|
||||
.trim().to_string();
|
||||
live.push((phase, pid));
|
||||
}
|
||||
live
|
||||
}
|
||||
|
||||
/// Unified agent cycle — runs surface-observe agent with state dir.
|
||||
/// Reads output files for surface results, spawns new agent when ready.
|
||||
///
|
||||
|
|
@ -184,12 +141,12 @@ fn surface_observe_cycle(session: &Session, out: &mut String, log_f: &mut File)
|
|||
.surface_timeout_secs
|
||||
.unwrap_or(300) as u64;
|
||||
|
||||
let live = scan_pid_files(&state_dir, timeout, 0);
|
||||
let live = crate::agents::knowledge::scan_pid_files(&state_dir, timeout);
|
||||
for (phase, pid) in &live {
|
||||
let _ = writeln!(log_f, "alive pid-{}: phase={}", pid, phase);
|
||||
}
|
||||
let any_in_surface = live.iter().any(|(p, _)| p == "surface" || p == "step-0");
|
||||
let any_alive = !live.is_empty();
|
||||
|
||||
|
||||
// Read surface output and inject into context
|
||||
let surface_path = state_dir.join("surface");
|
||||
|
|
@ -224,47 +181,12 @@ fn surface_observe_cycle(session: &Session, out: &mut String, log_f: &mut File)
|
|||
// - nothing running, OR
|
||||
// - something running but past surface phase (pipelining)
|
||||
if any_in_surface {
|
||||
let _ = writeln!(log_f, "agent in surface phase, waiting");
|
||||
return;
|
||||
let _ = writeln!(log_f, "agent in surface phase (have {:?}), waiting", live);
|
||||
} else {
|
||||
let pid = crate::agents::knowledge::spawn_agent(
|
||||
"surface-observe", &state_dir, &session.session_id);
|
||||
let _ = writeln!(log_f, "spawned agent {:?}, have {:?}", pid, live);
|
||||
}
|
||||
|
||||
if any_alive {
|
||||
let _ = writeln!(log_f, "agent past surface, starting new (pipeline)");
|
||||
}
|
||||
|
||||
if let Some(pid) = spawn_agent("surface-observe", &state_dir, &session.session_id) {
|
||||
let _ = writeln!(log_f, "spawned pid {}", pid);
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn an agent asynchronously. Reads the .agent file to get the first
|
||||
/// phase name, spawns the process, writes the pid file, and returns.
|
||||
fn spawn_agent(agent_name: &str, state_dir: &Path, session_id: &str) -> Option<u32> {
|
||||
// Read first phase from agent definition
|
||||
let first_phase = crate::agents::defs::get_def(agent_name)
|
||||
.and_then(|d| d.steps.first().map(|s| s.phase.clone()))
|
||||
.unwrap_or_else(|| "step-0".into());
|
||||
|
||||
let log_dir = crate::store::memory_dir().join("logs");
|
||||
fs::create_dir_all(&log_dir).ok();
|
||||
let agent_log = fs::OpenOptions::new()
|
||||
.create(true).append(true)
|
||||
.open(log_dir.join(format!("{}.log", agent_name)))
|
||||
.unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
|
||||
|
||||
let child = Command::new("poc-memory")
|
||||
.args(["agent", "run", agent_name, "--count", "1", "--local",
|
||||
"--state-dir", &state_dir.to_string_lossy()])
|
||||
.env("POC_SESSION_ID", session_id)
|
||||
.stdout(agent_log.try_clone().unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()))
|
||||
.stderr(agent_log)
|
||||
.spawn()
|
||||
.ok()?;
|
||||
|
||||
let pid = child.id();
|
||||
let pid_path = state_dir.join(format!("pid-{}", pid));
|
||||
fs::write(&pid_path, &first_phase).ok();
|
||||
Some(pid)
|
||||
}
|
||||
|
||||
fn cleanup_stale_files(dir: &Path, max_age: Duration) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue