supervisor: PID file to prevent duplicate daemon spawns

Multiple supervisor instances (Mind init + channel polling) could
both see no socket and start the same daemon, because the first
daemon's socket hasn't been bound yet by the time the second check runs.

Write a PID file on spawn and check it in is_alive(). kill(pid, 0)
verifies the process is still running. A stale PID file is removed
as soon as that check finds the process gone.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2026-04-05 13:30:56 -04:00
parent 58737a2cef
commit 755a359078

View file

@ -111,19 +111,29 @@ impl Supervisor {
}
}
/// Check if a daemon is alive by testing its socket.
/// Check if a daemon is alive by testing its socket or PID file.
fn is_alive(name: &str) -> bool {
// NOTE(review): this span looks like a diff hunk rendered without +/-
// markers, so the removed and added bodies appear interleaved — confirm
// against the repository before treating it as one compilable function.
// Check socket first
let sock = channels_dir().join(format!("{}.sock", name));
if !sock.exists() {
return false;
}
match std::os::unix::net::UnixStream::connect(&sock) {
Ok(_) => true,
Err(_) => {
let _ = std::fs::remove_file(&sock);
false
if sock.exists() {
// A successful connect is taken as proof of life; the stream itself is
// discarded. A failed connect removes the socket file and falls through
// to the PID-file check below instead of returning.
match std::os::unix::net::UnixStream::connect(&sock) {
Ok(_) => return true,
Err(_) => { let _ = std::fs::remove_file(&sock); }
}
}
// Check PID file — daemon may still be starting up
let pid_file = channels_dir().join(format!("{}.pid", name));
if let Ok(pid_str) = std::fs::read_to_string(&pid_file) {
if let Ok(pid) = pid_str.trim().parse::<i32>() {
// kill(pid, 0) checks if process exists
// Signal 0 delivers nothing; only the existence/permission check runs.
// NOTE(review): a failure with errno EPERM presumably still means the
// process exists, yet any non-zero return is treated as dead here.
// NOTE(review): pid values <= 0 address process groups rather than a
// single process — confirm the PID file can never contain them.
if unsafe { libc::kill(pid, 0) } == 0 {
return true;
}
// Process dead, clean up stale PID file
let _ = std::fs::remove_file(&pid_file);
}
}
// Reached when neither check succeeded. An unreadable or unparsable PID
// file is left on disk and reported as not alive.
false
}
/// Ensure all configured autostart daemons are running.
@ -173,6 +183,8 @@ impl Supervisor {
{
Ok(child) => {
info!("channel {} started (pid {})", name, child.id());
let pid_file = channels_dir().join(format!("{}.pid", name));
let _ = std::fs::write(&pid_file, child.id().to_string());
self.children.insert(name.to_string(), child);
}
Err(e) => error!("failed to start channel {}: {}", name, e),