From 755a359078a453f05ada63b2b3e3c6813d6aa61c Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sun, 5 Apr 2026 13:30:56 -0400
Subject: [PATCH] supervisor: PID file to prevent duplicate daemon spawns

Multiple supervisor instances (Mind init + channel polling) could
both see no socket and start the same daemon. The socket hasn't
bound yet by the time the second check runs.

Write a PID file on spawn, check it in is_alive(). kill(pid, 0)
verifies the process is still running. Stale PID files are cleaned
up automatically.

Co-Authored-By: Kent Overstreet
---
 src/thalamus/supervisor.rs | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/src/thalamus/supervisor.rs b/src/thalamus/supervisor.rs
index b38f9b3..a4c53ec 100644
--- a/src/thalamus/supervisor.rs
+++ b/src/thalamus/supervisor.rs
@@ -111,19 +111,29 @@ impl Supervisor {
         }
     }
 
-    /// Check if a daemon is alive by testing its socket.
+    /// Check if a daemon is alive by testing its socket or PID file.
     fn is_alive(name: &str) -> bool {
+        // Check socket first
         let sock = channels_dir().join(format!("{}.sock", name));
-        if !sock.exists() {
-            return false;
-        }
-        match std::os::unix::net::UnixStream::connect(&sock) {
-            Ok(_) => true,
-            Err(_) => {
-                let _ = std::fs::remove_file(&sock);
-                false
+        if sock.exists() {
+            match std::os::unix::net::UnixStream::connect(&sock) {
+                Ok(_) => return true,
+                Err(_) => { let _ = std::fs::remove_file(&sock); }
             }
         }
+        // Check PID file — daemon may still be starting up
+        let pid_file = channels_dir().join(format!("{}.pid", name));
+        if let Ok(pid_str) = std::fs::read_to_string(&pid_file) {
+            if let Ok(pid) = pid_str.trim().parse::<i32>() {
+                // kill(pid, 0) checks if process exists
+                if unsafe { libc::kill(pid, 0) } == 0 {
+                    return true;
+                }
+                // Process dead, clean up stale PID file
+                let _ = std::fs::remove_file(&pid_file);
+            }
+        }
+        false
     }
 
     /// Ensure all configured autostart daemons are running.
@@ -173,6 +183,8 @@ impl Supervisor {
         {
             Ok(child) => {
                 info!("channel {} started (pid {})", name, child.id());
+                let pid_file = channels_dir().join(format!("{}.pid", name));
+                let _ = std::fs::write(&pid_file, child.id().to_string());
                 self.children.insert(name.to_string(), child);
             }
             Err(e) => error!("failed to start channel {}: {}", name, e),