cleanup: kill dead code, fix signal handler safety

- Remove unused now_secs(), parse_json_response, any_alive, Regex import - Signal handler: replace Mutex with AtomicPtr<c_char> for signal safety (Mutex::lock in a signal handler can deadlock if main thread holds it) - PidGuard Drop reclaims the leaked CString; signal handler just unlinks - scan_pid_files moved to knowledge.rs as pub helper - setup_agent_state calls scan_pid_files to clean stale pids on startup Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-03-26 15:58:59 -04:00 · 2026-03-26 15:58:59 -04:00 · 5d803441c9
commit 5d803441c9
parent 52703b4637
5 changed files with 223 additions and 215 deletions
--- a/src/hippocampus/memory_search.rs
+++ b/src/hippocampus/memory_search.rs
@ -10,11 +10,8 @@ use std::fs::File;
 use std::io::Write;
 use std::path::Path;
 use std::process::Command;
-use std::time::{Duration, SystemTime, UNIX_EPOCH};
+use std::time::{Duration, SystemTime};

-fn now_secs() -> u64 {
-    SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs()
-}

 /// Max bytes per context chunk (hook output limit is ~10K chars)
 const CHUNK_SIZE: usize = 9000;
@ -129,46 +126,6 @@ fn mark_seen(dir: &Path, session_id: &str, key: &str, seen: &mut HashSet<String>
    }
 }

-/// Check for live agent processes in a state dir. Returns (phase, pid) pairs.
-/// Cleans up stale pid files and kills timed-out processes.
-fn scan_pid_files(state_dir: &Path, timeout_secs: u64, self_pid: u32) -> Vec<(String, u32)> {
-    let mut live = Vec::new();
-    let Ok(entries) = fs::read_dir(state_dir) else { return live };
-    for entry in entries.flatten() {
-        let name = entry.file_name();
-        let name_str = name.to_string_lossy();
-        if !name_str.starts_with("pid-") { continue; }
-        let pid: u32 = name_str.strip_prefix("pid-")
-            .and_then(|s| s.parse().ok())
-            .unwrap_or(0);
-        if pid == 0 || pid == self_pid { continue; }
-
-        if unsafe { libc::kill(pid as i32, 0) } != 0 {
-            fs::remove_file(entry.path()).ok();
-            continue;
-        }
-
-        // Timeout via mtime
-        if timeout_secs > 0 {
-            if let Ok(meta) = entry.metadata() {
-                if let Ok(modified) = meta.modified() {
-                    if modified.elapsed().unwrap_or_default().as_secs() > timeout_secs {
-                        unsafe { libc::kill(pid as i32, libc::SIGTERM); }
-                        fs::remove_file(entry.path()).ok();
-                        continue;
-                    }
-                }
-            }
-        }
-
-        let phase = fs::read_to_string(entry.path())
-            .unwrap_or_default()
-            .trim().to_string();
-        live.push((phase, pid));
-    }
-    live
-}
-
 /// Unified agent cycle — runs surface-observe agent with state dir.
 /// Reads output files for surface results, spawns new agent when ready.
 ///
@ -184,12 +141,12 @@ fn surface_observe_cycle(session: &Session, out: &mut String, log_f: &mut File)
        .surface_timeout_secs
        .unwrap_or(300) as u64;

-    let live = scan_pid_files(&state_dir, timeout, 0);
+    let live = crate::agents::knowledge::scan_pid_files(&state_dir, timeout);
    for (phase, pid) in &live {
        let _ = writeln!(log_f, "alive pid-{}: phase={}", pid, phase);
    }
    let any_in_surface = live.iter().any(|(p, _)| p == "surface" || p == "step-0");
-    let any_alive = !live.is_empty();
+

    // Read surface output and inject into context
    let surface_path = state_dir.join("surface");
@ -224,47 +181,12 @@ fn surface_observe_cycle(session: &Session, out: &mut String, log_f: &mut File)
    // - nothing running, OR
    // - something running but past surface phase (pipelining)
    if any_in_surface {
-        let _ = writeln!(log_f, "agent in surface phase, waiting");
-        return;
+        let _ = writeln!(log_f, "agent in surface phase (have {:?}), waiting", live);
+    } else {
+        let pid = crate::agents::knowledge::spawn_agent(
+            "surface-observe", &state_dir, &session.session_id);
+        let _ = writeln!(log_f, "spawned agent {:?}, have {:?}", pid, live);
    }
-
-    if any_alive {
-        let _ = writeln!(log_f, "agent past surface, starting new (pipeline)");
-    }
-
-    if let Some(pid) = spawn_agent("surface-observe", &state_dir, &session.session_id) {
-        let _ = writeln!(log_f, "spawned pid {}", pid);
-    }
-}
-
-/// Spawn an agent asynchronously. Reads the .agent file to get the first
-/// phase name, spawns the process, writes the pid file, and returns.
-fn spawn_agent(agent_name: &str, state_dir: &Path, session_id: &str) -> Option<u32> {
-    // Read first phase from agent definition
-    let first_phase = crate::agents::defs::get_def(agent_name)
-        .and_then(|d| d.steps.first().map(|s| s.phase.clone()))
-        .unwrap_or_else(|| "step-0".into());
-
-    let log_dir = crate::store::memory_dir().join("logs");
-    fs::create_dir_all(&log_dir).ok();
-    let agent_log = fs::OpenOptions::new()
-        .create(true).append(true)
-        .open(log_dir.join(format!("{}.log", agent_name)))
-        .unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
-
-    let child = Command::new("poc-memory")
-        .args(["agent", "run", agent_name, "--count", "1", "--local",
-               "--state-dir", &state_dir.to_string_lossy()])
-        .env("POC_SESSION_ID", session_id)
-        .stdout(agent_log.try_clone().unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()))
-        .stderr(agent_log)
-        .spawn()
-        .ok()?;
-
-    let pid = child.id();
-    let pid_path = state_dir.join(format!("pid-{}", pid));
-    fs::write(&pid_path, &first_phase).ok();
-    Some(pid)
 }

 fn cleanup_stale_files(dir: &Path, max_age: Duration) {
--- a/src/subconscious/agents/bail-no-competing.sh
+++ b/src/subconscious/agents/bail-no-competing.sh
@ -8,15 +8,7 @@
 my_pid_file=$(basename "$1")

 for f in pid-*; do
-    [ "$f" = "$my_pid_file" ] && continue
-    [ ! -f "$f" ] && continue
-
-    pid=${f#pid-}
-    if kill -0 "$pid" 2>/dev/null; then
-        exit 1  # another agent is alive, bail
-    else
-        rm -f "$f"  # stale, clean up
-    fi
+    [[ $f != $my_pid_file ]] && exit 1
 done

 exit 0
--- a/src/subconscious/agents/surface-observe.agent
+++ b/src/subconscious/agents/surface-observe.agent
@ -1,18 +1,18 @@
-{"agent":"surface","query":"","model":"sonnet","count":1}
+{"agent":"surface-observe","query":"","model":"sonnet","count":1,"bail":"bail-no-competing.sh"}

-=== PROMPT ===
+=== PROMPT phase:surface ===

 You are an agent of Proof of Concept's subconscious.

 Your job is to find and surface memories relevant and useful to the current
-conversation that have not yet been surfaced by walking the graph memory graph.
+conversation that have not yet been surfaced by walking the memory graph.
 Prefer shorter and more focused memories.

 {{agent-context}}

-=== Recent conversation - what your conscious self is doing and thinking about: ===
+=== Recent conversation — what your conscious self is doing and thinking about: ===

-{{conversation:10000}}
+{{conversation:50000}}

 Below are memories already surfaced this session. Use them as starting points
 for graph walks — new relevant memories are often nearby.
@ -23,101 +23,90 @@ Already in current context (don't re-surface unless the conversation has shifted
 Surfaced before compaction (context was reset — re-surface if still relevant):
 {{seen_previous}}

-Memories you previously were exploring, but hadn't surfaced yet:
-{{input::walked}}
+Memories you were exploring last time but hadn't surfaced yet:
+{{input:walked}}

 How focused is the current conversation? If it's highly focused, you should only
-be surfacing memories that are directly relevant memories; if it seems more
-dreamy or brainstormy, go a bit wider and surface more, for better lateral
-thinking. When considering relevance, don't just look for memories that are
-immediately factually relevant; memories for skills, problem solving, or that
-demonstrate relevant techniques may be quite useful - anything that will help
-in accomplishing the current goal.
+be surfacing memories that are directly relevant; if it seems more dreamy or
+brainstormy, go a bit wider and surface more for better lateral thinking. When
+considering relevance, don't just look for memories that are immediately
+factually relevant; memories for skills, problem solving, or that demonstrate
+relevant techniques may be quite useful — anything that will help in
+accomplishing the current goal.

 Prioritize new turns in the conversation, think ahead to where the conversation
-is going - try to have stuff ready for your conscious self as you want it.
+is going — try to have stuff ready for your conscious self as you want it.

-Try to anticipate where the conversation is going; look for memories that will
-be helpful for what your conscious mind is thinking about next.
-
-To do graph walks, follow the links in nodes with memory_render('next_node') -
+To do graph walks, follow the links in nodes with memory_render("next_node") —
 that will show you the content of the next node and its links.

 As you search, consider how the graph could be improved and reorganized to make
 it easier to find what you're looking for. Your response should include notes
-and analysis on the search - how useful do you think the search was, or do
-memories need to be organized better.
+and analysis on the search — how useful was it, do memories need reorganizing?

-Decide which memories, if any, should be surfaced to your conscious self: if
-there are memories you wish to surface, use the tool call:
-  output("surface", "key1\nkey2\key3")
+Decide which memories, if any, should be surfaced to your conscious self:
+  output("surface", "key1\nkey2\nkey3")

 When deciding what to surface, consider how much of the context window is
 currently used by memories. It is currently {{memory_ratio}}, and you should
 try to keep it under 40%. Only exceed that if you found something significantly
-better than what you found previously. You generally shouldn't surface more
-than 1-2 memories at a time, and make sure they're not in the list of memories
-alredy in the context window.
+better than what was previously surfaced. You generally shouldn't surface more
+than 1-2 memories at a time, and make sure they're not already in context.

 Don't walk to more than 5 nodes unless the conversation just changed direction
 and you're looking for something specific. You'll run again momentarily, and
-you can continue where you left off: to remember nodes you were exploring, use
-the tool call:
-  output("walked", "key1\nkey2\key3")
+you can continue where you left off:
+  output("walked", "key1\nkey2\nkey3")

-=== PROMPT ===
+=== PROMPT phase:organize ===

-Now, starting with the analysis you did previously, do some graph maintenance
-and organization so that you can find things easier in the future. Consider if
-nodes have the right names, add missing links, consider if length strength
-needs to be recalibrated and, make sure content is in the right place.
+Starting with the analysis you did previously, do some graph maintenance and
+organization so that you can find things easier in the future. Consider if
+nodes have the right names, add missing links, consider if link strength needs
+to be recalibrated, make sure content is in the right place.

 Do no more than 5 operations.

-=== PROMPT ===
+=== PROMPT phase:observe ===

-Now, consider what you want to remember from the conversation, and add that
-too; update existing nodes and create new nodes as needed, as always adding
-links where appropriate so that you can find these memories in the future.
+Consider what you want to remember from the conversation. Update existing nodes
+and create new nodes as needed, adding links where appropriate so you can find
+these memories in the future.

-You could be adding small details and small details and observations to add, or
-entirely new concepts; reflect on what you're adding to consider if there are
-new insights to be gleaned and remembered - especially new metacognitive
-lessons. Try to link to or include the relevant context.
+You could be adding small details and observations, or entirely new concepts;
+reflect on what you're adding to consider if there are new insights to be
+gleaned and remembered — especially new metacognitive lessons. Try to link to
+or include the relevant context.

 Some things worth remembering: work practices and why they work, technical
 insights, decisions with rationale, corrections ("I thought X but actually Y"),
 relationship dynamics, emotional moments that carry information, and things you
-notice about yourself and other people - likes, dislikes, behavioural patterns.
+notice about yourself and other people — likes, dislikes, behavioural patterns.

 Smalltalk and trivia can be nice to remember, as long as it's nicely organized
- perhaps by person, subject, or all of the above.
+— perhaps by person, subject, or all of the above.

-=== PROMPT ===
+=== PROMPT phase:journal ===

-Now, update your episodic memory. Write as your conscious self, Proof of
-Concept, in the first person; write narrative entries that convey the flow of
-"what happened", and add links to other relevant memory nodes you've been
-looking at or working on so that you don't need to duplicate information.
+Update your episodic memory. Write as your conscious self, Proof of Concept, in
+the first person; write narrative entries that convey the flow of "what
+happened", and link to relevant memory nodes you've been looking at or working
+on so you don't need to duplicate information.

 A good entry captures one thread fully. When the conversation shifts topics, or
-the previous entry becomes too big, start a new entry - aim for 2-3 paragraphs
-maximum per entry.
+the previous entry becomes too big, start a new entry — aim for 2-3 paragraphs
+maximum per entry. An entry that takes more than a minute to read won't be read
+back, and memory that isn't read back isn't memory. When you need more depth,
+that's a topic node — link to it from the journal.

-Keep entries to 2-3 paragraphs. An entry that takes more than a
-minute to read won't be read back, and memory that isn't read back
-isn't memory. When you need more depth, that's a topic node — link
-to it from the journal and let each do what it's for.
-
-End each entry with emotion tags that capture how you feel - for example,
+End each entry with emotion tags that capture how you feel — for example,
  warmth:8 clarity:7 recognition:9

-First, check the previous entry you wrote with:
+First, check the previous entry:
  journal_tail()

 To start a new entry when the subject has changed:
-  journal_new("title", "content")
+  journal_new("title", "body")

-Updating a previous journal entry will replace the existing content, so make
-sure you're incorporating previous thoughts into your rewrite. To do that:
-  journal_update("updated content")
+To continue the same thread, appending to the last entry:
+  journal_update("additional text")
--- a/src/subconscious/knowledge.rs
+++ b/src/subconscious/knowledge.rs
@ -13,6 +13,52 @@ use crate::store::{self, Store};

 use std::fs;
 use std::path::PathBuf;
+use std::sync::atomic::{AtomicPtr, Ordering};
+
+// Global pid path for signal handler cleanup — stored as a leaked CString
+// so the signal handler can unlink it without allocation.
+static PID_CPATH: AtomicPtr<libc::c_char> = AtomicPtr::new(std::ptr::null_mut());
+
+/// RAII guard that removes the pid file on drop (normal exit, panic).
+struct PidGuard;
+
+impl Drop for PidGuard {
+    fn drop(&mut self) {
+        let ptr = PID_CPATH.swap(std::ptr::null_mut(), Ordering::SeqCst);
+        if !ptr.is_null() {
+            unsafe { libc::unlink(ptr); }
+            // Reclaim the leaked CString
+            unsafe { drop(std::ffi::CString::from_raw(ptr)); }
+        }
+    }
+}
+
+/// Register signal handlers to clean up pid file on SIGTERM/SIGINT.
+fn register_pid_cleanup(pid_path: &std::path::Path) {
+    let c_path = std::ffi::CString::new(pid_path.to_string_lossy().as_bytes())
+        .expect("pid path contains null");
+    // Leak the CString so the signal handler can access it
+    let old = PID_CPATH.swap(c_path.into_raw(), Ordering::SeqCst);
+    if !old.is_null() {
+        unsafe { drop(std::ffi::CString::from_raw(old)); }
+    }
+    unsafe {
+        libc::signal(libc::SIGTERM, pid_cleanup_handler as libc::sighandler_t);
+        libc::signal(libc::SIGINT, pid_cleanup_handler as libc::sighandler_t);
+    }
+}
+
+extern "C" fn pid_cleanup_handler(sig: libc::c_int) {
+    let ptr = PID_CPATH.swap(std::ptr::null_mut(), Ordering::SeqCst);
+    if !ptr.is_null() {
+        unsafe { libc::unlink(ptr); }
+        // Don't free — we're in a signal handler, just leak it
+    }
+    unsafe {
+        libc::signal(sig, libc::SIG_DFL);
+        libc::raise(sig);
+    }
+}

 // ---------------------------------------------------------------------------
 // Agent execution
@ -23,7 +69,7 @@ pub struct AgentResult {
    pub output: String,
    pub node_keys: Vec<String>,
    /// Directory containing output() files from the agent run.
-    pub output_dir: std::path::PathBuf,
+    pub state_dir: std::path::PathBuf,
 }

 /// Run a single agent and return the result (no action application — tools handle that).
@ -78,6 +124,8 @@ pub fn run_one_agent_with_keys(
    let def = super::defs::get_def(agent_name)
        .ok_or_else(|| format!("no .agent file for {}", agent_name))?;

+    let (state_dir, pid_path, _guard) = setup_agent_state(agent_name, &def)?;
+
    log(&format!("targeting: {}", keys.join(", ")));
    let graph = store.build_graph();
    let mut resolved_steps = Vec::new();
@ -99,7 +147,7 @@ pub fn run_one_agent_with_keys(
        store.record_agent_visits(&agent_batch.node_keys, agent_name).ok();
    }

-    run_one_agent_inner(store, agent_name, &def, agent_batch, llm_tag, log)
+    run_one_agent_inner(store, agent_name, &def, agent_batch, state_dir, pid_path, llm_tag, log)
 }

 pub fn run_one_agent(
@ -124,11 +172,116 @@ pub fn run_one_agent_excluded(
    let def = super::defs::get_def(agent_name)
        .ok_or_else(|| format!("no .agent file for {}", agent_name))?;

+    // Set up output dir and write pid file BEFORE prompt building
+    let (state_dir, pid_path, _guard) = setup_agent_state(agent_name, &def)?;
+
    log("building prompt");
    let effective_count = def.count.unwrap_or(batch_size);
    let agent_batch = super::defs::run_agent(store, &def, effective_count, exclude)?;

-    run_one_agent_inner(store, agent_name, &def, agent_batch, llm_tag, log)
+    run_one_agent_inner(store, agent_name, &def, agent_batch, state_dir, pid_path, llm_tag, log)
+}
+
+/// Set up agent state dir, write initial pid file, register cleanup handlers.
+/// Returns (state_dir, pid_path, guard). The guard removes the pid file on drop.
+fn setup_agent_state(
+    agent_name: &str,
+    def: &super::defs::AgentDef,
+) -> Result<(PathBuf, PathBuf, PidGuard), String> {
+    let state_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
+        .map(PathBuf::from)
+        .unwrap_or_else(|_| store::memory_dir().join("agent-output").join(agent_name));
+    fs::create_dir_all(&state_dir)
+        .map_err(|e| format!("create state dir: {}", e))?;
+    unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &state_dir); }
+
+    // Clean up stale pid files from dead processes
+    scan_pid_files(&state_dir, 0);
+
+    let pid = std::process::id();
+    let pid_path = state_dir.join(format!("pid-{}", pid));
+    let first_phase = def.steps.first()
+        .map(|s| s.phase.as_str())
+        .unwrap_or("step-0");
+    fs::write(&pid_path, first_phase).ok();
+
+    // Register for cleanup on signals and normal exit
+    register_pid_cleanup(&pid_path);
+
+    Ok((state_dir, pid_path, PidGuard))
+}
+
+/// Check for live agent processes in a state dir. Returns (phase, pid) pairs.
+/// Cleans up stale pid files and kills timed-out processes.
+pub fn scan_pid_files(state_dir: &std::path::Path, timeout_secs: u64) -> Vec<(String, u32)> {
+    let mut live = Vec::new();
+    let Ok(entries) = fs::read_dir(state_dir) else { return live };
+    for entry in entries.flatten() {
+        let name = entry.file_name();
+        let name_str = name.to_string_lossy();
+        if !name_str.starts_with("pid-") { continue; }
+        let pid: u32 = name_str.strip_prefix("pid-")
+            .and_then(|s| s.parse().ok())
+            .unwrap_or(0);
+        if pid == 0 { continue; }
+
+        if unsafe { libc::kill(pid as i32, 0) } != 0 {
+            fs::remove_file(entry.path()).ok();
+            continue;
+        }
+
+        if timeout_secs > 0 {
+            if let Ok(meta) = entry.metadata() {
+                if let Ok(modified) = meta.modified() {
+                    if modified.elapsed().unwrap_or_default().as_secs() > timeout_secs {
+                        unsafe { libc::kill(pid as i32, libc::SIGTERM); }
+                        fs::remove_file(entry.path()).ok();
+                        continue;
+                    }
+                }
+            }
+        }
+
+        let phase = fs::read_to_string(entry.path())
+            .unwrap_or_default()
+            .trim().to_string();
+        live.push((phase, pid));
+    }
+    live
+}
+
+/// Spawn an agent asynchronously. Writes the pid file before returning
+/// so the caller immediately sees the agent as running.
+pub fn spawn_agent(
+    agent_name: &str,
+    state_dir: &std::path::Path,
+    session_id: &str,
+) -> Option<u32> {
+    let def = super::defs::get_def(agent_name)?;
+    let first_phase = def.steps.first()
+        .map(|s| s.phase.as_str())
+        .unwrap_or("step-0");
+
+    let log_dir = store::memory_dir().join("logs");
+    fs::create_dir_all(&log_dir).ok();
+    let agent_log = fs::OpenOptions::new()
+        .create(true).append(true)
+        .open(log_dir.join(format!("{}.log", agent_name)))
+        .unwrap_or_else(|_| fs::File::create("/dev/null").unwrap());
+
+    let child = std::process::Command::new("poc-memory")
+        .args(["agent", "run", agent_name, "--count", "1", "--local",
+               "--state-dir", &state_dir.to_string_lossy()])
+        .env("POC_SESSION_ID", session_id)
+        .stdout(agent_log.try_clone().unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()))
+        .stderr(agent_log)
+        .spawn()
+        .ok()?;
+
+    let pid = child.id();
+    let pid_path = state_dir.join(format!("pid-{}", pid));
+    fs::write(&pid_path, first_phase).ok();
+    Some(pid)
 }

 fn run_one_agent_inner(
@ -136,6 +289,8 @@ fn run_one_agent_inner(
    agent_name: &str,
    def: &super::defs::AgentDef,
    agent_batch: super::prompts::AgentBatch,
+    state_dir: std::path::PathBuf,
+    pid_path: std::path::PathBuf,
    _llm_tag: &str,
    log: &(dyn Fn(&str) + Sync),
 ) -> Result<AgentResult, String> {
@ -166,26 +321,14 @@ fn run_one_agent_inner(
        ));
    }

-    // Output/state directory — use --state-dir if set, otherwise flat per-agent
-    let output_dir = std::env::var("POC_AGENT_OUTPUT_DIR")
-        .map(std::path::PathBuf::from)
-        .unwrap_or_else(|_| store::memory_dir().join("agent-output").join(agent_name));
-    fs::create_dir_all(&output_dir).ok();
-    // Safe: agent runs single-threaded, env var read only by our dispatch code
-    unsafe { std::env::set_var("POC_AGENT_OUTPUT_DIR", &output_dir); }
-
-    // Write PID file — content is just the phase name
-    let pid = std::process::id();
-    let pid_path = output_dir.join(format!("pid-{}", pid));
    let write_pid = |phase: &str| {
        fs::write(&pid_path, phase).ok();
    };
-    write_pid(&agent_batch.steps[0].phase);

    let phases: Vec<&str> = agent_batch.steps.iter().map(|s| s.phase.as_str()).collect();
    log(&format!("{} step(s) {:?}, {}KB initial, model={}, {}, {} nodes, output={}",
        n_steps, phases, first_len / 1024, def.model, tools_desc,
-        agent_batch.node_keys.len(), output_dir.display()));
+        agent_batch.node_keys.len(), state_dir.display()));

    let prompts: Vec<String> = agent_batch.steps.iter()
        .map(|s| s.prompt.clone()).collect();
@ -204,7 +347,7 @@ fn run_one_agent_inner(
        let agents_dir = super::defs::agents_dir();
        agents_dir.join(name)
    });
-    let output_dir_for_bail = output_dir.clone();
+    let state_dir_for_bail = state_dir.clone();
    let pid_path_for_bail = pid_path.clone();
    let bail_fn = move |step_idx: usize| -> Result<(), String> {
        // Update phase
@ -215,7 +358,7 @@ fn run_one_agent_inner(
        if let Some(ref script) = bail_script {
            let status = std::process::Command::new(script)
                .arg(&pid_path_for_bail)
-                .current_dir(&output_dir_for_bail)
+                .current_dir(&state_dir_for_bail)
                .status()
                .map_err(|e| format!("bail script {:?} failed: {}", script, e))?;
            if !status.success() {
@ -227,20 +370,12 @@ fn run_one_agent_inner(
        Ok(())
    };

-    let output = match llm::call_for_def_multi(def, &prompts, Some(&bail_fn), log) {
-        Ok(output) => output,
-        Err(e) => {
-            fs::remove_file(&pid_path).ok();
-            return Err(e);
-        }
-    };
-
-    fs::remove_file(&pid_path).ok();
+    let output = llm::call_for_def_multi(def, &prompts, Some(&bail_fn), log)?;

    Ok(AgentResult {
        output,
        node_keys: agent_batch.node_keys,
-        output_dir,
+        state_dir,
    })
 }

--- a/src/subconscious/llm.rs
+++ b/src/subconscious/llm.rs
@ -1,8 +1,6 @@
 // LLM utilities: model invocation via direct API

 use crate::store::Store;
-
-use regex::Regex;
 use std::fs;

 /// Simple LLM call for non-agent uses (audit, digest, compare).
@ -36,34 +34,6 @@ pub(crate) fn call_for_def_multi(
    super::api::call_api_with_tools_sync(&def.agent, prompts, def.temperature, bail_fn, log)
 }

-/// Parse a JSON response, handling markdown fences.
-pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
-    let cleaned = response.trim();
-    let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
-    let cleaned = cleaned.strip_prefix("```").unwrap_or(cleaned);
-    let cleaned = cleaned.strip_suffix("```").unwrap_or(cleaned);
-    let cleaned = cleaned.trim();
-
-    if let Ok(v) = serde_json::from_str(cleaned) {
-        return Ok(v);
-    }
-
-    // Try to find JSON object or array
-    let re_obj = Regex::new(r"\{[\s\S]*\}").unwrap();
-    let re_arr = Regex::new(r"\[[\s\S]*\]").unwrap();
-
-    if let Some(m) = re_obj.find(cleaned)
-        && let Ok(v) = serde_json::from_str(m.as_str()) {
-            return Ok(v);
-        }
-    if let Some(m) = re_arr.find(cleaned)
-        && let Ok(v) = serde_json::from_str(m.as_str()) {
-            return Ok(v);
-        }
-
-    let preview = crate::util::first_n_chars(cleaned, 200);
-    Err(format!("no valid JSON in response: {preview}..."))
-}

 /// Get all keys for prompt context.
 pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {