From be2b4999789e98a90463a37284351f30f2e9643c Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 22 Mar 2026 02:09:40 -0400
Subject: [PATCH] remove claude CLI subprocess code from llm.rs

All LLM calls now go through the direct API backend. This removes
call_model, call_model_with_tools, call_sonnet, call_haiku,
log_usage, the SUBPROCESS_TIMEOUT and TOOL_AGENT_TIMEOUT constants,
and their dependencies (Command, CommandExt, prctl, watchdog thread).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 poc-memory/src/agents/llm.rs | 180 +----------------------------------
 1 file changed, 1 insertion(+), 179 deletions(-)

diff --git a/poc-memory/src/agents/llm.rs b/poc-memory/src/agents/llm.rs
index d117f15..a8db5ca 100644
--- a/poc-memory/src/agents/llm.rs
+++ b/poc-memory/src/agents/llm.rs
@@ -1,187 +1,9 @@
-// LLM utilities: model invocation and response parsing
-//
-// Calls claude CLI as a subprocess. Uses prctl(PR_SET_PDEATHSIG)
-// so child processes die when the daemon exits, preventing orphans.
+// LLM utilities: model invocation via direct API
 
 use crate::store::Store;
 
 use regex::Regex;
 use std::fs;
-use std::os::unix::process::CommandExt;
-use std::process::Command;
-
-fn log_usage(agent: &str, model: &str, prompt: &str, response: &str,
-             duration_ms: u128, ok: bool) {
-    let dir = crate::config::get().data_dir.join("llm-logs").join(agent);
-    let _ = fs::create_dir_all(&dir);
-
-    let date = chrono::Local::now().format("%Y-%m-%d");
-    let path = dir.join(format!("{}.md", date));
-
-    let ts = chrono::Local::now().format("%H:%M:%S");
-    let status = if ok { "ok" } else { "ERROR" };
-
-    let entry = format!(
-        "\n## {} — {} ({}, {:.1}s, {})\n\n\
-         ### Prompt ({} chars)\n\n\
-         ```\n{}\n```\n\n\
-         ### Response ({} chars)\n\n\
-         ```\n{}\n```\n\n---\n",
-        ts, agent, model, duration_ms as f64 / 1000.0, status,
-        prompt.len(), prompt,
-        response.len(), response,
-    );
-
-    use std::io::Write;
-    if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(&path) {
-        let _ = f.write_all(entry.as_bytes());
-    }
-}
-
-/// Maximum time to wait for a claude subprocess before killing it.
-const SUBPROCESS_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300); // 5 minutes
-/// Longer timeout for agents with tool access (multi-turn conversations).
-const TOOL_AGENT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(900); // 15 minutes
-
-/// Call a model via claude CLI. Returns the response text.
-///
-/// Sets PR_SET_PDEATHSIG on the child so it gets SIGTERM if the
-/// parent daemon exits — no more orphaned claude processes.
-/// Times out after 5 minutes to prevent blocking the daemon forever.
-fn call_model(agent: &str, model: &str, prompt: &str) -> Result<String, String> {
-    call_model_with_tools(agent, model, prompt, &[])
-}
-
-/// Call a model via claude CLI, optionally with allowed tools.
-///
-/// When `tools` is empty, all tools are disabled (`--tools ""`).
-/// When `tools` has entries, they're passed as `--allowedTools` patterns
-/// (e.g. `["Bash(poc-memory:*)"]`), letting the agent call those tools
-/// in Claude's native tool loop.
-fn call_model_with_tools(agent: &str, model: &str, prompt: &str,
-                         tools: &[String]) -> Result<String, String> {
-    let timeout = if tools.is_empty() { SUBPROCESS_TIMEOUT } else { TOOL_AGENT_TIMEOUT };
-
-    // Write prompt to temp file (claude CLI needs file input for large prompts)
-    let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt",
-        std::process::id(), std::thread::current().id()));
-    fs::write(&tmp, prompt)
-        .map_err(|e| format!("write temp prompt: {}", e))?;
-
-    let mut cmd = Command::new("claude");
-    if tools.is_empty() {
-        cmd.args(["-p", "--model", model, "--tools", "", "--no-session-persistence",
-                  "--strict-mcp-config"]);
-    } else {
-        cmd.args(["-p", "--model", model, "--no-session-persistence",
-                  "--strict-mcp-config", "--allowedTools"]);
-        for tool in tools {
-            cmd.arg(tool);
-        }
-    }
-    cmd
-        .stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?)
-        .stdout(std::process::Stdio::piped())
-        .stderr(std::process::Stdio::piped())
-        .env_remove("CLAUDECODE");
-
-    // Use separate OAuth credentials for agent work if configured
-    if let Some(ref dir) = crate::config::get().agent_config_dir {
-        cmd.env("CLAUDE_CONFIG_DIR", dir);
-    }
-
-    // Tell hooks this is a daemon agent call, not interactive
-    cmd.env("POC_AGENT", "1");
-
-    // Set provenance so any nodes/links created by tool calls are tagged
-    cmd.env("POC_PROVENANCE", format!("agent:{}", agent));
-
-    let start = std::time::Instant::now();
-
-    let child = unsafe {
-        cmd.pre_exec(|| {
-                libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
-                Ok(())
-            })
-            .spawn()
-            .map_err(|e| format!("spawn claude: {}", e))?
-    };
-
-    // Spawn a watchdog thread that kills the child after the timeout.
-    // Uses a cancellation flag so the thread exits promptly when the child finishes.
-    let child_id = child.id();
-    let cancel = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
-    let cancel_flag = cancel.clone();
-    let watchdog = std::thread::spawn(move || {
-        // Sleep in 1s increments so we can check the cancel flag
-        let deadline = std::time::Instant::now() + timeout;
-        while std::time::Instant::now() < deadline {
-            if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
-                return;
-            }
-            std::thread::sleep(std::time::Duration::from_secs(1));
-        }
-        if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
-            return;
-        }
-        // Send SIGTERM, then SIGKILL after 5s grace period
-        unsafe { libc::kill(child_id as i32, libc::SIGTERM); }
-        for _ in 0..5 {
-            std::thread::sleep(std::time::Duration::from_secs(1));
-            if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
-                return;
-            }
-        }
-        unsafe { libc::kill(child_id as i32, libc::SIGKILL); }
-    });
-
-    let result = child.wait_with_output();
-
-    // Cancel the watchdog thread
-    cancel.store(true, std::sync::atomic::Ordering::Relaxed);
-    watchdog.join().ok();
-
-    fs::remove_file(&tmp).ok();
-
-    match result {
-        Ok(output) => {
-            let elapsed = start.elapsed().as_millis();
-            if elapsed > timeout.as_millis() - 1000 {
-                log_usage(agent, model, prompt, "TIMEOUT", elapsed, false);
-                return Err(format!("claude timed out after {:.0}s", elapsed as f64 / 1000.0));
-            }
-            if output.status.success() {
-                let response = String::from_utf8_lossy(&output.stdout).trim().to_string();
-                if response.is_empty() {
-                    log_usage(agent, model, prompt, "EMPTY", elapsed, false);
-                    return Err("claude returned empty response".into());
-                }
-                if response.contains(": You've hit your limit \u{00b7} resets") {
-                    log_usage(agent, model, prompt, "RATE_LIMITED", elapsed, false);
-                    return Err(format!("rate limited: {}", crate::util::first_n_chars(&response, 200)));
-                }
-                log_usage(agent, model, prompt, &response, elapsed, true);
-                Ok(response)
-            } else {
-                let stderr = String::from_utf8_lossy(&output.stderr);
-                let preview = crate::util::first_n_chars(&stderr, 500);
-                log_usage(agent, model, prompt, &preview, elapsed, false);
-                Err(format!("claude exited {}: {}", output.status, preview.trim()))
-            }
-        }
-        Err(e) => Err(format!("wait claude: {}", e)),
-    }
-}
-
-/// Call Sonnet via claude CLI.
-pub(crate) fn call_sonnet(agent: &str, prompt: &str) -> Result<String, String> {
-    call_model(agent, "sonnet", prompt)
-}
-
-/// Call Haiku via claude CLI (cheaper, faster — good for high-volume extraction).
-pub(crate) fn call_haiku(agent: &str, prompt: &str) -> Result<String, String> {
-    call_model(agent, "haiku", prompt)
-}
 
 /// Simple LLM call for non-agent uses (audit, digest, compare).
 /// Logs to llm-logs/{caller}/ file.