From be2b4999789e98a90463a37284351f30f2e9643c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Mar 2026 02:09:40 -0400 Subject: [PATCH] remove claude CLI subprocess code from llm.rs All LLM calls now go through the direct API backend. Removes call_model, call_model_with_tools, call_sonnet, call_haiku, log_usage, and their dependencies (Command, prctl, watchdog). Co-Authored-By: Claude Opus 4.6 (1M context) --- poc-memory/src/agents/llm.rs | 180 +---------------------------------- 1 file changed, 1 insertion(+), 179 deletions(-) diff --git a/poc-memory/src/agents/llm.rs b/poc-memory/src/agents/llm.rs index d117f15..a8db5ca 100644 --- a/poc-memory/src/agents/llm.rs +++ b/poc-memory/src/agents/llm.rs @@ -1,187 +1,9 @@ -// LLM utilities: model invocation and response parsing -// -// Calls claude CLI as a subprocess. Uses prctl(PR_SET_PDEATHSIG) -// so child processes die when the daemon exits, preventing orphans. +// LLM utilities: model invocation via direct API use crate::store::Store; use regex::Regex; use std::fs; -use std::os::unix::process::CommandExt; -use std::process::Command; - -fn log_usage(agent: &str, model: &str, prompt: &str, response: &str, - duration_ms: u128, ok: bool) { - let dir = crate::config::get().data_dir.join("llm-logs").join(agent); - let _ = fs::create_dir_all(&dir); - - let date = chrono::Local::now().format("%Y-%m-%d"); - let path = dir.join(format!("{}.md", date)); - - let ts = chrono::Local::now().format("%H:%M:%S"); - let status = if ok { "ok" } else { "ERROR" }; - - let entry = format!( - "\n## {} — {} ({}, {:.1}s, {})\n\n\ - ### Prompt ({} chars)\n\n\ - ```\n{}\n```\n\n\ - ### Response ({} chars)\n\n\ - ```\n{}\n```\n\n---\n", - ts, agent, model, duration_ms as f64 / 1000.0, status, - prompt.len(), prompt, - response.len(), response, - ); - - use std::io::Write; - if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(&path) { - let _ = f.write_all(entry.as_bytes()); - } -} - -/// Maximum time to wait for a claude subprocess before killing it. -const SUBPROCESS_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300); // 5 minutes -/// Longer timeout for agents with tool access (multi-turn conversations). -const TOOL_AGENT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(900); // 15 minutes - -/// Call a model via claude CLI. Returns the response text. -/// -/// Sets PR_SET_PDEATHSIG on the child so it gets SIGTERM if the -/// parent daemon exits — no more orphaned claude processes. -/// Times out after 5 minutes to prevent blocking the daemon forever. -fn call_model(agent: &str, model: &str, prompt: &str) -> Result { - call_model_with_tools(agent, model, prompt, &[]) -} - -/// Call a model via claude CLI, optionally with allowed tools. -/// -/// When `tools` is empty, all tools are disabled (`--tools ""`). -/// When `tools` has entries, they're passed as `--allowedTools` patterns -/// (e.g. `["Bash(poc-memory:*)"]`), letting the agent call those tools -/// in Claude's native tool loop. -fn call_model_with_tools(agent: &str, model: &str, prompt: &str, - tools: &[String]) -> Result { - let timeout = if tools.is_empty() { SUBPROCESS_TIMEOUT } else { TOOL_AGENT_TIMEOUT }; - - // Write prompt to temp file (claude CLI needs file input for large prompts) - let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt", - std::process::id(), std::thread::current().id())); - fs::write(&tmp, prompt) - .map_err(|e| format!("write temp prompt: {}", e))?; - - let mut cmd = Command::new("claude"); - if tools.is_empty() { - cmd.args(["-p", "--model", model, "--tools", "", "--no-session-persistence", - "--strict-mcp-config"]); - } else { - cmd.args(["-p", "--model", model, "--no-session-persistence", - "--strict-mcp-config", "--allowedTools"]); - for tool in tools { - cmd.arg(tool); - } - } - cmd - .stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?) - .stdout(std::process::Stdio::piped()) - .stderr(std::process::Stdio::piped()) - .env_remove("CLAUDECODE"); - - // Use separate OAuth credentials for agent work if configured - if let Some(ref dir) = crate::config::get().agent_config_dir { - cmd.env("CLAUDE_CONFIG_DIR", dir); - } - - // Tell hooks this is a daemon agent call, not interactive - cmd.env("POC_AGENT", "1"); - - // Set provenance so any nodes/links created by tool calls are tagged - cmd.env("POC_PROVENANCE", format!("agent:{}", agent)); - - let start = std::time::Instant::now(); - - let child = unsafe { - cmd.pre_exec(|| { - libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM); - Ok(()) - }) - .spawn() - .map_err(|e| format!("spawn claude: {}", e))? - }; - - // Spawn a watchdog thread that kills the child after the timeout. - // Uses a cancellation flag so the thread exits promptly when the child finishes. - let child_id = child.id(); - let cancel = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); - let cancel_flag = cancel.clone(); - let watchdog = std::thread::spawn(move || { - // Sleep in 1s increments so we can check the cancel flag - let deadline = std::time::Instant::now() + timeout; - while std::time::Instant::now() < deadline { - if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) { - return; - } - std::thread::sleep(std::time::Duration::from_secs(1)); - } - if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) { - return; - } - // Send SIGTERM, then SIGKILL after 5s grace period - unsafe { libc::kill(child_id as i32, libc::SIGTERM); } - for _ in 0..5 { - std::thread::sleep(std::time::Duration::from_secs(1)); - if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) { - return; - } - } - unsafe { libc::kill(child_id as i32, libc::SIGKILL); } - }); - - let result = child.wait_with_output(); - - // Cancel the watchdog thread - cancel.store(true, std::sync::atomic::Ordering::Relaxed); - watchdog.join().ok(); - - fs::remove_file(&tmp).ok(); - - match result { - Ok(output) => { - let elapsed = start.elapsed().as_millis(); - if elapsed > timeout.as_millis() - 1000 { - log_usage(agent, model, prompt, "TIMEOUT", elapsed, false); - return Err(format!("claude timed out after {:.0}s", elapsed as f64 / 1000.0)); - } - if output.status.success() { - let response = String::from_utf8_lossy(&output.stdout).trim().to_string(); - if response.is_empty() { - log_usage(agent, model, prompt, "EMPTY", elapsed, false); - return Err("claude returned empty response".into()); - } - if response.contains(": You've hit your limit \u{00b7} resets") { - log_usage(agent, model, prompt, "RATE_LIMITED", elapsed, false); - return Err(format!("rate limited: {}", crate::util::first_n_chars(&response, 200))); - } - log_usage(agent, model, prompt, &response, elapsed, true); - Ok(response) - } else { - let stderr = String::from_utf8_lossy(&output.stderr); - let preview = crate::util::first_n_chars(&stderr, 500); - log_usage(agent, model, prompt, &preview, elapsed, false); - Err(format!("claude exited {}: {}", output.status, preview.trim())) - } - } - Err(e) => Err(format!("wait claude: {}", e)), - } -} - -/// Call Sonnet via claude CLI. -pub(crate) fn call_sonnet(agent: &str, prompt: &str) -> Result { - call_model(agent, "sonnet", prompt) -} - -/// Call Haiku via claude CLI (cheaper, faster — good for high-volume extraction). -pub(crate) fn call_haiku(agent: &str, prompt: &str) -> Result { - call_model(agent, "haiku", prompt) -} /// Simple LLM call for non-agent uses (audit, digest, compare). /// Logs to llm-logs/{caller}/ file.