2026-03-05 15:30:57 -05:00
|
|
|
// LLM utilities: model invocation and response parsing
|
2026-03-03 17:18:18 -05:00
|
|
|
//
|
2026-03-05 21:15:57 -05:00
|
|
|
// Calls claude CLI as a subprocess. Uses prctl(PR_SET_PDEATHSIG)
|
|
|
|
|
// so child processes die when the daemon exits, preventing orphans.
|
2026-03-03 17:18:18 -05:00
|
|
|
|
|
|
|
|
use crate::store::Store;
|
|
|
|
|
|
|
|
|
|
use regex::Regex;
|
|
|
|
|
use std::fs;
|
2026-03-05 21:15:57 -05:00
|
|
|
use std::os::unix::process::CommandExt;
|
2026-03-03 17:18:18 -05:00
|
|
|
use std::process::Command;
|
|
|
|
|
|
llm: full per-agent usage logging with prompts and responses
Log every model call to ~/.claude/memory/llm-logs/YYYY-MM-DD.md with
full prompt, response, agent type, model, duration, and status. One
file per day, markdown formatted for easy reading.
Agent types: fact-mine, experience-mine, consolidate, knowledge,
digest, enrich, audit. This gives visibility into what each agent
is doing and whether to adjust prompts or frequency.
2026-03-05 22:52:08 -05:00
|
|
|
fn log_usage(agent: &str, model: &str, prompt: &str, response: &str,
|
|
|
|
|
duration_ms: u128, ok: bool) {
|
2026-03-05 22:54:05 -05:00
|
|
|
let dir = crate::config::get().data_dir.join("llm-logs").join(agent);
|
llm: full per-agent usage logging with prompts and responses
Log every model call to ~/.claude/memory/llm-logs/YYYY-MM-DD.md with
full prompt, response, agent type, model, duration, and status. One
file per day, markdown formatted for easy reading.
Agent types: fact-mine, experience-mine, consolidate, knowledge,
digest, enrich, audit. This gives visibility into what each agent
is doing and whether to adjust prompts or frequency.
2026-03-05 22:52:08 -05:00
|
|
|
let _ = fs::create_dir_all(&dir);
|
|
|
|
|
|
|
|
|
|
let date = chrono::Local::now().format("%Y-%m-%d");
|
|
|
|
|
let path = dir.join(format!("{}.md", date));
|
|
|
|
|
|
|
|
|
|
let ts = chrono::Local::now().format("%H:%M:%S");
|
|
|
|
|
let status = if ok { "ok" } else { "ERROR" };
|
|
|
|
|
|
|
|
|
|
let entry = format!(
|
|
|
|
|
"\n## {} — {} ({}, {:.1}s, {})\n\n\
|
|
|
|
|
### Prompt ({} chars)\n\n\
|
|
|
|
|
```\n{}\n```\n\n\
|
|
|
|
|
### Response ({} chars)\n\n\
|
|
|
|
|
```\n{}\n```\n\n---\n",
|
|
|
|
|
ts, agent, model, duration_ms as f64 / 1000.0, status,
|
|
|
|
|
prompt.len(), prompt,
|
|
|
|
|
response.len(), response,
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
use std::io::Write;
|
|
|
|
|
if let Ok(mut f) = fs::OpenOptions::new().create(true).append(true).open(&path) {
|
|
|
|
|
let _ = f.write_all(entry.as_bytes());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-10 23:29:01 -04:00
|
|
|
/// How long a claude subprocess may run before it is killed (5 minutes).
const SUBPROCESS_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5 * 60);

/// Extended limit for agents that have tool access, since their
/// multi-turn conversations take longer (15 minutes).
const TOOL_AGENT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(15 * 60);
|
2026-03-10 23:29:01 -04:00
|
|
|
|
2026-03-05 15:30:57 -05:00
|
|
|
/// Call a model via claude CLI. Returns the response text.
|
2026-03-05 21:15:57 -05:00
|
|
|
///
|
|
|
|
|
/// Sets PR_SET_PDEATHSIG on the child so it gets SIGTERM if the
|
|
|
|
|
/// parent daemon exits — no more orphaned claude processes.
|
2026-03-10 23:29:01 -04:00
|
|
|
/// Times out after 5 minutes to prevent blocking the daemon forever.
|
llm: full per-agent usage logging with prompts and responses
Log every model call to ~/.claude/memory/llm-logs/YYYY-MM-DD.md with
full prompt, response, agent type, model, duration, and status. One
file per day, markdown formatted for easy reading.
Agent types: fact-mine, experience-mine, consolidate, knowledge,
digest, enrich, audit. This gives visibility into what each agent
is doing and whether to adjust prompts or frequency.
2026-03-05 22:52:08 -05:00
|
|
|
fn call_model(agent: &str, model: &str, prompt: &str) -> Result<String, String> {
|
2026-03-13 18:50:06 -04:00
|
|
|
call_model_with_tools(agent, model, prompt, &[])
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Call a model via claude CLI, optionally with allowed tools.
|
|
|
|
|
///
|
|
|
|
|
/// When `tools` is empty, all tools are disabled (`--tools ""`).
|
|
|
|
|
/// When `tools` has entries, they're passed as `--allowedTools` patterns
|
|
|
|
|
/// (e.g. `["Bash(poc-memory:*)"]`), letting the agent call those tools
|
|
|
|
|
/// in Claude's native tool loop.
|
|
|
|
|
fn call_model_with_tools(agent: &str, model: &str, prompt: &str,
|
|
|
|
|
tools: &[String]) -> Result<String, String> {
|
|
|
|
|
let timeout = if tools.is_empty() { SUBPROCESS_TIMEOUT } else { TOOL_AGENT_TIMEOUT };
|
|
|
|
|
|
2026-03-03 17:18:18 -05:00
|
|
|
// Write prompt to temp file (claude CLI needs file input for large prompts)
|
|
|
|
|
let tmp = std::env::temp_dir().join(format!("poc-llm-{}-{:?}.txt",
|
|
|
|
|
std::process::id(), std::thread::current().id()));
|
|
|
|
|
fs::write(&tmp, prompt)
|
|
|
|
|
.map_err(|e| format!("write temp prompt: {}", e))?;
|
|
|
|
|
|
2026-03-05 22:28:39 -05:00
|
|
|
let mut cmd = Command::new("claude");
|
2026-03-13 18:50:06 -04:00
|
|
|
if tools.is_empty() {
|
|
|
|
|
cmd.args(["-p", "--model", model, "--tools", "", "--no-session-persistence",
|
|
|
|
|
"--strict-mcp-config"]);
|
|
|
|
|
} else {
|
|
|
|
|
cmd.args(["-p", "--model", model, "--no-session-persistence",
|
|
|
|
|
"--strict-mcp-config", "--allowedTools"]);
|
|
|
|
|
for tool in tools {
|
|
|
|
|
cmd.arg(tool);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
cmd
|
2026-03-05 22:28:39 -05:00
|
|
|
.stdin(fs::File::open(&tmp).map_err(|e| format!("open temp: {}", e))?)
|
2026-03-10 23:29:01 -04:00
|
|
|
.stdout(std::process::Stdio::piped())
|
|
|
|
|
.stderr(std::process::Stdio::piped())
|
2026-03-05 22:28:39 -05:00
|
|
|
.env_remove("CLAUDECODE");
|
|
|
|
|
|
2026-03-05 22:43:50 -05:00
|
|
|
// Use separate OAuth credentials for agent work if configured
|
|
|
|
|
if let Some(ref dir) = crate::config::get().agent_config_dir {
|
|
|
|
|
cmd.env("CLAUDE_CONFIG_DIR", dir);
|
2026-03-05 22:28:39 -05:00
|
|
|
}
|
|
|
|
|
|
2026-03-06 00:16:03 -05:00
|
|
|
// Tell hooks this is a daemon agent call, not interactive
|
|
|
|
|
cmd.env("POC_AGENT", "1");
|
|
|
|
|
|
2026-03-14 12:27:30 -04:00
|
|
|
// Set provenance so any nodes/links created by tool calls are tagged
|
|
|
|
|
cmd.env("POC_PROVENANCE", format!("agent:{}", agent));
|
|
|
|
|
|
2026-03-05 22:58:40 -05:00
|
|
|
let start = std::time::Instant::now();
|
|
|
|
|
|
2026-03-17 00:47:52 -04:00
|
|
|
let child = unsafe {
|
2026-03-05 22:28:39 -05:00
|
|
|
cmd.pre_exec(|| {
|
2026-03-05 21:15:57 -05:00
|
|
|
libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
|
|
|
|
|
Ok(())
|
|
|
|
|
})
|
2026-03-10 23:29:01 -04:00
|
|
|
.spawn()
|
|
|
|
|
.map_err(|e| format!("spawn claude: {}", e))?
|
2026-03-05 21:15:57 -05:00
|
|
|
};
|
2026-03-03 17:18:18 -05:00
|
|
|
|
2026-03-10 23:29:01 -04:00
|
|
|
// Spawn a watchdog thread that kills the child after the timeout.
|
|
|
|
|
// Uses a cancellation flag so the thread exits promptly when the child finishes.
|
|
|
|
|
let child_id = child.id();
|
|
|
|
|
let cancel = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
|
|
|
|
|
let cancel_flag = cancel.clone();
|
|
|
|
|
let watchdog = std::thread::spawn(move || {
|
|
|
|
|
// Sleep in 1s increments so we can check the cancel flag
|
2026-03-13 18:50:06 -04:00
|
|
|
let deadline = std::time::Instant::now() + timeout;
|
2026-03-10 23:29:01 -04:00
|
|
|
while std::time::Instant::now() < deadline {
|
|
|
|
|
if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
std::thread::sleep(std::time::Duration::from_secs(1));
|
|
|
|
|
}
|
|
|
|
|
if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
// Send SIGTERM, then SIGKILL after 5s grace period
|
|
|
|
|
unsafe { libc::kill(child_id as i32, libc::SIGTERM); }
|
|
|
|
|
for _ in 0..5 {
|
|
|
|
|
std::thread::sleep(std::time::Duration::from_secs(1));
|
|
|
|
|
if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
unsafe { libc::kill(child_id as i32, libc::SIGKILL); }
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
let result = child.wait_with_output();
|
|
|
|
|
|
|
|
|
|
// Cancel the watchdog thread
|
|
|
|
|
cancel.store(true, std::sync::atomic::Ordering::Relaxed);
|
|
|
|
|
watchdog.join().ok();
|
|
|
|
|
|
2026-03-03 17:18:18 -05:00
|
|
|
fs::remove_file(&tmp).ok();
|
|
|
|
|
|
|
|
|
|
match result {
|
|
|
|
|
Ok(output) => {
|
llm: full per-agent usage logging with prompts and responses
Log every model call to ~/.claude/memory/llm-logs/YYYY-MM-DD.md with
full prompt, response, agent type, model, duration, and status. One
file per day, markdown formatted for easy reading.
Agent types: fact-mine, experience-mine, consolidate, knowledge,
digest, enrich, audit. This gives visibility into what each agent
is doing and whether to adjust prompts or frequency.
2026-03-05 22:52:08 -05:00
|
|
|
let elapsed = start.elapsed().as_millis();
|
2026-03-13 18:50:06 -04:00
|
|
|
if elapsed > timeout.as_millis() - 1000 {
|
2026-03-10 23:29:01 -04:00
|
|
|
log_usage(agent, model, prompt, "TIMEOUT", elapsed, false);
|
|
|
|
|
return Err(format!("claude timed out after {:.0}s", elapsed as f64 / 1000.0));
|
|
|
|
|
}
|
2026-03-03 17:18:18 -05:00
|
|
|
if output.status.success() {
|
llm: full per-agent usage logging with prompts and responses
Log every model call to ~/.claude/memory/llm-logs/YYYY-MM-DD.md with
full prompt, response, agent type, model, duration, and status. One
file per day, markdown formatted for easy reading.
Agent types: fact-mine, experience-mine, consolidate, knowledge,
digest, enrich, audit. This gives visibility into what each agent
is doing and whether to adjust prompts or frequency.
2026-03-05 22:52:08 -05:00
|
|
|
let response = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
2026-03-16 19:28:13 -04:00
|
|
|
if response.is_empty() {
|
|
|
|
|
log_usage(agent, model, prompt, "EMPTY", elapsed, false);
|
|
|
|
|
return Err("claude returned empty response".into());
|
|
|
|
|
}
|
|
|
|
|
if response.contains(": You've hit your limit \u{00b7} resets") {
|
|
|
|
|
log_usage(agent, model, prompt, "RATE_LIMITED", elapsed, false);
|
|
|
|
|
return Err(format!("rate limited: {}", crate::util::first_n_chars(&response, 200)));
|
|
|
|
|
}
|
llm: full per-agent usage logging with prompts and responses
Log every model call to ~/.claude/memory/llm-logs/YYYY-MM-DD.md with
full prompt, response, agent type, model, duration, and status. One
file per day, markdown formatted for easy reading.
Agent types: fact-mine, experience-mine, consolidate, knowledge,
digest, enrich, audit. This gives visibility into what each agent
is doing and whether to adjust prompts or frequency.
2026-03-05 22:52:08 -05:00
|
|
|
log_usage(agent, model, prompt, &response, elapsed, true);
|
|
|
|
|
Ok(response)
|
2026-03-03 17:18:18 -05:00
|
|
|
} else {
|
|
|
|
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
2026-03-08 21:13:02 -04:00
|
|
|
let preview = crate::util::first_n_chars(&stderr, 500);
|
llm: full per-agent usage logging with prompts and responses
Log every model call to ~/.claude/memory/llm-logs/YYYY-MM-DD.md with
full prompt, response, agent type, model, duration, and status. One
file per day, markdown formatted for easy reading.
Agent types: fact-mine, experience-mine, consolidate, knowledge,
digest, enrich, audit. This gives visibility into what each agent
is doing and whether to adjust prompts or frequency.
2026-03-05 22:52:08 -05:00
|
|
|
log_usage(agent, model, prompt, &preview, elapsed, false);
|
2026-03-05 21:15:57 -05:00
|
|
|
Err(format!("claude exited {}: {}", output.status, preview.trim()))
|
2026-03-03 17:18:18 -05:00
|
|
|
}
|
|
|
|
|
}
|
2026-03-10 23:29:01 -04:00
|
|
|
Err(e) => Err(format!("wait claude: {}", e)),
|
2026-03-03 17:18:18 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-05 15:30:57 -05:00
|
|
|
/// Call Sonnet via claude CLI.
|
llm: full per-agent usage logging with prompts and responses
Log every model call to ~/.claude/memory/llm-logs/YYYY-MM-DD.md with
full prompt, response, agent type, model, duration, and status. One
file per day, markdown formatted for easy reading.
Agent types: fact-mine, experience-mine, consolidate, knowledge,
digest, enrich, audit. This gives visibility into what each agent
is doing and whether to adjust prompts or frequency.
2026-03-05 22:52:08 -05:00
|
|
|
pub(crate) fn call_sonnet(agent: &str, prompt: &str) -> Result<String, String> {
|
|
|
|
|
call_model(agent, "sonnet", prompt)
|
2026-03-05 15:30:57 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Call Haiku via claude CLI (cheaper, faster — good for high-volume extraction).
|
llm: full per-agent usage logging with prompts and responses
Log every model call to ~/.claude/memory/llm-logs/YYYY-MM-DD.md with
full prompt, response, agent type, model, duration, and status. One
file per day, markdown formatted for easy reading.
Agent types: fact-mine, experience-mine, consolidate, knowledge,
digest, enrich, audit. This gives visibility into what each agent
is doing and whether to adjust prompts or frequency.
2026-03-05 22:52:08 -05:00
|
|
|
pub(crate) fn call_haiku(agent: &str, prompt: &str) -> Result<String, String> {
|
|
|
|
|
call_model(agent, "haiku", prompt)
|
2026-03-05 15:30:57 -05:00
|
|
|
}
|
|
|
|
|
|
2026-03-13 18:50:06 -04:00
|
|
|
/// Call a model using an agent definition's model and tool configuration.
|
2026-03-18 23:05:14 -04:00
|
|
|
/// Uses the direct API backend when api_base_url is configured,
|
|
|
|
|
/// otherwise falls back to claude CLI subprocess.
|
2026-03-13 18:50:06 -04:00
|
|
|
pub(crate) fn call_for_def(def: &super::defs::AgentDef, prompt: &str) -> Result<String, String> {
|
2026-03-19 11:17:07 -04:00
|
|
|
let config = crate::config::get();
|
2026-03-20 14:26:39 -04:00
|
|
|
if config.api_base_url.is_some() {
|
2026-03-19 11:21:30 -04:00
|
|
|
super::daemon::log_verbose(&def.agent, "llm-backend",
|
2026-03-19 11:17:07 -04:00
|
|
|
&format!("API: {}", config.api_base_url.as_deref().unwrap_or("?")));
|
2026-03-18 23:05:14 -04:00
|
|
|
let log = |msg: &str| eprintln!("[{}] {}", def.agent, msg);
|
|
|
|
|
super::api::call_api_with_tools_sync(&def.agent, prompt, &log)
|
|
|
|
|
} else {
|
2026-03-19 11:21:30 -04:00
|
|
|
super::daemon::log_verbose(&def.agent, "llm-backend",
|
2026-03-19 11:17:07 -04:00
|
|
|
&format!("claude -p (model={}, tools={})", def.model, def.tools.len()));
|
2026-03-18 23:05:14 -04:00
|
|
|
call_model_with_tools(&def.agent, &def.model, prompt, &def.tools)
|
|
|
|
|
}
|
2026-03-13 18:50:06 -04:00
|
|
|
}
|
|
|
|
|
|
2026-03-05 15:30:57 -05:00
|
|
|
/// Parse a JSON response, handling markdown fences.
|
2026-03-03 17:18:18 -05:00
|
|
|
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
|
|
|
|
|
let cleaned = response.trim();
|
|
|
|
|
let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
|
|
|
|
|
let cleaned = cleaned.strip_prefix("```").unwrap_or(cleaned);
|
|
|
|
|
let cleaned = cleaned.strip_suffix("```").unwrap_or(cleaned);
|
|
|
|
|
let cleaned = cleaned.trim();
|
|
|
|
|
|
|
|
|
|
if let Ok(v) = serde_json::from_str(cleaned) {
|
|
|
|
|
return Ok(v);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Try to find JSON object or array
|
|
|
|
|
let re_obj = Regex::new(r"\{[\s\S]*\}").unwrap();
|
|
|
|
|
let re_arr = Regex::new(r"\[[\s\S]*\]").unwrap();
|
|
|
|
|
|
|
|
|
|
if let Some(m) = re_obj.find(cleaned) {
|
|
|
|
|
if let Ok(v) = serde_json::from_str(m.as_str()) {
|
|
|
|
|
return Ok(v);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if let Some(m) = re_arr.find(cleaned) {
|
|
|
|
|
if let Ok(v) = serde_json::from_str(m.as_str()) {
|
|
|
|
|
return Ok(v);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-08 21:13:02 -04:00
|
|
|
let preview = crate::util::first_n_chars(cleaned, 200);
|
2026-03-05 21:15:57 -05:00
|
|
|
Err(format!("no valid JSON in response: {preview}..."))
|
2026-03-03 17:18:18 -05:00
|
|
|
}
|
|
|
|
|
|
2026-03-08 20:07:07 -04:00
|
|
|
/// Get all keys for prompt context.
|
2026-03-03 17:18:18 -05:00
|
|
|
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
|
|
|
|
|
let mut keys: Vec<String> = store.nodes.keys()
|
|
|
|
|
.cloned()
|
|
|
|
|
.collect();
|
|
|
|
|
keys.sort();
|
|
|
|
|
keys.truncate(200);
|
|
|
|
|
keys
|
|
|
|
|
}
|