restructure: hippocampus/ for memory, subconscious/ for agents

hippocampus/ — memory storage, retrieval, and consolidation: store, graph, query, similarity, spectral, neuro, counters, config, transcript, memory_search, lookups, cursor, migrate.

subconscious/ — autonomous agents that process without being asked: reflect, surface, consolidate, digest, audit, etc.

All existing crate::X paths preserved via re-exports in lib.rs.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2026-03-25 01:04:13 -04:00 · 2026-03-25 01:04:13 -04:00 · d5c0e86700
commit d5c0e86700
parent cfed85bd20
39 changed files with 87 additions and 32 deletions
--- a/src/agents/knowledge.rs
+++ b/src/agents/knowledge.rs
@ -1,312 +0,0 @@
-// knowledge.rs — agent execution and conversation fragment selection
-//
-// Agent prompts live in agents/*.agent files, dispatched via defs.rs.
-// This module handles:
-//   - Agent execution (build prompt → call LLM with tools → log)
-//   - Conversation fragment selection (for observation agent)
-//
-// Agents apply changes via tool calls (poc-memory write/link-add/etc)
-// during the LLM call — no action parsing needed.
-
-use super::llm;
-use crate::store::{self, Store};
-
-use std::fs;
-use std::path::PathBuf;
-
-// ---------------------------------------------------------------------------
-// Agent execution
-// ---------------------------------------------------------------------------
-
-/// Result of running a single agent.
-pub struct AgentResult {
-    /// Text returned by the LLM call made for this agent run.
-    pub output: String,
-    /// Keys of the nodes that made up the agent's batch.
-    pub node_keys: Vec<String>,
-}
-
-/// Run a single agent with logging disabled.
-///
-/// Forwards to `run_and_apply_with_log` with a logger that discards every
-/// message. Nothing is returned on success: agents apply their changes via
-/// tool calls during the LLM call, so there is no output to hand back.
-pub fn run_and_apply(
-    store: &mut Store,
-    agent_name: &str,
-    batch_size: usize,
-    llm_tag: &str,
-) -> Result<(), String> {
-    let discard = |_: &str| {};
-    run_and_apply_with_log(store, agent_name, batch_size, llm_tag, &discard)
-}
-
-/// Run a single agent, sending progress messages to `log`.
-///
-/// Forwards to `run_and_apply_excluded` with an empty exclusion set.
-pub fn run_and_apply_with_log(
-    store: &mut Store,
-    agent_name: &str,
-    batch_size: usize,
-    llm_tag: &str,
-    log: &(dyn Fn(&str) + Sync),
-) -> Result<(), String> {
-    let no_exclusions = std::collections::HashSet::new();
-    run_and_apply_excluded(store, agent_name, batch_size, llm_tag, log, &no_exclusions)
-}
-
-/// Like `run_and_apply_with_log`, but with an in-flight exclusion set passed
-/// through to the agent run.
-///
-/// Nothing is returned on success. For the "observation" agent the node keys
-/// of the finished batch are handed to `mark_observation_done`; an error from
-/// the agent run is propagated before anything is marked.
-pub fn run_and_apply_excluded(
-    store: &mut Store,
-    agent_name: &str,
-    batch_size: usize,
-    llm_tag: &str,
-    log: &(dyn Fn(&str) + Sync),
-    exclude: &std::collections::HashSet<String>,
-) -> Result<(), String> {
-    let finished = run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, exclude)?;
-
-    // Conversation segments are marked as mined only after the run succeeded.
-    match agent_name {
-        "observation" => mark_observation_done(&finished.node_keys),
-        _ => {}
-    }
-
-    Ok(())
-}
-
-/// Run an agent on explicitly chosen target keys, bypassing the agent's query.
-///
-/// The prompt comes from resolving the placeholders of the agent definition
-/// against `keys`; any extra keys that step yields are appended to the batch.
-/// Fails with "no .agent file for ..." when `agent_name` has no definition.
-pub fn run_one_agent_with_keys(
-    store: &mut Store,
-    agent_name: &str,
-    keys: &[String],
-    count: usize,
-    llm_tag: &str,
-    log: &(dyn Fn(&str) + Sync),
-) -> Result<AgentResult, String> {
-    let Some(def) = super::defs::get_def(agent_name) else {
-        return Err(format!("no .agent file for {}", agent_name));
-    };
-
-    log(&format!("targeting: {}", keys.join(", ")));
-    let graph = store.build_graph();
-    let (prompt, extra_keys) =
-        super::defs::resolve_placeholders(&def.prompt, store, &graph, keys, count);
-
-    // Explicit targets first, then whatever placeholder resolution added.
-    let node_keys: Vec<String> = keys.iter().cloned().chain(extra_keys).collect();
-
-    // Record visits eagerly so concurrent agents pick different seeds
-    if !node_keys.is_empty() {
-        store.record_agent_visits(&node_keys, agent_name).ok();
-    }
-
-    let agent_batch = super::prompts::AgentBatch { prompt, node_keys };
-    run_one_agent_inner(store, agent_name, &def, agent_batch, llm_tag, log)
-}
-
-/// Run one agent and return its output together with the node keys of its batch.
-///
-/// Forwards to `run_one_agent_excluded` with an empty exclusion set.
-pub fn run_one_agent(
-    store: &mut Store,
-    agent_name: &str,
-    batch_size: usize,
-    llm_tag: &str,
-    log: &(dyn Fn(&str) + Sync),
-) -> Result<AgentResult, String> {
-    let nothing_excluded = std::collections::HashSet::new();
-    run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, &nothing_excluded)
-}
-
-/// Like `run_one_agent`, but passes `exclude` (nodes currently being worked on
-/// by other agents) to batch selection.
-///
-/// The batch size is the `count` from the agent definition when it is set,
-/// otherwise `batch_size`. Fails with "no .agent file for ..." when
-/// `agent_name` has no definition.
-pub fn run_one_agent_excluded(
-    store: &mut Store,
-    agent_name: &str,
-    batch_size: usize,
-    llm_tag: &str,
-    log: &(dyn Fn(&str) + Sync),
-    exclude: &std::collections::HashSet<String>,
-) -> Result<AgentResult, String> {
-    let Some(def) = super::defs::get_def(agent_name) else {
-        return Err(format!("no .agent file for {}", agent_name));
-    };
-
-    log("building prompt");
-    let effective_count = match def.count {
-        Some(fixed) => fixed,
-        None => batch_size,
-    };
-    let agent_batch = super::defs::run_agent(store, &def, effective_count, exclude)?;
-
-    run_one_agent_inner(store, agent_name, &def, agent_batch, llm_tag, log)
-}
-
-/// Shared tail of the `run_one_agent*` entry points.
-///
-/// Logs a summary of the batch, rejects prompts above the size limit (dumping
-/// them to `llm-logs/oversized/` under the memory directory for debugging),
-/// logs the node keys and the full prompt, calls the LLM, and packages the
-/// output with the batch's node keys.
-///
-/// `_store` and `_llm_tag` are accepted but not used here.
-///
-/// # Errors
-/// Returns an error when the prompt is larger than the limit or when the LLM
-/// call fails.
-fn run_one_agent_inner(
-    _store: &mut Store,
-    agent_name: &str,
-    def: &super::defs::AgentDef,
-    agent_batch: super::prompts::AgentBatch,
-    _llm_tag: &str,
-    log: &(dyn Fn(&str) + Sync),
-) -> Result<AgentResult, String> {
-    // Guard limit for prompts that would exceed model context.
-    // Rough estimate: 1 token ≈ 4 bytes, so this is ~200K tokens.
-    // NOTE(review): the previous comment also mentioned reserving 16K tokens
-    // for output; the limit does not subtract that — confirm against the
-    // model's actual context size.
-    const MAX_PROMPT_BYTES: usize = 800_000;
-
-    let prompt_kb = agent_batch.prompt.len() / 1024;
-    let tools_desc = if def.tools.is_empty() { "no tools".into() }
-        else { format!("{} tools", def.tools.len()) };
-    log(&format!("prompt {}KB, model={}, {}, {} nodes",
-        prompt_kb, def.model, tools_desc, agent_batch.node_keys.len()));
-
-    if agent_batch.prompt.len() > MAX_PROMPT_BYTES {
-        // Dump the oversized prompt for debugging. This is best effort: a
-        // failure to save it must not mask the "too large" error below, but it
-        // is reported instead of claiming the dump exists.
-        let oversize_dir = store::memory_dir().join("llm-logs").join("oversized");
-        fs::create_dir_all(&oversize_dir).ok();
-        let oversize_path = oversize_dir.join(format!("{}-{}.txt",
-            agent_name, store::compact_timestamp()));
-        let header = format!("=== OVERSIZED PROMPT ===\nagent: {}\nsize: {}KB (max {}KB)\nnodes: {:?}\n\n",
-            agent_name, prompt_kb, MAX_PROMPT_BYTES / 1024, agent_batch.node_keys);
-        match fs::write(&oversize_path, format!("{}{}", header, agent_batch.prompt)) {
-            Ok(()) => log(&format!("oversized prompt logged to {}", oversize_path.display())),
-            Err(e) => log(&format!("failed to log oversized prompt to {}: {}",
-                oversize_path.display(), e)),
-        }
-
-        return Err(format!(
-            "prompt too large: {}KB (max {}KB) — seed nodes may be oversized",
-            prompt_kb, MAX_PROMPT_BYTES / 1024,
-        ));
-    }
-    for key in &agent_batch.node_keys {
-        log(&format!("  node: {}", key));
-    }
-
-    log(&format!("=== PROMPT ===\n\n{}\n\n=== CALLING LLM ===", agent_batch.prompt));
-
-    let output = llm::call_for_def(def, &agent_batch.prompt, log)?;
-
-    Ok(AgentResult {
-        output,
-        node_keys: agent_batch.node_keys,
-    })
-}
-
-// ---------------------------------------------------------------------------
-// Conversation fragment selection
-// ---------------------------------------------------------------------------
-
-/// Select conversation fragments (per-segment) for the observation extractor.
-///
-/// Scans each sub-directory of the configured projects directory for `.jsonl`
-/// transcripts larger than 50,000 bytes, splits every conversation on
-/// compaction boundaries, and returns at most `n` `(fragment_id, text)` pairs
-/// for segments not yet mined by the "observation" agent.
-///
-/// Fragment ids are `"{session}.{segment}"`, or
-/// `"{session}.{segment}.{chunk}"` when the segment text is longer than
-/// `CHUNK_SIZE` and is split into overlapping chunks.
-///
-/// Uses the transcript-progress.capnp log for dedup — no stub nodes.
-/// Does NOT pre-mark segments; caller must call mark_observation_done() after success.
-///
-/// Returns an empty list when the projects directory does not exist or the
-/// store cannot be loaded.
-pub fn select_conversation_fragments(n: usize) -> Vec<(String, String)> {
-    const CHUNK_SIZE: usize = 50_000;
-    const OVERLAP: usize = 10_000;
-
-    let projects = crate::config::get().projects_dir.clone();
-    if !projects.exists() { return Vec::new(); }
-
-    let store = match crate::store::Store::load() {
-        Ok(s) => s,
-        Err(_) => return Vec::new(),
-    };
-
-    // Transcripts are looked up exactly one directory level below `projects`.
-    let mut jsonl_files: Vec<PathBuf> = Vec::new();
-    if let Ok(dirs) = fs::read_dir(&projects) {
-        for dir in dirs.filter_map(|e| e.ok()) {
-            if !dir.path().is_dir() { continue; }
-            let Ok(files) = fs::read_dir(dir.path()) else { continue };
-            for f in files.filter_map(|e| e.ok()) {
-                let p = f.path();
-                if p.extension().map(|x| x == "jsonl").unwrap_or(false)
-                    && p.metadata().map(|meta| meta.len() > 50_000).unwrap_or(false)
-                {
-                    jsonl_files.push(p);
-                }
-            }
-        }
-    }
-
-    // Collect unmined segments across all transcripts
-    let mut candidates: Vec<(String, String)> = Vec::new();
-    for path in &jsonl_files {
-        let path_str = path.to_string_lossy();
-        let messages = match super::enrich::extract_conversation(&path_str) {
-            Ok(m) => m,
-            Err(_) => continue,
-        };
-        let session_id = path.file_stem()
-            .map(|s| s.to_string_lossy().to_string())
-            .unwrap_or_else(|| "unknown".into());
-
-        let segments = super::enrich::split_on_compaction(messages);
-        for (seg_idx, segment) in segments.into_iter().enumerate() {
-            if store.is_segment_mined(&session_id, seg_idx as u32, "observation") {
-                continue;
-            }
-            // Skip segments with too few assistant messages (rate limits, errors)
-            let assistant_msgs = segment.iter()
-                .filter(|(_, role, _, _)| role == "assistant")
-                .count();
-            if assistant_msgs < 2 {
-                continue;
-            }
-            // Skip segments that are just rate limit errors
-            let has_rate_limit = segment.iter().any(|(_, _, text, _)|
-                text.contains("hit your limit") || text.contains("rate limit"));
-            if has_rate_limit && assistant_msgs < 3 {
-                continue;
-            }
-            let text = format_segment(&segment);
-            if text.len() < 500 {
-                continue;
-            }
-            if text.len() <= CHUNK_SIZE {
-                let id = format!("{}.{}", session_id, seg_idx);
-                candidates.push((id, text));
-            } else {
-                // Split on line boundaries with overlap
-                let chunks = chunk_lines_with_overlap(&text, CHUNK_SIZE, OVERLAP);
-                for (chunk_idx, chunk) in chunks.into_iter().enumerate() {
-                    let id = format!("{}.{}.{}", session_id, seg_idx, chunk_idx);
-                    candidates.push((id, chunk));
-                }
-            }
-        }
-
-        // Checked once per transcript, so the last file may overshoot `n`;
-        // the truncate below trims the excess.
-        if candidates.len() >= n { break; }
-    }
-
-    candidates.truncate(n);
-    candidates
-}
-
-/// Split `text` into chunks of whole lines of roughly `chunk_size` bytes,
-/// where each chunk after the first repeats about `overlap` bytes of the
-/// lines that ended the previous chunk.
-///
-/// Every chunk starts at least one line after the previous chunk's start, so
-/// the split terminates even when a single line is larger than `chunk_size`
-/// or the overlap would otherwise reach back to the start of the chunk.
-fn chunk_lines_with_overlap(text: &str, chunk_size: usize, overlap: usize) -> Vec<String> {
-    let lines: Vec<&str> = text.lines().collect();
-    let mut chunks = Vec::new();
-    let mut start_line = 0;
-    while start_line < lines.len() {
-        // Grow the chunk line by line until it reaches the size budget
-        // (+1 per line accounts for the newline).
-        let mut end_line = start_line;
-        let mut size = 0;
-        while end_line < lines.len() && size < chunk_size {
-            size += lines[end_line].len() + 1;
-            end_line += 1;
-        }
-        chunks.push(lines[start_line..end_line].join("\n"));
-        if end_line >= lines.len() { break; }
-
-        // Back up by overlap amount for next chunk
-        let mut overlap_size = 0;
-        let mut overlap_start = end_line;
-        while overlap_start > start_line && overlap_size < overlap {
-            overlap_start -= 1;
-            overlap_size += lines[overlap_start].len() + 1;
-        }
-        // Without this floor the next chunk could begin at `start_line`
-        // again and the loop would emit the same chunk forever.
-        start_line = overlap_start.max(start_line + 1);
-    }
-    chunks
-}
-
-/// Mark observation segments as successfully mined (call AFTER the agent succeeds).
-///
-/// Each id is split at its last '.' into a session id and a segment number;
-/// ids without a '.' or with a non-numeric suffix are skipped. Does nothing
-/// when the store cannot be loaded, and ignores errors from marking a segment.
-pub fn mark_observation_done(fragment_ids: &[String]) {
-    let Ok(mut store) = crate::store::Store::load() else {
-        return;
-    };
-    for id in fragment_ids {
-        // NOTE(review): chunked ids from select_conversation_fragments have
-        // the form "session.segment.chunk"; splitting at the last '.' then
-        // yields "session.segment" as the session id and the chunk index as
-        // the segment — confirm this is the intended bookkeeping.
-        let Some((session_id, seg_str)) = id.rsplit_once('.') else {
-            continue;
-        };
-        let Ok(seg) = seg_str.parse::<u32>() else {
-            continue;
-        };
-        let _ = store.mark_segment_mined(session_id, seg, "observation");
-    }
-}
-
-/// Format a segment's messages into readable text for the observation agent.
-///
-/// Each message becomes one paragraph: `**name:** text` when it has no
-/// timestamp, otherwise `**name** timestamp: text` with the timestamp cut to
-/// its first 19 bytes. Messages from role "user" use the configured user
-/// name; every other role uses the configured assistant name. Very short
-/// messages (at most 5 bytes for the user, 10 for everyone else) are dropped.
-/// Paragraphs are joined with a blank line.
-fn format_segment(messages: &[(usize, String, String, String)]) -> String {
-    let cfg = crate::config::get();
-
-    let fragments: Vec<String> = messages
-        .iter()
-        .filter_map(|(_, role, text, ts)| {
-            let is_user = role == "user";
-            let min_len = if is_user { 5 } else { 10 };
-            if text.len() <= min_len {
-                return None;
-            }
-
-            let name = if is_user { &cfg.user_name } else { &cfg.assistant_name };
-            if ts.is_empty() {
-                return Some(format!("**{}:** {}", name, text));
-            }
-            // `get` instead of slicing: a timestamp shorter than 19 bytes, or
-            // one where byte 19 is not a char boundary, is used whole rather
-            // than panicking.
-            let stamp = ts.get(..19).unwrap_or(ts.as_str());
-            Some(format!("**{}** {}: {}", name, stamp, text))
-        })
-        .collect();
-
-    fragments.join("\n\n")
-}