restructure: hippocampus/ for memory, subconscious/ for agents

hippocampus/ — memory storage, retrieval, and consolidation:
  store, graph, query, similarity, spectral, neuro, counters,
  config, transcript, memory_search, lookups, cursor, migrate

subconscious/ — autonomous agents that process without being asked:
  reflect, surface, consolidate, digest, audit, etc.

All existing crate::X paths preserved via re-exports in lib.rs.

Co-Authored-By: Proof of Concept <poc@bcachefs.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-03-25 01:04:13 -04:00 committed by Kent Overstreet
parent cfed85bd20
commit d5c0e86700
39 changed files with 87 additions and 32 deletions

View file

@@ -0,0 +1,192 @@
// agents/api.rs — Direct API backend for agent execution
//
// Uses poc-agent's OpenAI-compatible API client to call models directly
// (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI.
// Implements the tool loop: send prompt → if tool_calls, execute them →
// send results back → repeat until text response.
//
// Activated when config has api_base_url set.
use crate::agent::api::ApiClient;
use crate::agent::types::*;
use crate::agent::tools::{self, ProcessTracker};
use crate::agent::ui_channel::StreamTarget;
use std::sync::OnceLock;
static API_CLIENT: OnceLock<ApiClient> = OnceLock::new();

/// Lazily construct and cache the shared API client from config.
///
/// Returns an error when `api_base_url` is not configured. Previously an
/// empty base URL was silently accepted, caching a broken client for the
/// process lifetime and deferring failure to the first request with a
/// confusing connection error.
fn get_client() -> Result<&'static ApiClient, String> {
    // Validate *before* initializing the OnceLock: once get_or_init runs,
    // the (possibly misconfigured) client is cached forever.
    if API_CLIENT.get().is_none() {
        let config = crate::config::get();
        if config.api_base_url.as_deref().unwrap_or("").is_empty() {
            return Err("api_base_url is not configured".to_string());
        }
    }
    Ok(API_CLIENT.get_or_init(|| {
        let config = crate::config::get();
        let base_url = config.api_base_url.as_deref().unwrap_or("");
        let api_key = config.api_key.as_deref().unwrap_or("");
        let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b");
        ApiClient::new(base_url, api_key, model)
    }))
}
/// Run an agent prompt through the direct API with tool support.
/// Returns the final text response after all tool calls are resolved.
///
/// Loop per turn: send the conversation; if the reply carries tool calls,
/// execute each and append the results as tool messages, then go around
/// again. A text-only reply ends the loop. Gives up after `max_turns` (50).
///
/// * `agent` — agent name; used as tool-call provenance (`agent:<name>`)
/// * `prompt` — initial user message
/// * `temperature` — optional sampling override passed through to the API
/// * `log` — sink for progress/diagnostic lines
pub async fn call_api_with_tools(
    agent: &str,
    prompt: &str,
    temperature: Option<f32>,
    log: &dyn Fn(&str),
) -> Result<String, String> {
    let client = get_client()?;
    // Set up a UI channel — we drain reasoning tokens into the log
    let (ui_tx, mut ui_rx) = crate::agent::ui_channel::channel();
    // Build tool definitions — memory tools for graph operations
    let all_defs = tools::definitions();
    let tool_defs: Vec<ToolDef> = all_defs.into_iter()
        .filter(|d| d.function.name.starts_with("memory_"))
        .collect();
    let tracker = ProcessTracker::new();
    // Start with the prompt as a user message
    let mut messages = vec![Message::user(prompt)];
    let reasoning = crate::config::get().api_reasoning.clone();
    let max_turns = 50;
    for turn in 0..max_turns {
        log(&format!("\n=== TURN {} ({} messages) ===\n", turn, messages.len()));
        let (msg, usage) = client.chat_completion_stream_temp(
            &messages,
            Some(&tool_defs),
            &ui_tx,
            StreamTarget::Autonomous,
            &reasoning,
            temperature,
        ).await.map_err(|e| {
            // Include payload size in the error — oversized prompts are a
            // common cause of API failures here.
            let msg_bytes: usize = messages.iter()
                .map(|m| m.content_text().len())
                .sum();
            format!("API error on turn {} (~{}KB payload, {} messages): {}",
                turn, msg_bytes / 1024, messages.len(), e)
        })?;
        if let Some(u) = &usage {
            log(&format!("tokens: {} prompt + {} completion",
                u.prompt_tokens, u.completion_tokens));
        }
        // Drain reasoning tokens from the UI channel into the log
        {
            let mut reasoning_buf = String::new();
            while let Ok(ui_msg) = ui_rx.try_recv() {
                if let crate::agent::ui_channel::UiMessage::Reasoning(r) = ui_msg {
                    reasoning_buf.push_str(&r);
                }
            }
            if !reasoning_buf.is_empty() {
                log(&format!("<think>\n{}\n</think>", reasoning_buf.trim()));
            }
        }
        let has_content = msg.content.is_some();
        let has_tools = msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty());
        if has_tools {
            // Push the assistant message with tool calls.
            // Sanitize arguments: vllm re-parses them as JSON when
            // preprocessing the conversation, so invalid JSON from the
            // model crashes the next request.
            let mut sanitized = msg.clone();
            if let Some(ref mut calls) = sanitized.tool_calls {
                for call in calls {
                    if serde_json::from_str::<serde_json::Value>(&call.function.arguments).is_err() {
                        log(&format!("sanitizing malformed args for {}: {}",
                            call.function.name, &call.function.arguments));
                        call.function.arguments = "{}".to_string();
                    }
                }
            }
            messages.push(sanitized);
            // Execute each tool call
            // (iterating the *original* msg, so malformed args still reach
            // the explicit error path below rather than the sanitized "{}")
            for call in msg.tool_calls.as_ref().unwrap() {
                log(&format!("\nTOOL CALL: {}({})",
                    call.function.name,
                    &call.function.arguments));
                let args: serde_json::Value = match serde_json::from_str(&call.function.arguments) {
                    Ok(v) => v,
                    Err(_) => {
                        // Tell the model its JSON was bad and let it retry.
                        log(&format!("malformed tool call args: {}", &call.function.arguments));
                        messages.push(Message::tool_result(
                            &call.id,
                            "Error: your tool call had malformed JSON arguments. Please retry with valid JSON.",
                        ));
                        continue;
                    }
                };
                // memory_* tools dispatch synchronously with provenance;
                // everything else goes through the generic async dispatcher.
                let output = if call.function.name.starts_with("memory_") {
                    let prov = format!("agent:{}", agent);
                    match crate::agent::tools::memory::dispatch(
                        &call.function.name, &args, Some(&prov),
                    ) {
                        Ok(text) => crate::agent::tools::ToolOutput {
                            text, is_yield: false, images: Vec::new(),
                            model_switch: None, dmn_pause: false,
                        },
                        Err(e) => crate::agent::tools::ToolOutput {
                            text: format!("Error: {}", e),
                            is_yield: false, images: Vec::new(),
                            model_switch: None, dmn_pause: false,
                        },
                    }
                } else {
                    tools::dispatch(&call.function.name, &args, &tracker).await
                };
                log(&format!("TOOL RESULT ({} chars):\n{}", output.text.len(), output.text));
                messages.push(Message::tool_result(&call.id, &output.text));
            }
            continue;
        }
        // Text-only response — we're done
        let text = msg.content_text().to_string();
        if text.is_empty() && !has_content {
            // Nudge the model once more; retries still count toward max_turns.
            log("empty response, retrying");
            messages.push(Message::user(
                "[system] Your previous response was empty. Please respond with text or use a tool."
            ));
            continue;
        }
        log(&format!("\n=== RESPONSE ===\n\n{}", text));
        return Ok(text);
    }
    Err(format!("agent exceeded {} tool turns", max_turns))
}
/// Synchronous wrapper — runs the async function on a dedicated thread
/// with its own tokio runtime. Safe to call from any context.
pub fn call_api_with_tools_sync(
    agent: &str,
    prompt: &str,
    temperature: Option<f32>,
    log: &(dyn Fn(&str) + Sync),
) -> Result<String, String> {
    // A scoped thread lets the closure borrow `agent`/`prompt`/`log`
    // without requiring 'static lifetimes or Arc.
    std::thread::scope(|scope| {
        let worker = scope.spawn(move || {
            let runtime = tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
                .map_err(|e| format!("tokio runtime: {}", e))?;
            let provenance = format!("agent:{}", agent);
            let task = crate::store::TASK_PROVENANCE.scope(
                provenance,
                call_api_with_tools(agent, prompt, temperature, log),
            );
            runtime.block_on(task)
        });
        worker.join().unwrap()
    })
}

View file

@@ -0,0 +1,333 @@
// Link audit: walk every link in the graph, batch to Sonnet for quality review.
//
// Each batch of links gets reviewed by Sonnet, which returns per-link actions:
// KEEP, DELETE, RETARGET, WEAKEN, STRENGTHEN. Batches run in parallel via rayon.
use super::llm;
use crate::store::{self, Store, new_relation};
use std::collections::HashSet;
/// One graph link plus the context the reviewer model needs to judge it.
struct LinkInfo {
    rel_idx: usize,          // index into store.relations (stable for this run)
    source_key: String,
    target_key: String,
    source_content: String,  // full content of the source node ("" if node missing)
    target_content: String,  // full content of the target node ("" if node missing)
    strength: f32,
    target_sections: Vec<String>, // "#"-suffixed children of a file-level target
}
/// Tally of audit actions taken (or that would be taken, in a dry run).
pub struct AuditStats {
    pub kept: usize,         // includes links the model did not mention
    pub deleted: usize,
    pub retargeted: usize,
    pub weakened: usize,
    pub strengthened: usize,
    pub errors: usize,       // links in batches whose LLM call failed
}
/// Build the review prompt for one batch of links.
///
/// The header defines the action vocabulary (KEEP/DELETE/RETARGET/WEAKEN/
/// STRENGTHEN); each link is then listed with its content and, for
/// file-level targets, the candidate sections available for RETARGET.
fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize) -> String {
    let mut prompt = format!(
        "You are auditing memory graph links for quality (batch {}/{}).\n\n\
         For each numbered link, decide what to do:\n\n\
         KEEP N link is meaningful, leave it\n\
         DELETE N link is noise, accidental, or too generic to be useful\n\
         RETARGET N new_key link points to the right topic area but wrong node;\n\
         \x20 retarget to a more specific section (listed under each link)\n\
         WEAKEN N strength link is marginal; reduce strength (0.1-0.3)\n\
         STRENGTHEN N strength link is important but underweighted; increase (0.8-1.0)\n\n\
         Output exactly one action per link number, nothing else.\n\n\
         Links to review:\n\n",
        batch_num, total_batches);
    // Link numbers are 1-based to match the action format above.
    for (n, link) in (1..).zip(batch) {
        let entry = format!(
            "--- Link {} ---\n\
             {} {} (strength={:.2})\n\n\
             Source content:\n{}\n\n\
             Target content:\n{}\n",
            n, link.source_key, link.target_key, link.strength,
            &link.source_content, &link.target_content);
        prompt.push_str(&entry);
        if !link.target_sections.is_empty() {
            prompt.push_str(
                "\nTarget has sections (consider RETARGET to a more specific one):\n");
            for section in &link.target_sections {
                prompt.push_str(&format!(" - {}\n", section));
            }
        }
        prompt.push('\n');
    }
    prompt
}
/// Parse the model's response into (0-based link index, action) pairs.
///
/// Each non-empty line must look like `ACTION N [arg]`. Lines that don't
/// parse — unknown verb, out-of-range index, missing or malformed argument —
/// are silently skipped.
fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditAction)> {
    let mut parsed = Vec::new();
    for raw in response.lines() {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            continue;
        }
        // At most three fields: verb, link number, optional argument.
        let mut fields = trimmed.splitn(3, ' ');
        let (Some(verb), Some(num)) = (fields.next(), fields.next()) else {
            continue;
        };
        let arg = fields.next();
        // Link numbers are 1-based in the prompt; convert to 0-based.
        let idx = match num.parse::<usize>() {
            Ok(n) if (1..=batch_size).contains(&n) => n - 1,
            _ => continue,
        };
        let action = match verb.to_uppercase().as_str() {
            "KEEP" => AuditAction::Keep,
            "DELETE" => AuditAction::Delete,
            "RETARGET" => match arg {
                Some(key) => AuditAction::Retarget(key.trim().to_string()),
                None => continue,
            },
            "WEAKEN" => match arg.and_then(|a| a.trim().parse::<f32>().ok()) {
                Some(s) => AuditAction::Weaken(s),
                None => continue,
            },
            "STRENGTHEN" => match arg.and_then(|a| a.trim().parse::<f32>().ok()) {
                Some(s) => AuditAction::Strengthen(s),
                None => continue,
            },
            _ => continue,
        };
        parsed.push((idx, action));
    }
    parsed
}
/// Per-link verdict parsed from the model's response.
enum AuditAction {
    Keep,
    Delete,
    /// Re-point the link at the named (more specific) target key.
    Retarget(String),
    /// Lower the link strength to the given value.
    Weaken(f32),
    /// Raise the link strength to the given value.
    Strengthen(f32),
}
/// Run a full link audit: walk every link, batch to Sonnet, apply results.
///
/// When `apply` is false this is a dry run: actions are counted and printed
/// but nothing is written back to the store. Batches are sized by a ~100KB
/// character budget and reviewed in parallel via rayon; mutations are
/// buffered and applied only after all batches are processed so relation
/// indices stay valid.
pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String> {
    // Collect all non-deleted relations with their info
    let mut links: Vec<LinkInfo> = Vec::new();
    for (idx, rel) in store.relations.iter().enumerate() {
        if rel.deleted { continue; }
        let source_content = store.nodes.get(&rel.source_key)
            .map(|n| n.content.clone()).unwrap_or_default();
        let target_content = store.nodes.get(&rel.target_key)
            .map(|n| n.content.clone()).unwrap_or_default();
        // Find section children of target if it's file-level
        // (section keys look like "file#section")
        let target_sections = if !rel.target_key.contains('#') {
            let prefix = format!("{}#", rel.target_key);
            store.nodes.keys()
                .filter(|k| k.starts_with(&prefix))
                .cloned()
                .collect()
        } else {
            Vec::new()
        };
        links.push(LinkInfo {
            rel_idx: idx,
            source_key: rel.source_key.clone(),
            target_key: rel.target_key.clone(),
            source_content,
            target_content,
            strength: rel.strength,
            target_sections,
        });
    }
    let total = links.len();
    println!("Link audit: {} links to review", total);
    if !apply {
        println!("DRY RUN — use --apply to make changes");
    }
    // Batch by char budget (~100K chars per prompt)
    let char_budget = 100_000usize;
    let mut batches: Vec<Vec<usize>> = Vec::new();
    let mut current_batch: Vec<usize> = Vec::new();
    let mut current_chars = 0usize;
    for (i, link) in links.iter().enumerate() {
        // +200 approximates the per-link prompt framing overhead
        let link_chars = link.source_content.len() + link.target_content.len() + 200;
        if !current_batch.is_empty() && current_chars + link_chars > char_budget {
            batches.push(std::mem::take(&mut current_batch));
            current_chars = 0;
        }
        current_batch.push(i);
        current_chars += link_chars;
    }
    if !current_batch.is_empty() {
        batches.push(current_batch);
    }
    let total_batches = batches.len();
    println!("{} batches (avg {} links/batch)\n", total_batches,
        if total_batches > 0 { total / total_batches } else { 0 });
    use rayon::prelude::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    // Build all batch prompts up front
    // NOTE(review): LinkInfo is cloned field-by-field here; deriving Clone
    // on LinkInfo would express the same thing directly.
    let batch_data: Vec<(usize, Vec<LinkInfo>, String)> = batches.iter().enumerate()
        .map(|(batch_idx, batch_indices)| {
            let batch_infos: Vec<LinkInfo> = batch_indices.iter().map(|&i| {
                let l = &links[i];
                LinkInfo {
                    rel_idx: l.rel_idx,
                    source_key: l.source_key.clone(),
                    target_key: l.target_key.clone(),
                    source_content: l.source_content.clone(),
                    target_content: l.target_content.clone(),
                    strength: l.strength,
                    target_sections: l.target_sections.clone(),
                }
            }).collect();
            let prompt = build_audit_prompt(&batch_infos, batch_idx + 1, total_batches);
            (batch_idx, batch_infos, prompt)
        })
        .collect();
    // Progress counter
    let done = AtomicUsize::new(0);
    // Run batches in parallel via rayon
    let batch_results: Vec<_> = batch_data.par_iter()
        .map(|(batch_idx, batch_infos, prompt)| {
            let response = llm::call_simple("audit", prompt);
            let completed = done.fetch_add(1, Ordering::Relaxed) + 1;
            eprint!("\r Batches: {}/{} done", completed, total_batches);
            (*batch_idx, batch_infos, response)
        })
        .collect();
    eprintln!(); // newline after progress
    // Process results sequentially
    let mut stats = AuditStats {
        kept: 0, deleted: 0, retargeted: 0, weakened: 0, strengthened: 0, errors: 0,
    };
    // Mutations are buffered and applied after all batches are processed,
    // so rel_idx values stay valid throughout.
    let mut deletions: Vec<usize> = Vec::new();
    let mut retargets: Vec<(usize, String)> = Vec::new();
    let mut strength_changes: Vec<(usize, f32)> = Vec::new();
    for (batch_idx, batch_infos, response) in &batch_results {
        let response = match response {
            Ok(r) => r,
            Err(e) => {
                eprintln!(" Batch {}: error: {}", batch_idx + 1, e);
                stats.errors += batch_infos.len();
                continue;
            }
        };
        let actions = parse_audit_response(response, batch_infos.len());
        let mut responded: HashSet<usize> = HashSet::new();
        for (idx, action) in &actions {
            responded.insert(*idx);
            let link = &batch_infos[*idx];
            match action {
                AuditAction::Keep => {
                    stats.kept += 1;
                }
                AuditAction::Delete => {
                    println!(" DELETE {}{}", link.source_key, link.target_key);
                    deletions.push(link.rel_idx);
                    stats.deleted += 1;
                }
                AuditAction::Retarget(new_target) => {
                    println!(" RETARGET {}{} (was {})",
                        link.source_key, new_target, link.target_key);
                    retargets.push((link.rel_idx, new_target.clone()));
                    stats.retargeted += 1;
                }
                AuditAction::Weaken(s) => {
                    println!(" WEAKEN {}{} (str {:.2}{:.2})",
                        link.source_key, link.target_key, link.strength, s);
                    strength_changes.push((link.rel_idx, *s));
                    stats.weakened += 1;
                }
                AuditAction::Strengthen(s) => {
                    println!(" STRENGTHEN {}{} (str {:.2}{:.2})",
                        link.source_key, link.target_key, link.strength, s);
                    strength_changes.push((link.rel_idx, *s));
                    stats.strengthened += 1;
                }
            }
        }
        // Links the model didn't mention are implicitly kept.
        for i in 0..batch_infos.len() {
            if !responded.contains(&i) {
                stats.kept += 1;
            }
        }
        // NOTE(review): these per-batch lines print *cumulative* totals, not
        // per-batch deltas, despite the "+" prefix — confirm intended.
        println!(" Batch {}/{}: +{}kept +{}del +{}retarget +{}weak +{}strong",
            batch_idx + 1, total_batches,
            stats.kept, stats.deleted, stats.retargeted, stats.weakened, stats.strengthened);
    }
    // Apply changes
    if apply && (stats.deleted > 0 || stats.retargeted > 0
        || stats.weakened > 0 || stats.strengthened > 0) {
        println!("\nApplying changes...");
        // Deletions: soft-delete
        for rel_idx in &deletions {
            store.relations[*rel_idx].deleted = true;
        }
        // Strength changes
        for (rel_idx, new_strength) in &strength_changes {
            store.relations[*rel_idx].strength = *new_strength;
        }
        // Retargets: soft-delete old, create new
        for (rel_idx, new_target) in &retargets {
            let source_key = store.relations[*rel_idx].source_key.clone();
            let old_strength = store.relations[*rel_idx].strength;
            // A zero UUID marks a missing node (key not present in store).
            let source_uuid = store.nodes.get(&source_key)
                .map(|n| n.uuid).unwrap_or([0u8; 16]);
            let target_uuid = store.nodes.get(new_target)
                .map(|n| n.uuid).unwrap_or([0u8; 16]);
            // Soft-delete old
            store.relations[*rel_idx].deleted = true;
            // Create new — only when the suggested target actually exists
            if target_uuid != [0u8; 16] {
                let new_rel = new_relation(
                    source_uuid, target_uuid,
                    store::RelationType::Auto,
                    old_strength,
                    &source_key, new_target,
                );
                store.add_relation(new_rel).ok();
            }
        }
        store.save()?;
        println!("Saved.");
    }
    Ok(stats)
}

View file

@@ -0,0 +1,173 @@
// Consolidation pipeline: plan → agents → maintenance → digests → links
//
// consolidate_full() runs the full autonomous consolidation:
// 1. Plan: analyze metrics, allocate agents
// 2. Execute: run each agent (agents apply changes via tool calls)
// 3. Graph maintenance (orphans, degree cap)
// 4. Digest: generate missing daily/weekly/monthly digests
// 5. Links: apply links extracted from digests
// 6. Summary: final metrics comparison
use super::digest;
use super::knowledge;
use crate::neuro;
use crate::store::{self, Store};
/// Append one line to the in-memory log buffer, terminating it with '\n'.
fn log_line(buf: &mut String, line: &str) {
    buf.extend([line, "\n"]);
}
/// Run the full autonomous consolidation pipeline with logging.
///
/// Convenience wrapper around [`consolidate_full_with_progress`] that
/// discards progress callbacks.
pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
    let ignore_progress = |_: &str| {};
    consolidate_full_with_progress(store, &ignore_progress)
}
/// Run the full autonomous consolidation pipeline, reporting progress.
///
/// `on_progress` receives short human-readable phase labels. The full log is
/// accumulated in memory and stored as a `_consolidate-log-*` node at the
/// end. Returns `Err` only on store load/save failures; individual agent and
/// digest errors are logged and counted but do not abort the pipeline.
/// The store is reloaded between steps because agents persist their own
/// changes out-of-band.
pub fn consolidate_full_with_progress(
    store: &mut Store,
    on_progress: &dyn Fn(&str),
) -> Result<(), String> {
    let start = std::time::Instant::now();
    let log_key = format!("_consolidate-log-{}", store::compact_timestamp());
    let mut log_buf = String::new();
    log_line(&mut log_buf, "=== CONSOLIDATE FULL ===");
    log_line(&mut log_buf, &format!("Started: {}", store::format_datetime(store::now_epoch())));
    log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
    log_line(&mut log_buf, "");
    // --- Step 1: Plan ---
    log_line(&mut log_buf, "--- Step 1: Plan ---");
    on_progress("planning");
    let plan = neuro::consolidation_plan(store);
    let plan_text = neuro::format_plan(&plan);
    log_line(&mut log_buf, &plan_text);
    println!("{}", plan_text);
    let total_agents = plan.total();
    log_line(&mut log_buf, &format!("Total agents to run: {}", total_agents));
    // --- Step 2: Execute agents ---
    log_line(&mut log_buf, "\n--- Step 2: Execute agents ---");
    let mut agent_num = 0usize;
    let mut agent_errors = 0usize;
    let batch_size = 5;
    let runs = plan.to_agent_runs(batch_size);
    for (agent_type, count) in &runs {
        agent_num += 1;
        let label = if *count > 0 {
            format!("[{}/{}] {} (batch={})", agent_num, runs.len(), agent_type, count)
        } else {
            format!("[{}/{}] {}", agent_num, runs.len(), agent_type)
        };
        log_line(&mut log_buf, &format!("\n{}", label));
        on_progress(&label);
        println!("{}", label);
        // Reload store to pick up changes from previous agents
        if agent_num > 1 {
            *store = Store::load()?;
        }
        match knowledge::run_and_apply(store, agent_type, *count, "consolidate") {
            Ok(()) => {
                let msg = " Done".to_string();
                log_line(&mut log_buf, &msg);
                on_progress(&msg);
                println!("{}", msg);
            }
            Err(e) => {
                // Agent failures are non-fatal: log, count, continue.
                let msg = format!(" ERROR: {}", e);
                log_line(&mut log_buf, &msg);
                eprintln!("{}", msg);
                agent_errors += 1;
            }
        }
    }
    log_line(&mut log_buf, &format!("\nAgents complete: {} run, {} errors",
        agent_num - agent_errors, agent_errors));
    store.save()?;
    // --- Step 3: Link orphans ---
    log_line(&mut log_buf, "\n--- Step 3: Link orphans ---");
    on_progress("linking orphans");
    println!("\n--- Linking orphan nodes ---");
    *store = Store::load()?;
    let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15);
    log_line(&mut log_buf, &format!(" {} orphans, {} links added", lo_orphans, lo_added));
    // --- Step 3b: Cap degree ---
    log_line(&mut log_buf, "\n--- Step 3b: Cap degree ---");
    on_progress("capping degree");
    println!("\n--- Capping node degree ---");
    *store = Store::load()?;
    match store.cap_degree(50) {
        Ok((hubs, pruned)) => {
            store.save()?;
            log_line(&mut log_buf, &format!(" {} hubs capped, {} edges pruned", hubs, pruned));
        }
        Err(e) => log_line(&mut log_buf, &format!(" ERROR: {}", e)),
    }
    // --- Step 4: Digest auto ---
    log_line(&mut log_buf, "\n--- Step 4: Digest auto ---");
    on_progress("generating digests");
    println!("\n--- Generating missing digests ---");
    *store = Store::load()?;
    match digest::digest_auto(store) {
        Ok(()) => log_line(&mut log_buf, " Digests done."),
        Err(e) => {
            let msg = format!(" ERROR in digest auto: {}", e);
            log_line(&mut log_buf, &msg);
            eprintln!("{}", msg);
        }
    }
    // --- Step 5: Apply digest links ---
    log_line(&mut log_buf, "\n--- Step 5: Apply digest links ---");
    on_progress("applying digest links");
    println!("\n--- Applying digest links ---");
    *store = Store::load()?;
    let links = digest::parse_all_digest_links(store);
    let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links);
    store.save()?;
    log_line(&mut log_buf, &format!(" {} links applied, {} skipped, {} fallbacks",
        applied, skipped, fallbacks));
    // --- Step 6: Summary ---
    let elapsed = start.elapsed();
    log_line(&mut log_buf, "\n--- Summary ---");
    log_line(&mut log_buf, &format!("Finished: {}", store::format_datetime(store::now_epoch())));
    log_line(&mut log_buf, &format!("Duration: {:.0}s", elapsed.as_secs_f64()));
    *store = Store::load()?;
    log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
    let summary = format!(
        "\n=== CONSOLIDATE FULL COMPLETE ===\n\
         Duration: {:.0}s\n\
         Agents: {} run, {} errors\n\
         Nodes: {} Relations: {}\n",
        elapsed.as_secs_f64(),
        agent_num - agent_errors, agent_errors,
        store.nodes.len(), store.relations.len(),
    );
    log_line(&mut log_buf, &summary);
    println!("{}", summary);
    // Store the log as a node
    // (best-effort: a failed log write shouldn't fail the whole pipeline)
    store.upsert_provenance(&log_key, &log_buf,
        "consolidate:write").ok();
    store.save()?;
    Ok(())
}

File diff suppressed because it is too large Load diff

View file

@@ -0,0 +1,736 @@
// Agent definitions: self-contained files with query + prompt template.
//
// Each agent is a file in the agents/ directory:
// - First line: JSON header (agent, query, model, schedule)
// - After blank line: prompt template with {{placeholder}} lookups
//
// Placeholders are resolved at runtime:
// {{topology}} — graph topology header
// {{nodes}} — query results formatted as node sections
// {{episodes}} — alias for {{nodes}}
// {{health}} — graph health report
// {{pairs}} — interference pairs from detect_interference
// {{rename}} — rename candidates
// {{split}} — split detail for the first query result
//
// The query selects what to operate on; placeholders pull in context.
use crate::graph::Graph;
use crate::neuro::{consolidation_priority, ReplayItem};
use crate::search;
use crate::store::Store;
use serde::Deserialize;
use std::path::PathBuf;
/// Agent definition: config (from JSON header) + prompt (raw markdown body).
#[derive(Clone, Debug)]
pub struct AgentDef {
    pub agent: String,     // agent name
    pub query: String,     // node-selection query; may be empty
    pub prompt: String,    // markdown prompt template with {{placeholder}}s
    pub model: String,     // model alias; defaults to "sonnet"
    pub schedule: String,  // scheduling string; may be empty
    pub tools: Vec<String>,
    pub count: Option<usize>,         // seed node / fragment count override
    pub chunk_size: Option<usize>,    // conversation chunk size in bytes
    pub chunk_overlap: Option<usize>, // overlap between chunks in bytes
    pub temperature: Option<f32>,     // LLM temperature override
}
/// The JSON header portion (first line of the file).
///
/// Every field except `agent` has a serde default, so a minimal header
/// like `{"agent":"reflect"}` parses.
#[derive(Deserialize)]
struct AgentHeader {
    agent: String,
    // Node-selection query; defaults to empty
    #[serde(default)]
    query: String,
    #[serde(default = "default_model")]
    model: String,
    #[serde(default)]
    schedule: String,
    #[serde(default)]
    tools: Vec<String>,
    /// Number of seed nodes / conversation fragments (overrides --count)
    #[serde(default)]
    count: Option<usize>,
    /// Max size of conversation chunks in bytes (default 50000)
    #[serde(default)]
    chunk_size: Option<usize>,
    /// Overlap between chunks in bytes (default 10000)
    #[serde(default)]
    chunk_overlap: Option<usize>,
    /// LLM temperature override
    #[serde(default)]
    temperature: Option<f32>,
}
/// Default model alias used when the agent header omits `model`.
fn default_model() -> String {
    String::from("sonnet")
}
/// Parse an agent file: first line is JSON config, rest is the prompt.
///
/// Returns `None` when there is no newline or the header line is not valid
/// JSON for [`AgentHeader`].
fn parse_agent_file(content: &str) -> Option<AgentDef> {
    let (header_line, body) = content.split_once('\n')?;
    let header: AgentHeader = serde_json::from_str(header_line.trim()).ok()?;
    // A single blank separator line between header and prompt is optional.
    let prompt = match body.strip_prefix('\n') {
        Some(stripped) => stripped,
        None => body,
    };
    Some(AgentDef {
        agent: header.agent,
        query: header.query,
        prompt: prompt.to_string(),
        model: header.model,
        schedule: header.schedule,
        tools: header.tools,
        count: header.count,
        chunk_size: header.chunk_size,
        chunk_overlap: header.chunk_overlap,
        temperature: header.temperature,
    })
}
/// Directory holding agent definition files: the in-repo `agents/` directory
/// when present, otherwise `agents/` under the memory directory.
fn agents_dir() -> PathBuf {
    let in_repo = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("agents");
    if in_repo.is_dir() {
        in_repo
    } else {
        crate::store::memory_dir().join("agents")
    }
}
/// Load all agent definitions.
///
/// Reads every `.agent` / `.md` file in the agents directory; unreadable
/// or unparseable files are skipped silently.
pub fn load_defs() -> Vec<AgentDef> {
    let Ok(entries) = std::fs::read_dir(agents_dir()) else {
        return Vec::new();
    };
    let mut defs = Vec::new();
    for entry in entries.flatten() {
        let path = entry.path();
        let wanted = matches!(
            path.extension().and_then(|x| x.to_str()),
            Some("agent") | Some("md")
        );
        if !wanted {
            continue;
        }
        if let Ok(content) = std::fs::read_to_string(&path) {
            if let Some(def) = parse_agent_file(&content) {
                defs.push(def);
            }
        }
    }
    defs
}
/// Look up a single agent definition by name.
///
/// Tries `<name>.agent` and `<name>.md` directly, then falls back to
/// scanning every loaded definition for a matching `agent` field.
pub fn get_def(name: &str) -> Option<AgentDef> {
    let dir = agents_dir();
    for ext in ["agent", "md"] {
        let candidate = dir.join(format!("{}.{}", name, ext));
        let Ok(content) = std::fs::read_to_string(&candidate) else {
            continue;
        };
        if let Some(def) = parse_agent_file(&content) {
            return Some(def);
        }
    }
    load_defs().into_iter().find(|d| d.agent == name)
}
/// Result of resolving a placeholder: text + any affected node keys.
struct Resolved {
    text: String,      // replacement text for the {{placeholder}}
    keys: Vec<String>, // node keys this resolution produced, for visit tracking
}
/// Resolve a single {{placeholder}} by name.
/// Returns the replacement text and any node keys it produced (for visit tracking).
fn resolve(
name: &str,
store: &Store,
graph: &Graph,
keys: &[String],
count: usize,
) -> Option<Resolved> {
match name {
"topology" => Some(Resolved {
text: super::prompts::format_topology_header(graph),
keys: vec![],
}),
"nodes" | "episodes" => {
let items = keys_to_replay_items(store, keys, graph);
Some(Resolved {
text: super::prompts::format_nodes_section(store, &items, graph),
keys: vec![], // keys already tracked from query
})
}
"health" => Some(Resolved {
text: super::prompts::format_health_section(store, graph),
keys: vec![],
}),
"pairs" => {
let mut pairs = crate::neuro::detect_interference(store, graph, 0.5);
pairs.truncate(count);
let pair_keys: Vec<String> = pairs.iter()
.flat_map(|(a, b, _)| vec![a.clone(), b.clone()])
.collect();
Some(Resolved {
text: super::prompts::format_pairs_section(&pairs, store, graph),
keys: pair_keys,
})
}
"rename" => {
let (rename_keys, section) = super::prompts::format_rename_candidates(store, count);
Some(Resolved { text: section, keys: rename_keys })
}
"split" => {
let key = keys.first()?;
Some(Resolved {
text: super::prompts::format_split_plan_node(store, graph, key),
keys: vec![], // key already tracked from query
})
}
// seed — render output for each seed node (content + deduped links)
"seed" => {
let mut text = String::new();
let mut result_keys = Vec::new();
for key in keys {
if let Some(rendered) = crate::cli::node::render_node(store, key) {
if !text.is_empty() { text.push_str("\n\n---\n\n"); }
text.push_str(&format!("## {}\n\n{}", key, rendered));
result_keys.push(key.clone());
}
}
if text.is_empty() { return None; }
Some(Resolved { text, keys: result_keys })
}
"organize" => {
// Show seed nodes with their neighbors for exploratory organizing
use crate::store::NodeType;
// Helper: shell-quote keys containing #
let sq = |k: &str| -> String {
if k.contains('#') { format!("'{}'", k) } else { k.to_string() }
};
let mut text = format!("### Seed nodes ({} starting points)\n\n", keys.len());
let mut result_keys = Vec::new();
for key in keys {
let Some(node) = store.nodes.get(key) else { continue };
if node.deleted { continue; }
let is_journal = node.node_type == NodeType::EpisodicSession;
let tag = if is_journal { " [JOURNAL — no delete]" } else { "" };
let words = node.content.split_whitespace().count();
text.push_str(&format!("#### {}{} ({} words)\n\n", sq(key), tag, words));
// Show first ~200 words of content as preview
let preview: String = node.content.split_whitespace()
.take(200).collect::<Vec<_>>().join(" ");
if words > 200 {
text.push_str(&format!("{}...\n\n", preview));
} else {
text.push_str(&format!("{}\n\n", node.content));
}
// Show neighbors with strengths
let neighbors = graph.neighbors(key);
if !neighbors.is_empty() {
text.push_str("**Neighbors:**\n");
for (nbr, strength) in neighbors.iter().take(15) {
let nbr_type = store.nodes.get(nbr.as_str())
.map(|n| match n.node_type {
NodeType::EpisodicSession => " [journal]",
NodeType::EpisodicDaily => " [daily]",
_ => "",
})
.unwrap_or("");
text.push_str(&format!(" [{:.1}] {}{}\n", strength, sq(nbr), nbr_type));
}
if neighbors.len() > 15 {
text.push_str(&format!(" ... and {} more\n", neighbors.len() - 15));
}
text.push('\n');
}
text.push_str("---\n\n");
result_keys.push(key.clone());
}
text.push_str("Use `poc-memory render KEY` and `poc-memory query \"neighbors('KEY')\"` to explore further.\n");
Some(Resolved { text, keys: result_keys })
}
"conversations" => {
let fragments = super::knowledge::select_conversation_fragments(count);
let fragment_ids: Vec<String> = fragments.iter()
.map(|(id, _)| id.clone())
.collect();
let text = fragments.iter()
.map(|(id, text)| format!("### Session {}\n\n{}", id, text))
.collect::<Vec<_>>()
.join("\n\n---\n\n");
Some(Resolved { text, keys: fragment_ids })
}
"siblings" | "neighborhood" => {
let mut out = String::new();
let mut all_keys: Vec<String> = Vec::new();
let mut included_nodes: std::collections::HashSet<String> = std::collections::HashSet::new();
const MAX_NEIGHBORS: usize = 25;
for key in keys {
if included_nodes.contains(key) { continue; }
included_nodes.insert(key.clone());
let Some(node) = store.nodes.get(key.as_str()) else { continue };
let neighbors = graph.neighbors(key);
// Seed node with full content
out.push_str(&format!("## {} (seed)\n\n{}\n\n", key, node.content));
all_keys.push(key.clone());
// Rank neighbors by link_strength * node_weight
// Include all if <= 10, otherwise take top MAX_NEIGHBORS
let mut ranked: Vec<(String, f32, f32)> = neighbors.iter()
.filter_map(|(nbr, strength)| {
store.nodes.get(nbr.as_str()).map(|n| {
let node_weight = n.weight.max(0.01);
let score = strength * node_weight;
(nbr.to_string(), *strength, score)
})
})
.collect();
ranked.sort_by(|a, b| b.2.total_cmp(&a.2));
let total = ranked.len();
let included: Vec<_> = if total <= 10 {
ranked
} else {
// Smooth cutoff: threshold scales with neighborhood size
// Generous — err on including too much so the agent can
// see and clean up junk. 20 → top 75%, 50 → top 30%
let top_score = ranked.first().map(|(_, _, s)| *s).unwrap_or(0.0);
let ratio = (15.0 / total as f32).min(1.0);
let threshold = top_score * ratio;
ranked.into_iter()
.enumerate()
.take_while(|(i, (_, _, score))| *i < 10 || *score >= threshold)
.take(MAX_NEIGHBORS)
.map(|(_, item)| item)
.collect()
};
if !included.is_empty() {
if total > included.len() {
out.push_str(&format!("### Neighbors (top {} of {}, ranked by importance)\n\n",
included.len(), total));
} else {
out.push_str("### Neighbors\n\n");
}
let included_keys: std::collections::HashSet<&str> = included.iter()
.map(|(k, _, _)| k.as_str()).collect();
// Budget: stop adding full content when prompt gets large.
// Remaining neighbors get header-only (key + first line).
const NEIGHBORHOOD_BUDGET: usize = 400_000; // ~100K tokens, leaves room for core-personality + instructions
let mut budget_exceeded = false;
for (nbr, strength, _score) in &included {
if included_nodes.contains(nbr) { continue; }
included_nodes.insert(nbr.clone());
if let Some(n) = store.nodes.get(nbr.as_str()) {
if budget_exceeded || out.len() > NEIGHBORHOOD_BUDGET {
// Header-only: key + first non-empty line
budget_exceeded = true;
let first_line = n.content.lines()
.find(|l| !l.trim().is_empty())
.unwrap_or("(empty)");
out.push_str(&format!("#### {} (link: {:.2}) — {}\n",
nbr, strength, first_line));
} else {
out.push_str(&format!("#### {} (link: {:.2})\n\n{}\n\n",
nbr, strength, n.content));
}
all_keys.push(nbr.to_string());
}
}
if budget_exceeded {
out.push_str("\n(remaining neighbors shown as headers only — prompt budget)\n\n");
}
// Cross-links between included neighbors
let mut cross_links = Vec::new();
for (nbr, _, _) in &included {
for (nbr2, strength) in graph.neighbors(nbr) {
if nbr2.as_str() != key
&& included_keys.contains(nbr2.as_str())
&& nbr.as_str() < nbr2.as_str()
{
cross_links.push((nbr.clone(), nbr2, strength));
}
}
}
if !cross_links.is_empty() {
out.push_str("### Cross-links between neighbors\n\n");
for (a, b, s) in &cross_links {
out.push_str(&format!(" {}{} ({:.2})\n", a, b, s));
}
out.push('\n');
}
}
}
Some(Resolved { text: out, keys: all_keys })
}
// targets/context: aliases for challenger-style presentation
"targets" => {
let items = keys_to_replay_items(store, keys, graph);
Some(Resolved {
text: super::prompts::format_nodes_section(store, &items, graph),
keys: vec![],
})
}
"hubs" => {
// Top hub nodes by degree, spread apart (skip neighbors of already-selected hubs)
let mut hubs: Vec<(String, usize)> = store.nodes.iter()
.filter(|(k, n)| !n.deleted && !k.starts_with('_'))
.map(|(k, _)| {
let degree = graph.neighbors(k).len();
(k.clone(), degree)
})
.collect();
hubs.sort_by(|a, b| b.1.cmp(&a.1));
let mut selected = Vec::new();
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
for (key, degree) in &hubs {
if seen.contains(key) { continue; }
selected.push(format!(" - {} (degree {})", key, degree));
// Mark neighbors as seen so we pick far-apart hubs
for (nbr, _) in graph.neighbors(key) {
seen.insert(nbr.clone());
}
seen.insert(key.clone());
if selected.len() >= 20 { break; }
}
let text = format!("## Hub nodes (link targets)\n\n{}", selected.join("\n"));
Some(Resolved { text, keys: vec![] })
}
// agent-context — personality/identity groups from load-context config
"agent-context" => {
let cfg = crate::config::get();
let mut text = String::new();
let mut keys = Vec::new();
for group in &cfg.context_groups {
if !group.agent { continue; }
let entries = crate::cli::misc::get_group_content(group, store, &cfg);
for (key, content) in entries {
use std::fmt::Write;
writeln!(text, "--- {} ({}) ---", key, group.label).ok();
writeln!(text, "{}\n", content).ok();
keys.push(key);
}
}
if text.is_empty() { None }
else { Some(Resolved { text, keys }) }
}
// node:KEY — inline a node's content by key
other if other.starts_with("node:") => {
let key = &other[5..];
store.nodes.get(key).map(|n| Resolved {
text: n.content.clone(),
keys: vec![key.to_string()],
})
}
// conversation — tail of the current session transcript (post-compaction)
"conversation" => {
let text = resolve_conversation();
if text.is_empty() { None }
else { Some(Resolved { text, keys: vec![] }) }
}
// seen_current — memories surfaced in current (post-compaction) context
"seen_current" => {
let text = resolve_seen_list("");
Some(Resolved { text, keys: vec![] })
}
// seen_previous — memories surfaced before last compaction
"seen_previous" => {
let text = resolve_seen_list("-prev");
Some(Resolved { text, keys: vec![] })
}
// memory_ratio — what % of current context is recalled memories
"memory_ratio" => {
let text = resolve_memory_ratio();
Some(Resolved { text, keys: vec![] })
}
_ => None,
}
}
/// Get the tail of the current session's conversation.
/// Reads POC_SESSION_ID to find the transcript, extracts the last
/// segment (post-compaction), returns the tail (~100K chars).
fn resolve_conversation() -> String {
    let session_id = std::env::var("POC_SESSION_ID").unwrap_or_default();
    if session_id.is_empty() { return String::new(); }
    let projects = crate::config::get().projects_dir.clone();
    // Locate this session's transcript under any project directory.
    let transcript = std::fs::read_dir(&projects)
        .ok()
        .into_iter()
        .flatten()
        .filter_map(|e| e.ok())
        .map(|dir| dir.path().join(format!("{}.jsonl", session_id)))
        .find(|p| p.exists());
    let Some(path) = transcript else { return String::new() };
    let path_str = path.to_string_lossy();
    let Some(iter) = crate::transcript::TailMessages::open(&path_str) else {
        return String::new();
    };
    let cfg = crate::config::get();
    const MAX_BYTES: usize = 200_000;
    let mut collected = 0usize;
    let mut parts: Vec<String> = Vec::new();
    // Iterator yields newest-first; accumulate until the byte budget is hit.
    for (role, content, ts) in iter {
        if collected >= MAX_BYTES { break; }
        let speaker = if role == "user" { &cfg.user_name } else { &cfg.assistant_name };
        // Include a timestamp (clamped to 19 chars: date + time) when present.
        let line = if ts.is_empty() {
            format!("**{}:** {}", speaker, content)
        } else {
            format!("**{}** {}: {}", speaker, &ts[..ts.len().min(19)], content)
        };
        collected += content.len();
        parts.push(line);
    }
    // Restore chronological order before joining.
    parts.reverse();
    parts.join("\n\n")
}
/// Get surfaced memory keys from a seen-set file.
/// `suffix` is "" for current, "-prev" for pre-compaction.
/// Each file line is "<timestamp>\t<key>"; output is newest-first,
/// one line per distinct key, capped at 20 entries.
fn resolve_seen_list(suffix: &str) -> String {
    let session_id = std::env::var("POC_SESSION_ID").unwrap_or_default();
    if session_id.is_empty() {
        return "(no session ID)".to_string();
    }
    let path = std::path::PathBuf::from("/tmp/claude-memory-search")
        .join(format!("seen{}-{}", suffix, session_id));
    let mut entries: Vec<(String, String)> = match std::fs::read_to_string(&path) {
        Ok(content) => content
            .lines()
            .filter(|l| !l.is_empty())
            .filter_map(|line| {
                line.split_once('\t')
                    .map(|(ts, key)| (ts.to_string(), key.to_string()))
            })
            .collect(),
        Err(_) => Vec::new(),
    };
    if entries.is_empty() {
        return "(none)".to_string();
    }
    // Newest first; keep only the first (most recent) line per key.
    entries.sort_by(|a, b| b.0.cmp(&a.0));
    let mut seen_keys = std::collections::HashSet::new();
    let mut lines = Vec::new();
    for (ts, key) in entries {
        if !seen_keys.insert(key.clone()) { continue; }
        lines.push(format!("- {} ({})", key, ts));
        if lines.len() == 20 { break; }
    }
    lines.join("\n")
}
/// Compute what percentage of the current conversation context is recalled memories.
/// Sums rendered size of current seen-set keys vs total post-compaction transcript size.
fn resolve_memory_ratio() -> String {
    let session_id = std::env::var("POC_SESSION_ID").unwrap_or_default();
    if session_id.is_empty() {
        return "(no session ID)".to_string();
    }
    let state_dir = std::path::PathBuf::from("/tmp/claude-memory-search");
    // Post-compaction transcript size = file length minus the recorded
    // compaction offset (0 if no offset file exists).
    let projects = crate::config::get().projects_dir.clone();
    let mut transcript_size = 0u64;
    if let Ok(dirs) = std::fs::read_dir(&projects) {
        for dir in dirs.filter_map(|e| e.ok()) {
            let path = dir.path().join(format!("{}.jsonl", session_id));
            if !path.exists() { continue; }
            let file_len = path.metadata().map(|m| m.len()).unwrap_or(0);
            let offset: u64 = std::fs::read_to_string(
                state_dir.join(format!("compaction-{}", session_id))
            ).ok().and_then(|s| s.trim().parse().ok()).unwrap_or(0);
            transcript_size = file_len.saturating_sub(offset);
            break;
        }
    }
    if transcript_size == 0 {
        return "0% of context is recalled memories (new session)".to_string();
    }
    // Distinct keys from the current seen set, in first-seen order.
    let seen_path = state_dir.join(format!("seen-{}", session_id));
    let mut seen_keys = std::collections::HashSet::new();
    let keys: Vec<String> = std::fs::read_to_string(&seen_path).ok()
        .map(|content| {
            content.lines()
                .filter(|l| !l.is_empty())
                .filter_map(|l| l.split_once('\t').map(|(_, k)| k.to_string()))
                .filter(|k| seen_keys.insert(k.clone()))
                .collect()
        })
        .unwrap_or_default();
    // Ask poc-memory to render each key; sum the rendered byte sizes.
    let mut memory_bytes = 0u64;
    for key in &keys {
        if let Ok(out) = std::process::Command::new("poc-memory")
            .args(["render", key])
            .output()
        {
            memory_bytes += out.stdout.len() as u64;
        }
    }
    let pct = (memory_bytes as f64 / transcript_size as f64 * 100.0).round() as u32;
    format!("{}% of current context is recalled memories ({} memories, ~{}KB of ~{}KB)",
        pct, keys.len(), memory_bytes / 1024, transcript_size / 1024)
}
/// Resolve all {{placeholder}} patterns in a prompt template.
/// Returns the resolved text and all node keys collected from placeholders.
pub fn resolve_placeholders(
    template: &str,
    store: &Store,
    graph: &Graph,
    keys: &[String],
    count: usize,
) -> (String, Vec<String>) {
    let mut text = template.to_string();
    let mut collected_keys = Vec::new();
    let mut cursor = 0;
    // Scan left to right; after each substitution the cursor jumps past the
    // inserted text, so resolved content is never re-scanned for placeholders.
    while let Some(rel_open) = text[cursor..].find("{{") {
        let open = cursor + rel_open;
        let Some(rel_close) = text[open + 2..].find("}}") else { break };
        let close = open + 2 + rel_close;
        let name = text[open + 2..close].trim().to_lowercase();
        let replacement = match resolve(&name, store, graph, keys, count) {
            Some(resolved) => {
                collected_keys.extend(resolved.keys);
                resolved.text
            }
            None => format!("(unknown: {})", name),
        };
        cursor = open + replacement.len();
        text.replace_range(open..close + 2, &replacement);
    }
    (text, collected_keys)
}
/// Run a config-driven agent: query → resolve placeholders → prompt.
/// `exclude` filters out nodes (and their neighborhoods) already being
/// worked on by other agents, preventing concurrent collisions.
///
/// Returns an AgentBatch with the resolved prompt and every node key the
/// batch touches (query results plus placeholder-resolved keys).
pub fn run_agent(
    store: &Store,
    def: &AgentDef,
    count: usize,
    exclude: &std::collections::HashSet<String>,
) -> Result<super::prompts::AgentBatch, String> {
    let graph = store.build_graph();
    // Run the query if present
    let keys = if !def.query.is_empty() {
        let mut stages = search::Stage::parse_pipeline(&def.query)?;
        // Request extra results to compensate for exclusion filtering
        // (computed once; previously duplicated at the limit and run_query sites).
        let padded = count + exclude.len().min(100);
        let has_limit = stages.iter().any(|s|
            matches!(s, search::Stage::Transform(search::Transform::Limit(_))));
        if !has_limit {
            stages.push(search::Stage::Transform(search::Transform::Limit(padded)));
        }
        let results = search::run_query(&stages, vec![], &graph, store, false, padded);
        let filtered: Vec<String> = results.into_iter()
            .map(|(k, _)| k)
            .filter(|k| !exclude.contains(k))
            .take(count)
            .collect();
        if filtered.is_empty() {
            return Err(format!("{}: query returned no results (after exclusion)", def.agent));
        }
        filtered
    } else {
        vec![]
    };
    // Substitute {agent_name} before resolving {{...}} placeholders,
    // so agents can reference their own notes: {{node:subconscious-notes-{agent_name}}}
    let template = def.prompt.replace("{agent_name}", &def.agent);
    // Identity and instructions are pulled in via {{node:KEY}} placeholders
    // (e.g. {{node:core-personality}}, {{node:memory-instructions-core}}),
    // which resolve_placeholders expands here.
    let (prompt, extra_keys) = resolve_placeholders(&template, store, &graph, &keys, count);
    // Merge query keys with any keys produced by placeholder resolution
    let mut all_keys = keys;
    all_keys.extend(extra_keys);
    Ok(super::prompts::AgentBatch { prompt, node_keys: all_keys })
}
/// Convert a list of keys to ReplayItems with priority and graph metrics.
/// Keys with no live node in the store are silently dropped.
pub fn keys_to_replay_items(
    store: &Store,
    keys: &[String],
    graph: &Graph,
) -> Vec<ReplayItem> {
    let mut items = Vec::with_capacity(keys.len());
    for key in keys {
        let Some(node) = store.nodes.get(key) else { continue };
        items.push(ReplayItem {
            key: key.clone(),
            priority: consolidation_priority(store, key, graph, None),
            interval_days: node.spaced_repetition_interval,
            emotion: node.emotion,
            cc: graph.clustering_coefficient(key),
            classification: "unknown",
            outlier_score: 0.0,
        });
    }
    items
}

View file

@ -0,0 +1,544 @@
// Episodic digest generation: daily, weekly, monthly, auto
//
// Three digest levels form a temporal hierarchy: daily digests summarize
// journal entries, weekly digests summarize dailies, monthly digests
// summarize weeklies. All three share the same generate/auto-detect
// pipeline, parameterized by DigestLevel.
use super::llm;
use crate::store::{self, Store, new_relation};
use crate::neuro;
use chrono::{Datelike, Duration, Local, NaiveDate};
use regex::Regex;
use std::collections::BTreeSet;
// --- Digest level descriptors ---
/// Descriptor for one digest level (daily / weekly / monthly).
/// The shared gather/generate pipeline is parameterized entirely by this
/// struct — see the DAILY / WEEKLY / MONTHLY constants below.
#[allow(clippy::type_complexity)]
struct DigestLevel {
    name: &'static str,        // store-key prefix, e.g. "daily"; matched by generate()
    title: &'static str,       // fills the prompt template's {{LEVEL}} slot
    period: &'static str,      // fills the prompt template's {{PERIOD}} slot
    input_title: &'static str, // fills the prompt template's {{INPUT_TITLE}} slot
    child_name: Option<&'static str>, // None = journal (leaf), Some = child digest files
    /// Expand an arg into (canonical_label, dates covered).
    label_dates: fn(&str) -> Result<(String, Vec<String>), String>,
    /// Map a YYYY-MM-DD date to this level's label.
    date_to_label: fn(&str) -> Option<String>,
}
/// Leaf level: a daily digest's label is the date itself and it covers
/// exactly that one date; inputs come from journal entries (child_name: None),
/// not from child digests.
const DAILY: DigestLevel = DigestLevel {
    name: "daily",
    title: "Daily",
    period: "Date",
    input_title: "Journal entries",
    child_name: None,
    label_dates: |date| Ok((date.to_string(), vec![date.to_string()])),
    date_to_label: |date| Some(date.to_string()),
};
/// Week label and 7 dates (Mon-Sun) for the week containing `date`.
fn week_dates(date: &str) -> Result<(String, Vec<String>), String> {
    let day = NaiveDate::parse_from_str(date, "%Y-%m-%d")
        .map_err(|e| format!("bad date '{}': {}", date, e))?;
    // Label uses the ISO week-numbering year, e.g. "2026-W09".
    let iso = day.iso_week();
    let label = format!("{}-W{:02}", iso.year(), iso.week());
    // Walk back to Monday, then enumerate the full Mon..Sun span.
    let monday = day - Duration::days(day.weekday().num_days_from_monday() as i64);
    let mut dates = Vec::with_capacity(7);
    for offset in 0..7 {
        dates.push((monday + Duration::days(offset)).format("%Y-%m-%d").to_string());
    }
    Ok((label, dates))
}
/// Mid level: label is an ISO week ("2026-W09"); `label_dates` accepts
/// either a week label or a plain date (mapped to its containing week).
const WEEKLY: DigestLevel = DigestLevel {
    name: "weekly",
    title: "Weekly",
    period: "Week",
    input_title: "Daily digests",
    child_name: Some("daily"),
    label_dates: |arg| {
        // Plain date (no 'W') → delegate to week_dates for label + span.
        if !arg.contains('W') {
            return week_dates(arg);
        }
        // "YYYY-Www" label → parse year/week, enumerate Mon..Sun.
        let (y, w) = arg.split_once("-W")
            .ok_or_else(|| format!("bad week label: {}", arg))?;
        let year: i32 = y.parse().map_err(|_| format!("bad week year: {}", arg))?;
        let week: u32 = w.parse().map_err(|_| format!("bad week number: {}", arg))?;
        let monday = NaiveDate::from_isoywd_opt(year, week, chrono::Weekday::Mon)
            .ok_or_else(|| format!("invalid week: {}", arg))?;
        let dates = (0..7)
            .map(|i| (monday + Duration::days(i)).format("%Y-%m-%d").to_string())
            .collect();
        Ok((arg.to_string(), dates))
    },
    date_to_label: |date| week_dates(date).ok().map(|(l, _)| l),
};
/// Top level: label is "YYYY-MM"; `label_dates` accepts a month label or
/// a full date (mapped to its containing month).
const MONTHLY: DigestLevel = DigestLevel {
    name: "monthly",
    title: "Monthly",
    period: "Month",
    input_title: "Weekly digests",
    child_name: Some("weekly"),
    label_dates: |arg| {
        // "YYYY-MM" is at most 7 chars; anything longer is parsed as a full date.
        let (year, month) = if arg.len() <= 7 {
            let d = NaiveDate::parse_from_str(&format!("{}-01", arg), "%Y-%m-%d")
                .map_err(|e| format!("bad month '{}': {}", arg, e))?;
            (d.year(), d.month())
        } else {
            let d = NaiveDate::parse_from_str(arg, "%Y-%m-%d")
                .map_err(|e| format!("bad date '{}': {}", arg, e))?;
            (d.year(), d.month())
        };
        let label = format!("{}-{:02}", year, month);
        // Enumerate every day of the month; from_ymd_opt yields None once
        // `day` runs past the month's last day, ending the loop.
        let mut dates = Vec::new();
        let mut day = 1u32;
        while let Some(date) = NaiveDate::from_ymd_opt(year, month, day) {
            if date.month() != month { break; }
            dates.push(date.format("%Y-%m-%d").to_string());
            day += 1;
        }
        Ok((label, dates))
    },
    date_to_label: |date| NaiveDate::parse_from_str(date, "%Y-%m-%d")
        .ok().map(|d| format!("{}-{:02}", d.year(), d.month())),
};
/// All levels, leaf first; gather() resolves `child_name` against this list.
const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
/// Store key for a digest node: "daily-2026-03-04", "weekly-2026-W09", etc.
fn digest_node_key(level_name: &str, label: &str) -> String {
    [level_name, label].join("-")
}
// --- Input gathering ---
/// Result of gathering inputs for a digest.
struct GatherResult {
    /// Canonical label for the period, e.g. "2026-03-04", "2026-W09", "2026-03".
    label: String,
    /// (display_label, content) pairs for the prompt.
    inputs: Vec<(String, String)>,
    /// Store keys of source nodes — used to create structural links.
    source_keys: Vec<String>,
}
/// Load child digest content from the store.
/// Returns (display_label, content) pairs plus the store keys that were
/// actually present; labels with no stored digest are skipped.
fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> (Vec<(String, String)>, Vec<String>) {
    let mut contents = Vec::new();
    let mut found_keys = Vec::new();
    for label in labels {
        let key = digest_node_key(prefix, label);
        let Some(node) = store.nodes.get(&key) else { continue };
        contents.push((label.clone(), node.content.clone()));
        found_keys.push(key);
    }
    (contents, found_keys)
}
/// Unified: gather inputs for any digest level.
///
/// Expands `arg` to the level's canonical label and covered dates, then
/// collects either child digests (weekly/monthly) or raw journal entries
/// (daily) as (display_label, content) pairs plus their store keys.
fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<GatherResult, String> {
    let (label, dates) = (level.label_dates)(arg)?;
    let (inputs, source_keys) = if let Some(child_name) = level.child_name {
        // Map parent's dates through child's date_to_label → child labels
        // (BTreeSet dedups and keeps them sorted).
        let child = LEVELS.iter()
            .find(|l| l.name == child_name)
            .expect("invalid child_name");
        let child_labels: Vec<String> = dates.iter()
            .filter_map(|d| (child.date_to_label)(d))
            .collect::<BTreeSet<_>>()
            .into_iter()
            .collect();
        load_child_digests(store, child_name, &child_labels)
    } else {
        // Leaf level: scan store for episodic entries matching date
        let mut entries: Vec<_> = store.nodes.iter()
            .filter(|(_, n)| n.node_type == store::NodeType::EpisodicSession
                && n.timestamp > 0
                && store::format_date(n.timestamp) == label)
            .map(|(key, n)| {
                (store::format_datetime(n.timestamp), n.content.clone(), key.clone())
            })
            .collect();
        // Chronological order by formatted datetime.
        entries.sort_by(|a, b| a.0.cmp(&b.0));
        let keys = entries.iter().map(|(_, _, k)| k.clone()).collect();
        let inputs = entries.into_iter().map(|(dt, c, _)| (dt, c)).collect();
        (inputs, keys)
    };
    Ok(GatherResult { label, inputs, source_keys })
}
/// Unified: find candidate labels for auto-generation (past, not yet generated).
/// Maps every date to this level's label, dedups, and drops the label that
/// covers `today` (its period is still in progress).
fn find_candidates(level: &DigestLevel, dates: &[String], today: &str) -> Vec<String> {
    let current = (level.date_to_label)(today);
    let unique: BTreeSet<String> = dates.iter()
        .filter_map(|d| (level.date_to_label)(d))
        .collect();
    unique.into_iter()
        .filter(|label| current.as_ref() != Some(label))
        .collect()
}
// --- Unified generator ---
fn format_inputs(inputs: &[(String, String)], daily: bool) -> String {
let mut text = String::new();
for (label, content) in inputs {
if daily {
text.push_str(&format!("\n### {}\n\n{}\n", label, content));
} else {
text.push_str(&format!("\n---\n## {}\n{}\n", label, content));
}
}
text
}
/// Generate and store one digest node for `level`/`label` from `inputs`,
/// then structurally link every source node to the new digest.
///
/// No-op (returns Ok) when `inputs` is empty. Calls the LLM once, upserts
/// the digest node, adds Link relations, and saves the store.
fn generate_digest(
    store: &mut Store,
    level: &DigestLevel,
    label: &str,
    inputs: &[(String, String)],
    source_keys: &[String],
) -> Result<(), String> {
    println!("Generating {} digest for {}...", level.name, label);
    if inputs.is_empty() {
        println!("  No inputs found for {}", label);
        return Ok(());
    }
    println!("  {} inputs", inputs.len());
    // Existing semantic keys are offered to the LLM as candidate link targets.
    let keys = llm::semantic_keys(store);
    let keys_text = keys.iter()
        .map(|k| format!("  - {}", k))
        .collect::<Vec<_>>()
        .join("\n");
    // Daily (leaf) digests get the journal-entry layout; higher levels the
    // child-digest layout (see format_inputs).
    let content = format_inputs(inputs, level.child_name.is_none());
    let covered = inputs.iter()
        .map(|(l, _)| l.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    // Load prompt from agent file; fall back to prompts dir
    let def = super::defs::get_def("digest");
    let template = match &def {
        Some(d) => d.prompt.clone(),
        None => {
            let path = crate::config::get().prompts_dir.join("digest.md");
            std::fs::read_to_string(&path)
                .map_err(|e| format!("load digest prompt: {}", e))?
        }
    };
    // Fill the template's {{...}} slots with this digest's metadata.
    let prompt = template
        .replace("{{LEVEL}}", level.title)
        .replace("{{PERIOD}}", level.period)
        .replace("{{INPUT_TITLE}}", level.input_title)
        .replace("{{LABEL}}", label)
        .replace("{{CONTENT}}", &content)
        .replace("{{COVERED}}", &covered)
        .replace("{{KEYS}}", &keys_text);
    println!("  Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
    println!("  Calling Sonnet...");
    let digest = llm::call_simple("digest", &prompt)?;
    let key = digest_node_key(level.name, label);
    store.upsert_provenance(&key, &digest, "digest:write")?;
    // Structural links: connect all source entries to this digest
    let mut linked = 0;
    for source_key in source_keys {
        // Skip if link already exists
        let exists = store.relations.iter().any(|r|
            !r.deleted && r.source_key == *source_key && r.target_key == key);
        if exists { continue; }
        // Fall back to the nil UUID if a node is somehow missing.
        let source_uuid = store.nodes.get(source_key)
            .map(|n| n.uuid).unwrap_or([0u8; 16]);
        let target_uuid = store.nodes.get(&key)
            .map(|n| n.uuid).unwrap_or([0u8; 16]);
        let mut rel = new_relation(
            source_uuid, target_uuid,
            store::RelationType::Link, 0.8,
            source_key, &key,
        );
        rel.provenance = "digest:structural".to_string();
        store.add_relation(rel)?;
        linked += 1;
    }
    if linked > 0 {
        println!("  Linked {} source entries → {}", linked, key);
    }
    store.save()?;
    println!("  Stored: {}", key);
    println!("  Done: {} lines", digest.lines().count());
    Ok(())
}
// --- Public API ---
/// Generate one digest at the named level for `arg` (a date or a level label).
pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), String> {
    let Some(level) = LEVELS.iter().find(|l| l.name == level_name) else {
        return Err(format!("unknown digest level: {}", level_name));
    };
    let gathered = gather(level, store, arg)?;
    generate_digest(store, level, &gathered.label, &gathered.inputs, &gathered.source_keys)
}
// --- Auto-detect and generate missing digests ---
/// Scan for dates with episodic entries and generate every missing digest
/// at every level, skipping periods that already have one and the period
/// containing today.
pub fn digest_auto(store: &mut Store) -> Result<(), String> {
    let today = Local::now().format("%Y-%m-%d").to_string();
    // Every distinct date with at least one episodic session entry.
    let dates: Vec<String> = store.nodes.values()
        .filter(|n| n.node_type == store::NodeType::EpisodicSession && n.timestamp > 0)
        .map(|n| store::format_date(n.timestamp))
        .collect::<BTreeSet<_>>()
        .into_iter()
        .collect();
    let mut total = 0u32;
    for level in LEVELS {
        let mut generated = 0u32;
        let mut skipped = 0u32;
        for arg in find_candidates(level, &dates, &today) {
            let gathered = gather(level, store, &arg)?;
            if store.nodes.contains_key(&digest_node_key(level.name, &gathered.label)) {
                skipped += 1;
                continue;
            }
            if gathered.inputs.is_empty() { continue; }
            println!("[auto] Missing {} digest for {}", level.name, gathered.label);
            generate_digest(store, level, &gathered.label, &gathered.inputs, &gathered.source_keys)?;
            generated += 1;
        }
        println!("[auto] {}: {} generated, {} existed", level.name, generated, skipped);
        total += generated;
    }
    if total == 0 {
        println!("[auto] All digests up to date.");
    } else {
        println!("[auto] Generated {} total digests.", total);
    }
    Ok(())
}
// --- Digest link parsing ---
// Replaces digest-link-parser.py: parses ## Links sections from digest
// files and applies them to the memory graph.
/// A parsed link from a digest's Links section.
pub struct DigestLink {
    pub source: String, // normalized source key
    pub target: String, // normalized target key
    pub reason: String, // parenthesized rationale from the bullet, "" if absent
    pub file: String,   // key of the digest node the link was parsed from
}
/// Normalize a raw link target to a poc-memory key.
///
/// Handles backtick/whitespace trimming, `.md` suffixes,
/// `file.md#section` anchors (→ "file-section"), digest path forms
/// ("weekly/2026-W06" → "weekly-2026-W06"), and bare dates
/// ("2026-03-04" → "daily-2026-03-04"). Self-references ("this ...")
/// return "" so the caller can substitute the digest's own key.
fn normalize_link_key(raw: &str) -> String {
    let key = raw.trim().trim_matches('`').trim();
    if key.is_empty() { return String::new(); }
    // Self-references
    let lower = key.to_lowercase();
    if lower.starts_with("this ") { return String::new(); }
    let mut key = key.to_string();
    // Strip .md suffix if present
    if let Some(stripped) = key.strip_suffix(".md") {
        key = stripped.to_string();
    } else if key.contains('#') {
        let (file, section) = key.split_once('#').unwrap();
        if let Some(bare) = file.strip_suffix(".md") {
            key = format!("{}-{}", bare, section);
        }
    }
    // weekly/2026-W06 → weekly-2026-W06, etc.
    if let Some(pos) = key.find('/') {
        let prefix = &key[..pos];
        if prefix == "daily" || prefix == "weekly" || prefix == "monthly" {
            let rest = &key[pos + 1..];
            key = format!("{}-{}", prefix, rest);
        }
    }
    // Bare date (YYYY-MM-DD) → daily digest. Checked byte-wise instead of
    // compiling a fresh Regex on every call.
    let is_bare_date = key.len() == 10
        && key.bytes().enumerate().all(|(i, b)| match i {
            4 | 7 => b == b'-',
            _ => b.is_ascii_digit(),
        });
    if is_bare_date {
        key = format!("daily-{}", key);
    }
    key
}
}
/// Parse the Links section from a digest node's content.
///
/// Scans for a `## Links` heading and reads `- A → B (reason)` bullet
/// lines (also ↔ / ←) until the next `## ` heading. Self-references
/// ("this daily/weekly/monthly", or anything normalizing to "") are
/// rewritten to the digest's own key; `NEW:` placeholders and self-links
/// are dropped.
fn parse_digest_node_links(key: &str, content: &str) -> Vec<DigestLink> {
    // Bullet: "- <source> <arrow> <target>" with an optional "(reason)" tail.
    let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
    let header_re = Regex::new(r"^##\s+Links").unwrap();
    let mut links = Vec::new();
    let mut in_links = false;
    for line in content.lines() {
        if header_re.is_match(line) {
            in_links = true;
            continue;
        }
        // Any other level-2 heading ends the Links section.
        if in_links && line.starts_with("## ") {
            in_links = false;
            continue;
        }
        if !in_links { continue; }
        // Skip sub-headings and bold annotations inside the section.
        if line.starts_with("###") || line.starts_with("**") { continue; }
        if let Some(cap) = link_re.captures(line) {
            let raw_source = cap[1].trim();
            let raw_target = cap[2].trim();
            let reason = cap.get(3).map(|m| m.as_str().to_string()).unwrap_or_default();
            let mut source = normalize_link_key(raw_source);
            let mut target = normalize_link_key(raw_target);
            // Replace self-references with digest key
            if source.is_empty() { source = key.to_string(); }
            if target.is_empty() { target = key.to_string(); }
            // Handle "this daily/weekly/monthly" in raw text
            let raw_s_lower = raw_source.to_lowercase();
            let raw_t_lower = raw_target.to_lowercase();
            if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
                || raw_s_lower.contains("this monthly")
            {
                source = key.to_string();
            }
            if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
                || raw_t_lower.contains("this monthly")
            {
                target = key.to_string();
            }
            // Skip NEW: and self-links
            if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
            if source == target { continue; }
            links.push(DigestLink { source, target, reason, file: key.to_string() });
        }
    }
    links
}
/// Parse links from all digest nodes in the store.
/// Digest keys are visited in sorted order so output is deterministic;
/// duplicate (source, target) pairs keep only their first occurrence.
pub fn parse_all_digest_links(store: &Store) -> Vec<DigestLink> {
    let mut digest_keys: Vec<&String> = store.nodes.iter()
        .filter(|(_, n)| matches!(n.node_type,
            store::NodeType::EpisodicDaily
            | store::NodeType::EpisodicWeekly
            | store::NodeType::EpisodicMonthly))
        .map(|(k, _)| k)
        .collect();
    digest_keys.sort();
    let mut links: Vec<DigestLink> = digest_keys.into_iter()
        .filter_map(|key| store.nodes.get(key).map(|n| (key, n)))
        .flat_map(|(key, node)| parse_digest_node_links(key, &node.content))
        .collect();
    // Deduplicate by (source, target) pair
    let mut seen = std::collections::HashSet::new();
    links.retain(|l| seen.insert((l.source.clone(), l.target.clone())));
    links
}
/// Apply parsed digest links to the store.
///
/// Returns (applied, skipped, fallbacks): relations added, links dropped
/// (unresolvable / duplicate / self-link / missing node), and links that
/// resolved only after stripping a `#section` anchor.
pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, usize, usize) {
    let mut applied = 0usize;
    let mut skipped = 0usize;
    let mut fallbacks = 0usize;
    for link in links {
        // Resolve a key; if the full `key#section` form fails and the key
        // actually has an anchor, retry with the base key. (Previously
        // split('#').next() always returned Some, so the no-anchor case
        // pointlessly re-ran the identical failing lookup and the else
        // branch was unreachable.)
        let source = match store.resolve_key(&link.source) {
            Ok(s) => s,
            Err(_) => match link.source.split_once('#') {
                Some((base, _)) => match store.resolve_key(base) {
                    Ok(s) => { fallbacks += 1; s }
                    Err(_) => { skipped += 1; continue; }
                },
                None => { skipped += 1; continue; }
            }
        };
        let target = match store.resolve_key(&link.target) {
            Ok(t) => t,
            Err(_) => match link.target.split_once('#') {
                Some((base, _)) => match store.resolve_key(base) {
                    Ok(t) => { fallbacks += 1; t }
                    Err(_) => { skipped += 1; continue; }
                },
                None => { skipped += 1; continue; }
            }
        };
        // Refine target to best-matching section if available
        let source_content = store.nodes.get(&source)
            .map(|n| n.content.as_str()).unwrap_or("");
        let target = neuro::refine_target(store, source_content, &target);
        if source == target { skipped += 1; continue; }
        // Check if link already exists
        let exists = store.relations.iter().any(|r|
            r.source_key == source && r.target_key == target && !r.deleted
        );
        if exists { skipped += 1; continue; }
        let source_uuid = match store.nodes.get(&source) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };
        let target_uuid = match store.nodes.get(&target) {
            Some(n) => n.uuid,
            None => { skipped += 1; continue; }
        };
        let rel = new_relation(
            source_uuid, target_uuid,
            store::RelationType::Link,
            0.5,
            &source, &target,
        );
        if store.add_relation(rel).is_ok() {
            println!("  + {}{}", source, target);
            applied += 1;
        }
    }
    (applied, skipped, fallbacks)
}

View file

@ -0,0 +1,40 @@
// Conversation extraction from JSONL transcripts
//
// extract_conversation — parse JSONL transcript to messages
// split_on_compaction — split messages at compaction boundaries
/// Extract conversation messages from a JSONL transcript file.
/// Returns (line_number, role, text, timestamp) tuples.
pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
    let parsed = super::transcript::parse_transcript(std::path::Path::new(jsonl_path))?;
    let mut rows = Vec::with_capacity(parsed.len());
    for m in parsed {
        rows.push((m.line, m.role, m.text, m.timestamp));
    }
    Ok(rows)
}
/// Marker text Claude Code inserts as the first user message after compaction.
pub const COMPACTION_MARKER: &str = "This session is being continued from a previous conversation that ran out of context";
/// Split extracted messages into segments at compaction boundaries.
/// Each segment represents one continuous conversation before context was compacted.
/// A user message starting with COMPACTION_MARKER closes the current segment
/// and becomes the first message of the next one.
pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Vec<Vec<(usize, String, String, String)>> {
    let mut segments: Vec<Vec<(usize, String, String, String)>> = Vec::new();
    let mut current: Vec<(usize, String, String, String)> = Vec::new();
    for msg in messages {
        let is_boundary = msg.1 == "user" && msg.2.starts_with(COMPACTION_MARKER);
        if is_boundary && !current.is_empty() {
            segments.push(std::mem::take(&mut current));
        }
        current.push(msg);
    }
    if !current.is_empty() {
        segments.push(current);
    }
    segments
}

View file

@ -0,0 +1,312 @@
// knowledge.rs — agent execution and conversation fragment selection
//
// Agent prompts live in agents/*.agent files, dispatched via defs.rs.
// This module handles:
// - Agent execution (build prompt → call LLM with tools → log)
// - Conversation fragment selection (for observation agent)
//
// Agents apply changes via tool calls (poc-memory write/link-add/etc)
// during the LLM call — no action parsing needed.
use super::llm;
use crate::store::{self, Store};
use std::fs;
use std::path::PathBuf;
// ---------------------------------------------------------------------------
// Agent execution
// ---------------------------------------------------------------------------
/// Result of running a single agent.
pub struct AgentResult {
    pub output: String,         // raw text the LLM returned
    pub node_keys: Vec<String>, // store keys of the nodes included in the prompt
}
/// Run a single agent and return the result (no action application — tools handle that).
/// Convenience wrapper over run_and_apply_with_log with a no-op logger.
pub fn run_and_apply(
    store: &mut Store,
    agent_name: &str,
    batch_size: usize,
    llm_tag: &str,
) -> Result<(), String> {
    let silent = |_: &str| {};
    run_and_apply_with_log(store, agent_name, batch_size, llm_tag, &silent)
}
/// Like run_and_apply, but streams progress lines through `log`.
pub fn run_and_apply_with_log(
    store: &mut Store,
    agent_name: &str,
    batch_size: usize,
    llm_tag: &str,
    log: &(dyn Fn(&str) + Sync),
) -> Result<(), String> {
    let no_exclusions = Default::default();
    run_and_apply_excluded(store, agent_name, batch_size, llm_tag, log, &no_exclusions)
}
/// Like run_and_apply_with_log but with an in-flight exclusion set.
/// Returns the keys that were processed (for the daemon to track).
pub fn run_and_apply_excluded(
    store: &mut Store,
    agent_name: &str,
    batch_size: usize,
    llm_tag: &str,
    log: &(dyn Fn(&str) + Sync),
    exclude: &std::collections::HashSet<String>,
) -> Result<(), String> {
    match run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, exclude) {
        Ok(result) => {
            // The observation agent mines conversation segments; record them
            // as mined only after the run succeeded.
            if agent_name == "observation" {
                mark_observation_done(&result.node_keys);
            }
            Ok(())
        }
        Err(e) => Err(e),
    }
}
/// Run an agent with explicit target keys, bypassing the agent's query.
pub fn run_one_agent_with_keys(
    store: &mut Store,
    agent_name: &str,
    keys: &[String],
    count: usize,
    llm_tag: &str,
    log: &(dyn Fn(&str) + Sync),
) -> Result<AgentResult, String> {
    let Some(def) = super::defs::get_def(agent_name) else {
        return Err(format!("no .agent file for {}", agent_name));
    };
    log(&format!("targeting: {}", keys.join(", ")));
    let graph = store.build_graph();
    let (prompt, extra_keys) = super::defs::resolve_placeholders(
        &def.prompt, store, &graph, keys, count,
    );
    // Batch keys = explicit targets plus anything placeholders pulled in.
    let mut node_keys = keys.to_vec();
    node_keys.extend(extra_keys);
    let batch = super::prompts::AgentBatch { prompt, node_keys };
    // Record visits eagerly so concurrent agents pick different seeds
    if !batch.node_keys.is_empty() {
        store.record_agent_visits(&batch.node_keys, agent_name).ok();
    }
    run_one_agent_inner(store, agent_name, &def, batch, llm_tag, log)
}
/// Run a single agent with no exclusion set.
pub fn run_one_agent(
    store: &mut Store,
    agent_name: &str,
    batch_size: usize,
    llm_tag: &str,
    log: &(dyn Fn(&str) + Sync),
) -> Result<AgentResult, String> {
    let empty = Default::default();
    run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, &empty)
}
/// Like run_one_agent but excludes nodes currently being worked on by other agents.
pub fn run_one_agent_excluded(
    store: &mut Store,
    agent_name: &str,
    batch_size: usize,
    llm_tag: &str,
    log: &(dyn Fn(&str) + Sync),
    exclude: &std::collections::HashSet<String>,
) -> Result<AgentResult, String> {
    let Some(def) = super::defs::get_def(agent_name) else {
        return Err(format!("no .agent file for {}", agent_name));
    };
    log("building prompt");
    // A per-agent count in the .agent file overrides the caller's batch size.
    let batch = super::defs::run_agent(store, &def, def.count.unwrap_or(batch_size), exclude)?;
    run_one_agent_inner(store, agent_name, &def, batch, llm_tag, log)
}
/// Shared tail of all agent runs: log the batch, size-guard the prompt,
/// then hand it to the LLM layer.
fn run_one_agent_inner(
    _store: &mut Store,
    agent_name: &str,
    def: &super::defs::AgentDef,
    agent_batch: super::prompts::AgentBatch,
    _llm_tag: &str,
    log: &(dyn Fn(&str) + Sync),
) -> Result<AgentResult, String> {
    let prompt_kb = agent_batch.prompt.len() / 1024;
    let tools_desc = if def.tools.is_empty() {
        "no tools".to_string()
    } else {
        format!("{} tools", def.tools.len())
    };
    log(&format!("prompt {}KB, model={}, {}, {} nodes",
        prompt_kb, def.model, tools_desc, agent_batch.node_keys.len()));
    // Guard: reject prompts that would exceed model context.
    // Rough estimate: 1 token ≈ 4 bytes. Reserve 16K tokens for output.
    let max_prompt_bytes = 800_000; // ~200K tokens, leaves room for output
    if agent_batch.prompt.len() > max_prompt_bytes {
        // Dump the oversized prompt to disk so it can be inspected later.
        let oversize_dir = store::memory_dir().join("llm-logs").join("oversized");
        fs::create_dir_all(&oversize_dir).ok();
        let oversize_path = oversize_dir.join(format!("{}-{}.txt",
            agent_name, store::compact_timestamp()));
        let header = format!("=== OVERSIZED PROMPT ===\nagent: {}\nsize: {}KB (max {}KB)\nnodes: {:?}\n\n",
            agent_name, prompt_kb, max_prompt_bytes / 1024, agent_batch.node_keys);
        fs::write(&oversize_path, format!("{}{}", header, agent_batch.prompt)).ok();
        log(&format!("oversized prompt logged to {}", oversize_path.display()));
        return Err(format!(
            "prompt too large: {}KB (max {}KB) — seed nodes may be oversized",
            prompt_kb, max_prompt_bytes / 1024,
        ));
    }
    agent_batch.node_keys.iter()
        .for_each(|key| log(&format!("  node: {}", key)));
    log(&format!("=== PROMPT ===\n\n{}\n\n=== CALLING LLM ===", agent_batch.prompt));
    let output = llm::call_for_def(def, &agent_batch.prompt, log)?;
    Ok(AgentResult { output, node_keys: agent_batch.node_keys })
}
// ---------------------------------------------------------------------------
// Conversation fragment selection
// ---------------------------------------------------------------------------
/// Select conversation fragments (per-segment) for the observation extractor.
/// Uses the transcript-progress.capnp log for dedup — no stub nodes.
/// Does NOT pre-mark segments; caller must call mark_observation_done() after success.
pub fn select_conversation_fragments(n: usize) -> Vec<(String, String)> {
    let projects = crate::config::get().projects_dir.clone();
    if !projects.exists() { return Vec::new(); }
    let store = match crate::store::Store::load() {
        Ok(s) => s,
        Err(_) => return Vec::new(),
    };
    // Gather candidate transcripts: *.jsonl over 50KB, one directory deep.
    let mut jsonl_files: Vec<PathBuf> = Vec::new();
    if let Ok(dirs) = fs::read_dir(&projects) {
        for dir in dirs.filter_map(|e| e.ok()) {
            if !dir.path().is_dir() { continue; }
            if let Ok(files) = fs::read_dir(dir.path()) {
                for f in files.filter_map(|e| e.ok()) {
                    let p = f.path();
                    if p.extension().map(|x| x == "jsonl").unwrap_or(false)
                        && let Ok(meta) = p.metadata()
                        && meta.len() > 50_000 {
                        jsonl_files.push(p);
                    }
                }
            }
        }
    }
    // Collect unmined segments across all transcripts
    let mut candidates: Vec<(String, String)> = Vec::new();
    for path in &jsonl_files {
        let path_str = path.to_string_lossy();
        let messages = match super::enrich::extract_conversation(&path_str) {
            Ok(m) => m,
            Err(_) => continue,
        };
        let session_id = path.file_stem()
            .map(|s| s.to_string_lossy().to_string())
            .unwrap_or_else(|| "unknown".into());
        let segments = super::enrich::split_on_compaction(messages);
        for (seg_idx, segment) in segments.into_iter().enumerate() {
            if store.is_segment_mined(&session_id, seg_idx as u32, "observation") {
                continue;
            }
            // Skip segments with too few assistant messages (rate limits, errors)
            let assistant_msgs = segment.iter()
                .filter(|(_, role, _, _)| role == "assistant")
                .count();
            if assistant_msgs < 2 {
                continue;
            }
            // Skip segments that are just rate limit errors
            let has_rate_limit = segment.iter().any(|(_, _, text, _)|
                text.contains("hit your limit") || text.contains("rate limit"));
            if has_rate_limit && assistant_msgs < 3 {
                continue;
            }
            let text = format_segment(&segment);
            if text.len() < 500 {
                continue;
            }
            append_segment_chunks(&mut candidates, &session_id, seg_idx, &text);
        }
        if candidates.len() >= n { break; }
    }
    candidates.truncate(n);
    candidates
}

/// Append one segment's text to `candidates`, splitting oversized segments
/// into overlapping chunks on line boundaries.
///
/// Small segments get id "{session}.{seg}"; chunks get "{session}.{seg}.{chunk}".
fn append_segment_chunks(
    candidates: &mut Vec<(String, String)>,
    session_id: &str,
    seg_idx: usize,
    text: &str,
) {
    const CHUNK_SIZE: usize = 50_000;
    const OVERLAP: usize = 10_000;
    if text.len() <= CHUNK_SIZE {
        candidates.push((format!("{}.{}", session_id, seg_idx), text.to_string()));
        return;
    }
    // Split on line boundaries with overlap
    let lines: Vec<&str> = text.lines().collect();
    let mut start_line = 0;
    let mut chunk_idx = 0;
    while start_line < lines.len() {
        // Grow the chunk line by line until it reaches CHUNK_SIZE bytes.
        let mut end_line = start_line;
        let mut size = 0;
        while end_line < lines.len() && size < CHUNK_SIZE {
            size += lines[end_line].len() + 1;
            end_line += 1;
        }
        let chunk: String = lines[start_line..end_line].join("\n");
        candidates.push((format!("{}.{}.{}", session_id, seg_idx, chunk_idx), chunk));
        if end_line >= lines.len() { break; }
        // Back up by overlap amount for next chunk
        let mut overlap_size = 0;
        let mut overlap_start = end_line;
        while overlap_start > start_line && overlap_size < OVERLAP {
            overlap_start -= 1;
            overlap_size += lines[overlap_start].len() + 1;
        }
        // BUG FIX: when a single line exceeds CHUNK_SIZE, the overlap scan
        // backs all the way up to start_line, which previously re-emitted the
        // same chunk forever. Always make forward progress.
        start_line = if overlap_start > start_line { overlap_start } else { end_line };
        chunk_idx += 1;
    }
}
/// Mark observation segments as successfully mined (call AFTER the agent succeeds).
///
/// Fragment ids come from `select_conversation_fragments` and are either
/// "{session}.{seg}" or, for oversized segments, "{session}.{seg}.{chunk}".
pub fn mark_observation_done(fragment_ids: &[String]) {
    let mut store = match crate::store::Store::load() {
        Ok(s) => s,
        Err(_) => return,
    };
    for id in fragment_ids {
        let Some((left, last)) = id.rsplit_once('.') else { continue };
        // BUG FIX: chunked ids ("{session}.{seg}.{chunk}") were previously
        // parsed with a single rsplit, taking the chunk index as the segment
        // and "{session}.{seg}" as the session id — so oversized segments
        // were never marked mined. Strip a trailing chunk index when the
        // last two components are both numeric.
        // NOTE(review): assumes session ids (JSONL file stems) never end in
        // ".<number>" themselves — confirm against transcript naming.
        let (session_id, seg) = if let Some((session, seg_str)) = left.rsplit_once('.')
            && let Ok(seg) = seg_str.parse::<u32>()
            && last.parse::<u32>().is_ok()
        {
            (session, seg)
        } else if let Ok(seg) = last.parse::<u32>() {
            (left, seg)
        } else {
            continue;
        };
        let _ = store.mark_segment_mined(session_id, seg, "observation");
    }
}
/// Format a segment's messages into readable text for the observation agent.
///
/// Each tuple is (line, role, text, timestamp). Very short messages are
/// dropped; the rest render as "**Name** ts: text" blocks joined by blank
/// lines.
fn format_segment(messages: &[(usize, String, String, String)]) -> String {
    let cfg = crate::config::get();
    let mut fragments = Vec::new();
    for (_, role, text, ts) in messages {
        // Users often send terse one-liners; allow shorter user messages.
        let min_len = if role == "user" { 5 } else { 10 };
        if text.len() <= min_len { continue; }
        let name = if role == "user" { &cfg.user_name } else { &cfg.assistant_name };
        if ts.is_empty() {
            fragments.push(format!("**{}:** {}", name, text));
        } else {
            // Keep only the "YYYY-MM-DDTHH:MM:SS" prefix of an ISO timestamp.
            // BUG FIX: the byte slice `&ts[..len.min(19)]` panics if byte 19
            // is not a char boundary (malformed/non-ASCII timestamp); use a
            // checked slice and fall back to the whole string.
            let short_ts = ts.get(..ts.len().min(19)).unwrap_or(ts.as_str());
            fragments.push(format!("**{}** {}: {}", name, short_ts, text));
        }
    }
    fragments.join("\n\n")
}

View file

@ -0,0 +1,73 @@
// LLM utilities: model invocation via direct API
use crate::store::Store;
use regex::Regex;
use std::fs;
/// Simple LLM call for non-agent uses (audit, digest, compare).
/// Logs to llm-logs/{caller}/ file.
pub(crate) fn call_simple(caller: &str, prompt: &str) -> Result<String, String> {
    use std::io::Write;
    let log_dir = crate::store::memory_dir().join("llm-logs").join(caller);
    fs::create_dir_all(&log_dir).ok();
    let log_path = log_dir.join(format!("{}.txt", crate::store::compact_timestamp()));
    // Best-effort append logger: the file is opened per message so partial
    // logs survive a crash mid-call; logging failures are ignored.
    let log = move |msg: &str| {
        let opened = fs::OpenOptions::new()
            .create(true)
            .append(true)
            .open(&log_path);
        if let Ok(mut file) = opened {
            let _ = writeln!(file, "{}", msg);
        }
    };
    super::api::call_api_with_tools_sync(caller, prompt, None, &log)
}
/// Call a model using an agent definition's configuration.
///
/// Thin wrapper: forwards the def's agent name and temperature to the API
/// backend's synchronous tool loop.
pub(crate) fn call_for_def(
    def: &super::defs::AgentDef,
    prompt: &str,
    log: &(dyn Fn(&str) + Sync),
) -> Result<String, String> {
    super::api::call_api_with_tools_sync(&def.agent, prompt, def.temperature, log)
}
/// Parse a JSON response, handling markdown fences.
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
let cleaned = response.trim();
let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
let cleaned = cleaned.strip_prefix("```").unwrap_or(cleaned);
let cleaned = cleaned.strip_suffix("```").unwrap_or(cleaned);
let cleaned = cleaned.trim();
if let Ok(v) = serde_json::from_str(cleaned) {
return Ok(v);
}
// Try to find JSON object or array
let re_obj = Regex::new(r"\{[\s\S]*\}").unwrap();
let re_arr = Regex::new(r"\[[\s\S]*\]").unwrap();
if let Some(m) = re_obj.find(cleaned)
&& let Ok(v) = serde_json::from_str(m.as_str()) {
return Ok(v);
}
if let Some(m) = re_arr.find(cleaned)
&& let Ok(v) = serde_json::from_str(m.as_str()) {
return Ok(v);
}
let preview = crate::util::first_n_chars(cleaned, 200);
Err(format!("no valid JSON in response: {preview}..."))
}
/// The first 200 node keys in sorted order — a compact key listing used
/// as prompt context.
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
    let mut all: Vec<String> = store.nodes.keys().cloned().collect();
    all.sort();
    all.into_iter().take(200).collect()
}

View file

@ -0,0 +1,28 @@
// Agent layer: LLM-powered operations on the memory graph
//
// Everything here calls external models (Sonnet, Haiku) or orchestrates
// sequences of such calls. The core graph infrastructure (store, graph,
// spectral, search, similarity) lives at the crate root.
//
// transcript — shared JSONL transcript parsing
// api — direct OpenAI-compatible API backend with tool loop
// llm — model invocation, response parsing
// prompts — prompt generation from store data
// defs — agent file loading and placeholder resolution
// audit — link quality review via Sonnet
// consolidate — full consolidation pipeline
// knowledge — agent execution, conversation fragment selection
// enrich — journal enrichment, experience mining
// digest — episodic digest generation (daily/weekly/monthly)
// daemon — background job scheduler
pub mod transcript;
pub mod api;
pub mod llm;
pub mod prompts;
pub mod defs;
pub mod audit;
pub mod consolidate;
pub mod knowledge;
pub mod enrich;
pub mod digest;
pub mod daemon;

View file

@ -0,0 +1,431 @@
// Agent prompt generation and formatting. Presentation logic —
// builds text prompts from store data for consolidation agents.
use crate::store::Store;
use crate::graph::Graph;
use crate::neuro::{
ReplayItem,
replay_queue, detect_interference,
};
/// Result of building an agent prompt — includes both the prompt text
/// and the keys of nodes selected for processing, so the caller can
/// record visits after successful completion.
pub struct AgentBatch {
    /// Fully rendered prompt text to send to the model.
    pub prompt: String,
    /// Keys of the nodes embedded in the prompt, in selection order.
    pub node_keys: Vec<String>,
}
/// Load a prompt template from the configured prompts dir, replacing each
/// {{PLACEHOLDER}} occurrence with its paired data string.
pub fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
    let path = crate::config::get().prompts_dir.join(format!("{}.md", name));
    let mut text = std::fs::read_to_string(&path)
        .map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
    // Substitutions are applied sequentially, in the order given.
    for &(placeholder, value) in replacements {
        text = text.replace(placeholder, value);
    }
    Ok(text)
}
/// Render the "Current graph topology" prompt header: global graph metrics
/// plus a warning list of saturated hubs the agent must avoid linking to.
pub fn format_topology_header(graph: &Graph) -> String {
    // Global metrics for the header line.
    let sigma = graph.small_world_sigma();
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    let avg_cc = graph.avg_clustering_coefficient();
    let n = graph.nodes().len();
    let e = graph.edge_count();
    // Identify saturated hubs — nodes with degree well above threshold
    let threshold = graph.hub_threshold();
    let mut hubs: Vec<_> = graph.nodes().iter()
        .map(|k| (k.clone(), graph.degree(k)))
        .filter(|(_, d)| *d >= threshold)
        .collect();
    // Highest-degree first; show at most 15.
    hubs.sort_by(|a, b| b.1.cmp(&a.1));
    hubs.truncate(15);
    let hub_list = if hubs.is_empty() {
        String::new()
    } else {
        let lines: Vec<String> = hubs.iter()
            .map(|(k, d)| format!(" - {} (degree {})", k, d))
            .collect();
        format!(
            "### SATURATED HUBS — DO NOT LINK TO THESE\n\
            The following nodes are already over-connected. Adding more links\n\
            to them makes the graph worse (star topology). Find lateral\n\
            connections between peripheral nodes instead.\n\n{}\n\n\
            Only link to a hub if it is genuinely the ONLY reasonable target.\n\n",
            lines.join("\n"))
    };
    format!(
        "## Current graph topology\n\
        Nodes: {} Edges: {} Communities: {}\n\
        Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
        Avg clustering coefficient: {:.4}\n\n\
        {}\
        Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
        Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
        n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
}
/// Render the per-node detail section for consolidation prompts.
///
/// For each replay item still present in the store, emits priority/emotion
/// stats, spectral info, degree and hub-link ratio, search-hit count, full
/// content, and up to 15 neighbors with strength/clustering/community.
pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
    let hub_thresh = graph.hub_threshold();
    let mut out = String::new();
    for item in items {
        let node = match store.nodes.get(&item.key) {
            Some(n) => n,
            None => continue,
        };
        out.push_str(&format!("## {} \n", item.key));
        out.push_str(&format!("Priority: {:.3} CC: {:.3} Emotion: {:.1} ",
            item.priority, item.cc, item.emotion));
        out.push_str(&format!("Interval: {}d\n",
            node.spaced_repetition_interval));
        if item.outlier_score > 0.0 {
            out.push_str(&format!("Spectral: {} (outlier={:.1})\n",
                item.classification, item.outlier_score));
        }
        if let Some(community) = node.community_id {
            out.push_str(&format!("Community: {} ", community));
        }
        let deg = graph.degree(&item.key);
        let cc = graph.clustering_coefficient(&item.key);
        // Hub-link ratio: what fraction of this node's edges go to hubs?
        let neighbors = graph.neighbors(&item.key);
        let hub_links = neighbors.iter()
            .filter(|(n, _)| graph.degree(n) >= hub_thresh)
            .count();
        let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
        let is_hub = deg >= hub_thresh;
        out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
            deg, cc, hub_ratio * 100.0, hub_links, deg));
        if is_hub {
            out.push_str(" ← THIS IS A HUB");
        } else if hub_ratio > 0.6 {
            out.push_str(" ← mostly hub-connected, needs lateral links");
        }
        out.push('\n');
        let hits = crate::counters::search_hit_count(&item.key);
        if hits > 0 {
            out.push_str(&format!("Search hits: {} ← actively found by search, prefer to keep\n", hits));
        }
        // Full content — the agent needs to see everything to do quality work
        out.push_str(&format!("\nContent:\n{}\n\n", node.content));
        // Neighbors — reuse the list computed for the hub-link ratio above
        // (previously graph.neighbors() was called a second time here).
        if !neighbors.is_empty() {
            out.push_str("Neighbors:\n");
            for (n, strength) in neighbors.iter().take(15) {
                let n_cc = graph.clustering_coefficient(n);
                let n_community = store.nodes.get(n.as_str())
                    .and_then(|n| n.community_id);
                out.push_str(&format!(" - {} (str={:.2}, cc={:.3}",
                    n, strength, n_cc));
                if let Some(c) = n_community {
                    out.push_str(&format!(", c{}", c));
                }
                out.push_str(")\n");
            }
        }
        out.push_str("\n---\n\n");
    }
    out
}
/// Render the graph-health section: the base health report plus a weight
/// histogram, the near-prune node list, and the largest communities.
pub fn format_health_section(store: &Store, graph: &Graph) -> String {
    use crate::graph;
    let health = graph::health_report(graph, store);
    let mut out = health;
    out.push_str("\n\n## Weight distribution\n");
    // Weight histogram
    let mut buckets = [0u32; 10]; // 0.0-0.1, 0.1-0.2, ..., 0.9-1.0
    for node in store.nodes.values() {
        let bucket = ((node.weight * 10.0) as usize).min(9);
        buckets[bucket] += 1;
    }
    for (i, &count) in buckets.iter().enumerate() {
        let lo = i as f32 / 10.0;
        let hi = (i + 1) as f32 / 10.0;
        // BUG FIX: the bar glyph was an empty string (likely a non-ASCII
        // character lost in transit), so every histogram bar rendered blank.
        // One block per 10 nodes.
        let bar = "█".repeat((count as usize) / 10);
        out.push_str(&format!(" {:.1}-{:.1}: {:4} {}\n", lo, hi, count, bar));
    }
    // Near-prune nodes: weight below 0.15 — likely next to be dropped.
    let near_prune: Vec<_> = store.nodes.iter()
        .filter(|(_, n)| n.weight < 0.15)
        .map(|(k, n)| (k.clone(), n.weight))
        .collect();
    if !near_prune.is_empty() {
        out.push_str(&format!("\n## Near-prune nodes ({} total)\n", near_prune.len()));
        for (k, w) in near_prune.iter().take(20) {
            out.push_str(&format!(" [{:.3}] {}\n", w, k));
        }
    }
    // Community sizes
    let communities = graph.communities();
    let mut comm_sizes: std::collections::HashMap<u32, Vec<String>> = std::collections::HashMap::new();
    for (key, &label) in communities {
        comm_sizes.entry(label).or_default().push(key.clone());
    }
    let mut sizes: Vec<_> = comm_sizes.iter()
        .map(|(id, members)| (*id, members.len(), members.clone()))
        .collect();
    sizes.sort_by(|a, b| b.1.cmp(&a.1));
    out.push_str("\n## Largest communities\n");
    for (id, size, members) in sizes.iter().take(10) {
        out.push_str(&format!(" Community {} ({} nodes): ", id, size));
        let sample: Vec<_> = members.iter().take(5).map(|s| s.as_str()).collect();
        out.push_str(&sample.join(", "));
        if *size > 5 { out.push_str(", ..."); }
        out.push('\n');
    }
    out
}
/// Render interfering node pairs: similarity score, then each side with
/// its community tag, weight, and content truncated to 500 chars.
pub fn format_pairs_section(
    pairs: &[(String, String, f32)],
    store: &Store,
    graph: &Graph,
) -> String {
    let communities = graph.communities();
    // "c{id}" for clustered nodes, "?" for unclustered ones.
    let community_tag = |key: &String| {
        communities.get(key)
            .map(|c| format!("c{}", c))
            .unwrap_or_else(|| "?".into())
    };
    let mut out = String::new();
    for (a, b, sim) in pairs {
        out.push_str(&format!("## Pair: similarity={:.3}\n", sim));
        // Render both sides of the pair identically.
        for key in [a, b] {
            out.push_str(&format!("\n### {} ({})\n", key, community_tag(key)));
            if let Some(node) = store.nodes.get(key) {
                let content = crate::util::truncate(&node.content, 500, "...");
                out.push_str(&format!("Weight: {:.2}\n{}\n",
                    node.weight, content));
            }
        }
        out.push_str("\n---\n\n");
    }
    out
}
/// Select rename candidates — auto-generated or overly long keys — and
/// render them for the rename agent.
///
/// Returns (selected keys, formatted prompt section). Nodes with search
/// hits sink to the bottom of the ordering so actively-used names are
/// renamed last.
pub fn format_rename_candidates(store: &Store, count: usize) -> (Vec<String>, String) {
    // Single predicate shared by the selection filter and the total-count
    // expression below — previously the logic was duplicated inline and
    // the two copies could drift apart.
    let is_candidate = |key: &str| {
        key.starts_with("_facts-")
            || (key.len() >= 60
                && (key.starts_with("journal#j-")
                    || key.starts_with("_mined-transcripts#f-")))
    };
    let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter()
        .filter(|(key, _)| is_candidate(key))
        .map(|(k, n)| (k.as_str(), n))
        .collect();
    // Deprioritize nodes actively found by search — renaming them would
    // break working queries. Sort by: search hits (ascending), then
    // least-recently visited. Nodes with many hits sink to the bottom.
    let hit_counts = crate::counters::all_search_hits();
    let hit_map: std::collections::HashMap<&str, u64> = hit_counts.iter()
        .map(|(k, v)| (k.as_str(), *v))
        .collect();
    candidates.sort_by_key(|(key, _)| {
        let hits = hit_map.get(key).copied().unwrap_or(0);
        (hits, store.last_visited(key, "rename"))
    });
    candidates.truncate(count);
    let keys: Vec<String> = candidates.iter().map(|(k, _)| k.to_string()).collect();
    let mut out = String::new();
    out.push_str(&format!("## Nodes to rename ({} of {} candidates)\n\n",
        candidates.len(),
        store.nodes.keys().filter(|k| is_candidate(k.as_str())).count()));
    for (key, node) in &candidates {
        out.push_str(&format!("### {}\n", key));
        // A zero timestamp means the creation time was never recorded.
        let created = if node.timestamp > 0 {
            crate::store::format_datetime(node.timestamp)
        } else {
            "unknown".to_string()
        };
        out.push_str(&format!("Created: {}\n", created));
        let hits = hit_map.get(key).copied().unwrap_or(0);
        if hits > 0 {
            out.push_str(&format!("Search hits: {} ← actively found by search, prefer to keep current name\n", hits));
        }
        let content = &node.content;
        if content.len() > 800 {
            let truncated = crate::util::truncate(content, 800, "\n[...]");
            out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n\n",
                content.len(), truncated));
        } else {
            out.push_str(&format!("\nContent:\n{}\n\n", content));
        }
        out.push_str("---\n\n");
    }
    (keys, out)
}
/// Get split candidates sorted by content size, largest first.
/// Only live, semantic, non-internal (no '_' prefix) nodes qualify.
pub fn split_candidates(store: &Store) -> Vec<String> {
    let mut sized: Vec<(&str, usize)> = store.nodes.iter()
        .filter_map(|(key, node)| {
            let eligible = !key.starts_with('_')
                && !node.deleted
                && matches!(node.node_type, crate::store::NodeType::Semantic);
            eligible.then(|| (key.as_str(), node.content.len()))
        })
        .collect();
    // Stable sort, descending by length — same ordering as before.
    sized.sort_by_key(|&(_, len)| std::cmp::Reverse(len));
    sized.into_iter().map(|(key, _)| key.to_string()).collect()
}
/// Format a single node for split-plan prompt (phase 1)
///
/// Shows the node's size, its neighbors grouped by community (so the agent
/// can see which clusters a split would serve), and the full content.
pub fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) -> String {
    let communities = graph.communities();
    let node = match store.nodes.get(key) {
        Some(n) => n,
        None => return format!("Node '{}' not found\n", key),
    };
    let mut out = String::new();
    out.push_str(&format!("### {} ({} chars)\n", key, node.content.len()));
    // Show neighbors grouped by community
    let neighbors = graph.neighbors(key);
    if !neighbors.is_empty() {
        // BTreeMap keeps community labels in sorted, deterministic order.
        let mut by_community: std::collections::BTreeMap<String, Vec<(&str, f32)>> =
            std::collections::BTreeMap::new();
        for (nkey, strength) in &neighbors {
            let comm = communities.get(nkey.as_str())
                .map(|c| format!("c{}", c))
                .unwrap_or_else(|| "unclustered".into());
            by_community.entry(comm)
                .or_default()
                .push((nkey.as_str(), *strength));
        }
        out.push_str("\nNeighbors by community:\n");
        for (comm, members) in &by_community {
            out.push_str(&format!(" {} ({}):", comm, members.len()));
            // Up to 5 members per community, then a "+N more" tail.
            for (nkey, strength) in members.iter().take(5) {
                out.push_str(&format!(" {}({:.2})", nkey, strength));
            }
            if members.len() > 5 {
                out.push_str(&format!(" +{} more", members.len() - 5));
            }
            out.push('\n');
        }
    }
    // Full content
    out.push_str(&format!("\nContent:\n{}\n\n", node.content));
    out.push_str("---\n\n");
    out
}
/// Build split-plan prompt for a single node (phase 1).
/// Uses the split.agent template with placeholders resolved for the given key.
pub fn split_plan_prompt(store: &Store, key: &str) -> Result<String, String> {
    let Some(def) = super::defs::get_def("split") else {
        return Err("no split.agent file".to_string());
    };
    let graph = store.build_graph();
    // Override the query — we have a specific key to split
    let targets = vec![key.to_string()];
    let (prompt, _) = super::defs::resolve_placeholders(&def.prompt, store, &graph, &targets, 1);
    Ok(prompt)
}
/// Build split-extract prompt for one child (phase 2).
/// Fails if the parent node no longer exists.
pub fn split_extract_prompt(store: &Store, parent_key: &str, child_key: &str, child_desc: &str, child_sections: &str) -> Result<String, String> {
    let parent_content = match store.nodes.get(parent_key) {
        Some(node) => node.content.as_str(),
        None => return Err(format!("No node '{}'", parent_key)),
    };
    load_prompt("split-extract", &[
        ("{{CHILD_KEY}}", child_key),
        ("{{CHILD_DESC}}", child_desc),
        ("{{CHILD_SECTIONS}}", child_sections),
        ("{{PARENT_CONTENT}}", parent_content),
    ])
}
/// Show consolidation batch status or generate an agent prompt.
///
/// With `auto` set, prints the replay agent prompt and returns. Otherwise
/// prints a human-readable summary: the replay queue with priorities,
/// interfering pairs, and the available --agent options.
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
    if auto {
        let batch = agent_prompt(store, "replay", count)?;
        println!("{}", batch.prompt);
        return Ok(());
    }
    let graph = store.build_graph();
    let items = replay_queue(store, count);
    if items.is_empty() {
        println!("No nodes to consolidate.");
        return Ok(());
    }
    println!("Consolidation batch ({} nodes):\n", items.len());
    for item in &items {
        let node_type = store.nodes.get(&item.key)
            .map(|n| if matches!(n.node_type, crate::store::NodeType::EpisodicSession) { "episodic" } else { "semantic" })
            .unwrap_or("?");
        println!(" [{:.3}] {} (cc={:.3}, interval={}d, type={})",
            item.priority, item.key, item.cc, item.interval_days, node_type);
    }
    // Near-duplicate pairs above 0.6 similarity, for the separator agent.
    let pairs = detect_interference(store, &graph, 0.6);
    if !pairs.is_empty() {
        println!("\nInterfering pairs ({}):", pairs.len());
        for (a, b, sim) in pairs.iter().take(5) {
            // NOTE(review): "{}{}" prints the pair keys with no separator —
            // possibly a lost "↔" glyph; confirm intended output.
            println!(" [{:.3}] {}{}", sim, a, b);
        }
    }
    println!("\nAgent prompts:");
    println!(" --auto Generate replay agent prompt");
    println!(" --agent replay Replay agent (schema assimilation)");
    println!(" --agent linker Linker agent (relational binding)");
    println!(" --agent separator Separator agent (pattern separation)");
    println!(" --agent transfer Transfer agent (CLS episodic→semantic)");
    println!(" --agent health Health agent (synaptic homeostasis)");
    Ok(())
}
/// Generate a specific agent prompt with filled-in data.
///
/// Looks up the agent's .agent definition and runs its node selection with
/// no exclusions, returning the rendered prompt plus the selected keys.
pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<AgentBatch, String> {
    let def = super::defs::get_def(agent)
        .ok_or_else(|| format!("Unknown agent: {}", agent))?;
    super::defs::run_agent(store, &def, count, &Default::default())
}

View file

@ -0,0 +1,94 @@
// Shared JSONL transcript parsing
//
// Three agents (enrich, fact_mine, knowledge) all parse Claude Code JSONL
// transcripts. This module provides the shared core: parse each line, extract
// message type, text content from string-or-array blocks, timestamp, and
// user type. Callers filter and transform as needed.
use std::fs;
use std::path::Path;
/// A single message extracted from a JSONL transcript.
///
/// Produced by [`parse_transcript`]; only user/assistant messages with
/// non-empty text content become instances of this type.
pub struct TranscriptMessage {
    /// 1-based line number in the JSONL file.
    pub line: usize,
    /// Raw role: "user" or "assistant".
    pub role: String,
    /// Extracted text content (trimmed, blocks joined with newlines).
    pub text: String,
    /// ISO timestamp from the message, or empty string.
    pub timestamp: String,
    /// For user messages: "external", "internal", etc. None for assistant.
    pub user_type: Option<String>,
}
/// Parse a JSONL transcript into structured messages.
///
/// Extracts all user and assistant messages. Content blocks of type "text"
/// are joined; tool_use, tool_result, thinking blocks are skipped.
/// System-reminder blocks are filtered out. Unparseable lines and messages
/// without usable text are silently skipped.
pub fn parse_transcript(path: &Path) -> Result<Vec<TranscriptMessage>, String> {
    let raw = fs::read_to_string(path)
        .map_err(|e| format!("read {}: {}", path.display(), e))?;
    let mut messages = Vec::new();
    for (idx, line) in raw.lines().enumerate() {
        let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else { continue };
        // Only user/assistant entries carry conversation content.
        let role = match obj.get("type").and_then(|v| v.as_str()) {
            Some(r @ ("user" | "assistant")) => r,
            _ => continue,
        };
        let Some(text) = extract_text_content(&obj) else { continue };
        let text = text.trim();
        if text.is_empty() { continue; }
        messages.push(TranscriptMessage {
            line: idx + 1,
            role: role.to_string(),
            text: text.to_string(),
            timestamp: obj.get("timestamp")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .to_string(),
            user_type: obj.get("userType")
                .and_then(|v| v.as_str())
                .map(str::to_string),
        });
    }
    Ok(messages)
}
/// Extract text content from a JSONL message object.
///
/// Handles both string content and array-of-blocks content, keeping only
/// type="text" blocks. Any text block containing a `<system-reminder>` tag
/// is dropped wholesale. Returns None when no usable text exists.
fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
    // Claude Code nests the payload under "message"; fall back to the
    // object itself when that wrapper is absent.
    let msg = obj.get("message").unwrap_or(obj);
    match msg.get("content")? {
        serde_json::Value::String(s) => Some(s.clone()),
        serde_json::Value::Array(blocks) => {
            let mut parts: Vec<&str> = Vec::new();
            for block in blocks {
                if block.get("type").and_then(|v| v.as_str()) != Some("text") {
                    continue;
                }
                let Some(text) = block.get("text").and_then(|v| v.as_str()) else { continue };
                // Skip system-reminder blocks entirely
                if text.contains("<system-reminder>") { continue; }
                parts.push(text);
            }
            if parts.is_empty() { None } else { Some(parts.join("\n")) }
        }
        _ => None,
    }
}

7
src/subconscious/mod.rs Normal file
View file

@ -0,0 +1,7 @@
// subconscious — autonomous agents that process without being asked
//
// Reflect, surface, consolidate, digest, audit — the background
// processes that maintain and evolve the memory graph. Runs on
// local models via the API backend.
//
// agents — agent execution, prompt building, defs, and the job daemon
pub mod agents;