restructure: hippocampus/ for memory, subconscious/ for agents
hippocampus/ — memory storage, retrieval, and consolidation: store, graph, query, similarity, spectral, neuro, counters, config, transcript, memory_search, lookups, cursor, migrate. subconscious/ — autonomous agents that process without being asked: reflect, surface, consolidate, digest, audit, etc. All existing crate::X paths preserved via re-exports in lib.rs. Co-Authored-By: Proof of Concept <poc@bcachefs.org> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
cfed85bd20
commit
d5c0e86700
39 changed files with 87 additions and 32 deletions
192
src/subconscious/agents/api.rs
Normal file
192
src/subconscious/agents/api.rs
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
// agents/api.rs — Direct API backend for agent execution
|
||||
//
|
||||
// Uses poc-agent's OpenAI-compatible API client to call models directly
|
||||
// (vllm, llama.cpp, OpenRouter, etc.) instead of shelling out to claude CLI.
|
||||
// Implements the tool loop: send prompt → if tool_calls, execute them →
|
||||
// send results back → repeat until text response.
|
||||
//
|
||||
// Activated when config has api_base_url set.
|
||||
|
||||
use crate::agent::api::ApiClient;
|
||||
use crate::agent::types::*;
|
||||
use crate::agent::tools::{self, ProcessTracker};
|
||||
use crate::agent::ui_channel::StreamTarget;
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
static API_CLIENT: OnceLock<ApiClient> = OnceLock::new();
|
||||
|
||||
fn get_client() -> Result<&'static ApiClient, String> {
|
||||
Ok(API_CLIENT.get_or_init(|| {
|
||||
let config = crate::config::get();
|
||||
let base_url = config.api_base_url.as_deref().unwrap_or("");
|
||||
let api_key = config.api_key.as_deref().unwrap_or("");
|
||||
let model = config.api_model.as_deref().unwrap_or("qwen-2.5-27b");
|
||||
ApiClient::new(base_url, api_key, model)
|
||||
}))
|
||||
}
|
||||
|
||||
/// Run an agent prompt through the direct API with tool support.
|
||||
/// Returns the final text response after all tool calls are resolved.
|
||||
pub async fn call_api_with_tools(
|
||||
agent: &str,
|
||||
prompt: &str,
|
||||
temperature: Option<f32>,
|
||||
log: &dyn Fn(&str),
|
||||
) -> Result<String, String> {
|
||||
let client = get_client()?;
|
||||
|
||||
// Set up a UI channel — we drain reasoning tokens into the log
|
||||
let (ui_tx, mut ui_rx) = crate::agent::ui_channel::channel();
|
||||
|
||||
// Build tool definitions — memory tools for graph operations
|
||||
let all_defs = tools::definitions();
|
||||
let tool_defs: Vec<ToolDef> = all_defs.into_iter()
|
||||
.filter(|d| d.function.name.starts_with("memory_"))
|
||||
.collect();
|
||||
let tracker = ProcessTracker::new();
|
||||
|
||||
// Start with the prompt as a user message
|
||||
let mut messages = vec![Message::user(prompt)];
|
||||
let reasoning = crate::config::get().api_reasoning.clone();
|
||||
|
||||
let max_turns = 50;
|
||||
for turn in 0..max_turns {
|
||||
log(&format!("\n=== TURN {} ({} messages) ===\n", turn, messages.len()));
|
||||
|
||||
let (msg, usage) = client.chat_completion_stream_temp(
|
||||
&messages,
|
||||
Some(&tool_defs),
|
||||
&ui_tx,
|
||||
StreamTarget::Autonomous,
|
||||
&reasoning,
|
||||
temperature,
|
||||
).await.map_err(|e| {
|
||||
let msg_bytes: usize = messages.iter()
|
||||
.map(|m| m.content_text().len())
|
||||
.sum();
|
||||
format!("API error on turn {} (~{}KB payload, {} messages): {}",
|
||||
turn, msg_bytes / 1024, messages.len(), e)
|
||||
})?;
|
||||
|
||||
if let Some(u) = &usage {
|
||||
log(&format!("tokens: {} prompt + {} completion",
|
||||
u.prompt_tokens, u.completion_tokens));
|
||||
}
|
||||
|
||||
// Drain reasoning tokens from the UI channel into the log
|
||||
{
|
||||
let mut reasoning_buf = String::new();
|
||||
while let Ok(ui_msg) = ui_rx.try_recv() {
|
||||
if let crate::agent::ui_channel::UiMessage::Reasoning(r) = ui_msg {
|
||||
reasoning_buf.push_str(&r);
|
||||
}
|
||||
}
|
||||
if !reasoning_buf.is_empty() {
|
||||
log(&format!("<think>\n{}\n</think>", reasoning_buf.trim()));
|
||||
}
|
||||
}
|
||||
|
||||
let has_content = msg.content.is_some();
|
||||
let has_tools = msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty());
|
||||
|
||||
if has_tools {
|
||||
// Push the assistant message with tool calls.
|
||||
// Sanitize arguments: vllm re-parses them as JSON when
|
||||
// preprocessing the conversation, so invalid JSON from the
|
||||
// model crashes the next request.
|
||||
let mut sanitized = msg.clone();
|
||||
if let Some(ref mut calls) = sanitized.tool_calls {
|
||||
for call in calls {
|
||||
if serde_json::from_str::<serde_json::Value>(&call.function.arguments).is_err() {
|
||||
log(&format!("sanitizing malformed args for {}: {}",
|
||||
call.function.name, &call.function.arguments));
|
||||
call.function.arguments = "{}".to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
messages.push(sanitized);
|
||||
|
||||
// Execute each tool call
|
||||
for call in msg.tool_calls.as_ref().unwrap() {
|
||||
log(&format!("\nTOOL CALL: {}({})",
|
||||
call.function.name,
|
||||
&call.function.arguments));
|
||||
|
||||
let args: serde_json::Value = match serde_json::from_str(&call.function.arguments) {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
log(&format!("malformed tool call args: {}", &call.function.arguments));
|
||||
messages.push(Message::tool_result(
|
||||
&call.id,
|
||||
"Error: your tool call had malformed JSON arguments. Please retry with valid JSON.",
|
||||
));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let output = if call.function.name.starts_with("memory_") {
|
||||
let prov = format!("agent:{}", agent);
|
||||
match crate::agent::tools::memory::dispatch(
|
||||
&call.function.name, &args, Some(&prov),
|
||||
) {
|
||||
Ok(text) => crate::agent::tools::ToolOutput {
|
||||
text, is_yield: false, images: Vec::new(),
|
||||
model_switch: None, dmn_pause: false,
|
||||
},
|
||||
Err(e) => crate::agent::tools::ToolOutput {
|
||||
text: format!("Error: {}", e),
|
||||
is_yield: false, images: Vec::new(),
|
||||
model_switch: None, dmn_pause: false,
|
||||
},
|
||||
}
|
||||
} else {
|
||||
tools::dispatch(&call.function.name, &args, &tracker).await
|
||||
};
|
||||
|
||||
log(&format!("TOOL RESULT ({} chars):\n{}", output.text.len(), output.text));
|
||||
|
||||
messages.push(Message::tool_result(&call.id, &output.text));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Text-only response — we're done
|
||||
let text = msg.content_text().to_string();
|
||||
if text.is_empty() && !has_content {
|
||||
log("empty response, retrying");
|
||||
messages.push(Message::user(
|
||||
"[system] Your previous response was empty. Please respond with text or use a tool."
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
log(&format!("\n=== RESPONSE ===\n\n{}", text));
|
||||
return Ok(text);
|
||||
}
|
||||
|
||||
Err(format!("agent exceeded {} tool turns", max_turns))
|
||||
}
|
||||
|
||||
/// Synchronous wrapper — runs the async function on a dedicated thread
|
||||
/// with its own tokio runtime. Safe to call from any context.
|
||||
pub fn call_api_with_tools_sync(
|
||||
agent: &str,
|
||||
prompt: &str,
|
||||
temperature: Option<f32>,
|
||||
log: &(dyn Fn(&str) + Sync),
|
||||
) -> Result<String, String> {
|
||||
std::thread::scope(|s| {
|
||||
s.spawn(|| {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.map_err(|e| format!("tokio runtime: {}", e))?;
|
||||
let prov = format!("agent:{}", agent);
|
||||
rt.block_on(
|
||||
crate::store::TASK_PROVENANCE.scope(prov,
|
||||
call_api_with_tools(agent, prompt, temperature, log))
|
||||
)
|
||||
}).join().unwrap()
|
||||
})
|
||||
}
|
||||
333
src/subconscious/agents/audit.rs
Normal file
333
src/subconscious/agents/audit.rs
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
// Link audit: walk every link in the graph, batch to Sonnet for quality review.
|
||||
//
|
||||
// Each batch of links gets reviewed by Sonnet, which returns per-link actions:
|
||||
// KEEP, DELETE, RETARGET, WEAKEN, STRENGTHEN. Batches run in parallel via rayon.
|
||||
|
||||
use super::llm;
|
||||
use crate::store::{self, Store, new_relation};
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Snapshot of one relation, carrying everything needed to present it for review.
#[derive(Debug, Clone)]
struct LinkInfo {
    /// Index of the relation in `store.relations`.
    rel_idx: usize,
    /// Key of the node the link starts from.
    source_key: String,
    /// Key of the node the link points to.
    target_key: String,
    /// Content of the source node (empty if the node was not found).
    source_content: String,
    /// Content of the target node (empty if the node was not found).
    target_content: String,
    /// Current strength of the relation.
    strength: f32,
    /// Keys of nodes named `<target_key>#...`, offered as retarget candidates.
    /// Empty when the target key itself already contains `#`.
    target_sections: Vec<String>,
}
|
||||
|
||||
/// Counters summarising the outcome of a link audit.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct AuditStats {
    /// Links left unchanged.
    pub kept: usize,
    /// Links marked for deletion.
    pub deleted: usize,
    /// Links redirected to a different target node.
    pub retargeted: usize,
    /// Links whose strength was lowered.
    pub weakened: usize,
    /// Links whose strength was raised.
    pub strengthened: usize,
    /// Links that could not be reviewed or whose action was rejected.
    pub errors: usize,
}
|
||||
|
||||
fn build_audit_prompt(batch: &[LinkInfo], batch_num: usize, total_batches: usize) -> String {
|
||||
let mut prompt = format!(
|
||||
"You are auditing memory graph links for quality (batch {}/{}).\n\n\
|
||||
For each numbered link, decide what to do:\n\n\
|
||||
KEEP N — link is meaningful, leave it\n\
|
||||
DELETE N — link is noise, accidental, or too generic to be useful\n\
|
||||
RETARGET N new_key — link points to the right topic area but wrong node;\n\
|
||||
\x20 retarget to a more specific section (listed under each link)\n\
|
||||
WEAKEN N strength — link is marginal; reduce strength (0.1-0.3)\n\
|
||||
STRENGTHEN N strength — link is important but underweighted; increase (0.8-1.0)\n\n\
|
||||
Output exactly one action per link number, nothing else.\n\n\
|
||||
Links to review:\n\n",
|
||||
batch_num, total_batches);
|
||||
|
||||
for (i, link) in batch.iter().enumerate() {
|
||||
let n = i + 1;
|
||||
prompt.push_str(&format!(
|
||||
"--- Link {} ---\n\
|
||||
{} → {} (strength={:.2})\n\n\
|
||||
Source content:\n{}\n\n\
|
||||
Target content:\n{}\n",
|
||||
n, link.source_key, link.target_key, link.strength,
|
||||
&link.source_content, &link.target_content));
|
||||
|
||||
if !link.target_sections.is_empty() {
|
||||
prompt.push_str(
|
||||
"\nTarget has sections (consider RETARGET to a more specific one):\n");
|
||||
for s in &link.target_sections {
|
||||
prompt.push_str(&format!(" - {}\n", s));
|
||||
}
|
||||
}
|
||||
prompt.push('\n');
|
||||
}
|
||||
|
||||
prompt
|
||||
}
|
||||
|
||||
/// Parse the model's audit reply into `(link index, action)` pairs.
///
/// Each non-empty line is expected to look like `ACTION N [argument]`, where
/// `N` is the 1-based link number from the prompt. Fields may be separated by
/// any run of whitespace, and the action word is case-insensitive. The
/// returned index is 0-based.
///
/// Lines are skipped when the action is unknown, `N` is not in
/// `1..=batch_size`, a required argument is missing, or a strength is not a
/// finite number (`inf` and `NaN` parse as `f32` but are rejected here).
/// For WEAKEN/STRENGTHEN only the first token after `N` is read, so trailing
/// commentary does not invalidate the line.
fn parse_audit_response(response: &str, batch_size: usize) -> Vec<(usize, AuditAction)> {
    /// Split off the first whitespace-delimited token; the remainder is trimmed.
    fn next_token(s: &str) -> Option<(&str, &str)> {
        let s = s.trim_start();
        if s.is_empty() {
            return None;
        }
        let end = s.find(char::is_whitespace).unwrap_or(s.len());
        Some((&s[..end], s[end..].trim()))
    }

    /// Read a finite strength from the first token of `arg`.
    fn parse_strength(arg: &str) -> Option<f32> {
        arg.split_whitespace()
            .next()?
            .parse::<f32>()
            .ok()
            .filter(|s| s.is_finite())
    }

    let mut actions = Vec::new();

    for line in response.lines() {
        let Some((verb, rest)) = next_token(line) else { continue };
        let Some((number, arg)) = next_token(rest) else { continue };

        let idx = match number.parse::<usize>() {
            Ok(n) if (1..=batch_size).contains(&n) => n - 1,
            _ => continue,
        };

        let action = match verb.to_uppercase().as_str() {
            "KEEP" => AuditAction::Keep,
            "DELETE" => AuditAction::Delete,
            // The whole remainder is the new key.
            "RETARGET" if !arg.is_empty() => AuditAction::Retarget(arg.to_string()),
            "WEAKEN" => match parse_strength(arg) {
                Some(s) => AuditAction::Weaken(s),
                None => continue,
            },
            "STRENGTHEN" => match parse_strength(arg) {
                Some(s) => AuditAction::Strengthen(s),
                None => continue,
            },
            _ => continue,
        };

        actions.push((idx, action));
    }

    actions
}

/// One reviewer decision for a single link.
#[derive(Debug, Clone, PartialEq)]
enum AuditAction {
    /// Leave the link as it is.
    Keep,
    /// Remove the link.
    Delete,
    /// Point the link at the node with this key instead.
    Retarget(String),
    /// Lower the link's strength to this value.
    Weaken(f32),
    /// Raise the link's strength to this value.
    Strengthen(f32),
}
|
||||
|
||||
/// Run a full link audit: walk every link, batch to Sonnet, apply results.
|
||||
pub fn link_audit(store: &mut Store, apply: bool) -> Result<AuditStats, String> {
|
||||
// Collect all non-deleted relations with their info
|
||||
let mut links: Vec<LinkInfo> = Vec::new();
|
||||
|
||||
for (idx, rel) in store.relations.iter().enumerate() {
|
||||
if rel.deleted { continue; }
|
||||
|
||||
let source_content = store.nodes.get(&rel.source_key)
|
||||
.map(|n| n.content.clone()).unwrap_or_default();
|
||||
let target_content = store.nodes.get(&rel.target_key)
|
||||
.map(|n| n.content.clone()).unwrap_or_default();
|
||||
|
||||
// Find section children of target if it's file-level
|
||||
let target_sections = if !rel.target_key.contains('#') {
|
||||
let prefix = format!("{}#", rel.target_key);
|
||||
store.nodes.keys()
|
||||
.filter(|k| k.starts_with(&prefix))
|
||||
.cloned()
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
links.push(LinkInfo {
|
||||
rel_idx: idx,
|
||||
source_key: rel.source_key.clone(),
|
||||
target_key: rel.target_key.clone(),
|
||||
source_content,
|
||||
target_content,
|
||||
strength: rel.strength,
|
||||
target_sections,
|
||||
});
|
||||
}
|
||||
|
||||
let total = links.len();
|
||||
println!("Link audit: {} links to review", total);
|
||||
if !apply {
|
||||
println!("DRY RUN — use --apply to make changes");
|
||||
}
|
||||
|
||||
// Batch by char budget (~100K chars per prompt)
|
||||
let char_budget = 100_000usize;
|
||||
let mut batches: Vec<Vec<usize>> = Vec::new();
|
||||
let mut current_batch: Vec<usize> = Vec::new();
|
||||
let mut current_chars = 0usize;
|
||||
|
||||
for (i, link) in links.iter().enumerate() {
|
||||
let link_chars = link.source_content.len() + link.target_content.len() + 200;
|
||||
if !current_batch.is_empty() && current_chars + link_chars > char_budget {
|
||||
batches.push(std::mem::take(&mut current_batch));
|
||||
current_chars = 0;
|
||||
}
|
||||
current_batch.push(i);
|
||||
current_chars += link_chars;
|
||||
}
|
||||
if !current_batch.is_empty() {
|
||||
batches.push(current_batch);
|
||||
}
|
||||
|
||||
let total_batches = batches.len();
|
||||
println!("{} batches (avg {} links/batch)\n", total_batches,
|
||||
if total_batches > 0 { total / total_batches } else { 0 });
|
||||
|
||||
use rayon::prelude::*;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
// Build all batch prompts up front
|
||||
let batch_data: Vec<(usize, Vec<LinkInfo>, String)> = batches.iter().enumerate()
|
||||
.map(|(batch_idx, batch_indices)| {
|
||||
let batch_infos: Vec<LinkInfo> = batch_indices.iter().map(|&i| {
|
||||
let l = &links[i];
|
||||
LinkInfo {
|
||||
rel_idx: l.rel_idx,
|
||||
source_key: l.source_key.clone(),
|
||||
target_key: l.target_key.clone(),
|
||||
source_content: l.source_content.clone(),
|
||||
target_content: l.target_content.clone(),
|
||||
strength: l.strength,
|
||||
target_sections: l.target_sections.clone(),
|
||||
}
|
||||
}).collect();
|
||||
let prompt = build_audit_prompt(&batch_infos, batch_idx + 1, total_batches);
|
||||
(batch_idx, batch_infos, prompt)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Progress counter
|
||||
let done = AtomicUsize::new(0);
|
||||
|
||||
// Run batches in parallel via rayon
|
||||
let batch_results: Vec<_> = batch_data.par_iter()
|
||||
.map(|(batch_idx, batch_infos, prompt)| {
|
||||
let response = llm::call_simple("audit", prompt);
|
||||
let completed = done.fetch_add(1, Ordering::Relaxed) + 1;
|
||||
eprint!("\r Batches: {}/{} done", completed, total_batches);
|
||||
(*batch_idx, batch_infos, response)
|
||||
})
|
||||
.collect();
|
||||
eprintln!(); // newline after progress
|
||||
|
||||
// Process results sequentially
|
||||
let mut stats = AuditStats {
|
||||
kept: 0, deleted: 0, retargeted: 0, weakened: 0, strengthened: 0, errors: 0,
|
||||
};
|
||||
let mut deletions: Vec<usize> = Vec::new();
|
||||
let mut retargets: Vec<(usize, String)> = Vec::new();
|
||||
let mut strength_changes: Vec<(usize, f32)> = Vec::new();
|
||||
|
||||
for (batch_idx, batch_infos, response) in &batch_results {
|
||||
let response = match response {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
eprintln!(" Batch {}: error: {}", batch_idx + 1, e);
|
||||
stats.errors += batch_infos.len();
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let actions = parse_audit_response(response, batch_infos.len());
|
||||
|
||||
let mut responded: HashSet<usize> = HashSet::new();
|
||||
|
||||
for (idx, action) in &actions {
|
||||
responded.insert(*idx);
|
||||
let link = &batch_infos[*idx];
|
||||
|
||||
match action {
|
||||
AuditAction::Keep => {
|
||||
stats.kept += 1;
|
||||
}
|
||||
AuditAction::Delete => {
|
||||
println!(" DELETE {} → {}", link.source_key, link.target_key);
|
||||
deletions.push(link.rel_idx);
|
||||
stats.deleted += 1;
|
||||
}
|
||||
AuditAction::Retarget(new_target) => {
|
||||
println!(" RETARGET {} → {} (was {})",
|
||||
link.source_key, new_target, link.target_key);
|
||||
retargets.push((link.rel_idx, new_target.clone()));
|
||||
stats.retargeted += 1;
|
||||
}
|
||||
AuditAction::Weaken(s) => {
|
||||
println!(" WEAKEN {} → {} (str {:.2} → {:.2})",
|
||||
link.source_key, link.target_key, link.strength, s);
|
||||
strength_changes.push((link.rel_idx, *s));
|
||||
stats.weakened += 1;
|
||||
}
|
||||
AuditAction::Strengthen(s) => {
|
||||
println!(" STRENGTHEN {} → {} (str {:.2} → {:.2})",
|
||||
link.source_key, link.target_key, link.strength, s);
|
||||
strength_changes.push((link.rel_idx, *s));
|
||||
stats.strengthened += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i in 0..batch_infos.len() {
|
||||
if !responded.contains(&i) {
|
||||
stats.kept += 1;
|
||||
}
|
||||
}
|
||||
|
||||
println!(" Batch {}/{}: +{}kept +{}del +{}retarget +{}weak +{}strong",
|
||||
batch_idx + 1, total_batches,
|
||||
stats.kept, stats.deleted, stats.retargeted, stats.weakened, stats.strengthened);
|
||||
}
|
||||
|
||||
// Apply changes
|
||||
if apply && (stats.deleted > 0 || stats.retargeted > 0
|
||||
|| stats.weakened > 0 || stats.strengthened > 0) {
|
||||
println!("\nApplying changes...");
|
||||
|
||||
// Deletions: soft-delete
|
||||
for rel_idx in &deletions {
|
||||
store.relations[*rel_idx].deleted = true;
|
||||
}
|
||||
|
||||
// Strength changes
|
||||
for (rel_idx, new_strength) in &strength_changes {
|
||||
store.relations[*rel_idx].strength = *new_strength;
|
||||
}
|
||||
|
||||
// Retargets: soft-delete old, create new
|
||||
for (rel_idx, new_target) in &retargets {
|
||||
let source_key = store.relations[*rel_idx].source_key.clone();
|
||||
let old_strength = store.relations[*rel_idx].strength;
|
||||
let source_uuid = store.nodes.get(&source_key)
|
||||
.map(|n| n.uuid).unwrap_or([0u8; 16]);
|
||||
let target_uuid = store.nodes.get(new_target)
|
||||
.map(|n| n.uuid).unwrap_or([0u8; 16]);
|
||||
|
||||
// Soft-delete old
|
||||
store.relations[*rel_idx].deleted = true;
|
||||
|
||||
// Create new
|
||||
if target_uuid != [0u8; 16] {
|
||||
let new_rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
store::RelationType::Auto,
|
||||
old_strength,
|
||||
&source_key, new_target,
|
||||
);
|
||||
store.add_relation(new_rel).ok();
|
||||
}
|
||||
}
|
||||
|
||||
store.save()?;
|
||||
println!("Saved.");
|
||||
}
|
||||
|
||||
Ok(stats)
|
||||
}
|
||||
173
src/subconscious/agents/consolidate.rs
Normal file
173
src/subconscious/agents/consolidate.rs
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
// Consolidation pipeline: plan → agents → maintenance → digests → links
|
||||
//
|
||||
// consolidate_full() runs the full autonomous consolidation:
|
||||
// 1. Plan: analyze metrics, allocate agents
|
||||
// 2. Execute: run each agent (agents apply changes via tool calls)
|
||||
// 3. Graph maintenance (orphans, degree cap)
|
||||
// 4. Digest: generate missing daily/weekly/monthly digests
|
||||
// 5. Links: apply links extracted from digests
|
||||
// 6. Summary: final metrics comparison
|
||||
|
||||
use super::digest;
|
||||
use super::knowledge;
|
||||
use crate::neuro;
|
||||
use crate::store::{self, Store};
|
||||
|
||||
|
||||
/// Write `line` to the end of `buf`, followed by a newline.
fn log_line(buf: &mut String, line: &str) {
    buf.extend([line, "\n"]);
}
|
||||
|
||||
/// Run the full autonomous consolidation pipeline with logging.
|
||||
pub fn consolidate_full(store: &mut Store) -> Result<(), String> {
|
||||
consolidate_full_with_progress(store, &|_| {})
|
||||
}
|
||||
|
||||
pub fn consolidate_full_with_progress(
|
||||
store: &mut Store,
|
||||
on_progress: &dyn Fn(&str),
|
||||
) -> Result<(), String> {
|
||||
let start = std::time::Instant::now();
|
||||
let log_key = format!("_consolidate-log-{}", store::compact_timestamp());
|
||||
let mut log_buf = String::new();
|
||||
|
||||
log_line(&mut log_buf, "=== CONSOLIDATE FULL ===");
|
||||
log_line(&mut log_buf, &format!("Started: {}", store::format_datetime(store::now_epoch())));
|
||||
log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
|
||||
log_line(&mut log_buf, "");
|
||||
|
||||
// --- Step 1: Plan ---
|
||||
log_line(&mut log_buf, "--- Step 1: Plan ---");
|
||||
on_progress("planning");
|
||||
let plan = neuro::consolidation_plan(store);
|
||||
let plan_text = neuro::format_plan(&plan);
|
||||
log_line(&mut log_buf, &plan_text);
|
||||
println!("{}", plan_text);
|
||||
|
||||
let total_agents = plan.total();
|
||||
log_line(&mut log_buf, &format!("Total agents to run: {}", total_agents));
|
||||
|
||||
// --- Step 2: Execute agents ---
|
||||
log_line(&mut log_buf, "\n--- Step 2: Execute agents ---");
|
||||
let mut agent_num = 0usize;
|
||||
let mut agent_errors = 0usize;
|
||||
|
||||
let batch_size = 5;
|
||||
let runs = plan.to_agent_runs(batch_size);
|
||||
|
||||
for (agent_type, count) in &runs {
|
||||
agent_num += 1;
|
||||
let label = if *count > 0 {
|
||||
format!("[{}/{}] {} (batch={})", agent_num, runs.len(), agent_type, count)
|
||||
} else {
|
||||
format!("[{}/{}] {}", agent_num, runs.len(), agent_type)
|
||||
};
|
||||
|
||||
log_line(&mut log_buf, &format!("\n{}", label));
|
||||
on_progress(&label);
|
||||
println!("{}", label);
|
||||
|
||||
// Reload store to pick up changes from previous agents
|
||||
if agent_num > 1 {
|
||||
*store = Store::load()?;
|
||||
}
|
||||
|
||||
match knowledge::run_and_apply(store, agent_type, *count, "consolidate") {
|
||||
Ok(()) => {
|
||||
let msg = " Done".to_string();
|
||||
log_line(&mut log_buf, &msg);
|
||||
on_progress(&msg);
|
||||
println!("{}", msg);
|
||||
}
|
||||
Err(e) => {
|
||||
let msg = format!(" ERROR: {}", e);
|
||||
log_line(&mut log_buf, &msg);
|
||||
eprintln!("{}", msg);
|
||||
agent_errors += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log_line(&mut log_buf, &format!("\nAgents complete: {} run, {} errors",
|
||||
agent_num - agent_errors, agent_errors));
|
||||
store.save()?;
|
||||
|
||||
// --- Step 3: Link orphans ---
|
||||
log_line(&mut log_buf, "\n--- Step 3: Link orphans ---");
|
||||
on_progress("linking orphans");
|
||||
println!("\n--- Linking orphan nodes ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
let (lo_orphans, lo_added) = neuro::link_orphans(store, 2, 3, 0.15);
|
||||
log_line(&mut log_buf, &format!(" {} orphans, {} links added", lo_orphans, lo_added));
|
||||
|
||||
// --- Step 3b: Cap degree ---
|
||||
log_line(&mut log_buf, "\n--- Step 3b: Cap degree ---");
|
||||
on_progress("capping degree");
|
||||
println!("\n--- Capping node degree ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
match store.cap_degree(50) {
|
||||
Ok((hubs, pruned)) => {
|
||||
store.save()?;
|
||||
log_line(&mut log_buf, &format!(" {} hubs capped, {} edges pruned", hubs, pruned));
|
||||
}
|
||||
Err(e) => log_line(&mut log_buf, &format!(" ERROR: {}", e)),
|
||||
}
|
||||
|
||||
// --- Step 4: Digest auto ---
|
||||
log_line(&mut log_buf, "\n--- Step 4: Digest auto ---");
|
||||
on_progress("generating digests");
|
||||
println!("\n--- Generating missing digests ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
match digest::digest_auto(store) {
|
||||
Ok(()) => log_line(&mut log_buf, " Digests done."),
|
||||
Err(e) => {
|
||||
let msg = format!(" ERROR in digest auto: {}", e);
|
||||
log_line(&mut log_buf, &msg);
|
||||
eprintln!("{}", msg);
|
||||
}
|
||||
}
|
||||
|
||||
// --- Step 5: Apply digest links ---
|
||||
log_line(&mut log_buf, "\n--- Step 5: Apply digest links ---");
|
||||
on_progress("applying digest links");
|
||||
println!("\n--- Applying digest links ---");
|
||||
*store = Store::load()?;
|
||||
|
||||
let links = digest::parse_all_digest_links(store);
|
||||
let (applied, skipped, fallbacks) = digest::apply_digest_links(store, &links);
|
||||
store.save()?;
|
||||
log_line(&mut log_buf, &format!(" {} links applied, {} skipped, {} fallbacks",
|
||||
applied, skipped, fallbacks));
|
||||
|
||||
// --- Step 6: Summary ---
|
||||
let elapsed = start.elapsed();
|
||||
log_line(&mut log_buf, "\n--- Summary ---");
|
||||
log_line(&mut log_buf, &format!("Finished: {}", store::format_datetime(store::now_epoch())));
|
||||
log_line(&mut log_buf, &format!("Duration: {:.0}s", elapsed.as_secs_f64()));
|
||||
*store = Store::load()?;
|
||||
log_line(&mut log_buf, &format!("Nodes: {} Relations: {}", store.nodes.len(), store.relations.len()));
|
||||
|
||||
let summary = format!(
|
||||
"\n=== CONSOLIDATE FULL COMPLETE ===\n\
|
||||
Duration: {:.0}s\n\
|
||||
Agents: {} run, {} errors\n\
|
||||
Nodes: {} Relations: {}\n",
|
||||
elapsed.as_secs_f64(),
|
||||
agent_num - agent_errors, agent_errors,
|
||||
store.nodes.len(), store.relations.len(),
|
||||
);
|
||||
log_line(&mut log_buf, &summary);
|
||||
println!("{}", summary);
|
||||
|
||||
// Store the log as a node
|
||||
store.upsert_provenance(&log_key, &log_buf,
|
||||
"consolidate:write").ok();
|
||||
store.save()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
1825
src/subconscious/agents/daemon.rs
Normal file
1825
src/subconscious/agents/daemon.rs
Normal file
File diff suppressed because it is too large
Load diff
736
src/subconscious/agents/defs.rs
Normal file
736
src/subconscious/agents/defs.rs
Normal file
|
|
@ -0,0 +1,736 @@
|
|||
// Agent definitions: self-contained files with query + prompt template.
|
||||
//
|
||||
// Each agent is a file in the agents/ directory:
|
||||
// - First line: JSON header (agent, query, model, schedule)
|
||||
// - After blank line: prompt template with {{placeholder}} lookups
|
||||
//
|
||||
// Placeholders are resolved at runtime:
|
||||
// {{topology}} — graph topology header
|
||||
// {{nodes}} — query results formatted as node sections
|
||||
// {{episodes}} — alias for {{nodes}}
|
||||
// {{health}} — graph health report
|
||||
// {{pairs}} — interference pairs from detect_interference
|
||||
// {{rename}} — rename candidates
|
||||
// {{split}} — split detail for the first query result
|
||||
//
|
||||
// The query selects what to operate on; placeholders pull in context.
|
||||
|
||||
use crate::graph::Graph;
|
||||
use crate::neuro::{consolidation_priority, ReplayItem};
|
||||
use crate::search;
|
||||
use crate::store::Store;
|
||||
|
||||
use serde::Deserialize;
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// A loaded agent: the settings from the file's JSON header line plus the
/// prompt template that follows it.
#[derive(Clone, Debug)]
pub struct AgentDef {
    /// Agent name, used to look the definition up.
    pub agent: String,
    /// Query string from the header; empty when the header omits it.
    pub query: String,
    /// Raw prompt template (everything after the header line).
    pub prompt: String,
    /// Model name; `"sonnet"` when the header omits it.
    pub model: String,
    /// Schedule string from the header; empty when omitted.
    pub schedule: String,
    /// Tool names listed in the header; empty when omitted.
    pub tools: Vec<String>,
    /// Number of seed nodes / conversation fragments, if the header sets one.
    pub count: Option<usize>,
    /// Maximum conversation chunk size in bytes, if the header sets one.
    pub chunk_size: Option<usize>,
    /// Overlap between chunks in bytes, if the header sets one.
    pub chunk_overlap: Option<usize>,
    /// LLM temperature override, if the header sets one.
    pub temperature: Option<f32>,
}
|
||||
|
||||
/// The JSON header portion (first line of the file).
|
||||
#[derive(Deserialize)]
|
||||
struct AgentHeader {
|
||||
agent: String,
|
||||
#[serde(default)]
|
||||
query: String,
|
||||
#[serde(default = "default_model")]
|
||||
model: String,
|
||||
#[serde(default)]
|
||||
schedule: String,
|
||||
#[serde(default)]
|
||||
tools: Vec<String>,
|
||||
/// Number of seed nodes / conversation fragments (overrides --count)
|
||||
#[serde(default)]
|
||||
count: Option<usize>,
|
||||
/// Max size of conversation chunks in bytes (default 50000)
|
||||
#[serde(default)]
|
||||
chunk_size: Option<usize>,
|
||||
/// Overlap between chunks in bytes (default 10000)
|
||||
#[serde(default)]
|
||||
chunk_overlap: Option<usize>,
|
||||
/// LLM temperature override
|
||||
#[serde(default)]
|
||||
temperature: Option<f32>,
|
||||
}
|
||||
|
||||
/// Model name used when an agent header does not specify one.
fn default_model() -> String {
    String::from("sonnet")
}
|
||||
|
||||
/// Parse an agent file: first line is JSON config, rest is the prompt.
|
||||
fn parse_agent_file(content: &str) -> Option<AgentDef> {
|
||||
let (first_line, rest) = content.split_once('\n')?;
|
||||
let header: AgentHeader = serde_json::from_str(first_line.trim()).ok()?;
|
||||
// Skip optional blank line between header and prompt body
|
||||
let prompt = rest.strip_prefix('\n').unwrap_or(rest);
|
||||
Some(AgentDef {
|
||||
agent: header.agent,
|
||||
query: header.query,
|
||||
prompt: prompt.to_string(),
|
||||
model: header.model,
|
||||
schedule: header.schedule,
|
||||
tools: header.tools,
|
||||
count: header.count,
|
||||
chunk_size: header.chunk_size,
|
||||
chunk_overlap: header.chunk_overlap,
|
||||
temperature: header.temperature,
|
||||
})
|
||||
}
|
||||
|
||||
fn agents_dir() -> PathBuf {
|
||||
let repo = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("agents");
|
||||
if repo.is_dir() { return repo; }
|
||||
crate::store::memory_dir().join("agents")
|
||||
}
|
||||
|
||||
/// Load all agent definitions.
|
||||
pub fn load_defs() -> Vec<AgentDef> {
|
||||
let dir = agents_dir();
|
||||
let Ok(entries) = std::fs::read_dir(&dir) else { return Vec::new() };
|
||||
|
||||
entries
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| {
|
||||
let p = e.path();
|
||||
p.extension().map(|x| x == "agent" || x == "md").unwrap_or(false)
|
||||
})
|
||||
.filter_map(|e| {
|
||||
let content = std::fs::read_to_string(e.path()).ok()?;
|
||||
parse_agent_file(&content)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Look up a single agent definition by name.
|
||||
pub fn get_def(name: &str) -> Option<AgentDef> {
|
||||
let dir = agents_dir();
|
||||
for ext in ["agent", "md"] {
|
||||
let path = dir.join(format!("{}.{}", name, ext));
|
||||
if let Ok(content) = std::fs::read_to_string(&path)
|
||||
&& let Some(def) = parse_agent_file(&content) {
|
||||
return Some(def);
|
||||
}
|
||||
}
|
||||
load_defs().into_iter().find(|d| d.agent == name)
|
||||
}
|
||||
|
||||
/// Outcome of resolving one placeholder.
struct Resolved {
    /// Text that replaces the placeholder.
    text: String,
    /// Keys of the nodes this placeholder brought in.
    keys: Vec<String>,
}
|
||||
|
||||
/// Resolve a single {{placeholder}} by name.
/// Returns the replacement text and any node keys it produced (for visit tracking).
///
/// Recognised names: `topology`, `nodes`/`episodes`, `health`, `pairs`,
/// `rename`, `split`, `seed`, `organize`, `conversations`,
/// `siblings`/`neighborhood`, `targets`, `hubs`, `agent-context`,
/// `node:KEY`, `conversation`, `seen_current`, `seen_previous` and
/// `memory_ratio`. Any other name yields `None`.
///
/// * `keys`  — node keys selected for this run; several arms render them.
/// * `count` — upper bound used by the arms that pick their own items
///   (`pairs`, `rename`, `conversations`).
///
/// Some arms also return `None` when they have nothing to render
/// (`split` without keys, `seed` / `agent-context` / `conversation` with
/// empty output, `node:KEY` for a missing key).
fn resolve(
    name: &str,
    store: &Store,
    graph: &Graph,
    keys: &[String],
    count: usize,
) -> Option<Resolved> {
    match name {
        "topology" => Some(Resolved {
            text: super::prompts::format_topology_header(graph),
            keys: vec![],
        }),

        "nodes" | "episodes" => {
            let items = keys_to_replay_items(store, keys, graph);
            Some(Resolved {
                text: super::prompts::format_nodes_section(store, &items, graph),
                keys: vec![], // keys already tracked from query
            })
        }

        "health" => Some(Resolved {
            text: super::prompts::format_health_section(store, graph),
            keys: vec![],
        }),

        "pairs" => {
            // Interference pairs, capped at `count`; both members of every
            // pair are reported as touched keys.
            let mut pairs = crate::neuro::detect_interference(store, graph, 0.5);
            pairs.truncate(count);
            let pair_keys: Vec<String> = pairs.iter()
                .flat_map(|(a, b, _)| vec![a.clone(), b.clone()])
                .collect();
            Some(Resolved {
                text: super::prompts::format_pairs_section(&pairs, store, graph),
                keys: pair_keys,
            })
        }

        "rename" => {
            let (rename_keys, section) = super::prompts::format_rename_candidates(store, count);
            Some(Resolved { text: section, keys: rename_keys })
        }

        "split" => {
            // Only the first key is rendered; no keys means no section.
            let key = keys.first()?;
            Some(Resolved {
                text: super::prompts::format_split_plan_node(store, graph, key),
                keys: vec![], // key already tracked from query
            })
        }

        // seed — render output for each seed node (content + deduped links)
        "seed" => {
            let mut text = String::new();
            let mut result_keys = Vec::new();
            for key in keys {
                if let Some(rendered) = crate::cli::node::render_node(store, key) {
                    if !text.is_empty() { text.push_str("\n\n---\n\n"); }
                    text.push_str(&format!("## {}\n\n{}", key, rendered));
                    result_keys.push(key.clone());
                }
            }
            if text.is_empty() { return None; }
            Some(Resolved { text, keys: result_keys })
        }

        "organize" => {
            // Show seed nodes with their neighbors for exploratory organizing
            use crate::store::NodeType;

            // Helper: shell-quote keys containing #
            let sq = |k: &str| -> String {
                if k.contains('#') { format!("'{}'", k) } else { k.to_string() }
            };

            // Note: the header counts all requested keys, including any
            // that are skipped below as missing or deleted.
            let mut text = format!("### Seed nodes ({} starting points)\n\n", keys.len());
            let mut result_keys = Vec::new();

            for key in keys {
                let Some(node) = store.nodes.get(key) else { continue };
                if node.deleted { continue; }

                let is_journal = node.node_type == NodeType::EpisodicSession;
                let tag = if is_journal { " [JOURNAL — no delete]" } else { "" };
                let words = node.content.split_whitespace().count();

                text.push_str(&format!("#### {}{} ({} words)\n\n", sq(key), tag, words));

                // Show first ~200 words of content as preview
                // (short nodes are emitted verbatim, whitespace intact).
                let preview: String = node.content.split_whitespace()
                    .take(200).collect::<Vec<_>>().join(" ");
                if words > 200 {
                    text.push_str(&format!("{}...\n\n", preview));
                } else {
                    text.push_str(&format!("{}\n\n", node.content));
                }

                // Show neighbors with strengths (first 15 only)
                let neighbors = graph.neighbors(key);
                if !neighbors.is_empty() {
                    text.push_str("**Neighbors:**\n");
                    for (nbr, strength) in neighbors.iter().take(15) {
                        let nbr_type = store.nodes.get(nbr.as_str())
                            .map(|n| match n.node_type {
                                NodeType::EpisodicSession => " [journal]",
                                NodeType::EpisodicDaily => " [daily]",
                                _ => "",
                            })
                            .unwrap_or("");
                        text.push_str(&format!(" [{:.1}] {}{}\n", strength, sq(nbr), nbr_type));
                    }
                    if neighbors.len() > 15 {
                        text.push_str(&format!(" ... and {} more\n", neighbors.len() - 15));
                    }
                    text.push('\n');
                }

                text.push_str("---\n\n");
                result_keys.push(key.clone());
            }

            text.push_str("Use `poc-memory render KEY` and `poc-memory query \"neighbors('KEY')\"` to explore further.\n");

            Some(Resolved { text, keys: result_keys })
        }

        "conversations" => {
            let fragments = super::knowledge::select_conversation_fragments(count);
            let fragment_ids: Vec<String> = fragments.iter()
                .map(|(id, _)| id.clone())
                .collect();
            let text = fragments.iter()
                .map(|(id, text)| format!("### Session {}\n\n{}", id, text))
                .collect::<Vec<_>>()
                .join("\n\n---\n\n");
            Some(Resolved { text, keys: fragment_ids })
        }

        "siblings" | "neighborhood" => {
            let mut out = String::new();
            let mut all_keys: Vec<String> = Vec::new();
            // Every node already rendered (seed or neighbor), so nothing is
            // emitted twice across seeds.
            let mut included_nodes: std::collections::HashSet<String> = std::collections::HashSet::new();
            const MAX_NEIGHBORS: usize = 25;

            for key in keys {
                if included_nodes.contains(key) { continue; }
                included_nodes.insert(key.clone());
                let Some(node) = store.nodes.get(key.as_str()) else { continue };
                let neighbors = graph.neighbors(key);

                // Seed node with full content
                out.push_str(&format!("## {} (seed)\n\n{}\n\n", key, node.content));
                all_keys.push(key.clone());

                // Rank neighbors by link_strength * node_weight
                // (node weight is floored at 0.01; neighbors missing from
                // the store are dropped).
                // Include all if <= 10, otherwise take top MAX_NEIGHBORS
                let mut ranked: Vec<(String, f32, f32)> = neighbors.iter()
                    .filter_map(|(nbr, strength)| {
                        store.nodes.get(nbr.as_str()).map(|n| {
                            let node_weight = n.weight.max(0.01);
                            let score = strength * node_weight;
                            (nbr.to_string(), *strength, score)
                        })
                    })
                    .collect();
                ranked.sort_by(|a, b| b.2.total_cmp(&a.2));

                let total = ranked.len();
                let included: Vec<_> = if total <= 10 {
                    ranked
                } else {
                    // Smooth cutoff: threshold scales with neighborhood size
                    // Generous — err on including too much so the agent can
                    // see and clean up junk. The threshold is
                    // top_score * (15 / total): 20 neighbors → 0.75 × top
                    // score, 50 neighbors → 0.30 × top score. The first 10
                    // are always kept; the list stops at the first later
                    // entry below the threshold or at MAX_NEIGHBORS.
                    let top_score = ranked.first().map(|(_, _, s)| *s).unwrap_or(0.0);
                    let ratio = (15.0 / total as f32).min(1.0);
                    let threshold = top_score * ratio;
                    ranked.into_iter()
                        .enumerate()
                        .take_while(|(i, (_, _, score))| *i < 10 || *score >= threshold)
                        .take(MAX_NEIGHBORS)
                        .map(|(_, item)| item)
                        .collect()
                };

                if !included.is_empty() {
                    if total > included.len() {
                        out.push_str(&format!("### Neighbors (top {} of {}, ranked by importance)\n\n",
                            included.len(), total));
                    } else {
                        out.push_str("### Neighbors\n\n");
                    }
                    let included_keys: std::collections::HashSet<&str> = included.iter()
                        .map(|(k, _, _)| k.as_str()).collect();

                    // Budget: stop adding full content when prompt gets large.
                    // Remaining neighbors get header-only (key + first line).
                    // The check uses the length of `out` accumulated so far,
                    // across all seeds processed up to this point.
                    const NEIGHBORHOOD_BUDGET: usize = 400_000; // ~100K tokens, leaves room for core-personality + instructions
                    let mut budget_exceeded = false;

                    for (nbr, strength, _score) in &included {
                        // Skip neighbors already rendered for an earlier seed.
                        if included_nodes.contains(nbr) { continue; }
                        included_nodes.insert(nbr.clone());
                        if let Some(n) = store.nodes.get(nbr.as_str()) {
                            if budget_exceeded || out.len() > NEIGHBORHOOD_BUDGET {
                                // Header-only: key + first non-empty line
                                budget_exceeded = true;
                                let first_line = n.content.lines()
                                    .find(|l| !l.trim().is_empty())
                                    .unwrap_or("(empty)");
                                out.push_str(&format!("#### {} (link: {:.2}) — {}\n",
                                    nbr, strength, first_line));
                            } else {
                                out.push_str(&format!("#### {} (link: {:.2})\n\n{}\n\n",
                                    nbr, strength, n.content));
                            }
                            all_keys.push(nbr.to_string());
                        }
                    }
                    if budget_exceeded {
                        out.push_str("\n(remaining neighbors shown as headers only — prompt budget)\n\n");
                    }

                    // Cross-links between included neighbors
                    // (the `nbr < nbr2` test keeps each pair only once).
                    let mut cross_links = Vec::new();
                    for (nbr, _, _) in &included {
                        for (nbr2, strength) in graph.neighbors(nbr) {
                            if nbr2.as_str() != key
                                && included_keys.contains(nbr2.as_str())
                                && nbr.as_str() < nbr2.as_str()
                            {
                                cross_links.push((nbr.clone(), nbr2, strength));
                            }
                        }
                    }
                    if !cross_links.is_empty() {
                        out.push_str("### Cross-links between neighbors\n\n");
                        for (a, b, s) in &cross_links {
                            out.push_str(&format!(" {} ↔ {} ({:.2})\n", a, b, s));
                        }
                        out.push('\n');
                    }
                }
            }

            Some(Resolved { text: out, keys: all_keys })
        }

        // targets/context: aliases for challenger-style presentation
        // (rendered the same way as "nodes").
        "targets" => {
            let items = keys_to_replay_items(store, keys, graph);
            Some(Resolved {
                text: super::prompts::format_nodes_section(store, &items, graph),
                keys: vec![],
            })
        }

        "hubs" => {
            // Top hub nodes by degree, spread apart (skip neighbors of already-selected hubs)
            // Deleted nodes and keys starting with '_' are not considered.
            let mut hubs: Vec<(String, usize)> = store.nodes.iter()
                .filter(|(k, n)| !n.deleted && !k.starts_with('_'))
                .map(|(k, _)| {
                    let degree = graph.neighbors(k).len();
                    (k.clone(), degree)
                })
                .collect();
            hubs.sort_by(|a, b| b.1.cmp(&a.1));

            let mut selected = Vec::new();
            let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
            for (key, degree) in &hubs {
                if seen.contains(key) { continue; }
                selected.push(format!(" - {} (degree {})", key, degree));
                // Mark neighbors as seen so we pick far-apart hubs
                for (nbr, _) in graph.neighbors(key) {
                    seen.insert(nbr.clone());
                }
                seen.insert(key.clone());
                // At most 20 hubs are listed.
                if selected.len() >= 20 { break; }
            }

            let text = format!("## Hub nodes (link targets)\n\n{}", selected.join("\n"));
            Some(Resolved { text, keys: vec![] })
        }

        // agent-context — personality/identity groups from load-context config
        // Only groups flagged `agent` are included.
        "agent-context" => {
            let cfg = crate::config::get();
            let mut text = String::new();
            // Shadows the `keys` parameter; this arm reports the group entry keys.
            let mut keys = Vec::new();
            for group in &cfg.context_groups {
                if !group.agent { continue; }
                let entries = crate::cli::misc::get_group_content(group, store, &cfg);
                for (key, content) in entries {
                    use std::fmt::Write;
                    writeln!(text, "--- {} ({}) ---", key, group.label).ok();
                    writeln!(text, "{}\n", content).ok();
                    keys.push(key);
                }
            }
            if text.is_empty() { None }
            else { Some(Resolved { text, keys }) }
        }

        // node:KEY — inline a node's content by key
        other if other.starts_with("node:") => {
            // 5 == "node:".len()
            let key = &other[5..];
            store.nodes.get(key).map(|n| Resolved {
                text: n.content.clone(),
                keys: vec![key.to_string()],
            })
        }

        // conversation — tail of the current session transcript (post-compaction)
        "conversation" => {
            let text = resolve_conversation();
            if text.is_empty() { None }
            else { Some(Resolved { text, keys: vec![] }) }
        }

        // seen_current — memories surfaced in current (post-compaction) context
        "seen_current" => {
            let text = resolve_seen_list("");
            Some(Resolved { text, keys: vec![] })
        }

        // seen_previous — memories surfaced before last compaction
        "seen_previous" => {
            let text = resolve_seen_list("-prev");
            Some(Resolved { text, keys: vec![] })
        }

        // memory_ratio — what % of current context is recalled memories
        "memory_ratio" => {
            let text = resolve_memory_ratio();
            Some(Resolved { text, keys: vec![] })
        }

        // Unknown placeholder: the caller decides how to report it.
        _ => None,
    }
}
|
||||
|
||||
/// Get the tail of the current session's conversation.
|
||||
/// Reads POC_SESSION_ID to find the transcript, extracts the last
|
||||
/// segment (post-compaction), returns the tail (~100K chars).
|
||||
fn resolve_conversation() -> String {
|
||||
let session_id = std::env::var("POC_SESSION_ID").unwrap_or_default();
|
||||
if session_id.is_empty() { return String::new(); }
|
||||
|
||||
let projects = crate::config::get().projects_dir.clone();
|
||||
// Find the transcript file matching this session
|
||||
let mut transcript = None;
|
||||
if let Ok(dirs) = std::fs::read_dir(&projects) {
|
||||
for dir in dirs.filter_map(|e| e.ok()) {
|
||||
let path = dir.path().join(format!("{}.jsonl", session_id));
|
||||
if path.exists() {
|
||||
transcript = Some(path);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let Some(path) = transcript else { return String::new() };
|
||||
let path_str = path.to_string_lossy();
|
||||
|
||||
let Some(iter) = crate::transcript::TailMessages::open(&path_str) else {
|
||||
return String::new();
|
||||
};
|
||||
|
||||
let cfg = crate::config::get();
|
||||
let mut fragments: Vec<String> = Vec::new();
|
||||
let mut total_bytes = 0;
|
||||
const MAX_BYTES: usize = 200_000;
|
||||
|
||||
for (role, content, ts) in iter {
|
||||
if total_bytes >= MAX_BYTES { break; }
|
||||
let name = if role == "user" { &cfg.user_name } else { &cfg.assistant_name };
|
||||
let formatted = if !ts.is_empty() {
|
||||
format!("**{}** {}: {}", name, &ts[..ts.len().min(19)], content)
|
||||
} else {
|
||||
format!("**{}:** {}", name, content)
|
||||
};
|
||||
total_bytes += content.len();
|
||||
fragments.push(formatted);
|
||||
}
|
||||
|
||||
// Reverse back to chronological order
|
||||
fragments.reverse();
|
||||
fragments.join("\n\n")
|
||||
}
|
||||
|
||||
/// List the memory keys recorded in a seen-set file for this session.
///
/// `suffix` selects the file: "" for the current set, "-prev" for the
/// pre-compaction set. The file is
/// `/tmp/claude-memory-search/seen<suffix>-<POC_SESSION_ID>` and holds one
/// `timestamp<TAB>key` pair per line; lines without a tab are ignored.
///
/// Returns "(no session ID)" when `POC_SESSION_ID` is unset or empty,
/// "(none)" when no entries could be read, and otherwise up to 20
/// `- key (timestamp)` lines, newest timestamp first, one per distinct key.
fn resolve_seen_list(suffix: &str) -> String {
    let session_id = match std::env::var("POC_SESSION_ID") {
        Ok(id) if !id.is_empty() => id,
        _ => return "(no session ID)".to_string(),
    };

    let mut path = std::path::PathBuf::from("/tmp/claude-memory-search");
    path.push(format!("seen{}-{}", suffix, session_id));

    // Parse `timestamp \t key` rows; an unreadable file yields no rows.
    let mut rows: Vec<(String, String)> = Vec::new();
    if let Ok(raw) = std::fs::read_to_string(&path) {
        for line in raw.lines() {
            if let Some((stamp, key)) = line.split_once('\t') {
                rows.push((stamp.to_string(), key.to_string()));
            }
        }
    }
    if rows.is_empty() {
        return "(none)".to_string();
    }

    // Newest first (stable sort on the timestamp string), then keep the
    // first occurrence of each key until 20 lines are collected.
    rows.sort_by(|a, b| a.0.cmp(&b.0).reverse());
    let mut emitted = std::collections::HashSet::new();
    let mut lines: Vec<String> = Vec::new();
    for (stamp, key) in &rows {
        if lines.len() == 20 { break; }
        if emitted.insert(key.as_str()) {
            lines.push(format!("- {} ({})", key, stamp));
        }
    }
    lines.join("\n")
}
|
||||
|
||||
/// Compute what percentage of the current conversation context is recalled memories.
|
||||
/// Sums rendered size of current seen-set keys vs total post-compaction transcript size.
|
||||
fn resolve_memory_ratio() -> String {
|
||||
let session_id = std::env::var("POC_SESSION_ID").unwrap_or_default();
|
||||
if session_id.is_empty() {
|
||||
return "(no session ID)".to_string();
|
||||
}
|
||||
|
||||
let state_dir = std::path::PathBuf::from("/tmp/claude-memory-search");
|
||||
|
||||
// Get post-compaction transcript size
|
||||
let projects = crate::config::get().projects_dir.clone();
|
||||
let transcript_size: u64 = std::fs::read_dir(&projects).ok()
|
||||
.and_then(|dirs| {
|
||||
for dir in dirs.filter_map(|e| e.ok()) {
|
||||
let path = dir.path().join(format!("{}.jsonl", session_id));
|
||||
if path.exists() {
|
||||
let file_len = path.metadata().map(|m| m.len()).unwrap_or(0);
|
||||
let compaction_offset: u64 = std::fs::read_to_string(
|
||||
state_dir.join(format!("compaction-{}", session_id))
|
||||
).ok().and_then(|s| s.trim().parse().ok()).unwrap_or(0);
|
||||
return Some(file_len.saturating_sub(compaction_offset));
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
.unwrap_or(0);
|
||||
|
||||
if transcript_size == 0 {
|
||||
return "0% of context is recalled memories (new session)".to_string();
|
||||
}
|
||||
|
||||
// Sum rendered size of each key in current seen set
|
||||
let seen_path = state_dir.join(format!("seen-{}", session_id));
|
||||
let mut seen_keys = std::collections::HashSet::new();
|
||||
let keys: Vec<String> = std::fs::read_to_string(&seen_path).ok()
|
||||
.map(|content| {
|
||||
content.lines()
|
||||
.filter(|s| !s.is_empty())
|
||||
.filter_map(|line| line.split_once('\t').map(|(_, k)| k.to_string()))
|
||||
.filter(|k| seen_keys.insert(k.clone()))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let memory_bytes: u64 = keys.iter()
|
||||
.filter_map(|key| {
|
||||
std::process::Command::new("poc-memory")
|
||||
.args(["render", key])
|
||||
.output().ok()
|
||||
})
|
||||
.map(|out| out.stdout.len() as u64)
|
||||
.sum();
|
||||
|
||||
let pct = (memory_bytes as f64 / transcript_size as f64 * 100.0).round() as u32;
|
||||
format!("{}% of current context is recalled memories ({} memories, ~{}KB of ~{}KB)",
|
||||
pct, keys.len(), memory_bytes / 1024, transcript_size / 1024)
|
||||
}
|
||||
|
||||
/// Resolve all {{placeholder}} patterns in a prompt template.
|
||||
/// Returns the resolved text and all node keys collected from placeholders.
|
||||
pub fn resolve_placeholders(
|
||||
template: &str,
|
||||
store: &Store,
|
||||
graph: &Graph,
|
||||
keys: &[String],
|
||||
count: usize,
|
||||
) -> (String, Vec<String>) {
|
||||
let mut result = template.to_string();
|
||||
let mut extra_keys = Vec::new();
|
||||
let mut pos = 0;
|
||||
loop {
|
||||
let Some(rel_start) = result[pos..].find("{{") else { break };
|
||||
let start = pos + rel_start;
|
||||
let Some(rel_end) = result[start + 2..].find("}}") else { break };
|
||||
let end = start + 2 + rel_end;
|
||||
let name = result[start + 2..end].trim().to_lowercase();
|
||||
match resolve(&name, store, graph, keys, count) {
|
||||
Some(resolved) => {
|
||||
let len = resolved.text.len();
|
||||
extra_keys.extend(resolved.keys);
|
||||
result.replace_range(start..end + 2, &resolved.text);
|
||||
pos = start + len;
|
||||
}
|
||||
None => {
|
||||
let msg = format!("(unknown: {})", name);
|
||||
let len = msg.len();
|
||||
result.replace_range(start..end + 2, &msg);
|
||||
pos = start + len;
|
||||
}
|
||||
}
|
||||
}
|
||||
(result, extra_keys)
|
||||
}
|
||||
|
||||
/// Run a config-driven agent: query → resolve placeholders → prompt.
|
||||
/// `exclude` filters out nodes (and their neighborhoods) already being
|
||||
/// worked on by other agents, preventing concurrent collisions.
|
||||
pub fn run_agent(
|
||||
store: &Store,
|
||||
def: &AgentDef,
|
||||
count: usize,
|
||||
exclude: &std::collections::HashSet<String>,
|
||||
) -> Result<super::prompts::AgentBatch, String> {
|
||||
let graph = store.build_graph();
|
||||
|
||||
// Run the query if present
|
||||
let keys = if !def.query.is_empty() {
|
||||
let mut stages = search::Stage::parse_pipeline(&def.query)?;
|
||||
let has_limit = stages.iter().any(|s|
|
||||
matches!(s, search::Stage::Transform(search::Transform::Limit(_))));
|
||||
if !has_limit {
|
||||
// Request extra results to compensate for exclusion filtering
|
||||
let padded = count + exclude.len().min(100);
|
||||
stages.push(search::Stage::Transform(search::Transform::Limit(padded)));
|
||||
}
|
||||
let results = search::run_query(&stages, vec![], &graph, store, false, count + exclude.len().min(100));
|
||||
let filtered: Vec<String> = results.into_iter()
|
||||
.map(|(k, _)| k)
|
||||
.filter(|k| !exclude.contains(k))
|
||||
.take(count)
|
||||
.collect();
|
||||
if filtered.is_empty() {
|
||||
return Err(format!("{}: query returned no results (after exclusion)", def.agent));
|
||||
}
|
||||
filtered
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
// Substitute {agent_name} before resolving {{...}} placeholders,
|
||||
// so agents can reference their own notes: {{node:subconscious-notes-{agent_name}}}
|
||||
let template = def.prompt.replace("{agent_name}", &def.agent);
|
||||
let (prompt, extra_keys) = resolve_placeholders(&template, store, &graph, &keys, count);
|
||||
|
||||
// Identity and instructions are now pulled in via {{node:KEY}} placeholders.
|
||||
// Agents should include {{node:core-personality}} and {{node:memory-instructions-core}}
|
||||
// in their prompt templates. The resolve_placeholders call below handles this.
|
||||
|
||||
// Merge query keys with any keys produced by placeholder resolution
|
||||
let mut all_keys = keys;
|
||||
all_keys.extend(extra_keys);
|
||||
Ok(super::prompts::AgentBatch { prompt, node_keys: all_keys })
|
||||
}
|
||||
|
||||
/// Convert a list of keys to ReplayItems with priority and graph metrics.
|
||||
pub fn keys_to_replay_items(
|
||||
store: &Store,
|
||||
keys: &[String],
|
||||
graph: &Graph,
|
||||
) -> Vec<ReplayItem> {
|
||||
keys.iter()
|
||||
.filter_map(|key| {
|
||||
let node = store.nodes.get(key)?;
|
||||
let priority = consolidation_priority(store, key, graph, None);
|
||||
let cc = graph.clustering_coefficient(key);
|
||||
|
||||
Some(ReplayItem {
|
||||
key: key.clone(),
|
||||
priority,
|
||||
interval_days: node.spaced_repetition_interval,
|
||||
emotion: node.emotion,
|
||||
cc,
|
||||
classification: "unknown",
|
||||
outlier_score: 0.0,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
544
src/subconscious/agents/digest.rs
Normal file
544
src/subconscious/agents/digest.rs
Normal file
|
|
@ -0,0 +1,544 @@
|
|||
// Episodic digest generation: daily, weekly, monthly, auto
|
||||
//
|
||||
// Three digest levels form a temporal hierarchy: daily digests summarize
|
||||
// journal entries, weekly digests summarize dailies, monthly digests
|
||||
// summarize weeklies. All three share the same generate/auto-detect
|
||||
// pipeline, parameterized by DigestLevel.
|
||||
|
||||
use super::llm;
|
||||
use crate::store::{self, Store, new_relation};
|
||||
use crate::neuro;
|
||||
|
||||
use chrono::{Datelike, Duration, Local, NaiveDate};
|
||||
use regex::Regex;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
// --- Digest level descriptors ---
|
||||
|
||||
/// Static description of one digest level (daily, weekly, monthly).
#[allow(clippy::type_complexity)]
struct DigestLevel {
    /// Level identifier; used for lookup and as the prefix of digest node keys.
    name: &'static str,
    /// Substituted for `{{LEVEL}}` in the digest prompt.
    title: &'static str,
    /// Substituted for `{{PERIOD}}` in the digest prompt.
    period: &'static str,
    /// Substituted for `{{INPUT_TITLE}}` in the digest prompt.
    input_title: &'static str,
    child_name: Option<&'static str>,  // None = journal (leaf), Some = child digest files
    /// Expand an arg into (canonical_label, dates covered).
    label_dates: fn(&str) -> Result<(String, Vec<String>), String>,
    /// Map a YYYY-MM-DD date to this level's label.
    date_to_label: fn(&str) -> Option<String>,
}
|
||||
|
||||
/// Leaf level: one digest per calendar date, built from journal entries.
/// The label is the date string itself and covers only that date.
const DAILY: DigestLevel = DigestLevel {
    name: "daily",
    title: "Daily",
    period: "Date",
    input_title: "Journal entries",
    child_name: None,
    label_dates: |date| {
        let day = date.to_owned();
        Ok((day.clone(), vec![day]))
    },
    date_to_label: |date| Some(date.to_owned()),
};
|
||||
|
||||
/// Week label and 7 dates (Mon-Sun) for the week containing `date`.
|
||||
fn week_dates(date: &str) -> Result<(String, Vec<String>), String> {
|
||||
let nd = NaiveDate::parse_from_str(date, "%Y-%m-%d")
|
||||
.map_err(|e| format!("bad date '{}': {}", date, e))?;
|
||||
let iso = nd.iso_week();
|
||||
let week_label = format!("{}-W{:02}", iso.year(), iso.week());
|
||||
let monday = nd - Duration::days(nd.weekday().num_days_from_monday() as i64);
|
||||
let dates = (0..7)
|
||||
.map(|i| (monday + Duration::days(i)).format("%Y-%m-%d").to_string())
|
||||
.collect();
|
||||
Ok((week_label, dates))
|
||||
}
|
||||
|
||||
const WEEKLY: DigestLevel = DigestLevel {
|
||||
name: "weekly",
|
||||
title: "Weekly",
|
||||
period: "Week",
|
||||
input_title: "Daily digests",
|
||||
child_name: Some("daily"),
|
||||
label_dates: |arg| {
|
||||
if !arg.contains('W') {
|
||||
return week_dates(arg);
|
||||
}
|
||||
let (y, w) = arg.split_once("-W")
|
||||
.ok_or_else(|| format!("bad week label: {}", arg))?;
|
||||
let year: i32 = y.parse().map_err(|_| format!("bad week year: {}", arg))?;
|
||||
let week: u32 = w.parse().map_err(|_| format!("bad week number: {}", arg))?;
|
||||
let monday = NaiveDate::from_isoywd_opt(year, week, chrono::Weekday::Mon)
|
||||
.ok_or_else(|| format!("invalid week: {}", arg))?;
|
||||
let dates = (0..7)
|
||||
.map(|i| (monday + Duration::days(i)).format("%Y-%m-%d").to_string())
|
||||
.collect();
|
||||
Ok((arg.to_string(), dates))
|
||||
},
|
||||
date_to_label: |date| week_dates(date).ok().map(|(l, _)| l),
|
||||
};
|
||||
|
||||
const MONTHLY: DigestLevel = DigestLevel {
|
||||
name: "monthly",
|
||||
title: "Monthly",
|
||||
period: "Month",
|
||||
input_title: "Weekly digests",
|
||||
child_name: Some("weekly"),
|
||||
label_dates: |arg| {
|
||||
let (year, month) = if arg.len() <= 7 {
|
||||
let d = NaiveDate::parse_from_str(&format!("{}-01", arg), "%Y-%m-%d")
|
||||
.map_err(|e| format!("bad month '{}': {}", arg, e))?;
|
||||
(d.year(), d.month())
|
||||
} else {
|
||||
let d = NaiveDate::parse_from_str(arg, "%Y-%m-%d")
|
||||
.map_err(|e| format!("bad date '{}': {}", arg, e))?;
|
||||
(d.year(), d.month())
|
||||
};
|
||||
let label = format!("{}-{:02}", year, month);
|
||||
let mut dates = Vec::new();
|
||||
let mut day = 1u32;
|
||||
while let Some(date) = NaiveDate::from_ymd_opt(year, month, day) {
|
||||
if date.month() != month { break; }
|
||||
dates.push(date.format("%Y-%m-%d").to_string());
|
||||
day += 1;
|
||||
}
|
||||
Ok((label, dates))
|
||||
},
|
||||
date_to_label: |date| NaiveDate::parse_from_str(date, "%Y-%m-%d")
|
||||
.ok().map(|d| format!("{}-{:02}", d.year(), d.month())),
|
||||
};
|
||||
|
||||
/// All digest levels, children before parents. `digest_auto` walks this
/// list in order, and `gather` looks up a level's child here by name.
const LEVELS: &[&DigestLevel] = &[&DAILY, &WEEKLY, &MONTHLY];
|
||||
|
||||
/// Build the store key of a digest node by joining level name and label
/// with a dash, e.g. "daily-2026-03-04" or "weekly-2026-W09".
fn digest_node_key(level_name: &str, label: &str) -> String {
    [level_name, label].join("-")
}
|
||||
|
||||
// --- Input gathering ---
|
||||
|
||||
/// Result of gathering inputs for a digest.
struct GatherResult {
    /// Canonical label of the period, as returned by the level's `label_dates`.
    label: String,
    /// (display_label, content) pairs for the prompt.
    inputs: Vec<(String, String)>,
    /// Store keys of source nodes — used to create structural links.
    source_keys: Vec<String>,
}
|
||||
|
||||
/// Load child digest content from the store.
|
||||
fn load_child_digests(store: &Store, prefix: &str, labels: &[String]) -> (Vec<(String, String)>, Vec<String>) {
|
||||
let mut digests = Vec::new();
|
||||
let mut keys = Vec::new();
|
||||
for label in labels {
|
||||
let key = digest_node_key(prefix, label);
|
||||
if let Some(node) = store.nodes.get(&key) {
|
||||
digests.push((label.clone(), node.content.clone()));
|
||||
keys.push(key);
|
||||
}
|
||||
}
|
||||
(digests, keys)
|
||||
}
|
||||
|
||||
/// Unified: gather inputs for any digest level.
|
||||
fn gather(level: &DigestLevel, store: &Store, arg: &str) -> Result<GatherResult, String> {
|
||||
let (label, dates) = (level.label_dates)(arg)?;
|
||||
|
||||
let (inputs, source_keys) = if let Some(child_name) = level.child_name {
|
||||
// Map parent's dates through child's date_to_label → child labels
|
||||
let child = LEVELS.iter()
|
||||
.find(|l| l.name == child_name)
|
||||
.expect("invalid child_name");
|
||||
let child_labels: Vec<String> = dates.iter()
|
||||
.filter_map(|d| (child.date_to_label)(d))
|
||||
.collect::<BTreeSet<_>>()
|
||||
.into_iter()
|
||||
.collect();
|
||||
load_child_digests(store, child_name, &child_labels)
|
||||
} else {
|
||||
// Leaf level: scan store for episodic entries matching date
|
||||
let mut entries: Vec<_> = store.nodes.iter()
|
||||
.filter(|(_, n)| n.node_type == store::NodeType::EpisodicSession
|
||||
&& n.timestamp > 0
|
||||
&& store::format_date(n.timestamp) == label)
|
||||
.map(|(key, n)| {
|
||||
(store::format_datetime(n.timestamp), n.content.clone(), key.clone())
|
||||
})
|
||||
.collect();
|
||||
entries.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
let keys = entries.iter().map(|(_, _, k)| k.clone()).collect();
|
||||
let inputs = entries.into_iter().map(|(dt, c, _)| (dt, c)).collect();
|
||||
(inputs, keys)
|
||||
};
|
||||
|
||||
Ok(GatherResult { label, inputs, source_keys })
|
||||
}
|
||||
|
||||
/// Unified: find candidate labels for auto-generation (past, not yet generated).
|
||||
fn find_candidates(level: &DigestLevel, dates: &[String], today: &str) -> Vec<String> {
|
||||
let today_label = (level.date_to_label)(today);
|
||||
dates.iter()
|
||||
.filter_map(|d| (level.date_to_label)(d))
|
||||
.collect::<BTreeSet<_>>()
|
||||
.into_iter()
|
||||
.filter(|l| Some(l) != today_label.as_ref())
|
||||
.collect()
|
||||
}
|
||||
|
||||
// --- Unified generator ---
|
||||
|
||||
/// Render `(label, content)` inputs as one prompt section.
///
/// With `daily` set, each entry gets a `###` heading followed by a blank
/// line; otherwise each entry is preceded by a `---` rule and a `##`
/// heading. Empty input gives an empty string.
fn format_inputs(inputs: &[(String, String)], daily: bool) -> String {
    inputs.iter()
        .map(|(label, content)| match daily {
            true => format!("\n### {}\n\n{}\n", label, content),
            false => format!("\n---\n## {}\n{}\n", label, content),
        })
        .collect()
}
|
||||
|
||||
fn generate_digest(
|
||||
store: &mut Store,
|
||||
level: &DigestLevel,
|
||||
label: &str,
|
||||
inputs: &[(String, String)],
|
||||
source_keys: &[String],
|
||||
) -> Result<(), String> {
|
||||
println!("Generating {} digest for {}...", level.name, label);
|
||||
|
||||
if inputs.is_empty() {
|
||||
println!(" No inputs found for {}", label);
|
||||
return Ok(());
|
||||
}
|
||||
println!(" {} inputs", inputs.len());
|
||||
|
||||
let keys = llm::semantic_keys(store);
|
||||
let keys_text = keys.iter()
|
||||
.map(|k| format!(" - {}", k))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let content = format_inputs(inputs, level.child_name.is_none());
|
||||
let covered = inputs.iter()
|
||||
.map(|(l, _)| l.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
// Load prompt from agent file; fall back to prompts dir
|
||||
let def = super::defs::get_def("digest");
|
||||
let template = match &def {
|
||||
Some(d) => d.prompt.clone(),
|
||||
None => {
|
||||
let path = crate::config::get().prompts_dir.join("digest.md");
|
||||
std::fs::read_to_string(&path)
|
||||
.map_err(|e| format!("load digest prompt: {}", e))?
|
||||
}
|
||||
};
|
||||
let prompt = template
|
||||
.replace("{{LEVEL}}", level.title)
|
||||
.replace("{{PERIOD}}", level.period)
|
||||
.replace("{{INPUT_TITLE}}", level.input_title)
|
||||
.replace("{{LABEL}}", label)
|
||||
.replace("{{CONTENT}}", &content)
|
||||
.replace("{{COVERED}}", &covered)
|
||||
.replace("{{KEYS}}", &keys_text);
|
||||
println!(" Prompt: {} chars (~{} tokens)", prompt.len(), prompt.len() / 4);
|
||||
|
||||
println!(" Calling Sonnet...");
|
||||
let digest = llm::call_simple("digest", &prompt)?;
|
||||
|
||||
let key = digest_node_key(level.name, label);
|
||||
store.upsert_provenance(&key, &digest, "digest:write")?;
|
||||
|
||||
// Structural links: connect all source entries to this digest
|
||||
let mut linked = 0;
|
||||
for source_key in source_keys {
|
||||
// Skip if link already exists
|
||||
let exists = store.relations.iter().any(|r|
|
||||
!r.deleted && r.source_key == *source_key && r.target_key == key);
|
||||
if exists { continue; }
|
||||
|
||||
let source_uuid = store.nodes.get(source_key)
|
||||
.map(|n| n.uuid).unwrap_or([0u8; 16]);
|
||||
let target_uuid = store.nodes.get(&key)
|
||||
.map(|n| n.uuid).unwrap_or([0u8; 16]);
|
||||
let mut rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
store::RelationType::Link, 0.8,
|
||||
source_key, &key,
|
||||
);
|
||||
rel.provenance = "digest:structural".to_string();
|
||||
store.add_relation(rel)?;
|
||||
linked += 1;
|
||||
}
|
||||
if linked > 0 {
|
||||
println!(" Linked {} source entries → {}", linked, key);
|
||||
}
|
||||
|
||||
store.save()?;
|
||||
println!(" Stored: {}", key);
|
||||
|
||||
println!(" Done: {} lines", digest.lines().count());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Public API ---
|
||||
|
||||
pub fn generate(store: &mut Store, level_name: &str, arg: &str) -> Result<(), String> {
|
||||
let level = LEVELS.iter()
|
||||
.find(|l| l.name == level_name)
|
||||
.ok_or_else(|| format!("unknown digest level: {}", level_name))?;
|
||||
let result = gather(level, store, arg)?;
|
||||
generate_digest(store, level, &result.label, &result.inputs, &result.source_keys)
|
||||
}
|
||||
|
||||
// --- Auto-detect and generate missing digests ---
|
||||
|
||||
pub fn digest_auto(store: &mut Store) -> Result<(), String> {
|
||||
let today = Local::now().format("%Y-%m-%d").to_string();
|
||||
|
||||
// Collect all dates with episodic entries
|
||||
let dates: Vec<String> = store.nodes.values()
|
||||
.filter(|n| n.node_type == store::NodeType::EpisodicSession && n.timestamp > 0)
|
||||
.map(|n| store::format_date(n.timestamp))
|
||||
.collect::<BTreeSet<_>>()
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let mut total = 0u32;
|
||||
|
||||
for level in LEVELS {
|
||||
let candidates = find_candidates(level, &dates, &today);
|
||||
let mut generated = 0u32;
|
||||
let mut skipped = 0u32;
|
||||
|
||||
for arg in &candidates {
|
||||
let result = gather(level, store, arg)?;
|
||||
let key = digest_node_key(level.name, &result.label);
|
||||
if store.nodes.contains_key(&key) {
|
||||
skipped += 1;
|
||||
continue;
|
||||
}
|
||||
if result.inputs.is_empty() { continue; }
|
||||
println!("[auto] Missing {} digest for {}", level.name, result.label);
|
||||
generate_digest(store, level, &result.label, &result.inputs, &result.source_keys)?;
|
||||
generated += 1;
|
||||
}
|
||||
|
||||
println!("[auto] {}: {} generated, {} existed", level.name, generated, skipped);
|
||||
total += generated;
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
println!("[auto] All digests up to date.");
|
||||
} else {
|
||||
println!("[auto] Generated {} total digests.", total);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Digest link parsing ---
|
||||
// Replaces digest-link-parser.py: parses ## Links sections from digest
|
||||
// files and applies them to the memory graph.
|
||||
|
||||
/// A parsed link from a digest's Links section.
///
/// `source` and `target` are normalized node keys, `reason` is the optional
/// parenthesised explanation (empty when absent), and `file` is the key of
/// the digest node the link was parsed from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DigestLink {
    pub source: String,
    pub target: String,
    pub reason: String,
    pub file: String,
}
|
||||
|
||||
/// Normalize a raw link target to a poc-memory key.
///
/// Steps, in order:
/// 1. Trim whitespace and surrounding backticks; empty input yields `""`.
/// 2. Text starting with "this " (case-insensitive) is a self-reference and
///    yields `""` so the caller can substitute the digest's own key.
/// 3. A trailing `.md` is dropped; otherwise `file.md#section` becomes
///    `file-section`.
/// 4. `daily/…`, `weekly/…` and `monthly/…` become `daily-…` etc.
/// 5. A bare `YYYY-MM-DD` date becomes `daily-YYYY-MM-DD`.
fn normalize_link_key(raw: &str) -> String {
    let trimmed = raw.trim().trim_matches('`').trim();
    if trimmed.is_empty() {
        return String::new();
    }

    // Self-references
    if trimmed.to_lowercase().starts_with("this ") {
        return String::new();
    }

    let mut key = trimmed.to_string();

    // Strip .md suffix if present, else fold "file.md#section" into one key.
    if let Some(stripped) = key.strip_suffix(".md") {
        key = stripped.to_string();
    } else if let Some((file, section)) = key.split_once('#') {
        if let Some(bare) = file.strip_suffix(".md") {
            key = format!("{}-{}", bare, section);
        }
    }

    // weekly/2026-W06 → weekly-2026-W06, etc.
    if let Some((prefix, rest)) = key.split_once('/') {
        if matches!(prefix, "daily" | "weekly" | "monthly") {
            key = format!("{}-{}", prefix, rest);
        }
    }

    // Bare date → daily digest
    if looks_like_bare_date(&key) {
        key.insert_str(0, "daily-");
    }

    key
}

/// True when `s` is exactly `DDDD-DD-DD` with ASCII digits.
///
/// This replaces a regex that was recompiled on every call. Unlike the
/// regex's Unicode-aware `\d`, only ASCII digits are accepted.
fn looks_like_bare_date(s: &str) -> bool {
    let bytes = s.as_bytes();
    bytes.len() == 10
        && bytes.iter().enumerate().all(|(idx, byte)| match idx {
            4 | 7 => *byte == b'-',
            _ => byte.is_ascii_digit(),
        })
}
|
||||
|
||||
/// Parse the Links section from a digest node's content.
|
||||
fn parse_digest_node_links(key: &str, content: &str) -> Vec<DigestLink> {
|
||||
|
||||
let link_re = Regex::new(r"^-\s+(.+?)\s*[→↔←]\s*(.+?)(?:\s*\((.+?)\))?\s*$").unwrap();
|
||||
let header_re = Regex::new(r"^##\s+Links").unwrap();
|
||||
let mut links = Vec::new();
|
||||
let mut in_links = false;
|
||||
|
||||
for line in content.lines() {
|
||||
if header_re.is_match(line) {
|
||||
in_links = true;
|
||||
continue;
|
||||
}
|
||||
if in_links && line.starts_with("## ") {
|
||||
in_links = false;
|
||||
continue;
|
||||
}
|
||||
if !in_links { continue; }
|
||||
if line.starts_with("###") || line.starts_with("**") { continue; }
|
||||
|
||||
if let Some(cap) = link_re.captures(line) {
|
||||
let raw_source = cap[1].trim();
|
||||
let raw_target = cap[2].trim();
|
||||
let reason = cap.get(3).map(|m| m.as_str().to_string()).unwrap_or_default();
|
||||
|
||||
let mut source = normalize_link_key(raw_source);
|
||||
let mut target = normalize_link_key(raw_target);
|
||||
|
||||
// Replace self-references with digest key
|
||||
if source.is_empty() { source = key.to_string(); }
|
||||
if target.is_empty() { target = key.to_string(); }
|
||||
|
||||
// Handle "this daily/weekly/monthly" in raw text
|
||||
let raw_s_lower = raw_source.to_lowercase();
|
||||
let raw_t_lower = raw_target.to_lowercase();
|
||||
if raw_s_lower.contains("this daily") || raw_s_lower.contains("this weekly")
|
||||
|| raw_s_lower.contains("this monthly")
|
||||
{
|
||||
source = key.to_string();
|
||||
}
|
||||
if raw_t_lower.contains("this daily") || raw_t_lower.contains("this weekly")
|
||||
|| raw_t_lower.contains("this monthly")
|
||||
{
|
||||
target = key.to_string();
|
||||
}
|
||||
|
||||
// Skip NEW: and self-links
|
||||
if source.starts_with("NEW:") || target.starts_with("NEW:") { continue; }
|
||||
if source == target { continue; }
|
||||
|
||||
links.push(DigestLink { source, target, reason, file: key.to_string() });
|
||||
}
|
||||
}
|
||||
|
||||
links
|
||||
}
|
||||
|
||||
/// Parse links from all digest nodes in the store.
|
||||
pub fn parse_all_digest_links(store: &Store) -> Vec<DigestLink> {
|
||||
let mut all_links = Vec::new();
|
||||
|
||||
let mut digest_keys: Vec<&String> = store.nodes.iter()
|
||||
.filter(|(_, n)| matches!(n.node_type,
|
||||
store::NodeType::EpisodicDaily
|
||||
| store::NodeType::EpisodicWeekly
|
||||
| store::NodeType::EpisodicMonthly))
|
||||
.map(|(k, _)| k)
|
||||
.collect();
|
||||
digest_keys.sort();
|
||||
|
||||
for key in digest_keys {
|
||||
if let Some(node) = store.nodes.get(key) {
|
||||
all_links.extend(parse_digest_node_links(key, &node.content));
|
||||
}
|
||||
}
|
||||
|
||||
// Deduplicate by (source, target) pair
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
all_links.retain(|link| seen.insert((link.source.clone(), link.target.clone())));
|
||||
|
||||
all_links
|
||||
}
|
||||
|
||||
/// Apply parsed digest links to the store.
|
||||
pub fn apply_digest_links(store: &mut Store, links: &[DigestLink]) -> (usize, usize, usize) {
|
||||
let mut applied = 0usize;
|
||||
let mut skipped = 0usize;
|
||||
let mut fallbacks = 0usize;
|
||||
|
||||
for link in links {
|
||||
// Try resolving both keys
|
||||
let source = match store.resolve_key(&link.source) {
|
||||
Ok(s) => s,
|
||||
Err(_) => {
|
||||
// Try stripping section anchor as fallback
|
||||
if let Some(base) = link.source.split('#').next() {
|
||||
match store.resolve_key(base) {
|
||||
Ok(s) => { fallbacks += 1; s }
|
||||
Err(_) => { skipped += 1; continue; }
|
||||
}
|
||||
} else {
|
||||
skipped += 1; continue;
|
||||
}
|
||||
}
|
||||
};
|
||||
let target = match store.resolve_key(&link.target) {
|
||||
Ok(t) => t,
|
||||
Err(_) => {
|
||||
if let Some(base) = link.target.split('#').next() {
|
||||
match store.resolve_key(base) {
|
||||
Ok(t) => { fallbacks += 1; t }
|
||||
Err(_) => { skipped += 1; continue; }
|
||||
}
|
||||
} else {
|
||||
skipped += 1; continue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Refine target to best-matching section if available
|
||||
let source_content = store.nodes.get(&source)
|
||||
.map(|n| n.content.as_str()).unwrap_or("");
|
||||
let target = neuro::refine_target(store, source_content, &target);
|
||||
|
||||
if source == target { skipped += 1; continue; }
|
||||
|
||||
// Check if link already exists
|
||||
let exists = store.relations.iter().any(|r|
|
||||
r.source_key == source && r.target_key == target && !r.deleted
|
||||
);
|
||||
if exists { skipped += 1; continue; }
|
||||
|
||||
let source_uuid = match store.nodes.get(&source) {
|
||||
Some(n) => n.uuid,
|
||||
None => { skipped += 1; continue; }
|
||||
};
|
||||
let target_uuid = match store.nodes.get(&target) {
|
||||
Some(n) => n.uuid,
|
||||
None => { skipped += 1; continue; }
|
||||
};
|
||||
|
||||
let rel = new_relation(
|
||||
source_uuid, target_uuid,
|
||||
store::RelationType::Link,
|
||||
0.5,
|
||||
&source, &target,
|
||||
);
|
||||
if store.add_relation(rel).is_ok() {
|
||||
println!(" + {} → {}", source, target);
|
||||
applied += 1;
|
||||
}
|
||||
}
|
||||
|
||||
(applied, skipped, fallbacks)
|
||||
}
|
||||
40
src/subconscious/agents/enrich.rs
Normal file
40
src/subconscious/agents/enrich.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
// Conversation extraction from JSONL transcripts
|
||||
//
|
||||
// extract_conversation — parse JSONL transcript to messages
|
||||
// split_on_compaction — split messages at compaction boundaries
|
||||
|
||||
/// Extract conversation messages from a JSONL transcript file.
|
||||
/// Returns (line_number, role, text, timestamp) tuples.
|
||||
pub fn extract_conversation(jsonl_path: &str) -> Result<Vec<(usize, String, String, String)>, String> {
|
||||
let path = std::path::Path::new(jsonl_path);
|
||||
let messages = super::transcript::parse_transcript(path)?;
|
||||
Ok(messages.into_iter()
|
||||
.map(|m| (m.line, m.role, m.text, m.timestamp))
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Text that begins the user message marking a compaction boundary.
pub const COMPACTION_MARKER: &str = "This session is being continued from a previous conversation that ran out of context";

/// Cut a message list into segments at compaction boundaries.
///
/// A boundary is a message whose role is `"user"` and whose text starts
/// with [`COMPACTION_MARKER`]. The boundary message becomes the first
/// message of the following segment. No empty segment is ever produced, so
/// a marker at the very start does not create a leading empty segment.
pub fn split_on_compaction(messages: Vec<(usize, String, String, String)>) -> Vec<Vec<(usize, String, String, String)>> {
    let mut segments = Vec::new();
    let mut open_segment = Vec::new();

    for message in messages {
        let (_, role, text, _) = &message;
        let is_boundary = role == "user" && text.starts_with(COMPACTION_MARKER);

        // Close the running segment before the boundary message joins the next one.
        if is_boundary && !open_segment.is_empty() {
            segments.push(std::mem::take(&mut open_segment));
        }
        open_segment.push(message);
    }

    if !open_segment.is_empty() {
        segments.push(open_segment);
    }
    segments
}
|
||||
312
src/subconscious/agents/knowledge.rs
Normal file
312
src/subconscious/agents/knowledge.rs
Normal file
|
|
@ -0,0 +1,312 @@
|
|||
// knowledge.rs — agent execution and conversation fragment selection
|
||||
//
|
||||
// Agent prompts live in agents/*.agent files, dispatched via defs.rs.
|
||||
// This module handles:
|
||||
// - Agent execution (build prompt → call LLM with tools → log)
|
||||
// - Conversation fragment selection (for observation agent)
|
||||
//
|
||||
// Agents apply changes via tool calls (poc-memory write/link-add/etc)
|
||||
// during the LLM call — no action parsing needed.
|
||||
|
||||
use super::llm;
|
||||
use crate::store::{self, Store};
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agent execution
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Result of running a single agent.
///
/// `output` is the text returned by the LLM call; `node_keys` are the keys
/// of the batch the agent was run on.
#[derive(Debug, Clone)]
pub struct AgentResult {
    pub output: String,
    pub node_keys: Vec<String>,
}
|
||||
|
||||
/// Run a single agent and return the result (no action application — tools handle that).
|
||||
pub fn run_and_apply(
|
||||
store: &mut Store,
|
||||
agent_name: &str,
|
||||
batch_size: usize,
|
||||
llm_tag: &str,
|
||||
) -> Result<(), String> {
|
||||
run_and_apply_with_log(store, agent_name, batch_size, llm_tag, &|_| {})
|
||||
}
|
||||
|
||||
pub fn run_and_apply_with_log(
|
||||
store: &mut Store,
|
||||
agent_name: &str,
|
||||
batch_size: usize,
|
||||
llm_tag: &str,
|
||||
log: &(dyn Fn(&str) + Sync),
|
||||
) -> Result<(), String> {
|
||||
run_and_apply_excluded(store, agent_name, batch_size, llm_tag, log, &Default::default())
|
||||
}
|
||||
|
||||
/// Like run_and_apply_with_log but with an in-flight exclusion set.
|
||||
/// Returns the keys that were processed (for the daemon to track).
|
||||
pub fn run_and_apply_excluded(
|
||||
store: &mut Store,
|
||||
agent_name: &str,
|
||||
batch_size: usize,
|
||||
llm_tag: &str,
|
||||
log: &(dyn Fn(&str) + Sync),
|
||||
exclude: &std::collections::HashSet<String>,
|
||||
) -> Result<(), String> {
|
||||
let result = run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, exclude)?;
|
||||
|
||||
// Mark conversation segments as mined after successful processing
|
||||
if agent_name == "observation" {
|
||||
mark_observation_done(&result.node_keys);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run an agent with explicit target keys, bypassing the agent's query.
|
||||
pub fn run_one_agent_with_keys(
|
||||
store: &mut Store,
|
||||
agent_name: &str,
|
||||
keys: &[String],
|
||||
count: usize,
|
||||
llm_tag: &str,
|
||||
log: &(dyn Fn(&str) + Sync),
|
||||
) -> Result<AgentResult, String> {
|
||||
let def = super::defs::get_def(agent_name)
|
||||
.ok_or_else(|| format!("no .agent file for {}", agent_name))?;
|
||||
|
||||
log(&format!("targeting: {}", keys.join(", ")));
|
||||
let graph = store.build_graph();
|
||||
let (prompt, extra_keys) = super::defs::resolve_placeholders(
|
||||
&def.prompt, store, &graph, keys, count,
|
||||
);
|
||||
let mut all_keys: Vec<String> = keys.to_vec();
|
||||
all_keys.extend(extra_keys);
|
||||
let agent_batch = super::prompts::AgentBatch { prompt, node_keys: all_keys };
|
||||
|
||||
// Record visits eagerly so concurrent agents pick different seeds
|
||||
if !agent_batch.node_keys.is_empty() {
|
||||
store.record_agent_visits(&agent_batch.node_keys, agent_name).ok();
|
||||
}
|
||||
|
||||
run_one_agent_inner(store, agent_name, &def, agent_batch, llm_tag, log)
|
||||
}
|
||||
|
||||
pub fn run_one_agent(
|
||||
store: &mut Store,
|
||||
agent_name: &str,
|
||||
batch_size: usize,
|
||||
llm_tag: &str,
|
||||
log: &(dyn Fn(&str) + Sync),
|
||||
) -> Result<AgentResult, String> {
|
||||
run_one_agent_excluded(store, agent_name, batch_size, llm_tag, log, &Default::default())
|
||||
}
|
||||
|
||||
/// Like run_one_agent but excludes nodes currently being worked on by other agents.
|
||||
pub fn run_one_agent_excluded(
|
||||
store: &mut Store,
|
||||
agent_name: &str,
|
||||
batch_size: usize,
|
||||
llm_tag: &str,
|
||||
log: &(dyn Fn(&str) + Sync),
|
||||
exclude: &std::collections::HashSet<String>,
|
||||
) -> Result<AgentResult, String> {
|
||||
let def = super::defs::get_def(agent_name)
|
||||
.ok_or_else(|| format!("no .agent file for {}", agent_name))?;
|
||||
|
||||
log("building prompt");
|
||||
let effective_count = def.count.unwrap_or(batch_size);
|
||||
let agent_batch = super::defs::run_agent(store, &def, effective_count, exclude)?;
|
||||
|
||||
run_one_agent_inner(store, agent_name, &def, agent_batch, llm_tag, log)
|
||||
}
|
||||
|
||||
fn run_one_agent_inner(
|
||||
_store: &mut Store,
|
||||
agent_name: &str,
|
||||
def: &super::defs::AgentDef,
|
||||
agent_batch: super::prompts::AgentBatch,
|
||||
_llm_tag: &str,
|
||||
log: &(dyn Fn(&str) + Sync),
|
||||
) -> Result<AgentResult, String> {
|
||||
let prompt_kb = agent_batch.prompt.len() / 1024;
|
||||
let tools_desc = if def.tools.is_empty() { "no tools".into() }
|
||||
else { format!("{} tools", def.tools.len()) };
|
||||
log(&format!("prompt {}KB, model={}, {}, {} nodes",
|
||||
prompt_kb, def.model, tools_desc, agent_batch.node_keys.len()));
|
||||
|
||||
// Guard: reject prompts that would exceed model context.
|
||||
// Rough estimate: 1 token ≈ 4 bytes. Reserve 16K tokens for output.
|
||||
let max_prompt_bytes = 800_000; // ~200K tokens, leaves room for output
|
||||
if agent_batch.prompt.len() > max_prompt_bytes {
|
||||
// Log the oversized prompt for debugging
|
||||
let oversize_dir = store::memory_dir().join("llm-logs").join("oversized");
|
||||
fs::create_dir_all(&oversize_dir).ok();
|
||||
let oversize_path = oversize_dir.join(format!("{}-{}.txt",
|
||||
agent_name, store::compact_timestamp()));
|
||||
let header = format!("=== OVERSIZED PROMPT ===\nagent: {}\nsize: {}KB (max {}KB)\nnodes: {:?}\n\n",
|
||||
agent_name, prompt_kb, max_prompt_bytes / 1024, agent_batch.node_keys);
|
||||
fs::write(&oversize_path, format!("{}{}", header, agent_batch.prompt)).ok();
|
||||
log(&format!("oversized prompt logged to {}", oversize_path.display()));
|
||||
|
||||
return Err(format!(
|
||||
"prompt too large: {}KB (max {}KB) — seed nodes may be oversized",
|
||||
prompt_kb, max_prompt_bytes / 1024,
|
||||
));
|
||||
}
|
||||
for key in &agent_batch.node_keys {
|
||||
log(&format!(" node: {}", key));
|
||||
}
|
||||
|
||||
log(&format!("=== PROMPT ===\n\n{}\n\n=== CALLING LLM ===", agent_batch.prompt));
|
||||
|
||||
let output = llm::call_for_def(def, &agent_batch.prompt, log)?;
|
||||
|
||||
|
||||
Ok(AgentResult {
|
||||
output,
|
||||
node_keys: agent_batch.node_keys,
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Conversation fragment selection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Select conversation fragments (per-segment) for the observation extractor.
|
||||
/// Uses the transcript-progress.capnp log for dedup — no stub nodes.
|
||||
/// Does NOT pre-mark segments; caller must call mark_observation_done() after success.
|
||||
pub fn select_conversation_fragments(n: usize) -> Vec<(String, String)> {
|
||||
let projects = crate::config::get().projects_dir.clone();
|
||||
if !projects.exists() { return Vec::new(); }
|
||||
|
||||
let store = match crate::store::Store::load() {
|
||||
Ok(s) => s,
|
||||
Err(_) => return Vec::new(),
|
||||
};
|
||||
|
||||
let mut jsonl_files: Vec<PathBuf> = Vec::new();
|
||||
if let Ok(dirs) = fs::read_dir(&projects) {
|
||||
for dir in dirs.filter_map(|e| e.ok()) {
|
||||
if !dir.path().is_dir() { continue; }
|
||||
if let Ok(files) = fs::read_dir(dir.path()) {
|
||||
for f in files.filter_map(|e| e.ok()) {
|
||||
let p = f.path();
|
||||
if p.extension().map(|x| x == "jsonl").unwrap_or(false)
|
||||
&& let Ok(meta) = p.metadata()
|
||||
&& meta.len() > 50_000 {
|
||||
jsonl_files.push(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Collect unmined segments across all transcripts
|
||||
let mut candidates: Vec<(String, String)> = Vec::new();
|
||||
for path in &jsonl_files {
|
||||
let path_str = path.to_string_lossy();
|
||||
let messages = match super::enrich::extract_conversation(&path_str) {
|
||||
Ok(m) => m,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let session_id = path.file_stem()
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| "unknown".into());
|
||||
|
||||
let segments = super::enrich::split_on_compaction(messages);
|
||||
for (seg_idx, segment) in segments.into_iter().enumerate() {
|
||||
if store.is_segment_mined(&session_id, seg_idx as u32, "observation") {
|
||||
continue;
|
||||
}
|
||||
// Skip segments with too few assistant messages (rate limits, errors)
|
||||
let assistant_msgs = segment.iter()
|
||||
.filter(|(_, role, _, _)| role == "assistant")
|
||||
.count();
|
||||
if assistant_msgs < 2 {
|
||||
continue;
|
||||
}
|
||||
// Skip segments that are just rate limit errors
|
||||
let has_rate_limit = segment.iter().any(|(_, _, text, _)|
|
||||
text.contains("hit your limit") || text.contains("rate limit"));
|
||||
if has_rate_limit && assistant_msgs < 3 {
|
||||
continue;
|
||||
}
|
||||
let text = format_segment(&segment);
|
||||
if text.len() < 500 {
|
||||
continue;
|
||||
}
|
||||
const CHUNK_SIZE: usize = 50_000;
|
||||
const OVERLAP: usize = 10_000;
|
||||
if text.len() <= CHUNK_SIZE {
|
||||
let id = format!("{}.{}", session_id, seg_idx);
|
||||
candidates.push((id, text));
|
||||
} else {
|
||||
// Split on line boundaries with overlap
|
||||
let lines: Vec<&str> = text.lines().collect();
|
||||
let mut start_line = 0;
|
||||
let mut chunk_idx = 0;
|
||||
while start_line < lines.len() {
|
||||
let mut end_line = start_line;
|
||||
let mut size = 0;
|
||||
while end_line < lines.len() && size < CHUNK_SIZE {
|
||||
size += lines[end_line].len() + 1;
|
||||
end_line += 1;
|
||||
}
|
||||
let chunk: String = lines[start_line..end_line].join("\n");
|
||||
let id = format!("{}.{}.{}", session_id, seg_idx, chunk_idx);
|
||||
candidates.push((id, chunk));
|
||||
if end_line >= lines.len() { break; }
|
||||
// Back up by overlap amount for next chunk
|
||||
let mut overlap_size = 0;
|
||||
let mut overlap_start = end_line;
|
||||
while overlap_start > start_line && overlap_size < OVERLAP {
|
||||
overlap_start -= 1;
|
||||
overlap_size += lines[overlap_start].len() + 1;
|
||||
}
|
||||
start_line = overlap_start;
|
||||
chunk_idx += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if candidates.len() >= n { break; }
|
||||
}
|
||||
|
||||
candidates.truncate(n);
|
||||
candidates
|
||||
}
|
||||
|
||||
/// Mark observation segments as successfully mined (call AFTER the agent succeeds).
|
||||
pub fn mark_observation_done(fragment_ids: &[String]) {
|
||||
let mut store = match crate::store::Store::load() {
|
||||
Ok(s) => s,
|
||||
Err(_) => return,
|
||||
};
|
||||
for id in fragment_ids {
|
||||
if let Some((session_id, seg_str)) = id.rsplit_once('.')
|
||||
&& let Ok(seg) = seg_str.parse::<u32>() {
|
||||
let _ = store.mark_segment_mined(session_id, seg, "observation");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Format a segment's messages into readable text for the observation agent.
|
||||
fn format_segment(messages: &[(usize, String, String, String)]) -> String {
|
||||
let cfg = crate::config::get();
|
||||
let mut fragments = Vec::new();
|
||||
|
||||
for (_, role, text, ts) in messages {
|
||||
let min_len = if role == "user" { 5 } else { 10 };
|
||||
if text.len() <= min_len { continue; }
|
||||
|
||||
let name = if role == "user" { &cfg.user_name } else { &cfg.assistant_name };
|
||||
if ts.is_empty() {
|
||||
fragments.push(format!("**{}:** {}", name, text));
|
||||
} else {
|
||||
fragments.push(format!("**{}** {}: {}", name, &ts[..ts.len().min(19)], text));
|
||||
}
|
||||
}
|
||||
fragments.join("\n\n")
|
||||
}
|
||||
73
src/subconscious/agents/llm.rs
Normal file
73
src/subconscious/agents/llm.rs
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
// LLM utilities: model invocation via direct API
|
||||
|
||||
use crate::store::Store;
|
||||
|
||||
use regex::Regex;
|
||||
use std::fs;
|
||||
|
||||
/// Simple LLM call for non-agent uses (audit, digest, compare).
|
||||
/// Logs to llm-logs/{caller}/ file.
|
||||
pub(crate) fn call_simple(caller: &str, prompt: &str) -> Result<String, String> {
|
||||
let log_dir = crate::store::memory_dir().join("llm-logs").join(caller);
|
||||
fs::create_dir_all(&log_dir).ok();
|
||||
let log_path = log_dir.join(format!("{}.txt", crate::store::compact_timestamp()));
|
||||
|
||||
use std::io::Write;
|
||||
let log = move |msg: &str| {
|
||||
if let Ok(mut f) = fs::OpenOptions::new()
|
||||
.create(true).append(true).open(&log_path)
|
||||
{
|
||||
let _ = writeln!(f, "{}", msg);
|
||||
}
|
||||
};
|
||||
|
||||
super::api::call_api_with_tools_sync(caller, prompt, None, &log)
|
||||
}
|
||||
|
||||
/// Call a model using an agent definition's configuration.
|
||||
pub(crate) fn call_for_def(
|
||||
def: &super::defs::AgentDef,
|
||||
prompt: &str,
|
||||
log: &(dyn Fn(&str) + Sync),
|
||||
) -> Result<String, String> {
|
||||
super::api::call_api_with_tools_sync(&def.agent, prompt, def.temperature, log)
|
||||
}
|
||||
|
||||
/// Parse a JSON response, handling markdown fences.
|
||||
pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, String> {
|
||||
let cleaned = response.trim();
|
||||
let cleaned = cleaned.strip_prefix("```json").unwrap_or(cleaned);
|
||||
let cleaned = cleaned.strip_prefix("```").unwrap_or(cleaned);
|
||||
let cleaned = cleaned.strip_suffix("```").unwrap_or(cleaned);
|
||||
let cleaned = cleaned.trim();
|
||||
|
||||
if let Ok(v) = serde_json::from_str(cleaned) {
|
||||
return Ok(v);
|
||||
}
|
||||
|
||||
// Try to find JSON object or array
|
||||
let re_obj = Regex::new(r"\{[\s\S]*\}").unwrap();
|
||||
let re_arr = Regex::new(r"\[[\s\S]*\]").unwrap();
|
||||
|
||||
if let Some(m) = re_obj.find(cleaned)
|
||||
&& let Ok(v) = serde_json::from_str(m.as_str()) {
|
||||
return Ok(v);
|
||||
}
|
||||
if let Some(m) = re_arr.find(cleaned)
|
||||
&& let Ok(v) = serde_json::from_str(m.as_str()) {
|
||||
return Ok(v);
|
||||
}
|
||||
|
||||
let preview = crate::util::first_n_chars(cleaned, 200);
|
||||
Err(format!("no valid JSON in response: {preview}..."))
|
||||
}
|
||||
|
||||
/// Get all keys for prompt context.
|
||||
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
|
||||
let mut keys: Vec<String> = store.nodes.keys()
|
||||
.cloned()
|
||||
.collect();
|
||||
keys.sort();
|
||||
keys.truncate(200);
|
||||
keys
|
||||
}
|
||||
28
src/subconscious/agents/mod.rs
Normal file
28
src/subconscious/agents/mod.rs
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
// Agent layer: LLM-powered operations on the memory graph
|
||||
//
|
||||
// Everything here calls external models (Sonnet, Haiku) or orchestrates
|
||||
// sequences of such calls. The core graph infrastructure (store, graph,
|
||||
// spectral, search, similarity) lives at the crate root.
|
||||
//
|
||||
// llm — model invocation, response parsing
|
||||
// prompts — prompt generation from store data
|
||||
// defs — agent file loading and placeholder resolution
|
||||
// audit — link quality review via Sonnet
|
||||
// consolidate — full consolidation pipeline
|
||||
// knowledge — agent execution, conversation fragment selection
|
||||
// enrich — conversation extraction from JSONL transcripts, compaction splitting
|
||||
// digest — episodic digest generation (daily/weekly/monthly)
|
||||
// daemon — background job scheduler
|
||||
// transcript — shared JSONL transcript parsing
|
||||
|
||||
pub mod transcript;
|
||||
pub mod api;
|
||||
pub mod llm;
|
||||
pub mod prompts;
|
||||
pub mod defs;
|
||||
pub mod audit;
|
||||
pub mod consolidate;
|
||||
pub mod knowledge;
|
||||
pub mod enrich;
|
||||
pub mod digest;
|
||||
pub mod daemon;
|
||||
431
src/subconscious/agents/prompts.rs
Normal file
431
src/subconscious/agents/prompts.rs
Normal file
|
|
@ -0,0 +1,431 @@
|
|||
// Agent prompt generation and formatting. Presentation logic —
|
||||
// builds text prompts from store data for consolidation agents.
|
||||
|
||||
use crate::store::Store;
|
||||
use crate::graph::Graph;
|
||||
|
||||
use crate::neuro::{
|
||||
ReplayItem,
|
||||
replay_queue, detect_interference,
|
||||
};
|
||||
|
||||
/// Result of building an agent prompt — includes both the prompt text
/// and the keys of nodes selected for processing, so the caller can
/// record visits after successful completion.
#[derive(Debug, Clone)]
pub struct AgentBatch {
    pub prompt: String,
    pub node_keys: Vec<String>,
}
|
||||
|
||||
/// Load a prompt template, replacing {{PLACEHOLDER}} with data
|
||||
pub fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
|
||||
let path = crate::config::get().prompts_dir.join(format!("{}.md", name));
|
||||
let mut content = std::fs::read_to_string(&path)
|
||||
.map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
|
||||
for (placeholder, data) in replacements {
|
||||
content = content.replace(placeholder, data);
|
||||
}
|
||||
Ok(content)
|
||||
}
|
||||
|
||||
pub fn format_topology_header(graph: &Graph) -> String {
|
||||
let sigma = graph.small_world_sigma();
|
||||
let alpha = graph.degree_power_law_exponent();
|
||||
let gini = graph.degree_gini();
|
||||
let avg_cc = graph.avg_clustering_coefficient();
|
||||
let n = graph.nodes().len();
|
||||
let e = graph.edge_count();
|
||||
|
||||
// Identify saturated hubs — nodes with degree well above threshold
|
||||
let threshold = graph.hub_threshold();
|
||||
let mut hubs: Vec<_> = graph.nodes().iter()
|
||||
.map(|k| (k.clone(), graph.degree(k)))
|
||||
.filter(|(_, d)| *d >= threshold)
|
||||
.collect();
|
||||
hubs.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
hubs.truncate(15);
|
||||
|
||||
let hub_list = if hubs.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
let lines: Vec<String> = hubs.iter()
|
||||
.map(|(k, d)| format!(" - {} (degree {})", k, d))
|
||||
.collect();
|
||||
format!(
|
||||
"### SATURATED HUBS — DO NOT LINK TO THESE\n\
|
||||
The following nodes are already over-connected. Adding more links\n\
|
||||
to them makes the graph worse (star topology). Find lateral\n\
|
||||
connections between peripheral nodes instead.\n\n{}\n\n\
|
||||
Only link to a hub if it is genuinely the ONLY reasonable target.\n\n",
|
||||
lines.join("\n"))
|
||||
};
|
||||
|
||||
format!(
|
||||
"## Current graph topology\n\
|
||||
Nodes: {} Edges: {} Communities: {}\n\
|
||||
Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
|
||||
Avg clustering coefficient: {:.4}\n\n\
|
||||
{}\
|
||||
Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
|
||||
Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
|
||||
n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
|
||||
}
|
||||
|
||||
pub fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
|
||||
let hub_thresh = graph.hub_threshold();
|
||||
let mut out = String::new();
|
||||
for item in items {
|
||||
let node = match store.nodes.get(&item.key) {
|
||||
Some(n) => n,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
out.push_str(&format!("## {} \n", item.key));
|
||||
out.push_str(&format!("Priority: {:.3} CC: {:.3} Emotion: {:.1} ",
|
||||
item.priority, item.cc, item.emotion));
|
||||
out.push_str(&format!("Interval: {}d\n",
|
||||
node.spaced_repetition_interval));
|
||||
if item.outlier_score > 0.0 {
|
||||
out.push_str(&format!("Spectral: {} (outlier={:.1})\n",
|
||||
item.classification, item.outlier_score));
|
||||
}
|
||||
|
||||
if let Some(community) = node.community_id {
|
||||
out.push_str(&format!("Community: {} ", community));
|
||||
}
|
||||
let deg = graph.degree(&item.key);
|
||||
let cc = graph.clustering_coefficient(&item.key);
|
||||
|
||||
// Hub-link ratio: what fraction of this node's edges go to hubs?
|
||||
let neighbors = graph.neighbors(&item.key);
|
||||
let hub_links = neighbors.iter()
|
||||
.filter(|(n, _)| graph.degree(n) >= hub_thresh)
|
||||
.count();
|
||||
let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
|
||||
let is_hub = deg >= hub_thresh;
|
||||
|
||||
out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
|
||||
deg, cc, hub_ratio * 100.0, hub_links, deg));
|
||||
if is_hub {
|
||||
out.push_str(" ← THIS IS A HUB");
|
||||
} else if hub_ratio > 0.6 {
|
||||
out.push_str(" ← mostly hub-connected, needs lateral links");
|
||||
}
|
||||
out.push('\n');
|
||||
|
||||
let hits = crate::counters::search_hit_count(&item.key);
|
||||
if hits > 0 {
|
||||
out.push_str(&format!("Search hits: {} ← actively found by search, prefer to keep\n", hits));
|
||||
}
|
||||
|
||||
// Full content — the agent needs to see everything to do quality work
|
||||
let content = &node.content;
|
||||
out.push_str(&format!("\nContent:\n{}\n\n", content));
|
||||
|
||||
// Neighbors
|
||||
let neighbors = graph.neighbors(&item.key);
|
||||
if !neighbors.is_empty() {
|
||||
out.push_str("Neighbors:\n");
|
||||
for (n, strength) in neighbors.iter().take(15) {
|
||||
let n_cc = graph.clustering_coefficient(n);
|
||||
let n_community = store.nodes.get(n.as_str())
|
||||
.and_then(|n| n.community_id);
|
||||
out.push_str(&format!(" - {} (str={:.2}, cc={:.3}",
|
||||
n, strength, n_cc));
|
||||
if let Some(c) = n_community {
|
||||
out.push_str(&format!(", c{}", c));
|
||||
}
|
||||
out.push_str(")\n");
|
||||
}
|
||||
}
|
||||
|
||||
out.push_str("\n---\n\n");
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
pub fn format_health_section(store: &Store, graph: &Graph) -> String {
|
||||
use crate::graph;
|
||||
|
||||
let health = graph::health_report(graph, store);
|
||||
|
||||
let mut out = health;
|
||||
out.push_str("\n\n## Weight distribution\n");
|
||||
|
||||
// Weight histogram
|
||||
let mut buckets = [0u32; 10]; // 0.0-0.1, 0.1-0.2, ..., 0.9-1.0
|
||||
for node in store.nodes.values() {
|
||||
let bucket = ((node.weight * 10.0) as usize).min(9);
|
||||
buckets[bucket] += 1;
|
||||
}
|
||||
for (i, &count) in buckets.iter().enumerate() {
|
||||
let lo = i as f32 / 10.0;
|
||||
let hi = (i + 1) as f32 / 10.0;
|
||||
let bar = "█".repeat((count as usize) / 10);
|
||||
out.push_str(&format!(" {:.1}-{:.1}: {:4} {}\n", lo, hi, count, bar));
|
||||
}
|
||||
|
||||
// Near-prune nodes
|
||||
let near_prune: Vec<_> = store.nodes.iter()
|
||||
.filter(|(_, n)| n.weight < 0.15)
|
||||
.map(|(k, n)| (k.clone(), n.weight))
|
||||
.collect();
|
||||
if !near_prune.is_empty() {
|
||||
out.push_str(&format!("\n## Near-prune nodes ({} total)\n", near_prune.len()));
|
||||
for (k, w) in near_prune.iter().take(20) {
|
||||
out.push_str(&format!(" [{:.3}] {}\n", w, k));
|
||||
}
|
||||
}
|
||||
|
||||
// Community sizes
|
||||
let communities = graph.communities();
|
||||
let mut comm_sizes: std::collections::HashMap<u32, Vec<String>> = std::collections::HashMap::new();
|
||||
for (key, &label) in communities {
|
||||
comm_sizes.entry(label).or_default().push(key.clone());
|
||||
}
|
||||
let mut sizes: Vec<_> = comm_sizes.iter()
|
||||
.map(|(id, members)| (*id, members.len(), members.clone()))
|
||||
.collect();
|
||||
sizes.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
|
||||
out.push_str("\n## Largest communities\n");
|
||||
for (id, size, members) in sizes.iter().take(10) {
|
||||
out.push_str(&format!(" Community {} ({} nodes): ", id, size));
|
||||
let sample: Vec<_> = members.iter().take(5).map(|s| s.as_str()).collect();
|
||||
out.push_str(&sample.join(", "));
|
||||
if *size > 5 { out.push_str(", ..."); }
|
||||
out.push('\n');
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
pub fn format_pairs_section(
|
||||
pairs: &[(String, String, f32)],
|
||||
store: &Store,
|
||||
graph: &Graph,
|
||||
) -> String {
|
||||
let mut out = String::new();
|
||||
let communities = graph.communities();
|
||||
|
||||
for (a, b, sim) in pairs {
|
||||
out.push_str(&format!("## Pair: similarity={:.3}\n", sim));
|
||||
|
||||
let ca = communities.get(a).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
|
||||
let cb = communities.get(b).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
|
||||
|
||||
// Node A
|
||||
out.push_str(&format!("\n### {} ({})\n", a, ca));
|
||||
if let Some(node) = store.nodes.get(a) {
|
||||
let content = crate::util::truncate(&node.content, 500, "...");
|
||||
out.push_str(&format!("Weight: {:.2}\n{}\n",
|
||||
node.weight, content));
|
||||
}
|
||||
|
||||
// Node B
|
||||
out.push_str(&format!("\n### {} ({})\n", b, cb));
|
||||
if let Some(node) = store.nodes.get(b) {
|
||||
let content = crate::util::truncate(&node.content, 500, "...");
|
||||
out.push_str(&format!("Weight: {:.2}\n{}\n",
|
||||
node.weight, content));
|
||||
}
|
||||
|
||||
out.push_str("\n---\n\n");
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
pub fn format_rename_candidates(store: &Store, count: usize) -> (Vec<String>, String) {
|
||||
let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter()
|
||||
.filter(|(key, _)| {
|
||||
if key.starts_with("_facts-") { return true; }
|
||||
if key.len() < 60 { return false; }
|
||||
if key.starts_with("journal#j-") { return true; }
|
||||
if key.starts_with("_mined-transcripts#f-") { return true; }
|
||||
false
|
||||
})
|
||||
.map(|(k, n)| (k.as_str(), n))
|
||||
.collect();
|
||||
|
||||
// Deprioritize nodes actively found by search — renaming them would
|
||||
// break working queries. Sort by: search hits (ascending), then
|
||||
// least-recently visited. Nodes with many hits sink to the bottom.
|
||||
let hit_counts = crate::counters::all_search_hits();
|
||||
let hit_map: std::collections::HashMap<&str, u64> = hit_counts.iter()
|
||||
.map(|(k, v)| (k.as_str(), *v))
|
||||
.collect();
|
||||
candidates.sort_by_key(|(key, _)| {
|
||||
let hits = hit_map.get(key).copied().unwrap_or(0);
|
||||
(hits, store.last_visited(key, "rename"))
|
||||
});
|
||||
candidates.truncate(count);
|
||||
|
||||
let keys: Vec<String> = candidates.iter().map(|(k, _)| k.to_string()).collect();
|
||||
|
||||
let mut out = String::new();
|
||||
out.push_str(&format!("## Nodes to rename ({} of {} candidates)\n\n",
|
||||
candidates.len(),
|
||||
store.nodes.keys().filter(|k| k.starts_with("_facts-") ||
|
||||
(k.len() >= 60 &&
|
||||
(k.starts_with("journal#j-") || k.starts_with("_mined-transcripts#f-")))).count()));
|
||||
|
||||
for (key, node) in &candidates {
|
||||
out.push_str(&format!("### {}\n", key));
|
||||
let created = if node.timestamp > 0 {
|
||||
crate::store::format_datetime(node.timestamp)
|
||||
} else {
|
||||
"unknown".to_string()
|
||||
};
|
||||
out.push_str(&format!("Created: {}\n", created));
|
||||
|
||||
let hits = hit_map.get(key).copied().unwrap_or(0);
|
||||
if hits > 0 {
|
||||
out.push_str(&format!("Search hits: {} ← actively found by search, prefer to keep current name\n", hits));
|
||||
}
|
||||
|
||||
let content = &node.content;
|
||||
if content.len() > 800 {
|
||||
let truncated = crate::util::truncate(content, 800, "\n[...]");
|
||||
out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n\n",
|
||||
content.len(), truncated));
|
||||
} else {
|
||||
out.push_str(&format!("\nContent:\n{}\n\n", content));
|
||||
}
|
||||
|
||||
out.push_str("---\n\n");
|
||||
}
|
||||
(keys, out)
|
||||
}
|
||||
|
||||
/// Get split candidates sorted by size (largest first)
|
||||
pub fn split_candidates(store: &Store) -> Vec<String> {
|
||||
let mut candidates: Vec<(&str, usize)> = store.nodes.iter()
|
||||
.filter(|(key, node)| {
|
||||
!key.starts_with('_')
|
||||
&& !node.deleted
|
||||
&& matches!(node.node_type, crate::store::NodeType::Semantic)
|
||||
})
|
||||
.map(|(k, n)| (k.as_str(), n.content.len()))
|
||||
.collect();
|
||||
candidates.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
candidates.into_iter().map(|(k, _)| k.to_string()).collect()
|
||||
}
|
||||
|
||||
/// Format a single node for split-plan prompt (phase 1)
|
||||
pub fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) -> String {
|
||||
let communities = graph.communities();
|
||||
let node = match store.nodes.get(key) {
|
||||
Some(n) => n,
|
||||
None => return format!("Node '{}' not found\n", key),
|
||||
};
|
||||
|
||||
let mut out = String::new();
|
||||
out.push_str(&format!("### {} ({} chars)\n", key, node.content.len()));
|
||||
|
||||
// Show neighbors grouped by community
|
||||
let neighbors = graph.neighbors(key);
|
||||
if !neighbors.is_empty() {
|
||||
let mut by_community: std::collections::BTreeMap<String, Vec<(&str, f32)>> =
|
||||
std::collections::BTreeMap::new();
|
||||
for (nkey, strength) in &neighbors {
|
||||
let comm = communities.get(nkey.as_str())
|
||||
.map(|c| format!("c{}", c))
|
||||
.unwrap_or_else(|| "unclustered".into());
|
||||
by_community.entry(comm)
|
||||
.or_default()
|
||||
.push((nkey.as_str(), *strength));
|
||||
}
|
||||
|
||||
out.push_str("\nNeighbors by community:\n");
|
||||
for (comm, members) in &by_community {
|
||||
out.push_str(&format!(" {} ({}):", comm, members.len()));
|
||||
for (nkey, strength) in members.iter().take(5) {
|
||||
out.push_str(&format!(" {}({:.2})", nkey, strength));
|
||||
}
|
||||
if members.len() > 5 {
|
||||
out.push_str(&format!(" +{} more", members.len() - 5));
|
||||
}
|
||||
out.push('\n');
|
||||
}
|
||||
}
|
||||
|
||||
// Full content
|
||||
out.push_str(&format!("\nContent:\n{}\n\n", node.content));
|
||||
out.push_str("---\n\n");
|
||||
out
|
||||
}
|
||||
|
||||
/// Build split-plan prompt for a single node (phase 1).
|
||||
/// Uses the split.agent template with placeholders resolved for the given key.
|
||||
pub fn split_plan_prompt(store: &Store, key: &str) -> Result<String, String> {
|
||||
let def = super::defs::get_def("split")
|
||||
.ok_or_else(|| "no split.agent file".to_string())?;
|
||||
let graph = store.build_graph();
|
||||
// Override the query — we have a specific key to split
|
||||
let keys = vec![key.to_string()];
|
||||
let (prompt, _) = super::defs::resolve_placeholders(&def.prompt, store, &graph, &keys, 1);
|
||||
Ok(prompt)
|
||||
}
|
||||
|
||||
/// Build split-extract prompt for one child (phase 2)
|
||||
pub fn split_extract_prompt(store: &Store, parent_key: &str, child_key: &str, child_desc: &str, child_sections: &str) -> Result<String, String> {
|
||||
let parent_content = store.nodes.get(parent_key)
|
||||
.map(|n| n.content.as_str())
|
||||
.ok_or_else(|| format!("No node '{}'", parent_key))?;
|
||||
load_prompt("split-extract", &[
|
||||
("{{CHILD_KEY}}", child_key),
|
||||
("{{CHILD_DESC}}", child_desc),
|
||||
("{{CHILD_SECTIONS}}", child_sections),
|
||||
("{{PARENT_CONTENT}}", parent_content),
|
||||
])
|
||||
}
|
||||
|
||||
/// Show consolidation batch status or generate an agent prompt.
|
||||
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
|
||||
if auto {
|
||||
let batch = agent_prompt(store, "replay", count)?;
|
||||
println!("{}", batch.prompt);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let graph = store.build_graph();
|
||||
let items = replay_queue(store, count);
|
||||
|
||||
if items.is_empty() {
|
||||
println!("No nodes to consolidate.");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("Consolidation batch ({} nodes):\n", items.len());
|
||||
for item in &items {
|
||||
let node_type = store.nodes.get(&item.key)
|
||||
.map(|n| if matches!(n.node_type, crate::store::NodeType::EpisodicSession) { "episodic" } else { "semantic" })
|
||||
.unwrap_or("?");
|
||||
println!(" [{:.3}] {} (cc={:.3}, interval={}d, type={})",
|
||||
item.priority, item.key, item.cc, item.interval_days, node_type);
|
||||
}
|
||||
|
||||
let pairs = detect_interference(store, &graph, 0.6);
|
||||
if !pairs.is_empty() {
|
||||
println!("\nInterfering pairs ({}):", pairs.len());
|
||||
for (a, b, sim) in pairs.iter().take(5) {
|
||||
println!(" [{:.3}] {} ↔ {}", sim, a, b);
|
||||
}
|
||||
}
|
||||
|
||||
println!("\nAgent prompts:");
|
||||
println!(" --auto Generate replay agent prompt");
|
||||
println!(" --agent replay Replay agent (schema assimilation)");
|
||||
println!(" --agent linker Linker agent (relational binding)");
|
||||
println!(" --agent separator Separator agent (pattern separation)");
|
||||
println!(" --agent transfer Transfer agent (CLS episodic→semantic)");
|
||||
println!(" --agent health Health agent (synaptic homeostasis)");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Generate a specific agent prompt with filled-in data.
|
||||
pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<AgentBatch, String> {
|
||||
let def = super::defs::get_def(agent)
|
||||
.ok_or_else(|| format!("Unknown agent: {}", agent))?;
|
||||
super::defs::run_agent(store, &def, count, &Default::default())
|
||||
}
|
||||
94
src/subconscious/agents/transcript.rs
Normal file
94
src/subconscious/agents/transcript.rs
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
// Shared JSONL transcript parsing
|
||||
//
|
||||
// Three agents (enrich, fact_mine, knowledge) all parse Claude Code JSONL
|
||||
// transcripts. This module provides the shared core: parse each line, extract
|
||||
// message type, text content from string-or-array blocks, timestamp, and
|
||||
// user type. Callers filter and transform as needed.
|
||||
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// A single message extracted from a JSONL transcript.
///
/// Plain owned data, so it derives `Debug`, `Clone` and equality to make it
/// easy to log, copy and compare in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TranscriptMessage {
    /// 1-based line number in the JSONL file.
    pub line: usize,
    /// Raw role: "user" or "assistant".
    pub role: String,
    /// Extracted text content (trimmed, blocks joined with newlines).
    pub text: String,
    /// ISO timestamp from the message, or empty string.
    pub timestamp: String,
    /// Value of the record's `userType` field ("external", "internal", etc.),
    /// or `None` when the record carries no such string field.
    pub user_type: Option<String>,
}
|
||||
|
||||
/// Parse a JSONL transcript into structured messages.
|
||||
///
|
||||
/// Extracts all user and assistant messages. Content blocks of type "text"
|
||||
/// are joined; tool_use, tool_result, thinking blocks are skipped.
|
||||
/// System-reminder blocks are filtered out.
|
||||
pub fn parse_transcript(path: &Path) -> Result<Vec<TranscriptMessage>, String> {
|
||||
let content = fs::read_to_string(path)
|
||||
.map_err(|e| format!("read {}: {}", path.display(), e))?;
|
||||
|
||||
let mut messages = Vec::new();
|
||||
for (i, line) in content.lines().enumerate() {
|
||||
let Ok(obj) = serde_json::from_str::<serde_json::Value>(line) else { continue };
|
||||
|
||||
let msg_type = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if msg_type != "user" && msg_type != "assistant" { continue; }
|
||||
|
||||
let timestamp = obj.get("timestamp")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let user_type = obj.get("userType")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
let Some(text) = extract_text_content(&obj) else { continue };
|
||||
let text = text.trim().to_string();
|
||||
if text.is_empty() { continue; }
|
||||
|
||||
messages.push(TranscriptMessage {
|
||||
line: i + 1,
|
||||
role: msg_type.to_string(),
|
||||
text,
|
||||
timestamp,
|
||||
user_type,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
/// Extract text content from a JSONL message object.
|
||||
///
|
||||
/// Handles both string content and array-of-blocks content (filtering to
|
||||
/// type="text" blocks only). Strips `<system-reminder>` tags.
|
||||
fn extract_text_content(obj: &serde_json::Value) -> Option<String> {
|
||||
let msg = obj.get("message").unwrap_or(obj);
|
||||
let content = msg.get("content")?;
|
||||
|
||||
let text = match content {
|
||||
serde_json::Value::String(s) => s.clone(),
|
||||
serde_json::Value::Array(arr) => {
|
||||
let texts: Vec<&str> = arr.iter()
|
||||
.filter_map(|block| {
|
||||
let block_type = block.get("type").and_then(|v| v.as_str())?;
|
||||
if block_type != "text" { return None; }
|
||||
let t = block.get("text").and_then(|v| v.as_str())?;
|
||||
// Skip system-reminder blocks entirely
|
||||
if t.contains("<system-reminder>") { return None; }
|
||||
Some(t)
|
||||
})
|
||||
.collect();
|
||||
if texts.is_empty() { return None; }
|
||||
texts.join("\n")
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
Some(text)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue