// knowledge.rs — knowledge agent action parsing, depth tracking, and convergence loop // // Agent prompts live in agents/*.agent files, dispatched via defs.rs. // This module handles: // - Action parsing (WRITE_NODE, LINK, REFINE from LLM output) // - Inference depth tracking (prevents runaway abstraction) // - Action application (write to store with provenance) // - Convergence loop (sequences agents, measures graph stability) // - Conversation fragment selection (for observation agent) use crate::graph::Graph; use super::llm; use crate::spectral; use crate::store::{self, Store, new_relation, RelationType}; use regex::Regex; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::fs; use std::path::{Path, PathBuf}; // --------------------------------------------------------------------------- // Action types // --------------------------------------------------------------------------- #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Action { pub kind: ActionKind, pub confidence: Confidence, pub weight: f64, pub depth: i32, pub applied: Option, pub rejected_reason: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ActionKind { WriteNode { key: String, content: String, covers: Vec, }, Link { source: String, target: String, }, Refine { key: String, content: String, }, } #[derive(Debug, Clone, Copy, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum Confidence { High, Medium, Low, } impl Confidence { fn weight(self) -> f64 { match self { Self::High => 1.0, Self::Medium => 0.6, Self::Low => 0.3, } } fn value(self) -> f64 { match self { Self::High => 0.9, Self::Medium => 0.6, Self::Low => 0.3, } } fn parse(s: &str) -> Self { match s.to_lowercase().as_str() { "high" => Self::High, "low" => Self::Low, _ => Self::Medium, } } } // --------------------------------------------------------------------------- // Action parsing // --------------------------------------------------------------------------- pub fn parse_write_nodes(text: &str) -> Vec { let re = Regex::new(r"(?s)WRITE_NODE\s+(\S+)\s*\n(.*?)END_NODE").unwrap(); let conf_re = Regex::new(r"(?i)CONFIDENCE:\s*(high|medium|low)").unwrap(); let covers_re = Regex::new(r"COVERS:\s*(.+)").unwrap(); re.captures_iter(text) .map(|cap| { let key = cap[1].to_string(); let mut content = cap[2].trim().to_string(); let confidence = conf_re .captures(&content) .map(|c| Confidence::parse(&c[1])) .unwrap_or(Confidence::Medium); content = conf_re.replace(&content, "").trim().to_string(); let covers: Vec = covers_re .captures(&content) .map(|c| c[1].split(',').map(|s| s.trim().to_string()).collect()) .unwrap_or_default(); content = covers_re.replace(&content, "").trim().to_string(); Action { weight: confidence.weight(), kind: ActionKind::WriteNode { key, content, covers }, confidence, depth: 0, applied: None, rejected_reason: None, } }) .collect() } pub fn parse_links(text: &str) -> Vec { let re = Regex::new(r"(?m)^LINK\s+(\S+)\s+(\S+)").unwrap(); re.captures_iter(text) .map(|cap| Action { kind: ActionKind::Link { source: cap[1].to_string(), target: cap[2].to_string(), }, confidence: Confidence::Low, weight: 0.3, depth: -1, applied: None, rejected_reason: None, }) .collect() } pub fn parse_refines(text: &str) -> Vec { let re = Regex::new(r"(?s)REFINE\s+(\S+)\s*\n(.*?)END_REFINE").unwrap(); re.captures_iter(text) .map(|cap| { let key = cap[1].trim_matches('*').trim().to_string(); Action { kind: ActionKind::Refine { key, content: cap[2].trim().to_string(), }, confidence: Confidence::Medium, weight: 0.7, depth: 0, applied: None, rejected_reason: None, } }) .collect() } pub fn parse_all_actions(text: &str) -> Vec { let mut actions = parse_write_nodes(text); actions.extend(parse_links(text)); actions.extend(parse_refines(text)); actions } pub fn count_no_ops(text: &str) -> usize { let no_conn = Regex::new(r"\bNO_CONNECTION\b").unwrap().find_iter(text).count(); let affirm = Regex::new(r"\bAFFIRM\b").unwrap().find_iter(text).count(); let no_extract = Regex::new(r"\bNO_EXTRACTION\b").unwrap().find_iter(text).count(); no_conn + affirm + no_extract } // --------------------------------------------------------------------------- // Inference depth tracking // --------------------------------------------------------------------------- const DEPTH_DB_KEY: &str = "_knowledge-depths"; #[derive(Default)] pub struct DepthDb { depths: HashMap, } impl DepthDb { pub fn load(store: &Store) -> Self { let depths = store.nodes.get(DEPTH_DB_KEY) .and_then(|n| serde_json::from_str(&n.content).ok()) .unwrap_or_default(); Self { depths } } pub fn save(&self, store: &mut Store) { if let Ok(json) = serde_json::to_string(&self.depths) { store.upsert_provenance(DEPTH_DB_KEY, &json, store::Provenance::AgentKnowledgeObservation).ok(); } } pub fn get(&self, key: &str) -> i32 { self.depths.get(key).copied().unwrap_or(0) } pub fn set(&mut self, key: String, depth: i32) { self.depths.insert(key, depth); } } /// Agent base depths: observation=1, extractor=2, connector=3 fn agent_base_depth(agent: &str) -> Option { match agent { "observation" => Some(1), "extractor" => Some(2), "connector" => Some(3), "challenger" => None, _ => Some(2), } } pub fn compute_action_depth(db: &DepthDb, action: &Action, agent: &str) -> i32 { match &action.kind { ActionKind::Link { .. } => -1, ActionKind::Refine { key, .. } => db.get(key), ActionKind::WriteNode { covers, .. } => { if !covers.is_empty() { covers.iter().map(|k| db.get(k)).max().unwrap_or(0) + 1 } else { agent_base_depth(agent).unwrap_or(2) } } } } /// Confidence threshold that scales with inference depth. pub fn required_confidence(depth: i32, base: f64) -> f64 { if depth <= 0 { return 0.0; } 1.0 - (1.0 - base).powi(depth) } /// Confidence bonus from real-world use. pub fn use_bonus(use_count: u32) -> f64 { if use_count == 0 { return 0.0; } 1.0 - 1.0 / (1.0 + 0.15 * use_count as f64) } // --------------------------------------------------------------------------- // Action application // --------------------------------------------------------------------------- fn stamp_content(content: &str, agent: &str, timestamp: &str, depth: i32) -> String { format!("\n{}", agent, timestamp, depth, content) } /// Check if a link already exists between two keys. fn has_edge(store: &Store, source: &str, target: &str) -> bool { store.relations.iter().any(|r| { !r.deleted && ((r.source_key == source && r.target_key == target) || (r.source_key == target && r.target_key == source)) }) } pub fn apply_action( store: &mut Store, action: &Action, agent: &str, timestamp: &str, depth: i32, ) -> bool { let provenance = agent_provenance(agent); match &action.kind { ActionKind::WriteNode { key, content, .. } => { let stamped = stamp_content(content, agent, timestamp, depth); store.upsert_provenance(key, &stamped, provenance).is_ok() } ActionKind::Link { source, target } => { if has_edge(store, source, target) { return false; } let source_uuid = match store.nodes.get(source.as_str()) { Some(n) => n.uuid, None => return false, }; let target_uuid = match store.nodes.get(target.as_str()) { Some(n) => n.uuid, None => return false, }; let mut rel = new_relation( source_uuid, target_uuid, RelationType::Link, 0.3, source, target, ); rel.provenance = provenance; store.add_relation(rel).is_ok() } ActionKind::Refine { key, content } => { let stamped = stamp_content(content, agent, timestamp, depth); store.upsert_provenance(key, &stamped, provenance).is_ok() } } } fn agent_provenance(agent: &str) -> store::Provenance { match agent { "observation" => store::Provenance::AgentKnowledgeObservation, "extractor" | "pattern" => store::Provenance::AgentKnowledgePattern, "connector" => store::Provenance::AgentKnowledgeConnector, "challenger" => store::Provenance::AgentKnowledgeChallenger, _ => store::Provenance::Agent, } } // --------------------------------------------------------------------------- // Shared agent execution // --------------------------------------------------------------------------- /// Result of running a single agent through the common pipeline. pub struct AgentResult { pub output: String, pub actions: Vec, pub no_ops: usize, pub node_keys: Vec, } /// Run a single agent: build prompt → call LLM → store output → parse actions → record visits. /// /// This is the common pipeline shared by the knowledge loop, consolidation pipeline, /// and daemon. Callers handle action application (with or without depth tracking). pub fn run_one_agent( store: &mut Store, agent_name: &str, batch_size: usize, llm_tag: &str, ) -> Result { let def = super::defs::get_def(agent_name) .ok_or_else(|| format!("no .agent file for {}", agent_name))?; let agent_batch = super::defs::run_agent(store, &def, batch_size)?; let output = llm::call_sonnet(llm_tag, &agent_batch.prompt)?; // Store raw output for audit trail let ts = store::format_datetime(store::now_epoch()) .replace([':', '-', 'T'], ""); let report_key = format!("_{}-{}-{}", llm_tag, agent_name, ts); let provenance = agent_provenance(agent_name); store.upsert_provenance(&report_key, &output, provenance).ok(); let actions = parse_all_actions(&output); let no_ops = count_no_ops(&output); // Record visits for processed nodes if !agent_batch.node_keys.is_empty() { store.record_agent_visits(&agent_batch.node_keys, agent_name).ok(); } Ok(AgentResult { output, actions, no_ops, node_keys: agent_batch.node_keys, }) } // --------------------------------------------------------------------------- // Conversation fragment selection // --------------------------------------------------------------------------- /// Extract human-readable dialogue from a conversation JSONL fn extract_conversation_text(path: &Path, max_chars: usize) -> String { let cfg = crate::config::get(); let messages = super::transcript::parse_transcript(path).unwrap_or_default(); let mut fragments = Vec::new(); let mut total = 0; for msg in &messages { let min_len = if msg.role == "user" { 5 } else { 10 }; if msg.text.len() <= min_len { continue; } // Only include external user messages if msg.role == "user" { if msg.user_type.as_deref() != Some("external") { continue; } if msg.text.starts_with("[Request interrupted") { continue; } } let role = if msg.role == "user" { &cfg.user_name } else { &cfg.assistant_name }; fragments.push(format!("**{}:** {}", role, msg.text)); total += msg.text.len(); if total > max_chars { break; } } fragments.join("\n\n") } /// Count short user messages (dialogue turns) in a JSONL fn count_dialogue_turns(path: &Path) -> usize { let messages = super::transcript::parse_transcript(path).unwrap_or_default(); messages.iter() .filter(|m| m.role == "user" && m.user_type.as_deref() == Some("external") && m.text.len() > 5 && m.text.len() < 500 && !m.text.starts_with("[Request interrupted") && !m.text.starts_with("Implement the following")) .count() } /// Select conversation fragments for the observation extractor pub fn select_conversation_fragments(n: usize) -> Vec<(String, String)> { let projects = crate::config::get().projects_dir.clone(); if !projects.exists() { return Vec::new(); } let mut jsonl_files: Vec = Vec::new(); if let Ok(dirs) = fs::read_dir(&projects) { for dir in dirs.filter_map(|e| e.ok()) { if !dir.path().is_dir() { continue; } if let Ok(files) = fs::read_dir(dir.path()) { for f in files.filter_map(|e| e.ok()) { let p = f.path(); if p.extension().map(|x| x == "jsonl").unwrap_or(false) { if let Ok(meta) = p.metadata() { if meta.len() > 50_000 { jsonl_files.push(p); } } } } } } } let mut scored: Vec<(usize, PathBuf)> = jsonl_files.into_iter() .map(|f| (count_dialogue_turns(&f), f)) .filter(|(turns, _)| *turns >= 10) .collect(); scored.sort_by(|a, b| b.0.cmp(&a.0)); let mut fragments = Vec::new(); for (_, f) in scored.iter().take(n * 2) { let session_id = f.file_stem() .map(|s| s.to_string_lossy().to_string()) .unwrap_or_else(|| "unknown".into()); let text = extract_conversation_text(f, 8000); if text.len() > 500 { fragments.push((session_id, text)); } if fragments.len() >= n { break; } } fragments } // --------------------------------------------------------------------------- // Convergence metrics // --------------------------------------------------------------------------- #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CycleResult { pub cycle: usize, pub timestamp: String, pub total_actions: usize, pub total_applied: usize, pub total_no_ops: usize, pub depth_rejected: usize, pub weighted_delta: f64, pub graph_metrics_before: GraphMetrics, pub graph_metrics_after: GraphMetrics, } #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct GraphMetrics { pub nodes: usize, pub edges: usize, pub cc: f64, pub sigma: f64, pub communities: usize, } impl GraphMetrics { pub fn from_graph(store: &Store, graph: &Graph) -> Self { Self { nodes: store.nodes.len(), edges: graph.edge_count(), cc: graph.avg_clustering_coefficient() as f64, sigma: graph.small_world_sigma() as f64, communities: graph.community_count(), } } } fn metric_stability(history: &[CycleResult], key: &str, window: usize) -> f64 { if history.len() < window { return f64::INFINITY; } let values: Vec = history[history.len() - window..].iter() .map(|h| match key { "sigma" => h.graph_metrics_after.sigma, "cc" => h.graph_metrics_after.cc, "communities" => h.graph_metrics_after.communities as f64, _ => 0.0, }) .collect(); if values.len() < 2 { return f64::INFINITY; } let mean = values.iter().sum::() / values.len() as f64; if mean == 0.0 { return 0.0; } let variance = values.iter().map(|v| (v - mean).powi(2)).sum::() / values.len() as f64; variance.sqrt() / mean.abs() } pub fn check_convergence(history: &[CycleResult], window: usize) -> bool { if history.len() < window { return false; } let sigma_cv = metric_stability(history, "sigma", window); let cc_cv = metric_stability(history, "cc", window); let comm_cv = metric_stability(history, "communities", window); let recent = &history[history.len() - window..]; let avg_delta = recent.iter().map(|r| r.weighted_delta).sum::() / recent.len() as f64; eprintln!("\n Convergence check (last {} cycles):", window); eprintln!(" sigma CV: {:.4} (< 0.05?)", sigma_cv); eprintln!(" CC CV: {:.4} (< 0.05?)", cc_cv); eprintln!(" community CV: {:.4} (< 0.10?)", comm_cv); eprintln!(" avg delta: {:.2} (< 1.00?)", avg_delta); let structural = sigma_cv < 0.05 && cc_cv < 0.05 && comm_cv < 0.10; let behavioral = avg_delta < 1.0; if structural && behavioral { eprintln!(" → CONVERGED"); true } else { false } } // --------------------------------------------------------------------------- // The knowledge loop // --------------------------------------------------------------------------- pub struct KnowledgeLoopConfig { pub max_cycles: usize, pub batch_size: usize, pub window: usize, pub max_depth: i32, pub confidence_base: f64, } impl Default for KnowledgeLoopConfig { fn default() -> Self { Self { max_cycles: 20, batch_size: 5, window: 5, max_depth: 4, confidence_base: 0.3, } } } pub fn run_knowledge_loop(config: &KnowledgeLoopConfig) -> Result, String> { let mut store = Store::load()?; let mut depth_db = DepthDb::load(&store); let mut history = Vec::new(); eprintln!("Knowledge Loop — fixed-point iteration"); eprintln!(" max_cycles={} batch_size={}", config.max_cycles, config.batch_size); eprintln!(" window={} max_depth={}", config.window, config.max_depth); for cycle in 1..=config.max_cycles { let result = run_cycle(cycle, config, &mut depth_db)?; history.push(result); if check_convergence(&history, config.window) { eprintln!("\n CONVERGED after {} cycles", cycle); break; } } // Save loop summary as a store node if let Some(first) = history.first() { let key = format!("_knowledge-loop-{}", first.timestamp); if let Ok(json) = serde_json::to_string_pretty(&history) { store = Store::load()?; store.upsert_provenance(&key, &json, store::Provenance::AgentKnowledgeObservation).ok(); depth_db.save(&mut store); store.save()?; } } Ok(history) } fn run_cycle( cycle_num: usize, config: &KnowledgeLoopConfig, depth_db: &mut DepthDb, ) -> Result { let timestamp = chrono::Local::now().format("%Y%m%dT%H%M%S").to_string(); eprintln!("\n{}", "=".repeat(60)); eprintln!("CYCLE {} — {}", cycle_num, timestamp); eprintln!("{}", "=".repeat(60)); let mut store = Store::load()?; let graph = store.build_graph(); let metrics_before = GraphMetrics::from_graph(&store, &graph); eprintln!(" Before: nodes={} edges={} cc={:.3} sigma={:.3}", metrics_before.nodes, metrics_before.edges, metrics_before.cc, metrics_before.sigma); let mut all_actions = Vec::new(); let mut all_no_ops = 0; let mut depth_rejected = 0; let mut total_applied = 0; // Run each agent via .agent file dispatch let agent_names = ["observation", "extractor", "connector", "challenger"]; for agent_name in &agent_names { eprintln!("\n --- {} (n={}) ---", agent_name, config.batch_size); let result = match run_one_agent(&mut store, agent_name, config.batch_size, "knowledge") { Ok(r) => r, Err(e) => { eprintln!(" ERROR: {}", e); continue; } }; let mut actions = result.actions; all_no_ops += result.no_ops; eprintln!(" Actions: {} No-ops: {}", actions.len(), result.no_ops); let mut applied = 0; for action in &mut actions { let depth = compute_action_depth(depth_db, action, agent_name); action.depth = depth; match &action.kind { ActionKind::WriteNode { key, covers, .. } => { let conf_val = action.confidence.value(); let req = required_confidence(depth, config.confidence_base); let source_uses: Vec = covers.iter() .filter_map(|k| store.nodes.get(k).map(|n| n.uses)) .collect(); let avg_uses = if source_uses.is_empty() { 0 } else { source_uses.iter().sum::() / source_uses.len() as u32 }; let eff_conf = (conf_val + use_bonus(avg_uses)).min(1.0); if eff_conf < req { action.applied = Some(false); action.rejected_reason = Some("depth_threshold".into()); depth_rejected += 1; continue; } if depth > config.max_depth { action.applied = Some(false); action.rejected_reason = Some("max_depth".into()); depth_rejected += 1; continue; } eprintln!(" WRITE {} depth={} conf={:.2} eff={:.2} req={:.2}", key, depth, conf_val, eff_conf, req); } ActionKind::Link { source, target } => { eprintln!(" LINK {} → {}", source, target); } ActionKind::Refine { key, .. } => { eprintln!(" REFINE {} depth={}", key, depth); } } if apply_action(&mut store, action, agent_name, ×tamp, depth) { applied += 1; action.applied = Some(true); if let ActionKind::WriteNode { key, .. } | ActionKind::Refine { key, .. } = &action.kind { depth_db.set(key.clone(), depth); } } else { action.applied = Some(false); } } eprintln!(" Applied: {}/{}", applied, actions.len()); total_applied += applied; all_actions.extend(actions); } depth_db.save(&mut store); // Recompute spectral if anything changed if total_applied > 0 { eprintln!("\n Recomputing spectral embedding..."); let graph = store.build_graph(); let result = spectral::decompose(&graph, 8); let emb = spectral::to_embedding(&result); spectral::save_embedding(&emb).ok(); } let graph = store.build_graph(); let metrics_after = GraphMetrics::from_graph(&store, &graph); let weighted_delta: f64 = all_actions.iter() .filter(|a| a.applied == Some(true)) .map(|a| a.weight) .sum(); eprintln!("\n CYCLE {} SUMMARY", cycle_num); eprintln!(" Applied: {}/{} depth-rejected: {} no-ops: {}", total_applied, all_actions.len(), depth_rejected, all_no_ops); eprintln!(" Weighted delta: {:.2}", weighted_delta); Ok(CycleResult { cycle: cycle_num, timestamp, total_actions: all_actions.len(), total_applied, total_no_ops: all_no_ops, depth_rejected, weighted_delta, graph_metrics_before: metrics_before, graph_metrics_after: metrics_after, }) }