Episodic nodes (journal entries, digests) are narratives that should not be split even when large. Only semantic reference nodes that have grown to cover multiple topics are candidates.

// Agent prompt generation and formatting. Presentation logic —
// builds text prompts from store data for consolidation agents.

use crate::store::Store;
use crate::graph::Graph;
use crate::similarity;
use crate::spectral;

use crate::neuro::{
    ReplayItem, consolidation_priority,
    replay_queue, replay_queue_with_graph, detect_interference,
};

/// Load a prompt template, replacing {{PLACEHOLDER}} with data
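///
/// Illustrative call, mirroring how `consolidation_batch` below fills the
/// replay template (the placeholder value here is shortened):
///
/// ```text
/// let prompt = load_prompt("replay", &[("{{NODES}}", "...node sections...")])?;
/// println!("{}", prompt);
/// ```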
pub fn load_prompt(name: &str, replacements: &[(&str, &str)]) -> Result<String, String> {
    let path = crate::config::get().prompts_dir.join(format!("{}.md", name));
    let mut content = std::fs::read_to_string(&path)
        .map_err(|e| format!("load prompt {}: {}", path.display(), e))?;
    for (placeholder, data) in replacements {
        content = content.replace(placeholder, data);
    }
    Ok(content)
}

/// Format topology header for agent prompts — current graph health metrics
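///
/// Reports small-world σ (clustering vs. path length relative to a comparable
/// random graph), the fitted degree power-law exponent α, the Gini coefficient
/// of the degree distribution (how unevenly edges concentrate on hubs), and the
/// average clustering coefficient, followed by the list of saturated hubs the
/// agent is told not to link to.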
fn format_topology_header(graph: &Graph) -> String {
    let sigma = graph.small_world_sigma();
    let alpha = graph.degree_power_law_exponent();
    let gini = graph.degree_gini();
    let avg_cc = graph.avg_clustering_coefficient();
    let n = graph.nodes().len();
    let e = graph.edge_count();

    // Identify saturated hubs — nodes with degree well above threshold
    let threshold = graph.hub_threshold();
    let mut hubs: Vec<_> = graph.nodes().iter()
        .map(|k| (k.clone(), graph.degree(k)))
        .filter(|(_, d)| *d >= threshold)
        .collect();
    hubs.sort_by(|a, b| b.1.cmp(&a.1));
    hubs.truncate(15);

    let hub_list = if hubs.is_empty() {
        String::new()
    } else {
        let lines: Vec<String> = hubs.iter()
            .map(|(k, d)| format!(" - {} (degree {})", k, d))
            .collect();
        format!(
            "### SATURATED HUBS — DO NOT LINK TO THESE\n\
             The following nodes are already over-connected. Adding more links\n\
             to them makes the graph worse (star topology). Find lateral\n\
             connections between peripheral nodes instead.\n\n{}\n\n\
             Only link to a hub if it is genuinely the ONLY reasonable target.\n\n",
            lines.join("\n"))
    };

    format!(
        "## Current graph topology\n\
         Nodes: {} Edges: {} Communities: {}\n\
         Small-world σ: {:.1} Power-law α: {:.2} Degree Gini: {:.3}\n\
         Avg clustering coefficient: {:.4}\n\n\
         {}\
         Each node below shows its hub-link ratio (fraction of edges to top-5% degree nodes).\n\
         Use `poc-memory link-impact SOURCE TARGET` to evaluate proposed links.\n\n",
        n, e, graph.community_count(), sigma, alpha, gini, avg_cc, hub_list)
}

/// Format node data section for prompt templates
fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> String {
    let hub_thresh = graph.hub_threshold();
    let mut out = String::new();
    for item in items {
        let node = match store.nodes.get(&item.key) {
            Some(n) => n,
            None => continue,
        };

        out.push_str(&format!("## {} \n", item.key));
        out.push_str(&format!("Priority: {:.3} CC: {:.3} Emotion: {:.1} ",
            item.priority, item.cc, item.emotion));
        out.push_str(&format!("Interval: {}d\n",
            node.spaced_repetition_interval));
        if item.outlier_score > 0.0 {
            out.push_str(&format!("Spectral: {} (outlier={:.1})\n",
                item.classification, item.outlier_score));
        }

        if let Some(community) = node.community_id {
            out.push_str(&format!("Community: {} ", community));
        }
        let deg = graph.degree(&item.key);
        let cc = graph.clustering_coefficient(&item.key);

        // Hub-link ratio: what fraction of this node's edges go to hubs?
        let neighbors = graph.neighbors(&item.key);
        let hub_links = neighbors.iter()
            .filter(|(n, _)| graph.degree(n) >= hub_thresh)
            .count();
        let hub_ratio = if deg > 0 { hub_links as f32 / deg as f32 } else { 0.0 };
        let is_hub = deg >= hub_thresh;

        out.push_str(&format!("Degree: {} CC: {:.3} Hub-link ratio: {:.0}% ({}/{})",
            deg, cc, hub_ratio * 100.0, hub_links, deg));
        if is_hub {
            out.push_str(" ← THIS IS A HUB");
        } else if hub_ratio > 0.6 {
            out.push_str(" ← mostly hub-connected, needs lateral links");
        }
        out.push('\n');

        // Content (truncated for large nodes)
        let content = &node.content;
        if content.len() > 1500 {
            let truncated = crate::util::truncate(content, 1500, "\n[...]");
            out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n\n",
                content.len(), truncated));
        } else {
            out.push_str(&format!("\nContent:\n{}\n\n", content));
        }

        // Neighbors
        let neighbors = graph.neighbors(&item.key);
        if !neighbors.is_empty() {
            out.push_str("Neighbors:\n");
            for (n, strength) in neighbors.iter().take(15) {
                let n_cc = graph.clustering_coefficient(n);
                let n_community = store.nodes.get(n.as_str())
                    .and_then(|n| n.community_id);
                out.push_str(&format!(" - {} (str={:.2}, cc={:.3}",
                    n, strength, n_cc));
                if let Some(c) = n_community {
                    out.push_str(&format!(", c{}", c));
                }
                out.push_str(")\n");
            }
        }

        // Suggested link targets: text-similar semantic nodes not already neighbors
        let neighbor_keys: std::collections::HashSet<&str> = neighbors.iter()
            .map(|(k, _)| k.as_str()).collect();
        let mut candidates: Vec<(&str, f32)> = store.nodes.iter()
            .filter(|(k, _)| {
                *k != &item.key
                    && !neighbor_keys.contains(k.as_str())
            })
            .map(|(k, n)| {
                let sim = similarity::cosine_similarity(content, &n.content);
                (k.as_str(), sim)
            })
            .filter(|(_, sim)| *sim > 0.1)
            .collect();
        candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
        candidates.truncate(8);

        if !candidates.is_empty() {
            out.push_str("\nSuggested link targets (by text similarity, not yet linked):\n");
            for (k, sim) in &candidates {
                let is_hub = graph.degree(k) >= hub_thresh;
                out.push_str(&format!(" - {} (sim={:.3}{})\n",
                    k, sim, if is_hub { ", HUB" } else { "" }));
            }
        }

        out.push_str("\n---\n\n");
    }
    out
}

/// Format health data for the health agent prompt
fn format_health_section(store: &Store, graph: &Graph) -> String {
    use crate::graph;

    let health = graph::health_report(graph, store);

    let mut out = health;
    out.push_str("\n\n## Weight distribution\n");

    // Weight histogram
    let mut buckets = [0u32; 10]; // 0.0-0.1, 0.1-0.2, ..., 0.9-1.0
    for node in store.nodes.values() {
        let bucket = ((node.weight * 10.0) as usize).min(9);
        buckets[bucket] += 1;
    }
    for (i, &count) in buckets.iter().enumerate() {
        let lo = i as f32 / 10.0;
        let hi = (i + 1) as f32 / 10.0;
        let bar = "█".repeat((count as usize) / 10);
        out.push_str(&format!(" {:.1}-{:.1}: {:4} {}\n", lo, hi, count, bar));
    }

    // Near-prune nodes
    let near_prune: Vec<_> = store.nodes.iter()
        .filter(|(_, n)| n.weight < 0.15)
        .map(|(k, n)| (k.clone(), n.weight))
        .collect();
    if !near_prune.is_empty() {
        out.push_str(&format!("\n## Near-prune nodes ({} total)\n", near_prune.len()));
        for (k, w) in near_prune.iter().take(20) {
            out.push_str(&format!(" [{:.3}] {}\n", w, k));
        }
    }

    // Community sizes
    let communities = graph.communities();
    let mut comm_sizes: std::collections::HashMap<u32, Vec<String>> = std::collections::HashMap::new();
    for (key, &label) in communities {
        comm_sizes.entry(label).or_default().push(key.clone());
    }
    let mut sizes: Vec<_> = comm_sizes.iter()
        .map(|(id, members)| (*id, members.len(), members.clone()))
        .collect();
    sizes.sort_by(|a, b| b.1.cmp(&a.1));

    out.push_str("\n## Largest communities\n");
    for (id, size, members) in sizes.iter().take(10) {
        out.push_str(&format!(" Community {} ({} nodes): ", id, size));
        let sample: Vec<_> = members.iter().take(5).map(|s| s.as_str()).collect();
        out.push_str(&sample.join(", "));
        if *size > 5 { out.push_str(", ..."); }
        out.push('\n');
    }

    out
}

/// Format interference pairs for the separator agent prompt
fn format_pairs_section(
    pairs: &[(String, String, f32)],
    store: &Store,
    graph: &Graph,
) -> String {
    let mut out = String::new();
    let communities = graph.communities();

    for (a, b, sim) in pairs {
        out.push_str(&format!("## Pair: similarity={:.3}\n", sim));

        let ca = communities.get(a).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());
        let cb = communities.get(b).map(|c| format!("c{}", c)).unwrap_or_else(|| "?".into());

        // Node A
        out.push_str(&format!("\n### {} ({})\n", a, ca));
        if let Some(node) = store.nodes.get(a) {
            let content = crate::util::truncate(&node.content, 500, "...");
            out.push_str(&format!("Weight: {:.2}\n{}\n",
                node.weight, content));
        }

        // Node B
        out.push_str(&format!("\n### {} ({})\n", b, cb));
        if let Some(node) = store.nodes.get(b) {
            let content = crate::util::truncate(&node.content, 500, "...");
            out.push_str(&format!("Weight: {:.2}\n{}\n",
                node.weight, content));
        }

        out.push_str("\n---\n\n");
    }
    out
}

/// Format rename candidates: nodes with auto-generated or opaque keys
fn format_rename_candidates(store: &Store, count: usize) -> String {
    let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter()
        .filter(|(key, _)| {
            // Only rename nodes with long auto-generated keys
            if key.len() < 60 { return false; }

            // Journal entries with auto-slugs
            if key.starts_with("journal#j-") { return true; }

            // Mined transcripts with UUIDs
            if key.starts_with("_mined-transcripts#f-") { return true; }

            false
        })
        .map(|(k, n)| (k.as_str(), n))
        .collect();

    // Sort by timestamp (newest first) so we rename recent stuff first
    candidates.sort_by(|a, b| b.1.timestamp.cmp(&a.1.timestamp));
    candidates.truncate(count);

    let mut out = String::new();
    out.push_str(&format!("## Nodes to rename ({} of {} candidates)\n\n",
        candidates.len(),
        store.nodes.keys().filter(|k| k.len() >= 60 &&
            (k.starts_with("journal#j-") || k.starts_with("_mined-transcripts#f-"))).count()));

    for (key, node) in &candidates {
        out.push_str(&format!("### {}\n", key));
        let created = if node.timestamp > 0 {
            crate::store::format_datetime(node.timestamp)
        } else {
            "unknown".to_string()
        };
        out.push_str(&format!("Created: {}\n", created));

        let content = &node.content;
        if content.len() > 800 {
            let truncated = crate::util::truncate(content, 800, "\n[...]");
            out.push_str(&format!("\nContent ({} chars, truncated):\n{}\n\n",
                content.len(), truncated));
        } else {
            out.push_str(&format!("\nContent:\n{}\n\n", content));
        }

        out.push_str("---\n\n");
    }
    out
}

/// Get split candidates sorted by size (largest first)
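///
/// Usage sketch (assumes an existing `Store`), matching how the "split" agent
/// below picks the largest candidate for a phase-1 plan:
///
/// ```text
/// let candidates = split_candidates(&store);
/// if let Some(largest) = candidates.first() {
///     let prompt = split_plan_prompt(&store, largest)?;
/// }
/// ```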
pub fn split_candidates(store: &Store) -> Vec<String> {
    let mut candidates: Vec<(&str, usize)> = store.nodes.iter()
        .filter(|(key, node)| {
            !key.starts_with('_')
                && !node.deleted
                && node.content.len() > 2000
                && matches!(node.node_type, crate::store::NodeType::Semantic)
        })
        .map(|(k, n)| (k.as_str(), n.content.len()))
        .collect();
    candidates.sort_by(|a, b| b.1.cmp(&a.1));
    candidates.into_iter().map(|(k, _)| k.to_string()).collect()
}

/// Format a single node for split-plan prompt (phase 1)
fn format_split_plan_node(store: &Store, graph: &Graph, key: &str) -> String {
    let communities = graph.communities();
    let node = match store.nodes.get(key) {
        Some(n) => n,
        None => return format!("Node '{}' not found\n", key),
    };

    let mut out = String::new();
    out.push_str(&format!("### {} ({} chars)\n", key, node.content.len()));

    // Show neighbors grouped by community
    let neighbors = graph.neighbors(key);
    if !neighbors.is_empty() {
        let mut by_community: std::collections::BTreeMap<String, Vec<(&str, f32)>> =
            std::collections::BTreeMap::new();
        for (nkey, strength) in &neighbors {
            let comm = communities.get(nkey.as_str())
                .map(|c| format!("c{}", c))
                .unwrap_or_else(|| "unclustered".into());
            by_community.entry(comm)
                .or_default()
                .push((nkey.as_str(), *strength));
        }

        out.push_str("\nNeighbors by community:\n");
        for (comm, members) in &by_community {
            out.push_str(&format!(" {} ({}):", comm, members.len()));
            for (nkey, strength) in members.iter().take(5) {
                out.push_str(&format!(" {}({:.2})", nkey, strength));
            }
            if members.len() > 5 {
                out.push_str(&format!(" +{} more", members.len() - 5));
            }
            out.push('\n');
        }
    }

    // Full content
    out.push_str(&format!("\nContent:\n{}\n\n", node.content));
    out.push_str("---\n\n");
    out
}

/// Build split-plan prompt for a single node (phase 1)
pub fn split_plan_prompt(store: &Store, key: &str) -> Result<String, String> {
    let graph = store.build_graph();
    let topology = format_topology_header(&graph);
    let node_section = format_split_plan_node(store, &graph, key);
    load_prompt("split-plan", &[("{{TOPOLOGY}}", &topology), ("{{NODE}}", &node_section)])
}

/// Build split-extract prompt for one child (phase 2)
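///
/// Usage sketch; the child key, description, and section list come from the
/// phase-1 plan (the literal values shown here are hypothetical):
///
/// ```text
/// let prompt = split_extract_prompt(&store, "notes#rust", "notes#rust-async",
///     "Async/await material", "## Futures\n## Executors")?;
/// ```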
pub fn split_extract_prompt(store: &Store, parent_key: &str, child_key: &str, child_desc: &str, child_sections: &str) -> Result<String, String> {
    let parent_content = store.nodes.get(parent_key)
        .map(|n| n.content.as_str())
        .ok_or_else(|| format!("No node '{}'", parent_key))?;
    load_prompt("split-extract", &[
        ("{{CHILD_KEY}}", child_key),
        ("{{CHILD_DESC}}", child_desc),
        ("{{CHILD_SECTIONS}}", child_sections),
        ("{{PARENT_CONTENT}}", parent_content),
    ])
}

/// Run agent consolidation on top-priority nodes
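///
/// With `auto = true` this prints a filled replay prompt to stdout; with
/// `auto = false` it prints an interactive summary of the queue, interfering
/// pairs, and the available `--agent` options. For example:
///
/// ```text
/// consolidation_batch(&store, 10, false)?;   // interactive summary
/// consolidation_batch(&store, 10, true)?;    // emit the replay prompt
/// ```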
pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<(), String> {
    let graph = store.build_graph();
    let items = replay_queue(store, count);

    if items.is_empty() {
        println!("No nodes to consolidate.");
        return Ok(());
    }

    let nodes_section = format_nodes_section(store, &items, &graph);

    if auto {
        let prompt = load_prompt("replay", &[("{{NODES}}", &nodes_section)])?;
        println!("{}", prompt);
    } else {
        // Interactive: show what needs attention and available agent types
        println!("Consolidation batch ({} nodes):\n", items.len());
        for item in &items {
            let node_type = store.nodes.get(&item.key)
                .map(|n| if matches!(n.node_type, crate::store::NodeType::EpisodicSession) { "episodic" } else { "semantic" })
                .unwrap_or("?");
            println!(" [{:.3}] {} (cc={:.3}, interval={}d, type={})",
                item.priority, item.key, item.cc, item.interval_days, node_type);
        }

        // Also show interference pairs
        let pairs = detect_interference(store, &graph, 0.6);
        if !pairs.is_empty() {
            println!("\nInterfering pairs ({}):", pairs.len());
            for (a, b, sim) in pairs.iter().take(5) {
                println!(" [{:.3}] {} ↔ {}", sim, a, b);
            }
        }

        println!("\nAgent prompts:");
        println!("  --auto             Generate replay agent prompt");
        println!("  --agent replay     Replay agent (schema assimilation)");
        println!("  --agent linker     Linker agent (relational binding)");
        println!("  --agent separator  Separator agent (pattern separation)");
        println!("  --agent transfer   Transfer agent (CLS episodic→semantic)");
        println!("  --agent health     Health agent (synaptic homeostasis)");
    }

    Ok(())
}

/// Generate a specific agent prompt with filled-in data
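///
/// `agent` must be one of `replay`, `linker`, `separator`, `transfer`,
/// `health`, `rename`, or `split`; anything else returns an error.
///
/// ```text
/// let prompt = agent_prompt(&store, "separator", 5)?;
/// println!("{}", prompt);
/// ```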
pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String, String> {
    let graph = store.build_graph();
    let topology = format_topology_header(&graph);

    let emb = spectral::load_embedding().ok();

    match agent {
        "replay" => {
            let items = replay_queue_with_graph(store, count, &graph, emb.as_ref());
            let nodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("replay", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
        }
        "linker" => {
            // Filter to episodic entries
            let mut items = replay_queue_with_graph(store, count * 2, &graph, emb.as_ref());
            items.retain(|item| {
                store.nodes.get(&item.key)
                    .map(|n| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
                    .unwrap_or(false)
            });
            items.truncate(count);
            let nodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("linker", &[("{{TOPOLOGY}}", &topology), ("{{NODES}}", &nodes_section)])
        }
        "separator" => {
            let mut pairs = detect_interference(store, &graph, 0.5);
            pairs.truncate(count);
            let pairs_section = format_pairs_section(&pairs, store, &graph);
            load_prompt("separator", &[("{{TOPOLOGY}}", &topology), ("{{PAIRS}}", &pairs_section)])
        }
        "transfer" => {
            // Recent episodic entries
            let mut episodes: Vec<_> = store.nodes.iter()
                .filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
                .map(|(k, n)| (k.clone(), n.timestamp))
                .collect();
            episodes.sort_by(|a, b| b.1.cmp(&a.1));
            episodes.truncate(count);

            let episode_keys: Vec<_> = episodes.iter().map(|(k, _)| k.clone()).collect();
            let items: Vec<ReplayItem> = episode_keys.iter()
                .filter_map(|k| {
                    let node = store.nodes.get(k)?;
                    Some(ReplayItem {
                        key: k.clone(),
                        priority: consolidation_priority(store, k, &graph, None),
                        interval_days: node.spaced_repetition_interval,
                        emotion: node.emotion,
                        cc: graph.clustering_coefficient(k),
                        classification: "unknown",
                        outlier_score: 0.0,
                    })
                })
                .collect();
            let episodes_section = format_nodes_section(store, &items, &graph);
            load_prompt("transfer", &[("{{TOPOLOGY}}", &topology), ("{{EPISODES}}", &episodes_section)])
        }
        "health" => {
            let health_section = format_health_section(store, &graph);
            load_prompt("health", &[("{{TOPOLOGY}}", &topology), ("{{HEALTH}}", &health_section)])
        }
        "rename" => {
            let nodes_section = format_rename_candidates(store, count);
            load_prompt("rename", &[("{{NODES}}", &nodes_section)])
        }
        "split" => {
            // Phase 1: plan prompt for the largest candidate
            let candidates = split_candidates(store);
            if candidates.is_empty() {
                return Err("No nodes large enough to split".to_string());
            }
            let key = &candidates[0];
            let node_section = format_split_plan_node(store, &graph, key);
            load_prompt("split-plan", &[("{{TOPOLOGY}}", &topology), ("{{NODE}}", &node_section)])
        }
        _ => Err(format!("Unknown agent: {}. Use: replay, linker, separator, transfer, health, rename, split", agent)),
    }
}