From 70c0276fa08454b6226d9f60e849454097b602a8 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sun, 8 Mar 2026 20:02:01 -0400
Subject: [PATCH] stop filtering journal/digest nodes from knowledge and search
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Journal and digest nodes are episodic memory — they should participate
in the graph on the same terms as everything else. Remove all
journal#/daily-/weekly-/monthly- skip filters from knowledge
extraction, connector pairs, challenger, semantic keys, and link
candidate selection. Use node_type field instead of key name matching
for episodic/semantic classification.

Operational nodes (MEMORY, where-am-i, work-queue, work-state) are
still filtered — they're system state, not memory.

Co-Authored-By: ProofOfConcept
---
 src/knowledge.rs     | 15 +++++----------
 src/llm.rs           | 12 +++---------
 src/neuro/prompts.rs | 12 ++++--------
 src/neuro/scoring.rs |  2 +-
 4 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/src/knowledge.rs b/src/knowledge.rs
index 5bdb3d5..ccfd9bc 100644
--- a/src/knowledge.rs
+++ b/src/knowledge.rs
@@ -515,10 +515,10 @@ fn spectral_distance(embedding: &HashMap<String, Vec<f32>>, a: &str, b: &str) ->
 fn select_extractor_clusters(_store: &Store, n: usize) -> Vec<Vec<String>> {
     let embedding = load_spectral_embedding();
 
-    let skip = ["journal", "MEMORY", "where-am-i", "work-queue"];
+    let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
 
     let semantic_keys: Vec<&String> = embedding.keys()
-        .filter(|k| !k.starts_with("journal#") && !skip.contains(&k.as_str()))
+        .filter(|k| !skip.contains(&k.as_str()))
        .collect();
 
     let cluster_size = 5;
@@ -578,15 +578,11 @@ pub fn run_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<
 fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec<String>, Vec<String>)> {
     let embedding = load_spectral_embedding();
 
-    let skip_prefixes = ["journal#", "daily-", "weekly-", "monthly-", "all-sessions"];
-    let skip_exact: HashSet<&str> = ["journal", "MEMORY", "where-am-i",
+    let skip_exact: HashSet<&str> = ["MEMORY", "where-am-i",
         "work-queue", "work-state"].iter().copied().collect();
 
     let semantic_keys: Vec<&String> = embedding.keys()
-        .filter(|k| {
-            !skip_exact.contains(k.as_str())
-                && !skip_prefixes.iter().any(|p| k.starts_with(p))
-        })
+        .filter(|k| !skip_exact.contains(k.as_str()))
         .collect();
 
     let mut pairs = Vec::new();
@@ -656,8 +652,7 @@ pub fn run_challenger(store: &Store, graph: &Graph, batch_size: usize) -> Result
 
     let mut candidates: Vec<(&String, usize)> = store.nodes.iter()
         .filter(|(k, _)| {
-            !k.starts_with("journal#")
-                && !["journal", "MEMORY", "where-am-i"].contains(&k.as_str())
+            !["MEMORY", "where-am-i", "work-queue", "work-state"].contains(&k.as_str())
         })
         .map(|(k, _)| (k, graph.degree(k)))
         .collect();
diff --git a/src/llm.rs b/src/llm.rs
index 093bcc2..fafadb8 100644
--- a/src/llm.rs
+++ b/src/llm.rs
@@ -133,17 +133,11 @@ pub(crate) fn parse_json_response(response: &str) -> Result
 pub fn …(store: &Store) -> Vec<String> {
+    let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
     let mut keys: Vec<String> = store.nodes.keys()
-        .filter(|k| {
-            !k.starts_with("journal#")
-                && *k != "journal"
-                && *k != "MEMORY"
-                && *k != "where-am-i"
-                && *k != "work-queue"
-                && *k != "work-state"
-        })
+        .filter(|k| !skip.contains(&k.as_str()))
         .cloned()
         .collect();
     keys.sort();
diff --git a/src/neuro/prompts.rs b/src/neuro/prompts.rs
index d906ea5..9f41a83 100644
--- a/src/neuro/prompts.rs
+++ b/src/neuro/prompts.rs
@@ -145,12 +145,10 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S
     // Suggested link targets: text-similar semantic nodes not already neighbors
     let neighbor_keys: std::collections::HashSet<&str> = neighbors.iter()
         .map(|(k, _)| k.as_str()).collect();
+    let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
     let mut candidates: Vec<(&str, f32)> = store.nodes.iter()
         .filter(|(k, _)| {
-            // Only semantic/topic file nodes, not episodic
-            !k.starts_with("journal.") && !k.starts_with("deep-index.")
-                && !k.starts_with("daily-") && !k.starts_with("weekly-")
-                && !k.starts_with("monthly-") && !k.starts_with("session-")
+            !skip.contains(&k.as_str())
             && *k != &item.key
             && !neighbor_keys.contains(k.as_str())
         })
@@ -300,7 +298,7 @@ pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<()
     println!("Consolidation batch ({} nodes):\n", items.len());
     for item in &items {
         let node_type = store.nodes.get(&item.key)
-            .map(|n| if n.key.contains("journal") { "episodic" } else { "semantic" })
+            .map(|n| if matches!(n.node_type, crate::store::NodeType::EpisodicSession) { "episodic" } else { "semantic" })
            .unwrap_or("?");
         println!("  [{:.3}] {} (cc={:.3}, interval={}d, type={})",
             item.priority, item.key, item.cc, item.interval_days, node_type);
@@ -347,8 +345,6 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result
 pub fn …(store: &Store) -> Result<…> {
     // Recent episodic entries
     let mut episodes: Vec<_> = store.nodes.iter()
-        .filter(|(k, _)| k.contains("journal") || k.contains("session"))
+        .filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
         .map(|(k, n)| (k.clone(), n.timestamp))
         .collect();
     episodes.sort_by(|a, b| b.1.cmp(&a.1));
diff --git a/src/neuro/scoring.rs b/src/neuro/scoring.rs
index 2b66f07..1ce32f6 100644
--- a/src/neuro/scoring.rs
+++ b/src/neuro/scoring.rs
@@ -210,7 +210,7 @@ pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
 
     // Count episodic vs semantic nodes
     let episodic_count = store.nodes.iter()
-        .filter(|(k, _)| k.contains("journal") || k.contains("session"))
+        .filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
         .count();
     let _semantic_count = store.nodes.len() - episodic_count;
     let episodic_ratio = if store.nodes.is_empty() { 0.0 }
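
Note for reviewers: a minimal, self-contained sketch of the classification
rule this patch moves to, assuming a store::NodeType enum roughly as
referenced above. Only NodeType::EpisodicSession appears in the patch
itself; the Semantic variant, the Node struct shape, and the helper names
below are illustrative, not part of the change:

    // Sketch only: episodic vs. semantic is a property of the node
    // (node_type), while operational nodes are excluded by exact key.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    enum NodeType {
        EpisodicSession, // journal/digest entries: episodic memory
        Semantic,        // topic/knowledge nodes
    }

    struct Node {
        key: String,
        node_type: NodeType,
    }

    // Operational state is still filtered, by exact name, not prefix.
    const OPERATIONAL: [&str; 4] = ["MEMORY", "where-am-i", "work-queue", "work-state"];

    fn participates_in_graph(node: &Node) -> bool {
        !OPERATIONAL.contains(&node.key.as_str())
    }

    fn is_episodic(node: &Node) -> bool {
        matches!(node.node_type, NodeType::EpisodicSession)
    }

    fn main() {
        let journal = Node {
            key: "journal#2026-03-08".into(),
            node_type: NodeType::EpisodicSession,
        };
        let memory = Node {
            key: "MEMORY".into(),
            node_type: NodeType::Semantic,
        };
        // Journal nodes are no longer skipped by key prefix...
        assert!(participates_in_graph(&journal) && is_episodic(&journal));
        // ...while operational system state stays out of knowledge/search.
        assert!(!participates_in_graph(&memory));
    }

The design point: prefixes like "journal#" and "daily-" encoded node type
in naming conventions, so renaming a node could silently change how
extraction and search treated it; keying off node_type removes that
coupling.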