stop filtering journal/digest nodes from knowledge and search
Journal and digest nodes are episodic memory — they should participate in the graph on the same terms as everything else. Remove all journal#/daily-/weekly-/monthly- skip filters from knowledge extraction, connector pairs, challenger, semantic keys, and link candidate selection. Use the node_type field instead of key-name matching for episodic/semantic classification.

Operational nodes (MEMORY, where-am-i, work-queue, work-state) are still filtered — they're system state, not memory.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
b00e09b091
commit
70c0276fa0
4 changed files with 13 additions and 28 deletions
|
|
@ -515,10 +515,10 @@ fn spectral_distance(embedding: &HashMap<String, Vec<f64>>, a: &str, b: &str) ->
|
|||
|
||||
fn select_extractor_clusters(_store: &Store, n: usize) -> Vec<Vec<String>> {
|
||||
let embedding = load_spectral_embedding();
|
||||
let skip = ["journal", "MEMORY", "where-am-i", "work-queue"];
|
||||
let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
|
||||
|
||||
let semantic_keys: Vec<&String> = embedding.keys()
|
||||
.filter(|k| !k.starts_with("journal#") && !skip.contains(&k.as_str()))
|
||||
.filter(|k| !skip.contains(&k.as_str()))
|
||||
.collect();
|
||||
|
||||
let cluster_size = 5;
|
||||
|
|
@ -578,15 +578,11 @@ pub fn run_extractor(store: &Store, graph: &Graph, batch_size: usize) -> Result<
|
|||
|
||||
fn select_connector_pairs(store: &Store, graph: &Graph, n: usize) -> Vec<(Vec<String>, Vec<String>)> {
|
||||
let embedding = load_spectral_embedding();
|
||||
let skip_prefixes = ["journal#", "daily-", "weekly-", "monthly-", "all-sessions"];
|
||||
let skip_exact: HashSet<&str> = ["journal", "MEMORY", "where-am-i",
|
||||
let skip_exact: HashSet<&str> = ["MEMORY", "where-am-i",
|
||||
"work-queue", "work-state"].iter().copied().collect();
|
||||
|
||||
let semantic_keys: Vec<&String> = embedding.keys()
|
||||
.filter(|k| {
|
||||
!skip_exact.contains(k.as_str())
|
||||
&& !skip_prefixes.iter().any(|p| k.starts_with(p))
|
||||
})
|
||||
.filter(|k| !skip_exact.contains(k.as_str()))
|
||||
.collect();
|
||||
|
||||
let mut pairs = Vec::new();
|
||||
|
|
@ -656,8 +652,7 @@ pub fn run_challenger(store: &Store, graph: &Graph, batch_size: usize) -> Result
|
|||
|
||||
let mut candidates: Vec<(&String, usize)> = store.nodes.iter()
|
||||
.filter(|(k, _)| {
|
||||
!k.starts_with("journal#")
|
||||
&& !["journal", "MEMORY", "where-am-i"].contains(&k.as_str())
|
||||
!["MEMORY", "where-am-i", "work-queue", "work-state"].contains(&k.as_str())
|
||||
})
|
||||
.map(|(k, _)| (k, graph.degree(k)))
|
||||
.collect();
|
||||
|
|
|
|||
12
src/llm.rs
12
src/llm.rs
|
|
@ -133,17 +133,11 @@ pub(crate) fn parse_json_response(response: &str) -> Result<serde_json::Value, S
|
|||
Err(format!("no valid JSON in response: {preview}..."))
|
||||
}
|
||||
|
||||
/// Get semantic keys (non-journal, non-system) for prompt context.
|
||||
/// Get non-operational keys for prompt context.
|
||||
pub(crate) fn semantic_keys(store: &Store) -> Vec<String> {
|
||||
let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
|
||||
let mut keys: Vec<String> = store.nodes.keys()
|
||||
.filter(|k| {
|
||||
!k.starts_with("journal#")
|
||||
&& *k != "journal"
|
||||
&& *k != "MEMORY"
|
||||
&& *k != "where-am-i"
|
||||
&& *k != "work-queue"
|
||||
&& *k != "work-state"
|
||||
})
|
||||
.filter(|k| !skip.contains(&k.as_str()))
|
||||
.cloned()
|
||||
.collect();
|
||||
keys.sort();
|
||||
|
|
|
|||
|
|
@ -145,12 +145,10 @@ fn format_nodes_section(store: &Store, items: &[ReplayItem], graph: &Graph) -> S
|
|||
// Suggested link targets: text-similar semantic nodes not already neighbors
|
||||
let neighbor_keys: std::collections::HashSet<&str> = neighbors.iter()
|
||||
.map(|(k, _)| k.as_str()).collect();
|
||||
let skip = ["MEMORY", "where-am-i", "work-queue", "work-state"];
|
||||
let mut candidates: Vec<(&str, f32)> = store.nodes.iter()
|
||||
.filter(|(k, _)| {
|
||||
// Only semantic/topic file nodes, not episodic
|
||||
!k.starts_with("journal.") && !k.starts_with("deep-index.")
|
||||
&& !k.starts_with("daily-") && !k.starts_with("weekly-")
|
||||
&& !k.starts_with("monthly-") && !k.starts_with("session-")
|
||||
!skip.contains(&k.as_str())
|
||||
&& *k != &item.key
|
||||
&& !neighbor_keys.contains(k.as_str())
|
||||
})
|
||||
|
|
@ -300,7 +298,7 @@ pub fn consolidation_batch(store: &Store, count: usize, auto: bool) -> Result<()
|
|||
println!("Consolidation batch ({} nodes):\n", items.len());
|
||||
for item in &items {
|
||||
let node_type = store.nodes.get(&item.key)
|
||||
.map(|n| if n.key.contains("journal") { "episodic" } else { "semantic" })
|
||||
.map(|n| if matches!(n.node_type, crate::store::NodeType::EpisodicSession) { "episodic" } else { "semantic" })
|
||||
.unwrap_or("?");
|
||||
println!(" [{:.3}] {} (cc={:.3}, interval={}d, type={})",
|
||||
item.priority, item.key, item.cc, item.interval_days, node_type);
|
||||
|
|
@ -347,8 +345,6 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
|
|||
store.nodes.get(&item.key)
|
||||
.map(|n| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
|
||||
.unwrap_or(false)
|
||||
|| item.key.contains("journal")
|
||||
|| item.key.contains("session")
|
||||
});
|
||||
items.truncate(count);
|
||||
let nodes_section = format_nodes_section(store, &items, &graph);
|
||||
|
|
@ -363,7 +359,7 @@ pub fn agent_prompt(store: &Store, agent: &str, count: usize) -> Result<String,
|
|||
"transfer" => {
|
||||
// Recent episodic entries
|
||||
let mut episodes: Vec<_> = store.nodes.iter()
|
||||
.filter(|(k, _)| k.contains("journal") || k.contains("session"))
|
||||
.filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
|
||||
.map(|(k, n)| (k.clone(), n.timestamp))
|
||||
.collect();
|
||||
episodes.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
|
|
|
|||
|
|
@ -210,7 +210,7 @@ pub fn consolidation_plan(store: &Store) -> ConsolidationPlan {
|
|||
|
||||
// Count episodic vs semantic nodes
|
||||
let episodic_count = store.nodes.iter()
|
||||
.filter(|(k, _)| k.contains("journal") || k.contains("session"))
|
||||
.filter(|(_, n)| matches!(n.node_type, crate::store::NodeType::EpisodicSession))
|
||||
.count();
|
||||
let _semantic_count = store.nodes.len() - episodic_count;
|
||||
let episodic_ratio = if store.nodes.is_empty() { 0.0 }
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue