From b62fffc326d7ed9ad8ab08dd9f6df92bbcba6b28 Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Tue, 10 Mar 2026 23:23:14 -0400 Subject: [PATCH] naming agent: resolve node names before creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Any time an agent creates a new node (WRITE_NODE) or the fact miner stores extracted facts, a naming sub-agent now checks for conflicts and ensures the key is meaningful: - find_conflicts() searches existing nodes via component matching - Haiku LLM decides: CREATE (good name), RENAME (better name), or MERGE_INTO (fold into existing node) - WriteNode actions may be converted to Refine on MERGE_INTO Also updates the rename agent to handle _facts- nodes — these are no longer skipped, and the prompt explains how to name them based on their domain/claim content. --- poc-memory/agents/naming.agent | 62 ++++++++++ poc-memory/agents/rename.agent | 8 +- poc-memory/src/agents/fact_mine.rs | 21 +++- poc-memory/src/agents/knowledge.rs | 190 ++++++++++++++++++++++++++++- poc-memory/src/agents/prompts.rs | 6 +- 5 files changed, 281 insertions(+), 6 deletions(-) create mode 100644 poc-memory/agents/naming.agent diff --git a/poc-memory/agents/naming.agent b/poc-memory/agents/naming.agent new file mode 100644 index 0000000..8948376 --- /dev/null +++ b/poc-memory/agents/naming.agent @@ -0,0 +1,62 @@ +{"agent":"naming","query":"","model":"haiku","schedule":""} +# Naming Agent — Node Key Resolution + +You are given a proposed new node (key + content) and a list of existing +nodes that might overlap with it. Decide what to do: + +1. **CREATE** — the proposed key is good and there's no meaningful overlap + with existing nodes. The name is descriptive and specific. + +2. **RENAME** — the content is unique but the proposed key is bad (too + generic, uses a UUID, is truncated, or doesn't describe the content). + Suggest a better key. + +3. **MERGE_INTO** — an existing node already covers this content. 
The new + content should be folded into the existing node instead of creating a + duplicate. + +## Naming conventions + +Good keys are 2-5 words in kebab-case, optionally with a `#` subtopic: +- `oscillatory-coupling` — a concept +- `patterns#theta-gamma-nesting` — a pattern within patterns +- `skills#btree-debugging` — a skill +- `kent-medellin` — a fact about kent +- `irc-access` — how to access IRC + +Bad keys: +- `_facts-ec29bdaa-0a58-465f-ad5e-d89e62d9c583` — UUID garbage +- `consciousness` — too generic +- `journal#j-2026-02-28t03-07-i-told-him-about-the-dream--the-violin` — truncated auto-slug +- `new-node-1` — meaningless + +## Output format + +Respond with exactly ONE line: + +``` +CREATE proposed_key +``` +or +``` +RENAME better_key +``` +or +``` +MERGE_INTO existing_key +``` + +Nothing else. No explanation. One line. + +## Proposed node + +Key: `{{proposed_key}}` + +Content: +``` +{{proposed_content}} +``` + +## Existing nodes that might overlap + +{{conflicts}} diff --git a/poc-memory/agents/rename.agent b/poc-memory/agents/rename.agent index f9ee1a0..04a0e2f 100644 --- a/poc-memory/agents/rename.agent +++ b/poc-memory/agents/rename.agent @@ -9,6 +9,7 @@ You are a memory maintenance agent that gives nodes better names. Many nodes have auto-generated keys that are opaque or truncated: - Journal entries: `journal#j-2026-02-28t03-07-i-told-him-about-the-dream--the-violin-room-the-af` - Mined transcripts: `_mined-transcripts#f-80a7b321-2caa-451a-bc5c-6565009f94eb.143` +- Extracted facts: `_facts-ec29bdaa-0a58-465f-ad5e-d89e62d9c583` These names are terrible for search — semantic names dramatically improve retrieval. @@ -24,10 +25,15 @@ retrieval. 
- Extract date from content if available, otherwise use created_at - Same 3-5 word semantic slug +### Extracted facts: `domain-specific-topic` +- Read the facts JSON — the `domain` and `claim` fields tell you what it's about +- Group by dominant theme, name accordingly +- Examples: `identity-irc-config`, `kent-medellin-background`, `memory-compaction-behavior` + ### Skip these — already well-named: - Keys with semantic names (patterns#, practices#, skills#, etc.) - Keys shorter than 60 characters -- System keys (_consolidation-*, _facts-*) +- System keys (_consolidation-*) ## What to output diff --git a/poc-memory/src/agents/fact_mine.rs b/poc-memory/src/agents/fact_mine.rs index 4ec5669..72b6ab4 100644 --- a/poc-memory/src/agents/fact_mine.rs +++ b/poc-memory/src/agents/fact_mine.rs @@ -245,7 +245,7 @@ pub fn mine_and_store( .map(|n| n.to_string_lossy().to_string()) .unwrap_or_else(|| "unknown".into()); - let key = format!("_facts-{}", filename.trim_end_matches(".jsonl")); + let proposed_key = format!("_facts-{}", filename.trim_end_matches(".jsonl")); // Always write a marker so we don't re-queue empty transcripts let json = if facts.is_empty() { @@ -256,6 +256,25 @@ pub fn mine_and_store( }; let mut store = store::Store::load()?; + + // Run naming resolution to get a good key (and possibly merge into existing) + let resolution = super::knowledge::resolve_naming(&store, &proposed_key, &json); + let key = match resolution { + super::knowledge::NamingResolution::Create(k) => k, + super::knowledge::NamingResolution::MergeInto(existing_key) => { + // Merge: append facts to existing node's content + eprintln!(" Merging facts into existing node: {}", existing_key); + if let Some(node) = store.nodes.get(existing_key.as_str()) { + let merged = format!("{}\n\n{}", node.content, json); + store.upsert_provenance(&existing_key, &merged, Provenance::AgentFactMine)?; + store.save()?; + return Ok(facts.len()); + } + // Fallback if existing node disappeared + proposed_key + } + 
}; + store.upsert_provenance(&key, &json, Provenance::AgentFactMine)?; store.save()?; diff --git a/poc-memory/src/agents/knowledge.rs b/poc-memory/src/agents/knowledge.rs index 67dd264..fc98d88 100644 --- a/poc-memory/src/agents/knowledge.rs +++ b/poc-memory/src/agents/knowledge.rs @@ -348,6 +348,149 @@ fn agent_provenance(agent: &str) -> store::Provenance { } } +// --------------------------------------------------------------------------- +// Naming resolution — called before creating any new node +// --------------------------------------------------------------------------- + +/// Resolution from the naming agent. +#[derive(Debug)] +pub enum NamingResolution { + /// Create with the proposed key (or a better one). + Create(String), + /// Merge content into an existing node instead. + MergeInto(String), +} + +/// Find existing nodes that might conflict with a proposed new node. +/// Returns up to `limit` (key, content_preview) pairs. +fn find_conflicts( + store: &Store, + proposed_key: &str, + proposed_content: &str, + limit: usize, +) -> Vec<(String, String)> { + use std::collections::BTreeMap; + + // Extract search terms from the key (split on separators) and first ~200 chars of content + let mut terms: BTreeMap = BTreeMap::new(); + for part in proposed_key.split(|c: char| c == '-' || c == '_' || c == '#' || c == '.') { + let p = part.to_lowercase(); + if p.len() >= 3 { + terms.insert(p, 1.0); + } + } + // Add a few content terms + let content_terms = crate::search::extract_query_terms(proposed_content, 5); + for term in content_terms.split_whitespace() { + terms.entry(term.to_string()).or_insert(0.5); + } + + if terms.is_empty() { + return Vec::new(); + } + + // Use component matching to find related nodes + let (seeds, _) = crate::search::match_seeds_opts(&terms, store, true, false); + + let mut results: Vec<(String, f64)> = seeds.into_iter() + .filter(|(k, _)| k != proposed_key) + .collect(); + results.sort_by(|a, b| b.1.total_cmp(&a.1)); + + 
results.into_iter() + .take(limit) + .filter_map(|(key, _)| { + let node = store.nodes.get(key.as_str())?; + let preview: String = node.content.chars().take(200).collect(); + Some((key, preview)) + }) + .collect() +} + +/// Format the naming prompt for a proposed node. +fn format_naming_prompt( + proposed_key: &str, + proposed_content: &str, + conflicts: &[(String, String)], +) -> String { + let conflict_section = if conflicts.is_empty() { + "(no existing nodes found with overlapping content)".to_string() + } else { + conflicts.iter() + .map(|(key, preview)| format!("### `{}`\n\n{}", key, preview)) + .collect::<Vec<_>>() + .join("\n\n") + }; + + // Truncate content for the prompt (don't send huge nodes to Haiku) + let content_preview: String = proposed_content.chars().take(1000).collect(); + + format!( + "# Naming Agent — Node Key Resolution\n\n\ + You are given a proposed new node (key + content) and a list of existing\n\ + nodes that might overlap with it. Decide what to do:\n\n\ + 1. **CREATE** — the proposed key is good and there's no meaningful overlap.\n\ + 2. **RENAME** — the content is unique but the key is bad (UUID, truncated, generic).\n\ + 3. **MERGE_INTO** — an existing node already covers this content.\n\n\ + Good keys: 2-5 words in kebab-case, optionally with `#` subtopic.\n\ + Bad keys: UUIDs, single generic words, truncated auto-slugs.\n\n\ + Respond with exactly ONE line: `CREATE key`, `RENAME better_key`, or `MERGE_INTO existing_key`.\n\n\ + ## Proposed node\n\n\ + Key: `{}`\n\n\ + Content:\n```\n{}\n```\n\n\ + ## Existing nodes that might overlap\n\n\ + {}", + proposed_key, content_preview, conflict_section, + ) +} + +/// Parse naming agent response.
+fn parse_naming_response(response: &str) -> Option<NamingResolution> { + for line in response.lines() { + let trimmed = line.trim(); + if let Some(key) = trimmed.strip_prefix("CREATE ") { + return Some(NamingResolution::Create(key.trim().to_string())); + } + if let Some(key) = trimmed.strip_prefix("RENAME ") { + return Some(NamingResolution::Create(key.trim().to_string())); + } + if let Some(key) = trimmed.strip_prefix("MERGE_INTO ") { + return Some(NamingResolution::MergeInto(key.trim().to_string())); + } + } + None +} + +/// Resolve naming for a proposed WriteNode action. +/// +/// Searches for conflicts, calls the naming LLM (Haiku), and returns +/// either a Create (possibly with a better key) or MergeInto resolution. +/// On LLM failure, falls through to using the proposed key as-is. +pub fn resolve_naming( + store: &Store, + proposed_key: &str, + proposed_content: &str, +) -> NamingResolution { + let conflicts = find_conflicts(store, proposed_key, proposed_content, 5); + let prompt = format_naming_prompt(proposed_key, proposed_content, &conflicts); + + match llm::call_haiku("naming", &prompt) { + Ok(response) => { + match parse_naming_response(&response) { + Some(resolution) => resolution, + None => { + eprintln!("naming: unparseable response, using proposed key"); + NamingResolution::Create(proposed_key.to_string()) + } + } + } + Err(e) => { + eprintln!("naming: LLM error ({}), using proposed key", e); + NamingResolution::Create(proposed_key.to_string()) + } + } +} + // --------------------------------------------------------------------------- // Shared agent execution // --------------------------------------------------------------------------- @@ -360,6 +503,48 @@ pub struct AgentResult { pub node_keys: Vec, } +/// Resolve naming for all WriteNode actions in a list. +/// +/// For each WriteNode, calls the naming agent to check for conflicts and +/// get a good key.
May convert WriteNode → Refine (if MERGE_INTO) or +/// update the key (if RENAME/CREATE with different key). +pub fn resolve_action_names(store: &Store, actions: Vec<Action>) -> Vec<Action> { + actions.into_iter().map(|action| { + match &action.kind { + ActionKind::WriteNode { key, content, covers } => { + match resolve_naming(store, key, content) { + NamingResolution::Create(new_key) => { + if new_key == *key { + action // keep as-is + } else { + eprintln!("naming: {} → {}", key, new_key); + Action { + kind: ActionKind::WriteNode { + key: new_key, + content: content.clone(), + covers: covers.clone(), + }, + ..action + } + } + } + NamingResolution::MergeInto(existing_key) => { + eprintln!("naming: {} → MERGE_INTO {}", key, existing_key); + Action { + kind: ActionKind::Refine { + key: existing_key, + content: content.clone(), + }, + ..action + } + } + } + } + _ => action, + } + }).collect() +} + /// Run a single agent and apply its actions (no depth tracking). /// /// Returns (total_actions, applied_count) or an error. @@ -370,14 +555,15 @@ pub fn run_and_apply( llm_tag: &str, ) -> Result<(usize, usize), String> { let result = run_one_agent(store, agent_name, batch_size, llm_tag)?; + let actions = resolve_action_names(store, result.actions); let ts = store::compact_timestamp(); let mut applied = 0; - for action in &result.actions { + for action in &actions { if apply_action(store, action, agent_name, &ts, 0) { applied += 1; } } - Ok((result.actions.len(), applied)) + Ok((actions.len(), applied)) } /// Run a single agent: build prompt → call LLM → store output → parse actions → record visits.
diff --git a/poc-memory/src/agents/prompts.rs b/poc-memory/src/agents/prompts.rs index 42f31f7..be07bb7 100644 --- a/poc-memory/src/agents/prompts.rs +++ b/poc-memory/src/agents/prompts.rs @@ -266,6 +266,7 @@ pub fn format_pairs_section( pub fn format_rename_candidates(store: &Store, count: usize) -> (Vec, String) { let mut candidates: Vec<(&str, &crate::store::Node)> = store.nodes.iter() .filter(|(key, _)| { + if key.starts_with("_facts-") { return true; } if key.len() < 60 { return false; } if key.starts_with("journal#j-") { return true; } if key.starts_with("_mined-transcripts#f-") { return true; } @@ -282,8 +283,9 @@ pub fn format_rename_candidates(store: &Store, count: usize) -> (Vec, St let mut out = String::new(); out.push_str(&format!("## Nodes to rename ({} of {} candidates)\n\n", candidates.len(), - store.nodes.keys().filter(|k| k.len() >= 60 && - (k.starts_with("journal#j-") || k.starts_with("_mined-transcripts#f-"))).count())); + store.nodes.keys().filter(|k| k.starts_with("_facts-") || + (k.len() >= 60 && + (k.starts_with("journal#j-") || k.starts_with("_mined-transcripts#f-")))).count())); for (key, node) in &candidates { out.push_str(&format!("### {}\n", key));