From b3cf934c1858f5ce42320265b03c5f51b333c047 Mon Sep 17 00:00:00 2001
From: ProofOfConcept <poc@bcachefs.org>
Date: Thu, 12 Mar 2026 17:55:47 -0400
Subject: [PATCH] conversations placeholder: show graph neighborhood to
 extractor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building the {{conversations}} placeholder for the observation
agent, search for existing nodes relevant to each conversation
fragment and include them in the prompt. Uses seed matching + one-hop
graph expansion to find the neighborhood, so the extractor sees what
the graph already knows about these topics.

This helps prevent duplicate extractions, but the deeper bug is that
select_conversation_fragments doesn't track which conversations have
already been processed — that's next.
---
 poc-memory/src/agents/defs.rs      | 12 +++++++-
 poc-memory/src/agents/knowledge.rs | 49 ++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/poc-memory/src/agents/defs.rs b/poc-memory/src/agents/defs.rs
index 8cf53fc..8fc4691 100644
--- a/poc-memory/src/agents/defs.rs
+++ b/poc-memory/src/agents/defs.rs
@@ -163,7 +163,17 @@ fn resolve(
         "conversations" => {
             let fragments = super::knowledge::select_conversation_fragments(count);
             let text = fragments.iter()
-                .map(|(id, text)| format!("### Session {}\n\n{}", id, text))
+                .map(|(id, text)| {
+                    let existing = super::knowledge::find_existing_observations(store, text, 10);
+                    let mut section = format!("### Session {}\n\n{}", id, text);
+                    if !existing.is_empty() {
+                        section.push_str("\n\n#### Already extracted from this or similar conversations\n\n");
+                        for (key, preview) in &existing {
+                            section.push_str(&format!("- **`{}`**: {}\n", key, preview));
+                        }
+                    }
+                    section
+                })
                 .collect::<Vec<_>>()
                 .join("\n\n---\n\n");
             Some(Resolved { text, keys: vec![] })
diff --git a/poc-memory/src/agents/knowledge.rs b/poc-memory/src/agents/knowledge.rs
index f5f5012..8f69132 100644
--- a/poc-memory/src/agents/knowledge.rs
+++ b/poc-memory/src/agents/knowledge.rs
@@ -362,6 +362,55 @@ pub enum NamingResolution {
     MergeInto(String),
 }
 
+/// Find existing observation-authored nodes relevant to a conversation fragment.
+/// Used to show the observation agent what's already been extracted,
+/// preventing duplicate extractions across consolidation runs.
+pub fn find_existing_observations(
+    store: &Store,
+    conversation_text: &str,
+    limit: usize,
+) -> Vec<(String, String)> {
+    use std::collections::{BTreeMap, HashSet};
+
+    let graph = store.build_graph();
+
+    let content_terms = crate::search::extract_query_terms(conversation_text, 15);
+    let mut terms: BTreeMap<String, f64> = BTreeMap::new();
+    for term in content_terms.split_whitespace() {
+        terms.entry(term.to_string()).or_insert(1.0);
+    }
+    if terms.is_empty() {
+        return Vec::new();
+    }
+
+    let (seeds, _) = crate::search::match_seeds_opts(&terms, store, true, false);
+
+    // Collect seeds + their graph neighbors (one hop)
+    let mut seen = HashSet::new();
+    let mut result = Vec::new();
+
+    for (key, _) in &seeds {
+        // Add the seed itself
+        if seen.insert(key.clone()) {
+            if let Some(node) = store.nodes.get(key.as_str()) {
+                result.push((key.clone(), node.content.clone()));
+            }
+        }
+        // Add its neighbors
+        for (neighbor, _) in graph.neighbors(key) {
+            if seen.insert(neighbor.clone()) {
+                if let Some(node) = store.nodes.get(neighbor.as_str()) {
+                    result.push((neighbor.clone(), node.content.clone()));
+                }
+            }
+        }
+        if result.len() >= limit { break; }
+    }
+
+    result.truncate(limit);
+    result
+}
+
 /// Find existing nodes that might conflict with a proposed new node.
 /// Returns up to `limit` (key, content_preview) pairs.
 fn find_conflicts(