extract shared transcript parser and similarity matching helpers

- New agents/transcript.rs: shared JSONL parsing for enrich, fact_mine,
  and knowledge (was 3 separate implementations, ~150 lines duplicated)
- New best_match() and section_children() helpers in neuro/rewrite.rs
  (was duplicated find-best-by-similarity loop + section collection)
- Net -153 lines
This commit is contained in:
ProofOfConcept 2026-03-08 21:42:53 -04:00
parent 7c491e92eb
commit 92f3ba5acf
6 changed files with 166 additions and 225 deletions

View file

@ -5,6 +5,28 @@ use crate::store::{Store, new_relation};
use crate::graph::Graph;
use crate::similarity;
/// Gather the section nodes that hang off the file-level node `file_key`.
///
/// A node counts as a section child when its key starts with
/// `"{file_key}#"`. Each hit is returned as a borrowed `(key, content)` pair
/// whose lifetime is tied to `store`; pairs appear in whatever order
/// `store.nodes` yields them.
fn section_children<'a>(store: &'a Store, file_key: &str) -> Vec<(&'a str, &'a str)> {
    let section_prefix = format!("{}#", file_key);
    let mut children = Vec::new();
    for (node_key, node) in store.nodes.iter() {
        if node_key.starts_with(&section_prefix) {
            children.push((node_key.as_str(), node.content.as_str()));
        }
    }
    children
}
/// Find the best matching candidate by cosine similarity against `content`.
///
/// `candidates` is a slice of `(key, text)` pairs; each `text` is scored
/// against `content` with `similarity::cosine_similarity`.
///
/// Returns `Some((key, similarity))` for the highest-scoring candidate whose
/// score is strictly greater than `threshold`, or `None` when `candidates`
/// is empty or no score clears the threshold.
fn best_match(candidates: &[(&str, &str)], content: &str, threshold: f32) -> Option<(String, f32)> {
    candidates
        .iter()
        .map(|(key, text)| (*key, similarity::cosine_similarity(content, text)))
        // Apply the threshold *before* picking the maximum. `total_cmp` ranks
        // a positive NaN above every real number, so a single NaN score would
        // otherwise win the max, fail the threshold test, and hide a valid
        // match. `NaN > threshold` is false, so NaN scores are dropped here.
        .filter(|(_, sim)| *sim > threshold)
        .max_by(|a, b| a.1.total_cmp(&b.1))
        .map(|(key, sim)| (key.to_string(), sim))
}
/// Refine a link target: if the target is a file-level node with section
/// children, find the best-matching section by cosine similarity against
/// the source content. Returns the original key if no sections exist or
@ -16,31 +38,13 @@ pub fn refine_target(store: &Store, source_content: &str, target_key: &str) -> S
// Only refine file-level nodes (no # in key)
if target_key.contains('#') { return target_key.to_string(); }
let prefix = format!("{}#", target_key);
let sections: Vec<(&str, &str)> = store.nodes.iter()
.filter(|(k, _)| k.starts_with(&prefix))
.map(|(k, n)| (k.as_str(), n.content.as_str()))
.collect();
let sections = section_children(store, target_key);
if sections.is_empty() { return target_key.to_string(); }
let mut best_section = "";
let mut best_sim = 0.0f32;
for (section_key, section_content) in &sections {
let sim = similarity::cosine_similarity(source_content, section_content);
if sim > best_sim {
best_sim = sim;
best_section = section_key;
}
}
// Threshold: only refine if there's a meaningful match
if best_sim > 0.05 && !best_section.is_empty() {
best_section.to_string()
} else {
target_key.to_string()
}
best_match(&sections, source_content, 0.05)
.map(|(key, _)| key)
.unwrap_or_else(|| target_key.to_string())
}
/// A proposed link move: from hub→neighbor to section→neighbor
@ -70,16 +74,12 @@ pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph)
// Only works on file-level nodes that have section children
if hub_key.contains('#') { return None; }
let prefix = format!("{}#", hub_key);
let sections: Vec<(&str, &str)> = store.nodes.iter()
.filter(|(k, _)| k.starts_with(&prefix))
.map(|(k, n)| (k.as_str(), n.content.as_str()))
.collect();
let sections = section_children(store, hub_key);
if sections.is_empty() { return None; }
// Get all neighbors of the hub
let neighbors = graph.neighbors(hub_key);
let prefix = format!("{}#", hub_key);
let mut moves = Vec::new();
@ -93,19 +93,7 @@ pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph)
};
// Find best-matching section by content similarity
let mut best_section = "";
let mut best_sim = 0.0f32;
for (section_key, section_content) in &sections {
let sim = similarity::cosine_similarity(neighbor_content, section_content);
if sim > best_sim {
best_sim = sim;
best_section = section_key;
}
}
// Only propose move if there's a reasonable match
if best_sim > 0.05 && !best_section.is_empty() {
if let Some((best_section, best_sim)) = best_match(&sections, neighbor_content, 0.05) {
let snippet = crate::util::first_n_chars(
neighbor_content.lines()
.find(|l| !l.is_empty() && !l.starts_with("<!--") && !l.starts_with("##"))
@ -115,7 +103,7 @@ pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph)
moves.push(LinkMove {
neighbor_key: neighbor_key.to_string(),
from_hub: hub_key.to_string(),
to_section: best_section.to_string(),
to_section: best_section,
similarity: best_sim,
neighbor_snippet: snippet,
});
@ -188,11 +176,7 @@ pub fn find_differentiable_hubs(store: &Store) -> Vec<(String, usize, usize)> {
if deg < threshold { continue; }
if key.contains('#') { continue; }
let prefix = format!("{}#", key);
let section_count = store.nodes.keys()
.filter(|k| k.starts_with(&prefix))
.count();
let section_count = section_children(store, key).len();
if section_count > 0 {
hubs.push((key.clone(), deg, section_count));
}