extract shared transcript parser and similarity matching helpers
- New agents/transcript.rs: shared JSONL parsing for enrich, fact_mine, and knowledge (was 3 separate implementations, ~150 lines duplicated) - New best_match() and section_children() helpers in neuro/rewrite.rs (was duplicated find-best-by-similarity loop + section collection) - Net -153 lines
This commit is contained in:
parent
7c491e92eb
commit
92f3ba5acf
6 changed files with 166 additions and 225 deletions
|
|
@ -5,6 +5,28 @@ use crate::store::{Store, new_relation};
|
|||
use crate::graph::Graph;
|
||||
use crate::similarity;
|
||||
|
||||
/// Collect (key, content) pairs for all section children of a file-level node.
|
||||
fn section_children<'a>(store: &'a Store, file_key: &str) -> Vec<(&'a str, &'a str)> {
|
||||
let prefix = format!("{}#", file_key);
|
||||
store.nodes.iter()
|
||||
.filter(|(k, _)| k.starts_with(&prefix))
|
||||
.map(|(k, n)| (k.as_str(), n.content.as_str()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Find the best matching candidate by cosine similarity against content.
|
||||
/// Returns (key, similarity) if any candidate exceeds threshold.
|
||||
fn best_match(candidates: &[(&str, &str)], content: &str, threshold: f32) -> Option<(String, f32)> {
|
||||
let (best_key, best_sim) = candidates.iter()
|
||||
.map(|(key, text)| (*key, similarity::cosine_similarity(content, text)))
|
||||
.max_by(|a, b| a.1.total_cmp(&b.1))?;
|
||||
if best_sim > threshold {
|
||||
Some((best_key.to_string(), best_sim))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Refine a link target: if the target is a file-level node with section
|
||||
/// children, find the best-matching section by cosine similarity against
|
||||
/// the source content. Returns the original key if no sections exist or
|
||||
|
|
@ -16,31 +38,13 @@ pub fn refine_target(store: &Store, source_content: &str, target_key: &str) -> S
|
|||
// Only refine file-level nodes (no # in key)
|
||||
if target_key.contains('#') { return target_key.to_string(); }
|
||||
|
||||
let prefix = format!("{}#", target_key);
|
||||
let sections: Vec<(&str, &str)> = store.nodes.iter()
|
||||
.filter(|(k, _)| k.starts_with(&prefix))
|
||||
.map(|(k, n)| (k.as_str(), n.content.as_str()))
|
||||
.collect();
|
||||
let sections = section_children(store, target_key);
|
||||
|
||||
if sections.is_empty() { return target_key.to_string(); }
|
||||
|
||||
let mut best_section = "";
|
||||
let mut best_sim = 0.0f32;
|
||||
|
||||
for (section_key, section_content) in §ions {
|
||||
let sim = similarity::cosine_similarity(source_content, section_content);
|
||||
if sim > best_sim {
|
||||
best_sim = sim;
|
||||
best_section = section_key;
|
||||
}
|
||||
}
|
||||
|
||||
// Threshold: only refine if there's a meaningful match
|
||||
if best_sim > 0.05 && !best_section.is_empty() {
|
||||
best_section.to_string()
|
||||
} else {
|
||||
target_key.to_string()
|
||||
}
|
||||
best_match(§ions, source_content, 0.05)
|
||||
.map(|(key, _)| key)
|
||||
.unwrap_or_else(|| target_key.to_string())
|
||||
}
|
||||
|
||||
/// A proposed link move: from hub→neighbor to section→neighbor
|
||||
|
|
@ -70,16 +74,12 @@ pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph)
|
|||
// Only works on file-level nodes that have section children
|
||||
if hub_key.contains('#') { return None; }
|
||||
|
||||
let prefix = format!("{}#", hub_key);
|
||||
let sections: Vec<(&str, &str)> = store.nodes.iter()
|
||||
.filter(|(k, _)| k.starts_with(&prefix))
|
||||
.map(|(k, n)| (k.as_str(), n.content.as_str()))
|
||||
.collect();
|
||||
|
||||
let sections = section_children(store, hub_key);
|
||||
if sections.is_empty() { return None; }
|
||||
|
||||
// Get all neighbors of the hub
|
||||
let neighbors = graph.neighbors(hub_key);
|
||||
let prefix = format!("{}#", hub_key);
|
||||
|
||||
let mut moves = Vec::new();
|
||||
|
||||
|
|
@ -93,19 +93,7 @@ pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph)
|
|||
};
|
||||
|
||||
// Find best-matching section by content similarity
|
||||
let mut best_section = "";
|
||||
let mut best_sim = 0.0f32;
|
||||
|
||||
for (section_key, section_content) in §ions {
|
||||
let sim = similarity::cosine_similarity(neighbor_content, section_content);
|
||||
if sim > best_sim {
|
||||
best_sim = sim;
|
||||
best_section = section_key;
|
||||
}
|
||||
}
|
||||
|
||||
// Only propose move if there's a reasonable match
|
||||
if best_sim > 0.05 && !best_section.is_empty() {
|
||||
if let Some((best_section, best_sim)) = best_match(§ions, neighbor_content, 0.05) {
|
||||
let snippet = crate::util::first_n_chars(
|
||||
neighbor_content.lines()
|
||||
.find(|l| !l.is_empty() && !l.starts_with("<!--") && !l.starts_with("##"))
|
||||
|
|
@ -115,7 +103,7 @@ pub fn differentiate_hub_with_graph(store: &Store, hub_key: &str, graph: &Graph)
|
|||
moves.push(LinkMove {
|
||||
neighbor_key: neighbor_key.to_string(),
|
||||
from_hub: hub_key.to_string(),
|
||||
to_section: best_section.to_string(),
|
||||
to_section: best_section,
|
||||
similarity: best_sim,
|
||||
neighbor_snippet: snippet,
|
||||
});
|
||||
|
|
@ -188,11 +176,7 @@ pub fn find_differentiable_hubs(store: &Store) -> Vec<(String, usize, usize)> {
|
|||
if deg < threshold { continue; }
|
||||
if key.contains('#') { continue; }
|
||||
|
||||
let prefix = format!("{}#", key);
|
||||
let section_count = store.nodes.keys()
|
||||
.filter(|k| k.starts_with(&prefix))
|
||||
.count();
|
||||
|
||||
let section_count = section_children(store, key).len();
|
||||
if section_count > 0 {
|
||||
hubs.push((key.clone(), deg, section_count));
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue