search: composable algorithm pipeline
Break search into composable stages that chain left-to-right: each stage takes seeds Vec<(String, f64)> and returns modified seeds.

Available algorithms:
  spread — spreading activation through graph edges
  spectral — nearest neighbors in spectral embedding
  manifold — (placeholder) extrapolation along seed direction

Stages accept inline params: spread,max_hops=4,edge_decay=0.5

memory-search gets --hook, --debug, --seen modes plus positional pipeline args. poc-memory search gets -p/--pipeline flags.

Also: fix spectral decompose() to skip zero eigenvalues from disconnected components, filter degenerate zero-coord nodes from spectral projection, POC_AGENT bail-out for daemon agents, all debug output to stdout.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
parent
0a35a17fad
commit
c1664bf76b
4 changed files with 723 additions and 151 deletions
|
|
@ -113,12 +113,20 @@ pub fn decompose(graph: &Graph, k: usize) -> SpectralResult {
|
|||
let s = eig.S();
|
||||
let u = eig.U();
|
||||
|
||||
let k = k.min(n);
|
||||
let mut eigenvalues = Vec::with_capacity(k);
|
||||
let mut eigvecs = Vec::with_capacity(k);
|
||||
|
||||
let s_col = s.column_vector();
|
||||
for col in 0..k {
|
||||
|
||||
// Skip trivial eigenvalues (near-zero = null space from disconnected components).
|
||||
// The number of zero eigenvalues equals the number of connected components.
|
||||
let mut start = 0;
|
||||
while start < n && s_col[start].abs() < 1e-8 {
|
||||
start += 1;
|
||||
}
|
||||
|
||||
let k = k.min(n.saturating_sub(start));
|
||||
for col in start..start + k {
|
||||
eigenvalues.push(s_col[col]);
|
||||
let mut vec = Vec::with_capacity(n);
|
||||
for row in 0..n {
|
||||
|
|
@ -287,24 +295,71 @@ pub fn nearest_to_seeds(
|
|||
seeds: &[&str],
|
||||
k: usize,
|
||||
) -> Vec<(String, f64)> {
|
||||
let seed_set: HashSet<&str> = seeds.iter().copied().collect();
|
||||
nearest_to_seeds_weighted(emb, &seeds.iter().map(|&s| (s, 1.0)).collect::<Vec<_>>(), None, k)
|
||||
}
|
||||
|
||||
let seed_coords: Vec<&Vec<f64>> = seeds.iter()
|
||||
.filter_map(|s| emb.coords.get(*s))
|
||||
/// Find nearest neighbors to weighted seed nodes, using link weights.
|
||||
///
|
||||
/// Each seed has a weight (from query term weighting). For candidates
|
||||
/// directly linked to a seed, the spectral distance is scaled by
|
||||
/// 1/(1 + link_strength) — strong links make effective distance shorter.
|
||||
/// Seed weight scales the contribution: high-weight seeds pull harder.
|
||||
///
|
||||
/// Returns (key, effective_distance) sorted by distance ascending.
|
||||
pub fn nearest_to_seeds_weighted(
|
||||
emb: &SpectralEmbedding,
|
||||
seeds: &[(&str, f64)], // (key, seed_weight)
|
||||
graph: Option<&crate::graph::Graph>,
|
||||
k: usize,
|
||||
) -> Vec<(String, f64)> {
|
||||
let seed_set: HashSet<&str> = seeds.iter().map(|(s, _)| *s).collect();
|
||||
|
||||
let seed_data: Vec<(&str, &Vec<f64>, f64)> = seeds.iter()
|
||||
.filter_map(|(s, w)| {
|
||||
emb.coords.get(*s)
|
||||
.filter(|c| c.iter().any(|&v| v.abs() > 1e-12)) // skip degenerate seeds
|
||||
.map(|c| (*s, c, *w))
|
||||
})
|
||||
.collect();
|
||||
if seed_coords.is_empty() {
|
||||
if seed_data.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let weights = eigenvalue_weights(&emb.eigenvalues);
|
||||
// Build seed→neighbor link strength lookup
|
||||
let link_strengths: HashMap<(&str, &str), f32> = if let Some(g) = graph {
|
||||
let mut map = HashMap::new();
|
||||
for &(seed_key, _) in seeds {
|
||||
for (neighbor, strength) in g.neighbors(seed_key) {
|
||||
map.insert((seed_key, neighbor.as_str()), strength);
|
||||
}
|
||||
}
|
||||
map
|
||||
} else {
|
||||
HashMap::new()
|
||||
};
|
||||
|
||||
let dim_weights = eigenvalue_weights(&emb.eigenvalues);
|
||||
|
||||
let mut distances: Vec<(String, f64)> = emb.coords.iter()
|
||||
.filter(|(k, _)| !seed_set.contains(k.as_str()))
|
||||
.map(|(k, coords)| {
|
||||
let min_dist = seed_coords.iter()
|
||||
.map(|sc| weighted_distance(coords, sc, &weights))
|
||||
.filter(|(k, coords)| {
|
||||
!seed_set.contains(k.as_str())
|
||||
&& coords.iter().any(|&v| v.abs() > 1e-12) // skip degenerate zero-coord nodes
|
||||
})
|
||||
.map(|(candidate_key, coords)| {
|
||||
let min_dist = seed_data.iter()
|
||||
.map(|(seed_key, sc, seed_weight)| {
|
||||
let raw_dist = weighted_distance(coords, sc, &dim_weights);
|
||||
|
||||
// Scale by link strength if directly connected
|
||||
let link_scale = link_strengths
|
||||
.get(&(*seed_key, candidate_key.as_str()))
|
||||
.map(|&s| 1.0 / (1.0 + s as f64)) // strong link → smaller distance
|
||||
.unwrap_or(1.0);
|
||||
|
||||
raw_dist * link_scale / seed_weight
|
||||
})
|
||||
.fold(f64::MAX, f64::min);
|
||||
(k.clone(), min_dist)
|
||||
(candidate_key.clone(), min_dist)
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue