diff --git a/poc-memory/src/search.rs b/poc-memory/src/search.rs
index ec1d6f6..8cd8afa 100644
--- a/poc-memory/src/search.rs
+++ b/poc-memory/src/search.rs
@@ -37,6 +37,8 @@ pub enum Algorithm {
     Spread,
     Spectral,
     Manifold,
+    Confluence,
+    Geodesic,
 }
 
 impl fmt::Display for Algorithm {
@@ -45,6 +47,8 @@ impl fmt::Display for Algorithm {
             Algorithm::Spread => write!(f, "spread"),
             Algorithm::Spectral => write!(f, "spectral"),
             Algorithm::Manifold => write!(f, "manifold"),
+            Algorithm::Confluence => write!(f, "confluence"),
+            Algorithm::Geodesic => write!(f, "geodesic"),
         }
     }
 }
@@ -58,6 +62,8 @@ impl AlgoStage {
             "spread" => Algorithm::Spread,
             "spectral" => Algorithm::Spectral,
             "manifold" => Algorithm::Manifold,
+            "confluence" => Algorithm::Confluence,
+            "geodesic" => Algorithm::Geodesic,
             _ => return Err(format!("unknown algorithm: {}", name)),
         };
         let mut params = HashMap::new();
@@ -136,10 +142,9 @@ pub fn run_pipeline(
         current = match stage.algo {
             Algorithm::Spread => run_spread(&current, graph, store, stage, debug),
             Algorithm::Spectral => run_spectral(&current, graph, stage, debug),
-            Algorithm::Manifold => {
-                if debug { println!(" (manifold not yet implemented, passing through)"); }
-                current
-            }
+            Algorithm::Manifold => run_manifold(&current, graph, stage, debug),
+            Algorithm::Confluence => run_confluence(&current, graph, store, stage, debug),
+            Algorithm::Geodesic => run_geodesic(&current, graph, stage, debug),
         };
 
         if debug {
@@ -222,6 +227,397 @@ fn run_spectral(
     result
 }
 
+/// Confluence: multi-source reachability scoring.
+///
+/// Unlike spreading activation (which takes max activation from any source),
+/// confluence rewards nodes reachable from *multiple* seeds. Each seed spreads up
+/// to max_hops hops, scaling activation per hop by edge_decay * neighbor weight *
+/// edge strength; a node's score sums the strongest activation from each seed.
+/// Nodes at the intersection of multiple seeds' neighborhoods score highest.
+///
+/// This naturally handles mixed seeds: unrelated seeds activate disjoint
+/// neighborhoods that don't overlap, so their results separate naturally.
+///
+/// Tunable params: max_hops (default 3), edge_decay (default 0.5),
+/// min_sources (default 2, minimum number of distinct seeds that must reach a node).
+fn run_confluence(
+    seeds: &[(String, f64)],
+    graph: &Graph,
+    store: &impl StoreView,
+    stage: &AlgoStage,
+    debug: bool,
+) -> Vec<(String, f64)> {
+    let max_hops = stage.param_u32("max_hops", 3);
+    let edge_decay = stage.param_f64("edge_decay", 0.5);
+    let min_sources = stage.param_usize("min_sources", 2);
+
+    // For each seed, BFS outward collecting (node → activation) at each distance
+    // Track which seeds contributed to each node's score
+    let mut node_scores: HashMap<String, f64> = HashMap::new();
+    let mut node_sources: HashMap<String, HashSet<usize>> = HashMap::new();
+
+    for (seed_idx, (seed_key, seed_weight)) in seeds.iter().enumerate() {
+        let mut visited: HashMap<String, f64> = HashMap::new();
+        let mut queue: VecDeque<(String, u32)> = VecDeque::new();
+
+        visited.insert(seed_key.clone(), *seed_weight); // a seed counts as reached by itself
+        queue.push_back((seed_key.clone(), 0));
+
+        while let Some((key, depth)) = queue.pop_front() {
+            if depth >= max_hops { continue; }
+
+            let act = visited[&key];
+
+            for (neighbor, strength) in graph.neighbors(&key) {
+                let neighbor_weight = store.node_weight(neighbor.as_str());
+                let propagated = act * edge_decay * neighbor_weight * strength as f64;
+                if propagated < 0.001 { continue; } // prune negligible activation
+
+                if visited.get(neighbor.as_str()).map_or(true, |&prev| prev < propagated) {
+                    visited.insert(neighbor.clone(), propagated);
+                    queue.push_back((neighbor.clone(), depth + 1));
+                }
+            }
+        }
+
+        // Accumulate into global scores (additive across seeds)
+        for (key, act) in visited {
+            *node_scores.entry(key.clone()).or_insert(0.0) += act;
+            node_sources.entry(key).or_default().insert(seed_idx);
+        }
+    }
+
+    // Filter to nodes reached by min_sources distinct seeds
+    let mut results: Vec<(String, f64)> = node_scores.into_iter()
+        .filter(|(key, _)| {
+            node_sources.get(key).map(|s| s.len()).unwrap_or(0) >= min_sources
+        })
+        .collect();
+
+    results.sort_by(|a, b| b.1.total_cmp(&a.1));
+
+    if debug {
+        // Show source counts (after sorting, so these are the 15 best results)
+        for (key, score) in results.iter().take(15) {
+            let sources = node_sources.get(key).map(|s| s.len()).unwrap_or(0);
+            println!(" [{:.4}] {} (from {} seeds)", score, key, sources);
+        }
+    }
+    results
+}
+
+/// Geodesic: straightest paths between seed pairs in spectral space.
+///
+/// For each pair of seeds, walk the graph from each toward the other, at each
+/// step choosing the neighbor whose spectral direction most aligns with the
+/// target. Non-seed nodes on these paths accumulate pair_weight * alignment;
+/// the top k are merged with the seeds. Seeds are returned unchanged when the
+/// embedding fails to load or fewer than 2 seeds have non-zero spectral coords.
+/// Tunable params: max_path (default 6), k (default 20 results).
+fn run_geodesic(
+    seeds: &[(String, f64)],
+    graph: &Graph,
+    stage: &AlgoStage,
+    debug: bool,
+) -> Vec<(String, f64)> {
+    let max_path = stage.param_usize("max_path", 6);
+    let k = stage.param_usize("k", 20);
+
+    let emb = match spectral::load_embedding() {
+        Ok(e) => e,
+        Err(e) => {
+            if debug { println!(" no spectral embedding: {}", e); }
+            return seeds.to_vec();
+        }
+    };
+
+    // Filter seeds to those with valid spectral coords
+    let valid_seeds: Vec<(&str, f64, &Vec<f64>)> = seeds.iter()
+        .filter_map(|(key, weight)| {
+            emb.coords.get(key.as_str())
+                .filter(|c| c.iter().any(|&v| v.abs() > 1e-12))
+                .map(|c| (key.as_str(), *weight, c))
+        })
+        .collect();
+
+    if valid_seeds.len() < 2 {
+        if debug { println!(" need ≥2 seeds with spectral coords, have {}", valid_seeds.len()); }
+        return seeds.to_vec();
+    }
+
+    // For each pair of seeds, find the geodesic path
+    let mut path_counts: HashMap<String, f64> = HashMap::new();
+    let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();
+
+    for i in 0..valid_seeds.len() {
+        for j in (i + 1)..valid_seeds.len() {
+            let (key_a, weight_a, coords_a) = &valid_seeds[i];
+            let (key_b, weight_b, coords_b) = &valid_seeds[j];
+            let pair_weight = weight_a * weight_b;
+
+            // Walk from A toward B
+            let path_ab = geodesic_walk(
+                key_a, coords_a, coords_b, graph, &emb, max_path,
+            );
+            // Walk from B toward A
+            let path_ba = geodesic_walk(
+                key_b, coords_b, coords_a, graph, &emb, max_path,
+            );
+
+            // Score nodes on both paths (nodes found from both directions score double)
+            for (node, alignment) in path_ab.iter().chain(path_ba.iter()) {
+                if !seed_set.contains(node.as_str()) {
+                    *path_counts.entry(node.clone()).or_insert(0.0) += pair_weight * alignment;
+                }
+            }
+        }
+    }
+
+    if debug && !path_counts.is_empty() {
+        println!(" {} pairs examined, {} distinct nodes on paths",
+            valid_seeds.len() * (valid_seeds.len() - 1) / 2,
+            path_counts.len());
+    }
+
+    // Merge with original seeds
+    let mut results = seeds.to_vec();
+    let mut path_results: Vec<(String, f64)> = path_counts.into_iter().collect();
+    path_results.sort_by(|a, b| b.1.total_cmp(&a.1));
+    path_results.truncate(k);
+
+    for (key, score) in path_results {
+        if !seed_set.contains(key.as_str()) { // always true: seeds were excluded when scoring
+            results.push((key, score));
+        }
+    }
+
+    results.sort_by(|a, b| b.1.total_cmp(&a.1));
+    results
+}
+
+/// Walk from `start` toward `target_coords` in spectral space, choosing
+/// the neighbor at each step whose direction most aligns with the target.
+/// Returns (node_key, alignment_score) for each intermediate node.
+fn geodesic_walk(
+    start: &str,
+    start_coords: &[f64],
+    target_coords: &[f64],
+    graph: &Graph,
+    emb: &spectral::SpectralEmbedding,
+    max_steps: usize,
+) -> Vec<(String, f64)> {
+    let mut path = Vec::new();
+    let mut current = start.to_string();
+    let mut current_coords = start_coords.to_vec();
+    let mut visited: HashSet<String> = HashSet::new(); // never step onto a node twice
+    visited.insert(current.clone());
+
+    for _ in 0..max_steps {
+        // Direction we want to travel: from current toward target
+        let direction: Vec<f64> = target_coords.iter()
+            .zip(current_coords.iter())
+            .map(|(t, c)| t - c)
+            .collect();
+
+        let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt();
+        if dir_norm < 1e-12 { break; } // arrived
+
+        // Among neighbors with spectral coords, find the one most aligned
+        let mut best: Option<(String, Vec<f64>, f64)> = None;
+
+        for (neighbor, _strength) in graph.neighbors(&current) {
+            if visited.contains(neighbor.as_str()) { continue; }
+
+            let neighbor_coords = match emb.coords.get(neighbor.as_str()) {
+                Some(c) if c.iter().any(|&v| v.abs() > 1e-12) => c, // skip all-zero coords
+                _ => continue,
+            };
+
+            // Direction to this neighbor
+            let step: Vec<f64> = neighbor_coords.iter()
+                .zip(current_coords.iter())
+                .map(|(n, c)| n - c)
+                .collect();
+
+            let step_norm = step.iter().map(|s| s * s).sum::<f64>().sqrt();
+            if step_norm < 1e-12 { continue; } // neighbor coincides with current position
+
+            // Cosine similarity between desired direction and step direction
+            let dot: f64 = direction.iter().zip(step.iter()).map(|(d, s)| d * s).sum();
+            let alignment = dot / (dir_norm * step_norm);
+
+            if alignment > 0.0 { // only consider forward-facing neighbors
+                if best.as_ref().map_or(true, |(_, _, a)| alignment > *a) { // ties keep the first
+                    best = Some((neighbor.clone(), neighbor_coords.clone(), alignment));
+                }
+            }
+        }
+
+        match best {
+            Some((next_key, next_coords, alignment)) => {
+                path.push((next_key.clone(), alignment));
+                visited.insert(next_key.clone());
+                current = next_key;
+                current_coords = next_coords;
+            }
+            None => break, // no forward-facing neighbors
+        }
+    }
+
+    path
+}
+
+/// Manifold: extrapolation along the direction defined by seeds.
+///
+/// Instead of finding what's *near* the seeds in spectral space (proximity),
+/// find what's in the *direction* the seeds define. Given a weighted centroid
+/// of seeds and the principal direction they span, find nodes that continue
+/// along that direction. Seeds are returned unchanged when the embedding fails
+/// to load or no seed has non-zero spectral coordinates.
+/// Tunable params: k (default 20 results).
+fn run_manifold(
+    seeds: &[(String, f64)],
+    graph: &Graph,
+    stage: &AlgoStage,
+    debug: bool,
+) -> Vec<(String, f64)> {
+    let k = stage.param_usize("k", 20);
+
+    let emb = match spectral::load_embedding() {
+        Ok(e) => e,
+        Err(e) => {
+            if debug { println!(" no spectral embedding: {}", e); }
+            return seeds.to_vec();
+        }
+    };
+
+    // Collect seeds with valid spectral coordinates
+    let seed_data: Vec<(&str, f64, &Vec<f64>)> = seeds.iter()
+        .filter_map(|(key, weight)| {
+            emb.coords.get(key.as_str())
+                .filter(|c| c.iter().any(|&v| v.abs() > 1e-12))
+                .map(|c| (key.as_str(), *weight, c))
+        })
+        .collect();
+
+    if seed_data.is_empty() {
+        if debug { println!(" no seeds with spectral coords"); }
+        return seeds.to_vec();
+    }
+
+    let dims = emb.dims;
+
+    // Compute weighted centroid of seeds
+    let mut centroid = vec![0.0f64; dims];
+    let mut total_weight = 0.0;
+    for (_, weight, coords) in &seed_data {
+        for (slot, &c) in centroid.iter_mut().zip(coords.iter()) { // zip: no out-of-bounds index
+            *slot += c * weight;
+        }
+        total_weight += weight;
+    }
+    if total_weight > 0.0 {
+        for c in &mut centroid {
+            *c /= total_weight;
+        }
+    }
+
+    // Compute principal direction: weighted PCA axis 1
+    // For each seed, its deviation from centroid contributes to the direction
+    let mut direction = vec![0.0f64; dims];
+    if seed_data.len() >= 2 {
+        // Use power iteration to find dominant direction of seed spread
+        // Initialize with the vector from first seed to last seed
+        let first = seed_data.first().unwrap().2;
+        let last = seed_data.last().unwrap().2;
+        for (d, (l, f)) in direction.iter_mut().zip(last.iter().zip(first.iter())) {
+            *d = l - f;
+        }
+
+        // One round of power iteration on the covariance matrix
+        let mut new_dir = vec![0.0f64; dims];
+        for (_, weight, coords) in &seed_data {
+            let dev: Vec<f64> = coords.iter().zip(centroid.iter()).map(|(c, m)| c - m).collect();
+            let dot: f64 = dev.iter().zip(direction.iter()).map(|(d, v)| d * v).sum();
+            for (acc, d) in new_dir.iter_mut().zip(dev.iter()) {
+                *acc += weight * dot * d;
+            }
+        }
+        direction = new_dir;
+    }
+
+    let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt();
+
+    let seed_set: HashSet<&str> = seeds.iter().map(|(k, _)| k.as_str()).collect();
+
+    // Score each non-seed node by projection onto the direction from centroid
+    let mut candidates: Vec<(String, f64)> = emb.coords.iter()
+        .filter(|(key, coords)| {
+            !seed_set.contains(key.as_str())
+                && coords.iter().any(|&v| v.abs() > 1e-12)
+        })
+        .map(|(key, coords)| {
+            let deviation: Vec<f64> = coords.iter().zip(centroid.iter())
+                .map(|(c, m)| c - m)
+                .collect();
+
+            let score = if dir_norm > 1e-12 {
+                // Project onto direction: how far along the principal axis
+                let projection: f64 = deviation.iter().zip(direction.iter())
+                    .map(|(d, v)| d * v)
+                    .sum::<f64>() / dir_norm;
+
+                // Distance from the axis (perpendicular component)
+                let proj_vec: Vec<f64> = direction.iter()
+                    .map(|&d| d * projection / dir_norm)
+                    .collect();
+                let perp_dist: f64 = deviation.iter().zip(proj_vec.iter())
+                    .map(|(d, p)| (d - p).powi(2))
+                    .sum::<f64>()
+                    .sqrt();
+
+                // Score: prefer nodes far along the direction but close to the axis
+                // Use absolute projection (both directions from centroid are interesting)
+                let along = projection.abs();
+                if perp_dist < 1e-12 {
+                    along
+                } else {
+                    along / (1.0 + perp_dist)
+                }
+            } else {
+                // No direction (single seed or all seeds coincide): use distance from centroid
+                let dist: f64 = deviation.iter().map(|d| d * d).sum::<f64>().sqrt();
+                1.0 / (1.0 + dist)
+            };
+
+            // Bonus for being connected to seeds in the graph
+            let graph_bonus: f64 = graph.neighbors(key).iter()
+                .filter(|(n, _)| seed_set.contains(n.as_str()))
+                .map(|(_, s)| *s as f64 * 0.1)
+                .sum();
+
+            (key.clone(), score + graph_bonus)
+        })
+        .collect();
+
+    candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
+    candidates.truncate(k);
+
+    if debug {
+        for (key, score) in candidates.iter().take(15) {
+            println!(" [{:.4}] {}", score, key);
+        }
+    }
+
+    // Merge with original seeds
+    let mut results = seeds.to_vec();
+    for (key, score) in candidates {
+        results.push((key, score));
+    }
+    results.sort_by(|a, b| b.1.total_cmp(&a.1));
+    results
+}
+
 fn spreading_activation(
     seeds: &[(String, f64)],
     graph: &Graph,