manifold: fix direction initialization, add power iteration rounds

Initialize the direction from the two seeds that are furthest apart in
spectral space, instead of from the first and last seeds in input order
(which was alphabetical, since the seeds come from a BTreeMap). Run 3
rounds of power iteration, normalizing after each round, instead of a
single unnormalized round, for better convergence.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-03-09 01:27:24 -04:00
parent 01dd8e5ef9
commit c13a9da81c

View file

@ -522,29 +522,43 @@ fn run_manifold(
} }
} }
// Compute principal direction: weighted PCA axis 1 // Compute principal direction via power iteration on seed covariance.
// For each seed, its deviation from centroid contributes to the direction // Initialize with the two most separated seeds (largest spectral distance).
let mut direction = vec![0.0f64; dims]; let mut direction = vec![0.0f64; dims];
if seed_data.len() >= 2 { if seed_data.len() >= 2 {
// Use power iteration to find dominant direction of seed spread // Find the two seeds furthest apart in spectral space
// Initialize with the vector from first seed to last seed let mut best_dist = 0.0f64;
let first = seed_data.first().unwrap().2; for i in 0..seed_data.len() {
let last = seed_data.last().unwrap().2; for j in (i + 1)..seed_data.len() {
for i in 0..dims { let dist: f64 = seed_data[i].2.iter().zip(seed_data[j].2.iter())
direction[i] = last[i] - first[i]; .map(|(a, b)| (a - b).powi(2)).sum::<f64>().sqrt();
if dist > best_dist {
best_dist = dist;
for d in 0..dims {
direction[d] = seed_data[j].2[d] - seed_data[i].2[d];
}
}
}
} }
// One round of power iteration on the covariance matrix // Power iteration: 3 rounds on the weighted covariance matrix
for _ in 0..3 {
let mut new_dir = vec![0.0f64; dims]; let mut new_dir = vec![0.0f64; dims];
for (_, weight, coords) in &seed_data { for (_, weight, coords) in &seed_data {
let dev: Vec<f64> = coords.iter().zip(centroid.iter()).map(|(c, m)| c - m).collect(); let dev: Vec<f64> = coords.iter().zip(centroid.iter()).map(|(c, m)| c - m).collect();
let dot: f64 = dev.iter().zip(direction.iter()).map(|(d, v)| d * v).sum(); let dot: f64 = dev.iter().zip(direction.iter()).map(|(d, v)| d * v).sum();
for i in 0..dims { for d in 0..dims {
new_dir[i] += weight * dot * dev[i]; new_dir[d] += weight * dot * dev[d];
} }
} }
// Normalize
let norm = new_dir.iter().map(|d| d * d).sum::<f64>().sqrt();
if norm > 1e-12 {
for d in &mut new_dir { *d /= norm; }
}
direction = new_dir; direction = new_dir;
} }
}
let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt(); let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt();