manifold: fix direction initialization, add power iteration rounds

Initialize direction from the two most spectrally separated seeds
instead of relying on input order (which was alphabetical from
BTreeMap). Run 3 rounds of power iteration with normalization
instead of 1 for better convergence.

Co-Authored-By: ProofOfConcept <poc@bcachefs.org>
This commit is contained in:
Kent Overstreet 2026-03-09 01:27:24 -04:00
parent 01dd8e5ef9
commit c13a9da81c

View file

@ -522,28 +522,42 @@ fn run_manifold(
}
}
// Compute principal direction: weighted PCA axis 1
// For each seed, its deviation from centroid contributes to the direction
// Compute principal direction via power iteration on seed covariance.
// Initialize with the two most separated seeds (largest spectral distance).
let mut direction = vec![0.0f64; dims];
if seed_data.len() >= 2 {
// Use power iteration to find dominant direction of seed spread
// Initialize with the vector from first seed to last seed
let first = seed_data.first().unwrap().2;
let last = seed_data.last().unwrap().2;
for i in 0..dims {
direction[i] = last[i] - first[i];
}
// One round of power iteration on the covariance matrix
let mut new_dir = vec![0.0f64; dims];
for (_, weight, coords) in &seed_data {
let dev: Vec<f64> = coords.iter().zip(centroid.iter()).map(|(c, m)| c - m).collect();
let dot: f64 = dev.iter().zip(direction.iter()).map(|(d, v)| d * v).sum();
for i in 0..dims {
new_dir[i] += weight * dot * dev[i];
// Find the two seeds furthest apart in spectral space
let mut best_dist = 0.0f64;
for i in 0..seed_data.len() {
for j in (i + 1)..seed_data.len() {
let dist: f64 = seed_data[i].2.iter().zip(seed_data[j].2.iter())
.map(|(a, b)| (a - b).powi(2)).sum::<f64>().sqrt();
if dist > best_dist {
best_dist = dist;
for d in 0..dims {
direction[d] = seed_data[j].2[d] - seed_data[i].2[d];
}
}
}
}
direction = new_dir;
// Power iteration: 3 rounds on the weighted covariance matrix
for _ in 0..3 {
let mut new_dir = vec![0.0f64; dims];
for (_, weight, coords) in &seed_data {
let dev: Vec<f64> = coords.iter().zip(centroid.iter()).map(|(c, m)| c - m).collect();
let dot: f64 = dev.iter().zip(direction.iter()).map(|(d, v)| d * v).sum();
for d in 0..dims {
new_dir[d] += weight * dot * dev[d];
}
}
// Normalize
let norm = new_dir.iter().map(|d| d * d).sum::<f64>().sqrt();
if norm > 1e-12 {
for d in &mut new_dir { *d /= norm; }
}
direction = new_dir;
}
}
let dir_norm = direction.iter().map(|d| d * d).sum::<f64>().sqrt();