// consciousness/poc-memory/src/cli/agent.rs

// cli/agent.rs — agent subcommand handlers
use crate::store;
use crate::store::StoreView;
use crate::agents::llm;
use std::sync::atomic::{AtomicUsize, Ordering};
/// Handle the `consolidate-batch` subcommand.
///
/// With an agent name, prints that agent's prompt for up to `count` items;
/// otherwise runs the generic consolidation batch (which honors `auto`).
pub fn cmd_consolidate_batch(count: usize, auto: bool, agent: Option<String>) -> Result<(), String> {
    let store = store::Store::load()?;
    match agent {
        Some(agent_name) => {
            let batch = crate::agents::prompts::agent_prompt(&store, &agent_name, count)?;
            println!("{}", batch.prompt);
            Ok(())
        }
        None => crate::agents::prompts::consolidation_batch(&store, count, auto),
    }
}
/// Handle the `replay-queue` subcommand: print up to `count` prioritized
/// replay items, one per line, in queue order.
pub fn cmd_replay_queue(count: usize) -> Result<(), String> {
    let store = store::Store::load()?;
    let queue = crate::neuro::replay_queue(&store, count);
    println!("Replay queue ({} items):", queue.len());
    let mut rank = 0usize;
    for item in &queue {
        rank += 1;
        println!(" {:2}. [{:.3}] {:>10} {} (interval={}d, emotion={:.1}, spectral={:.1})",
            rank,
            item.priority,
            item.classification,
            item.key,
            item.interval_days,
            item.emotion,
            item.outlier_score);
    }
    Ok(())
}
/// Handle the `consolidate-session` subcommand: build a consolidation plan
/// from the current store and print its formatted summary.
pub fn cmd_consolidate_session() -> Result<(), String> {
    let store = store::Store::load()?;
    let formatted = crate::neuro::format_plan(&crate::neuro::consolidation_plan(&store));
    println!("{}", formatted);
    Ok(())
}
/// Handle the `consolidate-full` subcommand: run the full consolidation
/// pipeline, mutating the store in place.
pub fn cmd_consolidate_full() -> Result<(), String> {
    let mut graph = store::Store::load()?;
    crate::consolidate::consolidate_full(&mut graph)
}
/// Handle the `digest-links` subcommand.
///
/// Parses link suggestions out of digest nodes. Without `--apply` the links
/// are listed for review; with `--apply` they are written into the store.
pub fn cmd_digest_links(do_apply: bool) -> Result<(), String> {
    let store = store::Store::load()?;
    let links = crate::digest::parse_all_digest_links(&store);
    // Release the read-only load before the possible mutable reload below.
    drop(store);
    println!("Found {} unique links from digest nodes", links.len());
    if !do_apply {
        for (i, link) in links.iter().enumerate() {
            // BUG FIX: source and target were printed fused ("{}{}");
            // separate them so the pair is readable.
            println!(" {:3}. {} -> {}", i + 1, link.source, link.target);
            if !link.reason.is_empty() {
                // BUG FIX: slicing at a fixed byte offset can split a
                // multi-byte UTF-8 character and panic; back off to the
                // nearest char boundary at or below 80.
                let cut = link.reason.len().min(80);
                let end = (0..=cut)
                    .rev()
                    .find(|&b| link.reason.is_char_boundary(b))
                    .unwrap_or(0);
                println!(" ({})", &link.reason[..end]);
            }
        }
        println!("\nTo apply: poc-memory digest-links --apply");
        return Ok(());
    }
    let mut store = store::Store::load()?;
    let (applied, skipped, fallbacks) = crate::digest::apply_digest_links(&mut store, &links);
    println!("\nApplied: {} ({} file-level fallbacks) Skipped: {}", applied, fallbacks, skipped);
    Ok(())
}
/// Handle the `journal-enrich` subcommand: validate the transcript path,
/// then enrich the store from the given journal entry.
pub fn cmd_journal_enrich(jsonl_path: &str, entry_text: &str, grep_line: usize) -> Result<(), String> {
    let transcript_exists = std::path::Path::new(jsonl_path).is_file();
    if !transcript_exists {
        return Err(format!("JSONL not found: {}", jsonl_path));
    }
    let mut store = store::Store::load()?;
    crate::enrich::journal_enrich(&mut store, jsonl_path, entry_text, grep_line)
}
/// Handle the `apply-consolidation` subcommand: apply (or preview, when
/// `do_apply` is false) a consolidation pass, optionally reading from
/// `report_file`.
pub fn cmd_apply_consolidation(do_apply: bool, report_file: Option<&str>) -> Result<(), String> {
    let mut graph = store::Store::load()?;
    crate::consolidate::apply_consolidation(&mut graph, do_apply, report_file)
}
/// Handle the `knowledge-loop` subcommand: run the autonomous knowledge loop
/// with the given limits and report cycle/action totals on stderr.
pub fn cmd_knowledge_loop(max_cycles: usize, batch_size: usize, window: usize, max_depth: i32) -> Result<(), String> {
    let config = crate::knowledge::KnowledgeLoopConfig {
        max_cycles,
        batch_size,
        window,
        max_depth,
        ..Default::default()
    };
    let results = crate::knowledge::run_knowledge_loop(&config)?;
    let total_applied: usize = results.iter().map(|r| r.total_applied).sum();
    eprintln!("\nCompleted {} cycles, {} total actions applied",
        results.len(), total_applied);
    Ok(())
}
/// Handle the `fact-mine` subcommand.
///
/// In batch mode `path` is a directory of `.jsonl` transcripts; otherwise it
/// names a single transcript. Mined facts are emitted as pretty JSON to
/// stdout, or to `output_file` when given, unless `dry_run` is set.
pub fn cmd_fact_mine(path: &str, batch: bool, dry_run: bool, output_file: Option<&str>, min_messages: usize) -> Result<(), String> {
    let root = std::path::Path::new(path);
    let paths: Vec<std::path::PathBuf> = if batch {
        if !root.is_dir() {
            return Err(format!("Not a directory: {}", path));
        }
        let entries = std::fs::read_dir(root).map_err(|e| format!("read dir: {}", e))?;
        let mut files: Vec<_> = entries
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.path())
            .filter(|candidate| candidate.extension().map(|x| x == "jsonl").unwrap_or(false))
            .collect();
        files.sort();
        eprintln!("Found {} transcripts", files.len());
        files
    } else {
        vec![root.to_path_buf()]
    };
    let path_refs: Vec<&std::path::Path> = paths.iter().map(|pb| pb.as_path()).collect();
    let facts = crate::fact_mine::mine_batch(&path_refs, min_messages, dry_run)?;
    if !dry_run {
        let json = serde_json::to_string_pretty(&facts)
            .map_err(|e| format!("serialize: {}", e))?;
        match output_file {
            Some(out) => {
                std::fs::write(out, &json).map_err(|e| format!("write: {}", e))?;
                eprintln!("\nWrote {} facts to {}", facts.len(), out);
            }
            None => println!("{}", json),
        }
    }
    eprintln!("\nTotal: {} facts from {} transcripts", facts.len(), paths.len());
    Ok(())
}
/// Handle the `fact-mine-store` subcommand: mine facts from a single
/// transcript and persist them directly into the store.
pub fn cmd_fact_mine_store(path: &str) -> Result<(), String> {
    let path = std::path::Path::new(path);
    // `is_file` (not `exists`) — consistent with cmd_journal_enrich, and
    // rejects directories here with a clear error instead of letting the
    // miner fail on them downstream.
    if !path.is_file() {
        return Err(format!("File not found: {}", path.display()));
    }
    let count = crate::fact_mine::mine_and_store(path, None)?;
    eprintln!("Stored {} facts", count);
    Ok(())
}
/// Sample recent actions from each agent type, sort by quality using
/// LLM pairwise comparison, report per-type rankings.
///
/// `samples_per_type` caps how many of the newest consolidation reports are
/// taken per agent type; `model` selects "haiku" (anything else falls back to
/// sonnet) as the pairwise judge. Errors if fewer than two samples exist.
pub fn cmd_evaluate_agents(samples_per_type: usize, model: &str) -> Result<(), String> {
    let store = store::Store::load()?;
    // Collect consolidation reports grouped by agent type
    let agent_types = ["linker", "organize", "replay", "connector",
        "separator", "transfer", "distill", "rename"];
    let mut all_samples: Vec<(String, String, String)> = Vec::new(); // (agent_type, key, summary)
    for agent_type in &agent_types {
        let prefix = format!("_consolidate-{}", agent_type);
        let mut keys: Vec<(String, i64)> = store.nodes.iter()
            .filter(|(k, _)| k.starts_with(&prefix))
            .map(|(k, n)| (k.clone(), n.timestamp))
            .collect();
        keys.sort_by(|a, b| b.1.cmp(&a.1)); // newest first
        keys.truncate(samples_per_type);
        for (key, _) in &keys {
            let content = store.nodes.get(key)
                .map(|n| crate::util::truncate(&n.content, 500, "..."))
                .unwrap_or_default();
            all_samples.push((agent_type.to_string(), key.clone(), content));
        }
    }
    if all_samples.len() < 2 {
        return Err("Not enough samples to compare".into());
    }
    eprintln!("Collected {} samples from {} agent types", all_samples.len(), agent_types.len());
    // Rough n * floor(log2 n) estimate of how many comparisons the sort makes.
    eprintln!("Sorting with {} pairwise comparisons (model={})...",
        all_samples.len() * (all_samples.len() as f64).log2() as usize,
        model);
    // Sort with LLM comparator — yes, really. Rayon's parallel merge sort
    // with an LLM as the comparison function. Multiple API calls in parallel.
    // NOTE(review): an LLM judge is not guaranteed to be a total order, which
    // sort_by formally requires; newer std sorts can panic when they detect a
    // violation — TODO confirm rayon's par_sort_by behavior here.
    let comparisons = AtomicUsize::new(0);
    use rayon::slice::ParallelSliceMut;
    all_samples.par_sort_by(|a, b| {
        let n = comparisons.fetch_add(1, Ordering::Relaxed);
        if n % 10 == 0 {
            eprint!(" {} comparisons...\r", n);
        }
        // On an API failure, treat the pair as equal rather than aborting.
        llm_compare(a, b, model).unwrap_or(std::cmp::Ordering::Equal)
    });
    eprintln!(" {} total comparisons", comparisons.load(Ordering::Relaxed));
    let sorted = all_samples;
    // Print ranked results
    println!("\nAgent Action Ranking (best → worst):\n");
    for (rank, (agent_type, key, summary)) in sorted.iter().enumerate() {
        // BUG FIX: `&summary[..80]` panics when byte 80 splits a multi-byte
        // UTF-8 character; back off to the nearest char boundary <= 80.
        let cut = summary.len().min(80);
        let end = (0..=cut)
            .rev()
            .find(|&b| summary.is_char_boundary(b))
            .unwrap_or(0);
        let preview = &summary[..end];
        // BUG FIX: key and preview were printed fused ("{}{}"); separate them.
        println!(" {:3}. [{:10}] {} {}", rank + 1, agent_type, key, preview);
    }
    // Compute per-type average rank
    println!("\nPer-type average rank (lower = better):\n");
    let n = sorted.len() as f64;
    let mut type_ranks: std::collections::HashMap<&str, Vec<usize>> = std::collections::HashMap::new();
    for (rank, (agent_type, _, _)) in sorted.iter().enumerate() {
        type_ranks.entry(agent_type).or_default().push(rank + 1);
    }
    let mut avgs: Vec<(&str, f64, usize)> = type_ranks.iter()
        .map(|(t, ranks)| {
            let avg = ranks.iter().sum::<usize>() as f64 / ranks.len() as f64;
            (*t, avg, ranks.len())
        })
        .collect();
    avgs.sort_by(|a, b| a.1.total_cmp(&b.1));
    for (agent_type, avg_rank, count) in &avgs {
        // Normalize average rank into a quality score: 1.0 = always first.
        let quality = 1.0 - (avg_rank / n);
        println!(" {:12} avg_rank={:5.1} quality={:.2} (n={})",
            agent_type, avg_rank, quality, count);
    }
    Ok(())
}
/// Ask the LLM which of two agent actions (tuples of
/// `(agent_type, key, summary)`) better served the knowledge graph.
/// `Less` means `a` sorts ahead of `b`; unparseable replies yield `Equal`.
fn llm_compare(
    a: &(String, String, String),
    b: &(String, String, String),
    model: &str,
) -> Result<std::cmp::Ordering, String> {
    let prompt = format!(
        "Compare these two memory graph agent actions. Which one was better \
         for building a useful, well-organized knowledge graph?\n\n\
         ## Action A ({} agent)\n{}\n\n\
         ## Action B ({} agent)\n{}\n\n\
         Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE",
        a.0, a.2, b.0, b.2
    );
    let response = match model {
        "haiku" => llm::call_haiku("compare", &prompt)?,
        _ => llm::call_sonnet("compare", &prompt)?,
    };
    let verdict = response.trim().to_uppercase();
    let ordering = if verdict.contains("BETTER: A") {
        std::cmp::Ordering::Less // A is better = A comes first
    } else if verdict.contains("BETTER: B") {
        std::cmp::Ordering::Greater
    } else {
        std::cmp::Ordering::Equal
    };
    Ok(ordering)
}