diff --git a/poc-memory/src/cli/agent.rs b/poc-memory/src/cli/agent.rs
index 73fed11..7eb2a7c 100644
--- a/poc-memory/src/cli/agent.rs
+++ b/poc-memory/src/cli/agent.rs
@@ -144,7 +144,7 @@ pub fn cmd_fact_mine_store(path: &str) -> Result<(), String> {
 
 /// Sample recent actions from each agent type, sort by quality using
 /// LLM pairwise comparison, report per-type rankings.
-pub fn cmd_evaluate_agents(samples_per_type: usize, model: &str) -> Result<(), String> {
+pub fn cmd_evaluate_agents(samples_per_type: usize, model: &str, dry_run: bool) -> Result<(), String> {
     let store = store::Store::load()?;
 
     // Collect consolidation reports grouped by agent type
@@ -152,9 +152,10 @@ pub fn cmd_evaluate_agents(samples_per_type: usize, model: &str) -> Result<(), S
                        "separator", "transfer", "distill", "rename"];
 
     // Load agent prompt files for context
-    let prompts_dir = crate::config::get().data_dir
-        .parent().unwrap_or(std::path::Path::new("."))
-        .join("poc-memory/agents");
+    let prompts_dir = {
+        let repo = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("agents");
+        if repo.is_dir() { repo } else { crate::store::memory_dir().join("agents") }
+    };
 
     let mut all_samples: Vec<(String, String, String)> = Vec::new(); // (agent_type, key, context)
 
@@ -181,9 +182,10 @@ pub fn cmd_evaluate_agents(samples_per_type: usize, model: &str) -> Result<(), S
 
             // Extract target node keys mentioned in the report and include their content
             let mut target_content = String::new();
+            let mut seen_keys = std::collections::HashSet::new();
             for word in report.split_whitespace() {
                 let clean = word.trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '_');
-                if clean.len() > 10 && store.nodes.contains_key(clean) {
+                if clean.len() > 10 && seen_keys.insert(clean.to_string()) && store.nodes.contains_key(clean) {
                     if let Some(node) = store.nodes.get(clean) {
                         let preview = crate::util::truncate(&node.content, 200, "...");
                         target_content.push_str(&format!("\n### {}\n{}\n", clean, preview));
@@ -212,6 +214,21 @@ pub fn cmd_evaluate_agents(samples_per_type: usize, model: &str) -> Result<(), S
         all_samples.len() * (all_samples.len() as f64).log2() as usize,
         model);
 
+    if dry_run {
+        // Dry run: print one example prompt (first vs. last sample) and the estimated comparison count, then return without calling the LLM
+        if all_samples.len() >= 2 {
+            let a = &all_samples[0];
+            let b = &all_samples[all_samples.len() - 1];
+            let prompt = build_compare_prompt(a, b);
+            println!("=== DRY RUN: Example comparison prompt ===\n");
+            println!("{}", prompt);
+            println!("\n=== {} samples collected, would do ~{} comparisons ===",
+                all_samples.len(),
+                all_samples.len() * (all_samples.len() as f64).log2() as usize);
+        }
+        return Ok(());
+    }
+
     // Sort with LLM comparator — yes, really. Rayon's parallel merge sort
     // with an LLM as the comparison function. Multiple API calls in parallel.
     let comparisons = AtomicUsize::new(0);
@@ -258,12 +275,11 @@ pub fn cmd_evaluate_agents(samples_per_type: usize, model: &str) -> Result<(), S
     Ok(())
 }
 
-fn llm_compare(
+fn build_compare_prompt(
     a: &(String, String, String),
     b: &(String, String, String),
-    model: &str,
-) -> Result<std::cmp::Ordering, String> {
-    let prompt = if a.0 == b.0 {
+) -> String {
+    if a.0 == b.0 {
         // Same agent type — show instructions once
         // Split context at "## Report output" to extract shared prompt
         let split_a: Vec<&str> = a.2.splitn(2, "## Report output").collect();
@@ -290,7 +306,15 @@ fn llm_compare(
              BETTER: A  or  BETTER: B  or  BETTER: TIE",
             a.0, a.2, b.0, b.2
         )
-    };
+    }
+}
+
+fn llm_compare(
+    a: &(String, String, String),
+    b: &(String, String, String),
+    model: &str,
+) -> Result<std::cmp::Ordering, String> {
+    let prompt = build_compare_prompt(a, b);
 
     let response = if model == "haiku" {
         llm::call_haiku("compare", &prompt)?
diff --git a/poc-memory/src/main.rs b/poc-memory/src/main.rs
index 1b37e4f..c01cfad 100644
--- a/poc-memory/src/main.rs
+++ b/poc-memory/src/main.rs
@@ -574,6 +574,9 @@ enum AgentCmd {
         /// Model to use for comparison (haiku or sonnet)
         #[arg(long, default_value = "haiku")]
         model: String,
+        /// Show example comparison prompt without calling LLM
+        #[arg(long)]
+        dry_run: bool,
     },
 }
 
@@ -795,7 +798,8 @@ fn main() {
                 => cli::agent::cmd_fact_mine(&path, batch, dry_run, output.as_deref(), min_messages),
             AgentCmd::FactMineStore { path } => cli::agent::cmd_fact_mine_store(&path),
             AgentCmd::ReplayQueue { count } => cli::agent::cmd_replay_queue(count),
-            AgentCmd::Evaluate { samples, model } => cli::agent::cmd_evaluate_agents(samples, &model),
+            AgentCmd::Evaluate { samples, model, dry_run }
+                => cli::agent::cmd_evaluate_agents(samples, &model, dry_run),
         },
 
         // Admin