evaluate: dedup agent prompt when comparing same agent type

When both actions come from the same agent type, show the shared instructions once and compare only the two report outputs and affected nodes. This saves tokens and makes the comparison cleaner. Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
2026-03-14 19:35:11 -04:00 · 2026-03-14 19:35:11 -04:00 · 39e3d69e3c
commit 39e3d69e3c
parent b964335317
1 changed files with 26 additions and 8 deletions
--- a/poc-memory/src/cli/agent.rs
+++ b/poc-memory/src/cli/agent.rs
@ -263,14 +263,32 @@ fn llm_compare(
    b: &(String, String, String),
    model: &str,
 ) -> Result<std::cmp::Ordering, String> {
-    let prompt = format!(
+    let prompt = if a.0 == b.0 {
        // Same agent type — show instructions once
        // Split context at "## Report output" to extract shared prompt
        let split_a: Vec<&str> = a.2.splitn(2, "## Report output").collect();
        let split_b: Vec<&str> = b.2.splitn(2, "## Report output").collect();
        let shared_prompt = split_a.first().unwrap_or(&"");
        let report_a = split_a.get(1).unwrap_or(&"");
        let report_b = split_b.get(1).unwrap_or(&"");
        format!(
            "Compare two actions from the same {} agent. Which was better?\n\n\
             {}\n\n\
             ## Action A\n## Report output{}\n\n\
             ## Action B\n## Report output{}\n\n\
             Reply with ONLY: BETTER: A  or  BETTER: B  or  BETTER: TIE",
            a.0, shared_prompt, report_a, report_b
        )
    } else {
        format!(
            "Compare these two memory graph agent actions. Which one was better \
             for building a useful, well-organized knowledge graph?\n\n\
             ## Action A ({} agent)\n{}\n\n\
             ## Action B ({} agent)\n{}\n\n\
             Reply with ONLY: BETTER: A  or  BETTER: B  or  BETTER: TIE",
            a.0, a.2, b.0, b.2
-    );
+        )
    };
    let response = if model == "haiku" {
        llm::call_haiku("compare", &prompt)?