evaluate: dedup agent prompt when comparing same agent type

When both actions come from the same agent type, show the shared
instructions once and compare only the two report outputs and affected
nodes. This saves tokens and makes the comparison cleaner.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-03-14 19:35:11 -04:00
parent b964335317
commit 39e3d69e3c

View file

@ -263,14 +263,32 @@ fn llm_compare(
b: &(String, String, String), b: &(String, String, String),
model: &str, model: &str,
) -> Result<std::cmp::Ordering, String> { ) -> Result<std::cmp::Ordering, String> {
let prompt = format!( let prompt = if a.0 == b.0 {
"Compare these two memory graph agent actions. Which one was better \ // Same agent type — show instructions once
for building a useful, well-organized knowledge graph?\n\n\ // Split context at "## Report output" to extract shared prompt
## Action A ({} agent)\n{}\n\n\ let split_a: Vec<&str> = a.2.splitn(2, "## Report output").collect();
## Action B ({} agent)\n{}\n\n\ let split_b: Vec<&str> = b.2.splitn(2, "## Report output").collect();
Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE", let shared_prompt = split_a.first().unwrap_or(&"");
a.0, a.2, b.0, b.2 let report_a = split_a.get(1).unwrap_or(&"");
); let report_b = split_b.get(1).unwrap_or(&"");
format!(
"Compare two actions from the same {} agent. Which was better?\n\n\
{}\n\n\
## Action A\n## Report output{}\n\n\
## Action B\n## Report output{}\n\n\
Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE",
a.0, shared_prompt, report_a, report_b
)
} else {
format!(
"Compare these two memory graph agent actions. Which one was better \
for building a useful, well-organized knowledge graph?\n\n\
## Action A ({} agent)\n{}\n\n\
## Action B ({} agent)\n{}\n\n\
Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE",
a.0, a.2, b.0, b.2
)
};
let response = if model == "haiku" { let response = if model == "haiku" {
llm::call_haiku("compare", &prompt)? llm::call_haiku("compare", &prompt)?