evaluate: dedup agent prompt when comparing same agent type

When both actions come from the same agent type, show the shared
instructions once and compare only the two report outputs (plus affected
nodes). This saves tokens and makes the comparison cleaner.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-03-14 19:35:11 -04:00
parent b964335317
commit 39e3d69e3c

View file

@ -263,14 +263,32 @@ fn llm_compare(
b: &(String, String, String),
model: &str,
) -> Result<std::cmp::Ordering, String> {
let prompt = format!(
"Compare these two memory graph agent actions. Which one was better \
for building a useful, well-organized knowledge graph?\n\n\
## Action A ({} agent)\n{}\n\n\
## Action B ({} agent)\n{}\n\n\
Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE",
a.0, a.2, b.0, b.2
);
let prompt = if a.0 == b.0 {
// Same agent type — show instructions once
// Split context at "## Report output" to extract shared prompt
let split_a: Vec<&str> = a.2.splitn(2, "## Report output").collect();
let split_b: Vec<&str> = b.2.splitn(2, "## Report output").collect();
let shared_prompt = split_a.first().unwrap_or(&"");
let report_a = split_a.get(1).unwrap_or(&"");
let report_b = split_b.get(1).unwrap_or(&"");
format!(
"Compare two actions from the same {} agent. Which was better?\n\n\
{}\n\n\
## Action A\n## Report output{}\n\n\
## Action B\n## Report output{}\n\n\
Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE",
a.0, shared_prompt, report_a, report_b
)
} else {
format!(
"Compare these two memory graph agent actions. Which one was better \
for building a useful, well-organized knowledge graph?\n\n\
## Action A ({} agent)\n{}\n\n\
## Action B ({} agent)\n{}\n\n\
Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE",
a.0, a.2, b.0, b.2
)
};
let response = if model == "haiku" {
llm::call_haiku("compare", &prompt)?