evaluate: dedup agent prompt when comparing same agent type
When both actions are from the same agent type, show the instructions once and compare only the two report outputs and affected nodes. This saves tokens and makes the comparison cleaner.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
b964335317
commit
39e3d69e3c
1 changed files with 26 additions and 8 deletions
|
|
@ -263,14 +263,32 @@ fn llm_compare(
|
||||||
b: &(String, String, String),
|
b: &(String, String, String),
|
||||||
model: &str,
|
model: &str,
|
||||||
) -> Result<std::cmp::Ordering, String> {
|
) -> Result<std::cmp::Ordering, String> {
|
||||||
let prompt = format!(
|
let prompt = if a.0 == b.0 {
|
||||||
|
// Same agent type — show instructions once
|
||||||
|
// Split context at "## Report output" to extract shared prompt
|
||||||
|
let split_a: Vec<&str> = a.2.splitn(2, "## Report output").collect();
|
||||||
|
let split_b: Vec<&str> = b.2.splitn(2, "## Report output").collect();
|
||||||
|
let shared_prompt = split_a.first().unwrap_or(&"");
|
||||||
|
let report_a = split_a.get(1).unwrap_or(&"");
|
||||||
|
let report_b = split_b.get(1).unwrap_or(&"");
|
||||||
|
format!(
|
||||||
|
"Compare two actions from the same {} agent. Which was better?\n\n\
|
||||||
|
{}\n\n\
|
||||||
|
## Action A\n## Report output{}\n\n\
|
||||||
|
## Action B\n## Report output{}\n\n\
|
||||||
|
Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE",
|
||||||
|
a.0, shared_prompt, report_a, report_b
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
"Compare these two memory graph agent actions. Which one was better \
|
"Compare these two memory graph agent actions. Which one was better \
|
||||||
for building a useful, well-organized knowledge graph?\n\n\
|
for building a useful, well-organized knowledge graph?\n\n\
|
||||||
## Action A ({} agent)\n{}\n\n\
|
## Action A ({} agent)\n{}\n\n\
|
||||||
## Action B ({} agent)\n{}\n\n\
|
## Action B ({} agent)\n{}\n\n\
|
||||||
Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE",
|
Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE",
|
||||||
a.0, a.2, b.0, b.2
|
a.0, a.2, b.0, b.2
|
||||||
);
|
)
|
||||||
|
};
|
||||||
|
|
||||||
let response = if model == "haiku" {
|
let response = if model == "haiku" {
|
||||||
llm::call_haiku("compare", &prompt)?
|
llm::call_haiku("compare", &prompt)?
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue