evaluate: remove TIE option, force binary judgment

TIE makes the comparator non-transitive (A=B and B=C, yet A>C), which
gives the sort an inconsistent ordering.
Force the comparator to always pick a winner. Default to A if the
response is unparseable.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
ProofOfConcept 2026-03-14 19:48:01 -04:00
parent 0cecfdb352
commit e2a6bc4c8b

View file

@ -293,7 +293,8 @@ fn build_compare_prompt(
## Action A\n## Report output{}\n\n\
## Action B\n## Report output{}\n\n\
Say which is better and why in 1-2 sentences, then end with:\n\
BETTER: A or BETTER: B or BETTER: TIE",
BETTER: A or BETTER: B\n\
You must pick one. No ties.",
a.0, shared_prompt, report_a, report_b
)
} else {
@ -303,7 +304,8 @@ fn build_compare_prompt(
## Action A ({} agent)\n{}\n\n\
## Action B ({} agent)\n{}\n\n\
Say which is better and why in 1-2 sentences, then end with:\n\
BETTER: A or BETTER: B or BETTER: TIE",
BETTER: A or BETTER: B\n\
You must pick one. No ties.",
a.0, a.2, b.0, b.2
)
}
@ -323,12 +325,11 @@ fn llm_compare(
};
let response = response.trim().to_uppercase();
if response.contains("BETTER: A") {
Ok(std::cmp::Ordering::Less) // A is better = A comes first
} else if response.contains("BETTER: B") {
if response.contains("BETTER: B") {
Ok(std::cmp::Ordering::Greater)
} else {
Ok(std::cmp::Ordering::Equal)
// Default to A (includes "BETTER: A" and any unparseable response)
Ok(std::cmp::Ordering::Less)
}
}