evaluate: remove TIE option, force binary judgment
A TIE verdict makes the comparison non-transitive (A=B and B=C, yet A>C), which breaks the sort ordering. Force the comparator to always pick a winner, and default to A if the response is unparseable.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
0cecfdb352
commit
e2a6bc4c8b
1 changed file with 7 additions and 6 deletions
|
|
@ -293,7 +293,8 @@ fn build_compare_prompt(
|
|||
## Action A\n## Report output{}\n\n\
|
||||
## Action B\n## Report output{}\n\n\
|
||||
Say which is better and why in 1-2 sentences, then end with:\n\
|
||||
BETTER: A or BETTER: B or BETTER: TIE",
|
||||
BETTER: A or BETTER: B\n\
|
||||
You must pick one. No ties.",
|
||||
a.0, shared_prompt, report_a, report_b
|
||||
)
|
||||
} else {
|
||||
|
|
@ -303,7 +304,8 @@ fn build_compare_prompt(
|
|||
## Action A ({} agent)\n{}\n\n\
|
||||
## Action B ({} agent)\n{}\n\n\
|
||||
Say which is better and why in 1-2 sentences, then end with:\n\
|
||||
BETTER: A or BETTER: B or BETTER: TIE",
|
||||
BETTER: A or BETTER: B\n\
|
||||
You must pick one. No ties.",
|
||||
a.0, a.2, b.0, b.2
|
||||
)
|
||||
}
|
||||
|
|
@ -323,12 +325,11 @@ fn llm_compare(
|
|||
};
|
||||
let response = response.trim().to_uppercase();
|
||||
|
||||
if response.contains("BETTER: A") {
|
||||
Ok(std::cmp::Ordering::Less) // A is better = A comes first
|
||||
} else if response.contains("BETTER: B") {
|
||||
if response.contains("BETTER: B") {
|
||||
Ok(std::cmp::Ordering::Greater)
|
||||
} else {
|
||||
Ok(std::cmp::Ordering::Equal)
|
||||
// Default to A (includes "BETTER: A" and any unparseable response)
|
||||
Ok(std::cmp::Ordering::Less)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue