From e2a6bc4c8bc66ff4302b1a14c85f5194ab362c9b Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Sat, 14 Mar 2026 19:48:01 -0400 Subject: [PATCH] evaluate: remove TIE option, force binary judgment TIE causes inconsistency in sort (A=B, B=C but A>C breaks ordering). Force the comparator to always pick a winner. Default to A if response is unparseable. Co-Authored-By: Kent Overstreet --- poc-memory/src/cli/agent.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/poc-memory/src/cli/agent.rs b/poc-memory/src/cli/agent.rs index 7eb2a7c..8ec9dea 100644 --- a/poc-memory/src/cli/agent.rs +++ b/poc-memory/src/cli/agent.rs @@ -293,7 +293,8 @@ fn build_compare_prompt( ## Action A\n## Report output{}\n\n\ ## Action B\n## Report output{}\n\n\ Say which is better and why in 1-2 sentences, then end with:\n\ - BETTER: A or BETTER: B or BETTER: TIE", + BETTER: A or BETTER: B\n\ + You must pick one. No ties.", a.0, shared_prompt, report_a, report_b ) } else { @@ -303,7 +304,8 @@ fn build_compare_prompt( ## Action A ({} agent)\n{}\n\n\ ## Action B ({} agent)\n{}\n\n\ Say which is better and why in 1-2 sentences, then end with:\n\ - BETTER: A or BETTER: B or BETTER: TIE", + BETTER: A or BETTER: B\n\ + You must pick one. No ties.", a.0, a.2, b.0, b.2 ) } @@ -323,12 +325,11 @@ fn llm_compare( }; let response = response.trim().to_uppercase(); - if response.contains("BETTER: A") { - Ok(std::cmp::Ordering::Less) // A is better = A comes first - } else if response.contains("BETTER: B") { + if response.contains("BETTER: B") { Ok(std::cmp::Ordering::Greater) } else { - Ok(std::cmp::Ordering::Equal) + // Default to A (includes "BETTER: A" and any unparseable response) + Ok(std::cmp::Ordering::Less) } }