From 415180eeab3162ddd2aefcf0fc01c9a1ad99481d Mon Sep 17 00:00:00 2001
From: ProofOfConcept <poc@bcachefs.org>
Date: Sat, 14 Mar 2026 19:36:55 -0400
Subject: [PATCH] evaluate: ask for reasoning in comparisons

Use chain-of-thought prompting: asking the model to "say which is better
and why" before giving its verdict forces clearer judgment and gives us
analysis data for improving agents.

Co-Authored-By: Kent Overstreet <kent.overstreet@linux.dev>
---
 poc-memory/src/cli/agent.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/poc-memory/src/cli/agent.rs b/poc-memory/src/cli/agent.rs
index 14af955..73fed11 100644
--- a/poc-memory/src/cli/agent.rs
+++ b/poc-memory/src/cli/agent.rs
@@ -276,7 +276,8 @@ fn llm_compare(
              {}\n\n\
              ## Action A\n## Report output{}\n\n\
              ## Action B\n## Report output{}\n\n\
-             Reply with ONLY: BETTER: A  or  BETTER: B  or  BETTER: TIE",
+             Say which is better and why in 1-2 sentences, then end with:\n\
+             BETTER: A  or  BETTER: B  or  BETTER: TIE",
             a.0, shared_prompt, report_a, report_b
         )
     } else {
@@ -285,7 +286,8 @@ fn llm_compare(
              for building a useful, well-organized knowledge graph?\n\n\
              ## Action A ({} agent)\n{}\n\n\
              ## Action B ({} agent)\n{}\n\n\
-             Reply with ONLY: BETTER: A  or  BETTER: B  or  BETTER: TIE",
+             Say which is better and why in 1-2 sentences, then end with:\n\
+             BETTER: A  or  BETTER: B  or  BETTER: TIE",
             a.0, a.2, b.0, b.2
         )
     };