From 415180eeab3162ddd2aefcf0fc01c9a1ad99481d Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Sat, 14 Mar 2026 19:36:55 -0400 Subject: [PATCH] evaluate: ask for reasoning in comparisons Chain-of-thought: "say which is better and why" forces clearer judgment and gives us analysis data for improving agents. Co-Authored-By: Kent Overstreet --- poc-memory/src/cli/agent.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/poc-memory/src/cli/agent.rs b/poc-memory/src/cli/agent.rs index 14af955..73fed11 100644 --- a/poc-memory/src/cli/agent.rs +++ b/poc-memory/src/cli/agent.rs @@ -276,7 +276,8 @@ fn llm_compare( {}\n\n\ ## Action A\n## Report output{}\n\n\ ## Action B\n## Report output{}\n\n\ - Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE", + Say which is better and why in 1-2 sentences, then end with:\n\ + BETTER: A or BETTER: B or BETTER: TIE", a.0, shared_prompt, report_a, report_b ) } else { @@ -285,7 +286,8 @@ fn llm_compare( for building a useful, well-organized knowledge graph?\n\n\ ## Action A ({} agent)\n{}\n\n\ ## Action B ({} agent)\n{}\n\n\ - Reply with ONLY: BETTER: A or BETTER: B or BETTER: TIE", + Say which is better and why in 1-2 sentences, then end with:\n\ + BETTER: A or BETTER: B or BETTER: TIE", a.0, a.2, b.0, b.2 ) };