#!/bin/bash
# refine-source.sh — find the exact conversation region a journal entry refers to
#
# Usage: refine-source.sh JSONL_PATH GREP_LINE "journal entry text"
#
#   JSONL_PATH  conversation log, one JSON record per line
#   GREP_LINE   1-based line number of the rough grep hit; 0 or empty = no hit
#   TEXT        the journal entry text (reserved for the agent step, unused yet)
#
# Takes the rough grep hit and builds a window of ~2000 lines of context
# around it (or the tail of the file when there is no usable hit). The window
# is rendered into a readable transcript that is meant to be handed to an
# agent which pins down the exact start/end of the relevant exchange.
#
# Outputs: START_LINE:END_LINE (1-based, both ends inclusive) on stdout, or
# 0:0 when the window contains no readable user/assistant text.
# Exit status: 0 on success, 2 on usage error, 1 on any other failure.

set -euo pipefail

readonly WINDOW=2000        # lines added to START to obtain END
readonly HALF_WINDOW=1000   # context kept before the grep hit
readonly PREVIEW_CHARS=200  # per-message truncation in the rendered chunk

# Print an error to stderr and abort.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Print the usage line to stderr and abort.
usage() {
  printf 'Usage: %s JSONL_PATH GREP_LINE "journal entry text"\n' "${0##*/}" >&2
  exit 2
}

# Print the number of lines in file $1, numbering lines the way grep -n and
# sed do.
count_lines() {
  local file=$1 raw count
  raw=$(wc -l < "$file") || return 1
  count=$((raw)) # arithmetic strips the padding some wc implementations emit
  # wc -l counts newline characters, so a final line that lacks a trailing
  # newline is not counted even though grep -n and sed still number it.
  if [[ -s "$file" && -n "$(tail -c 1 < "$file")" ]]; then
    count=$((count + 1))
  fi
  printf '%s\n' "$count"
}

# Read JSONL records on stdin and print one "L<n> [role]: <text>" line per
# readable assistant/user message, where <n> starts at $1 for the first input
# line. Records that are not valid JSON or do not have the expected shape are
# skipped instead of aborting the whole run.
render_chunk() {
  local first_line=$1
  # The Python source is single-quoted so the shell never expands anything
  # inside it; values are passed through argv instead.
  python3 -c '
import json
import sys

first_line = int(sys.argv[1])
limit = int(sys.argv[2])


def clip(value):
    # Non-string payloads contribute no text.
    return value[:limit] if isinstance(value, str) else ""


def emit(lineno, role, text):
    line = "L{} [{}]: {}\n".format(lineno, role, text)
    # Encode explicitly so unencodable characters cannot raise.
    sys.stdout.buffer.write(line.encode("utf-8", "replace"))


# Iterate over bytes: json.loads decodes them itself, and invalid UTF-8 then
# surfaces as a ValueError for that single record only.
for lineno, raw in enumerate(sys.stdin.buffer, start=first_line):
    try:
        record = json.loads(raw)
    except ValueError:
        continue
    if not isinstance(record, dict):
        continue

    role = record.get("type", "")
    if role not in ("assistant", "user"):
        continue

    message = record.get("message", {})
    content = message.get("content", "") if isinstance(message, dict) else ""

    if role == "assistant":
        if isinstance(content, list):
            text = " ".join(
                clip(part.get("text", ""))
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            )
        else:
            text = str(content)[:limit]
        if text.strip():
            emit(lineno, role, text)
    elif isinstance(content, list):
        for part in content:
            if isinstance(part, str):
                emit(lineno, role, part[:limit])
            elif isinstance(part, dict) and part.get("type") == "text":
                body = part.get("text")
                if isinstance(body, str):
                    emit(lineno, role, body[:limit])
    elif isinstance(content, str) and content.strip():
        emit(lineno, role, content[:limit])
' "$first_line" "$PREVIEW_CHARS"
}

main() {
  (( $# >= 3 )) || usage

  local jsonl=$1
  local grep_line=${2:-0}
  # shellcheck disable=SC2034 # reserved for the agent step below
  local text=$3

  [[ -r "$jsonl" ]] || die "cannot read file: $jsonl"
  # Validate before any arithmetic: $(( )) would otherwise evaluate arbitrary
  # text as an expression.
  [[ "$grep_line" =~ ^[0-9]+$ ]] \
    || die "GREP_LINE must be a non-negative integer, got: $grep_line"
  grep_line=$((10#$grep_line)) # force base 10 so 08 / 010 are not read as octal

  local total
  total=$(count_lines "$jsonl") || die "cannot count lines in: $jsonl"
  if (( total == 0 )); then
    printf '0:0\n'
    return 0
  fi

  # Window around the grep hit, or the tail of the file when there is no hit
  # (0) or the hit lies beyond the end of the file.
  local start end
  if (( grep_line == 0 || grep_line > total )); then
    start=$(( total > WINDOW ? total - WINDOW : 1 ))
  else
    start=$(( grep_line > HALF_WINDOW ? grep_line - HALF_WINDOW : 1 ))
  fi
  end=$(( start + WINDOW ))
  if (( end > total )); then
    end=$total
  fi

  # Extract the conversation chunk and parse it to a readable format. sed
  # quits at the end of the window so the rest of a large log is never read.
  local chunk
  chunk=$(sed -n "${start},${end}p;${end}q" < "$jsonl" | render_chunk "$start") \
    || die "failed to render lines ${start}-${end} of: $jsonl"

  if [[ -z "$chunk" ]]; then
    printf '0:0\n'
    return 0
  fi

  # TODO: ask the agent (Sonnet) to find the exact region inside $chunk using
  # $text. For now, output the chunk range — agent integration comes next.
  printf '%s:%s\n' "$start" "$end"
}

main "$@"