From 3e410347a2f78dd299c9e22c4e85e7b3b069b785 Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Thu, 26 Mar 2026 17:48:44 -0400 Subject: [PATCH] api: retry transient connection errors, misc fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Try up to 5 times (4 retries) with exponential backoff (2s, 4s, 8s, 16s) on transient errors: IncompleteMessage, connection closed/reset/refused, timeouts. Non-transient errors fail immediately. - tail command: print to stdout instead of stderr - knowledge.rs: write each spawned agent's output to its own timestamped log file (<agent>-<timestamp>.log) instead of appending to a shared <agent>.log Co-Authored-By: Kent Overstreet --- src/cli/journal.rs | 12 +++---- src/subconscious/api.rs | 67 +++++++++++++++++++++++------------ src/subconscious/knowledge.rs | 5 ++- 3 files changed, 53 insertions(+), 31 deletions(-) diff --git a/src/cli/journal.rs b/src/cli/journal.rs index 732b3da..daf75f8 100644 --- a/src/cli/journal.rs +++ b/src/cli/journal.rs @@ -33,18 +33,18 @@ pub fn cmd_tail(n: usize, full: bool) -> Result<(), String> { }; let del = if node.deleted { " [DELETED]" } else { "" }; if full { - eprintln!("--- {} (v{}) {} via {} w={:.3}{} ---", + println!("--- {} (v{}) {} via {} w={:.3}{} ---", node.key, node.version, ts, node.provenance, node.weight, del); - eprintln!("{}\n", node.content); + println!("{}\n", node.content); } else { let preview = crate::util::first_n_chars(&node.content, 100).replace('\n', "\\n"); - eprintln!(" {} v{} w={:.2}{}", + println!(" {} v{} w={:.2}{}", ts, node.version, node.weight, del); - eprintln!(" {} via {}", node.key, node.provenance); + println!(" {} via {}", node.key, node.provenance); if !preview.is_empty() { - eprintln!(" {}", preview); + println!(" {}", preview); } - eprintln!(); + println!(); } } diff --git a/src/subconscious/api.rs b/src/subconscious/api.rs index 4beef24..25c529e 100644 --- a/src/subconscious/api.rs +++ b/src/subconscious/api.rs @@ -61,29 +61,52 @@ pub async fn call_api_with_tools( for turn in 0..max_turns { 
log(&format!("\n=== TURN {} ({} messages) ===\n", turn, messages.len())); - let (msg, usage) = client.chat_completion_stream_temp( - &messages, - Some(&tool_defs), - &ui_tx, - StreamTarget::Autonomous, - &reasoning, - temperature, - ).await.map_err(|e| { - let msg_bytes: usize = messages.iter() - .map(|m| m.content_text().len()) - .sum(); - let err_str = e.to_string(); - let hint = if err_str.contains("IncompleteMessage") || err_str.contains("connection closed") { - format!(" — likely exceeded model context window (~{}KB ≈ {}K tokens)", - msg_bytes / 1024, msg_bytes / 4096) - } else { - String::new() - }; - format!("API error on turn {} (~{}KB payload, {} messages): {}{}", - turn, msg_bytes / 1024, messages.len(), e, hint) - })?; + let mut last_err = None; + let mut msg_opt = None; + let mut usage_opt = None; + for attempt in 0..5 { + match client.chat_completion_stream_temp( + &messages, + Some(&tool_defs), + &ui_tx, + StreamTarget::Autonomous, + &reasoning, + temperature, + ).await { + Ok((msg, usage)) => { + msg_opt = Some(msg); + usage_opt = usage; + break; + } + Err(e) => { + let err_str = e.to_string(); + let is_transient = err_str.contains("IncompleteMessage") + || err_str.contains("connection closed") + || err_str.contains("connection reset") + || err_str.contains("timed out") + || err_str.contains("Connection refused"); + if is_transient && attempt < 4 { + log(&format!("transient error (attempt {}): {}, retrying...", + attempt + 1, err_str)); + tokio::time::sleep(std::time::Duration::from_secs(2 << attempt)).await; + last_err = Some(e); + continue; + } + let msg_bytes: usize = messages.iter() + .map(|m| m.content_text().len()) + .sum(); + return Err(format!( + "API error on turn {} (~{}KB payload, {} messages, {} attempts): {}", + turn, msg_bytes / 1024, messages.len(), attempt + 1, e)); + } + } + } + let msg = msg_opt.unwrap(); + if let Some(ref e) = last_err { + log(&format!("succeeded after retry (previous error: {})", e)); + } - if let Some(u) = &usage 
{ + if let Some(u) = &usage_opt { log(&format!("tokens: {} prompt + {} completion", u.prompt_tokens, u.completion_tokens)); } diff --git a/src/subconscious/knowledge.rs b/src/subconscious/knowledge.rs index 5f3cdb6..19985a9 100644 --- a/src/subconscious/knowledge.rs +++ b/src/subconscious/knowledge.rs @@ -264,9 +264,8 @@ pub fn spawn_agent( let log_dir = store::memory_dir().join("logs"); fs::create_dir_all(&log_dir).ok(); - let agent_log = fs::OpenOptions::new() - .create(true).append(true) - .open(log_dir.join(format!("{}.log", agent_name))) + let agent_log = fs::File::create( + log_dir.join(format!("{}-{}.log", agent_name, store::compact_timestamp()))) .unwrap_or_else(|_| fs::File::create("/dev/null").unwrap()); let child = std::process::Command::new("poc-memory")