From a9b0438c74b128e1a57244c78fa80a16990fa7a8 Mon Sep 17 00:00:00 2001
From: ProofOfConcept <poc@bcachefs.org>
Date: Thu, 5 Mar 2026 22:56:16 -0500
Subject: [PATCH] daemon: configurable LLM concurrency

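New config field "llm_concurrency" (default 1; values below 1 are
clamped to 1) controls how many concurrent model calls the daemon runs.
The worker pool scales to match: llm_concurrency + 3 workers (two for
the long-running loops, one for non-LLM jobs).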
---
 src/config.rs |  6 ++++++
 src/daemon.rs | 11 +++++------
 2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/src/config.rs b/src/config.rs
index 7f8a3df..1bb7853 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -49,6 +49,8 @@ pub struct Config {
     pub journal_max: usize,
     /// Ordered context groups for session-start loading.
     pub context_groups: Vec<ContextGroup>,
+    /// Max concurrent LLM calls in the daemon.
+    pub llm_concurrency: usize,
     /// Separate Claude config dir for background agent work (daemon jobs).
     /// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
     /// with different OAuth credentials than the interactive session.
@@ -73,6 +75,7 @@ impl Default for Config {
                     source: ContextSource::Store,
                 },
             ],
+            llm_concurrency: 1,
             agent_config_dir: None,
         }
     }
@@ -127,6 +130,9 @@ impl Config {
                 if let Some(m) = cfg.get("journal_max").and_then(|v| v.as_u64()) {
                     config.journal_max = m as usize;
                 }
+                if let Some(n) = cfg.get("llm_concurrency").and_then(|v| v.as_u64()) {
+                    config.llm_concurrency = n.max(1) as usize;
+                }
                 if let Some(s) = cfg.get("agent_config_dir").and_then(|v| v.as_str()) {
                     config.agent_config_dir = Some(expand_home(s));
                 }
diff --git a/src/daemon.rs b/src/daemon.rs
index 4c1966e..8c35ab8 100644
--- a/src/daemon.rs
+++ b/src/daemon.rs
@@ -283,14 +283,13 @@ struct DaemonStatus {
 
 pub fn run_daemon() -> Result<(), String> {
     let choir = Choir::new();
-    // Workers: 2 for long-running loops (scheduler, session-watcher),
-    // plus 1 for the actual LLM job (pool capacity is 1).
-    // Non-LLM jobs (decay, health) also need a worker, so 4 total.
-    let names: Vec<String> = (0..4).map(|i| format!("w{}", i)).collect();
+    let llm_concurrency = crate::config::get().llm_concurrency;
+    // Workers: 2 for long-running loops + llm_concurrency + 1 for non-LLM jobs
+    let n_workers = llm_concurrency + 3;
+    let names: Vec<String> = (0..n_workers).map(|i| format!("w{}", i)).collect();
     let _workers: Vec<_> = names.iter().map(|n| choir.add_worker(n)).collect();
 
-    // LLM API: 1 concurrent call to control token burn rate
-    let llm = ResourcePool::new("llm", 1);
+    let llm = ResourcePool::new("llm", llm_concurrency);
     llm.bind(&choir);
 
     // Recover last_daily from previous status file