unconscious: release lock during slow spawn work
Split trigger() into phases so the Unconscious mutex is only held briefly:
- reap_finished(): check handles, restore completed autos (under lock)
- select_to_spawn(): pick agents, take their autos out (under lock)
- prepare_spawn(): slow work (Store::load, query, Agent::new) - NO LOCK
- complete_spawn()/abort_spawn(): store results back (under lock)
Previously the lock was held for 28+ seconds during Store::load and query execution. Now the lock hold time should be milliseconds.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
This commit is contained in:
parent
f56fc3a7c7
commit
f40d8cfa9d
2 changed files with 142 additions and 85 deletions
|
|
@ -346,7 +346,19 @@ impl Mind {
|
||||||
s.unc_idle = true;
|
s.unc_idle = true;
|
||||||
}
|
}
|
||||||
loop {
|
loop {
|
||||||
unc.lock().await.trigger().await;
|
// Phase 1: quick work under lock
|
||||||
|
let to_spawn = {
|
||||||
|
let mut guard = unc.lock().await;
|
||||||
|
guard.reap_finished();
|
||||||
|
guard.select_to_spawn()
|
||||||
|
};
|
||||||
|
// Phase 2: slow work outside lock
|
||||||
|
for (idx, name, auto) in to_spawn {
|
||||||
|
match crate::mind::unconscious::prepare_spawn(&name, auto).await {
|
||||||
|
Ok(result) => unc.lock().await.complete_spawn(idx, result),
|
||||||
|
Err(auto) => unc.lock().await.abort_spawn(idx, auto),
|
||||||
|
}
|
||||||
|
}
|
||||||
// Check if conscious became active
|
// Check if conscious became active
|
||||||
if *unc_rx.borrow() { break; }
|
if *unc_rx.borrow() { break; }
|
||||||
// Brief yield to not starve other tasks
|
// Brief yield to not starve other tasks
|
||||||
|
|
|
||||||
|
|
@ -127,8 +127,13 @@ impl Unconscious {
|
||||||
self.agents[idx].enabled = !self.agents[idx].enabled;
|
self.agents[idx].enabled = !self.agents[idx].enabled;
|
||||||
let new_state = self.agents[idx].enabled;
|
let new_state = self.agents[idx].enabled;
|
||||||
self.save_enabled();
|
self.save_enabled();
|
||||||
if new_state && !self.agents[idx].is_running() {
|
if new_state && !self.agents[idx].is_running() && self.agents[idx].auto.is_some() {
|
||||||
self.spawn_agent(idx).await;
|
let agent_name = self.agents[idx].name.clone();
|
||||||
|
let auto = self.agents[idx].auto.take().unwrap();
|
||||||
|
match prepare_spawn(&agent_name, auto).await {
|
||||||
|
Ok(result) => self.complete_spawn(idx, result),
|
||||||
|
Err(auto) => self.abort_spawn(idx, auto),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Some(new_state)
|
Some(new_state)
|
||||||
}
|
}
|
||||||
|
|
@ -170,8 +175,8 @@ impl Unconscious {
|
||||||
self.last_health_check = Some(Instant::now());
|
self.last_health_check = Some(Instant::now());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reap finished agents and spawn new ones.
|
/// Reap finished agents (quick, hold lock briefly).
|
||||||
pub async fn trigger(&mut self) {
|
pub fn reap_finished(&mut self) {
|
||||||
// Periodic graph health refresh (also on first call)
|
// Periodic graph health refresh (also on first call)
|
||||||
if self.last_health_check
|
if self.last_health_check
|
||||||
.map(|t| t.elapsed() > std::time::Duration::from_secs(600))
|
.map(|t| t.elapsed() > std::time::Duration::from_secs(600))
|
||||||
|
|
@ -198,26 +203,58 @@ impl Unconscious {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Select agents to spawn and take their AutoAgents out (quick, hold lock briefly).
|
||||||
|
/// Returns a vec of (index, name, auto) for agents that should spawn.
|
||||||
|
pub fn select_to_spawn(&mut self) -> Vec<(usize, String, AutoAgent)> {
|
||||||
let running = self.agents.iter().filter(|a| a.is_running()).count();
|
let running = self.agents.iter().filter(|a| a.is_running()).count();
|
||||||
|
let mut to_spawn = Vec::new();
|
||||||
|
|
||||||
for _ in running..self.max_concurrent {
|
for _ in running..self.max_concurrent {
|
||||||
let next = self.agents.iter().enumerate()
|
let next = self.agents.iter().enumerate()
|
||||||
.filter(|(_, a)| a.should_run())
|
.filter(|(_, a)| a.should_run() && a.auto.is_some())
|
||||||
.min_by_key(|(_, a)| a.last_run);
|
.min_by_key(|(_, a)| a.last_run);
|
||||||
match next {
|
match next {
|
||||||
Some((idx, _)) => self.spawn_agent(idx).await,
|
Some((idx, _)) => {
|
||||||
|
let name = self.agents[idx].name.clone();
|
||||||
|
let auto = self.agents[idx].auto.take().unwrap();
|
||||||
|
to_spawn.push((idx, name, auto));
|
||||||
|
}
|
||||||
None => break,
|
None => break,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
to_spawn
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn spawn_agent(&mut self, idx: usize) {
|
/// Store spawn result back (quick, hold lock briefly).
|
||||||
let name = self.agents[idx].name.clone();
|
pub fn complete_spawn(&mut self, idx: usize, result: SpawnResult) {
|
||||||
|
self.agents[idx].agent = Some(result.agent);
|
||||||
|
self.agents[idx].handle = Some(result.handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Restore auto on spawn failure (quick, hold lock briefly).
|
||||||
|
pub fn abort_spawn(&mut self, idx: usize, auto: AutoAgent) {
|
||||||
|
self.agents[idx].auto = Some(auto);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result of preparing an agent spawn (created outside the lock).
|
||||||
|
pub struct SpawnResult {
|
||||||
|
pub agent: std::sync::Arc<crate::agent::Agent>,
|
||||||
|
pub handle: tokio::task::JoinHandle<(AutoAgent, Result<(), String>)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Prepare an agent spawn — does the slow work (Store::load, query, Agent::new).
|
||||||
|
/// Called outside the Unconscious lock.
|
||||||
|
/// On success, auto is consumed (moved into spawned task).
|
||||||
|
/// On failure, auto is returned so it can be restored.
|
||||||
|
pub async fn prepare_spawn(name: &str, mut auto: AutoAgent) -> Result<SpawnResult, AutoAgent> {
|
||||||
dbglog!("[unconscious] spawning {}", name);
|
dbglog!("[unconscious] spawning {}", name);
|
||||||
|
|
||||||
let def = match defs::get_def(&name) {
|
let def = match defs::get_def(name) {
|
||||||
Some(d) => d,
|
Some(d) => d,
|
||||||
None => return,
|
None => return Err(auto),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Run query and resolve placeholders
|
// Run query and resolve placeholders
|
||||||
|
|
@ -225,7 +262,7 @@ impl Unconscious {
|
||||||
Ok(s) => s,
|
Ok(s) => s,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
dbglog!("[unconscious] store load failed: {}", e);
|
dbglog!("[unconscious] store load failed: {}", e);
|
||||||
return;
|
return Err(auto);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -236,19 +273,14 @@ impl Unconscious {
|
||||||
Ok(b) => b,
|
Ok(b) => b,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
dbglog!("[unconscious] {} query failed: {}", name, e);
|
dbglog!("[unconscious] {} query failed: {}", name, e);
|
||||||
return;
|
return Err(auto);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if !batch.node_keys.is_empty() {
|
if !batch.node_keys.is_empty() {
|
||||||
store.record_agent_visits(&batch.node_keys, &name).ok();
|
store.record_agent_visits(&batch.node_keys, name).ok();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Take auto out for the spawned task
|
|
||||||
let Some(mut auto) = self.agents[idx].auto.take() else {
|
|
||||||
dbglog!("[unconscious] {} already running", name);
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
let orig_steps = std::mem::replace(&mut auto.steps,
|
let orig_steps = std::mem::replace(&mut auto.steps,
|
||||||
batch.steps.iter().map(|s| AutoStep {
|
batch.steps.iter().map(|s| AutoStep {
|
||||||
prompt: s.prompt.clone(),
|
prompt: s.prompt.clone(),
|
||||||
|
|
@ -263,8 +295,7 @@ impl Unconscious {
|
||||||
if base_url.is_empty() || model.is_empty() {
|
if base_url.is_empty() || model.is_empty() {
|
||||||
dbglog!("[unconscious] API not configured");
|
dbglog!("[unconscious] API not configured");
|
||||||
auto.steps = orig_steps;
|
auto.steps = orig_steps;
|
||||||
self.agents[idx].auto = Some(auto);
|
return Err(auto);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let cli = crate::user::CliArgs::default();
|
let cli = crate::user::CliArgs::default();
|
||||||
|
|
@ -273,10 +304,10 @@ impl Unconscious {
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
dbglog!("[unconscious] config: {}", e);
|
dbglog!("[unconscious] config: {}", e);
|
||||||
auto.steps = orig_steps;
|
auto.steps = orig_steps;
|
||||||
self.agents[idx].auto = Some(auto);
|
return Err(auto);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Unconscious agents have self-contained prompts — no standard context.
|
// Unconscious agents have self-contained prompts — no standard context.
|
||||||
let client = crate::agent::api::ApiClient::new(base_url, api_key, model);
|
let client = crate::agent::api::ApiClient::new(base_url, api_key, model);
|
||||||
let agent = crate::agent::Agent::new(
|
let agent = crate::agent::Agent::new(
|
||||||
|
|
@ -292,15 +323,29 @@ impl Unconscious {
|
||||||
st.temperature = auto.temperature;
|
st.temperature = auto.temperature;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.agents[idx].agent = Some(agent.clone());
|
let agent_clone = agent.clone();
|
||||||
|
let handle = tokio::spawn(async move {
|
||||||
self.agents[idx].handle = Some(tokio::spawn(async move {
|
let result = auto.run_shared(&agent_clone).await;
|
||||||
let result = auto.run_shared(&agent).await;
|
let stats = crate::agent::oneshot::save_agent_log(&auto.name, &agent_clone).await;
|
||||||
let stats = crate::agent::oneshot::save_agent_log(&auto.name, &agent).await;
|
|
||||||
auto.update_stats(stats);
|
auto.update_stats(stats);
|
||||||
auto.steps = orig_steps;
|
auto.steps = orig_steps;
|
||||||
(auto, result)
|
(auto, result)
|
||||||
}));
|
});
|
||||||
|
|
||||||
|
Ok(SpawnResult { agent, handle })
|
||||||
|
}
|
||||||
|
|
||||||
|
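// Backwards compat: trigger() runs every phase (reap, select, prepare, complete/abort) inline through &mut self, so a caller holding the lock still holds it across the slow prepare_spawn() work — but it works.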
|
||||||
|
impl Unconscious {
|
||||||
|
pub async fn trigger(&mut self) {
|
||||||
|
self.reap_finished();
|
||||||
|
let to_spawn = self.select_to_spawn();
|
||||||
|
for (idx, name, auto) in to_spawn {
|
||||||
|
match prepare_spawn(&name, auto).await {
|
||||||
|
Ok(result) => self.complete_spawn(idx, result),
|
||||||
|
Err(auto) => self.abort_spawn(idx, auto),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue