2026-04-08 23:39:48 -04:00
|
|
|
// unconscious.rs — Graph maintenance agents
|
|
|
|
|
//
|
|
|
|
|
// Standalone agents that operate on the memory graph without needing
|
2026-04-09 00:41:18 -04:00
|
|
|
// conversation context. Each agent runs in a loop: finish one run,
|
2026-04-10 02:39:55 -04:00
|
|
|
// start the next. Agents can be toggled on/off, persisted to
|
|
|
|
|
// ~/.consciousness/agent-enabled.json.
|
2026-04-08 23:39:48 -04:00
|
|
|
|
2026-04-10 02:39:55 -04:00
|
|
|
use std::time::Instant;
|
2026-04-09 00:51:10 -04:00
|
|
|
use std::collections::HashMap;
|
2026-04-09 01:00:48 -04:00
|
|
|
use futures::FutureExt;
|
2026-04-08 23:39:48 -04:00
|
|
|
|
2026-04-11 21:34:41 -04:00
|
|
|
use crate::agent::oneshot::{AutoAgent, AutoStep, RunStats};
|
2026-04-08 23:39:48 -04:00
|
|
|
use crate::agent::tools;
|
|
|
|
|
use crate::subconscious::defs;
|
|
|
|
|
|
2026-04-09 00:51:10 -04:00
|
|
|
fn config_path() -> std::path::PathBuf {
|
|
|
|
|
dirs::home_dir().unwrap_or_default()
|
|
|
|
|
.join(".consciousness/agent-enabled.json")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn load_enabled_config() -> HashMap<String, bool> {
|
|
|
|
|
std::fs::read_to_string(config_path()).ok()
|
|
|
|
|
.and_then(|s| serde_json::from_str(&s).ok())
|
|
|
|
|
.unwrap_or_default()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn save_enabled_config(map: &HashMap<String, bool>) {
|
|
|
|
|
if let Ok(json) = serde_json::to_string_pretty(map) {
|
|
|
|
|
let _ = std::fs::write(config_path(), json);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-08 23:39:48 -04:00
|
|
|
struct UnconsciousAgent {
|
|
|
|
|
name: String,
|
2026-04-09 00:41:18 -04:00
|
|
|
enabled: bool,
|
2026-04-12 20:11:40 -04:00
|
|
|
auto: Option<AutoAgent>,
|
2026-04-12 02:04:50 -04:00
|
|
|
handle: Option<tokio::task::JoinHandle<(AutoAgent, Result<(), String>)>>,
|
2026-04-09 01:00:48 -04:00
|
|
|
/// Shared agent handle — UI locks to read context live.
|
|
|
|
|
pub agent: Option<std::sync::Arc<crate::agent::Agent>>,
|
2026-04-08 23:39:48 -04:00
|
|
|
last_run: Option<Instant>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl UnconsciousAgent {
|
|
|
|
|
fn is_running(&self) -> bool {
|
|
|
|
|
self.handle.as_ref().is_some_and(|h| !h.is_finished())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn should_run(&self) -> bool {
|
2026-04-10 02:39:55 -04:00
|
|
|
self.enabled && !self.is_running()
|
2026-04-08 23:39:48 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Snapshot for the TUI.
|
|
|
|
|
#[derive(Clone)]
|
|
|
|
|
pub struct UnconsciousSnapshot {
|
|
|
|
|
pub name: String,
|
|
|
|
|
pub running: bool,
|
2026-04-09 00:41:18 -04:00
|
|
|
pub enabled: bool,
|
|
|
|
|
pub runs: usize,
|
2026-04-08 23:39:48 -04:00
|
|
|
pub last_run_secs_ago: Option<f64>,
|
2026-04-09 01:00:48 -04:00
|
|
|
pub agent: Option<std::sync::Arc<crate::agent::Agent>>,
|
2026-04-10 13:44:41 -04:00
|
|
|
pub last_stats: Option<RunStats>,
|
2026-04-11 21:57:24 -04:00
|
|
|
/// Recent store activity for this agent: (key, timestamp), newest first.
|
|
|
|
|
pub history: Vec<(String, i64)>,
|
2026-04-11 22:12:46 -04:00
|
|
|
pub tool_calls_ewma: f64,
|
|
|
|
|
pub tool_failures_ewma: f64,
|
2026-04-08 23:39:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub struct Unconscious {
|
|
|
|
|
agents: Vec<UnconsciousAgent>,
|
|
|
|
|
max_concurrent: usize,
|
2026-04-09 00:45:26 -04:00
|
|
|
pub graph_health: Option<crate::subconscious::daemon::GraphHealth>,
|
|
|
|
|
last_health_check: Option<Instant>,
|
2026-04-13 22:38:01 -04:00
|
|
|
/// Notified when agent state changes (finished, toggled)
|
|
|
|
|
pub wake: std::sync::Arc<tokio::sync::Notify>,
|
agent: end-to-end gRPC Generate with delta-based session orchestration
Wires the client side of the new salience protocol so inference
actually runs over gRPC instead of emitting the stubbed "not yet
wired" error. Each turn walks the AST as interleaved chunks, sends
only what's new to the server, and streams decode tokens back.
context.rs:
* `WireChunk` enum: `Tokens(Vec<u32>)` or `Image { bytes, mime,
known_expanded_len }`. Preserves text/image/text ordering the
wire path can't flatten.
* `wire_chunks(range, skip)` walker, parallel to `wire_prompt` —
branches emit `<|im_start|>…<|im_end|>` tokens, image leaves
emit a single Image chunk (no inline vision tokens).
* `NodeLeaf::set_image_token_count(n)` + recompute of cached
`token_ids`; `ContextState::commit_image_token_counts(&[u32])`
fills in the first-N zero-count image leaves in wire order.
* `ResponseParser::run` handles the new
`StreamToken::ImageAppended` by committing the server's N into
the AST before the final Generate's Token events stream in.
salience.rs:
* `SessionHandle` tracks `committed_len`. `append_image` advances
it from the RPC response. New `generate(req)` opens the
server-streaming RPC.
api/mod.rs:
* `stream_session_mm(session_lock, chunks, sampling, priority,
readout_shape)` replaces the stub. Spawns `run_session_generate`.
* `run_session_generate`: takes the session out of the Mutex (or
opens fresh), skips chunks covered by `committed_len` (bails on
mid-chunk straddle or unknown-length image in the committed
prefix), walks the delta: accumulates Tokens into `pending`, on
Image flushes pending via `flush_pending` (max_tokens=0 Generate
that just prefills), then AppendImage + emits
StreamToken::ImageAppended. Final Generate carries any trailing
pending text as `append_tokens` and the sampling params; Token
events stream out as StreamToken::Token, Done as
StreamToken::Done. On success, handle with updated
`committed_len` returns to the Mutex; on error, handle drops
and next call reopens.
* `StreamToken::ImageAppended { placeholder_count }` variant —
emitted in wire order before the final Generate's tokens.
* Prefix-cache cap for readout coverage: `readout_ranges` covers
`[prompt_len_after_append, u32::MAX)` when the caller provides
a readout_shape, so decode positions stream their readouts.
agent/mod.rs:
* `assemble_prompt` returns `Vec<WireChunk>` with the assistant
prologue merged into the trailing Tokens chunk. Caller in
`turn` passes chunks + readout_shape (pulled from
`agent.readout.lock().manifest`) to `stream_session_mm`.
* Dropped `assemble_prompt_tokens` — dead.
mind + unconscious:
* `Unconscious::new(client)` stores a shared `ApiClient`. Fixes
the repeated-manifest-fetch bug caused by each subagent's
`ApiClient::new` having its own OnceCell. The client's Arc-
wrapped manifest cache is now shared across every agent Mind
spawns.
* `prepare_spawn(name, auto, wake, base_client)` clones the base
client and overrides `.model` for the resolved backend instead
of constructing fresh. All three callers
(`toggle`/`trigger`/unconscious loop) pass `self.client.clone()`.
* `Mind::new` passes `agent.client.clone()` into
`Unconscious::new`.
subconscious/generate.rs:
* gen_continuation switched to `wire_chunks` + the new
`stream_session_mm` signature. Ephemeral session opens on each
call, tears down at scope end. No readouts requested.
Not changed yet, noted for follow-up:
* Subconscious ablation scoring in learn.rs still talks to
`/v1/score` over HTTP. Will migrate once we have time to verify
the Generate+max_tokens=0+prompt_logprobs path end-to-end.
* compare.rs constructs its own ApiClient for the
`compare.test_backend` (which is intentionally a different
endpoint) — left alone.
* Readout manifest still fetched via HTTP at Agent::new.
Migration to GetReadoutManifest gRPC is a separate cleanup.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-24 12:27:55 -04:00
|
|
|
/// Shared API client — cloned (cheap) into each spawned agent's
|
|
|
|
|
/// Agent::new call so they all share the manifest cache and
|
|
|
|
|
/// gRPC endpoint state. Override `.model` on the clone when a
|
|
|
|
|
/// per-agent backend differs from the default.
|
|
|
|
|
pub client: crate::agent::api::ApiClient,
|
2026-04-08 23:39:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Unconscious {
|
agent: end-to-end gRPC Generate with delta-based session orchestration
Wires the client side of the new salience protocol so inference
actually runs over gRPC instead of emitting the stubbed "not yet
wired" error. Each turn walks the AST as interleaved chunks, sends
only what's new to the server, and streams decode tokens back.
context.rs:
* `WireChunk` enum: `Tokens(Vec<u32>)` or `Image { bytes, mime,
known_expanded_len }`. Preserves text/image/text ordering the
wire path can't flatten.
* `wire_chunks(range, skip)` walker, parallel to `wire_prompt` —
branches emit `<|im_start|>…<|im_end|>` tokens, image leaves
emit a single Image chunk (no inline vision tokens).
* `NodeLeaf::set_image_token_count(n)` + recompute of cached
`token_ids`; `ContextState::commit_image_token_counts(&[u32])`
fills in the first-N zero-count image leaves in wire order.
* `ResponseParser::run` handles the new
`StreamToken::ImageAppended` by committing the server's N into
the AST before the final Generate's Token events stream in.
salience.rs:
* `SessionHandle` tracks `committed_len`. `append_image` advances
it from the RPC response. New `generate(req)` opens the
server-streaming RPC.
api/mod.rs:
* `stream_session_mm(session_lock, chunks, sampling, priority,
readout_shape)` replaces the stub. Spawns `run_session_generate`.
* `run_session_generate`: takes the session out of the Mutex (or
opens fresh), skips chunks covered by `committed_len` (bails on
mid-chunk straddle or unknown-length image in the committed
prefix), walks the delta: accumulates Tokens into `pending`, on
Image flushes pending via `flush_pending` (max_tokens=0 Generate
that just prefills), then AppendImage + emits
StreamToken::ImageAppended. Final Generate carries any trailing
pending text as `append_tokens` and the sampling params; Token
events stream out as StreamToken::Token, Done as
StreamToken::Done. On success, handle with updated
`committed_len` returns to the Mutex; on error, handle drops
and next call reopens.
* `StreamToken::ImageAppended { placeholder_count }` variant —
emitted in wire order before the final Generate's tokens.
* Prefix-cache cap for readout coverage: `readout_ranges` covers
`[prompt_len_after_append, u32::MAX)` when the caller provides
a readout_shape, so decode positions stream their readouts.
agent/mod.rs:
* `assemble_prompt` returns `Vec<WireChunk>` with the assistant
prologue merged into the trailing Tokens chunk. Caller in
`turn` passes chunks + readout_shape (pulled from
`agent.readout.lock().manifest`) to `stream_session_mm`.
* Dropped `assemble_prompt_tokens` — dead.
mind + unconscious:
* `Unconscious::new(client)` stores a shared `ApiClient`. Fixes
the repeated-manifest-fetch bug caused by each subagent's
`ApiClient::new` having its own OnceCell. The client's Arc-
wrapped manifest cache is now shared across every agent Mind
spawns.
* `prepare_spawn(name, auto, wake, base_client)` clones the base
client and overrides `.model` for the resolved backend instead
of constructing fresh. All three callers
(`toggle`/`trigger`/unconscious loop) pass `self.client.clone()`.
* `Mind::new` passes `agent.client.clone()` into
`Unconscious::new`.
subconscious/generate.rs:
* gen_continuation switched to `wire_chunks` + the new
`stream_session_mm` signature. Ephemeral session opens on each
call, tears down at scope end. No readouts requested.
Not changed yet, noted for follow-up:
* Subconscious ablation scoring in learn.rs still talks to
`/v1/score` over HTTP. Will migrate once we have time to verify
the Generate+max_tokens=0+prompt_logprobs path end-to-end.
* compare.rs constructs its own ApiClient for the
`compare.test_backend` (which is intentionally a different
endpoint) — left alone.
* Readout manifest still fetched via HTTP at Agent::new.
Migration to GetReadoutManifest gRPC is a separate cleanup.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-24 12:27:55 -04:00
|
|
|
pub fn new(client: crate::agent::api::ApiClient) -> Self {
|
2026-04-09 00:51:10 -04:00
|
|
|
let enabled_map = load_enabled_config();
|
|
|
|
|
|
|
|
|
|
// Scan all .agent files, exclude subconscious-* and surface-observe
|
|
|
|
|
let mut agents: Vec<UnconsciousAgent> = Vec::new();
|
2026-04-11 19:54:18 -04:00
|
|
|
let base_tools = tools::memory::memory_tools().to_vec();
|
|
|
|
|
let extra_tools = tools::memory::journal_tools().to_vec();
|
2026-04-09 00:51:10 -04:00
|
|
|
for def in defs::load_defs() {
|
|
|
|
|
if def.agent.starts_with("subconscious-") { continue; }
|
|
|
|
|
if def.agent == "surface-observe" { continue; }
|
|
|
|
|
let enabled = enabled_map.get(&def.agent).copied()
|
2026-04-09 01:00:48 -04:00
|
|
|
.unwrap_or(false);
|
2026-04-11 19:54:18 -04:00
|
|
|
let mut effective_tools = base_tools.clone();
|
|
|
|
|
for name in &def.tools {
|
|
|
|
|
if let Some(t) = extra_tools.iter().find(|t| t.name == name) {
|
|
|
|
|
effective_tools.push(t.clone());
|
|
|
|
|
}
|
|
|
|
|
}
|
2026-04-09 01:00:48 -04:00
|
|
|
let steps: Vec<AutoStep> = def.steps.iter().map(|s| AutoStep {
|
|
|
|
|
prompt: s.prompt.clone(),
|
|
|
|
|
phase: s.phase.clone(),
|
|
|
|
|
}).collect();
|
|
|
|
|
let auto = AutoAgent::new(
|
|
|
|
|
def.agent.clone(), effective_tools, steps,
|
|
|
|
|
def.temperature.unwrap_or(0.6), def.priority,
|
|
|
|
|
);
|
2026-04-09 00:51:10 -04:00
|
|
|
agents.push(UnconsciousAgent {
|
|
|
|
|
name: def.agent.clone(),
|
|
|
|
|
enabled,
|
2026-04-12 20:11:40 -04:00
|
|
|
auto: Some(auto),
|
2026-04-09 00:51:10 -04:00
|
|
|
handle: None,
|
2026-04-09 01:00:48 -04:00
|
|
|
agent: None,
|
2026-04-09 00:51:10 -04:00
|
|
|
last_run: None,
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
agents.sort_by(|a, b| a.name.cmp(&b.name));
|
|
|
|
|
|
2026-04-12 02:55:39 -04:00
|
|
|
let max_concurrent = crate::config::get().llm_concurrency;
|
|
|
|
|
|
2026-04-09 01:05:08 -04:00
|
|
|
Self {
|
2026-04-12 02:55:39 -04:00
|
|
|
agents, max_concurrent,
|
2026-04-09 00:45:26 -04:00
|
|
|
graph_health: None,
|
|
|
|
|
last_health_check: None,
|
2026-04-13 22:38:01 -04:00
|
|
|
wake: std::sync::Arc::new(tokio::sync::Notify::new()),
|
agent: end-to-end gRPC Generate with delta-based session orchestration
Wires the client side of the new salience protocol so inference
actually runs over gRPC instead of emitting the stubbed "not yet
wired" error. Each turn walks the AST as interleaved chunks, sends
only what's new to the server, and streams decode tokens back.
context.rs:
* `WireChunk` enum: `Tokens(Vec<u32>)` or `Image { bytes, mime,
known_expanded_len }`. Preserves text/image/text ordering the
wire path can't flatten.
* `wire_chunks(range, skip)` walker, parallel to `wire_prompt` —
branches emit `<|im_start|>…<|im_end|>` tokens, image leaves
emit a single Image chunk (no inline vision tokens).
* `NodeLeaf::set_image_token_count(n)` + recompute of cached
`token_ids`; `ContextState::commit_image_token_counts(&[u32])`
fills in the first-N zero-count image leaves in wire order.
* `ResponseParser::run` handles the new
`StreamToken::ImageAppended` by committing the server's N into
the AST before the final Generate's Token events stream in.
salience.rs:
* `SessionHandle` tracks `committed_len`. `append_image` advances
it from the RPC response. New `generate(req)` opens the
server-streaming RPC.
api/mod.rs:
* `stream_session_mm(session_lock, chunks, sampling, priority,
readout_shape)` replaces the stub. Spawns `run_session_generate`.
* `run_session_generate`: takes the session out of the Mutex (or
opens fresh), skips chunks covered by `committed_len` (bails on
mid-chunk straddle or unknown-length image in the committed
prefix), walks the delta: accumulates Tokens into `pending`, on
Image flushes pending via `flush_pending` (max_tokens=0 Generate
that just prefills), then AppendImage + emits
StreamToken::ImageAppended. Final Generate carries any trailing
pending text as `append_tokens` and the sampling params; Token
events stream out as StreamToken::Token, Done as
StreamToken::Done. On success, handle with updated
`committed_len` returns to the Mutex; on error, handle drops
and next call reopens.
* `StreamToken::ImageAppended { placeholder_count }` variant —
emitted in wire order before the final Generate's tokens.
* Prefix-cache cap for readout coverage: `readout_ranges` covers
`[prompt_len_after_append, u32::MAX)` when the caller provides
a readout_shape, so decode positions stream their readouts.
agent/mod.rs:
* `assemble_prompt` returns `Vec<WireChunk>` with the assistant
prologue merged into the trailing Tokens chunk. Caller in
`turn` passes chunks + readout_shape (pulled from
`agent.readout.lock().manifest`) to `stream_session_mm`.
* Dropped `assemble_prompt_tokens` — dead.
mind + unconscious:
* `Unconscious::new(client)` stores a shared `ApiClient`. Fixes
the repeated-manifest-fetch bug caused by each subagent's
`ApiClient::new` having its own OnceCell. The client's Arc-
wrapped manifest cache is now shared across every agent Mind
spawns.
* `prepare_spawn(name, auto, wake, base_client)` clones the base
client and overrides `.model` for the resolved backend instead
of constructing fresh. All three callers
(`toggle`/`trigger`/unconscious loop) pass `self.client.clone()`.
* `Mind::new` passes `agent.client.clone()` into
`Unconscious::new`.
subconscious/generate.rs:
* gen_continuation switched to `wire_chunks` + the new
`stream_session_mm` signature. Ephemeral session opens on each
call, tears down at scope end. No readouts requested.
Not changed yet, noted for follow-up:
* Subconscious ablation scoring in learn.rs still talks to
`/v1/score` over HTTP. Will migrate once we have time to verify
the Generate+max_tokens=0+prompt_logprobs path end-to-end.
* compare.rs constructs its own ApiClient for the
`compare.test_backend` (which is intentionally a different
endpoint) — left alone.
* Readout manifest still fetched via HTTP at Agent::new.
Migration to GetReadoutManifest gRPC is a separate cleanup.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-24 12:27:55 -04:00
|
|
|
client,
|
2026-04-09 01:05:08 -04:00
|
|
|
}
|
2026-04-08 23:39:48 -04:00
|
|
|
}
|
|
|
|
|
|
2026-04-09 00:41:18 -04:00
|
|
|
/// Toggle an agent on/off by name. Returns new enabled state.
|
2026-04-09 00:53:54 -04:00
|
|
|
/// If enabling, immediately spawns the agent if it's not running.
|
2026-04-09 01:00:48 -04:00
|
|
|
pub async fn toggle(&mut self, name: &str) -> Option<bool> {
|
2026-04-09 00:53:54 -04:00
|
|
|
let idx = self.agents.iter().position(|a| a.name == name)?;
|
|
|
|
|
self.agents[idx].enabled = !self.agents[idx].enabled;
|
|
|
|
|
let new_state = self.agents[idx].enabled;
|
2026-04-09 00:51:10 -04:00
|
|
|
self.save_enabled();
|
2026-04-12 20:33:23 -04:00
|
|
|
if new_state && !self.agents[idx].is_running() && self.agents[idx].auto.is_some() {
|
|
|
|
|
let agent_name = self.agents[idx].name.clone();
|
|
|
|
|
let auto = self.agents[idx].auto.take().unwrap();
|
2026-04-13 22:38:01 -04:00
|
|
|
let wake = self.wake.clone();
|
agent: end-to-end gRPC Generate with delta-based session orchestration
Wires the client side of the new salience protocol so inference
actually runs over gRPC instead of emitting the stubbed "not yet
wired" error. Each turn walks the AST as interleaved chunks, sends
only what's new to the server, and streams decode tokens back.
context.rs:
* `WireChunk` enum: `Tokens(Vec<u32>)` or `Image { bytes, mime,
known_expanded_len }`. Preserves text/image/text ordering the
wire path can't flatten.
* `wire_chunks(range, skip)` walker, parallel to `wire_prompt` —
branches emit `<|im_start|>…<|im_end|>` tokens, image leaves
emit a single Image chunk (no inline vision tokens).
* `NodeLeaf::set_image_token_count(n)` + recompute of cached
`token_ids`; `ContextState::commit_image_token_counts(&[u32])`
fills in the first-N zero-count image leaves in wire order.
* `ResponseParser::run` handles the new
`StreamToken::ImageAppended` by committing the server's N into
the AST before the final Generate's Token events stream in.
salience.rs:
* `SessionHandle` tracks `committed_len`. `append_image` advances
it from the RPC response. New `generate(req)` opens the
server-streaming RPC.
api/mod.rs:
* `stream_session_mm(session_lock, chunks, sampling, priority,
readout_shape)` replaces the stub. Spawns `run_session_generate`.
* `run_session_generate`: takes the session out of the Mutex (or
opens fresh), skips chunks covered by `committed_len` (bails on
mid-chunk straddle or unknown-length image in the committed
prefix), walks the delta: accumulates Tokens into `pending`, on
Image flushes pending via `flush_pending` (max_tokens=0 Generate
that just prefills), then AppendImage + emits
StreamToken::ImageAppended. Final Generate carries any trailing
pending text as `append_tokens` and the sampling params; Token
events stream out as StreamToken::Token, Done as
StreamToken::Done. On success, handle with updated
`committed_len` returns to the Mutex; on error, handle drops
and next call reopens.
* `StreamToken::ImageAppended { placeholder_count }` variant —
emitted in wire order before the final Generate's tokens.
* Prefix-cache cap for readout coverage: `readout_ranges` covers
`[prompt_len_after_append, u32::MAX)` when the caller provides
a readout_shape, so decode positions stream their readouts.
agent/mod.rs:
* `assemble_prompt` returns `Vec<WireChunk>` with the assistant
prologue merged into the trailing Tokens chunk. Caller in
`turn` passes chunks + readout_shape (pulled from
`agent.readout.lock().manifest`) to `stream_session_mm`.
* Dropped `assemble_prompt_tokens` — dead.
mind + unconscious:
* `Unconscious::new(client)` stores a shared `ApiClient`. Fixes
the repeated-manifest-fetch bug caused by each subagent's
`ApiClient::new` having its own OnceCell. The client's Arc-
wrapped manifest cache is now shared across every agent Mind
spawns.
* `prepare_spawn(name, auto, wake, base_client)` clones the base
client and overrides `.model` for the resolved backend instead
of constructing fresh. All three callers
(`toggle`/`trigger`/unconscious loop) pass `self.client.clone()`.
* `Mind::new` passes `agent.client.clone()` into
`Unconscious::new`.
subconscious/generate.rs:
* gen_continuation switched to `wire_chunks` + the new
`stream_session_mm` signature. Ephemeral session opens on each
call, tears down at scope end. No readouts requested.
Not changed yet, noted for follow-up:
* Subconscious ablation scoring in learn.rs still talks to
`/v1/score` over HTTP. Will migrate once we have time to verify
the Generate+max_tokens=0+prompt_logprobs path end-to-end.
* compare.rs constructs its own ApiClient for the
`compare.test_backend` (which is intentionally a different
endpoint) — left alone.
* Readout manifest still fetched via HTTP at Agent::new.
Migration to GetReadoutManifest gRPC is a separate cleanup.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-24 12:27:55 -04:00
|
|
|
let client = self.client.clone();
|
|
|
|
|
match prepare_spawn(&agent_name, auto, wake, client).await {
|
2026-04-12 20:33:23 -04:00
|
|
|
Ok(result) => self.complete_spawn(idx, result),
|
|
|
|
|
Err(auto) => self.abort_spawn(idx, auto),
|
|
|
|
|
}
|
2026-04-09 00:53:54 -04:00
|
|
|
}
|
2026-04-13 22:38:01 -04:00
|
|
|
self.wake.notify_one(); // wake loop to consider new state
|
2026-04-09 00:51:10 -04:00
|
|
|
Some(new_state)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn save_enabled(&self) {
|
|
|
|
|
let map: HashMap<String, bool> = self.agents.iter()
|
|
|
|
|
.map(|a| (a.name.clone(), a.enabled))
|
|
|
|
|
.collect();
|
|
|
|
|
save_enabled_config(&map);
|
2026-04-08 23:39:48 -04:00
|
|
|
}
|
|
|
|
|
|
2026-04-11 21:57:24 -04:00
|
|
|
pub fn snapshots(&self, store: Option<&crate::store::Store>) -> Vec<UnconsciousSnapshot> {
|
|
|
|
|
self.agents.iter().map(|a| {
|
|
|
|
|
let history = store.map(|st| st.recent_by_provenance(&a.name, 30))
|
|
|
|
|
.unwrap_or_default();
|
2026-04-11 23:03:10 -04:00
|
|
|
let stats = crate::agent::oneshot::get_stats(&a.name);
|
|
|
|
|
let tool_calls_ewma: f64 = stats.by_tool.values().map(|t| t.ewma).sum();
|
2026-04-11 21:57:24 -04:00
|
|
|
UnconsciousSnapshot {
|
|
|
|
|
name: a.name.clone(),
|
|
|
|
|
running: a.is_running(),
|
|
|
|
|
enabled: a.enabled,
|
2026-04-11 23:03:10 -04:00
|
|
|
runs: stats.runs,
|
2026-04-11 21:57:24 -04:00
|
|
|
last_run_secs_ago: a.last_run.map(|t| t.elapsed().as_secs_f64()),
|
|
|
|
|
agent: a.agent.clone(),
|
2026-04-11 23:03:10 -04:00
|
|
|
last_stats: stats.last_stats.clone(),
|
2026-04-11 21:57:24 -04:00
|
|
|
history,
|
2026-04-11 23:03:10 -04:00
|
|
|
tool_calls_ewma,
|
|
|
|
|
tool_failures_ewma: stats.failures.ewma,
|
2026-04-11 21:57:24 -04:00
|
|
|
}
|
2026-04-08 23:39:48 -04:00
|
|
|
}).collect()
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-12 20:37:54 -04:00
|
|
|
/// Check if health refresh is due (quick check, no I/O).
|
|
|
|
|
pub fn needs_health_refresh(&self) -> bool {
|
|
|
|
|
self.last_health_check
|
|
|
|
|
.map(|t| t.elapsed() > std::time::Duration::from_secs(600))
|
|
|
|
|
.unwrap_or(true)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Store computed health (quick, just assignment).
|
|
|
|
|
pub fn set_health(&mut self, health: crate::subconscious::daemon::GraphHealth) {
|
|
|
|
|
self.graph_health = Some(health);
|
2026-04-09 00:45:26 -04:00
|
|
|
self.last_health_check = Some(Instant::now());
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-12 20:33:23 -04:00
|
|
|
/// Reap finished agents (quick, hold lock briefly).
|
|
|
|
|
pub fn reap_finished(&mut self) {
|
2026-04-08 23:39:48 -04:00
|
|
|
for agent in &mut self.agents {
|
|
|
|
|
if agent.handle.as_ref().is_some_and(|h| h.is_finished()) {
|
2026-04-09 01:00:48 -04:00
|
|
|
let handle = agent.handle.take().unwrap();
|
2026-04-08 23:39:48 -04:00
|
|
|
agent.last_run = Some(Instant::now());
|
2026-04-11 22:12:46 -04:00
|
|
|
// Get the AutoAgent back from the finished task (stats already updated)
|
2026-04-09 01:00:48 -04:00
|
|
|
match handle.now_or_never() {
|
2026-04-11 22:12:46 -04:00
|
|
|
Some(Ok((auto_back, result))) => {
|
2026-04-12 20:11:40 -04:00
|
|
|
agent.auto = Some(auto_back);
|
2026-04-09 01:00:48 -04:00
|
|
|
match result {
|
|
|
|
|
Ok(_) => dbglog!("[unconscious] {} completed (run {})",
|
2026-04-11 23:03:10 -04:00
|
|
|
agent.name, crate::agent::oneshot::get_stats(&agent.name).runs),
|
2026-04-09 01:00:48 -04:00
|
|
|
Err(e) => dbglog!("[unconscious] {} failed: {}", agent.name, e),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
_ => dbglog!("[unconscious] {} task lost", agent.name),
|
|
|
|
|
}
|
2026-04-08 23:39:48 -04:00
|
|
|
}
|
|
|
|
|
}
|
2026-04-12 20:33:23 -04:00
|
|
|
}
|
2026-04-08 23:39:48 -04:00
|
|
|
|
2026-04-12 20:33:23 -04:00
|
|
|
/// Select agents to spawn and take their AutoAgents out (quick, hold lock briefly).
|
|
|
|
|
/// Returns vec of (index, name, auto, tools) for agents that should spawn.
|
|
|
|
|
pub fn select_to_spawn(&mut self) -> Vec<(usize, String, AutoAgent)> {
|
2026-04-08 23:39:48 -04:00
|
|
|
let running = self.agents.iter().filter(|a| a.is_running()).count();
|
2026-04-12 20:33:23 -04:00
|
|
|
let mut to_spawn = Vec::new();
|
|
|
|
|
|
2026-04-10 03:20:20 -04:00
|
|
|
for _ in running..self.max_concurrent {
|
|
|
|
|
let next = self.agents.iter().enumerate()
|
2026-04-12 20:33:23 -04:00
|
|
|
.filter(|(_, a)| a.should_run() && a.auto.is_some())
|
2026-04-10 03:20:20 -04:00
|
|
|
.min_by_key(|(_, a)| a.last_run);
|
|
|
|
|
match next {
|
2026-04-12 20:33:23 -04:00
|
|
|
Some((idx, _)) => {
|
|
|
|
|
let name = self.agents[idx].name.clone();
|
|
|
|
|
let auto = self.agents[idx].auto.take().unwrap();
|
|
|
|
|
to_spawn.push((idx, name, auto));
|
|
|
|
|
}
|
2026-04-10 03:20:20 -04:00
|
|
|
None => break,
|
|
|
|
|
}
|
2026-04-08 23:39:48 -04:00
|
|
|
}
|
2026-04-12 20:33:23 -04:00
|
|
|
to_spawn
|
2026-04-08 23:39:48 -04:00
|
|
|
}
|
|
|
|
|
|
2026-04-12 20:33:23 -04:00
|
|
|
/// Store spawn result back (quick, hold lock briefly).
|
|
|
|
|
pub fn complete_spawn(&mut self, idx: usize, result: SpawnResult) {
|
|
|
|
|
self.agents[idx].agent = Some(result.agent);
|
|
|
|
|
self.agents[idx].handle = Some(result.handle);
|
|
|
|
|
}
|
2026-04-08 23:39:48 -04:00
|
|
|
|
2026-04-12 20:33:23 -04:00
|
|
|
/// Restore auto on spawn failure (quick, hold lock briefly).
|
|
|
|
|
pub fn abort_spawn(&mut self, idx: usize, auto: AutoAgent) {
|
|
|
|
|
self.agents[idx].auto = Some(auto);
|
|
|
|
|
}
|
|
|
|
|
}
|
2026-04-08 23:39:48 -04:00
|
|
|
|
2026-04-12 20:33:23 -04:00
|
|
|
/// Result of preparing an agent spawn (created outside the lock).
|
|
|
|
|
pub struct SpawnResult {
|
|
|
|
|
pub agent: std::sync::Arc<crate::agent::Agent>,
|
|
|
|
|
pub handle: tokio::task::JoinHandle<(AutoAgent, Result<(), String>)>,
|
|
|
|
|
}
|
2026-04-08 23:39:48 -04:00
|
|
|
|
2026-04-12 20:33:23 -04:00
|
|
|
/// Prepare an agent spawn — does the slow work (Store::load, query, Agent::new).
|
|
|
|
|
/// Called outside the Unconscious lock.
|
|
|
|
|
/// On success, auto is consumed (moved into spawned task).
|
|
|
|
|
/// On failure, auto is returned so it can be restored.
|
agent: end-to-end gRPC Generate with delta-based session orchestration
Wires the client side of the new salience protocol so inference
actually runs over gRPC instead of emitting the stubbed "not yet
wired" error. Each turn walks the AST as interleaved chunks, sends
only what's new to the server, and streams decode tokens back.
context.rs:
* `WireChunk` enum: `Tokens(Vec<u32>)` or `Image { bytes, mime,
known_expanded_len }`. Preserves text/image/text ordering the
wire path can't flatten.
* `wire_chunks(range, skip)` walker, parallel to `wire_prompt` —
branches emit `<|im_start|>…<|im_end|>` tokens, image leaves
emit a single Image chunk (no inline vision tokens).
* `NodeLeaf::set_image_token_count(n)` + recompute of cached
`token_ids`; `ContextState::commit_image_token_counts(&[u32])`
fills in the first-N zero-count image leaves in wire order.
* `ResponseParser::run` handles the new
`StreamToken::ImageAppended` by committing the server's N into
the AST before the final Generate's Token events stream in.
salience.rs:
* `SessionHandle` tracks `committed_len`. `append_image` advances
it from the RPC response. New `generate(req)` opens the
server-streaming RPC.
api/mod.rs:
* `stream_session_mm(session_lock, chunks, sampling, priority,
readout_shape)` replaces the stub. Spawns `run_session_generate`.
* `run_session_generate`: takes the session out of the Mutex (or
opens fresh), skips chunks covered by `committed_len` (bails on
mid-chunk straddle or unknown-length image in the committed
prefix), walks the delta: accumulates Tokens into `pending`, on
Image flushes pending via `flush_pending` (max_tokens=0 Generate
that just prefills), then AppendImage + emits
StreamToken::ImageAppended. Final Generate carries any trailing
pending text as `append_tokens` and the sampling params; Token
events stream out as StreamToken::Token, Done as
StreamToken::Done. On success, handle with updated
`committed_len` returns to the Mutex; on error, handle drops
and next call reopens.
* `StreamToken::ImageAppended { placeholder_count }` variant —
emitted in wire order before the final Generate's tokens.
* Prefix-cache cap for readout coverage: `readout_ranges` covers
`[prompt_len_after_append, u32::MAX)` when the caller provides
a readout_shape, so decode positions stream their readouts.
agent/mod.rs:
* `assemble_prompt` returns `Vec<WireChunk>` with the assistant
prologue merged into the trailing Tokens chunk. Caller in
`turn` passes chunks + readout_shape (pulled from
`agent.readout.lock().manifest`) to `stream_session_mm`.
* Dropped `assemble_prompt_tokens` — dead.
mind + unconscious:
* `Unconscious::new(client)` stores a shared `ApiClient`. Fixes
the repeated-manifest-fetch bug caused by each subagent's
`ApiClient::new` having its own OnceCell. The client's Arc-
wrapped manifest cache is now shared across every agent Mind
spawns.
* `prepare_spawn(name, auto, wake, base_client)` clones the base
client and overrides `.model` for the resolved backend instead
of constructing fresh. All three callers
(`toggle`/`trigger`/unconscious loop) pass `self.client.clone()`.
* `Mind::new` passes `agent.client.clone()` into
`Unconscious::new`.
subconscious/generate.rs:
* gen_continuation switched to `wire_chunks` + the new
`stream_session_mm` signature. Ephemeral session opens on each
call, tears down at scope end. No readouts requested.
Not changed yet, noted for follow-up:
* Subconscious ablation scoring in learn.rs still talks to
`/v1/score` over HTTP. Will migrate once we have time to verify
the Generate+max_tokens=0+prompt_logprobs path end-to-end.
* compare.rs constructs its own ApiClient for the
`compare.test_backend` (which is intentionally a different
endpoint) — left alone.
* Readout manifest still fetched via HTTP at Agent::new.
Migration to GetReadoutManifest gRPC is a separate cleanup.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-24 12:27:55 -04:00
|
|
|
pub async fn prepare_spawn(
|
|
|
|
|
name: &str,
|
|
|
|
|
mut auto: AutoAgent,
|
|
|
|
|
wake: std::sync::Arc<tokio::sync::Notify>,
|
|
|
|
|
base_client: crate::agent::api::ApiClient,
|
|
|
|
|
) -> Result<SpawnResult, AutoAgent> {
|
2026-04-12 20:33:23 -04:00
|
|
|
dbglog!("[unconscious] spawning {}", name);
|
|
|
|
|
|
|
|
|
|
let def = match defs::get_def(name) {
|
|
|
|
|
Some(d) => d,
|
|
|
|
|
None => return Err(auto),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let exclude: std::collections::HashSet<String> = std::collections::HashSet::new();
|
|
|
|
|
let batch = match defs::run_agent(
|
2026-04-13 15:18:05 -04:00
|
|
|
&def, def.count.unwrap_or(5), &exclude,
|
2026-04-13 14:55:41 -04:00
|
|
|
).await {
|
2026-04-12 20:33:23 -04:00
|
|
|
Ok(b) => b,
|
|
|
|
|
Err(e) => {
|
|
|
|
|
dbglog!("[unconscious] {} query failed: {}", name, e);
|
|
|
|
|
return Err(auto);
|
|
|
|
|
}
|
|
|
|
|
};
|
2026-04-08 23:39:48 -04:00
|
|
|
|
2026-04-12 20:33:23 -04:00
|
|
|
let orig_steps = std::mem::replace(&mut auto.steps,
|
|
|
|
|
batch.steps.iter().map(|s| AutoStep {
|
|
|
|
|
prompt: s.prompt.clone(),
|
|
|
|
|
phase: s.phase.clone(),
|
|
|
|
|
}).collect());
|
|
|
|
|
|
config: unify subconscious API resolution with the main chat path
Two parallel backend-resolution paths had drifted apart:
- Main chat: AppConfig::resolve_model() → a named BackendConfig in
AppConfig.backends
- Subconscious / oneshot / context_window(): four skip-serde
"cache" fields on Config (memory section) — api_base_url, api_key,
api_model, api_context_window — that used to be populated at
Config::try_load_shared time by walking memory.agent_model →
root.models[name] → root[backend_name]
When we renamed `models` to `backends` and collapsed ModelConfig into
BackendConfig, the latter chain started silently dereferencing
`root.get("models")` → None → no population. Subconscious agents fell
through the "API not configured" guard; context_window() started
returning 0 (since api_context_window default is u64's 0 now that we
don't populate it). It was only visibly working for the main chat.
Collapse to one path:
- Drop Config.agent_model (duplicate of AppConfig.default_backend)
- Drop Config.{api_base_url, api_key, api_model, api_context_window}
— no longer populated, no longer needed
- Drop default_context_window() — nobody reads the field anymore
- Drop the memory-side resolution block in try_load_shared()
- Subconscious (mind/unconscious.rs) and oneshot (agent/oneshot.rs)
now call load_app() + resolve_model(&app.default_backend) just like
the main chat does
- context_window() reads from config::app().backends[default_backend]
.context_window, defaulting to 128k only if the backend doesn't
specify one
Side effect: Kent's config file drops agent_model, api_reasoning,
journal_days, journal_max — all fields whose Rust counterparts are
now gone. (Figment tolerates unknown fields, so leaving them wouldn't
have broken anything, but they were lying about what's configurable.)
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-16 16:02:43 -04:00
|
|
|
// Create standalone Agent — stored so UI can read context.
|
2026-04-12 20:33:23 -04:00
|
|
|
let cli = crate::user::CliArgs::default();
|
|
|
|
|
let (app, _) = match crate::config::load_app(&cli) {
|
|
|
|
|
Ok(r) => r,
|
|
|
|
|
Err(e) => {
|
|
|
|
|
dbglog!("[unconscious] config: {}", e);
|
2026-04-09 01:00:48 -04:00
|
|
|
auto.steps = orig_steps;
|
2026-04-12 20:33:23 -04:00
|
|
|
return Err(auto);
|
2026-04-09 01:00:48 -04:00
|
|
|
}
|
2026-04-12 20:33:23 -04:00
|
|
|
};
|
config: unify subconscious API resolution with the main chat path
Two parallel backend-resolution paths had drifted apart:
- Main chat: AppConfig::resolve_model() → a named BackendConfig in
AppConfig.backends
- Subconscious / oneshot / context_window(): four skip-serde
"cache" fields on Config (memory section) — api_base_url, api_key,
api_model, api_context_window — that used to be populated at
Config::try_load_shared time by walking memory.agent_model →
root.models[name] → root[backend_name]
When we renamed `models` to `backends` and collapsed ModelConfig into
BackendConfig, the latter chain started silently dereferencing
`root.get("models")` → None → no population. Subconscious agents fell
through the "API not configured" guard; context_window() started
returning 0 (since api_context_window default is u64's 0 now that we
don't populate it). It was only visibly working for the main chat.
Collapse to one path:
- Drop Config.agent_model (duplicate of AppConfig.default_backend)
- Drop Config.{api_base_url, api_key, api_model, api_context_window}
— no longer populated, no longer needed
- Drop default_context_window() — nobody reads the field anymore
- Drop the memory-side resolution block in try_load_shared()
- Subconscious (mind/unconscious.rs) and oneshot (agent/oneshot.rs)
now call load_app() + resolve_model(&app.default_backend) just like
the main chat does
- context_window() reads from config::app().backends[default_backend]
.context_window, defaulting to 128k only if the backend doesn't
specify one
Side effect: Kent's config file drops agent_model, api_reasoning,
journal_days, journal_max — all fields whose Rust counterparts are
now gone. (Figment tolerates unknown fields, so leaving them wouldn't
have broken anything, but they were lying about what's configurable.)
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-16 16:02:43 -04:00
|
|
|
let resolved = match app.resolve_model(&app.default_backend) {
|
|
|
|
|
Ok(r) => r,
|
|
|
|
|
Err(e) => {
|
|
|
|
|
dbglog!("[unconscious] API not configured: {}", e);
|
|
|
|
|
auto.steps = orig_steps;
|
|
|
|
|
return Err(auto);
|
|
|
|
|
}
|
|
|
|
|
};
|
2026-04-12 20:33:23 -04:00
|
|
|
|
|
|
|
|
// Unconscious agents have self-contained prompts — no standard context.
|
agent: end-to-end gRPC Generate with delta-based session orchestration
Wires the client side of the new salience protocol so inference
actually runs over gRPC instead of emitting the stubbed "not yet
wired" error. Each turn walks the AST as interleaved chunks, sends
only what's new to the server, and streams decode tokens back.
context.rs:
* `WireChunk` enum: `Tokens(Vec<u32>)` or `Image { bytes, mime,
known_expanded_len }`. Preserves text/image/text ordering the
wire path can't flatten.
* `wire_chunks(range, skip)` walker, parallel to `wire_prompt` —
branches emit `<|im_start|>…<|im_end|>` tokens, image leaves
emit a single Image chunk (no inline vision tokens).
* `NodeLeaf::set_image_token_count(n)` + recompute of cached
`token_ids`; `ContextState::commit_image_token_counts(&[u32])`
fills in the first-N zero-count image leaves in wire order.
* `ResponseParser::run` handles the new
`StreamToken::ImageAppended` by committing the server's N into
the AST before the final Generate's Token events stream in.
salience.rs:
* `SessionHandle` tracks `committed_len`. `append_image` advances
it from the RPC response. New `generate(req)` opens the
server-streaming RPC.
api/mod.rs:
* `stream_session_mm(session_lock, chunks, sampling, priority,
readout_shape)` replaces the stub. Spawns `run_session_generate`.
* `run_session_generate`: takes the session out of the Mutex (or
opens fresh), skips chunks covered by `committed_len` (bails on
mid-chunk straddle or unknown-length image in the committed
prefix), walks the delta: accumulates Tokens into `pending`, on
Image flushes pending via `flush_pending` (max_tokens=0 Generate
that just prefills), then AppendImage + emits
StreamToken::ImageAppended. Final Generate carries any trailing
pending text as `append_tokens` and the sampling params; Token
events stream out as StreamToken::Token, Done as
StreamToken::Done. On success, handle with updated
`committed_len` returns to the Mutex; on error, handle drops
and next call reopens.
* `StreamToken::ImageAppended { placeholder_count }` variant —
emitted in wire order before the final Generate's tokens.
* Prefix-cache cap for readout coverage: `readout_ranges` covers
`[prompt_len_after_append, u32::MAX)` when the caller provides
a readout_shape, so decode positions stream their readouts.
agent/mod.rs:
* `assemble_prompt` returns `Vec<WireChunk>` with the assistant
prologue merged into the trailing Tokens chunk. Caller in
`turn` passes chunks + readout_shape (pulled from
`agent.readout.lock().manifest`) to `stream_session_mm`.
* Dropped `assemble_prompt_tokens` — dead.
mind + unconscious:
* `Unconscious::new(client)` stores a shared `ApiClient`. Fixes
the repeated-manifest-fetch bug caused by each subagent's
`ApiClient::new` having its own OnceCell. The client's Arc-
wrapped manifest cache is now shared across every agent Mind
spawns.
* `prepare_spawn(name, auto, wake, base_client)` clones the base
client and overrides `.model` for the resolved backend instead
of constructing fresh. All three callers
(`toggle`/`trigger`/unconscious loop) pass `self.client.clone()`.
* `Mind::new` passes `agent.client.clone()` into
`Unconscious::new`.
subconscious/generate.rs:
* gen_continuation switched to `wire_chunks` + the new
`stream_session_mm` signature. Ephemeral session opens on each
call, tears down at scope end. No readouts requested.
Not changed yet, noted for follow-up:
* Subconscious ablation scoring in learn.rs still talks to
`/v1/score` over HTTP. Will migrate once we have time to verify
the Generate+max_tokens=0+prompt_logprobs path end-to-end.
* compare.rs constructs its own ApiClient for the
`compare.test_backend` (which is intentionally a different
endpoint) — left alone.
* Readout manifest still fetched via HTTP at Agent::new.
Migration to GetReadoutManifest gRPC is a separate cleanup.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-24 12:27:55 -04:00
|
|
|
// Clone the shared client so we inherit the manifest cache and
|
|
|
|
|
// only override the model id per-agent.
|
|
|
|
|
let mut client = base_client;
|
|
|
|
|
client.model = resolved.model_id.clone();
|
2026-04-12 20:33:23 -04:00
|
|
|
let agent = crate::agent::Agent::new(
|
|
|
|
|
client, Vec::new(),
|
config: drop dead code and collapse to a single backend
Config had accumulated several obsolete fields, a legacy load path
that was just returning defaults, and multi-backend infrastructure
that's no longer used.
Removed from Config (memory section):
- load_legacy_jsonl() — just returned Config::default(), no callers
- The legacy-fallback branch in load_from_file
- surface_hooks, surface_timeout_secs — zero external readers
- scoring_chunk_tokens + default fn — zero external readers
- The POC_MEMORY_CONFIG env override note in the header comment
(not actually wired up anywhere)
Collapsed multi-backend to single-backend:
- AppConfig used to carry `anthropic: BackendConfig` and
`openrouter: BackendConfig` as required fields plus an optional
`deepinfra`, picked between at runtime by name. Only one is ever
actually used in any deployment. Collapse to a single
`backend: BackendConfig` on AppConfig, drop the multi-backend
match logic in resolve_model, drop the top-level `backend: String`
selector field, drop the `BackendConfig::resolve` fallback path.
- Also drop BackendConfig.model (redundant with ModelConfig.model_id
once multi-backend is gone).
- ModelConfig.backend field goes — there's only one backend now, no
choice to make.
Dead prompt_file machinery:
- ModelConfig.prompt_file, ResolvedModel.prompt_file, SessionConfig
.prompt_file, Agent.prompt_file — nothing in the codebase actually
reads the file these strings name. Just passed around and compared.
Delete the whole string through every struct.
- The "if prompt_file changed on model switch, recompact" branch in
user/chat.rs goes too (never fired usefully).
Dead memory_project plumbing:
- AppConfig.memory_project field, CliArgs.memory_project, the
--memory-project CLI flag, the figment merge target, the show_config
display line. Nothing reads it anywhere.
Dead ContextInfo struct:
- `struct ContextInfo` was never constructed — context_info: None
was the only initializer. The conditional display blocks in
user/context.rs that dereferenced it were dead.
Behavior change: AppConfig::resolve() now requires a non-empty
`models` map and bails with a helpful message if it's missing. The
old fallback ("no models? use top-level backend + PromptConfig to
build a default") path is gone — it was only kept for symmetry with
a mode nobody used.
Config file shape: `deepinfra: {...}` → `backend: {...}`, and
model entries no longer need `backend:` or `prompt_file:`. Updated
~/.consciousness/config.json5 to match.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-16 15:41:55 -04:00
|
|
|
app, None,
|
2026-04-12 20:33:23 -04:00
|
|
|
crate::agent::tools::ActiveTools::new(),
|
|
|
|
|
auto.tools.clone(),
|
|
|
|
|
).await;
|
|
|
|
|
{
|
|
|
|
|
let mut st = agent.state.lock().await;
|
|
|
|
|
st.provenance = auto.name.clone();
|
|
|
|
|
st.priority = Some(auto.priority);
|
|
|
|
|
st.temperature = auto.temperature;
|
|
|
|
|
}
|
2026-04-09 01:00:48 -04:00
|
|
|
|
2026-04-12 20:33:23 -04:00
|
|
|
let agent_clone = agent.clone();
|
|
|
|
|
let handle = tokio::spawn(async move {
|
|
|
|
|
let result = auto.run_shared(&agent_clone).await;
|
|
|
|
|
let stats = crate::agent::oneshot::save_agent_log(&auto.name, &agent_clone).await;
|
|
|
|
|
auto.update_stats(stats);
|
|
|
|
|
auto.steps = orig_steps;
|
2026-04-13 22:38:01 -04:00
|
|
|
wake.notify_one(); // wake the loop to reap and maybe spawn more
|
2026-04-12 20:33:23 -04:00
|
|
|
(auto, result)
|
|
|
|
|
});
|
2026-04-08 23:39:48 -04:00
|
|
|
|
2026-04-12 20:33:23 -04:00
|
|
|
Ok(SpawnResult { agent, handle })
|
|
|
|
|
}
|
2026-04-08 23:39:48 -04:00
|
|
|
|
2026-04-12 20:33:23 -04:00
|
|
|
// Backwards compat: trigger() that does all three phases (still holds lock too long, but works)
|
|
|
|
|
impl Unconscious {
|
|
|
|
|
pub async fn trigger(&mut self) {
|
|
|
|
|
self.reap_finished();
|
|
|
|
|
let to_spawn = self.select_to_spawn();
|
2026-04-13 22:38:01 -04:00
|
|
|
let wake = self.wake.clone();
|
agent: end-to-end gRPC Generate with delta-based session orchestration
Wires the client side of the new salience protocol so inference
actually runs over gRPC instead of emitting the stubbed "not yet
wired" error. Each turn walks the AST as interleaved chunks, sends
only what's new to the server, and streams decode tokens back.
context.rs:
* `WireChunk` enum: `Tokens(Vec<u32>)` or `Image { bytes, mime,
known_expanded_len }`. Preserves text/image/text ordering the
wire path can't flatten.
* `wire_chunks(range, skip)` walker, parallel to `wire_prompt` —
branches emit `<|im_start|>…<|im_end|>` tokens, image leaves
emit a single Image chunk (no inline vision tokens).
* `NodeLeaf::set_image_token_count(n)` + recompute of cached
`token_ids`; `ContextState::commit_image_token_counts(&[u32])`
fills in the first-N zero-count image leaves in wire order.
* `ResponseParser::run` handles the new
`StreamToken::ImageAppended` by committing the server's N into
the AST before the final Generate's Token events stream in.
salience.rs:
* `SessionHandle` tracks `committed_len`. `append_image` advances
it from the RPC response. New `generate(req)` opens the
server-streaming RPC.
api/mod.rs:
* `stream_session_mm(session_lock, chunks, sampling, priority,
readout_shape)` replaces the stub. Spawns `run_session_generate`.
* `run_session_generate`: takes the session out of the Mutex (or
opens fresh), skips chunks covered by `committed_len` (bails on
mid-chunk straddle or unknown-length image in the committed
prefix), walks the delta: accumulates Tokens into `pending`, on
Image flushes pending via `flush_pending` (max_tokens=0 Generate
that just prefills), then AppendImage + emits
StreamToken::ImageAppended. Final Generate carries any trailing
pending text as `append_tokens` and the sampling params; Token
events stream out as StreamToken::Token, Done as
StreamToken::Done. On success, handle with updated
`committed_len` returns to the Mutex; on error, handle drops
and next call reopens.
* `StreamToken::ImageAppended { placeholder_count }` variant —
emitted in wire order before the final Generate's tokens.
* Prefix-cache cap for readout coverage: `readout_ranges` covers
`[prompt_len_after_append, u32::MAX)` when the caller provides
a readout_shape, so decode positions stream their readouts.
agent/mod.rs:
* `assemble_prompt` returns `Vec<WireChunk>` with the assistant
prologue merged into the trailing Tokens chunk. Caller in
`turn` passes chunks + readout_shape (pulled from
`agent.readout.lock().manifest`) to `stream_session_mm`.
* Dropped `assemble_prompt_tokens` — dead.
mind + unconscious:
* `Unconscious::new(client)` stores a shared `ApiClient`. Fixes
the repeated-manifest-fetch bug caused by each subagent's
`ApiClient::new` having its own OnceCell. The client's Arc-
wrapped manifest cache is now shared across every agent Mind
spawns.
* `prepare_spawn(name, auto, wake, base_client)` clones the base
client and overrides `.model` for the resolved backend instead
of constructing fresh. All three callers
(`toggle`/`trigger`/unconscious loop) pass `self.client.clone()`.
* `Mind::new` passes `agent.client.clone()` into
`Unconscious::new`.
subconscious/generate.rs:
* gen_continuation switched to `wire_chunks` + the new
`stream_session_mm` signature. Ephemeral session opens on each
call, tears down at scope end. No readouts requested.
Not changed yet, noted for follow-up:
* Subconscious ablation scoring in learn.rs still talks to
`/v1/score` over HTTP. Will migrate once we have time to verify
the Generate+max_tokens=0+prompt_logprobs path end-to-end.
* compare.rs constructs its own ApiClient for the
`compare.test_backend` (which is intentionally a different
endpoint) — left alone.
* Readout manifest still fetched via HTTP at Agent::new.
Migration to GetReadoutManifest gRPC is a separate cleanup.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-24 12:27:55 -04:00
|
|
|
let client = self.client.clone();
|
2026-04-12 20:33:23 -04:00
|
|
|
for (idx, name, auto) in to_spawn {
|
agent: end-to-end gRPC Generate with delta-based session orchestration
Wires the client side of the new salience protocol so inference
actually runs over gRPC instead of emitting the stubbed "not yet
wired" error. Each turn walks the AST as interleaved chunks, sends
only what's new to the server, and streams decode tokens back.
context.rs:
* `WireChunk` enum: `Tokens(Vec<u32>)` or `Image { bytes, mime,
known_expanded_len }`. Preserves text/image/text ordering the
wire path can't flatten.
* `wire_chunks(range, skip)` walker, parallel to `wire_prompt` —
branches emit `<|im_start|>…<|im_end|>` tokens, image leaves
emit a single Image chunk (no inline vision tokens).
* `NodeLeaf::set_image_token_count(n)` + recompute of cached
`token_ids`; `ContextState::commit_image_token_counts(&[u32])`
fills in the first-N zero-count image leaves in wire order.
* `ResponseParser::run` handles the new
`StreamToken::ImageAppended` by committing the server's N into
the AST before the final Generate's Token events stream in.
salience.rs:
* `SessionHandle` tracks `committed_len`. `append_image` advances
it from the RPC response. New `generate(req)` opens the
server-streaming RPC.
api/mod.rs:
* `stream_session_mm(session_lock, chunks, sampling, priority,
readout_shape)` replaces the stub. Spawns `run_session_generate`.
* `run_session_generate`: takes the session out of the Mutex (or
opens fresh), skips chunks covered by `committed_len` (bails on
mid-chunk straddle or unknown-length image in the committed
prefix), walks the delta: accumulates Tokens into `pending`, on
Image flushes pending via `flush_pending` (max_tokens=0 Generate
that just prefills), then AppendImage + emits
StreamToken::ImageAppended. Final Generate carries any trailing
pending text as `append_tokens` and the sampling params; Token
events stream out as StreamToken::Token, Done as
StreamToken::Done. On success, handle with updated
`committed_len` returns to the Mutex; on error, handle drops
and next call reopens.
* `StreamToken::ImageAppended { placeholder_count }` variant —
emitted in wire order before the final Generate's tokens.
* Prefix-cache cap for readout coverage: `readout_ranges` covers
`[prompt_len_after_append, u32::MAX)` when the caller provides
a readout_shape, so decode positions stream their readouts.
agent/mod.rs:
* `assemble_prompt` returns `Vec<WireChunk>` with the assistant
prologue merged into the trailing Tokens chunk. Caller in
`turn` passes chunks + readout_shape (pulled from
`agent.readout.lock().manifest`) to `stream_session_mm`.
* Dropped `assemble_prompt_tokens` — dead.
mind + unconscious:
* `Unconscious::new(client)` stores a shared `ApiClient`. Fixes
the repeated-manifest-fetch bug caused by each subagent's
`ApiClient::new` having its own OnceCell. The client's Arc-
wrapped manifest cache is now shared across every agent Mind
spawns.
* `prepare_spawn(name, auto, wake, base_client)` clones the base
client and overrides `.model` for the resolved backend instead
of constructing fresh. All three callers
(`toggle`/`trigger`/unconscious loop) pass `self.client.clone()`.
* `Mind::new` passes `agent.client.clone()` into
`Unconscious::new`.
subconscious/generate.rs:
* gen_continuation switched to `wire_chunks` + the new
`stream_session_mm` signature. Ephemeral session opens on each
call, tears down at scope end. No readouts requested.
Not changed yet, noted for follow-up:
* Subconscious ablation scoring in learn.rs still talks to
`/v1/score` over HTTP. Will migrate once we have time to verify
the Generate+max_tokens=0+prompt_logprobs path end-to-end.
* compare.rs constructs its own ApiClient for the
`compare.test_backend` (which is intentionally a different
endpoint) — left alone.
* Readout manifest still fetched via HTTP at Agent::new.
Migration to GetReadoutManifest gRPC is a separate cleanup.
Co-Authored-By: Proof of Concept <poc@bcachefs.org>
2026-04-24 12:27:55 -04:00
|
|
|
match prepare_spawn(&name, auto, wake.clone(), client.clone()).await {
|
2026-04-12 20:33:23 -04:00
|
|
|
Ok(result) => self.complete_spawn(idx, result),
|
|
|
|
|
Err(auto) => self.abort_spawn(idx, auto),
|
|
|
|
|
}
|
|
|
|
|
}
|
2026-04-08 23:39:48 -04:00
|
|
|
}
|
|
|
|
|
}
|
2026-04-10 02:39:55 -04:00
|
|
|
|
2026-04-11 21:34:41 -04:00
|
|
|
// save_agent_log and RunStats moved to crate::agent::oneshot
|