From d9b56a02c32db1f31c6d8df6496aee407f21f812 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 19 Mar 2026 21:49:58 -0400 Subject: [PATCH] Consolidate poc-memory and poc-agent configs poc-memory now reads from poc-agent's config.json5 as the primary config source. Memory-specific settings live in a "memory" section; API credentials are resolved from the shared model/backend config instead of being duplicated. - Add "memory" section to ~/.config/poc-agent/config.json5 - poc-memory config.rs: try shared config first, fall back to legacy JSONL - API fields (base_url, api_key, model) resolved via memory.agent_model -> models -> backend lookup - Add json5 dependency for proper JSON5 parsing - Update provisioning scripts: hermes -> qwen3_coder tool parser Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 1 + poc-memory/Cargo.toml | 1 + poc-memory/src/config.rs | 132 +++++++++++++++++++++++++++++++----- scripts/Dockerfile.vllm | 26 +++++++ scripts/provision-mi300x.sh | 2 +- scripts/provision-vllm.sh | 2 +- 6 files changed, 146 insertions(+), 18 deletions(-) create mode 100644 scripts/Dockerfile.vllm diff --git a/Cargo.lock b/Cargo.lock index 97a5f80..37efa38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2643,6 +2643,7 @@ dependencies = [ "crossterm 0.28.1", "faer", "jobkit", + "json5", "libc", "log", "memmap2", diff --git a/poc-memory/Cargo.toml b/poc-memory/Cargo.toml index 5fbc91f..f56d80c 100644 --- a/poc-memory/Cargo.toml +++ b/poc-memory/Cargo.toml @@ -8,6 +8,7 @@ capnp = "0.20" uuid = { version = "1", features = ["v4"] } serde = { version = "1", features = ["derive"] } serde_json = "1" +json5 = "0.4" bincode = "1" regex = "1" chrono = "0.4" diff --git a/poc-memory/src/config.rs b/poc-memory/src/config.rs index f6078ff..1880ee0 100644 --- a/poc-memory/src/config.rs +++ b/poc-memory/src/config.rs @@ -1,16 +1,14 @@ // Configuration for poc-memory // -// Loaded from ~/.config/poc-memory/config.jsonl (or POC_MEMORY_CONFIG env). 
-// Falls back to sensible defaults if no config file exists. +// Primary config: ~/.config/poc-agent/config.json5 (shared with poc-agent) +// Memory-specific settings live in the "memory" section. +// API backend resolved from the shared "models" + backend configs. // -// Format: JSONL — one JSON object per line. -// First line with "config" key: global settings. -// Lines with "group" key: context loading groups (order preserved). +// Fallback: ~/.config/poc-memory/config.jsonl (legacy, still supported) +// Env override: POC_MEMORY_CONFIG // -// Example: -// {"config": {"user_name": "Alice", "data_dir": "~/.claude/memory"}} -// {"group": "identity", "keys": ["identity"]} -// {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"} +// The shared config eliminates API credential duplication between +// poc-memory and poc-agent. use std::path::PathBuf; use std::sync::{Arc, OnceLock, RwLock}; @@ -56,11 +54,8 @@ pub struct Config { /// Directory containing prompt templates for agents. pub prompts_dir: PathBuf, /// Separate Claude config dir for background agent work (daemon jobs). - /// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates - /// with different OAuth credentials than the interactive session. pub agent_config_dir: Option, - /// OpenAI-compatible API base URL for direct LLM calls (e.g. vllm). - /// When set, agents use this instead of shelling out to claude CLI. + /// OpenAI-compatible API base URL for direct LLM calls. pub api_base_url: Option, /// API key for the direct API endpoint. pub api_key: Option, @@ -104,6 +99,114 @@ impl Default for Config { impl Config { fn load_from_file() -> Self { + // Try shared config first, then legacy JSONL + if let Some(config) = Self::try_load_shared() { + return config; + } + Self::load_legacy_jsonl() + } + + /// Load from shared poc-agent config (~/.config/poc-agent/config.json5). 
+ /// Memory settings live in the "memory" section; API settings are + /// resolved from the shared model/backend configuration. + fn try_load_shared() -> Option { + let home = PathBuf::from(std::env::var("HOME").ok()?); + let path = home.join(".config/poc-agent/config.json5"); + let content = std::fs::read_to_string(&path).ok()?; + + let root: serde_json::Value = json5::from_str(&content).ok()?; + + let mem = root.get("memory")?; + let mut config = Config::default(); + + // Memory-specific fields + if let Some(s) = mem.get("user_name").and_then(|v| v.as_str()) { + config.user_name = s.to_string(); + } + if let Some(s) = mem.get("assistant_name").and_then(|v| v.as_str()) { + config.assistant_name = s.to_string(); + } + if let Some(s) = mem.get("data_dir").and_then(|v| v.as_str()) { + config.data_dir = expand_home(s); + } + if let Some(s) = mem.get("projects_dir").and_then(|v| v.as_str()) { + config.projects_dir = expand_home(s); + } + if let Some(arr) = mem.get("core_nodes").and_then(|v| v.as_array()) { + config.core_nodes = arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect(); + } + if let Some(d) = mem.get("journal_days").and_then(|v| v.as_u64()) { + config.journal_days = d as u32; + } + if let Some(m) = mem.get("journal_max").and_then(|v| v.as_u64()) { + config.journal_max = m as usize; + } + if let Some(n) = mem.get("llm_concurrency").and_then(|v| v.as_u64()) { + config.llm_concurrency = n.max(1) as usize; + } + if let Some(n) = mem.get("agent_budget").and_then(|v| v.as_u64()) { + config.agent_budget = n as usize; + } + if let Some(s) = mem.get("prompts_dir").and_then(|v| v.as_str()) { + config.prompts_dir = expand_home(s); + } + if let Some(s) = mem.get("agent_config_dir").and_then(|v| v.as_str()) { + config.agent_config_dir = Some(expand_home(s)); + } + + // Context groups + if let Some(groups) = mem.get("context_groups").and_then(|v| v.as_array()) { + let mut cgs = Vec::new(); + for g in groups { + if let Some(label) = 
g.get("label").and_then(|v| v.as_str()) { + let keys = g.get("keys") + .and_then(|v| v.as_array()) + .map(|arr| arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect()) + .unwrap_or_default(); + let source = match g.get("source").and_then(|v| v.as_str()) { + Some("file") => ContextSource::File, + Some("journal") => ContextSource::Journal, + _ => ContextSource::Store, + }; + cgs.push(ContextGroup { label: label.to_string(), keys, source }); + } + } + if !cgs.is_empty() { + config.context_groups = cgs; + } + } + + // Resolve API settings from the shared model/backend config. + // memory.agent_model references a named model; we look up its + // backend to get base_url and api_key. + if let Some(model_name) = mem.get("agent_model").and_then(|v| v.as_str()) { + if let Some(model_cfg) = root.get("models") + .and_then(|m| m.get(model_name)) + { + let backend_name = model_cfg.get("backend").and_then(|v| v.as_str()).unwrap_or(""); + let model_id = model_cfg.get("model_id").and_then(|v| v.as_str()).unwrap_or(""); + + if let Some(backend) = root.get(backend_name) { + config.api_base_url = backend.get("base_url") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + config.api_key = backend.get("api_key") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + } + config.api_model = Some(model_id.to_string()); + } + } + + Some(config) + } + + /// Load from legacy JSONL config (~/.config/poc-memory/config.jsonl). 
+ fn load_legacy_jsonl() -> Self { let path = std::env::var("POC_MEMORY_CONFIG") .map(PathBuf::from) .unwrap_or_else(|_| { @@ -119,14 +222,12 @@ impl Config { let mut context_groups: Vec = Vec::new(); - // Parse as a stream of JSON values (handles multi-line objects) let stream = serde_json::Deserializer::from_str(&content) .into_iter::(); for result in stream { let Ok(obj) = result else { continue }; - // Global config line if let Some(cfg) = obj.get("config") { if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) { config.user_name = s.to_string(); @@ -175,7 +276,6 @@ impl Config { continue; } - // Context group line if let Some(label) = obj.get("group").and_then(|v| v.as_str()) { let keys = obj.get("keys") .and_then(|v| v.as_array()) diff --git a/scripts/Dockerfile.vllm b/scripts/Dockerfile.vllm new file mode 100644 index 0000000..c141e64 --- /dev/null +++ b/scripts/Dockerfile.vllm @@ -0,0 +1,26 @@ +FROM nvidia/cuda:12.9.0-devel-ubuntu22.04 + +ENV DEBIAN_FRONTEND=noninteractive +ENV PATH="/root/.local/bin:${PATH}" + +RUN apt-get update -qq && \ + apt-get install -y -qq python3 python3-pip git && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir vllm ninja huggingface_hub + +# Pre-download model weights (optional — comment out to pull at runtime) +# RUN python3 -c "from huggingface_hub import snapshot_download; snapshot_download('Qwen/Qwen3.5-27B')" + +EXPOSE 8000 + +ENTRYPOINT ["vllm", "serve"] +CMD ["Qwen/Qwen3.5-27B", \ + "--port", "8000", \ + "--max-model-len", "262144", \ + "--gpu-memory-utilization", "0.95", \ + "--enable-prefix-caching", \ + "--enable-auto-tool-choice", \ + "--tool-call-parser", "qwen3_coder", \ + "--reasoning-parser", "qwen3", \ + "--uvicorn-log-level", "warning"] diff --git a/scripts/provision-mi300x.sh b/scripts/provision-mi300x.sh index 5a47738..c028a26 100755 --- a/scripts/provision-mi300x.sh +++ b/scripts/provision-mi300x.sh @@ -81,8 +81,8 @@ exec vllm serve "$MODEL" \ --max-model-len "$MAX_MODEL_LEN" \ 
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \ --enable-prefix-caching \ - --tool-call-parser hermes \ --enable-auto-tool-choice \ + --tool-call-parser qwen3_coder \ --reasoning-parser qwen3 \ --trust-remote-code \ --max-cudagraph-capture-size 64 \ diff --git a/scripts/provision-vllm.sh b/scripts/provision-vllm.sh index e5702ed..a23c805 100755 --- a/scripts/provision-vllm.sh +++ b/scripts/provision-vllm.sh @@ -51,7 +51,7 @@ exec vllm serve "$MODEL" \ --max-model-len "$MAX_MODEL_LEN" \ --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \ --enable-prefix-caching \ - --tool-call-parser hermes \ --enable-auto-tool-choice \ + --tool-call-parser qwen3_coder \ --reasoning-parser=qwen3 \ --uvicorn-log-level warning