Consolidate poc-memory and poc-agent configs

poc-memory now reads from poc-agent's config.json5 as the primary
config source. Memory-specific settings live in a "memory" section;
API credentials are resolved from the shared model/backend config
instead of being duplicated.

- Add "memory" section to ~/.config/poc-agent/config.json5
- poc-memory config.rs: try shared config first, fall back to
  legacy JSONL
- API fields (base_url, api_key, model) resolved via
  memory.agent_model -> models -> backend lookup (sketched below)
- Add json5 dependency for proper JSON5 parsing
- Update provisioning scripts: hermes -> qwen3_coder tool parser
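
For illustration, the consolidated config takes roughly this shape. Field
names match the new parsing code in config.rs; the model name, backend
section name, and all values below are placeholders, not part of this
commit:

    // ~/.config/poc-agent/config.json5
    {
        memory: {
            user_name: "Alice",
            data_dir: "~/.claude/memory",
            agent_model: "mem-agent",       // resolved via models/backend below
            context_groups: [
                { label: "identity", keys: ["identity"] },
                { label: "orientation", keys: ["where-am-i.md"], source: "file" },
            ],
        },
        models: {
            "mem-agent": { backend: "vllm", model_id: "Qwen/Qwen3.5-27B" },
        },
        vllm: {
            base_url: "http://localhost:8000/v1",
            api_key: "unused",
        },
    }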

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Kent Overstreet 2026-03-19 21:49:58 -04:00
parent 4c7c3c762c
commit d9b56a02c3
6 changed files with 146 additions and 18 deletions

Cargo.lock (generated)

@@ -2643,6 +2643,7 @@ dependencies = [
"crossterm 0.28.1",
"faer",
"jobkit",
"json5",
"libc",
"log",
"memmap2",

Cargo.toml

@@ -8,6 +8,7 @@ capnp = "0.20"
uuid = { version = "1", features = ["v4"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
json5 = "0.4"
bincode = "1"
regex = "1"
chrono = "0.4"

config.rs (poc-memory)

@@ -1,16 +1,14 @@
// Configuration for poc-memory
//
// Loaded from ~/.config/poc-memory/config.jsonl (or POC_MEMORY_CONFIG env).
// Falls back to sensible defaults if no config file exists.
// Primary config: ~/.config/poc-agent/config.json5 (shared with poc-agent)
// Memory-specific settings live in the "memory" section.
// API backend resolved from the shared "models" + backend configs.
//
// Format: JSONL — one JSON object per line.
// First line with "config" key: global settings.
// Lines with "group" key: context loading groups (order preserved).
// Fallback: ~/.config/poc-memory/config.jsonl (legacy, still supported)
// Env override: POC_MEMORY_CONFIG
//
// Example:
// {"config": {"user_name": "Alice", "data_dir": "~/.claude/memory"}}
// {"group": "identity", "keys": ["identity"]}
// {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"}
// The shared config eliminates API credential duplication between
// poc-memory and poc-agent.
use std::path::PathBuf;
use std::sync::{Arc, OnceLock, RwLock};
@@ -56,11 +54,8 @@ pub struct Config {
    /// Directory containing prompt templates for agents.
    pub prompts_dir: PathBuf,
    /// Separate Claude config dir for background agent work (daemon jobs).
    /// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
    /// with different OAuth credentials than the interactive session.
    pub agent_config_dir: Option<PathBuf>,
    /// OpenAI-compatible API base URL for direct LLM calls (e.g. vllm).
    /// When set, agents use this instead of shelling out to claude CLI.
    /// OpenAI-compatible API base URL for direct LLM calls.
    pub api_base_url: Option<String>,
    /// API key for the direct API endpoint.
    pub api_key: Option<String>,
@@ -104,6 +99,114 @@ impl Default for Config {
impl Config {
    fn load_from_file() -> Self {
        // Try shared config first, then legacy JSONL
        if let Some(config) = Self::try_load_shared() {
            return config;
        }
        Self::load_legacy_jsonl()
    }

    /// Load from shared poc-agent config (~/.config/poc-agent/config.json5).
    /// Memory settings live in the "memory" section; API settings are
    /// resolved from the shared model/backend configuration.
    fn try_load_shared() -> Option<Self> {
        let home = PathBuf::from(std::env::var("HOME").ok()?);
        let path = home.join(".config/poc-agent/config.json5");
        let content = std::fs::read_to_string(&path).ok()?;
        let root: serde_json::Value = json5::from_str(&content).ok()?;
        let mem = root.get("memory")?;

        let mut config = Config::default();

        // Memory-specific fields
        if let Some(s) = mem.get("user_name").and_then(|v| v.as_str()) {
            config.user_name = s.to_string();
        }
        if let Some(s) = mem.get("assistant_name").and_then(|v| v.as_str()) {
            config.assistant_name = s.to_string();
        }
        if let Some(s) = mem.get("data_dir").and_then(|v| v.as_str()) {
            config.data_dir = expand_home(s);
        }
        if let Some(s) = mem.get("projects_dir").and_then(|v| v.as_str()) {
            config.projects_dir = expand_home(s);
        }
        if let Some(arr) = mem.get("core_nodes").and_then(|v| v.as_array()) {
            config.core_nodes = arr.iter()
                .filter_map(|v| v.as_str().map(|s| s.to_string()))
                .collect();
        }
        if let Some(d) = mem.get("journal_days").and_then(|v| v.as_u64()) {
            config.journal_days = d as u32;
        }
        if let Some(m) = mem.get("journal_max").and_then(|v| v.as_u64()) {
            config.journal_max = m as usize;
        }
        if let Some(n) = mem.get("llm_concurrency").and_then(|v| v.as_u64()) {
            config.llm_concurrency = n.max(1) as usize;
        }
        if let Some(n) = mem.get("agent_budget").and_then(|v| v.as_u64()) {
            config.agent_budget = n as usize;
        }
        if let Some(s) = mem.get("prompts_dir").and_then(|v| v.as_str()) {
            config.prompts_dir = expand_home(s);
        }
        if let Some(s) = mem.get("agent_config_dir").and_then(|v| v.as_str()) {
            config.agent_config_dir = Some(expand_home(s));
        }

        // Context groups
        if let Some(groups) = mem.get("context_groups").and_then(|v| v.as_array()) {
            let mut cgs = Vec::new();
            for g in groups {
                if let Some(label) = g.get("label").and_then(|v| v.as_str()) {
                    let keys = g.get("keys")
                        .and_then(|v| v.as_array())
                        .map(|arr| arr.iter()
                            .filter_map(|v| v.as_str().map(|s| s.to_string()))
                            .collect())
                        .unwrap_or_default();
                    let source = match g.get("source").and_then(|v| v.as_str()) {
                        Some("file") => ContextSource::File,
                        Some("journal") => ContextSource::Journal,
                        _ => ContextSource::Store,
                    };
                    cgs.push(ContextGroup { label: label.to_string(), keys, source });
                }
            }
            if !cgs.is_empty() {
                config.context_groups = cgs;
            }
        }

        // Resolve API settings from the shared model/backend config.
        // memory.agent_model references a named model; we look up its
        // backend to get base_url and api_key.
        if let Some(model_name) = mem.get("agent_model").and_then(|v| v.as_str()) {
            if let Some(model_cfg) = root.get("models")
                .and_then(|m| m.get(model_name))
            {
                let backend_name = model_cfg.get("backend").and_then(|v| v.as_str()).unwrap_or("");
                let model_id = model_cfg.get("model_id").and_then(|v| v.as_str()).unwrap_or("");
                if let Some(backend) = root.get(backend_name) {
                    config.api_base_url = backend.get("base_url")
                        .and_then(|v| v.as_str())
                        .map(|s| s.to_string());
                    config.api_key = backend.get("api_key")
                        .and_then(|v| v.as_str())
                        .map(|s| s.to_string());
                }
                config.api_model = Some(model_id.to_string());
            }
        }

        Some(config)
    }

    /// Load from legacy JSONL config (~/.config/poc-memory/config.jsonl).
    fn load_legacy_jsonl() -> Self {
        let path = std::env::var("POC_MEMORY_CONFIG")
            .map(PathBuf::from)
            .unwrap_or_else(|_| {
@@ -119,14 +222,12 @@ impl Config {
        let mut context_groups: Vec<ContextGroup> = Vec::new();

        // Parse as a stream of JSON values (handles multi-line objects)
        let stream = serde_json::Deserializer::from_str(&content)
            .into_iter::<serde_json::Value>();
        for result in stream {
            let Ok(obj) = result else { continue };

            // Global config line
            if let Some(cfg) = obj.get("config") {
                if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) {
                    config.user_name = s.to_string();
@@ -175,7 +276,6 @@ impl Config {
                continue;
            }
            // Context group line
            if let Some(label) = obj.get("group").and_then(|v| v.as_str()) {
                let keys = obj.get("keys")
                    .and_then(|v| v.as_array())

scripts/Dockerfile.vllm (new file)

@@ -0,0 +1,26 @@
FROM nvidia/cuda:12.9.0-devel-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive
ENV PATH="/root/.local/bin:${PATH}"
RUN apt-get update -qq && \
apt-get install -y -qq python3 python3-pip git && \
rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir vllm ninja huggingface_hub
# Pre-download model weights (optional; uncomment to bake the weights into the image instead of pulling at runtime)
# RUN python3 -c "from huggingface_hub import snapshot_download; snapshot_download('Qwen/Qwen3.5-27B')"
EXPOSE 8000
ENTRYPOINT ["vllm", "serve"]
CMD ["Qwen/Qwen3.5-27B", \
"--port", "8000", \
"--max-model-len", "262144", \
"--gpu-memory-utilization", "0.95", \
"--enable-prefix-caching", \
"--enable-auto-tool-choice", \
"--tool-call-parser", "qwen3_coder", \
"--reasoning-parser", "qwen3", \
"--uvicorn-log-level", "warning"]

(provisioning script)

@@ -81,8 +81,8 @@ exec vllm serve "$MODEL" \
--max-model-len "$MAX_MODEL_LEN" \
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
--enable-prefix-caching \
--tool-call-parser hermes \
--enable-auto-tool-choice \
--tool-call-parser qwen35_coder \
--reasoning-parser qwen3 \
--trust-remote-code \
--max-cudagraph-capture-size 64 \

(provisioning script)

@@ -51,7 +51,7 @@ exec vllm serve "$MODEL" \
--max-model-len "$MAX_MODEL_LEN" \
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
--enable-prefix-caching \
--tool-call-parser hermes \
--enable-auto-tool-choice \
--tool-call-parser qwen35_coder \
--reasoning-parser=qwen3 \
--uvicorn-log-level warning
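
Once one of these backends is serving, the endpoint that poc-memory's
resolved api_base_url points at can be sanity-checked with any
OpenAI-compatible client; the host, port, and key below are assumptions
based on the defaults above:

    curl -s http://localhost:8000/v1/models -H "Authorization: Bearer $API_KEY"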