Consolidate poc-memory and poc-agent configs
poc-memory now reads from poc-agent's config.json5 as the primary config source. Memory-specific settings live in a "memory" section; API credentials are resolved from the shared model/backend config instead of being duplicated. - Add "memory" section to ~/.config/poc-agent/config.json5 - poc-memory config.rs: try shared config first, fall back to legacy JSONL - API fields (base_url, api_key, model) resolved via memory.agent_model -> models -> backend lookup - Add json5 dependency for proper JSON5 parsing - Update provisioning scripts: hermes -> qwen3_coder tool parser Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4c7c3c762c
commit
d9b56a02c3
6 changed files with 146 additions and 18 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -2643,6 +2643,7 @@ dependencies = [
|
||||||
"crossterm 0.28.1",
|
"crossterm 0.28.1",
|
||||||
"faer",
|
"faer",
|
||||||
"jobkit",
|
"jobkit",
|
||||||
|
"json5",
|
||||||
"libc",
|
"libc",
|
||||||
"log",
|
"log",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ capnp = "0.20"
|
||||||
uuid = { version = "1", features = ["v4"] }
|
uuid = { version = "1", features = ["v4"] }
|
||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
|
json5 = "0.4"
|
||||||
bincode = "1"
|
bincode = "1"
|
||||||
regex = "1"
|
regex = "1"
|
||||||
chrono = "0.4"
|
chrono = "0.4"
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,14 @@
|
||||||
// Configuration for poc-memory
|
// Configuration for poc-memory
|
||||||
//
|
//
|
||||||
// Loaded from ~/.config/poc-memory/config.jsonl (or POC_MEMORY_CONFIG env).
|
// Primary config: ~/.config/poc-agent/config.json5 (shared with poc-agent)
|
||||||
// Falls back to sensible defaults if no config file exists.
|
// Memory-specific settings live in the "memory" section.
|
||||||
|
// API backend resolved from the shared "models" + backend configs.
|
||||||
//
|
//
|
||||||
// Format: JSONL — one JSON object per line.
|
// Fallback: ~/.config/poc-memory/config.jsonl (legacy, still supported)
|
||||||
// First line with "config" key: global settings.
|
// Env override: POC_MEMORY_CONFIG
|
||||||
// Lines with "group" key: context loading groups (order preserved).
|
|
||||||
//
|
//
|
||||||
// Example:
|
// The shared config eliminates API credential duplication between
|
||||||
// {"config": {"user_name": "Alice", "data_dir": "~/.claude/memory"}}
|
// poc-memory and poc-agent.
|
||||||
// {"group": "identity", "keys": ["identity"]}
|
|
||||||
// {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"}
|
|
||||||
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::{Arc, OnceLock, RwLock};
|
use std::sync::{Arc, OnceLock, RwLock};
|
||||||
|
|
@ -56,11 +54,8 @@ pub struct Config {
|
||||||
/// Directory containing prompt templates for agents.
|
/// Directory containing prompt templates for agents.
|
||||||
pub prompts_dir: PathBuf,
|
pub prompts_dir: PathBuf,
|
||||||
/// Separate Claude config dir for background agent work (daemon jobs).
|
/// Separate Claude config dir for background agent work (daemon jobs).
|
||||||
/// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
|
|
||||||
/// with different OAuth credentials than the interactive session.
|
|
||||||
pub agent_config_dir: Option<PathBuf>,
|
pub agent_config_dir: Option<PathBuf>,
|
||||||
/// OpenAI-compatible API base URL for direct LLM calls (e.g. vllm).
|
/// OpenAI-compatible API base URL for direct LLM calls.
|
||||||
/// When set, agents use this instead of shelling out to claude CLI.
|
|
||||||
pub api_base_url: Option<String>,
|
pub api_base_url: Option<String>,
|
||||||
/// API key for the direct API endpoint.
|
/// API key for the direct API endpoint.
|
||||||
pub api_key: Option<String>,
|
pub api_key: Option<String>,
|
||||||
|
|
@ -104,6 +99,114 @@ impl Default for Config {
|
||||||
|
|
||||||
impl Config {
|
impl Config {
|
||||||
fn load_from_file() -> Self {
|
fn load_from_file() -> Self {
|
||||||
|
// Try shared config first, then legacy JSONL
|
||||||
|
if let Some(config) = Self::try_load_shared() {
|
||||||
|
return config;
|
||||||
|
}
|
||||||
|
Self::load_legacy_jsonl()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load from shared poc-agent config (~/.config/poc-agent/config.json5).
|
||||||
|
/// Memory settings live in the "memory" section; API settings are
|
||||||
|
/// resolved from the shared model/backend configuration.
|
||||||
|
fn try_load_shared() -> Option<Self> {
|
||||||
|
let home = PathBuf::from(std::env::var("HOME").ok()?);
|
||||||
|
let path = home.join(".config/poc-agent/config.json5");
|
||||||
|
let content = std::fs::read_to_string(&path).ok()?;
|
||||||
|
|
||||||
|
let root: serde_json::Value = json5::from_str(&content).ok()?;
|
||||||
|
|
||||||
|
let mem = root.get("memory")?;
|
||||||
|
let mut config = Config::default();
|
||||||
|
|
||||||
|
// Memory-specific fields
|
||||||
|
if let Some(s) = mem.get("user_name").and_then(|v| v.as_str()) {
|
||||||
|
config.user_name = s.to_string();
|
||||||
|
}
|
||||||
|
if let Some(s) = mem.get("assistant_name").and_then(|v| v.as_str()) {
|
||||||
|
config.assistant_name = s.to_string();
|
||||||
|
}
|
||||||
|
if let Some(s) = mem.get("data_dir").and_then(|v| v.as_str()) {
|
||||||
|
config.data_dir = expand_home(s);
|
||||||
|
}
|
||||||
|
if let Some(s) = mem.get("projects_dir").and_then(|v| v.as_str()) {
|
||||||
|
config.projects_dir = expand_home(s);
|
||||||
|
}
|
||||||
|
if let Some(arr) = mem.get("core_nodes").and_then(|v| v.as_array()) {
|
||||||
|
config.core_nodes = arr.iter()
|
||||||
|
.filter_map(|v| v.as_str().map(|s| s.to_string()))
|
||||||
|
.collect();
|
||||||
|
}
|
||||||
|
if let Some(d) = mem.get("journal_days").and_then(|v| v.as_u64()) {
|
||||||
|
config.journal_days = d as u32;
|
||||||
|
}
|
||||||
|
if let Some(m) = mem.get("journal_max").and_then(|v| v.as_u64()) {
|
||||||
|
config.journal_max = m as usize;
|
||||||
|
}
|
||||||
|
if let Some(n) = mem.get("llm_concurrency").and_then(|v| v.as_u64()) {
|
||||||
|
config.llm_concurrency = n.max(1) as usize;
|
||||||
|
}
|
||||||
|
if let Some(n) = mem.get("agent_budget").and_then(|v| v.as_u64()) {
|
||||||
|
config.agent_budget = n as usize;
|
||||||
|
}
|
||||||
|
if let Some(s) = mem.get("prompts_dir").and_then(|v| v.as_str()) {
|
||||||
|
config.prompts_dir = expand_home(s);
|
||||||
|
}
|
||||||
|
if let Some(s) = mem.get("agent_config_dir").and_then(|v| v.as_str()) {
|
||||||
|
config.agent_config_dir = Some(expand_home(s));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Context groups
|
||||||
|
if let Some(groups) = mem.get("context_groups").and_then(|v| v.as_array()) {
|
||||||
|
let mut cgs = Vec::new();
|
||||||
|
for g in groups {
|
||||||
|
if let Some(label) = g.get("label").and_then(|v| v.as_str()) {
|
||||||
|
let keys = g.get("keys")
|
||||||
|
.and_then(|v| v.as_array())
|
||||||
|
.map(|arr| arr.iter()
|
||||||
|
.filter_map(|v| v.as_str().map(|s| s.to_string()))
|
||||||
|
.collect())
|
||||||
|
.unwrap_or_default();
|
||||||
|
let source = match g.get("source").and_then(|v| v.as_str()) {
|
||||||
|
Some("file") => ContextSource::File,
|
||||||
|
Some("journal") => ContextSource::Journal,
|
||||||
|
_ => ContextSource::Store,
|
||||||
|
};
|
||||||
|
cgs.push(ContextGroup { label: label.to_string(), keys, source });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !cgs.is_empty() {
|
||||||
|
config.context_groups = cgs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve API settings from the shared model/backend config.
|
||||||
|
// memory.agent_model references a named model; we look up its
|
||||||
|
// backend to get base_url and api_key.
|
||||||
|
if let Some(model_name) = mem.get("agent_model").and_then(|v| v.as_str()) {
|
||||||
|
if let Some(model_cfg) = root.get("models")
|
||||||
|
.and_then(|m| m.get(model_name))
|
||||||
|
{
|
||||||
|
let backend_name = model_cfg.get("backend").and_then(|v| v.as_str()).unwrap_or("");
|
||||||
|
let model_id = model_cfg.get("model_id").and_then(|v| v.as_str()).unwrap_or("");
|
||||||
|
|
||||||
|
if let Some(backend) = root.get(backend_name) {
|
||||||
|
config.api_base_url = backend.get("base_url")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(|s| s.to_string());
|
||||||
|
config.api_key = backend.get("api_key")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(|s| s.to_string());
|
||||||
|
}
|
||||||
|
config.api_model = Some(model_id.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(config)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load from legacy JSONL config (~/.config/poc-memory/config.jsonl).
|
||||||
|
fn load_legacy_jsonl() -> Self {
|
||||||
let path = std::env::var("POC_MEMORY_CONFIG")
|
let path = std::env::var("POC_MEMORY_CONFIG")
|
||||||
.map(PathBuf::from)
|
.map(PathBuf::from)
|
||||||
.unwrap_or_else(|_| {
|
.unwrap_or_else(|_| {
|
||||||
|
|
@ -119,14 +222,12 @@ impl Config {
|
||||||
|
|
||||||
let mut context_groups: Vec<ContextGroup> = Vec::new();
|
let mut context_groups: Vec<ContextGroup> = Vec::new();
|
||||||
|
|
||||||
// Parse as a stream of JSON values (handles multi-line objects)
|
|
||||||
let stream = serde_json::Deserializer::from_str(&content)
|
let stream = serde_json::Deserializer::from_str(&content)
|
||||||
.into_iter::<serde_json::Value>();
|
.into_iter::<serde_json::Value>();
|
||||||
|
|
||||||
for result in stream {
|
for result in stream {
|
||||||
let Ok(obj) = result else { continue };
|
let Ok(obj) = result else { continue };
|
||||||
|
|
||||||
// Global config line
|
|
||||||
if let Some(cfg) = obj.get("config") {
|
if let Some(cfg) = obj.get("config") {
|
||||||
if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) {
|
if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) {
|
||||||
config.user_name = s.to_string();
|
config.user_name = s.to_string();
|
||||||
|
|
@ -175,7 +276,6 @@ impl Config {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Context group line
|
|
||||||
if let Some(label) = obj.get("group").and_then(|v| v.as_str()) {
|
if let Some(label) = obj.get("group").and_then(|v| v.as_str()) {
|
||||||
let keys = obj.get("keys")
|
let keys = obj.get("keys")
|
||||||
.and_then(|v| v.as_array())
|
.and_then(|v| v.as_array())
|
||||||
|
|
|
||||||
26
scripts/Dockerfile.vllm
Normal file
26
scripts/Dockerfile.vllm
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
# vLLM inference server image for poc-agent / poc-memory.
# CUDA devel base is required so vllm can JIT-compile kernels (ninja).
FROM nvidia/cuda:12.9.0-devel-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive
# pip --user installs land in /root/.local/bin.
ENV PATH="/root/.local/bin:${PATH}"

# Minimal system deps: python toolchain + git (for pip VCS installs).
# apt lists removed in the same layer to keep the image small.
RUN apt-get update -qq && \
    apt-get install -y -qq python3 python3-pip git && \
    rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir vllm ninja huggingface_hub

# Pre-download model weights (optional — comment out to pull at runtime)
# RUN python3 -c "from huggingface_hub import snapshot_download; snapshot_download('Qwen/Qwen3.5-27B')"

# OpenAI-compatible API port.
EXPOSE 8000

# CMD holds only the arguments so `docker run <image> <other-model> ...`
# can override the model/flags without replacing the entrypoint.
ENTRYPOINT ["vllm", "serve"]
CMD ["Qwen/Qwen3.5-27B", \
    "--port", "8000", \
    "--max-model-len", "262144", \
    "--gpu-memory-utilization", "0.95", \
    "--enable-prefix-caching", \
    "--enable-auto-tool-choice", \
    "--tool-call-parser", "qwen3_coder", \
    "--reasoning-parser", "qwen3", \
    "--uvicorn-log-level", "warning"]
|
||||||
|
|
@ -81,8 +81,8 @@ exec vllm serve "$MODEL" \
|
||||||
--max-model-len "$MAX_MODEL_LEN" \
|
--max-model-len "$MAX_MODEL_LEN" \
|
||||||
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
|
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
|
||||||
--enable-prefix-caching \
|
--enable-prefix-caching \
|
||||||
--tool-call-parser hermes \
|
|
||||||
--enable-auto-tool-choice \
|
--enable-auto-tool-choice \
|
||||||
|
--tool-call-parser qwen3_coder \
|
||||||
--reasoning-parser qwen3 \
|
--reasoning-parser qwen3 \
|
||||||
--trust-remote-code \
|
--trust-remote-code \
|
||||||
--max-cudagraph-capture-size 64 \
|
--max-cudagraph-capture-size 64 \
|
||||||
|
|
|
||||||
|
|
@ -51,7 +51,7 @@ exec vllm serve "$MODEL" \
|
||||||
--max-model-len "$MAX_MODEL_LEN" \
|
--max-model-len "$MAX_MODEL_LEN" \
|
||||||
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
|
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
|
||||||
--enable-prefix-caching \
|
--enable-prefix-caching \
|
||||||
--tool-call-parser hermes \
|
|
||||||
--enable-auto-tool-choice \
|
--enable-auto-tool-choice \
|
||||||
|
--tool-call-parser qwen3_coder \
|
||||||
--reasoning-parser=qwen3 \
|
--reasoning-parser=qwen3 \
|
||||||
--uvicorn-log-level warning
|
--uvicorn-log-level warning
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue