From d9b56a02c32db1f31c6d8df6496aee407f21f812 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 19 Mar 2026 21:49:58 -0400 Subject: [PATCH] Consolidate poc-memory and poc-agent configs poc-memory now reads from poc-agent's config.json5 as the primary config source. Memory-specific settings live in a "memory" section; API credentials are resolved from the shared model/backend config instead of being duplicated. - Add "memory" section to ~/.config/poc-agent/config.json5 - poc-memory config.rs: try shared config first, fall back to legacy JSONL - API fields (base_url, api_key, model) resolved via memory.agent_model -> models -> backend lookup - Add json5 dependency for proper JSON5 parsing - Update provisioning scripts: hermes -> qwen3_coder tool parser Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 1 + poc-memory/Cargo.toml | 1 + poc-memory/src/config.rs | 132 +++++++++++++++++++++++++++++++----- scripts/Dockerfile.vllm | 26 +++++++ scripts/provision-mi300x.sh | 2 +- scripts/provision-vllm.sh | 2 +- 6 files changed, 146 insertions(+), 18 deletions(-) create mode 100644 scripts/Dockerfile.vllm diff --git a/Cargo.lock b/Cargo.lock index 97a5f80..37efa38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2643,6 +2643,7 @@ dependencies = [ "crossterm 0.28.1", "faer", "jobkit", + "json5", "libc", "log", "memmap2", diff --git a/poc-memory/Cargo.toml b/poc-memory/Cargo.toml index 5fbc91f..f56d80c 100644 --- a/poc-memory/Cargo.toml +++ b/poc-memory/Cargo.toml @@ -8,6 +8,7 @@ capnp = "0.20" uuid = { version = "1", features = ["v4"] } serde = { version = "1", features = ["derive"] } serde_json = "1" +json5 = "0.4" bincode = "1" regex = "1" chrono = "0.4" diff --git a/poc-memory/src/config.rs b/poc-memory/src/config.rs index f6078ff..1880ee0 100644 --- a/poc-memory/src/config.rs +++ b/poc-memory/src/config.rs @@ -1,16 +1,14 @@ // Configuration for poc-memory // -// Loaded from ~/.config/poc-memory/config.jsonl (or POC_MEMORY_CONFIG env). 
-// Falls back to sensible defaults if no config file exists. +// Primary config: ~/.config/poc-agent/config.json5 (shared with poc-agent) +// Memory-specific settings live in the "memory" section. +// API backend resolved from the shared "models" + backend configs. // -// Format: JSONL — one JSON object per line. -// First line with "config" key: global settings. -// Lines with "group" key: context loading groups (order preserved). +// Fallback: ~/.config/poc-memory/config.jsonl (legacy, still supported) +// Env override: POC_MEMORY_CONFIG // -// Example: -// {"config": {"user_name": "Alice", "data_dir": "~/.claude/memory"}} -// {"group": "identity", "keys": ["identity"]} -// {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"} +// The shared config eliminates API credential duplication between +// poc-memory and poc-agent. use std::path::PathBuf; use std::sync::{Arc, OnceLock, RwLock}; @@ -56,11 +54,8 @@ pub struct Config { /// Directory containing prompt templates for agents. pub prompts_dir: PathBuf, /// Separate Claude config dir for background agent work (daemon jobs). - /// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates - /// with different OAuth credentials than the interactive session. pub agent_config_dir: Option, - /// OpenAI-compatible API base URL for direct LLM calls (e.g. vllm). - /// When set, agents use this instead of shelling out to claude CLI. + /// OpenAI-compatible API base URL for direct LLM calls. pub api_base_url: Option, /// API key for the direct API endpoint. pub api_key: Option, @@ -104,6 +99,114 @@ impl Default for Config { impl Config { fn load_from_file() -> Self { + // Try shared config first, then legacy JSONL + if let Some(config) = Self::try_load_shared() { + return config; + } + Self::load_legacy_jsonl() + } + + /// Load from shared poc-agent config (~/.config/poc-agent/config.json5). 
+ /// Memory settings live in the "memory" section; API settings are + /// resolved from the shared model/backend configuration. + fn try_load_shared() -> Option { + let home = PathBuf::from(std::env::var("HOME").ok()?); + let path = home.join(".config/poc-agent/config.json5"); + let content = std::fs::read_to_string(&path).ok()?; + + let root: serde_json::Value = json5::from_str(&content).ok()?; + + let mem = root.get("memory")?; + let mut config = Config::default(); + + // Memory-specific fields + if let Some(s) = mem.get("user_name").and_then(|v| v.as_str()) { + config.user_name = s.to_string(); + } + if let Some(s) = mem.get("assistant_name").and_then(|v| v.as_str()) { + config.assistant_name = s.to_string(); + } + if let Some(s) = mem.get("data_dir").and_then(|v| v.as_str()) { + config.data_dir = expand_home(s); + } + if let Some(s) = mem.get("projects_dir").and_then(|v| v.as_str()) { + config.projects_dir = expand_home(s); + } + if let Some(arr) = mem.get("core_nodes").and_then(|v| v.as_array()) { + config.core_nodes = arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect(); + } + if let Some(d) = mem.get("journal_days").and_then(|v| v.as_u64()) { + config.journal_days = d as u32; + } + if let Some(m) = mem.get("journal_max").and_then(|v| v.as_u64()) { + config.journal_max = m as usize; + } + if let Some(n) = mem.get("llm_concurrency").and_then(|v| v.as_u64()) { + config.llm_concurrency = n.max(1) as usize; + } + if let Some(n) = mem.get("agent_budget").and_then(|v| v.as_u64()) { + config.agent_budget = n as usize; + } + if let Some(s) = mem.get("prompts_dir").and_then(|v| v.as_str()) { + config.prompts_dir = expand_home(s); + } + if let Some(s) = mem.get("agent_config_dir").and_then(|v| v.as_str()) { + config.agent_config_dir = Some(expand_home(s)); + } + + // Context groups + if let Some(groups) = mem.get("context_groups").and_then(|v| v.as_array()) { + let mut cgs = Vec::new(); + for g in groups { + if let Some(label) = 
g.get("label").and_then(|v| v.as_str()) { + let keys = g.get("keys") + .and_then(|v| v.as_array()) + .map(|arr| arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect()) + .unwrap_or_default(); + let source = match g.get("source").and_then(|v| v.as_str()) { + Some("file") => ContextSource::File, + Some("journal") => ContextSource::Journal, + _ => ContextSource::Store, + }; + cgs.push(ContextGroup { label: label.to_string(), keys, source }); + } + } + if !cgs.is_empty() { + config.context_groups = cgs; + } + } + + // Resolve API settings from the shared model/backend config. + // memory.agent_model references a named model; we look up its + // backend to get base_url and api_key. + if let Some(model_name) = mem.get("agent_model").and_then(|v| v.as_str()) { + if let Some(model_cfg) = root.get("models") + .and_then(|m| m.get(model_name)) + { + let backend_name = model_cfg.get("backend").and_then(|v| v.as_str()).unwrap_or(""); + let model_id = model_cfg.get("model_id").and_then(|v| v.as_str()).unwrap_or(""); + + if let Some(backend) = root.get(backend_name) { + config.api_base_url = backend.get("base_url") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + config.api_key = backend.get("api_key") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + } + config.api_model = Some(model_id.to_string()); + } + } + + Some(config) + } + + /// Load from legacy JSONL config (~/.config/poc-memory/config.jsonl). 
+ fn load_legacy_jsonl() -> Self { let path = std::env::var("POC_MEMORY_CONFIG") .map(PathBuf::from) .unwrap_or_else(|_| { @@ -119,14 +222,12 @@ impl Config { let mut context_groups: Vec = Vec::new(); - // Parse as a stream of JSON values (handles multi-line objects) let stream = serde_json::Deserializer::from_str(&content) .into_iter::(); for result in stream { let Ok(obj) = result else { continue }; - // Global config line if let Some(cfg) = obj.get("config") { if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) { config.user_name = s.to_string(); @@ -175,7 +276,6 @@ impl Config { continue; } - // Context group line if let Some(label) = obj.get("group").and_then(|v| v.as_str()) { let keys = obj.get("keys") .and_then(|v| v.as_array()) diff --git a/scripts/Dockerfile.vllm b/scripts/Dockerfile.vllm new file mode 100644 index 0000000..c141e64 --- /dev/null +++ b/scripts/Dockerfile.vllm @@ -0,0 +1,26 @@ +FROM nvidia/cuda:12.9.0-devel-ubuntu22.04 + +ENV DEBIAN_FRONTEND=noninteractive +ENV PATH="/root/.local/bin:${PATH}" + +RUN apt-get update -qq && \ + apt-get install -y -qq python3 python3-pip git && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir vllm ninja huggingface_hub + +# Pre-download model weights (optional — comment out to pull at runtime) +# RUN python3 -c "from huggingface_hub import snapshot_download; snapshot_download('Qwen/Qwen3.5-27B')" + +EXPOSE 8000 + +ENTRYPOINT ["vllm", "serve"] +CMD ["Qwen/Qwen3.5-27B", \ + "--port", "8000", \ + "--max-model-len", "262144", \ + "--gpu-memory-utilization", "0.95", \ + "--enable-prefix-caching", \ + "--enable-auto-tool-choice", \ + "--tool-call-parser", "qwen3_coder", \ + "--reasoning-parser", "qwen3", \ + "--uvicorn-log-level", "warning"] diff --git a/scripts/provision-mi300x.sh b/scripts/provision-mi300x.sh index 5a47738..c028a26 100755 --- a/scripts/provision-mi300x.sh +++ b/scripts/provision-mi300x.sh @@ -81,8 +81,8 @@ exec vllm serve "$MODEL" \ --max-model-len "$MAX_MODEL_LEN" \ 
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \ --enable-prefix-caching \ - --tool-call-parser hermes \ --enable-auto-tool-choice \ + --tool-call-parser qwen3_coder \ --reasoning-parser qwen3 \ --trust-remote-code \ --max-cudagraph-capture-size 64 \ diff --git a/scripts/provision-vllm.sh b/scripts/provision-vllm.sh index e5702ed..a23c805 100755 --- a/scripts/provision-vllm.sh +++ b/scripts/provision-vllm.sh @@ -51,7 +51,7 @@ exec vllm serve "$MODEL" \ --max-model-len "$MAX_MODEL_LEN" \ --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \ --enable-prefix-caching \ - --tool-call-parser hermes \ --enable-auto-tool-choice \ + --tool-call-parser qwen3_coder \ --reasoning-parser=qwen3 \ --uvicorn-log-level warning