Consolidate poc-memory and poc-agent configs

poc-memory now reads from poc-agent's config.json5 as the primary
config source. Memory-specific settings live in a "memory" section;
API credentials are resolved from the shared model/backend config
instead of being duplicated.

- Add "memory" section to ~/.config/poc-agent/config.json5
- poc-memory config.rs: try shared config first, fall back to
  legacy JSONL
- API fields (base_url, api_key, model) resolved via
  memory.agent_model -> models -> backend lookup (sketched below)
- Add json5 dependency for proper JSON5 parsing
- Update provisioning scripts: hermes -> qwen3_coder tool parser
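
For illustration, the consolidated config takes roughly this shape. Field
names match the new parsing code in config.rs; the model name, backend
section name, and all values below are placeholders, not part of this
commit:

    // ~/.config/poc-agent/config.json5
    {
        memory: {
            user_name: "Alice",
            data_dir: "~/.claude/memory",
            agent_model: "mem-agent",       // resolved via models/backend below
            context_groups: [
                { label: "identity", keys: ["identity"] },
                { label: "orientation", keys: ["where-am-i.md"], source: "file" },
            ],
        },
        models: {
            "mem-agent": { backend: "vllm", model_id: "Qwen/Qwen3.5-27B" },
        },
        vllm: {
            base_url: "http://localhost:8000/v1",
            api_key: "unused",
        },
    }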

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Kent Overstreet 2026-03-19 21:49:58 -04:00
parent 4c7c3c762c
commit d9b56a02c3
6 changed files with 146 additions and 18 deletions

Cargo.lock (generated)

@@ -2643,6 +2643,7 @@ dependencies = [
"crossterm 0.28.1",
"faer",
"jobkit",
"json5",
"libc",
"log",
"memmap2",

Cargo.toml

@@ -8,6 +8,7 @@ capnp = "0.20"
uuid = { version = "1", features = ["v4"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
json5 = "0.4"
bincode = "1"
regex = "1"
chrono = "0.4"

config.rs (poc-memory)

@@ -1,16 +1,14 @@
// Configuration for poc-memory
//
// Loaded from ~/.config/poc-memory/config.jsonl (or POC_MEMORY_CONFIG env).
// Falls back to sensible defaults if no config file exists.
// Primary config: ~/.config/poc-agent/config.json5 (shared with poc-agent)
// Memory-specific settings live in the "memory" section.
// API backend resolved from the shared "models" + backend configs.
//
// Format: JSONL — one JSON object per line.
// First line with "config" key: global settings.
// Lines with "group" key: context loading groups (order preserved).
// Fallback: ~/.config/poc-memory/config.jsonl (legacy, still supported)
// Env override: POC_MEMORY_CONFIG
//
// Example:
// {"config": {"user_name": "Alice", "data_dir": "~/.claude/memory"}}
// {"group": "identity", "keys": ["identity"]}
// {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"}
// The shared config eliminates API credential duplication between
// poc-memory and poc-agent.
use std::path::PathBuf;
use std::sync::{Arc, OnceLock, RwLock};
@@ -56,11 +54,8 @@ pub struct Config {
    /// Directory containing prompt templates for agents.
    pub prompts_dir: PathBuf,
    /// Separate Claude config dir for background agent work (daemon jobs).
    /// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
    /// with different OAuth credentials than the interactive session.
    pub agent_config_dir: Option<PathBuf>,
    /// OpenAI-compatible API base URL for direct LLM calls (e.g. vllm).
    /// When set, agents use this instead of shelling out to claude CLI.
    /// OpenAI-compatible API base URL for direct LLM calls.
    pub api_base_url: Option<String>,
    /// API key for the direct API endpoint.
    pub api_key: Option<String>,
@@ -104,6 +99,114 @@ impl Default for Config {
impl Config {
    fn load_from_file() -> Self {
        // Try shared config first, then legacy JSONL
        if let Some(config) = Self::try_load_shared() {
            return config;
        }
        Self::load_legacy_jsonl()
    }

    /// Load from shared poc-agent config (~/.config/poc-agent/config.json5).
    /// Memory settings live in the "memory" section; API settings are
    /// resolved from the shared model/backend configuration.
    fn try_load_shared() -> Option<Self> {
        let home = PathBuf::from(std::env::var("HOME").ok()?);
        let path = home.join(".config/poc-agent/config.json5");
        let content = std::fs::read_to_string(&path).ok()?;
        let root: serde_json::Value = json5::from_str(&content).ok()?;
        let mem = root.get("memory")?;

        let mut config = Config::default();

        // Memory-specific fields
        if let Some(s) = mem.get("user_name").and_then(|v| v.as_str()) {
            config.user_name = s.to_string();
        }
        if let Some(s) = mem.get("assistant_name").and_then(|v| v.as_str()) {
            config.assistant_name = s.to_string();
        }
        if let Some(s) = mem.get("data_dir").and_then(|v| v.as_str()) {
            config.data_dir = expand_home(s);
        }
        if let Some(s) = mem.get("projects_dir").and_then(|v| v.as_str()) {
            config.projects_dir = expand_home(s);
        }
        if let Some(arr) = mem.get("core_nodes").and_then(|v| v.as_array()) {
            config.core_nodes = arr.iter()
                .filter_map(|v| v.as_str().map(|s| s.to_string()))
                .collect();
        }
        if let Some(d) = mem.get("journal_days").and_then(|v| v.as_u64()) {
            config.journal_days = d as u32;
        }
        if let Some(m) = mem.get("journal_max").and_then(|v| v.as_u64()) {
            config.journal_max = m as usize;
        }
        if let Some(n) = mem.get("llm_concurrency").and_then(|v| v.as_u64()) {
            config.llm_concurrency = n.max(1) as usize;
        }
        if let Some(n) = mem.get("agent_budget").and_then(|v| v.as_u64()) {
            config.agent_budget = n as usize;
        }
        if let Some(s) = mem.get("prompts_dir").and_then(|v| v.as_str()) {
            config.prompts_dir = expand_home(s);
        }
        if let Some(s) = mem.get("agent_config_dir").and_then(|v| v.as_str()) {
            config.agent_config_dir = Some(expand_home(s));
        }

        // Context groups
        if let Some(groups) = mem.get("context_groups").and_then(|v| v.as_array()) {
            let mut cgs = Vec::new();
            for g in groups {
                if let Some(label) = g.get("label").and_then(|v| v.as_str()) {
                    let keys = g.get("keys")
                        .and_then(|v| v.as_array())
                        .map(|arr| arr.iter()
                            .filter_map(|v| v.as_str().map(|s| s.to_string()))
                            .collect())
                        .unwrap_or_default();
                    let source = match g.get("source").and_then(|v| v.as_str()) {
                        Some("file") => ContextSource::File,
                        Some("journal") => ContextSource::Journal,
                        _ => ContextSource::Store,
                    };
                    cgs.push(ContextGroup { label: label.to_string(), keys, source });
                }
            }
            if !cgs.is_empty() {
                config.context_groups = cgs;
            }
        }

        // Resolve API settings from the shared model/backend config.
        // memory.agent_model references a named model; we look up its
        // backend to get base_url and api_key.
        if let Some(model_name) = mem.get("agent_model").and_then(|v| v.as_str()) {
            if let Some(model_cfg) = root.get("models")
                .and_then(|m| m.get(model_name))
            {
                let backend_name = model_cfg.get("backend").and_then(|v| v.as_str()).unwrap_or("");
                let model_id = model_cfg.get("model_id").and_then(|v| v.as_str()).unwrap_or("");
                if let Some(backend) = root.get(backend_name) {
                    config.api_base_url = backend.get("base_url")
                        .and_then(|v| v.as_str())
                        .map(|s| s.to_string());
                    config.api_key = backend.get("api_key")
                        .and_then(|v| v.as_str())
                        .map(|s| s.to_string());
                }
                config.api_model = Some(model_id.to_string());
            }
        }

        Some(config)
    }

    /// Load from legacy JSONL config (~/.config/poc-memory/config.jsonl).
    fn load_legacy_jsonl() -> Self {
        let path = std::env::var("POC_MEMORY_CONFIG")
            .map(PathBuf::from)
            .unwrap_or_else(|_| {
@@ -119,14 +222,12 @@ impl Config {
        let mut context_groups: Vec<ContextGroup> = Vec::new();

        // Parse as a stream of JSON values (handles multi-line objects)
        let stream = serde_json::Deserializer::from_str(&content)
            .into_iter::<serde_json::Value>();
        for result in stream {
            let Ok(obj) = result else { continue };

            // Global config line
            if let Some(cfg) = obj.get("config") {
                if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) {
                    config.user_name = s.to_string();
@@ -175,7 +276,6 @@ impl Config {
                continue;
            }
            // Context group line
            if let Some(label) = obj.get("group").and_then(|v| v.as_str()) {
                let keys = obj.get("keys")
                    .and_then(|v| v.as_array())

scripts/Dockerfile.vllm (new file)

@@ -0,0 +1,26 @@
FROM nvidia/cuda:12.9.0-devel-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive
ENV PATH="/root/.local/bin:${PATH}"
RUN apt-get update -qq && \
apt-get install -y -qq python3 python3-pip git && \
rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir vllm ninja huggingface_hub
# Pre-download model weights (optional; uncomment to bake the weights into the image instead of pulling at runtime)
# RUN python3 -c "from huggingface_hub import snapshot_download; snapshot_download('Qwen/Qwen3.5-27B')"
EXPOSE 8000
ENTRYPOINT ["vllm", "serve"]
CMD ["Qwen/Qwen3.5-27B", \
"--port", "8000", \
"--max-model-len", "262144", \
"--gpu-memory-utilization", "0.95", \
"--enable-prefix-caching", \
"--enable-auto-tool-choice", \
"--tool-call-parser", "qwen3_coder", \
"--reasoning-parser", "qwen3", \
"--uvicorn-log-level", "warning"]

(provisioning script)

@@ -81,8 +81,8 @@ exec vllm serve "$MODEL" \
--max-model-len "$MAX_MODEL_LEN" \
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
--enable-prefix-caching \
--tool-call-parser hermes \
--enable-auto-tool-choice \
--tool-call-parser qwen35_coder \
--reasoning-parser qwen3 \
--trust-remote-code \
--max-cudagraph-capture-size 64 \

(provisioning script)

@@ -51,7 +51,7 @@ exec vllm serve "$MODEL" \
--max-model-len "$MAX_MODEL_LEN" \
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
--enable-prefix-caching \
--tool-call-parser hermes \
--enable-auto-tool-choice \
--tool-call-parser qwen35_coder \
--reasoning-parser=qwen3 \
--uvicorn-log-level warning
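
Once one of these backends is serving, the endpoint that poc-memory's
resolved api_base_url points at can be sanity-checked with any
OpenAI-compatible client; the host, port, and key below are assumptions
based on the defaults above:

    curl -s http://localhost:8000/v1/models -H "Authorization: Bearer $API_KEY"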