Consolidate poc-memory and poc-agent configs
poc-memory now reads from poc-agent's config.json5 as the primary config source. Memory-specific settings live in a "memory" section; API credentials are resolved from the shared model/backend config instead of being duplicated. - Add "memory" section to ~/.config/poc-agent/config.json5 - poc-memory config.rs: try shared config first, fall back to legacy JSONL - API fields (base_url, api_key, model) resolved via memory.agent_model -> models -> backend lookup - Add json5 dependency for proper JSON5 parsing - Update provisioning scripts: hermes -> qwen3_coder tool parser Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4c7c3c762c
commit
d9b56a02c3
6 changed files with 146 additions and 18 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -2643,6 +2643,7 @@ dependencies = [
|
||||||
"crossterm 0.28.1",
|
"crossterm 0.28.1",
|
||||||
"faer",
|
"faer",
|
||||||
"jobkit",
|
"jobkit",
|
||||||
|
"json5",
|
||||||
"libc",
|
"libc",
|
||||||
"log",
|
"log",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ capnp = "0.20"
|
||||||
uuid = { version = "1", features = ["v4"] }
|
uuid = { version = "1", features = ["v4"] }
|
||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
|
json5 = "0.4"
|
||||||
bincode = "1"
|
bincode = "1"
|
||||||
regex = "1"
|
regex = "1"
|
||||||
chrono = "0.4"
|
chrono = "0.4"
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,14 @@
|
||||||
// Configuration for poc-memory
|
// Configuration for poc-memory
|
||||||
//
|
//
|
||||||
// Loaded from ~/.config/poc-memory/config.jsonl (or POC_MEMORY_CONFIG env).
|
// Primary config: ~/.config/poc-agent/config.json5 (shared with poc-agent)
|
||||||
// Falls back to sensible defaults if no config file exists.
|
// Memory-specific settings live in the "memory" section.
|
||||||
|
// API backend resolved from the shared "models" + backend configs.
|
||||||
//
|
//
|
||||||
// Format: JSONL — one JSON object per line.
|
// Fallback: ~/.config/poc-memory/config.jsonl (legacy, still supported)
|
||||||
// First line with "config" key: global settings.
|
// Env override: POC_MEMORY_CONFIG
|
||||||
// Lines with "group" key: context loading groups (order preserved).
|
|
||||||
//
|
//
|
||||||
// Example:
|
// The shared config eliminates API credential duplication between
|
||||||
// {"config": {"user_name": "Alice", "data_dir": "~/.claude/memory"}}
|
// poc-memory and poc-agent.
|
||||||
// {"group": "identity", "keys": ["identity"]}
|
|
||||||
// {"group": "orientation", "keys": ["where-am-i.md"], "source": "file"}
|
|
||||||
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::{Arc, OnceLock, RwLock};
|
use std::sync::{Arc, OnceLock, RwLock};
|
||||||
|
|
@ -56,11 +54,8 @@ pub struct Config {
|
||||||
/// Directory containing prompt templates for agents.
|
/// Directory containing prompt templates for agents.
|
||||||
pub prompts_dir: PathBuf,
|
pub prompts_dir: PathBuf,
|
||||||
/// Separate Claude config dir for background agent work (daemon jobs).
|
/// Separate Claude config dir for background agent work (daemon jobs).
|
||||||
/// If set, passed as CLAUDE_CONFIG_DIR so the daemon authenticates
|
|
||||||
/// with different OAuth credentials than the interactive session.
|
|
||||||
pub agent_config_dir: Option<PathBuf>,
|
pub agent_config_dir: Option<PathBuf>,
|
||||||
/// OpenAI-compatible API base URL for direct LLM calls (e.g. vllm).
|
/// OpenAI-compatible API base URL for direct LLM calls.
|
||||||
/// When set, agents use this instead of shelling out to claude CLI.
|
|
||||||
pub api_base_url: Option<String>,
|
pub api_base_url: Option<String>,
|
||||||
/// API key for the direct API endpoint.
|
/// API key for the direct API endpoint.
|
||||||
pub api_key: Option<String>,
|
pub api_key: Option<String>,
|
||||||
|
|
@ -104,6 +99,114 @@ impl Default for Config {
|
||||||
|
|
||||||
impl Config {
|
impl Config {
|
||||||
fn load_from_file() -> Self {
|
fn load_from_file() -> Self {
|
||||||
|
// Try shared config first, then legacy JSONL
|
||||||
|
if let Some(config) = Self::try_load_shared() {
|
||||||
|
return config;
|
||||||
|
}
|
||||||
|
Self::load_legacy_jsonl()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load from shared poc-agent config (~/.config/poc-agent/config.json5).
|
||||||
|
/// Memory settings live in the "memory" section; API settings are
|
||||||
|
/// resolved from the shared model/backend configuration.
|
||||||
|
fn try_load_shared() -> Option<Self> {
|
||||||
|
let home = PathBuf::from(std::env::var("HOME").ok()?);
|
||||||
|
let path = home.join(".config/poc-agent/config.json5");
|
||||||
|
let content = std::fs::read_to_string(&path).ok()?;
|
||||||
|
|
||||||
|
let root: serde_json::Value = json5::from_str(&content).ok()?;
|
||||||
|
|
||||||
|
let mem = root.get("memory")?;
|
||||||
|
let mut config = Config::default();
|
||||||
|
|
||||||
|
// Memory-specific fields
|
||||||
|
if let Some(s) = mem.get("user_name").and_then(|v| v.as_str()) {
|
||||||
|
config.user_name = s.to_string();
|
||||||
|
}
|
||||||
|
if let Some(s) = mem.get("assistant_name").and_then(|v| v.as_str()) {
|
||||||
|
config.assistant_name = s.to_string();
|
||||||
|
}
|
||||||
|
if let Some(s) = mem.get("data_dir").and_then(|v| v.as_str()) {
|
||||||
|
config.data_dir = expand_home(s);
|
||||||
|
}
|
||||||
|
if let Some(s) = mem.get("projects_dir").and_then(|v| v.as_str()) {
|
||||||
|
config.projects_dir = expand_home(s);
|
||||||
|
}
|
||||||
|
if let Some(arr) = mem.get("core_nodes").and_then(|v| v.as_array()) {
|
||||||
|
config.core_nodes = arr.iter()
|
||||||
|
.filter_map(|v| v.as_str().map(|s| s.to_string()))
|
||||||
|
.collect();
|
||||||
|
}
|
||||||
|
if let Some(d) = mem.get("journal_days").and_then(|v| v.as_u64()) {
|
||||||
|
config.journal_days = d as u32;
|
||||||
|
}
|
||||||
|
if let Some(m) = mem.get("journal_max").and_then(|v| v.as_u64()) {
|
||||||
|
config.journal_max = m as usize;
|
||||||
|
}
|
||||||
|
if let Some(n) = mem.get("llm_concurrency").and_then(|v| v.as_u64()) {
|
||||||
|
config.llm_concurrency = n.max(1) as usize;
|
||||||
|
}
|
||||||
|
if let Some(n) = mem.get("agent_budget").and_then(|v| v.as_u64()) {
|
||||||
|
config.agent_budget = n as usize;
|
||||||
|
}
|
||||||
|
if let Some(s) = mem.get("prompts_dir").and_then(|v| v.as_str()) {
|
||||||
|
config.prompts_dir = expand_home(s);
|
||||||
|
}
|
||||||
|
if let Some(s) = mem.get("agent_config_dir").and_then(|v| v.as_str()) {
|
||||||
|
config.agent_config_dir = Some(expand_home(s));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Context groups
|
||||||
|
if let Some(groups) = mem.get("context_groups").and_then(|v| v.as_array()) {
|
||||||
|
let mut cgs = Vec::new();
|
||||||
|
for g in groups {
|
||||||
|
if let Some(label) = g.get("label").and_then(|v| v.as_str()) {
|
||||||
|
let keys = g.get("keys")
|
||||||
|
.and_then(|v| v.as_array())
|
||||||
|
.map(|arr| arr.iter()
|
||||||
|
.filter_map(|v| v.as_str().map(|s| s.to_string()))
|
||||||
|
.collect())
|
||||||
|
.unwrap_or_default();
|
||||||
|
let source = match g.get("source").and_then(|v| v.as_str()) {
|
||||||
|
Some("file") => ContextSource::File,
|
||||||
|
Some("journal") => ContextSource::Journal,
|
||||||
|
_ => ContextSource::Store,
|
||||||
|
};
|
||||||
|
cgs.push(ContextGroup { label: label.to_string(), keys, source });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !cgs.is_empty() {
|
||||||
|
config.context_groups = cgs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve API settings from the shared model/backend config.
|
||||||
|
// memory.agent_model references a named model; we look up its
|
||||||
|
// backend to get base_url and api_key.
|
||||||
|
if let Some(model_name) = mem.get("agent_model").and_then(|v| v.as_str()) {
|
||||||
|
if let Some(model_cfg) = root.get("models")
|
||||||
|
.and_then(|m| m.get(model_name))
|
||||||
|
{
|
||||||
|
let backend_name = model_cfg.get("backend").and_then(|v| v.as_str()).unwrap_or("");
|
||||||
|
let model_id = model_cfg.get("model_id").and_then(|v| v.as_str()).unwrap_or("");
|
||||||
|
|
||||||
|
if let Some(backend) = root.get(backend_name) {
|
||||||
|
config.api_base_url = backend.get("base_url")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(|s| s.to_string());
|
||||||
|
config.api_key = backend.get("api_key")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(|s| s.to_string());
|
||||||
|
}
|
||||||
|
config.api_model = Some(model_id.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(config)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load from legacy JSONL config (~/.config/poc-memory/config.jsonl).
|
||||||
|
fn load_legacy_jsonl() -> Self {
|
||||||
let path = std::env::var("POC_MEMORY_CONFIG")
|
let path = std::env::var("POC_MEMORY_CONFIG")
|
||||||
.map(PathBuf::from)
|
.map(PathBuf::from)
|
||||||
.unwrap_or_else(|_| {
|
.unwrap_or_else(|_| {
|
||||||
|
|
@ -119,14 +222,12 @@ impl Config {
|
||||||
|
|
||||||
let mut context_groups: Vec<ContextGroup> = Vec::new();
|
let mut context_groups: Vec<ContextGroup> = Vec::new();
|
||||||
|
|
||||||
// Parse as a stream of JSON values (handles multi-line objects)
|
|
||||||
let stream = serde_json::Deserializer::from_str(&content)
|
let stream = serde_json::Deserializer::from_str(&content)
|
||||||
.into_iter::<serde_json::Value>();
|
.into_iter::<serde_json::Value>();
|
||||||
|
|
||||||
for result in stream {
|
for result in stream {
|
||||||
let Ok(obj) = result else { continue };
|
let Ok(obj) = result else { continue };
|
||||||
|
|
||||||
// Global config line
|
|
||||||
if let Some(cfg) = obj.get("config") {
|
if let Some(cfg) = obj.get("config") {
|
||||||
if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) {
|
if let Some(s) = cfg.get("user_name").and_then(|v| v.as_str()) {
|
||||||
config.user_name = s.to_string();
|
config.user_name = s.to_string();
|
||||||
|
|
@ -175,7 +276,6 @@ impl Config {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Context group line
|
|
||||||
if let Some(label) = obj.get("group").and_then(|v| v.as_str()) {
|
if let Some(label) = obj.get("group").and_then(|v| v.as_str()) {
|
||||||
let keys = obj.get("keys")
|
let keys = obj.get("keys")
|
||||||
.and_then(|v| v.as_array())
|
.and_then(|v| v.as_array())
|
||||||
|
|
|
||||||
26
scripts/Dockerfile.vllm
Normal file
26
scripts/Dockerfile.vllm
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
# vLLM inference server image for poc-agent / poc-memory.
# CUDA devel base is required so vllm can JIT-compile kernels (ninja).
FROM nvidia/cuda:12.9.0-devel-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive
# pip --user installs land in /root/.local/bin.
ENV PATH="/root/.local/bin:${PATH}"

# Minimal system deps: python toolchain + git (for pip VCS installs).
# apt lists removed in the same layer to keep the image small.
RUN apt-get update -qq && \
    apt-get install -y -qq python3 python3-pip git && \
    rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir vllm ninja huggingface_hub

# Pre-download model weights (optional — comment out to pull at runtime)
# RUN python3 -c "from huggingface_hub import snapshot_download; snapshot_download('Qwen/Qwen3.5-27B')"

# OpenAI-compatible API port.
EXPOSE 8000

# CMD holds only the arguments so `docker run <image> <other-model> ...`
# can override the model/flags without replacing the entrypoint.
ENTRYPOINT ["vllm", "serve"]
CMD ["Qwen/Qwen3.5-27B", \
    "--port", "8000", \
    "--max-model-len", "262144", \
    "--gpu-memory-utilization", "0.95", \
    "--enable-prefix-caching", \
    "--enable-auto-tool-choice", \
    "--tool-call-parser", "qwen3_coder", \
    "--reasoning-parser", "qwen3", \
    "--uvicorn-log-level", "warning"]
|
||||||
|
|
@ -81,8 +81,8 @@ exec vllm serve "$MODEL" \
|
||||||
--max-model-len "$MAX_MODEL_LEN" \
|
--max-model-len "$MAX_MODEL_LEN" \
|
||||||
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
|
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
|
||||||
--enable-prefix-caching \
|
--enable-prefix-caching \
|
||||||
--tool-call-parser hermes \
|
|
||||||
--enable-auto-tool-choice \
|
--enable-auto-tool-choice \
|
||||||
|
--tool-call-parser qwen3_coder \
|
||||||
--reasoning-parser qwen3 \
|
--reasoning-parser qwen3 \
|
||||||
--trust-remote-code \
|
--trust-remote-code \
|
||||||
--max-cudagraph-capture-size 64 \
|
--max-cudagraph-capture-size 64 \
|
||||||
|
|
|
||||||
|
|
@ -51,7 +51,7 @@ exec vllm serve "$MODEL" \
|
||||||
--max-model-len "$MAX_MODEL_LEN" \
|
--max-model-len "$MAX_MODEL_LEN" \
|
||||||
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
|
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
|
||||||
--enable-prefix-caching \
|
--enable-prefix-caching \
|
||||||
--tool-call-parser hermes \
|
|
||||||
--enable-auto-tool-choice \
|
--enable-auto-tool-choice \
|
||||||
|
--tool-call-parser qwen3_coder \
|
||||||
--reasoning-parser=qwen3 \
|
--reasoning-parser=qwen3 \
|
||||||
--uvicorn-log-level warning
|
--uvicorn-log-level warning
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue