poc-agent: read context_groups from config instead of hardcoded list

- Remove MEMORY_FILES constant from identity.rs
- Add ContextGroup struct for deserializing from config
- Load context_groups from ~/.config/poc-agent/config.json5
- Check ~/.config/poc-agent/ first for identity files, then project/global
- Debug screen now shows what's actually configured

This eliminates the hardcoded duplication and makes the debug output
match what's in the config file.
This commit is contained in:
Kent Overstreet 2026-03-24 01:53:28 -04:00
parent 966219720a
commit aa46b1d5a6
9 changed files with 346 additions and 654 deletions

50
scripts/provision-mistralrs.sh Executable file
View file

@@ -0,0 +1,50 @@
#!/bin/bash
# provision-mistralrs.sh — Set up mistral.rs on a RunPod GPU instance
#
# Alternative to vLLM for inference. Pure Rust, more debuggable,
# OpenAI-compatible API. Testing whether it fixes the IncompleteMessage
# errors we're seeing with vLLM on large payloads.
#
# Usage: ssh into your RunPod instance and run this script.
# Runs on port 8001 to coexist with vLLM on 8000.
#
# Environment variables:
#   MODEL   - HuggingFace model id to serve (default: Qwen/Qwen3.5-27B)
#   PORT    - port for the mistral.rs server (default: 8001)
#   HF_HOME - HuggingFace cache directory (default: /workspace/huggingface)
set -euo pipefail

die() { printf 'error: %s\n' "$*" >&2; exit 1; }

MODEL="${MODEL:-Qwen/Qwen3.5-27B}"
PORT="${PORT:-8001}"
readonly MODEL PORT

echo "=== mistral.rs provisioning ==="
echo "Model: $MODEL"
echo "Port: $PORT"
echo ""

# --- Verify GPU ---
# Fail early with a clear message if this isn't a GPU instance.
command -v nvidia-smi >/dev/null || die "nvidia-smi not found — is this a GPU instance?"
echo "GPU status:"
nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader
echo ""

# --- Install mistral.rs ---
# NOTE(review): curl | sh executes a remote script unverified; acceptable on
# a throwaway pod, but pin a release/checksum if this ever runs in CI.
echo "Installing mistral.rs..."
curl --proto '=https' --tlsv1.2 -sSf \
  https://raw.githubusercontent.com/EricLBuehler/mistral.rs/master/install.sh | sh

# The installer typically drops binaries under ~/.cargo/bin or ~/.local/bin,
# which may not be on PATH in this shell — add them, then verify the binary
# actually exists before we rely on it below.
export PATH="$HOME/.cargo/bin:$HOME/.local/bin:$PATH"
command -v mistralrs >/dev/null || die "mistralrs not on PATH after install"

# --- Use persistent storage for model cache ---
# RunPod's /workspace survives pod restarts; cache models there so they
# aren't re-downloaded every session.
export HF_HOME="${HF_HOME:-/workspace/huggingface}"
mkdir -p "$HF_HOME" || die "cannot create HF cache dir: $HF_HOME"

# --- Run hardware tune first ---
echo "Running hardware benchmark..."
mistralrs tune

# --- Start server ---
echo ""
echo "Starting mistral.rs server on port $PORT..."
echo "API: http://0.0.0.0:$PORT/v1"
echo "UI: http://0.0.0.0:$PORT/ui"
echo ""
# Run in foreground (use screen/tmux to background)
mistralrs serve \
  --ui \
  --port "$PORT" \
  -m "$MODEL"