- Remove MEMORY_FILES constant from identity.rs
- Add ContextGroup struct for deserializing from config
- Load context_groups from ~/.config/poc-agent/config.json5
- Check ~/.config/poc-agent/ first for identity files, then project/global
- Debug screen now shows what's actually configured

This eliminates the hardcoded duplication and makes the debug output match what's in the config file.
50 lines
1.3 KiB
Bash
Executable file
50 lines
1.3 KiB
Bash
Executable file
#!/bin/bash
# provision-mistralrs.sh — Set up mistral.rs on a RunPod GPU instance
#
# Alternative to vLLM for inference. Pure Rust, more debuggable,
# OpenAI-compatible API. Testing whether it fixes the IncompleteMessage
# errors we're seeing with vLLM on large payloads.
#
# Usage: ssh into your RunPod instance and run this script.
# Runs on port 8001 to coexist with vLLM on 8000.
#
# Env vars (all optional):
#   MODEL   - HF model id to serve (default: Qwen/Qwen3.5-27B)
#   PORT    - port for the OpenAI-compatible API (default: 8001)
#   HF_HOME - Hugging Face cache dir (default: /workspace/huggingface)

set -euo pipefail

MODEL="${MODEL:-Qwen/Qwen3.5-27B}"
PORT="${PORT:-8001}"

echo "=== mistral.rs provisioning ==="
echo "Model: $MODEL"
echo "Port: $PORT"
echo ""

# --- Verify GPU ---
# Fails (via set -e) if no NVIDIA GPU/driver is present — intentional:
# there is no point provisioning an inference server without one.
echo "GPU status:"
nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader
echo ""

# --- Install mistral.rs ---
echo "Installing mistral.rs..."
curl --proto '=https' --tlsv1.2 -sSf \
  https://raw.githubusercontent.com/EricLBuehler/mistral.rs/master/install.sh | sh

# The installer may place the binary somewhere not yet on this shell's PATH;
# fail fast with a clear message instead of a confusing "command not found"
# from the tune/serve steps below.
if ! command -v mistralrs >/dev/null 2>&1; then
  echo "error: mistralrs not found on PATH after install (try opening a new shell or sourcing your profile)" >&2
  exit 1
fi

# --- Use persistent storage for model cache ---
# RunPod containers are ephemeral; /workspace survives restarts, so cache
# model weights there to avoid re-downloading on every boot.
export HF_HOME="${HF_HOME:-/workspace/huggingface}"
mkdir -p "$HF_HOME"

# --- Run hardware tune first ---
# Benchmarks the GPU so mistral.rs can pick appropriate runtime settings.
echo "Running hardware benchmark..."
mistralrs tune

# --- Start server ---
echo ""
echo "Starting mistral.rs server on port $PORT..."
echo "API: http://0.0.0.0:$PORT/v1"
echo "UI: http://0.0.0.0:$PORT/ui"
echo ""

# Run in foreground (use screen/tmux to background)
mistralrs serve \
  --ui \
  --port "$PORT" \
  -m "$MODEL"