Add config file and install command; scrub personal references
Add ~/.config/poc-memory/config.toml for user_name, assistant_name, data_dir, projects_dir, and core_nodes. All agent prompts and transcript parsing now use configured names instead of hardcoded personal references. `poc-memory daemon install` writes the systemd user service and installs the memory-search hook into Claude's settings.json. Scrubbed hardcoded names from code and docs. Authors: ProofOfConcept <poc@bcachefs.org> and Kent Overstreet
This commit is contained in:
parent
ed641ec95f
commit
a8aaadb0ad
11 changed files with 256 additions and 41 deletions
|
|
@ -5,6 +5,7 @@
|
|||
//
|
||||
// Uses Haiku (not Sonnet) for cost efficiency on high-volume extraction.
|
||||
|
||||
use crate::config;
|
||||
use crate::llm;
|
||||
use crate::store::{self, Provenance};
|
||||
|
||||
|
|
@ -19,7 +20,13 @@ const OVERLAP_TOKENS: usize = 200;
|
|||
const WINDOW_CHARS: usize = WINDOW_TOKENS * CHARS_PER_TOKEN;
|
||||
const OVERLAP_CHARS: usize = OVERLAP_TOKENS * CHARS_PER_TOKEN;
|
||||
|
||||
const EXTRACTION_PROMPT: &str = r#"Extract atomic factual claims from this conversation excerpt.
|
||||
fn extraction_prompt() -> String {
|
||||
let cfg = config::get();
|
||||
format!(
|
||||
r#"Extract atomic factual claims from this conversation excerpt.
|
||||
|
||||
Speakers are labeled [{user}] and [{assistant}] in the transcript.
|
||||
Use their proper names in claims — not "the user" or "the assistant."
|
||||
|
||||
Each claim should be:
|
||||
- A single verifiable statement
|
||||
|
|
@ -29,7 +36,7 @@ Each claim should be:
|
|||
linux/kernel, memory/design, identity/personal)
|
||||
- Tagged with confidence: "stated" (explicitly said), "implied" (logically follows),
|
||||
or "speculative" (hypothesis, not confirmed)
|
||||
- Include which speaker said it (Kent, PoC/ProofOfConcept, or Unknown)
|
||||
- Include which speaker said it ("{user}", "{assistant}", or "Unknown")
|
||||
|
||||
Do NOT extract:
|
||||
- Opinions or subjective assessments
|
||||
|
|
@ -37,20 +44,21 @@ Do NOT extract:
|
|||
- Things that are obviously common knowledge
|
||||
- Restatements of the same fact (pick the clearest version)
|
||||
- System messages, tool outputs, or error logs (extract what was LEARNED from them)
|
||||
- Anything about the conversation itself ("Kent and PoC discussed...")
|
||||
- Anything about the conversation itself ("{user} and {assistant} discussed...")
|
||||
|
||||
Output as a JSON array. Each element:
|
||||
{
|
||||
{{
|
||||
"claim": "the exact factual statement",
|
||||
"domain": "category/subcategory",
|
||||
"confidence": "stated|implied|speculative",
|
||||
"speaker": "Kent|PoC|Unknown"
|
||||
}
|
||||
"speaker": "{user}|{assistant}|Unknown"
|
||||
}}
|
||||
|
||||
If the excerpt contains no extractable facts, output an empty array: []
|
||||
|
||||
--- CONVERSATION EXCERPT ---
|
||||
"#;
|
||||
"#, user = cfg.user_name, assistant = cfg.assistant_name)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Fact {
|
||||
|
|
@ -74,6 +82,7 @@ struct Message {
|
|||
|
||||
/// Extract user/assistant text messages from a JSONL transcript.
|
||||
fn extract_conversation(path: &Path) -> Vec<Message> {
|
||||
let cfg = config::get();
|
||||
let Ok(content) = fs::read_to_string(path) else { return Vec::new() };
|
||||
let mut messages = Vec::new();
|
||||
|
||||
|
|
@ -119,7 +128,11 @@ fn extract_conversation(path: &Path) -> Vec<Message> {
|
|||
continue;
|
||||
}
|
||||
|
||||
let role = if msg_type == "user" { "Kent" } else { "PoC" }.to_string();
|
||||
let role = if msg_type == "user" {
|
||||
cfg.user_name.clone()
|
||||
} else {
|
||||
cfg.assistant_name.clone()
|
||||
};
|
||||
messages.push(Message { role, text, timestamp });
|
||||
}
|
||||
|
||||
|
|
@ -229,11 +242,12 @@ pub fn mine_transcript(path: &Path, dry_run: bool) -> Result<Vec<Fact>, String>
|
|||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let prompt_prefix = extraction_prompt();
|
||||
let mut all_facts = Vec::new();
|
||||
for (i, (_offset, chunk)) in chunks.iter().enumerate() {
|
||||
eprint!(" Chunk {}/{} ({} chars)...", i + 1, chunks.len(), chunk.len());
|
||||
|
||||
let prompt = format!("{}{}", EXTRACTION_PROMPT, chunk);
|
||||
let prompt = format!("{}{}", prompt_prefix, chunk);
|
||||
let response = match llm::call_haiku(&prompt) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue