move memory_search from hippocampus to subconscious/hook
memory_search.rs is agent orchestration (surface-observe, journal, reflect cycles), not memory storage. Rename to hook.rs and move to subconscious/ where it belongs. Backward compat: pub use subconscious::hook as memory_search in lib.rs so existing crate::memory_search paths still resolve.
This commit is contained in:
parent
3a8383ba37
commit
8ee0d90388
9 changed files with 321 additions and 7 deletions
|
|
@ -17,4 +17,3 @@ pub mod neuro;
|
||||||
pub mod counters;
|
pub mod counters;
|
||||||
pub mod migrate;
|
pub mod migrate;
|
||||||
pub mod transcript;
|
pub mod transcript;
|
||||||
pub mod memory_search;
|
|
||||||
|
|
|
||||||
11
src/learn/Cargo.toml
Normal file
11
src/learn/Cargo.toml
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
[package]
|
||||||
|
name = "poc-training"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
candle-core = "0.8"
|
||||||
|
candle-nn = "0.8"
|
||||||
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
serde_json = "1.0"
|
||||||
|
anyhow = "1.0"
|
||||||
86
src/learn/src/apollo_mini.rs
Normal file
86
src/learn/src/apollo_mini.rs
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
// apollo-mini.rs - APOLLO-Mini optimizer implementation for Rust
|
||||||
|
|
||||||
|
use candle_core::{DType, Device, Result, Tensor};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// Per-parameter optimizer state for APOLLO-Mini.
///
/// With tensor-wise (rank-1) scaling, both moment estimates collapse to a
/// single scalar per parameter tensor, stored here as 0-dim F32 tensors on
/// the parameter's device (see the `or_insert_with` in `ApolloMini::step`).
#[derive(Debug)]
struct ApolloState {
    m: Tensor, // First moment (scalar for rank-1 tensor-wise)
    v: Tensor, // Second moment (scalar for rank-1 tensor-wise)
    // Number of optimizer steps applied to this parameter; feeds the
    // Adam-style bias-correction terms in `step`.
    step: usize,
}
|
||||||
|
|
||||||
|
/// APOLLO-Mini optimizer: Adam-style first/second moment tracking, but with
/// tensor-wise (rank-1) scaling — one scalar learning-rate adjustment per
/// parameter tensor instead of per element.
pub struct ApolloMini {
    lr: f64,           // base learning rate
    betas: (f64, f64), // EMA decay rates for the first and second moments
    eps: f64,          // denominator fudge term to avoid division by zero
    weight_decay: f64, // decoupled multiplicative weight decay coefficient
    // Per-parameter state, keyed by the parameter's index in the slice
    // passed to `step` — callers must keep param ordering stable across calls.
    state: HashMap<usize, ApolloState>,
}
|
||||||
|
|
||||||
|
impl ApolloMini {
|
||||||
|
pub fn new(lr: f64) -> Self {
|
||||||
|
Self {
|
||||||
|
lr,
|
||||||
|
betas: (0.9, 0.999),
|
||||||
|
eps: 1e-8,
|
||||||
|
weight_decay: 0.01,
|
||||||
|
state: HashMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn step(&mut self, params: &mut [Tensor], grads: &[Tensor]) -> Result<()> {
|
||||||
|
for (i, (p, g)) in params.iter_mut().zip(grads.iter()).enumerate() {
|
||||||
|
let shape = g.shape();
|
||||||
|
if shape.dims().is_empty() || shape.elem_count() == 0 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let state = self.state.entry(i).or_insert_with(|| {
|
||||||
|
let device = g.device();
|
||||||
|
ApolloState {
|
||||||
|
m: Tensor::zeros((), DType::F32, device).unwrap(),
|
||||||
|
v: Tensor::zeros((), DType::F32, device).unwrap(),
|
||||||
|
step: 0,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
state.step += 1;
|
||||||
|
|
||||||
|
// APOLLO-Mini: Tensor-wise scaling (rank-1 = scalar)
|
||||||
|
// Compute gradient norm (scalar)
|
||||||
|
let grad_norm = g.sqr()?.sum_all()?;
|
||||||
|
|
||||||
|
// Update moments (scalars for rank-1)
|
||||||
|
let state_m_new = state.m.mul_scalar(self.betas.0)? + grad_norm.mul_scalar(1.0 - self.betas.0)?;
|
||||||
|
let grad_norm_sq = grad_norm.sqr()?;
|
||||||
|
let state_v_new = state.v.mul_scalar(self.betas.1)? + grad_norm_sq.mul_scalar(1.0 - self.betas.1)?;
|
||||||
|
|
||||||
|
state.m = state_m_new;
|
||||||
|
state.v = state_v_new;
|
||||||
|
|
||||||
|
// Bias correction
|
||||||
|
let bias_correction1 = 1.0 - self.betas.0.powi(state.step as i32);
|
||||||
|
let bias_correction2 = 1.0 - self.betas.1.powi(state.step as i32);
|
||||||
|
|
||||||
|
let m_hat = state.m.div_scalar(bias_correction1)?;
|
||||||
|
let v_hat = state.v.div_scalar(bias_correction2)?;
|
||||||
|
|
||||||
|
// Learning rate scaling (tensor-wise = one scalar for whole tensor)
|
||||||
|
let v_hat_sqrt = v_hat.sqrt()?;
|
||||||
|
let lr_scale = (self.lr * m_hat.to_scalar::<f64>()?) / (v_hat_sqrt.to_scalar::<f64>()? + self.eps);
|
||||||
|
|
||||||
|
// Apply weight decay
|
||||||
|
if self.weight_decay > 0.0 {
|
||||||
|
let decay_factor = 1.0 - lr_scale * self.weight_decay;
|
||||||
|
*p = p.mul_scalar(decay_factor as f32)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply gradient update
|
||||||
|
let grad_update = g.mul_scalar(lr_scale as f32)?;
|
||||||
|
*p = p.sub(&grad_update)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
120
src/learn/src/main.rs
Normal file
120
src/learn/src/main.rs
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
// main.rs - Minimal training loop with synthetic problem
|
||||||
|
|
||||||
|
mod apollo_mini;
|
||||||
|
mod transcript_dataset;
|
||||||
|
|
||||||
|
use apollo_mini::ApolloMini;
|
||||||
|
use candle_core::Tensor;
|
||||||
|
use anyhow::Result;
|
||||||
|
use std::ops::{Sub, Add, Mul};
|
||||||
|
|
||||||
|
/// Binary entry point: smoke-tests the APOLLO-Mini optimizer on a synthetic
/// regression task, then the JSONL transcript parser on a fixture file.
fn main() -> Result<()> {
    // Banner (single write; byte-identical output to two println!s).
    print!("🚀 APOLLO-Mini Training Test\n============================\n\n");

    // Check 1: the optimizer should drive y = wx + b toward y = 2x + 3.
    println!("Test 1: Synthetic linear regression");
    test_synthetic_problem()?;

    // Check 2: the transcript fixture parses into training segments.
    println!("\nTest 2: Transcript parsing");
    test_transcript_parsing();

    println!("\n✅ All tests passed!");
    Ok(())
}
|
||||||
|
|
||||||
|
/// Train a 1-D linear model (scalar weight + scalar bias) on five points of
/// y = 2x + 3 using manually-derived MSE gradients, printing progress every
/// 20 epochs. Exercises `ApolloMini::step` end to end on CPU.
///
/// # Errors
/// Propagates any candle tensor-op error.
fn test_synthetic_problem() -> Result<()> {
    use candle_core::Device;

    // Create a simple linear regression problem: y = 2x + 3
    // We'll train a model to learn this relationship

    let device = Device::Cpu;

    // Synthetic data: x values
    let x_data: Tensor = Tensor::new(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &device)?;
    // Target: y = 2x + 3
    let y_target: Tensor = Tensor::new(&[5.0f32, 7.0, 9.0, 11.0, 13.0], &device)?;

    // Model parameters: fixed starting guesses (not random), both rank-0
    // scalar tensors.
    let mut weight: Tensor = Tensor::new(0.5f32, &device)?; // Should learn 2.0
    let mut bias: Tensor = Tensor::new(0.0f32, &device)?; // Should learn 3.0

    // The optimizer mutates this vec; we copy back into weight/bias below.
    let mut params = vec![weight.clone(), bias.clone()];

    // Setup optimizer
    let mut optimizer = ApolloMini::new(0.1); // Higher LR for quick convergence

    println!(" Initial: weight = {:.4}, bias = {:.4}",
        weight.to_scalar::<f32>()?,
        bias.to_scalar::<f32>()?);

    // Training loop
    let num_epochs = 100;

    for epoch in 0..num_epochs {
        // Forward pass: y_pred = weight * x + bias
        // NOTE(review): candle's `+`/`-` on tensors return `Result`; confirm
        // this chain compiles without additional `?`s as written.
        let y_pred = weight.broadcast_mul(&x_data)? + bias.clone();

        // Loss: MSE over the 5 samples (hence the /5.0 factors below).
        let error = y_pred - y_target.clone();
        let loss = error.sqr()?.sum_all()?.div_scalar(5.0)?;

        // Backward pass (manual gradient computation for simplicity)
        // d(loss)/d(weight) = 2 * mean((y_pred - y_target) * x)
        // d(loss)/d(bias) = 2 * mean(y_pred - y_target)
        let grad_weight_tensor = error.clone().broadcast_mul(&x_data)?.sum_all()?.mul_scalar(2.0 / 5.0)?;
        let grad_bias_tensor = error.sum_all()?.mul_scalar(2.0 / 5.0)?;

        let grads = vec![grad_weight_tensor, grad_bias_tensor];

        // Optimizer step
        optimizer.step(&mut params, &grads)?;

        // Copy the optimizer's in-place updates back to the named params.
        weight = params[0].clone();
        bias = params[1].clone();

        if epoch % 20 == 0 {
            println!(" Epoch {}: loss = {:.4}, weight = {:.4}, bias = {:.4}",
                epoch,
                loss.to_scalar::<f32>()?,
                weight.to_scalar::<f32>()?,
                bias.to_scalar::<f32>()?);
        }
    }

    println!(" Final: weight = {:.4} (target: 2.0), bias = {:.4} (target: 3.0)",
        weight.to_scalar::<f32>()?,
        bias.to_scalar::<f32>()?);

    Ok(())
}
|
||||||
|
|
||||||
|
fn test_transcript_parsing() {
|
||||||
|
// Create a minimal test transcript
|
||||||
|
let test_data = r#"
|
||||||
|
{"turn_id": 1, "role": "user", "content": "Hello", "memory_surfaced": false}
|
||||||
|
{"turn_id": 2, "role": "assistant", "content": "Hi there!", "memory_surfaced": false}
|
||||||
|
{"turn_id": 3, "role": "system", "content": "Memory surfaced", "memory_surfaced": true, "memory_tag": "pattern-test", "hook_output": "test"}
|
||||||
|
{"turn_id": 4, "role": "assistant", "content": "I notice the pattern", "memory_surfaced": false}
|
||||||
|
{"turn_id": 5, "role": "user", "content": "Good catch", "memory_surfaced": false}
|
||||||
|
"#;
|
||||||
|
|
||||||
|
// Write to temp file
|
||||||
|
std::fs::write("/tmp/test_transcript.jsonl", test_data).expect("Failed to write test file");
|
||||||
|
|
||||||
|
// Parse
|
||||||
|
let segments = transcript_dataset::extract_training_segments("/tmp/test_transcript.jsonl");
|
||||||
|
|
||||||
|
println!(" Found {} segments", segments.len());
|
||||||
|
for (i, seg) in segments.iter().enumerate() {
|
||||||
|
println!(" Segment {}: tier={}, tag={}", i, seg.tier, seg.memory_tag);
|
||||||
|
println!(" Text: {}", seg.text.lines().next().unwrap_or(""));
|
||||||
|
}
|
||||||
|
|
||||||
|
assert!(!segments.is_empty(), "Should have found at least one segment");
|
||||||
|
assert_eq!(segments[0].tier, "big"); // pattern-* should be "big"
|
||||||
|
}
|
||||||
93
src/learn/src/transcript_dataset.rs
Normal file
93
src/learn/src/transcript_dataset.rs
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
// transcript_dataset.rs - Minimal transcript parser
|
||||||
|
|
||||||
|
use serde::Deserialize;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{BufRead, BufReader};
|
||||||
|
|
||||||
|
/// One line of a JSONL session transcript.
///
/// Only the fields needed for segment extraction are modeled; missing
/// optional fields take their `Default` value via `#[serde(default)]`
/// (false / empty string).
#[derive(Debug, Deserialize)]
struct TranscriptEntry {
    /// Position of the turn within the session (parsed but not used by
    /// the extraction logic).
    turn_id: usize,
    /// Speaker role — the fixture uses "user", "assistant", "system".
    role: String,
    /// The turn's text content.
    content: String,
    /// True on turns where a memory was surfaced to the agent.
    #[serde(default)]
    memory_surfaced: bool,
    /// Tag of the surfaced memory (e.g. "pattern-…"); drives tier
    /// classification. Empty when nothing surfaced.
    #[serde(default)]
    memory_tag: String,
    /// Raw hook output; a non-empty value also marks a surfacing event.
    #[serde(default)]
    hook_output: String,
}
|
||||||
|
|
||||||
|
/// A training example extracted from a transcript: the turns that followed
/// a memory-surfacing event, plus how that memory was classified.
#[derive(Debug)]
pub struct TrainingSegment {
    /// "role: content" lines of the follow-up turns, joined with newlines.
    pub text: String,
    /// Tier bucket from `classify_tier`: "big", "deep" or "little".
    pub tier: String,
    /// The raw memory tag of the entry that triggered this segment.
    pub memory_tag: String,
}
|
||||||
|
|
||||||
|
pub fn extract_training_segments(path: &str) -> Vec<TrainingSegment> {
|
||||||
|
let file = File::open(path).expect("Failed to open transcript file");
|
||||||
|
let reader = BufReader::new(file);
|
||||||
|
let mut entries: Vec<TranscriptEntry> = Vec::new();
|
||||||
|
|
||||||
|
for line in reader.lines() {
|
||||||
|
let line = line.expect("Failed to read line");
|
||||||
|
if let Ok(entry) = serde_json::from_str::<TranscriptEntry>(&line) {
|
||||||
|
entries.push(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut segments = Vec::new();
|
||||||
|
|
||||||
|
for (i, entry) in entries.iter().enumerate() {
|
||||||
|
// Look for memory surfacing
|
||||||
|
if entry.memory_surfaced || !entry.hook_output.is_empty() {
|
||||||
|
// Extract subsequent turns (the behavior)
|
||||||
|
let subsequent: Vec<&TranscriptEntry> = entries[i..]
|
||||||
|
.iter()
|
||||||
|
.take(4)
|
||||||
|
.filter(|e| !e.memory_surfaced && e.hook_output.is_empty())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Format without memory context
|
||||||
|
let text: Vec<String> = subsequent
|
||||||
|
.iter()
|
||||||
|
.map(|e| format!("{}: {}", e.role, e.content))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let text = text.join("\n");
|
||||||
|
|
||||||
|
if !text.is_empty() {
|
||||||
|
segments.push(TrainingSegment {
|
||||||
|
text,
|
||||||
|
tier: classify_tier(&entry.memory_tag),
|
||||||
|
memory_tag: entry.memory_tag.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
segments
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Map a memory tag to its training tier: tags mentioning "pattern" or
/// "reflex" are "big", tags mentioning "knowledge" are "deep", and
/// everything else (including the empty tag) is "little". "pattern"/
/// "reflex" take precedence over "knowledge".
fn classify_tier(tag: &str) -> String {
    let tier = match tag {
        t if t.contains("pattern") || t.contains("reflex") => "big",
        t if t.contains("knowledge") => "deep",
        _ => "little",
    };
    tier.to_owned()
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // Pins the tier mapping: pattern/reflex → "big", knowledge → "deep",
    // anything else → "little".
    #[test]
    fn test_classify_tier() {
        assert_eq!(classify_tier("pattern-wrapping-up"), "big");
        assert_eq!(classify_tier("reflex-test"), "big");
        assert_eq!(classify_tier("knowledge-math"), "deep");
        assert_eq!(classify_tier("error-fix"), "little");
    }
}
|
||||||
|
|
@ -55,7 +55,7 @@ pub mod memory_capnp {
|
||||||
pub use hippocampus::{
|
pub use hippocampus::{
|
||||||
store, graph, lookups, cursor, query,
|
store, graph, lookups, cursor, query,
|
||||||
similarity, spectral, neuro, counters,
|
similarity, spectral, neuro, counters,
|
||||||
transcript, memory_search, migrate, memory,
|
transcript, migrate, memory,
|
||||||
};
|
};
|
||||||
pub use hippocampus::query::engine as search;
|
pub use hippocampus::query::engine as search;
|
||||||
pub use hippocampus::query::parser as query_parser;
|
pub use hippocampus::query::parser as query_parser;
|
||||||
|
|
@ -65,3 +65,5 @@ pub use subconscious::{
|
||||||
llm, audit, consolidate, knowledge,
|
llm, audit, consolidate, knowledge,
|
||||||
enrich, digest, daemon,
|
enrich, digest, daemon,
|
||||||
};
|
};
|
||||||
|
// Backward compat: memory_search moved from hippocampus to subconscious::hook
|
||||||
|
pub use subconscious::hook as memory_search;
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@ impl Session {
|
||||||
|
|
||||||
/// Get the seen set for this session
|
/// Get the seen set for this session
|
||||||
pub fn seen(&self) -> HashSet<String> {
|
pub fn seen(&self) -> HashSet<String> {
|
||||||
super::hippocampus::memory_search::load_seen(&self.state_dir, &self.session_id)
|
super::subconscious::hook::load_seen(&self.state_dir, &self.session_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get transcript metadata, resolving the path if needed.
|
/// Get transcript metadata, resolving the path if needed.
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
// memory-search: context loading + ambient memory retrieval
|
// hook — session hook: context injection + agent orchestration
|
||||||
//
|
//
|
||||||
// Core hook logic lives here as a library module so poc-hook can call
|
// Called on each UserPromptSubmit to inject memory context and
|
||||||
// it directly (no subprocess). The memory-search binary is a thin CLI
|
// orchestrate subconscious agents (surface-observe, journal, reflect).
|
||||||
// wrapper with --hook for debugging and show_seen for inspection.
|
// Lives in subconscious/ because it's agent orchestration, not
|
||||||
|
// memory storage. The memory-search binary is a thin CLI wrapper.
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
|
|
@ -13,8 +13,10 @@
|
||||||
// enrich — journal enrichment, experience mining
|
// enrich — journal enrichment, experience mining
|
||||||
// digest — episodic digest generation (daily/weekly/monthly)
|
// digest — episodic digest generation (daily/weekly/monthly)
|
||||||
// daemon — background job scheduler
|
// daemon — background job scheduler
|
||||||
|
// hook — session hook: context injection, agent orchestration
|
||||||
// transcript — shared JSONL transcript parsing
|
// transcript — shared JSONL transcript parsing
|
||||||
|
|
||||||
|
pub mod hook;
|
||||||
pub mod transcript;
|
pub mod transcript;
|
||||||
pub mod api;
|
pub mod api;
|
||||||
pub mod llm;
|
pub mod llm;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue