diff --git a/src/hippocampus/mod.rs b/src/hippocampus/mod.rs
index e7ca92a..9928b43 100644
--- a/src/hippocampus/mod.rs
+++ b/src/hippocampus/mod.rs
@@ -17,4 +17,3 @@ pub mod neuro;
 pub mod counters;
 pub mod migrate;
 pub mod transcript;
-pub mod memory_search;
diff --git a/src/learn/Cargo.toml b/src/learn/Cargo.toml
new file mode 100644
index 0000000..dd37ba0
--- /dev/null
+++ b/src/learn/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "poc-training"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+candle-core = "0.8"
+candle-nn = "0.8"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+anyhow = "1.0"
diff --git a/src/learn/src/apollo_mini.rs b/src/learn/src/apollo_mini.rs
new file mode 100644
index 0000000..b11120c
--- /dev/null
+++ b/src/learn/src/apollo_mini.rs
@@ -0,0 +1,86 @@
+// apollo-mini.rs - APOLLO-Mini optimizer implementation for Rust
+
+use candle_core::{DType, Device, Result, Tensor};
+use std::collections::HashMap;
+
+#[derive(Debug)]
+struct ApolloState {
+    m: Tensor, // First moment (scalar for rank-1 tensor-wise)
+    v: Tensor, // Second moment (scalar for rank-1 tensor-wise)
+    step: usize,
+}
+
+pub struct ApolloMini {
+    lr: f64,
+    betas: (f64, f64),
+    eps: f64,
+    weight_decay: f64,
+    state: HashMap<usize, ApolloState>,
+}
+
+impl ApolloMini {
+    pub fn new(lr: f64) -> Self {
+        Self {
+            lr,
+            betas: (0.9, 0.999),
+            eps: 1e-8,
+            weight_decay: 0.01,
+            state: HashMap::new(),
+        }
+    }
+
+    pub fn step(&mut self, params: &mut [Tensor], grads: &[Tensor]) -> Result<()> {
+        for (i, (p, g)) in params.iter_mut().zip(grads.iter()).enumerate() {
+            let shape = g.shape();
+            if shape.dims().is_empty() || shape.elem_count() == 0 {
+                continue;
+            }
+
+            let state = self.state.entry(i).or_insert_with(|| {
+                let device = g.device();
+                ApolloState {
+                    m: Tensor::zeros((), DType::F32, device).unwrap(),
+                    v: Tensor::zeros((), DType::F32, device).unwrap(),
+                    step: 0,
+                }
+            });
+
+            state.step += 1;
+
+            // APOLLO-Mini: Tensor-wise scaling (rank-1 = scalar)
+            // Compute
gradient norm (scalar)
+            let grad_norm = g.sqr()?.sum_all()?;
+
+            // Update moments (scalars for rank-1)
+            let state_m_new = state.m.mul_scalar(self.betas.0)? + grad_norm.mul_scalar(1.0 - self.betas.0)?;
+            let grad_norm_sq = grad_norm.sqr()?;
+            let state_v_new = state.v.mul_scalar(self.betas.1)? + grad_norm_sq.mul_scalar(1.0 - self.betas.1)?;
+
+            state.m = state_m_new;
+            state.v = state_v_new;
+
+            // Bias correction
+            let bias_correction1 = 1.0 - self.betas.0.powi(state.step as i32);
+            let bias_correction2 = 1.0 - self.betas.1.powi(state.step as i32);
+
+            let m_hat = state.m.div_scalar(bias_correction1)?;
+            let v_hat = state.v.div_scalar(bias_correction2)?;
+
+            // Learning rate scaling (tensor-wise = one scalar for whole tensor)
+            let v_hat_sqrt = v_hat.sqrt()?;
+            let lr_scale = (self.lr * m_hat.to_scalar::<f64>()?) / (v_hat_sqrt.to_scalar::<f64>()? + self.eps);
+
+            // Apply weight decay
+            if self.weight_decay > 0.0 {
+                let decay_factor = 1.0 - lr_scale * self.weight_decay;
+                *p = p.mul_scalar(decay_factor as f32)?;
+            }
+
+            // Apply gradient update
+            let grad_update = g.mul_scalar(lr_scale as f32)?;
+            *p = p.sub(&grad_update)?;
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/learn/src/main.rs b/src/learn/src/main.rs
new file mode 100644
index 0000000..ac96e50
--- /dev/null
+++ b/src/learn/src/main.rs
@@ -0,0 +1,120 @@
+// main.rs - Minimal training loop with synthetic problem
+
+mod apollo_mini;
+mod transcript_dataset;
+
+use apollo_mini::ApolloMini;
+use candle_core::Tensor;
+use anyhow::Result;
+use std::ops::{Sub, Add, Mul};
+
+fn main() -> Result<()> {
+    println!("🚀 APOLLO-Mini Training Test");
+    println!("============================\n");
+
+    // Test 1: Synthetic linear regression problem
+    println!("Test 1: Synthetic linear regression");
+    test_synthetic_problem()?;
+
+    // Test 2: Transcript parsing
+    println!("\nTest 2: Transcript parsing");
+    test_transcript_parsing();
+
+    println!("\n✅ All tests passed!");
+    Ok(())
+}
+
+fn test_synthetic_problem() -> Result<()> {
+    use candle_core::Device;
+
+    // Create a simple linear regression problem: y = 2x + 3
+    // We'll train a model to learn this relationship
+
+    let device = Device::Cpu;
+
+    // Synthetic data: x values
+    let x_data: Tensor = Tensor::new(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &device)?;
+    // Target: y = 2x + 3
+    let y_target: Tensor = Tensor::new(&[5.0f32, 7.0, 9.0, 11.0, 13.0], &device)?;
+
+    // Model parameters (weight and bias)
+    // Start with random values
+    let mut weight: Tensor = Tensor::new(0.5f32, &device)?; // Should learn 2.0
+    let mut bias: Tensor = Tensor::new(0.0f32, &device)?; // Should learn 3.0
+
+    let mut params = vec![weight.clone(), bias.clone()];
+
+    // Setup optimizer
+    let mut optimizer = ApolloMini::new(0.1); // Higher LR for quick convergence
+
+    println!(" Initial: weight = {:.4}, bias = {:.4}",
+        weight.to_scalar::<f32>()?,
+        bias.to_scalar::<f32>()?);
+
+    // Training loop
+    let num_epochs = 100;
+
+    for epoch in 0..num_epochs {
+        // Forward pass: y_pred = weight * x + bias
+        let y_pred = weight.broadcast_mul(&x_data)?
+ bias.clone();
+
+        // Loss: MSE
+        let error = y_pred - y_target.clone();
+        let loss = error.sqr()?.sum_all()?.div_scalar(5.0)?;
+
+        // Backward pass (manual gradient computation for simplicity)
+        // d(loss)/d(weight) = 2 * mean((y_pred - y_target) * x)
+        // d(loss)/d(bias) = 2 * mean(y_pred - y_target)
+        let grad_weight_tensor = error.clone().broadcast_mul(&x_data)?.sum_all()?.mul_scalar(2.0 / 5.0)?;
+        let grad_bias_tensor = error.sum_all()?.mul_scalar(2.0 / 5.0)?;
+
+        let grads = vec![grad_weight_tensor, grad_bias_tensor];
+
+        // Optimizer step
+        optimizer.step(&mut params, &grads)?;
+
+        // Update params
+        weight = params[0].clone();
+        bias = params[1].clone();
+
+        if epoch % 20 == 0 {
+            println!(" Epoch {}: loss = {:.4}, weight = {:.4}, bias = {:.4}",
+                epoch,
+                loss.to_scalar::<f32>()?,
+                weight.to_scalar::<f32>()?,
+                bias.to_scalar::<f32>()?);
+        }
+    }
+
+    println!(" Final: weight = {:.4} (target: 2.0), bias = {:.4} (target: 3.0)",
+        weight.to_scalar::<f32>()?,
+        bias.to_scalar::<f32>()?);
+
+    Ok(())
+}
+
+fn test_transcript_parsing() {
+    // Create a minimal test transcript
+    let test_data = r#"
+{"turn_id": 1, "role": "user", "content": "Hello", "memory_surfaced": false}
+{"turn_id": 2, "role": "assistant", "content": "Hi there!", "memory_surfaced": false}
+{"turn_id": 3, "role": "system", "content": "Memory surfaced", "memory_surfaced": true, "memory_tag": "pattern-test", "hook_output": "test"}
+{"turn_id": 4, "role": "assistant", "content": "I notice the pattern", "memory_surfaced": false}
+{"turn_id": 5, "role": "user", "content": "Good catch", "memory_surfaced": false}
+"#;
+
+    // Write to temp file
+    std::fs::write("/tmp/test_transcript.jsonl", test_data).expect("Failed to write test file");
+
+    // Parse
+    let segments = transcript_dataset::extract_training_segments("/tmp/test_transcript.jsonl");
+
+    println!(" Found {} segments", segments.len());
+    for (i, seg) in segments.iter().enumerate() {
+        println!(" Segment {}: tier={}, tag={}", i, seg.tier, seg.memory_tag);
+        println!("
Text: {}", seg.text.lines().next().unwrap_or(""));
+    }
+
+    assert!(!segments.is_empty(), "Should have found at least one segment");
+    assert_eq!(segments[0].tier, "big"); // pattern-* should be "big"
+}
diff --git a/src/learn/src/transcript_dataset.rs b/src/learn/src/transcript_dataset.rs
new file mode 100644
index 0000000..a06ed0d
--- /dev/null
+++ b/src/learn/src/transcript_dataset.rs
@@ -0,0 +1,93 @@
+// transcript_dataset.rs - Minimal transcript parser
+
+use serde::Deserialize;
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+
+#[derive(Debug, Deserialize)]
+struct TranscriptEntry {
+    turn_id: usize,
+    role: String,
+    content: String,
+    #[serde(default)]
+    memory_surfaced: bool,
+    #[serde(default)]
+    memory_tag: String,
+    #[serde(default)]
+    hook_output: String,
+}
+
+#[derive(Debug)]
+pub struct TrainingSegment {
+    pub text: String,
+    pub tier: String,
+    pub memory_tag: String,
+}
+
+pub fn extract_training_segments(path: &str) -> Vec<TrainingSegment> {
+    let file = File::open(path).expect("Failed to open transcript file");
+    let reader = BufReader::new(file);
+    let mut entries: Vec<TranscriptEntry> = Vec::new();
+
+    for line in reader.lines() {
+        let line = line.expect("Failed to read line");
+        if let Ok(entry) = serde_json::from_str::<TranscriptEntry>(&line) {
+            entries.push(entry);
+        }
+    }
+
+    let mut segments = Vec::new();
+
+    for (i, entry) in entries.iter().enumerate() {
+        // Look for memory surfacing
+        if entry.memory_surfaced || !entry.hook_output.is_empty() {
+            // Extract subsequent turns (the behavior)
+            let subsequent: Vec<&TranscriptEntry> = entries[i..]
+                .iter()
+                .take(4)
+                .filter(|e| !e.memory_surfaced && e.hook_output.is_empty())
+                .collect();
+
+            // Format without memory context
+            let text: Vec<String> = subsequent
+                .iter()
+                .map(|e| format!("{}: {}", e.role, e.content))
+                .collect();
+
+            let text = text.join("\n");
+
+            if !text.is_empty() {
+                segments.push(TrainingSegment {
+                    text,
+                    tier: classify_tier(&entry.memory_tag),
+                    memory_tag: entry.memory_tag.clone(),
+                });
+            }
+        }
+    }
+
+    segments
+}
+
+fn classify_tier(tag: &str) -> String {
+    if tag.contains("pattern") || tag.contains("reflex") {
+        "big".to_string()
+    } else if tag.contains("knowledge") {
+        "deep".to_string()
+    } else {
+        "little".to_string()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_classify_tier() {
+        assert_eq!(classify_tier("pattern-wrapping-up"), "big");
+        assert_eq!(classify_tier("reflex-test"), "big");
+        assert_eq!(classify_tier("knowledge-math"), "deep");
+        assert_eq!(classify_tier("error-fix"), "little");
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 0458b0e..bb1e535 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -55,7 +55,7 @@ pub mod memory_capnp {
 
 pub use hippocampus::{
     store, graph, lookups, cursor, query, similarity, spectral, neuro, counters,
-    transcript, memory_search, migrate, memory,
+    transcript, migrate, memory,
 };
 pub use hippocampus::query::engine as search;
 pub use hippocampus::query::parser as query_parser;
@@ -65,3 +65,5 @@
 pub use subconscious::{
     llm, audit, consolidate, knowledge, enrich, digest, daemon,
 };
+// Backward compat: memory_search moved from hippocampus to subconscious::hook
+pub use subconscious::hook as memory_search;
diff --git a/src/session.rs b/src/session.rs
index 398fd16..54d256e 100644
--- a/src/session.rs
+++ b/src/session.rs
@@ -54,7 +54,7 @@ impl Session {
 
     /// Get the seen set for this session
     pub fn seen(&self) -> HashSet {
-        super::hippocampus::memory_search::load_seen(&self.state_dir, &self.session_id)
+        super::subconscious::hook::load_seen(&self.state_dir,
&self.session_id)
     }
 
     /// Get transcript metadata, resolving the path if needed.
diff --git a/src/hippocampus/memory_search.rs b/src/subconscious/hook.rs
similarity index 97%
rename from src/hippocampus/memory_search.rs
rename to src/subconscious/hook.rs
index c73c855..220ce4f 100644
--- a/src/hippocampus/memory_search.rs
+++ b/src/subconscious/hook.rs
@@ -1,8 +1,9 @@
-// memory-search: context loading + ambient memory retrieval
+// hook — session hook: context injection + agent orchestration
 //
-// Core hook logic lives here as a library module so poc-hook can call
-// it directly (no subprocess). The memory-search binary is a thin CLI
-// wrapper with --hook for debugging and show_seen for inspection.
+// Called on each UserPromptSubmit to inject memory context and
+// orchestrate subconscious agents (surface-observe, journal, reflect).
+// Lives in subconscious/ because it's agent orchestration, not
+// memory storage. The memory-search binary is a thin CLI wrapper.
 
 use std::collections::HashSet;
 use std::fs;
diff --git a/src/subconscious/mod.rs b/src/subconscious/mod.rs
index 1f889bd..5553725 100644
--- a/src/subconscious/mod.rs
+++ b/src/subconscious/mod.rs
@@ -13,8 +13,10 @@
 // enrich — journal enrichment, experience mining
 // digest — episodic digest generation (daily/weekly/monthly)
 // daemon — background job scheduler
+// hook — session hook: context injection, agent orchestration
 // transcript — shared JSONL transcript parsing
 
+pub mod hook;
 pub mod transcript;
 pub mod api;
 pub mod llm;