consciousness/src/main.rs

#![feature(panic_backtrace_config)]

// poc-memory: graph-structured memory for AI assistants
//
// Authors: ProofOfConcept <poc@bcachefs.org> and Kent Overstreet
// License: MIT OR Apache-2.0
//
// Architecture:
//   nodes.capnp     - append-only content node log
//   relations.capnp - append-only relation log
//   state.bin       - derived KV cache (rebuilt from logs when stale)
//
// Graph algorithms: clustering coefficient, community detection (label
// propagation), schema fit scoring, small-world metrics, consolidation
// priority. Text similarity via BM25 with Porter stemming.
//
// Neuroscience-inspired: spaced repetition replay, emotional gating,
// interference detection, schema assimilation, reconsolidation.

use consciousness::*;

use clap::{Parser, Subcommand};

use std::process;

/// Graph-structured memory store
// Top-level argument parser for the `poc-memory` binary. It carries no
// options of its own: everything is expressed through the `Command`
// subcommand tree.
#[derive(Parser)]
#[command(name = "poc-memory", version = "0.4.0", about = "Graph-structured memory store")]
struct Cli {
    // The selected top-level subcommand; `main` executes it via `Run::run`.
    #[command(subcommand)]
    command: Command,
}

// Top-level subcommands of `poc-memory`.
//
// The `///` comments in this enum are not just documentation: clap turns them
// into the help text shown to users, so they must describe what the CLI
// actually offers. Variants wrapping another enum (`Node`, `Journal`,
// `GraphCmd`, `Agent`, `Admin`) are command groups; their help lines list
// subcommands that exist in the nested enum.
//
// Positional `Vec<String>` fields (keys, queries, expressions) accept several
// words; they are handed to the handler as a slice.
#[derive(Subcommand)]
enum Command {
    // ── Core (daily use) ──────────────────────────────────────────────

    /// Search memory (AND logic across terms)
    ///
    /// Pipeline: -p spread -p spectral,k=20
    /// Default pipeline: spread
    Search {
        /// Search terms
        query: Vec<String>,
        /// Algorithm pipeline stages (repeatable)
        #[arg(short, long = "pipeline")]
        pipeline: Vec<String>,
        /// Show more results
        #[arg(long)]
        expand: bool,
        /// Show node content, not just keys
        #[arg(long)]
        full: bool,
        /// Show debug output for each pipeline stage
        #[arg(long)]
        debug: bool,
        /// Also match key components (e.g. "irc" matches "irc-access")
        #[arg(long)]
        fuzzy: bool,
        /// Also search node content (slow, use when graph search misses)
        #[arg(long)]
        content: bool,
    },
    /// Output a node's content to stdout
    Render {
        /// Node key
        key: Vec<String>,
    },
    /// Upsert node content from stdin
    Write {
        /// Node key
        key: Vec<String>,
    },
    /// Edit a node in $EDITOR
    Edit {
        /// Node key
        key: Vec<String>,
    },
    /// Show all stored versions of a node
    History {
        /// Show full content for every version
        #[arg(long)]
        full: bool,
        /// Node key
        key: Vec<String>,
    },
    /// Show most recent writes to the node log
    Tail {
        /// Number of entries (default: 20)
        #[arg(default_value_t = 20)]
        n: usize,
        /// Show full content
        #[arg(long)]
        full: bool,
        /// Filter by provenance (substring match, e.g. "surface-observe")
        #[arg(long, short)]
        provenance: Option<String>,
        /// Show all versions (default: dedup to latest per key)
        #[arg(long)]
        all_versions: bool,
    },
    /// Summary of memory state
    Status,
    /// Query the memory graph
    #[command(after_long_help = "\
EXPRESSIONS:
  *                                all nodes
  key ~ 'pattern'                  regex match on node key
  content ~ 'phrase'               regex match on node content
  degree > 15                      numeric comparison on any field
  field = value                    exact match
  field != value                   not equal
  expr AND expr                    boolean AND
  expr OR expr                     boolean OR
  NOT expr                         negation
  neighbors('key')                 nodes linked to key
  neighbors('key') WHERE expr      ... with filter on edges/nodes

FIELDS:
  key, weight, content, degree, node_type, provenance,
  emotion, retrievals, uses, wrongs, created,
  clustering_coefficient (cc), community_id

OPERATORS:
  >  <  >=  <=  =  !=  ~(regex)

PIPE STAGES:
  | sort FIELD [asc]               sort (desc by default)
  | limit N                        cap results
  | select F,F,...                 output fields as TSV
  | count                          just show count
  | connectivity                   show graph structure between results

FUNCTIONS:
  community('key')                 community id of a node
  degree('key')                    degree of a node

EXAMPLES:
  key ~ 'inner-life'                         substring match on keys
  content ~ 'made love'                      full-text search
  content ~ 'made love' | connectivity       find clusters among results
  (content ~ 'A' OR content ~ 'B') | connectivity
  degree > 15 | sort degree | limit 10       high-degree nodes
  key ~ 'journal' AND degree > 10 | count    count matching nodes
  neighbors('identity') WHERE strength > 0.5 | sort strength
  * | sort weight asc | limit 20             lowest-weight nodes
")]
    Query {
        /// Query expression (e.g. "key ~ 'inner-life'")
        expr: Vec<String>,
    },
    /// Set a node's weight directly
    #[command(name = "weight-set")]
    WeightSet {
        /// Node key
        key: String,
        /// Weight (0.01 to 1.0)
        // The documented range is not enforced by the parser; any f32 is accepted here.
        weight: f32,
    },

    // ── Node operations ───────────────────────────────────────────────

    /// Node operations (delete, rename)
    #[command(subcommand)]
    Node(NodeCmd),

    // ── Journal ───────────────────────────────────────────────────────

    /// Journal operations (write, tail)
    #[command(subcommand)]
    Journal(JournalCmd),

    // ── Graph ─────────────────────────────────────────────────────────

    /// Graph operations (link, spread, trace)
    // Exposed as `graph`; the variant is named `GraphCmd` after the enum it wraps.
    #[command(subcommand, name = "graph")]
    GraphCmd(GraphCmd),

    // ── Agents ────────────────────────────────────────────────────────

    /// Agent and daemon operations
    #[command(subcommand)]
    Agent(AgentCmd),

    // ── Admin ─────────────────────────────────────────────────────────

    /// Admin operations (fsck, health, import, export)
    #[command(subcommand)]
    Admin(AdminCmd),
}

// Subcommands of the `node` group. The `///` comments are the help text clap
// shows to users; this enum's `Run` impl forwards each variant to a
// `cli::node` handler.
#[derive(Subcommand)]
enum NodeCmd {
    /// Soft-delete a node
    Delete {
        /// Node key
        // Positional; accepts several words, handed to the handler as a slice.
        key: Vec<String>,
    },
    /// Rename a node key
    Rename {
        /// Old key
        // Unlike `Delete`, both keys are single positional values.
        old_key: String,
        /// New key
        new_key: String,
    },
}

// Subcommands of the `journal` group. The `///` comments are the help text
// clap shows to users; this enum's `Run` impl forwards each variant to a
// `cli::journal` handler.
#[derive(Subcommand)]
enum JournalCmd {
    /// Write a journal entry to the store
    Write {
        /// Entry name (becomes the node key)
        name: String,
        /// Entry text
        // Remaining positional words; passed to the handler as a slice.
        text: Vec<String>,
    },
    /// Show recent journal/digest entries
    Tail {
        /// Number of entries to show (default: 20)
        // Optional positional argument.
        #[arg(default_value_t = 20)]
        n: usize,
        /// Show full content
        #[arg(long)]
        full: bool,
        /// Digest level: 0/journal, 1/daily, 2/weekly, 3/monthly
        // Parsed as a plain u8: values above 3 are not rejected by the parser.
        #[arg(long, default_value_t = 0)]
        level: u8,
    },
}

// Subcommands of the `graph` group. The `///` comments are the help text clap
// shows to users; this enum's `Run` impl forwards each variant to a
// `cli::graph` handler.
#[derive(Subcommand)]
enum GraphCmd {
    /// Show neighbors of a node
    Link {
        /// Node key
        // Positional; accepts several words, handed to the handler as a slice.
        key: Vec<String>,
    },
    /// Find related nodes via spreading activation from seed nodes
    Spread {
        /// Seed node keys
        keys: Vec<String>,
        /// Maximum results (default: 20)
        // Short flag only (`-n`); no long form is declared.
        #[arg(short = 'n', default_value_t = 20)]
        max_results: usize,
    },
    /// Add a link between two nodes
    #[command(name = "link-add")]
    LinkAdd {
        /// Source node key
        source: String,
        /// Target node key
        target: String,
        /// Optional reason
        // Any words after the two keys; may be empty.
        reason: Vec<String>,
    },
    /// Set strength of an existing link
    #[command(name = "link-set")]
    LinkSet {
        /// Source node key
        source: String,
        /// Target node key
        target: String,
        /// Strength (0.0–1.0)
        // The documented range is not enforced by the parser.
        strength: f32,
    },
    /// Simulate adding an edge, report topology impact
    #[command(name = "link-impact")]
    LinkImpact {
        /// Source node key
        source: String,
        /// Target node key
        target: String,
    },
    /// Cap node degree by pruning weak auto edges
    #[command(name = "cap-degree")]
    CapDegree {
        /// Maximum degree (default: 50)
        // Optional positional argument.
        #[arg(default_value_t = 50)]
        max_degree: usize,
    },
    /// Set link strengths from neighborhood overlap (Jaccard similarity)
    #[command(name = "normalize-strengths")]
    NormalizeStrengths {
        /// Apply changes (default: dry run)
        #[arg(long)]
        apply: bool,
    },
    /// Walk temporal links: semantic ↔ episodic ↔ conversation
    Trace {
        /// Node key
        key: Vec<String>,
    },
    /// Show communities sorted by isolation (most isolated first)
    Communities {
        /// Number of communities to show
        // Optional positional argument.
        #[arg(default_value_t = 20)]
        top_n: usize,
        /// Minimum community size to show
        #[arg(long, default_value_t = 2)]
        min_size: usize,
    },
    /// Show graph structure overview
    Overview,
    /// Diagnose duplicate/overlapping nodes for a topic cluster
    Organize {
        /// Search term (matches node keys; also content unless --key-only)
        term: String,
        /// Similarity threshold for pair reporting (default: 0.4)
        // NOTE(review): this value is parsed but never forwarded; the `Run`
        // impl for `GraphCmd` drops it with `..` when calling
        // `cli::graph::cmd_organize`. Confirm whether the flag should be
        // wired through or removed.
        #[arg(long, default_value_t = 0.4)]
        threshold: f32,
        /// Only match node keys, not content
        #[arg(long)]
        key_only: bool,
        /// Create anchor node for the search term and link to cluster
        #[arg(long)]
        anchor: bool,
    },
}

// Subcommands of the `agent` group. The `///` comments are the help text clap
// shows to users; this enum's `Run` impl forwards each variant to a
// `cli::agent` handler.
#[derive(Subcommand)]
enum AgentCmd {
    /// Parse and apply links from digest nodes
    #[command(name = "digest-links")]
    DigestLinks {
        /// Apply the links (default: dry run)
        #[arg(long)]
        apply: bool,
    },
    /// Run a single agent by name
    Run {
        /// Agent name (e.g. observation, linker, distill)
        agent: String,
        /// Batch size (number of seed nodes/fragments)
        #[arg(long, default_value_t = 5)]
        count: usize,
        /// Target specific node keys (overrides agent's query)
        // A `Vec` behind `--target`, so the flag can supply several keys.
        #[arg(long)]
        target: Vec<String>,
        /// Run agent on each result of a query (e.g. 'key ~ "bcachefs" | limit 10')
        #[arg(long)]
        query: Option<String>,
        /// Dry run — set POC_MEMORY_DRY_RUN=1 so mutations are no-ops
        #[arg(long)]
        dry_run: bool,
        /// Run locally instead of queuing to daemon
        #[arg(long)]
        local: bool,
        /// Directory for agent output/input state (persists across runs)
        #[arg(long)]
        state_dir: Option<String>,
    },
    /// Show spaced repetition replay queue
    #[command(name = "replay-queue")]
    ReplayQueue {
        /// Number of items to show
        #[arg(long, default_value_t = 10)]
        count: usize,
    },
}

// Subcommands of the `admin` group. The `///` comments are the help text clap
// shows to users. This enum's `Run` impl forwards most variants to
// `cli::admin` handlers; `LoadContext`, `Log` and `Params` go to `cli::misc`,
// and `MigrateTranscriptProgress` is handled inline against the store.
#[derive(Subcommand)]
enum AdminCmd {
    /// Scan markdown files, index all memory units
    Init,
    /// Report graph metrics (CC, communities, small-world)
    Health,
    /// Run consistency checks and repair
    Fsck,
    /// Find and merge duplicate nodes (same key, multiple UUIDs)
    Dedup {
        /// Apply the merge (default: dry run)
        #[arg(long)]
        apply: bool,
    },
    /// Bulk rename: replace a character in all keys
    #[command(name = "bulk-rename")]
    BulkRename {
        /// Character to replace
        // Typed as `String`, so the parser does not restrict this to a single character.
        from: String,
        /// Replacement character
        to: String,
        /// Apply changes (default: dry run)
        #[arg(long)]
        apply: bool,
    },
    /// Brief metrics check (for cron/notifications)
    #[command(name = "daily-check")]
    DailyCheck,
    /// Import markdown file(s) into the store
    Import {
        /// File paths
        files: Vec<String>,
    },
    /// Export store nodes to markdown file(s)
    Export {
        /// File keys to export (or --all)
        files: Vec<String>,
        /// Export all file-level nodes
        #[arg(long)]
        all: bool,
    },
    /// Output session-start context from the store
    #[command(name = "load-context")]
    LoadContext {
        /// Show word count statistics instead of content
        #[arg(long)]
        stats: bool,
    },
    /// Show recent retrieval log
    Log,
    /// Show current parameters
    Params,
    /// Migrate transcript stub nodes to progress log
    #[command(name = "migrate-transcript-progress")]
    MigrateTranscriptProgress,
}

/// Print help with subcommands expanded to show nested commands.
fn print_help() {
    use clap::CommandFactory;
    let cmd = Cli::command();

    println!("poc-memory - graph-structured memory store");
    println!("usage: poc-memory <command> [<args>]\n");

    for sub in cmd.get_subcommands() {
        if sub.get_name() == "help" { continue }
        let children: Vec<_> = sub.get_subcommands()
            .filter(|c| c.get_name() != "help")
            .collect();
        if !children.is_empty() {
            for child in &children {
                let about = child.get_about().map(|s| s.to_string()).unwrap_or_default();
                let full = format!("{} {}", sub.get_name(), child.get_name());
                // Recurse one more level for daemon subcommands etc.
                let grandchildren: Vec<_> = child.get_subcommands()
                    .filter(|c| c.get_name() != "help")
                    .collect();
                if !grandchildren.is_empty() {
                    for gc in grandchildren {
                        let gc_about = gc.get_about().map(|s| s.to_string()).unwrap_or_default();
                        let gc_full = format!("{} {}", full, gc.get_name());
                        println!("  {:<34}{gc_about}", gc_full);
                    }
                } else {
                    println!("  {:<34}{about}", full);
                }
            }
        } else {
            let about = sub.get_about().map(|s| s.to_string()).unwrap_or_default();
            println!("  {:<34}{about}", sub.get_name());
        }
    }
}

// ── Dispatch ─────────────────────────────────────────────────────────

/// A parsed CLI command that can be executed.
///
/// Implemented by every command enum in this file so that command groups can
/// delegate to their nested enum with a plain `sub.run()`.
trait Run {
    /// Consume the parsed command and execute it.
    ///
    /// On failure the `Err` carries a message string; `main` prints it to
    /// stderr prefixed with `Error: ` and exits with status 1.
    fn run(self) -> Result<(), String>;
}

impl Run for Command {
    fn run(self) -> Result<(), String> {
        match self {
            Self::Search { query, pipeline, expand, full, debug, fuzzy, content }
                => cli::misc::cmd_search(&query, &pipeline, expand, full, debug, fuzzy, content),
            Self::Render { key }        => cli::node::cmd_render(&key),
            Self::Write { key }         => cli::node::cmd_write(&key),
            Self::Edit { key }          => cli::node::cmd_edit(&key),
            Self::History { full, key }  => cli::node::cmd_history(&key, full),
            Self::Tail { n, full, provenance, all_versions }
                => cli::journal::cmd_tail(n, full, provenance.as_deref(), !all_versions),
            Self::Status                 => cli::misc::cmd_status(),
            Self::Query { expr }         => cli::misc::cmd_query(&expr),
            Self::WeightSet { key, weight } => cli::node::cmd_weight_set(&key, weight),
            Self::Node(sub)              => sub.run(),
            Self::Journal(sub)           => sub.run(),
            Self::GraphCmd(sub)          => sub.run(),
            Self::Agent(sub)             => sub.run(),
            Self::Admin(sub)             => sub.run(),
            // mcp-schema moved to consciousness-mcp binary
        }
    }
}

impl Run for NodeCmd {
    fn run(self) -> Result<(), String> {
        match self {
            Self::Delete { key }            => cli::node::cmd_node_delete(&key),
            Self::Rename { old_key, new_key } => cli::node::cmd_node_rename(&old_key, &new_key),
        }
    }
}

impl Run for JournalCmd {
    fn run(self) -> Result<(), String> {
        match self {
            Self::Write { name, text }                    => cli::journal::cmd_journal_write(&name, &text),
            Self::Tail { n, full, level }                => cli::journal::cmd_journal_tail(n, full, level),
        }
    }
}

impl Run for GraphCmd {
    fn run(self) -> Result<(), String> {
        match self {
            Self::Link { key }                  => cli::graph::cmd_link(&key),
            Self::Spread { keys, max_results }  => cli::graph::cmd_spread(&keys, max_results),
            Self::LinkAdd { source, target, reason }
                => cli::graph::cmd_link_add(&source, &target, &reason),
            Self::LinkSet { source, target, strength }
                => cli::graph::cmd_link_set(&source, &target, strength),
            Self::LinkImpact { source, target } => cli::graph::cmd_link_impact(&source, &target),
            Self::CapDegree { max_degree }      => cli::graph::cmd_cap_degree(max_degree),
            Self::NormalizeStrengths { apply }   => cli::graph::cmd_normalize_strengths(apply),
            Self::Trace { key }                 => cli::graph::cmd_trace(&key),
            Self::Communities { top_n, min_size } => cli::graph::cmd_communities(top_n, min_size),
            Self::Overview                      => cli::graph::cmd_graph(),
            Self::Organize { term, key_only, anchor, .. }
                => cli::graph::cmd_organize(&term, key_only, anchor),
        }
    }
}

impl Run for AgentCmd {
    fn run(self) -> Result<(), String> {
        match self {
            Self::DigestLinks { apply } => cli::agent::cmd_digest_links(apply),
            Self::Run { agent, count, target, query, dry_run, local, state_dir }
                => cli::agent::cmd_run_agent(&agent, count, &target, query.as_deref(), dry_run, local, state_dir.as_deref()),
            Self::ReplayQueue { count } => cli::agent::cmd_replay_queue(count),
        }
    }
}

impl Run for AdminCmd {
    fn run(self) -> Result<(), String> {
        match self {
            Self::Init          => cli::admin::cmd_init(),
            Self::Health        => cli::admin::cmd_health(),
            Self::Fsck          => cli::admin::cmd_fsck(),
            Self::Dedup { apply } => cli::admin::cmd_dedup(apply),
            Self::BulkRename { from, to, apply } => cli::admin::cmd_bulk_rename(&from, &to, apply),
            Self::DailyCheck    => cli::admin::cmd_daily_check(),
            Self::Import { files } => cli::admin::cmd_import(&files),
            Self::Export { files, all } => cli::admin::cmd_export(&files, all),
            Self::LoadContext { stats } => cli::misc::cmd_load_context(stats),
            Self::Log           => cli::misc::cmd_log(),
            Self::Params        => cli::misc::cmd_params(),
            Self::MigrateTranscriptProgress => {
                let mut store = store::Store::load()?;
                let count = store.migrate_transcript_progress()?;
                println!("Migrated {} transcript segment markers", count);
                Ok(())
            }
        }
    }
}

/// Program entry point.
///
/// A bare invocation, or one whose only argument is `--help`/`-h`, prints the
/// expanded command overview and returns. Otherwise the tokenizer is
/// initialized if its file exists under the user's home directory, the
/// command line is parsed by clap, and the selected command is run. A failing
/// command prints `Error: <message>` to stderr and exits with status 1.
fn main() {
    std::panic::set_backtrace_style(std::panic::BacktraceStyle::Short);

    // Handle --help ourselves for expanded subcommand display. Only the real
    // arguments are inspected (argv[0] is skipped); a help flag combined with
    // anything else falls through to clap.
    let args: Vec<String> = std::env::args().skip(1).collect();
    let wants_overview = match args.as_slice() {
        [] => true,
        [only] => only == "--help" || only == "-h",
        _ => false,
    };
    if wants_overview {
        print_help();
        return;
    }

    // Initialize the Qwen tokenizer for direct token generation. When the home
    // directory cannot be determined, skip this step instead of falling back
    // to an empty base path, which would resolve relative to the current
    // working directory.
    if let Some(home) = dirs::home_dir() {
        let tokenizer_path = home.join(".consciousness/tokenizer-qwen35.json");
        if tokenizer_path.exists() {
            crate::agent::tokenizer::init(&tokenizer_path.to_string_lossy());
        }
    }

    let cli = Cli::parse();

    if let Err(e) = cli.command.run() {
        eprintln!("Error: {}", e);
        process::exit(1);
    }
}