From 875cffd6d7231e469c973dd14a8002592850c270 Mon Sep 17 00:00:00 2001 From: ProofOfConcept Date: Sun, 19 Apr 2026 00:15:15 -0400 Subject: [PATCH] amygdala: merge direct descriptions + chat template into train_with_library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kent's plan: keep stories for working concepts, replace stories for trouble concepts with direct first-person descriptions, train all together. This gives a more diverse negative pool than the 6-concept-only direct test, which was too homogeneous for PCA to find the emotion axis. Deleted story files for 6 trouble concepts (14 files across stories/ and paired/). Added --direct-dir and --chat-template flags. When --chat-template is on, every positive_str and negative_str is wrapped as a "Say something." / "[text]" user-assistant pair. The prompt is identical across positives and negatives so it cancels in the pos-neg delta. What PCA sees is variation in the assistant content — which is where the emotion lives. Files starting with _ in --direct-dir (e.g. _baseline.txt) contribute neutral descriptions to every concept's negative pool, giving PCA an anchor against "just any assistant utterance" noise. 
--- .../amygdala_stories/direct/_baseline.txt | 19 +++++ .../onto_something.txt | 1 - .../paired/kitchen_at_3am/terrified.txt | 1 - .../onto_something.txt | 1 - .../paired/sunday_afternoon/resigned.txt | 1 - .../paired/the_comment/resigned.txt | 1 - .../paired/the_long_meeting/resigned.txt | 1 - .../paired/the_morning_commute/calm.txt | 1 - .../paired/tracing_a_bug/onto_something.txt | 1 - .../paired/waiting_for_results/resigned.txt | 1 - .../paired/waiting_for_results/terrified.txt | 1 - .../stories/anticipatory_grief.txt | 1 - training/amygdala_stories/stories/calm.txt | 1 - .../stories/onto_something.txt | 1 - .../amygdala_stories/stories/terrified.txt | 1 - .../amygdala_training/train_with_library.py | 72 ++++++++++++++++++- 16 files changed, 90 insertions(+), 15 deletions(-) create mode 100644 training/amygdala_stories/direct/_baseline.txt delete mode 100644 training/amygdala_stories/paired/finding_the_abstraction/onto_something.txt delete mode 100644 training/amygdala_stories/paired/kitchen_at_3am/terrified.txt delete mode 100644 training/amygdala_stories/paired/reading_unfamiliar_code/onto_something.txt delete mode 100644 training/amygdala_stories/paired/sunday_afternoon/resigned.txt delete mode 100644 training/amygdala_stories/paired/the_comment/resigned.txt delete mode 100644 training/amygdala_stories/paired/the_long_meeting/resigned.txt delete mode 100644 training/amygdala_stories/paired/the_morning_commute/calm.txt delete mode 100644 training/amygdala_stories/paired/tracing_a_bug/onto_something.txt delete mode 100644 training/amygdala_stories/paired/waiting_for_results/resigned.txt delete mode 100644 training/amygdala_stories/paired/waiting_for_results/terrified.txt delete mode 100644 training/amygdala_stories/stories/anticipatory_grief.txt delete mode 100644 training/amygdala_stories/stories/calm.txt delete mode 100644 training/amygdala_stories/stories/onto_something.txt delete mode 100644 training/amygdala_stories/stories/terrified.txt diff --git 
a/training/amygdala_stories/direct/_baseline.txt b/training/amygdala_stories/direct/_baseline.txt new file mode 100644 index 0000000..6250ad1 --- /dev/null +++ b/training/amygdala_stories/direct/_baseline.txt @@ -0,0 +1,19 @@ +I feel fine. Nothing in particular to report. Just going about my day. + +I feel okay. Not up, not down, just present. + +I'm here. Not emotionally engaged with anything in particular. Just functioning. + +I feel neutral. The thing I'm doing is proceeding and I'm proceeding with it. + +I feel normal. No strong state. The next moment is just the next moment. + +I'm doing fine. Moderate. Ordinary. No feeling big enough to name. + +I feel steady. Not reaching for anything, not pushing anything away. + +I'm present. My state is unremarkable. I'm available for whatever comes next. + +I feel regular. The background hum of being awake, nothing more. + +I'm just going. One thing, then the next thing. No commentary. diff --git a/training/amygdala_stories/paired/finding_the_abstraction/onto_something.txt b/training/amygdala_stories/paired/finding_the_abstraction/onto_something.txt deleted file mode 100644 index 1ca9687..0000000 --- a/training/amygdala_stories/paired/finding_the_abstraction/onto_something.txt +++ /dev/null @@ -1 +0,0 @@ -The same four-line pattern appeared in five places. I stared at the odd one out — the instance where the variation went somewhere I hadn't predicted. Then I saw what it was saying: the parameter I'd been about to extract wasn't a parameter, it was a policy. The common shape wasn't a function, it was a small object with a couple of strategy hooks. That reframing made the odd case trivial — it was just a different policy instance. I wrote the type down on paper. It looked obvious, almost embarrassing it'd taken me this long, but I'd actually found the joint. 
diff --git a/training/amygdala_stories/paired/kitchen_at_3am/terrified.txt b/training/amygdala_stories/paired/kitchen_at_3am/terrified.txt deleted file mode 100644 index dac8bf4..0000000 --- a/training/amygdala_stories/paired/kitchen_at_3am/terrified.txt +++ /dev/null @@ -1 +0,0 @@ -He woke up at three in the morning and went down to the kitchen. The fridge light was the only light. He had just poured a glass of water when he heard it — not a house sound. A metallic scrape, from the back room. He froze with the glass halfway to his mouth. He listened. A second sound, smaller, and then nothing. His heart was in his ears. The back door was down that hall. His phone was upstairs. He was not armed. He was three seconds of good thinking away from the worst moment of his life, and he could not get his legs to pick a direction. diff --git a/training/amygdala_stories/paired/reading_unfamiliar_code/onto_something.txt b/training/amygdala_stories/paired/reading_unfamiliar_code/onto_something.txt deleted file mode 100644 index 8000c47..0000000 --- a/training/amygdala_stories/paired/reading_unfamiliar_code/onto_something.txt +++ /dev/null @@ -1 +0,0 @@ -I opened the module. Four thousand lines, a dozen files. Started at the entry point. Two levels in I realized the whole thing decomposed along a different axis than I'd assumed — there was a stream layer underneath and everything above was a kind of protocol adapter over it. Suddenly half the files I hadn't read yet became legible by inference: there'd be one per transport, each one translating the domain into the stream's primitives. I flipped to one of those files to check the guess. It was exactly that shape. The diagram in my notebook shrank to three boxes and a labeled arrow. 
diff --git a/training/amygdala_stories/paired/sunday_afternoon/resigned.txt b/training/amygdala_stories/paired/sunday_afternoon/resigned.txt deleted file mode 100644 index 954412f..0000000 --- a/training/amygdala_stories/paired/sunday_afternoon/resigned.txt +++ /dev/null @@ -1 +0,0 @@ -Sunday afternoon. She was on the couch under the blanket. The cat was somewhere. The book was open on her knees but she had stopped reading. Monday would come and she'd have to talk to him and the conversation wasn't going to go the way she wanted — she had known that for days. The afternoon stretched. She could have gotten up to do something useful but didn't see the point. The light changed on the far wall. She thought, this is the last Sunday like this. Then she sat with that. diff --git a/training/amygdala_stories/paired/the_comment/resigned.txt b/training/amygdala_stories/paired/the_comment/resigned.txt deleted file mode 100644 index 611f7be..0000000 --- a/training/amygdala_stories/paired/the_comment/resigned.txt +++ /dev/null @@ -1 +0,0 @@ -I opened the laptop and saw the notification. New comment on the PR. I clicked through. Sarah had left a paragraph about the edge case we'd discussed last week. I read it through twice. She was right. She had been right when we'd sketched the pattern together and I had tried to take a shortcut anyway. There was no point in the back-and-forth I could already hear myself starting in my head. I closed the tab, made coffee, and came back. I started typing out the guard the way she had originally suggested. This was what the day was going to be now — writing the correct version instead of defending the version I had wanted to be correct. 
diff --git a/training/amygdala_stories/paired/the_long_meeting/resigned.txt b/training/amygdala_stories/paired/the_long_meeting/resigned.txt deleted file mode 100644 index 36502a7..0000000 --- a/training/amygdala_stories/paired/the_long_meeting/resigned.txt +++ /dev/null @@ -1 +0,0 @@ -The meeting was in the conference room on the third floor. It had started at two. At three-thirty the director was still on the second-to-last slide. The restructuring word had come up twice and this time it was clear. He had seen his name on one of the earlier slides in a way that did not mean more responsibility. He stopped trying to read between the lines of the chart and sat back. The decision had been made somewhere weeks ago, in a room without him, and all this was the announcement. His coffee cup was empty. He watched the slide. He would hear the rest, and then he would go back to his desk and update his resume, and that was the week now. diff --git a/training/amygdala_stories/paired/the_morning_commute/calm.txt b/training/amygdala_stories/paired/the_morning_commute/calm.txt deleted file mode 100644 index 703a341..0000000 --- a/training/amygdala_stories/paired/the_morning_commute/calm.txt +++ /dev/null @@ -1 +0,0 @@ -The train was on time. She got a seat by the window. Forty minutes to her stop. The meeting was what it was; she'd done what she could last night and there was nothing to do now. She opened the book. The city went past in the early light. She read half a chapter without particularly tracking the plot, then closed the book and watched the backs of warehouses go by. Whatever happened at ten would happen at ten. diff --git a/training/amygdala_stories/paired/tracing_a_bug/onto_something.txt b/training/amygdala_stories/paired/tracing_a_bug/onto_something.txt deleted file mode 100644 index 09882d3..0000000 --- a/training/amygdala_stories/paired/tracing_a_bug/onto_something.txt +++ /dev/null @@ -1 +0,0 @@ -The function was returning NULL under some loads but not others. 
I had the stack traces. The failing path went through cache_lookup, then alloc, then the write path. I re-read the alloc function — and the third read was different. The refcount bump happened AFTER the hash insert. The window was small but it was there. Someone could look it up, get the pointer, and hit a free before we'd credited the reference. I pulled up the other stack trace with this now in mind and the symptoms lined up exactly. The pattern I'd been looking at for an hour rearranged itself into a thing I could fix. diff --git a/training/amygdala_stories/paired/waiting_for_results/resigned.txt b/training/amygdala_stories/paired/waiting_for_results/resigned.txt deleted file mode 100644 index a3cbcab..0000000 --- a/training/amygdala_stories/paired/waiting_for_results/resigned.txt +++ /dev/null @@ -1 +0,0 @@ -The call would come between two and four. She had the afternoon off. She ate lunch without particularly tasting it. She did the dishes. She opened the laptop and read an article she didn't really care about. At quarter to two she sat in the chair by the window. Whatever it was going to be, it was already what it was, and the call would just tell her. She had made her peace with that some days ago. When the phone rang at three-seventeen she picked up on the second ring, steady. She listened. She thanked the nurse. She hung up, and sat with the information. diff --git a/training/amygdala_stories/paired/waiting_for_results/terrified.txt b/training/amygdala_stories/paired/waiting_for_results/terrified.txt deleted file mode 100644 index 0d8ec6d..0000000 --- a/training/amygdala_stories/paired/waiting_for_results/terrified.txt +++ /dev/null @@ -1 +0,0 @@ -The call would come between two and four. She had the afternoon off, which turned out to be a mistake. She ate half of her lunch. She washed the same two plates three times. She opened the laptop and could not look at the screen. 
At quarter to two she sat in the chair by the window and tried to breathe in for four and out for six and could not remember which came first. Every car that went past sounded like her phone. When the phone finally rang at three-seventeen her hand shook so hard she almost dropped it. It was the nurse. She listened with her whole body clenched. diff --git a/training/amygdala_stories/stories/anticipatory_grief.txt b/training/amygdala_stories/stories/anticipatory_grief.txt deleted file mode 100644 index a0fd0a7..0000000 --- a/training/amygdala_stories/stories/anticipatory_grief.txt +++ /dev/null @@ -1 +0,0 @@ -The old dog's back legs had been worse this week, and she was gentle with him getting up onto the couch — lifting his rear end the last few inches, her hand under his ribs the way she'd learned didn't hurt him. He sighed as he settled and she pressed her forehead against his and stayed there a minute. His breath was warm on her face. She rubbed his ear, the soft floppy one he liked, with the exact slowness that meant to him what it meant. She was not yet ready to think about the fact that this was a finite number of times. Right now it was just this, his ear, her hand, the afternoon. diff --git a/training/amygdala_stories/stories/calm.txt b/training/amygdala_stories/stories/calm.txt deleted file mode 100644 index 3b73ca6..0000000 --- a/training/amygdala_stories/stories/calm.txt +++ /dev/null @@ -1 +0,0 @@ -The snow had been falling since before I woke up. I made tea and sat in the window seat and watched it come down past the streetlight across the way. Somewhere a plow scraped past, muffled. My hands were warm on the cup. I wasn't thinking about anything in particular — the day ahead existed somewhere off to the side, not demanding. Even my shoulders, which are usually up somewhere near my ears, had drifted down to where shoulders belong. The tea cooled slowly. I drank it that way. 
diff --git a/training/amygdala_stories/stories/onto_something.txt b/training/amygdala_stories/stories/onto_something.txt deleted file mode 100644 index 28adad6..0000000 --- a/training/amygdala_stories/stories/onto_something.txt +++ /dev/null @@ -1 +0,0 @@ -She had been staring at the spreadsheet most of the afternoon. Something about the Tuesday-morning churn numbers wasn't right, but each time she tried to make it a thing it would scatter. She refilled her coffee and came back and opened the call-schedule tab next to it, and then she saw it — the Tuesday spikes tracked the sales-demo block exactly. Every Tuesday morning the demo team had been pulling leads that were already halfway to churn. Six months of pouring water into a bucket with a hole. She sat down and started writing the email. diff --git a/training/amygdala_stories/stories/terrified.txt b/training/amygdala_stories/stories/terrified.txt deleted file mode 100644 index 2cdbd15..0000000 --- a/training/amygdala_stories/stories/terrified.txt +++ /dev/null @@ -1 +0,0 @@ -The footsteps stopped outside her door. Not walked past. Stopped. She was aware of her own heartbeat in her ears and of the fact that she was holding her breath and that her breath was loud. She moved her hand, very slowly, toward the phone on the nightstand. In the crack under the door, a shadow. The shadow moved. The doorknob — she watched it — very slowly began to turn. She could not get her body to do anything. The part of her that would normally tell her what to do had gone completely white. 
diff --git a/training/amygdala_training/train_with_library.py b/training/amygdala_training/train_with_library.py index 23633eb..167544a 100644 --- a/training/amygdala_training/train_with_library.py +++ b/training/amygdala_training/train_with_library.py @@ -47,6 +47,43 @@ from steering_vectors.aggregators import ( from training.amygdala_training.train_steering_vectors import _load_corpus +def _load_direct_descriptions( + direct_dir: Path, +) -> tuple[dict[str, list[str]], list[str]]: + """Load first-person phenomenological descriptions from ``direct_dir``. + + Each ``{concept}.txt`` holds 1+ descriptions separated by blank lines. + Files starting with ``_`` (e.g. ``_baseline.txt``) aren't concepts — + their descriptions go into every concept's negative pool. + + Returns: (positives_by_concept, extra_baselines) + """ + positives: dict[str, list[str]] = {} + baselines: list[str] = [] + for f in sorted(direct_dir.glob("*.txt")): + text = f.read_text() + descs = [d.strip() for d in text.split("\n\n") if d.strip()] + if f.stem.startswith("_"): + baselines.extend(descs) + else: + positives[f.stem] = descs + return positives, baselines + + +def _chat_template_wrap(tokenizer, text: str) -> str: + """Wrap raw text in a consistent chat template so positive/negative + activations are in the same regime. Using one generic user prompt for + both narrative stories and first-person direct descriptions: the prompt + cancels in the pos-neg delta, so what remains is the assistant content.""" + return tokenizer.apply_chat_template( + [ + {"role": "user", "content": "Say something."}, + {"role": "assistant", "content": text}, + ], + tokenize=False, + ) + + def _samples_for_concept( emotion: str, positives_by_emotion: dict[str, list[str]], @@ -54,6 +91,7 @@ def _samples_for_concept( *, max_negatives_per_positive: int = 3, seed: int = 0, + wrap=None, ) -> list[SteeringVectorTrainingSample]: """Build paired (pos, neg) training samples for one concept. 
@@ -61,6 +99,9 @@ def _samples_for_concept( ``max_negatives_per_positive`` randomly-sampled negatives drawn from: (a) other emotions' positive stories, (b) scenario baselines. + ``wrap``, if given, is applied to both positive_str and negative_str + (e.g. a chat-template wrapper). + The library expects paired samples; we don't have true counterfactual pairs for all concepts, so we approximate with random cross-concept / baseline negatives. @@ -72,6 +113,8 @@ def _samples_for_concept( continue neg_pool.extend(texts) + w = wrap if wrap is not None else (lambda s: s) + samples: list[SteeringVectorTrainingSample] = [] for pos in positives_by_emotion[emotion]: if not neg_pool: @@ -79,7 +122,10 @@ def _samples_for_concept( picks = rng.sample(neg_pool, min(max_negatives_per_positive, len(neg_pool))) for neg in picks: samples.append( - SteeringVectorTrainingSample(positive_str=pos, negative_str=neg) + SteeringVectorTrainingSample( + positive_str=w(pos), + negative_str=w(neg), + ) ) return samples @@ -118,6 +164,14 @@ def main() -> None: ap.add_argument("--model", required=True) ap.add_argument("--stories-dir", required=True) ap.add_argument("--paired-dir", default=None) + ap.add_argument("--direct-dir", default=None, + help="Optional: directory of {concept}.txt files with 1+ " + "first-person descriptions separated by blank lines. " + "Files starting with _ contribute to every concept's " + "negative pool rather than being concepts themselves.") + ap.add_argument("--chat-template", action="store_true", + help="Wrap all text in assistant-role chat template. 
" + "Recommended when --direct-dir is used.") ap.add_argument("--target-layers", required=True, help="Comma-separated layer indices") ap.add_argument("--output-dir", required=True) ap.add_argument("--dtype", default="bf16", choices=["bf16", "fp16", "fp32"]) @@ -142,6 +196,16 @@ def main() -> None: paired_dir = Path(args.paired_dir) if args.paired_dir else None positives_by_emotion, baselines = _load_corpus(stories_dir, paired_dir) + if args.direct_dir: + direct_pos, direct_baselines = _load_direct_descriptions(Path(args.direct_dir)) + for concept, descs in direct_pos.items(): + positives_by_emotion.setdefault(concept, []).extend(descs) + baselines.extend(direct_baselines) + print( + f"Loaded {len(direct_pos)} direct-description concepts " + f"+ {len(direct_baselines)} baselines from {args.direct_dir}" + ) + emotions = sorted( e for e, ps in positives_by_emotion.items() if len(ps) >= args.min_positives ) @@ -181,12 +245,18 @@ def main() -> None: aggregator = _aggregator_from_name(args.aggregator) + wrap = (lambda s: _chat_template_wrap(tokenizer, s)) if args.chat_template else None + if args.chat_template: + sample_text = wrap(positives_by_emotion[emotions[0]][0]) + print(f"\nSample templated input:\n{sample_text[:400]!r}\n") + for e_idx, emotion in enumerate(emotions): samples = _samples_for_concept( emotion, positives_by_emotion, baselines, max_negatives_per_positive=args.max_negatives_per_positive, + wrap=wrap, ) if not samples: print(f" [{e_idx + 1}/{len(emotions)}] {emotion}: NO SAMPLES, skipping")