author    Thomas Bertschinger <tahbertschinger@gmail.com>  2024-01-15 23:41:02 -0700
committer Kent Overstreet <kent.overstreet@linux.dev>      2024-01-16 01:47:05 -0500
commit    f5baaf48e3e82b1caf9f5cd1207d4d6feba3a2e5 (patch)
tree      59f7b0e4667df7a9d3d5a45725f2aaab3e79b4c5 /c_src
parent    fb35dbfdc5a9446fbb856dae5542b23963e28b89 (diff)
move Rust sources to top level, C sources into c_src
This moves the Rust sources out of rust_src/ and into the top level. Running
the bcachefs executable out of the development tree is now:

    $ ./target/release/bcachefs command

or

    $ cargo run --profile release -- command

instead of "./bcachefs command". Building and installing is still:

    $ make && make install

Signed-off-by: Thomas Bertschinger <tahbertschinger@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'c_src')
-rw-r--r--c_src/bcachefs.c179
-rw-r--r--c_src/ccan/array_size/LICENSE28
-rw-r--r--c_src/ccan/array_size/_info46
-rw-r--r--c_src/ccan/array_size/array_size.h26
-rw-r--r--c_src/ccan/build_assert/LICENSE28
-rw-r--r--c_src/ccan/build_assert/_info49
-rw-r--r--c_src/ccan/build_assert/build_assert.h40
-rw-r--r--c_src/ccan/compiler/LICENSE28
-rw-r--r--c_src/ccan/compiler/_info64
-rw-r--r--c_src/ccan/compiler/compiler.h231
-rw-r--r--c_src/cmd_assemble.c48
-rw-r--r--c_src/cmd_attr.c119
-rw-r--r--c_src/cmd_counters.c51
-rw-r--r--c_src/cmd_data.c127
-rw-r--r--c_src/cmd_device.c647
-rw-r--r--c_src/cmd_dump.c187
-rw-r--r--c_src/cmd_format.c393
-rw-r--r--c_src/cmd_fs.c348
-rw-r--r--c_src/cmd_fsck.c208
-rw-r--r--c_src/cmd_fusemount.c1303
-rw-r--r--c_src/cmd_key.c149
-rw-r--r--c_src/cmd_kill_btree_node.c116
-rw-r--r--c_src/cmd_list_journal.c276
-rw-r--r--c_src/cmd_migrate.c867
-rw-r--r--c_src/cmd_option.c107
-rw-r--r--c_src/cmd_run.c33
-rw-r--r--c_src/cmd_subvolume.c188
-rw-r--r--c_src/cmd_version.c9
-rw-r--r--c_src/cmds.h63
-rw-r--r--c_src/config.h0
-rw-r--r--c_src/crypto.c201
-rw-r--r--c_src/crypto.h22
-rw-r--r--c_src/include/asm/page.h0
-rw-r--r--c_src/include/asm/unaligned.h20
-rw-r--r--c_src/include/crypto/algapi.h7
-rw-r--r--c_src/include/crypto/chacha.h15
-rw-r--r--c_src/include/crypto/hash.h104
-rw-r--r--c_src/include/crypto/poly1305.h13
-rw-r--r--c_src/include/crypto/sha2.h115
-rw-r--r--c_src/include/crypto/skcipher.h126
-rw-r--r--c_src/include/keys/user-type.h6
-rw-r--r--c_src/include/linux/atomic.h349
-rw-r--r--c_src/include/linux/backing-dev-defs.h0
-rw-r--r--c_src/include/linux/backing-dev.h45
-rw-r--r--c_src/include/linux/bio.h434
-rw-r--r--c_src/include/linux/bit_spinlock.h84
-rw-r--r--c_src/include/linux/bitmap.h146
-rw-r--r--c_src/include/linux/bitops.h286
-rw-r--r--c_src/include/linux/blk_types.h247
-rw-r--r--c_src/include/linux/blkdev.h190
-rw-r--r--c_src/include/linux/bsearch.h32
-rw-r--r--c_src/include/linux/bug.h66
-rw-r--r--c_src/include/linux/bvec.h101
-rw-r--r--c_src/include/linux/byteorder.h75
-rw-r--r--c_src/include/linux/cache.h17
-rw-r--r--c_src/include/linux/closure.h415
-rw-r--r--c_src/include/linux/compiler.h190
-rw-r--r--c_src/include/linux/completion.h42
-rw-r--r--c_src/include/linux/console.h7
-rw-r--r--c_src/include/linux/cpumask.h26
-rw-r--r--c_src/include/linux/crc32c.h6
-rw-r--r--c_src/include/linux/crc64.h11
-rw-r--r--c_src/include/linux/crypto.h45
-rw-r--r--c_src/include/linux/ctype.h2
-rw-r--r--c_src/include/linux/dcache.h12
-rw-r--r--c_src/include/linux/debugfs.h46
-rw-r--r--c_src/include/linux/device.h40
-rw-r--r--c_src/include/linux/dynamic_fault.h7
-rw-r--r--c_src/include/linux/err.h68
-rw-r--r--c_src/include/linux/errname.h11
-rw-r--r--c_src/include/linux/export.h13
-rw-r--r--c_src/include/linux/freezer.h12
-rw-r--r--c_src/include/linux/generic-radix-tree.h298
-rw-r--r--c_src/include/linux/genhd.h0
-rw-r--r--c_src/include/linux/gfp.h1
-rw-r--r--c_src/include/linux/hash.h104
-rw-r--r--c_src/include/linux/idr.h208
-rw-r--r--c_src/include/linux/ioprio.h46
-rw-r--r--c_src/include/linux/jhash.h175
-rw-r--r--c_src/include/linux/jiffies.h91
-rw-r--r--c_src/include/linux/kernel.h267
-rw-r--r--c_src/include/linux/key.h42
-rw-r--r--c_src/include/linux/kmemleak.h121
-rw-r--r--c_src/include/linux/kobject.h129
-rw-r--r--c_src/include/linux/kthread.h118
-rw-r--r--c_src/include/linux/list.h112
-rw-r--r--c_src/include/linux/list_nulls.h145
-rw-r--r--c_src/include/linux/llist.h229
-rw-r--r--c_src/include/linux/lockdep.h60
-rw-r--r--c_src/include/linux/log2.h298
-rw-r--r--c_src/include/linux/lz4.h10
-rw-r--r--c_src/include/linux/math.h171
-rw-r--r--c_src/include/linux/math64.h85
-rw-r--r--c_src/include/linux/mempool.h112
-rw-r--r--c_src/include/linux/minmax.h272
-rw-r--r--c_src/include/linux/mm.h31
-rw-r--r--c_src/include/linux/module.h48
-rw-r--r--c_src/include/linux/mutex.h18
-rw-r--r--c_src/include/linux/osq_lock.h44
-rw-r--r--c_src/include/linux/overflow.h345
-rw-r--r--c_src/include/linux/page.h38
-rw-r--r--c_src/include/linux/percpu-refcount.h180
-rw-r--r--c_src/include/linux/percpu-rwsem.h58
-rw-r--r--c_src/include/linux/percpu.h191
-rw-r--r--c_src/include/linux/poison.h93
-rw-r--r--c_src/include/linux/posix_acl.h49
-rw-r--r--c_src/include/linux/posix_acl_xattr.h34
-rw-r--r--c_src/include/linux/prandom.h33
-rw-r--r--c_src/include/linux/preempt.h16
-rw-r--r--c_src/include/linux/prefetch.h10
-rw-r--r--c_src/include/linux/pretty-printers.h10
-rw-r--r--c_src/include/linux/printk.h207
-rw-r--r--c_src/include/linux/random.h70
-rw-r--r--c_src/include/linux/ratelimit.h109
-rw-r--r--c_src/include/linux/rculist.h16
-rw-r--r--c_src/include/linux/rcupdate.h48
-rw-r--r--c_src/include/linux/refcount.h352
-rw-r--r--c_src/include/linux/rhashtable-types.h135
-rw-r--r--c_src/include/linux/rhashtable.h1267
-rw-r--r--c_src/include/linux/rwsem.h29
-rw-r--r--c_src/include/linux/scatterlist.h109
-rw-r--r--c_src/include/linux/sched.h186
-rw-r--r--c_src/include/linux/sched/clock.h0
-rw-r--r--c_src/include/linux/sched/cputime.h6
-rw-r--r--c_src/include/linux/sched/debug.h0
-rw-r--r--c_src/include/linux/sched/mm.h31
-rw-r--r--c_src/include/linux/sched/rt.h9
-rw-r--r--c_src/include/linux/sched/signal.h11
-rw-r--r--c_src/include/linux/sched/task.h0
-rw-r--r--c_src/include/linux/sched/task_stack.h0
-rw-r--r--c_src/include/linux/semaphore.h43
-rw-r--r--c_src/include/linux/seq_buf.h153
-rw-r--r--c_src/include/linux/seq_file.h21
-rw-r--r--c_src/include/linux/seqlock.h47
-rw-r--r--c_src/include/linux/shrinker.h35
-rw-r--r--c_src/include/linux/siphash.h145
-rw-r--r--c_src/include/linux/slab.h272
-rw-r--r--c_src/include/linux/sort.h13
-rw-r--r--c_src/include/linux/spinlock.h65
-rw-r--r--c_src/include/linux/srcu.h31
-rw-r--r--c_src/include/linux/stat.h12
-rw-r--r--c_src/include/linux/string.h17
-rw-r--r--c_src/include/linux/string_helpers.h20
-rw-r--r--c_src/include/linux/sysfs.h38
-rw-r--r--c_src/include/linux/time64.h51
-rw-r--r--c_src/include/linux/timer.h46
-rw-r--r--c_src/include/linux/tracepoint.h62
-rw-r--r--c_src/include/linux/typecheck.h24
-rw-r--r--c_src/include/linux/types.h87
-rw-r--r--c_src/include/linux/unaligned/be_byteshift.h70
-rw-r--r--c_src/include/linux/unaligned/be_struct.h36
-rw-r--r--c_src/include/linux/unaligned/generic.h68
-rw-r--r--c_src/include/linux/unaligned/le_byteshift.h70
-rw-r--r--c_src/include/linux/unaligned/le_struct.h36
-rw-r--r--c_src/include/linux/unaligned/packed_struct.h46
-rw-r--r--c_src/include/linux/uuid.h41
-rw-r--r--c_src/include/linux/vmalloc.h6
-rw-r--r--c_src/include/linux/wait.h138
-rw-r--r--c_src/include/linux/workqueue.h185
-rw-r--r--c_src/include/linux/xattr.h78
-rw-r--r--c_src/include/linux/xxhash.h259
-rw-r--r--c_src/include/linux/zlib.h18
-rw-r--r--c_src/include/linux/zstd.h447
-rw-r--r--c_src/include/linux/zstd_errors.h77
-rw-r--r--c_src/include/trace/define_trace.h0
-rw-r--r--c_src/include/trace/events/lock.h144
-rw-r--r--c_src/include/uapi/linux/xattr.h77
-rw-r--r--c_src/libbcachefs.c753
-rw-r--r--c_src/libbcachefs.h267
-rw-r--r--c_src/libbcachefs/acl.c464
-rw-r--r--c_src/libbcachefs/acl.h60
-rw-r--r--c_src/libbcachefs/alloc_background.c2217
-rw-r--r--c_src/libbcachefs/alloc_background.h274
-rw-r--r--c_src/libbcachefs/alloc_foreground.c1624
-rw-r--r--c_src/libbcachefs/alloc_foreground.h224
-rw-r--r--c_src/libbcachefs/alloc_types.h126
-rw-r--r--c_src/libbcachefs/backpointers.c873
-rw-r--r--c_src/libbcachefs/backpointers.h133
-rw-r--r--c_src/libbcachefs/bbpos.h37
-rw-r--r--c_src/libbcachefs/bbpos_types.h18
-rw-r--r--c_src/libbcachefs/bcachefs.h1260
-rw-r--r--c_src/libbcachefs/bcachefs_format.h2451
-rw-r--r--c_src/libbcachefs/bcachefs_ioctl.h412
-rw-r--r--c_src/libbcachefs/bkey.c1120
-rw-r--r--c_src/libbcachefs/bkey.h778
-rw-r--r--c_src/libbcachefs/bkey_buf.h61
-rw-r--r--c_src/libbcachefs/bkey_cmp.h129
-rw-r--r--c_src/libbcachefs/bkey_methods.c459
-rw-r--r--c_src/libbcachefs/bkey_methods.h179
-rw-r--r--c_src/libbcachefs/bkey_sort.c201
-rw-r--r--c_src/libbcachefs/bkey_sort.h54
-rw-r--r--c_src/libbcachefs/bset.c1598
-rw-r--r--c_src/libbcachefs/bset.h541
-rw-r--r--c_src/libbcachefs/btree_cache.c1211
-rw-r--r--c_src/libbcachefs/btree_cache.h131
-rw-r--r--c_src/libbcachefs/btree_gc.c2071
-rw-r--r--c_src/libbcachefs/btree_gc.h114
-rw-r--r--c_src/libbcachefs/btree_io.c2349
-rw-r--r--c_src/libbcachefs/btree_io.h225
-rw-r--r--c_src/libbcachefs/btree_iter.c3268
-rw-r--r--c_src/libbcachefs/btree_iter.h879
-rw-r--r--c_src/libbcachefs/btree_journal_iter.c556
-rw-r--r--c_src/libbcachefs/btree_journal_iter.h65
-rw-r--r--c_src/libbcachefs/btree_key_cache.c1067
-rw-r--r--c_src/libbcachefs/btree_key_cache.h46
-rw-r--r--c_src/libbcachefs/btree_key_cache_types.h34
-rw-r--r--c_src/libbcachefs/btree_locking.c868
-rw-r--r--c_src/libbcachefs/btree_locking.h431
-rw-r--r--c_src/libbcachefs/btree_trans_commit.c1132
-rw-r--r--c_src/libbcachefs/btree_types.h741
-rw-r--r--c_src/libbcachefs/btree_update.c873
-rw-r--r--c_src/libbcachefs/btree_update.h361
-rw-r--r--c_src/libbcachefs/btree_update_interior.c2496
-rw-r--r--c_src/libbcachefs/btree_update_interior.h333
-rw-r--r--c_src/libbcachefs/btree_write_buffer.c647
-rw-r--r--c_src/libbcachefs/btree_write_buffer.h61
-rw-r--r--c_src/libbcachefs/btree_write_buffer_types.h57
-rw-r--r--c_src/libbcachefs/buckets.c1413
-rw-r--r--c_src/libbcachefs/buckets.h461
-rw-r--r--c_src/libbcachefs/buckets_types.h90
-rw-r--r--c_src/libbcachefs/buckets_waiting_for_journal.c166
-rw-r--r--c_src/libbcachefs/buckets_waiting_for_journal.h15
-rw-r--r--c_src/libbcachefs/buckets_waiting_for_journal_types.h23
-rw-r--r--c_src/libbcachefs/chardev.c999
-rw-r--r--c_src/libbcachefs/chardev.h31
-rw-r--r--c_src/libbcachefs/checksum.c804
-rw-r--r--c_src/libbcachefs/checksum.h236
-rw-r--r--c_src/libbcachefs/clock.c193
-rw-r--r--c_src/libbcachefs/clock.h38
-rw-r--r--c_src/libbcachefs/clock_types.h37
-rw-r--r--c_src/libbcachefs/compress.c728
-rw-r--r--c_src/libbcachefs/compress.h73
-rw-r--r--c_src/libbcachefs/counters.c107
-rw-r--r--c_src/libbcachefs/counters.h17
-rw-r--r--c_src/libbcachefs/darray.c24
-rw-r--r--c_src/libbcachefs/darray.h109
-rw-r--r--c_src/libbcachefs/data_update.c665
-rw-r--r--c_src/libbcachefs/data_update.h49
-rw-r--r--c_src/libbcachefs/debug.c935
-rw-r--r--c_src/libbcachefs/debug.h32
-rw-r--r--c_src/libbcachefs/dirent.c603
-rw-r--r--c_src/libbcachefs/dirent.h76
-rw-r--r--c_src/libbcachefs/disk_groups.c617
-rw-r--r--c_src/libbcachefs/disk_groups.h111
-rw-r--r--c_src/libbcachefs/disk_groups_types.h18
-rw-r--r--c_src/libbcachefs/ec.c2259
-rw-r--r--c_src/libbcachefs/ec.h261
-rw-r--r--c_src/libbcachefs/ec_types.h41
-rw-r--r--c_src/libbcachefs/errcode.c68
-rw-r--r--c_src/libbcachefs/errcode.h276
-rw-r--r--c_src/libbcachefs/error.c337
-rw-r--r--c_src/libbcachefs/error.h242
-rw-r--r--c_src/libbcachefs/extent_update.c173
-rw-r--r--c_src/libbcachefs/extent_update.h12
-rw-r--r--c_src/libbcachefs/extents.c1507
-rw-r--r--c_src/libbcachefs/extents.h757
-rw-r--r--c_src/libbcachefs/extents_types.h40
-rw-r--r--c_src/libbcachefs/eytzinger.h281
-rw-r--r--c_src/libbcachefs/fifo.h127
-rw-r--r--c_src/libbcachefs/fs-common.c495
-rw-r--r--c_src/libbcachefs/fs-common.h43
-rw-r--r--c_src/libbcachefs/fs-io-buffered.c1100
-rw-r--r--c_src/libbcachefs/fs-io-buffered.h27
-rw-r--r--c_src/libbcachefs/fs-io-direct.c674
-rw-r--r--c_src/libbcachefs/fs-io-direct.h16
-rw-r--r--c_src/libbcachefs/fs-io-pagecache.c791
-rw-r--r--c_src/libbcachefs/fs-io-pagecache.h176
-rw-r--r--c_src/libbcachefs/fs-io.c1078
-rw-r--r--c_src/libbcachefs/fs-io.h184
-rw-r--r--c_src/libbcachefs/fs-ioctl.c562
-rw-r--r--c_src/libbcachefs/fs-ioctl.h81
-rw-r--r--c_src/libbcachefs/fs.c1976
-rw-r--r--c_src/libbcachefs/fs.h204
-rw-r--r--c_src/libbcachefs/fsck.c2394
-rw-r--r--c_src/libbcachefs/fsck.h15
-rw-r--r--c_src/libbcachefs/inode.c1184
-rw-r--r--c_src/libbcachefs/inode.h212
-rw-r--r--c_src/libbcachefs/io_misc.c517
-rw-r--r--c_src/libbcachefs/io_misc.h34
-rw-r--r--c_src/libbcachefs/io_read.c1220
-rw-r--r--c_src/libbcachefs/io_read.h158
-rw-r--r--c_src/libbcachefs/io_write.c1662
-rw-r--r--c_src/libbcachefs/io_write.h109
-rw-r--r--c_src/libbcachefs/io_write_types.h96
-rw-r--r--c_src/libbcachefs/journal.c1473
-rw-r--r--c_src/libbcachefs/journal.h448
-rw-r--r--c_src/libbcachefs/journal_io.c2009
-rw-r--r--c_src/libbcachefs/journal_io.h65
-rw-r--r--c_src/libbcachefs/journal_reclaim.c905
-rw-r--r--c_src/libbcachefs/journal_reclaim.h81
-rw-r--r--c_src/libbcachefs/journal_sb.c219
-rw-r--r--c_src/libbcachefs/journal_sb.h24
-rw-r--r--c_src/libbcachefs/journal_seq_blacklist.c320
-rw-r--r--c_src/libbcachefs/journal_seq_blacklist.h22
-rw-r--r--c_src/libbcachefs/journal_types.h329
-rw-r--r--c_src/libbcachefs/keylist.c50
-rw-r--r--c_src/libbcachefs/keylist.h72
-rw-r--r--c_src/libbcachefs/keylist_types.h16
-rw-r--r--c_src/libbcachefs/logged_ops.c108
-rw-r--r--c_src/libbcachefs/logged_ops.h20
-rw-r--r--c_src/libbcachefs/lru.c159
-rw-r--r--c_src/libbcachefs/lru.h69
-rw-r--r--c_src/libbcachefs/mean_and_variance.c165
-rw-r--r--c_src/libbcachefs/mean_and_variance.h201
-rw-r--r--c_src/libbcachefs/migrate.c176
-rw-r--r--c_src/libbcachefs/migrate.h7
-rw-r--r--c_src/libbcachefs/move.c1161
-rw-r--r--c_src/libbcachefs/move.h155
-rw-r--r--c_src/libbcachefs/move_types.h36
-rw-r--r--c_src/libbcachefs/movinggc.c436
-rw-r--r--c_src/libbcachefs/movinggc.h12
-rw-r--r--c_src/libbcachefs/nocow_locking.c144
-rw-r--r--c_src/libbcachefs/nocow_locking.h50
-rw-r--r--c_src/libbcachefs/nocow_locking_types.h20
-rw-r--r--c_src/libbcachefs/opts.c602
-rw-r--r--c_src/libbcachefs/opts.h570
-rw-r--r--c_src/libbcachefs/printbuf.c447
-rw-r--r--c_src/libbcachefs/printbuf.h286
-rw-r--r--c_src/libbcachefs/quota.c969
-rw-r--r--c_src/libbcachefs/quota.h74
-rw-r--r--c_src/libbcachefs/quota_types.h43
-rw-r--r--c_src/libbcachefs/rebalance.c486
-rw-r--r--c_src/libbcachefs/rebalance.h27
-rw-r--r--c_src/libbcachefs/rebalance_types.h37
-rw-r--r--c_src/libbcachefs/recovery.c1220
-rw-r--r--c_src/libbcachefs/recovery.h40
-rw-r--r--c_src/libbcachefs/recovery_types.h66
-rw-r--r--c_src/libbcachefs/reflink.c594
-rw-r--r--c_src/libbcachefs/reflink.h81
-rw-r--r--c_src/libbcachefs/replicas.c1057
-rw-r--r--c_src/libbcachefs/replicas.h93
-rw-r--r--c_src/libbcachefs/replicas_types.h27
-rw-r--r--c_src/libbcachefs/sb-clean.c392
-rw-r--r--c_src/libbcachefs/sb-clean.h16
-rw-r--r--c_src/libbcachefs/sb-downgrade.c260
-rw-r--r--c_src/libbcachefs/sb-downgrade.h11
-rw-r--r--c_src/libbcachefs/sb-errors.c170
-rw-r--r--c_src/libbcachefs/sb-errors.h19
-rw-r--r--c_src/libbcachefs/sb-errors_types.h271
-rw-r--r--c_src/libbcachefs/sb-members.c428
-rw-r--r--c_src/libbcachefs/sb-members.h231
-rw-r--r--c_src/libbcachefs/seqmutex.h48
-rw-r--r--c_src/libbcachefs/siphash.c173
-rw-r--r--c_src/libbcachefs/siphash.h87
-rw-r--r--c_src/libbcachefs/six.c867
-rw-r--r--c_src/libbcachefs/six.h386
-rw-r--r--c_src/libbcachefs/snapshot.c1685
-rw-r--r--c_src/libbcachefs/snapshot.h264
-rw-r--r--c_src/libbcachefs/str_hash.h381
-rw-r--r--c_src/libbcachefs/subvolume.c444
-rw-r--r--c_src/libbcachefs/subvolume.h38
-rw-r--r--c_src/libbcachefs/subvolume_types.h35
-rw-r--r--c_src/libbcachefs/super-io.c1391
-rw-r--r--c_src/libbcachefs/super-io.h103
-rw-r--r--c_src/libbcachefs/super.c2124
-rw-r--r--c_src/libbcachefs/super.h54
-rw-r--r--c_src/libbcachefs/super_types.h41
-rw-r--r--c_src/libbcachefs/sysfs.c1026
-rw-r--r--c_src/libbcachefs/sysfs.h48
-rw-r--r--c_src/libbcachefs/tests.c882
-rw-r--r--c_src/libbcachefs/tests.h15
-rw-r--r--c_src/libbcachefs/thread_with_file.c299
-rw-r--r--c_src/libbcachefs/thread_with_file.h41
-rw-r--r--c_src/libbcachefs/thread_with_file_types.h16
-rw-r--r--c_src/libbcachefs/trace.c17
-rw-r--r--c_src/libbcachefs/trace.h1473
-rw-r--r--c_src/libbcachefs/two_state_shared_lock.c8
-rw-r--r--c_src/libbcachefs/two_state_shared_lock.h59
-rw-r--r--c_src/libbcachefs/util.c1210
-rw-r--r--c_src/libbcachefs/util.h878
-rw-r--r--c_src/libbcachefs/varint.c129
-rw-r--r--c_src/libbcachefs/varint.h11
-rw-r--r--c_src/libbcachefs/vstructs.h63
-rw-r--r--c_src/libbcachefs/xattr.c653
-rw-r--r--c_src/libbcachefs/xattr.h50
-rw-r--r--c_src/linux/atomic64.c188
-rw-r--r--c_src/linux/bio.c395
-rw-r--r--c_src/linux/blkdev.c428
-rw-r--r--c_src/linux/closure.c211
-rw-r--r--c_src/linux/crc64.c56
-rw-r--r--c_src/linux/crc64table.h135
-rw-r--r--c_src/linux/crypto/api.c114
-rw-r--r--c_src/linux/crypto/chacha20_generic.c111
-rw-r--r--c_src/linux/crypto/poly1305_generic.c88
-rw-r--r--c_src/linux/crypto/sha256_generic.c81
-rw-r--r--c_src/linux/fs.c14
-rw-r--r--c_src/linux/generic-radix-tree.c307
-rw-r--r--c_src/linux/int_sqrt.c71
-rw-r--r--c_src/linux/kstrtox.c357
-rw-r--r--c_src/linux/kstrtox.h7
-rw-r--r--c_src/linux/kthread.c139
-rw-r--r--c_src/linux/llist.c92
-rw-r--r--c_src/linux/mempool.c541
-rw-r--r--c_src/linux/preempt.c37
-rw-r--r--c_src/linux/ratelimit.c69
-rw-r--r--c_src/linux/rhashtable.c1237
-rw-r--r--c_src/linux/sched.c133
-rw-r--r--c_src/linux/semaphore.c191
-rw-r--r--c_src/linux/seq_buf.c152
-rw-r--r--c_src/linux/shrinker.c140
-rw-r--r--c_src/linux/siphash.c552
-rw-r--r--c_src/linux/string.c112
-rw-r--r--c_src/linux/string_helpers.c130
-rw-r--r--c_src/linux/timer.c329
-rw-r--r--c_src/linux/wait.c250
-rw-r--r--c_src/linux/workqueue.c346
-rw-r--r--c_src/linux/xxhash.c500
-rw-r--r--c_src/linux/zstd_compress_module.c157
-rw-r--r--c_src/linux/zstd_decompress_module.c103
-rw-r--r--c_src/qcow2.c134
-rw-r--r--c_src/qcow2.h9
-rw-r--r--c_src/raid/COPYING339
-rw-r--r--c_src/raid/check.c185
-rw-r--r--c_src/raid/combo.h155
-rw-r--r--c_src/raid/cpu.h331
-rw-r--r--c_src/raid/gf.h137
-rw-r--r--c_src/raid/helper.c94
-rw-r--r--c_src/raid/helper.h43
-rw-r--r--c_src/raid/int.c556
-rw-r--r--c_src/raid/internal.h274
-rw-r--r--c_src/raid/intz.c119
-rw-r--r--c_src/raid/memory.c154
-rw-r--r--c_src/raid/memory.h96
-rw-r--r--c_src/raid/module.c473
-rw-r--r--c_src/raid/raid.c586
-rw-r--r--c_src/raid/raid.h229
-rw-r--r--c_src/raid/tables.c14696
-rw-r--r--c_src/raid/tag.c145
-rw-r--r--c_src/raid/test.c452
-rw-r--r--c_src/raid/test.h68
-rw-r--r--c_src/raid/x86.c2452
-rw-r--r--c_src/raid/x86z.c255
-rw-r--r--c_src/tools-util.c667
-rw-r--r--c_src/tools-util.h204
434 files changed, 148949 insertions, 0 deletions
diff --git a/c_src/bcachefs.c b/c_src/bcachefs.c
new file mode 100644
index 00000000..70af2c39
--- /dev/null
+++ b/c_src/bcachefs.c
@@ -0,0 +1,179 @@
+/*
+ * Authors: Kent Overstreet <kent.overstreet@gmail.com>
+ * Gabriel de Perthuis <g2p.code@gmail.com>
+ * Jacob Malevich <jam@datera.io>
+ *
+ * GPLv2
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <raid/raid.h>
+
+#include "cmds.h"
+
+void bcachefs_usage(void)
+{
+ puts("bcachefs - tool for managing bcachefs filesystems\n"
+ "usage: bcachefs <command> [<args>]\n"
+ "\n"
+ "Superblock commands:\n"
+ " format Format a new filesystem\n"
+ " show-super Dump superblock information to stdout\n"
+ " set-option Set a filesystem option\n"
+ " reset-counters Reset all counters on an unmounted device\n"
+ "\n"
+ "Mount:\n"
+ " mount Mount a filesystem\n"
+ "\n"
+ "Repair:\n"
+ " fsck Check an existing filesystem for errors\n"
+ "\n"
+#if 0
+ "Startup/shutdown, assembly of multi device filesystems:\n"
+ " assemble Assemble an existing multi device filesystem\n"
+ " incremental Incrementally assemble an existing multi device filesystem\n"
+ " run Start a partially assembled filesystem\n"
+ " stop Stop a running filesystem\n"
+ "\n"
+#endif
+ "Commands for managing a running filesystem:\n"
+ " fs usage Show disk usage\n"
+ "\n"
+ "Commands for managing devices within a running filesystem:\n"
+ " device add Add a new device to an existing filesystem\n"
+ " device remove Remove a device from an existing filesystem\n"
+ " device online Re-add an existing member to a filesystem\n"
+ " device offline Take a device offline, without removing it\n"
+ " device evacuate Migrate data off of a specific device\n"
+ " device set-state Mark a device as failed\n"
+ " device resize Resize filesystem on a device\n"
+ " device resize-journal Resize journal on a device\n"
+ "\n"
+ "Commands for managing subvolumes and snapshots:\n"
+ " subvolume create Create a new subvolume\n"
+ " subvolume delete Delete an existing subvolume\n"
+ " subvolume snapshot Create a snapshot\n"
+ "\n"
+ "Commands for managing filesystem data:\n"
+ " data rereplicate Rereplicate degraded data\n"
+ " data job Kick off low level data jobs\n"
+ "\n"
+ "Encryption:\n"
+ " unlock Unlock an encrypted filesystem prior to running/mounting\n"
+ " set-passphrase Change passphrase on an existing (unmounted) filesystem\n"
+ " remove-passphrase Remove passphrase on an existing (unmounted) filesystem\n"
+ "\n"
+ "Migrate:\n"
+ " migrate Migrate an existing filesystem to bcachefs, in place\n"
+ " migrate-superblock Add default superblock, after bcachefs migrate\n"
+ "\n"
+ "Commands for operating on files in a bcachefs filesystem:\n"
+ " setattr Set various per file attributes\n"
+ "\n"
+ "Debug:\n"
+ "These commands work on offline, unmounted filesystems\n"
+ " dump Dump filesystem metadata to a qcow2 image\n"
+ " list List filesystem metadata in textual form\n"
+ " list_journal List contents of journal\n"
+ "\n"
+ "FUSE:\n"
+ " fusemount Mount a filesystem via FUSE\n"
+ "\n"
+ "Miscellaneous:\n"
+ " completions Generate shell completions\n"
+ " version Display the version of the invoked bcachefs tool\n");
+}
+
+static char *pop_cmd(int *argc, char *argv[])
+{
+ char *cmd = argv[1];
+ if (!(*argc < 2))
+ memmove(&argv[1], &argv[2], (*argc - 2) * sizeof(argv[0]));
+ (*argc)--;
+ argv[*argc] = NULL;
+
+ return cmd;
+}
+
+int fs_cmds(int argc, char *argv[])
+{
+ char *cmd = pop_cmd(&argc, argv);
+
+ if (argc < 1) {
+ bcachefs_usage();
+ exit(EXIT_FAILURE);
+ }
+ if (!strcmp(cmd, "usage"))
+ return cmd_fs_usage(argc, argv);
+
+ return 0;
+}
+
+int device_cmds(int argc, char *argv[])
+{
+ char *cmd = pop_cmd(&argc, argv);
+
+ if (argc < 1)
+ return device_usage();
+ if (!strcmp(cmd, "add"))
+ return cmd_device_add(argc, argv);
+ if (!strcmp(cmd, "remove"))
+ return cmd_device_remove(argc, argv);
+ if (!strcmp(cmd, "online"))
+ return cmd_device_online(argc, argv);
+ if (!strcmp(cmd, "offline"))
+ return cmd_device_offline(argc, argv);
+ if (!strcmp(cmd, "evacuate"))
+ return cmd_device_evacuate(argc, argv);
+ if (!strcmp(cmd, "set-state"))
+ return cmd_device_set_state(argc, argv);
+ if (!strcmp(cmd, "resize"))
+ return cmd_device_resize(argc, argv);
+ if (!strcmp(cmd, "resize-journal"))
+ return cmd_device_resize_journal(argc, argv);
+
+ return 0;
+}
+
+int data_cmds(int argc, char *argv[])
+{
+ char *cmd = pop_cmd(&argc, argv);
+
+ if (argc < 1)
+ return data_usage();
+ if (!strcmp(cmd, "rereplicate"))
+ return cmd_data_rereplicate(argc, argv);
+ if (!strcmp(cmd, "job"))
+ return cmd_data_job(argc, argv);
+
+ return 0;
+}
+
+int subvolume_cmds(int argc, char *argv[])
+{
+ char *cmd = pop_cmd(&argc, argv);
+ if (argc < 1)
+ return subvolume_usage();
+ if (!strcmp(cmd, "create"))
+ return cmd_subvolume_create(argc, argv);
+ if (!strcmp(cmd, "delete"))
+ return cmd_subvolume_delete(argc, argv);
+ if (!strcmp(cmd, "snapshot"))
+ return cmd_subvolume_snapshot(argc, argv);
+
+ return 0;
+}
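
All of the dispatchers above share the pop_cmd() pattern: the subcommand name is taken from argv[1], the remaining arguments are shifted down one slot, and the selected cmd_* handler then receives a conventional argc/argv of its own. Below is a minimal, self-contained sketch of that behavior (not part of the patch; print_args() is a hypothetical stand-in for the real cmd_* handlers):

    /* Sketch of the pop_cmd() dispatch pattern used in bcachefs.c above. */
    #include <stdio.h>
    #include <string.h>

    static char *pop_cmd(int *argc, char *argv[])
    {
            char *cmd = argv[1];
            if (!(*argc < 2))
                    memmove(&argv[1], &argv[2], (*argc - 2) * sizeof(argv[0]));
            (*argc)--;
            argv[*argc] = NULL;
            return cmd;
    }

    /* Hypothetical handler: just shows what a cmd_* function would see. */
    static int print_args(int argc, char *argv[])
    {
            for (int i = 0; i < argc; i++)
                    printf("argv[%d] = %s\n", i, argv[i]);
            return 0;
    }

    int main(int argc, char *argv[])
    {
            /* e.g. "./demo device add /dev/sda1" pops "device", leaving
             * { "./demo", "add", "/dev/sda1" } for the handler. */
            char *cmd = pop_cmd(&argc, argv);

            if (argc < 1 || !cmd) {
                    fprintf(stderr, "usage: demo <command> [args...]\n");
                    return 1;
            }
            if (!strcmp(cmd, "device"))
                    return print_args(argc, argv);

            fprintf(stderr, "unknown command: %s\n", cmd);
            return 1;
    }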
diff --git a/c_src/ccan/array_size/LICENSE b/c_src/ccan/array_size/LICENSE
new file mode 100644
index 00000000..feb9b118
--- /dev/null
+++ b/c_src/ccan/array_size/LICENSE
@@ -0,0 +1,28 @@
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
+
+ the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
+ moral rights retained by the original author(s) and/or performer(s);
+ publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
+ rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
+ rights protecting the extraction, dissemination, use and reuse of data in a Work;
+ database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
+ other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
+ Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
+ Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
+ Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
diff --git a/c_src/ccan/array_size/_info b/c_src/ccan/array_size/_info
new file mode 100644
index 00000000..69570f34
--- /dev/null
+++ b/c_src/ccan/array_size/_info
@@ -0,0 +1,46 @@
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * array_size - routine for safely deriving the size of a visible array.
+ *
+ * This provides a simple ARRAY_SIZE() macro, which (given a good compiler)
+ * will also break compile if you try to use it on a pointer.
+ *
+ * This can ensure your code is robust to changes, without needing a gratuitous
+ * macro or constant.
+ *
+ * Example:
+ * // Outputs "Initialized 32 values\n"
+ * #include <ccan/array_size/array_size.h>
+ * #include <stdlib.h>
+ * #include <stdio.h>
+ *
+ * // We currently use 32 random values.
+ * static unsigned int vals[32];
+ *
+ * int main(void)
+ * {
+ * unsigned int i;
+ * for (i = 0; i < ARRAY_SIZE(vals); i++)
+ * vals[i] = random();
+ * printf("Initialized %u values\n", i);
+ * return 0;
+ * }
+ *
+ * License: CC0 (Public domain)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ */
+int main(int argc, char *argv[])
+{
+ if (argc != 2)
+ return 1;
+
+ if (strcmp(argv[1], "depends") == 0) {
+ printf("ccan/build_assert\n");
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/c_src/ccan/array_size/array_size.h b/c_src/ccan/array_size/array_size.h
new file mode 100644
index 00000000..0ca422a2
--- /dev/null
+++ b/c_src/ccan/array_size/array_size.h
@@ -0,0 +1,26 @@
+/* CC0 (Public domain) - see LICENSE file for details */
+#ifndef CCAN_ARRAY_SIZE_H
+#define CCAN_ARRAY_SIZE_H
+#include "config.h"
+#include <ccan/build_assert/build_assert.h>
+
+/**
+ * ARRAY_SIZE - get the number of elements in a visible array
+ * @arr: the array whose size you want.
+ *
+ * This does not work on pointers, or arrays declared as [], or
+ * function parameters. With correct compiler support, such usage
+ * will cause a build error (see build_assert).
+ */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + _array_size_chk(arr))
+
+#if HAVE_BUILTIN_TYPES_COMPATIBLE_P && HAVE_TYPEOF
+/* Two gcc extensions.
+ * &a[0] degrades to a pointer: a different type from an array */
+#define _array_size_chk(arr) \
+ BUILD_ASSERT_OR_ZERO(!__builtin_types_compatible_p(typeof(arr), \
+ typeof(&(arr)[0])))
+#else
+#define _array_size_chk(arr) 0
+#endif
+#endif /* CCAN_ARRAY_SIZE_H */
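
ARRAY_SIZE() pairs the usual sizeof division with _array_size_chk(), so on compilers with typeof and __builtin_types_compatible_p, handing it a pointer becomes a compile-time error rather than a silently wrong answer. A small usage sketch (illustrative only, not part of the patch):

    /* Assumes ccan/array_size is on the include path. */
    #include <stdio.h>
    #include <ccan/array_size/array_size.h>

    int main(void)
    {
            int nums[8];
            int *p = nums;

            printf("%zu\n", ARRAY_SIZE(nums));      /* prints 8 */

            /* ARRAY_SIZE(p) would fail to build here: &p[0] has the same
             * type as p, so _array_size_chk()'s BUILD_ASSERT_OR_ZERO fails. */
            (void)p;
            return 0;
    }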
diff --git a/c_src/ccan/build_assert/LICENSE b/c_src/ccan/build_assert/LICENSE
new file mode 100644
index 00000000..feb9b118
--- /dev/null
+++ b/c_src/ccan/build_assert/LICENSE
@@ -0,0 +1,28 @@
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
+
+ the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
+ moral rights retained by the original author(s) and/or performer(s);
+ publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
+ rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
+ rights protecting the extraction, dissemination, use and reuse of data in a Work;
+ database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
+ other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
+ Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
+ Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
+ Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
diff --git a/c_src/ccan/build_assert/_info b/c_src/ccan/build_assert/_info
new file mode 100644
index 00000000..97ebe6c9
--- /dev/null
+++ b/c_src/ccan/build_assert/_info
@@ -0,0 +1,49 @@
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * build_assert - routines for build-time assertions
+ *
+ * This code provides routines which will cause compilation to fail should some
+ * assertion be untrue: such failures are preferable to run-time assertions,
+ * but much more limited since they can only depend on compile-time constants.
+ *
+ * These assertions are most useful when two parts of the code must be kept in
+ * sync: it is better to avoid such cases if possible, but seconds best is to
+ * detect invalid changes at build time.
+ *
+ * For example, a tricky piece of code might rely on a certain element being at
+ * the start of the structure. To ensure that future changes don't break it,
+ * you would catch such changes in your code like so:
+ *
+ * Example:
+ * #include <stddef.h>
+ * #include <ccan/build_assert/build_assert.h>
+ *
+ * struct foo {
+ * char string[5];
+ * int x;
+ * };
+ *
+ * static char *foo_string(struct foo *foo)
+ * {
+ * // This trick requires that the string be first in the structure
+ * BUILD_ASSERT(offsetof(struct foo, string) == 0);
+ * return (char *)foo;
+ * }
+ *
+ * License: CC0 (Public domain)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ */
+int main(int argc, char *argv[])
+{
+ if (argc != 2)
+ return 1;
+
+ if (strcmp(argv[1], "depends") == 0)
+ /* Nothing. */
+ return 0;
+
+ return 1;
+}
diff --git a/c_src/ccan/build_assert/build_assert.h b/c_src/ccan/build_assert/build_assert.h
new file mode 100644
index 00000000..b9ecd840
--- /dev/null
+++ b/c_src/ccan/build_assert/build_assert.h
@@ -0,0 +1,40 @@
+/* CC0 (Public domain) - see LICENSE file for details */
+#ifndef CCAN_BUILD_ASSERT_H
+#define CCAN_BUILD_ASSERT_H
+
+/**
+ * BUILD_ASSERT - assert a build-time dependency.
+ * @cond: the compile-time condition which must be true.
+ *
+ * Your compile will fail if the condition isn't true, or can't be evaluated
+ * by the compiler. This can only be used within a function.
+ *
+ * Example:
+ * #include <stddef.h>
+ * ...
+ * static char *foo_to_char(struct foo *foo)
+ * {
+ * // This code needs string to be at start of foo.
+ * BUILD_ASSERT(offsetof(struct foo, string) == 0);
+ * return (char *)foo;
+ * }
+ */
+#define BUILD_ASSERT(cond) \
+ do { (void) sizeof(char [1 - 2*!(cond)]); } while(0)
+
+/**
+ * BUILD_ASSERT_OR_ZERO - assert a build-time dependency, as an expression.
+ * @cond: the compile-time condition which must be true.
+ *
+ * Your compile will fail if the condition isn't true, or can't be evaluated
+ * by the compiler. This can be used in an expression: its value is "0".
+ *
+ * Example:
+ * #define foo_to_char(foo) \
+ * ((char *)(foo) \
+ * + BUILD_ASSERT_OR_ZERO(offsetof(struct foo, string) == 0))
+ */
+#define BUILD_ASSERT_OR_ZERO(cond) \
+ (sizeof(char [1 - 2*!(cond)]) - 1)
+
+#endif /* CCAN_BUILD_ASSERT_H */
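
Both macros rely on the pre-C11 negative-array-size trick rather than _Static_assert: sizeof(char [1 - 2*!(cond)]) is sizeof(char [1]) when the condition holds and sizeof(char [-1]), a constraint violation, when it does not. A minimal sketch of how that plays out (not part of the patch):

    #include <ccan/build_assert/build_assert.h>

    int main(void)
    {
            BUILD_ASSERT(sizeof(int) >= 2);          /* fine: char[1] */
            /* BUILD_ASSERT(sizeof(int) == 1); */    /* would fail: char[-1] */

            /* The _OR_ZERO form evaluates to 0, so it can sit in an expression: */
            int x = 5 + BUILD_ASSERT_OR_ZERO(sizeof(long) >= sizeof(int));
            return x - 5;                            /* returns 0 */
    }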
diff --git a/c_src/ccan/compiler/LICENSE b/c_src/ccan/compiler/LICENSE
new file mode 100644
index 00000000..feb9b118
--- /dev/null
+++ b/c_src/ccan/compiler/LICENSE
@@ -0,0 +1,28 @@
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
+
+ the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
+ moral rights retained by the original author(s) and/or performer(s);
+ publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
+ rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
+ rights protecting the extraction, dissemination, use and reuse of data in a Work;
+ database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
+ other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
+ Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
+ Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
+ Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
diff --git a/c_src/ccan/compiler/_info b/c_src/ccan/compiler/_info
new file mode 100644
index 00000000..d60dff4d
--- /dev/null
+++ b/c_src/ccan/compiler/_info
@@ -0,0 +1,64 @@
+#include "config.h"
+#include <string.h>
+#include <stdio.h>
+
+/**
+ * compiler - macros for common compiler extensions
+ *
+ * Abstracts away some compiler hints. Currently these include:
+ * - COLD
+ * For functions not called in fast paths (aka. cold functions)
+ * - PRINTF_FMT
+ * For functions which take printf-style parameters.
+ * - CONST_FUNCTION
+ * For functions which return the same value for same parameters.
+ * - NEEDED
+ * For functions and variables which must be emitted even if unused.
+ * - UNNEEDED
+ * For functions and variables which need not be emitted if unused.
+ * - UNUSED
+ * For parameters which are not used.
+ * - IS_COMPILE_CONSTANT()
+ * For using different tradeoffs for compiletime vs runtime evaluation.
+ *
+ * License: CC0 (Public domain)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ *
+ * Example:
+ * #include <ccan/compiler/compiler.h>
+ * #include <stdio.h>
+ * #include <stdarg.h>
+ *
+ * // Example of a (slow-path) logging function.
+ * static int log_threshold = 2;
+ * static void COLD PRINTF_FMT(2,3)
+ * logger(int level, const char *fmt, ...)
+ * {
+ * va_list ap;
+ * va_start(ap, fmt);
+ * if (level >= log_threshold)
+ * vfprintf(stderr, fmt, ap);
+ * va_end(ap);
+ * }
+ *
+ * int main(int argc, char *argv[])
+ * {
+ * if (argc != 1) {
+ * logger(3, "Don't want %i arguments!\n", argc-1);
+ * return 1;
+ * }
+ * return 0;
+ * }
+ */
+int main(int argc, char *argv[])
+{
+ /* Expect exactly one argument */
+ if (argc != 2)
+ return 1;
+
+ if (strcmp(argv[1], "depends") == 0) {
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/c_src/ccan/compiler/compiler.h b/c_src/ccan/compiler/compiler.h
new file mode 100644
index 00000000..bce4f25a
--- /dev/null
+++ b/c_src/ccan/compiler/compiler.h
@@ -0,0 +1,231 @@
+/* CC0 (Public domain) - see LICENSE file for details */
+#ifndef CCAN_COMPILER_H
+#define CCAN_COMPILER_H
+#include "config.h"
+
+#ifndef COLD
+#if HAVE_ATTRIBUTE_COLD
+/**
+ * COLD - a function is unlikely to be called.
+ *
+ * Used to mark an unlikely code path and optimize appropriately.
+ * It is usually used on logging or error routines.
+ *
+ * Example:
+ * static void COLD moan(const char *reason)
+ * {
+ * fprintf(stderr, "Error: %s (%s)\n", reason, strerror(errno));
+ * }
+ */
+#define COLD __attribute__((__cold__))
+#else
+#define COLD
+#endif
+#endif
+
+#ifndef NORETURN
+#if HAVE_ATTRIBUTE_NORETURN
+/**
+ * NORETURN - a function does not return
+ *
+ * Used to mark a function which exits; useful for suppressing warnings.
+ *
+ * Example:
+ * static void NORETURN fail(const char *reason)
+ * {
+ * fprintf(stderr, "Error: %s (%s)\n", reason, strerror(errno));
+ * exit(1);
+ * }
+ */
+#define NORETURN __attribute__((__noreturn__))
+#else
+#define NORETURN
+#endif
+#endif
+
+#ifndef PRINTF_FMT
+#if HAVE_ATTRIBUTE_PRINTF
+/**
+ * PRINTF_FMT - a function takes printf-style arguments
+ * @nfmt: the 1-based number of the function's format argument.
+ * @narg: the 1-based number of the function's first variable argument.
+ *
+ * This allows the compiler to check your parameters as it does for printf().
+ *
+ * Example:
+ * void PRINTF_FMT(2,3) my_printf(const char *prefix, const char *fmt, ...);
+ */
+#define PRINTF_FMT(nfmt, narg) \
+ __attribute__((format(__printf__, nfmt, narg)))
+#else
+#define PRINTF_FMT(nfmt, narg)
+#endif
+#endif
+
+#ifndef CONST_FUNCTION
+#if HAVE_ATTRIBUTE_CONST
+/**
+ * CONST_FUNCTION - a function's return depends only on its argument
+ *
+ * This allows the compiler to assume that the function will return the exact
+ * same value for the exact same arguments. This implies that the function
+ * must not use global variables, or dereference pointer arguments.
+ */
+#define CONST_FUNCTION __attribute__((__const__))
+#else
+#define CONST_FUNCTION
+#endif
+
+#ifndef PURE_FUNCTION
+#if HAVE_ATTRIBUTE_PURE
+/**
+ * PURE_FUNCTION - a function is pure
+ *
+ * A pure function is one that has no side effects other than its return value
+ * and uses no inputs other than its arguments and global variables.
+ */
+#define PURE_FUNCTION __attribute__((__pure__))
+#else
+#define PURE_FUNCTION
+#endif
+#endif
+#endif
+
+#if HAVE_ATTRIBUTE_UNUSED
+#ifndef UNNEEDED
+/**
+ * UNNEEDED - a variable/function may not be needed
+ *
+ * This suppresses warnings about unused variables or functions, but tells
+ * the compiler that if it is unused it need not emit it into the source code.
+ *
+ * Example:
+ * // With some preprocessor options, this is unnecessary.
+ * static UNNEEDED int counter;
+ *
+ * // With some preprocessor options, this is unnecessary.
+ * static UNNEEDED void add_to_counter(int add)
+ * {
+ * counter += add;
+ * }
+ */
+#define UNNEEDED __attribute__((__unused__))
+#endif
+
+#ifndef NEEDED
+#if HAVE_ATTRIBUTE_USED
+/**
+ * NEEDED - a variable/function is needed
+ *
+ * This suppresses warnings about unused variables or functions, but tells
+ * the compiler that it must exist even if it (seems) unused.
+ *
+ * Example:
+ * // Even if this is unused, these are vital for debugging.
+ * static NEEDED int counter;
+ * static NEEDED void dump_counter(void)
+ * {
+ * printf("Counter is %i\n", counter);
+ * }
+ */
+#define NEEDED __attribute__((__used__))
+#else
+/* Before used, unused functions and vars were always emitted. */
+#define NEEDED __attribute__((__unused__))
+#endif
+#endif
+
+#ifndef UNUSED
+/**
+ * UNUSED - a parameter is unused
+ *
+ * Some compilers (eg. gcc with -W or -Wunused) warn about unused
+ * function parameters. This suppresses such warnings and indicates
+ * to the reader that it's deliberate.
+ *
+ * Example:
+ * // This is used as a callback, so needs to have this prototype.
+ * static int some_callback(void *unused UNUSED)
+ * {
+ * return 0;
+ * }
+ */
+#define UNUSED __attribute__((__unused__))
+#endif
+#else
+#ifndef UNNEEDED
+#define UNNEEDED
+#endif
+#ifndef NEEDED
+#define NEEDED
+#endif
+#ifndef UNUSED
+#define UNUSED
+#endif
+#endif
+
+#ifndef IS_COMPILE_CONSTANT
+#if HAVE_BUILTIN_CONSTANT_P
+/**
+ * IS_COMPILE_CONSTANT - does the compiler know the value of this expression?
+ * @expr: the expression to evaluate
+ *
+ * When an expression manipulation is complicated, it is usually better to
+ * implement it in a function. However, if the expression being manipulated is
+ * known at compile time, it is better to have the compiler see the entire
+ * expression so it can simply substitute the result.
+ *
+ * This can be done using the IS_COMPILE_CONSTANT() macro.
+ *
+ * Example:
+ * enum greek { ALPHA, BETA, GAMMA, DELTA, EPSILON };
+ *
+ * // Out-of-line version.
+ * const char *greek_name(enum greek greek);
+ *
+ * // Inline version.
+ * static inline const char *_greek_name(enum greek greek)
+ * {
+ * switch (greek) {
+ * case ALPHA: return "alpha";
+ * case BETA: return "beta";
+ * case GAMMA: return "gamma";
+ * case DELTA: return "delta";
+ * case EPSILON: return "epsilon";
+ * default: return "**INVALID**";
+ * }
+ * }
+ *
+ * // Use inline if compiler knows answer. Otherwise call function
+ * // to avoid copies of the same code everywhere.
+ * #define greek_name(g) \
+ * (IS_COMPILE_CONSTANT(g) ? _greek_name(g) : greek_name(g))
+ */
+#define IS_COMPILE_CONSTANT(expr) __builtin_constant_p(expr)
+#else
+/* If we don't know, assume it's not. */
+#define IS_COMPILE_CONSTANT(expr) 0
+#endif
+#endif
+
+#ifndef WARN_UNUSED_RESULT
+#if HAVE_WARN_UNUSED_RESULT
+/**
+ * WARN_UNUSED_RESULT - warn if a function return value is unused.
+ *
+ * Used to mark a function where it is extremely unlikely that the caller
+ * can ignore the result, eg realloc().
+ *
+ * Example:
+ * // buf param may be freed by this; need return value!
+ * static char *WARN_UNUSED_RESULT enlarge(char *buf, unsigned *size)
+ * {
+ * return realloc(buf, (*size) *= 2);
+ * }
+ */
+#define WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
+#else
+#define WARN_UNUSED_RESULT
+#endif
+#endif
+#endif /* CCAN_COMPILER_H */
diff --git a/c_src/cmd_assemble.c b/c_src/cmd_assemble.c
new file mode 100644
index 00000000..a997e1e1
--- /dev/null
+++ b/c_src/cmd_assemble.c
@@ -0,0 +1,48 @@
+
+#include <alloca.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "cmds.h"
+#include "libbcachefs.h"
+
+#if 0
+int cmd_assemble(int argc, char *argv[])
+{
+ unsigned nr_devs = argc - 1;
+
+ if (argc <= 1)
+ die("Please supply at least one device");
+
+ struct bch_ioctl_assemble *assemble =
+ alloca(sizeof(*assemble) + sizeof(__u64) * nr_devs);
+
+ memset(assemble, 0, sizeof(*assemble));
+ assemble->nr_devs = nr_devs;
+
+ unsigned i;
+ for (i = 0; i < nr_devs; i++)
+ assemble->devs[i] = (unsigned long) argv[i + 1];
+
+ xioctl(bcachectl_open(), BCH_IOCTL_ASSEMBLE, assemble);
+ return 0;
+}
+
+int cmd_incremental(int argc, char *argv[])
+{
+ if (argc != 2)
+ die("Please supply exactly one device");
+
+ struct bch_ioctl_incremental incremental = {
+ .dev = (unsigned long) argv[1],
+ };
+
+ xioctl(bcachectl_open(), BCH_IOCTL_INCREMENTAL, &incremental);
+ return 0;
+}
+#endif
diff --git a/c_src/cmd_attr.c b/c_src/cmd_attr.c
new file mode 100644
index 00000000..bde9d8f6
--- /dev/null
+++ b/c_src/cmd_attr.c
@@ -0,0 +1,119 @@
+#include <dirent.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+
+#include "cmds.h"
+#include "libbcachefs.h"
+
+static void propagate_recurse(int dirfd)
+{
+ DIR *dir = fdopendir(dirfd);
+ struct dirent *d;
+
+ if (!dir) {
+ fprintf(stderr, "fdopendir() error: %m\n");
+ return;
+ }
+
+ while ((errno = 0), (d = readdir(dir))) {
+ if (!strcmp(d->d_name, ".") ||
+ !strcmp(d->d_name, ".."))
+ continue;
+
+ int ret = ioctl(dirfd, BCHFS_IOC_REINHERIT_ATTRS,
+ d->d_name);
+ if (ret < 0) {
+ fprintf(stderr, "error propagating attributes to %s: %m\n",
+ d->d_name);
+ continue;
+ }
+
+ if (!ret) /* did no work */
+ continue;
+
+ struct stat st = xfstatat(dirfd, d->d_name,
+ AT_SYMLINK_NOFOLLOW);
+ if (!S_ISDIR(st.st_mode))
+ continue;
+
+ int fd = openat(dirfd, d->d_name, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "error opening %s: %m\n", d->d_name);
+ continue;
+ }
+ propagate_recurse(fd);
+ close(fd);
+ }
+
+ if (errno)
+ die("readdir error: %m");
+}
+
+static void do_setattr(char *path, struct bch_opt_strs opts)
+{
+ unsigned i;
+
+ for (i = 0; i < bch2_opts_nr; i++) {
+ if (!opts.by_id[i])
+ continue;
+
+ char *n = mprintf("bcachefs.%s", bch2_opt_table[i].attr.name);
+
+ if (setxattr(path, n, opts.by_id[i], strlen(opts.by_id[i]), 0))
+ die("setxattr error: %m");
+
+ free(n);
+ }
+
+ struct stat st = xstat(path);
+ if (!S_ISDIR(st.st_mode))
+ return;
+
+ int dirfd = open(path, O_RDONLY);
+ if (dirfd < 0)
+ die("error opening %s: %m", path);
+
+ propagate_recurse(dirfd);
+ close(dirfd);
+}
+
+static void setattr_usage(void)
+{
+ puts("bcachefs setattr - set attributes on files in a bcachefs filesystem\n"
+ "Usage: bcachefs setattr [OPTIONS]... <files>\n"
+ "\n"
+ "Options:");
+
+ bch2_opts_usage(OPT_INODE);
+ puts(" -h Display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_setattr(int argc, char *argv[])
+{
+ struct bch_opt_strs opts =
+ bch2_cmdline_opts_get(&argc, argv, OPT_INODE);
+ unsigned i;
+
+ for (i = 1; i < argc; i++)
+ if (argv[i][0] == '-') {
+ printf("invalid option %s\n", argv[i]);
+ setattr_usage();
+ exit(EXIT_FAILURE);
+ }
+
+ if (argc <= 1)
+ die("Please supply one or more files");
+
+ for (i = 1; i < argc; i++)
+ do_setattr(argv[i], opts);
+ bch2_opt_strs_free(&opts);
+
+ return 0;
+}
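+
+/*
+ * Example invocation (an illustrative sketch; "compression" stands in for any
+ * OPT_INODE option understood by the running bcachefs version):
+ *
+ * $ bcachefs setattr --compression=zstd /mnt/data
+ *
+ * Each option is stored as a "bcachefs.<name>" xattr on the target; for a
+ * directory, BCHFS_IOC_REINHERIT_ATTRS is then applied recursively so the
+ * existing contents pick up the new inherited value.
+ */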
diff --git a/c_src/cmd_counters.c b/c_src/cmd_counters.c
new file mode 100644
index 00000000..9adde242
--- /dev/null
+++ b/c_src/cmd_counters.c
@@ -0,0 +1,51 @@
+#include <getopt.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "libbcachefs/super-io.h"
+
+static void reset_counters_usage(void)
+{
+ puts("bcachefs reset-counters \n"
+ "Usage: bcachefs reset-counters device\n"
+ "\n"
+ "Options:\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+int cmd_reset_counters(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "help", 0, NULL, 'h' },
+ { NULL }
+ };
+ int opt;
+
+ while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'h':
+ reset_counters_usage();
+ break;
+ }
+ args_shift(optind);
+
+ char *dev = arg_pop();
+ if (!dev)
+ die("please supply a device");
+ if (argc)
+ die("too many arguments");
+
+ struct bch_opts opts = bch2_opts_empty();
+ struct bch_sb_handle sb;
+ int ret = bch2_read_super(dev, &opts, &sb);
+ if (ret)
+ die("Error opening %s: %s", dev, bch2_err_str(ret));
+
+ bch2_sb_field_resize(&sb, counters, 0);
+
+ bch2_super_write(sb.bdev->bd_fd, sb.sb);
+ bch2_free_super(&sb);
+ return 0;
+}
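+
+/*
+ * Example invocation (illustrative): clear the accumulated counters section
+ * in a device's superblock by resizing it to zero and rewriting the
+ * superblock:
+ *
+ * $ bcachefs reset-counters /dev/sdb
+ */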
diff --git a/c_src/cmd_data.c b/c_src/cmd_data.c
new file mode 100644
index 00000000..1ef689bc
--- /dev/null
+++ b/c_src/cmd_data.c
@@ -0,0 +1,127 @@
+
+
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/btree_cache.h"
+#include "libbcachefs/move.h"
+
+#include "cmds.h"
+#include "libbcachefs.h"
+
+int data_usage(void)
+{
+ puts("bcachefs data - manage filesystem data\n"
+ "Usage: bcachefs data <CMD> [OPTIONS]\n"
+ "\n"
+ "Commands:\n"
+ " rereplicate Rereplicate degraded data\n"
+ " job Kick off low level data jobs\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ return 0;
+}
+
+static void data_rereplicate_usage(void)
+{
+ puts("bcachefs data rereplicate\n"
+ "Usage: bcachefs data rereplicate filesystem\n"
+ "\n"
+ "Walks existing data in a filesystem, writing additional copies\n"
+ "of any degraded data\n"
+ "\n"
+ "Options:\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+int cmd_data_rereplicate(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "h")) != -1)
+ switch (opt) {
+ case 'h':
+ data_rereplicate_usage();
+ }
+ args_shift(optind);
+
+ char *fs_path = arg_pop();
+ if (!fs_path)
+ die("Please supply a filesystem");
+
+ if (argc)
+ die("too many arguments");
+
+ return bchu_data(bcache_fs_open(fs_path), (struct bch_ioctl_data) {
+ .op = BCH_DATA_OP_rereplicate,
+ .start_btree = 0,
+ .start_pos = POS_MIN,
+ .end_btree = BTREE_ID_NR,
+ .end_pos = POS_MAX,
+ });
+}
+
+static void data_job_usage(void)
+{
+ puts("bcachefs data job\n"
+ "Usage: bcachefs data job [job} filesystem\n"
+ "\n"
+ "Kick off a data job and report progress\n"
+ "\n"
+ "job: one of scrub, rereplicate, migrate, rewrite_old_nodes, or drop_extra_replicas\n"
+ "\n"
+ "Options:\n"
+ " -b btree btree to operate on\n"
+ " -s inode:offset start position\n"
+ " -e inode:offset end position\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+int cmd_data_job(int argc, char *argv[])
+{
+ struct bch_ioctl_data op = {
+ .start_btree = 0,
+ .start_pos = POS_MIN,
+ .end_btree = BTREE_ID_NR,
+ .end_pos = POS_MAX,
+ };
+ int opt;
+
+ while ((opt = getopt(argc, argv, "s:e:h")) != -1)
+ switch (opt) {
+ case 'b':
+ op.start_btree = read_string_list_or_die(optarg,
+ __bch2_btree_ids, "btree id");
+ op.end_btree = op.start_btree;
+ break;
+ case 's':
+ op.start_pos = bpos_parse(optarg);
+ break;
+ case 'e':
+ op.end_pos = bpos_parse(optarg);
+ break;
+ case 'h':
+ data_job_usage();
+ }
+ args_shift(optind);
+
+ char *job = arg_pop();
+ if (!job)
+ die("please specify which type of job");
+
+ op.op = read_string_list_or_die(job, bch2_data_ops_strs, "bad job type");
+
+ char *fs_path = arg_pop();
+ if (!fs_path)
+ fs_path = ".";
+
+ if (argc)
+ die("too many arguments");
+
+ return bchu_data(bcache_fs_open(fs_path), op);
+}
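+
+/*
+ * Example invocation (illustrative): rereplicate only the extents btree,
+ * between two positions given as inode:offset (as parsed by bpos_parse()):
+ *
+ * $ bcachefs data job -b extents -s 4096:0 -e 4096:1024 rereplicate /mnt
+ */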
diff --git a/c_src/cmd_device.c b/c_src/cmd_device.c
new file mode 100644
index 00000000..b4bcd345
--- /dev/null
+++ b/c_src/cmd_device.c
@@ -0,0 +1,647 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/journal.h"
+#include "libbcachefs/sb-members.h"
+#include "libbcachefs/super-io.h"
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "libbcachefs/opts.h"
+#include "tools-util.h"
+
+int device_usage(void)
+{
+ puts("bcachefs device - manage devices within a running filesystem\n"
+ "Usage: bcachefs device <CMD> [OPTION]\n"
+ "\n"
+ "Commands:\n"
+ " add add a new device to an existing filesystem\n"
+ " remove remove a device from an existing filesystem\n"
+ " online re-add an existing member to a filesystem\n"
+ " offline take a device offline, without removing it\n"
+ " evacuate migrate data off a specific device\n"
+ " set-state mark a device as failed\n"
+ " resize resize filesystem on a device\n"
+ " resize-journal resize journal on a device\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ return 0;
+}
+
+static void device_add_usage(void)
+{
+ puts("bcachefs device add - add a device to an existing filesystem\n"
+ "Usage: bcachefs device add [OPTION]... filesystem device\n"
+ "\n"
+ "Options:\n"
+ " -S, --fs_size=size Size of filesystem on device\n"
+ " -B, --bucket=size Bucket size\n"
+ " -D, --discard Enable discards\n"
+ " -l, --label=label Disk label\n"
+ " -f, --force Use device even if it appears to already be formatted\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_device_add(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "fs_size", required_argument, NULL, 'S' },
+ { "bucket", required_argument, NULL, 'B' },
+ { "discard", no_argument, NULL, 'D' },
+ { "label", required_argument, NULL, 'l' },
+ { "force", no_argument, NULL, 'f' },
+ { "help", no_argument, NULL, 'h' },
+ { NULL }
+ };
+ struct format_opts format_opts = format_opts_default();
+ struct dev_opts dev_opts = dev_opts_default();
+ bool force = false;
+ int opt;
+
+ while ((opt = getopt_long(argc, argv, "t:fh",
+ longopts, NULL)) != -1)
+ switch (opt) {
+ case 'S':
+ if (bch2_strtoull_h(optarg, &dev_opts.size))
+ die("invalid filesystem size");
+ break;
+ case 'B':
+ if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
+ die("bad bucket_size %s", optarg);
+ break;
+ case 'D':
+ dev_opts.discard = true;
+ break;
+ case 'l':
+ dev_opts.label = strdup(optarg);
+ break;
+ case 'f':
+ force = true;
+ break;
+ case 'h':
+ device_add_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ char *fs_path = arg_pop();
+ if (!fs_path)
+ die("Please supply a filesystem");
+
+ dev_opts.path = arg_pop();
+ if (!dev_opts.path)
+ die("Please supply a device");
+
+ if (argc)
+ die("too many arguments");
+
+ struct bchfs_handle fs = bcache_fs_open(fs_path);
+
+ int ret = open_for_format(&dev_opts, force);
+ if (ret)
+ die("Error opening %s: %s", dev_opts.path, strerror(-ret));
+
+ struct bch_opt_strs fs_opt_strs;
+ memset(&fs_opt_strs, 0, sizeof(fs_opt_strs));
+
+ struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+
+ opt_set(fs_opts, block_size,
+ read_file_u64(fs.sysfs_fd, "options/block_size"));
+ opt_set(fs_opts, btree_node_size,
+ read_file_u64(fs.sysfs_fd, "options/btree_node_size"));
+
+ struct bch_sb *sb = bch2_format(fs_opt_strs,
+ fs_opts,
+ format_opts,
+ &dev_opts, 1);
+ free(sb);
+ bchu_disk_add(fs, dev_opts.path);
+ return 0;
+}
+
+static void device_remove_usage(void)
+{
+ puts("bcachefs device_remove - remove a device from a filesystem\n"
+ "Usage:\n"
+ " bcachefs device remove <device>|<devid> <path>\n"
+ "\n"
+ "Options:\n"
+ " -f, --force Force removal, even if some data\n"
+ " couldn't be migrated\n"
+ " -F, --force-metadata Force removal, even if some metadata\n"
+ " couldn't be migrated\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+int cmd_device_remove(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "by-id", 0, NULL, 'i' },
+ { "force", 0, NULL, 'f' },
+ { "force-metadata", 0, NULL, 'F' },
+ { "help", 0, NULL, 'h' },
+ { NULL }
+ };
+ struct bchfs_handle fs;
+ bool by_id = false;
+ int opt, flags = BCH_FORCE_IF_DEGRADED, dev_idx;
+
+ while ((opt = getopt_long(argc, argv, "fh", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'f':
+ flags |= BCH_FORCE_IF_DATA_LOST;
+ break;
+ case 'F':
+ flags |= BCH_FORCE_IF_METADATA_LOST;
+ break;
+ case 'h':
+ device_remove_usage();
+ }
+ args_shift(optind);
+
+ char *dev_str = arg_pop();
+ if (!dev_str)
+ die("Please supply a device");
+
+ char *end;
+ dev_idx = strtoul(dev_str, &end, 10);
+ if (*dev_str && !*end)
+ by_id = true;
+
+ char *fs_path = arg_pop();
+ if (fs_path) {
+ fs = bcache_fs_open(fs_path);
+
+ if (!by_id) {
+ dev_idx = bchu_dev_path_to_idx(fs, dev_str);
+ if (dev_idx < 0)
+ die("%s does not seem to be a member of %s",
+ dev_str, fs_path);
+ }
+ } else if (!by_id) {
+ fs = bchu_fs_open_by_dev(dev_str, &dev_idx);
+ } else {
+ die("Filesystem path required when specifying device by id");
+ }
+
+ bchu_disk_remove(fs, dev_idx, flags);
+ return 0;
+}
+
+static void device_online_usage(void)
+{
+ puts("bcachefs device online - readd a device to a running filesystem\n"
+ "Usage: bcachefs device online [OPTION]... device\n"
+ "\n"
+ "Options:\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_device_online(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "h")) != -1)
+ switch (opt) {
+ case 'h':
+ device_online_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ char *dev = arg_pop();
+ if (!dev)
+ die("Please supply a device");
+
+ if (argc)
+ die("too many arguments");
+
+ int dev_idx;
+ struct bchfs_handle fs = bchu_fs_open_by_dev(dev, &dev_idx);
+ bchu_disk_online(fs, dev);
+ return 0;
+}
+
+static void device_offline_usage(void)
+{
+ puts("bcachefs device offline - take a device offline, without removing it\n"
+ "Usage: bcachefs device offline [OPTION]... device\n"
+ "\n"
+ "Options:\n"
+ " -f, --force Force, if data redundancy will be degraded\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_device_offline(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "force", 0, NULL, 'f' },
+ { NULL }
+ };
+ int opt, flags = 0;
+
+ while ((opt = getopt_long(argc, argv, "fh",
+ longopts, NULL)) != -1)
+ switch (opt) {
+ case 'f':
+ flags |= BCH_FORCE_IF_DEGRADED;
+ break;
+ case 'h':
+ device_offline_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ char *dev = arg_pop();
+ if (!dev)
+ die("Please supply a device");
+
+ if (argc)
+ die("too many arguments");
+
+ int dev_idx;
+ struct bchfs_handle fs = bchu_fs_open_by_dev(dev, &dev_idx);
+ bchu_disk_offline(fs, dev_idx, flags);
+ return 0;
+}
+
+static void device_evacuate_usage(void)
+{
+ puts("bcachefs device evacuate - move data off of a given device\n"
+ "Usage: bcachefs device evacuate [OPTION]... device\n"
+ "\n"
+ "Options:\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_device_evacuate(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "h")) != -1)
+ switch (opt) {
+ case 'h':
+ device_evacuate_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ char *dev_path = arg_pop();
+ if (!dev_path)
+ die("Please supply a device");
+
+ if (argc)
+ die("too many arguments");
+
+ int dev_idx;
+ struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &dev_idx);
+
+ struct bch_ioctl_dev_usage_v2 *u = bchu_dev_usage(fs, dev_idx);
+
+ if (u->state == BCH_MEMBER_STATE_rw) {
+ printf("Setting %s readonly\n", dev_path);
+ bchu_disk_set_state(fs, dev_idx, BCH_MEMBER_STATE_ro, 0);
+ }
+
+ free(u);
+
+ return bchu_data(fs, (struct bch_ioctl_data) {
+ .op = BCH_DATA_OP_migrate,
+ .start_btree = 0,
+ .start_pos = POS_MIN,
+ .end_btree = BTREE_ID_NR,
+ .end_pos = POS_MAX,
+ .migrate.dev = dev_idx,
+ });
+}
+
+static void device_set_state_usage(void)
+{
+ puts("bcachefs device set-state\n"
+ "Usage: bcachefs device set-state <new-state> <device>|<devid> <path>\n"
+ "\n"
+ "<new-state>: one of rw, ro, failed or spare\n"
+ "<path>: path to mounted filesystem, optional unless specifying device by id\n"
+ "\n"
+ "Options:\n"
+ " -f, --force Force, if data redundancy will be degraded\n"
+ " --force-if-data-lost Force, if data will be lost\n"
+ " -o, --offline Set state of an offline device\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+int cmd_device_set_state(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "force", 0, NULL, 'f' },
+ { "force-if-data-lost", 0, NULL, 'F' },
+ { "offline", 0, NULL, 'o' },
+ { "help", 0, NULL, 'h' },
+ { NULL }
+ };
+ struct bchfs_handle fs;
+ bool by_id = false;
+ int opt, flags = 0, dev_idx;
+ bool offline = false;
+
+ while ((opt = getopt_long(argc, argv, "foh", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'f':
+ flags |= BCH_FORCE_IF_DEGRADED;
+ break;
+ case 'F':
+ flags |= BCH_FORCE_IF_DEGRADED;
+ flags |= BCH_FORCE_IF_LOST;
+ break;
+ case 'o':
+ offline = true;
+ break;
+ case 'h':
+ device_set_state_usage();
+ }
+ args_shift(optind);
+
+ char *new_state_str = arg_pop();
+ if (!new_state_str)
+ die("Please supply a device state");
+
+ unsigned new_state = read_string_list_or_die(new_state_str,
+ bch2_member_states, "device state");
+
+ char *dev_str = arg_pop();
+ if (!dev_str)
+ die("Please supply a device");
+
+ char *end;
+ dev_idx = strtoul(dev_str, &end, 10);
+ if (*dev_str && !*end)
+ by_id = true;
+
+ if (offline) {
+ struct bch_opts opts = bch2_opts_empty();
+ struct bch_sb_handle sb = { NULL };
+
+ if (by_id)
+ die("Cannot specify offline device by id");
+
+ int ret = bch2_read_super(dev_str, &opts, &sb);
+ if (ret)
+ die("error opening %s: %s", dev_str, bch2_err_str(ret));
+
+ struct bch_member *m = bch2_members_v2_get_mut(sb.sb, sb.sb->dev_idx);
+
+ SET_BCH_MEMBER_STATE(m, new_state);
+
+ le64_add_cpu(&sb.sb->seq, 1);
+
+ bch2_super_write(sb.bdev->bd_fd, sb.sb);
+ ret = fsync(sb.bdev->bd_fd);
+ if (ret)
+ fprintf(stderr, "error writing superblock: fsync error (%m)");
+ bch2_free_super(&sb);
+ return ret;
+ }
+
+ char *fs_path = arg_pop();
+ if (fs_path) {
+ fs = bcache_fs_open(fs_path);
+
+ if (!by_id) {
+ dev_idx = bchu_dev_path_to_idx(fs, dev_str);
+ if (dev_idx < 0)
+ die("%s does not seem to be a member of %s",
+ dev_str, fs_path);
+ }
+ } else if (!by_id) {
+ fs = bchu_fs_open_by_dev(dev_str, &dev_idx);
+ } else {
+ die("Filesystem path required when specifying device by id");
+ }
+
+ bchu_disk_set_state(fs, dev_idx, new_state, flags);
+
+ return 0;
+}
+
+static void device_resize_usage(void)
+{
+ puts("bcachefs device resize \n"
+ "Usage: bcachefs device resize device [ size ]\n"
+ "\n"
+ "Options:\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+int cmd_device_resize(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "help", 0, NULL, 'h' },
+ { NULL }
+ };
+ u64 size;
+ int opt;
+
+ while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'h':
+ device_resize_usage();
+ }
+ args_shift(optind);
+
+ char *dev = arg_pop();
+ if (!dev)
+ die("Please supply a device to resize");
+
+ int dev_fd = xopen(dev, O_RDONLY);
+
+ char *size_arg = arg_pop();
+ if (!size_arg)
+ size = get_size(dev_fd);
+ else if (bch2_strtoull_h(size_arg, &size))
+ die("invalid size");
+
+ size >>= 9;
+
+ if (argc)
+ die("Too many arguments");
+
+ struct stat dev_stat = xfstat(dev_fd);
+
+ struct mntent *mount = dev_to_mount(dev);
+ if (mount) {
+ if (!S_ISBLK(dev_stat.st_mode))
+ die("%s is mounted but isn't a block device?!", dev);
+
+ printf("Doing online resize of %s\n", dev);
+
+ struct bchfs_handle fs = bcache_fs_open(mount->mnt_dir);
+
+ unsigned idx = bchu_disk_get_idx(fs, dev_stat.st_rdev);
+
+ struct bch_sb *sb = bchu_read_super(fs, -1);
+ if (idx >= sb->nr_devices)
+ die("error reading superblock: dev idx >= sb->nr_devices");
+
+ struct bch_member m = bch2_sb_member_get(sb, idx);
+
+ u64 nbuckets = size / le16_to_cpu(m.bucket_size);
+
+ if (nbuckets < le64_to_cpu(m.nbuckets))
+ die("Shrinking not supported yet");
+
+ printf("resizing %s to %llu buckets\n", dev, nbuckets);
+ bchu_disk_resize(fs, idx, nbuckets);
+ } else {
+ printf("Doing offline resize of %s\n", dev);
+
+ struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty());
+ if (IS_ERR(c))
+ die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c)));
+
+ struct bch_dev *resize = NULL;
+
+ for_each_online_member(c, ca) {
+ if (resize)
+ die("confused: more than one online device?");
+ resize = ca;
+ percpu_ref_get(&resize->io_ref);
+ }
+
+ u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
+
+ if (nbuckets < le64_to_cpu(resize->mi.nbuckets))
+ die("Shrinking not supported yet");
+
+ printf("resizing %s to %llu buckets\n", dev, nbuckets);
+ int ret = bch2_dev_resize(c, resize, nbuckets);
+ if (ret)
+ fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
+
+ percpu_ref_put(&resize->io_ref);
+ bch2_fs_stop(c);
+ }
+ return 0;
+}
+
+static void device_resize_journal_usage(void)
+{
+ puts("bcachefs device resize-journal \n"
+ "Usage: bcachefs device resize-journal device size\n"
+ "\n"
+ "Options:\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+int cmd_device_resize_journal(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "help", 0, NULL, 'h' },
+ { NULL }
+ };
+ u64 size;
+ int opt;
+
+ while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'h':
+ device_resize_journal_usage();
+ }
+ args_shift(optind);
+
+ char *dev = arg_pop();
+ if (!dev)
+ die("Please supply a device");
+
+ int dev_fd = xopen(dev, O_RDONLY);
+
+ char *size_arg = arg_pop();
+ if (!size_arg)
+ die("Please supply a journal size");
+ else if (bch2_strtoull_h(size_arg, &size))
+ die("invalid size");
+
+ size >>= 9;
+
+ if (argc)
+ die("Too many arguments");
+
+ struct stat dev_stat = xfstat(dev_fd);
+
+ struct mntent *mount = dev_to_mount(dev);
+ if (mount) {
+ if (!S_ISBLK(dev_stat.st_mode))
+ die("%s is mounted but isn't a block device?!", dev);
+
+ struct bchfs_handle fs = bcache_fs_open(mount->mnt_dir);
+
+ unsigned idx = bchu_disk_get_idx(fs, dev_stat.st_rdev);
+
+ struct bch_sb *sb = bchu_read_super(fs, -1);
+ if (idx >= sb->nr_devices)
+ die("error reading superblock: dev idx >= sb->nr_devices");
+
+ struct bch_member m = bch2_sb_member_get(sb, idx);
+
+ u64 nbuckets = size / le16_to_cpu(m.bucket_size);
+
+ printf("resizing journal on %s to %llu buckets\n", dev, nbuckets);
+ bchu_disk_resize_journal(fs, idx, nbuckets);
+ } else {
+ printf("%s is offline - starting:\n", dev);
+
+ struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty());
+ if (IS_ERR(c))
+ die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c)));
+
+ struct bch_dev *resize = NULL;
+
+ for_each_online_member(c, ca) {
+ if (resize)
+ die("confused: more than one online device?");
+ resize = ca;
+ percpu_ref_get(&resize->io_ref);
+ }
+
+ u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
+
+ printf("resizing journal on %s to %llu buckets\n", dev, nbuckets);
+ int ret = bch2_set_nr_journal_buckets(c, resize, nbuckets);
+ if (ret)
+ fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
+
+ percpu_ref_put(&resize->io_ref);
+ bch2_fs_stop(c);
+ }
+ return 0;
+}
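+
+/*
+ * Example invocations (illustrative; the label string and sizes are
+ * placeholders -- sizes go through bch2_strtoull_h(), so human-readable
+ * suffixes are assumed to be accepted):
+ *
+ * $ bcachefs device add --label=hdd.hdd1 /mnt /dev/sdc
+ * $ bcachefs device set-state ro /dev/sdc
+ * $ bcachefs device evacuate /dev/sdc
+ * $ bcachefs device remove /dev/sdc
+ * $ bcachefs device resize /dev/sdb 100G
+ *
+ * Growing a member is supported online and offline; shrinking is rejected
+ * ("Shrinking not supported yet").
+ */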
diff --git a/c_src/cmd_dump.c b/c_src/cmd_dump.c
new file mode 100644
index 00000000..51cc876b
--- /dev/null
+++ b/c_src/cmd_dump.c
@@ -0,0 +1,187 @@
+#include <fcntl.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "qcow2.h"
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/btree_cache.h"
+#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs/extents.h"
+#include "libbcachefs/sb-members.h"
+#include "libbcachefs/super.h"
+
+static void dump_usage(void)
+{
+ puts("bcachefs dump - dump filesystem metadata\n"
+ "Usage: bcachefs dump [OPTION]... <devices>\n"
+ "\n"
+ "Options:\n"
+ " -o output Output qcow2 image(s)\n"
+ " -f, --force Force; overwrite when needed\n"
+ " --nojournal Don't dump entire journal, just dirty entries\n"
+ " -h, --help Display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd,
+ bool entire_journal)
+{
+ struct bch_sb *sb = ca->disk_sb.sb;
+ ranges data = { 0 };
+ unsigned i;
+ int ret;
+
+ /* Superblock: */
+ range_add(&data, BCH_SB_LAYOUT_SECTOR << 9,
+ sizeof(struct bch_sb_layout));
+
+ for (i = 0; i < sb->layout.nr_superblocks; i++)
+ range_add(&data,
+ le64_to_cpu(sb->layout.sb_offset[i]) << 9,
+ vstruct_bytes(sb));
+
+ /* Journal: */
+ for (i = 0; i < ca->journal.nr; i++)
+ if (entire_journal ||
+ ca->journal.bucket_seq[i] >= c->journal.last_seq_ondisk) {
+ u64 bucket = ca->journal.buckets[i];
+
+ range_add(&data,
+ bucket_bytes(ca) * bucket,
+ bucket_bytes(ca));
+ }
+
+ /* Btree: */
+ for (i = 0; i < BTREE_ID_NR; i++) {
+ struct bkey_ptrs_c ptrs;
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct btree *b;
+
+ __for_each_btree_node(trans, iter, i, POS_MIN, 0, 1, 0, b, ret) {
+ struct btree_node_iter iter;
+ struct bkey u;
+ struct bkey_s_c k;
+
+ for_each_btree_node_key_unpack(b, k, &iter, &u) {
+ ptrs = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(ptrs, ptr)
+ if (ptr->dev == ca->dev_idx)
+ range_add(&data,
+ ptr->offset << 9,
+ btree_bytes(c));
+ }
+ }
+
+ if (ret)
+ die("error %s walking btree nodes", bch2_err_str(ret));
+
+ b = bch2_btree_id_root(c, i)->b;
+ if (!btree_node_fake(b)) {
+ ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key));
+
+ bkey_for_each_ptr(ptrs, ptr)
+ if (ptr->dev == ca->dev_idx)
+ range_add(&data,
+ ptr->offset << 9,
+ btree_bytes(c));
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ }
+
+ qcow2_write_image(ca->disk_sb.bdev->bd_fd, fd, &data,
+ max_t(unsigned, btree_bytes(c) / 8, block_bytes(c)));
+ darray_exit(&data);
+}
+
+int cmd_dump(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "force", no_argument, NULL, 'f' },
+ { "nojournal", no_argument, NULL, 'j' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "help", no_argument, NULL, 'h' },
+ { NULL }
+ };
+ struct bch_opts opts = bch2_opts_empty();
+ char *out = NULL;
+ unsigned nr_devices = 0;
+ bool force = false, entire_journal = true;
+ int fd, opt;
+
+ opt_set(opts, direct_io, false);
+ opt_set(opts, read_only, true);
+ opt_set(opts, nochanges, true);
+ opt_set(opts, norecovery, true);
+ opt_set(opts, degraded, true);
+ opt_set(opts, errors, BCH_ON_ERROR_continue);
+ opt_set(opts, fix_errors, FSCK_FIX_no);
+
+ while ((opt = getopt_long(argc, argv, "o:fvh",
+ longopts, NULL)) != -1)
+ switch (opt) {
+ case 'o':
+ out = optarg;
+ break;
+ case 'f':
+ force = true;
+ break;
+ case 'j':
+ entire_journal = false;
+ break;
+ case 'v':
+ opt_set(opts, verbose, true);
+ break;
+ case 'h':
+ dump_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ if (!out)
+ die("Please supply output filename");
+
+ if (!argc)
+ die("Please supply device(s) to check");
+
+ struct bch_fs *c = bch2_fs_open(argv, argc, opts);
+ if (IS_ERR(c))
+ die("error opening devices: %s", bch2_err_str(PTR_ERR(c)));
+
+ down_read(&c->gc_lock);
+
+ for_each_online_member(c, ca)
+ nr_devices++;
+
+ BUG_ON(!nr_devices);
+
+ for_each_online_member(c, ca) {
+ int flags = O_WRONLY|O_CREAT|O_TRUNC;
+
+ if (!force)
+ flags |= O_EXCL;
+
+ char *path = nr_devices > 1
+ ? mprintf("%s.%u.qcow2", out, ca->dev_idx)
+ : mprintf("%s.qcow2", out);
+ fd = xopen(path, flags, 0600);
+ free(path);
+
+ dump_one_device(c, ca, fd, entire_journal);
+ close(fd);
+ }
+
+ up_read(&c->gc_lock);
+
+ bch2_fs_stop(c);
+ return 0;
+}
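+
+/*
+ * Example invocation (illustrative): dump metadata from a two-device
+ * filesystem; one sparse qcow2 image is written per member, named after -o
+ * plus the device index (here assumed to be 0 and 1):
+ *
+ * $ bcachefs dump -o /tmp/meta /dev/sdb /dev/sdc
+ * # -> /tmp/meta.0.qcow2, /tmp/meta.1.qcow2
+ */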
diff --git a/c_src/cmd_format.c b/c_src/cmd_format.c
new file mode 100644
index 00000000..45c44e32
--- /dev/null
+++ b/c_src/cmd_format.c
@@ -0,0 +1,393 @@
+/*
+ * Authors: Kent Overstreet <kent.overstreet@gmail.com>
+ * Gabriel de Perthuis <g2p.code@gmail.com>
+ * Jacob Malevich <jam@datera.io>
+ *
+ * GPLv2
+ */
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "crypto.h"
+#include "libbcachefs/darray.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super-io.h"
+#include "libbcachefs/util.h"
+
+#define OPTS \
+x(0, replicas, required_argument) \
+x(0, encrypted, no_argument) \
+x(0, no_passphrase, no_argument) \
+x('L', fs_label, required_argument) \
+x('U', uuid, required_argument) \
+x(0, fs_size, required_argument) \
+x(0, superblock_size, required_argument) \
+x(0, bucket_size, required_argument) \
+x('l', label, required_argument) \
+x(0, discard, no_argument) \
+x(0, data_allowed, required_argument) \
+x(0, durability, required_argument) \
+x(0, version, required_argument) \
+x(0, no_initialize, no_argument) \
+x('f', force, no_argument) \
+x('q', quiet, no_argument) \
+x('v', verbose, no_argument) \
+x('h', help, no_argument)
+
+static void usage(void)
+{
+ puts("bcachefs format - create a new bcachefs filesystem on one or more devices\n"
+ "Usage: bcachefs format [OPTION]... <devices>\n"
+ "\n"
+ "Options:");
+
+ bch2_opts_usage(OPT_FORMAT);
+
+ puts(
+ " --replicas=# Sets both data and metadata replicas\n"
+ " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
+ " --no_passphrase Don't encrypt master encryption key\n"
+ " -L, --fs_label=label\n"
+ " -U, --uuid=uuid\n"
+ " --superblock_size=size\n"
+ "\n"
+ "Device specific options:");
+
+ bch2_opts_usage(OPT_DEVICE);
+
+ puts(" -l, --label=label Disk label\n"
+ "\n"
+ " -f, --force\n"
+ " -q, --quiet Only print errors\n"
+ " -v, --verbose Verbose filesystem initialization\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Device specific options must come before corresponding devices, e.g.\n"
+ " bcachefs format --label cache /dev/sdb /dev/sdc\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+enum {
+ O_no_opt = 1,
+#define x(shortopt, longopt, arg) O_##longopt,
+ OPTS
+#undef x
+};
+
+#define x(shortopt, longopt, arg) { \
+ .name = #longopt, \
+ .has_arg = arg, \
+ .flag = NULL, \
+ .val = O_##longopt, \
+},
+static const struct option format_opts[] = {
+ OPTS
+ { NULL }
+};
+#undef x
+
+u64 read_flag_list_or_die(char *opt, const char * const list[],
+ const char *msg)
+{
+ u64 v = bch2_read_flag_list(opt, list);
+ if (v == (u64) -1)
+ die("Bad %s %s", msg, opt);
+
+ return v;
+}
+
+int cmd_format(int argc, char *argv[])
+{
+ DARRAY(struct dev_opts) devices = { 0 };
+ DARRAY(char *) device_paths = { 0 };
+ struct format_opts opts = format_opts_default();
+ struct dev_opts dev_opts = dev_opts_default();
+ bool force = false, no_passphrase = false, quiet = false, initialize = true, verbose = false;
+ bool unconsumed_dev_option = false;
+ unsigned v;
+ int opt;
+
+ struct bch_opt_strs fs_opt_strs =
+ bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
+ struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+
+ while ((opt = getopt_long(argc, argv,
+ "-L:U:g:fqhv",
+ format_opts,
+ NULL)) != -1)
+ switch (opt) {
+ case O_replicas:
+ if (kstrtouint(optarg, 10, &v) ||
+ !v ||
+ v > BCH_REPLICAS_MAX)
+ die("invalid replicas");
+
+ opt_set(fs_opts, metadata_replicas, v);
+ opt_set(fs_opts, data_replicas, v);
+ break;
+ case O_encrypted:
+ opts.encrypted = true;
+ break;
+ case O_no_passphrase:
+ no_passphrase = true;
+ break;
+ case O_fs_label:
+ case 'L':
+ opts.label = optarg;
+ break;
+ case O_uuid:
+ case 'U':
+ if (uuid_parse(optarg, opts.uuid.b))
+ die("Bad uuid");
+ break;
+ case O_force:
+ case 'f':
+ force = true;
+ break;
+ case O_fs_size:
+ if (bch2_strtoull_h(optarg, &dev_opts.size))
+ die("invalid filesystem size");
+ unconsumed_dev_option = true;
+ break;
+ case O_superblock_size:
+ if (bch2_strtouint_h(optarg, &opts.superblock_size))
+ die("invalid filesystem size");
+
+ opts.superblock_size >>= 9;
+ break;
+ case O_bucket_size:
+ if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
+ die("bad bucket_size %s", optarg);
+ unconsumed_dev_option = true;
+ break;
+ case O_label:
+ case 'l':
+ dev_opts.label = optarg;
+ unconsumed_dev_option = true;
+ break;
+ case O_discard:
+ dev_opts.discard = true;
+ unconsumed_dev_option = true;
+ break;
+ case O_data_allowed:
+ dev_opts.data_allowed =
+ read_flag_list_or_die(optarg,
+ bch2_data_types, "data type");
+ unconsumed_dev_option = true;
+ break;
+ case O_durability:
+ if (kstrtouint(optarg, 10, &dev_opts.durability) ||
+ dev_opts.durability > BCH_REPLICAS_MAX)
+ die("invalid durability");
+ unconsumed_dev_option = true;
+ break;
+ case O_version:
+ if (kstrtouint(optarg, 10, &opts.version))
+ die("invalid version");
+ break;
+ case O_no_initialize:
+ initialize = false;
+ break;
+ case O_no_opt:
+ darray_push(&device_paths, optarg);
+ dev_opts.path = optarg;
+ darray_push(&devices, dev_opts);
+ dev_opts.size = 0;
+ unconsumed_dev_option = false;
+ break;
+ case O_quiet:
+ case 'q':
+ quiet = true;
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ case O_help:
+ case 'h':
+ usage();
+ exit(EXIT_SUCCESS);
+ break;
+ case '?':
+ exit(EXIT_FAILURE);
+ break;
+ }
+
+ if (unconsumed_dev_option)
+ die("Options for devices apply to subsequent devices; got a device option with no device");
+
+ if (opts.version != bcachefs_metadata_version_current)
+ initialize = false;
+
+ if (!devices.nr)
+ die("Please supply a device");
+
+ if (opts.encrypted && !no_passphrase) {
+ opts.passphrase = read_passphrase_twice("Enter passphrase: ");
+ initialize = false;
+ }
+
+ darray_for_each(devices, dev) {
+ int ret = open_for_format(dev, force);
+ if (ret)
+ die("Error opening %s: %s", dev_opts.path, strerror(-ret));
+ }
+
+ struct bch_sb *sb =
+ bch2_format(fs_opt_strs,
+ fs_opts,
+ opts,
+ devices.data, devices.nr);
+ bch2_opt_strs_free(&fs_opt_strs);
+
+ if (!quiet) {
+ struct printbuf buf = PRINTBUF;
+
+ buf.human_readable_units = true;
+
+ bch2_sb_to_text(&buf, sb, false, 1 << BCH_SB_FIELD_members_v2);
+ printf("%s", buf.buf);
+
+ printbuf_exit(&buf);
+ }
+ free(sb);
+
+ if (opts.passphrase) {
+ memzero_explicit(opts.passphrase, strlen(opts.passphrase));
+ free(opts.passphrase);
+ }
+
+ darray_exit(&devices);
+
+ if (initialize) {
+ struct bch_opts mount_opts = bch2_opts_empty();
+
+
+ opt_set(mount_opts, verbose, verbose);
+
+ /*
+ * Start the filesystem once, to allocate the journal and create
+ * the root directory:
+ */
+ struct bch_fs *c = bch2_fs_open(device_paths.data,
+ device_paths.nr,
+ mount_opts);
+ if (IS_ERR(c))
+ die("error opening %s: %s", device_paths.data[0],
+ bch2_err_str(PTR_ERR(c)));
+
+ bch2_fs_stop(c);
+ }
+
+ darray_exit(&device_paths);
+
+ return 0;
+}
+
+static void show_super_usage(void)
+{
+ puts("bcachefs show-super \n"
+ "Usage: bcachefs show-super [OPTION].. device\n"
+ "\n"
+ "Options:\n"
+ " -f, --fields=(fields) list of sections to print\n"
+ " --field-only=fiel) print superblock section only, no header\n"
+ " -l, --layout print superblock layout\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+int cmd_show_super(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "fields", 1, NULL, 'f' },
+ { "field-only", 1, NULL, 'F' },
+ { "layout", 0, NULL, 'l' },
+ { "help", 0, NULL, 'h' },
+ { NULL }
+ };
+ unsigned fields = 0;
+ int field_only = -1;
+ bool print_layout = false;
+ bool print_default_fields = true;
+ int opt;
+
+ while ((opt = getopt_long(argc, argv, "f:lh", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'f':
+ fields = !strcmp(optarg, "all")
+ ? ~0
+ : read_flag_list_or_die(optarg,
+ bch2_sb_fields, "superblock field");
+ print_default_fields = false;
+ break;
+ case 'F':
+ field_only = read_string_list_or_die(optarg,
+ bch2_sb_fields, "superblock field");
+ print_default_fields = false;
+ break;
+ case 'l':
+ print_layout = true;
+ break;
+ case 'h':
+ show_super_usage();
+ break;
+ }
+ args_shift(optind);
+
+ char *dev = arg_pop();
+ if (!dev)
+ die("please supply a device");
+ if (argc)
+ die("too many arguments");
+
+ struct bch_opts opts = bch2_opts_empty();
+
+ opt_set(opts, noexcl, true);
+ opt_set(opts, nochanges, true);
+
+ struct bch_sb_handle sb;
+ int ret = bch2_read_super(dev, &opts, &sb);
+ if (ret)
+ die("Error opening %s: %s", dev, bch2_err_str(ret));
+
+ if (print_default_fields) {
+ fields |= bch2_sb_field_get(sb.sb, members_v2)
+ ? 1 << BCH_SB_FIELD_members_v2
+ : 1 << BCH_SB_FIELD_members_v1;
+ fields |= 1 << BCH_SB_FIELD_errors;
+ }
+
+ struct printbuf buf = PRINTBUF;
+
+ buf.human_readable_units = true;
+
+ if (field_only >= 0) {
+ struct bch_sb_field *f = bch2_sb_field_get_id(sb.sb, field_only);
+
+ if (f)
+ __bch2_sb_field_to_text(&buf, sb.sb, f);
+ } else {
+ bch2_sb_to_text(&buf, sb.sb, print_layout, fields);
+ }
+ printf("%s", buf.buf);
+
+ bch2_free_super(&sb);
+ printbuf_exit(&buf);
+ return 0;
+}
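+
+/*
+ * Example invocations (illustrative; device labels are placeholders).
+ * Device-specific options apply to the devices that follow them:
+ *
+ * $ bcachefs format --encrypted --replicas=2 \
+ * --label=ssd.ssd1 /dev/sdb \
+ * --label=hdd.hdd1 /dev/sdc
+ *
+ * $ bcachefs show-super -f members_v2,errors /dev/sdb
+ * $ bcachefs show-super --field-only=members_v2 /dev/sdb
+ */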
diff --git a/c_src/cmd_fs.c b/c_src/cmd_fs.c
new file mode 100644
index 00000000..67c38af6
--- /dev/null
+++ b/c_src/cmd_fs.c
@@ -0,0 +1,348 @@
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include <uuid/uuid.h>
+
+#include "linux/sort.h"
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/darray.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super-io.h"
+
+#include "cmds.h"
+#include "libbcachefs.h"
+
+static void __dev_usage_type_to_text(struct printbuf *out,
+ const char *type,
+ unsigned bucket_size,
+ u64 buckets, u64 sectors, u64 frag)
+{
+ prt_printf(out, "%s:", type);
+ prt_tab(out);
+
+ prt_units_u64(out, sectors << 9);
+ prt_tab_rjust(out);
+
+ prt_printf(out, "%llu", buckets);
+ prt_tab_rjust(out);
+
+ if (frag) {
+ prt_units_u64(out, frag << 9);
+ prt_tab_rjust(out);
+ }
+ prt_newline(out);
+}
+
+static void dev_usage_type_to_text(struct printbuf *out,
+ struct bch_ioctl_dev_usage_v2 *u,
+ enum bch_data_type type)
+{
+ u64 sectors = 0;
+ switch (type) {
+ case BCH_DATA_free:
+ case BCH_DATA_need_discard:
+ case BCH_DATA_need_gc_gens:
+ /* sectors are 0 for these types so calculate sectors for them */
+ sectors = u->d[type].buckets * u->bucket_size;
+ break;
+ default:
+ sectors = u->d[type].sectors;
+ }
+
+ __dev_usage_type_to_text(out, bch2_data_types[type],
+ u->bucket_size,
+ u->d[type].buckets,
+ sectors,
+ u->d[type].fragmented);
+}
+
+static void dev_usage_to_text(struct printbuf *out,
+ struct bchfs_handle fs,
+ struct dev_name *d)
+{
+ struct bch_ioctl_dev_usage_v2 *u = bchu_dev_usage(fs, d->idx);
+
+ prt_newline(out);
+ prt_printf(out, "%s (device %u):", d->label ?: "(no label)", d->idx);
+ prt_tab(out);
+ prt_str(out, d->dev ?: "(device not found)");
+ prt_tab_rjust(out);
+
+ prt_str(out, bch2_member_states[u->state]);
+ prt_tab_rjust(out);
+
+ prt_newline(out);
+
+ printbuf_indent_add(out, 2);
+ prt_tab(out);
+
+ prt_str(out, "data");
+ prt_tab_rjust(out);
+
+ prt_str(out, "buckets");
+ prt_tab_rjust(out);
+
+ prt_str(out, "fragmented");
+ prt_tab_rjust(out);
+
+ prt_newline(out);
+
+ for (unsigned i = 0; i < u->nr_data_types; i++)
+ dev_usage_type_to_text(out, u, i);
+
+ prt_str(out, "capacity:");
+ prt_tab(out);
+
+ prt_units_u64(out, (u->nr_buckets * u->bucket_size) << 9);
+ prt_tab_rjust(out);
+ prt_printf(out, "%llu", u->nr_buckets);
+ prt_tab_rjust(out);
+
+ printbuf_indent_sub(out, 2);
+
+ prt_newline(out);
+ free(u);
+}
+
+static int dev_by_label_cmp(const void *_l, const void *_r)
+{
+ const struct dev_name *l = _l, *r = _r;
+
+ return (l->label && r->label
+ ? strcmp(l->label, r->label) : 0) ?:
+ (l->dev && r->dev
+ ? strcmp(l->dev, r->dev) : 0) ?:
+ cmp_int(l->idx, r->idx);
+}
+
+static struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx)
+{
+ darray_for_each(*dev_names, dev)
+ if (dev->idx == idx)
+ return dev;
+ return NULL;
+}
+
+static void replicas_usage_to_text(struct printbuf *out,
+ const struct bch_replicas_usage *r,
+ dev_names *dev_names)
+{
+ if (!r->sectors)
+ return;
+
+ char devs[4096], *d = devs;
+ *d++ = '[';
+
+ unsigned durability = 0;
+
+ for (unsigned i = 0; i < r->r.nr_devs; i++) {
+ unsigned dev_idx = r->r.devs[i];
+ struct dev_name *dev = dev_idx_to_name(dev_names, dev_idx);
+
+ durability += dev ? dev->durability : 0;
+
+ if (i)
+ *d++ = ' ';
+
+ d += dev && dev->dev
+ ? sprintf(d, "%s", dev->dev)
+ : sprintf(d, "%u", dev_idx);
+ }
+ *d++ = ']';
+ *d++ = '\0';
+
+ prt_printf(out, "%s: ", bch2_data_types[r->r.data_type]);
+ prt_tab(out);
+
+ prt_printf(out, "%u/%u ", r->r.nr_required, r->r.nr_devs);
+ prt_tab(out);
+
+ prt_printf(out, "%u ", durability);
+ prt_tab(out);
+
+ prt_printf(out, "%s ", devs);
+ prt_tab(out);
+
+ prt_units_u64(out, r->sectors << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+}
+
+#define for_each_usage_replica(_u, _r) \
+ for (_r = (_u)->replicas; \
+ _r != (void *) (_u)->replicas + (_u)->replica_entries_bytes;\
+ _r = replicas_usage_next(_r), \
+ BUG_ON((void *) _r > (void *) (_u)->replicas + (_u)->replica_entries_bytes))
+
+static void fs_usage_to_text(struct printbuf *out, const char *path)
+{
+ unsigned i;
+
+ struct bchfs_handle fs = bcache_fs_open(path);
+
+ dev_names dev_names = bchu_fs_get_devices(fs);
+
+ struct bch_ioctl_fs_usage *u = bchu_fs_usage(fs);
+
+ prt_str(out, "Filesystem: ");
+ pr_uuid(out, fs.uuid.b);
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 20);
+ printbuf_tabstop_push(out, 16);
+
+ prt_str(out, "Size:");
+ prt_tab(out);
+ prt_units_u64(out, u->capacity << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_str(out, "Used:");
+ prt_tab(out);
+ prt_units_u64(out, u->used << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_str(out, "Online reserved:");
+ prt_tab(out);
+ prt_units_u64(out, u->online_reserved << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+
+ printbuf_tabstop_push(out, 16);
+ prt_str(out, "Data type");
+ prt_tab(out);
+
+ printbuf_tabstop_push(out, 16);
+ prt_str(out, "Required/total");
+ prt_tab(out);
+
+ printbuf_tabstop_push(out, 14);
+ prt_str(out, "Durability");
+ prt_tab(out);
+
+ printbuf_tabstop_push(out, 14);
+ prt_str(out, "Devices");
+ prt_newline(out);
+
+ printbuf_tabstop_push(out, 14);
+
+ for (i = 0; i < BCH_REPLICAS_MAX; i++) {
+ if (!u->persistent_reserved[i])
+ continue;
+
+ prt_str(out, "reserved:");
+ prt_tab(out);
+ prt_printf(out, "%u/%u ", 1, i);
+ prt_tab(out);
+ prt_str(out, "[] ");
+ prt_units_u64(out, u->persistent_reserved[i] << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+
+ struct bch_replicas_usage *r;
+
+ for_each_usage_replica(u, r)
+ if (r->r.data_type < BCH_DATA_user)
+ replicas_usage_to_text(out, r, &dev_names);
+
+ for_each_usage_replica(u, r)
+ if (r->r.data_type == BCH_DATA_user &&
+ r->r.nr_required <= 1)
+ replicas_usage_to_text(out, r, &dev_names);
+
+ for_each_usage_replica(u, r)
+ if (r->r.data_type == BCH_DATA_user &&
+ r->r.nr_required > 1)
+ replicas_usage_to_text(out, r, &dev_names);
+
+ for_each_usage_replica(u, r)
+ if (r->r.data_type > BCH_DATA_user)
+ replicas_usage_to_text(out, r, &dev_names);
+
+ free(u);
+
+ sort(dev_names.data, dev_names.nr,
+ sizeof(dev_names.data[0]), dev_by_label_cmp, NULL);
+
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 20);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 14);
+
+ darray_for_each(dev_names, dev)
+ dev_usage_to_text(out, fs, dev);
+
+ darray_for_each(dev_names, dev) {
+ free(dev->dev);
+ free(dev->label);
+ }
+ darray_exit(&dev_names);
+
+ bcache_fs_close(fs);
+}
+
+static void fs_usage_usage(void)
+{
+ puts("bcachefs fs usage - display detailed filesystem usage\n"
+ "Usage: bcachefs fs usage [OPTION]... <mountpoint>\n"
+ "\n"
+ "Options:\n"
+ " -h, --human-readable Human readable units\n"
+ " -H, --help Display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_fs_usage(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "help", no_argument, NULL, 'H' },
+ { "human-readable", no_argument, NULL, 'h' },
+ { NULL }
+ };
+ bool human_readable = false;
+ struct printbuf buf = PRINTBUF;
+ char *fs;
+ int opt;
+
+ while ((opt = getopt_long(argc, argv, "h",
+ longopts, NULL)) != -1)
+ switch (opt) {
+ case 'h':
+ human_readable = true;
+ break;
+ case 'H':
+ fs_usage_usage();
+ exit(EXIT_SUCCESS);
+ default:
+ fs_usage_usage();
+ exit(EXIT_FAILURE);
+ }
+ args_shift(optind);
+
+ if (!argc) {
+ printbuf_reset(&buf);
+ buf.human_readable_units = human_readable;
+ fs_usage_to_text(&buf, ".");
+ printf("%s", buf.buf);
+ } else {
+ while ((fs = arg_pop())) {
+ printbuf_reset(&buf);
+ buf.human_readable_units = human_readable;
+ fs_usage_to_text(&buf, fs);
+ printf("%s", buf.buf);
+ }
+ }
+
+ printbuf_exit(&buf);
+ return 0;
+}
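+
+/*
+ * Example invocation (illustrative): show usage for the filesystem mounted at
+ * /mnt with human-readable units; with no mountpoint given, "." is used:
+ *
+ * $ bcachefs fs usage -h /mnt
+ */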
diff --git a/c_src/cmd_fsck.c b/c_src/cmd_fsck.c
new file mode 100644
index 00000000..262ac1bc
--- /dev/null
+++ b/c_src/cmd_fsck.c
@@ -0,0 +1,208 @@
+
+#include <getopt.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include "cmds.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs.h"
+#include "libbcachefs/super.h"
+#include "tools-util.h"
+
+static void fsck_usage(void)
+{
+ puts("bcachefs fsck - filesystem check and repair\n"
+ "Usage: bcachefs fsck [OPTION]... <devices>\n"
+ "\n"
+ "Options:\n"
+ " -p Automatic repair (no questions)\n"
+ " -n Don't repair, only check for errors\n"
+ " -y Assume \"yes\" to all questions\n"
+ " -f Force checking even if filesystem is marked clean\n"
+ " -r, --ratelimit_errors Don't display more than 10 errors of a given type\n"
+ " -R, --reconstruct_alloc Reconstruct the alloc btree\n"
+ " -k, --kernel Use the in-kernel fsck implementation\n"
+ " -v Be verbose\n"
+ " -h, --help Display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+static void setnonblocking(int fd)
+{
+ int flags = fcntl(fd, F_GETFL);
+ if (fcntl(fd, F_SETFL, flags|O_NONBLOCK))
+ die("fcntl error: %m");
+}
+
+static int do_splice(int rfd, int wfd)
+{
+ char buf[4096], *b = buf;
+
+ int r = read(rfd, buf, sizeof(buf));
+ if (r < 0 && errno == EAGAIN)
+ return 0;
+ if (r < 0)
+ return r;
+ if (!r)
+ return 1;
+ do {
+ ssize_t w = write(wfd, b, r);
+ if (w < 0)
+ die("%s: write error: %m", __func__);
+ r -= w;
+ b += w;
+ } while (r);
+ return 0;
+}
+
+static int splice_fd_to_stdinout(int fd)
+{
+ setnonblocking(STDIN_FILENO);
+ setnonblocking(fd);
+
+ while (true) {
+ fd_set fds;
+
+ FD_ZERO(&fds);
+ FD_SET(STDIN_FILENO, &fds);
+ FD_SET(fd, &fds);
+
+ select(fd + 1, &fds, NULL, NULL, NULL);
+
+ int r = do_splice(fd, STDOUT_FILENO) ?:
+ do_splice(STDIN_FILENO, fd);
+ if (r)
+ return r < 0 ? r : 0;
+ }
+
+ return 0;
+}
+
+static int fsck_online(const char *dev_path)
+{
+ int dev_idx;
+ struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &dev_idx);
+
+ struct bch_ioctl_fsck_online fsck = { 0 };
+
+ int fsck_fd = ioctl(fs.ioctl_fd, BCH_IOCTL_FSCK_ONLINE, &fsck);
+ if (fsck_fd < 0)
+ die("BCH_IOCTL_FSCK_ONLINE error: %s", bch2_err_str(fsck_fd));
+
+ return splice_fd_to_stdinout(fsck_fd);
+}
+
+static void append_opt(struct printbuf *out, const char *opt)
+{
+ if (out->pos)
+ prt_char(out, ',');
+ prt_str(out, opt);
+}
+
+int cmd_fsck(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "ratelimit_errors", no_argument, NULL, 'r' },
+ { "reconstruct_alloc", no_argument, NULL, 'R' },
+ { "kernel", no_argument, NULL, 'k' },
+ { "help", no_argument, NULL, 'h' },
+ { NULL }
+ };
+ bool kernel = false;
+ int opt, ret = 0;
+ struct printbuf opts_str = PRINTBUF;
+
+ append_opt(&opts_str, "degraded");
+ append_opt(&opts_str, "fsck");
+ append_opt(&opts_str, "fix_errors=ask");
+ append_opt(&opts_str, "read_only");
+
+ while ((opt = getopt_long(argc, argv,
+ "apynfo:rRkvh",
+ longopts, NULL)) != -1)
+ switch (opt) {
+ case 'a': /* outdated alias for -p */
+ case 'p':
+ case 'y':
+ append_opt(&opts_str, "fix_errors=yes");
+ break;
+ case 'n':
+ append_opt(&opts_str, "nochanges");
+ append_opt(&opts_str, "fix_errors=no");
+ break;
+ case 'f':
+ /* force check, even if filesystem marked clean: */
+ break;
+ case 'o':
+ append_opt(&opts_str, optarg);
+ break;
+ case 'r':
+ append_opt(&opts_str, "ratelimit_errors");
+ break;
+ case 'R':
+ append_opt(&opts_str, "reconstruct_alloc");
+ break;
+ case 'k':
+ kernel = true;
+ break;
+ case 'v':
+ append_opt(&opts_str, "verbose");
+ break;
+ case 'h':
+ fsck_usage();
+ exit(16);
+ }
+ args_shift(optind);
+
+ if (!argc) {
+ fprintf(stderr, "Please supply device(s) to check\n");
+ exit(8);
+ }
+
+ darray_str devs = get_or_split_cmdline_devs(argc, argv);
+
+ if (!kernel) {
+ struct bch_opts opts = bch2_opts_empty();
+ ret = bch2_parse_mount_opts(NULL, &opts, opts_str.buf);
+ if (ret)
+ return ret;
+
+ darray_for_each(devs, i)
+ if (dev_mounted(*i))
+ return fsck_online(*i);
+
+ struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts);
+ if (IS_ERR(c))
+ exit(8);
+
+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
+ fprintf(stderr, "%s: errors fixed\n", c->name);
+ ret |= 1;
+ }
+ if (test_bit(BCH_FS_error, &c->flags)) {
+ fprintf(stderr, "%s: still has errors\n", c->name);
+ ret |= 4;
+ }
+
+ bch2_fs_stop(c);
+ } else {
+ struct bch_ioctl_fsck_offline *fsck = calloc(sizeof(*fsck) +
+ sizeof(u64) * devs.nr, 1);
+
+ fsck->opts = (unsigned long)opts_str.buf;
+ darray_for_each(devs, i)
+ fsck->devs[i - devs.data] = (unsigned long) *i;
+ fsck->nr_devs = devs.nr;
+
+ int ctl_fd = bcachectl_open();
+
+ int fsck_fd = ioctl(ctl_fd, BCH_IOCTL_FSCK_OFFLINE, fsck);
+ if (fsck_fd < 0)
+ die("BCH_IOCTL_FSCK_OFFLINE error: %s", bch2_err_str(fsck_fd));
+
+ ret = splice_fd_to_stdinout(fsck_fd);
+ free(fsck);
+ }
+
+ printbuf_exit(&opts_str);
+ return ret;
+}
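+
+/*
+ * Example invocation (illustrative): check two member devices, repairing
+ * automatically; if any of them is mounted the check is handed off to the
+ * kernel via BCH_IOCTL_FSCK_ONLINE:
+ *
+ * $ bcachefs fsck -p /dev/sdb /dev/sdc
+ *
+ * Exit status follows the fsck convention used above: 1 if errors were
+ * fixed, 4 if errors remain, 8 on an operational error.
+ */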
diff --git a/c_src/cmd_fusemount.c b/c_src/cmd_fusemount.c
new file mode 100644
index 00000000..d81f3188
--- /dev/null
+++ b/c_src/cmd_fusemount.c
@@ -0,0 +1,1303 @@
+#ifdef BCACHEFS_FUSE
+
+#include <errno.h>
+#include <float.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/statvfs.h>
+
+#include <fuse_lowlevel.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "tools-util.h"
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/alloc_foreground.h"
+#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/dirent.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs/fs-common.h"
+#include "libbcachefs/inode.h"
+#include "libbcachefs/io_read.h"
+#include "libbcachefs/io_write.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super.h"
+
+/* mode_to_type(): */
+#include "libbcachefs/fs.h"
+
+#include <linux/dcache.h>
+
+/* XXX cut and pasted from fsck.c */
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+/* used by write_aligned function for waiting on bch2_write closure */
+struct write_aligned_op_t {
+ struct closure cl;
+
+ /* must be last: */
+ struct bch_write_op op;
+};
+
+
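+/*
+ * FUSE addresses the mount root as inode 1, while bcachefs's root inode is
+ * BCACHEFS_ROOT_INO (4096); these helpers translate between the two views on
+ * the way in and out of every request.
+ */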
+static inline subvol_inum map_root_ino(u64 ino)
+{
+ return (subvol_inum) { 1, ino == 1 ? 4096 : ino };
+}
+
+static inline u64 unmap_root_ino(u64 ino)
+{
+ return ino == 4096 ? 1 : ino;
+}
+
+static struct stat inode_to_stat(struct bch_fs *c,
+ struct bch_inode_unpacked *bi)
+{
+ return (struct stat) {
+ .st_ino = unmap_root_ino(bi->bi_inum),
+ .st_size = bi->bi_size,
+ .st_mode = bi->bi_mode,
+ .st_uid = bi->bi_uid,
+ .st_gid = bi->bi_gid,
+ .st_nlink = bch2_inode_nlink_get(bi),
+ .st_rdev = bi->bi_dev,
+ .st_blksize = block_bytes(c),
+ .st_blocks = bi->bi_sectors,
+ .st_atim = bch2_time_to_timespec(c, bi->bi_atime),
+ .st_mtim = bch2_time_to_timespec(c, bi->bi_mtime),
+ .st_ctim = bch2_time_to_timespec(c, bi->bi_ctime),
+ };
+}
+
+static struct fuse_entry_param inode_to_entry(struct bch_fs *c,
+ struct bch_inode_unpacked *bi)
+{
+ return (struct fuse_entry_param) {
+ .ino = unmap_root_ino(bi->bi_inum),
+ .generation = bi->bi_generation,
+ .attr = inode_to_stat(c, bi),
+ .attr_timeout = DBL_MAX,
+ .entry_timeout = DBL_MAX,
+ };
+}
+
+static void bcachefs_fuse_init(void *arg, struct fuse_conn_info *conn)
+{
+ if (conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
+ fuse_log(FUSE_LOG_DEBUG, "fuse_init: activating writeback\n");
+ conn->want |= FUSE_CAP_WRITEBACK_CACHE;
+ } else
+ fuse_log(FUSE_LOG_DEBUG, "fuse_init: writeback not capable\n");
+
+ //conn->want |= FUSE_CAP_POSIX_ACL;
+}
+
+static void bcachefs_fuse_destroy(void *arg)
+{
+ struct bch_fs *c = arg;
+
+ bch2_fs_stop(c);
+}
+
+static void bcachefs_fuse_lookup(fuse_req_t req, fuse_ino_t dir_ino,
+ const char *name)
+{
+ subvol_inum dir = map_root_ino(dir_ino);
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked bi;
+ struct qstr qstr = QSTR(name);
+ subvol_inum inum;
+ int ret;
+
+ fuse_log(FUSE_LOG_DEBUG, "fuse_lookup(dir=%llu name=%s)\n",
+ dir.inum, name);
+
+ ret = bch2_inode_find_by_inum(c, dir, &bi);
+ if (ret) {
+ fuse_reply_err(req, -ret);
+ return;
+ }
+
+ struct bch_hash_info hash_info = bch2_hash_info_init(c, &bi);
+
+ ret = bch2_dirent_lookup(c, dir, &hash_info, &qstr, &inum);
+ if (ret) {
+ struct fuse_entry_param e = {
+ .attr_timeout = DBL_MAX,
+ .entry_timeout = DBL_MAX,
+ };
+ fuse_reply_entry(req, &e);
+ return;
+ }
+
+ ret = bch2_inode_find_by_inum(c, inum, &bi);
+ if (ret)
+ goto err;
+
+ fuse_log(FUSE_LOG_DEBUG, "fuse_lookup ret(inum=%llu)\n",
+ bi.bi_inum);
+
+ struct fuse_entry_param e = inode_to_entry(c, &bi);
+ fuse_reply_entry(req, &e);
+ return;
+err:
+ fuse_log(FUSE_LOG_DEBUG, "fuse_lookup error %i\n", ret);
+ fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_getattr(fuse_req_t req, fuse_ino_t ino,
+ struct fuse_file_info *fi)
+{
+ subvol_inum inum = map_root_ino(ino);
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked bi;
+ struct stat attr;
+
+ fuse_log(FUSE_LOG_DEBUG, "fuse_getattr(inum=%llu)\n", inum.inum);
+
+ int ret = bch2_inode_find_by_inum(c, inum, &bi);
+ if (ret) {
+ fuse_log(FUSE_LOG_DEBUG, "fuse_getattr error %i\n", ret);
+ fuse_reply_err(req, -ret);
+ return;
+ }
+
+ fuse_log(FUSE_LOG_DEBUG, "fuse_getattr success\n");
+
+ attr = inode_to_stat(c, &bi);
+ fuse_reply_attr(req, &attr, DBL_MAX);
+}
+
+static void bcachefs_fuse_setattr(fuse_req_t req, fuse_ino_t ino,
+ struct stat *attr, int to_set,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked inode_u;
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ u64 now;
+ int ret;
+
+ subvol_inum inum = map_root_ino(ino);
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_setattr(%llu, %x)\n", inum.inum, to_set);
+
+ trans = bch2_trans_get(c);
+retry:
+ bch2_trans_begin(trans);
+ now = bch2_current_time(c);
+
+ ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ if (to_set & FUSE_SET_ATTR_MODE)
+ inode_u.bi_mode = attr->st_mode;
+ if (to_set & FUSE_SET_ATTR_UID)
+ inode_u.bi_uid = attr->st_uid;
+ if (to_set & FUSE_SET_ATTR_GID)
+ inode_u.bi_gid = attr->st_gid;
+ if (to_set & FUSE_SET_ATTR_SIZE)
+ inode_u.bi_size = attr->st_size;
+ if (to_set & FUSE_SET_ATTR_ATIME)
+ inode_u.bi_atime = timespec_to_bch2_time(c, attr->st_atim);
+ if (to_set & FUSE_SET_ATTR_MTIME)
+ inode_u.bi_mtime = timespec_to_bch2_time(c, attr->st_mtim);
+ if (to_set & FUSE_SET_ATTR_ATIME_NOW)
+ inode_u.bi_atime = now;
+ if (to_set & FUSE_SET_ATTR_MTIME_NOW)
+ inode_u.bi_mtime = now;
+ /* TODO: CTIME? */
+
+ ret = bch2_inode_write(trans, &iter, &inode_u) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ if (ret == -EINTR)
+ goto retry;
+
+ bch2_trans_put(trans);
+
+ if (!ret) {
+ *attr = inode_to_stat(c, &inode_u);
+ fuse_reply_attr(req, attr, DBL_MAX);
+ } else {
+ fuse_reply_err(req, -ret);
+ }
+}
+
+static int do_create(struct bch_fs *c, subvol_inum dir,
+ const char *name, mode_t mode, dev_t rdev,
+ struct bch_inode_unpacked *new_inode)
+{
+ struct qstr qstr = QSTR(name);
+ struct bch_inode_unpacked dir_u;
+ uid_t uid = 0;
+ gid_t gid = 0;
+
+ bch2_inode_init_early(c, new_inode);
+
+ return bch2_trans_do(c, NULL, NULL, 0,
+ bch2_create_trans(trans,
+ dir, &dir_u,
+ new_inode, &qstr,
+ uid, gid, mode, rdev, NULL, NULL,
+ (subvol_inum) { 0 }, 0));
+}
+
+static void bcachefs_fuse_mknod(fuse_req_t req, fuse_ino_t dir_ino,
+ const char *name, mode_t mode,
+ dev_t rdev)
+{
+ subvol_inum dir = map_root_ino(dir_ino);
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked new_inode;
+ int ret;
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_mknod(%llu, %s, %x, %x)\n",
+ dir.inum, name, mode, rdev);
+
+ ret = do_create(c, dir, name, mode, rdev, &new_inode);
+ if (ret)
+ goto err;
+
+ struct fuse_entry_param e = inode_to_entry(c, &new_inode);
+ fuse_reply_entry(req, &e);
+ return;
+err:
+ fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_mkdir(fuse_req_t req, fuse_ino_t dir,
+ const char *name, mode_t mode)
+{
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_mkdir(%llu, %s, %x)\n",
+ dir, name, mode);
+
+ BUG_ON(mode & S_IFMT);
+
+ mode |= S_IFDIR;
+ bcachefs_fuse_mknod(req, dir, name, mode, 0);
+}
+
+static void bcachefs_fuse_unlink(fuse_req_t req, fuse_ino_t dir_ino,
+ const char *name)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked dir_u, inode_u;
+ struct qstr qstr = QSTR(name);
+ subvol_inum dir = map_root_ino(dir_ino);
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_unlink(%llu, %s)\n", dir.inum, name);
+
+ int ret = bch2_trans_do(c, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc,
+ bch2_unlink_trans(trans, dir, &dir_u,
+ &inode_u, &qstr, false));
+
+ fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_rmdir(fuse_req_t req, fuse_ino_t dir,
+ const char *name)
+{
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_rmdir(%llu, %s)\n", dir, name);
+
+ bcachefs_fuse_unlink(req, dir, name);
+}
+
+static void bcachefs_fuse_rename(fuse_req_t req,
+ fuse_ino_t src_dir_ino, const char *srcname,
+ fuse_ino_t dst_dir_ino, const char *dstname,
+ unsigned flags)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked dst_dir_u, src_dir_u;
+ struct bch_inode_unpacked src_inode_u, dst_inode_u;
+ struct qstr dst_name = QSTR(dstname);
+ struct qstr src_name = QSTR(srcname);
+ subvol_inum src_dir = map_root_ino(src_dir_ino);
+ subvol_inum dst_dir = map_root_ino(dst_dir_ino);
+ int ret;
+
+ fuse_log(FUSE_LOG_DEBUG,
+ "bcachefs_fuse_rename(%llu, %s, %llu, %s, %x)\n",
+ src_dir.inum, srcname, dst_dir.inum, dstname, flags);
+
+ /* XXX handle overwrites */
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_rename_trans(trans,
+ src_dir, &src_dir_u,
+ dst_dir, &dst_dir_u,
+ &src_inode_u, &dst_inode_u,
+ &src_name, &dst_name,
+ BCH_RENAME));
+
+ fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_link(fuse_req_t req, fuse_ino_t ino,
+ fuse_ino_t newparent_ino, const char *newname)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked dir_u, inode_u;
+ struct qstr qstr = QSTR(newname);
+ subvol_inum newparent = map_root_ino(newparent_ino);
+ subvol_inum inum = map_root_ino(ino);
+ int ret;
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_link(%llu, %llu, %s)\n",
+ inum.inum, newparent.inum, newname);
+
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_link_trans(trans, newparent, &dir_u,
+ inum, &inode_u, &qstr));
+
+ if (!ret) {
+ struct fuse_entry_param e = inode_to_entry(c, &inode_u);
+ fuse_reply_entry(req, &e);
+ } else {
+ fuse_reply_err(req, -ret);
+ }
+}
+
+static void bcachefs_fuse_open(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_file_info *fi)
+{
+ fi->direct_io = false;
+ fi->keep_cache = true;
+ fi->cache_readdir = true;
+
+ fuse_reply_open(req, fi);
+}
+
+static void userbio_init(struct bio *bio, struct bio_vec *bv,
+ void *buf, size_t size)
+{
+ bio_init(bio, NULL, bv, 1, 0);
+ bio->bi_iter.bi_size = size;
+ bv->bv_page = buf;
+ bv->bv_len = size;
+ bv->bv_offset = 0;
+}
+
+static int get_inode_io_opts(struct bch_fs *c, subvol_inum inum, struct bch_io_opts *opts)
+{
+ struct bch_inode_unpacked inode;
+ if (bch2_inode_find_by_inum(c, inum, &inode))
+ return -EINVAL;
+
+ bch2_inode_opts_get(opts, c, &inode);
+ return 0;
+}
+
+static void bcachefs_fuse_read_endio(struct bio *bio)
+{
+ closure_put(bio->bi_private);
+}
+
+static void bcachefs_fuse_write_endio(struct bch_write_op *op)
+{
+ struct write_aligned_op_t *w = container_of(op,struct write_aligned_op_t,op);
+ closure_put(&w->cl);
+}
+
+struct fuse_align_io {
+ off_t start;
+ size_t pad_start;
+ off_t end;
+ size_t pad_end;
+ size_t size;
+};
+
+/* Handle unaligned start and end */
+/* TODO: align to block_bytes, sector size, or page size? */
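+/*
+ * Worked example, assuming 4096-byte blocks: size=100 at offset=5000 gives
+ * start=4096, pad_start=904, end=8192, pad_end=3092, size=4096.
+ */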
+static struct fuse_align_io align_io(const struct bch_fs *c, size_t size,
+ off_t offset)
+{
+ struct fuse_align_io align;
+
+ BUG_ON(offset < 0);
+
+ align.start = round_down(offset, block_bytes(c));
+ align.pad_start = offset - align.start;
+
+ off_t end = offset + size;
+ align.end = round_up(end, block_bytes(c));
+ align.pad_end = align.end - end;
+
+ align.size = align.end - align.start;
+
+ return align;
+}
+
+/*
+ * Given an aligned number of bytes transferred, figure out how many unaligned
+ * bytes were transferred.
+ */
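+/*
+ * Continuing the example above: if all 4096 aligned bytes were transferred,
+ * 4096 - 904 - 3092 = 100 unaligned bytes were transferred.
+ */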
+static size_t align_fix_up_bytes(const struct fuse_align_io *align,
+ size_t align_bytes)
+{
+ size_t bytes = 0;
+
+ if (align_bytes > align->pad_start) {
+ bytes = align_bytes - align->pad_start;
+ bytes = bytes > align->pad_end ? bytes - align->pad_end : 0;
+ }
+
+ return bytes;
+}
+
+/*
+ * Read aligned data.
+ */
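+/*
+ * The read is submitted asynchronously via bch2_read(); the stack closure
+ * reference taken here is dropped by the bio end_io callback, so
+ * closure_sync() returns once the read has completed.
+ */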
+static int read_aligned(struct bch_fs *c, subvol_inum inum, size_t aligned_size,
+ off_t aligned_offset, void *buf)
+{
+ BUG_ON(aligned_size & (block_bytes(c) - 1));
+ BUG_ON(aligned_offset & (block_bytes(c) - 1));
+
+ struct bch_io_opts io_opts;
+ if (get_inode_io_opts(c, inum, &io_opts))
+ return -ENOENT;
+
+ struct bch_read_bio rbio;
+ struct bio_vec bv;
+ userbio_init(&rbio.bio, &bv, buf, aligned_size);
+ bio_set_op_attrs(&rbio.bio, REQ_OP_READ, REQ_SYNC);
+ rbio.bio.bi_iter.bi_sector = aligned_offset >> 9;
+
+ struct closure cl;
+ closure_init_stack(&cl);
+
+ closure_get(&cl);
+ rbio.bio.bi_end_io = bcachefs_fuse_read_endio;
+ rbio.bio.bi_private = &cl;
+
+ bch2_read(c, rbio_init(&rbio.bio, io_opts), inum);
+
+ closure_sync(&cl);
+
+ return -blk_status_to_errno(rbio.bio.bi_status);
+}
+
+static void bcachefs_fuse_read(fuse_req_t req, fuse_ino_t ino,
+ size_t size, off_t offset,
+ struct fuse_file_info *fi)
+{
+ subvol_inum inum = map_root_ino(ino);
+ struct bch_fs *c = fuse_req_userdata(req);
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_read(%llu, %zd, %lld)\n",
+ inum.inum, size, offset);
+
+ /* Check inode size. */
+ struct bch_inode_unpacked bi;
+ int ret = bch2_inode_find_by_inum(c, inum, &bi);
+ if (ret) {
+ fuse_reply_err(req, -ret);
+ return;
+ }
+
+ off_t end = min_t(u64, bi.bi_size, offset + size);
+ if (end <= offset) {
+ fuse_reply_buf(req, NULL, 0);
+ return;
+ }
+ size = end - offset;
+
+ struct fuse_align_io align = align_io(c, size, offset);
+
+ void *buf = aligned_alloc(PAGE_SIZE, align.size);
+ if (!buf) {
+ fuse_reply_err(req, ENOMEM);
+ return;
+ }
+
+ ret = read_aligned(c, inum, align.size, align.start, buf);
+
+ if (likely(!ret))
+ fuse_reply_buf(req, buf + align.pad_start, size);
+ else
+ fuse_reply_err(req, -ret);
+
+ free(buf);
+}
+
+static int inode_update_times(struct bch_fs *c, subvol_inum inum)
+{
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ struct bch_inode_unpacked inode_u;
+ int ret = 0;
+ u64 now;
+
+ trans = bch2_trans_get(c);
+retry:
+ bch2_trans_begin(trans);
+ now = bch2_current_time(c);
+
+ ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ inode_u.bi_mtime = now;
+ inode_u.bi_ctime = now;
+
+ ret = bch2_inode_write(trans, &iter, &inode_u);
+ if (ret)
+ goto err;
+
+ ret = bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ if (ret == -EINTR)
+ goto retry;
+
+ bch2_trans_put(trans);
+ return ret;
+}
+
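+/*
+ * Submit one aligned write and wait for it: the closure reference taken
+ * before bch2_write() is kicked off is released from the write end_io
+ * callback, so closure_sync() returns when the write has completed.
+ */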
+static int write_aligned(struct bch_fs *c, subvol_inum inum,
+ struct bch_io_opts io_opts, void *buf,
+ size_t aligned_size, off_t aligned_offset,
+ off_t new_i_size, size_t *written_out)
+{
+ struct write_aligned_op_t w = { 0 };
+ struct bch_write_op *op = &w.op;
+ struct bio_vec bv;
+
+ BUG_ON(aligned_size & (block_bytes(c) - 1));
+ BUG_ON(aligned_offset & (block_bytes(c) - 1));
+
+ *written_out = 0;
+
+ closure_init_stack(&w.cl);
+
+ bch2_write_op_init(op, c, io_opts); /* XXX reads from op?! */
+ op->write_point = writepoint_hashed(0);
+ op->nr_replicas = io_opts.data_replicas;
+ op->target = io_opts.foreground_target;
+ op->subvol = inum.subvol;
+ op->pos = POS(inum.inum, aligned_offset >> 9);
+ op->new_i_size = new_i_size;
+ op->end_io = bcachefs_fuse_write_endio;
+
+ userbio_init(&op->wbio.bio, &bv, buf, aligned_size);
+ bio_set_op_attrs(&op->wbio.bio, REQ_OP_WRITE, REQ_SYNC);
+
+ if (bch2_disk_reservation_get(c, &op->res, aligned_size >> 9,
+ op->nr_replicas, 0)) {
+ /* XXX: use check_range_allocated like dio write path */
+ return -ENOSPC;
+ }
+
+ closure_get(&w.cl);
+
+ closure_call(&op->cl, bch2_write, NULL, NULL);
+
+ closure_sync(&w.cl);
+
+ if (!op->error)
+ *written_out = op->written << 9;
+
+ return op->error;
+}
+
+static void bcachefs_fuse_write(fuse_req_t req, fuse_ino_t ino,
+ const char *buf, size_t size,
+ off_t offset,
+ struct fuse_file_info *fi)
+{
+ subvol_inum inum = map_root_ino(ino);
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_io_opts io_opts;
+ size_t aligned_written;
+ int ret = 0;
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write(%llu, %zd, %lld)\n",
+ inum.inum, size, offset);
+
+ struct fuse_align_io align = align_io(c, size, offset);
+ void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size);
+ BUG_ON(!aligned_buf);
+
+ if (get_inode_io_opts(c, inum, &io_opts)) {
+ ret = -ENOENT;
+ goto err;
+ }
+
+ /* Realign the data and read in start and end, if needed */
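+ /*
+ * This is a read-modify-write: the partial first and last blocks are
+ * read into the aligned buffer, the new data is overlaid on top, and
+ * the whole aligned range is written back out.
+ */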
+
+ /* Read partial start data. */
+ if (align.pad_start) {
+ memset(aligned_buf, 0, block_bytes(c));
+
+ ret = read_aligned(c, inum, block_bytes(c), align.start,
+ aligned_buf);
+ if (ret)
+ goto err;
+ }
+
+ /*
+ * Read partial end data. If the whole write fits in one block, the
+ * start data and the end data are the same so this isn't needed.
+ */
+ if (align.pad_end &&
+ !(align.pad_start && align.size == block_bytes(c))) {
+ off_t partial_end_start = align.end - block_bytes(c);
+ size_t buf_offset = align.size - block_bytes(c);
+
+ memset(aligned_buf + buf_offset, 0, block_bytes(c));
+
+ ret = read_aligned(c, inum, block_bytes(c), partial_end_start,
+ aligned_buf + buf_offset);
+ if (ret)
+ goto err;
+ }
+
+ /* Overlay what we want to write. */
+ memcpy(aligned_buf + align.pad_start, buf, size);
+
+ /* Actually write. */
+ ret = write_aligned(c, inum, io_opts, aligned_buf,
+ align.size, align.start,
+ offset + size, &aligned_written);
+
+ /* Figure out how many unaligned bytes were written. */
+ size_t written = align_fix_up_bytes(&align, aligned_written);
+ BUG_ON(written > size);
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write: wrote %zd bytes\n",
+ written);
+
+ if (written > 0)
+ ret = 0;
+
+ /*
+ * Update inode times.
+ * TODO: Integrate with bch2_extent_update()
+ */
+ if (!ret)
+ ret = inode_update_times(c, inum);
+
+ if (!ret) {
+ BUG_ON(written == 0);
+ fuse_reply_write(req, written);
+ free(aligned_buf);
+ return;
+ }
+
+err:
+ fuse_reply_err(req, -ret);
+ free(aligned_buf);
+}
+
+static void bcachefs_fuse_symlink(fuse_req_t req, const char *link,
+ fuse_ino_t dir_ino, const char *name)
+{
+ subvol_inum dir = map_root_ino(dir_ino);
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked new_inode;
+ size_t link_len = strlen(link);
+ int ret;
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_symlink(%s, %llu, %s)\n",
+ link, dir.inum, name);
+
+ ret = do_create(c, dir, name, S_IFLNK|S_IRWXUGO, 0, &new_inode);
+ if (ret)
+ goto err;
+
+ struct bch_io_opts io_opts;
+ ret = get_inode_io_opts(c, dir, &io_opts);
+ if (ret)
+ goto err;
+
+ struct fuse_align_io align = align_io(c, link_len + 1, 0);
+
+ void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size);
+ BUG_ON(!aligned_buf);
+
+ memset(aligned_buf, 0, align.size);
+ memcpy(aligned_buf, link, link_len); /* already terminated */
+
+ subvol_inum inum = (subvol_inum) { dir.subvol, new_inode.bi_inum };
+
+ size_t aligned_written;
+ ret = write_aligned(c, inum, io_opts, aligned_buf,
+ align.size, align.start, link_len + 1,
+ &aligned_written);
+ free(aligned_buf);
+
+ if (ret)
+ goto err;
+
+ size_t written = align_fix_up_bytes(&align, aligned_written);
+ BUG_ON(written != link_len + 1); // TODO: handle short
+
+ ret = inode_update_times(c, inum);
+ if (ret)
+ goto err;
+
+ new_inode.bi_size = written;
+
+ struct fuse_entry_param e = inode_to_entry(c, &new_inode);
+ fuse_reply_entry(req, &e);
+ return;
+
+err:
+ fuse_reply_err(req, -ret);
+}
+
+static void bcachefs_fuse_readlink(fuse_req_t req, fuse_ino_t ino)
+{
+ subvol_inum inum = map_root_ino(ino);
+ struct bch_fs *c = fuse_req_userdata(req);
+ char *buf = NULL;
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readlink(%llu)\n", inum.inum);
+
+ struct bch_inode_unpacked bi;
+ int ret = bch2_inode_find_by_inum(c, inum, &bi);
+ if (ret)
+ goto err;
+
+ struct fuse_align_io align = align_io(c, bi.bi_size, 0);
+
+ ret = -ENOMEM;
+ buf = aligned_alloc(PAGE_SIZE, align.size);
+ if (!buf)
+ goto err;
+
+ ret = read_aligned(c, inum, align.size, align.start, buf);
+ if (ret)
+ goto err;
+
+ BUG_ON(buf[align.size - 1] != 0);
+
+ fuse_reply_readlink(req, buf);
+
+err:
+ if (ret)
+ fuse_reply_err(req, -ret);
+
+ free(buf);
+}
+
+#if 0
+/*
+ * FUSE flush is essentially the close() call, however it is not guaranteed
+ * that one flush happens per open/create.
+ *
+ * It doesn't have to do anything, and is mostly relevant for NFS-style
+ * filesystems where close has some relationship to caching.
+ */
+static void bcachefs_fuse_flush(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_release(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_fsync(fuse_req_t req, fuse_ino_t inum, int datasync,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_opendir(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+struct fuse_dir_context {
+ struct dir_context ctx;
+ fuse_req_t req;
+ char *buf;
+ size_t bufsize;
+};
+
+struct fuse_dirent {
+ uint64_t ino;
+ uint64_t off;
+ uint32_t namelen;
+ uint32_t type;
+ char name[];
+};
+
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
+#define FUSE_DIRENT_ALIGN(x) \
+ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+
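+/*
+ * Serialize one directory entry in the FUSE protocol's struct fuse_dirent
+ * layout: the fixed header above followed by the name, padded to an 8-byte
+ * boundary.
+ */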
+static size_t fuse_add_direntry2(char *buf, size_t bufsize,
+ const char *name, int namelen,
+ const struct stat *stbuf, off_t off)
+{
+ size_t entlen = FUSE_NAME_OFFSET + namelen;
+ size_t entlen_padded = FUSE_DIRENT_ALIGN(entlen);
+ struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
+
+ if ((buf == NULL) || (entlen_padded > bufsize))
+ return entlen_padded;
+
+ dirent->ino = stbuf->st_ino;
+ dirent->off = off;
+ dirent->namelen = namelen;
+ dirent->type = (stbuf->st_mode & S_IFMT) >> 12;
+ memcpy(dirent->name, name, namelen);
+ memset(dirent->name + namelen, 0, entlen_padded - entlen);
+
+ return entlen_padded;
+}
+
+static int fuse_filldir(struct dir_context *_ctx,
+ const char *name, int namelen,
+ loff_t pos, u64 ino, unsigned type)
+{
+ struct fuse_dir_context *ctx =
+ container_of(_ctx, struct fuse_dir_context, ctx);
+
+ struct stat statbuf = {
+ .st_ino = unmap_root_ino(ino),
+ .st_mode = type << 12,
+ };
+
+ fuse_log(FUSE_LOG_DEBUG, "fuse_filldir(name=%s inum=%llu pos=%llu)\n",
+ name, statbuf.st_ino, pos);
+
+ size_t len = fuse_add_direntry2(ctx->buf,
+ ctx->bufsize,
+ name,
+ namelen,
+ &statbuf,
+ pos + 1);
+
+ if (len > ctx->bufsize)
+ return -1;
+
+ ctx->buf += len;
+ ctx->bufsize -= len;
+
+ return 0;
+}
+
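+/*
+ * bch2_readdir() does not emit "." and "..", so synthesize them at
+ * positions 0 and 1; the parent inode for ".." is not tracked yet and is
+ * currently hardcoded (see the TODO below).
+ */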
+static bool handle_dots(struct fuse_dir_context *ctx, fuse_ino_t dir)
+{
+ if (ctx->ctx.pos == 0) {
+ if (fuse_filldir(&ctx->ctx, ".", 1, ctx->ctx.pos,
+ dir, DT_DIR) < 0)
+ return false;
+ ctx->ctx.pos = 1;
+ }
+
+ if (ctx->ctx.pos == 1) {
+ if (fuse_filldir(&ctx->ctx, "..", 2, ctx->ctx.pos,
+ /*TODO: parent*/ 1, DT_DIR) < 0)
+ return false;
+ ctx->ctx.pos = 2;
+ }
+
+ return true;
+}
+
+static void bcachefs_fuse_readdir(fuse_req_t req, fuse_ino_t dir_ino,
+ size_t size, off_t off,
+ struct fuse_file_info *fi)
+{
+ subvol_inum dir = map_root_ino(dir_ino);
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked bi;
+ char *buf = calloc(size, 1);
+ struct fuse_dir_context ctx = {
+ .ctx.actor = fuse_filldir,
+ .ctx.pos = off,
+ .req = req,
+ .buf = buf,
+ .bufsize = size,
+ };
+ int ret = 0;
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir(dir=%llu, size=%zu, "
+ "off=%lld)\n", dir.inum, size, off);
+
+ ret = bch2_inode_find_by_inum(c, dir, &bi);
+ if (ret)
+ goto reply;
+
+ if (!S_ISDIR(bi.bi_mode)) {
+ ret = -ENOTDIR;
+ goto reply;
+ }
+
+ if (!handle_dots(&ctx, dir.inum))
+ goto reply;
+
+ ret = bch2_readdir(c, dir, &ctx.ctx);
+reply:
+ if (!ret) {
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir reply %zd\n",
+ ctx.buf - buf);
+ fuse_reply_buf(req, buf, ctx.buf - buf);
+ } else {
+ fuse_reply_err(req, -ret);
+ }
+
+ free(buf);
+}
+
+#if 0
+static void bcachefs_fuse_readdirplus(fuse_req_t req, fuse_ino_t dir,
+ size_t size, off_t off,
+ struct fuse_file_info *fi)
+{
+
+}
+
+static void bcachefs_fuse_releasedir(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_fsyncdir(fuse_req_t req, fuse_ino_t inum, int datasync,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+static void bcachefs_fuse_statfs(fuse_req_t req, fuse_ino_t inum)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
+ unsigned shift = c->block_bits;
+ struct statvfs statbuf = {
+ .f_bsize = block_bytes(c),
+ .f_frsize = block_bytes(c),
+ .f_blocks = usage.capacity >> shift,
+ .f_bfree = (usage.capacity - usage.used) >> shift,
+ //.f_bavail = statbuf.f_bfree,
+ .f_files = usage.nr_inodes,
+ .f_ffree = U64_MAX,
+ .f_namemax = BCH_NAME_MAX,
+ };
+
+ fuse_reply_statfs(req, &statbuf);
+}
+
+#if 0
+static void bcachefs_fuse_setxattr(fuse_req_t req, fuse_ino_t inum,
+ const char *name, const char *value,
+ size_t size, int flags)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_getxattr(fuse_req_t req, fuse_ino_t inum,
+ const char *name, size_t size)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+
+ fuse_reply_xattr(req, );
+}
+
+static void bcachefs_fuse_listxattr(fuse_req_t req, fuse_ino_t inum, size_t size)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_removexattr(fuse_req_t req, fuse_ino_t inum,
+ const char *name)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+static void bcachefs_fuse_create(fuse_req_t req, fuse_ino_t dir_ino,
+ const char *name, mode_t mode,
+ struct fuse_file_info *fi)
+{
+ subvol_inum dir = map_root_ino(dir_ino);
+ struct bch_fs *c = fuse_req_userdata(req);
+ struct bch_inode_unpacked new_inode;
+ int ret;
+
+ fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_create(%llu, %s, %x)\n",
+ dir.inum, name, mode);
+
+ ret = do_create(c, dir, name, mode, 0, &new_inode);
+ if (ret)
+ goto err;
+
+ struct fuse_entry_param e = inode_to_entry(c, &new_inode);
+ fuse_reply_create(req, &e, fi);
+ return;
+err:
+ fuse_reply_err(req, -ret);
+}
+
+#if 0
+static void bcachefs_fuse_write_buf(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_bufvec *bufv, off_t off,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_fallocate(fuse_req_t req, fuse_ino_t inum, int mode,
+ off_t offset, off_t length,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+static const struct fuse_lowlevel_ops bcachefs_fuse_ops = {
+ .init = bcachefs_fuse_init,
+ .destroy = bcachefs_fuse_destroy,
+ .lookup = bcachefs_fuse_lookup,
+ .getattr = bcachefs_fuse_getattr,
+ .setattr = bcachefs_fuse_setattr,
+ .readlink = bcachefs_fuse_readlink,
+ .mknod = bcachefs_fuse_mknod,
+ .mkdir = bcachefs_fuse_mkdir,
+ .unlink = bcachefs_fuse_unlink,
+ .rmdir = bcachefs_fuse_rmdir,
+ .symlink = bcachefs_fuse_symlink,
+ .rename = bcachefs_fuse_rename,
+ .link = bcachefs_fuse_link,
+ .open = bcachefs_fuse_open,
+ .read = bcachefs_fuse_read,
+ .write = bcachefs_fuse_write,
+ //.flush = bcachefs_fuse_flush,
+ //.release = bcachefs_fuse_release,
+ //.fsync = bcachefs_fuse_fsync,
+ //.opendir = bcachefs_fuse_opendir,
+ .readdir = bcachefs_fuse_readdir,
+ //.readdirplus = bcachefs_fuse_readdirplus,
+ //.releasedir = bcachefs_fuse_releasedir,
+ //.fsyncdir = bcachefs_fuse_fsyncdir,
+ .statfs = bcachefs_fuse_statfs,
+ //.setxattr = bcachefs_fuse_setxattr,
+ //.getxattr = bcachefs_fuse_getxattr,
+ //.listxattr = bcachefs_fuse_listxattr,
+ //.removexattr = bcachefs_fuse_removexattr,
+ .create = bcachefs_fuse_create,
+
+ /* posix locks: */
+#if 0
+ .getlk = bcachefs_fuse_getlk,
+ .setlk = bcachefs_fuse_setlk,
+#endif
+ //.write_buf = bcachefs_fuse_write_buf,
+ //.fallocate = bcachefs_fuse_fallocate,
+
+};
+
+/*
+ * Setup and command parsing.
+ */
+
+struct bf_context {
+ char *devices_str;
+ char **devices;
+ int nr_devices;
+};
+
+static void bf_context_free(struct bf_context *ctx)
+{
+ int i;
+
+ free(ctx->devices_str);
+ for (i = 0; i < ctx->nr_devices; ++i)
+ free(ctx->devices[i]);
+ free(ctx->devices);
+}
+
+static struct fuse_opt bf_opts[] = {
+ FUSE_OPT_END
+};
+
+/*
+ * Fuse option parsing helper -- returning 0 means we consumed the argument, 1
+ * means we did not.
+ */
+static int bf_opt_proc(void *data, const char *arg, int key,
+ struct fuse_args *outargs)
+{
+ struct bf_context *ctx = data;
+
+ switch (key) {
+ case FUSE_OPT_KEY_NONOPT:
+ /* Just extract the first non-option string. */
+ if (!ctx->devices_str) {
+ ctx->devices_str = strdup(arg);
+ return 0;
+ }
+ return 1;
+ }
+
+ return 1;
+}
+
+/*
+ * dev1:dev2 -> [ dev1, dev2 ]
+ * dev -> [ dev ]
+ */
+static void tokenize_devices(struct bf_context *ctx)
+{
+ char *devices_str = strdup(ctx->devices_str);
+ char *devices_tmp = devices_str;
+ char **devices = NULL;
+ int nr = 0;
+ char *dev = NULL;
+
+ while ((dev = strsep(&devices_tmp, ":"))) {
+ if (strlen(dev) > 0) {
+ devices = realloc(devices, (nr + 1) * sizeof *devices);
+ devices[nr] = strdup(dev);
+ nr++;
+ }
+ }
+
+ if (!devices) {
+ devices = malloc(sizeof *devices);
+ devices[0] = strdup(ctx->devices_str);
+ nr = 1;
+ }
+
+ ctx->devices = devices;
+ ctx->nr_devices = nr;
+
+ free(devices_str);
+}
+
+static void usage(char *argv[])
+{
+ printf("Usage: %s fusemount [options] <dev>[:dev2:...] <mountpoint>\n",
+ argv[0]);
+ printf("\n");
+}
+
+int cmd_fusemount(int argc, char *argv[])
+{
+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
+ struct bch_opts bch_opts = bch2_opts_empty();
+ struct bf_context ctx = { 0 };
+ struct bch_fs *c = NULL;
+ int ret = 0, i;
+
+ /* Parse arguments. */
+ if (fuse_opt_parse(&args, &ctx, bf_opts, bf_opt_proc) < 0)
+ die("fuse_opt_parse err: %m");
+
+ struct fuse_cmdline_opts fuse_opts;
+ if (fuse_parse_cmdline(&args, &fuse_opts) < 0)
+ die("fuse_parse_cmdline err: %m");
+
+ if (fuse_opts.show_help) {
+ usage(argv);
+ fuse_cmdline_help();
+ fuse_lowlevel_help();
+ ret = 0;
+ goto out;
+ }
+ if (fuse_opts.show_version) {
+ /* TODO: Show bcachefs version. */
+ printf("FUSE library version %s\n", fuse_pkgversion());
+ fuse_lowlevel_version();
+ ret = 0;
+ goto out;
+ }
+ if (!fuse_opts.mountpoint) {
+ usage(argv);
+ printf("Please supply a mountpoint.\n");
+ ret = 1;
+ goto out;
+ }
+ if (!ctx.devices_str) {
+ usage(argv);
+ printf("Please specify a device or device1:device2:...\n");
+ ret = 1;
+ goto out;
+ }
+ tokenize_devices(&ctx);
+
+ struct printbuf fsname = PRINTBUF;
+ prt_printf(&fsname, "fsname=");
+ for (i = 0; i < ctx.nr_devices; ++i) {
+ if (i)
+ prt_str(&fsname, ":");
+ prt_str(&fsname, ctx.devices[i]);
+ }
+
+ fuse_opt_add_arg(&args, "-o");
+ fuse_opt_add_arg(&args, fsname.buf);
+
+ /* Open bch */
+ printf("Opening bcachefs filesystem on:\n");
+ for (i = 0; i < ctx.nr_devices; ++i)
+ printf("\t%s\n", ctx.devices[i]);
+
+ c = bch2_fs_open(ctx.devices, ctx.nr_devices, bch_opts);
+ if (IS_ERR(c))
+ die("error opening %s: %s", ctx.devices_str,
+ bch2_err_str(PTR_ERR(c)));
+
+ /* Fuse */
+ struct fuse_session *se =
+ fuse_session_new(&args, &bcachefs_fuse_ops,
+ sizeof(bcachefs_fuse_ops), c);
+ if (!se)
+ die("fuse_lowlevel_new err: %m");
+
+ if (fuse_set_signal_handlers(se) < 0)
+ die("fuse_set_signal_handlers err: %m");
+
+ if (fuse_session_mount(se, fuse_opts.mountpoint))
+ die("fuse_mount err: %m");
+
+ /* This print statement is a trigger for tests. */
+ printf("Fuse mount initialized.\n");
+
+ if (fuse_opts.foreground == 0) {
+ printf("Forcing FUSE to foreground mode due to gcc constructor usage.\n");
+ fuse_opts.foreground = 1;
+ }
+
+ fuse_daemonize(fuse_opts.foreground);
+
+ ret = fuse_session_loop(se);
+
+ /* Cleanup */
+ fuse_session_unmount(se);
+ fuse_remove_signal_handlers(se);
+ fuse_session_destroy(se);
+
+out:
+ free(fuse_opts.mountpoint);
+ fuse_opt_free_args(&args);
+ bf_context_free(&ctx);
+
+ return ret ? 1 : 0;
+}
+
+#endif /* BCACHEFS_FUSE */
diff --git a/c_src/cmd_key.c b/c_src/cmd_key.c
new file mode 100644
index 00000000..96206c4c
--- /dev/null
+++ b/c_src/cmd_key.c
@@ -0,0 +1,149 @@
+#include <errno.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "libbcachefs/checksum.h"
+#include "crypto.h"
+#include "libbcachefs.h"
+
+static void unlock_usage(void)
+{
+ puts("bcachefs unlock - unlock an encrypted filesystem so it can be mounted\n"
+ "Usage: bcachefs unlock [OPTION] device\n"
+ "\n"
+ "Options:\n"
+ " -c Check if a device is encrypted\n"
+ " -k (session|user|user_session)\n"
+ " Keyring to add to (default: user)\n"
+ " -h Display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_unlock(int argc, char *argv[])
+{
+ const char *keyring = "user";
+ bool check = false;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "ck:h")) != -1)
+ switch (opt) {
+ case 'c':
+ check = true;
+ break;
+ case 'k':
+ keyring = strdup(optarg);
+ break;
+ case 'h':
+ unlock_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ char *dev = arg_pop();
+ if (!dev)
+ die("Please supply a device");
+
+ if (argc)
+ die("Too many arguments");
+
+ struct bch_opts opts = bch2_opts_empty();
+
+ opt_set(opts, noexcl, true);
+ opt_set(opts, nochanges, true);
+
+ struct bch_sb_handle sb;
+ int ret = bch2_read_super(dev, &opts, &sb);
+ if (ret)
+ die("Error opening %s: %s", dev, bch2_err_str(ret));
+
+ if (!bch2_sb_is_encrypted(sb.sb))
+ die("%s is not encrypted", dev);
+
+ if (check)
+ exit(EXIT_SUCCESS);
+
+ char *passphrase = read_passphrase("Enter passphrase: ");
+
+ bch2_add_key(sb.sb, "user", keyring, passphrase);
+
+ bch2_free_super(&sb);
+ memzero_explicit(passphrase, strlen(passphrase));
+ free(passphrase);
+ return 0;
+}
+
+int cmd_set_passphrase(int argc, char *argv[])
+{
+ struct bch_opts opts = bch2_opts_empty();
+ struct bch_fs *c;
+
+ if (argc < 2)
+ die("Please supply one or more devices");
+
+ opt_set(opts, nostart, true);
+
+ /*
+ * we use bch2_fs_open() here, instead of just reading the superblock,
+ * to make sure we're opening and updating every component device:
+ */
+
+ c = bch2_fs_open(argv + 1, argc - 1, opts);
+ if (IS_ERR(c))
+ die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));
+
+ struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
+ if (!crypt)
+ die("Filesystem does not have encryption enabled");
+
+ struct bch_encrypted_key new_key;
+ new_key.magic = BCH_KEY_MAGIC;
+
+ int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key);
+ if (ret)
+ die("Error getting current key");
+
+ char *new_passphrase = read_passphrase_twice("Enter new passphrase: ");
+ struct bch_key passphrase_key = derive_passphrase(crypt, new_passphrase);
+
+ if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(c->disk_sb.sb),
+ &new_key, sizeof(new_key)))
+ die("error encrypting key");
+ crypt->key = new_key;
+
+ bch2_revoke_key(c->disk_sb.sb);
+ bch2_write_super(c);
+ bch2_fs_stop(c);
+ return 0;
+}
+
+int cmd_remove_passphrase(int argc, char *argv[])
+{
+ struct bch_opts opts = bch2_opts_empty();
+ struct bch_fs *c;
+
+ if (argc < 2)
+ die("Please supply one or more devices");
+
+ opt_set(opts, nostart, true);
+ c = bch2_fs_open(argv + 1, argc - 1, opts);
+ if (IS_ERR(c))
+ die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));
+
+ struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
+ if (!crypt)
+ die("Filesystem does not have encryption enabled");
+
+ struct bch_encrypted_key new_key;
+ new_key.magic = BCH_KEY_MAGIC;
+
+ int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key);
+ if (ret)
+ die("Error getting current key");
+
+ crypt->key = new_key;
+
+ bch2_write_super(c);
+ bch2_fs_stop(c);
+ return 0;
+}
diff --git a/c_src/cmd_kill_btree_node.c b/c_src/cmd_kill_btree_node.c
new file mode 100644
index 00000000..b0832843
--- /dev/null
+++ b/c_src/cmd_kill_btree_node.c
@@ -0,0 +1,116 @@
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "tools-util.h"
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs/sb-members.h"
+#include "libbcachefs/super.h"
+
+static void kill_btree_node_usage(void)
+{
+ puts("bcachefs kill_btree_node - make btree nodes unreadable\n"
+ "Usage: bcachefs kill_btree_node [OPTION]... <devices>\n"
+ "\n"
+ "Options:\n"
+ " -b (extents|inodes|dirents|xattrs) Btree to delete from\n"
+ " -l level Levle to delete from (0 == leaves)\n"
+ " -i index Index of btree node to kill\n"
+ " -h Display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_kill_btree_node(int argc, char *argv[])
+{
+ struct bch_opts opts = bch2_opts_empty();
+ enum btree_id btree_id = 0;
+ unsigned level = 0;
+ u64 node_index = 0;
+ int opt;
+
+ opt_set(opts, read_only, true);
+
+ while ((opt = getopt(argc, argv, "b:l:i:h")) != -1)
+ switch (opt) {
+ case 'b':
+ btree_id = read_string_list_or_die(optarg,
+ __bch2_btree_ids, "btree id");
+ break;
+ case 'l':
+ if (kstrtouint(optarg, 10, &level) || level >= BTREE_MAX_DEPTH)
+ die("invalid level");
+ break;
+ case 'i':
+ if (kstrtoull(optarg, 10, &node_index))
+ die("invalid index %s", optarg);
+ break;
+ case 'h':
+ kill_btree_node_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ if (!argc)
+ die("Please supply device(s)");
+
+ struct bch_fs *c = bch2_fs_open(argv, argc, opts);
+ if (IS_ERR(c))
+ die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
+
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct btree *b;
+ int ret;
+ void *zeroes;
+
+ ret = posix_memalign(&zeroes, c->opts.block_size, c->opts.block_size);
+ if (ret)
+ die("error %s from posix_memalign", bch2_err_str(ret));
+
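+ /*
+ * Walk the btree at the given level; for the node at the requested
+ * index, overwrite the first block of every replica with zeroes so
+ * the node can no longer be read.
+ */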
+ __for_each_btree_node(trans, iter, btree_id, POS_MIN, 0, level, 0, b, ret) {
+ if (b->c.level != level)
+ continue;
+
+ if (!node_index) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ bch_info(c, "killing btree node %s", buf.buf);
+ printbuf_exit(&buf);
+
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key));
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+ ret = pwrite(ca->disk_sb.bdev->bd_fd, zeroes,
+ c->opts.block_size, ptr->offset << 9);
+ if (ret != c->opts.block_size) {
+ bch_err(c, "pwrite error: expected %u got %i %s",
+ c->opts.block_size, ret, strerror(errno));
+ ret = EXIT_FAILURE;
+ goto done;
+ }
+ }
+ goto done;
+ }
+
+ node_index--;
+ }
+ if (ret)
+ bch_err(c, "error %i walking btree nodes", ret);
+ else
+ bch_err(c, "node at specified index not found");
+ ret = EXIT_FAILURE;
+done:
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+
+ bch2_fs_stop(c);
+ return ret;
+}
diff --git a/c_src/cmd_list_journal.c b/c_src/cmd_list_journal.c
new file mode 100644
index 00000000..279afa79
--- /dev/null
+++ b/c_src/cmd_list_journal.c
@@ -0,0 +1,276 @@
+#include <fcntl.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "tools-util.h"
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs/journal_io.h"
+#include "libbcachefs/journal_seq_blacklist.h"
+#include "libbcachefs/super.h"
+
+static const char *NORMAL = "\x1B[0m";
+static const char *RED = "\x1B[31m";
+
+static void list_journal_usage(void)
+{
+ puts("bcachefs list_journal - print contents of journal\n"
+ "Usage: bcachefs list_journal [OPTION]... <devices>\n"
+ "\n"
+ "Options:\n"
+ " -a Read entire journal, not just dirty entries\n"
+ " -n, --nr-entries=nr Number of journal entries to print, starting from the most recent\n"
+ " -t, --transaction-filter=bbpos Filter transactions not updating <bbpos>\n"
+ " -k, --key-filter=btree Filter keys not updating btree\n"
+ " -v, --verbose Verbose mode\n"
+ " -h, --help Display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
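+/*
+ * Replace the leading space on each line of the printbuf with '*' so
+ * blacklisted journal entries stand out in the listing.
+ */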
+static void star_start_of_lines(char *buf)
+{
+ char *p = buf;
+
+ if (*p == ' ')
+ *p = '*';
+
+ while ((p = strstr(p, "\n ")))
+ p[1] = '*';
+}
+
+static inline bool entry_is_transaction_start(struct jset_entry *entry)
+{
+ return entry->type == BCH_JSET_ENTRY_log && !entry->level;
+}
+
+typedef DARRAY(struct bbpos) d_bbpos;
+typedef DARRAY(enum btree_id) d_btree_id;
+
+static bool bkey_matches_filter(d_bbpos filter, struct jset_entry *entry, struct bkey_i *k)
+{
+ darray_for_each(filter, i) {
+ if (i->btree != entry->btree_id)
+ continue;
+
+ if (bkey_eq(i->pos, k->k.p))
+ return true;
+
+ if (btree_node_type_is_extents(i->btree) &&
+ bkey_ge(i->pos, bkey_start_pos(&k->k)) &&
+ bkey_lt(i->pos, k->k.p))
+ return true;
+ }
+ return false;
+}
+
+static bool entry_matches_transaction_filter(struct jset_entry *entry,
+ d_bbpos filter)
+{
+ if (entry->type == BCH_JSET_ENTRY_btree_root ||
+ entry->type == BCH_JSET_ENTRY_btree_keys ||
+ entry->type == BCH_JSET_ENTRY_overwrite) {
+ struct bkey_i *k;
+
+ jset_entry_for_each_key(entry, k)
+ if (bkey_matches_filter(filter, entry, k))
+ return true;
+ }
+
+ return false;
+}
+
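+/*
+ * A transaction (the entries between two log entries) is printed only if at
+ * least one of its keys touches a position in the transaction filter.
+ */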
+static bool should_print_transaction(struct jset_entry *entry, struct jset_entry *end,
+ d_bbpos filter)
+{
+ if (!filter.nr)
+ return true;
+
+ for (entry = vstruct_next(entry);
+ entry != end && !entry_is_transaction_start(entry);
+ entry = vstruct_next(entry))
+ if (entry_matches_transaction_filter(entry, filter))
+ return true;
+
+ return false;
+}
+
+static bool should_print_entry(struct jset_entry *entry, d_btree_id filter)
+{
+ struct bkey_i *k;
+
+ if (!filter.nr)
+ return true;
+
+ if (entry->type != BCH_JSET_ENTRY_btree_root &&
+ entry->type != BCH_JSET_ENTRY_btree_keys &&
+ entry->type != BCH_JSET_ENTRY_overwrite)
+ return true;
+
+ jset_entry_for_each_key(entry, k)
+ darray_for_each(filter, id)
+ if (entry->btree_id == *id)
+ return true;
+
+ return false;
+}
+
+static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
+ d_bbpos transaction_filter,
+ d_btree_id key_filter)
+{
+ struct journal_replay *p, **_p;
+ struct genradix_iter iter;
+ struct printbuf buf = PRINTBUF;
+
+ genradix_for_each(&c->journal_entries, iter, _p) {
+ p = *_p;
+ if (!p)
+ continue;
+
+ if (le64_to_cpu(p->j.seq) + nr_entries < atomic64_read(&c->journal.seq))
+ continue;
+
+ bool blacklisted = p->ignore ||
+ bch2_journal_seq_is_blacklisted(c,
+ le64_to_cpu(p->j.seq), false);
+
+ if (!transaction_filter.nr) {
+ if (blacklisted)
+ printf("blacklisted ");
+
+ printf("journal entry %llu\n", le64_to_cpu(p->j.seq));
+
+ printbuf_reset(&buf);
+
+ prt_printf(&buf,
+ " version %u\n"
+ " last seq %llu\n"
+ " flush %u\n"
+ " written at ",
+ le32_to_cpu(p->j.version),
+ le64_to_cpu(p->j.last_seq),
+ !JSET_NO_FLUSH(&p->j));
+ bch2_journal_ptrs_to_text(&buf, c, p);
+
+ if (blacklisted)
+ star_start_of_lines(buf.buf);
+ printf("%s\n", buf.buf);
+ printbuf_reset(&buf);
+ }
+
+ struct jset_entry *entry = p->j.start;
+ struct jset_entry *end = vstruct_last(&p->j);
+ while (entry != end) {
+
+ /*
+ * log entries denote the start of a new transaction
+ * commit:
+ */
+ if (entry_is_transaction_start(entry)) {
+ if (!should_print_transaction(entry, end, transaction_filter)) {
+ do {
+ entry = vstruct_next(entry);
+ } while (entry != end && !entry_is_transaction_start(entry));
+
+ continue;
+ }
+
+ prt_newline(&buf);
+ }
+
+ if (!should_print_entry(entry, key_filter))
+ goto next;
+
+ bool highlight = entry_matches_transaction_filter(entry, transaction_filter);
+ if (highlight)
+ fputs(RED, stdout);
+
+ printbuf_indent_add(&buf, 4);
+ bch2_journal_entry_to_text(&buf, c, entry);
+
+ if (blacklisted)
+ star_start_of_lines(buf.buf);
+ printf("%s\n", buf.buf);
+ printbuf_reset(&buf);
+
+ if (highlight)
+ fputs(NORMAL, stdout);
+next:
+ entry = vstruct_next(entry);
+ }
+ }
+
+ printbuf_exit(&buf);
+}
+
+int cmd_list_journal(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "nr-entries", required_argument, NULL, 'n' },
+ { "transaction-filter", required_argument, NULL, 't' },
+ { "key-filter", required_argument, NULL, 'k' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "help", no_argument, NULL, 'h' },
+ { NULL }
+ };
+ struct bch_opts opts = bch2_opts_empty();
+ u32 nr_entries = U32_MAX;
+ d_bbpos transaction_filter = { 0 };
+ d_btree_id key_filter = { 0 };
+ int opt;
+
+ opt_set(opts, nochanges, true);
+ opt_set(opts, norecovery, true);
+ opt_set(opts, read_only, true);
+ opt_set(opts, degraded, true);
+ opt_set(opts, errors, BCH_ON_ERROR_continue);
+ opt_set(opts, fix_errors, FSCK_FIX_yes);
+ opt_set(opts, keep_journal, true);
+ opt_set(opts, read_journal_only, true);
+
+ while ((opt = getopt_long(argc, argv, "an:t:k:vh",
+ longopts, NULL)) != -1)
+ switch (opt) {
+ case 'a':
+ opt_set(opts, read_entire_journal, true);
+ break;
+ case 'n':
+ if (kstrtouint(optarg, 10, &nr_entries))
+ die("error parsing nr_entries");
+ opt_set(opts, read_entire_journal, true);
+ break;
+ case 't':
+ darray_push(&transaction_filter, bbpos_parse(optarg));
+ break;
+ case 'k':
+ darray_push(&key_filter, read_string_list_or_die(optarg, __bch2_btree_ids, "btree id"));
+ break;
+ case 'v':
+ opt_set(opts, verbose, true);
+ break;
+ case 'h':
+ list_journal_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ if (!argc)
+ die("Please supply device(s) to open");
+
+ darray_str devs = get_or_split_cmdline_devs(argc, argv);
+
+ struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts);
+ if (IS_ERR(c))
+ die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
+
+ journal_entries_print(c, nr_entries, transaction_filter, key_filter);
+ bch2_fs_stop(c);
+ return 0;
+}
diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c
new file mode 100644
index 00000000..ea32e4ee
--- /dev/null
+++ b/c_src/cmd_migrate.c
@@ -0,0 +1,867 @@
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/xattr.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "crypto.h"
+#include "libbcachefs.h"
+
+#include <linux/dcache.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/xattr.h>
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/alloc_background.h"
+#include "libbcachefs/alloc_foreground.h"
+#include "libbcachefs/btree_update.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/dirent.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/fs-common.h"
+#include "libbcachefs/inode.h"
+#include "libbcachefs/io_write.h"
+#include "libbcachefs/replicas.h"
+#include "libbcachefs/str_hash.h"
+#include "libbcachefs/super.h"
+#include "libbcachefs/xattr.h"
+
+/* XXX cut and pasted from fsck.c */
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+static char *dev_t_to_path(dev_t dev)
+{
+ char link[PATH_MAX], *p;
+ int ret;
+
+ char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
+ major(dev), minor(dev));
+ ret = readlink(sysfs_dev, link, sizeof(link));
+ free(sysfs_dev);
+
+ if (ret < 0 || ret >= sizeof(link))
+ die("readlink error while looking up block device: %m");
+
+ link[ret] = '\0';
+
+ p = strrchr(link, '/');
+ if (!p)
+ die("error looking up device name");
+ p++;
+
+ return mprintf("/dev/%s", p);
+}
+
+static bool path_is_fs_root(const char *path)
+{
+ char *line = NULL, *p, *mount;
+ size_t n = 0;
+ FILE *f;
+ bool ret = true;
+
+ f = fopen("/proc/self/mountinfo", "r");
+ if (!f)
+ die("Error getting mount information");
+
+ while (getline(&line, &n, f) != -1) {
+ p = line;
+
+ strsep(&p, " "); /* mount id */
+ strsep(&p, " "); /* parent id */
+ strsep(&p, " "); /* dev */
+ strsep(&p, " "); /* root */
+ mount = strsep(&p, " ");
+ strsep(&p, " ");
+
+ if (mount && !strcmp(path, mount))
+ goto found;
+ }
+
+ ret = false;
+found:
+ fclose(f);
+ free(line);
+ return ret;
+}
+
+static void mark_unreserved_space(struct bch_fs *c, ranges extents)
+{
+ struct bch_dev *ca = c->devs[0];
+ struct hole_iter iter;
+ struct range i;
+
+ for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
+ u64 b;
+
+ if (i.start == i.end)
+ return;
+
+ b = sector_to_bucket(ca, i.start >> 9);
+ do {
+ set_bit(b, ca->buckets_nouse);
+ b++;
+ } while (bucket_to_sector(ca, b) << 9 < i.end);
+ }
+}
+
+static void update_inode(struct bch_fs *c,
+ struct bch_inode_unpacked *inode)
+{
+ struct bkey_inode_buf packed;
+ int ret;
+
+ bch2_inode_pack(&packed, inode);
+ packed.inode.k.p.snapshot = U32_MAX;
+ ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
+ NULL, 0);
+ if (ret)
+ die("error updating inode: %s", bch2_err_str(ret));
+}
+
+static void create_link(struct bch_fs *c,
+ struct bch_inode_unpacked *parent,
+ const char *name, u64 inum, mode_t mode)
+{
+ struct qstr qstr = QSTR(name);
+ struct bch_inode_unpacked parent_u;
+ struct bch_inode_unpacked inode;
+
+ int ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_link_trans(trans,
+ (subvol_inum) { 1, parent->bi_inum }, &parent_u,
+ (subvol_inum) { 1, inum }, &inode, &qstr));
+ if (ret)
+ die("error creating hardlink: %s", bch2_err_str(ret));
+}
+
+static struct bch_inode_unpacked create_file(struct bch_fs *c,
+ struct bch_inode_unpacked *parent,
+ const char *name,
+ uid_t uid, gid_t gid,
+ mode_t mode, dev_t rdev)
+{
+ struct qstr qstr = QSTR(name);
+ struct bch_inode_unpacked new_inode;
+
+ bch2_inode_init_early(c, &new_inode);
+
+ int ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_create_trans(trans,
+ (subvol_inum) { 1, parent->bi_inum }, parent,
+ &new_inode, &qstr,
+ uid, gid, mode, rdev, NULL, NULL,
+ (subvol_inum) {}, 0));
+ if (ret)
+ die("error creating %s: %s", name, bch2_err_str(ret));
+
+ return new_inode;
+}
+
+#define for_each_xattr_handler(handlers, handler) \
+ if (handlers) \
+ for ((handler) = *(handlers)++; \
+ (handler) != NULL; \
+ (handler) = *(handlers)++)
+
+static const struct xattr_handler *xattr_resolve_name(char **name)
+{
+ const struct xattr_handler **handlers = bch2_xattr_handlers;
+ const struct xattr_handler *handler;
+
+ for_each_xattr_handler(handlers, handler) {
+ char *n;
+
+ n = strcmp_prefix(*name, xattr_prefix(handler));
+ if (n) {
+ if (!handler->prefix ^ !*n) {
+ if (*n)
+ continue;
+ return ERR_PTR(-EINVAL);
+ }
+ *name = n;
+ return handler;
+ }
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
+ struct stat *src)
+{
+ dst->bi_atime = timespec_to_bch2_time(c, src->st_atim);
+ dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim);
+ dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim);
+}
+
+static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
+ char *src)
+{
+ struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
+
+ char attrs[XATTR_LIST_MAX];
+ ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
+ if (attrs_size < 0)
+ die("listxattr error: %m");
+
+ char *next, *attr;
+ for (attr = attrs;
+ attr < attrs + attrs_size;
+ attr = next) {
+ next = attr + strlen(attr) + 1;
+
+ char val[XATTR_SIZE_MAX];
+ ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
+
+ if (val_size < 0)
+ die("error getting xattr val: %m");
+
+ const struct xattr_handler *h = xattr_resolve_name(&attr);
+ struct bch_inode_unpacked inode_u;
+
+ int ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_xattr_set(trans,
+ (subvol_inum) { 1, dst->bi_inum },
+ &inode_u, &hash_info, attr,
+ val, val_size, h->flags, 0));
+ if (ret < 0)
+ die("error creating xattr: %s", bch2_err_str(ret));
+ }
+}
+
+#define WRITE_DATA_BUF (1 << 20)
+
+static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
+
+static void write_data(struct bch_fs *c,
+ struct bch_inode_unpacked *dst_inode,
+ u64 dst_offset, void *buf, size_t len)
+{
+ struct bch_write_op op;
+ struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
+
+ BUG_ON(dst_offset & (block_bytes(c) - 1));
+ BUG_ON(len & (block_bytes(c) - 1));
+ BUG_ON(len > WRITE_DATA_BUF);
+
+ bio_init(&op.wbio.bio, NULL, bv, ARRAY_SIZE(bv), 0);
+ bch2_bio_map(&op.wbio.bio, buf, len);
+
+ bch2_write_op_init(&op, c, bch2_opts_to_inode_opts(c->opts));
+ op.write_point = writepoint_hashed(0);
+ op.nr_replicas = 1;
+ op.subvol = 1;
+ op.pos = SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
+ op.flags |= BCH_WRITE_SYNC;
+
+ int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
+ c->opts.data_replicas, 0);
+ if (ret)
+ die("error reserving space in new filesystem: %s", bch2_err_str(ret));
+
+ closure_call(&op.cl, bch2_write, NULL, NULL);
+
+ BUG_ON(!(op.flags & BCH_WRITE_DONE));
+ dst_inode->bi_sectors += len >> 9;
+
+ if (op.error)
+ die("write error: %s", bch2_err_str(op.error));
+}
+
+static void copy_data(struct bch_fs *c,
+ struct bch_inode_unpacked *dst_inode,
+ int src_fd, u64 start, u64 end)
+{
+ while (start < end) {
+ unsigned len = min_t(u64, end - start, sizeof(buf));
+ unsigned pad = round_up(len, block_bytes(c)) - len;
+
+ xpread(src_fd, buf, len, start);
+ memset(buf + len, 0, pad);
+
+ write_data(c, dst_inode, start, buf, len + pad);
+ start += len;
+ }
+}
+
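+/*
+ * Create extents that point directly at the source filesystem's existing
+ * data blocks instead of copying them; the allocator is kept away from
+ * those blocks because everything outside the reserved metadata file is
+ * marked nouse (see mark_unreserved_space()).
+ */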
+static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
+ u64 logical, u64 physical, u64 length)
+{
+ struct bch_dev *ca = c->devs[0];
+
+ BUG_ON(logical & (block_bytes(c) - 1));
+ BUG_ON(physical & (block_bytes(c) - 1));
+ BUG_ON(length & (block_bytes(c) - 1));
+
+ logical >>= 9;
+ physical >>= 9;
+ length >>= 9;
+
+ BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
+
+ while (length) {
+ struct bkey_i_extent *e;
+ BKEY_PADDED_ONSTACK(k, BKEY_EXTENT_VAL_U64s_MAX) k;
+ u64 b = sector_to_bucket(ca, physical);
+ struct disk_reservation res;
+ unsigned sectors;
+ int ret;
+
+ sectors = min(ca->mi.bucket_size -
+ (physical & (ca->mi.bucket_size - 1)),
+ length);
+
+ e = bkey_extent_init(&k.k);
+ e->k.p.inode = dst->bi_inum;
+ e->k.p.offset = logical + sectors;
+ e->k.p.snapshot = U32_MAX;
+ e->k.size = sectors;
+ bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
+ .offset = physical,
+ .dev = 0,
+ .gen = *bucket_gen(ca, b),
+ });
+
+ ret = bch2_disk_reservation_get(c, &res, sectors, 1,
+ BCH_DISK_RESERVATION_NOFAIL);
+ if (ret)
+ die("error reserving space in new filesystem: %s",
+ bch2_err_str(ret));
+
+ ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0);
+ if (ret)
+ die("btree insert error %s", bch2_err_str(ret));
+
+ bch2_disk_reservation_put(c, &res);
+
+ dst->bi_sectors += sectors;
+ logical += sectors;
+ physical += sectors;
+ length -= sectors;
+ }
+}
+
+static void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
+ char *src)
+{
+ ssize_t ret = readlink(src, buf, sizeof(buf));
+ if (ret < 0)
+ die("readlink error: %m");
+
+ write_data(c, dst, 0, buf, round_up(ret, block_bytes(c)));
+}
+
+static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
+ int src_fd, u64 src_size,
+ char *src_path, ranges *extents)
+{
+ struct fiemap_iter iter;
+ struct fiemap_extent e;
+
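+ /*
+ * If any extent is not yet allocated (FIEMAP_EXTENT_UNKNOWN), fsync
+ * first so the second pass sees stable physical locations.
+ */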
+ fiemap_for_each(src_fd, iter, e)
+ if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
+ fsync(src_fd);
+ break;
+ }
+ fiemap_iter_exit(&iter);
+
+ fiemap_for_each(src_fd, iter, e) {
+ u64 src_max = roundup(src_size, block_bytes(c));
+
+ e.fe_length = min(e.fe_length, src_max - e.fe_logical);
+
+ if ((e.fe_logical & (block_bytes(c) - 1)) ||
+ (e.fe_length & (block_bytes(c) - 1)))
+ die("Unaligned extent in %s - can't handle", src_path);
+
+ if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+ FIEMAP_EXTENT_ENCODED|
+ FIEMAP_EXTENT_NOT_ALIGNED|
+ FIEMAP_EXTENT_DATA_INLINE)) {
+ copy_data(c, dst, src_fd, e.fe_logical,
+ min(src_size - e.fe_logical,
+ e.fe_length));
+ continue;
+ }
+
+ /*
+ * if the data is below 1 MB, copy it so it doesn't conflict
+ * with bcachefs's potentially larger superblock:
+ */
+ if (e.fe_physical < 1 << 20) {
+ copy_data(c, dst, src_fd, e.fe_logical,
+ min(src_size - e.fe_logical,
+ e.fe_length));
+ continue;
+ }
+
+ if ((e.fe_physical & (block_bytes(c) - 1)))
+ die("Unaligned extent in %s - can't handle", src_path);
+
+ range_add(extents, e.fe_physical, e.fe_length);
+ link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
+ }
+ fiemap_iter_exit(&iter);
+}
+
+struct copy_fs_state {
+ u64 bcachefs_inum;
+ dev_t dev;
+
+ GENRADIX(u64) hardlinks;
+ ranges extents;
+};
+
+static void copy_dir(struct copy_fs_state *s,
+ struct bch_fs *c,
+ struct bch_inode_unpacked *dst,
+ int src_fd, const char *src_path)
+{
+ DIR *dir = fdopendir(src_fd);
+ struct dirent *d;
+
+ while ((errno = 0), (d = readdir(dir))) {
+ struct bch_inode_unpacked inode;
+ int fd;
+
+ if (fchdir(src_fd))
+ die("chdir error: %m");
+
+ struct stat stat =
+ xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
+
+ if (!strcmp(d->d_name, ".") ||
+ !strcmp(d->d_name, "..") ||
+ !strcmp(d->d_name, "lost+found") ||
+ stat.st_ino == s->bcachefs_inum)
+ continue;
+
+ char *child_path = mprintf("%s/%s", src_path, d->d_name);
+
+ if (stat.st_dev != s->dev)
+ die("%s does not have correct st_dev!", child_path);
+
+ u64 *dst_inum = S_ISREG(stat.st_mode)
+ ? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
+ : NULL;
+
+ if (dst_inum && *dst_inum) {
+ create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
+ goto next;
+ }
+
+ inode = create_file(c, dst, d->d_name,
+ stat.st_uid, stat.st_gid,
+ stat.st_mode, stat.st_rdev);
+
+ if (dst_inum)
+ *dst_inum = inode.bi_inum;
+
+ copy_times(c, &inode, &stat);
+ copy_xattrs(c, &inode, d->d_name);
+
+ /* copy xattrs */
+
+ switch (mode_to_type(stat.st_mode)) {
+ case DT_DIR:
+ fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+ copy_dir(s, c, &inode, fd, child_path);
+ close(fd);
+ break;
+ case DT_REG:
+ inode.bi_size = stat.st_size;
+
+ fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+ copy_file(c, &inode, fd, stat.st_size,
+ child_path, &s->extents);
+ close(fd);
+ break;
+ case DT_LNK:
+ inode.bi_size = stat.st_size;
+
+ copy_link(c, &inode, d->d_name);
+ break;
+ case DT_FIFO:
+ case DT_CHR:
+ case DT_BLK:
+ case DT_SOCK:
+ case DT_WHT:
+ /* nothing else to copy for these: */
+ break;
+ default:
+ BUG();
+ }
+
+ update_inode(c, &inode);
+next:
+ free(child_path);
+ }
+
+ if (errno)
+ die("readdir error: %m");
+ closedir(dir);
+}
+
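+/*
+ * Reserve space for the new filesystem by fallocating a large file on the
+ * source filesystem and recording its physical extents; the new bcachefs
+ * metadata and data are confined to those extents.
+ */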
+static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
+ u64 size, u64 *bcachefs_inum, dev_t dev,
+ bool force)
+{
+ int fd = force
+ ? open(file_path, O_RDWR|O_CREAT, 0600)
+ : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600);
+ if (fd < 0)
+ die("Error creating %s for bcachefs metadata: %m",
+ file_path);
+
+ struct stat statbuf = xfstat(fd);
+
+ if (statbuf.st_dev != dev)
+ die("bcachefs file has incorrect device");
+
+ *bcachefs_inum = statbuf.st_ino;
+
+ if (fallocate(fd, 0, 0, size))
+ die("Error reserving space for bcachefs metadata: %m");
+
+ fsync(fd);
+
+ struct fiemap_iter iter;
+ struct fiemap_extent e;
+ ranges extents = { 0 };
+
+ fiemap_for_each(fd, iter, e) {
+ if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+ FIEMAP_EXTENT_ENCODED|
+ FIEMAP_EXTENT_NOT_ALIGNED|
+ FIEMAP_EXTENT_DATA_INLINE))
+ die("Unable to continue: metadata file not fully mapped");
+
+ if ((e.fe_physical & (block_size - 1)) ||
+ (e.fe_length & (block_size - 1)))
+ die("Unable to continue: unaligned extents in metadata file");
+
+ range_add(&extents, e.fe_physical, e.fe_length);
+ }
+ fiemap_iter_exit(&iter);
+ close(fd);
+
+ ranges_sort_merge(&extents);
+ return extents;
+}
+
+static void reserve_old_fs_space(struct bch_fs *c,
+ struct bch_inode_unpacked *root_inode,
+ ranges *extents)
+{
+ struct bch_dev *ca = c->devs[0];
+ struct bch_inode_unpacked dst;
+ struct hole_iter iter;
+ struct range i;
+
+ dst = create_file(c, root_inode, "old_migrated_filesystem",
+ 0, 0, S_IFREG|0400, 0);
+ dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
+
+ ranges_sort_merge(extents);
+
+ for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i)
+ link_data(c, &dst, i.start, i.start, i.end - i.start);
+
+ update_inode(c, &dst);
+}
+
+static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
+ u64 bcachefs_inum, ranges *extents)
+{
+ syncfs(src_fd);
+
+ struct bch_inode_unpacked root_inode;
+ int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
+ &root_inode);
+ if (ret)
+ die("error looking up root directory: %s", bch2_err_str(ret));
+
+ if (fchdir(src_fd))
+ die("chdir error: %m");
+
+ struct stat stat = xfstat(src_fd);
+ copy_times(c, &root_inode, &stat);
+ copy_xattrs(c, &root_inode, ".");
+
+ struct copy_fs_state s = {
+ .bcachefs_inum = bcachefs_inum,
+ .dev = stat.st_dev,
+ .extents = *extents,
+ };
+
+ /* now, copy: */
+ copy_dir(&s, c, &root_inode, src_fd, src_path);
+
+ reserve_old_fs_space(c, &root_inode, &s.extents);
+
+ update_inode(c, &root_inode);
+
+ darray_exit(&s.extents);
+ genradix_free(&s.hardlinks);
+}
+
+static void find_superblock_space(ranges extents,
+ struct format_opts opts,
+ struct dev_opts *dev)
+{
+ darray_for_each(extents, i) {
+ u64 start = round_up(max(256ULL << 10, i->start),
+ dev->bucket_size << 9);
+ u64 end = round_down(i->end,
+ dev->bucket_size << 9);
+
+ /* Need space for two superblocks: */
+ if (start + (opts.superblock_size << 9) * 2 <= end) {
+ dev->sb_offset = start >> 9;
+ dev->sb_end = dev->sb_offset + opts.superblock_size * 2;
+ return;
+ }
+ }
+
+ die("Couldn't find a valid location for superblock");
+}
+
+static void migrate_usage(void)
+{
+ puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n"
+ "Usage: bcachefs migrate [OPTION]...\n"
+ "\n"
+ "Options:\n"
+ " -f fs Root of filesystem to migrate(s)\n"
+ " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
+ " --no_passphrase Don't encrypt master encryption key\n"
+ " -F Force, even if metadata file already exists\n"
+ " -h Display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+static const struct option migrate_opts[] = {
+ { "encrypted", no_argument, NULL, 'e' },
+ { "no_passphrase", no_argument, NULL, 'p' },
+ { NULL }
+};
+
+static int migrate_fs(const char *fs_path,
+ struct bch_opt_strs fs_opt_strs,
+ struct bch_opts fs_opts,
+ struct format_opts format_opts,
+ bool force)
+{
+ if (!path_is_fs_root(fs_path))
+ die("%s is not a filysestem root", fs_path);
+
+ int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
+ struct stat stat = xfstat(fs_fd);
+
+ if (!S_ISDIR(stat.st_mode))
+ die("%s is not a directory", fs_path);
+
+ struct dev_opts dev = dev_opts_default();
+
+ dev.path = dev_t_to_path(stat.st_dev);
+ dev.bdev = blkdev_get_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
+
+ opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
+
+ char *file_path = mprintf("%s/bcachefs", fs_path);
+ printf("Creating new filesystem on %s in space reserved at %s\n",
+ dev.path, file_path);
+
+ dev.size = get_size(dev.bdev->bd_fd);
+ dev.bucket_size = bch2_pick_bucket_size(fs_opts, &dev);
+ dev.nbuckets = dev.size / dev.bucket_size;
+
+ bch2_check_bucket_size(fs_opts, &dev);
+
+ u64 bcachefs_inum;
+ ranges extents = reserve_new_fs_space(file_path,
+ fs_opts.block_size >> 9,
+ get_size(dev.bdev->bd_fd) / 5,
+ &bcachefs_inum, stat.st_dev, force);
+
+ find_superblock_space(extents, format_opts, &dev);
+
+ struct bch_sb *sb = bch2_format(fs_opt_strs,
+ fs_opts, format_opts, &dev, 1);
+ u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
+
+ if (format_opts.passphrase)
+ bch2_add_key(sb, "user", "user", format_opts.passphrase);
+
+ free(sb);
+
+ struct bch_opts opts = bch2_opts_empty();
+ struct bch_fs *c = NULL;
+ char *path[1] = { dev.path };
+
+ opt_set(opts, sb, sb_offset);
+ opt_set(opts, nostart, true);
+ opt_set(opts, noexcl, true);
+ opt_set(opts, buckets_nouse, true);
+
+ c = bch2_fs_open(path, 1, opts);
+ if (IS_ERR(c))
+ die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
+
+ mark_unreserved_space(c, extents);
+
+ int ret = bch2_fs_start(c);
+ if (ret)
+ die("Error starting new filesystem: %s", bch2_err_str(ret));
+
+ copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
+
+ bch2_fs_stop(c);
+
+ printf("Migrate complete, running fsck:\n");
+ opt_set(opts, nostart, false);
+ opt_set(opts, nochanges, true);
+ opt_set(opts, read_only, true);
+
+ c = bch2_fs_open(path, 1, opts);
+ if (IS_ERR(c))
+ die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
+
+ bch2_fs_stop(c);
+ printf("fsck complete\n");
+
+ printf("To mount the new filesystem, run\n"
+ " mount -t bcachefs -o sb=%llu %s dir\n"
+ "\n"
+ "After verifying that the new filesystem is correct, to create a\n"
+ "superblock at the default offset and finish the migration run\n"
+ " bcachefs migrate-superblock -d %s -o %llu\n"
+ "\n"
+	       "The new filesystem will have a file at /old_migrated_filesystem\n"
+ "referencing all disk space that might be used by the existing\n"
+ "filesystem. That file can be deleted once the old filesystem is\n"
+ "no longer needed (and should be deleted prior to running\n"
+ "bcachefs migrate-superblock)\n",
+ sb_offset, dev.path, dev.path, sb_offset);
+ return 0;
+}
+
+int cmd_migrate(int argc, char *argv[])
+{
+ struct format_opts format_opts = format_opts_default();
+ char *fs_path = NULL;
+ bool no_passphrase = false, force = false;
+ int opt;
+
+ struct bch_opt_strs fs_opt_strs =
+ bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
+ struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+
+ while ((opt = getopt_long(argc, argv, "f:Fh",
+ migrate_opts, NULL)) != -1)
+ switch (opt) {
+ case 'f':
+ fs_path = optarg;
+ break;
+ case 'e':
+ format_opts.encrypted = true;
+ break;
+ case 'p':
+ no_passphrase = true;
+ break;
+ case 'F':
+ force = true;
+ break;
+ case 'h':
+ migrate_usage();
+ exit(EXIT_SUCCESS);
+ }
+
+ if (!fs_path)
+ die("Please specify a filesystem to migrate");
+
+ if (format_opts.encrypted && !no_passphrase)
+ format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
+
+ int ret = migrate_fs(fs_path,
+ fs_opt_strs,
+ fs_opts,
+ format_opts, force);
+ bch2_opt_strs_free(&fs_opt_strs);
+ return ret;
+}
+
+static void migrate_superblock_usage(void)
+{
+ puts("bcachefs migrate-superblock - create default superblock after migrating\n"
+ "Usage: bcachefs migrate-superblock [OPTION]...\n"
+ "\n"
+ "Options:\n"
+ " -d device Device to create superblock for\n"
+ " -o offset Offset of existing superblock\n"
+ " -h Display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_migrate_superblock(int argc, char *argv[])
+{
+ char *dev = NULL;
+ u64 offset = 0;
+ int opt, ret;
+
+ while ((opt = getopt(argc, argv, "d:o:h")) != -1)
+ switch (opt) {
+ case 'd':
+ dev = optarg;
+ break;
+ case 'o':
+ ret = kstrtou64(optarg, 10, &offset);
+ if (ret)
+ die("Invalid offset");
+ break;
+ case 'h':
+ migrate_superblock_usage();
+ exit(EXIT_SUCCESS);
+ }
+
+ if (!dev)
+ die("Please specify a device");
+
+ if (!offset)
+ die("Please specify offset of existing superblock");
+
+ int fd = xopen(dev, O_RDWR);
+ struct bch_sb *sb = __bch2_super_read(fd, offset);
+
+ if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset))
+ die("Can't add superblock: no space left in superblock layout");
+
+ unsigned i;
+ for (i = 0; i < sb->layout.nr_superblocks; i++)
+ if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR)
+ die("Superblock layout already has default superblock");
+
+ memmove(&sb->layout.sb_offset[1],
+ &sb->layout.sb_offset[0],
+ sb->layout.nr_superblocks * sizeof(u64));
+ sb->layout.nr_superblocks++;
+
+ sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
+
+ bch2_super_write(fd, sb);
+ close(fd);
+
+ return 0;
+}
diff --git a/c_src/cmd_option.c b/c_src/cmd_option.c
new file mode 100644
index 00000000..6ce34016
--- /dev/null
+++ b/c_src/cmd_option.c
@@ -0,0 +1,107 @@
+/*
+ * Authors: Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * GPLv2
+ */
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super-io.h"
+
+static void set_option_usage(void)
+{
+	puts("bcachefs set-option - set a filesystem option\n"
+	     "Usage: bcachefs set-option [OPTION]... device\n"
+ "\n"
+ "Options:\n");
+ bch2_opts_usage(OPT_MOUNT);
+ puts(" -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+int cmd_set_option(int argc, char *argv[])
+{
+ struct bch_opt_strs new_opt_strs = bch2_cmdline_opts_get(&argc, argv, OPT_MOUNT);
+ struct bch_opts new_opts = bch2_parse_opts(new_opt_strs);
+ struct bch_opts open_opts = bch2_opts_empty();
+ unsigned i;
+ int opt, ret = 0;
+
+ opt_set(open_opts, nostart, true);
+
+ while ((opt = getopt(argc, argv, "h")) != -1)
+ switch (opt) {
+ case 'h':
+ set_option_usage();
+ break;
+ }
+ args_shift(optind);
+
+ if (!argc) {
+ fprintf(stderr, "Please supply device(s)\n");
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 0; i < argc; i++)
+ if (dev_mounted(argv[i]))
+ goto online;
+
+ struct bch_fs *c = bch2_fs_open(argv, argc, open_opts);
+ if (IS_ERR(c)) {
+ fprintf(stderr, "error opening %s: %s\n", argv[0], bch2_err_str(PTR_ERR(c)));
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 0; i < bch2_opts_nr; i++) {
+ u64 v = bch2_opt_get_by_id(&new_opts, i);
+
+ if (!bch2_opt_defined_by_id(&new_opts, i))
+ continue;
+
+ ret = bch2_opt_check_may_set(c, i, v);
+ if (ret < 0) {
+ fprintf(stderr, "error setting %s: %i\n",
+ bch2_opt_table[i].attr.name, ret);
+ break;
+ }
+
+ bch2_opt_set_sb(c, bch2_opt_table + i, v);
+ bch2_opt_set_by_id(&c->opts, i, v);
+ }
+
+ bch2_fs_stop(c);
+ return ret;
+online:
+ {
+ unsigned dev_idx;
+ struct bchfs_handle fs = bchu_fs_open_by_dev(argv[i], &dev_idx);
+
+ for (i = 0; i < bch2_opts_nr; i++) {
+ if (!new_opt_strs.by_id[i])
+ continue;
+
+ char *path = mprintf("options/%s", bch2_opt_table[i].attr.name);
+
+ write_file_str(fs.sysfs_fd, path, new_opt_strs.by_id[i]);
+ free(path);
+ }
+ }
+ return 0;
+}
diff --git a/c_src/cmd_run.c b/c_src/cmd_run.c
new file mode 100644
index 00000000..1bf84e5c
--- /dev/null
+++ b/c_src/cmd_run.c
@@ -0,0 +1,33 @@
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "cmds.h"
+#include "libbcachefs.h"
+
+#if 0
+int cmd_run(int argc, char *argv[])
+{
+ return 0;
+}
+
+int cmd_stop(int argc, char *argv[])
+{
+ if (argc != 2)
+ die("Please supply a filesystem");
+
+ struct bchfs_handle fs = bcache_fs_open(argv[1]);
+ xioctl(fs.ioctl_fd, BCH_IOCTL_STOP);
+ return 0;
+}
+#endif
diff --git a/c_src/cmd_subvolume.c b/c_src/cmd_subvolume.c
new file mode 100644
index 00000000..99a302b8
--- /dev/null
+++ b/c_src/cmd_subvolume.c
@@ -0,0 +1,188 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "libbcachefs/opts.h"
+#include "tools-util.h"
+
+int subvolume_usage(void)
+{
+ puts("bcachefs subvolume - manage subvolumes and snapshots\n"
+ "Usage: bcachefs subvolume <CMD> [OPTION]\n"
+ "\n"
+ "Commands:\n"
+ " create create a subvolume\n"
+ " delete delete a subvolume\n"
+ " snapshot create a snapshot\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ return 0;
+}
+
+static void subvolume_create_usage(void)
+{
+ puts("bcachefs subvolume create - create a new subvolume\n"
+ "Usage: bcachefs subvolume create [OPTION]... path\n"
+ "\n"
+ "Options:\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_subvolume_create(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "help", no_argument, NULL, 'h' },
+ { NULL }
+ };
+ char *path;
+ int opt;
+
+ while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'h':
+ subvolume_create_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ while ((path = arg_pop())) {
+ char *dir = dirname(strdup(path));
+
+ struct bchfs_handle fs = bcache_fs_open(dir);
+
+ struct bch_ioctl_subvolume i = {
+ .dirfd = AT_FDCWD,
+ .mode = 0777,
+ .dst_ptr = (unsigned long)path,
+ };
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_SUBVOLUME_CREATE, &i);
+ bcache_fs_close(fs);
+ }
+
+ return 0;
+}
+
+static void subvolume_delete_usage(void)
+{
+ puts("bcachefs subvolume delete - delete an existing subvolume\n"
+ "Usage: bcachefs subvolume delete [OPTION]... path\n"
+ "\n"
+ "Options:\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_subvolume_delete(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "help", no_argument, NULL, 'h' },
+ { NULL }
+ };
+ char *path;
+ int opt;
+
+ while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'h':
+ subvolume_delete_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ while ((path = arg_pop())) {
+ char *dir = dirname(strdup(path));
+
+ struct bchfs_handle fs = bcache_fs_open(dir);
+
+ struct bch_ioctl_subvolume i = {
+ .dirfd = AT_FDCWD,
+ .mode = 0777,
+ .dst_ptr = (unsigned long)path,
+ };
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_SUBVOLUME_DESTROY, &i);
+ bcache_fs_close(fs);
+ }
+
+ return 0;
+}
+
+static void snapshot_create_usage(void)
+{
+	puts("bcachefs subvolume snapshot - create a snapshot\n"
+ "Usage: bcachefs subvolume snapshot [OPTION]... <source> <dest>\n"
+ "\n"
+ "Create a snapshot of <source> at <dest>. If specified, <source> must be a subvolume;\n"
+	     "if not specified the snapshot will be of the subvolume containing <dest>.\n"
+ "Options:\n"
+ " -r Make snapshot read only\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+int cmd_subvolume_snapshot(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "help", no_argument, NULL, 'h' },
+ { NULL }
+ };
+ unsigned flags = BCH_SUBVOL_SNAPSHOT_CREATE;
+ int opt;
+
+ while ((opt = getopt_long(argc, argv, "rh", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'r':
+ flags |= BCH_SUBVOL_SNAPSHOT_RO;
+ break;
+ case 'h':
+ snapshot_create_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ char *src = arg_pop();
+ char *dst = arg_pop();
+
+ if (argc)
+ die("Too many arguments");
+
+ if (!dst)
+ swap(src, dst);
+ if (!dst)
+ die("Please specify a path to create");
+
+ char *dir = dirname(strdup(dst));
+
+ struct bchfs_handle fs = bcache_fs_open(dir);
+
+ struct bch_ioctl_subvolume i = {
+ .flags = flags,
+ .dirfd = AT_FDCWD,
+ .mode = 0777,
+ .src_ptr = (unsigned long)src,
+ .dst_ptr = (unsigned long)dst,
+ };
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_SUBVOLUME_CREATE, &i);
+ bcache_fs_close(fs);
+ return 0;
+}
diff --git a/c_src/cmd_version.c b/c_src/cmd_version.c
new file mode 100644
index 00000000..5fe30e5e
--- /dev/null
+++ b/c_src/cmd_version.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+
+#include "cmds.h"
+
+int cmd_version(int argc, char *argv[])
+{
+ printf("%s\n", VERSION_STRING);
+ return 0;
+}
diff --git a/c_src/cmds.h b/c_src/cmds.h
new file mode 100644
index 00000000..64267dc4
--- /dev/null
+++ b/c_src/cmds.h
@@ -0,0 +1,63 @@
+/*
+ * Author: Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * GPLv2
+ */
+
+#ifndef _CMDS_H
+#define _CMDS_H
+
+#include "tools-util.h"
+
+int cmd_format(int argc, char *argv[]);
+int cmd_show_super(int argc, char *argv[]);
+int cmd_reset_counters(int argc, char *argv[]);
+int cmd_set_option(int argc, char *argv[]);
+
+int cmd_fs_usage(int argc, char *argv[]);
+
+int device_usage(void);
+int cmd_device_add(int argc, char *argv[]);
+int cmd_device_remove(int argc, char *argv[]);
+int cmd_device_online(int argc, char *argv[]);
+int cmd_device_offline(int argc, char *argv[]);
+int cmd_device_evacuate(int argc, char *argv[]);
+int cmd_device_set_state(int argc, char *argv[]);
+int cmd_device_resize(int argc, char *argv[]);
+int cmd_device_resize_journal(int argc, char *argv[]);
+
+int data_usage(void);
+int cmd_data_rereplicate(int argc, char *argv[]);
+int cmd_data_job(int argc, char *argv[]);
+
+int cmd_unlock(int argc, char *argv[]);
+int cmd_set_passphrase(int argc, char *argv[]);
+int cmd_remove_passphrase(int argc, char *argv[]);
+
+int cmd_fsck(int argc, char *argv[]);
+
+int cmd_dump(int argc, char *argv[]);
+int cmd_list_journal(int argc, char *argv[]);
+int cmd_kill_btree_node(int argc, char *argv[]);
+
+int cmd_migrate(int argc, char *argv[]);
+int cmd_migrate_superblock(int argc, char *argv[]);
+
+int cmd_version(int argc, char *argv[]);
+
+int cmd_setattr(int argc, char *argv[]);
+
+int subvolume_usage(void);
+int cmd_subvolume_create(int argc, char *argv[]);
+int cmd_subvolume_delete(int argc, char *argv[]);
+int cmd_subvolume_snapshot(int argc, char *argv[]);
+
+int cmd_fusemount(int argc, char *argv[]);
+
+void bcachefs_usage(void);
+int device_cmds(int argc, char *argv[]);
+int fs_cmds(int argc, char *argv[]);
+int data_cmds(int argc, char *argv[]);
+int subvolume_cmds(int argc, char *argv[]);
+
+#endif /* _CMDS_H */
diff --git a/c_src/config.h b/c_src/config.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/c_src/config.h
diff --git a/c_src/crypto.c b/c_src/crypto.c
new file mode 100644
index 00000000..32671bd8
--- /dev/null
+++ b/c_src/crypto.c
@@ -0,0 +1,201 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <termios.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <keyutils.h>
+#include <linux/random.h>
+#include <sodium/crypto_pwhash_scryptsalsa208sha256.h>
+#include <uuid/uuid.h>
+
+#include "libbcachefs/checksum.h"
+#include "crypto.h"
+
+char *read_passphrase(const char *prompt)
+{
+ char *buf = NULL;
+ size_t buflen = 0;
+ ssize_t len;
+
+ if (isatty(STDIN_FILENO)) {
+ struct termios old, new;
+
+ fprintf(stderr, "%s", prompt);
+ fflush(stderr);
+
+ if (tcgetattr(STDIN_FILENO, &old))
+ die("error getting terminal attrs");
+
+ new = old;
+ new.c_lflag &= ~ECHO;
+ if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &new))
+ die("error setting terminal attrs");
+
+ len = getline(&buf, &buflen, stdin);
+
+ tcsetattr(STDIN_FILENO, TCSAFLUSH, &old);
+ fprintf(stderr, "\n");
+ } else {
+ len = getline(&buf, &buflen, stdin);
+ }
+
+ if (len < 0)
+ die("error reading passphrase");
+ if (len && buf[len - 1] == '\n')
+ buf[len - 1] = '\0';
+
+ return buf;
+}
+
+char *read_passphrase_twice(const char *prompt)
+{
+ char *pass = read_passphrase(prompt);
+
+ if (!isatty(STDIN_FILENO))
+ return pass;
+
+ char *pass2 = read_passphrase("Enter same passphrase again: ");
+
+ if (strcmp(pass, pass2)) {
+ memzero_explicit(pass, strlen(pass));
+ memzero_explicit(pass2, strlen(pass2));
+ die("Passphrases do not match");
+ }
+
+ memzero_explicit(pass2, strlen(pass2));
+ free(pass2);
+
+ return pass;
+}
+
+struct bch_key derive_passphrase(struct bch_sb_field_crypt *crypt,
+ const char *passphrase)
+{
+ const unsigned char salt[] = "bcache";
+ struct bch_key key;
+ int ret;
+
+ switch (BCH_CRYPT_KDF_TYPE(crypt)) {
+ case BCH_KDF_SCRYPT:
+ ret = crypto_pwhash_scryptsalsa208sha256_ll(
+ (void *) passphrase, strlen(passphrase),
+ salt, sizeof(salt),
+ 1ULL << BCH_KDF_SCRYPT_N(crypt),
+ 1ULL << BCH_KDF_SCRYPT_R(crypt),
+ 1ULL << BCH_KDF_SCRYPT_P(crypt),
+ (void *) &key, sizeof(key));
+ if (ret)
+ die("scrypt error: %i", ret);
+ break;
+ default:
+ die("unknown kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt));
+ }
+
+ return key;
+}
+
+bool bch2_sb_is_encrypted(struct bch_sb *sb)
+{
+ struct bch_sb_field_crypt *crypt;
+
+ return (crypt = bch2_sb_field_get(sb, crypt)) &&
+ bch2_key_is_encrypted(&crypt->key);
+}
+
+void bch2_passphrase_check(struct bch_sb *sb, const char *passphrase,
+ struct bch_key *passphrase_key,
+ struct bch_encrypted_key *sb_key)
+{
+ struct bch_sb_field_crypt *crypt = bch2_sb_field_get(sb, crypt);
+ if (!crypt)
+ die("filesystem is not encrypted");
+
+ *sb_key = crypt->key;
+
+ if (!bch2_key_is_encrypted(sb_key))
+ die("filesystem does not have encryption key");
+
+ *passphrase_key = derive_passphrase(crypt, passphrase);
+
+ /* Check if the user supplied the correct passphrase: */
+ if (bch2_chacha_encrypt_key(passphrase_key, __bch2_sb_key_nonce(sb),
+ sb_key, sizeof(*sb_key)))
+ die("error encrypting key");
+
+ if (bch2_key_is_encrypted(sb_key))
+ die("incorrect passphrase");
+}
+
+void bch2_add_key(struct bch_sb *sb,
+ const char *type,
+ const char *keyring_str,
+ const char *passphrase)
+{
+ struct bch_key passphrase_key;
+ struct bch_encrypted_key sb_key;
+ int keyring;
+
+ if (!strcmp(keyring_str, "session"))
+ keyring = KEY_SPEC_SESSION_KEYRING;
+ else if (!strcmp(keyring_str, "user"))
+ keyring = KEY_SPEC_USER_KEYRING;
+ else if (!strcmp(keyring_str, "user_session"))
+ keyring = KEY_SPEC_USER_SESSION_KEYRING;
+ else
+ die("unknown keyring %s", keyring_str);
+
+ bch2_passphrase_check(sb, passphrase,
+ &passphrase_key,
+ &sb_key);
+
+ char uuid[40];
+ uuid_unparse_lower(sb->user_uuid.b, uuid);
+
+ char *description = mprintf("bcachefs:%s", uuid);
+
+ if (add_key(type,
+ description,
+ &passphrase_key, sizeof(passphrase_key),
+ keyring) < 0)
+ die("add_key error: %m");
+
+ memzero_explicit(description, strlen(description));
+ free(description);
+ memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+ memzero_explicit(&sb_key, sizeof(sb_key));
+}
+
+void bch_sb_crypt_init(struct bch_sb *sb,
+ struct bch_sb_field_crypt *crypt,
+ const char *passphrase)
+{
+ crypt->key.magic = BCH_KEY_MAGIC;
+ get_random_bytes(&crypt->key.key, sizeof(crypt->key.key));
+
+ if (passphrase) {
+
+ SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT);
+ SET_BCH_KDF_SCRYPT_N(crypt, ilog2(16384));
+ SET_BCH_KDF_SCRYPT_R(crypt, ilog2(8));
+ SET_BCH_KDF_SCRYPT_P(crypt, ilog2(16));
+
+ struct bch_key passphrase_key = derive_passphrase(crypt, passphrase);
+
+ assert(!bch2_key_is_encrypted(&crypt->key));
+
+ if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(sb),
+ &crypt->key, sizeof(crypt->key)))
+ die("error encrypting key");
+
+ assert(bch2_key_is_encrypted(&crypt->key));
+
+ memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+ }
+}
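As a quick illustration of the parameter encoding above: the scrypt cost factors are stored in the superblock as log2 values (via ilog2()) and recovered with a shift in derive_passphrase(). For example (illustrative snippet, using the same crypt field as above):

/* N = 16384 is stored as 14, and recovered as 1ULL << 14. */
SET_BCH_KDF_SCRYPT_N(crypt, ilog2(16384));	/* stores 14 */
u64 N = 1ULL << BCH_KDF_SCRYPT_N(crypt);	/* yields 16384 again */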
diff --git a/c_src/crypto.h b/c_src/crypto.h
new file mode 100644
index 00000000..baea6d86
--- /dev/null
+++ b/c_src/crypto.h
@@ -0,0 +1,22 @@
+#ifndef _CRYPTO_H
+#define _CRYPTO_H
+
+#include "tools-util.h"
+
+struct bch_sb;
+struct bch_sb_field_crypt;
+struct bch_key;
+struct bch_encrypted_key;
+
+char *read_passphrase(const char *);
+char *read_passphrase_twice(const char *);
+
+struct bch_key derive_passphrase(struct bch_sb_field_crypt *, const char *);
+bool bch2_sb_is_encrypted(struct bch_sb *);
+void bch2_passphrase_check(struct bch_sb *, const char *,
+ struct bch_key *, struct bch_encrypted_key *);
+void bch2_add_key(struct bch_sb *, const char *, const char *, const char *);
+void bch_sb_crypt_init(struct bch_sb *sb, struct bch_sb_field_crypt *,
+ const char *);
+
+#endif /* _CRYPTO_H */
diff --git a/c_src/include/asm/page.h b/c_src/include/asm/page.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/c_src/include/asm/page.h
diff --git a/c_src/include/asm/unaligned.h b/c_src/include/asm/unaligned.h
new file mode 100644
index 00000000..e695bede
--- /dev/null
+++ b/c_src/include/asm/unaligned.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_UNALIGNED_H
+#define _ASM_UNALIGNED_H
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# include <linux/unaligned/le_struct.h>
+# include <linux/unaligned/be_byteshift.h>
+# include <linux/unaligned/generic.h>
+# define get_unaligned __get_unaligned_le
+# define put_unaligned __put_unaligned_le
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# include <linux/unaligned/be_struct.h>
+# include <linux/unaligned/le_byteshift.h>
+# include <linux/unaligned/generic.h>
+# define get_unaligned __get_unaligned_be
+# define put_unaligned __put_unaligned_be
+#else
+# error need to define endianness
+#endif
+
+#endif /* _ASM_UNALIGNED_H */
diff --git a/c_src/include/crypto/algapi.h b/c_src/include/crypto/algapi.h
new file mode 100644
index 00000000..5fd3524a
--- /dev/null
+++ b/c_src/include/crypto/algapi.h
@@ -0,0 +1,7 @@
+#ifndef _CRYPTO_ALGAPI_H
+#define _CRYPTO_ALGAPI_H
+
+#include <linux/crypto.h>
+#include <crypto/skcipher.h>
+
+#endif /* _CRYPTO_ALGAPI_H */
diff --git a/c_src/include/crypto/chacha.h b/c_src/include/crypto/chacha.h
new file mode 100644
index 00000000..f004cfb5
--- /dev/null
+++ b/c_src/include/crypto/chacha.h
@@ -0,0 +1,15 @@
+/*
+ * Common values for the ChaCha20 algorithm
+ */
+
+#ifndef _CRYPTO_CHACHA20_H
+#define _CRYPTO_CHACHA20_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+#define CHACHA_IV_SIZE 16
+#define CHACHA_KEY_SIZE 32
+#define CHACHA_BLOCK_SIZE 64
+
+#endif
diff --git a/c_src/include/crypto/hash.h b/c_src/include/crypto/hash.h
new file mode 100644
index 00000000..a74f3618
--- /dev/null
+++ b/c_src/include/crypto/hash.h
@@ -0,0 +1,104 @@
+/*
+ * Hash: Hash algorithms under the crypto API
+ *
+ * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_HASH_H
+#define _CRYPTO_HASH_H
+
+#include <linux/crypto.h>
+
+struct shash_desc;
+
+struct shash_alg {
+ int (*init)(struct shash_desc *desc);
+ int (*update)(struct shash_desc *desc, const u8 *data, unsigned len);
+ int (*final)(struct shash_desc *desc, u8 *out);
+ int (*finup)(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out);
+ int (*digest)(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out);
+
+ unsigned descsize;
+ unsigned digestsize;
+ struct crypto_alg base;
+};
+
+int crypto_register_shash(struct shash_alg *alg);
+
+struct crypto_shash {
+ unsigned descsize;
+ struct crypto_tfm base;
+};
+
+struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
+ u32 mask);
+
+static inline void crypto_free_shash(struct crypto_shash *tfm)
+{
+ kfree(tfm);
+}
+
+static inline struct shash_alg *crypto_shash_alg(struct crypto_shash *tfm)
+{
+ return container_of(tfm->base.alg, struct shash_alg, base);
+}
+
+static inline unsigned crypto_shash_digestsize(struct crypto_shash *tfm)
+{
+ return crypto_shash_alg(tfm)->digestsize;
+}
+
+static inline unsigned crypto_shash_descsize(struct crypto_shash *tfm)
+{
+ return tfm->descsize;
+}
+
+struct shash_desc {
+ struct crypto_shash *tfm;
+ u32 flags;
+
+ void *ctx[] CRYPTO_MINALIGN_ATTR;
+};
+
+#define SHASH_DESC_ON_STACK(shash, tfm) \
+ char __##shash##_desc[sizeof(struct shash_desc) + \
+ crypto_shash_descsize(tfm)] CRYPTO_MINALIGN_ATTR; \
+ struct shash_desc *shash = (struct shash_desc *)__##shash##_desc
+
+static inline int crypto_shash_init(struct shash_desc *desc)
+{
+ return crypto_shash_alg(desc->tfm)->init(desc);
+}
+
+static inline int crypto_shash_update(struct shash_desc *desc,
+ const u8 *data, unsigned len)
+{
+ return crypto_shash_alg(desc->tfm)->update(desc, data, len);
+}
+
+static inline int crypto_shash_final(struct shash_desc *desc, u8 *out)
+{
+ return crypto_shash_alg(desc->tfm)->final(desc, out);
+}
+
+static inline int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out)
+{
+ return crypto_shash_alg(desc->tfm)->finup(desc, data, len, out);
+}
+
+static inline int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out)
+{
+ return crypto_shash_alg(desc->tfm)->digest(desc, data, len, out);
+}
+
+#endif /* _CRYPTO_HASH_H */
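A minimal usage sketch for these shash shims, assuming a "sha256" algorithm has been registered via crypto_register_shash(); data and data_len stand in for the caller's buffer and error handling is elided:

struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);
SHASH_DESC_ON_STACK(desc, tfm);
u8 digest[SHA256_DIGEST_SIZE];

desc->tfm = tfm;
crypto_shash_init(desc);			/* start a new hash state   */
crypto_shash_update(desc, data, data_len);	/* feed the caller's buffer */
crypto_shash_final(desc, digest);		/* write the digest out     */
crypto_free_shash(tfm);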
diff --git a/c_src/include/crypto/poly1305.h b/c_src/include/crypto/poly1305.h
new file mode 100644
index 00000000..9fcfbfeb
--- /dev/null
+++ b/c_src/include/crypto/poly1305.h
@@ -0,0 +1,13 @@
+/*
+ * Common values for the Poly1305 algorithm
+ */
+
+#ifndef _CRYPTO_POLY1305_H
+#define _CRYPTO_POLY1305_H
+
+#include <sodium/crypto_onetimeauth_poly1305.h>
+
+#define POLY1305_KEY_SIZE crypto_onetimeauth_poly1305_KEYBYTES
+#define POLY1305_DIGEST_SIZE crypto_onetimeauth_poly1305_BYTES
+
+#endif
diff --git a/c_src/include/crypto/sha2.h b/c_src/include/crypto/sha2.h
new file mode 100644
index 00000000..8a46202b
--- /dev/null
+++ b/c_src/include/crypto/sha2.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values for SHA algorithms
+ */
+
+#ifndef _CRYPTO_SHA_H
+#define _CRYPTO_SHA_H
+
+#include <linux/types.h>
+
+#define SHA1_DIGEST_SIZE 20
+#define SHA1_BLOCK_SIZE 64
+
+#define SHA224_DIGEST_SIZE 28
+#define SHA224_BLOCK_SIZE 64
+
+#define SHA256_DIGEST_SIZE 32
+#define SHA256_BLOCK_SIZE 64
+
+#define SHA384_DIGEST_SIZE 48
+#define SHA384_BLOCK_SIZE 128
+
+#define SHA512_DIGEST_SIZE 64
+#define SHA512_BLOCK_SIZE 128
+
+#define SHA1_H0 0x67452301UL
+#define SHA1_H1 0xefcdab89UL
+#define SHA1_H2 0x98badcfeUL
+#define SHA1_H3 0x10325476UL
+#define SHA1_H4 0xc3d2e1f0UL
+
+#define SHA224_H0 0xc1059ed8UL
+#define SHA224_H1 0x367cd507UL
+#define SHA224_H2 0x3070dd17UL
+#define SHA224_H3 0xf70e5939UL
+#define SHA224_H4 0xffc00b31UL
+#define SHA224_H5 0x68581511UL
+#define SHA224_H6 0x64f98fa7UL
+#define SHA224_H7 0xbefa4fa4UL
+
+#define SHA256_H0 0x6a09e667UL
+#define SHA256_H1 0xbb67ae85UL
+#define SHA256_H2 0x3c6ef372UL
+#define SHA256_H3 0xa54ff53aUL
+#define SHA256_H4 0x510e527fUL
+#define SHA256_H5 0x9b05688cUL
+#define SHA256_H6 0x1f83d9abUL
+#define SHA256_H7 0x5be0cd19UL
+
+#define SHA384_H0 0xcbbb9d5dc1059ed8ULL
+#define SHA384_H1 0x629a292a367cd507ULL
+#define SHA384_H2 0x9159015a3070dd17ULL
+#define SHA384_H3 0x152fecd8f70e5939ULL
+#define SHA384_H4 0x67332667ffc00b31ULL
+#define SHA384_H5 0x8eb44a8768581511ULL
+#define SHA384_H6 0xdb0c2e0d64f98fa7ULL
+#define SHA384_H7 0x47b5481dbefa4fa4ULL
+
+#define SHA512_H0 0x6a09e667f3bcc908ULL
+#define SHA512_H1 0xbb67ae8584caa73bULL
+#define SHA512_H2 0x3c6ef372fe94f82bULL
+#define SHA512_H3 0xa54ff53a5f1d36f1ULL
+#define SHA512_H4 0x510e527fade682d1ULL
+#define SHA512_H5 0x9b05688c2b3e6c1fULL
+#define SHA512_H6 0x1f83d9abfb41bd6bULL
+#define SHA512_H7 0x5be0cd19137e2179ULL
+
+extern const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE];
+
+extern const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE];
+
+extern const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE];
+
+extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE];
+
+extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE];
+
+struct sha1_state {
+ u32 state[SHA1_DIGEST_SIZE / 4];
+ u64 count;
+ u8 buffer[SHA1_BLOCK_SIZE];
+};
+
+struct sha256_state {
+ u32 state[SHA256_DIGEST_SIZE / 4];
+ u64 count;
+ u8 buf[SHA256_BLOCK_SIZE];
+};
+
+struct sha512_state {
+ u64 state[SHA512_DIGEST_SIZE / 8];
+ u64 count[2];
+ u8 buf[SHA512_BLOCK_SIZE];
+};
+
+struct shash_desc;
+
+extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+
+extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *hash);
+
+extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+
+extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *hash);
+
+extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+
+extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *hash);
+#endif
diff --git a/c_src/include/crypto/skcipher.h b/c_src/include/crypto/skcipher.h
new file mode 100644
index 00000000..5989855d
--- /dev/null
+++ b/c_src/include/crypto/skcipher.h
@@ -0,0 +1,126 @@
+/*
+ * Symmetric key ciphers.
+ *
+ * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_SKCIPHER_H
+#define _CRYPTO_SKCIPHER_H
+
+#include <linux/crypto.h>
+
+struct crypto_skcipher;
+struct skcipher_request;
+
+struct skcipher_alg {
+ struct crypto_alg base;
+};
+
+int crypto_register_skcipher(struct skcipher_alg *alg);
+
+struct crypto_skcipher {
+ int (*setkey)(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen);
+ int (*encrypt)(struct skcipher_request *req);
+ int (*decrypt)(struct skcipher_request *req);
+
+ unsigned ivsize;
+ unsigned keysize;
+
+ struct crypto_tfm base;
+};
+
+struct crypto_sync_skcipher {
+ struct crypto_skcipher base;
+};
+
+struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
+ u32 type, u32 mask);
+
+static inline struct crypto_sync_skcipher *
+crypto_alloc_sync_skcipher(const char *alg_name, u32 type, u32 mask)
+{
+ return (void *) crypto_alloc_skcipher(alg_name, type, mask);
+}
+
+static inline void crypto_free_skcipher(struct crypto_skcipher *tfm)
+{
+ kfree(tfm);
+}
+
+static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm)
+{
+ crypto_free_skcipher(&tfm->base);
+}
+
+struct skcipher_request {
+ unsigned cryptlen;
+ u8 *iv;
+
+ struct scatterlist *src;
+ struct scatterlist *dst;
+
+ struct crypto_tfm *tfm;
+};
+
+#define MAX_SYNC_SKCIPHER_REQSIZE 384
+#define SYNC_SKCIPHER_REQUEST_ON_STACK(name, tfm) \
+ char __##name##_desc[sizeof(struct skcipher_request) + \
+ MAX_SYNC_SKCIPHER_REQSIZE + \
+ (!(sizeof((struct crypto_sync_skcipher *)1 == \
+ (typeof(tfm))1))) \
+ ] CRYPTO_MINALIGN_ATTR; \
+ struct skcipher_request *name = (void *)__##name##_desc
+
+static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ return tfm->setkey(tfm, key, keylen);
+}
+
+static inline struct crypto_skcipher *crypto_skcipher_reqtfm(
+ struct skcipher_request *req)
+{
+ return container_of(req->tfm, struct crypto_skcipher, base);
+}
+
+static inline int crypto_skcipher_encrypt(struct skcipher_request *req)
+{
+ return crypto_skcipher_reqtfm(req)->encrypt(req);
+}
+
+static inline int crypto_skcipher_decrypt(struct skcipher_request *req)
+{
+ return crypto_skcipher_reqtfm(req)->decrypt(req);
+}
+
+static inline void skcipher_request_set_tfm(struct skcipher_request *req,
+ struct crypto_skcipher *tfm)
+{
+ req->tfm = &tfm->base;
+}
+
+static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req,
+ struct crypto_sync_skcipher *tfm)
+{
+ skcipher_request_set_tfm(req, &tfm->base);
+}
+
+static inline void skcipher_request_set_crypt(
+ struct skcipher_request *req,
+ struct scatterlist *src, struct scatterlist *dst,
+ unsigned int cryptlen, void *iv)
+{
+ req->src = src;
+ req->dst = dst;
+ req->cryptlen = cryptlen;
+ req->iv = iv;
+}
+
+#endif /* _CRYPTO_SKCIPHER_H */
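A minimal usage sketch of the skcipher shims above, assuming a registered "chacha20" cipher; key, nonce, sg and len are caller-provided and error handling is elided:

struct crypto_sync_skcipher *tfm =
	crypto_alloc_sync_skcipher("chacha20", 0, 0);
SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

crypto_skcipher_setkey(&tfm->base, key, CHACHA_KEY_SIZE);
skcipher_request_set_sync_tfm(req, tfm);
skcipher_request_set_crypt(req, sg, sg, len, nonce);	/* encrypt sg in place */
crypto_skcipher_encrypt(req);
crypto_free_sync_skcipher(tfm);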
diff --git a/c_src/include/keys/user-type.h b/c_src/include/keys/user-type.h
new file mode 100644
index 00000000..a7a2ee45
--- /dev/null
+++ b/c_src/include/keys/user-type.h
@@ -0,0 +1,6 @@
+#ifndef _KEYS_USER_TYPE_H
+#define _KEYS_USER_TYPE_H
+
+#include <linux/key.h>
+
+#endif /* _KEYS_USER_TYPE_H */
diff --git a/c_src/include/linux/atomic.h b/c_src/include/linux/atomic.h
new file mode 100644
index 00000000..5313f850
--- /dev/null
+++ b/c_src/include/linux/atomic.h
@@ -0,0 +1,349 @@
+#ifndef __TOOLS_LINUX_ATOMIC_H
+#define __TOOLS_LINUX_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+typedef struct {
+ int counter;
+} atomic_t;
+
+typedef struct {
+ long counter;
+} atomic_long_t;
+
+typedef struct {
+ u64 counter;
+} atomic64_t;
+
+#ifndef C11_ATOMICS
+
+#include <urcu/uatomic.h>
+
+#if (CAA_BITS_PER_LONG != 64)
+#define ATOMIC64_SPINLOCK
+#endif
+
+#define __ATOMIC_READ(p) uatomic_read(p)
+#define __ATOMIC_SET(p, v) uatomic_set(p, v)
+#define __ATOMIC_ADD_RETURN(v, p) uatomic_add_return(p, v)
+#define __ATOMIC_SUB_RETURN(v, p) uatomic_sub_return(p, v)
+#define __ATOMIC_ADD(v, p) uatomic_add(p, v)
+#define __ATOMIC_SUB(v, p) uatomic_sub(p, v)
+#define __ATOMIC_INC(p) uatomic_inc(p)
+#define __ATOMIC_DEC(p) uatomic_dec(p)
+#define __ATOMIC_AND(v, p) uatomic_and(p, v)
+#define __ATOMIC_OR(v, p) uatomic_or(p, v)
+
+#define xchg(p, v) uatomic_xchg(p, v)
+#define xchg_acquire(p, v) uatomic_xchg(p, v)
+#define cmpxchg(p, old, new) uatomic_cmpxchg(p, old, new)
+#define cmpxchg_acquire(p, old, new) uatomic_cmpxchg(p, old, new)
+#define cmpxchg_release(p, old, new) uatomic_cmpxchg(p, old, new)
+
+#define smp_mb__before_atomic() cmm_smp_mb__before_uatomic_add()
+#define smp_mb__after_atomic() cmm_smp_mb__after_uatomic_add()
+#define smp_wmb() cmm_smp_wmb()
+#define smp_rmb() cmm_smp_rmb()
+#define smp_mb() cmm_smp_mb()
+#define smp_read_barrier_depends() cmm_smp_read_barrier_depends()
+#define smp_acquire__after_ctrl_dep() cmm_smp_mb()
+
+#else /* C11_ATOMICS */
+
+#define __ATOMIC_READ(p) __atomic_load_n(p, __ATOMIC_RELAXED)
+#define __ATOMIC_SET(p, v) __atomic_store_n(p, v, __ATOMIC_RELAXED)
+#define __ATOMIC_ADD_RETURN(v, p) __atomic_add_fetch(p, v, __ATOMIC_RELAXED)
+#define __ATOMIC_ADD_RETURN_RELEASE(v, p) \
+ __atomic_add_fetch(p, v, __ATOMIC_RELEASE)
+#define __ATOMIC_SUB_RETURN(v, p) __atomic_sub_fetch(p, v, __ATOMIC_RELAXED)
+#define __ATOMIC_SUB_RETURN_RELEASE(v, p) \
+ __atomic_sub_fetch(p, v, __ATOMIC_RELEASE)
+#define __ATOMIC_AND(v, p)		__atomic_and_fetch(p, v, __ATOMIC_RELAXED)
+#define __ATOMIC_OR(v, p)		__atomic_or_fetch(p, v, __ATOMIC_RELAXED)
+
+#define xchg(p, v) __atomic_exchange_n(p, v, __ATOMIC_SEQ_CST)
+#define xchg_acquire(p, v) __atomic_exchange_n(p, v, __ATOMIC_ACQUIRE)
+
+#define cmpxchg(p, old, new) \
+({ \
+ typeof(*(p)) __old = (old); \
+ \
+ __atomic_compare_exchange_n((p), &__old, new, false, \
+ __ATOMIC_SEQ_CST, \
+ __ATOMIC_SEQ_CST); \
+ __old; \
+})
+
+#define cmpxchg_acquire(p, old, new) \
+({ \
+ typeof(*(p)) __old = (old); \
+ \
+ __atomic_compare_exchange_n((p), &__old, new, false, \
+ __ATOMIC_ACQUIRE, \
+ __ATOMIC_ACQUIRE); \
+ __old; \
+})
+
+#define cmpxchg_release(p, old, new) \
+({ \
+ typeof(*(p)) __old = (old); \
+ \
+ __atomic_compare_exchange_n((p), &__old, new, false, \
+ __ATOMIC_RELEASE, \
+ __ATOMIC_RELEASE); \
+ __old; \
+})
+
+#define smp_mb__before_atomic() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define smp_mb__after_atomic() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define smp_wmb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define smp_rmb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define smp_mb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define smp_read_barrier_depends()
+
+#endif
+
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
+ smp_mb(); \
+ ___p1; \
+})
+
+#define smp_store_release(p, v) \
+do { \
+ smp_mb(); \
+ WRITE_ONCE(*p, v); \
+} while (0)
+
+/* atomic interface: */
+
+#ifndef __ATOMIC_ADD
+#define __ATOMIC_ADD(i, v) __ATOMIC_ADD_RETURN(i, v)
+#endif
+
+#ifndef __ATOMIC_ADD_RETURN_RELEASE
+#define __ATOMIC_ADD_RETURN_RELEASE(i, v) \
+ ({ smp_mb__before_atomic(); __ATOMIC_ADD_RETURN(i, v); })
+#endif
+
+#ifndef __ATOMIC_SUB_RETURN_RELEASE
+#define __ATOMIC_SUB_RETURN_RELEASE(i, v) \
+ ({ smp_mb__before_atomic(); __ATOMIC_SUB_RETURN(i, v); })
+#endif
+
+#ifndef __ATOMIC_SUB
+#define __ATOMIC_SUB(i, v) __ATOMIC_SUB_RETURN(i, v)
+#endif
+
+#ifndef __ATOMIC_INC_RETURN
+#define __ATOMIC_INC_RETURN(v) __ATOMIC_ADD_RETURN(1, v)
+#endif
+
+#ifndef __ATOMIC_DEC_RETURN
+#define __ATOMIC_DEC_RETURN(v) __ATOMIC_SUB_RETURN(1, v)
+#endif
+
+#ifndef __ATOMIC_INC
+#define __ATOMIC_INC(v) __ATOMIC_ADD(1, v)
+#endif
+
+#ifndef __ATOMIC_DEC
+#define __ATOMIC_DEC(v) __ATOMIC_SUB(1, v)
+#endif
+
+#define DEF_ATOMIC_OPS(a_type, i_type) \
+static inline i_type a_type##_read(const a_type##_t *v) \
+{ \
+ return __ATOMIC_READ(&v->counter); \
+} \
+ \
+static inline i_type a_type##_read_acquire(const a_type##_t *v) \
+{ \
+ i_type ret = __ATOMIC_READ(&v->counter); \
+ smp_mb__after_atomic(); \
+ return ret; \
+} \
+ \
+static inline void a_type##_set(a_type##_t *v, i_type i) \
+{ \
+ return __ATOMIC_SET(&v->counter, i); \
+} \
+ \
+static inline i_type a_type##_add_return(i_type i, a_type##_t *v) \
+{ \
+ return __ATOMIC_ADD_RETURN(i, &v->counter); \
+} \
+ \
+static inline i_type a_type##_add_return_release(i_type i, a_type##_t *v)\
+{ \
+ return __ATOMIC_ADD_RETURN_RELEASE(i, &v->counter); \
+} \
+ \
+static inline i_type a_type##_sub_return_release(i_type i, a_type##_t *v)\
+{ \
+ return __ATOMIC_SUB_RETURN_RELEASE(i, &v->counter); \
+} \
+ \
+static inline i_type a_type##_sub_return(i_type i, a_type##_t *v) \
+{ \
+ return __ATOMIC_SUB_RETURN(i, &v->counter); \
+} \
+ \
+static inline void a_type##_add(i_type i, a_type##_t *v) \
+{ \
+ __ATOMIC_ADD(i, &v->counter); \
+} \
+ \
+static inline void a_type##_sub(i_type i, a_type##_t *v) \
+{ \
+ __ATOMIC_SUB(i, &v->counter); \
+} \
+ \
+static inline i_type a_type##_inc_return(a_type##_t *v) \
+{ \
+ return __ATOMIC_INC_RETURN(&v->counter); \
+} \
+ \
+static inline i_type a_type##_dec_return(a_type##_t *v) \
+{ \
+ return __ATOMIC_DEC_RETURN(&v->counter); \
+} \
+ \
+static inline i_type a_type##_dec_return_release(a_type##_t *v) \
+{ \
+ return __ATOMIC_SUB_RETURN_RELEASE(1, &v->counter); \
+} \
+ \
+static inline void a_type##_inc(a_type##_t *v) \
+{ \
+ __ATOMIC_INC(&v->counter); \
+} \
+ \
+static inline void a_type##_dec(a_type##_t *v) \
+{ \
+ __ATOMIC_DEC(&v->counter); \
+} \
+ \
+static inline bool a_type##_add_negative(i_type i, a_type##_t *v) \
+{ \
+ return __ATOMIC_ADD_RETURN(i, &v->counter) < 0; \
+} \
+ \
+static inline bool a_type##_sub_and_test(i_type i, a_type##_t *v) \
+{ \
+ return __ATOMIC_SUB_RETURN(i, &v->counter) == 0; \
+} \
+ \
+static inline bool a_type##_inc_and_test(a_type##_t *v) \
+{ \
+ return __ATOMIC_INC_RETURN(&v->counter) == 0; \
+} \
+ \
+static inline bool a_type##_dec_and_test(a_type##_t *v) \
+{ \
+ return __ATOMIC_DEC_RETURN(&v->counter) == 0; \
+} \
+ \
+static inline i_type a_type##_add_unless(a_type##_t *v, i_type a, i_type u)\
+{ \
+ i_type old, c = __ATOMIC_READ(&v->counter); \
+ while (c != u && (old = cmpxchg(&v->counter, c, c + a)) != c) \
+ c = old; \
+ return c; \
+} \
+ \
+static inline bool a_type##_inc_not_zero(a_type##_t *v) \
+{ \
+ return a_type##_add_unless(v, 1, 0); \
+} \
+ \
+static inline void a_type##_and(i_type a, a_type##_t *v) \
+{ \
+ __ATOMIC_AND(a, v); \
+} \
+ \
+static inline void a_type##_or(i_type a, a_type##_t *v) \
+{ \
+ __ATOMIC_OR(a, v); \
+} \
+ \
+static inline i_type a_type##_xchg(a_type##_t *v, i_type i) \
+{ \
+ return xchg(&v->counter, i); \
+} \
+ \
+static inline i_type a_type##_cmpxchg(a_type##_t *v, i_type old, i_type new)\
+{ \
+ return cmpxchg(&v->counter, old, new); \
+} \
+ \
+static inline i_type a_type##_cmpxchg_acquire(a_type##_t *v, i_type old, i_type new)\
+{ \
+ return cmpxchg_acquire(&v->counter, old, new); \
+} \
+ \
+static inline bool a_type##_try_cmpxchg_acquire(a_type##_t *v, i_type *old, i_type new)\
+{ \
+ i_type prev = *old; \
+ *old = cmpxchg_acquire(&v->counter, *old, new); \
+ return prev == *old; \
+}
+
+DEF_ATOMIC_OPS(atomic, int)
+DEF_ATOMIC_OPS(atomic_long, long)
+
+#ifndef ATOMIC64_SPINLOCK
+DEF_ATOMIC_OPS(atomic64, s64)
+#else
+s64 atomic64_read(const atomic64_t *v);
+static inline s64 atomic64_read_acquire(const atomic64_t *v)
+{
+ s64 ret = atomic64_read(v);
+ smp_mb__after_atomic();
+ return ret;
+}
+
+void atomic64_set(atomic64_t *v, s64);
+
+s64 atomic64_add_return(s64, atomic64_t *);
+s64 atomic64_sub_return(s64, atomic64_t *);
+void atomic64_add(s64, atomic64_t *);
+void atomic64_sub(s64, atomic64_t *);
+
+s64 atomic64_xchg(atomic64_t *, s64);
+s64 atomic64_cmpxchg(atomic64_t *, s64, s64);
+
+#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v) atomic64_add(1LL, (v))
+#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
+#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
+#define atomic64_dec(v) atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
+
+static inline s64 atomic64_add_return_release(s64 i, atomic64_t *v)
+{
+ smp_mb__before_atomic();
+ return atomic64_add_return(i, v);
+}
+
+static inline s64 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
+{
+ return atomic64_cmpxchg(v, old, new);
+}
+
+static inline s64 atomic64_sub_return_release(s64 i, atomic64_t *v)
+{
+ smp_mb__before_atomic();
+ return atomic64_sub_return(i, v);
+}
+
+#endif
+
+#endif /* __TOOLS_LINUX_ATOMIC_H */
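DEF_ATOMIC_OPS() expands to the usual kernel-style refcounting API; a minimal usage sketch (free_object() is an illustrative placeholder):

atomic_t refs;

atomic_set(&refs, 1);			/* initial reference            */
atomic_inc(&refs);			/* take another                 */
atomic_dec(&refs);			/* drop it                      */
if (atomic_dec_and_test(&refs))		/* true once the count hits zero */
	free_object();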
diff --git a/c_src/include/linux/backing-dev-defs.h b/c_src/include/linux/backing-dev-defs.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/c_src/include/linux/backing-dev-defs.h
diff --git a/c_src/include/linux/backing-dev.h b/c_src/include/linux/backing-dev.h
new file mode 100644
index 00000000..d8a86b45
--- /dev/null
+++ b/c_src/include/linux/backing-dev.h
@@ -0,0 +1,45 @@
+#ifndef _LINUX_BACKING_DEV_H
+#define _LINUX_BACKING_DEV_H
+
+#include <linux/list.h>
+
+typedef int (congested_fn)(void *, int);
+
+enum wb_congested_state {
+ WB_async_congested, /* The async (write) queue is getting full */
+ WB_sync_congested, /* The sync queue is getting full */
+};
+
+struct backing_dev_info {
+ struct list_head bdi_list;
+ unsigned ra_pages;
+ unsigned capabilities;
+
+ congested_fn *congested_fn;
+ void *congested_data;
+};
+
+#define BDI_CAP_NO_ACCT_DIRTY 0x00000001
+#define BDI_CAP_NO_WRITEBACK 0x00000002
+#define BDI_CAP_NO_ACCT_WB 0x00000004
+#define BDI_CAP_STABLE_WRITES 0x00000008
+#define BDI_CAP_STRICTLIMIT 0x00000010
+#define BDI_CAP_CGROUP_WRITEBACK 0x00000020
+
+static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
+{
+ return 0;
+}
+
+static inline int __must_check bdi_setup_and_register(struct backing_dev_info *bdi,
+ char *name)
+{
+ bdi->capabilities = 0;
+ return 0;
+}
+
+static inline void bdi_destroy(struct backing_dev_info *bdi) {}
+
+#define VM_MAX_READAHEAD 128 /* kbytes */
+
+#endif /* _LINUX_BACKING_DEV_H */
diff --git a/c_src/include/linux/bio.h b/c_src/include/linux/bio.h
new file mode 100644
index 00000000..1f8acca2
--- /dev/null
+++ b/c_src/include/linux/bio.h
@@ -0,0 +1,434 @@
+/*
+ * 2.5 block I/O model
+ *
+ * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ */
+#ifndef __LINUX_BIO_H
+#define __LINUX_BIO_H
+
+#include <linux/mempool.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+
+#include <linux/blkdev.h>
+#include <linux/blk_types.h>
+#include <linux/workqueue.h>
+
+#define bio_prio(bio) (bio)->bi_ioprio
+#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio)
+
+#define bio_iter_iovec(bio, iter) \
+ bvec_iter_bvec((bio)->bi_io_vec, (iter))
+
+#define bio_iter_page(bio, iter) \
+ bvec_iter_page((bio)->bi_io_vec, (iter))
+#define bio_iter_len(bio, iter) \
+ bvec_iter_len((bio)->bi_io_vec, (iter))
+#define bio_iter_offset(bio, iter) \
+ bvec_iter_offset((bio)->bi_io_vec, (iter))
+
+#define bio_page(bio) bio_iter_page((bio), (bio)->bi_iter)
+#define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter)
+#define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter)
+
+#define bio_multiple_segments(bio) \
+ ((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len)
+
+#define bvec_iter_sectors(iter) ((iter).bi_size >> 9)
+#define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter)))
+
+#define bio_sectors(bio) bvec_iter_sectors((bio)->bi_iter)
+#define bio_end_sector(bio) bvec_iter_end_sector((bio)->bi_iter)
+
+static inline bool bio_has_data(struct bio *bio)
+{
+ if (bio &&
+ bio->bi_iter.bi_size &&
+ bio_op(bio) != REQ_OP_DISCARD &&
+ bio_op(bio) != REQ_OP_SECURE_ERASE)
+ return true;
+
+ return false;
+}
+
+static inline bool bio_no_advance_iter(struct bio *bio)
+{
+ return bio_op(bio) == REQ_OP_DISCARD ||
+ bio_op(bio) == REQ_OP_SECURE_ERASE ||
+ bio_op(bio) == REQ_OP_WRITE_SAME;
+}
+
+static inline bool bio_is_rw(struct bio *bio)
+{
+ if (!bio_has_data(bio))
+ return false;
+
+ if (bio_no_advance_iter(bio))
+ return false;
+
+ return true;
+}
+
+static inline bool bio_mergeable(struct bio *bio)
+{
+ if (bio->bi_opf & REQ_NOMERGE_FLAGS)
+ return false;
+
+ return true;
+}
+
+static inline unsigned int bio_cur_bytes(struct bio *bio)
+{
+ if (bio_has_data(bio))
+ return bio_iovec(bio).bv_len;
+ else /* dataless requests such as discard */
+ return bio->bi_iter.bi_size;
+}
+
+static inline void *bio_data(struct bio *bio)
+{
+ if (bio_has_data(bio))
+ return page_address(bio_page(bio)) + bio_offset(bio);
+
+ return NULL;
+}
+
+#define __bio_kmap_atomic(bio, iter) \
+ (kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) + \
+ bio_iter_iovec((bio), (iter)).bv_offset)
+
+#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)
+
+static inline struct bio_vec *bio_next_segment(const struct bio *bio,
+ struct bvec_iter_all *iter)
+{
+ if (iter->idx >= bio->bi_vcnt)
+ return NULL;
+
+ return &bio->bi_io_vec[iter->idx];
+}
+
+#define bio_for_each_segment_all(bvl, bio, iter) \
+ for ((iter).idx = 0; (bvl = bio_next_segment((bio), &(iter))); (iter).idx++)
+
+static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+ unsigned bytes)
+{
+ iter->bi_sector += bytes >> 9;
+
+ if (bio_no_advance_iter(bio))
+ iter->bi_size -= bytes;
+ else
+ bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+}
+
+#define __bio_for_each_segment(bvl, bio, iter, start) \
+ for (iter = (start); \
+ (iter).bi_size && \
+ ((bvl = bio_iter_iovec((bio), (iter))), 1); \
+ bio_advance_iter((bio), &(iter), (bvl).bv_len))
+
+#define bio_for_each_segment(bvl, bio, iter) \
+ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)
+
+#define __bio_for_each_bvec(bvl, bio, iter, start) \
+ __bio_for_each_segment(bvl, bio, iter, start)
+
+#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
+
+static inline unsigned bio_segments(struct bio *bio)
+{
+ unsigned segs = 0;
+ struct bio_vec bv;
+ struct bvec_iter iter;
+
+ /*
+ * We special case discard/write same, because they interpret bi_size
+ * differently:
+ */
+
+ if (bio_op(bio) == REQ_OP_DISCARD)
+ return 1;
+
+ if (bio_op(bio) == REQ_OP_SECURE_ERASE)
+ return 1;
+
+ if (bio_op(bio) == REQ_OP_WRITE_SAME)
+ return 1;
+
+ bio_for_each_segment(bv, bio, iter)
+ segs++;
+
+ return segs;
+}
+
+static inline void bio_get(struct bio *bio)
+{
+ bio->bi_flags |= (1 << BIO_REFFED);
+ smp_mb__before_atomic();
+ atomic_inc(&bio->__bi_cnt);
+}
+
+static inline bool bio_flagged(struct bio *bio, unsigned int bit)
+{
+ return (bio->bi_flags & (1U << bit)) != 0;
+}
+
+static inline void bio_set_flag(struct bio *bio, unsigned int bit)
+{
+ bio->bi_flags |= (1U << bit);
+}
+
+static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
+{
+ bio->bi_flags &= ~(1U << bit);
+}
+
+extern struct bio *bio_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs);
+
+static inline struct bio *bio_next_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs)
+{
+ if (sectors >= bio_sectors(bio))
+ return bio;
+
+ return bio_split(bio, sectors, gfp, bs);
+}
+
+struct bio_set {
+ unsigned int front_pad;
+ unsigned int back_pad;
+ mempool_t bio_pool;
+ mempool_t bvec_pool;
+};
+
+
+static inline void bioset_free(struct bio_set *bs)
+{
+ kfree(bs);
+}
+
+void bioset_exit(struct bio_set *);
+int bioset_init(struct bio_set *, unsigned, unsigned, int);
+
+extern struct bio_set *bioset_create(unsigned int, unsigned int);
+extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
+enum {
+ BIOSET_NEED_BVECS = 1 << 0,
+ BIOSET_NEED_RESCUER = 1 << 1,
+};
+
+struct bio *bio_alloc_bioset(struct block_device *, unsigned,
+ blk_opf_t, gfp_t, struct bio_set *);
+extern void bio_put(struct bio *);
+
+int bio_add_page(struct bio *, struct page *, unsigned, unsigned);
+
+struct bio *bio_alloc_clone(struct block_device *, struct bio *,
+ gfp_t, struct bio_set *);
+
+struct bio *bio_kmalloc(unsigned int, gfp_t);
+
+extern void bio_endio(struct bio *);
+
+extern void bio_advance(struct bio *, unsigned);
+
+extern void bio_reset(struct bio *, struct block_device *, unsigned);
+void bio_chain(struct bio *, struct bio *);
+
+extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+ struct bio *src, struct bvec_iter *src_iter);
+extern void bio_copy_data(struct bio *dst, struct bio *src);
+
+void bio_free_pages(struct bio *bio);
+
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
+
+static inline void zero_fill_bio(struct bio *bio)
+{
+ zero_fill_bio_iter(bio, bio->bi_iter);
+}
+
+#define bio_set_dev(bio, bdev) \
+do { \
+ (bio)->bi_bdev = (bdev); \
+} while (0)
+
+#define bio_copy_dev(dst, src) \
+do { \
+ (dst)->bi_bdev = (src)->bi_bdev; \
+} while (0)
+
+static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
+{
+ return page_address(bvec->bv_page) + bvec->bv_offset;
+}
+
+static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
+{
+ *flags = 0;
+}
+
+static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter,
+ unsigned long *flags)
+{
+ return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags);
+}
+#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags)
+
+#define bio_kmap_irq(bio, flags) \
+ __bio_kmap_irq((bio), (bio)->bi_iter, (flags))
+#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
+
+struct bio_list {
+ struct bio *head;
+ struct bio *tail;
+};
+
+static inline int bio_list_empty(const struct bio_list *bl)
+{
+ return bl->head == NULL;
+}
+
+static inline void bio_list_init(struct bio_list *bl)
+{
+ bl->head = bl->tail = NULL;
+}
+
+#define BIO_EMPTY_LIST { NULL, NULL }
+
+#define bio_list_for_each(bio, bl) \
+ for (bio = (bl)->head; bio; bio = bio->bi_next)
+
+static inline unsigned bio_list_size(const struct bio_list *bl)
+{
+ unsigned sz = 0;
+ struct bio *bio;
+
+ bio_list_for_each(bio, bl)
+ sz++;
+
+ return sz;
+}
+
+static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
+{
+ bio->bi_next = NULL;
+
+ if (bl->tail)
+ bl->tail->bi_next = bio;
+ else
+ bl->head = bio;
+
+ bl->tail = bio;
+}
+
+static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
+{
+ bio->bi_next = bl->head;
+
+ bl->head = bio;
+
+ if (!bl->tail)
+ bl->tail = bio;
+}
+
+static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
+{
+ if (!bl2->head)
+ return;
+
+ if (bl->tail)
+ bl->tail->bi_next = bl2->head;
+ else
+ bl->head = bl2->head;
+
+ bl->tail = bl2->tail;
+}
+
+static inline void bio_list_merge_head(struct bio_list *bl,
+ struct bio_list *bl2)
+{
+ if (!bl2->head)
+ return;
+
+ if (bl->head)
+ bl2->tail->bi_next = bl->head;
+ else
+ bl->tail = bl2->tail;
+
+ bl->head = bl2->head;
+}
+
+static inline struct bio *bio_list_peek(struct bio_list *bl)
+{
+ return bl->head;
+}
+
+static inline struct bio *bio_list_pop(struct bio_list *bl)
+{
+ struct bio *bio = bl->head;
+
+ if (bio) {
+ bl->head = bl->head->bi_next;
+ if (!bl->head)
+ bl->tail = NULL;
+
+ bio->bi_next = NULL;
+ }
+
+ return bio;
+}
+
+static inline struct bio *bio_list_get(struct bio_list *bl)
+{
+ struct bio *bio = bl->head;
+
+ bl->head = bl->tail = NULL;
+
+ return bio;
+}
+
+/*
+ * Increment chain count for the bio. Make sure the CHAIN flag update
+ * is visible before the raised count.
+ */
+static inline void bio_inc_remaining(struct bio *bio)
+{
+ bio_set_flag(bio, BIO_CHAIN);
+ smp_mb__before_atomic();
+ atomic_inc(&bio->__bi_remaining);
+}
+
+static inline void bio_init(struct bio *bio,
+ struct block_device *bdev,
+ struct bio_vec *table,
+ unsigned short max_vecs,
+ unsigned int opf)
+{
+ memset(bio, 0, sizeof(*bio));
+ bio->bi_bdev = bdev;
+ bio->bi_opf = opf;
+ atomic_set(&bio->__bi_remaining, 1);
+ atomic_set(&bio->__bi_cnt, 1);
+
+ bio->bi_io_vec = table;
+ bio->bi_max_vecs = max_vecs;
+}
+
+#endif /* __LINUX_BIO_H */
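A minimal usage sketch of the bio_list helpers above, which implement a simple singly linked FIFO of bios; some_bio stands in for a caller-provided bio:

struct bio_list pending;
struct bio *bio;

bio_list_init(&pending);
bio_list_add(&pending, some_bio);		/* enqueue at the tail   */
while ((bio = bio_list_pop(&pending)))		/* dequeue from the head */
	bio_endio(bio);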
diff --git a/c_src/include/linux/bit_spinlock.h b/c_src/include/linux/bit_spinlock.h
new file mode 100644
index 00000000..873f08c2
--- /dev/null
+++ b/c_src/include/linux/bit_spinlock.h
@@ -0,0 +1,84 @@
+#ifndef __LINUX_BIT_SPINLOCK_H
+#define __LINUX_BIT_SPINLOCK_H
+
+#include <linux/kernel.h>
+#include <linux/preempt.h>
+#include <linux/futex.h>
+#include <urcu/futex.h>
+
+/*
+ * The futex wait op wants an explicit 32-bit address and value. If the bitmap
+ * used for the spinlock is 64-bit, cast down and pass the right 32-bit region
+ * for the in-kernel checks. The value is the copy that has already been read
+ * from the atomic op.
+ *
+ * The futex wake op interprets the value as the number of waiters to wake (up
+ * to INT_MAX), so pass that along directly.
+ */
+static inline void do_futex(int nr, unsigned long *addr, unsigned long v, int futex_flags)
+{
+ u32 *addr32 = (u32 *) addr;
+ u32 *v32 = (u32 *) &v;
+ int shift = 0;
+
+ futex_flags |= FUTEX_PRIVATE_FLAG;
+
+#if BITS_PER_LONG == 64
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ shift = (nr >= 32) ? 1 : 0;
+#else
+ shift = (nr < 32) ? 1 : 0;
+#endif
+#endif
+ if (shift) {
+ addr32 += shift;
+ v32 += shift;
+ }
+ /*
+ * The shift to determine the futex address may have cast away a
+ * literal wake count value. The value is capped to INT_MAX and thus
+ * always in the low bytes of v regardless of bit nr. Copy in the wake
+ * count to whatever 32-bit range was selected.
+ */
+ if (futex_flags == FUTEX_WAKE_PRIVATE)
+ *v32 = (u32) v;
+ futex(addr32, futex_flags, *v32, NULL, NULL, 0);
+}
+
+static inline void bit_spin_lock(int nr, unsigned long *_addr)
+{
+ unsigned long mask;
+ unsigned long *addr = _addr + (nr / BITS_PER_LONG);
+ unsigned long v;
+
+ nr &= BITS_PER_LONG - 1;
+ mask = 1UL << nr;
+
+ while (1) {
+ v = __atomic_fetch_or(addr, mask, __ATOMIC_ACQUIRE);
+ if (!(v & mask))
+ break;
+
+ do_futex(nr, addr, v, FUTEX_WAIT);
+ }
+}
+
+static inline void bit_spin_wake(int nr, unsigned long *_addr)
+{
+ do_futex(nr, _addr, INT_MAX, FUTEX_WAKE);
+}
+
+static inline void bit_spin_unlock(int nr, unsigned long *_addr)
+{
+ unsigned long mask;
+ unsigned long *addr = _addr + (nr / BITS_PER_LONG);
+
+ nr &= BITS_PER_LONG - 1;
+ mask = 1UL << nr;
+
+ __atomic_and_fetch(addr, ~mask, __ATOMIC_RELEASE);
+ do_futex(nr, addr, INT_MAX, FUTEX_WAKE);
+}
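+
+/*
+ * Illustrative sketch, not part of the upstream header: a caller protecting
+ * some state with bit 0 of a flags word (the flags variable and the critical
+ * section are hypothetical):
+ *
+ *	unsigned long flags = 0;
+ *
+ *	bit_spin_lock(0, &flags);
+ *	... critical section, bit 0 held ...
+ *	bit_spin_unlock(0, &flags);
+ *
+ * bit_spin_unlock() clears the bit with release semantics and issues a futex
+ * wake so any waiter blocked in bit_spin_lock() rechecks the bit.
+ */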
+
+#endif /* __LINUX_BIT_SPINLOCK_H */
+
diff --git a/c_src/include/linux/bitmap.h b/c_src/include/linux/bitmap.h
new file mode 100644
index 00000000..db2dfdb2
--- /dev/null
+++ b/c_src/include/linux/bitmap.h
@@ -0,0 +1,146 @@
+#ifndef _PERF_BITOPS_H
+#define _PERF_BITOPS_H
+
+#include <string.h>
+#include <linux/bitops.h>
+#include <stdlib.h>
+
+#define DECLARE_BITMAP(name,bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
+
+#define BITMAP_LAST_WORD_MASK(nbits) \
+( \
+ ((nbits) % BITS_PER_LONG) ? \
+ (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
+)
+
+#define small_const_nbits(nbits) \
+ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
+
+static inline int __bitmap_weight(const unsigned long *bitmap, int bits)
+{
+ int k, w = 0, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; k++)
+ w += hweight_long(bitmap[k]);
+
+ if (bits % BITS_PER_LONG)
+ w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits));
+
+ return w;
+}
+
+static inline int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int bits)
+{
+ unsigned int k;
+ unsigned int lim = bits/BITS_PER_LONG;
+ unsigned long result = 0;
+
+ for (k = 0; k < lim; k++)
+ result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+ if (bits % BITS_PER_LONG)
+ result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+ BITMAP_LAST_WORD_MASK(bits));
+ return result != 0;
+}
+
+static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
+ unsigned int bits)
+{
+ unsigned int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ dst[k] = ~src[k];
+
+ if (bits % BITS_PER_LONG)
+ dst[k] = ~src[k];
+}
+
+static inline void bitmap_zero(unsigned long *dst, int nbits)
+{
+ memset(dst, 0, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
+}
+
+static inline int bitmap_weight(const unsigned long *src, int nbits)
+{
+ if (small_const_nbits(nbits))
+ return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
+ return __bitmap_weight(src, nbits);
+}
+
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (small_const_nbits(nbits))
+ *dst = *src1 | *src2;
+ else
+ __bitmap_or(dst, src1, src2, nbits);
+}
+
+static inline unsigned long *bitmap_alloc(int nbits)
+{
+ return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
+}
+
+static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
+{
+ if (small_const_nbits(nbits))
+ return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+ return __bitmap_and(dst, src1, src2, nbits);
+}
+
+static inline unsigned long _find_next_bit(const unsigned long *addr,
+ unsigned long nbits, unsigned long start, unsigned long invert)
+{
+ unsigned long tmp;
+
+ if (!nbits || start >= nbits)
+ return nbits;
+
+ tmp = addr[start / BITS_PER_LONG] ^ invert;
+
+ /* Handle 1st word. */
+ tmp &= BITMAP_FIRST_WORD_MASK(start);
+ start = round_down(start, BITS_PER_LONG);
+
+ while (!tmp) {
+ start += BITS_PER_LONG;
+ if (start >= nbits)
+ return nbits;
+
+ tmp = addr[start / BITS_PER_LONG] ^ invert;
+ }
+
+ return min(start + __ffs(tmp), nbits);
+}
+
+static inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ return _find_next_bit(addr, size, offset, 0UL);
+}
+
+static inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ return _find_next_bit(addr, size, offset, ~0UL);
+}
+
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
+
+static inline bool bitmap_empty(const unsigned long *src, unsigned nbits)
+{
+ if (small_const_nbits(nbits))
+ return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
+
+ return find_first_bit(src, nbits) == nbits;
+}
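+
+/*
+ * Illustrative sketch, not part of the upstream header (the 128-bit size is
+ * arbitrary; __set_bit() comes from linux/bitops.h):
+ *
+ *	DECLARE_BITMAP(map, 128);
+ *
+ *	bitmap_zero(map, 128);
+ *	__set_bit(42, map);
+ *
+ *	bitmap_weight(map, 128) == 1
+ *	find_first_bit(map, 128) == 42
+ *	find_next_zero_bit(map, 128, 42) == 43
+ *	bitmap_empty(map, 128) == false
+ */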
+
+#endif /* _PERF_BITOPS_H */
diff --git a/c_src/include/linux/bitops.h b/c_src/include/linux/bitops.h
new file mode 100644
index 00000000..758476b1
--- /dev/null
+++ b/c_src/include/linux/bitops.h
@@ -0,0 +1,286 @@
+#ifndef _TOOLS_LINUX_BITOPS_H_
+#define _TOOLS_LINUX_BITOPS_H_
+
+#include <asm/types.h>
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/page.h>
+
+#ifndef __WORDSIZE
+#define __WORDSIZE (__SIZEOF_LONG__ * 8)
+#endif
+
+#ifndef BITS_PER_LONG
+# define BITS_PER_LONG __WORDSIZE
+#endif
+
+#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#define BITS_PER_BYTE 8
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
+#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
+#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE)
+
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+ *p |= mask;
+}
+
+static inline void set_bit(long nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+ __atomic_or_fetch(p, mask, __ATOMIC_RELAXED);
+}
+
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+ *p &= ~mask;
+}
+
+static inline void clear_bit(long nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+ __atomic_and_fetch(p, ~mask, __ATOMIC_RELAXED);
+}
+
+static inline int test_bit(long nr, const volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr);
+
+ return (*p & mask) != 0;
+}
+
+static inline int __test_and_set_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ unsigned long old;
+
+ old = *p;
+ *p = old | mask;
+
+ return (old & mask) != 0;
+}
+
+static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr);
+ unsigned long old;
+
+ old = __atomic_fetch_or(p, mask, __ATOMIC_RELAXED);
+
+ return (old & mask) != 0;
+}
+
+static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr);
+ unsigned long old;
+
+ old = __atomic_fetch_and(p, ~mask, __ATOMIC_RELAXED);
+
+ return (old & mask) != 0;
+}
+
+static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+ __atomic_and_fetch(p, ~mask, __ATOMIC_RELEASE);
+}
+
+static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr);
+ unsigned long old;
+
+ old = __atomic_fetch_or(p, mask, __ATOMIC_ACQUIRE);
+
+ return (old & mask) != 0;
+}
+
+#define for_each_set_bit(bit, addr, size) \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), (bit)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
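+
+/*
+ * Illustrative sketch, not part of the upstream header (find_first_bit() and
+ * find_next_bit() are provided by linux/bitmap.h in this tree):
+ *
+ *	unsigned long map[BITS_TO_LONGS(128)] = { 0 };
+ *	unsigned bit;
+ *
+ *	__set_bit(3, map);
+ *	__set_bit(70, map);
+ *
+ *	for_each_set_bit(bit, map, 128)
+ *		printf("%u\n", bit);
+ *
+ * visits bit == 3, then bit == 70.
+ */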
+
+static inline unsigned long hweight_long(unsigned long w)
+{
+ return __builtin_popcountl(w);
+}
+
+static inline unsigned long hweight64(u64 w)
+{
+ return __builtin_popcount((u32) w) +
+ __builtin_popcount(w >> 32);
+}
+
+static inline unsigned long hweight32(u32 w)
+{
+ return __builtin_popcount(w);
+}
+
+static inline unsigned long hweight8(unsigned long w)
+{
+ return __builtin_popcountl(w);
+}
+
+/**
+ * rol64 - rotate a 64-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u64 rol64(__u64 word, unsigned int shift)
+{
+ return (word << shift) | (word >> (64 - shift));
+}
+
+/**
+ * ror64 - rotate a 64-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u64 ror64(__u64 word, unsigned int shift)
+{
+ return (word >> shift) | (word << (64 - shift));
+}
+
+/**
+ * rol32 - rotate a 32-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/**
+ * ror32 - rotate a 32-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 ror32(__u32 word, unsigned int shift)
+{
+ return (word >> shift) | (word << (32 - shift));
+}
+
+/**
+ * rol16 - rotate a 16-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u16 rol16(__u16 word, unsigned int shift)
+{
+ return (word << shift) | (word >> (16 - shift));
+}
+
+/**
+ * ror16 - rotate a 16-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u16 ror16(__u16 word, unsigned int shift)
+{
+ return (word >> shift) | (word << (16 - shift));
+}
+
+/**
+ * rol8 - rotate an 8-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u8 rol8(__u8 word, unsigned int shift)
+{
+ return (word << shift) | (word >> (8 - shift));
+}
+
+/**
+ * ror8 - rotate an 8-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u8 ror8(__u8 word, unsigned int shift)
+{
+ return (word >> shift) | (word << (8 - shift));
+}
+
+static inline unsigned long __fls(unsigned long word)
+{
+ return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
+}
+
+static inline int fls(int x)
+{
+ return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
+}
+
+static inline int fls64(__u64 x)
+{
+#if BITS_PER_LONG == 32
+ __u32 h = x >> 32;
+ if (h)
+ return fls(h) + 32;
+ return fls(x);
+#elif BITS_PER_LONG == 64
+ if (x == 0)
+ return 0;
+ return __fls(x) + 1;
+#endif
+}
+
+static inline unsigned fls_long(unsigned long l)
+{
+ if (sizeof(l) == 4)
+ return fls(l);
+ return fls64(l);
+}
+
+static inline unsigned long __ffs(unsigned long word)
+{
+ return __builtin_ctzl(word);
+}
+
+static inline unsigned long __ffs64(u64 word)
+{
+#if BITS_PER_LONG == 32
+ if (((u32)word) == 0UL)
+ return __ffs((u32)(word >> 32)) + 32;
+#elif BITS_PER_LONG != 64
+#error BITS_PER_LONG not 32 or 64
+#endif
+ return __ffs((unsigned long)word);
+}
+
+#define ffz(x) __ffs(~(x))
+
+static inline __attribute__((const))
+unsigned long rounddown_pow_of_two(unsigned long n)
+{
+ return 1UL << (fls_long(n) - 1);
+}
+
+#endif
diff --git a/c_src/include/linux/blk_types.h b/c_src/include/linux/blk_types.h
new file mode 100644
index 00000000..80560ab6
--- /dev/null
+++ b/c_src/include/linux/blk_types.h
@@ -0,0 +1,247 @@
+/*
+ * Block data types and constants. Directly include this file only to
+ * break include dependency loop.
+ */
+#ifndef __LINUX_BLK_TYPES_H
+#define __LINUX_BLK_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/types.h>
+#include <linux/bvec.h>
+#include <linux/kobject.h>
+
+struct bio_set;
+struct bio;
+typedef void (bio_end_io_t) (struct bio *);
+
+#define BDEVNAME_SIZE 32
+
+struct request_queue {
+ struct backing_dev_info *backing_dev_info;
+};
+
+struct gendisk {
+ struct backing_dev_info *bdi;
+ struct backing_dev_info __bdi;
+};
+
+struct hd_struct {
+ struct kobject kobj;
+};
+
+struct block_device {
+ struct kobject kobj;
+ dev_t bd_dev;
+ char name[BDEVNAME_SIZE];
+ struct inode *bd_inode;
+ struct request_queue queue;
+ void *bd_holder;
+ struct gendisk * bd_disk;
+ struct gendisk __bd_disk;
+ int bd_fd;
+};
+
+#define bdev_kobj(_bdev) (&((_bdev)->kobj))
+
+/*
+ * Block error status values. See block/blk-core:blk_errors for the details.
+ */
+typedef u8 __bitwise blk_status_t;
+#define BLK_STS_OK 0
+#define BLK_STS_NOTSUPP ((__force blk_status_t)1)
+#define BLK_STS_TIMEOUT ((__force blk_status_t)2)
+#define BLK_STS_NOSPC ((__force blk_status_t)3)
+#define BLK_STS_TRANSPORT ((__force blk_status_t)4)
+#define BLK_STS_TARGET ((__force blk_status_t)5)
+#define BLK_STS_NEXUS ((__force blk_status_t)6)
+#define BLK_STS_MEDIUM ((__force blk_status_t)7)
+#define BLK_STS_PROTECTION ((__force blk_status_t)8)
+#define BLK_STS_RESOURCE ((__force blk_status_t)9)
+#define BLK_STS_IOERR ((__force blk_status_t)10)
+
+/* hack for device mapper, don't use elsewhere: */
+#define BLK_STS_DM_REQUEUE ((__force blk_status_t)11)
+
+#define BLK_STS_AGAIN ((__force blk_status_t)12)
+
+#define BIO_INLINE_VECS 4
+
+/*
+ * main unit of I/O for the block layer and lower layers (ie drivers and
+ * stacking drivers)
+ */
+struct bio {
+ struct bio *bi_next; /* request queue link */
+ struct block_device *bi_bdev;
+ blk_status_t bi_status;
+ unsigned int bi_opf; /* bottom bits req flags,
+ * top bits REQ_OP. Use
+ * accessors.
+ */
+ unsigned short bi_flags; /* status, command, etc */
+ unsigned short bi_ioprio;
+
+ struct bvec_iter bi_iter;
+
+ atomic_t __bi_remaining;
+
+ bio_end_io_t *bi_end_io;
+ void *bi_private;
+
+ unsigned short bi_vcnt; /* how many bio_vec's */
+
+ /*
+ * Everything starting with bi_max_vecs will be preserved by bio_reset()
+ */
+
+ unsigned short bi_max_vecs; /* max bvl_vecs we can hold */
+
+ atomic_t __bi_cnt; /* pin count */
+
+ struct bio_vec *bi_io_vec; /* the actual vec list */
+
+ struct bio_set *bi_pool;
+
+ /*
+ * We can inline a number of vecs at the end of the bio, to avoid
+ * double allocations for a small number of bio_vecs. This member
+ * MUST obviously be kept at the very end of the bio.
+ */
+ struct bio_vec bi_inline_vecs[0];
+};
+
+#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs)
+
+/*
+ * bio flags
+ */
+#define BIO_SEG_VALID 1 /* bi_phys_segments valid */
+#define BIO_CLONED 2 /* doesn't own data */
+#define BIO_BOUNCED 3 /* bio is a bounce bio */
+#define BIO_USER_MAPPED 4 /* contains user pages */
+#define BIO_NULL_MAPPED 5 /* contains invalid user pages */
+#define BIO_QUIET 6 /* Make BIO Quiet */
+#define BIO_CHAIN 7 /* chained bio, ->bi_remaining in effect */
+#define BIO_REFFED 8 /* bio has elevated ->bi_cnt */
+
+/*
+ * Flags starting here get preserved by bio_reset() - this includes
+ * BVEC_POOL_IDX()
+ */
+#define BIO_RESET_BITS 10
+
+/*
+ * We support 6 different bvec pools, the last one is magic in that it
+ * is backed by a mempool.
+ */
+#define BVEC_POOL_NR 6
+#define BVEC_POOL_MAX (BVEC_POOL_NR - 1)
+
+/*
+ * Top 4 bits of bio flags indicate the pool the bvecs came from. We add
+ * 1 to the actual index so that 0 indicates that there are no bvecs to be
+ * freed.
+ */
+#define BVEC_POOL_BITS (4)
+#define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS)
+#define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET)
+
+/*
+ * Operations and flags common to the bio and request structures.
+ * We use 8 bits for encoding the operation, and the remaining 24 for flags.
+ *
+ * The least significant bit of the operation number indicates the data
+ * transfer direction:
+ *
+ * - if the least significant bit is set transfers are TO the device
+ * - if the least significant bit is not set transfers are FROM the device
+ *
+ * If an operation does not transfer data, the least significant bit has no
+ * meaning.
+ */
+#define REQ_OP_BITS 8
+#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1)
+#define REQ_FLAG_BITS 24
+
+enum req_opf {
+ /* read sectors from the device */
+ REQ_OP_READ = 0,
+ /* write sectors to the device */
+ REQ_OP_WRITE = 1,
+ /* flush the volatile write cache */
+ REQ_OP_FLUSH = 2,
+ /* discard sectors */
+ REQ_OP_DISCARD = 3,
+ /* get zone information */
+ REQ_OP_ZONE_REPORT = 4,
+ /* securely erase sectors */
+ REQ_OP_SECURE_ERASE = 5,
+	/* reset a zone write pointer */
+ REQ_OP_ZONE_RESET = 6,
+ /* write the same sector many times */
+ REQ_OP_WRITE_SAME = 7,
+ /* write the zero filled sector many times */
+ REQ_OP_WRITE_ZEROES = 8,
+
+ /* SCSI passthrough using struct scsi_request */
+ REQ_OP_SCSI_IN = 32,
+ REQ_OP_SCSI_OUT = 33,
+ /* Driver private requests */
+ REQ_OP_DRV_IN = 34,
+ REQ_OP_DRV_OUT = 35,
+
+ REQ_OP_LAST,
+};
+
+enum req_flag_bits {
+ __REQ_FAILFAST_DEV = /* no driver retries of device errors */
+ REQ_OP_BITS,
+ __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
+ __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */
+ __REQ_SYNC, /* request is sync (sync write or read) */
+ __REQ_META, /* metadata io request */
+ __REQ_PRIO, /* boost priority in cfq */
+ __REQ_NOMERGE, /* don't touch this for merging */
+ __REQ_IDLE, /* anticipate more IO after this one */
+ __REQ_INTEGRITY, /* I/O includes block integrity payload */
+ __REQ_FUA, /* forced unit access */
+ __REQ_PREFLUSH, /* request for cache flush */
+ __REQ_RAHEAD, /* read ahead, can fail anytime */
+ __REQ_BACKGROUND, /* background IO */
+ __REQ_NR_BITS, /* stops here */
+};
+
+#define REQ_SYNC (1ULL << __REQ_SYNC)
+#define REQ_META (1ULL << __REQ_META)
+#define REQ_PRIO (1ULL << __REQ_PRIO)
+
+#define REQ_NOMERGE_FLAGS (REQ_PREFLUSH | REQ_FUA)
+
+#define bio_op(bio) \
+ ((bio)->bi_opf & REQ_OP_MASK)
+
+static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
+ unsigned op_flags)
+{
+ bio->bi_opf = op | op_flags;
+}
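+
+/*
+ * Illustrative sketch, not part of the upstream header: the op lives in the
+ * low REQ_OP_BITS of bi_opf and the request flags sit above it, so for a
+ * hypothetical bio pointer:
+ *
+ *	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_FUA);
+ *
+ *	bio_op(bio) == REQ_OP_WRITE
+ *	(bio->bi_opf & REQ_FUA) != 0
+ */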
+
+#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
+#define REQ_THROTTLED (1ULL << __REQ_THROTTLED)
+
+#define REQ_FUA (1ULL << __REQ_FUA)
+#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH)
+
+#define RW_MASK REQ_OP_WRITE
+
+#define READ REQ_OP_READ
+#define WRITE REQ_OP_WRITE
+
+#define READ_SYNC REQ_SYNC
+#define WRITE_SYNC (REQ_SYNC)
+#define WRITE_ODIRECT REQ_SYNC
+#define WRITE_FLUSH (REQ_SYNC | REQ_PREFLUSH)
+#define WRITE_FUA (REQ_SYNC | REQ_FUA)
+#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_PREFLUSH | REQ_FUA)
+
+#endif /* __LINUX_BLK_TYPES_H */
diff --git a/c_src/include/linux/blkdev.h b/c_src/include/linux/blkdev.h
new file mode 100644
index 00000000..39143117
--- /dev/null
+++ b/c_src/include/linux/blkdev.h
@@ -0,0 +1,190 @@
+#ifndef __TOOLS_LINUX_BLKDEV_H
+#define __TOOLS_LINUX_BLKDEV_H
+
+#include <linux/backing-dev.h>
+#include <linux/blk_types.h>
+#include <linux/kobject.h>
+#include <linux/types.h>
+
+#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX)
+
+#define BIO_MAX_VECS 256U
+
+typedef unsigned fmode_t;
+typedef __u32 __bitwise blk_opf_t;
+
+struct bio;
+struct user_namespace;
+
+#define MINORBITS 20
+#define MINORMASK ((1U << MINORBITS) - 1)
+
+#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
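+
+/*
+ * Illustrative sketch, not part of the upstream header: with 20 minor bits,
+ *
+ *	dev_t dev = MKDEV(8, 1);	(8 << 20) | 1
+ *
+ *	MAJOR(dev) == 8
+ *	MINOR(dev) == 1
+ */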
+
+typedef unsigned int __bitwise blk_mode_t;
+
+/* open for reading */
+#define BLK_OPEN_READ ((__force blk_mode_t)(1 << 0))
+/* open for writing */
+#define BLK_OPEN_WRITE ((__force blk_mode_t)(1 << 1))
+/* open exclusively (vs other exclusive openers) */
+#define BLK_OPEN_EXCL ((__force blk_mode_t)(1 << 2))
+/* opened with O_NDELAY */
+#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
+/* open for "writes" only for ioctls (special hack for floppy.c) */
+#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
+
+#define BLK_OPEN_BUFFERED ((__force blk_mode_t)(1 << 5))
+
+struct inode {
+ unsigned long i_ino;
+ loff_t i_size;
+ struct super_block *i_sb;
+};
+
+struct file {
+ struct inode *f_inode;
+};
+
+static inline struct inode *file_inode(const struct file *f)
+{
+ return f->f_inode;
+}
+
+#define part_to_dev(part) (part)
+
+void generic_make_request(struct bio *);
+int submit_bio_wait(struct bio *);
+
+static inline void submit_bio(struct bio *bio)
+{
+ generic_make_request(bio);
+}
+
+int blkdev_issue_discard(struct block_device *, sector_t, sector_t, gfp_t);
+int blkdev_issue_zeroout(struct block_device *, sector_t, sector_t, gfp_t, unsigned);
+
+#define bdev_get_queue(bdev) (&((bdev)->queue))
+
+#ifndef SECTOR_SHIFT
+#define SECTOR_SHIFT 9
+#endif
+#ifndef SECTOR_SIZE
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#endif
+
+#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_MASK (PAGE_SECTORS - 1)
+
+#define bdev_max_discard_sectors(bdev) ((void) (bdev), 0)
+#define blk_queue_nonrot(q) ((void) (q), 0)
+
+unsigned bdev_logical_block_size(struct block_device *bdev);
+sector_t get_capacity(struct gendisk *disk);
+
+struct blk_holder_ops {
+ void (*mark_dead)(struct block_device *bdev);
+};
+
+void blkdev_put(struct block_device *bdev, void *holder);
+void bdput(struct block_device *bdev);
+struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
+ void *holder, const struct blk_holder_ops *hop);
+int lookup_bdev(const char *path, dev_t *);
+
+struct super_block {
+ void *s_fs_info;
+};
+
+/*
+ * File types
+ *
+ * NOTE! These match bits 12..15 of stat.st_mode
+ * (ie "(i_mode >> 12) & 15").
+ */
+#ifndef DT_UNKNOWN
+#define DT_UNKNOWN 0
+#define DT_FIFO 1
+#define DT_CHR 2
+#define DT_DIR 4
+#define DT_BLK 6
+#define DT_REG 8
+#define DT_LNK 10
+#define DT_SOCK 12
+#define DT_WHT 14
+#define DT_MAX 16
+#endif
+
+/*
+ * This is the "filldir" function type, used by readdir() to let
+ * the kernel specify what kind of dirent layout it wants to have.
+ * This allows the kernel to read directories into kernel space or
+ * to have different dirent layouts depending on the binary type.
+ */
+struct dir_context;
+typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
+ unsigned);
+
+struct dir_context {
+ const filldir_t actor;
+ u64 pos;
+};
+
+/* /sys/fs */
+extern struct kobject *fs_kobj;
+
+struct file_operations {
+};
+
+static inline int register_chrdev(unsigned int major, const char *name,
+ const struct file_operations *fops)
+{
+ return 1;
+}
+
+static inline void unregister_chrdev(unsigned int major, const char *name)
+{
+}
+
+static inline const char *bdevname(struct block_device *bdev, char *buf)
+{
+ snprintf(buf, BDEVNAME_SIZE, "%s", bdev->name);
+ return buf;
+}
+
+static inline bool op_is_write(unsigned int op)
+{
+ return op == REQ_OP_READ ? false : true;
+}
+
+/*
+ * return data direction, READ or WRITE
+ */
+static inline int bio_data_dir(struct bio *bio)
+{
+ return op_is_write(bio_op(bio)) ? WRITE : READ;
+}
+
+static inline bool dir_emit(struct dir_context *ctx,
+ const char *name, int namelen,
+ u64 ino, unsigned type)
+{
+ return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
+}
+
+static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
+{
+ return true;
+}
+
+#define capable(cap) true
+
+int blk_status_to_errno(blk_status_t status);
+blk_status_t errno_to_blk_status(int errno);
+const char *blk_status_to_str(blk_status_t status);
+
+#endif /* __TOOLS_LINUX_BLKDEV_H */
+
diff --git a/c_src/include/linux/bsearch.h b/c_src/include/linux/bsearch.h
new file mode 100644
index 00000000..e66b711d
--- /dev/null
+++ b/c_src/include/linux/bsearch.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BSEARCH_H
+#define _LINUX_BSEARCH_H
+
+#include <linux/types.h>
+
+static __always_inline
+void *__inline_bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp)
+{
+ const char *pivot;
+ int result;
+
+ while (num > 0) {
+ pivot = base + (num >> 1) * size;
+ result = cmp(key, pivot);
+
+ if (result == 0)
+ return (void *)pivot;
+
+ if (result > 0) {
+ base = pivot + size;
+ num--;
+ }
+ num >>= 1;
+ }
+
+ return NULL;
+}
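+
+/*
+ * Illustrative sketch, not part of the upstream header (cmp_u32() and the
+ * array are hypothetical; cmp_func_t is assumed to come from linux/types.h):
+ *
+ *	static int cmp_u32(const void *a, const void *b)
+ *	{
+ *		u32 x = *(const u32 *) a, y = *(const u32 *) b;
+ *
+ *		return x < y ? -1 : x > y;
+ *	}
+ *
+ *	u32 sorted[] = { 1, 4, 9, 16 };
+ *	u32 key = 9;
+ *	u32 *p = __inline_bsearch(&key, sorted, 4, sizeof(key), cmp_u32);
+ *
+ * p ends up pointing at sorted[2].
+ */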
+
+extern void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp);
+
+#endif /* _LINUX_BSEARCH_H */
diff --git a/c_src/include/linux/bug.h b/c_src/include/linux/bug.h
new file mode 100644
index 00000000..1a10f7e6
--- /dev/null
+++ b/c_src/include/linux/bug.h
@@ -0,0 +1,66 @@
+#ifndef __TOOLS_LINUX_BUG_H
+#define __TOOLS_LINUX_BUG_H
+
+#include <assert.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+
+#ifdef CONFIG_VALGRIND
+#include <valgrind/memcheck.h>
+
+#define DEBUG_MEMORY_FREED(p, len) VALGRIND_MAKE_MEM_UNDEFINED(p, len)
+#endif
+
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \
+ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
+
+#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2*!!(cond)]))
+
+#define BUG() do { fflush(stdout); assert(0); unreachable(); } while (0)
+#define BUG_ON(cond) assert(!(cond))
+
+#define WARN(cond, fmt, ...) \
+({ \
+ int __ret_warn_on = unlikely(!!(cond)); \
+ if (__ret_warn_on) \
+ fprintf(stderr, "WARNING at " __FILE__ ":%d: " fmt "\n",\
+ __LINE__, ##__VA_ARGS__); \
+ __ret_warn_on; \
+})
+
+#define __WARN() \
+do { \
+ fprintf(stderr, "WARNING at " __FILE__ ":%d\n", __LINE__); \
+} while (0)
+
+#define WARN_ON(cond) ({ \
+ int __ret_warn_on = unlikely(!!(cond)); \
+ if (__ret_warn_on) \
+ __WARN(); \
+ __ret_warn_on; \
+})
+
+#define WARN_ONCE(cond, fmt, ...) \
+({ \
+ static bool __warned; \
+ int __ret_warn_on = unlikely(!!(cond)); \
+ if (__ret_warn_on && !__warned) { \
+ __warned = true; \
+ __WARN(); \
+ } \
+ __ret_warn_on; \
+})
+
+#define WARN_ON_ONCE(cond) ({ \
+ static bool __warned; \
+ int __ret_warn_on = unlikely(!!(cond)); \
+ if (__ret_warn_on && !__warned) { \
+ __warned = true; \
+ __WARN(); \
+ } \
+ __ret_warn_on; \
+})
+
+#endif /* __TOOLS_LINUX_BUG_H */
diff --git a/c_src/include/linux/bvec.h b/c_src/include/linux/bvec.h
new file mode 100644
index 00000000..5bc68b42
--- /dev/null
+++ b/c_src/include/linux/bvec.h
@@ -0,0 +1,101 @@
+/*
+ * bvec iterator
+ *
+ * Copyright (C) 2001 Ming Lei <ming.lei@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ */
+#ifndef __LINUX_BVEC_ITER_H
+#define __LINUX_BVEC_ITER_H
+
+#include <linux/kernel.h>
+#include <linux/bug.h>
+
+/*
+ * was unsigned short, but we might as well be ready for > 64kB I/O pages
+ */
+struct bio_vec {
+ struct page *bv_page;
+ unsigned int bv_len;
+ unsigned int bv_offset;
+};
+
+struct bvec_iter {
+ sector_t bi_sector; /* device address in 512 byte
+ sectors */
+ unsigned int bi_size; /* residual I/O count */
+
+ unsigned int bi_idx; /* current index into bvl_vec */
+
+ unsigned int bi_bvec_done; /* number of bytes completed in
+ current bvec */
+};
+
+struct bvec_iter_all {
+ int idx;
+};
+
+/*
+ * various member access, note that bio_data should of course not be used
+ * on highmem page vectors
+ */
+#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx])
+
+#define bvec_iter_page(bvec, iter) \
+ (__bvec_iter_bvec((bvec), (iter))->bv_page)
+
+#define bvec_iter_len(bvec, iter) \
+ min((iter).bi_size, \
+ __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
+
+#define bvec_iter_offset(bvec, iter) \
+ (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
+
+#define bvec_iter_bvec(bvec, iter) \
+((struct bio_vec) { \
+ .bv_page = bvec_iter_page((bvec), (iter)), \
+ .bv_len = bvec_iter_len((bvec), (iter)), \
+ .bv_offset = bvec_iter_offset((bvec), (iter)), \
+})
+
+static inline void bvec_iter_advance(const struct bio_vec *bv,
+ struct bvec_iter *iter,
+ unsigned bytes)
+{
+ WARN_ONCE(bytes > iter->bi_size,
+ "Attempted to advance past end of bvec iter\n");
+
+ while (bytes) {
+ unsigned iter_len = bvec_iter_len(bv, *iter);
+ unsigned len = min(bytes, iter_len);
+
+ bytes -= len;
+ iter->bi_size -= len;
+ iter->bi_bvec_done += len;
+
+ if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
+ iter->bi_bvec_done = 0;
+ iter->bi_idx++;
+ }
+ }
+}
+
+#define for_each_bvec(bvl, bio_vec, iter, start) \
+ for (iter = (start); \
+ (iter).bi_size && \
+ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
+ bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
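+
+/*
+ * Illustrative sketch, not part of the upstream header: walking a two-element
+ * bvec array one segment at a time (p0, p1 and process() are hypothetical):
+ *
+ *	struct bio_vec vecs[] = {
+ *		{ .bv_page = p0, .bv_len = 4096 },
+ *		{ .bv_page = p1, .bv_len = 4096 },
+ *	};
+ *	struct bvec_iter start = { .bi_size = 8192 }, iter;
+ *	struct bio_vec bv;
+ *
+ *	for_each_bvec(bv, vecs, iter, start)
+ *		process(bv.bv_page, bv.bv_offset, bv.bv_len);
+ */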
+
+#endif /* __LINUX_BVEC_ITER_H */
diff --git a/c_src/include/linux/byteorder.h b/c_src/include/linux/byteorder.h
new file mode 100644
index 00000000..7b04f5bc
--- /dev/null
+++ b/c_src/include/linux/byteorder.h
@@ -0,0 +1,75 @@
+#ifndef __LINUX_BYTEORDER_H
+#define __LINUX_BYTEORDER_H
+
+#include <linux/compiler.h>
+#include <asm/byteorder.h>
+
+#define swab16 __swab16
+#define swab32 __swab32
+#define swab64 __swab64
+#define swahw32 __swahw32
+#define swahb32 __swahb32
+#define swab16p __swab16p
+#define swab32p __swab32p
+#define swab64p __swab64p
+#define swahw32p __swahw32p
+#define swahb32p __swahb32p
+#define swab16s __swab16s
+#define swab32s __swab32s
+#define swab64s __swab64s
+#define swahw32s __swahw32s
+#define swahb32s __swahb32s
+
+#define cpu_to_le64 __cpu_to_le64
+#define le64_to_cpu __le64_to_cpu
+#define cpu_to_le32 __cpu_to_le32
+#define le32_to_cpu __le32_to_cpu
+#define cpu_to_le16 __cpu_to_le16
+#define le16_to_cpu __le16_to_cpu
+#define cpu_to_be64 __cpu_to_be64
+#define be64_to_cpu __be64_to_cpu
+#define cpu_to_be32 __cpu_to_be32
+#define be32_to_cpu __be32_to_cpu
+#define cpu_to_be16 __cpu_to_be16
+#define be16_to_cpu __be16_to_cpu
+#define cpu_to_le64p __cpu_to_le64p
+#define le64_to_cpup __le64_to_cpup
+#define cpu_to_le32p __cpu_to_le32p
+#define le32_to_cpup __le32_to_cpup
+#define cpu_to_le16p __cpu_to_le16p
+#define le16_to_cpup __le16_to_cpup
+#define cpu_to_be64p __cpu_to_be64p
+#define be64_to_cpup __be64_to_cpup
+#define cpu_to_be32p __cpu_to_be32p
+#define be32_to_cpup __be32_to_cpup
+#define cpu_to_be16p __cpu_to_be16p
+#define be16_to_cpup __be16_to_cpup
+#define cpu_to_le64s __cpu_to_le64s
+#define le64_to_cpus __le64_to_cpus
+#define cpu_to_le32s __cpu_to_le32s
+#define le32_to_cpus __le32_to_cpus
+#define cpu_to_le16s __cpu_to_le16s
+#define le16_to_cpus __le16_to_cpus
+#define cpu_to_be64s __cpu_to_be64s
+#define be64_to_cpus __be64_to_cpus
+#define cpu_to_be32s __cpu_to_be32s
+#define be32_to_cpus __be32_to_cpus
+#define cpu_to_be16s __cpu_to_be16s
+#define be16_to_cpus __be16_to_cpus
+
+static inline void le16_add_cpu(__le16 *var, u16 val)
+{
+ *var = cpu_to_le16(le16_to_cpu(*var) + val);
+}
+
+static inline void le32_add_cpu(__le32 *var, u32 val)
+{
+ *var = cpu_to_le32(le32_to_cpu(*var) + val);
+}
+
+static inline void le64_add_cpu(__le64 *var, u64 val)
+{
+ *var = cpu_to_le64(le64_to_cpu(*var) + val);
+}
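+
+/*
+ * Illustrative sketch, not part of the upstream header: little-endian on-disk
+ * fields are converted at the boundary and can be updated in place:
+ *
+ *	__le32 disk_val = cpu_to_le32(7);
+ *
+ *	le32_add_cpu(&disk_val, 3);
+ *	le32_to_cpu(disk_val) == 10
+ */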
+
+#endif /* __LINUX_BYTEORDER_H */
diff --git a/c_src/include/linux/cache.h b/c_src/include/linux/cache.h
new file mode 100644
index 00000000..c61167ca
--- /dev/null
+++ b/c_src/include/linux/cache.h
@@ -0,0 +1,17 @@
+#ifndef __TOOLS_LINUX_CACHE_H
+#define __TOOLS_LINUX_CACHE_H
+
+#define L1_CACHE_SHIFT 6
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+#define SMP_CACHE_BYTES L1_CACHE_BYTES
+
+#define L1_CACHE_ALIGN(x) __ALIGN_KERNEL(x, L1_CACHE_BYTES)
+
+#define __read_mostly
+#define __ro_after_init
+
+#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#define ____cacheline_aligned_in_smp ____cacheline_aligned
+
+#endif /* __TOOLS_LINUX_CACHE_H */
+
diff --git a/c_src/include/linux/closure.h b/c_src/include/linux/closure.h
new file mode 100644
index 00000000..c554c6a0
--- /dev/null
+++ b/c_src/include/linux/closure.h
@@ -0,0 +1,415 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CLOSURE_H
+#define _LINUX_CLOSURE_H
+
+#include <linux/llist.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/workqueue.h>
+
+/*
+ * Closure is perhaps the most overused and abused term in computer science, but
+ * since I've been unable to come up with anything better you're stuck with it
+ * again.
+ *
+ * What are closures?
+ *
+ * They embed a refcount. The basic idea is they count "things that are in
+ * progress" - in flight bios, some other thread that's doing something else -
+ * anything you might want to wait on.
+ *
+ * The refcount may be manipulated with closure_get() and closure_put().
+ * closure_put() is where many of the interesting things happen, when it causes
+ * the refcount to go to 0.
+ *
+ * Closures can be used to wait on things both synchronously and asynchronously,
+ * and synchronous and asynchronous use can be mixed without restriction. To
+ * wait synchronously, use closure_sync() - you will sleep until your closure's
+ * refcount hits 1.
+ *
+ * To wait asynchronously, use
+ * continue_at(cl, next_function, workqueue);
+ *
+ * passing it, as you might expect, the function to run when nothing is pending
+ * and the workqueue to run that function out of.
+ *
+ * continue_at() also, critically, requires a 'return' immediately following the
+ * location where this macro is referenced, to return to the calling function.
+ * There's good reason for this.
+ *
+ * To use closures asynchronously safely, they must always have a refcount while
+ * they are running, owned by the thread that is running them. Otherwise, suppose
+ * you submit some bios and wish to have a function run when they all complete:
+ *
+ * foo_endio(struct bio *bio)
+ * {
+ * closure_put(cl);
+ * }
+ *
+ * closure_init(cl);
+ *
+ * do_stuff();
+ * closure_get(cl);
+ * bio1->bi_endio = foo_endio;
+ * bio_submit(bio1);
+ *
+ * do_more_stuff();
+ * closure_get(cl);
+ * bio2->bi_endio = foo_endio;
+ * bio_submit(bio2);
+ *
+ * continue_at(cl, complete_some_read, system_wq);
+ *
+ * If the closure's refcount started at 0, complete_some_read() could run before the
+ * second bio was submitted - which is almost always not what you want! More
+ * importantly, it wouldn't be possible to say whether the original thread or
+ * complete_some_read()'s thread owned the closure - and whatever state it was
+ * associated with!
+ *
+ * So, closure_init() initializes a closure's refcount to 1 - and when a
+ * closure_fn is run, the refcount will be reset to 1 first.
+ *
+ * Then, the rule is - if you got the refcount with closure_get(), release it
+ * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount
+ * on a closure because you called closure_init() or you were run out of a
+ * closure - _always_ use continue_at(). Doing so consistently will help
+ * eliminate an entire class of particularly pernicious races.
+ *
+ * Lastly, you might have a wait list dedicated to a specific event, and have no
+ * need for specifying the condition - you just want to wait until someone runs
+ * closure_wake_up() on the appropriate wait list. In that case, just use
+ * closure_wait(). It will return either true or false, depending on whether the
+ * closure was already on a wait list or not - a closure can only be on one wait
+ * list at a time.
+ *
+ * Parents:
+ *
+ * closure_init() takes two arguments - it takes the closure to initialize, and
+ * a (possibly null) parent.
+ *
+ * If parent is non null, the new closure will have a refcount for its lifetime;
+ * a closure is considered to be "finished" when its refcount hits 0 and the
+ * function to run is null. Hence
+ *
+ * continue_at(cl, NULL, NULL);
+ *
+ * returns up the (spaghetti) stack of closures, precisely like normal return
+ * returns up the C stack. continue_at() with non null fn is better thought of
+ * as doing a tail call.
+ *
+ * All this implies that a closure should typically be embedded in a particular
+ * struct (which its refcount will normally control the lifetime of), and that
+ * struct can very much be thought of as a stack frame.
+ */
+
+struct closure;
+struct closure_syncer;
+typedef void (closure_fn) (struct work_struct *);
+extern struct dentry *bcache_debug;
+
+struct closure_waitlist {
+ struct llist_head list;
+};
+
+enum closure_state {
+ /*
+ * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
+ * the thread that owns the closure, and cleared by the thread that's
+ * waking up the closure.
+ *
+ * The rest are for debugging and don't affect behaviour:
+ *
+ * CLOSURE_RUNNING: Set when a closure is running (i.e. by
+ * closure_init() and when closure_put() runs the next function), and
+ * must be cleared before remaining hits 0. Primarily to help guard
+ * against incorrect usage and accidentally transferring references.
+ * continue_at() and closure_return() clear it for you, if you're doing
+ * something unusual you can use closure_set_dead() which also helps
+ * annotate where references are being transferred.
+ */
+
+ CLOSURE_BITS_START = (1U << 26),
+ CLOSURE_DESTRUCTOR = (1U << 26),
+ CLOSURE_WAITING = (1U << 28),
+ CLOSURE_RUNNING = (1U << 30),
+};
+
+#define CLOSURE_GUARD_MASK \
+ ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)
+
+#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1)
+#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING)
+
+struct closure {
+ union {
+ struct {
+ struct workqueue_struct *wq;
+ struct closure_syncer *s;
+ struct llist_node list;
+ closure_fn *fn;
+ };
+ struct work_struct work;
+ };
+
+ struct closure *parent;
+
+ atomic_t remaining;
+ bool closure_get_happened;
+
+#ifdef CONFIG_DEBUG_CLOSURES
+#define CLOSURE_MAGIC_DEAD 0xc054dead
+#define CLOSURE_MAGIC_ALIVE 0xc054a11e
+
+ unsigned int magic;
+ struct list_head all;
+ unsigned long ip;
+ unsigned long waiting_on;
+#endif
+};
+
+void closure_sub(struct closure *cl, int v);
+void closure_put(struct closure *cl);
+void __closure_wake_up(struct closure_waitlist *list);
+bool closure_wait(struct closure_waitlist *list, struct closure *cl);
+void __closure_sync(struct closure *cl);
+
+static inline unsigned closure_nr_remaining(struct closure *cl)
+{
+ return atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK;
+}
+
+/**
+ * closure_sync - sleep until a closure has nothing left to wait on
+ *
+ * Sleeps until the refcount hits 1 - the thread that's running the closure owns
+ * the last refcount.
+ */
+static inline void closure_sync(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened);
+#endif
+
+ if (cl->closure_get_happened)
+ __closure_sync(cl);
+}
+
+#ifdef CONFIG_DEBUG_CLOSURES
+
+void closure_debug_create(struct closure *cl);
+void closure_debug_destroy(struct closure *cl);
+
+#else
+
+static inline void closure_debug_create(struct closure *cl) {}
+static inline void closure_debug_destroy(struct closure *cl) {}
+
+#endif
+
+static inline void closure_set_ip(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->ip = _THIS_IP_;
+#endif
+}
+
+static inline void closure_set_ret_ip(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->ip = _RET_IP_;
+#endif
+}
+
+static inline void closure_set_waiting(struct closure *cl, unsigned long f)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->waiting_on = f;
+#endif
+}
+
+static inline void closure_set_stopped(struct closure *cl)
+{
+ atomic_sub(CLOSURE_RUNNING, &cl->remaining);
+}
+
+static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
+ struct workqueue_struct *wq)
+{
+ closure_set_ip(cl);
+ cl->fn = fn;
+ cl->wq = wq;
+}
+
+static inline void closure_queue(struct closure *cl)
+{
+ struct workqueue_struct *wq = cl->wq;
+	/*
+	 * Changes made to closure, work_struct, or a couple of other structs
+	 * may cause work.func to no longer point at the right location.
+	 */
+ BUILD_BUG_ON(offsetof(struct closure, fn)
+ != offsetof(struct work_struct, func));
+
+ if (wq) {
+ INIT_WORK(&cl->work, cl->work.func);
+ BUG_ON(!queue_work(wq, &cl->work));
+ } else
+ cl->fn(&cl->work);
+}
+
+/**
+ * closure_get - increment a closure's refcount
+ */
+static inline void closure_get(struct closure *cl)
+{
+ cl->closure_get_happened = true;
+
+#ifdef CONFIG_DEBUG_CLOSURES
+ BUG_ON((atomic_inc_return(&cl->remaining) &
+ CLOSURE_REMAINING_MASK) <= 1);
+#else
+ atomic_inc(&cl->remaining);
+#endif
+}
+
+/**
+ * closure_init - Initialize a closure, setting the refcount to 1
+ * @cl: closure to initialize
+ * @parent: parent of the new closure. cl will take a refcount on it for its
+ * lifetime; may be NULL.
+ */
+static inline void closure_init(struct closure *cl, struct closure *parent)
+{
+ cl->fn = NULL;
+ cl->parent = parent;
+ if (parent)
+ closure_get(parent);
+
+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+ cl->closure_get_happened = false;
+
+ closure_debug_create(cl);
+ closure_set_ip(cl);
+}
+
+static inline void closure_init_stack(struct closure *cl)
+{
+ memset(cl, 0, sizeof(struct closure));
+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+}
+
+/**
+ * closure_wake_up - wake up all closures on a wait list,
+ * with memory barrier
+ */
+static inline void closure_wake_up(struct closure_waitlist *list)
+{
+ /* Memory barrier for the wait list */
+ smp_mb();
+ __closure_wake_up(list);
+}
+
+#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws)
+#define closure_type(name, type, member) \
+ struct closure *cl = container_of(ws, struct closure, work); \
+ type *name = container_of(cl, type, member)
+
+/**
+ * continue_at - jump to another function with barrier
+ *
+ * After @cl is no longer waiting on anything (i.e. all outstanding refs have
+ * been dropped with closure_put()), it will resume execution at @fn running out
+ * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly).
+ *
+ * This is because after calling continue_at() you no longer have a ref on @cl,
+ * and whatever @cl owns may be freed out from under you - a running closure fn
+ * has a ref on its own closure which continue_at() drops.
+ *
+ * Note you are expected to immediately return after using this macro.
+ */
+#define continue_at(_cl, _fn, _wq) \
+do { \
+ set_closure_fn(_cl, _fn, _wq); \
+ closure_sub(_cl, CLOSURE_RUNNING + 1); \
+} while (0)
+
+/**
+ * closure_return - finish execution of a closure
+ *
+ * This is used to indicate that @cl is finished: when all outstanding refs on
+ * @cl have been dropped @cl's ref on its parent closure (as passed to
+ * closure_init()) will be dropped, if one was specified - thus this can be
+ * thought of as returning to the parent closure.
+ */
+#define closure_return(_cl) continue_at((_cl), NULL, NULL)
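+
+/*
+ * Illustrative sketch, not part of the upstream header: a hypothetical struct
+ * foo embedding a closure in a member named cl, finished by foo_done():
+ *
+ *	CLOSURE_CALLBACK(foo_done)
+ *	{
+ *		closure_type(f, struct foo, cl);
+ *
+ *		foo_finish(f);
+ *		closure_return(&f->cl);
+ *	}
+ *
+ * and, while running out of f->cl with a ref held:
+ *
+ *	continue_at(&f->cl, foo_done, system_wq);
+ */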
+
+/**
+ * continue_at_nobarrier - jump to another function without barrier
+ *
+ * Causes @fn to be executed out of @cl, in @wq context (or called directly if
+ * @wq is NULL).
+ *
+ * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn,
+ * thus it's not safe to touch anything protected by @cl after a
+ * continue_at_nobarrier().
+ */
+#define continue_at_nobarrier(_cl, _fn, _wq) \
+do { \
+ set_closure_fn(_cl, _fn, _wq); \
+ closure_queue(_cl); \
+} while (0)
+
+/**
+ * closure_return_with_destructor - finish execution of a closure,
+ * with destructor
+ *
+ * Works like closure_return(), except @destructor will be called when all
+ * outstanding refs on @cl have been dropped; @destructor may be used to safely
+ * free the memory occupied by @cl, and it is called with the ref on the parent
+ * closure still held - so @destructor could safely return an item to a
+ * freelist protected by @cl's parent.
+ */
+#define closure_return_with_destructor(_cl, _destructor) \
+do { \
+ set_closure_fn(_cl, _destructor, NULL); \
+ closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \
+} while (0)
+
+/**
+ * closure_call - execute @fn out of a new, uninitialized closure
+ *
+ * Typically used when running out of one closure, and we want to run @fn
+ * asynchronously out of a new closure - @parent will then wait for @cl to
+ * finish.
+ */
+static inline void closure_call(struct closure *cl, closure_fn fn,
+ struct workqueue_struct *wq,
+ struct closure *parent)
+{
+ closure_init(cl, parent);
+ continue_at_nobarrier(cl, fn, wq);
+}
+
+#define __closure_wait_event(waitlist, _cond) \
+do { \
+ struct closure cl; \
+ \
+ closure_init_stack(&cl); \
+ \
+ while (1) { \
+ closure_wait(waitlist, &cl); \
+ if (_cond) \
+ break; \
+ closure_sync(&cl); \
+ } \
+ closure_wake_up(waitlist); \
+ closure_sync(&cl); \
+} while (0)
+
+#define closure_wait_event(waitlist, _cond) \
+do { \
+ if (!(_cond)) \
+ __closure_wait_event(waitlist, _cond); \
+} while (0)
+
+#endif /* _LINUX_CLOSURE_H */
diff --git a/c_src/include/linux/compiler.h b/c_src/include/linux/compiler.h
new file mode 100644
index 00000000..3ecc3dd1
--- /dev/null
+++ b/c_src/include/linux/compiler.h
@@ -0,0 +1,190 @@
+#ifndef _TOOLS_LINUX_COMPILER_H_
+#define _TOOLS_LINUX_COMPILER_H_
+
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
+
+#ifndef __always_inline
+# define __always_inline inline __attribute__((always_inline))
+#endif
+
+#ifndef __attribute_const__
+#define __attribute_const__ __attribute__((__const__))
+#endif
+
+#ifdef __ANDROID__
+/*
+ * FIXME: Big hammer to get rid of tons of:
+ * "warning: always_inline function might not be inlinable"
+ *
+ * At least on android-ndk-r12/platforms/android-24/arch-arm
+ */
+#undef __always_inline
+#define __always_inline inline
+#endif
+
+#define noinline
+#define noinline_for_stack noinline
+
+#define __user
+#define __kernel
+
+#define __pure __attribute__((pure))
+#define __aligned(x) __attribute__((aligned(x)))
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+#define __used __attribute__((__used__))
+#define __maybe_unused __attribute__((unused))
+#define __always_unused __attribute__((unused))
+#define __packed __attribute__((__packed__))
+#define __flatten __attribute__((flatten))
+#define __force
+#define __nocast
+#define __iomem
+#define __chk_user_ptr(x) (void)0
+#define __chk_io_ptr(x) (void)0
+#define __builtin_warning(x, y...) (1)
+#define __must_hold(x)
+#define __acquires(x)
+#define __cond_acquires(x)
+#define __releases(x)
+#define __acquire(x) (void)0
+#define __release(x) (void)0
+#define __cond_lock(x,c) (c)
+#define __percpu
+#define __rcu
+#define __sched
+#define __init
+#define __exit
+#define __private
+#define __must_check
+#define __malloc
+#define __weak __attribute__((weak))
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#define unreachable() __builtin_unreachable()
+#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
+#define fallthrough __attribute__((__fallthrough__))
+#define __noreturn __attribute__((__noreturn__))
+
+#ifndef __counted_by
+#define __counted_by(nr)
+#endif
+
+#define ___PASTE(a,b) a##b
+#define __PASTE(a,b) ___PASTE(a,b)
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#define __initcall(x) /* unimplemented */
+#define __exitcall(x) /* unimplemented */
+
+#include <linux/types.h>
+
+/*
+ * Following functions are taken from kernel sources and
+ * break aliasing rules in their original form.
+ *
+ * While kernel is compiled with -fno-strict-aliasing,
+ * perf uses -Wstrict-aliasing=3 which makes build fail
+ * under gcc 4.4.
+ *
+ * Using extra __may_alias__ type to allow aliasing
+ * in this case.
+ */
+typedef __u8 __attribute__((__may_alias__)) __u8_alias_t;
+typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
+typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
+typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(__u8_alias_t *) res = *(volatile __u8_alias_t *) p; break;
+ case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break;
+ case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break;
+ case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break;
+ default:
+ barrier();
+ __builtin_memcpy((void *)res, (const void *)p, size);
+ barrier();
+ }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(volatile __u8_alias_t *) p = *(__u8_alias_t *) res; break;
+ case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break;
+ case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break;
+ case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break;
+ default:
+ barrier();
+ __builtin_memcpy((void *)p, (const void *)res, size);
+ barrier();
+ }
+}
+
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
+ * compiler is aware of some particular ordering. One way to make the
+ * compiler aware of ordering is to put the two invocations of READ_ONCE,
+ * WRITE_ONCE or ACCESS_ONCE() in different C statements.
+ *
+ * In contrast to ACCESS_ONCE these two macros will also work on aggregate
+ * data types like structs or unions. If the size of the accessed data
+ * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
+ * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a
+ * compile-time warning.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+
+#define READ_ONCE(x) \
+ ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
+#define WRITE_ONCE(x, val) \
+ ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
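+
+/*
+ * Illustrative sketch, not part of the upstream header: a lockless writer and
+ * reader sharing a hypothetical flag:
+ *
+ *	static int ready;
+ *
+ *	writer:	WRITE_ONCE(ready, 1);
+ *
+ *	reader:	while (!READ_ONCE(ready))
+ *			;
+ *
+ * The accesses cannot be torn, merged or refetched by the compiler.
+ */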
+
+#define lockless_dereference(p) \
+({ \
+ typeof(p) _________p1 = READ_ONCE(p); \
+ typeof(*(p)) *___typecheck_p __maybe_unused; \
+ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
+ (_________p1); \
+})
+
+#define flush_cache_all() do { } while (0)
+#define flush_cache_mm(mm) do { } while (0)
+#define flush_cache_dup_mm(mm) do { } while (0)
+#define flush_cache_range(vma, start, end) do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define flush_dcache_page(page) do { } while (0)
+#define flush_dcache_mmap_lock(mapping) do { } while (0)
+#define flush_dcache_mmap_unlock(mapping) do { } while (0)
+#define flush_icache_range(start, end) do { } while (0)
+#define flush_icache_page(vma,pg) do { } while (0)
+#define flush_icache_user_range(vma,pg,adr,len) do { } while (0)
+#define flush_cache_vmap(start, end) do { } while (0)
+#define flush_cache_vunmap(start, end) do { } while (0)
+
+#ifdef __x86_64
+#define CONFIG_X86_64 y
+#endif
+
+#define __is_constexpr(x) \
+ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+#define is_signed_type(type) (((type)(-1)) < (__force type)1)
+#define is_unsigned_type(type) (!is_signed_type(type))
+
+#endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/c_src/include/linux/completion.h b/c_src/include/linux/completion.h
new file mode 100644
index 00000000..d11a8dd0
--- /dev/null
+++ b/c_src/include/linux/completion.h
@@ -0,0 +1,42 @@
+#ifndef __LINUX_COMPLETION_H
+#define __LINUX_COMPLETION_H
+
+/*
+ * (C) Copyright 2001 Linus Torvalds
+ *
+ * Atomic wait-for-completion handler data structures.
+ * See kernel/sched/completion.c for details.
+ */
+
+#include <linux/wait.h>
+
+struct completion {
+ unsigned int done;
+ wait_queue_head_t wait;
+};
+
+#define DECLARE_COMPLETION(work) \
+ struct completion work = { \
+ .done = 0, \
+ .wait = __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) \
+ }
+
+#define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
+
+static inline void init_completion(struct completion *x)
+{
+ x->done = 0;
+ init_waitqueue_head(&x->wait);
+}
+
+static inline void reinit_completion(struct completion *x)
+{
+ x->done = 0;
+}
+
+void complete(struct completion *);
+void wait_for_completion(struct completion *);
+
+#define wait_for_completion_interruptible(x) (wait_for_completion(x), 0)
+
+#endif
diff --git a/c_src/include/linux/console.h b/c_src/include/linux/console.h
new file mode 100644
index 00000000..d01aa9a2
--- /dev/null
+++ b/c_src/include/linux/console.h
@@ -0,0 +1,7 @@
+#ifndef _LINUX_CONSOLE_H_
+#define _LINUX_CONSOLE_H_
+
+#define console_lock()
+#define console_unlock()
+
+#endif /* _LINUX_CONSOLE_H_ */
diff --git a/c_src/include/linux/cpumask.h b/c_src/include/linux/cpumask.h
new file mode 100644
index 00000000..bfab7ea7
--- /dev/null
+++ b/c_src/include/linux/cpumask.h
@@ -0,0 +1,26 @@
+#ifndef __LINUX_CPUMASK_H
+#define __LINUX_CPUMASK_H
+
+#define num_online_cpus() 1U
+#define num_possible_cpus() 1U
+#define num_present_cpus() 1U
+#define num_active_cpus() 1U
+#define cpu_online(cpu) ((cpu) == 0)
+#define cpu_possible(cpu) ((cpu) == 0)
+#define cpu_present(cpu) ((cpu) == 0)
+#define cpu_active(cpu) ((cpu) == 0)
+
+#define raw_smp_processor_id() 0U
+
+#define for_each_cpu(cpu, mask) \
+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_not(cpu, mask) \
+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_and(cpu, mask, and) \
+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
+
+#define for_each_possible_cpu(cpu) for_each_cpu((cpu), 1)
+#define for_each_online_cpu(cpu) for_each_cpu((cpu), 1)
+#define for_each_present_cpu(cpu) for_each_cpu((cpu), 1)
+
+#endif /* __LINUX_CPUMASK_H */
diff --git a/c_src/include/linux/crc32c.h b/c_src/include/linux/crc32c.h
new file mode 100644
index 00000000..1ac74f7d
--- /dev/null
+++ b/c_src/include/linux/crc32c.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_CRC32C_H
+#define _LINUX_CRC32C_H
+
+#include "tools-util.h"
+
+#endif /* _LINUX_CRC32C_H */
diff --git a/c_src/include/linux/crc64.h b/c_src/include/linux/crc64.h
new file mode 100644
index 00000000..c756e65a
--- /dev/null
+++ b/c_src/include/linux/crc64.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * See lib/crc64.c for the related specification and polynomial arithmetic.
+ */
+#ifndef _LINUX_CRC64_H
+#define _LINUX_CRC64_H
+
+#include <linux/types.h>
+
+u64 __pure crc64_be(u64 crc, const void *p, size_t len);
+#endif /* _LINUX_CRC64_H */
diff --git a/c_src/include/linux/crypto.h b/c_src/include/linux/crypto.h
new file mode 100644
index 00000000..866b4c5a
--- /dev/null
+++ b/c_src/include/linux/crypto.h
@@ -0,0 +1,45 @@
+/*
+ * Scatterlist Cryptographic API.
+ *
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * Portions derived from Cryptoapi, by Alexander Kjeldaas <astor@fast.no>
+ * and Nettle, by Niels Möller.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _LINUX_CRYPTO_H
+#define _LINUX_CRYPTO_H
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN
+#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN)))
+
+struct crypto_type;
+
+struct crypto_alg {
+ struct list_head cra_list;
+
+ const char *cra_name;
+ const struct crypto_type *cra_type;
+
+ void * (*alloc_tfm)(void);
+} CRYPTO_MINALIGN_ATTR;
+
+int crypto_register_alg(struct crypto_alg *alg);
+
+struct crypto_tfm {
+ struct crypto_alg *alg;
+};
+
+#endif /* _LINUX_CRYPTO_H */
+
diff --git a/c_src/include/linux/ctype.h b/c_src/include/linux/ctype.h
new file mode 100644
index 00000000..26b7de5a
--- /dev/null
+++ b/c_src/include/linux/ctype.h
@@ -0,0 +1,2 @@
+
+#include <ctype.h>
diff --git a/c_src/include/linux/dcache.h b/c_src/include/linux/dcache.h
new file mode 100644
index 00000000..7637854d
--- /dev/null
+++ b/c_src/include/linux/dcache.h
@@ -0,0 +1,12 @@
+#ifndef __LINUX_DCACHE_H
+#define __LINUX_DCACHE_H
+
+struct super_block;
+struct inode;
+
+struct dentry {
+ struct super_block *d_sb;
+ struct inode *d_inode;
+};
+
+#endif /* __LINUX_DCACHE_H */
diff --git a/c_src/include/linux/debugfs.h b/c_src/include/linux/debugfs.h
new file mode 100644
index 00000000..9a78cb16
--- /dev/null
+++ b/c_src/include/linux/debugfs.h
@@ -0,0 +1,46 @@
+/*
+ * debugfs.h - a tiny little debug file system
+ *
+ * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2004 IBM Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * debugfs is for people to use instead of /proc or /sys.
+ * See Documentation/DocBook/filesystems for more details.
+ */
+
+#ifndef _DEBUGFS_H_
+#define _DEBUGFS_H_
+
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+struct file_operations;
+
+#include <linux/err.h>
+
+static inline struct dentry *debugfs_create_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fops)
+{
+ return ERR_PTR(-ENODEV);
+}
+
+static inline struct dentry *debugfs_create_dir(const char *name,
+ struct dentry *parent)
+{
+ return ERR_PTR(-ENODEV);
+}
+
+static inline void debugfs_remove(struct dentry *dentry)
+{ }
+
+static inline void debugfs_remove_recursive(struct dentry *dentry)
+{ }
+
+#endif
diff --git a/c_src/include/linux/device.h b/c_src/include/linux/device.h
new file mode 100644
index 00000000..2b2b8494
--- /dev/null
+++ b/c_src/include/linux/device.h
@@ -0,0 +1,40 @@
+#ifndef _DEVICE_H_
+#define _DEVICE_H_
+
+#include <linux/slab.h>
+#include <linux/types.h>
+
+struct module;
+
+struct class {
+};
+
+static inline void class_destroy(struct class *class)
+{
+ kfree(class);
+}
+
+static inline struct class * __must_check class_create(struct module *owner,
+ const char *name)
+{
+ return kzalloc(sizeof(struct class), GFP_KERNEL);
+}
+
+struct device {
+};
+
+static inline void device_unregister(struct device *dev)
+{
+ kfree(dev);
+}
+
+static inline void device_destroy(struct class *cls, dev_t devt) {}
+
+static inline struct device *device_create(struct class *cls, struct device *parent,
+ dev_t devt, void *drvdata,
+ const char *fmt, ...)
+{
+ return kzalloc(sizeof(struct device), GFP_KERNEL);
+}
+
+#endif /* _DEVICE_H_ */
diff --git a/c_src/include/linux/dynamic_fault.h b/c_src/include/linux/dynamic_fault.h
new file mode 100644
index 00000000..dd215dcb
--- /dev/null
+++ b/c_src/include/linux/dynamic_fault.h
@@ -0,0 +1,7 @@
+#ifndef __TOOLS_LINUX_DYNAMIC_FAULT_H
+#define __TOOLS_LINUX_DYNAMIC_FAULT_H
+
+#define dynamic_fault(_class) 0
+#define race_fault() 0
+
+#endif /* __TOOLS_LINUX_DYNAMIC_FAULT_H */
diff --git a/c_src/include/linux/err.h b/c_src/include/linux/err.h
new file mode 100644
index 00000000..e94bdff5
--- /dev/null
+++ b/c_src/include/linux/err.h
@@ -0,0 +1,68 @@
+#ifndef __TOOLS_LINUX_ERR_H
+#define __TOOLS_LINUX_ERR_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include <asm/errno.h>
+
+/*
+ * Original kernel header comment:
+ *
+ * Kernel pointers have redundant information, so we can use a
+ * scheme where we can return either an error code or a normal
+ * pointer with the same return value.
+ *
+ * This should be a per-architecture thing, to allow different
+ * error and pointer decisions.
+ *
+ * Userspace note:
+ * The same principle works for userspace, because 'error' pointers
+ * fall down to the unused hole far from user space, as described
+ * in Documentation/x86/x86_64/mm.txt for x86_64 arch:
+ *
+ * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension
+ * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+ *
+ * It should be the same case for other architectures, because
+ * this code is used in generic kernel code.
+ */
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error_)
+{
+ return (void *) error_;
+}
+
+static inline long __must_check PTR_ERR(__force const void *ptr)
+{
+ return (long) ptr;
+}
+
+static inline bool __must_check IS_ERR(__force const void *ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr)
+{
+ return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline void * __must_check ERR_CAST(__force const void *ptr)
+{
+ /* cast away the const */
+ return (void *) ptr;
+}
+
+static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
+{
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+ else
+ return 0;
+}
+
+#endif /* __TOOLS_LINUX_ERR_H */
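For reference, the pointer/errno round trip these helpers enable looks like the following minimal sketch (illustrative only, not part of the files added above; assumes this c_src/include tree is on the include path):

#include <linux/err.h>
#include <stdio.h>

static int token;

/* Return either a real pointer or an errno encoded with ERR_PTR(). */
static void *open_thing(int fail)
{
	return fail ? ERR_PTR(-ENOMEM) : (void *) &token;
}

int main(void)
{
	void *p = open_thing(1);

	if (IS_ERR(p))
		printf("error: %ld\n", PTR_ERR(p));	/* -12 (ENOMEM) */
	return 0;
}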
diff --git a/c_src/include/linux/errname.h b/c_src/include/linux/errname.h
new file mode 100644
index 00000000..443d5040
--- /dev/null
+++ b/c_src/include/linux/errname.h
@@ -0,0 +1,11 @@
+#ifndef _LINUX_ERRNAME_H
+#define _LINUX_ERRNAME_H
+
+#include <string.h>
+
+static inline const char *errname(int err)
+{
+ return strerror(abs(err));
+}
+
+#endif /* _LINUX_ERRNAME_H */
diff --git a/c_src/include/linux/export.h b/c_src/include/linux/export.h
new file mode 100644
index 00000000..af9da968
--- /dev/null
+++ b/c_src/include/linux/export.h
@@ -0,0 +1,13 @@
+#ifndef _TOOLS_LINUX_EXPORT_H_
+#define _TOOLS_LINUX_EXPORT_H_
+
+#define EXPORT_SYMBOL(sym)
+#define EXPORT_SYMBOL_GPL(sym)
+#define EXPORT_SYMBOL_GPL_FUTURE(sym)
+#define EXPORT_UNUSED_SYMBOL(sym)
+#define EXPORT_UNUSED_SYMBOL_GPL(sym)
+
+#define THIS_MODULE ((struct module *)0)
+#define KBUILD_MODNAME
+
+#endif
diff --git a/c_src/include/linux/freezer.h b/c_src/include/linux/freezer.h
new file mode 100644
index 00000000..d90373f3
--- /dev/null
+++ b/c_src/include/linux/freezer.h
@@ -0,0 +1,12 @@
+#ifndef __TOOLS_LINUX_FREEZER_H
+#define __TOOLS_LINUX_FREEZER_H
+
+#define try_to_freeze()
+#define set_freezable()
+#define freezing(task) false
+#define freezable_schedule() schedule()
+#define freezable_schedule_timeout(_t) schedule_timeout(_t)
+
+static inline void __refrigerator(bool f) {}
+
+#endif /* __TOOLS_LINUX_FREEZER_H */
diff --git a/c_src/include/linux/generic-radix-tree.h b/c_src/include/linux/generic-radix-tree.h
new file mode 100644
index 00000000..84741316
--- /dev/null
+++ b/c_src/include/linux/generic-radix-tree.h
@@ -0,0 +1,298 @@
+#ifndef _LINUX_GENERIC_RADIX_TREE_H
+#define _LINUX_GENERIC_RADIX_TREE_H
+
+/**
+ * DOC: Generic radix trees/sparse arrays
+ *
+ * Very simple and minimalistic, supporting arbitrary size entries up to
+ * PAGE_SIZE.
+ *
+ * A genradix is defined with the type it will store, like so:
+ *
+ * static GENRADIX(struct foo) foo_genradix;
+ *
+ * The main operations are:
+ *
+ * - genradix_init(radix) - initialize an empty genradix
+ *
+ * - genradix_free(radix) - free all memory owned by the genradix and
+ * reinitialize it
+ *
+ * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning
+ * NULL if that entry does not exist
+ *
+ * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry,
+ * allocating it if necessary
+ *
+ * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix
+ *
+ * The radix tree allocates one page of entries at a time, so entries may exist
+ * that were never explicitly allocated - they will be initialized to all
+ * zeroes.
+ *
+ * Internally, a genradix is just a radix tree of pages, and indexing works in
+ * terms of byte offsets. The wrappers in this header file use sizeof on the
+ * type the radix contains to calculate a byte offset from the index - see
+ * __idx_to_offset.
+ */
+
+#include <asm/page.h>
+#include <linux/bug.h>
+#include <linux/limits.h>
+#include <linux/log2.h>
+#include <linux/math.h>
+#include <linux/types.h>
+
+struct genradix_root;
+
+struct __genradix {
+ struct genradix_root *root;
+};
+
+/*
+ * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE:
+ */
+
+#define __GENRADIX_INITIALIZER \
+ { \
+ .tree = { \
+ .root = NULL, \
+ } \
+ }
+
+/*
+ * We use a 0 size array to stash the type we're storing without taking any
+ * space at runtime - then the various accessor macros can use typeof() to get
+ * to it for casts/sizeof - we also force the alignment so that storing a type
+ * with a ridiculous alignment doesn't blow up the alignment or size of the
+ * genradix.
+ */
+
+#define GENRADIX(_type) \
+struct { \
+ struct __genradix tree; \
+ _type type[0] __aligned(1); \
+}
+
+#define DEFINE_GENRADIX(_name, _type) \
+ GENRADIX(_type) _name = __GENRADIX_INITIALIZER
+
+/**
+ * genradix_init - initialize a genradix
+ * @_radix: genradix to initialize
+ *
+ * Does not fail
+ */
+#define genradix_init(_radix) \
+do { \
+ *(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER; \
+} while (0)
+
+void __genradix_free(struct __genradix *);
+
+/**
+ * genradix_free: free all memory owned by a genradix
+ * @_radix: the genradix to free
+ *
+ * After freeing, @_radix will be reinitialized and empty
+ */
+#define genradix_free(_radix) __genradix_free(&(_radix)->tree)
+
+static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
+{
+ if (__builtin_constant_p(obj_size))
+ BUILD_BUG_ON(obj_size > PAGE_SIZE);
+ else
+ BUG_ON(obj_size > PAGE_SIZE);
+
+ if (!is_power_of_2(obj_size)) {
+ size_t objs_per_page = PAGE_SIZE / obj_size;
+
+ return (idx / objs_per_page) * PAGE_SIZE +
+ (idx % objs_per_page) * obj_size;
+ } else {
+ return idx * obj_size;
+ }
+}
+
+#define __genradix_cast(_radix) (typeof((_radix)->type[0]) *)
+#define __genradix_obj_size(_radix) sizeof((_radix)->type[0])
+#define __genradix_objs_per_page(_radix) \
+ (PAGE_SIZE / sizeof((_radix)->type[0]))
+#define __genradix_page_remainder(_radix) \
+ (PAGE_SIZE % sizeof((_radix)->type[0]))
+
+#define __genradix_idx_to_offset(_radix, _idx) \
+ __idx_to_offset(_idx, __genradix_obj_size(_radix))
+
+void *__genradix_ptr(struct __genradix *, size_t);
+
+/**
+ * genradix_ptr - get a pointer to a genradix entry
+ * @_radix: genradix to access
+ * @_idx: index to fetch
+ *
+ * Returns a pointer to entry at @_idx, or NULL if that entry does not exist.
+ */
+#define genradix_ptr(_radix, _idx) \
+ (__genradix_cast(_radix) \
+ __genradix_ptr(&(_radix)->tree, \
+ __genradix_idx_to_offset(_radix, _idx)))
+
+void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t);
+
+/**
+ * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it
+ * if necessary
+ * @_radix: genradix to access
+ * @_idx: index to fetch
+ * @_gfp: gfp mask
+ *
+ * Returns a pointer to entry at @_idx, or NULL on allocation failure
+ */
+#define genradix_ptr_alloc(_radix, _idx, _gfp) \
+ (__genradix_cast(_radix) \
+ __genradix_ptr_alloc(&(_radix)->tree, \
+ __genradix_idx_to_offset(_radix, _idx), \
+ _gfp))
+
+struct genradix_iter {
+ size_t offset;
+ size_t pos;
+};
+
+/**
+ * genradix_iter_init - initialize a genradix_iter
+ * @_radix: genradix that will be iterated over
+ * @_idx: index to start iterating from
+ */
+#define genradix_iter_init(_radix, _idx) \
+ ((struct genradix_iter) { \
+ .pos = (_idx), \
+ .offset = __genradix_idx_to_offset((_radix), (_idx)),\
+ })
+
+void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
+
+/**
+ * genradix_iter_peek - get first entry at or above iterator's current
+ * position
+ * @_iter: a genradix_iter
+ * @_radix: genradix being iterated over
+ *
+ * If no more entries exist at or above @_iter's current position, returns NULL
+ */
+#define genradix_iter_peek(_iter, _radix) \
+ (__genradix_cast(_radix) \
+ __genradix_iter_peek(_iter, &(_radix)->tree, \
+ __genradix_objs_per_page(_radix)))
+
+void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *,
+ size_t, size_t);
+
+/**
+ * genradix_iter_peek_prev - get first entry at or below iterator's current
+ * position
+ * @_iter: a genradix_iter
+ * @_radix: genradix being iterated over
+ *
+ * If no more entries exist at or below @_iter's current position, returns NULL
+ */
+#define genradix_iter_peek_prev(_iter, _radix) \
+ (__genradix_cast(_radix) \
+ __genradix_iter_peek_prev(_iter, &(_radix)->tree, \
+ __genradix_objs_per_page(_radix), \
+ __genradix_obj_size(_radix) + \
+ __genradix_page_remainder(_radix)))
+
+static inline void __genradix_iter_advance(struct genradix_iter *iter,
+ size_t obj_size)
+{
+ if (iter->offset + obj_size < iter->offset) {
+ iter->offset = SIZE_MAX;
+ iter->pos = SIZE_MAX;
+ return;
+ }
+
+ iter->offset += obj_size;
+
+ if (!is_power_of_2(obj_size) &&
+ (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE)
+ iter->offset = round_up(iter->offset, PAGE_SIZE);
+
+ iter->pos++;
+}
+
+#define genradix_iter_advance(_iter, _radix) \
+ __genradix_iter_advance(_iter, __genradix_obj_size(_radix))
+
+static inline void __genradix_iter_rewind(struct genradix_iter *iter,
+ size_t obj_size)
+{
+ if (iter->offset == 0 ||
+ iter->offset == SIZE_MAX) {
+ iter->offset = SIZE_MAX;
+ return;
+ }
+
+ if ((iter->offset & (PAGE_SIZE - 1)) == 0)
+ iter->offset -= PAGE_SIZE % obj_size;
+
+ iter->offset -= obj_size;
+ iter->pos--;
+}
+
+#define genradix_iter_rewind(_iter, _radix) \
+ __genradix_iter_rewind(_iter, __genradix_obj_size(_radix))
+
+#define genradix_for_each_from(_radix, _iter, _p, _start) \
+ for (_iter = genradix_iter_init(_radix, _start); \
+ (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \
+ genradix_iter_advance(&_iter, _radix))
+
+/**
+ * genradix_for_each - iterate over each entry in a genradix
+ * @_radix: genradix to iterate over
+ * @_iter: a genradix_iter to track current position
+ * @_p: pointer to genradix entry type
+ *
+ * On every iteration, @_p will point to the current entry, and @_iter.pos
+ * will be the current entry's index.
+ */
+#define genradix_for_each(_radix, _iter, _p) \
+ genradix_for_each_from(_radix, _iter, _p, 0)
+
+#define genradix_last_pos(_radix) \
+ (SIZE_MAX / PAGE_SIZE * __genradix_objs_per_page(_radix) - 1)
+
+/**
+ * genradix_for_each_reverse - iterate over each entry in a genradix, in reverse order
+ * @_radix: genradix to iterate over
+ * @_iter: a genradix_iter to track current position
+ * @_p: pointer to genradix entry type
+ *
+ * On every iteration, @_p will point to the current entry, and @_iter.pos
+ * will be the current entry's index.
+ */
+#define genradix_for_each_reverse(_radix, _iter, _p) \
+ for (_iter = genradix_iter_init(_radix, genradix_last_pos(_radix));\
+ (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\
+ genradix_iter_rewind(&_iter, _radix))
+
+int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
+
+/**
+ * genradix_prealloc - preallocate entries in a generic radix tree
+ * @_radix: genradix to preallocate
+ * @_nr: number of entries to preallocate
+ * @_gfp: gfp mask
+ *
+ * Returns 0 on success, -ENOMEM on failure
+ */
+#define genradix_prealloc(_radix, _nr, _gfp) \
+ __genradix_prealloc(&(_radix)->tree, \
+ __genradix_idx_to_offset(_radix, _nr + 1),\
+ _gfp)
+
+
+#endif /* _LINUX_GENERIC_RADIX_TREE_H */
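The DOC comment above already lists the operations; a minimal usage sketch (assuming GFP_KERNEL comes in via this tree's slab.h and that the __genradix_* implementation is linked in) could look like:

#include <linux/generic-radix-tree.h>
#include <linux/slab.h>
#include <errno.h>
#include <stdio.h>

struct foo {
	u64 id;
};

static GENRADIX(struct foo) foos;

static int demo(void)
{
	struct genradix_iter iter;
	struct foo *f;

	genradix_init(&foos);

	/* allocate (if necessary) and fill the entry at index 10 */
	f = genradix_ptr_alloc(&foos, 10, GFP_KERNEL);
	if (!f)
		return -ENOMEM;
	f->id = 10;

	/* never-written entries read back as zeroes, so skip them */
	genradix_for_each(&foos, iter, f)
		if (f->id)
			printf("idx %zu id %llu\n", iter.pos, (unsigned long long) f->id);

	genradix_free(&foos);
	return 0;
}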
diff --git a/c_src/include/linux/genhd.h b/c_src/include/linux/genhd.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/c_src/include/linux/genhd.h
diff --git a/c_src/include/linux/gfp.h b/c_src/include/linux/gfp.h
new file mode 100644
index 00000000..3830bc2f
--- /dev/null
+++ b/c_src/include/linux/gfp.h
@@ -0,0 +1 @@
+#include <linux/slab.h>
diff --git a/c_src/include/linux/hash.h b/c_src/include/linux/hash.h
new file mode 100644
index 00000000..ad6fa21d
--- /dev/null
+++ b/c_src/include/linux/hash.h
@@ -0,0 +1,104 @@
+#ifndef _LINUX_HASH_H
+#define _LINUX_HASH_H
+/* Fast hashing routine for ints, longs and pointers.
+ (C) 2002 Nadia Yvette Chambers, IBM */
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The "GOLDEN_RATIO_PRIME" is used in ifs/btrfs/brtfs_inode.h and
+ * fs/inode.c. It's not actually prime any more (the previous primes
+ * were actively bad for hashing), but the name remains.
+ */
+#if BITS_PER_LONG == 32
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32
+#define hash_long(val, bits) hash_32(val, bits)
+#elif BITS_PER_LONG == 64
+#define hash_long(val, bits) hash_64(val, bits)
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64
+#else
+#error Wordsize not 32 or 64
+#endif
+
+/*
+ * This hash multiplies the input by a large odd number and takes the
+ * high bits. Since multiplication propagates changes to the most
+ * significant end only, it is essential that the high bits of the
+ * product be used for the hash value.
+ *
+ * Chuck Lever verified the effectiveness of this technique:
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
+ *
+ * Although a random odd number will do, it turns out that the golden
+ * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
+ * properties. (See Knuth vol 3, section 6.4, exercise 9.)
+ *
+ * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2,
+ * which is very slightly easier to multiply by and makes no
+ * difference to the hash distribution.
+ */
+#define GOLDEN_RATIO_32 0x61C88647
+#define GOLDEN_RATIO_64 0x61C8864680B583EBull
+
+#ifdef CONFIG_HAVE_ARCH_HASH
+/* This header may use the GOLDEN_RATIO_xx constants */
+#include <asm/hash.h>
+#endif
+
+/*
+ * The _generic versions exist only so lib/test_hash.c can compare
+ * the arch-optimized versions with the generic.
+ *
+ * Note that if you change these, any <asm/hash.h> that aren't updated
+ * to match need to have their HAVE_ARCH_* define values updated so the
+ * self-test will not false-positive.
+ */
+#ifndef HAVE_ARCH__HASH_32
+#define __hash_32 __hash_32_generic
+#endif
+static inline u32 __hash_32_generic(u32 val)
+{
+ return val * GOLDEN_RATIO_32;
+}
+
+#ifndef HAVE_ARCH_HASH_32
+#define hash_32 hash_32_generic
+#endif
+static inline u32 hash_32_generic(u32 val, unsigned int bits)
+{
+ /* High bits are more random, so use them. */
+ return __hash_32(val) >> (32 - bits);
+}
+
+#ifndef HAVE_ARCH_HASH_64
+#define hash_64 hash_64_generic
+#endif
+static __always_inline u32 hash_64_generic(u64 val, unsigned int bits)
+{
+#if BITS_PER_LONG == 64
+ /* 64x64-bit multiply is efficient on all 64-bit processors */
+ return val * GOLDEN_RATIO_64 >> (64 - bits);
+#else
+ /* Hash 64 bits using only 32x32-bit multiply. */
+ return hash_32((u32)val ^ __hash_32(val >> 32), bits);
+#endif
+}
+
+static inline u32 hash_ptr(const void *ptr, unsigned int bits)
+{
+ return hash_long((unsigned long)ptr, bits);
+}
+
+/* This really should be called fold32_ptr; it does no hashing to speak of. */
+static inline u32 hash32_ptr(const void *ptr)
+{
+ unsigned long val = (unsigned long)ptr;
+
+#if BITS_PER_LONG == 64
+ val ^= (val >> 32);
+#endif
+ return (u32)val;
+}
+
+#endif /* _LINUX_HASH_H */
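As a sketch of how these multiplicative hashes are typically consumed (bucket selection in a power-of-two table; assumes the usual u32/BITS_PER_LONG definitions from this tree):

#include <linux/hash.h>
#include <stdio.h>

#define BUCKET_BITS 6	/* 2^6 = 64 buckets */

int main(void)
{
	u32 key = 12345;
	int obj;

	/* high bits of key * GOLDEN_RATIO_32, folded down to BUCKET_BITS */
	printf("int key -> bucket %u\n", hash_32(key, BUCKET_BITS));

	/* pointers go through hash_long() on the pointer value */
	printf("pointer -> bucket %u\n", hash_ptr(&obj, BUCKET_BITS));
	return 0;
}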
diff --git a/c_src/include/linux/idr.h b/c_src/include/linux/idr.h
new file mode 100644
index 00000000..6f928254
--- /dev/null
+++ b/c_src/include/linux/idr.h
@@ -0,0 +1,208 @@
+/*
+ * include/linux/idr.h
+ *
+ * 2002-10-18 written by Jim Houston jim.houston@ccur.com
+ * Copyright (C) 2002 by Concurrent Computer Corporation
+ * Distributed under the GNU GPL license version 2.
+ *
+ * Small id to pointer translation service avoiding fixed sized
+ * tables.
+ */
+
+#ifndef __IDR_H__
+#define __IDR_H__
+
+#include <linux/types.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+
+/*
+ * We want shallower trees and thus more bits covered at each layer. 8
+ * bits gives us large enough first layer for most use cases and maximum
+ * tree depth of 4. Each idr_layer is slightly larger than 2k on 64bit and
+ * 1k on 32bit.
+ */
+#define IDR_BITS 8
+#define IDR_SIZE (1 << IDR_BITS)
+#define IDR_MASK ((1 << IDR_BITS)-1)
+
+struct idr_layer {
+ int prefix; /* the ID prefix of this idr_layer */
+ int layer; /* distance from leaf */
+ struct idr_layer __rcu *ary[1<<IDR_BITS];
+ int count; /* When zero, we can release it */
+ union {
+ /* A zero bit means "space here" */
+ DECLARE_BITMAP(bitmap, IDR_SIZE);
+ struct rcu_head rcu_head;
+ };
+};
+
+struct idr {
+ struct idr_layer __rcu *hint; /* the last layer allocated from */
+ struct idr_layer __rcu *top;
+ int layers; /* only valid w/o concurrent changes */
+ int cur; /* current pos for cyclic allocation */
+ spinlock_t lock;
+ int id_free_cnt;
+ struct idr_layer *id_free;
+};
+
+#define IDR_INIT(name) \
+{ \
+ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
+}
+#define DEFINE_IDR(name) struct idr name = IDR_INIT(name)
+
+/**
+ * DOC: idr sync
+ * idr synchronization (stolen from radix-tree.h)
+ *
+ * idr_find() is able to be called locklessly, using RCU. The caller must
+ * ensure calls to this function are made within rcu_read_lock() regions.
+ * Other readers (lock-free or otherwise) and modifications may be running
+ * concurrently.
+ *
+ * It is still required that the caller manage the synchronization and
+ * lifetimes of the items. So if RCU lock-free lookups are used, typically
+ * this would mean that the items have their own locks, or are amenable to
+ * lock-free access; and that the items are freed by RCU (or only freed after
+ * having been deleted from the idr tree *and* a synchronize_rcu() grace
+ * period).
+ */
+
+/*
+ * This is what we export.
+ */
+
+void *idr_find_slowpath(struct idr *idp, int id);
+void idr_preload(gfp_t gfp_mask);
+
+static inline int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask)
+{
+ return 0;
+}
+
+static inline void idr_remove(struct idr *idp, int id) {}
+
+int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask);
+int idr_for_each(struct idr *idp,
+ int (*fn)(int id, void *p, void *data), void *data);
+void *idr_get_next(struct idr *idp, int *nextid);
+void *idr_replace(struct idr *idp, void *ptr, int id);
+void idr_destroy(struct idr *idp);
+void idr_init(struct idr *idp);
+bool idr_is_empty(struct idr *idp);
+
+/**
+ * idr_preload_end - end preload section started with idr_preload()
+ *
+ * Each idr_preload() should be matched with an invocation of this
+ * function. See idr_preload() for details.
+ */
+static inline void idr_preload_end(void)
+{
+ preempt_enable();
+}
+
+/**
+ * idr_find - return pointer for given id
+ * @idr: idr handle
+ * @id: lookup key
+ *
+ * Return the pointer given the id it has been registered with. A %NULL
+ * return indicates that @id is not valid or you passed %NULL in
+ * idr_get_new().
+ *
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers lifetimes are correctly managed.
+ */
+static inline void *idr_find(struct idr *idr, int id)
+{
+ struct idr_layer *hint = rcu_dereference_raw(idr->hint);
+
+ if (hint && (id & ~IDR_MASK) == hint->prefix)
+ return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
+
+ return idr_find_slowpath(idr, id);
+}
+
+/**
+ * idr_for_each_entry - iterate over an idr's elements of a given type
+ * @idp: idr handle
+ * @entry: the type * to use as cursor
+ * @id: id entry's key
+ *
+ * @entry and @id do not need to be initialized before the loop, and
+ * after normal termination @entry is left with the value NULL. This
+ * is convenient for a "not found" value.
+ */
+#define idr_for_each_entry(idp, entry, id) \
+ for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
+
+/**
+ * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
+ * @idp: idr handle
+ * @entry: the type * to use as cursor
+ * @id: id entry's key
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define idr_for_each_entry_continue(idp, entry, id) \
+ for ((entry) = idr_get_next((idp), &(id)); \
+ entry; \
+ ++id, (entry) = idr_get_next((idp), &(id)))
+
+/*
+ * IDA - IDR based id allocator, use when translation from id to
+ * pointer isn't necessary.
+ *
+ * IDA_BITMAP_LONGS is calculated to be one less to accommodate
+ * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes.
+ */
+#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */
+#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long) - 1)
+#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8)
+
+struct ida_bitmap {
+ long nr_busy;
+ unsigned long bitmap[IDA_BITMAP_LONGS];
+};
+
+struct ida {
+ struct idr idr;
+ struct ida_bitmap *free_bitmap;
+};
+
+#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
+#define DEFINE_IDA(name) struct ida name = IDA_INIT(name)
+
+int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
+int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
+void ida_remove(struct ida *ida, int id);
+void ida_destroy(struct ida *ida);
+void ida_init(struct ida *ida);
+
+int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
+ gfp_t gfp_mask);
+void ida_simple_remove(struct ida *ida, unsigned int id);
+
+/**
+ * ida_get_new - allocate new ID
+ * @ida: idr handle
+ * @p_id: pointer to the allocated handle
+ *
+ * Simple wrapper around ida_get_new_above() w/ @starting_id of zero.
+ */
+static inline int ida_get_new(struct ida *ida, int *p_id)
+{
+ return ida_get_new_above(ida, 0, p_id);
+}
+
+void __init idr_init_cache(void);
+
+#endif /* __IDR_H__ */
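A sketch of the lookup/iteration pattern these declarations support, using a hypothetical "session" type (assumes the matching idr implementation in this tree is linked in):

#include <linux/idr.h>
#include <stdio.h>

struct session {
	int fd;
};

static DEFINE_IDR(sessions);

static void dump_sessions(void)
{
	struct session *s;
	int id;

	/* visits every populated id in ascending order; s ends up NULL */
	idr_for_each_entry(&sessions, s, id)
		printf("id %d -> fd %d\n", id, s->fd);
}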
diff --git a/c_src/include/linux/ioprio.h b/c_src/include/linux/ioprio.h
new file mode 100644
index 00000000..822c64a2
--- /dev/null
+++ b/c_src/include/linux/ioprio.h
@@ -0,0 +1,46 @@
+#ifndef IOPRIO_H
+#define IOPRIO_H
+
+/*
+ * Gives us 8 prio classes with 13-bits of data for each class
+ */
+#define IOPRIO_BITS (16)
+#define IOPRIO_CLASS_SHIFT (13)
+#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
+
+#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
+#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
+#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
+
+#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE)
+
+/*
+ * These are the io priority groups as implemented by CFQ. RT is the realtime
+ * class, it always gets premium service. BE is the best-effort scheduling
+ * class, the default for any process. IDLE is the idle scheduling class, it
+ * is only served when no one else is using the disk.
+ */
+enum {
+ IOPRIO_CLASS_NONE,
+ IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE,
+ IOPRIO_CLASS_IDLE,
+};
+
+/*
+ * 8 best effort priority levels are supported
+ */
+#define IOPRIO_BE_NR (8)
+
+enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+};
+
+/*
+ * Fallback BE priority
+ */
+#define IOPRIO_NORM (4)
+
+#endif
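A quick worked example of the class/data packing above (IOPRIO_CLASS_BE is 2, so the packed value is (2 << 13) | 4 = 16388):

#include <linux/ioprio.h>
#include <assert.h>

int main(void)
{
	/* best-effort class at the default level, IOPRIO_NORM (4) */
	int prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);

	assert(IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_BE);
	assert(IOPRIO_PRIO_DATA(prio) == IOPRIO_NORM);
	assert(ioprio_valid(prio));
	return 0;
}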
diff --git a/c_src/include/linux/jhash.h b/c_src/include/linux/jhash.h
new file mode 100644
index 00000000..348c6f47
--- /dev/null
+++ b/c_src/include/linux/jhash.h
@@ -0,0 +1,175 @@
+#ifndef _LINUX_JHASH_H
+#define _LINUX_JHASH_H
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions. Routines to test the hash are included
+ * if SELF_TEST is defined. You can use this free for any purpose. It's in
+ * the public domain. It has no warranty.
+ *
+ * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are my fault.
+ * Jozsef
+ */
+#include <linux/bitops.h>
+#include <linux/unaligned/packed_struct.h>
+
+/* Best hash sizes are of power of two */
+#define jhash_size(n) ((u32)1<<(n))
+/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
+#define jhash_mask(n) (jhash_size(n)-1)
+
+/* __jhash_mix -- mix 3 32-bit values reversibly. */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+/* An arbitrary initial parameter */
+#define JHASH_INITVAL 0xdeadbeef
+
+/* jhash - hash an arbitrary key
+ * @k: sequence of bytes as key
+ * @length: the length of the key
+ * @initval: the previous hash, or an arbitrary value
+ *
+ * The generic version, hashes an arbitrary sequence of bytes.
+ * No alignment or length assumptions are made about the input key.
+ *
+ * Returns the hash value of the key. The result depends on endianness.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const u8 *k = key;
+
+ /* Set up the internal state */
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ /* All but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12) {
+ a += __get_unaligned_cpu32(k);
+ b += __get_unaligned_cpu32(k + 4);
+ c += __get_unaligned_cpu32(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ /* Last block: affect all 32 bits of (c) */
+ /* All the case statements fall through */
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+/* jhash2 - hash an array of u32's
+ * @k: the key which must be an array of u32's
+ * @length: the number of u32's in the key
+ * @initval: the previous hash, or an arbitrary value
+ *
+ * Returns the hash value of the key.
+ */
+static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+ u32 a, b, c;
+
+ /* Set up the internal state */
+ a = b = c = JHASH_INITVAL + (length<<2) + initval;
+
+ /* Handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ __jhash_mix(a, b, c);
+ length -= 3;
+ k += 3;
+ }
+
+ /* Handle the last 3 u32's: all the case statements fall through */
+ switch (length) {
+ case 3: c += k[2];
+ case 2: b += k[1];
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+
+/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+
+ __jhash_final(a, b, c);
+
+ return c;
+}
+
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+ return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2));
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+ return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2));
+}
+
+#endif /* _LINUX_JHASH_H */
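A sketch of hashing a small fixed-size key with the helpers above (the flow_key struct is hypothetical, used only for illustration):

#include <linux/jhash.h>
#include <stdio.h>

struct flow_key {
	u32 saddr, daddr, ports;
};

int main(void)
{
	struct flow_key k = { 0x0a000001, 0x0a000002, 0x1f90 };

	/* hash the whole struct as an array of three u32s, seed 0 */
	u32 h1 = jhash2((const u32 *) &k, sizeof(k) / sizeof(u32), 0);

	/* or hash two words directly via the fixed-size fast path */
	u32 h2 = jhash_2words(k.saddr, k.daddr, 0);

	printf("%08x %08x\n", h1, h2);
	return 0;
}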
diff --git a/c_src/include/linux/jiffies.h b/c_src/include/linux/jiffies.h
new file mode 100644
index 00000000..d16ea76f
--- /dev/null
+++ b/c_src/include/linux/jiffies.h
@@ -0,0 +1,91 @@
+#ifndef _LINUX_JIFFIES_H
+#define _LINUX_JIFFIES_H
+
+#include <time.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <linux/typecheck.h>
+#include <linux/types.h>
+
+#define time_after(a,b) \
+ (typecheck(unsigned long, a) && \
+ typecheck(unsigned long, b) && \
+ ((long)((b) - (a)) < 0))
+#define time_before(a,b) time_after(b,a)
+
+#define time_after_eq(a,b) \
+ (typecheck(unsigned long, a) && \
+ typecheck(unsigned long, b) && \
+ ((long)((a) - (b)) >= 0))
+#define time_before_eq(a,b) time_after_eq(b,a)
+
+#define time_in_range(a,b,c) \
+ (time_after_eq(a,b) && \
+ time_before_eq(a,c))
+
+#define time_in_range_open(a,b,c) \
+ (time_after_eq(a,b) && \
+ time_before(a,c))
+
+#define time_after64(a,b) \
+ (typecheck(__u64, a) && \
+ typecheck(__u64, b) && \
+ ((__s64)((b) - (a)) < 0))
+#define time_before64(a,b) time_after64(b,a)
+
+#define time_after_eq64(a,b) \
+ (typecheck(__u64, a) && \
+ typecheck(__u64, b) && \
+ ((__s64)((a) - (b)) >= 0))
+#define time_before_eq64(a,b) time_after_eq64(b,a)
+
+#define time_in_range64(a, b, c) \
+ (time_after_eq64(a, b) && \
+ time_before_eq64(a, c))
+
+#define time_is_before_jiffies(a) time_after(jiffies, a)
+
+#define HZ 1000
+
+static inline u64 jiffies_to_nsecs(const unsigned long j)
+{
+ return (u64)j * NSEC_PER_MSEC;
+}
+
+static inline unsigned jiffies_to_msecs(const unsigned long j)
+{
+ return j;
+}
+
+static inline unsigned long msecs_to_jiffies(const unsigned int m)
+{
+ return m;
+}
+
+static inline unsigned long nsecs_to_jiffies(u64 n)
+{
+ return n / NSEC_PER_MSEC;
+}
+
+static inline u64 sched_clock(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+
+ return ((s64) ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
+}
+
+static inline u64 local_clock(void)
+{
+ return sched_clock();
+}
+
+static inline u64 ktime_get_ns(void)
+{
+ return sched_clock();
+}
+
+#define jiffies nsecs_to_jiffies(sched_clock())
+
+#endif
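Since this shim defines HZ as 1000 and derives jiffies from sched_clock(), the usual wraparound-safe timeout idiom still works; a minimal sketch:

#include <linux/jiffies.h>
#include <stdio.h>

int main(void)
{
	unsigned long timeout = jiffies + msecs_to_jiffies(100);

	/* wraparound-safe comparison; real code would sleep, not spin */
	while (!time_after(jiffies, timeout))
		;

	printf("~100ms elapsed\n");
	return 0;
}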
diff --git a/c_src/include/linux/kernel.h b/c_src/include/linux/kernel.h
new file mode 100644
index 00000000..ef0b1a7d
--- /dev/null
+++ b/c_src/include/linux/kernel.h
@@ -0,0 +1,267 @@
+#ifndef __TOOLS_LINUX_KERNEL_H
+#define __TOOLS_LINUX_KERNEL_H
+
+#include <assert.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <limits.h>
+
+#include <linux/bug.h>
+#include <linux/byteorder.h>
+#include <linux/compiler.h>
+#include <linux/math.h>
+#include <linux/minmax.h>
+
+#define BIT(nr) (1UL << (nr))
+#define BIT_ULL(nr) (1ULL << (nr))
+
+#define __ARG_PLACEHOLDER_1 0,
+#define __take_second_arg(__ignored, val, ...) val
+
+#define __and(x, y) ___and(x, y)
+#define ___and(x, y) ____and(__ARG_PLACEHOLDER_##x, y)
+#define ____and(arg1_or_junk, y) __take_second_arg(arg1_or_junk y, 0)
+
+#define __or(x, y) ___or(x, y)
+#define ___or(x, y) ____or(__ARG_PLACEHOLDER_##x, y)
+#define ____or(arg1_or_junk, y) __take_second_arg(arg1_or_junk 1, y)
+
+#define __is_defined(x) ___is_defined(x)
+#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
+#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
+
+/*
+ * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
+ * otherwise. For boolean options, this is equivalent to
+ * IS_ENABLED(CONFIG_FOO).
+ */
+#define IS_BUILTIN(option) __is_defined(option)
+
+/*
+ * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0
+ * otherwise.
+ */
+#define IS_MODULE(option) __is_defined(option##_MODULE)
+
+/*
+ * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled
+ * code can call a function defined in code compiled based on CONFIG_FOO.
+ * This is similar to IS_ENABLED(), but returns false when invoked from
+ * built-in code when CONFIG_FOO is set to 'm'.
+ */
+#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \
+ __and(IS_MODULE(option), __is_defined(MODULE)))
+
+/*
+ * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm',
+ * 0 otherwise.
+ */
+#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
+#define EXPORT_SYMBOL(sym)
+
+#define U8_MAX ((u8)~0U)
+#define S8_MAX ((s8)(U8_MAX>>1))
+#define S8_MIN ((s8)(-S8_MAX - 1))
+#define U16_MAX ((u16)~0U)
+#define S16_MAX ((s16)(U16_MAX>>1))
+#define S16_MIN ((s16)(-S16_MAX - 1))
+#define U32_MAX ((u32)~0U)
+#define S32_MAX ((s32)(U32_MAX>>1))
+#define S32_MIN ((s32)(-S32_MAX - 1))
+#define U64_MAX ((u64)~0ULL)
+#define S64_MAX ((s64)(U64_MAX>>1))
+#define S64_MIN ((s64)(-S64_MAX - 1))
+
+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
+
+#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
+#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
+
+#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#ifndef container_of
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *)0)->member) * __mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); })
+#endif
+
+#ifndef __struct_group
+#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
+ union { \
+ struct { MEMBERS } ATTRS; \
+ struct TAG { MEMBERS } ATTRS NAME; \
+ }
+#endif
+
+#define struct_group(NAME, MEMBERS...) \
+ __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS)
+
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+/* This counts to 12. Any more, it will return 13th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+#define _RET_IP_ (unsigned long)__builtin_return_address(0)
+#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
+
+#define might_sleep()
+
+#define cpu_relax() barrier()
+#define cpu_relax_lowlatency() barrier()
+
+#define panic(fmt, ...) \
+do { \
+ printf(fmt, ##__VA_ARGS__); \
+ BUG(); \
+} while (0)
+
+int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
+int __must_check _kstrtol(const char *s, unsigned int base, long *res);
+
+int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res);
+int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
+
+/**
+ * kstrtoul - convert a string to an unsigned long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+*/
+static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+ /*
+ * We want to shortcut function call, but
+ * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0.
+ */
+ if (sizeof(unsigned long) == sizeof(unsigned long long) &&
+ __alignof__(unsigned long) == __alignof__(unsigned long long))
+ return kstrtoull(s, base, (unsigned long long *)res);
+ else
+ return _kstrtoul(s, base, res);
+}
+
+/**
+ * kstrtol - convert a string to a long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
+{
+ /*
+ * We want to shortcut function call, but
+ * __builtin_types_compatible_p(long, long long) = 0.
+ */
+ if (sizeof(long) == sizeof(long long) &&
+ __alignof__(long) == __alignof__(long long))
+ return kstrtoll(s, base, (long long *)res);
+ else
+ return _kstrtol(s, base, res);
+}
+
+int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res);
+int __must_check kstrtoint(const char *s, unsigned int base, int *res);
+
+static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res)
+{
+ return kstrtoull(s, base, res);
+}
+
+static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res)
+{
+ return kstrtoll(s, base, res);
+}
+
+static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res)
+{
+ return kstrtouint(s, base, res);
+}
+
+static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res)
+{
+ return kstrtoint(s, base, res);
+}
+
+struct printbuf;
+extern void prt_u64(struct printbuf *out, u64 num);
+
+extern __printf(2, 0) void prt_vprintf(struct printbuf *out, const char *fmt, va_list args);
+extern __printf(2, 3) void prt_printf(struct printbuf *out, const char *fmt, ...);
+
+static const char hex_asc[] = "0123456789abcdef";
+#define hex_asc_lo(x) hex_asc[((x) & 0x0f)]
+#define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4]
+static const char hex_asc_upper[] = "0123456789ABCDEF";
+#define hex_asc_upper_lo(x) hex_asc_upper[((x) & 0x0f)]
+#define hex_asc_upper_hi(x) hex_asc_upper[((x) & 0xf0) >> 4]
+
+/* The hash is always the low bits of hash_len */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ #define HASH_LEN_DECLARE u32 hash; u32 len
+#else
+ #define HASH_LEN_DECLARE u32 len; u32 hash
+#endif
+
+struct qstr {
+ union {
+ struct {
+ HASH_LEN_DECLARE;
+ };
+ u64 hash_len;
+ };
+ const unsigned char *name;
+};
+
+#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
+
+#define POISON_FREE 0x6b
+
+static inline void dump_stack(void) {}
+
+#define unsafe_memcpy(dst, src, bytes, justification) \
+ memcpy(dst, src, bytes)
+
+#ifdef __DECLARE_FLEX_ARRAY
+#define DECLARE_FLEX_ARRAY(TYPE, NAME) __DECLARE_FLEX_ARRAY(TYPE, NAME)
+#else
+#define DECLARE_FLEX_ARRAY(T, member) T member[0]
+#endif
+
+#endif
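Of the helpers above, container_of() is the one most worth a worked example; this sketch recovers an outer struct from a pointer to an embedded member:

#include <linux/kernel.h>
#include <stdio.h>

struct inner {
	int x;
};

struct outer {
	int a;
	struct inner in;
};

int main(void)
{
	struct outer o = { .a = 7 };
	struct inner *ip = &o.in;

	/* subtract offsetof(struct outer, in) from the member pointer */
	struct outer *op = container_of(ip, struct outer, in);

	printf("%d\n", op->a);	/* 7 */
	return 0;
}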
diff --git a/c_src/include/linux/key.h b/c_src/include/linux/key.h
new file mode 100644
index 00000000..cc6859a9
--- /dev/null
+++ b/c_src/include/linux/key.h
@@ -0,0 +1,42 @@
+#ifndef _LINUX_KEY_H
+#define _LINUX_KEY_H
+
+#include <linux/types.h>
+#include <linux/atomic.h>
+#include <keyutils.h>
+
+struct user_key_payload {
+ size_t datalen; /* length of this data */
+ char data[0]; /* actual data */
+};
+
+struct key {
+ atomic_t usage; /* number of references */
+ key_serial_t serial; /* key serial number */
+ struct rw_semaphore sem; /* change vs change sem */
+ struct user_key_payload payload;
+};
+
+static inline const struct user_key_payload *user_key_payload(const struct key *key)
+{
+ return &key->payload;
+}
+
+static inline void key_put(struct key *key)
+{
+ if (atomic_dec_and_test(&key->usage))
+ free(key);
+}
+
+static inline struct key *__key_get(struct key *key)
+{
+ atomic_inc(&key->usage);
+ return key;
+}
+
+static inline struct key *key_get(struct key *key)
+{
+ return key ? __key_get(key) : key;
+}
+
+#endif /* _LINUX_KEY_H */
diff --git a/c_src/include/linux/kmemleak.h b/c_src/include/linux/kmemleak.h
new file mode 100644
index 00000000..6a3cd1bf
--- /dev/null
+++ b/c_src/include/linux/kmemleak.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * include/linux/kmemleak.h
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ */
+
+#ifndef __KMEMLEAK_H
+#define __KMEMLEAK_H
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#ifdef CONFIG_DEBUG_KMEMLEAK
+
+extern void kmemleak_init(void) __init;
+extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+ gfp_t gfp) __ref;
+extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
+ gfp_t gfp) __ref;
+extern void kmemleak_vmalloc(const struct vm_struct *area, size_t size,
+ gfp_t gfp) __ref;
+extern void kmemleak_free(const void *ptr) __ref;
+extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
+extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
+extern void kmemleak_update_trace(const void *ptr) __ref;
+extern void kmemleak_not_leak(const void *ptr) __ref;
+extern void kmemleak_ignore(const void *ptr) __ref;
+extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
+extern void kmemleak_no_scan(const void *ptr) __ref;
+extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
+ gfp_t gfp) __ref;
+extern void kmemleak_free_part_phys(phys_addr_t phys, size_t size) __ref;
+extern void kmemleak_ignore_phys(phys_addr_t phys) __ref;
+
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+ int min_count, slab_flags_t flags,
+ gfp_t gfp)
+{
+ if (!(flags & SLAB_NOLEAKTRACE))
+ kmemleak_alloc(ptr, size, min_count, gfp);
+}
+
+static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags)
+{
+ if (!(flags & SLAB_NOLEAKTRACE))
+ kmemleak_free(ptr);
+}
+
+static inline void kmemleak_erase(void **ptr)
+{
+ *ptr = NULL;
+}
+
+#else
+
+static inline void kmemleak_init(void)
+{
+}
+static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+ gfp_t gfp)
+{
+}
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+ int min_count, slab_flags_t flags,
+ gfp_t gfp)
+{
+}
+static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
+ gfp_t gfp)
+{
+}
+static inline void kmemleak_vmalloc(const struct vm_struct *area, size_t size,
+ gfp_t gfp)
+{
+}
+static inline void kmemleak_free(const void *ptr)
+{
+}
+static inline void kmemleak_free_part(const void *ptr, size_t size)
+{
+}
+static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags)
+{
+}
+static inline void kmemleak_free_percpu(const void __percpu *ptr)
+{
+}
+static inline void kmemleak_update_trace(const void *ptr)
+{
+}
+static inline void kmemleak_not_leak(const void *ptr)
+{
+}
+static inline void kmemleak_ignore(const void *ptr)
+{
+}
+static inline void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
+{
+}
+static inline void kmemleak_erase(void **ptr)
+{
+}
+static inline void kmemleak_no_scan(const void *ptr)
+{
+}
+static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
+ gfp_t gfp)
+{
+}
+static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size)
+{
+}
+static inline void kmemleak_ignore_phys(phys_addr_t phys)
+{
+}
+
+#endif /* CONFIG_DEBUG_KMEMLEAK */
+
+#endif /* __KMEMLEAK_H */
diff --git a/c_src/include/linux/kobject.h b/c_src/include/linux/kobject.h
new file mode 100644
index 00000000..c33b2126
--- /dev/null
+++ b/c_src/include/linux/kobject.h
@@ -0,0 +1,129 @@
+/*
+ * kobject.h - generic kernel object infrastructure.
+ *
+ * Copyright (c) 2002-2003 Patrick Mochel
+ * Copyright (c) 2002-2003 Open Source Development Labs
+ * Copyright (c) 2006-2008 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (c) 2006-2008 Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Please read Documentation/kobject.txt before using the kobject
+ * interface, ESPECIALLY the parts about reference counts and object
+ * destructors.
+ */
+
+#ifndef _KOBJECT_H_
+#define _KOBJECT_H_
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct kset;
+
+struct kobj_type {
+ void (*release)(struct kobject *kobj);
+ const struct sysfs_ops *sysfs_ops;
+ const struct attribute_group **default_groups;
+ const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
+ const void *(*namespace)(struct kobject *kobj);
+};
+
+struct kobj_uevent_env {
+};
+
+struct kobj_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count);
+};
+
+struct kobject {
+ struct kobject *parent;
+ struct kset *kset;
+ const struct kobj_type *ktype;
+ struct kernfs_node *sd; /* sysfs directory entry */
+ atomic_t ref;
+ unsigned int state_initialized:1;
+ unsigned int state_in_sysfs:1;
+ unsigned int state_add_uevent_sent:1;
+ unsigned int state_remove_uevent_sent:1;
+ unsigned int uevent_suppress:1;
+};
+
+struct kset {
+ struct kobject kobj;
+};
+
+#define kobject_add(...) 0
+
+static inline void kobject_init(struct kobject *kobj, const struct kobj_type *ktype)
+{
+ memset(kobj, 0, sizeof(*kobj));
+
+ atomic_set(&kobj->ref, 1);
+ kobj->ktype = ktype;
+ kobj->state_initialized = 1;
+}
+
+static inline void kobject_del(struct kobject *kobj);
+
+static inline void kobject_cleanup(struct kobject *kobj)
+{
+ const struct kobj_type *t = kobj->ktype;
+
+ /* remove from sysfs if the caller did not do it */
+ if (kobj->state_in_sysfs)
+ kobject_del(kobj);
+
+ if (t && t->release)
+ t->release(kobj);
+}
+
+static inline void kobject_put(struct kobject *kobj)
+{
+ BUG_ON(!kobj);
+ BUG_ON(!kobj->state_initialized);
+
+ if (atomic_dec_and_test(&kobj->ref))
+ kobject_cleanup(kobj);
+}
+
+static inline void kobject_del(struct kobject *kobj)
+{
+ if (!kobj)
+ return;
+
+ kobj->state_in_sysfs = 0;
+#if 0
+ kobj_kset_leave(kobj);
+#endif
+ kobject_put(kobj->parent);
+ kobj->parent = NULL;
+}
+
+static inline struct kobject *kobject_get(struct kobject *kobj)
+{
+ BUG_ON(!kobj);
+ BUG_ON(!kobj->state_initialized);
+
+ atomic_inc(&kobj->ref);
+ return kobj;
+}
+
+static inline void kset_unregister(struct kset *kset)
+{
+ kfree(kset);
+}
+
+#define kset_create_and_add(_name, _u, _parent) \
+ ((struct kset *) kzalloc(sizeof(struct kset), GFP_KERNEL))
+
+#endif /* _KOBJECT_H_ */
diff --git a/c_src/include/linux/kthread.h b/c_src/include/linux/kthread.h
new file mode 100644
index 00000000..3a8cf108
--- /dev/null
+++ b/c_src/include/linux/kthread.h
@@ -0,0 +1,118 @@
+#ifndef _LINUX_KTHREAD_H
+#define _LINUX_KTHREAD_H
+
+/* Simple interface for creating and stopping kernel threads without mess. */
+#include <linux/err.h>
+#include <linux/lockdep.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+__printf(3, 4)
+struct task_struct *kthread_create(int (*threadfn)(void *data),
+ void *data,
+ const char namefmt[], ...);
+
+
+struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+ void *data,
+ unsigned int cpu,
+ const char *namefmt);
+
+/**
+ * kthread_run - create and wake a thread.
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: Convenient wrapper for kthread_create() followed by
+ * wake_up_process(). Returns the kthread or ERR_PTR(-ENOMEM).
+ */
+#define kthread_run(threadfn, data, namefmt, ...) \
+({ \
+ struct task_struct *__k \
+ = kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \
+ if (!IS_ERR(__k)) \
+ wake_up_process(__k); \
+ __k; \
+})
+
+int kthread_stop(struct task_struct *k);
+bool kthread_should_stop(void);
+bool kthread_should_park(void);
+bool kthread_freezable_should_stop(bool *was_frozen);
+void *kthread_data(struct task_struct *k);
+void *probe_kthread_data(struct task_struct *k);
+int kthread_park(struct task_struct *k);
+void kthread_unpark(struct task_struct *k);
+void kthread_parkme(void);
+
+int kthreadd(void *unused);
+extern struct task_struct *kthreadd_task;
+extern int tsk_fork_get_node(struct task_struct *tsk);
+
+/*
+ * Simple work processor based on kthread.
+ *
+ * This provides easier way to make use of kthreads. A kthread_work
+ * can be queued and flushed using queue/flush_kthread_work()
+ * respectively. Queued kthread_works are processed by a kthread
+ * running kthread_worker_fn().
+ */
+struct kthread_work;
+typedef void (*kthread_work_func_t)(struct kthread_work *work);
+
+struct kthread_worker {
+ spinlock_t lock;
+ struct list_head work_list;
+ struct task_struct *task;
+ struct kthread_work *current_work;
+};
+
+struct kthread_work {
+ struct list_head node;
+ kthread_work_func_t func;
+ struct kthread_worker *worker;
+};
+
+#define KTHREAD_WORKER_INIT(worker) { \
+ .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \
+ .work_list = LIST_HEAD_INIT((worker).work_list), \
+ }
+
+#define KTHREAD_WORK_INIT(work, fn) { \
+ .node = LIST_HEAD_INIT((work).node), \
+ .func = (fn), \
+ }
+
+#define DEFINE_KTHREAD_WORKER(worker) \
+ struct kthread_worker worker = KTHREAD_WORKER_INIT(worker)
+
+#define DEFINE_KTHREAD_WORK(work, fn) \
+ struct kthread_work work = KTHREAD_WORK_INIT(work, fn)
+
+#define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker)
+
+extern void __init_kthread_worker(struct kthread_worker *worker,
+ const char *name, struct lock_class_key *key);
+
+#define init_kthread_worker(worker) \
+ do { \
+ static struct lock_class_key __key; \
+ __init_kthread_worker((worker), "("#worker")->lock", &__key); \
+ } while (0)
+
+#define init_kthread_work(work, fn) \
+ do { \
+ memset((work), 0, sizeof(struct kthread_work)); \
+ INIT_LIST_HEAD(&(work)->node); \
+ (work)->func = (fn); \
+ } while (0)
+
+int kthread_worker_fn(void *worker_ptr);
+
+bool queue_kthread_work(struct kthread_worker *worker,
+ struct kthread_work *work);
+void flush_kthread_work(struct kthread_work *work);
+void flush_kthread_worker(struct kthread_worker *worker);
+
+#endif /* _LINUX_KTHREAD_H */
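The kthread_run() wrapper above is the usual entry point; a sketch of the create/stop pattern (assumes the userspace kthread shim in this tree is linked in):

#include <linux/kthread.h>

static int worker_fn(void *arg)
{
	while (!kthread_should_stop())
		schedule();	/* do a unit of work, then yield */
	return 0;
}

static struct task_struct *start_worker(void)
{
	struct task_struct *t = kthread_run(worker_fn, NULL, "demo-worker");

	/* shut down later with kthread_stop(t) */
	return IS_ERR(t) ? NULL : t;
}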
diff --git a/c_src/include/linux/list.h b/c_src/include/linux/list.h
new file mode 100644
index 00000000..d176d0d3
--- /dev/null
+++ b/c_src/include/linux/list.h
@@ -0,0 +1,112 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+#include <urcu/list.h>
+
+#define list_head cds_list_head
+#define LIST_HEAD_INIT(l) CDS_LIST_HEAD_INIT(l)
+#define LIST_HEAD(l) CDS_LIST_HEAD(l)
+#define INIT_LIST_HEAD(l) CDS_INIT_LIST_HEAD(l)
+#define list_add(n, h) cds_list_add(n, h)
+#define list_add_tail(n, h) cds_list_add_tail(n, h)
+#define __list_del_entry(l) cds_list_del(l)
+#define __list_del(p, n) __cds_list_del(p, n)
+#define list_del(l) cds_list_del(l)
+#define list_del_init(l) cds_list_del_init(l)
+#define list_replace(o, n) cds_list_replace(o, n)
+#define list_replace_init(o, n) cds_list_replace_init(o, n)
+#define list_move(l, h) cds_list_move(l, h)
+#define list_empty(l) cds_list_empty(l)
+#define list_splice(l, h) cds_list_splice(l, h)
+#define list_entry(p, t, m) cds_list_entry(p, t, m)
+#define list_first_entry(p, t, m) cds_list_first_entry(p, t, m)
+#define list_for_each(p, h) cds_list_for_each(p, h)
+#define list_for_each_prev(p, h) cds_list_for_each_prev(p, h)
+#define list_for_each_safe(p, n, h) cds_list_for_each_safe(p, n, h)
+#define list_for_each_prev_safe(p, n, h) cds_list_for_each_prev_safe(p, n, h)
+#define list_for_each_entry(p, h, m) cds_list_for_each_entry(p, h, m)
+#define list_for_each_entry_reverse(p, h, m) cds_list_for_each_entry_reverse(p, h, m)
+#define list_for_each_entry_safe(p, n, h, m) cds_list_for_each_entry_safe(p, n, h, m)
+
+static inline int list_empty_careful(const struct list_head *head)
+{
+ struct list_head *next = head->next;
+ return (next == head) && (next == head->prev);
+}
+
+static inline void list_move_tail(struct list_head *list,
+ struct list_head *head)
+{
+ list_del(list);
+ list_add_tail(list, head);
+}
+
+static inline void list_splice_init(struct list_head *list,
+ struct list_head *head)
+{
+ list_splice(list, head);
+ INIT_LIST_HEAD(list);
+}
+
+#define list_last_entry(ptr, type, member) \
+ list_entry((ptr)->prev, type, member)
+
+#define list_first_entry_or_null(ptr, type, member) \
+ (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
+
+#define list_prev_entry(pos, member) \
+ list_entry((pos)->member.prev, typeof(*(pos)), member)
+
+#define list_for_each_entry_safe_reverse(pos, n, head, member) \
+ for (pos = list_last_entry(head, typeof(*pos), member), \
+ n = list_prev_entry(pos, member); \
+ &pos->member != (head); \
+ pos = n, n = list_prev_entry(n, member))
+
+/* hlists: */
+
+#include <urcu/hlist.h>
+
+#define hlist_head cds_hlist_head
+#define hlist_node cds_hlist_node
+
+#define hlist_add_head(n, h) cds_hlist_add_head(n, h)
+#define hlist_del(n) cds_hlist_del(n)
+#define hlist_del_init(n) cds_hlist_del_init(n)
+
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+ return !h->prev;
+}
+
+static inline void hlist_del_init(struct hlist_node *n)
+{
+ hlist_del(n);
+ n->prev = NULL;
+ n->next = NULL;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ ____ptr ? hlist_entry(____ptr, type, member) : NULL; \
+ })
+
+#define hlist_for_each_entry(pos, head, member) \
+ for (pos = hlist_entry_safe((head)->next, typeof(*(pos)), member);\
+ pos; \
+ pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
+
+static inline size_t list_count_nodes(struct list_head *head)
+{
+ struct list_head *pos;
+ size_t count = 0;
+
+ list_for_each(pos, head)
+ count++;
+
+ return count;
+}
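+
+/*
+ * Usage sketch (illustrative only): these wrappers map directly onto the
+ * liburcu cds_list API, so ordinary kernel-style list code works unchanged.
+ * `struct foo', `foos' and do_something() are hypothetical.
+ *
+ *	struct foo { int v; struct list_head list; };
+ *
+ *	LIST_HEAD(foos);
+ *	struct foo a = { .v = 1 }, *f;
+ *
+ *	list_add_tail(&a.list, &foos);
+ *	list_for_each_entry(f, &foos, list)
+ *		do_something(f->v);
+ */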
+
+#endif /* _LINUX_LIST_H */
diff --git a/c_src/include/linux/list_nulls.h b/c_src/include/linux/list_nulls.h
new file mode 100644
index 00000000..fa6e8471
--- /dev/null
+++ b/c_src/include/linux/list_nulls.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_LIST_NULLS_H
+#define _LINUX_LIST_NULLS_H
+
+#include <linux/poison.h>
+#include <linux/const.h>
+
+/*
+ * Special version of lists, where end of list is not a NULL pointer,
+ * but a 'nulls' marker, which can have many different values.
+ * (up to 2^31 different values guaranteed on all platforms)
+ *
+ * In the standard hlist, termination of a list is the NULL pointer.
+ * In this special 'nulls' variant, we use the fact that objects stored in
+ * a list are aligned on a word (4 or 8 bytes alignment).
+ * We therefore use the least significant bit of 'ptr':
+ * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1)
+ * Set to 0 : This is a pointer to some object (ptr)
+ */
+
+struct hlist_nulls_head {
+ struct hlist_nulls_node *first;
+};
+
+struct hlist_nulls_node {
+ struct hlist_nulls_node *next, **pprev;
+};
+#define NULLS_MARKER(value) (1UL | (((long)value) << 1))
+#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \
+ ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
+
+#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_nulls_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
+ })
+/**
+ * is_a_nulls - Test if a ptr is a nulls end-of-list marker
+ * @ptr: ptr to be tested
+ *
+ */
+static inline int is_a_nulls(const struct hlist_nulls_node *ptr)
+{
+ return ((unsigned long)ptr & 1);
+}
+
+/**
+ * get_nulls_value - Get the 'nulls' value of the end of chain
+ * @ptr: end of chain
+ *
+ * Should be called only if is_a_nulls(ptr);
+ */
+static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr)
+{
+ return ((unsigned long)ptr) >> 1;
+}
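+
+/*
+ * Usage sketch (illustrative only): initialize a nulls hash chain with a
+ * per-bucket value and recover that value when a walk hits the end marker.
+ * The bucket value 3 is arbitrary.
+ *
+ *	struct hlist_nulls_head bucket;
+ *
+ *	INIT_HLIST_NULLS_HEAD(&bucket, 3);
+ *	if (is_a_nulls(bucket.first))
+ *		BUG_ON(get_nulls_value(bucket.first) != 3);
+ */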
+
+/**
+ * hlist_nulls_unhashed - Has node been removed and reinitialized?
+ * @h: Node to be checked
+ *
+ * Note that not all removal functions will leave a node in unhashed state.
+ * For example, hlist_del_init_rcu() leaves the node in unhashed state,
+ * but hlist_nulls_del() does not.
+ */
+static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h)
+{
+ return !h->pprev;
+}
+
+/**
+ * hlist_nulls_unhashed_lockless - Has node been removed and reinitialized?
+ * @h: Node to be checked
+ *
+ * Note that not all removal functions will leave a node in unhashed state.
+ * For example, hlist_del_init_rcu() leaves the node in unhashed state,
+ * but hlist_nulls_del() does not. Unlike hlist_nulls_unhashed(), this
+ * function may be used locklessly.
+ */
+static inline int hlist_nulls_unhashed_lockless(const struct hlist_nulls_node *h)
+{
+ return !READ_ONCE(h->pprev);
+}
+
+static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
+{
+ return is_a_nulls(READ_ONCE(h->first));
+}
+
+static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
+ struct hlist_nulls_head *h)
+{
+ struct hlist_nulls_node *first = h->first;
+
+ n->next = first;
+ WRITE_ONCE(n->pprev, &h->first);
+ h->first = n;
+ if (!is_a_nulls(first))
+ WRITE_ONCE(first->pprev, &n->next);
+}
+
+static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
+{
+ struct hlist_nulls_node *next = n->next;
+ struct hlist_nulls_node **pprev = n->pprev;
+
+ WRITE_ONCE(*pprev, next);
+ if (!is_a_nulls(next))
+ WRITE_ONCE(next->pprev, pprev);
+}
+
+static inline void hlist_nulls_del(struct hlist_nulls_node *n)
+{
+ __hlist_nulls_del(n);
+ WRITE_ONCE(n->pprev, LIST_POISON2);
+}
+
+/**
+ * hlist_nulls_for_each_entry - iterate over list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ *
+ */
+#define hlist_nulls_for_each_entry(tpos, pos, head, member) \
+ for (pos = (head)->first; \
+ (!is_a_nulls(pos)) && \
+ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_node to use as a loop cursor.
+ * @member: the name of the hlist_node within the struct.
+ *
+ */
+#define hlist_nulls_for_each_entry_from(tpos, pos, member) \
+ for (; (!is_a_nulls(pos)) && \
+ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+#endif
diff --git a/c_src/include/linux/llist.h b/c_src/include/linux/llist.h
new file mode 100644
index 00000000..2e9c7215
--- /dev/null
+++ b/c_src/include/linux/llist.h
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef LLIST_H
+#define LLIST_H
+/*
+ * Lock-less NULL-terminated singly linked list
+ *
+ * Cases where locking is not needed:
+ * If there are multiple producers and multiple consumers, llist_add can be
+ * used in producers and llist_del_all can be used in consumers simultaneously
+ * without locking. Also a single consumer can use llist_del_first while
+ * multiple producers simultaneously use llist_add, without any locking.
+ *
+ * Cases where locking is needed:
+ * If we have multiple consumers with llist_del_first used in one consumer, and
+ * llist_del_first or llist_del_all used in other consumers, then a lock is
+ * needed. This is because llist_del_first depends on list->first->next not
+ * changing, but without lock protection, there's no way to be sure about that
+ * if a preemption happens in the middle of the delete operation and on being
+ * preempted back, the list->first is the same as before causing the cmpxchg in
+ * llist_del_first to succeed. For example, while a llist_del_first operation
+ * is in progress in one consumer, then a llist_del_first, llist_add,
+ * llist_add (or llist_del_all, llist_add, llist_add) sequence in another
+ * consumer may cause violations.
+ *
+ * This can be summarized as follows:
+ *
+ * | add | del_first | del_all
+ * add | - | - | -
+ * del_first | | L | L
+ * del_all | | | -
+ *
+ * Where a particular row's operation can happen concurrently with a column's
+ * operation, "-" means no lock is needed, while "L" means a lock is needed.
+ *
+ * The list entries deleted via llist_del_all can be traversed with
+ * traversal functions such as llist_for_each etc. But the list
+ * entries cannot be traversed safely before being deleted from the list.
+ * The order of deleted entries is from the newest to the oldest added
+ * one. If you want to traverse from the oldest to the newest, you
+ * must reverse the order by yourself before traversing.
+ *
+ * The basic atomic operation of this list is cmpxchg on long. On
+ * architectures that don't have NMI-safe cmpxchg implementation, the
+ * list can NOT be used in NMI handlers. So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+
+struct llist_head {
+ struct llist_node *first;
+};
+
+struct llist_node {
+ struct llist_node *next;
+};
+
+#define LLIST_HEAD_INIT(name) { NULL }
+#define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name)
+
+/**
+ * init_llist_head - initialize lock-less list head
+ * @head: the head for your lock-less list
+ */
+static inline void init_llist_head(struct llist_head *list)
+{
+ list->first = NULL;
+}
+
+/**
+ * llist_entry - get the struct of this entry
+ * @ptr: the &struct llist_node pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the llist_node within the struct.
+ */
+#define llist_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+/**
+ * member_address_is_nonnull - check whether the member address is not NULL
+ * @ptr: the object pointer (struct type * that contains the llist_node)
+ * @member: the name of the llist_node within the struct.
+ *
+ * This macro is conceptually the same as
+ * &ptr->member != NULL
+ * but it works around the fact that compilers can decide that taking a member
+ * address is never a NULL pointer.
+ *
+ * Real objects that start at a high address and have a member at NULL are
+ * unlikely to exist, but such pointers may be returned e.g. by the
+ * container_of() macro.
+ */
+#define member_address_is_nonnull(ptr, member) \
+ ((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0)
+
+/**
+ * llist_for_each - iterate over some deleted entries of a lock-less list
+ * @pos: the &struct llist_node to use as a loop cursor
+ * @node: the first entry of deleted list entries
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being deleted from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry. If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each(pos, node) \
+ for ((pos) = (node); pos; (pos) = (pos)->next)
+
+/**
+ * llist_for_each_safe - iterate over some deleted entries of a lock-less list
+ * safe against removal of list entry
+ * @pos: the &struct llist_node to use as a loop cursor
+ * @n: another &struct llist_node to use as temporary storage
+ * @node: the first entry of deleted list entries
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being deleted from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry. If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_safe(pos, n, node) \
+ for ((pos) = (node); (pos) && ((n) = (pos)->next, true); (pos) = (n))
+
+/**
+ * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @node:	the first entry of deleted list entries.
+ * @member:	the name of the llist_node within the struct.
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being removed from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry. If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_entry(pos, node, member) \
+ for ((pos) = llist_entry((node), typeof(*(pos)), member); \
+ member_address_is_nonnull(pos, member); \
+ (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
+
+/**
+ * llist_for_each_entry_safe - iterate over some deleted entries of lock-less list of given type
+ * safe against removal of list entry
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @node: the first entry of deleted list entries.
+ * @member:	the name of the llist_node within the struct.
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being removed from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry. If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_entry_safe(pos, n, node, member) \
+ for (pos = llist_entry((node), typeof(*pos), member); \
+ member_address_is_nonnull(pos, member) && \
+ (n = llist_entry(pos->member.next, typeof(*n), member), true); \
+ pos = n)
+
+/**
+ * llist_empty - tests whether a lock-less list is empty
+ * @head: the list to test
+ *
+ * Not guaranteed to be accurate or up to date. Just a quick way to
+ * test whether the list is empty without deleting something from the
+ * list.
+ */
+static inline bool llist_empty(const struct llist_head *head)
+{
+ return READ_ONCE(head->first) == NULL;
+}
+
+static inline struct llist_node *llist_next(struct llist_node *node)
+{
+ return node->next;
+}
+
+extern bool llist_add_batch(struct llist_node *new_first,
+ struct llist_node *new_last,
+ struct llist_head *head);
+/**
+ * llist_add - add a new entry
+ * @new: new entry to be added
+ * @head: the head for your lock-less list
+ *
+ * Returns true if the list was empty prior to adding this entry.
+ */
+static inline bool llist_add(struct llist_node *new, struct llist_head *head)
+{
+ return llist_add_batch(new, new, head);
+}
+
+/**
+ * llist_del_all - delete all entries from lock-less list
+ * @head: the head of lock-less list to delete all entries
+ *
+ * If list is empty, return NULL, otherwise, delete all entries and
+ * return the pointer to the first entry. The order of entries
+ * deleted is from the newest to the oldest added one.
+ */
+static inline struct llist_node *llist_del_all(struct llist_head *head)
+{
+ return xchg(&head->first, NULL);
+}
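+
+/*
+ * Usage sketch (illustrative only; `struct job', `some_job' and process()
+ * are hypothetical): producers push with llist_add() and a consumer drains
+ * everything with llist_del_all(); entries come back newest-first.
+ *
+ *	struct job { struct llist_node node; };
+ *
+ *	static LLIST_HEAD(pending);
+ *
+ *	llist_add(&some_job->node, &pending);
+ *
+ *	struct job *j;
+ *	llist_for_each_entry(j, llist_del_all(&pending), node)
+ *		process(j);
+ */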
+
+extern struct llist_node *llist_del_first(struct llist_head *head);
+
+struct llist_node *llist_reverse_order(struct llist_node *head);
+
+#endif /* LLIST_H */
diff --git a/c_src/include/linux/lockdep.h b/c_src/include/linux/lockdep.h
new file mode 100644
index 00000000..3831ef2d
--- /dev/null
+++ b/c_src/include/linux/lockdep.h
@@ -0,0 +1,60 @@
+#ifndef __TOOLS_LINUX_LOCKDEP_H
+#define __TOOLS_LINUX_LOCKDEP_H
+
+struct lock_class_key {};
+struct task_struct;
+
+# define lock_acquire(l, s, t, r, c, n, i) do { } while (0)
+# define lock_release(l, i) do { } while (0)
+# define lock_set_class(l, n, k, s, i) do { } while (0)
+# define lock_set_subclass(l, s, i) do { } while (0)
+# define lockdep_set_current_reclaim_state(g) do { } while (0)
+# define lockdep_clear_current_reclaim_state() do { } while (0)
+# define lockdep_trace_alloc(g) do { } while (0)
+# define lockdep_info() do { } while (0)
+# define lockdep_init_map(lock, name, key, sub) \
+ do { (void)(name); (void)(key); } while (0)
+# define lockdep_set_class(lock, key) do { (void)(key); } while (0)
+# define lockdep_set_class_and_name(lock, key, name) \
+ do { (void)(key); (void)(name); } while (0)
+#define lockdep_set_class_and_subclass(lock, key, sub) \
+ do { (void)(key); } while (0)
+#define lockdep_set_subclass(lock, sub) do { } while (0)
+
+#define lockdep_set_novalidate_class(lock) do { } while (0)
+
+#define lockdep_assert_held(l) do { (void)(l); } while (0)
+#define lockdep_assert_held_once(l) do { (void)(l); } while (0)
+
+#define lock_acquire_shared(l, s, t, n, i)
+
+#define lockdep_acquire_shared(lock)
+
+#define lock_contended(lockdep_map, ip) do {} while (0)
+#define lock_acquired(lockdep_map, ip) do {} while (0)
+
+static inline void debug_show_all_locks(void)
+{
+}
+
+static inline void debug_show_held_locks(struct task_struct *task)
+{
+}
+
+static inline void
+debug_check_no_locks_freed(const void *from, unsigned long len)
+{
+}
+
+static inline void
+debug_check_no_locks_held(void)
+{
+}
+
+static inline int lock_class_is_held(struct lock_class_key *k)
+{
+ return 0;
+}
+
+#endif /* __TOOLS_LINUX_LOCKDEP_H */
+
diff --git a/c_src/include/linux/log2.h b/c_src/include/linux/log2.h
new file mode 100644
index 00000000..f031ea12
--- /dev/null
+++ b/c_src/include/linux/log2.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Integer base 2 logarithm calculation
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _LINUX_LOG2_H
+#define _LINUX_LOG2_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+/*
+ * non-constant log of base 2 calculators
+ * - the arch may override these in asm/bitops.h if they can be implemented
+ * more efficiently than using fls() and fls64()
+ * - the arch is not required to handle n==0 if implementing the fallback
+ */
+#ifndef CONFIG_ARCH_HAS_ILOG2_U32
+static inline __attribute__((const))
+int __ilog2_u32(u32 n)
+{
+ return fls(n) - 1;
+}
+#endif
+
+#ifndef CONFIG_ARCH_HAS_ILOG2_U64
+static inline __attribute__((const))
+int __ilog2_u64(u64 n)
+{
+ return fls64(n) - 1;
+}
+#endif
+
+/**
+ * is_power_of_2() - check if a value is a power of two
+ * @n: the value to check
+ *
+ * Determine whether some value is a power of two, where zero is
+ * *not* considered a power of two.
+ * Return: true if @n is a power of 2, otherwise false.
+ */
+static inline __attribute__((const))
+bool is_power_of_2(unsigned long n)
+{
+ return (n != 0 && ((n & (n - 1)) == 0));
+}
+
+/**
+ * __roundup_pow_of_two() - round up to nearest power of two
+ * @n: value to round up
+ */
+static inline __attribute__((const))
+unsigned long __roundup_pow_of_two(unsigned long n)
+{
+ return 1UL << fls_long(n - 1);
+}
+
+/**
+ * __rounddown_pow_of_two() - round down to nearest power of two
+ * @n: value to round down
+ */
+static inline __attribute__((const))
+unsigned long __rounddown_pow_of_two(unsigned long n)
+{
+ return 1UL << (fls_long(n) - 1);
+}
+
+/**
+ * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value
+ * @n: parameter
+ *
+ * Use this where sparse expects a true constant expression, e.g. for array
+ * indices.
+ */
+#define const_ilog2(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n) < 2 ? 0 : \
+ (n) & (1ULL << 63) ? 63 : \
+ (n) & (1ULL << 62) ? 62 : \
+ (n) & (1ULL << 61) ? 61 : \
+ (n) & (1ULL << 60) ? 60 : \
+ (n) & (1ULL << 59) ? 59 : \
+ (n) & (1ULL << 58) ? 58 : \
+ (n) & (1ULL << 57) ? 57 : \
+ (n) & (1ULL << 56) ? 56 : \
+ (n) & (1ULL << 55) ? 55 : \
+ (n) & (1ULL << 54) ? 54 : \
+ (n) & (1ULL << 53) ? 53 : \
+ (n) & (1ULL << 52) ? 52 : \
+ (n) & (1ULL << 51) ? 51 : \
+ (n) & (1ULL << 50) ? 50 : \
+ (n) & (1ULL << 49) ? 49 : \
+ (n) & (1ULL << 48) ? 48 : \
+ (n) & (1ULL << 47) ? 47 : \
+ (n) & (1ULL << 46) ? 46 : \
+ (n) & (1ULL << 45) ? 45 : \
+ (n) & (1ULL << 44) ? 44 : \
+ (n) & (1ULL << 43) ? 43 : \
+ (n) & (1ULL << 42) ? 42 : \
+ (n) & (1ULL << 41) ? 41 : \
+ (n) & (1ULL << 40) ? 40 : \
+ (n) & (1ULL << 39) ? 39 : \
+ (n) & (1ULL << 38) ? 38 : \
+ (n) & (1ULL << 37) ? 37 : \
+ (n) & (1ULL << 36) ? 36 : \
+ (n) & (1ULL << 35) ? 35 : \
+ (n) & (1ULL << 34) ? 34 : \
+ (n) & (1ULL << 33) ? 33 : \
+ (n) & (1ULL << 32) ? 32 : \
+ (n) & (1ULL << 31) ? 31 : \
+ (n) & (1ULL << 30) ? 30 : \
+ (n) & (1ULL << 29) ? 29 : \
+ (n) & (1ULL << 28) ? 28 : \
+ (n) & (1ULL << 27) ? 27 : \
+ (n) & (1ULL << 26) ? 26 : \
+ (n) & (1ULL << 25) ? 25 : \
+ (n) & (1ULL << 24) ? 24 : \
+ (n) & (1ULL << 23) ? 23 : \
+ (n) & (1ULL << 22) ? 22 : \
+ (n) & (1ULL << 21) ? 21 : \
+ (n) & (1ULL << 20) ? 20 : \
+ (n) & (1ULL << 19) ? 19 : \
+ (n) & (1ULL << 18) ? 18 : \
+ (n) & (1ULL << 17) ? 17 : \
+ (n) & (1ULL << 16) ? 16 : \
+ (n) & (1ULL << 15) ? 15 : \
+ (n) & (1ULL << 14) ? 14 : \
+ (n) & (1ULL << 13) ? 13 : \
+ (n) & (1ULL << 12) ? 12 : \
+ (n) & (1ULL << 11) ? 11 : \
+ (n) & (1ULL << 10) ? 10 : \
+ (n) & (1ULL << 9) ? 9 : \
+ (n) & (1ULL << 8) ? 8 : \
+ (n) & (1ULL << 7) ? 7 : \
+ (n) & (1ULL << 6) ? 6 : \
+ (n) & (1ULL << 5) ? 5 : \
+ (n) & (1ULL << 4) ? 4 : \
+ (n) & (1ULL << 3) ? 3 : \
+ (n) & (1ULL << 2) ? 2 : \
+ 1) : \
+ -1)
+
+/**
+ * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value
+ * @n: parameter
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ * the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n) \
+( \
+ __builtin_constant_p(n) ? \
+ const_ilog2(n) : \
+ (sizeof(n) <= 4) ? \
+ __ilog2_u32(n) : \
+ __ilog2_u64(n) \
+ )
+
+/**
+ * roundup_pow_of_two - round the given value up to nearest power of two
+ * @n: parameter
+ *
+ * round the given value up to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define roundup_pow_of_two(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n == 1) ? 1 : \
+ (1UL << (ilog2((n) - 1) + 1)) \
+ ) : \
+ __roundup_pow_of_two(n) \
+ )
+
+/**
+ * rounddown_pow_of_two - round the given value down to nearest power of two
+ * @n: parameter
+ *
+ * round the given value down to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define rounddown_pow_of_two(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (1UL << ilog2(n))) : \
+ __rounddown_pow_of_two(n) \
+ )
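+
+/*
+ * Worked examples (illustrative only):
+ *
+ *	ilog2(1) == 0, ilog2(7) == 2, ilog2(8) == 3
+ *	roundup_pow_of_two(5) == 8, rounddown_pow_of_two(5) == 4
+ *	is_power_of_2(0) == false, is_power_of_2(64) == true
+ */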
+
+static inline __attribute_const__
+int __order_base_2(unsigned long n)
+{
+ return n > 1 ? ilog2(n - 1) + 1 : 0;
+}
+
+/**
+ * order_base_2 - calculate the (rounded up) base 2 order of the argument
+ * @n: parameter
+ *
+ * The first few values calculated by this routine:
+ * ob2(0) = 0
+ * ob2(1) = 0
+ * ob2(2) = 1
+ * ob2(3) = 2
+ * ob2(4) = 2
+ * ob2(5) = 3
+ * ... and so on.
+ */
+#define order_base_2(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ ((n) == 0 || (n) == 1) ? 0 : \
+ ilog2((n) - 1) + 1) : \
+ __order_base_2(n) \
+)
+
+static inline __attribute__((const))
+int __bits_per(unsigned long n)
+{
+ if (n < 2)
+ return 1;
+ if (is_power_of_2(n))
+ return order_base_2(n) + 1;
+ return order_base_2(n);
+}
+
+/**
+ * bits_per - calculate the number of bits required for the argument
+ * @n: parameter
+ *
+ * This is constant-capable and can be used for compile time
+ * initializations, e.g bitfields.
+ *
+ * The first few values calculated by this routine:
+ * bf(0) = 1
+ * bf(1) = 1
+ * bf(2) = 2
+ * bf(3) = 2
+ * bf(4) = 3
+ * ... and so on.
+ */
+#define bits_per(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ ((n) == 0 || (n) == 1) \
+ ? 1 : ilog2(n) + 1 \
+ ) : \
+ __bits_per(n) \
+)
+
+/**
+ * get_order - Determine the allocation order of a memory size
+ * @size: The size for which to get the order
+ *
+ * Determine the allocation order of a particular sized block of memory. This
+ * is on a logarithmic scale, where:
+ *
+ * 0 -> 2^0 * PAGE_SIZE and below
+ * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
+ * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
+ * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
+ * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
+ * ...
+ *
+ * The order returned is used to find the smallest allocation granule required
+ * to hold an object of the specified size.
+ *
+ * The result is undefined if the size is 0.
+ */
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+ if (__builtin_constant_p(size)) {
+ if (!size)
+ return BITS_PER_LONG - PAGE_SHIFT;
+
+ if (size < (1UL << PAGE_SHIFT))
+ return 0;
+
+ return ilog2((size) - 1) - PAGE_SHIFT + 1;
+ }
+
+ size--;
+ size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+ return fls(size);
+#else
+ return fls64(size);
+#endif
+}
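+
+/*
+ * Worked examples (illustrative only, assuming PAGE_SIZE == 4096):
+ *
+ *	get_order(1)     == 0
+ *	get_order(4096)  == 0
+ *	get_order(4097)  == 1
+ *	get_order(8192)  == 1
+ *	get_order(32768) == 3
+ */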
+
+#endif /* _LINUX_LOG2_H */
diff --git a/c_src/include/linux/lz4.h b/c_src/include/linux/lz4.h
new file mode 100644
index 00000000..f574964a
--- /dev/null
+++ b/c_src/include/linux/lz4.h
@@ -0,0 +1,10 @@
+#include <lz4.h>
+
+#define LZ4_compress_destSize(src, dst, srclen, dstlen, workspace) \
+ LZ4_compress_destSize(src, dst, srclen, dstlen)
+
+#define LZ4_compress_HC(src, dst, srclen, dstlen, level, workspace) -1
+
+#define LZ4_MEM_COMPRESS 0
+#define LZ4HC_MEM_COMPRESS 0
+#define LZ4HC_MIN_CLEVEL 0
diff --git a/c_src/include/linux/math.h b/c_src/include/linux/math.h
new file mode 100644
index 00000000..85c8c8aa
--- /dev/null
+++ b/c_src/include/linux/math.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MATH_H
+#define _LINUX_MATH_H
+
+#include <linux/kernel.h>
+
+/* abs() */
+#include <stdlib.h>
+
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+
+/**
+ * round_up - round up to next specified power of 2
+ * @x: the value to round
+ * @y: multiple to round up to (must be a power of 2)
+ *
+ * Rounds @x up to next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding up, use roundup() below.
+ */
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+
+/**
+ * round_down - round down to next specified power of 2
+ * @x: the value to round
+ * @y: multiple to round down to (must be a power of 2)
+ *
+ * Rounds @x down to next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding down, use rounddown() below.
+ */
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+#define DIV_ROUND_DOWN_ULL(ll, d) \
+ ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; })
+
+#define DIV_ROUND_UP_ULL(ll, d) \
+ DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d))
+
+#if BITS_PER_LONG == 32
+# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d)
+#else
+# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d)
+#endif
+
+/**
+ * roundup - round up to the next specified multiple
+ * @x: the value to round up
+ * @y: multiple to round up to
+ *
+ * Rounds @x up to next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_up().
+ */
+#define roundup(x, y) ( \
+{ \
+ typeof(y) __y = y; \
+ (((x) + (__y - 1)) / __y) * __y; \
+} \
+)
+/**
+ * rounddown - round down to next specified multiple
+ * @x: the value to round
+ * @y: multiple to round down to
+ *
+ * Rounds @x down to next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_down().
+ */
+#define rounddown(x, y) ( \
+{ \
+ typeof(x) __x = (x); \
+ __x - (__x % (y)); \
+} \
+)
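+
+/*
+ * Worked examples (illustrative only):
+ *
+ *	round_up(7, 4)     == 8		(power-of-two multiple)
+ *	round_down(7, 4)   == 4
+ *	roundup(7, 3)      == 9		(arbitrary multiple)
+ *	rounddown(7, 3)    == 6
+ *	DIV_ROUND_UP(7, 3) == 3
+ */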
+
+/*
+ * Divide positive or negative dividend by positive or negative divisor
+ * and round to closest integer. Result is undefined for negative
+ * divisors if the dividend variable type is unsigned and for negative
+ * dividends if the divisor variable type is unsigned.
+ */
+#define DIV_ROUND_CLOSEST(x, divisor)( \
+{ \
+ typeof(x) __x = x; \
+ typeof(divisor) __d = divisor; \
+ (((typeof(x))-1) > 0 || \
+ ((typeof(divisor))-1) > 0 || \
+ (((__x) > 0) == ((__d) > 0))) ? \
+ (((__x) + ((__d) / 2)) / (__d)) : \
+ (((__x) - ((__d) / 2)) / (__d)); \
+} \
+)
+/*
+ * Same as above but for u64 dividends. divisor must be a 32-bit
+ * number.
+ */
+#define DIV_ROUND_CLOSEST_ULL(x, divisor)( \
+{ \
+ typeof(divisor) __d = divisor; \
+ unsigned long long _tmp = (x) + (__d) / 2; \
+ do_div(_tmp, __d); \
+ _tmp; \
+} \
+)
+
+/*
+ * Multiplies an integer by a fraction, while avoiding unnecessary
+ * overflow or loss of precision.
+ */
+#define mult_frac(x, numer, denom)( \
+{ \
+ typeof(x) quot = (x) / (denom); \
+ typeof(x) rem = (x) % (denom); \
+ (quot * (numer)) + ((rem * (numer)) / (denom)); \
+} \
+)
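+
+/*
+ * Worked examples (illustrative only):
+ *
+ *	DIV_ROUND_CLOSEST(7, 2)  == 4
+ *	DIV_ROUND_CLOSEST(-7, 2) == -4
+ *	mult_frac(1000, 3, 8)    == 375	(divides @x first to limit overflow)
+ */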
+
+#define sector_div(a, b) do_div(a, b)
+
+/**
+ * reciprocal_scale - "scale" a value into range [0, ep_ro)
+ * @val: value
+ * @ep_ro: right open interval endpoint
+ *
+ * Perform a "reciprocal multiplication" in order to "scale" a value into
+ * range [0, @ep_ro), where the upper interval endpoint is right-open.
+ * This is useful, e.g., for accessing an index of an array containing
+ * @ep_ro elements. Think of it as a sort of modulus, except the result
+ * is not that of a true modulo operation. Note that if the initial input
+ * is a small value, the result will be 0.
+ *
+ * Return: a result based on @val in interval [0, @ep_ro).
+ */
+static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
+{
+ return (u32)(((u64) val * ep_ro) >> 32);
+}
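+
+/*
+ * Usage sketch (illustrative only; `hash' and `nr_buckets' are hypothetical):
+ * map a 32-bit hash onto an array of nr_buckets entries without a modulo;
+ * the result is always in [0, nr_buckets):
+ *
+ *	u32 idx = reciprocal_scale(hash, nr_buckets);
+ */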
+
+u64 int_pow(u64 base, unsigned int exp);
+unsigned long int_sqrt(unsigned long);
+
+#if BITS_PER_LONG < 64
+u32 int_sqrt64(u64 x);
+#else
+static inline u32 int_sqrt64(u64 x)
+{
+ return (u32)int_sqrt(x);
+}
+#endif
+
+#define abs(x) __abs_choose_expr(x, long long, \
+ __abs_choose_expr(x, long, \
+ __abs_choose_expr(x, int, \
+ __abs_choose_expr(x, short, \
+ __abs_choose_expr(x, char, \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(x), char), \
+ (char)({ signed char __x = (x); __x<0?-__x:__x; }), \
+ ((void)0)))))))
+
+#define __abs_choose_expr(x, type, other) __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(x), signed type) || \
+ __builtin_types_compatible_p(typeof(x), unsigned type), \
+ ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other)
+
+#endif /* _LINUX_MATH_H */
diff --git a/c_src/include/linux/math64.h b/c_src/include/linux/math64.h
new file mode 100644
index 00000000..5eb6f064
--- /dev/null
+++ b/c_src/include/linux/math64.h
@@ -0,0 +1,85 @@
+#ifndef _LINUX_MATH64_H
+#define _LINUX_MATH64_H
+
+#include <linux/types.h>
+
+#define do_div(n,base) ({ \
+ u32 __base = (base); \
+ u32 __rem; \
+ __rem = ((u64)(n)) % __base; \
+ (n) = ((u64)(n)) / __base; \
+ __rem; \
+ })
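+
+/*
+ * Usage sketch (illustrative only): do_div() divides @n in place and
+ * evaluates to the remainder:
+ *
+ *	u64 ns = 1000000123;
+ *	u32 rem = do_div(ns, 1000000);
+ *
+ * afterwards ns == 1000 and rem == 123.
+ */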
+
+#define div64_long(x, y) div64_s64((x), (y))
+#define div64_ul(x, y) div64_u64((x), (y))
+
+/**
+ * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder
+ *
+ * This is commonly implemented by 32bit archs to provide an optimized 64bit
+ * divide.
+ */
+static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+/**
+ * div_s64_rem - signed 64bit divide with 32bit divisor with remainder
+ */
+static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+/**
+ * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
+ */
+static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+/**
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
+ */
+static inline u64 div64_u64(u64 dividend, u64 divisor)
+{
+ return dividend / divisor;
+}
+
+/**
+ * div64_s64 - signed 64bit divide with 64bit divisor
+ */
+static inline s64 div64_s64(s64 dividend, s64 divisor)
+{
+ return dividend / divisor;
+}
+
+/**
+ * div_u64 - unsigned 64bit divide with 32bit divisor
+ *
+ * This is the most common 64bit divide and should be used if possible,
+ * as many 32bit archs can optimize this variant better than a full 64bit
+ * divide.
+ */
+static inline u64 div_u64(u64 dividend, u32 divisor)
+{
+ u32 remainder;
+ return div_u64_rem(dividend, divisor, &remainder);
+}
+
+/**
+ * div_s64 - signed 64bit divide with 32bit divisor
+ */
+static inline s64 div_s64(s64 dividend, s32 divisor)
+{
+ s32 remainder;
+ return div_s64_rem(dividend, divisor, &remainder);
+}
+
+#endif /* _LINUX_MATH64_H */
diff --git a/c_src/include/linux/mempool.h b/c_src/include/linux/mempool.h
new file mode 100644
index 00000000..506da24d
--- /dev/null
+++ b/c_src/include/linux/mempool.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * memory buffer pool support
+ */
+#ifndef _LINUX_MEMPOOL_H
+#define _LINUX_MEMPOOL_H
+
+#include <linux/wait.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+
+struct kmem_cache;
+
+typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data);
+typedef void (mempool_free_t)(void *element, void *pool_data);
+
+typedef struct mempool_s {
+ spinlock_t lock;
+ int min_nr; /* nr of elements at *elements */
+ int curr_nr; /* Current nr of elements at *elements */
+ void **elements;
+
+ void *pool_data;
+ mempool_alloc_t *alloc;
+ mempool_free_t *free;
+ wait_queue_head_t wait;
+} mempool_t;
+
+static inline bool mempool_initialized(mempool_t *pool)
+{
+ return pool->elements != NULL;
+}
+
+void mempool_exit(mempool_t *pool);
+int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data,
+ gfp_t gfp_mask, int node_id);
+int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data);
+
+extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data);
+extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data,
+ gfp_t gfp_mask, int nid);
+
+extern int mempool_resize(mempool_t *pool, int new_min_nr);
+extern void mempool_destroy(mempool_t *pool);
+extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc;
+extern void mempool_free(void *element, mempool_t *pool);
+
+/*
+ * A mempool_alloc_t and mempool_free_t that get the memory from
+ * a slab cache that is passed in through pool_data.
+ * Note: the slab cache may not have a ctor function.
+ */
+void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data);
+void mempool_free_slab(void *element, void *pool_data);
+
+static inline int
+mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc)
+{
+ return mempool_init(pool, min_nr, mempool_alloc_slab,
+ mempool_free_slab, (void *) kc);
+}
+
+static inline mempool_t *
+mempool_create_slab_pool(int min_nr, struct kmem_cache *kc)
+{
+ return mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab,
+ (void *) kc);
+}
+
+/*
+ * a mempool_alloc_t and a mempool_free_t to kmalloc and kfree the
+ * amount of memory specified by pool_data
+ */
+void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data);
+void mempool_kfree(void *element, void *pool_data);
+
+static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+ return mempool_init(pool, min_nr, mempool_kmalloc,
+ mempool_kfree, (void *) size);
+}
+
+static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
+{
+ return mempool_create(min_nr, mempool_kmalloc, mempool_kfree,
+ (void *) size);
+}
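+
+/*
+ * Usage sketch (illustrative only; the element count and size are arbitrary):
+ * a pool that guarantees at least two 128-byte elements.
+ *
+ *	mempool_t *pool = mempool_create_kmalloc_pool(2, 128);
+ *	void *buf = mempool_alloc(pool, GFP_KERNEL);
+ *
+ *	mempool_free(buf, pool);
+ *	mempool_destroy(pool);
+ */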
+
+/*
+ * A mempool_alloc_t and mempool_free_t for a simple page allocator that
+ * allocates pages of the order specified by pool_data
+ */
+void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data);
+void mempool_free_pages(void *element, void *pool_data);
+
+static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order)
+{
+ return mempool_init(pool, min_nr, mempool_alloc_pages,
+ mempool_free_pages, (void *)(long)order);
+}
+
+static inline mempool_t *mempool_create_page_pool(int min_nr, int order)
+{
+ return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages,
+ (void *)(long)order);
+}
+
+#endif /* _LINUX_MEMPOOL_H */
diff --git a/c_src/include/linux/minmax.h b/c_src/include/linux/minmax.h
new file mode 100644
index 00000000..ddc15bf7
--- /dev/null
+++ b/c_src/include/linux/minmax.h
@@ -0,0 +1,272 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MINMAX_H
+#define _LINUX_MINMAX_H
+
+#include <linux/compiler.h>
+#include <linux/const.h>
+#include <linux/types.h>
+
+/*
+ * min()/max()/clamp() macros must accomplish three things:
+ *
+ * - Avoid multiple evaluations of the arguments (so side-effects like
+ * "x++" happen only once) when non-constant.
+ * - Retain result as a constant expressions when called with only
+ * constant expressions (to avoid tripping VLA warnings in stack
+ * allocation usage).
+ * - Perform signed v unsigned type-checking (to generate compile
+ * errors instead of nasty runtime surprises).
+ * - Unsigned char/short are always promoted to signed int and can be
+ * compared against signed or unsigned arguments.
+ * - Unsigned arguments can be compared against non-negative signed constants.
+ * - Comparison of a signed argument against an unsigned constant fails
+ * even if the constant is below __INT_MAX__ and could be cast to int.
+ */
+#define __typecheck(x, y) \
+ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
+
+/* is_signed_type() isn't a constexpr for pointer types */
+#define __is_signed(x) \
+ __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \
+ is_signed_type(typeof(x)), 0)
+
+/* True for a non-negative signed int constant */
+#define __is_noneg_int(x) \
+ (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0)
+
+#define __types_ok(x, y) \
+ (__is_signed(x) == __is_signed(y) || \
+ __is_signed((x) + 0) == __is_signed((y) + 0) || \
+ __is_noneg_int(x) || __is_noneg_int(y))
+
+#define __cmp_op_min <
+#define __cmp_op_max >
+
+#define __cmp(op, x, y) ((x) __cmp_op_##op (y) ? (x) : (y))
+
+#define __cmp_once(op, x, y, unique_x, unique_y) ({ \
+ typeof(x) unique_x = (x); \
+ typeof(y) unique_y = (y); \
+ static_assert(__types_ok(x, y), \
+ #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \
+ __cmp(op, unique_x, unique_y); })
+
+#define __careful_cmp(op, x, y) \
+ __builtin_choose_expr(__is_constexpr((x) - (y)), \
+ __cmp(op, x, y), \
+ __cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y)))
+
+#define __clamp(val, lo, hi) \
+ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val)))
+
+#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \
+ typeof(val) unique_val = (val); \
+ typeof(lo) unique_lo = (lo); \
+ typeof(hi) unique_hi = (hi); \
+ static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \
+ (lo) <= (hi), true), \
+ "clamp() low limit " #lo " greater than high limit " #hi); \
+ static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \
+ static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \
+ __clamp(unique_val, unique_lo, unique_hi); })
+
+#define __careful_clamp(val, lo, hi) ({ \
+ __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \
+ __clamp(val, lo, hi), \
+ __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \
+ __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
+
+/**
+ * min - return minimum of two values of the same or compatible types
+ * @x: first value
+ * @y: second value
+ */
+#define min(x, y) __careful_cmp(min, x, y)
+
+/**
+ * max - return maximum of two values of the same or compatible types
+ * @x: first value
+ * @y: second value
+ */
+#define max(x, y) __careful_cmp(max, x, y)
+
+/**
+ * umin - return minimum of two non-negative values
+ * Signed types are zero extended to match a larger unsigned type.
+ * @x: first value
+ * @y: second value
+ */
+#define umin(x, y) \
+ __careful_cmp(min, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull)
+
+/**
+ * umax - return maximum of two non-negative values
+ * @x: first value
+ * @y: second value
+ */
+#define umax(x, y) \
+ __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull)
+
+/**
+ * min3 - return minimum of three values
+ * @x: first value
+ * @y: second value
+ * @z: third value
+ */
+#define min3(x, y, z) min((typeof(x))min(x, y), z)
+
+/**
+ * max3 - return maximum of three values
+ * @x: first value
+ * @y: second value
+ * @z: third value
+ */
+#define max3(x, y, z) max((typeof(x))max(x, y), z)
+
+/**
+ * min_not_zero - return the minimum that is _not_ zero, unless both are zero
+ * @x: value1
+ * @y: value2
+ */
+#define min_not_zero(x, y) ({ \
+ typeof(x) __x = (x); \
+ typeof(y) __y = (y); \
+ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
+
+/**
+ * clamp - return a value clamped to a given range with strict typechecking
+ * @val: current value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
+ *
+ * This macro does strict typechecking of @lo/@hi to make sure they are of the
+ * same type as @val. See the unnecessary pointer comparisons.
+ */
+#define clamp(val, lo, hi) __careful_clamp(val, lo, hi)
+
+/*
+ * ..and if you can't take the strict
+ * types, you can specify one yourself.
+ *
+ * Or not use min/max/clamp at all, of course.
+ */
+
+/**
+ * min_t - return minimum of two values, using the specified type
+ * @type: data type to use
+ * @x: first value
+ * @y: second value
+ */
+#define min_t(type, x, y) __careful_cmp(min, (type)(x), (type)(y))
+
+/**
+ * max_t - return maximum of two values, using the specified type
+ * @type: data type to use
+ * @x: first value
+ * @y: second value
+ */
+#define max_t(type, x, y) __careful_cmp(max, (type)(x), (type)(y))
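+
+/*
+ * Usage sketch (illustrative only): when operands mix signedness, min()/max()
+ * trip the static_assert above; cast through min_t()/max_t() or use the
+ * unsigned helpers instead:
+ *
+ *	size_t a = 4096;
+ *	int    b = 512;
+ *
+ *	size_t x = min_t(size_t, a, b);
+ *	size_t y = umin(a, b);
+ */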
+
+/*
+ * Do not check the array parameter using __must_be_array().
+ * In the following legit use-case where the "array" passed is a simple pointer,
+ * __must_be_array() will return a failure.
+ * --- 8< ---
+ * int *buff
+ * ...
+ * min = min_array(buff, nb_items);
+ * --- 8< ---
+ *
+ * The first typeof(&(array)[0]) is needed in order to support arrays of both
+ * 'int *buff' and 'int buff[N]' types.
+ *
+ * The array can be an array of const items.
+ * typeof() keeps the const qualifier. Use __unqual_scalar_typeof() in order
+ * to discard the const qualifier for the __element variable.
+ */
+#define __minmax_array(op, array, len) ({ \
+ typeof(&(array)[0]) __array = (array); \
+ typeof(len) __len = (len); \
+ __unqual_scalar_typeof(__array[0]) __element = __array[--__len];\
+ while (__len--) \
+ __element = op(__element, __array[__len]); \
+ __element; })
+
+/**
+ * min_array - return minimum of values present in an array
+ * @array: array
+ * @len: array length
+ *
+ * Note that @len must not be zero (empty array).
+ */
+#define min_array(array, len) __minmax_array(min, array, len)
+
+/**
+ * max_array - return maximum of values present in an array
+ * @array: array
+ * @len: array length
+ *
+ * Note that @len must not be zero (empty array).
+ */
+#define max_array(array, len) __minmax_array(max, array, len)
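+
+/*
+ * Worked example (illustrative only; ARRAY_SIZE() is assumed to be available
+ * from the usual helpers):
+ *
+ *	static const int temps[] = { 21, 17, 25, 19 };
+ *
+ *	int coldest = min_array(temps, ARRAY_SIZE(temps));
+ *	int hottest = max_array(temps, ARRAY_SIZE(temps));
+ *
+ * yields coldest == 17 and hottest == 25.
+ */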
+
+/**
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type
+ * @type to make all the comparisons.
+ */
+#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi))
+
+/**
+ * clamp_val - return a value clamped to a given range using val's type
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of whatever
+ * type the input argument @val is. This is useful when @val is an unsigned
+ * type and @lo and @hi are literals that will otherwise be assigned a signed
+ * integer type.
+ */
+#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
+
+static inline bool in_range64(u64 val, u64 start, u64 len)
+{
+ return (val - start) < len;
+}
+
+static inline bool in_range32(u32 val, u32 start, u32 len)
+{
+ return (val - start) < len;
+}
+
+/**
+ * in_range - Determine if a value lies within a range.
+ * @val: Value to test.
+ * @start: First value in range.
+ * @len: Number of values in range.
+ *
+ * This is more efficient than "if (start <= val && val < (start + len))".
+ * It also gives a different answer if @start + @len overflows the size of
+ * the type by a sufficient amount to encompass @val. Decide for yourself
+ * which behaviour you want, or prove that start + len never overflow.
+ * Do not blindly replace one form with the other.
+ */
+#define in_range(val, start, len) \
+ ((sizeof(start) | sizeof(len) | sizeof(val)) <= sizeof(u32) ? \
+ in_range32(val, start, len) : in_range64(val, start, len))
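+
+/*
+ * Worked examples (illustrative only):
+ *
+ *	in_range(5, 5, 10)	-> true		(covers 5..14 inclusive)
+ *	in_range(14, 5, 10)	-> true
+ *	in_range(15, 5, 10)	-> false
+ *	in_range(4, 5, 10)	-> false
+ */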
+
+/**
+ * swap - swap values of @a and @b
+ * @a: first value
+ * @b: second value
+ */
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+#endif /* _LINUX_MINMAX_H */
diff --git a/c_src/include/linux/mm.h b/c_src/include/linux/mm.h
new file mode 100644
index 00000000..744a14ce
--- /dev/null
+++ b/c_src/include/linux/mm.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_MM_H
+#define _TOOLS_LINUX_MM_H
+
+#include <sys/syscall.h>
+#include <linux/types.h>
+
+struct sysinfo {
+ long uptime; /* Seconds since boot */
+ unsigned long loads[3]; /* 1, 5, and 15 minute load averages */
+ unsigned long totalram; /* Total usable main memory size */
+ unsigned long freeram; /* Available memory size */
+ unsigned long sharedram; /* Amount of shared memory */
+ unsigned long bufferram; /* Memory used by buffers */
+ unsigned long totalswap; /* Total swap space size */
+ unsigned long freeswap; /* swap space still available */
+ __u16 procs; /* Number of current processes */
+ __u16 pad; /* Explicit padding for m68k */
+ unsigned long totalhigh; /* Total high memory size */
+ unsigned long freehigh; /* Available high memory size */
+ __u32 mem_unit; /* Memory unit size in bytes */
+};
+
+
+
+static inline void si_meminfo(struct sysinfo *val)
+{
+ BUG_ON(syscall(SYS_sysinfo, val));
+}
+
+#endif /* _TOOLS_LINUX_MM_H */
diff --git a/c_src/include/linux/module.h b/c_src/include/linux/module.h
new file mode 100644
index 00000000..42d4e18a
--- /dev/null
+++ b/c_src/include/linux/module.h
@@ -0,0 +1,48 @@
+#ifndef _LINUX_MODULE_H
+#define _LINUX_MODULE_H
+
+#include <linux/stat.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+
+struct module;
+
+#define module_init(initfn) \
+ __attribute__((constructor(120))) \
+ static void __call_##initfn(void) { BUG_ON(initfn()); }
+
+#if 0
+#define module_exit(exitfn) \
+ __attribute__((destructor(109))) \
+ static void __call_##exitfn(void) { exitfn(); }
+#endif
+
+#define module_exit(exitfn) \
+ __attribute__((unused)) \
+ static void __call_##exitfn(void) { exitfn(); }
+
+#define MODULE_INFO(tag, info)
+#define MODULE_ALIAS(_alias)
+#define MODULE_SOFTDEP(_softdep)
+#define MODULE_LICENSE(_license)
+#define MODULE_AUTHOR(_author)
+#define MODULE_DESCRIPTION(_description)
+#define MODULE_VERSION(_version)
+
+static inline void __module_get(struct module *module)
+{
+}
+
+static inline int try_module_get(struct module *module)
+{
+ return 1;
+}
+
+static inline void module_put(struct module *module)
+{
+}
+
+#define module_param_named(name, value, type, perm)
+#define MODULE_PARM_DESC(_parm, desc)
+
+#endif /* _LINUX_MODULE_H */
diff --git a/c_src/include/linux/mutex.h b/c_src/include/linux/mutex.h
new file mode 100644
index 00000000..801f06e1
--- /dev/null
+++ b/c_src/include/linux/mutex.h
@@ -0,0 +1,18 @@
+#ifndef __TOOLS_LINUX_MUTEX_H
+#define __TOOLS_LINUX_MUTEX_H
+
+#include <pthread.h>
+
+struct mutex {
+ pthread_mutex_t lock;
+};
+
+#define DEFINE_MUTEX(mutexname) \
+ struct mutex mutexname = { .lock = PTHREAD_MUTEX_INITIALIZER }
+
+#define mutex_init(l) pthread_mutex_init(&(l)->lock, NULL)
+#define mutex_lock(l) pthread_mutex_lock(&(l)->lock)
+#define mutex_trylock(l) (!pthread_mutex_trylock(&(l)->lock))
+#define mutex_unlock(l) pthread_mutex_unlock(&(l)->lock)
+
+#endif /* __TOOLS_LINUX_MUTEX_H */
diff --git a/c_src/include/linux/osq_lock.h b/c_src/include/linux/osq_lock.h
new file mode 100644
index 00000000..bde9f0d2
--- /dev/null
+++ b/c_src/include/linux/osq_lock.h
@@ -0,0 +1,44 @@
+#ifndef __LINUX_OSQ_LOCK_H
+#define __LINUX_OSQ_LOCK_H
+
+/*
+ * An MCS like lock especially tailored for optimistic spinning for sleeping
+ * lock implementations (mutex, rwsem, etc).
+ */
+struct optimistic_spin_node {
+ struct optimistic_spin_node *next, *prev;
+ int locked; /* 1 if lock acquired */
+ int cpu; /* encoded CPU # + 1 value */
+};
+
+struct optimistic_spin_queue {
+ /*
+ * Stores an encoded value of the CPU # of the tail node in the queue.
+ * If the queue is empty, then it's set to OSQ_UNLOCKED_VAL.
+ */
+ atomic_t tail;
+};
+
+#define OSQ_UNLOCKED_VAL (0)
+
+/* Init macro and function. */
+#define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) }
+
+static inline void osq_lock_init(struct optimistic_spin_queue *lock)
+{
+ atomic_set(&lock->tail, OSQ_UNLOCKED_VAL);
+}
+
+static inline bool osq_lock(struct optimistic_spin_queue *lock)
+{
+ return false;
+}
+
+static inline void osq_unlock(struct optimistic_spin_queue *lock) {}
+
+static inline bool osq_is_locked(struct optimistic_spin_queue *lock)
+{
+ return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL;
+}
+
+#endif
diff --git a/c_src/include/linux/overflow.h b/c_src/include/linux/overflow.h
new file mode 100644
index 00000000..ba30f77e
--- /dev/null
+++ b/c_src/include/linux/overflow.h
@@ -0,0 +1,345 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+#ifndef __LINUX_OVERFLOW_H
+#define __LINUX_OVERFLOW_H
+
+#include <linux/compiler.h>
+#include <linux/limits.h>
+
+/*
+ * In the fallback code below, we need to compute the minimum and
+ * maximum values representable in a given type. These macros may also
+ * be useful elsewhere, so we provide them outside the
+ * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
+ *
+ * It would seem more obvious to do something like
+ *
+ * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
+ * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
+ *
+ * Unfortunately, the middle expressions, strictly speaking, have
+ * undefined behaviour, and at least some versions of gcc warn about
+ * the type_max expression (but not if -fsanitize=undefined is in
+ * effect; in that case, the warning is deferred to runtime...).
+ *
+ * The slightly excessive casting in type_min is to make sure the
+ * macros also produce sensible values for the exotic type _Bool. [The
+ * overflow checkers only almost work for _Bool, but that's
+ * a-feature-not-a-bug, since people shouldn't be doing arithmetic on
+ * _Bools. Besides, the gcc builtins don't allow _Bool* as third
+ * argument.]
+ *
+ * Idea stolen from
+ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html -
+ * credit to Christian Biere.
+ */
+#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
+#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+
+/*
+ * Avoids triggering -Wtype-limits compilation warning,
+ * while using unsigned data types to check a < 0.
+ */
+#define is_non_negative(a) ((a) > 0 || (a) == 0)
+#define is_negative(a) (!(is_non_negative(a)))
+
+/*
+ * Allows for effectively applying __must_check to a macro so we can have
+ * both the type-agnostic benefits of the macros while also being able to
+ * enforce that the return value is, in fact, checked.
+ */
+static inline bool __must_check __must_check_overflow(bool overflow)
+{
+ return unlikely(overflow);
+}
+
+#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+/*
+ * For simplicity and code hygiene, the fallback code below insists on
+ * a, b and *d having the same type (similar to the min() and max()
+ * macros), whereas gcc's type-generic overflow checkers accept
+ * different types. Hence we don't just make check_add_overflow an
+ * alias for __builtin_add_overflow, but add type checks similar to
+ * below.
+ */
+#define check_add_overflow(a, b, d) __must_check_overflow(({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_add_overflow(__a, __b, __d); \
+}))
+
+#define check_sub_overflow(a, b, d) __must_check_overflow(({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_sub_overflow(__a, __b, __d); \
+}))
+
+#define check_mul_overflow(a, b, d) __must_check_overflow(({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_mul_overflow(__a, __b, __d); \
+}))
+
+#else
+
+
+/* Checking for unsigned overflow is relatively easy without causing UB. */
+#define __unsigned_add_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a + __b; \
+ *__d < __a; \
+})
+#define __unsigned_sub_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a - __b; \
+ __a < __b; \
+})
+/*
+ * If one of a or b is a compile-time constant, this avoids a division.
+ */
+#define __unsigned_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a * __b; \
+ __builtin_constant_p(__b) ? \
+ __b > 0 && __a > type_max(typeof(__a)) / __b : \
+ __a > 0 && __b > type_max(typeof(__b)) / __a; \
+})
+
+/*
+ * For signed types, detecting overflow is much harder, especially if
+ * we want to avoid UB. But the interface of these macros is such that
+ * we must provide a result in *d, and in fact we must produce the
+ * result promised by gcc's builtins, which is simply the possibly
+ * wrapped-around value. Fortunately, we can just formally do the
+ * operations in the widest relevant unsigned type (u64) and then
+ * truncate the result - gcc is smart enough to generate the same code
+ * with and without the (u64) casts.
+ */
+
+/*
+ * Adding two signed integers can overflow only if they have the same
+ * sign, and overflow has happened iff the result has the opposite
+ * sign.
+ */
+#define __signed_add_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a + (u64)__b; \
+ (((~(__a ^ __b)) & (*__d ^ __a)) \
+ & type_min(typeof(__a))) != 0; \
+})
+
+/*
+ * Subtraction is similar, except that overflow can now happen only
+ * when the signs are opposite. In this case, overflow has happened if
+ * the result has the opposite sign of a.
+ */
+#define __signed_sub_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a - (u64)__b; \
+ ((((__a ^ __b)) & (*__d ^ __a)) \
+ & type_min(typeof(__a))) != 0; \
+})
+
+/*
+ * Signed multiplication is rather hard. gcc always follows C99, so
+ * division is truncated towards 0. This means that we can write the
+ * overflow check like this:
+ *
+ * (a > 0 && (b > MAX/a || b < MIN/a)) ||
+ * (a < -1 && (b > MIN/a || b < MAX/a) ||
+ * (a == -1 && b == MIN)
+ *
+ * The redundant casts of -1 are to silence an annoying -Wtype-limits
+ * (included in -Wextra) warning: When the type is u8 or u16, the
+ * __b_c_e in check_mul_overflow obviously selects
+ * __unsigned_mul_overflow, but unfortunately gcc still parses this
+ * code and warns about the limited range of __b.
+ */
+
+#define __signed_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ typeof(a) __tmax = type_max(typeof(a)); \
+ typeof(a) __tmin = type_min(typeof(a)); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a * (u64)__b; \
+ (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
+ (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
+ (__b == (typeof(__b))-1 && __a == __tmin); \
+})
+
+
+#define check_add_overflow(a, b, d) __must_check_overflow( \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_add_overflow(a, b, d), \
+ __unsigned_add_overflow(a, b, d)))
+
+#define check_sub_overflow(a, b, d) __must_check_overflow( \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_sub_overflow(a, b, d), \
+ __unsigned_sub_overflow(a, b, d)))
+
+#define check_mul_overflow(a, b, d) __must_check_overflow( \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_mul_overflow(a, b, d), \
+ __unsigned_mul_overflow(a, b, d)))
+
+#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
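+
+/*
+ * Usage sketch (illustrative only; `nmemb', `size' and `p' are hypothetical):
+ * the check_*_overflow() helpers return true on overflow and store the
+ * possibly-wrapped result through the third argument, so only use the result
+ * when they return false:
+ *
+ *	size_t nmemb = 100, size = 32, bytes;
+ *	void *p;
+ *
+ *	if (check_mul_overflow(nmemb, size, &bytes))
+ *		return -ENOMEM;
+ *	p = malloc(bytes);
+ */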
+
+/** check_shl_overflow() - Calculate a left-shifted value and check overflow
+ *
+ * @a: Value to be shifted
+ * @s: How many bits left to shift
+ * @d: Pointer to where to store the result
+ *
+ * Computes *@d = (@a << @s)
+ *
+ * Returns true if '*d' cannot hold the result or when 'a << s' doesn't
+ * make sense. Example conditions:
+ * - 'a << s' causes bits to be lost when stored in *d.
+ * - 's' is garbage (e.g. negative) or so large that the result of
+ * 'a << s' is guaranteed to be 0.
+ * - 'a' is negative.
+ * - 'a << s' sets the sign bit, if any, in '*d'.
+ *
+ * '*d' will hold the results of the attempted shift, but is not
+ * considered "safe for use" if false is returned.
+ */
+#define check_shl_overflow(a, s, d) __must_check_overflow(({ \
+ typeof(a) _a = a; \
+ typeof(s) _s = s; \
+ typeof(d) _d = d; \
+ u64 _a_full = _a; \
+ unsigned int _to_shift = \
+ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \
+ *_d = (_a_full << _to_shift); \
+ (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \
+ (*_d >> _to_shift) != _a); \
+}))
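+
+/*
+ * Example (illustrative sketch): converting a block count to bytes via
+ * a shift.  "nr_blocks" and "block_bits" are hypothetical names.
+ *
+ *	u64 nr_blocks = 1000, bytes;
+ *	unsigned block_bits = 9;
+ *
+ *	if (check_shl_overflow(nr_blocks, block_bits, &bytes))
+ *		return -EINVAL;
+ */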
+
+/**
+ * array_size() - Calculate size of 2-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ *
+ * Calculates size of 2-dimensional array: @a * @b.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array_size(size_t a, size_t b)
+{
+ size_t bytes;
+
+ if (check_mul_overflow(a, b, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+}
+
+/**
+ * array3_size() - Calculate size of 3-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ * @c: dimension three
+ *
+ * Calculates size of 3-dimensional array: @a * @b * @c.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
+{
+ size_t bytes;
+
+ if (check_mul_overflow(a, b, &bytes))
+ return SIZE_MAX;
+ if (check_mul_overflow(bytes, c, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+}
+
+/*
+ * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for
+ * struct_size() below.
+ */
+static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
+{
+ size_t bytes;
+
+ if (check_mul_overflow(a, b, &bytes))
+ return SIZE_MAX;
+ if (check_add_overflow(bytes, c, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+}
+
+/**
+ * struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @count number of @member elements.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size(p, member, count) \
+ __ab_c_size(count, \
+ sizeof(*(p)->member) + __must_be_array((p)->member),\
+ sizeof(*(p)))
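+
+/*
+ * Example (illustrative sketch): allocating a structure with a trailing
+ * flexible array.  "struct snapshot" and its members are hypothetical.
+ *
+ *	struct snapshot {
+ *		u32	nr;
+ *		u64	entries[];
+ *	} *s;
+ *
+ *	s = malloc(struct_size(s, entries, 128));
+ *
+ * On overflow struct_size() returns SIZE_MAX, which the allocator will
+ * refuse, rather than a small wrapped-around size.
+ */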
+
+/**
+ * flex_array_size() - Calculate size of a flexible array member
+ * within an enclosing structure.
+ *
+ * @p: Pointer to the structure.
+ * @member: Name of the flexible array member.
+ * @count: Number of elements in the array.
+ *
+ * Calculates size of a flexible array of @count number of @member
+ * elements, at the end of structure @p.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define flex_array_size(p, member, count) \
+ array_size(count, \
+ sizeof(*(p)->member) + __must_be_array((p)->member))
+
+#endif /* __LINUX_OVERFLOW_H */
diff --git a/c_src/include/linux/page.h b/c_src/include/linux/page.h
new file mode 100644
index 00000000..111e5e68
--- /dev/null
+++ b/c_src/include/linux/page.h
@@ -0,0 +1,38 @@
+#ifndef _LINUX_PAGE_H
+#define _LINUX_PAGE_H
+
+#include <sys/user.h>
+
+struct page;
+
+#ifndef PAGE_SIZE
+
+#define PAGE_SIZE 4096UL
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+#endif
+
+#ifndef PAGE_SHIFT
+#define PAGE_SHIFT 12
+#endif
+
+
+#define virt_to_page(p) \
+ ((struct page *) (((unsigned long) (p)) & PAGE_MASK))
+#define offset_in_page(p) ((unsigned long) (p) & ~PAGE_MASK)
+
+#define page_address(p) ((void *) (p))
+
+#define kmap_atomic(page) page_address(page)
+#define kunmap_atomic(addr) do {} while (0)
+
+#define kmap_local_page(page) page_address(page)
+#define kunmap_local(addr) do {} while (0)
+
+#define PageHighMem(page) false
+
+static const char zero_page[PAGE_SIZE];
+
+#define ZERO_PAGE(o) ((struct page *) &zero_page[0])
+
+#endif /* _LINUX_PAGE_H */
diff --git a/c_src/include/linux/percpu-refcount.h b/c_src/include/linux/percpu-refcount.h
new file mode 100644
index 00000000..06550564
--- /dev/null
+++ b/c_src/include/linux/percpu-refcount.h
@@ -0,0 +1,180 @@
+#ifndef __TOOLS_LINUX_PERCPU_REFCOUNT_H
+#define __TOOLS_LINUX_PERCPU_REFCOUNT_H
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+
+struct percpu_ref;
+typedef void (percpu_ref_func_t)(struct percpu_ref *);
+
+/* flags set in the lower bits of percpu_ref->percpu_count_ptr */
+enum {
+ __PERCPU_REF_ATOMIC = 1LU << 0, /* operating in atomic mode */
+ __PERCPU_REF_DEAD = 1LU << 1, /* (being) killed */
+ __PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD,
+
+ __PERCPU_REF_FLAG_BITS = 2,
+};
+
+/* @flags for percpu_ref_init() */
+enum {
+ PERCPU_REF_INIT_ATOMIC = 1 << 0,
+
+ /*
+ * Start dead w/ ref == 0 in atomic mode. Must be revived with
+	 * percpu_ref_reinit() before use. Implies INIT_ATOMIC.
+ */
+ PERCPU_REF_INIT_DEAD = 1 << 1,
+};
+
+struct percpu_ref {
+ atomic_long_t count;
+ percpu_ref_func_t *release;
+ percpu_ref_func_t *confirm_switch;
+};
+
+static inline void percpu_ref_exit(struct percpu_ref *ref) {}
+
+static inline int __must_check percpu_ref_init(struct percpu_ref *ref,
+ percpu_ref_func_t *release, unsigned int flags,
+ gfp_t gfp)
+{
+ unsigned long start_count = 0;
+
+ if (!(flags & PERCPU_REF_INIT_DEAD))
+ start_count++;
+
+ atomic_long_set(&ref->count, start_count);
+
+ ref->release = release;
+ return 0;
+}
+
+/**
+ * percpu_ref_get_many - increment a percpu refcount
+ * @ref: percpu_ref to get
+ * @nr: number of references to get
+ *
+ * Analogous to atomic_long_add().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
+{
+ atomic_long_add(nr, &ref->count);
+}
+
+/**
+ * percpu_ref_get - increment a percpu refcount
+ * @ref: percpu_ref to get
+ *
+ * Analogous to atomic_long_inc().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_get(struct percpu_ref *ref)
+{
+ percpu_ref_get_many(ref, 1);
+}
+
+/**
+ * percpu_ref_tryget - try to increment a percpu refcount
+ * @ref: percpu_ref to try-get
+ *
+ * Increment a percpu refcount unless its count already reached zero.
+ * Returns %true on success; %false on failure.
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline bool percpu_ref_tryget(struct percpu_ref *ref)
+{
+ return atomic_long_inc_not_zero(&ref->count);
+}
+
+/**
+ * percpu_ref_tryget_live - try to increment a live percpu refcount
+ * @ref: percpu_ref to try-get
+ *
+ * Increment a percpu refcount unless it has already been killed. Returns
+ * %true on success; %false on failure.
+ *
+ * Completion of percpu_ref_kill() in itself doesn't guarantee that this
+ * function will fail. For such guarantee, percpu_ref_kill_and_confirm()
+ * should be used. After the confirm_kill callback is invoked, it's
+ * guaranteed that no new reference will be given out by
+ * percpu_ref_tryget_live().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
+{
+ return atomic_long_inc_not_zero(&ref->count);
+}
+
+/**
+ * percpu_ref_put_many - decrement a percpu refcount
+ * @ref: percpu_ref to put
+ * @nr: number of references to put
+ *
+ * Decrement the refcount, and if 0, call the release function (which was passed
+ * to percpu_ref_init())
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
+{
+ if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
+ ref->release(ref);
+}
+
+/**
+ * percpu_ref_put - decrement a percpu refcount
+ * @ref: percpu_ref to put
+ *
+ * Decrement the refcount, and if 0, call the release function (which was passed
+ * to percpu_ref_init())
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_put(struct percpu_ref *ref)
+{
+ percpu_ref_put_many(ref, 1);
+}
+
+static inline void percpu_ref_reinit(struct percpu_ref *ref)
+{
+ percpu_ref_get(ref);
+}
+
+/**
+ * percpu_ref_kill - drop the initial ref
+ * @ref: percpu_ref to kill
+ *
+ * Must be used to drop the initial ref on a percpu refcount; must be called
+ * precisely once before shutdown.
+ */
+static inline void percpu_ref_kill(struct percpu_ref *ref)
+{
+ percpu_ref_put(ref);
+}
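+
+/*
+ * Example (illustrative sketch): typical lifetime with this userspace
+ * shim.  "struct obj", "obj_release" and the ref member are
+ * hypothetical names.
+ *
+ *	static void obj_release(struct percpu_ref *ref)
+ *	{
+ *		free(container_of(ref, struct obj, ref));
+ *	}
+ *
+ *	percpu_ref_init(&o->ref, obj_release, 0, GFP_KERNEL);
+ *	percpu_ref_get(&o->ref);	// per-user reference
+ *	percpu_ref_put(&o->ref);
+ *	percpu_ref_kill(&o->ref);	// drop the initial ref; may free
+ */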
+
+/**
+ * percpu_ref_is_zero - test whether a percpu refcount reached zero
+ * @ref: percpu_ref to test
+ *
+ * Returns %true if @ref reached zero.
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
+{
+ return !atomic_long_read(&ref->count);
+}
+
+static inline bool percpu_ref_is_dying(struct percpu_ref *ref)
+{
+ return percpu_ref_is_zero(ref);
+}
+
+#endif /* __TOOLS_LINUX_PERCPU_REFCOUNT_H */
diff --git a/c_src/include/linux/percpu-rwsem.h b/c_src/include/linux/percpu-rwsem.h
new file mode 100644
index 00000000..153251c0
--- /dev/null
+++ b/c_src/include/linux/percpu-rwsem.h
@@ -0,0 +1,58 @@
+
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PERCPU_RWSEM_H
+#define _LINUX_PERCPU_RWSEM_H
+
+#include <pthread.h>
+#include <linux/preempt.h>
+
+struct percpu_rw_semaphore {
+ pthread_mutex_t lock;
+};
+
+static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem)
+{
+ pthread_mutex_lock(&sem->lock);
+}
+
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+ pthread_mutex_lock(&sem->lock);
+}
+
+static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+{
+ return !pthread_mutex_trylock(&sem->lock);
+}
+
+static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem)
+{
+ pthread_mutex_unlock(&sem->lock);
+}
+
+static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+ pthread_mutex_unlock(&sem->lock);
+}
+
+static inline void percpu_down_write(struct percpu_rw_semaphore *sem)
+{
+ pthread_mutex_lock(&sem->lock);
+}
+
+static inline void percpu_up_write(struct percpu_rw_semaphore *sem)
+{
+ pthread_mutex_unlock(&sem->lock);
+}
+
+static inline void percpu_free_rwsem(struct percpu_rw_semaphore *sem) {}
+
+static inline int percpu_init_rwsem(struct percpu_rw_semaphore *sem)
+{
+ pthread_mutex_init(&sem->lock, NULL);
+ return 0;
+}
+
+#define percpu_rwsem_assert_held(sem) do {} while (0)
+
+#endif
diff --git a/c_src/include/linux/percpu.h b/c_src/include/linux/percpu.h
new file mode 100644
index 00000000..740d8332
--- /dev/null
+++ b/c_src/include/linux/percpu.h
@@ -0,0 +1,191 @@
+#ifndef __TOOLS_LINUX_PERCPU_H
+#define __TOOLS_LINUX_PERCPU_H
+
+#include <linux/cpumask.h>
+
+#define __percpu
+
+#define free_percpu(percpu) free(percpu)
+
+#define __alloc_percpu_gfp(size, align, gfp) calloc(1, size)
+#define __alloc_percpu(size, align) calloc(1, size)
+
+#define alloc_percpu_gfp(type, gfp) \
+ (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type), \
+ __alignof__(type), gfp)
+#define alloc_percpu(type) \
+ (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \
+ __alignof__(type))
+
+#define __verify_pcpu_ptr(ptr)
+
+#define per_cpu_ptr(ptr, cpu) (ptr)
+#define raw_cpu_ptr(ptr) (ptr)
+#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr)
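+
+/*
+ * Example (illustrative sketch): in this userspace shim a "per-cpu"
+ * allocation is a single object, so per_cpu_ptr()/this_cpu_ptr() all
+ * return the same pointer.  "counter" is a hypothetical name.
+ *
+ *	u64 __percpu *counter = alloc_percpu(u64);
+ *
+ *	(*this_cpu_ptr(counter))++;
+ *	free_percpu(counter);
+ */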
+
+#define __pcpu_size_call_return(stem, variable) \
+({ \
+ typeof(variable) pscr_ret__; \
+ __verify_pcpu_ptr(&(variable)); \
+ switch(sizeof(variable)) { \
+ case 1: pscr_ret__ = stem##1(variable); break; \
+ case 2: pscr_ret__ = stem##2(variable); break; \
+ case 4: pscr_ret__ = stem##4(variable); break; \
+ case 8: pscr_ret__ = stem##8(variable); break; \
+ default: \
+ __bad_size_call_parameter(); break; \
+ } \
+ pscr_ret__; \
+})
+
+#define __pcpu_size_call_return2(stem, variable, ...) \
+({ \
+ typeof(variable) pscr2_ret__; \
+ __verify_pcpu_ptr(&(variable)); \
+ switch(sizeof(variable)) { \
+ case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \
+ case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \
+ case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \
+ case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
+ default: \
+ __bad_size_call_parameter(); break; \
+ } \
+ pscr2_ret__; \
+})
+
+/*
+ * Special handling for cmpxchg_double. cmpxchg_double is passed two
+ * percpu variables. The first has to be aligned to a double word
+ * boundary and the second has to follow directly thereafter.
+ * We enforce this on all architectures even if they don't support
+ * a double cmpxchg instruction, since it's a cheap requirement, and it
+ * avoids breaking the requirement for architectures with the instruction.
+ */
+#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \
+({ \
+ bool pdcrb_ret__; \
+ __verify_pcpu_ptr(&(pcp1)); \
+ BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \
+ VM_BUG_ON((unsigned long)(&(pcp1)) % (2 * sizeof(pcp1))); \
+ VM_BUG_ON((unsigned long)(&(pcp2)) != \
+ (unsigned long)(&(pcp1)) + sizeof(pcp1)); \
+ switch(sizeof(pcp1)) { \
+ case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \
+ case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \
+ case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \
+ case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \
+ default: \
+ __bad_size_call_parameter(); break; \
+ } \
+ pdcrb_ret__; \
+})
+
+#define __pcpu_size_call(stem, variable, ...) \
+do { \
+ __verify_pcpu_ptr(&(variable)); \
+ switch(sizeof(variable)) { \
+ case 1: stem##1(variable, __VA_ARGS__);break; \
+ case 2: stem##2(variable, __VA_ARGS__);break; \
+ case 4: stem##4(variable, __VA_ARGS__);break; \
+ case 8: stem##8(variable, __VA_ARGS__);break; \
+ default: \
+ __bad_size_call_parameter();break; \
+ } \
+} while (0)
+
+#define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, pcp)
+#define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, pcp, val)
+#define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, pcp, val)
+#define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, pcp, val)
+#define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, pcp, val)
+#define raw_cpu_add_return(pcp, val) __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)
+#define raw_cpu_xchg(pcp, nval) __pcpu_size_call_return2(raw_cpu_xchg_, pcp, nval)
+#define raw_cpu_cmpxchg(pcp, oval, nval) \
+ __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)
+#define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
+
+#define raw_cpu_sub(pcp, val) raw_cpu_add(pcp, -(val))
+#define raw_cpu_inc(pcp) raw_cpu_add(pcp, 1)
+#define raw_cpu_dec(pcp) raw_cpu_sub(pcp, 1)
+#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val))
+#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1)
+#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1)
+
+#define __this_cpu_read(pcp) \
+({ \
+ raw_cpu_read(pcp); \
+})
+
+#define __this_cpu_write(pcp, val) \
+({ \
+ raw_cpu_write(pcp, val); \
+})
+
+#define __this_cpu_add(pcp, val) \
+({ \
+ raw_cpu_add(pcp, val); \
+})
+
+#define __this_cpu_and(pcp, val) \
+({ \
+ raw_cpu_and(pcp, val); \
+})
+
+#define __this_cpu_or(pcp, val) \
+({ \
+ raw_cpu_or(pcp, val); \
+})
+
+#define __this_cpu_add_return(pcp, val) \
+({ \
+ raw_cpu_add_return(pcp, val); \
+})
+
+#define __this_cpu_xchg(pcp, nval) \
+({ \
+ raw_cpu_xchg(pcp, nval); \
+})
+
+#define __this_cpu_cmpxchg(pcp, oval, nval) \
+({ \
+ raw_cpu_cmpxchg(pcp, oval, nval); \
+})
+
+#define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+({ \
+	raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2); \
+})
+
+#define __this_cpu_sub(pcp, val) __this_cpu_add(pcp, -(typeof(pcp))(val))
+#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
+#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
+#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val))
+#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1)
+#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1)
+
+#define this_cpu_read(pcp) ((pcp))
+#define this_cpu_write(pcp, val) ((pcp) = val)
+#define this_cpu_add(pcp, val) ((pcp) += val)
+#define this_cpu_and(pcp, val) ((pcp) &= val)
+#define this_cpu_or(pcp, val) ((pcp) |= val)
+#define this_cpu_add_return(pcp, val) ((pcp) += val)
+#define this_cpu_xchg(pcp, nval) \
+({ \
+ typeof(pcp) _r = (pcp); \
+ (pcp) = (nval); \
+ _r; \
+})
+
+#define this_cpu_cmpxchg(pcp, oval, nval) \
+ __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval)
+#define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
+
+#define this_cpu_sub(pcp, val) this_cpu_add(pcp, -(typeof(pcp))(val))
+#define this_cpu_inc(pcp) this_cpu_add(pcp, 1)
+#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1)
+#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val))
+#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1)
+#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1)
+
+#endif /* __TOOLS_LINUX_PERCPU_H */
diff --git a/c_src/include/linux/poison.h b/c_src/include/linux/poison.h
new file mode 100644
index 00000000..851a855d
--- /dev/null
+++ b/c_src/include/linux/poison.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_POISON_H
+#define _LINUX_POISON_H
+
+/********** include/linux/list.h **********/
+
+/*
+ * Architectures might want to move the poison pointer offset
+ * into some well-recognized area such as 0xdead000000000000,
+ * that is also not mappable by user-space exploits:
+ */
+#ifdef CONFIG_ILLEGAL_POINTER_VALUE
+# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL)
+#else
+# define POISON_POINTER_DELTA 0
+#endif
+
+/*
+ * These are non-NULL pointers that will result in page faults
+ * under normal circumstances, used to verify that nobody uses
+ * non-initialized list entries.
+ */
+#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void *) 0x122 + POISON_POINTER_DELTA)
+
+/********** include/linux/timer.h **********/
+#define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA)
+
+/********** mm/page_poison.c **********/
+#define PAGE_POISON 0xaa
+
+/********** mm/page_alloc.c ************/
+
+#define TAIL_MAPPING ((void *) 0x400 + POISON_POINTER_DELTA)
+
+/********** mm/slab.c **********/
+/*
+ * Magic nums for obj red zoning.
+ * Placed in the first word before and the first word after an obj.
+ */
+#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
+#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
+
+#define SLUB_RED_INACTIVE 0xbb
+#define SLUB_RED_ACTIVE 0xcc
+
+/* ...and for poisoning */
+#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
+#define POISON_FREE 0x6b /* for use-after-free poisoning */
+#define POISON_END 0xa5 /* end-byte of poisoning */
+
+/********** arch/$ARCH/mm/init.c **********/
+#define POISON_FREE_INITMEM 0xcc
+
+/********** arch/ia64/hp/common/sba_iommu.c **********/
+/*
+ * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a
+ * value of "SBAIOMMU POISON\0" for spill-over poisoning.
+ */
+
+/********** fs/jbd/journal.c **********/
+#define JBD_POISON_FREE 0x5b
+#define JBD2_POISON_FREE 0x5c
+
+/********** drivers/base/dmapool.c **********/
+#define POOL_POISON_FREED 0xa7 /* !inuse */
+#define POOL_POISON_ALLOCATED 0xa9 /* !initted */
+
+/********** drivers/atm/ **********/
+#define ATM_POISON_FREE 0x12
+#define ATM_POISON 0xdeadbeef
+
+/********** kernel/mutexes **********/
+#define MUTEX_DEBUG_INIT 0x11
+#define MUTEX_DEBUG_FREE 0x22
+#define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA)
+
+/********** security/ **********/
+#define KEY_DESTROY 0xbd
+
+/********** net/core/page_pool.c **********/
+#define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA)
+
+/********** net/core/skbuff.c **********/
+#define SKB_LIST_POISON_NEXT ((void *)(0x800 + POISON_POINTER_DELTA))
+
+/********** kernel/bpf/ **********/
+#define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA))
+
+/********** VFS **********/
+#define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA))
+
+#endif
diff --git a/c_src/include/linux/posix_acl.h b/c_src/include/linux/posix_acl.h
new file mode 100644
index 00000000..1d21bfee
--- /dev/null
+++ b/c_src/include/linux/posix_acl.h
@@ -0,0 +1,49 @@
+/*
+ File: linux/posix_acl.h
+
+ (C) 2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+*/
+
+
+#ifndef __LINUX_POSIX_ACL_H
+#define __LINUX_POSIX_ACL_H
+
+#include <linux/bug.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+
+#define ACL_UNDEFINED_ID (-1)
+
+/* a_type field in acl_user_posix_entry_t */
+#define ACL_TYPE_ACCESS (0x8000)
+#define ACL_TYPE_DEFAULT (0x4000)
+
+/* e_tag entry in struct posix_acl_entry */
+#define ACL_USER_OBJ (0x01)
+#define ACL_USER (0x02)
+#define ACL_GROUP_OBJ (0x04)
+#define ACL_GROUP (0x08)
+#define ACL_MASK (0x10)
+#define ACL_OTHER (0x20)
+
+/* permissions in the e_perm field */
+#define ACL_READ (0x04)
+#define ACL_WRITE (0x02)
+#define ACL_EXECUTE (0x01)
+
+struct posix_acl_entry {
+ short e_tag;
+ unsigned short e_perm;
+ union {
+ uid_t e_uid;
+ gid_t e_gid;
+ };
+};
+
+struct posix_acl {
+ struct rcu_head a_rcu;
+ unsigned int a_count;
+ struct posix_acl_entry a_entries[0];
+};
+
+#endif /* __LINUX_POSIX_ACL_H */
diff --git a/c_src/include/linux/posix_acl_xattr.h b/c_src/include/linux/posix_acl_xattr.h
new file mode 100644
index 00000000..a8dad160
--- /dev/null
+++ b/c_src/include/linux/posix_acl_xattr.h
@@ -0,0 +1,34 @@
+/*
+ File: linux/posix_acl_xattr.h
+
+ Extended attribute system call representation of Access Control Lists.
+
+ Copyright (C) 2000 by Andreas Gruenbacher <a.gruenbacher@computer.org>
+ Copyright (C) 2002 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
+ */
+#ifndef _POSIX_ACL_XATTR_H
+#define _POSIX_ACL_XATTR_H
+
+#include <uapi/linux/xattr.h>
+
+/* Supported ACL a_version fields */
+#define POSIX_ACL_XATTR_VERSION 0x0002
+
+/* An undefined entry e_id value */
+#define ACL_UNDEFINED_ID (-1)
+
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+} posix_acl_xattr_entry;
+
+typedef struct {
+ __le32 a_version;
+ posix_acl_xattr_entry a_entries[0];
+} posix_acl_xattr_header;
+
+extern const struct xattr_handler nop_posix_acl_access;
+extern const struct xattr_handler nop_posix_acl_default;
+
+#endif /* _POSIX_ACL_XATTR_H */
diff --git a/c_src/include/linux/prandom.h b/c_src/include/linux/prandom.h
new file mode 100644
index 00000000..9aea22dc
--- /dev/null
+++ b/c_src/include/linux/prandom.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_PRANDOM_H
+#define _LINUX_PRANDOM_H
+
+#include <linux/random.h>
+
+static inline void prandom_bytes(void *buf, int nbytes)
+{
+ return get_random_bytes(buf, nbytes);
+}
+
+#define prandom_type(type) \
+static inline type prandom_##type(void) \
+{ \
+ type v; \
+ \
+ prandom_bytes(&v, sizeof(v)); \
+ return v; \
+}
+
+prandom_type(int);
+prandom_type(long);
+prandom_type(u32);
+prandom_type(u64);
+#undef prandom_type
+
+static inline u32 prandom_u32_max(u32 max)
+{
+	return prandom_u32() % max;
+}
+
+#endif /* _LINUX_PRANDOM_H */
+
diff --git a/c_src/include/linux/preempt.h b/c_src/include/linux/preempt.h
new file mode 100644
index 00000000..dbc7c24d
--- /dev/null
+++ b/c_src/include/linux/preempt.h
@@ -0,0 +1,16 @@
+#ifndef __LINUX_PREEMPT_H
+#define __LINUX_PREEMPT_H
+
+extern void preempt_disable(void);
+extern void preempt_enable(void);
+
+#define sched_preempt_enable_no_resched() preempt_enable()
+#define preempt_enable_no_resched() preempt_enable()
+#define preempt_check_resched() do { } while (0)
+
+#define preempt_disable_notrace() preempt_disable()
+#define preempt_enable_no_resched_notrace() preempt_enable()
+#define preempt_enable_notrace() preempt_enable()
+#define preemptible() 0
+
+#endif /* __LINUX_PREEMPT_H */
diff --git a/c_src/include/linux/prefetch.h b/c_src/include/linux/prefetch.h
new file mode 100644
index 00000000..b14fbe93
--- /dev/null
+++ b/c_src/include/linux/prefetch.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_PREFETCH_H
+#define _LINUX_PREFETCH_H
+
+#define prefetch(p) \
+ ({ __maybe_unused typeof(p) __var = (p); })
+
+#define prefetchw(p) \
+ ({ __maybe_unused typeof(p) __var = (p); })
+
+#endif /* _LINUX_PREFETCH_H */
diff --git a/c_src/include/linux/pretty-printers.h b/c_src/include/linux/pretty-printers.h
new file mode 100644
index 00000000..f39d8edf
--- /dev/null
+++ b/c_src/include/linux/pretty-printers.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022 Kent Overstreet */
+
+#ifndef _LINUX_PRETTY_PRINTERS_H
+#define _LINUX_PRETTY_PRINTERS_H
+
+void prt_string_option(struct printbuf *, const char * const[], size_t);
+void prt_bitflags(struct printbuf *, const char * const[], u64);
+
+#endif /* _LINUX_PRETTY_PRINTERS_H */
diff --git a/c_src/include/linux/printk.h b/c_src/include/linux/printk.h
new file mode 100644
index 00000000..cdafb9af
--- /dev/null
+++ b/c_src/include/linux/printk.h
@@ -0,0 +1,207 @@
+#ifndef __TOOLS_LINUX_PRINTK_H
+#define __TOOLS_LINUX_PRINTK_H
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#include <linux/compiler.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#define KERN_EMERG ""
+#define KERN_ALERT ""
+#define KERN_CRIT ""
+#define KERN_ERR ""
+#define KERN_WARNING ""
+#define KERN_NOTICE ""
+#define KERN_INFO ""
+#define KERN_DEBUG ""
+#define KERN_DEFAULT ""
+#define KERN_CONT ""
+#define KERN_SOH "\001"
+
+static inline int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int i;
+
+ i = vsnprintf(buf, size, fmt, args);
+
+ if (likely(i < size))
+ return i;
+ if (size != 0)
+ return size - 1;
+ return 0;
+}
+
+static inline int scnprintf(char * buf, size_t size, const char * fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vscnprintf(buf, size, fmt, args);
+ va_end(args);
+
+ return i;
+}
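+
+/*
+ * Example (illustrative sketch): unlike snprintf(), scnprintf() returns
+ * the number of characters actually written (excluding the NUL), never
+ * more than size - 1, so it is safe to chain when building a string in
+ * a fixed buffer.
+ *
+ *	char buf[64];
+ *	int n = 0;
+ *
+ *	n += scnprintf(buf + n, sizeof(buf) - n, "dev %u", 1);
+ *	n += scnprintf(buf + n, sizeof(buf) - n, " state %s", "ro");
+ */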
+
+#define printk(...) printf(__VA_ARGS__)
+#define vprintk(...) vprintf(__VA_ARGS__)
+
+#define no_printk(fmt, ...) \
+({ \
+ do { \
+ if (0) \
+ printk(fmt, ##__VA_ARGS__); \
+ } while (0); \
+ 0; \
+})
+
+#define pr_emerg(fmt, ...) \
+ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert(fmt, ...) \
+ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit(fmt, ...) \
+ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn pr_warning
+#define pr_notice(fmt, ...) \
+ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+/*
+ * Like KERN_CONT, pr_cont() should only be used when continuing
+ * a line with no newline ('\n') enclosed. Otherwise it defaults
+ * back to KERN_DEFAULT.
+ */
+#define pr_cont(fmt, ...) \
+ printk(KERN_CONT fmt, ##__VA_ARGS__)
+
+/* pr_devel() should produce zero code unless DEBUG is defined */
+#ifdef DEBUG
+#define pr_devel(fmt, ...) \
+ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_devel(fmt, ...) \
+ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
+
+/* If you are writing a driver, please use dev_dbg instead */
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#include <linux/dynamic_debug.h>
+
+/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */
+#define pr_debug(fmt, ...) \
+ dynamic_pr_debug(fmt, ##__VA_ARGS__)
+#elif defined(DEBUG)
+#define pr_debug(fmt, ...) \
+ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_debug(fmt, ...) \
+ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
+/*
+ * Print a one-time message (analogous to WARN_ONCE() et al):
+ */
+
+#define printk_once(fmt, ...) \
+({ \
+ static bool __print_once __read_mostly; \
+ bool __ret_print_once = !__print_once; \
+ \
+ if (!__print_once) { \
+ __print_once = true; \
+ printk(fmt, ##__VA_ARGS__); \
+ } \
+ unlikely(__ret_print_once); \
+})
+#define printk_deferred_once(fmt, ...) \
+({ \
+ static bool __print_once __read_mostly; \
+ bool __ret_print_once = !__print_once; \
+ \
+ if (!__print_once) { \
+ __print_once = true; \
+ printk_deferred(fmt, ##__VA_ARGS__); \
+ } \
+ unlikely(__ret_print_once); \
+})
+
+#define pr_emerg_once(fmt, ...) \
+ printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert_once(fmt, ...) \
+ printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit_once(fmt, ...) \
+ printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err_once(fmt, ...) \
+ printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn_once(fmt, ...) \
+ printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_notice_once(fmt, ...) \
+ printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info_once(fmt, ...) \
+ printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_cont_once(fmt, ...) \
+ printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__)
+
+#if defined(DEBUG)
+#define pr_devel_once(fmt, ...) \
+ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_devel_once(fmt, ...) \
+ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
+/* If you are writing a driver, please use dev_dbg instead */
+#if defined(DEBUG)
+#define pr_debug_once(fmt, ...) \
+ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_debug_once(fmt, ...) \
+ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
+/*
+ * ratelimited messages with local ratelimit_state,
+ * no local ratelimit_state used in the !PRINTK case
+ */
+#define printk_ratelimited(fmt, ...) \
+({ \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ \
+ if (__ratelimit(&_rs)) \
+ printk(fmt, ##__VA_ARGS__); \
+})
+
+#define pr_emerg_ratelimited(fmt, ...) \
+ printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert_ratelimited(fmt, ...) \
+ printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit_ratelimited(fmt, ...) \
+ printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err_ratelimited(fmt, ...) \
+ printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn_ratelimited(fmt, ...) \
+ printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_notice_ratelimited(fmt, ...) \
+ printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info_ratelimited(fmt, ...) \
+ printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+/* no pr_cont_ratelimited, don't do that... */
+
+#if defined(DEBUG)
+#define pr_devel_ratelimited(fmt, ...) \
+ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_devel_ratelimited(fmt, ...) \
+ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+#endif /* __TOOLS_LINUX_PRINTK_H */
diff --git a/c_src/include/linux/random.h b/c_src/include/linux/random.h
new file mode 100644
index 00000000..3203d13c
--- /dev/null
+++ b/c_src/include/linux/random.h
@@ -0,0 +1,70 @@
+/*
+ * include/linux/random.h
+ *
+ * Include file for the random number generator.
+ */
+#ifndef _LINUX_RANDOM_H
+#define _LINUX_RANDOM_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/bug.h>
+#include <linux/log2.h>
+
+#ifdef SYS_getrandom
+static inline int getrandom(void *buf, size_t buflen, unsigned int flags)
+{
+ return syscall(SYS_getrandom, buf, buflen, flags);
+}
+#else
+extern int urandom_fd;
+
+static inline int getrandom(void *buf, size_t buflen, unsigned int flags)
+{
+ return read(urandom_fd, buf, buflen);
+}
+#endif
+
+static inline void get_random_bytes(void *buf, int nbytes)
+{
+ BUG_ON(getrandom(buf, nbytes, 0) != nbytes);
+}
+
+#define get_random_type(type) \
+static inline type get_random_##type(void) \
+{ \
+ type v; \
+ \
+ get_random_bytes(&v, sizeof(v)); \
+ return v; \
+}
+
+get_random_type(int);
+get_random_type(long);
+get_random_type(u8);
+get_random_type(u16);
+get_random_type(u32);
+get_random_type(u64);
+
+static inline u32 get_random_u32_below(u32 ceil)
+{
+ if (ceil <= 1)
+ return 0;
+ for (;;) {
+ if (ceil <= 1U << 8) {
+ u32 mult = ceil * get_random_u8();
+ if (likely(is_power_of_2(ceil) || (u8)mult >= (1U << 8) % ceil))
+ return mult >> 8;
+ } else if (ceil <= 1U << 16) {
+ u32 mult = ceil * get_random_u16();
+ if (likely(is_power_of_2(ceil) || (u16)mult >= (1U << 16) % ceil))
+ return mult >> 16;
+ } else {
+ u64 mult = (u64)ceil * get_random_u32();
+ if (likely(is_power_of_2(ceil) || (u32)mult >= -ceil % ceil))
+ return mult >> 32;
+ }
+ }
+}
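+
+/*
+ * Example (illustrative sketch): get_random_u32_below() uses a
+ * multiply-and-reject loop rather than a plain modulo, so the result is
+ * uniform over [0, ceil).  "nr_entries" is a hypothetical name.
+ *
+ *	u32 idx = get_random_u32_below(nr_entries);
+ */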
+
+#endif /* _LINUX_RANDOM_H */
diff --git a/c_src/include/linux/ratelimit.h b/c_src/include/linux/ratelimit.h
new file mode 100644
index 00000000..680181d2
--- /dev/null
+++ b/c_src/include/linux/ratelimit.h
@@ -0,0 +1,109 @@
+#ifndef _LINUX_RATELIMIT_H
+#define _LINUX_RATELIMIT_H
+
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
+#define DEFAULT_RATELIMIT_BURST 10
+
+/* issue the number of suppressed messages on exit */
+#define RATELIMIT_MSG_ON_RELEASE 1
+
+struct ratelimit_state {
+ raw_spinlock_t lock; /* protect the state */
+
+ int interval;
+ int burst;
+ int printed;
+ int missed;
+ unsigned long begin;
+ unsigned long flags;
+};
+
+#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
+ .interval = interval_init, \
+ .burst = burst_init, \
+ }
+
+#define RATELIMIT_STATE_INIT_DISABLED \
+ RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST)
+
+#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \
+ \
+ struct ratelimit_state name = \
+ RATELIMIT_STATE_INIT(name, interval_init, burst_init) \
+
+static inline void ratelimit_state_init(struct ratelimit_state *rs,
+ int interval, int burst)
+{
+ memset(rs, 0, sizeof(*rs));
+
+ raw_spin_lock_init(&rs->lock);
+ rs->interval = interval;
+ rs->burst = burst;
+}
+
+static inline void ratelimit_default_init(struct ratelimit_state *rs)
+{
+ return ratelimit_state_init(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+}
+
+static inline void ratelimit_state_exit(struct ratelimit_state *rs)
+{
+ if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE))
+ return;
+
+ if (rs->missed) {
+ pr_warn("%s: %d output lines suppressed due to ratelimiting\n",
+ current->comm, rs->missed);
+ rs->missed = 0;
+ }
+}
+
+static inline void
+ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags)
+{
+ rs->flags = flags;
+}
+
+extern struct ratelimit_state printk_ratelimit_state;
+
+extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
+#define __ratelimit(state) ___ratelimit(state, __func__)
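+
+/*
+ * Example (illustrative sketch): a local ratelimit state allowing a
+ * burst of DEFAULT_RATELIMIT_BURST messages per interval.
+ *
+ *	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ *				      DEFAULT_RATELIMIT_BURST);
+ *
+ *	if (__ratelimit(&rs))
+ *		pr_warn("slow path hit\n");
+ */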
+
+#ifdef CONFIG_PRINTK
+
+#define WARN_ON_RATELIMIT(condition, state) \
+ WARN_ON((condition) && __ratelimit(state))
+
+#define WARN_RATELIMIT(condition, format, ...) \
+({ \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ int rtn = !!(condition); \
+ \
+ if (unlikely(rtn && __ratelimit(&_rs))) \
+ WARN(rtn, format, ##__VA_ARGS__); \
+ \
+ rtn; \
+})
+
+#else
+
+#define WARN_ON_RATELIMIT(condition, state) \
+ WARN_ON(condition)
+
+#define WARN_RATELIMIT(condition, format, ...) \
+({ \
+ int rtn = WARN(condition, format, ##__VA_ARGS__); \
+ rtn; \
+})
+
+#endif
+
+#endif /* _LINUX_RATELIMIT_H */
diff --git a/c_src/include/linux/rculist.h b/c_src/include/linux/rculist.h
new file mode 100644
index 00000000..81df4e13
--- /dev/null
+++ b/c_src/include/linux/rculist.h
@@ -0,0 +1,16 @@
+#ifndef _LINUX_RCULIST_H
+#define _LINUX_RCULIST_H
+
+#include <urcu/rculist.h>
+
+
+#include <urcu/rcuhlist.h>
+
+#define hlist_add_head_rcu cds_hlist_add_head_rcu
+#define hlist_del_rcu cds_hlist_del_rcu
+
+#define hlist_for_each_rcu cds_hlist_for_each_rcu
+#define hlist_for_each_entry_rcu cds_hlist_for_each_entry_rcu_2
+
+
+#endif
diff --git a/c_src/include/linux/rcupdate.h b/c_src/include/linux/rcupdate.h
new file mode 100644
index 00000000..f5260270
--- /dev/null
+++ b/c_src/include/linux/rcupdate.h
@@ -0,0 +1,48 @@
+#ifndef __TOOLS_LINUX_RCUPDATE_H
+#define __TOOLS_LINUX_RCUPDATE_H
+
+#include <urcu.h>
+#include <linux/compiler.h>
+
+#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
+
+#define rcu_dereference_check(p, c) rcu_dereference(p)
+#define rcu_dereference_raw(p) rcu_dereference(p)
+#define rcu_dereference_protected(p, c) rcu_dereference(p)
+#define rcu_access_pointer(p) READ_ONCE(p)
+
+#define kfree_rcu(ptr, rcu_head) kfree(ptr) /* XXX */
+#define kfree_rcu_mightsleep(ptr) kfree(ptr) /* XXX */
+#define kvfree_rcu_mightsleep(ptr) kfree(ptr) /* XXX */
+
+#define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v)
+
+/* Has the specified rcu_head structure been handed to call_rcu()? */
+
+/**
+ * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu()
+ * @rhp: The rcu_head structure to initialize.
+ *
+ * If you intend to invoke rcu_head_after_call_rcu() to test whether a
+ * given rcu_head structure has already been passed to call_rcu(), then
+ * you must also invoke this rcu_head_init() function on it just after
+ * allocating that structure. Calls to this function must not race with
+ * calls to call_rcu(), rcu_head_after_call_rcu(), or callback invocation.
+ */
+static inline void rcu_head_init(struct rcu_head *rhp)
+{
+ rhp->func = (void *)~0L;
+}
+
+static inline bool
+rcu_head_after_call_rcu(struct rcu_head *rhp,
+ void (*f)(struct rcu_head *head))
+{
+ void (*func)(struct rcu_head *head) = READ_ONCE(rhp->func);
+
+ if (func == f)
+ return true;
+ return false;
+}
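+
+/*
+ * Example (illustrative sketch, assuming call_rcu() from liburcu):
+ * pairing rcu_head_init() with rcu_head_after_call_rcu().  "obj" and
+ * "obj_free_rcu" are hypothetical names.
+ *
+ *	rcu_head_init(&obj->rcu);
+ *	call_rcu(&obj->rcu, obj_free_rcu);
+ *
+ *	// later: true once the head has been handed to call_rcu()
+ *	if (rcu_head_after_call_rcu(&obj->rcu, obj_free_rcu))
+ *		return;
+ */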
+
+#endif /* __TOOLS_LINUX_RCUPDATE_H */
diff --git a/c_src/include/linux/refcount.h b/c_src/include/linux/refcount.h
new file mode 100644
index 00000000..ddeec986
--- /dev/null
+++ b/c_src/include/linux/refcount.h
@@ -0,0 +1,352 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Variant of atomic_t specialized for reference counts.
+ *
+ * The interface matches the atomic_t interface (to aid in porting) but only
+ * provides the few functions one should use for reference counting.
+ *
+ * Saturation semantics
+ * ====================
+ *
+ * refcount_t differs from atomic_t in that the counter saturates at
+ * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the
+ * counter and causing 'spurious' use-after-free issues. In order to avoid the
+ * cost associated with introducing cmpxchg() loops into all of the saturating
+ * operations, we temporarily allow the counter to take on an unchecked value
+ * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow
+ * or overflow has occurred. Although this is racy when multiple threads
+ * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly
+ * equidistant from 0 and INT_MAX we minimise the scope for error:
+ *
+ * INT_MAX REFCOUNT_SATURATED UINT_MAX
+ * 0 (0x7fff_ffff) (0xc000_0000) (0xffff_ffff)
+ * +--------------------------------+----------------+----------------+
+ * <---------- bad value! ---------->
+ *
+ * (in a signed view of the world, the "bad value" range corresponds to
+ * a negative counter value).
+ *
+ * As an example, consider a refcount_inc() operation that causes the counter
+ * to overflow:
+ *
+ * int old = atomic_fetch_add_relaxed(r);
+ * // old is INT_MAX, refcount now INT_MIN (0x8000_0000)
+ * if (old < 0)
+ * atomic_set(r, REFCOUNT_SATURATED);
+ *
+ * If another thread also performs a refcount_inc() operation between the two
+ * atomic operations, then the count will continue to edge closer to 0. If it
+ * reaches a value of 1 before /any/ of the threads reset it to the saturated
+ * value, then a concurrent refcount_dec_and_test() may erroneously free the
+ * underlying object.
+ * Linux limits the maximum number of tasks to PID_MAX_LIMIT, which is currently
+ * 0x400000 (and can't easily be raised in the future beyond FUTEX_TID_MASK).
+ * With the current PID limit, if no batched refcounting operations are used and
+ * the attacker can't repeatedly trigger kernel oopses in the middle of refcount
+ * operations, this makes it impossible for a saturated refcount to leave the
+ * saturation range, even if it is possible for multiple uses of the same
+ * refcount to nest in the context of a single task:
+ *
+ * (UINT_MAX+1-REFCOUNT_SATURATED) / PID_MAX_LIMIT =
+ * 0x40000000 / 0x400000 = 0x100 = 256
+ *
+ * If hundreds of references are added/removed with a single refcounting
+ * operation, it may potentially be possible to leave the saturation range; but
+ * given the precise timing details involved with the round-robin scheduling of
+ * each thread manipulating the refcount and the need to hit the race multiple
+ * times in succession, there doesn't appear to be a practical avenue of attack
+ * even if using refcount_add() operations with larger increments.
+ *
+ * Memory ordering
+ * ===============
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object we're increasing the
+ * reference count on will provide the ordering. For locked data structures,
+ * it's the lock acquire, for RCU/lockless data structures it's the dependent
+ * load.
+ *
+ * Do note that inc_not_zero() provides a control dependency which will order
+ * future stores against the inc, this ensures we'll never modify the object
+ * if we did not in fact acquire a reference.
+ *
+ * The decrements will provide release order, such that all the prior loads and
+ * stores will be issued before, it also provides a control dependency, which
+ * will order us against the subsequent free().
+ *
+ * The control dependency is against the load of the cmpxchg (ll/sc) that
+ * succeeded. This means the stores aren't fully ordered, but this is fine
+ * because the 1->0 transition indicates no concurrency.
+ *
+ * Note that the allocator is responsible for ordering things between free()
+ * and alloc().
+ *
+ * The decrements dec_and_test() and sub_and_test() also provide acquire
+ * ordering on success.
+ *
+ */
+
+#ifndef _LINUX_REFCOUNT_H
+#define _LINUX_REFCOUNT_H
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/limits.h>
+
+struct mutex;
+
+/**
+ * typedef refcount_t - variant of atomic_t specialized for reference counts
+ * @refs: atomic_t counter field
+ *
+ * The counter saturates at REFCOUNT_SATURATED and will not move once
+ * there. This avoids wrapping the counter and causing 'spurious'
+ * use-after-free bugs.
+ */
+typedef struct refcount_struct {
+ atomic_t refs;
+} refcount_t;
+
+#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
+#define REFCOUNT_MAX INT_MAX
+#define REFCOUNT_SATURATED (INT_MIN / 2)
+
+enum refcount_saturation_type {
+ REFCOUNT_ADD_NOT_ZERO_OVF,
+ REFCOUNT_ADD_OVF,
+ REFCOUNT_ADD_UAF,
+ REFCOUNT_SUB_UAF,
+ REFCOUNT_DEC_LEAK,
+};
+
+/**
+ * refcount_set - set a refcount's value
+ * @r: the refcount
+ * @n: value to which the refcount will be set
+ */
+static inline void refcount_set(refcount_t *r, int n)
+{
+ atomic_set(&r->refs, n);
+}
+
+/**
+ * refcount_read - get a refcount's value
+ * @r: the refcount
+ *
+ * Return: the refcount's value
+ */
+static inline unsigned int refcount_read(const refcount_t *r)
+{
+ return atomic_read(&r->refs);
+}
+
+static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
+{
+ int old = refcount_read(r);
+
+ do {
+ if (!old)
+ break;
+ } while (!atomic_try_cmpxchg_acquire(&r->refs, &old, old + i));
+
+ if (oldp)
+ *oldp = old;
+
+ return old;
+}
+
+/**
+ * refcount_add_not_zero - add a value to a refcount unless it is 0
+ * @i: the value to add to the refcount
+ * @r: the refcount
+ *
+ * Will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time. In these
+ * cases, refcount_inc(), or one of its variants, should instead be used to
+ * increment a reference count.
+ *
+ * Return: false if the passed refcount is 0, true otherwise
+ */
+static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
+{
+ return __refcount_add_not_zero(i, r, NULL);
+}
+
+static inline void __refcount_add(int i, refcount_t *r, int *oldp)
+{
+ int old = atomic_add_return(i, &r->refs);
+
+ if (oldp)
+ *oldp = old;
+}
+
+/**
+ * refcount_add - add a value to a refcount
+ * @i: the value to add to the refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time. In these
+ * cases, refcount_inc(), or one of its variants, should instead be used to
+ * increment a reference count.
+ */
+static inline void refcount_add(int i, refcount_t *r)
+{
+ __refcount_add(i, r, NULL);
+}
+
+static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp)
+{
+ return __refcount_add_not_zero(1, r, oldp);
+}
+
+/**
+ * refcount_inc_not_zero - increment a refcount unless it is 0
+ * @r: the refcount to increment
+ *
+ * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED
+ * and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ *
+ * Return: true if the increment was successful, false otherwise
+ */
+static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+{
+ return __refcount_inc_not_zero(r, NULL);
+}
+
+static inline void __refcount_inc(refcount_t *r, int *oldp)
+{
+ __refcount_add(1, r, oldp);
+}
+
+/**
+ * refcount_inc - increment a refcount
+ * @r: the refcount to increment
+ *
+ * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller already has a
+ * reference on the object.
+ *
+ * Will WARN if the refcount is 0, as this represents a possible use-after-free
+ * condition.
+ */
+static inline void refcount_inc(refcount_t *r)
+{
+ __refcount_inc(r, NULL);
+}
+
+static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
+{
+ int old = atomic_sub_return_release(i, &r->refs);
+
+ if (oldp)
+ *oldp = old;
+
+ if (old == i) {
+ smp_acquire__after_ctrl_dep();
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * refcount_sub_and_test - subtract from a refcount and test if it is 0
+ * @i: amount to subtract from the refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_dec_and_test(), but it will WARN, return false and
+ * ultimately leak on underflow and will fail to decrement when saturated
+ * at REFCOUNT_SATURATED.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time. In these
+ * cases, refcount_dec(), or one of its variants, should instead be used to
+ * decrement a reference count.
+ *
+ * Return: true if the resulting refcount is 0, false otherwise
+ */
+static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
+{
+ return __refcount_sub_and_test(i, r, NULL);
+}
+
+static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp)
+{
+ return __refcount_sub_and_test(1, r, oldp);
+}
+
+/**
+ * refcount_dec_and_test - decrement a refcount and test if it is 0
+ * @r: the refcount
+ *
+ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
+ * decrement when saturated at REFCOUNT_SATURATED.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
+ *
+ * Return: true if the resulting refcount is 0, false otherwise
+ */
+static inline __must_check bool refcount_dec_and_test(refcount_t *r)
+{
+ return __refcount_dec_and_test(r, NULL);
+}
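+
+/*
+ * Example (illustrative sketch): the canonical get/put pattern.
+ * "struct obj", its ref member and obj_free() are hypothetical.
+ *
+ *	refcount_set(&obj->ref, 1);		// initial reference
+ *	refcount_inc(&obj->ref);		// additional user
+ *	refcount_dec(&obj->ref);
+ *	if (refcount_dec_and_test(&obj->ref))
+ *		obj_free(obj);			// last reference dropped
+ */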
+
+static inline void __refcount_dec(refcount_t *r, int *oldp)
+{
+ int old = atomic_sub_return_release(1, &r->refs);
+
+ if (oldp)
+ *oldp = old;
+}
+
+/**
+ * refcount_dec - decrement a refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
+ * when saturated at REFCOUNT_SATURATED.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before.
+ */
+static inline void refcount_dec(refcount_t *r)
+{
+ __refcount_dec(r, NULL);
+}
+
+extern __must_check bool refcount_dec_if_one(refcount_t *r);
+extern __must_check bool refcount_dec_not_one(refcount_t *r);
+extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) __cond_acquires(lock);
+extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) __cond_acquires(lock);
+extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r,
+ spinlock_t *lock,
+ unsigned long *flags) __cond_acquires(lock);
+#endif /* _LINUX_REFCOUNT_H */
diff --git a/c_src/include/linux/rhashtable-types.h b/c_src/include/linux/rhashtable-types.h
new file mode 100644
index 00000000..57467cbf
--- /dev/null
+++ b/c_src/include/linux/rhashtable-types.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Simple structures that might be needed in include
+ * files.
+ */
+
+#ifndef _LINUX_RHASHTABLE_TYPES_H
+#define _LINUX_RHASHTABLE_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+
+struct rhash_head {
+ struct rhash_head __rcu *next;
+};
+
+struct rhlist_head {
+ struct rhash_head rhead;
+ struct rhlist_head __rcu *next;
+};
+
+struct bucket_table;
+
+/**
+ * struct rhashtable_compare_arg - Key for the function rhashtable_compare
+ * @ht: Hash table
+ * @key: Key to compare against
+ */
+struct rhashtable_compare_arg {
+ struct rhashtable *ht;
+ const void *key;
+};
+
+typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
+ const void *obj);
+
+/**
+ * struct rhashtable_params - Hash table construction parameters
+ * @nelem_hint: Hint on number of elements, should be 75% of desired size
+ * @key_len: Length of key
+ * @key_offset: Offset of key in struct to be hashed
+ * @head_offset: Offset of rhash_head in struct to be hashed
+ * @max_size: Maximum size while expanding
+ * @min_size: Minimum size while shrinking
+ * @automatic_shrinking: Enable automatic shrinking of tables
+ * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
+ * @obj_hashfn: Function to hash object
+ * @obj_cmpfn: Function to compare key with object
+ */
+struct rhashtable_params {
+ u16 nelem_hint;
+ u16 key_len;
+ u16 key_offset;
+ u16 head_offset;
+ unsigned int max_size;
+ u16 min_size;
+ bool automatic_shrinking;
+ rht_hashfn_t hashfn;
+ rht_obj_hashfn_t obj_hashfn;
+ rht_obj_cmpfn_t obj_cmpfn;
+};
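+
+/*
+ * Example (illustrative sketch): parameters for a table keyed by a u64
+ * id embedded in the object.  "struct obj" and its members are
+ * hypothetical names.
+ *
+ *	struct obj {
+ *		u64			id;
+ *		struct rhash_head	node;
+ *	};
+ *
+ *	static const struct rhashtable_params obj_params = {
+ *		.key_len	= sizeof(u64),
+ *		.key_offset	= offsetof(struct obj, id),
+ *		.head_offset	= offsetof(struct obj, node),
+ *	};
+ */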
+
+/**
+ * struct rhashtable - Hash table handle
+ * @tbl: Bucket table
+ * @key_len: Key length for hashfn
+ * @max_elems: Maximum number of elements in table
+ * @p: Configuration parameters
+ * @rhlist: True if this is an rhltable
+ * @run_work: Deferred worker to expand/shrink asynchronously
+ * @mutex: Mutex to protect current/future table swapping
+ * @lock: Spin lock to protect walker list
+ * @nelems: Number of elements in table
+ */
+struct rhashtable {
+ struct bucket_table __rcu *tbl;
+ unsigned int key_len;
+ unsigned int max_elems;
+ struct rhashtable_params p;
+ bool rhlist;
+ struct work_struct run_work;
+ struct mutex mutex;
+ spinlock_t lock;
+ atomic_t nelems;
+};
+
+/**
+ * struct rhltable - Hash table with duplicate objects in a list
+ * @ht: Underlying rhtable
+ */
+struct rhltable {
+ struct rhashtable ht;
+};
+
+/**
+ * struct rhashtable_walker - Hash table walker
+ * @list: List entry on list of walkers
+ * @tbl: The table that we were walking over
+ */
+struct rhashtable_walker {
+ struct list_head list;
+ struct bucket_table *tbl;
+};
+
+/**
+ * struct rhashtable_iter - Hash table iterator
+ * @ht: Table to iterate through
+ * @p: Current pointer
+ * @list: Current hash list pointer
+ * @walker: Associated rhashtable walker
+ * @slot: Current slot
+ * @skip: Number of entries to skip in slot
+ */
+struct rhashtable_iter {
+ struct rhashtable *ht;
+ struct rhash_head *p;
+ struct rhlist_head *list;
+ struct rhashtable_walker walker;
+ unsigned int slot;
+ unsigned int skip;
+ bool end_of_table;
+};
+
+int rhashtable_init(struct rhashtable *ht,
+ const struct rhashtable_params *params);
+int rhltable_init(struct rhltable *hlt,
+ const struct rhashtable_params *params);
+
+#endif /* _LINUX_RHASHTABLE_TYPES_H */
diff --git a/c_src/include/linux/rhashtable.h b/c_src/include/linux/rhashtable.h
new file mode 100644
index 00000000..1c6dbdc8
--- /dev/null
+++ b/c_src/include/linux/rhashtable.h
@@ -0,0 +1,1267 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * Code partially derived from nft_hash
+ * Rewritten with rehash code from br_multicast plus single list
+ * pointer as suggested by Josh Triplett
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _LINUX_RHASHTABLE_H
+#define _LINUX_RHASHTABLE_H
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/jhash.h>
+#include <linux/list_nulls.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+#include <linux/rculist.h>
+#include <linux/bit_spinlock.h>
+
+#include <linux/rhashtable-types.h>
+/*
+ * Objects in an rhashtable have an embedded struct rhash_head
+ * which is linked into a hash chain from the hash table - or one
+ * of two or more hash tables when the rhashtable is being resized.
+ * The end of the chain is marked with a special nulls marks which has
+ * the least significant bit set but otherwise stores the address of
+ * the hash bucket. This allows us to be sure we've found the end
+ * of the right list.
+ * The value stored in the hash bucket has BIT(0) used as a lock bit.
+ * This bit must be atomically set before any changes are made to
+ * the chain. To avoid dereferencing this pointer without clearing
+ * the bit first, we use an opaque 'struct rhash_lock_head *' for the
+ * pointer stored in the bucket. This struct needs to be defined so
+ * that rcu_dereference() works on it, but it has no content so a
+ * cast is needed for it to be useful. This ensures it isn't
+ * used by mistake without clearing the lock bit first.
+ */
+struct rhash_lock_head {};
+
+/* Maximum chain length before rehash
+ *
+ * The maximum (not average) chain length grows with the size of the hash
+ * table, at a rate of (log N)/(log log N).
+ *
+ * The value of 16 is selected so that even if the hash table grew to
+ * 2^32 you would not expect the maximum chain length to exceed it
+ * unless we are under attack (or extremely unlucky).
+ *
+ * As this limit is only to detect attacks, we don't need to set it to a
+ * lower value as you'd need the chain length to vastly exceed 16 to have
+ * any real effect on the system.
+ */
+#define RHT_ELASTICITY 16u
+
+/**
+ * struct bucket_table - Table of hash buckets
+ * @size: Number of hash buckets
+ * @nest: Number of bits of first-level nested table.
+ * @hash_rnd: Random seed to fold into hash
+ * @walkers: List of active walkers
+ * @rcu: RCU structure for freeing the table
+ * @future_tbl: Table under construction during rehashing
+ * @buckets: size * hash buckets
+ */
+struct bucket_table {
+ unsigned int size;
+ unsigned int nest;
+ u32 hash_rnd;
+ struct list_head walkers;
+ struct rcu_head rcu;
+
+ struct bucket_table __rcu *future_tbl;
+
+ struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
+};
+
+/*
+ * NULLS_MARKER() expects a hash value with the low
+ * bits most likely to be significant, and it discards
+ * the msb.
+ * We give it an address, in which the bottom bit is
+ * always 0, and the msb might be significant.
+ * So we shift the address down one bit to align with
+ * expectations and avoid losing a significant bit.
+ *
+ * We never store the NULLS_MARKER in the hash table
+ * itself as we need the lsb for locking.
+ * Instead we store a NULL
+ */
+#define RHT_NULLS_MARKER(ptr) \
+ ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
+#define INIT_RHT_NULLS_HEAD(ptr) \
+ ((ptr) = NULL)
+
+static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
+{
+ return ((unsigned long) ptr & 1);
+}
+
+static inline void *rht_obj(const struct rhashtable *ht,
+ const struct rhash_head *he)
+{
+ return (char *)he - ht->p.head_offset;
+}
+
+static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
+ unsigned int hash)
+{
+ return hash & (tbl->size - 1);
+}
+
+static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
+ const void *key, const struct rhashtable_params params,
+ unsigned int hash_rnd)
+{
+ unsigned int hash;
+
+ /* params must be equal to ht->p if it isn't constant. */
+ if (!__builtin_constant_p(params.key_len))
+ hash = ht->p.hashfn(key, ht->key_len, hash_rnd);
+ else if (params.key_len) {
+ unsigned int key_len = params.key_len;
+
+ if (params.hashfn)
+ hash = params.hashfn(key, key_len, hash_rnd);
+ else if (key_len & (sizeof(u32) - 1))
+ hash = jhash(key, key_len, hash_rnd);
+ else
+ hash = jhash2(key, key_len / sizeof(u32), hash_rnd);
+ } else {
+ unsigned int key_len = ht->p.key_len;
+
+ if (params.hashfn)
+ hash = params.hashfn(key, key_len, hash_rnd);
+ else
+ hash = jhash(key, key_len, hash_rnd);
+ }
+
+ return hash;
+}
+
+static inline unsigned int rht_key_hashfn(
+ struct rhashtable *ht, const struct bucket_table *tbl,
+ const void *key, const struct rhashtable_params params)
+{
+ unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd);
+
+ return rht_bucket_index(tbl, hash);
+}
+
+static inline unsigned int rht_head_hashfn(
+ struct rhashtable *ht, const struct bucket_table *tbl,
+ const struct rhash_head *he, const struct rhashtable_params params)
+{
+ const char *ptr = rht_obj(ht, he);
+
+ return likely(params.obj_hashfn) ?
+ rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
+ ht->p.key_len,
+ tbl->hash_rnd)) :
+ rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
+}
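
The hash-function selection above is driven entirely by the rhashtable_params the caller passes in. A minimal sketch of a keyed object and its parameters (struct my_obj and my_params are illustrative, not part of this patch):

	struct my_obj {
		u32			key;
		struct rhash_head	node;
	};

	static const struct rhashtable_params my_params = {
		.key_len	= sizeof(u32),
		.key_offset	= offsetof(struct my_obj, key),
		.head_offset	= offsetof(struct my_obj, node),
	};

With a constant key_len that is a multiple of four and no .hashfn, rht_key_get_hash() resolves to jhash2(); a key length that is not a multiple of four falls back to jhash(), and a non-constant key_len uses ht->p.hashfn.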
+
+/**
+ * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
+ * @ht: hash table
+ * @tbl: current table
+ */
+static inline bool rht_grow_above_75(const struct rhashtable *ht,
+ const struct bucket_table *tbl)
+{
+ /* Expand table when exceeding 75% load */
+ return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
+ (!ht->p.max_size || tbl->size < ht->p.max_size);
+}
+
+/**
+ * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
+ * @ht: hash table
+ * @tbl: current table
+ */
+static inline bool rht_shrink_below_30(const struct rhashtable *ht,
+ const struct bucket_table *tbl)
+{
+ /* Shrink table beneath 30% load */
+ return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
+ tbl->size > ht->p.min_size;
+}
+
+/**
+ * rht_grow_above_100 - returns true if nelems > table-size
+ * @ht: hash table
+ * @tbl: current table
+ */
+static inline bool rht_grow_above_100(const struct rhashtable *ht,
+ const struct bucket_table *tbl)
+{
+ return atomic_read(&ht->nelems) > tbl->size &&
+ (!ht->p.max_size || tbl->size < ht->p.max_size);
+}
+
+/**
+ * rht_grow_above_max - returns true if table is above maximum
+ * @ht: hash table
+ * @tbl: current table
+ */
+static inline bool rht_grow_above_max(const struct rhashtable *ht,
+ const struct bucket_table *tbl)
+{
+ return atomic_read(&ht->nelems) >= ht->max_elems;
+}
+
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_rht_mutex_is_held(struct rhashtable *ht);
+int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
+#else
+static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
+{
+ return 1;
+}
+
+static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
+ u32 hash)
+{
+ return 1;
+}
+#endif /* CONFIG_PROVE_LOCKING */
+
+void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+ struct rhash_head *obj);
+
+void rhashtable_walk_enter(struct rhashtable *ht,
+ struct rhashtable_iter *iter);
+void rhashtable_walk_exit(struct rhashtable_iter *iter);
+int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU);
+
+static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
+{
+ (void)rhashtable_walk_start_check(iter);
+}
+
+void *rhashtable_walk_next(struct rhashtable_iter *iter);
+void *rhashtable_walk_peek(struct rhashtable_iter *iter);
+void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
+
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+ void (*free_fn)(void *ptr, void *arg),
+ void *arg);
+void rhashtable_destroy(struct rhashtable *ht);
+
+struct rhash_lock_head __rcu **rht_bucket_nested(
+ const struct bucket_table *tbl, unsigned int hash);
+struct rhash_lock_head __rcu **__rht_bucket_nested(
+ const struct bucket_table *tbl, unsigned int hash);
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(
+ struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash);
+
+#define rht_dereference(p, ht) \
+ rcu_dereference(p)
+
+#define rht_dereference_rcu(p, ht) \
+ rcu_dereference(p)
+
+#define rht_dereference_bucket(p, tbl, hash) \
+ rcu_dereference(p)
+
+#define rht_dereference_bucket_rcu(p, tbl, hash) \
+ rcu_dereference(p)
+
+#define rht_entry(tpos, pos, member) \
+ ({ tpos = container_of(pos, typeof(*tpos), member); 1; })
+
+static inline struct rhash_lock_head __rcu *const *rht_bucket(
+ const struct bucket_table *tbl, unsigned int hash)
+{
+ return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+ &tbl->buckets[hash];
+}
+
+static inline struct rhash_lock_head __rcu **rht_bucket_var(
+ struct bucket_table *tbl, unsigned int hash)
+{
+ return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
+ &tbl->buckets[hash];
+}
+
+static inline struct rhash_lock_head __rcu **rht_bucket_insert(
+ struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
+{
+ return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
+ &tbl->buckets[hash];
+}
+
+/*
+ * We lock a bucket by setting BIT(0) in the pointer - this is always
+ * zero in real pointers. The NULLS mark is never stored in the bucket,
+ * rather we store NULL if the bucket is empty.
+ * bit_spin_locks do not handle contention well, but the whole point
+ * of the hashtable design is to achieve minimum per-bucket contention.
+ * A nested hash table might not have a bucket pointer. In that case
+ * we cannot get a lock. For remove and replace the bucket cannot be
+ * interesting and doesn't need locking.
+ * For insert we allocate the bucket if this is the last bucket_table,
+ * and then take the lock.
+ * Sometimes we unlock a bucket by writing a new pointer there. In that
+ * case we don't need to unlock, but we do need to reset state such as
+ * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer()
+ * provides the same release semantics that bit_spin_unlock() provides,
+ * this is safe.
+ * When we write to a bucket without unlocking, we use rht_assign_locked().
+ */
+
+static inline void rht_lock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt)
+{
+ bit_spin_lock(0, (unsigned long *)bkt);
+}
+
+static inline void rht_lock_nested(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bucket,
+ unsigned int subclass)
+{
+ bit_spin_lock(0, (unsigned long *)bucket);
+}
+
+static inline void rht_unlock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt)
+{
+ bit_spin_unlock(0, (unsigned long *)bkt);
+}
+
+static inline struct rhash_head *__rht_ptr(
+ struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt)
+{
+ return (struct rhash_head *)
+ ((unsigned long)p & ~BIT(0) ?:
+ (unsigned long)RHT_NULLS_MARKER(bkt));
+}
+
+/*
+ * Where 'bkt' is a bucket and might be locked:
+ * rht_ptr_rcu() dereferences that pointer and clears the lock bit.
+ * rht_ptr() dereferences in a context where the bucket is locked.
+ * rht_ptr_exclusive() dereferences in a context where exclusive
+ * access is guaranteed, such as when destroying the table.
+ */
+static inline struct rhash_head *rht_ptr_rcu(
+ struct rhash_lock_head __rcu *const *bkt)
+{
+ return __rht_ptr(rcu_dereference(*bkt), bkt);
+}
+
+static inline struct rhash_head *rht_ptr(
+ struct rhash_lock_head __rcu *const *bkt,
+ struct bucket_table *tbl,
+ unsigned int hash)
+{
+ return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt);
+}
+
+static inline struct rhash_head *rht_ptr_exclusive(
+ struct rhash_lock_head __rcu *const *bkt)
+{
+ return __rht_ptr(rcu_dereference(*bkt), bkt);
+}
+
+static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0)));
+}
+
+static inline void rht_assign_unlock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ rcu_assign_pointer(*bkt, (void *)obj);
+ preempt_enable();
+ __release(bitlock);
+ bit_spin_wake(0, (unsigned long *) bkt);
+}
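
The locking discipline described in the comment above, in the order the insert path uses it (sketch only; new_head stands for a rhash_head already linked into the rest of the chain):

	bkt = rht_bucket_insert(ht, tbl, hash);
	rht_lock(tbl, bkt);			/* atomically sets BIT(0) in *bkt */
	/* ... walk the chain via rht_ptr(bkt, tbl, hash) ... */
	rht_assign_unlock(tbl, bkt, new_head);	/* publish new head and drop the lock */

When the head pointer is left unchanged, rht_unlock(tbl, bkt) is used instead.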
+
+/**
+ * rht_for_each_from - iterate over hash chain from given head
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @head: the &struct rhash_head to start from
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ */
+#define rht_for_each_from(pos, head, tbl, hash) \
+ for (pos = head; \
+ !rht_is_a_nulls(pos); \
+ pos = rht_dereference_bucket((pos)->next, tbl, hash))
+
+/**
+ * rht_for_each - iterate over hash chain
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ */
+#define rht_for_each(pos, tbl, hash) \
+ rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash)
+
+/**
+ * rht_for_each_entry_from - iterate over hash chain from given head
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @head: the &struct rhash_head to start from
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ * @member: name of the &struct rhash_head within the hashable struct.
+ */
+#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \
+ for (pos = head; \
+ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
+ pos = rht_dereference_bucket((pos)->next, tbl, hash))
+
+/**
+ * rht_for_each_entry - iterate over hash chain of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ * @member: name of the &struct rhash_head within the hashable struct.
+ */
+#define rht_for_each_entry(tpos, pos, tbl, hash, member) \
+ rht_for_each_entry_from(tpos, pos, \
+ rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash, member)
+
+/**
+ * rht_for_each_entry_safe - safely iterate over hash chain of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @next: the &struct rhash_head to use as next in loop cursor.
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ * @member: name of the &struct rhash_head within the hashable struct.
+ *
+ * This hash chain list-traversal primitive allows for the looped code to
+ * remove the loop cursor from the list.
+ */
+#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \
+ for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ next = !rht_is_a_nulls(pos) ? \
+ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
+ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
+ pos = next, \
+ next = !rht_is_a_nulls(pos) ? \
+ rht_dereference_bucket(pos->next, tbl, hash) : NULL)
+
+/**
+ * rht_for_each_rcu_from - iterate over rcu hash chain from given head
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @head: the &struct rhash_head to start from
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ *
+ * This hash chain list-traversal primitive may safely run concurrently with
+ * the _rcu mutation primitives such as rhashtable_insert() as long as the
+ * traversal is guarded by rcu_read_lock().
+ */
+#define rht_for_each_rcu_from(pos, head, tbl, hash) \
+ for (({barrier(); }), \
+ pos = head; \
+ !rht_is_a_nulls(pos); \
+ pos = rcu_dereference_raw(pos->next))
+
+/**
+ * rht_for_each_rcu - iterate over rcu hash chain
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ *
+ * This hash chain list-traversal primitive may safely run concurrently with
+ * the _rcu mutation primitives such as rhashtable_insert() as long as the
+ * traversal is guarded by rcu_read_lock().
+ */
+#define rht_for_each_rcu(pos, tbl, hash) \
+ for (({barrier(); }), \
+ pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \
+ !rht_is_a_nulls(pos); \
+ pos = rcu_dereference_raw(pos->next))
+
+/**
+ * rht_for_each_entry_rcu_from - iterate over rcu hash chain from given head
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @head: the &struct rhash_head to start from
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ * @member: name of the &struct rhash_head within the hashable struct.
+ *
+ * This hash chain list-traversal primitive may safely run concurrently with
+ * the _rcu mutation primitives such as rhashtable_insert() as long as the
+ * traversal is guarded by rcu_read_lock().
+ */
+#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \
+ for (({barrier(); }), \
+ pos = head; \
+ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
+ pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))
+
+/**
+ * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rhash_head to use as a loop cursor.
+ * @tbl: the &struct bucket_table
+ * @hash: the hash value / bucket index
+ * @member: name of the &struct rhash_head within the hashable struct.
+ *
+ * This hash chain list-traversal primitive may safely run concurrently with
+ * the _rcu mutation primitives such as rhashtable_insert() as long as the
+ * traversal is guarded by rcu_read_lock().
+ */
+#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
+ rht_for_each_entry_rcu_from(tpos, pos, \
+ rht_ptr_rcu(rht_bucket(tbl, hash)), \
+ tbl, hash, member)
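
Walking one bucket of such a table under RCU with the macro above (sketch; my_obj is the illustrative type from earlier and use() stands in for real work):

	struct my_obj *obj;
	struct rhash_head *pos;

	rcu_read_lock();
	rht_for_each_entry_rcu(obj, pos, tbl, hash, node)
		use(obj);
	rcu_read_unlock();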
+
+/**
+ * rhl_for_each_rcu - iterate over rcu hash table list
+ * @pos: the &struct rlist_head to use as a loop cursor.
+ * @list: the head of the list
+ *
+ * This hash chain list-traversal primitive should be used on the
+ * list returned by rhltable_lookup.
+ */
+#define rhl_for_each_rcu(pos, list) \
+ for (pos = list; pos; pos = rcu_dereference_raw(pos->next))
+
+/**
+ * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct rlist_head to use as a loop cursor.
+ * @list: the head of the list
+ * @member: name of the &struct rlist_head within the hashable struct.
+ *
+ * This hash chain list-traversal primitive should be used on the
+ * list returned by rhltable_lookup.
+ */
+#define rhl_for_each_entry_rcu(tpos, pos, list, member) \
+ for (pos = list; pos && rht_entry(tpos, pos, member); \
+ pos = rcu_dereference_raw(pos->next))
+
+static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ struct rhashtable *ht = arg->ht;
+ const char *ptr = obj;
+
+ return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
+}
+
+/* Internal function, do not use. */
+static inline struct rhash_head *__rhashtable_lookup(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+{
+ struct rhashtable_compare_arg arg = {
+ .ht = ht,
+ .key = key,
+ };
+ struct rhash_lock_head __rcu *const *bkt;
+ struct bucket_table *tbl;
+ struct rhash_head *he;
+ unsigned int hash;
+
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+restart:
+ hash = rht_key_hashfn(ht, tbl, key, params);
+ bkt = rht_bucket(tbl, hash);
+ do {
+ rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) {
+ if (params.obj_cmpfn ?
+ params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+ rhashtable_compare(&arg, rht_obj(ht, he)))
+ continue;
+ return he;
+ }
+ /* An object might have been moved to a different hash chain,
+ * while we walk along it - better check and retry.
+ */
+ } while (he != RHT_NULLS_MARKER(bkt));
+
+ /* Ensure we see any new tables. */
+ smp_rmb();
+
+ tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (unlikely(tbl))
+ goto restart;
+
+ return NULL;
+}
+
+/**
+ * rhashtable_lookup - search hash table
+ * @ht: hash table
+ * @key: the pointer to the key
+ * @params: hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for an entry with an identical key. The first matching entry is returned.
+ *
+ * This must only be called under the RCU read lock.
+ *
+ * Returns the first entry on which the compare function returned true.
+ */
+static inline void *rhashtable_lookup(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+{
+ struct rhash_head *he = __rhashtable_lookup(ht, key, params);
+
+ return he ? rht_obj(ht, he) : NULL;
+}
+
+/**
+ * rhashtable_lookup_fast - search hash table, without RCU read lock
+ * @ht: hash table
+ * @key: the pointer to the key
+ * @params: hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for an entry with an identical key. The first matching entry is returned.
+ *
+ * Only use this function when you have other mechanisms guaranteeing
+ * that the object won't go away after the RCU read lock is released.
+ *
+ * Returns the first entry on which the compare function returned true.
+ */
+static inline void *rhashtable_lookup_fast(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params)
+{
+ void *obj;
+
+ rcu_read_lock();
+ obj = rhashtable_lookup(ht, key, params);
+ rcu_read_unlock();
+
+ return obj;
+}
+
+/**
+ * rhltable_lookup - search hash list table
+ * @hlt: hash table
+ * @key: the pointer to the key
+ * @params: hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for an entry with an identical key. All matching entries are returned
+ * in a list.
+ *
+ * This must only be called under the RCU read lock.
+ *
+ * Returns the list of entries that match the given key.
+ */
+static inline struct rhlist_head *rhltable_lookup(
+ struct rhltable *hlt, const void *key,
+ const struct rhashtable_params params)
+{
+ struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params);
+
+ return he ? container_of(he, struct rhlist_head, rhead) : NULL;
+}
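
Tying rhltable_lookup() to rhl_for_each_entry_rcu() from above (sketch; struct my_dup, my_dup_params and use() are illustrative):

	struct my_dup {
		u32			key;
		struct rhlist_head	list;
	};

	struct rhlist_head *head, *pos;
	struct my_dup *d;

	rcu_read_lock();
	head = rhltable_lookup(&hlt, &key, my_dup_params);
	rhl_for_each_entry_rcu(d, pos, head, list)
		use(d);
	rcu_read_unlock();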
+
+/* Internal function, please use rhashtable_insert_fast() instead. This
+ * function returns the existing element already in the hash table if there is a clash,
+ * otherwise it returns an error via ERR_PTR().
+ */
+static inline void *__rhashtable_insert_fast(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params, bool rhlist)
+{
+ struct rhashtable_compare_arg arg = {
+ .ht = ht,
+ .key = key,
+ };
+ struct rhash_lock_head __rcu **bkt;
+ struct rhash_head __rcu **pprev;
+ struct bucket_table *tbl;
+ struct rhash_head *head;
+ unsigned int hash;
+ int elasticity;
+ void *data;
+
+ rcu_read_lock();
+
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+ hash = rht_head_hashfn(ht, tbl, obj, params);
+ elasticity = RHT_ELASTICITY;
+ bkt = rht_bucket_insert(ht, tbl, hash);
+ data = ERR_PTR(-ENOMEM);
+ if (!bkt)
+ goto out;
+ pprev = NULL;
+ rht_lock(tbl, bkt);
+
+ if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
+slow_path:
+ rht_unlock(tbl, bkt);
+ rcu_read_unlock();
+ return rhashtable_insert_slow(ht, key, obj);
+ }
+
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
+ struct rhlist_head *plist;
+ struct rhlist_head *list;
+
+ elasticity--;
+ if (!key ||
+ (params.obj_cmpfn ?
+ params.obj_cmpfn(&arg, rht_obj(ht, head)) :
+ rhashtable_compare(&arg, rht_obj(ht, head)))) {
+ pprev = &head->next;
+ continue;
+ }
+
+ data = rht_obj(ht, head);
+
+ if (!rhlist)
+ goto out_unlock;
+
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ plist = container_of(head, struct rhlist_head, rhead);
+
+ RCU_INIT_POINTER(list->next, plist);
+ head = rht_dereference_bucket(head->next, tbl, hash);
+ RCU_INIT_POINTER(list->rhead.next, head);
+ if (pprev) {
+ rcu_assign_pointer(*pprev, obj);
+ rht_unlock(tbl, bkt);
+ } else
+ rht_assign_unlock(tbl, bkt, obj);
+ data = NULL;
+ goto out;
+ }
+
+ if (elasticity <= 0)
+ goto slow_path;
+
+ data = ERR_PTR(-E2BIG);
+ if (unlikely(rht_grow_above_max(ht, tbl)))
+ goto out_unlock;
+
+ if (unlikely(rht_grow_above_100(ht, tbl)))
+ goto slow_path;
+
+ /* Inserting at head of list makes unlocking free. */
+ head = rht_ptr(bkt, tbl, hash);
+
+ RCU_INIT_POINTER(obj->next, head);
+ if (rhlist) {
+ struct rhlist_head *list;
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ RCU_INIT_POINTER(list->next, NULL);
+ }
+
+ atomic_inc(&ht->nelems);
+ rht_assign_unlock(tbl, bkt, obj);
+
+ if (rht_grow_above_75(ht, tbl))
+ schedule_work(&ht->run_work);
+
+ data = NULL;
+out:
+ rcu_read_unlock();
+
+ return data;
+
+out_unlock:
+ rht_unlock(tbl, bkt);
+ goto out;
+}
+
+/**
+ * rhashtable_insert_fast - insert object into hash table
+ * @ht: hash table
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * Will take the per bucket bitlock to protect against concurrent mutations
+ * on the same bucket. Multiple insertions may occur in parallel unless
+ * they map to the same bucket.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
+ */
+static inline int rhashtable_insert_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
+ void *ret;
+
+ ret = __rhashtable_insert_fast(ht, NULL, obj, params, false);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
+}
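
Table setup plus a plain insertion, reusing the illustrative my_params from earlier (sketch; error handling trimmed to the essentials):

	struct rhashtable ht;
	int ret;

	ret = rhashtable_init(&ht, &my_params);
	if (ret)
		return ret;

	ret = rhashtable_insert_fast(&ht, &obj->node, my_params);
	if (ret)		/* e.g. -ENOMEM, or -E2BIG when over max_elems */
		return ret;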
+
+/**
+ * rhltable_insert_key - insert object into hash list table
+ * @hlt: hash list table
+ * @key: the pointer to the key
+ * @list: pointer to hash list head inside object
+ * @params: hash table parameters
+ *
+ * Will take the per bucket bitlock to protect against concurrent mutations
+ * on the same bucket. Multiple insertions may occur in parallel unless
+ * they map to the same bucket.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
+ */
+static inline int rhltable_insert_key(
+ struct rhltable *hlt, const void *key, struct rhlist_head *list,
+ const struct rhashtable_params params)
+{
+ return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead,
+ params, true));
+}
+
+/**
+ * rhltable_insert - insert object into hash list table
+ * @hlt: hash list table
+ * @list: pointer to hash list head inside object
+ * @params: hash table parameters
+ *
+ * Will take the per bucket bitlock to protect against concurrent mutations
+ * on the same bucket. Multiple insertions may occur in parallel unless
+ * they map to the same bucket.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
+ */
+static inline int rhltable_insert(
+ struct rhltable *hlt, struct rhlist_head *list,
+ const struct rhashtable_params params)
+{
+ const char *key = rht_obj(&hlt->ht, &list->rhead);
+
+ key += params.key_offset;
+
+ return rhltable_insert_key(hlt, key, list, params);
+}
+
+/**
+ * rhashtable_lookup_insert_fast - lookup and insert object into hash table
+ * @ht: hash table
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * This lookup function may only be used for fixed key hash table (key_len
+ * parameter set). It will BUG() if used inappropriately.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
+ */
+static inline int rhashtable_lookup_insert_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
+ const char *key = rht_obj(ht, obj);
+ void *ret;
+
+ BUG_ON(ht->p.obj_hashfn);
+
+ ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
+ false);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
+}
+
+/**
+ * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table
+ * @ht: hash table
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * Just like rhashtable_lookup_insert_fast(), but this function returns the
+ * object if it exists, NULL if it did not and the insertion was successful,
+ * and an ERR_PTR otherwise.
+ */
+static inline void *rhashtable_lookup_get_insert_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
+ const char *key = rht_obj(ht, obj);
+
+ BUG_ON(ht->p.obj_hashfn);
+
+ return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
+ false);
+}
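
The three-way return spelled out (sketch; kfree() assumed available from the slab shim):

	struct my_obj *old;

	old = rhashtable_lookup_get_insert_fast(&ht, &obj->node, my_params);
	if (IS_ERR(old))
		return PTR_ERR(old);	/* insertion failed, e.g. -ENOMEM */
	if (old) {
		kfree(obj);		/* key already present; obj was not inserted */
		obj = old;
	}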
+
+/**
+ * rhashtable_lookup_insert_key - search and insert object to hash table
+ * with explicit key
+ * @ht: hash table
+ * @key: key
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * Lookups may occur in parallel with hashtable mutations and resizing.
+ *
+ * Will trigger an automatic deferred table resizing if residency in the
+ * table grows beyond 70%.
+ *
+ * Returns zero on success.
+ */
+static inline int rhashtable_lookup_insert_key(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
+ void *ret;
+
+ BUG_ON(!ht->p.obj_hashfn || !key);
+
+ ret = __rhashtable_insert_fast(ht, key, obj, params, false);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+
+ return ret == NULL ? 0 : -EEXIST;
+}
+
+/**
+ * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
+ * @ht: hash table
+ * @key: key
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * Just like rhashtable_lookup_insert_key(), but this function returns the
+ * object if it exists, NULL if it does not and the insertion was successful,
+ * and an ERR_PTR otherwise.
+ */
+static inline void *rhashtable_lookup_get_insert_key(
+ struct rhashtable *ht, const void *key, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
+ BUG_ON(!ht->p.obj_hashfn || !key);
+
+ return __rhashtable_insert_fast(ht, key, obj, params, false);
+}
+
+/* Internal function, please use rhashtable_remove_fast() instead */
+static inline int __rhashtable_remove_fast_one(
+ struct rhashtable *ht, struct bucket_table *tbl,
+ struct rhash_head *obj, const struct rhashtable_params params,
+ bool rhlist)
+{
+ struct rhash_lock_head __rcu **bkt;
+ struct rhash_head __rcu **pprev;
+ struct rhash_head *he;
+ unsigned int hash;
+ int err = -ENOENT;
+
+ hash = rht_head_hashfn(ht, tbl, obj, params);
+ bkt = rht_bucket_var(tbl, hash);
+ if (!bkt)
+ return -ENOENT;
+ pprev = NULL;
+ rht_lock(tbl, bkt);
+
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
+ struct rhlist_head *list;
+
+ list = container_of(he, struct rhlist_head, rhead);
+
+ if (he != obj) {
+ struct rhlist_head __rcu **lpprev;
+
+ pprev = &he->next;
+
+ if (!rhlist)
+ continue;
+
+ do {
+ lpprev = &list->next;
+ list = rht_dereference_bucket(list->next,
+ tbl, hash);
+ } while (list && obj != &list->rhead);
+
+ if (!list)
+ continue;
+
+ list = rht_dereference_bucket(list->next, tbl, hash);
+ RCU_INIT_POINTER(*lpprev, list);
+ err = 0;
+ break;
+ }
+
+ obj = rht_dereference_bucket(obj->next, tbl, hash);
+ err = 1;
+
+ if (rhlist) {
+ list = rht_dereference_bucket(list->next, tbl, hash);
+ if (list) {
+ RCU_INIT_POINTER(list->rhead.next, obj);
+ obj = &list->rhead;
+ err = 0;
+ }
+ }
+
+ if (pprev) {
+ rcu_assign_pointer(*pprev, obj);
+ rht_unlock(tbl, bkt);
+ } else {
+ rht_assign_unlock(tbl, bkt, obj);
+ }
+ goto unlocked;
+ }
+
+ rht_unlock(tbl, bkt);
+unlocked:
+ if (err > 0) {
+ atomic_dec(&ht->nelems);
+ if (unlikely(ht->p.automatic_shrinking &&
+ rht_shrink_below_30(ht, tbl)))
+ schedule_work(&ht->run_work);
+ err = 0;
+ }
+
+ return err;
+}
+
+/* Internal function, please use rhashtable_remove_fast() instead */
+static inline int __rhashtable_remove_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params, bool rhlist)
+{
+ struct bucket_table *tbl;
+ int err;
+
+ rcu_read_lock();
+
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+
+ /* Because we have already taken (and released) the bucket
+ * lock in old_tbl, if we find that future_tbl is not yet
+ * visible then that guarantees the entry to still be in
+ * the old tbl if it exists.
+ */
+ while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params,
+ rhlist)) &&
+ (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+ ;
+
+ rcu_read_unlock();
+
+ return err;
+}
+
+/**
+ * rhashtable_remove_fast - remove object from hash table
+ * @ht: hash table
+ * @obj: pointer to hash head inside object
+ * @params: hash table parameters
+ *
+ * Since the hash chain is single linked, the removal operation needs to
+ * walk the bucket chain upon removal. The removal operation is thus
+ * considerably slow if the hash table is not correctly sized.
+ *
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%.
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found.
+ */
+static inline int rhashtable_remove_fast(
+ struct rhashtable *ht, struct rhash_head *obj,
+ const struct rhashtable_params params)
+{
+ return __rhashtable_remove_fast(ht, obj, params, false);
+}
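
Removal sketch: RCU readers may still hold a reference, so the object must not be freed until a grace period has elapsed (synchronize_rcu() assumed from the rcupdate shim, kfree() from the slab shim):

	if (!rhashtable_remove_fast(&ht, &obj->node, my_params)) {
		synchronize_rcu();
		kfree(obj);
	}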
+
+/**
+ * rhltable_remove - remove object from hash list table
+ * @hlt: hash list table
+ * @list: pointer to hash list head inside object
+ * @params: hash table parameters
+ *
+ * Since the hash chain is single linked, the removal operation needs to
+ * walk the bucket chain upon removal. The removal operation is thus
+ * considerably slow if the hash table is not correctly sized.
+ *
+ * Will automatically shrink the table if permitted when residency drops
+ * below 30%
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found.
+ */
+static inline int rhltable_remove(
+ struct rhltable *hlt, struct rhlist_head *list,
+ const struct rhashtable_params params)
+{
+ return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true);
+}
+
+/* Internal function, please use rhashtable_replace_fast() instead */
+static inline int __rhashtable_replace_fast(
+ struct rhashtable *ht, struct bucket_table *tbl,
+ struct rhash_head *obj_old, struct rhash_head *obj_new,
+ const struct rhashtable_params params)
+{
+ struct rhash_lock_head __rcu **bkt;
+ struct rhash_head __rcu **pprev;
+ struct rhash_head *he;
+ unsigned int hash;
+ int err = -ENOENT;
+
+ /* Minimally, the old and new objects must have same hash
+ * (which should mean identifiers are the same).
+ */
+ hash = rht_head_hashfn(ht, tbl, obj_old, params);
+ if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
+ return -EINVAL;
+
+ bkt = rht_bucket_var(tbl, hash);
+ if (!bkt)
+ return -ENOENT;
+
+ pprev = NULL;
+ rht_lock(tbl, bkt);
+
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
+ if (he != obj_old) {
+ pprev = &he->next;
+ continue;
+ }
+
+ rcu_assign_pointer(obj_new->next, obj_old->next);
+ if (pprev) {
+ rcu_assign_pointer(*pprev, obj_new);
+ rht_unlock(tbl, bkt);
+ } else {
+ rht_assign_unlock(tbl, bkt, obj_new);
+ }
+ err = 0;
+ goto unlocked;
+ }
+
+ rht_unlock(tbl, bkt);
+
+unlocked:
+ return err;
+}
+
+/**
+ * rhashtable_replace_fast - replace an object in hash table
+ * @ht: hash table
+ * @obj_old: pointer to hash head inside object being replaced
+ * @obj_new: pointer to hash head inside object which is new
+ * @params: hash table parameters
+ *
+ * Replacing an object doesn't affect the number of elements in the hash table
+ * or bucket, so we don't need to worry about shrinking or expanding the
+ * table here.
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found,
+ * -EINVAL if hash is not the same for the old and new objects.
+ */
+static inline int rhashtable_replace_fast(
+ struct rhashtable *ht, struct rhash_head *obj_old,
+ struct rhash_head *obj_new,
+ const struct rhashtable_params params)
+{
+ struct bucket_table *tbl;
+ int err;
+
+ rcu_read_lock();
+
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+
+ /* Because we have already taken (and released) the bucket
+ * lock in old_tbl, if we find that future_tbl is not yet
+ * visible then that guarantees the entry to still be in
+ * the old tbl if it exists.
+ */
+ while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
+ obj_new, params)) &&
+ (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+ ;
+
+ rcu_read_unlock();
+
+ return err;
+}
+
+/**
+ * rhltable_walk_enter - Initialise an iterator
+ * @hlt: Table to walk over
+ * @iter: Hash table Iterator
+ *
+ * This function prepares a hash table walk.
+ *
+ * Note that if you restart a walk after rhashtable_walk_stop you
+ * may see the same object twice. Also, you may miss objects if
+ * there are removals in between rhashtable_walk_stop and the next
+ * call to rhashtable_walk_start.
+ *
+ * For a completely stable walk you should construct your own data
+ * structure outside the hash table.
+ *
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
+ *
+ * You must call rhashtable_walk_exit after this function returns.
+ */
+static inline void rhltable_walk_enter(struct rhltable *hlt,
+ struct rhashtable_iter *iter)
+{
+ return rhashtable_walk_enter(&hlt->ht, iter);
+}
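
A complete table walk with the iterator API declared earlier (sketch; it is assumed here that rhashtable_walk_next() may return ERR_PTR(-EAGAIN) if a resize happens mid-walk):

	struct rhashtable_iter iter;
	struct my_obj *obj;

	rhashtable_walk_enter(&ht, &iter);
	rhashtable_walk_start(&iter);

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* table resized under us */
			break;
		}
		use(obj);
	}

	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);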
+
+/**
+ * rhltable_free_and_destroy - free elements and destroy hash list table
+ * @hlt: the hash list table to destroy
+ * @free_fn: callback to release resources of element
+ * @arg: pointer passed to free_fn
+ *
+ * See documentation for rhashtable_free_and_destroy.
+ */
+static inline void rhltable_free_and_destroy(struct rhltable *hlt,
+ void (*free_fn)(void *ptr,
+ void *arg),
+ void *arg)
+{
+ return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
+}
+
+static inline void rhltable_destroy(struct rhltable *hlt)
+{
+ return rhltable_free_and_destroy(hlt, NULL, NULL);
+}
+
+#endif /* _LINUX_RHASHTABLE_H */
diff --git a/c_src/include/linux/rwsem.h b/c_src/include/linux/rwsem.h
new file mode 100644
index 00000000..f851d6a2
--- /dev/null
+++ b/c_src/include/linux/rwsem.h
@@ -0,0 +1,29 @@
+#ifndef __TOOLS_LINUX_RWSEM_H
+#define __TOOLS_LINUX_RWSEM_H
+
+#include <pthread.h>
+
+struct rw_semaphore {
+ pthread_rwlock_t lock;
+};
+
+#define __RWSEM_INITIALIZER(name) \
+ { .lock = PTHREAD_RWLOCK_INITIALIZER }
+
+#define DECLARE_RWSEM(name) \
+ struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+
+static inline void init_rwsem(struct rw_semaphore *lock)
+{
+ pthread_rwlock_init(&lock->lock, NULL);
+}
+
+#define down_read(l) pthread_rwlock_rdlock(&(l)->lock)
+#define down_read_killable(l) (pthread_rwlock_rdlock(&(l)->lock), 0)
+#define down_read_trylock(l) (!pthread_rwlock_tryrdlock(&(l)->lock))
+#define up_read(l) pthread_rwlock_unlock(&(l)->lock)
+
+#define down_write(l) pthread_rwlock_wrlock(&(l)->lock)
+#define up_write(l) pthread_rwlock_unlock(&(l)->lock)
+
+#endif /* __TOOLS_LINUX_RWSEM_H */
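
This shim maps straight onto pthread rwlocks, so the usual kernel calling pattern works unchanged (sketch; my_sem and the protected state are illustrative):

	static DECLARE_RWSEM(my_sem);

	down_read(&my_sem);
	/* ... read shared state ... */
	up_read(&my_sem);

	down_write(&my_sem);
	/* ... modify shared state ... */
	up_write(&my_sem);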
diff --git a/c_src/include/linux/scatterlist.h b/c_src/include/linux/scatterlist.h
new file mode 100644
index 00000000..1e4395c5
--- /dev/null
+++ b/c_src/include/linux/scatterlist.h
@@ -0,0 +1,109 @@
+#ifndef _LINUX_SCATTERLIST_H
+#define _LINUX_SCATTERLIST_H
+
+#include <linux/bug.h>
+#include <linux/slab.h>
+
+struct scatterlist {
+ unsigned long page_link;
+ unsigned int offset;
+ unsigned int length;
+};
+
+#define sg_is_chain(sg) ((sg)->page_link & 0x01)
+#define sg_is_last(sg) ((sg)->page_link & 0x02)
+#define sg_chain_ptr(sg) \
+ ((struct scatterlist *) ((sg)->page_link & ~0x03))
+
+static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+{
+ unsigned long page_link = sg->page_link & 0x3;
+
+ /*
+ * In order for the low bit stealing approach to work, pages
+ * must be aligned at a 32-bit boundary as a minimum.
+ */
+ BUG_ON((unsigned long) page & 0x03);
+ sg->page_link = page_link | (unsigned long) page;
+}
+
+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ sg_assign_page(sg, page);
+ sg->offset = offset;
+ sg->length = len;
+}
+
+static inline struct page *sg_page(struct scatterlist *sg)
+{
+ return (struct page *)((sg)->page_link & ~0x3);
+}
+
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
+ unsigned int buflen)
+{
+ sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
+static inline struct scatterlist *sg_next(struct scatterlist *sg)
+{
+ if (sg_is_last(sg))
+ return NULL;
+
+ sg++;
+ if (unlikely(sg_is_chain(sg)))
+ sg = sg_chain_ptr(sg);
+
+ return sg;
+}
+
+#define for_each_sg(sglist, sg, nr, __i) \
+ for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
+
+static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
+ struct scatterlist *sgl)
+{
+ /*
+ * offset and length are unused for chain entry. Clear them.
+ */
+ prv[prv_nents - 1].offset = 0;
+ prv[prv_nents - 1].length = 0;
+
+ /*
+ * Set lowest bit to indicate a link pointer, and make sure to clear
+ * the termination bit if it happens to be set.
+ */
+ prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
+}
+
+static inline void sg_mark_end(struct scatterlist *sg)
+{
+ sg->page_link |= 0x02;
+ sg->page_link &= ~0x01;
+}
+
+static inline void sg_unmark_end(struct scatterlist *sg)
+{
+ sg->page_link &= ~0x02;
+}
+
+static inline void *sg_virt(struct scatterlist *sg)
+{
+ return page_address(sg_page(sg)) + sg->offset;
+}
+
+static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+ memset(sgl, 0, sizeof(*sgl) * nents);
+ sg_mark_end(&sgl[nents - 1]);
+}
+
+static inline void sg_init_one(struct scatterlist *sg, const void *buf,
+ unsigned int buflen)
+{
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, buf, buflen);
+}
+
+#endif /* _LINUX_SCATTERLIST_H */
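
Minimal use of this scatterlist shim (sketch; the hdr/payload buffers and crypto_consume() are illustrative):

	struct scatterlist sg[2], *s;
	int i;

	sg_init_table(sg, 2);
	sg_set_buf(&sg[0], hdr, hdr_len);
	sg_set_buf(&sg[1], payload, payload_len);

	for_each_sg(sg, s, 2, i)
		crypto_consume(sg_virt(s), s->length);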
diff --git a/c_src/include/linux/sched.h b/c_src/include/linux/sched.h
new file mode 100644
index 00000000..7afb6d54
--- /dev/null
+++ b/c_src/include/linux/sched.h
@@ -0,0 +1,186 @@
+#ifndef __TOOLS_LINUX_SCHED_H
+#define __TOOLS_LINUX_SCHED_H
+
+#include <pthread.h>
+#include <time.h>
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/completion.h>
+#include <linux/jiffies.h>
+#include <linux/rwsem.h>
+#include <linux/time64.h>
+
+#define TASK_RUNNING 0
+#define TASK_INTERRUPTIBLE 1
+#define TASK_UNINTERRUPTIBLE 2
+#define __TASK_STOPPED 4
+#define __TASK_TRACED 8
+/* in tsk->exit_state */
+#define EXIT_DEAD 16
+#define EXIT_ZOMBIE 32
+#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
+/* in tsk->state again */
+#define TASK_DEAD 64
+#define TASK_WAKEKILL 128
+#define TASK_WAKING 256
+#define TASK_PARKED 512
+#define TASK_NOLOAD 1024
+#define TASK_NEW 2048
+#define TASK_IDLE_WORKER 4096
+#define TASK_STATE_MAX 8192
+#define TASK_FREEZABLE (1U << 14)
+
+/* Convenience macros for the sake of set_task_state */
+#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
+#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
+#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
+
+#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
+
+/* Convenience macros for the sake of wake_up */
+#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
+#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
+
+#define TASK_COMM_LEN 16
+
+#define PF_EXITING 0x00000004 /* getting shut down */
+#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
+#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
+#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
+#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
+#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
+#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
+#define PF_DUMPCORE 0x00000200 /* dumped core */
+#define PF_SIGNALED 0x00000400 /* killed by a signal */
+#define PF_MEMALLOC 0x00000800 /* Allocating memory */
+#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */
+#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */
+#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */
+#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */
+#define PF_FROZEN 0x00010000 /* frozen for system suspend */
+#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
+#define PF_KSWAPD 0x00040000 /* I am kswapd */
+#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */
+#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
+#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
+#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
+#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
+#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
+#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
+#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
+#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
+
+struct task_struct {
+ pthread_t thread;
+
+ int (*thread_fn)(void *);
+ void *thread_data;
+
+ atomic_t usage;
+ int state;
+
+ /* kthread: */
+ unsigned long kthread_flags;
+ struct completion exited;
+
+ unsigned flags;
+
+ bool on_cpu;
+ char comm[TASK_COMM_LEN];
+ pid_t pid;
+
+ struct bio_list *bio_list;
+
+ struct signal_struct {
+ struct rw_semaphore exec_update_lock;
+ } *signal, _signal;
+};
+
+extern __thread struct task_struct *current;
+
+#define __set_task_state(tsk, state_value) \
+ do { (tsk)->state = (state_value); } while (0)
+#define set_task_state(tsk, state_value) \
+ smp_store_mb((tsk)->state, (state_value))
+#define __set_current_state(state_value) \
+ do { current->state = (state_value); } while (0)
+#define set_current_state(state_value) \
+ smp_store_mb(current->state, (state_value))
+
+static inline struct task_struct *get_task_struct(struct task_struct *task)
+{
+ atomic_inc(&task->usage);
+ return task;
+
+}
+
+extern void __put_task_struct(struct task_struct *t);
+
+static inline void put_task_struct(struct task_struct *t)
+{
+ if (atomic_dec_and_test(&t->usage))
+ __put_task_struct(t);
+}
+
+static inline void cond_resched(void) {}
+#define need_resched() 0
+
+void schedule(void);
+
+#define MAX_SCHEDULE_TIMEOUT LONG_MAX
+long schedule_timeout(long timeout);
+
+static inline void io_schedule(void)
+{
+ schedule();
+}
+
+static inline long io_schedule_timeout(long timeout)
+{
+ return schedule_timeout(timeout);
+}
+
+int wake_up_process(struct task_struct *);
+
+static inline u64 ktime_get_seconds(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+
+ return ts.tv_sec;
+}
+
+static inline u64 ktime_get_real_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_REALTIME, &ts);
+ return timespec_to_ns(&ts);
+}
+
+static inline u64 ktime_get_real_seconds(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_REALTIME, &ts);
+
+ return ts.tv_sec;
+}
+
+static inline void ktime_get_coarse_real_ts64(struct timespec64 *ts)
+{
+ clock_gettime(CLOCK_REALTIME_COARSE, ts);
+}
+
+#define current_kernel_time64() current_kernel_time()
+#define CURRENT_TIME (current_kernel_time())
+
+static inline unsigned int stack_trace_save_tsk(struct task_struct *task,
+ unsigned long *store, unsigned int size,
+ unsigned int skipnr)
+{
+ return 0;
+}
+
+#endif /* __TOOLS_LINUX_SCHED_H */
diff --git a/c_src/include/linux/sched/clock.h b/c_src/include/linux/sched/clock.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/c_src/include/linux/sched/clock.h
diff --git a/c_src/include/linux/sched/cputime.h b/c_src/include/linux/sched/cputime.h
new file mode 100644
index 00000000..a89c626f
--- /dev/null
+++ b/c_src/include/linux/sched/cputime.h
@@ -0,0 +1,6 @@
+
+static inline void task_cputime_adjusted(struct task_struct *p, u64 *utime, u64 *stime)
+{
+ *utime = 0;
+ *stime = 0;
+}
diff --git a/c_src/include/linux/sched/debug.h b/c_src/include/linux/sched/debug.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/c_src/include/linux/sched/debug.h
diff --git a/c_src/include/linux/sched/mm.h b/c_src/include/linux/sched/mm.h
new file mode 100644
index 00000000..03feda7a
--- /dev/null
+++ b/c_src/include/linux/sched/mm.h
@@ -0,0 +1,31 @@
+#ifndef _LINUX_SCHED_MM_H
+#define _LINUX_SCHED_MM_H
+
+#define PF_MEMALLOC 0x00000800 /* Allocating memory */
+#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */
+
+static inline unsigned int memalloc_nofs_save(void)
+{
+ unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
+ current->flags |= PF_MEMALLOC_NOFS;
+ return flags;
+}
+
+static inline void memalloc_nofs_restore(unsigned int flags)
+{
+ current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
+}
+
+static inline unsigned int memalloc_noreclaim_save(void)
+{
+ unsigned int flags = current->flags & PF_MEMALLOC;
+ current->flags |= PF_MEMALLOC;
+ return flags;
+}
+
+static inline void memalloc_noreclaim_restore(unsigned int flags)
+{
+ current->flags = (current->flags & ~PF_MEMALLOC) | flags;
+}
+
+#endif /* _LINUX_SCHED_MM_H */
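
Typical bracketing of an allocation-heavy region with the NOFS flag (sketch; do_allocations() is a stand-in):

	unsigned int flags = memalloc_nofs_save();

	/* allocations in here will not recurse into the filesystem */
	do_allocations();

	memalloc_nofs_restore(flags);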
diff --git a/c_src/include/linux/sched/rt.h b/c_src/include/linux/sched/rt.h
new file mode 100644
index 00000000..ef3040e4
--- /dev/null
+++ b/c_src/include/linux/sched/rt.h
@@ -0,0 +1,9 @@
+#ifndef _SCHED_RT_H
+#define _SCHED_RT_H
+
+static inline int rt_task(struct task_struct *p)
+{
+ return 0;
+}
+
+#endif /* _SCHED_RT_H */
diff --git a/c_src/include/linux/sched/signal.h b/c_src/include/linux/sched/signal.h
new file mode 100644
index 00000000..20bdc050
--- /dev/null
+++ b/c_src/include/linux/sched/signal.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_SIGNAL_H
+#define _LINUX_SCHED_SIGNAL_H
+
+static inline int fatal_signal_pending(struct task_struct *p)
+{
+ return 0;
+}
+
+#endif /* _LINUX_SCHED_SIGNAL_H */
+
diff --git a/c_src/include/linux/sched/task.h b/c_src/include/linux/sched/task.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/c_src/include/linux/sched/task.h
diff --git a/c_src/include/linux/sched/task_stack.h b/c_src/include/linux/sched/task_stack.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/c_src/include/linux/sched/task_stack.h
diff --git a/c_src/include/linux/semaphore.h b/c_src/include/linux/semaphore.h
new file mode 100644
index 00000000..498e717a
--- /dev/null
+++ b/c_src/include/linux/semaphore.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * Please see kernel/locking/semaphore.c for documentation of these functions
+ */
+#ifndef __LINUX_SEMAPHORE_H
+#define __LINUX_SEMAPHORE_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+/* Please don't access any members of this structure directly */
+struct semaphore {
+ raw_spinlock_t lock;
+ unsigned int count;
+ struct list_head wait_list;
+};
+
+#define __SEMAPHORE_INITIALIZER(name, n) \
+{ \
+ .lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \
+ .count = n, \
+ .wait_list = LIST_HEAD_INIT((name).wait_list), \
+}
+
+#define DEFINE_SEMAPHORE(name) \
+ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1)
+
+static inline void sema_init(struct semaphore *sem, int val)
+{
+ *sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val);
+}
+
+extern void down(struct semaphore *sem);
+extern int __must_check down_interruptible(struct semaphore *sem);
+extern int __must_check down_killable(struct semaphore *sem);
+extern int __must_check down_trylock(struct semaphore *sem);
+extern int __must_check down_timeout(struct semaphore *sem, long);
+extern void up(struct semaphore *sem);
+
+#endif /* __LINUX_SEMAPHORE_H */
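
Usage sketch for the semaphore API above (my_sem is illustrative; down_interruptible() shown because it can fail):

	static DEFINE_SEMAPHORE(my_sem);

	if (down_interruptible(&my_sem))
		return -EINTR;
	/* ... critical section ... */
	up(&my_sem);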
diff --git a/c_src/include/linux/seq_buf.h b/c_src/include/linux/seq_buf.h
new file mode 100644
index 00000000..8c9c0dd7
--- /dev/null
+++ b/c_src/include/linux/seq_buf.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SEQ_BUF_H
+#define _LINUX_SEQ_BUF_H
+
+#include <linux/kernel.h>
+#include <stdarg.h>
+#include <string.h>
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use.
+ */
+
+/**
+ * seq_buf - seq buffer structure
+ * @buffer: pointer to the buffer
+ * @size: size of the buffer
+ * @len: the amount of data inside the buffer
+ * @readpos: The next position to read in the buffer.
+ */
+struct seq_buf {
+ char *buffer;
+ size_t size;
+ size_t len;
+ loff_t readpos;
+};
+
+static inline void seq_buf_clear(struct seq_buf *s)
+{
+ s->len = 0;
+ s->readpos = 0;
+}
+
+static inline void
+seq_buf_init(struct seq_buf *s, char *buf, unsigned int size)
+{
+ s->buffer = buf;
+ s->size = size;
+ seq_buf_clear(s);
+}
+
+/*
+ * A seq_buf has a buffer that might overflow. When this happens
+ * the len and size are set to be equal.
+ */
+static inline bool
+seq_buf_has_overflowed(struct seq_buf *s)
+{
+ return s->len > s->size;
+}
+
+static inline void
+seq_buf_set_overflow(struct seq_buf *s)
+{
+ s->len = s->size + 1;
+}
+
+/*
+ * How much buffer is left on the seq_buf?
+ */
+static inline unsigned int
+seq_buf_buffer_left(struct seq_buf *s)
+{
+ if (seq_buf_has_overflowed(s))
+ return 0;
+
+ return s->size - s->len;
+}
+
+/* How much buffer was written? */
+static inline unsigned int seq_buf_used(struct seq_buf *s)
+{
+ return min(s->len, s->size);
+}
+
+/**
+ * seq_buf_terminate - Make sure buffer is nul terminated
+ * @s: the seq_buf descriptor to terminate.
+ *
+ * This makes sure that the buffer in @s is nul terminated and
+ * safe to read as a string.
+ *
+ * Note, if this is called when the buffer has overflowed, then
+ * the last byte of the buffer is zeroed, and the len will still
+ * point past it.
+ *
+ * After this function is called, s->buffer is safe to use
+ * in string operations.
+ */
+static inline void seq_buf_terminate(struct seq_buf *s)
+{
+ if (WARN_ON(s->size == 0))
+ return;
+
+ if (seq_buf_buffer_left(s))
+ s->buffer[s->len] = 0;
+ else
+ s->buffer[s->size - 1] = 0;
+}
+
+/**
+ * seq_buf_get_buf - get buffer to write arbitrary data to
+ * @s: the seq_buf handle
+ * @bufp: the beginning of the buffer is stored here
+ *
+ * Return the number of bytes available in the buffer, or zero if
+ * there's no space.
+ */
+static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp)
+{
+ WARN_ON(s->len > s->size + 1);
+
+ if (s->len < s->size) {
+ *bufp = s->buffer + s->len;
+ return s->size - s->len;
+ }
+
+ *bufp = NULL;
+ return 0;
+}
+
+/**
+ * seq_buf_commit - commit data to the buffer
+ * @s: the seq_buf handle
+ * @num: the number of bytes to commit
+ *
+ * Commit @num bytes of data written to a buffer previously acquired
+ * by seq_buf_get_buf(). To signal an error condition, or that the data
+ * didn't fit in the available space, pass a negative @num value.
+ */
+static inline void seq_buf_commit(struct seq_buf *s, int num)
+{
+ if (num < 0) {
+ seq_buf_set_overflow(s);
+ } else {
+ /* num must be negative on overflow */
+ BUG_ON(s->len + num > s->size);
+ s->len += num;
+ }
+}
+
+extern __printf(2, 3)
+int seq_buf_printf(struct seq_buf *s, const char *fmt, ...);
+extern __printf(2, 0)
+int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args);
+extern int seq_buf_to_user(struct seq_buf *s, char __user *ubuf,
+ int cnt);
+extern int seq_buf_puts(struct seq_buf *s, const char *str);
+extern int seq_buf_putc(struct seq_buf *s, unsigned char c);
+
+void seq_buf_human_readable_u64(struct seq_buf *, u64);
+
+#endif /* _LINUX_SEQ_BUF_H */
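
Building a string with the seq_buf helpers above (sketch; nr_devices and bytes_used are illustrative):

	char buf[128];
	struct seq_buf s;

	seq_buf_init(&s, buf, sizeof(buf));
	seq_buf_printf(&s, "devices %u, used ", nr_devices);
	seq_buf_human_readable_u64(&s, bytes_used);
	seq_buf_terminate(&s);
	/* buf is now a nul-terminated string, possibly truncated on overflow */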
diff --git a/c_src/include/linux/seq_file.h b/c_src/include/linux/seq_file.h
new file mode 100644
index 00000000..b455ebca
--- /dev/null
+++ b/c_src/include/linux/seq_file.h
@@ -0,0 +1,21 @@
+#ifndef _LINUX_SEQ_FILE_H
+#define _LINUX_SEQ_FILE_H
+
+#include <linux/types.h>
+#include <linux/fs.h>
+
+struct seq_file {
+ char *buf;
+ size_t size;
+ size_t from;
+ size_t count;
+ size_t pad_until;
+ loff_t index;
+ loff_t read_pos;
+ u64 version;
+ int poll_event;
+ const struct file *file;
+ void *private;
+};
+
+#endif
diff --git a/c_src/include/linux/seqlock.h b/c_src/include/linux/seqlock.h
new file mode 100644
index 00000000..435420fe
--- /dev/null
+++ b/c_src/include/linux/seqlock.h
@@ -0,0 +1,47 @@
+#ifndef __LINUX_SEQLOCK_H
+#define __LINUX_SEQLOCK_H
+
+#include <linux/compiler.h>
+
+typedef struct seqcount {
+ unsigned sequence;
+} seqcount_t;
+
+static inline void seqcount_init(seqcount_t *s)
+{
+ s->sequence = 0;
+}
+
+static inline unsigned read_seqcount_begin(const seqcount_t *s)
+{
+ unsigned ret;
+
+repeat:
+ ret = READ_ONCE(s->sequence);
+ if (unlikely(ret & 1)) {
+ cpu_relax();
+ goto repeat;
+ }
+ smp_rmb();
+ return ret;
+}
+
+static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
+{
+ smp_rmb();
+ return unlikely(s->sequence != start);
+}
+
+static inline void write_seqcount_begin(seqcount_t *s)
+{
+ s->sequence++;
+ smp_wmb();
+}
+
+static inline void write_seqcount_end(seqcount_t *s)
+{
+ smp_wmb();
+ s->sequence++;
+}
+
+#endif /* __LINUX_SEQLOCK_H */
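
The classic reader/writer pairing for this seqcount (sketch; seq, shared_a and shared_b are illustrative):

	/* writer */
	write_seqcount_begin(&seq);
	shared_a = new_a;
	shared_b = new_b;
	write_seqcount_end(&seq);

	/* reader: retry if a writer raced with us */
	unsigned start;
	u64 a, b;

	do {
		start = read_seqcount_begin(&seq);
		a = shared_a;
		b = shared_b;
	} while (read_seqcount_retry(&seq, start));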
diff --git a/c_src/include/linux/shrinker.h b/c_src/include/linux/shrinker.h
new file mode 100644
index 00000000..d0a84794
--- /dev/null
+++ b/c_src/include/linux/shrinker.h
@@ -0,0 +1,35 @@
+#ifndef __TOOLS_LINUX_SHRINKER_H
+#define __TOOLS_LINUX_SHRINKER_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+struct shrink_control {
+ gfp_t gfp_mask;
+ unsigned long nr_to_scan;
+};
+
+#define SHRINK_STOP (~0UL)
+
+struct seq_buf;
+struct shrinker {
+ unsigned long (*count_objects)(struct shrinker *,
+ struct shrink_control *sc);
+ unsigned long (*scan_objects)(struct shrinker *,
+ struct shrink_control *sc);
+ void (*to_text)(struct seq_buf *, struct shrinker *);
+
+ int seeks; /* seeks to recreate an obj */
+ long batch; /* reclaim batch size, 0 = default */
+ struct list_head list;
+ void *private_data;
+};
+
+void shrinker_free(struct shrinker *);
+struct shrinker *shrinker_alloc(unsigned int, const char *, ...);
+
+int shrinker_register(struct shrinker *);
+
+void run_shrinkers(gfp_t gfp_mask, bool);
+
+#endif /* __TOOLS_LINUX_SHRINKER_H */
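
Registering a shrinker against this shim (sketch; my_count, my_scan and my_cache are illustrative callbacks and private data):

	struct shrinker *s = shrinker_alloc(0, "my-cache");
	if (!s)
		return -ENOMEM;

	s->count_objects = my_count;
	s->scan_objects  = my_scan;
	s->private_data  = my_cache;
	shrinker_register(s);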
diff --git a/c_src/include/linux/siphash.h b/c_src/include/linux/siphash.h
new file mode 100644
index 00000000..bf21591a
--- /dev/null
+++ b/c_src/include/linux/siphash.h
@@ -0,0 +1,145 @@
+/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
+ */
+
+#ifndef _LINUX_SIPHASH_H
+#define _LINUX_SIPHASH_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+#define SIPHASH_ALIGNMENT __alignof__(u64)
+typedef struct {
+ u64 key[2];
+} siphash_key_t;
+
+static inline bool siphash_key_is_zero(const siphash_key_t *key)
+{
+ return !(key->key[0] | key->key[1]);
+}
+
+u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
+#endif
+
+u64 siphash_1u64(const u64 a, const siphash_key_t *key);
+u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
+u64 siphash_3u64(const u64 a, const u64 b, const u64 c,
+ const siphash_key_t *key);
+u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d,
+ const siphash_key_t *key);
+u64 siphash_1u32(const u32 a, const siphash_key_t *key);
+u64 siphash_3u32(const u32 a, const u32 b, const u32 c,
+ const siphash_key_t *key);
+
+static inline u64 siphash_2u32(const u32 a, const u32 b,
+ const siphash_key_t *key)
+{
+ return siphash_1u64((u64)b << 32 | a, key);
+}
+static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c,
+ const u32 d, const siphash_key_t *key)
+{
+ return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key);
+}
+
+
+static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
+ const siphash_key_t *key)
+{
+ if (__builtin_constant_p(len) && len == 4)
+ return siphash_1u32(le32_to_cpup((const __le32 *)data), key);
+ if (__builtin_constant_p(len) && len == 8)
+ return siphash_1u64(le64_to_cpu(data[0]), key);
+ if (__builtin_constant_p(len) && len == 16)
+ return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
+ key);
+ if (__builtin_constant_p(len) && len == 24)
+ return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
+ le64_to_cpu(data[2]), key);
+ if (__builtin_constant_p(len) && len == 32)
+ return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
+ le64_to_cpu(data[2]), le64_to_cpu(data[3]),
+ key);
+ return __siphash_aligned(data, len, key);
+}
+
+/**
+ * siphash - compute 64-bit siphash PRF value
+ * @data: buffer to hash
+ * @len: length of @data
+ * @key: the siphash key
+ */
+static inline u64 siphash(const void *data, size_t len,
+ const siphash_key_t *key)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
+ return __siphash_unaligned(data, len, key);
+#endif
+ return ___siphash_aligned(data, len, key);
+}
+
+#define HSIPHASH_ALIGNMENT __alignof__(unsigned long)
+typedef struct {
+ unsigned long key[2];
+} hsiphash_key_t;
+
+u32 __hsiphash_aligned(const void *data, size_t len,
+ const hsiphash_key_t *key);
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key);
+#endif
+
+u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
+u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
+u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c,
+ const hsiphash_key_t *key);
+u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d,
+ const hsiphash_key_t *key);
+
+static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
+ const hsiphash_key_t *key)
+{
+ if (__builtin_constant_p(len) && len == 4)
+ return hsiphash_1u32(le32_to_cpu(data[0]), key);
+ if (__builtin_constant_p(len) && len == 8)
+ return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+ key);
+ if (__builtin_constant_p(len) && len == 12)
+ return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+ le32_to_cpu(data[2]), key);
+ if (__builtin_constant_p(len) && len == 16)
+ return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+ le32_to_cpu(data[2]), le32_to_cpu(data[3]),
+ key);
+ return __hsiphash_aligned(data, len, key);
+}
+
+/**
+ * hsiphash - compute 32-bit hsiphash PRF value
+ * @data: buffer to hash
+ * @len: length of @data
+ * @key: the hsiphash key
+ */
+static inline u32 hsiphash(const void *data, size_t len,
+ const hsiphash_key_t *key)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
+ return __hsiphash_unaligned(data, len, key);
+#endif
+ return ___hsiphash_aligned(data, len, key);
+}
+
+#endif /* _LINUX_SIPHASH_H */
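A short sketch of keyed hashing of a small fixed-size object with the API above; the struct and key handling are illustrative:

#include <linux/siphash.h>

struct flow_key {
	u64 src, dst;
};

static siphash_key_t demo_key;	/* illustrative: fill with random bytes once at startup */

static u64 flow_hash(const struct flow_key *k)
{
	/* constant 16-byte length: the inline wrapper above folds this to siphash_2u64() */
	return siphash(k, sizeof(*k), &demo_key);
}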
diff --git a/c_src/include/linux/slab.h b/c_src/include/linux/slab.h
new file mode 100644
index 00000000..ca0c7934
--- /dev/null
+++ b/c_src/include/linux/slab.h
@@ -0,0 +1,272 @@
+#ifndef __TOOLS_LINUX_SLAB_H
+#define __TOOLS_LINUX_SLAB_H
+
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/overflow.h>
+#include <linux/page.h>
+#include <linux/shrinker.h>
+#include <linux/types.h>
+
+#include <sys/mman.h>
+
+#define alloc_hooks(_do, ...) _do
+
+#define ARCH_KMALLOC_MINALIGN 16
+#define KMALLOC_MAX_SIZE SIZE_MAX
+
+static inline void *kmalloc_noprof(size_t size, gfp_t flags)
+{
+ unsigned i;
+ void *p;
+
+ for (i = 0; i < 10; i++) {
+ if (size) {
+ size_t alignment = min_t(size_t, PAGE_SIZE,
+ rounddown_pow_of_two(size));
+ alignment = max(sizeof(void *), alignment);
+ if (posix_memalign(&p, alignment, size))
+ p = NULL;
+ } else {
+ p = malloc(0);
+ }
+
+ if (p) {
+ if (flags & __GFP_ZERO)
+ memset(p, 0, size);
+ break;
+ }
+
+ run_shrinkers(flags, true);
+ }
+
+ return p;
+}
+#define kmalloc kmalloc_noprof
+
+static inline void *krealloc(void *old, size_t size, gfp_t flags)
+{
+ void *new;
+
+ new = kmalloc(size, flags);
+ if (!new)
+ return NULL;
+
+ if (flags & __GFP_ZERO)
+ memset(new, 0, size);
+
+ if (old) {
+ memcpy(new, old,
+ min(malloc_usable_size(old),
+ malloc_usable_size(new)));
+ free(old);
+ }
+
+ return new;
+}
+
+static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
+ return NULL;
+
+ return krealloc(p, bytes, flags);
+}
+
+#define kzalloc(size, flags) kmalloc(size, flags|__GFP_ZERO)
+
+static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(n, size, &bytes)))
+ return NULL;
+ return kmalloc(bytes, flags);
+}
+
+#define kvmalloc_array(n, size, flags) \
+ ((size) != 0 && (n) > SIZE_MAX / (size) \
+ ? NULL : kmalloc((n) * (size), flags))
+
+#define kcalloc(n, size, flags) kmalloc_array(n, size, flags|__GFP_ZERO)
+
+#define kfree(p) free(p)
+#define kzfree(p) free(p)
+
+#define kvmalloc(size, flags) kmalloc(size, flags)
+#define kvzalloc(size, flags) kzalloc(size, flags)
+#define kvfree(p) kfree(p)
+
+static inline struct page *alloc_pages_noprof(gfp_t flags, unsigned int order)
+{
+ size_t size = PAGE_SIZE << order;
+ unsigned i;
+ void *p;
+
+ for (i = 0; i < 10; i++) {
+ p = aligned_alloc(PAGE_SIZE, size);
+
+ if (p) {
+ if (flags & __GFP_ZERO)
+ memset(p, 0, size);
+ break;
+ }
+
+ run_shrinkers(flags, true);
+ }
+
+ return p;
+}
+#define alloc_pages alloc_pages_noprof
+
+#define alloc_page(gfp) alloc_pages(gfp, 0)
+
+#define _get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order))
+#define __get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order))
+#define get_free_pages_noprof(gfp, order) \
+ ((unsigned long) alloc_pages(gfp, order))
+#define __get_free_page(gfp) __get_free_pages(gfp, 0)
+
+#define __free_pages(page, order) \
+do { \
+ (void) order; \
+ free(page); \
+} while (0)
+
+#define free_pages(addr, order) \
+do { \
+ (void) order; \
+ free((void *) (addr)); \
+} while (0)
+
+#define __free_page(page) __free_pages((page), 0)
+#define free_page(addr) free_pages((addr), 0)
+
+#define VM_IOREMAP 0x00000001 /* ioremap() and friends */
+#define VM_ALLOC 0x00000002 /* vmalloc() */
+#define VM_MAP 0x00000004 /* vmap()ed pages */
+#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */
+#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
+#define VM_NO_GUARD 0x00000040 /* don't add guard page */
+#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
+
+static inline void vunmap(const void *addr) {}
+
+static inline void *vmap(struct page **pages, unsigned int count,
+ unsigned long flags, unsigned prot)
+{
+ return NULL;
+}
+
+#define is_vmalloc_addr(page) 0
+
+#define vmalloc_to_page(addr) ((struct page *) (addr))
+
+static inline void *kmemdup(const void *src, size_t len, gfp_t gfp)
+{
+ void *p;
+
+ p = kmalloc(len, gfp);
+ if (p)
+ memcpy(p, src, len);
+ return p;
+}
+
+struct kmem_cache {
+ size_t obj_size;
+};
+
+static inline void *kmem_cache_alloc(struct kmem_cache *c, gfp_t gfp)
+{
+ return kmalloc(c->obj_size, gfp);
+}
+
+static inline void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t gfp)
+{
+ return kzalloc(c->obj_size, gfp);
+}
+
+static inline void kmem_cache_free(struct kmem_cache *c, void *p)
+{
+ kfree(p);
+}
+
+static inline void kmem_cache_destroy(struct kmem_cache *p)
+{
+ kfree(p);
+}
+
+static inline struct kmem_cache *kmem_cache_create(size_t obj_size)
+{
+ struct kmem_cache *p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return NULL;
+
+ p->obj_size = obj_size;
+ return p;
+}
+
+#define KMEM_CACHE(_struct, _flags) kmem_cache_create(sizeof(struct _struct))
+
+#define PAGE_KERNEL 0
+#define PAGE_KERNEL_EXEC 1
+
+#define vfree(p) free(p)
+
+static inline void *__vmalloc_noprof(unsigned long size, gfp_t flags)
+{
+ unsigned i;
+ void *p;
+
+ size = round_up(size, PAGE_SIZE);
+
+ for (i = 0; i < 10; i++) {
+ p = aligned_alloc(PAGE_SIZE, size);
+
+ if (p) {
+ if (flags & __GFP_ZERO)
+ memset(p, 0, size);
+ break;
+ }
+
+ run_shrinkers(flags, true);
+ }
+
+ return p;
+}
+#define __vmalloc __vmalloc_noprof
+
+static inline void *vmalloc_exec(unsigned long size, gfp_t gfp_mask)
+{
+ void *p;
+
+ p = __vmalloc(size, gfp_mask);
+ if (!p)
+ return NULL;
+
+ if (mprotect(p, size, PROT_READ|PROT_WRITE|PROT_EXEC)) {
+ vfree(p);
+ return NULL;
+ }
+
+ return p;
+}
+
+static inline void *vmalloc(unsigned long size)
+{
+ return __vmalloc(size, GFP_KERNEL);
+}
+
+static inline void *vzalloc(unsigned long size)
+{
+ return __vmalloc(size, GFP_KERNEL|__GFP_ZERO);
+}
+
+#endif /* __TOOLS_LINUX_SLAB_H */
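For reference, a small sketch of the kmem_cache shim above in use; the struct and function names are illustrative and -ENOMEM is assumed from <errno.h>:

#include <errno.h>
#include <linux/slab.h>

struct demo_obj {
	int a, b;
};

static struct kmem_cache *demo_cache;

static int demo_cache_init(void)
{
	demo_cache = KMEM_CACHE(demo_obj, 0);	/* in this shim, just records sizeof(struct demo_obj) */
	return demo_cache ? 0 : -ENOMEM;
}

static struct demo_obj *demo_obj_new(void)
{
	/* zeroed allocation, backed by plain malloc()/posix_memalign() here */
	return kmem_cache_zalloc(demo_cache, GFP_KERNEL);
}

static void demo_obj_free(struct demo_obj *obj)
{
	kmem_cache_free(demo_cache, obj);
}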
diff --git a/c_src/include/linux/sort.h b/c_src/include/linux/sort.h
new file mode 100644
index 00000000..afea0445
--- /dev/null
+++ b/c_src/include/linux/sort.h
@@ -0,0 +1,13 @@
+#ifndef _LINUX_SORT_H
+#define _LINUX_SORT_H
+
+#include <stdlib.h>
+
+static inline void sort(void *base, size_t num, size_t size,
+ int (*cmp_func)(const void *, const void *),
+ void (*swap_func)(void *, void *, int size))
+{
+ qsort(base, num, size, cmp_func); /* swap_func is ignored; qsort() does its own swapping */
+}
+
+#endif
diff --git a/c_src/include/linux/spinlock.h b/c_src/include/linux/spinlock.h
new file mode 100644
index 00000000..6c4a623c
--- /dev/null
+++ b/c_src/include/linux/spinlock.h
@@ -0,0 +1,65 @@
+#ifndef __TOOLS_LINUX_SPINLOCK_H
+#define __TOOLS_LINUX_SPINLOCK_H
+
+#include <linux/atomic.h>
+#include <pthread.h>
+
+typedef struct {
+ pthread_mutex_t lock;
+} raw_spinlock_t;
+
+#define __RAW_SPIN_LOCK_UNLOCKED(name) (raw_spinlock_t) { .lock = PTHREAD_MUTEX_INITIALIZER }
+
+static inline void raw_spin_lock_init(raw_spinlock_t *lock)
+{
+ pthread_mutex_init(&lock->lock, NULL);
+}
+
+static inline bool raw_spin_trylock(raw_spinlock_t *lock)
+{
+ return !pthread_mutex_trylock(&lock->lock);
+}
+
+static inline void raw_spin_lock(raw_spinlock_t *lock)
+{
+ pthread_mutex_lock(&lock->lock);
+}
+
+static inline void raw_spin_unlock(raw_spinlock_t *lock)
+{
+ pthread_mutex_unlock(&lock->lock);
+}
+
+#define raw_spin_lock_irq(lock) raw_spin_lock(lock)
+#define raw_spin_unlock_irq(lock) raw_spin_unlock(lock)
+
+#define raw_spin_lock_irqsave(lock, flags) \
+do { \
+ flags = 0; \
+ raw_spin_lock(lock); \
+} while (0)
+
+#define raw_spin_unlock_irqrestore(lock, flags) raw_spin_unlock(lock)
+
+typedef raw_spinlock_t spinlock_t;
+
+#define __SPIN_LOCK_UNLOCKED(name) __RAW_SPIN_LOCK_UNLOCKED(name)
+
+#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
+
+#define spin_lock_init(lock) raw_spin_lock_init(lock)
+#define spin_lock(lock) raw_spin_lock(lock)
+#define spin_unlock(lock) raw_spin_unlock(lock)
+
+#define spin_lock_nested(lock, n) spin_lock(lock)
+
+#define spin_lock_bh(lock) raw_spin_lock(lock)
+#define spin_unlock_bh(lock) raw_spin_unlock(lock)
+
+#define spin_lock_irq(lock) raw_spin_lock(lock)
+#define spin_unlock_irq(lock) raw_spin_unlock(lock)
+
+#define spin_lock_irqsave(lock, flags) raw_spin_lock_irqsave(lock, flags)
+#define spin_unlock_irqrestore(lock, flags) raw_spin_unlock_irqrestore(lock, flags)
+
+#endif /* __TOOLS_LINUX_SPINLOCK_H */
diff --git a/c_src/include/linux/srcu.h b/c_src/include/linux/srcu.h
new file mode 100644
index 00000000..75823cf2
--- /dev/null
+++ b/c_src/include/linux/srcu.h
@@ -0,0 +1,31 @@
+#ifndef __TOOLS_LINUX_SRCU_H
+#define __TOOLS_LINUX_SRCU_H
+
+struct srcu_struct {
+};
+
+static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) {}
+
+static inline int srcu_read_lock(struct srcu_struct *ssp)
+{
+ return 0;
+}
+
+static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
+{
+ return false;
+}
+
+static inline unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp)
+{
+ return 0;
+}
+
+static inline void cleanup_srcu_struct(struct srcu_struct *ssp) {}
+
+static inline int init_srcu_struct(struct srcu_struct *ssp)
+{
+ return 0;
+}
+
+#endif /* __TOOLS_LINUX_SRCU_H */
diff --git a/c_src/include/linux/stat.h b/c_src/include/linux/stat.h
new file mode 100644
index 00000000..1a30957b
--- /dev/null
+++ b/c_src/include/linux/stat.h
@@ -0,0 +1,12 @@
+#ifndef _LINUX_STAT_H
+#define _LINUX_STAT_H
+
+#include <sys/stat.h>
+
+#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO)
+#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
+#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH)
+#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH)
+#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
+
+#endif
diff --git a/c_src/include/linux/string.h b/c_src/include/linux/string.h
new file mode 100644
index 00000000..3ceda3a3
--- /dev/null
+++ b/c_src/include/linux/string.h
@@ -0,0 +1,17 @@
+#ifndef _TOOLS_LINUX_STRING_H_
+#define _TOOLS_LINUX_STRING_H_
+
+#include <stdlib.h>
+#include <string.h>
+#include <linux/types.h> /* for size_t */
+
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+extern ssize_t strscpy(char *dest, const char *src, size_t count);
+extern char *strim(char *);
+extern void memzero_explicit(void *, size_t);
+int match_string(const char * const *, size_t, const char *);
+
+#define kstrndup(s, n, gfp) strndup(s, n)
+#define kstrdup(s, gfp) strdup(s)
+
+#endif /* _TOOLS_LINUX_STRING_H_ */
diff --git a/c_src/include/linux/string_helpers.h b/c_src/include/linux/string_helpers.h
new file mode 100644
index 00000000..af587706
--- /dev/null
+++ b/c_src/include/linux/string_helpers.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_STRING_HELPERS_H_
+#define _LINUX_STRING_HELPERS_H_
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+
+/* Descriptions of the types of units to print in */
+enum string_size_units {
+ STRING_UNITS_10, /* use powers of 10^3 (standard SI) */
+ STRING_UNITS_2, /* use binary powers of 2^10 */
+};
+
+int string_get_size(u64 size, u64 blk_size, enum string_size_units units,
+ char *buf, int len);
+
+#endif
diff --git a/c_src/include/linux/sysfs.h b/c_src/include/linux/sysfs.h
new file mode 100644
index 00000000..cb75d88b
--- /dev/null
+++ b/c_src/include/linux/sysfs.h
@@ -0,0 +1,38 @@
+#ifndef _SYSFS_H_
+#define _SYSFS_H_
+
+#include <linux/compiler.h>
+
+struct kobject;
+
+struct attribute {
+ const char *name;
+ umode_t mode;
+};
+
+struct attribute_group {
+ struct attribute **attrs;
+};
+
+struct sysfs_ops {
+ ssize_t (*show)(struct kobject *, struct attribute *, char *);
+ ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
+};
+
+static inline int sysfs_create_files(struct kobject *kobj,
+ const struct attribute **attr)
+{
+ return 0;
+}
+
+static inline int sysfs_create_link(struct kobject *kobj,
+ struct kobject *target, const char *name)
+{
+ return 0;
+}
+
+static inline void sysfs_remove_link(struct kobject *kobj, const char *name)
+{
+}
+
+#endif /* _SYSFS_H_ */
diff --git a/c_src/include/linux/time64.h b/c_src/include/linux/time64.h
new file mode 100644
index 00000000..cd6cc1c1
--- /dev/null
+++ b/c_src/include/linux/time64.h
@@ -0,0 +1,51 @@
+#ifndef _LINUX_TIME64_H
+#define _LINUX_TIME64_H
+
+#include <linux/types.h>
+
+#define timespec64 timespec
+
+typedef __s64 time64_t;
+
+/* Parameters used to convert the timespec values: */
+#define MSEC_PER_SEC 1000L
+#define USEC_PER_MSEC 1000L
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_MSEC 1000000L
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+#define FSEC_PER_SEC 1000000000000000LL
+
+static inline struct timespec ns_to_timespec(const u64 nsec)
+{
+ return (struct timespec) {
+ .tv_sec = nsec / NSEC_PER_SEC,
+ .tv_nsec = nsec % NSEC_PER_SEC,
+ };
+}
+
+static inline s64 timespec_to_ns(const struct timespec *ts)
+{
+ return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+static inline struct timespec timespec_trunc(struct timespec t, unsigned gran)
+{
+ /* Avoid division in the common cases 1 ns and 1 s. */
+ if (gran == 1) {
+ /* nothing */
+ } else if (gran == NSEC_PER_SEC) {
+ t.tv_nsec = 0;
+ } else if (gran > 1 && gran < NSEC_PER_SEC) {
+ t.tv_nsec -= t.tv_nsec % gran;
+ } else {
+ WARN(1, "illegal file time granularity: %u", gran);
+ }
+ return t;
+}
+
+#define ns_to_timespec64 ns_to_timespec
+#define timespec64_to_ns timespec_to_ns
+#define timespec64_trunc timespec_trunc
+
+#endif /* _LINUX_TIME64_H */
diff --git a/c_src/include/linux/timer.h b/c_src/include/linux/timer.h
new file mode 100644
index 00000000..9667acf9
--- /dev/null
+++ b/c_src/include/linux/timer.h
@@ -0,0 +1,46 @@
+#ifndef __TOOLS_LINUX_TIMER_H
+#define __TOOLS_LINUX_TIMER_H
+
+#include <string.h>
+#include <linux/types.h>
+
+struct timer_list {
+ unsigned long expires;
+ void (*function)(struct timer_list *timer);
+ bool pending;
+};
+
+static inline void timer_setup(struct timer_list *timer,
+ void (*func)(struct timer_list *),
+ unsigned int flags)
+{
+ memset(timer, 0, sizeof(*timer));
+ timer->function = func;
+}
+
+#define timer_setup_on_stack(timer, callback, flags) \
+ timer_setup(timer, callback, flags)
+
+#define destroy_timer_on_stack(timer) do {} while (0)
+
+static inline int timer_pending(const struct timer_list *timer)
+{
+ return timer->pending;
+}
+
+int del_timer(struct timer_list * timer);
+int del_timer_sync(struct timer_list *timer);
+
+#define del_singleshot_timer_sync(timer) del_timer_sync(timer)
+
+int mod_timer(struct timer_list *timer, unsigned long expires);
+
+static inline void add_timer(struct timer_list *timer)
+{
+ BUG_ON(timer_pending(timer));
+ mod_timer(timer, timer->expires);
+}
+
+void flush_timers(void);
+
+#endif /* __TOOLS_LINUX_TIMER_H */
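A minimal sketch of arming a timer with the declarations above; the callback and expiry handling are illustrative:

#include <linux/timer.h>

static struct timer_list demo_timer;

static void demo_timer_fn(struct timer_list *t)
{
	/* runs once when the timer fires */
}

static void demo_timer_start(unsigned long expires)
{
	timer_setup(&demo_timer, demo_timer_fn, 0);	/* usually done once at init */
	mod_timer(&demo_timer, expires);		/* (re)arm; del_timer_sync() cancels */
}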
diff --git a/c_src/include/linux/tracepoint.h b/c_src/include/linux/tracepoint.h
new file mode 100644
index 00000000..1686cb90
--- /dev/null
+++ b/c_src/include/linux/tracepoint.h
@@ -0,0 +1,62 @@
+#ifndef __TOOLS_LINUX_TRACEPOINT_H
+#define __TOOLS_LINUX_TRACEPOINT_H
+
+#define PARAMS(args...) args
+
+#define TP_PROTO(args...) args
+#define TP_ARGS(args...) args
+#define TP_CONDITION(args...) args
+
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
+ static inline void trace_##name(proto) \
+ { } \
+ static inline void trace_##name##_rcuidle(proto) \
+ { } \
+ static inline int \
+ register_trace_##name(void (*probe)(data_proto), \
+ void *data) \
+ { \
+ return -ENOSYS; \
+ } \
+ static inline int \
+ unregister_trace_##name(void (*probe)(data_proto), \
+ void *data) \
+ { \
+ return -ENOSYS; \
+ } \
+ static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
+ { \
+ } \
+ static inline bool \
+ trace_##name##_enabled(void) \
+ { \
+ return false; \
+ }
+
+#define DEFINE_TRACE_FN(name, reg, unreg)
+#define DEFINE_TRACE(name)
+#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
+#define EXPORT_TRACEPOINT_SYMBOL(name)
+
+#define DECLARE_TRACE_NOARGS(name) \
+ __DECLARE_TRACE(name, void, , \
+ cpu_online(raw_smp_processor_id()), \
+ void *__data, __data)
+
+#define DECLARE_TRACE(name, proto, args) \
+ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \
+ cpu_online(raw_smp_processor_id()), \
+ PARAMS(void *__data, proto), \
+ PARAMS(__data, args))
+
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
+#define DEFINE_EVENT(template, name, proto, args) \
+ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\
+ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define TRACE_EVENT(name, proto, args, struct, assign, print) \
+ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+
+#endif /* __TOOLS_LINUX_TRACEPOINT_H */
diff --git a/c_src/include/linux/typecheck.h b/c_src/include/linux/typecheck.h
new file mode 100644
index 00000000..eb5b74a5
--- /dev/null
+++ b/c_src/include/linux/typecheck.h
@@ -0,0 +1,24 @@
+#ifndef TYPECHECK_H_INCLUDED
+#define TYPECHECK_H_INCLUDED
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({ type __dummy; \
+ typeof(x) __dummy2; \
+ (void)(&__dummy == &__dummy2); \
+ 1; \
+})
+
+/*
+ * Check at compile time that 'function' is a certain type, or is a pointer
+ * to that type (needs to use typedef for the function type.)
+ */
+#define typecheck_fn(type,function) \
+({ typeof(type) __tmp = function; \
+ (void)__tmp; \
+})
+
+#endif /* TYPECHECK_H_INCLUDED */
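As an illustration, typecheck() is normally buried inside other macros so that passing the wrong type triggers a compile-time warning; the macro below is hypothetical:

#include <linux/typecheck.h>

/* hypothetical helper: insist that callers pass an unsigned long mask */
#define demo_set_flags(dst, mask) do {			\
	typecheck(unsigned long, mask);			\
	*(dst) |= (mask);				\
} while (0)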
diff --git a/c_src/include/linux/types.h b/c_src/include/linux/types.h
new file mode 100644
index 00000000..ce454e26
--- /dev/null
+++ b/c_src/include/linux/types.h
@@ -0,0 +1,87 @@
+#ifndef _TOOLS_LINUX_TYPES_H_
+#define _TOOLS_LINUX_TYPES_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
+#include <asm/types.h>
+
+#include <linux/cache.h>
+
+#define BITS_PER_LONG __BITS_PER_LONG
+
+struct page;
+struct kmem_cache;
+
+typedef unsigned long pgoff_t;
+
+typedef unsigned short umode_t;
+
+typedef unsigned gfp_t;
+
+#define GFP_ATOMIC 0
+#define GFP_NOFS 0
+#define GFP_NOIO 0
+#define GFP_NOWAIT 0
+#define __GFP_FS 0
+#define __GFP_IO 0
+#define __GFP_NOWARN 0
+#define __GFP_NORETRY 0
+#define __GFP_NOFAIL 0
+#define __GFP_ZERO 1
+#define GFP_KERNEL 2
+
+#define PAGE_ALLOC_COSTLY_ORDER 6
+
+typedef __u64 u64;
+typedef __s64 s64;
+typedef __u32 u32;
+typedef __s32 s32;
+typedef __u16 u16;
+typedef __s16 s16;
+typedef __u8 u8;
+typedef __s8 s8;
+
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+#ifdef __CHECK_ENDIAN__
+#define __bitwise __bitwise__
+#else
+#define __bitwise
+#endif
+
+#define __force
+#define __user
+#define __must_check
+#define __cold
+
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+#ifndef __aligned_u64
+# define __aligned_u64 __u64 __attribute__((aligned(8)))
+#endif
+
+typedef u64 sector_t;
+
+typedef int (*cmp_func_t)(const void *a, const void *b);
+
+typedef unsigned int __bitwise slab_flags_t;
+typedef u64 phys_addr_t;
+struct vm_struct;
+struct mnt_idmap;
+
+#endif /* _TOOLS_LINUX_TYPES_H_ */
diff --git a/c_src/include/linux/unaligned/be_byteshift.h b/c_src/include/linux/unaligned/be_byteshift.h
new file mode 100644
index 00000000..9356b242
--- /dev/null
+++ b/c_src/include/linux/unaligned/be_byteshift.h
@@ -0,0 +1,70 @@
+#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H
+#define _LINUX_UNALIGNED_BE_BYTESHIFT_H
+
+#include <linux/types.h>
+
+static inline u16 __get_unaligned_be16(const u8 *p)
+{
+ return p[0] << 8 | p[1];
+}
+
+static inline u32 __get_unaligned_be32(const u8 *p)
+{
+ return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
+}
+
+static inline u64 __get_unaligned_be64(const u8 *p)
+{
+ return (u64)__get_unaligned_be32(p) << 32 |
+ __get_unaligned_be32(p + 4);
+}
+
+static inline void __put_unaligned_be16(u16 val, u8 *p)
+{
+ *p++ = val >> 8;
+ *p++ = val;
+}
+
+static inline void __put_unaligned_be32(u32 val, u8 *p)
+{
+ __put_unaligned_be16(val >> 16, p);
+ __put_unaligned_be16(val, p + 2);
+}
+
+static inline void __put_unaligned_be64(u64 val, u8 *p)
+{
+ __put_unaligned_be32(val >> 32, p);
+ __put_unaligned_be32(val, p + 4);
+}
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+ return __get_unaligned_be16((const u8 *)p);
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return __get_unaligned_be32((const u8 *)p);
+}
+
+static inline u64 get_unaligned_be64(const void *p)
+{
+ return __get_unaligned_be64((const u8 *)p);
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+ __put_unaligned_be16(val, p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+ __put_unaligned_be32(val, p);
+}
+
+static inline void put_unaligned_be64(u64 val, void *p)
+{
+ __put_unaligned_be64(val, p);
+}
+
+#endif /* _LINUX_UNALIGNED_BE_BYTESHIFT_H */
diff --git a/c_src/include/linux/unaligned/be_struct.h b/c_src/include/linux/unaligned/be_struct.h
new file mode 100644
index 00000000..13241583
--- /dev/null
+++ b/c_src/include/linux/unaligned/be_struct.h
@@ -0,0 +1,36 @@
+#ifndef _LINUX_UNALIGNED_BE_STRUCT_H
+#define _LINUX_UNALIGNED_BE_STRUCT_H
+
+#include <linux/unaligned/packed_struct.h>
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+ return __get_unaligned_cpu16((const u8 *)p);
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return __get_unaligned_cpu32((const u8 *)p);
+}
+
+static inline u64 get_unaligned_be64(const void *p)
+{
+ return __get_unaligned_cpu64((const u8 *)p);
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+ __put_unaligned_cpu16(val, p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+ __put_unaligned_cpu32(val, p);
+}
+
+static inline void put_unaligned_be64(u64 val, void *p)
+{
+ __put_unaligned_cpu64(val, p);
+}
+
+#endif /* _LINUX_UNALIGNED_BE_STRUCT_H */
diff --git a/c_src/include/linux/unaligned/generic.h b/c_src/include/linux/unaligned/generic.h
new file mode 100644
index 00000000..02d97ff3
--- /dev/null
+++ b/c_src/include/linux/unaligned/generic.h
@@ -0,0 +1,68 @@
+#ifndef _LINUX_UNALIGNED_GENERIC_H
+#define _LINUX_UNALIGNED_GENERIC_H
+
+/*
+ * Cause a link-time error if we try an unaligned access other than
+ * 1,2,4 or 8 bytes long
+ */
+extern void __bad_unaligned_access_size(void);
+
+#define __get_unaligned_le(ptr) ((__force typeof(*(ptr)))({ \
+ __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \
+ __bad_unaligned_access_size())))); \
+ }))
+
+#define __get_unaligned_be(ptr) ((__force typeof(*(ptr)))({ \
+ __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)), \
+ __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)), \
+ __bad_unaligned_access_size())))); \
+ }))
+
+#define __put_unaligned_le(val, ptr) ({ \
+ void *__gu_p = (ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ *(u8 *)__gu_p = (__force u8)(val); \
+ break; \
+ case 2: \
+ put_unaligned_le16((__force u16)(val), __gu_p); \
+ break; \
+ case 4: \
+ put_unaligned_le32((__force u32)(val), __gu_p); \
+ break; \
+ case 8: \
+ put_unaligned_le64((__force u64)(val), __gu_p); \
+ break; \
+ default: \
+ __bad_unaligned_access_size(); \
+ break; \
+ } \
+ (void)0; })
+
+#define __put_unaligned_be(val, ptr) ({ \
+ void *__gu_p = (ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ *(u8 *)__gu_p = (__force u8)(val); \
+ break; \
+ case 2: \
+ put_unaligned_be16((__force u16)(val), __gu_p); \
+ break; \
+ case 4: \
+ put_unaligned_be32((__force u32)(val), __gu_p); \
+ break; \
+ case 8: \
+ put_unaligned_be64((__force u64)(val), __gu_p); \
+ break; \
+ default: \
+ __bad_unaligned_access_size(); \
+ break; \
+ } \
+ (void)0; })
+
+#endif /* _LINUX_UNALIGNED_GENERIC_H */
diff --git a/c_src/include/linux/unaligned/le_byteshift.h b/c_src/include/linux/unaligned/le_byteshift.h
new file mode 100644
index 00000000..be376fb7
--- /dev/null
+++ b/c_src/include/linux/unaligned/le_byteshift.h
@@ -0,0 +1,70 @@
+#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H
+#define _LINUX_UNALIGNED_LE_BYTESHIFT_H
+
+#include <linux/types.h>
+
+static inline u16 __get_unaligned_le16(const u8 *p)
+{
+ return p[0] | p[1] << 8;
+}
+
+static inline u32 __get_unaligned_le32(const u8 *p)
+{
+ return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
+}
+
+static inline u64 __get_unaligned_le64(const u8 *p)
+{
+ return (u64)__get_unaligned_le32(p + 4) << 32 |
+ __get_unaligned_le32(p);
+}
+
+static inline void __put_unaligned_le16(u16 val, u8 *p)
+{
+ *p++ = val;
+ *p++ = val >> 8;
+}
+
+static inline void __put_unaligned_le32(u32 val, u8 *p)
+{
+ __put_unaligned_le16(val >> 16, p + 2);
+ __put_unaligned_le16(val, p);
+}
+
+static inline void __put_unaligned_le64(u64 val, u8 *p)
+{
+ __put_unaligned_le32(val >> 32, p + 4);
+ __put_unaligned_le32(val, p);
+}
+
+static inline u16 get_unaligned_le16(const void *p)
+{
+ return __get_unaligned_le16((const u8 *)p);
+}
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+ return __get_unaligned_le32((const u8 *)p);
+}
+
+static inline u64 get_unaligned_le64(const void *p)
+{
+ return __get_unaligned_le64((const u8 *)p);
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+ __put_unaligned_le16(val, p);
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+ __put_unaligned_le32(val, p);
+}
+
+static inline void put_unaligned_le64(u64 val, void *p)
+{
+ __put_unaligned_le64(val, p);
+}
+
+#endif /* _LINUX_UNALIGNED_LE_BYTESHIFT_H */
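A brief sketch of decoding a little-endian on-disk field with the byteshift helpers above; the buffer layout is illustrative:

#include <linux/unaligned/le_byteshift.h>

/* illustrative layout: u16 version at offset 0, u32 length at offset 2 */
static u32 demo_parse_len(const void *buf)
{
	/* offset 2 is not 4-byte aligned, so a plain u32 load would be unsafe on
	 * strict-alignment machines; the helper assembles the value byte by byte */
	return get_unaligned_le32((const u8 *)buf + 2);
}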
diff --git a/c_src/include/linux/unaligned/le_struct.h b/c_src/include/linux/unaligned/le_struct.h
new file mode 100644
index 00000000..088c4572
--- /dev/null
+++ b/c_src/include/linux/unaligned/le_struct.h
@@ -0,0 +1,36 @@
+#ifndef _LINUX_UNALIGNED_LE_STRUCT_H
+#define _LINUX_UNALIGNED_LE_STRUCT_H
+
+#include <linux/unaligned/packed_struct.h>
+
+static inline u16 get_unaligned_le16(const void *p)
+{
+ return __get_unaligned_cpu16((const u8 *)p);
+}
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+ return __get_unaligned_cpu32((const u8 *)p);
+}
+
+static inline u64 get_unaligned_le64(const void *p)
+{
+ return __get_unaligned_cpu64((const u8 *)p);
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+ __put_unaligned_cpu16(val, p);
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+ __put_unaligned_cpu32(val, p);
+}
+
+static inline void put_unaligned_le64(u64 val, void *p)
+{
+ __put_unaligned_cpu64(val, p);
+}
+
+#endif /* _LINUX_UNALIGNED_LE_STRUCT_H */
diff --git a/c_src/include/linux/unaligned/packed_struct.h b/c_src/include/linux/unaligned/packed_struct.h
new file mode 100644
index 00000000..c0d817de
--- /dev/null
+++ b/c_src/include/linux/unaligned/packed_struct.h
@@ -0,0 +1,46 @@
+#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H
+#define _LINUX_UNALIGNED_PACKED_STRUCT_H
+
+#include <linux/kernel.h>
+
+struct __una_u16 { u16 x; } __packed;
+struct __una_u32 { u32 x; } __packed;
+struct __una_u64 { u64 x; } __packed;
+
+static inline u16 __get_unaligned_cpu16(const void *p)
+{
+ const struct __una_u16 *ptr = (const struct __una_u16 *)p;
+ return ptr->x;
+}
+
+static inline u32 __get_unaligned_cpu32(const void *p)
+{
+ const struct __una_u32 *ptr = (const struct __una_u32 *)p;
+ return ptr->x;
+}
+
+static inline u64 __get_unaligned_cpu64(const void *p)
+{
+ const struct __una_u64 *ptr = (const struct __una_u64 *)p;
+ return ptr->x;
+}
+
+static inline void __put_unaligned_cpu16(u16 val, void *p)
+{
+ struct __una_u16 *ptr = (struct __una_u16 *)p;
+ ptr->x = val;
+}
+
+static inline void __put_unaligned_cpu32(u32 val, void *p)
+{
+ struct __una_u32 *ptr = (struct __una_u32 *)p;
+ ptr->x = val;
+}
+
+static inline void __put_unaligned_cpu64(u64 val, void *p)
+{
+ struct __una_u64 *ptr = (struct __una_u64 *)p;
+ ptr->x = val;
+}
+
+#endif /* _LINUX_UNALIGNED_PACKED_STRUCT_H */
diff --git a/c_src/include/linux/uuid.h b/c_src/include/linux/uuid.h
new file mode 100644
index 00000000..a9990902
--- /dev/null
+++ b/c_src/include/linux/uuid.h
@@ -0,0 +1,41 @@
+/*
+ * UUID/GUID definition
+ *
+ * Copyright (C) 2010, 2016 Intel Corp.
+ * Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#ifndef _LINUX_UUID_H_
+#define _LINUX_UUID_H_
+
+#include <string.h>
+#include <asm/types.h>
+#include <stdbool.h>
+
+#define UUID_SIZE 16
+
+typedef struct {
+ __u8 b[UUID_SIZE];
+} __uuid_t;
+
+#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+((__uuid_t) \
+{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
+ ((b) >> 8) & 0xff, (b) & 0xff, \
+ ((c) >> 8) & 0xff, (c) & 0xff, \
+ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
+
+static inline bool uuid_equal(const __uuid_t *u1, const __uuid_t *u2)
+{
+ return memcmp(u1, u2, sizeof(__uuid_t)) == 0;
+}
+
+#endif
diff --git a/c_src/include/linux/vmalloc.h b/c_src/include/linux/vmalloc.h
new file mode 100644
index 00000000..55fffb59
--- /dev/null
+++ b/c_src/include/linux/vmalloc.h
@@ -0,0 +1,6 @@
+#ifndef __TOOLS_LINUX_VMALLOC_H
+#define __TOOLS_LINUX_VMALLOC_H
+
+#include "linux/slab.h"
+
+#endif /* __TOOLS_LINUX_VMALLOC_H */
diff --git a/c_src/include/linux/wait.h b/c_src/include/linux/wait.h
new file mode 100644
index 00000000..4b9cbf38
--- /dev/null
+++ b/c_src/include/linux/wait.h
@@ -0,0 +1,138 @@
+#ifndef _LINUX_WAIT_H
+#define _LINUX_WAIT_H
+
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+typedef struct __wait_queue wait_queue_t;
+typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
+
+#define WQ_FLAG_EXCLUSIVE 0x01
+
+struct __wait_queue {
+ unsigned int flags;
+ void *private;
+ wait_queue_func_t func;
+ struct list_head task_list;
+};
+
+struct wait_queue_head {
+ spinlock_t lock;
+ struct list_head task_list;
+};
+
+typedef struct wait_queue_head wait_queue_head_t;
+
+void wake_up(wait_queue_head_t *);
+void wake_up_all(wait_queue_head_t *);
+void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
+void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
+
+#define DECLARE_WAITQUEUE(name, tsk) \
+ wait_queue_t name = { \
+ .private = tsk, \
+ .func = default_wake_function, \
+ .task_list = { NULL, NULL } \
+ }
+
+#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
+ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
+ .task_list = { &(name).task_list, &(name).task_list } }
+
+#define DECLARE_WAIT_QUEUE_HEAD(name) \
+ struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
+
+static inline void init_waitqueue_head(wait_queue_head_t *q)
+{
+ spin_lock_init(&q->lock);
+ INIT_LIST_HEAD(&q->task_list);
+}
+
+#define DEFINE_WAIT(name) \
+ wait_queue_t name = { \
+ .private = current, \
+ .func = autoremove_wake_function, \
+ .task_list = LIST_HEAD_INIT((name).task_list), \
+ }
+
+#define ___wait_cond_timeout(condition) \
+({ \
+ bool __cond = (condition); \
+ if (__cond && !__ret) \
+ __ret = 1; \
+ __cond || !__ret; \
+})
+
+#define ___wait_event(wq, condition, state, exclusive, ret, cmd) \
+({ \
+ DEFINE_WAIT(__wait); \
+ long __ret = ret; \
+ \
+ for (;;) { \
+ prepare_to_wait(&wq, &__wait, state); \
+ if (condition) \
+ break; \
+ cmd; \
+ } \
+ finish_wait(&wq, &__wait); \
+ __ret; \
+})
+
+#define __wait_event(wq, condition) \
+ (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
+ schedule())
+
+#define wait_event(wq, condition) \
+do { \
+ if (condition) \
+ break; \
+ __wait_event(wq, condition); \
+} while (0)
+
+#define wait_event_freezable(wq, condition) ({wait_event(wq, condition); 0; })
+#define wait_event_killable(wq, condition) ({wait_event(wq, condition); 0; })
+#define wait_event_interruptible(wq, condition) ({wait_event(wq, condition); 0; })
+
+#define __wait_event_timeout(wq, condition, timeout) \
+ ___wait_event(wq, ___wait_cond_timeout(condition), \
+ TASK_UNINTERRUPTIBLE, 0, timeout, \
+ __ret = schedule_timeout(__ret))
+
+#define wait_event_timeout(wq, condition, timeout) \
+({ \
+ long __ret = timeout; \
+ if (!___wait_cond_timeout(condition)) \
+ __ret = __wait_event_timeout(wq, condition, timeout); \
+ __ret; \
+})
+
+void wake_up_bit(void *, int);
+void __wait_on_bit(void *, int, unsigned);
+void __wait_on_bit_lock(void *, int, unsigned);
+
+static inline int
+wait_on_bit(unsigned long *word, int bit, unsigned mode)
+{
+ if (!test_bit(bit, word))
+ return 0;
+ __wait_on_bit(word, bit, mode);
+ return 0;
+}
+
+static inline int
+wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
+{
+ if (!test_and_set_bit(bit, word))
+ return 0;
+ __wait_on_bit_lock(word, bit, mode);
+ return 0;
+}
+
+#define wait_on_bit_io(w, b, m) wait_on_bit(w, b, m)
+#define wait_on_bit_lock_io(w, b, m) wait_on_bit_lock(w, b, m)
+
+#endif /* _LINUX_WAIT_H */
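For context, a minimal sketch of the wait_event()/wake_up() pairing these shims provide; the flag and names are illustrative, and TASK_UNINTERRUPTIBLE/schedule() are assumed to come from the companion sched.h shim elsewhere in this tree:

#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static bool demo_done;

static void demo_wait_for_done(void)
{
	/* returns immediately if demo_done is already set, otherwise sleeps */
	wait_event(demo_wq, demo_done);
}

static void demo_mark_done(void)
{
	demo_done = true;
	wake_up(&demo_wq);	/* wakes waiters, which re-check the condition */
}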
diff --git a/c_src/include/linux/workqueue.h b/c_src/include/linux/workqueue.h
new file mode 100644
index 00000000..1406c958
--- /dev/null
+++ b/c_src/include/linux/workqueue.h
@@ -0,0 +1,185 @@
+#ifndef __TOOLS_LINUX_WORKQUEUE_H
+#define __TOOLS_LINUX_WORKQUEUE_H
+
+#include <linux/list.h>
+#include <linux/timer.h>
+
+struct task_struct;
+struct workqueue_struct;
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *work);
+void delayed_work_timer_fn(struct timer_list *);
+
+#define work_data_bits(work) ((unsigned long *)(&(work)->data))
+
+#if 0
+enum {
+ //WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
+ //WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */
+ //
+ //WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
+ //WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT,
+};
+#endif
+
+struct work_struct {
+ atomic_long_t data;
+ struct list_head entry;
+ work_func_t func;
+};
+
+#define INIT_WORK(_work, _func) \
+do { \
+ (_work)->data.counter = 0; \
+ INIT_LIST_HEAD(&(_work)->entry); \
+ (_work)->func = (_func); \
+} while (0)
+
+struct delayed_work {
+ struct work_struct work;
+ struct timer_list timer;
+ struct workqueue_struct *wq;
+};
+
+#define INIT_DELAYED_WORK(_work, _func) \
+ do { \
+ INIT_WORK(&(_work)->work, (_func)); \
+ timer_setup(&(_work)->timer, delayed_work_timer_fn, 0); \
+ } while (0)
+
+static inline struct delayed_work *to_delayed_work(struct work_struct *work)
+{
+ return container_of(work, struct delayed_work, work);
+}
+
+enum {
+ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */
+ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */
+ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */
+ WQ_HIGHPRI = 1 << 4, /* high priority */
+ WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */
+ WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */
+
+ /*
+ * Per-cpu workqueues are generally preferred because they tend to
+ * show better performance thanks to cache locality. Per-cpu
+ * workqueues exclude the scheduler from choosing the CPU to
+ * execute the worker threads, which has an unfortunate side effect
+ * of increasing power consumption.
+ *
+ * The scheduler considers a CPU idle if it doesn't have any task
+ * to execute and tries to keep idle cores idle to conserve power;
+ * however, for example, a per-cpu work item scheduled from an
+ * interrupt handler on an idle CPU will force the scheduler to
+ * execute the work item on that CPU, breaking the idleness, which in
+ * turn may lead to more scheduling choices which are sub-optimal
+ * in terms of power consumption.
+ *
+ * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default
+ * but become unbound if workqueue.power_efficient kernel param is
+ * specified. Per-cpu workqueues which contribute significantly to
+ * power consumption are identified and marked with this flag, and
+ * enabling the power_efficient mode leads to noticeable power
+ * saving at the cost of a small performance disadvantage.
+ *
+ * http://thread.gmane.org/gmane.linux.kernel/1480396
+ */
+ WQ_POWER_EFFICIENT = 1 << 7,
+
+ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */
+ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */
+ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */
+
+ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */
+ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */
+ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2,
+};
+
+/* unbound wq's aren't per-cpu, scale max_active according to #cpus */
+#define WQ_UNBOUND_MAX_ACTIVE WQ_MAX_ACTIVE
+
+extern struct workqueue_struct *system_wq;
+extern struct workqueue_struct *system_highpri_wq;
+extern struct workqueue_struct *system_long_wq;
+extern struct workqueue_struct *system_unbound_wq;
+extern struct workqueue_struct *system_freezable_wq;
+
+extern struct workqueue_struct *
+alloc_workqueue(const char *fmt, unsigned int flags,
+ int max_active, ...) __printf(1, 4);
+
+#define alloc_ordered_workqueue(fmt, flags, args...) \
+ alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
+
+#define create_workqueue(name) \
+ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
+#define create_freezable_workqueue(name) \
+ alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \
+ WQ_MEM_RECLAIM, 1, (name))
+#define create_singlethread_workqueue(name) \
+ alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
+
+extern void destroy_workqueue(struct workqueue_struct *wq);
+
+struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask);
+void free_workqueue_attrs(struct workqueue_attrs *attrs);
+int apply_workqueue_attrs(struct workqueue_struct *wq,
+ const struct workqueue_attrs *attrs);
+
+extern bool queue_work(struct workqueue_struct *wq,
+ struct work_struct *work);
+extern bool queue_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *work, unsigned long delay);
+extern bool mod_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *dwork, unsigned long delay);
+
+extern void flush_workqueue(struct workqueue_struct *wq);
+extern void drain_workqueue(struct workqueue_struct *wq);
+
+extern int schedule_on_each_cpu(work_func_t func);
+
+extern bool flush_work(struct work_struct *work);
+extern bool cancel_work_sync(struct work_struct *work);
+
+extern bool flush_delayed_work(struct delayed_work *dwork);
+extern bool cancel_delayed_work(struct delayed_work *dwork);
+extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
+
+extern void workqueue_set_max_active(struct workqueue_struct *wq,
+ int max_active);
+extern bool current_is_workqueue_rescuer(void);
+extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
+extern unsigned int work_busy(struct work_struct *work);
+extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
+extern void print_worker_info(const char *log_lvl, struct task_struct *task);
+extern void show_workqueue_state(void);
+
+static inline bool schedule_work_on(int cpu, struct work_struct *work)
+{
+ return queue_work(system_wq, work);
+}
+
+static inline bool schedule_work(struct work_struct *work)
+{
+ return queue_work(system_wq, work);
+}
+
+static inline void flush_scheduled_work(void)
+{
+ flush_workqueue(system_wq);
+}
+
+static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
+ unsigned long delay)
+{
+ return queue_delayed_work(system_wq, dwork, delay);
+}
+
+static inline bool schedule_delayed_work(struct delayed_work *dwork,
+ unsigned long delay)
+{
+ return queue_delayed_work(system_wq, dwork, delay);
+}
+
+#endif /* __TOOLS_LINUX_WORKQUEUE_H */
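A small sketch of queueing work with the declarations above; the work function and names are illustrative:

#include <linux/workqueue.h>

static struct work_struct demo_work;

static void demo_work_fn(struct work_struct *work)
{
	/* runs later in a worker thread */
}

static void demo_kick(void)
{
	INIT_WORK(&demo_work, demo_work_fn);	/* normally done once at setup */
	queue_work(system_wq, &demo_work);	/* or simply schedule_work(&demo_work) */
}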
diff --git a/c_src/include/linux/xattr.h b/c_src/include/linux/xattr.h
new file mode 100644
index 00000000..dcdff6e8
--- /dev/null
+++ b/c_src/include/linux/xattr.h
@@ -0,0 +1,78 @@
+/*
+ File: linux/xattr.h
+
+ Extended attributes handling.
+
+ Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
+ Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
+ Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+*/
+#ifndef _LINUX_XATTR_H
+#define _LINUX_XATTR_H
+
+
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <uapi/linux/xattr.h>
+
+#ifndef XATTR_CREATE
+#define XATTR_CREATE 0x1
+#endif
+
+#ifndef XATTR_REPLACE
+#define XATTR_REPLACE 0x2
+#endif
+
+struct inode;
+struct dentry;
+struct user_namespace;
+
+/*
+ * struct xattr_handler: When @name is set, match attributes with exactly that
+ * name. When @prefix is set instead, match attributes with that prefix and
+ * with a non-empty suffix.
+ */
+struct xattr_handler {
+ const char *name;
+ const char *prefix;
+ int flags; /* fs private flags */
+ bool (*list)(struct dentry *dentry);
+ int (*get)(const struct xattr_handler *, struct dentry *dentry,
+ struct inode *inode, const char *name, void *buffer,
+ size_t size);
+ int (*set)(const struct xattr_handler *,
+ struct mnt_idmap *idmap, struct dentry *dentry,
+ struct inode *inode, const char *name, const void *buffer,
+ size_t size, int flags);
+};
+
+const char *xattr_full_name(const struct xattr_handler *, const char *);
+
+struct xattr {
+ const char *name;
+ void *value;
+ size_t value_len;
+};
+
+ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
+ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t);
+ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
+int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int);
+int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int);
+int vfs_removexattr(struct dentry *, const char *);
+
+ssize_t generic_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size);
+ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);
+int generic_setxattr(struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value, size_t size, int flags);
+int generic_removexattr(struct dentry *dentry, const char *name);
+ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name,
+ char **xattr_value, size_t size, gfp_t flags);
+
+static inline const char *xattr_prefix(const struct xattr_handler *handler)
+{
+ return handler->prefix ?: handler->name;
+}
+
+#endif /* _LINUX_XATTR_H */
diff --git a/c_src/include/linux/xxhash.h b/c_src/include/linux/xxhash.h
new file mode 100644
index 00000000..df425114
--- /dev/null
+++ b/c_src/include/linux/xxhash.h
@@ -0,0 +1,259 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at:
+ * - xxHash homepage: https://cyan4973.github.io/xxHash/
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+/*
+ * Notice extracted from xxHash homepage:
+ *
+ * xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+ * It also successfully passes all tests from the SMHasher suite.
+ *
+ * Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2
+ * Duo @3GHz)
+ *
+ * Name Speed Q.Score Author
+ * xxHash 5.4 GB/s 10
+ * CrapWow 3.2 GB/s 2 Andrew
+ * MurmurHash 3a 2.7 GB/s 10 Austin Appleby
+ * SpookyHash 2.0 GB/s 10 Bob Jenkins
+ * SBox 1.4 GB/s 9 Bret Mulvey
+ * Lookup3 1.2 GB/s 9 Bob Jenkins
+ * SuperFastHash 1.2 GB/s 1 Paul Hsieh
+ * CityHash64 1.05 GB/s 10 Pike & Alakuijala
+ * FNV 0.55 GB/s 5 Fowler, Noll, Vo
+ * CRC32 0.43 GB/s 9
+ * MD5-32 0.33 GB/s 10 Ronald L. Rivest
+ * SHA1-32 0.28 GB/s 10
+ *
+ * Q.Score is a measure of quality of the hash function.
+ * It depends on successfully passing SMHasher test set.
+ * 10 is a perfect score.
+ *
+ * A 64-bit version, named xxh64, offers much better speed,
+ * but for 64-bit applications only.
+ * Name Speed on 64 bits Speed on 32 bits
+ * xxh64 13.8 GB/s 1.9 GB/s
+ * xxh32 6.8 GB/s 6.0 GB/s
+ */
+
+#ifndef XXHASH_H
+#define XXHASH_H
+
+#include <linux/types.h>
+
+/*-****************************
+ * Simple Hash Functions
+ *****************************/
+
+/**
+ * xxh32() - calculate the 32-bit hash of the input with a given seed.
+ *
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ * @seed: The seed can be used to alter the result predictably.
+ *
+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+ *
+ * Return: The 32-bit hash of the data.
+ */
+uint32_t xxh32(const void *input, size_t length, uint32_t seed);
+
+/**
+ * xxh64() - calculate the 64-bit hash of the input with a given seed.
+ *
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ * @seed: The seed can be used to alter the result predictably.
+ *
+ * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems.
+ *
+ * Return: The 64-bit hash of the data.
+ */
+uint64_t xxh64(const void *input, size_t length, uint64_t seed);
+
+/**
+ * xxhash() - calculate wordsize hash of the input with a given seed
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ * @seed: The seed can be used to alter the result predictably.
+ *
+ * If the hash does not need to be comparable between machines with
+ * different word sizes, this function will call whichever of xxh32()
+ * or xxh64() is faster.
+ *
+ * Return: wordsize hash of the data.
+ */
+
+static inline unsigned long xxhash(const void *input, size_t length,
+ uint64_t seed)
+{
+#if BITS_PER_LONG == 64
+ return xxh64(input, length, seed);
+#else
+ return xxh32(input, length, seed);
+#endif
+}
+
+/*-****************************
+ * Streaming Hash Functions
+ *****************************/
+
+/*
+ * These definitions are only meant to allow allocation of XXH state
+ * statically, on stack, or in a struct for example.
+ * Do not use members directly.
+ */
+
+/**
+ * struct xxh32_state - private xxh32 state, do not use members directly
+ */
+struct xxh32_state {
+ uint32_t total_len_32;
+ uint32_t large_len;
+ uint32_t v1;
+ uint32_t v2;
+ uint32_t v3;
+ uint32_t v4;
+ uint32_t mem32[4];
+ uint32_t memsize;
+};
+
+/**
+ * struct xxh64_state - private xxh64 state, do not use members directly
+ */
+struct xxh64_state {
+ uint64_t total_len;
+ uint64_t v1;
+ uint64_t v2;
+ uint64_t v3;
+ uint64_t v4;
+ uint64_t mem64[4];
+ uint32_t memsize;
+};
+
+/**
+ * xxh32_reset() - reset the xxh32 state to start a new hashing operation
+ *
+ * @state: The xxh32 state to reset.
+ * @seed: Initialize the hash state with this seed.
+ *
+ * Call this function on any xxh32_state to prepare for a new hashing operation.
+ */
+void xxh32_reset(struct xxh32_state *state, uint32_t seed);
+
+/**
+ * xxh32_update() - hash the data given and update the xxh32 state
+ *
+ * @state: The xxh32 state to update.
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ *
+ * After calling xxh32_reset() call xxh32_update() as many times as necessary.
+ *
+ * Return: Zero on success, otherwise an error code.
+ */
+int xxh32_update(struct xxh32_state *state, const void *input, size_t length);
+
+/**
+ * xxh32_digest() - produce the current xxh32 hash
+ *
+ * @state: Produce the current xxh32 hash of this state.
+ *
+ * A hash value can be produced at any time. It is still possible to continue
+ * inserting input into the hash state after a call to xxh32_digest(), and
+ * generate new hashes later on, by calling xxh32_digest() again.
+ *
+ * Return: The xxh32 hash stored in the state.
+ */
+uint32_t xxh32_digest(const struct xxh32_state *state);
+
+/**
+ * xxh64_reset() - reset the xxh64 state to start a new hashing operation
+ *
+ * @state: The xxh64 state to reset.
+ * @seed: Initialize the hash state with this seed.
+ */
+void xxh64_reset(struct xxh64_state *state, uint64_t seed);
+
+/**
+ * xxh64_update() - hash the data given and update the xxh64 state
+ * @state: The xxh64 state to update.
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ *
+ * After calling xxh64_reset() call xxh64_update() as many times as necessary.
+ *
+ * Return: Zero on success, otherwise an error code.
+ */
+int xxh64_update(struct xxh64_state *state, const void *input, size_t length);
+
+/**
+ * xxh64_digest() - produce the current xxh64 hash
+ *
+ * @state: Produce the current xxh64 hash of this state.
+ *
+ * A hash value can be produced at any time. It is still possible to continue
+ * inserting input into the hash state after a call to xxh64_digest(), and
+ * generate new hashes later on, by calling xxh64_digest() again.
+ *
+ * Return: The xxh64 hash stored in the state.
+ */
+uint64_t xxh64_digest(const struct xxh64_state *state);
+
+/*-**************************
+ * Utils
+ ***************************/
+
+/**
+ * xxh32_copy_state() - copy the source state into the destination state
+ *
+ * @src: The source xxh32 state.
+ * @dst: The destination xxh32 state.
+ */
+void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src);
+
+/**
+ * xxh64_copy_state() - copy the source state into the destination state
+ *
+ * @src: The source xxh64 state.
+ * @dst: The destination xxh64 state.
+ */
+void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src);
+
+#endif /* XXHASH_H */
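To make the streaming interface concrete, a short sketch of hashing a buffer in chunks; the 4 KiB chunk size is arbitrary and error returns from xxh64_update() are ignored for brevity:

#include <linux/xxhash.h>

static uint64_t demo_xxh64_chunked(const void *buf, size_t len, uint64_t seed)
{
	struct xxh64_state state;
	const uint8_t *p = buf;

	xxh64_reset(&state, seed);

	while (len) {
		size_t chunk = len < 4096 ? len : 4096;

		xxh64_update(&state, p, chunk);
		p += chunk;
		len -= chunk;
	}

	return xxh64_digest(&state);
}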
diff --git a/c_src/include/linux/zlib.h b/c_src/include/linux/zlib.h
new file mode 100644
index 00000000..45cfbd87
--- /dev/null
+++ b/c_src/include/linux/zlib.h
@@ -0,0 +1,18 @@
+#ifndef _ZLIB_H
+#define _ZLIB_H
+
+#include <zlib.h>
+
+#define zlib_inflate_workspacesize() 0
+#define zlib_deflate_workspacesize(windowBits, memLevel) 0
+
+#define zlib_inflateInit2 inflateInit2
+#define zlib_inflate inflate
+
+#define zlib_deflateInit2 deflateInit2
+#define zlib_deflate deflate
+#define zlib_deflateEnd deflateEnd
+
+#define DEF_MEM_LEVEL 8
+
+#endif /* _ZLIB_H */
diff --git a/c_src/include/linux/zstd.h b/c_src/include/linux/zstd.h
new file mode 100644
index 00000000..b0fa1eda
--- /dev/null
+++ b/c_src/include/linux/zstd.h
@@ -0,0 +1,447 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd) and
+ * the GPLv2 (found in the COPYING file in the root directory of
+ * https://github.com/facebook/zstd). You may select, at your option, one of the
+ * above-listed licenses.
+ */
+
+#ifndef LINUX_ZSTD_H
+#define LINUX_ZSTD_H
+
+/**
+ * This is a kernel-style API that wraps the upstream zstd API, which cannot be
+ * used directly because the symbols aren't exported. It exposes the minimal
+ * functionality which is currently required by users of zstd in the kernel.
+ * Expose extra functions from lib/zstd/zstd.h as needed.
+ */
+
+/* ====== Dependency ====== */
+#include <linux/types.h>
+#include <zstd.h>
+#include <linux/zstd_errors.h>
+
+/* ====== Helper Functions ====== */
+/**
+ * zstd_compress_bound() - maximum compressed size in worst case scenario
+ * @src_size: The size of the data to compress.
+ *
+ * Return: The maximum compressed size in the worst case scenario.
+ */
+size_t zstd_compress_bound(size_t src_size);
+
+/**
+ * zstd_is_error() - tells if a size_t function result is an error code
+ * @code: The function result to check for error.
+ *
+ * Return: Non-zero iff the code is an error.
+ */
+unsigned int zstd_is_error(size_t code);
+
+/**
+ * enum zstd_error_code - zstd error codes
+ */
+typedef ZSTD_ErrorCode zstd_error_code;
+
+/**
+ * zstd_get_error_code() - translates an error function result to an error code
+ * @code: The function result for which zstd_is_error(code) is true.
+ *
+ * Return: A unique error code for this error.
+ */
+zstd_error_code zstd_get_error_code(size_t code);
+
+/**
+ * zstd_get_error_name() - translates an error function result to a string
+ * @code: The function result for which zstd_is_error(code) is true.
+ *
+ * Return: An error string corresponding to the error code.
+ */
+const char *zstd_get_error_name(size_t code);
+
+/**
+ * zstd_min_clevel() - minimum allowed compression level
+ *
+ * Return: The minimum allowed compression level.
+ */
+int zstd_min_clevel(void);
+
+/**
+ * zstd_max_clevel() - maximum allowed compression level
+ *
+ * Return: The maximum allowed compression level.
+ */
+int zstd_max_clevel(void);
+
+/* ====== Parameter Selection ====== */
+
+/**
+ * enum zstd_strategy - zstd compression search strategy
+ *
+ * From faster to stronger. See zstd_lib.h.
+ */
+typedef ZSTD_strategy zstd_strategy;
+
+/**
+ * struct zstd_compression_parameters - zstd compression parameters
+ * @windowLog: Log of the largest match distance. Larger means more
+ * compression, and more memory needed during decompression.
+ * @chainLog: Fully searched segment. Larger means more compression,
+ * slower, and more memory (useless for fast).
+ * @hashLog: Dispatch table. Larger means more compression,
+ * slower, and more memory.
+ * @searchLog: Number of searches. Larger means more compression and slower.
+ * @searchLength: Match length searched. Larger means faster decompression,
+ * sometimes less compression.
+ * @targetLength: Acceptable match size for optimal parser (only). Larger means
+ * more compression, and slower.
+ * @strategy: The zstd compression strategy.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_compressionParameters zstd_compression_parameters;
+
+/**
+ * struct zstd_frame_parameters - zstd frame parameters
+ * @contentSizeFlag: Controls whether content size will be present in the
+ * frame header (when known).
+ * @checksumFlag: Controls whether a 32-bit checksum is generated at the
+ * end of the frame for error detection.
+ * @noDictIDFlag: Controls whether dictID will be saved into the frame
+ * header when using dictionary compression.
+ *
+ * The default value is all fields set to 0. See zstd_lib.h.
+ */
+typedef ZSTD_frameParameters zstd_frame_parameters;
+
+/**
+ * struct zstd_parameters - zstd parameters
+ * @cParams: The compression parameters.
+ * @fParams: The frame parameters.
+ */
+typedef ZSTD_parameters zstd_parameters;
+
+/**
+ * zstd_get_params() - returns zstd_parameters for selected level
+ * @level: The compression level
+ * @estimated_src_size: The estimated source size to compress or 0
+ * if unknown.
+ *
+ * Return: The selected zstd_parameters.
+ */
+zstd_parameters zstd_get_params(int level,
+ unsigned long long estimated_src_size);
+
+/* ====== Single-pass Compression ====== */
+
+typedef ZSTD_CCtx zstd_cctx;
+
+/**
+ * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
+ * @parameters: The compression parameters to be used.
+ *
+ * If multiple compression parameters might be used, the caller must call
+ * zstd_cctx_workspace_bound() for each set of parameters and use the maximum
+ * size.
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ * zstd_init_cctx().
+ */
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
+
+/**
+ * zstd_init_cctx() - initialize a zstd compression context
+ * @workspace: The workspace to emplace the context into. It must outlive
+ * the returned context.
+ * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to
+ * determine how large the workspace must be.
+ *
+ * Return: A zstd compression context or NULL on error.
+ */
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size);
+
+/**
+ * zstd_compress_cctx() - compress src into dst with the initialized parameters
+ * @cctx: The context. Must have been initialized with zstd_init_cctx().
+ * @dst: The buffer to compress src into.
+ * @dst_capacity: The size of the destination buffer. May be any size, but
+ * zstd_compress_bound(src_size) is guaranteed to be large enough.
+ * @src: The data to compress.
+ * @src_size: The size of the data to compress.
+ * @parameters: The compression parameters to be used.
+ *
+ * Return: The compressed size or an error, which can be checked using
+ * zstd_is_error().
+ */
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size, const zstd_parameters *parameters);
+
+/* ====== Single-pass Decompression ====== */
+
+typedef ZSTD_DCtx zstd_dctx;
+
+/**
+ * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ * zstd_init_dctx().
+ */
+size_t zstd_dctx_workspace_bound(void);
+
+/**
+ * zstd_init_dctx() - initialize a zstd decompression context
+ * @workspace: The workspace to emplace the context into. It must outlive
+ * the returned context.
+ * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to
+ * determine how large the workspace must be.
+ *
+ * Return: A zstd decompression context or NULL on error.
+ */
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size);
+
+/**
+ * zstd_decompress_dctx() - decompress zstd compressed src into dst
+ * @dctx: The decompression context.
+ * @dst: The buffer to decompress src into.
+ * @dst_capacity: The size of the destination buffer. Must be at least as large
+ * as the decompressed size. If the caller cannot upper bound the
+ * decompressed size, then it's better to use the streaming API.
+ * @src: The zstd compressed data to decompress. Multiple concatenated
+ * frames and skippable frames are allowed.
+ * @src_size: The exact size of the data to decompress.
+ *
+ * Return: The decompressed size or an error, which can be checked using
+ * zstd_is_error().
+ */
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size);
+
+/* ====== Streaming Buffers ====== */
+
+/**
+ * struct zstd_in_buffer - input buffer for streaming
+ * @src: Start of the input buffer.
+ * @size: Size of the input buffer.
+ * @pos: Position where reading stopped. Will be updated.
+ * Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_inBuffer zstd_in_buffer;
+
+/**
+ * struct zstd_out_buffer - output buffer for streaming
+ * @dst: Start of the output buffer.
+ * @size: Size of the output buffer.
+ * @pos: Position where writing stopped. Will be updated.
+ * Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_outBuffer zstd_out_buffer;
+
+/* ====== Streaming Compression ====== */
+
+typedef ZSTD_CStream zstd_cstream;
+
+/**
+ * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
+ * @cparams: The compression parameters to be used for compression.
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ * zstd_init_cstream().
+ */
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
+
+/**
+ * zstd_init_cstream() - initialize a zstd streaming compression context
+ * @parameters: The zstd parameters to use for compression.
+ * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller
+ * must pass the source size (zero means empty source).
+ * Otherwise, the caller may optionally pass the source
+ * size, or zero if unknown.
+ * @workspace: The workspace to emplace the context into. It must outlive
+ * the returned context.
+ * @workspace_size: The size of workspace.
+ * Use zstd_cstream_workspace_bound(params->cparams) to
+ * determine how large the workspace must be.
+ *
+ * Return: The zstd streaming compression context or NULL on error.
+ */
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+ unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
+
+/**
+ * zstd_reset_cstream() - reset the context using parameters from creation
+ * @cstream: The zstd streaming compression context to reset.
+ * @pledged_src_size: Optionally the source size, or zero if unknown.
+ *
+ * Resets the context using the parameters from creation. Skips dictionary
+ * loading, since it can be reused. If `pledged_src_size` is non-zero the frame
+ * content size is always written into the frame header.
+ *
+ * Return: Zero or an error, which can be checked using
+ * zstd_is_error().
+ */
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+ unsigned long long pledged_src_size);
+
+/**
+ * zstd_compress_stream() - streaming compress some of input into output
+ * @cstream: The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ * compressed data was written.
+ * @input: Source buffer. `input->pos` is updated to indicate how much data
+ * was read. Note that it may not consume the entire input, in which
+ * case `input->pos < input->size`, and it's up to the caller to
+ * present remaining data again.
+ *
+ * The `input` and `output` buffers may be any size. Guaranteed to make some
+ * forward progress if `input` and `output` are not empty.
+ *
+ * Return: A hint for the number of bytes to use as the input for the next
+ * function call or an error, which can be checked using
+ * zstd_is_error().
+ */
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+ zstd_in_buffer *input);
+
+/**
+ * zstd_flush_stream() - flush internal buffers into output
+ * @cstream: The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ * compressed data was written.
+ *
+ * zstd_flush_stream() must be called until it returns 0, meaning all the data
+ * has been flushed. Since zstd_flush_stream() causes a block to be ended,
+ * calling it too often will degrade the compression ratio.
+ *
+ * Return: The number of bytes still present within internal buffers or an
+ * error, which can be checked using zstd_is_error().
+ */
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
+
+/**
+ * zstd_end_stream() - flush internal buffers into output and end the frame
+ * @cstream: The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ * compressed data was written.
+ *
+ * zstd_end_stream() must be called until it returns 0, meaning all the data has
+ * been flushed and the frame epilogue has been written.
+ *
+ * Return: The number of bytes still present within internal buffers or an
+ * error, which can be checked using zstd_is_error().
+ */
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
+
+/* ====== Streaming Decompression ====== */
+
+typedef ZSTD_DStream zstd_dstream;
+
+/**
+ * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
+ * @max_window_size: The maximum window size allowed for compressed frames.
+ *
+ * Return: A lower bound on the size of the workspace that is passed
+ * to zstd_init_dstream().
+ */
+size_t zstd_dstream_workspace_bound(size_t max_window_size);
+
+/**
+ * zstd_init_dstream() - initialize a zstd streaming decompression context
+ * @max_window_size: The maximum window size allowed for compressed frames.
+ * @workspace: The workspace to emplace the context into. It must outlive
+ * the returned context.
+ * @workspace_size: The size of workspace.
+ * Use zstd_dstream_workspace_bound(max_window_size) to
+ * determine how large the workspace must be.
+ *
+ * Return: The zstd streaming decompression context.
+ */
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+ size_t workspace_size);
+
+/**
+ * zstd_reset_dstream() - reset the context using parameters from creation
+ * @dstream: The zstd streaming decompression context to reset.
+ *
+ * Resets the context using the parameters from creation. Skips dictionary
+ * loading, since it can be reused.
+ *
+ * Return: Zero or an error, which can be checked using zstd_is_error().
+ */
+size_t zstd_reset_dstream(zstd_dstream *dstream);
+
+/**
+ * zstd_decompress_stream() - streaming decompress some of input into output
+ * @dstream: The zstd streaming decompression context.
+ * @output: Destination buffer. `output.pos` is updated to indicate how much
+ * decompressed data was written.
+ * @input: Source buffer. `input.pos` is updated to indicate how much data was
+ * read. Note that it may not consume the entire input, in which case
+ * `input.pos < input.size`, and it's up to the caller to present
+ * remaining data again.
+ *
+ * The `input` and `output` buffers may be any size. Guaranteed to make some
+ * forward progress if `input` and `output` are not empty.
+ * zstd_decompress_stream() will not consume the last byte of the frame until
+ * the entire frame is flushed.
+ *
+ * Return: Returns 0 iff a frame is completely decoded and fully flushed.
+ * Otherwise returns a hint for the number of bytes to use as the
+ * input for the next function call or an error, which can be checked
+ * using zstd_is_error(). The size hint will never load more than the
+ * frame.
+ */
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+ zstd_in_buffer *input);
+
+/* ====== Frame Inspection Functions ====== */
+
+/**
+ * zstd_find_frame_compressed_size() - returns the size of a compressed frame
+ * @src: Source buffer. It should point to the start of a zstd encoded
+ * frame or a skippable frame.
+ * @src_size: The size of the source buffer. It must be at least as large as the
+ * size of the frame.
+ *
+ * Return: The compressed size of the frame pointed to by `src` or an error,
+ * which can be checked with zstd_is_error().
+ * Suitable to pass to ZSTD_decompress() or similar functions.
+ */
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
+
+/**
+ * struct zstd_frame_params - zstd frame parameters stored in the frame header
+ * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
+ * present.
+ * @windowSize: The window size, or 0 if the frame is a skippable frame.
+ * @blockSizeMax: The maximum block size.
+ * @frameType: The frame type (zstd or skippable)
+ * @headerSize: The size of the frame header.
+ * @dictID: The dictionary id, or 0 if not present.
+ * @checksumFlag: Whether a checksum was used.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_frameHeader zstd_frame_header;
+
+/**
+ * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
+ * @params: On success the frame parameters are written here.
+ * @src: The source buffer. It must point to a zstd or skippable frame.
+ * @src_size: The size of the source buffer.
+ *
+ * Return: 0 on success. If more data is required it returns how many bytes
+ * must be provided to make forward progress. Otherwise it returns
+ * an error, which can be checked using zstd_is_error().
+ */
+size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
+ size_t src_size);
+
+#endif /* LINUX_ZSTD_H */
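
A rough usage sketch of the single-pass compression path declared above: pick parameters, size and allocate a workspace, initialize the context in it, then compress, checking the size_t result with zstd_is_error(). Illustrative only; it assumes these wrappers are implemented elsewhere in the tree on top of the linked libzstd:

#include <linux/zstd.h>
#include <stdlib.h>

/*
 * Returns the compressed size, or 0 on failure. For guaranteed success,
 * dst_capacity should be at least zstd_compress_bound(src_size).
 */
static size_t example_zstd_compress(void *dst, size_t dst_capacity,
				    const void *src, size_t src_size,
				    int level)
{
	zstd_parameters params = zstd_get_params(level, src_size);
	size_t workspace_size = zstd_cctx_workspace_bound(&params.cParams);
	void *workspace = malloc(workspace_size);
	size_t ret = 0;

	if (!workspace)
		return 0;

	zstd_cctx *cctx = zstd_init_cctx(workspace, workspace_size);
	if (cctx)
		ret = zstd_compress_cctx(cctx, dst, dst_capacity,
					 src, src_size, &params);

	if (zstd_is_error(ret))
		ret = 0;

	free(workspace);
	return ret;
}
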
diff --git a/c_src/include/linux/zstd_errors.h b/c_src/include/linux/zstd_errors.h
new file mode 100644
index 00000000..58b6dd45
--- /dev/null
+++ b/c_src/include/linux/zstd_errors.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
+
+
+/*===== dependency =====*/
+#include <linux/types.h> /* size_t */
+
+
+/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
+#define ZSTDERRORLIB_VISIBILITY
+#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+
+/*-*********************************************
+ * Error codes list
+ *-*********************************************
+ * Error codes _values_ are pinned down since v1.3.1 only.
+ * Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ * Only values < 100 are considered stable.
+ *
+ * note 1 : this API shall be used with static linking only.
+ * dynamic linking is not yet officially supported.
+ * note 2 : Prefer relying on the enum than on its value whenever possible
+ * This is the only supported way to use the error list < v1.3.1
+ * note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
+typedef enum {
+ ZSTD_error_no_error = 0,
+ ZSTD_error_GENERIC = 1,
+ ZSTD_error_prefix_unknown = 10,
+ ZSTD_error_version_unsupported = 12,
+ ZSTD_error_frameParameter_unsupported = 14,
+ ZSTD_error_frameParameter_windowTooLarge = 16,
+ ZSTD_error_corruption_detected = 20,
+ ZSTD_error_checksum_wrong = 22,
+ ZSTD_error_dictionary_corrupted = 30,
+ ZSTD_error_dictionary_wrong = 32,
+ ZSTD_error_dictionaryCreation_failed = 34,
+ ZSTD_error_parameter_unsupported = 40,
+ ZSTD_error_parameter_outOfBound = 42,
+ ZSTD_error_tableLog_tooLarge = 44,
+ ZSTD_error_maxSymbolValue_tooLarge = 46,
+ ZSTD_error_maxSymbolValue_tooSmall = 48,
+ ZSTD_error_stage_wrong = 60,
+ ZSTD_error_init_missing = 62,
+ ZSTD_error_memory_allocation = 64,
+ ZSTD_error_workSpace_tooSmall = 66,
+ ZSTD_error_dstSize_tooSmall = 70,
+ ZSTD_error_srcSize_wrong = 72,
+ ZSTD_error_dstBuffer_null = 74,
+ /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
+ ZSTD_error_frameIndex_tooLarge = 100,
+ ZSTD_error_seekableIO = 102,
+ ZSTD_error_dstBuffer_wrong = 104,
+ ZSTD_error_srcBuffer_wrong = 105,
+ ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+ convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+ which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
+
+
+
+#endif /* ZSTD_ERRORS_H_398273423 */
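
These are the raw libzstd error codes; in the wrapper API above, a failing size_t return is detected with zstd_is_error() and turned into a readable message with zstd_get_error_name(), which map onto ZSTD_getErrorCode()/ZSTD_getErrorString() from this header. A streaming-decompression sketch showing that pattern, illustrative only, under the assumption that the output buffer can hold the whole frame:

#include <linux/zstd.h>
#include <stdio.h>
#include <stdlib.h>

static int example_zstd_decompress(zstd_out_buffer *out, zstd_in_buffer *in,
				   size_t max_window_size)
{
	size_t workspace_size = zstd_dstream_workspace_bound(max_window_size);
	void *workspace = malloc(workspace_size);
	zstd_dstream *dstream;
	size_t ret;

	if (!workspace)
		return -1;

	dstream = zstd_init_dstream(max_window_size, workspace, workspace_size);
	if (!dstream) {
		free(workspace);
		return -1;
	}

	do {
		ret = zstd_decompress_stream(dstream, out, in);
		if (zstd_is_error(ret)) {
			fprintf(stderr, "zstd: %s\n", zstd_get_error_name(ret));
			free(workspace);
			return -1;
		}
	} while (ret != 0 && in->pos < in->size);	/* 0 == frame fully flushed */

	free(workspace);
	return ret == 0 ? 0 : -1;
}
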
diff --git a/c_src/include/trace/define_trace.h b/c_src/include/trace/define_trace.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/c_src/include/trace/define_trace.h
diff --git a/c_src/include/trace/events/lock.h b/c_src/include/trace/events/lock.h
new file mode 100644
index 00000000..9ebd081e
--- /dev/null
+++ b/c_src/include/trace/events/lock.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lock
+
+#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOCK_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+/* flags for lock:contention_begin */
+#define LCB_F_SPIN (1U << 0)
+#define LCB_F_READ (1U << 1)
+#define LCB_F_WRITE (1U << 2)
+#define LCB_F_RT (1U << 3)
+#define LCB_F_PERCPU (1U << 4)
+#define LCB_F_MUTEX (1U << 5)
+
+
+#ifdef CONFIG_LOCKDEP
+
+#include <linux/lockdep.h>
+
+TRACE_EVENT(lock_acquire,
+
+ TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
+ int trylock, int read, int check,
+ struct lockdep_map *next_lock, unsigned long ip),
+
+ TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, flags)
+ __string(name, lock->name)
+ __field(void *, lockdep_addr)
+ ),
+
+ TP_fast_assign(
+ __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
+ __assign_str(name, lock->name);
+ __entry->lockdep_addr = lock;
+ ),
+
+ TP_printk("%p %s%s%s", __entry->lockdep_addr,
+ (__entry->flags & 1) ? "try " : "",
+ (__entry->flags & 2) ? "read " : "",
+ __get_str(name))
+);
+
+DECLARE_EVENT_CLASS(lock,
+
+ TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+
+ TP_ARGS(lock, ip),
+
+ TP_STRUCT__entry(
+ __string( name, lock->name )
+ __field( void *, lockdep_addr )
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, lock->name);
+ __entry->lockdep_addr = lock;
+ ),
+
+ TP_printk("%p %s", __entry->lockdep_addr, __get_str(name))
+);
+
+DEFINE_EVENT(lock, lock_release,
+
+ TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+
+ TP_ARGS(lock, ip)
+);
+
+#ifdef CONFIG_LOCK_STAT
+
+DEFINE_EVENT(lock, lock_contended,
+
+ TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+
+ TP_ARGS(lock, ip)
+);
+
+DEFINE_EVENT(lock, lock_acquired,
+
+ TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+
+ TP_ARGS(lock, ip)
+);
+
+#endif /* CONFIG_LOCK_STAT */
+#endif /* CONFIG_LOCKDEP */
+
+TRACE_EVENT(contention_begin,
+
+ TP_PROTO(void *lock, unsigned int flags),
+
+ TP_ARGS(lock, flags),
+
+ TP_STRUCT__entry(
+ __field(void *, lock_addr)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->lock_addr = lock;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("%p (flags=%s)", __entry->lock_addr,
+ __print_flags(__entry->flags, "|",
+ { LCB_F_SPIN, "SPIN" },
+ { LCB_F_READ, "READ" },
+ { LCB_F_WRITE, "WRITE" },
+ { LCB_F_RT, "RT" },
+ { LCB_F_PERCPU, "PERCPU" },
+ { LCB_F_MUTEX, "MUTEX" }
+ ))
+);
+
+TRACE_EVENT(contention_end,
+
+ TP_PROTO(void *lock, int ret),
+
+ TP_ARGS(lock, ret),
+
+ TP_STRUCT__entry(
+ __field(void *, lock_addr)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->lock_addr = lock;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret)
+);
+
+#endif /* _TRACE_LOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
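
The TRACE_EVENT() definitions above generate trace_contention_begin()/trace_contention_end() hooks; in this userspace tree (note the empty define_trace.h) they effectively compile to stubs, but a lock slow path would report contention roughly like this (hypothetical sketch, not part of the patch):

#include <trace/events/lock.h>

static void example_lock_slowpath(void *lock)
{
	trace_contention_begin(lock, LCB_F_SPIN|LCB_F_READ);

	/* ... spin or sleep until the lock is acquired ... */

	trace_contention_end(lock, 0);	/* 0: acquired without error */
}
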
diff --git a/c_src/include/uapi/linux/xattr.h b/c_src/include/uapi/linux/xattr.h
new file mode 100644
index 00000000..1590c49c
--- /dev/null
+++ b/c_src/include/uapi/linux/xattr.h
@@ -0,0 +1,77 @@
+/*
+ File: linux/xattr.h
+
+ Extended attributes handling.
+
+ Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
+ Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
+ Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+*/
+
+#include <linux/libc-compat.h>
+
+#ifndef _UAPI_LINUX_XATTR_H
+#define _UAPI_LINUX_XATTR_H
+
+#if __UAPI_DEF_XATTR
+#define __USE_KERNEL_XATTR_DEFS
+
+#define XATTR_CREATE 0x1 /* set value, fail if attr already exists */
+#define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */
+#endif
+
+/* Namespaces */
+#define XATTR_OS2_PREFIX "os2."
+#define XATTR_OS2_PREFIX_LEN (sizeof(XATTR_OS2_PREFIX) - 1)
+
+#define XATTR_MAC_OSX_PREFIX "osx."
+#define XATTR_MAC_OSX_PREFIX_LEN (sizeof(XATTR_MAC_OSX_PREFIX) - 1)
+
+#define XATTR_BTRFS_PREFIX "btrfs."
+#define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1)
+
+#define XATTR_SECURITY_PREFIX "security."
+#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)
+
+#define XATTR_SYSTEM_PREFIX "system."
+#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1)
+
+#define XATTR_TRUSTED_PREFIX "trusted."
+#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1)
+
+#define XATTR_USER_PREFIX "user."
+#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1)
+
+/* Security namespace */
+#define XATTR_EVM_SUFFIX "evm"
+#define XATTR_NAME_EVM XATTR_SECURITY_PREFIX XATTR_EVM_SUFFIX
+
+#define XATTR_IMA_SUFFIX "ima"
+#define XATTR_NAME_IMA XATTR_SECURITY_PREFIX XATTR_IMA_SUFFIX
+
+#define XATTR_SELINUX_SUFFIX "selinux"
+#define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX
+
+#define XATTR_SMACK_SUFFIX "SMACK64"
+#define XATTR_SMACK_IPIN "SMACK64IPIN"
+#define XATTR_SMACK_IPOUT "SMACK64IPOUT"
+#define XATTR_SMACK_EXEC "SMACK64EXEC"
+#define XATTR_SMACK_TRANSMUTE "SMACK64TRANSMUTE"
+#define XATTR_SMACK_MMAP "SMACK64MMAP"
+#define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX
+#define XATTR_NAME_SMACKIPIN XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN
+#define XATTR_NAME_SMACKIPOUT XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT
+#define XATTR_NAME_SMACKEXEC XATTR_SECURITY_PREFIX XATTR_SMACK_EXEC
+#define XATTR_NAME_SMACKTRANSMUTE XATTR_SECURITY_PREFIX XATTR_SMACK_TRANSMUTE
+#define XATTR_NAME_SMACKMMAP XATTR_SECURITY_PREFIX XATTR_SMACK_MMAP
+
+#define XATTR_CAPS_SUFFIX "capability"
+#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
+
+#define XATTR_POSIX_ACL_ACCESS "posix_acl_access"
+#define XATTR_NAME_POSIX_ACL_ACCESS XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_ACCESS
+#define XATTR_POSIX_ACL_DEFAULT "posix_acl_default"
+#define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT
+
+
+#endif /* _UAPI_LINUX_XATTR_H */
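
The prefix and suffix macros concatenate at compile time, so e.g. XATTR_NAME_POSIX_ACL_ACCESS expands to "system.posix_acl_access". From ordinary userspace those names go straight to the xattr syscalls, as in this sketch (illustrative only; uses getxattr(2) from <sys/xattr.h>):

#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

static void example_read_acl(const char *path)
{
	char buf[256];
	/* the same string XATTR_NAME_POSIX_ACL_ACCESS above expands to */
	ssize_t len = getxattr(path, "system.posix_acl_access",
			       buf, sizeof(buf));

	if (len < 0)
		perror("getxattr");
	else
		printf("%s: %zd bytes of posix_acl_access\n", path, len);
}
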
diff --git a/c_src/libbcachefs.c b/c_src/libbcachefs.c
new file mode 100644
index 00000000..ef4cc718
--- /dev/null
+++ b/c_src/libbcachefs.c
@@ -0,0 +1,753 @@
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "libbcachefs.h"
+#include "crypto.h"
+#include "libbcachefs/bcachefs_format.h"
+#include "libbcachefs/btree_cache.h"
+#include "libbcachefs/checksum.h"
+#include "libbcachefs/disk_groups.h"
+#include "libbcachefs/journal_seq_blacklist.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/replicas.h"
+#include "libbcachefs/super-io.h"
+#include "tools-util.h"
+
+#define NSEC_PER_SEC 1000000000L
+
+static void init_layout(struct bch_sb_layout *l,
+ unsigned block_size,
+ unsigned sb_size,
+ u64 sb_start, u64 sb_end)
+{
+ u64 sb_pos = sb_start;
+ unsigned i;
+
+ memset(l, 0, sizeof(*l));
+
+ l->magic = BCHFS_MAGIC;
+ l->layout_type = 0;
+ l->nr_superblocks = 2;
+ l->sb_max_size_bits = ilog2(sb_size);
+
+ /* Create two superblocks in the allowed range: */
+ for (i = 0; i < l->nr_superblocks; i++) {
+ if (sb_pos != BCH_SB_SECTOR)
+ sb_pos = round_up(sb_pos, block_size >> 9);
+
+ l->sb_offset[i] = cpu_to_le64(sb_pos);
+ sb_pos += sb_size;
+ }
+
+ if (sb_pos > sb_end)
+ die("insufficient space for superblocks: start %llu end %llu > %llu size %u",
+ sb_start, sb_pos, sb_end, sb_size);
+}
+
+/* minimum size filesystem we can create, given a bucket size: */
+static u64 min_size(unsigned bucket_size)
+{
+ return BCH_MIN_NR_NBUCKETS * bucket_size;
+}
+
+u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+{
+ u64 bucket_size;
+
+ if (dev->size < min_size(opts.block_size))
+ die("cannot format %s, too small (%llu bytes, min %llu)",
+ dev->path, dev->size, min_size(opts.block_size));
+
+ /* Bucket size must be >= block size: */
+ bucket_size = opts.block_size;
+
+ /* Bucket size must be >= btree node size: */
+ if (opt_defined(opts, btree_node_size))
+ bucket_size = max_t(unsigned, bucket_size,
+ opts.btree_node_size);
+
+ /* Want a bucket size of at least 128k, if possible: */
+ bucket_size = max(bucket_size, 128ULL << 10);
+
+ if (dev->size >= min_size(bucket_size)) {
+ unsigned scale = max(1,
+ ilog2(dev->size / min_size(bucket_size)) / 4);
+
+ scale = rounddown_pow_of_two(scale);
+
+ /* max bucket size 1 mb */
+ bucket_size = min(bucket_size * scale, 1ULL << 20);
+ } else {
+ do {
+ bucket_size /= 2;
+ } while (dev->size < min_size(bucket_size));
+ }
+
+ return bucket_size;
+}
+
+void bch2_check_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+{
+ if (dev->bucket_size < opts.block_size)
+ die("Bucket size (%llu) cannot be smaller than block size (%u)",
+ dev->bucket_size, opts.block_size);
+
+ if (opt_defined(opts, btree_node_size) &&
+ dev->bucket_size < opts.btree_node_size)
+ die("Bucket size (%llu) cannot be smaller than btree node size (%u)",
+ dev->bucket_size, opts.btree_node_size);
+
+ if (dev->nbuckets < BCH_MIN_NR_NBUCKETS)
+ die("Not enough buckets: %llu, need %u (bucket size %llu)",
+ dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->bucket_size);
+
+ if (dev->bucket_size > (u32) U16_MAX << 9)
+ die("Bucket size (%llu) too big (max %u)",
+ dev->bucket_size, (u32) U16_MAX << 9);
+}
+
+static unsigned parse_target(struct bch_sb_handle *sb,
+ struct dev_opts *devs, size_t nr_devs,
+ const char *s)
+{
+ struct dev_opts *i;
+ int idx;
+
+ if (!s)
+ return 0;
+
+ for (i = devs; i < devs + nr_devs; i++)
+ if (!strcmp(s, i->path))
+ return dev_to_target(i - devs);
+
+ idx = bch2_disk_path_find(sb, s);
+ if (idx >= 0)
+ return group_to_target(idx);
+
+ die("Invalid target %s", s);
+ return 0;
+}
+
+struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs,
+ struct bch_opts fs_opts,
+ struct format_opts opts,
+ struct dev_opts *devs,
+ size_t nr_devs)
+{
+ struct bch_sb_handle sb = { NULL };
+ struct dev_opts *i;
+ unsigned max_dev_block_size = 0;
+ unsigned opt_id;
+ u64 min_bucket_size = U64_MAX;
+
+ for (i = devs; i < devs + nr_devs; i++)
+ max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));
+
+ /* calculate block size: */
+ if (!opt_defined(fs_opts, block_size)) {
+ opt_set(fs_opts, block_size, max_dev_block_size);
+ } else if (fs_opts.block_size < max_dev_block_size)
+ die("blocksize too small: %u, must be greater than device blocksize %u",
+ fs_opts.block_size, max_dev_block_size);
+
+ /* get device size, if it wasn't specified: */
+ for (i = devs; i < devs + nr_devs; i++)
+ if (!i->size)
+ i->size = get_size(i->bdev->bd_fd);
+
+ /* calculate bucket sizes: */
+ for (i = devs; i < devs + nr_devs; i++)
+ min_bucket_size = min(min_bucket_size,
+ i->bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
+
+ for (i = devs; i < devs + nr_devs; i++)
+ if (!i->bucket_size)
+ i->bucket_size = min_bucket_size;
+
+ for (i = devs; i < devs + nr_devs; i++) {
+ i->nbuckets = i->size / i->bucket_size;
+ bch2_check_bucket_size(fs_opts, i);
+ }
+
+ /* calculate btree node size: */
+ if (!opt_defined(fs_opts, btree_node_size)) {
+ /* 256k default btree node size */
+ opt_set(fs_opts, btree_node_size, 256 << 10);
+
+ for (i = devs; i < devs + nr_devs; i++)
+ fs_opts.btree_node_size =
+ min_t(unsigned, fs_opts.btree_node_size,
+ i->bucket_size);
+ }
+
+ if (uuid_is_null(opts.uuid.b))
+ uuid_generate(opts.uuid.b);
+
+ if (bch2_sb_realloc(&sb, 0))
+ die("insufficient memory");
+
+ sb.sb->version = le16_to_cpu(opts.version);
+ sb.sb->version_min = le16_to_cpu(opts.version);
+ sb.sb->magic = BCHFS_MAGIC;
+ sb.sb->user_uuid = opts.uuid;
+ sb.sb->nr_devices = nr_devs;
+
+ if (opts.version == bcachefs_metadata_version_current)
+ sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
+
+ uuid_generate(sb.sb->uuid.b);
+
+ if (opts.label)
+ memcpy(sb.sb->label,
+ opts.label,
+ min(strlen(opts.label), sizeof(sb.sb->label)));
+
+ for (opt_id = 0;
+ opt_id < bch2_opts_nr;
+ opt_id++) {
+ u64 v;
+
+ v = bch2_opt_defined_by_id(&fs_opts, opt_id)
+ ? bch2_opt_get_by_id(&fs_opts, opt_id)
+ : bch2_opt_get_by_id(&bch2_opts_default, opt_id);
+
+ __bch2_opt_set_sb(sb.sb, &bch2_opt_table[opt_id], v);
+ }
+
+ struct timespec now;
+ if (clock_gettime(CLOCK_REALTIME, &now))
+ die("error getting current time: %m");
+
+ sb.sb->time_base_lo = cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
+ sb.sb->time_precision = cpu_to_le32(1);
+
+ /* Member info: */
+ struct bch_sb_field_members_v2 *mi =
+ bch2_sb_field_resize(&sb, members_v2,
+ (sizeof(*mi) + sizeof(struct bch_member) *
+ nr_devs) / sizeof(u64));
+ mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
+ for (i = devs; i < devs + nr_devs; i++) {
+ struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs));
+
+ uuid_generate(m->uuid.b);
+ m->nbuckets = cpu_to_le64(i->nbuckets);
+ m->first_bucket = 0;
+ m->bucket_size = cpu_to_le16(i->bucket_size >> 9);
+
+ SET_BCH_MEMBER_DISCARD(m, i->discard);
+ SET_BCH_MEMBER_DATA_ALLOWED(m, i->data_allowed);
+ SET_BCH_MEMBER_DURABILITY(m, i->durability + 1);
+ }
+
+ /* Disk labels: */
+ for (i = devs; i < devs + nr_devs; i++) {
+ struct bch_member *m;
+ int idx;
+
+ if (!i->label)
+ continue;
+
+ idx = bch2_disk_path_find_or_create(&sb, i->label);
+ if (idx < 0)
+ die("error creating disk path: %s", strerror(-idx));
+
+ /*
+ * Recompute mi and m after each sb modification: its location
+ * in memory may have changed due to reallocation.
+ */
+ m = bch2_members_v2_get_mut(sb.sb, (i - devs));
+ SET_BCH_MEMBER_GROUP(m, idx + 1);
+ }
+
+ SET_BCH_SB_FOREGROUND_TARGET(sb.sb,
+ parse_target(&sb, devs, nr_devs, fs_opt_strs.foreground_target));
+ SET_BCH_SB_BACKGROUND_TARGET(sb.sb,
+ parse_target(&sb, devs, nr_devs, fs_opt_strs.background_target));
+ SET_BCH_SB_PROMOTE_TARGET(sb.sb,
+ parse_target(&sb, devs, nr_devs, fs_opt_strs.promote_target));
+ SET_BCH_SB_METADATA_TARGET(sb.sb,
+ parse_target(&sb, devs, nr_devs, fs_opt_strs.metadata_target));
+
+ /* Crypt: */
+ if (opts.encrypted) {
+ struct bch_sb_field_crypt *crypt =
+ bch2_sb_field_resize(&sb, crypt, sizeof(*crypt) / sizeof(u64));
+
+ bch_sb_crypt_init(sb.sb, crypt, opts.passphrase);
+ SET_BCH_SB_ENCRYPTION_TYPE(sb.sb, 1);
+ }
+
+ bch2_sb_members_cpy_v2_v1(&sb);
+
+ for (i = devs; i < devs + nr_devs; i++) {
+ u64 size_sectors = i->size >> 9;
+
+ sb.sb->dev_idx = i - devs;
+
+ if (!i->sb_offset) {
+ i->sb_offset = BCH_SB_SECTOR;
+ i->sb_end = size_sectors;
+ }
+
+ init_layout(&sb.sb->layout, fs_opts.block_size,
+ opts.superblock_size,
+ i->sb_offset, i->sb_end);
+
+ /*
+ * Also create a backup superblock at the end of the disk:
+ *
+ * If we're not creating a superblock at the default offset, it
+ * means we're being run from the migrate tool and we could be
+ * overwriting existing data if we write to the end of the disk:
+ */
+ if (i->sb_offset == BCH_SB_SECTOR) {
+ struct bch_sb_layout *l = &sb.sb->layout;
+ u64 backup_sb = size_sectors - (1 << l->sb_max_size_bits);
+
+ backup_sb = rounddown(backup_sb, i->bucket_size >> 9);
+ l->sb_offset[l->nr_superblocks++] = cpu_to_le64(backup_sb);
+ }
+
+ if (i->sb_offset == BCH_SB_SECTOR) {
+ /* Zero start of disk */
+ static const char zeroes[BCH_SB_SECTOR << 9];
+
+ xpwrite(i->bdev->bd_fd, zeroes, BCH_SB_SECTOR << 9, 0,
+ "zeroing start of disk");
+ }
+
+ bch2_super_write(i->bdev->bd_fd, sb.sb);
+ close(i->bdev->bd_fd);
+ }
+
+ return sb.sb;
+}
+
+void bch2_super_write(int fd, struct bch_sb *sb)
+{
+ struct nonce nonce = { 0 };
+ unsigned bs = get_blocksize(fd);
+
+ unsigned i;
+ for (i = 0; i < sb->layout.nr_superblocks; i++) {
+ sb->offset = sb->layout.sb_offset[i];
+
+ if (sb->offset == BCH_SB_SECTOR) {
+ /* Write backup layout */
+
+ BUG_ON(bs > 4096);
+
+ char *buf = aligned_alloc(bs, bs);
+ xpread(fd, buf, bs, 4096 - bs);
+ memcpy(buf + bs - sizeof(sb->layout),
+ &sb->layout,
+ sizeof(sb->layout));
+ xpwrite(fd, buf, bs, 4096 - bs,
+ "backup layout");
+ free(buf);
+
+ }
+
+ sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb), nonce, sb);
+ xpwrite(fd, sb, round_up(vstruct_bytes(sb), bs),
+ le64_to_cpu(sb->offset) << 9,
+ "superblock");
+ }
+
+ fsync(fd);
+}
+
+struct bch_sb *__bch2_super_read(int fd, u64 sector)
+{
+ struct bch_sb sb, *ret;
+
+ xpread(fd, &sb, sizeof(sb), sector << 9);
+
+ if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)) &&
+ memcmp(&sb.magic, &BCHFS_MAGIC, sizeof(sb.magic)))
+ die("not a bcachefs superblock");
+
+ size_t bytes = vstruct_bytes(&sb);
+
+ ret = malloc(bytes);
+
+ xpread(fd, ret, bytes, sector << 9);
+
+ return ret;
+}
+
+/* ioctl interface: */
+
+/* Global control device: */
+int bcachectl_open(void)
+{
+ return xopen("/dev/bcachefs-ctl", O_RDWR);
+}
+
+/* Filesystem handles (ioctl, sysfs dir): */
+
+#define SYSFS_BASE "/sys/fs/bcachefs/"
+
+void bcache_fs_close(struct bchfs_handle fs)
+{
+ close(fs.ioctl_fd);
+ close(fs.sysfs_fd);
+}
+
+struct bchfs_handle bcache_fs_open(const char *path)
+{
+ struct bchfs_handle ret;
+
+ if (!uuid_parse(path, ret.uuid.b)) {
+ /* It's a UUID, look it up in sysfs: */
+ char *sysfs = mprintf(SYSFS_BASE "%s", path);
+ ret.sysfs_fd = xopen(sysfs, O_RDONLY);
+
+ char *minor = read_file_str(ret.sysfs_fd, "minor");
+ char *ctl = mprintf("/dev/bcachefs%s-ctl", minor);
+ ret.ioctl_fd = xopen(ctl, O_RDWR);
+
+ free(sysfs);
+ free(minor);
+ free(ctl);
+ } else {
+ /* It's a path: */
+ ret.ioctl_fd = open(path, O_RDONLY);
+ if (ret.ioctl_fd < 0)
+ die("Error opening filesystem at %s: %m", path);
+
+ struct bch_ioctl_query_uuid uuid;
+ if (ioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid) < 0)
+ die("error opening %s: not a bcachefs filesystem", path);
+
+ ret.uuid = uuid.uuid;
+
+ char uuid_str[40];
+ uuid_unparse(uuid.uuid.b, uuid_str);
+
+ char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str);
+ ret.sysfs_fd = xopen(sysfs, O_RDONLY);
+ free(sysfs);
+ }
+
+ return ret;
+}
+
+/*
+ * Given a path to a block device, open the filesystem it belongs to; also
+ * return the device's idx:
+ */
+struct bchfs_handle bchu_fs_open_by_dev(const char *path, int *idx)
+{
+ struct bch_opts opts = bch2_opts_empty();
+ char buf[1024], *uuid_str;
+
+ struct stat stat = xstat(path);
+
+ if (S_ISBLK(stat.st_mode)) {
+ char *sysfs = mprintf("/sys/dev/block/%u:%u/bcachefs",
+ major(stat.st_dev),
+ minor(stat.st_dev));
+
+ ssize_t len = readlink(sysfs, buf, sizeof(buf));
+ free(sysfs);
+
+ if (len <= 0)
+ goto read_super;
+
+ char *p = strrchr(buf, '/');
+ if (!p || sscanf(p + 1, "dev-%u", idx) != 1)
+ die("error parsing sysfs");
+
+ *p = '\0';
+ p = strrchr(buf, '/');
+ uuid_str = p + 1;
+ } else {
+read_super:
+ opt_set(opts, noexcl, true);
+ opt_set(opts, nochanges, true);
+
+ struct bch_sb_handle sb;
+ int ret = bch2_read_super(path, &opts, &sb);
+ if (ret)
+ die("Error opening %s: %s", path, strerror(-ret));
+
+ *idx = sb.sb->dev_idx;
+ uuid_str = buf;
+ uuid_unparse(sb.sb->user_uuid.b, uuid_str);
+
+ bch2_free_super(&sb);
+ }
+
+ return bcache_fs_open(uuid_str);
+}
+
+int bchu_dev_path_to_idx(struct bchfs_handle fs, const char *dev_path)
+{
+ int idx;
+ struct bchfs_handle fs2 = bchu_fs_open_by_dev(dev_path, &idx);
+
+ if (memcmp(&fs.uuid, &fs2.uuid, sizeof(fs.uuid)))
+ idx = -1;
+ bcache_fs_close(fs2);
+ return idx;
+}
+
+int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)
+{
+ int progress_fd = xioctl(fs.ioctl_fd, BCH_IOCTL_DATA, &cmd);
+
+ while (1) {
+ struct bch_ioctl_data_event e;
+
+ if (read(progress_fd, &e, sizeof(e)) != sizeof(e))
+ die("error reading from progress fd %m");
+
+ if (e.type)
+ continue;
+
+ if (e.p.data_type == U8_MAX)
+ break;
+
+ printf("\33[2K\r");
+
+ printf("%llu%% complete: current position %s",
+ e.p.sectors_total
+ ? e.p.sectors_done * 100 / e.p.sectors_total
+ : 0,
+ bch2_data_types[e.p.data_type]);
+
+ switch (e.p.data_type) {
+ case BCH_DATA_btree:
+ case BCH_DATA_user:
+ printf(" %s:%llu:%llu",
+ bch2_btree_id_str(e.p.btree_id),
+ e.p.pos.inode,
+ e.p.pos.offset);
+ }
+
+ fflush(stdout);
+ sleep(1);
+ }
+ printf("\nDone\n");
+
+ close(progress_fd);
+ return 0;
+}
+
+/* option parsing */
+
+void bch2_opt_strs_free(struct bch_opt_strs *opts)
+{
+ unsigned i;
+
+ for (i = 0; i < bch2_opts_nr; i++) {
+ free(opts->by_id[i]);
+ opts->by_id[i] = NULL;
+ }
+}
+
+struct bch_opt_strs bch2_cmdline_opts_get(int *argc, char *argv[],
+ unsigned opt_types)
+{
+ struct bch_opt_strs opts;
+ unsigned i = 1;
+
+ memset(&opts, 0, sizeof(opts));
+
+ while (i < *argc) {
+ char *optstr = strcmp_prefix(argv[i], "--");
+ char *valstr = NULL, *p;
+ int optid, nr_args = 1;
+
+ if (!optstr) {
+ i++;
+ continue;
+ }
+
+ optstr = strdup(optstr);
+
+ p = optstr;
+ while (isalpha(*p) || *p == '_')
+ p++;
+
+ if (*p == '=') {
+ *p = '\0';
+ valstr = p + 1;
+ }
+
+ optid = bch2_opt_lookup(optstr);
+ if (optid < 0 ||
+ !(bch2_opt_table[optid].flags & opt_types)) {
+ i++;
+ goto next;
+ }
+
+ if (!valstr &&
+ bch2_opt_table[optid].type != BCH_OPT_BOOL) {
+ nr_args = 2;
+ valstr = argv[i + 1];
+ }
+
+ if (!valstr)
+ valstr = "1";
+
+ opts.by_id[optid] = strdup(valstr);
+
+ *argc -= nr_args;
+ memmove(&argv[i],
+ &argv[i + nr_args],
+ sizeof(char *) * (*argc - i));
+ argv[*argc] = NULL;
+next:
+ free(optstr);
+ }
+
+ return opts;
+}
+
+struct bch_opts bch2_parse_opts(struct bch_opt_strs strs)
+{
+ struct bch_opts opts = bch2_opts_empty();
+ struct printbuf err = PRINTBUF;
+ unsigned i;
+ int ret;
+ u64 v;
+
+ for (i = 0; i < bch2_opts_nr; i++) {
+ if (!strs.by_id[i])
+ continue;
+
+ ret = bch2_opt_parse(NULL,
+ &bch2_opt_table[i],
+ strs.by_id[i], &v, &err);
+ if (ret < 0)
+ die("Invalid option %s", err.buf);
+
+ bch2_opt_set_by_id(&opts, i, v);
+ }
+
+ printbuf_exit(&err);
+ return opts;
+}
+
+#define newline(c) \
+ do { \
+ printf("\n"); \
+ c = 0; \
+ } while(0)
+void bch2_opts_usage(unsigned opt_types)
+{
+ const struct bch_option *opt;
+ unsigned i, c = 0, helpcol = 30;
+
+ for (opt = bch2_opt_table;
+ opt < bch2_opt_table + bch2_opts_nr;
+ opt++) {
+ if (!(opt->flags & opt_types))
+ continue;
+
+ c += printf(" --%s", opt->attr.name);
+
+ switch (opt->type) {
+ case BCH_OPT_BOOL:
+ break;
+ case BCH_OPT_STR:
+ c += printf("=(");
+ for (i = 0; opt->choices[i]; i++) {
+ if (i)
+ c += printf("|");
+ c += printf("%s", opt->choices[i]);
+ }
+ c += printf(")");
+ break;
+ default:
+ c += printf("=%s", opt->hint);
+ break;
+ }
+
+ if (opt->help) {
+ const char *l = opt->help;
+
+ if (c >= helpcol)
+ newline(c);
+
+ while (1) {
+ const char *n = strchrnul(l, '\n');
+
+ while (c < helpcol) {
+ putchar(' ');
+ c++;
+ }
+ printf("%.*s", (int) (n - l), l);
+ newline(c);
+
+ if (!*n)
+ break;
+ l = n + 1;
+ }
+ } else {
+ newline(c);
+ }
+ }
+}
+
+dev_names bchu_fs_get_devices(struct bchfs_handle fs)
+{
+ DIR *dir = fdopendir(fs.sysfs_fd);
+ struct dirent *d;
+ dev_names devs;
+
+ darray_init(&devs);
+
+ while ((errno = 0), (d = readdir(dir))) {
+ struct dev_name n = { 0, NULL, NULL };
+
+ if (sscanf(d->d_name, "dev-%u", &n.idx) != 1)
+ continue;
+
+ char *block_attr = mprintf("dev-%u/block", n.idx);
+
+ char sysfs_block_buf[4096];
+ ssize_t r = readlinkat(fs.sysfs_fd, block_attr,
+ sysfs_block_buf, sizeof(sysfs_block_buf));
+ if (r > 0) {
+ sysfs_block_buf[r] = '\0';
+ n.dev = strdup(basename(sysfs_block_buf));
+ }
+
+ free(block_attr);
+
+ char *label_attr = mprintf("dev-%u/label", n.idx);
+ n.label = read_file_str(fs.sysfs_fd, label_attr);
+ free(label_attr);
+
+ char *durability_attr = mprintf("dev-%u/durability", n.idx);
+ n.durability = read_file_u64(fs.sysfs_fd, durability_attr);
+ free(durability_attr);
+
+ darray_push(&devs, n);
+ }
+
+ closedir(dir);
+
+ return devs;
+}
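
For orientation, the option helpers above are normally driven from a command's main() in two steps: bch2_cmdline_opts_get() strips the recognised --option[=value] arguments out of argv, then bch2_parse_opts() turns the collected strings into a struct bch_opts. A sketch of that sequence (illustrative; OPT_FORMAT stands in for whichever opt_types mask the command accepts):

#include "libbcachefs.h"

int example_cmd_main(int argc, char *argv[])
{
	struct bch_opt_strs fs_opt_strs =
		bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
	struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);

	/* argc/argv now hold only the arguments that were not bcachefs options */
	(void) fs_opts;

	bch2_opt_strs_free(&fs_opt_strs);
	return 0;
}
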
diff --git a/c_src/libbcachefs.h b/c_src/libbcachefs.h
new file mode 100644
index 00000000..b189a208
--- /dev/null
+++ b/c_src/libbcachefs.h
@@ -0,0 +1,267 @@
+#ifndef _LIBBCACHE_H
+#define _LIBBCACHE_H
+
+#include <linux/uuid.h>
+#include <stdbool.h>
+
+#include "libbcachefs/bcachefs_format.h"
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/vstructs.h"
+#include "tools-util.h"
+
+/* option parsing */
+
+#define SUPERBLOCK_SIZE_DEFAULT 2048 /* 1 MB */
+
+struct bch_opt_strs {
+union {
+ char *by_id[bch2_opts_nr];
+struct {
+#define x(_name, ...) char *_name;
+ BCH_OPTS()
+#undef x
+};
+};
+};
+
+void bch2_opt_strs_free(struct bch_opt_strs *);
+struct bch_opt_strs bch2_cmdline_opts_get(int *, char *[], unsigned);
+struct bch_opts bch2_parse_opts(struct bch_opt_strs);
+void bch2_opts_usage(unsigned);
+
+struct format_opts {
+ char *label;
+ __uuid_t uuid;
+ unsigned version;
+ unsigned superblock_size;
+ bool encrypted;
+ char *passphrase;
+};
+
+static inline struct format_opts format_opts_default()
+{
+ unsigned version = !access( "/sys/module/bcachefs/parameters/version", R_OK)
+ ? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
+ : bcachefs_metadata_version_current;
+
+ return (struct format_opts) {
+ .version = version,
+ .superblock_size = SUPERBLOCK_SIZE_DEFAULT,
+ };
+}
+
+struct dev_opts {
+ struct block_device *bdev;
+ char *path;
+ u64 size; /* bytes */
+ u64 bucket_size; /* bytes */
+ const char *label;
+ unsigned data_allowed;
+ unsigned durability;
+ bool discard;
+
+ u64 nbuckets;
+
+ u64 sb_offset;
+ u64 sb_end;
+};
+
+static inline struct dev_opts dev_opts_default()
+{
+ return (struct dev_opts) {
+ .data_allowed = ~0U << 2,
+ .durability = 1,
+ };
+}
+
+u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
+void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);
+
+struct bch_sb *bch2_format(struct bch_opt_strs,
+ struct bch_opts,
+ struct format_opts, struct dev_opts *, size_t);
+
+void bch2_super_write(int, struct bch_sb *);
+struct bch_sb *__bch2_super_read(int, u64);
+
+/* ioctl interface: */
+
+int bcachectl_open(void);
+
+struct bchfs_handle {
+ __uuid_t uuid;
+ int ioctl_fd;
+ int sysfs_fd;
+};
+
+void bcache_fs_close(struct bchfs_handle);
+struct bchfs_handle bcache_fs_open(const char *);
+struct bchfs_handle bchu_fs_open_by_dev(const char *, int *);
+int bchu_dev_path_to_idx(struct bchfs_handle, const char *);
+
+static inline void bchu_disk_add(struct bchfs_handle fs, char *dev)
+{
+ struct bch_ioctl_disk i = { .dev = (unsigned long) dev, };
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ADD, &i);
+}
+
+static inline void bchu_disk_remove(struct bchfs_handle fs, unsigned dev_idx,
+ unsigned flags)
+{
+ struct bch_ioctl_disk i = {
+ .flags = flags|BCH_BY_INDEX,
+ .dev = dev_idx,
+ };
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_REMOVE, &i);
+}
+
+static inline void bchu_disk_online(struct bchfs_handle fs, char *dev)
+{
+ struct bch_ioctl_disk i = { .dev = (unsigned long) dev, };
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ONLINE, &i);
+}
+
+static inline void bchu_disk_offline(struct bchfs_handle fs, unsigned dev_idx,
+ unsigned flags)
+{
+ struct bch_ioctl_disk i = {
+ .flags = flags|BCH_BY_INDEX,
+ .dev = dev_idx,
+ };
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_OFFLINE, &i);
+}
+
+static inline void bchu_disk_set_state(struct bchfs_handle fs, unsigned dev,
+ unsigned new_state, unsigned flags)
+{
+ struct bch_ioctl_disk_set_state i = {
+ .flags = flags|BCH_BY_INDEX,
+ .new_state = new_state,
+ .dev = dev,
+ };
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_SET_STATE, &i);
+}
+
+static inline struct bch_ioctl_fs_usage *bchu_fs_usage(struct bchfs_handle fs)
+{
+ struct bch_ioctl_fs_usage *u = NULL;
+ size_t replica_entries_bytes = 4096;
+
+ while (1) {
+ u = xrealloc(u, sizeof(*u) + replica_entries_bytes);
+ u->replica_entries_bytes = replica_entries_bytes;
+
+ if (!ioctl(fs.ioctl_fd, BCH_IOCTL_FS_USAGE, u))
+ return u;
+
+ if (errno != ERANGE)
+ die("BCH_IOCTL_FS_USAGE error: %m");
+
+ replica_entries_bytes *= 2;
+ }
+}
+
+static inline struct bch_ioctl_dev_usage_v2 *bchu_dev_usage(struct bchfs_handle fs,
+ unsigned idx)
+{
+ struct bch_ioctl_dev_usage_v2 *u = xcalloc(sizeof(*u) + sizeof(u->d[0]) * BCH_DATA_NR, 1);
+
+ u->dev = idx;
+ u->flags = BCH_BY_INDEX;
+ u->nr_data_types = BCH_DATA_NR;
+
+ if (!ioctl(fs.ioctl_fd, BCH_IOCTL_DEV_USAGE_V2, u))
+ return u;
+
+ struct bch_ioctl_dev_usage u_v1 = { .dev = idx, .flags = BCH_BY_INDEX};
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DEV_USAGE, &u_v1);
+
+ u->state = u_v1.state;
+ u->nr_data_types = ARRAY_SIZE(u_v1.d);
+ u->bucket_size = u_v1.bucket_size;
+ u->nr_buckets = u_v1.nr_buckets;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(u_v1.d); i++)
+ u->d[i] = u_v1.d[i];
+
+ return u;
+}
+
+static inline struct bch_sb *bchu_read_super(struct bchfs_handle fs, unsigned idx)
+{
+ size_t size = 4096;
+ struct bch_sb *sb = NULL;
+
+ while (1) {
+ sb = xrealloc(sb, size);
+ struct bch_ioctl_read_super i = {
+ .size = size,
+ .sb = (unsigned long) sb,
+ };
+
+ if (idx != -1) {
+ i.flags |= BCH_READ_DEV|BCH_BY_INDEX;
+ i.dev = idx;
+ }
+
+ if (!ioctl(fs.ioctl_fd, BCH_IOCTL_READ_SUPER, &i))
+ return sb;
+ if (errno != ERANGE)
+ die("BCH_IOCTL_READ_SUPER error: %m");
+ size *= 2;
+ }
+}
+
+static inline unsigned bchu_disk_get_idx(struct bchfs_handle fs, dev_t dev)
+{
+ struct bch_ioctl_disk_get_idx i = { .dev = dev };
+
+ return xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_GET_IDX, &i);
+}
+
+static inline void bchu_disk_resize(struct bchfs_handle fs,
+ unsigned idx,
+ u64 nbuckets)
+{
+ struct bch_ioctl_disk_resize i = {
+ .flags = BCH_BY_INDEX,
+ .dev = idx,
+ .nbuckets = nbuckets,
+ };
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_RESIZE, &i);
+}
+
+static inline void bchu_disk_resize_journal(struct bchfs_handle fs,
+ unsigned idx,
+ u64 nbuckets)
+{
+ struct bch_ioctl_disk_resize i = {
+ .flags = BCH_BY_INDEX,
+ .dev = idx,
+ .nbuckets = nbuckets,
+ };
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_RESIZE_JOURNAL, &i);
+}
+
+int bchu_data(struct bchfs_handle, struct bch_ioctl_data);
+
+struct dev_name {
+ unsigned idx;
+ char *dev;
+ char *label;
+ uuid_t uuid;
+ unsigned durability;
+};
+typedef DARRAY(struct dev_name) dev_names;
+
+dev_names bchu_fs_get_devices(struct bchfs_handle);
+
+#endif /* _LIBBCACHE_H */
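
A short sketch of the handle helpers declared here: open a filesystem by mount path, block device or UUID, then walk its member devices via the sysfs handle (illustrative only; note that bcache_fs_open() die()s rather than returning on error):

#include <stdio.h>
#include "libbcachefs.h"

static void example_list_devices(const char *path)
{
	struct bchfs_handle fs = bcache_fs_open(path);
	dev_names devs = bchu_fs_get_devices(fs);

	for (size_t i = 0; i < devs.nr; i++)
		printf("dev %u: %s (label %s, durability %u)\n",
		       devs.data[i].idx,
		       devs.data[i].dev ?: "(unknown)",
		       devs.data[i].label ?: "(none)",
		       devs.data[i].durability);

	/* freeing the strdup()ed strings and the darray is omitted for brevity */
	bcache_fs_close(fs);
}
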
diff --git a/c_src/libbcachefs/acl.c b/c_src/libbcachefs/acl.c
new file mode 100644
index 00000000..3640f417
--- /dev/null
+++ b/c_src/libbcachefs/acl.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+
+#include "acl.h"
+#include "xattr.h"
+
+#include <linux/posix_acl.h>
+
+static const char * const acl_types[] = {
+ [ACL_USER_OBJ] = "user_obj",
+ [ACL_USER] = "user",
+ [ACL_GROUP_OBJ] = "group_obj",
+ [ACL_GROUP] = "group",
+ [ACL_MASK] = "mask",
+ [ACL_OTHER] = "other",
+ NULL,
+};
+
+void bch2_acl_to_text(struct printbuf *out, const void *value, size_t size)
+{
+ const void *p, *end = value + size;
+
+ if (!value ||
+ size < sizeof(bch_acl_header) ||
+ ((bch_acl_header *)value)->a_version != cpu_to_le32(BCH_ACL_VERSION))
+ return;
+
+ p = value + sizeof(bch_acl_header);
+ while (p < end) {
+ const bch_acl_entry *in = p;
+ unsigned tag = le16_to_cpu(in->e_tag);
+
+ prt_str(out, acl_types[tag]);
+
+ switch (tag) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ p += sizeof(bch_acl_entry_short);
+ break;
+ case ACL_USER:
+ prt_printf(out, " uid %u", le32_to_cpu(in->e_id));
+ p += sizeof(bch_acl_entry);
+ break;
+ case ACL_GROUP:
+ prt_printf(out, " gid %u", le32_to_cpu(in->e_id));
+ p += sizeof(bch_acl_entry);
+ break;
+ }
+
+ prt_printf(out, " %o", le16_to_cpu(in->e_perm));
+
+ if (p != end)
+ prt_char(out, ' ');
+ }
+}
+
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+
+#include "fs.h"
+
+#include <linux/fs.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long)
+{
+ return sizeof(bch_acl_header) +
+ sizeof(bch_acl_entry_short) * nr_short +
+ sizeof(bch_acl_entry) * nr_long;
+}
+
+static inline int acl_to_xattr_type(int type)
+{
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS;
+ case ACL_TYPE_DEFAULT:
+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Convert from filesystem to in-memory representation.
+ */
+static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans,
+ const void *value, size_t size)
+{
+ const void *p, *end = value + size;
+ struct posix_acl *acl;
+ struct posix_acl_entry *out;
+ unsigned count = 0;
+ int ret;
+
+ if (!value)
+ return NULL;
+ if (size < sizeof(bch_acl_header))
+ goto invalid;
+ if (((bch_acl_header *)value)->a_version !=
+ cpu_to_le32(BCH_ACL_VERSION))
+ goto invalid;
+
+ p = value + sizeof(bch_acl_header);
+ while (p < end) {
+ const bch_acl_entry *entry = p;
+
+ if (p + sizeof(bch_acl_entry_short) > end)
+ goto invalid;
+
+ switch (le16_to_cpu(entry->e_tag)) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ p += sizeof(bch_acl_entry_short);
+ break;
+ case ACL_USER:
+ case ACL_GROUP:
+ p += sizeof(bch_acl_entry);
+ break;
+ default:
+ goto invalid;
+ }
+
+ count++;
+ }
+
+ if (p > end)
+ goto invalid;
+
+ if (!count)
+ return NULL;
+
+ acl = allocate_dropping_locks(trans, ret,
+ posix_acl_alloc(count, _gfp));
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+ if (ret) {
+ kfree(acl);
+ return ERR_PTR(ret);
+ }
+
+ out = acl->a_entries;
+
+ p = value + sizeof(bch_acl_header);
+ while (p < end) {
+ const bch_acl_entry *in = p;
+
+ out->e_tag = le16_to_cpu(in->e_tag);
+ out->e_perm = le16_to_cpu(in->e_perm);
+
+ switch (out->e_tag) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ p += sizeof(bch_acl_entry_short);
+ break;
+ case ACL_USER:
+ out->e_uid = make_kuid(&init_user_ns,
+ le32_to_cpu(in->e_id));
+ p += sizeof(bch_acl_entry);
+ break;
+ case ACL_GROUP:
+ out->e_gid = make_kgid(&init_user_ns,
+ le32_to_cpu(in->e_id));
+ p += sizeof(bch_acl_entry);
+ break;
+ }
+
+ out++;
+ }
+
+ BUG_ON(out != acl->a_entries + acl->a_count);
+
+ return acl;
+invalid:
+ pr_err("invalid acl entry");
+ return ERR_PTR(-EINVAL);
+}
+
+#define acl_for_each_entry(acl, acl_e) \
+ for (acl_e = acl->a_entries; \
+ acl_e < acl->a_entries + acl->a_count; \
+ acl_e++)
+
+/*
+ * Convert from in-memory to filesystem representation.
+ */
+static struct bkey_i_xattr *
+bch2_acl_to_xattr(struct btree_trans *trans,
+ const struct posix_acl *acl,
+ int type)
+{
+ struct bkey_i_xattr *xattr;
+ bch_acl_header *acl_header;
+ const struct posix_acl_entry *acl_e;
+ void *outptr;
+ unsigned nr_short = 0, nr_long = 0, acl_len, u64s;
+
+ acl_for_each_entry(acl, acl_e) {
+ switch (acl_e->e_tag) {
+ case ACL_USER:
+ case ACL_GROUP:
+ nr_long++;
+ break;
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ nr_short++;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ acl_len = bch2_acl_size(nr_short, nr_long);
+ u64s = BKEY_U64s + xattr_val_u64s(0, acl_len);
+
+ if (u64s > U8_MAX)
+ return ERR_PTR(-E2BIG);
+
+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
+ if (IS_ERR(xattr))
+ return xattr;
+
+ bkey_xattr_init(&xattr->k_i);
+ xattr->k.u64s = u64s;
+ xattr->v.x_type = acl_to_xattr_type(type);
+ xattr->v.x_name_len = 0;
+ xattr->v.x_val_len = cpu_to_le16(acl_len);
+
+ acl_header = xattr_val(&xattr->v);
+ acl_header->a_version = cpu_to_le32(BCH_ACL_VERSION);
+
+ outptr = (void *) acl_header + sizeof(*acl_header);
+
+ acl_for_each_entry(acl, acl_e) {
+ bch_acl_entry *entry = outptr;
+
+ entry->e_tag = cpu_to_le16(acl_e->e_tag);
+ entry->e_perm = cpu_to_le16(acl_e->e_perm);
+ switch (acl_e->e_tag) {
+ case ACL_USER:
+ entry->e_id = cpu_to_le32(
+ from_kuid(&init_user_ns, acl_e->e_uid));
+ outptr += sizeof(bch_acl_entry);
+ break;
+ case ACL_GROUP:
+ entry->e_id = cpu_to_le32(
+ from_kgid(&init_user_ns, acl_e->e_gid));
+ outptr += sizeof(bch_acl_entry);
+ break;
+
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ outptr += sizeof(bch_acl_entry_short);
+ break;
+ }
+ }
+
+ BUG_ON(outptr != xattr_val(&xattr->v) + acl_len);
+
+ return xattr;
+}
+
+struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry, int type)
+{
+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
+ struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter = { NULL };
+ struct bkey_s_c_xattr xattr;
+ struct posix_acl *acl = NULL;
+ struct bkey_s_c k;
+ int ret;
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
+ &hash, inode_inum(inode), &search, 0);
+ if (ret) {
+ if (!bch2_err_matches(ret, ENOENT))
+ acl = ERR_PTR(ret);
+ goto out;
+ }
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret) {
+ acl = ERR_PTR(ret);
+ goto out;
+ }
+
+ xattr = bkey_s_c_to_xattr(k);
+ acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
+ le16_to_cpu(xattr.v->x_val_len));
+
+ if (!IS_ERR(acl))
+ set_cached_acl(&inode->v, type, acl);
+out:
+ if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ return acl;
+}
+
+int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum,
+ struct bch_inode_unpacked *inode_u,
+ struct posix_acl *acl, int type)
+{
+ struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode_u);
+ int ret;
+
+ if (type == ACL_TYPE_DEFAULT &&
+ !S_ISDIR(inode_u->bi_mode))
+ return acl ? -EACCES : 0;
+
+ if (acl) {
+ struct bkey_i_xattr *xattr =
+ bch2_acl_to_xattr(trans, acl, type);
+ if (IS_ERR(xattr))
+ return PTR_ERR(xattr);
+
+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, &hash_info,
+ inum, &xattr->k_i, 0);
+ } else {
+ struct xattr_search_key search =
+ X_SEARCH(acl_to_xattr_type(type), "", 0);
+
+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, &hash_info,
+ inum, &search);
+ }
+
+ return bch2_err_matches(ret, ENOENT) ? 0 : ret;
+}
+
+int bch2_set_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry,
+ struct posix_acl *_acl, int type)
+{
+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter inode_iter = { NULL };
+ struct bch_inode_unpacked inode_u;
+ struct posix_acl *acl;
+ umode_t mode;
+ int ret;
+
+ mutex_lock(&inode->ei_update_lock);
+retry:
+ bch2_trans_begin(trans);
+ acl = _acl;
+
+ ret = bch2_subvol_is_ro_trans(trans, inode->ei_subvol) ?:
+ bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto btree_err;
+
+ mode = inode_u.bi_mode;
+
+ if (type == ACL_TYPE_ACCESS) {
+ ret = posix_acl_update_mode(idmap, &inode->v, &mode, &acl);
+ if (ret)
+ goto btree_err;
+ }
+
+ ret = bch2_set_acl_trans(trans, inode_inum(inode), &inode_u, acl, type);
+ if (ret)
+ goto btree_err;
+
+ inode_u.bi_ctime = bch2_current_time(c);
+ inode_u.bi_mode = mode;
+
+ ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
+btree_err:
+ bch2_trans_iter_exit(trans, &inode_iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+ if (unlikely(ret))
+ goto err;
+
+ bch2_inode_update_after_write(trans, inode, &inode_u,
+ ATTR_CTIME|ATTR_MODE);
+
+ set_cached_acl(&inode->v, type, acl);
+err:
+ mutex_unlock(&inode->ei_update_lock);
+ bch2_trans_put(trans);
+
+ return ret;
+}
+
+int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
+ struct bch_inode_unpacked *inode,
+ umode_t mode,
+ struct posix_acl **new_acl)
+{
+ struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode);
+ struct xattr_search_key search = X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0);
+ struct btree_iter iter;
+ struct bkey_s_c_xattr xattr;
+ struct bkey_i_xattr *new;
+ struct posix_acl *acl = NULL;
+ struct bkey_s_c k;
+ int ret;
+
+ ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
+ &hash_info, inum, &search, BTREE_ITER_INTENT);
+ if (ret)
+ return bch2_err_matches(ret, ENOENT) ? 0 : ret;
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+ xattr = bkey_s_c_to_xattr(k);
+
+ acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
+ le16_to_cpu(xattr.v->x_val_len));
+ ret = PTR_ERR_OR_ZERO(acl);
+ if (IS_ERR_OR_NULL(acl))
+ goto err;
+
+ ret = allocate_dropping_locks_errcode(trans,
+ __posix_acl_chmod(&acl, _gfp, mode));
+ if (ret)
+ goto err;
+
+ new = bch2_acl_to_xattr(trans, acl, ACL_TYPE_ACCESS);
+ if (IS_ERR(new)) {
+ ret = PTR_ERR(new);
+ goto err;
+ }
+
+ new->k.p = iter.pos;
+ ret = bch2_trans_update(trans, &iter, &new->k_i, 0);
+ *new_acl = acl;
+ acl = NULL;
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ if (!IS_ERR_OR_NULL(acl))
+ kfree(acl);
+ return ret;
+}
+
+#endif /* CONFIG_BCACHEFS_POSIX_ACL */
diff --git a/c_src/libbcachefs/acl.h b/c_src/libbcachefs/acl.h
new file mode 100644
index 00000000..27e7eec0
--- /dev/null
+++ b/c_src/libbcachefs/acl.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ACL_H
+#define _BCACHEFS_ACL_H
+
+struct bch_inode_unpacked;
+struct bch_hash_info;
+struct bch_inode_info;
+struct posix_acl;
+
+#define BCH_ACL_VERSION 0x0001
+
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+} bch_acl_entry;
+
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+} bch_acl_entry_short;
+
+typedef struct {
+ __le32 a_version;
+} bch_acl_header;
+
+void bch2_acl_to_text(struct printbuf *, const void *, size_t);
+
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+
+struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int);
+
+int bch2_set_acl_trans(struct btree_trans *, subvol_inum,
+ struct bch_inode_unpacked *,
+ struct posix_acl *, int);
+int bch2_set_acl(struct mnt_idmap *, struct dentry *, struct posix_acl *, int);
+int bch2_acl_chmod(struct btree_trans *, subvol_inum,
+ struct bch_inode_unpacked *,
+ umode_t, struct posix_acl **);
+
+#else
+
+static inline int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum,
+ struct bch_inode_unpacked *inode_u,
+ struct posix_acl *acl, int type)
+{
+ return 0;
+}
+
+static inline int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
+ struct bch_inode_unpacked *inode,
+ umode_t mode,
+ struct posix_acl **new_acl)
+{
+ return 0;
+}
+
+#endif /* CONFIG_BCACHEFS_POSIX_ACL */
+
+#endif /* _BCACHEFS_ACL_H */
diff --git a/c_src/libbcachefs/alloc_background.c b/c_src/libbcachefs/alloc_background.c
new file mode 100644
index 00000000..a09b9d00
--- /dev/null
+++ b/c_src/libbcachefs/alloc_background.c
@@ -0,0 +1,2217 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "backpointers.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_key_cache.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_gc.h"
+#include "btree_write_buffer.h"
+#include "buckets.h"
+#include "buckets_waiting_for_journal.h"
+#include "clock.h"
+#include "debug.h"
+#include "ec.h"
+#include "error.h"
+#include "lru.h"
+#include "recovery.h"
+#include "trace.h"
+#include "varint.h"
+
+#include <linux/kthread.h>
+#include <linux/math64.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/sched/task.h>
+#include <linux/sort.h>
+
+/* Persistent alloc info: */
+
+static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
+#define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8,
+ BCH_ALLOC_FIELDS_V1()
+#undef x
+};
+
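+/*
+ * The table above is generated by the BCH_ALLOC_FIELDS_V1() x-macro from
+ * bcachefs_format.h.  As an illustration, an entry of the form
+ * x(read_time, 16) would expand to
+ *
+ *	[BCH_ALLOC_FIELD_V1_read_time] = 16 / 8,
+ *
+ * i.e. each v1 field is mapped to its packed size in bytes, which
+ * alloc_field_v1_get() below uses to walk the variable-length value.
+ */
+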
+struct bkey_alloc_unpacked {
+ u64 journal_seq;
+ u8 gen;
+ u8 oldest_gen;
+ u8 data_type;
+ bool need_discard:1;
+ bool need_inc_gen:1;
+#define x(_name, _bits) u##_bits _name;
+ BCH_ALLOC_FIELDS_V2()
+#undef x
+};
+
+static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
+ const void **p, unsigned field)
+{
+ unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field];
+ u64 v;
+
+ if (!(a->fields & (1 << field)))
+ return 0;
+
+ switch (bytes) {
+ case 1:
+ v = *((const u8 *) *p);
+ break;
+ case 2:
+ v = le16_to_cpup(*p);
+ break;
+ case 4:
+ v = le32_to_cpup(*p);
+ break;
+ case 8:
+ v = le64_to_cpup(*p);
+ break;
+ default:
+ BUG();
+ }
+
+ *p += bytes;
+ return v;
+}
+
+static void bch2_alloc_unpack_v1(struct bkey_alloc_unpacked *out,
+ struct bkey_s_c k)
+{
+ const struct bch_alloc *in = bkey_s_c_to_alloc(k).v;
+ const void *d = in->data;
+ unsigned idx = 0;
+
+ out->gen = in->gen;
+
+#define x(_name, _bits) out->_name = alloc_field_v1_get(in, &d, idx++);
+ BCH_ALLOC_FIELDS_V1()
+#undef x
+}
+
+static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_alloc_v2 a = bkey_s_c_to_alloc_v2(k);
+ const u8 *in = a.v->data;
+ const u8 *end = bkey_val_end(a);
+ unsigned fieldnr = 0;
+ int ret;
+ u64 v;
+
+ out->gen = a.v->gen;
+ out->oldest_gen = a.v->oldest_gen;
+ out->data_type = a.v->data_type;
+
+#define x(_name, _bits) \
+ if (fieldnr < a.v->nr_fields) { \
+ ret = bch2_varint_decode_fast(in, end, &v); \
+ if (ret < 0) \
+ return ret; \
+ in += ret; \
+ } else { \
+ v = 0; \
+ } \
+ out->_name = v; \
+ if (v != out->_name) \
+ return -1; \
+ fieldnr++;
+
+ BCH_ALLOC_FIELDS_V2()
+#undef x
+ return 0;
+}
+
+static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3(k);
+ const u8 *in = a.v->data;
+ const u8 *end = bkey_val_end(a);
+ unsigned fieldnr = 0;
+ int ret;
+ u64 v;
+
+ out->gen = a.v->gen;
+ out->oldest_gen = a.v->oldest_gen;
+ out->data_type = a.v->data_type;
+ out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v);
+ out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v);
+ out->journal_seq = le64_to_cpu(a.v->journal_seq);
+
+#define x(_name, _bits) \
+ if (fieldnr < a.v->nr_fields) { \
+ ret = bch2_varint_decode_fast(in, end, &v); \
+ if (ret < 0) \
+ return ret; \
+ in += ret; \
+ } else { \
+ v = 0; \
+ } \
+ out->_name = v; \
+ if (v != out->_name) \
+ return -1; \
+ fieldnr++;
+
+ BCH_ALLOC_FIELDS_V2()
+#undef x
+ return 0;
+}
+
+static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
+{
+ struct bkey_alloc_unpacked ret = { .gen = 0 };
+
+ switch (k.k->type) {
+ case KEY_TYPE_alloc:
+ bch2_alloc_unpack_v1(&ret, k);
+ break;
+ case KEY_TYPE_alloc_v2:
+ bch2_alloc_unpack_v2(&ret, k);
+ break;
+ case KEY_TYPE_alloc_v3:
+ bch2_alloc_unpack_v3(&ret, k);
+ break;
+ }
+
+ return ret;
+}
+
+static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
+{
+ unsigned i, bytes = offsetof(struct bch_alloc, data);
+
+ for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_V1_FIELD_BYTES); i++)
+ if (a->fields & (1 << i))
+ bytes += BCH_ALLOC_V1_FIELD_BYTES[i];
+
+ return DIV_ROUND_UP(bytes, sizeof(u64));
+}
+
+int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
+ int ret = 0;
+
+ /* allow for unknown fields */
+ bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err,
+ alloc_v1_val_size_bad,
+ "incorrect value size (%zu < %u)",
+ bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v));
+fsck_err:
+ return ret;
+}
+
+int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_alloc_unpacked u;
+ int ret = 0;
+
+ bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err,
+ alloc_v2_unpack_error,
+ "unpack error");
+fsck_err:
+ return ret;
+}
+
+int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_alloc_unpacked u;
+ int ret = 0;
+
+ bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err,
+ alloc_v2_unpack_error,
+ "unpack error");
+fsck_err:
+ return ret;
+}
+
+int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags, struct printbuf *err)
+{
+ struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
+ int ret = 0;
+
+ bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err,
+ alloc_v4_val_size_bad,
+ "bad val size (%u > %zu)",
+ alloc_v4_u64s(a.v), bkey_val_u64s(k.k));
+
+ bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
+ BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err,
+ alloc_v4_backpointers_start_bad,
+ "invalid backpointers_start");
+
+ bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err,
+ alloc_key_data_type_bad,
+ "invalid data type (got %u should be %u)",
+ a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
+
+ switch (a.v->data_type) {
+ case BCH_DATA_free:
+ case BCH_DATA_need_gc_gens:
+ case BCH_DATA_need_discard:
+ bkey_fsck_err_on(bch2_bucket_sectors(*a.v) || a.v->stripe,
+ c, err, alloc_key_empty_but_have_data,
+ "empty data type free but have data");
+ break;
+ case BCH_DATA_sb:
+ case BCH_DATA_journal:
+ case BCH_DATA_btree:
+ case BCH_DATA_user:
+ case BCH_DATA_parity:
+ bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v),
+ c, err, alloc_key_dirty_sectors_0,
+ "data_type %s but dirty_sectors==0",
+ bch2_data_types[a.v->data_type]);
+ break;
+ case BCH_DATA_cached:
+ bkey_fsck_err_on(!a.v->cached_sectors ||
+ bch2_bucket_sectors_dirty(*a.v) ||
+ a.v->stripe,
+ c, err, alloc_key_cached_inconsistency,
+ "data type inconsistency");
+
+ bkey_fsck_err_on(!a.v->io_time[READ] &&
+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
+ c, err, alloc_key_cached_but_read_time_zero,
+ "cached bucket with read_time == 0");
+ break;
+ case BCH_DATA_stripe:
+ break;
+ }
+fsck_err:
+ return ret;
+}
+
+void bch2_alloc_v4_swab(struct bkey_s k)
+{
+ struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
+ struct bch_backpointer *bp, *bps;
+
+ a->journal_seq = swab64(a->journal_seq);
+ a->flags = swab32(a->flags);
+ a->dirty_sectors = swab32(a->dirty_sectors);
+ a->cached_sectors = swab32(a->cached_sectors);
+ a->io_time[0] = swab64(a->io_time[0]);
+ a->io_time[1] = swab64(a->io_time[1]);
+ a->stripe = swab32(a->stripe);
+ a->nr_external_backpointers = swab32(a->nr_external_backpointers);
+ a->fragmentation_lru = swab64(a->fragmentation_lru);
+
+ bps = alloc_v4_backpointers(a);
+ for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) {
+ bp->bucket_offset = swab40(bp->bucket_offset);
+ bp->bucket_len = swab32(bp->bucket_len);
+ bch2_bpos_swab(&bp->pos);
+ }
+}
+
+void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bch_alloc_v4 _a;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
+ unsigned i;
+
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+
+ prt_printf(out, "gen %u oldest_gen %u data_type %s",
+ a->gen, a->oldest_gen,
+ a->data_type < BCH_DATA_NR
+ ? bch2_data_types[a->data_type]
+ : "(invalid data type)");
+ prt_newline(out);
+ prt_printf(out, "journal_seq %llu", a->journal_seq);
+ prt_newline(out);
+ prt_printf(out, "need_discard %llu", BCH_ALLOC_V4_NEED_DISCARD(a));
+ prt_newline(out);
+ prt_printf(out, "need_inc_gen %llu", BCH_ALLOC_V4_NEED_INC_GEN(a));
+ prt_newline(out);
+ prt_printf(out, "dirty_sectors %u", a->dirty_sectors);
+ prt_newline(out);
+ prt_printf(out, "cached_sectors %u", a->cached_sectors);
+ prt_newline(out);
+ prt_printf(out, "stripe %u", a->stripe);
+ prt_newline(out);
+ prt_printf(out, "stripe_redundancy %u", a->stripe_redundancy);
+ prt_newline(out);
+ prt_printf(out, "io_time[READ] %llu", a->io_time[READ]);
+ prt_newline(out);
+ prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]);
+ prt_newline(out);
+ prt_printf(out, "fragmentation %llu", a->fragmentation_lru);
+ prt_newline(out);
+ prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a));
+ prt_newline(out);
+
+ if (BCH_ALLOC_V4_NR_BACKPOINTERS(a)) {
+ struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k);
+ const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v);
+
+ prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v));
+ printbuf_indent_add(out, 2);
+
+ for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) {
+ prt_newline(out);
+ bch2_backpointer_to_text(out, &bps[i]);
+ }
+
+ printbuf_indent_sub(out, 2);
+ }
+
+ printbuf_indent_sub(out, 2);
+}
+
+void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
+{
+ if (k.k->type == KEY_TYPE_alloc_v4) {
+ void *src, *dst;
+
+ *out = *bkey_s_c_to_alloc_v4(k).v;
+
+ src = alloc_v4_backpointers(out);
+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
+ dst = alloc_v4_backpointers(out);
+
+ if (src < dst)
+ memset(src, 0, dst - src);
+
+ SET_BCH_ALLOC_V4_NR_BACKPOINTERS(out, 0);
+ } else {
+ struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
+
+ *out = (struct bch_alloc_v4) {
+ .journal_seq = u.journal_seq,
+ .flags = u.need_discard,
+ .gen = u.gen,
+ .oldest_gen = u.oldest_gen,
+ .data_type = u.data_type,
+ .stripe_redundancy = u.stripe_redundancy,
+ .dirty_sectors = u.dirty_sectors,
+ .cached_sectors = u.cached_sectors,
+ .io_time[READ] = u.read_time,
+ .io_time[WRITE] = u.write_time,
+ .stripe = u.stripe,
+ };
+
+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
+ }
+}
+
+static noinline struct bkey_i_alloc_v4 *
+__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+{
+ struct bkey_i_alloc_v4 *ret;
+
+ ret = bch2_trans_kmalloc(trans, max(bkey_bytes(k.k), sizeof(struct bkey_i_alloc_v4)));
+ if (IS_ERR(ret))
+ return ret;
+
+ if (k.k->type == KEY_TYPE_alloc_v4) {
+ void *src, *dst;
+
+ bkey_reassemble(&ret->k_i, k);
+
+ src = alloc_v4_backpointers(&ret->v);
+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
+ dst = alloc_v4_backpointers(&ret->v);
+
+ if (src < dst)
+ memset(src, 0, dst - src);
+
+ SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v, 0);
+ set_alloc_v4_u64s(ret);
+ } else {
+ bkey_alloc_v4_init(&ret->k_i);
+ ret->k.p = k.k->p;
+ bch2_alloc_to_v4(k, &ret->v);
+ }
+ return ret;
+}
+
+static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
+{
+ struct bkey_s_c_alloc_v4 a;
+
+ if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
+ ((a = bkey_s_c_to_alloc_v4(k), true) &&
+ BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) == 0))
+ return bch2_bkey_make_mut_noupdate_typed(trans, k, alloc_v4);
+
+ return __bch2_alloc_to_v4_mut(trans, k);
+}
+
+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+{
+ return bch2_alloc_to_v4_mut_inlined(trans, k);
+}
+
+struct bkey_i_alloc_v4 *
+bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
+ struct bpos pos)
+{
+ struct bkey_s_c k;
+ struct bkey_i_alloc_v4 *a;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, iter, BTREE_ID_alloc, pos,
+ BTREE_ITER_WITH_UPDATES|
+ BTREE_ITER_CACHED|
+ BTREE_ITER_INTENT);
+ ret = bkey_err(k);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+
+ a = bch2_alloc_to_v4_mut_inlined(trans, k);
+ ret = PTR_ERR_OR_ZERO(a);
+ if (unlikely(ret))
+ goto err;
+ return a;
+err:
+ bch2_trans_iter_exit(trans, iter);
+ return ERR_PTR(ret);
+}
+
+static struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset)
+{
+ *offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK;
+
+ pos.offset >>= KEY_TYPE_BUCKET_GENS_BITS;
+ return pos;
+}
+
+static struct bpos bucket_gens_pos_to_alloc(struct bpos pos, unsigned offset)
+{
+ pos.offset <<= KEY_TYPE_BUCKET_GENS_BITS;
+ pos.offset += offset;
+ return pos;
+}
+
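+/*
+ * A bucket_gens key covers a run of 2^KEY_TYPE_BUCKET_GENS_BITS buckets,
+ * one generation number per bucket in its gens[] array.  For example,
+ * assuming KEY_TYPE_BUCKET_GENS_BITS is 8, the alloc position
+ * (device 1, bucket 1000) maps to the bucket_gens key at offset
+ * 1000 >> 8 = 3, slot 1000 & 255 = 232 within that key;
+ * bucket_gens_pos_to_alloc() inverts the mapping.
+ */
+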
+static unsigned alloc_gen(struct bkey_s_c k, unsigned offset)
+{
+ return k.k->type == KEY_TYPE_bucket_gens
+ ? bkey_s_c_to_bucket_gens(k).v->gens[offset]
+ : 0;
+}
+
+int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err,
+ bucket_gens_val_size_bad,
+ "bad val size (%zu != %zu)",
+ bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens));
+fsck_err:
+ return ret;
+}
+
+void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_s_c_bucket_gens g = bkey_s_c_to_bucket_gens(k);
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(g.v->gens); i++) {
+ if (i)
+ prt_char(out, ' ');
+ prt_printf(out, "%u", g.v->gens[i]);
+ }
+}
+
+int bch2_bucket_gens_init(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct bkey_i_bucket_gens g;
+ bool have_bucket_gens_key = false;
+ int ret;
+
+ ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
+ /*
+ * Not a fsck error because this is checked/repaired by
+ * bch2_check_alloc_key() which runs later:
+ */
+ if (!bch2_dev_bucket_exists(c, k.k->p))
+ continue;
+
+ struct bch_alloc_v4 a;
+ u8 gen = bch2_alloc_to_v4(k, &a)->gen;
+ unsigned offset;
+ struct bpos pos = alloc_gens_pos(iter.pos, &offset);
+
+ if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
+ ret = commit_do(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc,
+ bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
+ if (ret)
+ break;
+ have_bucket_gens_key = false;
+ }
+
+ if (!have_bucket_gens_key) {
+ bkey_bucket_gens_init(&g.k_i);
+ g.k.p = pos;
+ have_bucket_gens_key = true;
+ }
+
+ g.v.gens[offset] = gen;
+ 0;
+ }));
+
+ if (have_bucket_gens_key && !ret)
+ ret = commit_do(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc,
+ bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
+
+ bch2_trans_put(trans);
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+int bch2_alloc_read(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ int ret;
+
+ down_read(&c->gc_lock);
+
+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
+ ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
+ u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
+ u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
+
+ if (k.k->type != KEY_TYPE_bucket_gens)
+ continue;
+
+ const struct bch_bucket_gens *g = bkey_s_c_to_bucket_gens(k).v;
+
+ /*
+ * Not a fsck error because this is checked/repaired by
+ * bch2_check_alloc_key() which runs later:
+ */
+ if (!bch2_dev_exists2(c, k.k->p.inode))
+ continue;
+
+ struct bch_dev *ca = bch_dev_bkey_exists(c, k.k->p.inode);
+
+ for (u64 b = max_t(u64, ca->mi.first_bucket, start);
+ b < min_t(u64, ca->mi.nbuckets, end);
+ b++)
+ *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
+ 0;
+ }));
+ } else {
+ ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
+ /*
+ * Not a fsck error because this is checked/repaired by
+ * bch2_check_alloc_key() which runs later:
+ */
+ if (!bch2_dev_bucket_exists(c, k.k->p))
+ continue;
+
+ struct bch_dev *ca = bch_dev_bkey_exists(c, k.k->p.inode);
+
+ struct bch_alloc_v4 a;
+ *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
+ 0;
+ }));
+ }
+
+ bch2_trans_put(trans);
+ up_read(&c->gc_lock);
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+/* Free space/discard btree: */
+
+static int bch2_bucket_do_index(struct btree_trans *trans,
+ struct bkey_s_c alloc_k,
+ const struct bch_alloc_v4 *a,
+ bool set)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, alloc_k.k->p.inode);
+ struct btree_iter iter;
+ struct bkey_s_c old;
+ struct bkey_i *k;
+ enum btree_id btree;
+ enum bch_bkey_type old_type = !set ? KEY_TYPE_set : KEY_TYPE_deleted;
+ enum bch_bkey_type new_type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ if (a->data_type != BCH_DATA_free &&
+ a->data_type != BCH_DATA_need_discard)
+ return 0;
+
+ k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
+ if (IS_ERR(k))
+ return PTR_ERR(k);
+
+ bkey_init(&k->k);
+ k->k.type = new_type;
+
+ switch (a->data_type) {
+ case BCH_DATA_free:
+ btree = BTREE_ID_freespace;
+ k->k.p = alloc_freespace_pos(alloc_k.k->p, *a);
+ bch2_key_resize(&k->k, 1);
+ break;
+ case BCH_DATA_need_discard:
+ btree = BTREE_ID_need_discard;
+ k->k.p = alloc_k.k->p;
+ break;
+ default:
+ return 0;
+ }
+
+ old = bch2_bkey_get_iter(trans, &iter, btree,
+ bkey_start_pos(&k->k),
+ BTREE_ITER_INTENT);
+ ret = bkey_err(old);
+ if (ret)
+ return ret;
+
+ if (ca->mi.freespace_initialized &&
+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info &&
+ bch2_trans_inconsistent_on(old.k->type != old_type, trans,
+ "incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n"
+ " for %s",
+ set ? "setting" : "clearing",
+ bch2_btree_id_str(btree),
+ iter.pos.inode,
+ iter.pos.offset,
+ bch2_bkey_types[old.k->type],
+ bch2_bkey_types[old_type],
+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+ ret = -EIO;
+ goto err;
+ }
+
+ ret = bch2_trans_update(trans, &iter, k, 0);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
+ struct bpos bucket, u8 gen)
+{
+ struct btree_iter iter;
+ unsigned offset;
+ struct bpos pos = alloc_gens_pos(bucket, &offset);
+ struct bkey_i_bucket_gens *g;
+ struct bkey_s_c k;
+ int ret;
+
+ g = bch2_trans_kmalloc(trans, sizeof(*g));
+ ret = PTR_ERR_OR_ZERO(g);
+ if (ret)
+ return ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_bucket_gens, pos,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_WITH_UPDATES);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ if (k.k->type != KEY_TYPE_bucket_gens) {
+ bkey_bucket_gens_init(&g->k_i);
+ g->k.p = iter.pos;
+ } else {
+ bkey_reassemble(&g->k_i, k);
+ }
+
+ g->v.gens[offset] = gen;
+
+ ret = bch2_trans_update(trans, &iter, &g->k_i, 0);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_trigger_alloc(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
+ if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
+ "alloc key for invalid device or bucket"))
+ return -EIO;
+
+ struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
+
+ struct bch_alloc_v4 old_a_convert;
+ const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
+
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
+
+ new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
+
+ if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) {
+ new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+ new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
+ SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
+ SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
+ }
+
+ if (data_type_is_empty(new_a->data_type) &&
+ BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
+ !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
+ new_a->gen++;
+ SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
+ }
+
+ if (old_a->data_type != new_a->data_type ||
+ (new_a->data_type == BCH_DATA_free &&
+ alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
+ ret = bch2_bucket_do_index(trans, old, old_a, false) ?:
+ bch2_bucket_do_index(trans, new.s_c, new_a, true);
+ if (ret)
+ return ret;
+ }
+
+ if (new_a->data_type == BCH_DATA_cached &&
+ !new_a->io_time[READ])
+ new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+
+ u64 old_lru = alloc_lru_idx_read(*old_a);
+ u64 new_lru = alloc_lru_idx_read(*new_a);
+ if (old_lru != new_lru) {
+ ret = bch2_lru_change(trans, new.k->p.inode,
+ bucket_to_u64(new.k->p),
+ old_lru, new_lru);
+ if (ret)
+ return ret;
+ }
+
+ new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a,
+ bch_dev_bkey_exists(c, new.k->p.inode));
+ if (old_a->fragmentation_lru != new_a->fragmentation_lru) {
+ ret = bch2_lru_change(trans,
+ BCH_LRU_FRAGMENTATION_START,
+ bucket_to_u64(new.k->p),
+ old_a->fragmentation_lru, new_a->fragmentation_lru);
+ if (ret)
+ return ret;
+ }
+
+ if (old_a->gen != new_a->gen) {
+ ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * need to know if we're getting called from the invalidate path or
+ * not:
+ */
+
+ if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
+ old_a->cached_sectors) {
+ ret = bch2_update_cached_sectors_list(trans, new.k->p.inode,
+ -((s64) old_a->cached_sectors));
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
+ struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
+ u64 journal_seq = trans->journal_res.seq;
+ u64 bucket_journal_seq = new_a->journal_seq;
+
+ if ((flags & BTREE_TRIGGER_INSERT) &&
+ data_type_is_empty(old_a->data_type) !=
+ data_type_is_empty(new_a->data_type) &&
+ new.k->type == KEY_TYPE_alloc_v4) {
+ struct bch_alloc_v4 *v = bkey_s_to_alloc_v4(new).v;
+
+ /*
+ * If the btree updates referring to a bucket weren't flushed
+			 * before the bucket became empty again, then we don't have
+ * to wait on a journal flush before we can reuse the bucket:
+ */
+ v->journal_seq = bucket_journal_seq =
+ data_type_is_empty(new_a->data_type) &&
+ (journal_seq == v->journal_seq ||
+ bch2_journal_noflush_seq(&c->journal, v->journal_seq))
+ ? 0 : journal_seq;
+ }
+
+ if (!data_type_is_empty(old_a->data_type) &&
+ data_type_is_empty(new_a->data_type) &&
+ bucket_journal_seq) {
+ ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+ c->journal.flushed_seq_ondisk,
+ new.k->p.inode, new.k->p.offset,
+ bucket_journal_seq);
+ if (ret) {
+ bch2_fs_fatal_error(c,
+ "error setting bucket_needs_journal_commit: %i", ret);
+ return ret;
+ }
+ }
+
+ percpu_down_read(&c->mark_lock);
+ if (new_a->gen != old_a->gen)
+ *bucket_gen(ca, new.k->p.offset) = new_a->gen;
+
+ bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
+
+ if (new_a->data_type == BCH_DATA_free &&
+ (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk))
+ closure_wake_up(&c->freelist_wait);
+
+ if (new_a->data_type == BCH_DATA_need_discard &&
+ (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk))
+ bch2_do_discards(c);
+
+ if (old_a->data_type != BCH_DATA_cached &&
+ new_a->data_type == BCH_DATA_cached &&
+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
+ bch2_do_invalidates(c);
+
+ if (new_a->data_type == BCH_DATA_need_gc_gens)
+ bch2_do_gc_gens(c);
+ percpu_up_read(&c->mark_lock);
+ }
+
+ if ((flags & BTREE_TRIGGER_GC) &&
+ (flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) {
+ struct bch_alloc_v4 new_a_convert;
+ const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert);
+
+ percpu_down_read(&c->mark_lock);
+ struct bucket *g = gc_bucket(ca, new.k->p.offset);
+
+ bucket_lock(g);
+
+ g->gen_valid = 1;
+ g->gen = new_a->gen;
+ g->data_type = new_a->data_type;
+ g->stripe = new_a->stripe;
+ g->stripe_redundancy = new_a->stripe_redundancy;
+ g->dirty_sectors = new_a->dirty_sectors;
+ g->cached_sectors = new_a->cached_sectors;
+
+ bucket_unlock(g);
+ percpu_up_read(&c->mark_lock);
+ }
+
+ return 0;
+}
+
+/*
+ * This synthesizes deleted extents for holes, similar to BTREE_ITER_SLOTS for
+ * extents style btrees, but works on non-extents btrees:
+ */
+static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole)
+{
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+
+ if (bkey_err(k))
+ return k;
+
+ if (k.k->type) {
+ return k;
+ } else {
+ struct btree_iter iter2;
+ struct bpos next;
+
+ bch2_trans_copy_iter(&iter2, iter);
+
+ struct btree_path *path = btree_iter_path(iter->trans, iter);
+ if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX))
+ end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p));
+
+ end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1));
+
+ /*
+ * btree node min/max is a closed interval, upto takes a half
+ * open interval:
+ */
+ k = bch2_btree_iter_peek_upto(&iter2, end);
+ next = iter2.pos;
+ bch2_trans_iter_exit(iter->trans, &iter2);
+
+ BUG_ON(next.offset >= iter->pos.offset + U32_MAX);
+
+ if (bkey_err(k))
+ return k;
+
+ bkey_init(hole);
+ hole->p = iter->pos;
+
+ bch2_key_resize(hole, next.offset - iter->pos.offset);
+ return (struct bkey_s_c) { hole, NULL };
+ }
+}
+
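+/*
+ * For instance, if iter is positioned at bucket 11 and the next real alloc
+ * key is at bucket 20, bch2_get_key_or_hole() returns a synthesized deleted
+ * key (type 0) spanning buckets [11, 20) rather than jumping ahead to 20;
+ * callers treat that span as a single hole.
+ */
+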
+static bool next_bucket(struct bch_fs *c, struct bpos *bucket)
+{
+ struct bch_dev *ca;
+
+ if (bch2_dev_bucket_exists(c, *bucket))
+ return true;
+
+ if (bch2_dev_exists2(c, bucket->inode)) {
+ ca = bch_dev_bkey_exists(c, bucket->inode);
+
+ if (bucket->offset < ca->mi.first_bucket) {
+ bucket->offset = ca->mi.first_bucket;
+ return true;
+ }
+
+ bucket->inode++;
+ bucket->offset = 0;
+ }
+
+ rcu_read_lock();
+ ca = __bch2_next_dev_idx(c, bucket->inode, NULL);
+ if (ca)
+ *bucket = POS(ca->dev_idx, ca->mi.first_bucket);
+ rcu_read_unlock();
+
+ return ca != NULL;
+}
+
+static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, struct bkey *hole)
+{
+ struct bch_fs *c = iter->trans->c;
+ struct bkey_s_c k;
+again:
+ k = bch2_get_key_or_hole(iter, POS_MAX, hole);
+ if (bkey_err(k))
+ return k;
+
+ if (!k.k->type) {
+ struct bpos bucket = bkey_start_pos(k.k);
+
+ if (!bch2_dev_bucket_exists(c, bucket)) {
+ if (!next_bucket(c, &bucket))
+ return bkey_s_c_null;
+
+ bch2_btree_iter_set_pos(iter, bucket);
+ goto again;
+ }
+
+ if (!bch2_dev_bucket_exists(c, k.k->p)) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
+
+ bch2_key_resize(hole, ca->mi.nbuckets - bucket.offset);
+ }
+ }
+
+ return k;
+}
+
+static noinline_for_stack
+int bch2_check_alloc_key(struct btree_trans *trans,
+ struct bkey_s_c alloc_k,
+ struct btree_iter *alloc_iter,
+ struct btree_iter *discard_iter,
+ struct btree_iter *freespace_iter,
+ struct btree_iter *bucket_gens_iter)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a;
+ unsigned discard_key_type, freespace_key_type;
+ unsigned gens_offset;
+ struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c,
+ alloc_key_to_missing_dev_bucket,
+ "alloc key for invalid device:bucket %llu:%llu",
+ alloc_k.k->p.inode, alloc_k.k->p.offset))
+ return bch2_btree_delete_at(trans, alloc_iter, 0);
+
+ ca = bch_dev_bkey_exists(c, alloc_k.k->p.inode);
+ if (!ca->mi.freespace_initialized)
+ return 0;
+
+ a = bch2_alloc_to_v4(alloc_k, &a_convert);
+
+ discard_key_type = a->data_type == BCH_DATA_need_discard ? KEY_TYPE_set : 0;
+ bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p);
+ k = bch2_btree_iter_peek_slot(discard_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type != discard_key_type &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, need_discard_key_wrong,
+ "incorrect key in need_discard btree (got %s should be %s)\n"
+ " %s",
+ bch2_bkey_types[k.k->type],
+ bch2_bkey_types[discard_key_type],
+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
+ struct bkey_i *update =
+ bch2_trans_kmalloc(trans, sizeof(*update));
+
+ ret = PTR_ERR_OR_ZERO(update);
+ if (ret)
+ goto err;
+
+ bkey_init(&update->k);
+ update->k.type = discard_key_type;
+ update->k.p = discard_iter->pos;
+
+ ret = bch2_trans_update(trans, discard_iter, update, 0);
+ if (ret)
+ goto err;
+ }
+
+ freespace_key_type = a->data_type == BCH_DATA_free ? KEY_TYPE_set : 0;
+ bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
+ k = bch2_btree_iter_peek_slot(freespace_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type != freespace_key_type &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, freespace_key_wrong,
+ "incorrect key in freespace btree (got %s should be %s)\n"
+ " %s",
+ bch2_bkey_types[k.k->type],
+ bch2_bkey_types[freespace_key_type],
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
+ struct bkey_i *update =
+ bch2_trans_kmalloc(trans, sizeof(*update));
+
+ ret = PTR_ERR_OR_ZERO(update);
+ if (ret)
+ goto err;
+
+ bkey_init(&update->k);
+ update->k.type = freespace_key_type;
+ update->k.p = freespace_iter->pos;
+ bch2_key_resize(&update->k, 1);
+
+ ret = bch2_trans_update(trans, freespace_iter, update, 0);
+ if (ret)
+ goto err;
+ }
+
+ bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
+ k = bch2_btree_iter_peek_slot(bucket_gens_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (a->gen != alloc_gen(k, gens_offset) &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, bucket_gens_key_wrong,
+ "incorrect gen in bucket_gens btree (got %u should be %u)\n"
+ " %s",
+ alloc_gen(k, gens_offset), a->gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
+ struct bkey_i_bucket_gens *g =
+ bch2_trans_kmalloc(trans, sizeof(*g));
+
+ ret = PTR_ERR_OR_ZERO(g);
+ if (ret)
+ goto err;
+
+ if (k.k->type == KEY_TYPE_bucket_gens) {
+ bkey_reassemble(&g->k_i, k);
+ } else {
+ bkey_bucket_gens_init(&g->k_i);
+ g->k.p = alloc_gens_pos(alloc_k.k->p, &gens_offset);
+ }
+
+ g->v.gens[gens_offset] = a->gen;
+
+ ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0);
+ if (ret)
+ goto err;
+ }
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static noinline_for_stack
+int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
+ struct bpos start,
+ struct bpos *end,
+ struct btree_iter *freespace_iter)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca;
+ struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ ca = bch_dev_bkey_exists(c, start.inode);
+ if (!ca->mi.freespace_initialized)
+ return 0;
+
+ bch2_btree_iter_set_pos(freespace_iter, start);
+
+ k = bch2_btree_iter_peek_slot(freespace_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ *end = bkey_min(k.k->p, *end);
+
+ if (k.k->type != KEY_TYPE_set &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, freespace_hole_missing,
+ "hole in alloc btree missing in freespace btree\n"
+ " device %llu buckets %llu-%llu",
+ freespace_iter->pos.inode,
+ freespace_iter->pos.offset,
+ end->offset))) {
+ struct bkey_i *update =
+ bch2_trans_kmalloc(trans, sizeof(*update));
+
+ ret = PTR_ERR_OR_ZERO(update);
+ if (ret)
+ goto err;
+
+ bkey_init(&update->k);
+ update->k.type = KEY_TYPE_set;
+ update->k.p = freespace_iter->pos;
+ bch2_key_resize(&update->k,
+ min_t(u64, U32_MAX, end->offset -
+ freespace_iter->pos.offset));
+
+ ret = bch2_trans_update(trans, freespace_iter, update, 0);
+ if (ret)
+ goto err;
+ }
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static noinline_for_stack
+int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
+ struct bpos start,
+ struct bpos *end,
+ struct btree_iter *bucket_gens_iter)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+ unsigned i, gens_offset, gens_end_offset;
+ int ret;
+
+ bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
+
+ k = bch2_btree_iter_peek_slot(bucket_gens_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (bkey_cmp(alloc_gens_pos(start, &gens_offset),
+ alloc_gens_pos(*end, &gens_end_offset)))
+ gens_end_offset = KEY_TYPE_BUCKET_GENS_NR;
+
+ if (k.k->type == KEY_TYPE_bucket_gens) {
+ struct bkey_i_bucket_gens g;
+ bool need_update = false;
+
+ bkey_reassemble(&g.k_i, k);
+
+ for (i = gens_offset; i < gens_end_offset; i++) {
+ if (fsck_err_on(g.v.gens[i], c,
+ bucket_gens_hole_wrong,
+ "hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)",
+ bucket_gens_pos_to_alloc(k.k->p, i).inode,
+ bucket_gens_pos_to_alloc(k.k->p, i).offset,
+ g.v.gens[i])) {
+ g.v.gens[i] = 0;
+ need_update = true;
+ }
+ }
+
+ if (need_update) {
+ struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
+
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ goto err;
+
+ memcpy(u, &g, sizeof(g));
+
+ ret = bch2_trans_update(trans, bucket_gens_iter, u, 0);
+ if (ret)
+ goto err;
+ }
+ }
+
+ *end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0));
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
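+/*
+ * Keys in the freespace btree encode both the bucket and its generation:
+ * the low 56 bits of the key offset are the bucket number and the high
+ * bits carry generation bits (see alloc_freespace_pos()); need_discard
+ * keys carry no genbits, so the masking below is a no-op for them.
+ * E.g. a freespace key at offset 0x0300000000000400 refers to bucket
+ * 0x400 with genbits 3 in the top byte.
+ */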
+static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_trans *trans,
+ struct btree_iter *iter)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter alloc_iter;
+ struct bkey_s_c alloc_k;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a;
+ u64 genbits;
+ struct bpos pos;
+ enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
+ ? BCH_DATA_need_discard
+ : BCH_DATA_free;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ pos = iter->pos;
+ pos.offset &= ~(~0ULL << 56);
+ genbits = iter->pos.offset & (~0ULL << 56);
+
+ alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, pos, 0);
+ ret = bkey_err(alloc_k);
+ if (ret)
+ return ret;
+
+ if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c,
+ need_discard_freespace_key_to_invalid_dev_bucket,
+			"entry in %s btree for nonexistent dev:bucket %llu:%llu",
+ bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset))
+ goto delete;
+
+ a = bch2_alloc_to_v4(alloc_k, &a_convert);
+
+ if (fsck_err_on(a->data_type != state ||
+ (state == BCH_DATA_free &&
+ genbits != alloc_freespace_genbits(*a)), c,
+ need_discard_freespace_key_bad,
+ "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
+ bch2_btree_id_str(iter->btree_id),
+ iter->pos.inode,
+ iter->pos.offset,
+ a->data_type == state,
+ genbits >> 56, alloc_freespace_genbits(*a) >> 56))
+ goto delete;
+out:
+fsck_err:
+ set_btree_iter_dontneed(&alloc_iter);
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ printbuf_exit(&buf);
+ return ret;
+delete:
+ ret = bch2_btree_delete_extent_at(trans, iter,
+ iter->btree_id == BTREE_ID_freespace ? 1 : 0, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+ goto out;
+}
+
+/*
+ * We've already checked that generation numbers in the bucket_gens btree are
+ * valid for buckets that exist; this just checks for keys for nonexistent
+ * buckets.
+ */
+static noinline_for_stack
+int bch2_check_bucket_gens_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i_bucket_gens g;
+ struct bch_dev *ca;
+ u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
+ u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
+ u64 b;
+ bool need_update = false, dev_exists;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ BUG_ON(k.k->type != KEY_TYPE_bucket_gens);
+ bkey_reassemble(&g.k_i, k);
+
+ /* if no bch_dev, skip out whether we repair or not */
+ dev_exists = bch2_dev_exists2(c, k.k->p.inode);
+ if (!dev_exists) {
+ if (fsck_err_on(!dev_exists, c,
+ bucket_gens_to_invalid_dev,
+ "bucket_gens key for invalid device:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, iter, 0);
+ }
+ goto out;
+ }
+
+ ca = bch_dev_bkey_exists(c, k.k->p.inode);
+ if (fsck_err_on(end <= ca->mi.first_bucket ||
+ start >= ca->mi.nbuckets, c,
+ bucket_gens_to_invalid_buckets,
+ "bucket_gens key for invalid buckets:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, iter, 0);
+ goto out;
+ }
+
+ for (b = start; b < ca->mi.first_bucket; b++)
+ if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
+ bucket_gens_nonzero_for_invalid_buckets,
+ "bucket_gens key has nonzero gen for invalid bucket")) {
+ g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
+ need_update = true;
+ }
+
+ for (b = ca->mi.nbuckets; b < end; b++)
+ if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
+ bucket_gens_nonzero_for_invalid_buckets,
+ "bucket_gens key has nonzero gen for invalid bucket")) {
+ g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
+ need_update = true;
+ }
+
+ if (need_update) {
+ struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
+
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ goto out;
+
+ memcpy(u, &g, sizeof(g));
+ ret = bch2_trans_update(trans, iter, u, 0);
+ }
+out:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_check_alloc_info(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter;
+ struct bkey hole;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_PREFETCH);
+ bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
+ BTREE_ITER_PREFETCH);
+ bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
+ BTREE_ITER_PREFETCH);
+ bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN,
+ BTREE_ITER_PREFETCH);
+
+ while (1) {
+ struct bpos next;
+
+ bch2_trans_begin(trans);
+
+ k = bch2_get_key_or_real_bucket_hole(&iter, &hole);
+ ret = bkey_err(k);
+ if (ret)
+ goto bkey_err;
+
+ if (!k.k)
+ break;
+
+ if (k.k->type) {
+ next = bpos_nosnap_successor(k.k->p);
+
+ ret = bch2_check_alloc_key(trans,
+ k, &iter,
+ &discard_iter,
+ &freespace_iter,
+ &bucket_gens_iter);
+ if (ret)
+ goto bkey_err;
+ } else {
+ next = k.k->p;
+
+ ret = bch2_check_alloc_hole_freespace(trans,
+ bkey_start_pos(k.k),
+ &next,
+ &freespace_iter) ?:
+ bch2_check_alloc_hole_bucket_gens(trans,
+ bkey_start_pos(k.k),
+ &next,
+ &bucket_gens_iter);
+ if (ret)
+ goto bkey_err;
+ }
+
+ ret = bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto bkey_err;
+
+ bch2_btree_iter_set_pos(&iter, next);
+bkey_err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &bucket_gens_iter);
+ bch2_trans_iter_exit(trans, &freespace_iter);
+ bch2_trans_iter_exit(trans, &discard_iter);
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret < 0)
+ goto err;
+
+ ret = for_each_btree_key(trans, iter,
+ BTREE_ID_need_discard, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ bch2_check_discard_freespace_key(trans, &iter));
+ if (ret)
+ goto err;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN,
+ BTREE_ITER_PREFETCH);
+ while (1) {
+ bch2_trans_begin(trans);
+ k = bch2_btree_iter_peek(&iter);
+ if (!k.k)
+ break;
+
+ ret = bkey_err(k) ?:
+ bch2_check_discard_freespace_key(trans, &iter);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ ret = 0;
+ continue;
+ }
+ if (ret) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ bch_err(c, "while checking %s", buf.buf);
+ printbuf_exit(&buf);
+ break;
+ }
+
+ bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos));
+ }
+ bch2_trans_iter_exit(trans, &iter);
+ if (ret)
+ goto err;
+
+ ret = for_each_btree_key_commit(trans, iter,
+ BTREE_ID_bucket_gens, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_check_bucket_gens_key(trans, &iter, k));
+err:
+ bch2_trans_put(trans);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
+ struct btree_iter *alloc_iter)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter lru_iter;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a;
+ struct bkey_s_c alloc_k, lru_k;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ alloc_k = bch2_btree_iter_peek(alloc_iter);
+ if (!alloc_k.k)
+ return 0;
+
+ ret = bkey_err(alloc_k);
+ if (ret)
+ return ret;
+
+ a = bch2_alloc_to_v4(alloc_k, &a_convert);
+
+ if (a->data_type != BCH_DATA_cached)
+ return 0;
+
+ if (fsck_err_on(!a->io_time[READ], c,
+ alloc_key_cached_but_read_time_zero,
+ "cached bucket with read_time 0\n"
+ " %s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+ struct bkey_i_alloc_v4 *a_mut =
+ bch2_alloc_to_v4_mut(trans, alloc_k);
+ ret = PTR_ERR_OR_ZERO(a_mut);
+ if (ret)
+ goto err;
+
+ a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
+ ret = bch2_trans_update(trans, alloc_iter,
+ &a_mut->k_i, BTREE_TRIGGER_NORUN);
+ if (ret)
+ goto err;
+
+ a = &a_mut->v;
+ }
+
+ lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
+ lru_pos(alloc_k.k->p.inode,
+ bucket_to_u64(alloc_k.k->p),
+ a->io_time[READ]), 0);
+ ret = bkey_err(lru_k);
+ if (ret)
+ return ret;
+
+ if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
+ alloc_key_to_missing_lru_entry,
+ "missing lru entry\n"
+ " %s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+ ret = bch2_lru_set(trans,
+ alloc_k.k->p.inode,
+ bucket_to_u64(alloc_k.k->p),
+ a->io_time[READ]);
+ if (ret)
+ goto err;
+ }
+err:
+fsck_err:
+ bch2_trans_iter_exit(trans, &lru_iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
+{
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
+ POS_MIN, BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_check_alloc_to_lru_ref(trans, &iter)));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int bch2_discard_one_bucket(struct btree_trans *trans,
+ struct btree_iter *need_discard_iter,
+ struct bpos *discard_pos_done,
+ u64 *seen,
+ u64 *open,
+ u64 *need_journal_commit,
+ u64 *discarded)
+{
+ struct bch_fs *c = trans->c;
+ struct bpos pos = need_discard_iter->pos;
+ struct btree_iter iter = { NULL };
+ struct bkey_s_c k;
+ struct bch_dev *ca;
+ struct bkey_i_alloc_v4 *a;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ ca = bch_dev_bkey_exists(c, pos.inode);
+ if (!percpu_ref_tryget(&ca->io_ref)) {
+ bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
+ return 0;
+ }
+
+ if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
+ (*open)++;
+ goto out;
+ }
+
+ if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+ c->journal.flushed_seq_ondisk,
+ pos.inode, pos.offset)) {
+ (*need_journal_commit)++;
+ goto out;
+ }
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
+ need_discard_iter->pos,
+ BTREE_ITER_CACHED);
+ ret = bkey_err(k);
+ if (ret)
+ goto out;
+
+ a = bch2_alloc_to_v4_mut(trans, k);
+ ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ goto out;
+
+ if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
+ a->v.gen++;
+ SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
+ goto write;
+ }
+
+ if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
+ bch2_trans_inconsistent(trans,
+ "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
+ "%s",
+ a->v.journal_seq,
+ c->journal.flushed_seq_ondisk,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EIO;
+ }
+ goto out;
+ }
+
+ if (a->v.data_type != BCH_DATA_need_discard) {
+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
+ bch2_trans_inconsistent(trans,
+ "bucket incorrectly set in need_discard btree\n"
+ "%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EIO;
+ }
+
+ goto out;
+ }
+
+ if (!bkey_eq(*discard_pos_done, iter.pos) &&
+ ca->mi.discard && !c->opts.nochanges) {
+ /*
+ * This works without any other locks because this is the only
+ * thread that removes items from the need_discard tree
+ */
+ bch2_trans_unlock(trans);
+ blkdev_issue_discard(ca->disk_sb.bdev,
+ k.k->p.offset * ca->mi.bucket_size,
+ ca->mi.bucket_size,
+ GFP_KERNEL);
+ *discard_pos_done = iter.pos;
+
+ ret = bch2_trans_relock_notrace(trans);
+ if (ret)
+ goto out;
+ }
+
+ SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
+ a->v.data_type = alloc_data_type(a->v, a->v.data_type);
+write:
+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_WATERMARK_btree|
+ BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto out;
+
+ count_event(c, bucket_discard);
+ (*discarded)++;
+out:
+ (*seen)++;
+ bch2_trans_iter_exit(trans, &iter);
+ percpu_ref_put(&ca->io_ref);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static void bch2_do_discards_work(struct work_struct *work)
+{
+ struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
+ u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0;
+ struct bpos discard_pos_done = POS_MAX;
+ int ret;
+
+ /*
+ * We're doing the commit in bch2_discard_one_bucket instead of using
+ * for_each_btree_key_commit() so that we can increment counters after
+ * successful commit:
+ */
+ ret = bch2_trans_run(c,
+ for_each_btree_key(trans, iter,
+ BTREE_ID_need_discard, POS_MIN, 0, k,
+ bch2_discard_one_bucket(trans, &iter, &discard_pos_done,
+ &seen,
+ &open,
+ &need_journal_commit,
+ &discarded)));
+
+ if (need_journal_commit * 2 > seen)
+ bch2_journal_flush_async(&c->journal, NULL);
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+
+ trace_discard_buckets(c, seen, open, need_journal_commit, discarded,
+ bch2_err_str(ret));
+}
+
+void bch2_do_discards(struct bch_fs *c)
+{
+ if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
+ !queue_work(c->write_ref_wq, &c->discard_work))
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+}
+
+static int invalidate_one_bucket(struct btree_trans *trans,
+ struct btree_iter *lru_iter,
+ struct bkey_s_c lru_k,
+ s64 *nr_to_invalidate)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter alloc_iter = { NULL };
+ struct bkey_i_alloc_v4 *a = NULL;
+ struct printbuf buf = PRINTBUF;
+ struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
+ unsigned cached_sectors;
+ int ret = 0;
+
+ if (*nr_to_invalidate <= 0)
+ return 1;
+
+ if (!bch2_dev_bucket_exists(c, bucket)) {
+ prt_str(&buf, "lru entry points to invalid bucket");
+ goto err;
+ }
+
+ if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
+ return 0;
+
+ a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
+ ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ goto out;
+
+ /* We expect harmless races here due to the btree write buffer: */
+ if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v))
+ goto out;
+
+ BUG_ON(a->v.data_type != BCH_DATA_cached);
+
+ if (!a->v.cached_sectors)
+ bch_err(c, "invalidating empty bucket, confused");
+
+ cached_sectors = a->v.cached_sectors;
+
+ SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
+ a->v.gen++;
+ a->v.data_type = 0;
+ a->v.dirty_sectors = 0;
+ a->v.cached_sectors = 0;
+ a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
+ a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now);
+
+ ret = bch2_trans_update(trans, &alloc_iter, &a->k_i,
+ BTREE_TRIGGER_BUCKET_INVALIDATE) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_WATERMARK_btree|
+ BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto out;
+
+ trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors);
+ --*nr_to_invalidate;
+out:
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ printbuf_exit(&buf);
+ return ret;
+err:
+ prt_str(&buf, "\n lru key: ");
+ bch2_bkey_val_to_text(&buf, c, lru_k);
+
+ prt_str(&buf, "\n lru entry: ");
+ bch2_lru_pos_to_text(&buf, lru_iter->pos);
+
+ prt_str(&buf, "\n alloc key: ");
+ if (!a)
+ bch2_bpos_to_text(&buf, bucket);
+ else
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
+
+ bch_err(c, "%s", buf.buf);
+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) {
+ bch2_inconsistent_error(c);
+ ret = -EINVAL;
+ }
+
+ goto out;
+}
+
+static void bch2_do_invalidates_work(struct work_struct *work)
+{
+ struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
+ struct btree_trans *trans = bch2_trans_get(c);
+ int ret = 0;
+
+ ret = bch2_btree_write_buffer_tryflush(trans);
+ if (ret)
+ goto err;
+
+ for_each_member_device(c, ca) {
+ s64 nr_to_invalidate =
+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
+
+ ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
+ lru_pos(ca->dev_idx, 0, 0),
+ lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
+ BTREE_ITER_INTENT, k,
+ invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
+
+ if (ret < 0) {
+ percpu_ref_put(&ca->ref);
+ break;
+ }
+ }
+err:
+ bch2_trans_put(trans);
+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+}
+
+void bch2_do_invalidates(struct bch_fs *c)
+{
+ if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
+ !queue_work(c->write_ref_wq, &c->invalidate_work))
+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+}
+
+int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
+ u64 bucket_start, u64 bucket_end)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey hole;
+ struct bpos end = POS(ca->dev_idx, bucket_end);
+ struct bch_member *m;
+ unsigned long last_updated = jiffies;
+ int ret;
+
+ BUG_ON(bucket_start > bucket_end);
+ BUG_ON(bucket_end > ca->mi.nbuckets);
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+ POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)),
+ BTREE_ITER_PREFETCH);
+ /*
+ * Scan the alloc btree for every bucket on @ca, and add buckets to the
+ * freespace/need_discard/need_gc_gens btrees as needed:
+ */
+ while (1) {
+ if (last_updated + HZ * 10 < jiffies) {
+ bch_info(ca, "%s: currently at %llu/%llu",
+ __func__, iter.pos.offset, ca->mi.nbuckets);
+ last_updated = jiffies;
+ }
+
+ bch2_trans_begin(trans);
+
+ if (bkey_ge(iter.pos, end)) {
+ ret = 0;
+ break;
+ }
+
+ k = bch2_get_key_or_hole(&iter, end, &hole);
+ ret = bkey_err(k);
+ if (ret)
+ goto bkey_err;
+
+ if (k.k->type) {
+ /*
+ * We process live keys in the alloc btree one at a
+ * time:
+ */
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
+
+ ret = bch2_bucket_do_index(trans, k, a, true) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto bkey_err;
+
+ bch2_btree_iter_advance(&iter);
+ } else {
+ struct bkey_i *freespace;
+
+ freespace = bch2_trans_kmalloc(trans, sizeof(*freespace));
+ ret = PTR_ERR_OR_ZERO(freespace);
+ if (ret)
+ goto bkey_err;
+
+ bkey_init(&freespace->k);
+ freespace->k.type = KEY_TYPE_set;
+ freespace->k.p = k.k->p;
+ freespace->k.size = k.k->size;
+
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto bkey_err;
+
+ bch2_btree_iter_set_pos(&iter, k.k->p);
+ }
+bkey_err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ break;
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+
+ if (ret < 0) {
+ bch_err_msg(ca, ret, "initializing free space");
+ return ret;
+ }
+
+ mutex_lock(&c->sb_lock);
+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
+ mutex_unlock(&c->sb_lock);
+
+ return 0;
+}
+
+int bch2_fs_freespace_init(struct bch_fs *c)
+{
+ int ret = 0;
+ bool doing_init = false;
+
+ /*
+ * We can crash during the device add path, so we need to check this on
+ * every mount:
+ */
+
+ for_each_member_device(c, ca) {
+ if (ca->mi.freespace_initialized)
+ continue;
+
+ if (!doing_init) {
+ bch_info(c, "initializing freespace");
+ doing_init = true;
+ }
+
+ ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
+ if (ret) {
+ percpu_ref_put(&ca->ref);
+ bch_err_fn(c, ret);
+ return ret;
+ }
+ }
+
+ if (doing_init) {
+ mutex_lock(&c->sb_lock);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ bch_verbose(c, "done initializing freespace");
+ }
+
+ return 0;
+}
+
+/* Bucket IO clocks: */
+
+int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
+ size_t bucket_nr, int rw)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_i_alloc_v4 *a;
+ u64 now;
+ int ret = 0;
+
+ a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr));
+ ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ return ret;
+
+ now = atomic64_read(&c->io_clock[rw].now);
+ if (a->v.io_time[rw] == now)
+ goto out;
+
+ a->v.io_time[rw] = now;
+
+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+/* Startup/shutdown (ro/rw): */
+
+void bch2_recalc_capacity(struct bch_fs *c)
+{
+ u64 capacity = 0, reserved_sectors = 0, gc_reserve;
+ unsigned bucket_size_max = 0;
+ unsigned long ra_pages = 0;
+
+ lockdep_assert_held(&c->state_lock);
+
+ for_each_online_member(c, ca) {
+ struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi;
+
+ ra_pages += bdi->ra_pages;
+ }
+
+ bch2_set_ra_pages(c, ra_pages);
+
+ for_each_rw_member(c, ca) {
+ u64 dev_reserve = 0;
+
+ /*
+ * We need to reserve buckets (from the number
+ * of currently available buckets) against
+ * foreground writes so that mainly copygc can
+ * make forward progress.
+ *
+ * We need enough to refill the various reserves
+ * from scratch - copygc will use its entire
+		 * reserve all at once, then run again when
+ * its reserve is refilled (from the formerly
+ * available buckets).
+ *
+ * This reserve is just used when considering if
+ * allocations for foreground writes must wait -
+ * not -ENOSPC calculations.
+ */
+
+ dev_reserve += ca->nr_btree_reserve * 2;
+ dev_reserve += ca->mi.nbuckets >> 6; /* copygc reserve */
+
+ dev_reserve += 1; /* btree write point */
+ dev_reserve += 1; /* copygc write point */
+ dev_reserve += 1; /* rebalance write point */
+
+ dev_reserve *= ca->mi.bucket_size;
+
+ capacity += bucket_to_sector(ca, ca->mi.nbuckets -
+ ca->mi.first_bucket);
+
+ reserved_sectors += dev_reserve * 2;
+
+ bucket_size_max = max_t(unsigned, bucket_size_max,
+ ca->mi.bucket_size);
+ }
+
+ gc_reserve = c->opts.gc_reserve_bytes
+ ? c->opts.gc_reserve_bytes >> 9
+ : div64_u64(capacity * c->opts.gc_reserve_percent, 100);
+
+ reserved_sectors = max(gc_reserve, reserved_sectors);
+
+ reserved_sectors = min(reserved_sectors, capacity);
+
+ c->capacity = capacity - reserved_sectors;
+
+ c->bucket_size_max = bucket_size_max;
+
+	/* Wake up in case someone was waiting for buckets */
+ closure_wake_up(&c->freelist_wait);
+}
+
+u64 bch2_min_rw_member_capacity(struct bch_fs *c)
+{
+ u64 ret = U64_MAX;
+
+ for_each_rw_member(c, ca)
+ ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
+ return ret;
+}
+
+static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
+{
+ struct open_bucket *ob;
+ bool ret = false;
+
+ for (ob = c->open_buckets;
+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
+ ob++) {
+ spin_lock(&ob->lock);
+ if (ob->valid && !ob->on_partial_list &&
+ ob->dev == ca->dev_idx)
+ ret = true;
+ spin_unlock(&ob->lock);
+ }
+
+ return ret;
+}
+
+/* device goes ro: */
+void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
+{
+ unsigned i;
+
+ /* First, remove device from allocation groups: */
+
+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
+ clear_bit(ca->dev_idx, c->rw_devs[i].d);
+
+ /*
+	 * Capacity is calculated based on the devices in allocation groups:
+ */
+ bch2_recalc_capacity(c);
+
+ bch2_open_buckets_stop(c, ca, false);
+
+ /*
+ * Wake up threads that were blocked on allocation, so they can notice
+	 * the device is no longer available for allocation and the capacity has changed:
+ */
+ closure_wake_up(&c->freelist_wait);
+
+ /*
+ * journal_res_get() can block waiting for free space in the journal -
+ * it needs to notice there may not be devices to allocate from anymore:
+ */
+ wake_up(&c->journal.wait);
+
+ /* Now wait for any in flight writes: */
+
+ closure_wait_event(&c->open_buckets_wait,
+ !bch2_dev_has_open_write_point(c, ca));
+}
+
+/* device goes rw: */
+void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
+ if (ca->mi.data_allowed & (1 << i))
+ set_bit(ca->dev_idx, c->rw_devs[i].d);
+}
+
+void bch2_fs_allocator_background_init(struct bch_fs *c)
+{
+ spin_lock_init(&c->freelist_lock);
+ INIT_WORK(&c->discard_work, bch2_do_discards_work);
+ INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
+}
diff --git a/c_src/libbcachefs/alloc_background.h b/c_src/libbcachefs/alloc_background.h
new file mode 100644
index 00000000..e7f7e842
--- /dev/null
+++ b/c_src/libbcachefs/alloc_background.h
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ALLOC_BACKGROUND_H
+#define _BCACHEFS_ALLOC_BACKGROUND_H
+
+#include "bcachefs.h"
+#include "alloc_types.h"
+#include "buckets.h"
+#include "debug.h"
+#include "super.h"
+
+enum bkey_invalid_flags;
+
+/* How out of date a pointer gen is allowed to be: */
+#define BUCKET_GC_GEN_MAX 96U
+
+static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
+{
+ struct bch_dev *ca;
+
+ if (!bch2_dev_exists2(c, pos.inode))
+ return false;
+
+ ca = bch_dev_bkey_exists(c, pos.inode);
+ return pos.offset >= ca->mi.first_bucket &&
+ pos.offset < ca->mi.nbuckets;
+}
+
+static inline u64 bucket_to_u64(struct bpos bucket)
+{
+ return (bucket.inode << 48) | bucket.offset;
+}
+
+static inline struct bpos u64_to_bucket(u64 bucket)
+{
+ return POS(bucket >> 48, bucket & ~(~0ULL << 48));
+}
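
The two helpers above pack a bucket position into a single u64: device index in the top 16 bits, bucket offset in the low 48. A minimal sketch of the round trip (illustrative only, not part of this header; it assumes only POS() and BUG_ON() as used elsewhere in this tree):

```c
/*
 * Sketch only: round-tripping the packing above - device index in the
 * top 16 bits, bucket offset in the low 48.
 */
static inline void bucket_pack_example(void)
{
	struct bpos bucket = POS(3, 12345);
	u64 packed = bucket_to_u64(bucket);	/* (3ULL << 48) | 12345 */
	struct bpos back = u64_to_bucket(packed);

	BUG_ON(back.inode != 3 || back.offset != 12345);
}
```
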
+
+static inline u8 alloc_gc_gen(struct bch_alloc_v4 a)
+{
+ return a.gen - a.oldest_gen;
+}
+
+static inline enum bch_data_type __alloc_data_type(u32 dirty_sectors,
+ u32 cached_sectors,
+ u32 stripe,
+ struct bch_alloc_v4 a,
+ enum bch_data_type data_type)
+{
+ if (stripe)
+ return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
+ if (dirty_sectors)
+ return data_type;
+ if (cached_sectors)
+ return BCH_DATA_cached;
+ if (BCH_ALLOC_V4_NEED_DISCARD(&a))
+ return BCH_DATA_need_discard;
+ if (alloc_gc_gen(a) >= BUCKET_GC_GEN_MAX)
+ return BCH_DATA_need_gc_gens;
+ return BCH_DATA_free;
+}
+
+static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
+ enum bch_data_type data_type)
+{
+ return __alloc_data_type(a.dirty_sectors, a.cached_sectors,
+ a.stripe, a, data_type);
+}
+
+static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type)
+{
+ return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type;
+}
+
+static inline unsigned bch2_bucket_sectors(struct bch_alloc_v4 a)
+{
+ return a.dirty_sectors + a.cached_sectors;
+}
+
+static inline unsigned bch2_bucket_sectors_dirty(struct bch_alloc_v4 a)
+{
+ return a.dirty_sectors;
+}
+
+static inline unsigned bch2_bucket_sectors_fragmented(struct bch_dev *ca,
+ struct bch_alloc_v4 a)
+{
+ int d = bch2_bucket_sectors_dirty(a);
+
+ return d ? max(0, ca->mi.bucket_size - d) : 0;
+}
+
+static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
+{
+ return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
+}
+
+#define DATA_TYPES_MOVABLE \
+ ((1U << BCH_DATA_btree)| \
+ (1U << BCH_DATA_user)| \
+ (1U << BCH_DATA_stripe))
+
+static inline bool data_type_movable(enum bch_data_type type)
+{
+ return (1U << type) & DATA_TYPES_MOVABLE;
+}
+
+static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
+ struct bch_dev *ca)
+{
+ if (!data_type_movable(a.data_type) ||
+ !bch2_bucket_sectors_fragmented(ca, a))
+ return 0;
+
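+	/* dirty fraction of the bucket, as a 31-bit fixed-point value: */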
+ u64 d = bch2_bucket_sectors_dirty(a);
+ return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
+}
+
+static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a)
+{
+ return ((u64) alloc_gc_gen(a) >> 4) << 56;
+}
+
+static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_v4 a)
+{
+ pos.offset |= alloc_freespace_genbits(a);
+ return pos;
+}
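
The genbits helpers above steal the top byte of a freespace key's offset: the low 56 bits are the bucket, the high 8 bits are (gen - oldest_gen) / 16, which is how stale freespace entries are detected at allocation time. A small sketch (illustrative only, not part of this header):

```c
/*
 * Sketch only: freespace key layout assumed above - bucket offset in the
 * low 56 bits, (gen - oldest_gen) / 16 in the high 8 bits.
 */
static inline void freespace_pos_example(void)
{
	struct bch_alloc_v4 a = { .gen = 37, .oldest_gen = 0 };
	struct bpos p = alloc_freespace_pos(POS(2, 1000), a);

	BUG_ON(p.offset >> 56 != (37 >> 4));		/* genbits == 2 */
	BUG_ON((p.offset & ~(~0ULL << 56)) != 1000);	/* bucket preserved */
}
```
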
+
+static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a)
+{
+ unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
+ BCH_ALLOC_V4_U64s_V0) +
+ BCH_ALLOC_V4_NR_BACKPOINTERS(a) *
+ (sizeof(struct bch_backpointer) / sizeof(u64));
+
+ BUG_ON(ret > U8_MAX - BKEY_U64s);
+ return ret;
+}
+
+static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a)
+{
+ set_bkey_val_u64s(&a->k, alloc_v4_u64s(&a->v));
+}
+
+struct bkey_i_alloc_v4 *
+bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos);
+
+void __bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *);
+
+static inline const struct bch_alloc_v4 *bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *convert)
+{
+ const struct bch_alloc_v4 *ret;
+
+ if (unlikely(k.k->type != KEY_TYPE_alloc_v4))
+ goto slowpath;
+
+ ret = bkey_s_c_to_alloc_v4(k).v;
+ if (BCH_ALLOC_V4_BACKPOINTERS_START(ret) != BCH_ALLOC_V4_U64s)
+ goto slowpath;
+
+ return ret;
+slowpath:
+ __bch2_alloc_to_v4(k, convert);
+ return convert;
+}
+
+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s_c);
+
+int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
+
+int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_alloc_v4_swab(struct bkey_s);
+void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_alloc ((struct bkey_ops) { \
+ .key_invalid = bch2_alloc_v1_invalid, \
+ .val_to_text = bch2_alloc_to_text, \
+ .trigger = bch2_trigger_alloc, \
+ .min_val_size = 8, \
+})
+
+#define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \
+ .key_invalid = bch2_alloc_v2_invalid, \
+ .val_to_text = bch2_alloc_to_text, \
+ .trigger = bch2_trigger_alloc, \
+ .min_val_size = 8, \
+})
+
+#define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \
+ .key_invalid = bch2_alloc_v3_invalid, \
+ .val_to_text = bch2_alloc_to_text, \
+ .trigger = bch2_trigger_alloc, \
+ .min_val_size = 16, \
+})
+
+#define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \
+ .key_invalid = bch2_alloc_v4_invalid, \
+ .val_to_text = bch2_alloc_to_text, \
+ .swab = bch2_alloc_v4_swab, \
+ .trigger = bch2_trigger_alloc, \
+ .min_val_size = 48, \
+})
+
+int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \
+ .key_invalid = bch2_bucket_gens_invalid, \
+ .val_to_text = bch2_bucket_gens_to_text, \
+})
+
+int bch2_bucket_gens_init(struct bch_fs *);
+
+static inline bool bkey_is_alloc(const struct bkey *k)
+{
+ return k->type == KEY_TYPE_alloc ||
+ k->type == KEY_TYPE_alloc_v2 ||
+ k->type == KEY_TYPE_alloc_v3;
+}
+
+int bch2_alloc_read(struct bch_fs *);
+
+int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+int bch2_check_alloc_info(struct bch_fs *);
+int bch2_check_alloc_to_lru_refs(struct bch_fs *);
+void bch2_do_discards(struct bch_fs *);
+
+static inline u64 should_invalidate_buckets(struct bch_dev *ca,
+ struct bch_dev_usage u)
+{
+ u64 want_free = ca->mi.nbuckets >> 7;
+ u64 free = max_t(s64, 0,
+ u.d[BCH_DATA_free].buckets
+ + u.d[BCH_DATA_need_discard].buckets
+ - bch2_dev_buckets_reserved(ca, BCH_WATERMARK_stripe));
+
+ return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
+}
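
A rough feel for the heuristic above, with made-up numbers (standalone sketch, not bcachefs code): a device with 2^20 buckets wants nbuckets >> 7 = 8192 of them free; if only 5000 are free or awaiting discard after subtracting the stripe reserve, we ask to invalidate 3192 buckets, capped by how many cached-only buckets actually exist.

```c
#include <assert.h>
#include <stdint.h>

static void invalidate_target_example(void)
{
	int64_t want_free = (1 << 20) >> 7;	/* 8192 */
	int64_t free      = 5000;		/* free + need_discard - reserve */
	int64_t cached    = 10000;		/* buckets holding only cached data */
	int64_t nr        = want_free - free;	/* 3192 */

	nr = nr < 0 ? 0 : nr;			/* same effect as clamp_t(s64, ..., 0, cached) */
	nr = nr > cached ? cached : nr;

	assert(nr == 3192);
}
```
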
+
+void bch2_do_invalidates(struct bch_fs *);
+
+static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
+{
+ return (void *) ((u64 *) &a->v +
+ (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
+ BCH_ALLOC_V4_U64s_V0));
+}
+
+static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a)
+{
+ return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a));
+}
+
+int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64);
+int bch2_fs_freespace_init(struct bch_fs *);
+
+void bch2_recalc_capacity(struct bch_fs *);
+u64 bch2_min_rw_member_capacity(struct bch_fs *);
+
+void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
+void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
+
+void bch2_fs_allocator_background_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
diff --git a/c_src/libbcachefs/alloc_foreground.c b/c_src/libbcachefs/alloc_foreground.c
new file mode 100644
index 00000000..b0ff4799
--- /dev/null
+++ b/c_src/libbcachefs/alloc_foreground.c
@@ -0,0 +1,1624 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2012 Google, Inc.
+ *
+ * Foreground allocator code: allocate buckets from freelist, and allocate in
+ * sector granularity from writepoints.
+ *
+ * bch2_bucket_alloc() allocates a single bucket from a specific device.
+ *
+ * bch2_bucket_alloc_set() allocates one or more buckets from different devices
+ * in a given filesystem.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "backpointers.h"
+#include "btree_iter.h"
+#include "btree_update.h"
+#include "btree_gc.h"
+#include "buckets.h"
+#include "buckets_waiting_for_journal.h"
+#include "clock.h"
+#include "debug.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "error.h"
+#include "io_write.h"
+#include "journal.h"
+#include "movinggc.h"
+#include "nocow_locking.h"
+#include "trace.h"
+
+#include <linux/math64.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+
+static void bch2_trans_mutex_lock_norelock(struct btree_trans *trans,
+ struct mutex *lock)
+{
+ if (!mutex_trylock(lock)) {
+ bch2_trans_unlock(trans);
+ mutex_lock(lock);
+ }
+}
+
+const char * const bch2_watermarks[] = {
+#define x(t) #t,
+ BCH_WATERMARKS()
+#undef x
+ NULL
+};
+
+/*
+ * Open buckets represent buckets that are currently being allocated from. They
+ * serve two purposes:
+ *
+ * - They track buckets that have been partially allocated, allowing for
+ * sub-bucket sized allocations - they're used by the sector allocator below
+ *
+ * - They provide a reference to the buckets they own that mark and sweep GC
+ * can find, until the new allocation has a pointer to it inserted into the
+ * btree
+ *
+ * When allocating some space with the sector allocator, the allocation comes
+ * with a reference to an open bucket - the caller is required to put that
+ * reference _after_ doing the index update that makes its allocation reachable.
+ */
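
The ordering rule in the last paragraph is easier to see as code. Below is a hedged sketch of a caller, not the actual bch2_write() path: the write_point specifier, extent, and closure are assumed to come from the caller, and only the allocation call's error handling is shown.

```c
static int example_write_one_extent(struct btree_trans *trans,
				    struct write_point_specifier write_point,
				    struct bkey_i *extent, unsigned sectors,
				    struct closure *cl)
{
	struct bch_fs *c = trans->c;
	struct bch_devs_list devs_have = { .nr = 0 };
	struct open_buckets obs = { .nr = 0 };
	struct write_point *wp;
	int ret;

	ret = bch2_alloc_sectors_start_trans(trans, 0, false, write_point,
					     &devs_have, 1, 1,
					     BCH_WATERMARK_normal, 0, cl, &wp);
	if (ret)
		return ret;

	sectors = min(sectors, wp->sectors_free);	/* never allocate past what we got */

	bch2_alloc_sectors_append_ptrs(c, wp, extent, sectors, false);
	bch2_open_bucket_get(c, wp, &obs);	/* take refs before unlocking @wp */
	bch2_alloc_sectors_done(c, wp);		/* unlocks @wp */

	/* ... submit the data write, then insert @extent into the btree ... */

	bch2_open_buckets_put(c, &obs);		/* extent is reachable; refs can go */
	return 0;
}
```
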
+
+void bch2_reset_alloc_cursors(struct bch_fs *c)
+{
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, NULL)
+ ca->alloc_cursor = 0;
+ rcu_read_unlock();
+}
+
+static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob)
+{
+ open_bucket_idx_t idx = ob - c->open_buckets;
+ open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket);
+
+ ob->hash = *slot;
+ *slot = idx;
+}
+
+static void bch2_open_bucket_hash_remove(struct bch_fs *c, struct open_bucket *ob)
+{
+ open_bucket_idx_t idx = ob - c->open_buckets;
+ open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket);
+
+ while (*slot != idx) {
+ BUG_ON(!*slot);
+ slot = &c->open_buckets[*slot].hash;
+ }
+
+ *slot = ob->hash;
+ ob->hash = 0;
+}
+
+void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+
+ if (ob->ec) {
+ ec_stripe_new_put(c, ob->ec, STRIPE_REF_io);
+ return;
+ }
+
+ percpu_down_read(&c->mark_lock);
+ spin_lock(&ob->lock);
+
+ ob->valid = false;
+ ob->data_type = 0;
+
+ spin_unlock(&ob->lock);
+ percpu_up_read(&c->mark_lock);
+
+ spin_lock(&c->freelist_lock);
+ bch2_open_bucket_hash_remove(c, ob);
+
+ ob->freelist = c->open_buckets_freelist;
+ c->open_buckets_freelist = ob - c->open_buckets;
+
+ c->open_buckets_nr_free++;
+ ca->nr_open_buckets--;
+ spin_unlock(&c->freelist_lock);
+
+ closure_wake_up(&c->open_buckets_wait);
+}
+
+void bch2_open_bucket_write_error(struct bch_fs *c,
+ struct open_buckets *obs,
+ unsigned dev)
+{
+ struct open_bucket *ob;
+ unsigned i;
+
+ open_bucket_for_each(c, obs, ob, i)
+ if (ob->dev == dev && ob->ec)
+ bch2_ec_bucket_cancel(c, ob);
+}
+
+static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
+{
+ struct open_bucket *ob;
+
+ BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free);
+
+ ob = c->open_buckets + c->open_buckets_freelist;
+ c->open_buckets_freelist = ob->freelist;
+ atomic_set(&ob->pin, 1);
+ ob->data_type = 0;
+
+ c->open_buckets_nr_free--;
+ return ob;
+}
+
+static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
+{
+ BUG_ON(c->open_buckets_partial_nr >=
+ ARRAY_SIZE(c->open_buckets_partial));
+
+ spin_lock(&c->freelist_lock);
+ ob->on_partial_list = true;
+ c->open_buckets_partial[c->open_buckets_partial_nr++] =
+ ob - c->open_buckets;
+ spin_unlock(&c->freelist_lock);
+
+ closure_wake_up(&c->open_buckets_wait);
+ closure_wake_up(&c->freelist_wait);
+}
+
+/* _only_ for allocating the journal on a new device: */
+long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
+{
+ while (ca->new_fs_bucket_idx < ca->mi.nbuckets) {
+ u64 b = ca->new_fs_bucket_idx++;
+
+ if (!is_superblock_bucket(ca, b) &&
+ (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse)))
+ return b;
+ }
+
+ return -1;
+}
+
+static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
+{
+ switch (watermark) {
+ case BCH_WATERMARK_reclaim:
+ return 0;
+ case BCH_WATERMARK_btree:
+ case BCH_WATERMARK_btree_copygc:
+ return OPEN_BUCKETS_COUNT / 4;
+ case BCH_WATERMARK_copygc:
+ return OPEN_BUCKETS_COUNT / 3;
+ default:
+ return OPEN_BUCKETS_COUNT / 2;
+ }
+}
+
+static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
+ u64 bucket,
+ enum bch_watermark watermark,
+ const struct bch_alloc_v4 *a,
+ struct bucket_alloc_state *s,
+ struct closure *cl)
+{
+ struct open_bucket *ob;
+
+ if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) {
+ s->skipped_nouse++;
+ return NULL;
+ }
+
+ if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
+ s->skipped_open++;
+ return NULL;
+ }
+
+ if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+ c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) {
+ s->skipped_need_journal_commit++;
+ return NULL;
+ }
+
+ if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) {
+ s->skipped_nocow++;
+ return NULL;
+ }
+
+ spin_lock(&c->freelist_lock);
+
+ if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(watermark))) {
+ if (cl)
+ closure_wait(&c->open_buckets_wait, cl);
+
+ track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket],
+ &c->blocked_allocate_open_bucket, true);
+ spin_unlock(&c->freelist_lock);
+ return ERR_PTR(-BCH_ERR_open_buckets_empty);
+ }
+
+ /* Recheck under lock: */
+ if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
+ spin_unlock(&c->freelist_lock);
+ s->skipped_open++;
+ return NULL;
+ }
+
+ ob = bch2_open_bucket_alloc(c);
+
+ spin_lock(&ob->lock);
+
+ ob->valid = true;
+ ob->sectors_free = ca->mi.bucket_size;
+ ob->dev = ca->dev_idx;
+ ob->gen = a->gen;
+ ob->bucket = bucket;
+ spin_unlock(&ob->lock);
+
+ ca->nr_open_buckets++;
+ bch2_open_bucket_hash_add(c, ob);
+
+ track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket],
+ &c->blocked_allocate_open_bucket, false);
+
+ track_event_change(&c->times[BCH_TIME_blocked_allocate],
+ &c->blocked_allocate, false);
+
+ spin_unlock(&c->freelist_lock);
+ return ob;
+}
+
+static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca,
+ enum bch_watermark watermark, u64 free_entry,
+ struct bucket_alloc_state *s,
+ struct bkey_s_c freespace_k,
+ struct closure *cl)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter = { NULL };
+ struct bkey_s_c k;
+ struct open_bucket *ob;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a;
+ u64 b = free_entry & ~(~0ULL << 56);
+ unsigned genbits = free_entry >> 56;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) {
+ prt_printf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n"
+ " freespace key ",
+ ca->mi.first_bucket, ca->mi.nbuckets);
+ bch2_bkey_val_to_text(&buf, c, freespace_k);
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+ ob = ERR_PTR(-EIO);
+ goto err;
+ }
+
+ k = bch2_bkey_get_iter(trans, &iter,
+ BTREE_ID_alloc, POS(ca->dev_idx, b),
+ BTREE_ITER_CACHED);
+ ret = bkey_err(k);
+ if (ret) {
+ ob = ERR_PTR(ret);
+ goto err;
+ }
+
+ a = bch2_alloc_to_v4(k, &a_convert);
+
+ if (a->data_type != BCH_DATA_free) {
+ if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) {
+ ob = NULL;
+ goto err;
+ }
+
+ prt_printf(&buf, "non free bucket in freespace btree\n"
+ " freespace key ");
+ bch2_bkey_val_to_text(&buf, c, freespace_k);
+ prt_printf(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+ ob = ERR_PTR(-EIO);
+ goto err;
+ }
+
+ if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
+ prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
+ " freespace key ",
+ genbits, alloc_freespace_genbits(*a) >> 56);
+ bch2_bkey_val_to_text(&buf, c, freespace_k);
+ prt_printf(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+ ob = ERR_PTR(-EIO);
+ goto err;
+ }
+
+ if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) {
+ struct bch_backpointer bp;
+ struct bpos bp_pos = POS_MIN;
+
+ ret = bch2_get_next_backpointer(trans, POS(ca->dev_idx, b), -1,
+ &bp_pos, &bp,
+ BTREE_ITER_NOPRESERVE);
+ if (ret) {
+ ob = ERR_PTR(ret);
+ goto err;
+ }
+
+ if (!bkey_eq(bp_pos, POS_MAX)) {
+ /*
+ * Bucket may have data in it - we don't call
+			 * bch2_trans_inconsistent() because fsck hasn't
+ * finished yet
+ */
+ ob = NULL;
+ goto err;
+ }
+ }
+
+ ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl);
+ if (!ob)
+ set_btree_iter_dontneed(&iter);
+err:
+ if (iter.path)
+ set_btree_iter_dontneed(&iter);
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf);
+ return ob;
+}
+
+/*
+ * This path is for before the freespace btree is initialized:
+ *
+ * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock &
+ * journal buckets - journal buckets will be < ca->new_fs_bucket_idx
+ */
+static noinline struct open_bucket *
+bch2_bucket_alloc_early(struct btree_trans *trans,
+ struct bch_dev *ca,
+ enum bch_watermark watermark,
+ struct bucket_alloc_state *s,
+ struct closure *cl)
+{
+ struct btree_iter iter, citer;
+ struct bkey_s_c k, ck;
+ struct open_bucket *ob = NULL;
+ u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
+ u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor));
+ u64 alloc_cursor = alloc_start;
+ int ret;
+
+ /*
+ * Scan with an uncached iterator to avoid polluting the key cache. An
+ * uncached iter will return a cached key if one exists, but if not
+ * there is no other underlying protection for the associated key cache
+ * slot. To avoid racing bucket allocations, look up the cached key slot
+ * of any likely allocation candidate before attempting to proceed with
+ * the allocation. This provides proper exclusion on the associated
+ * bucket.
+ */
+again:
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor),
+ BTREE_ITER_SLOTS, k, ret) {
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a;
+
+ if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)))
+ break;
+
+ if (ca->new_fs_bucket_idx &&
+ is_superblock_bucket(ca, k.k->p.offset))
+ continue;
+
+ a = bch2_alloc_to_v4(k, &a_convert);
+ if (a->data_type != BCH_DATA_free)
+ continue;
+
+ /* now check the cached key to serialize concurrent allocs of the bucket */
+ ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED);
+ ret = bkey_err(ck);
+ if (ret)
+ break;
+
+ a = bch2_alloc_to_v4(ck, &a_convert);
+ if (a->data_type != BCH_DATA_free)
+ goto next;
+
+ s->buckets_seen++;
+
+ ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
+next:
+ set_btree_iter_dontneed(&citer);
+ bch2_trans_iter_exit(trans, &citer);
+ if (ob)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ alloc_cursor = iter.pos.offset;
+ ca->alloc_cursor = alloc_cursor;
+
+ if (!ob && ret)
+ ob = ERR_PTR(ret);
+
+ if (!ob && alloc_start > first_bucket) {
+ alloc_cursor = alloc_start = first_bucket;
+ goto again;
+ }
+
+ return ob;
+}
+
+static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
+ struct bch_dev *ca,
+ enum bch_watermark watermark,
+ struct bucket_alloc_state *s,
+ struct closure *cl)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct open_bucket *ob = NULL;
+ u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(ca->alloc_cursor));
+ u64 alloc_cursor = alloc_start;
+ int ret;
+
+ BUG_ON(ca->new_fs_bucket_idx);
+again:
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
+ POS(ca->dev_idx, alloc_cursor), 0, k, ret) {
+ if (k.k->p.inode != ca->dev_idx)
+ break;
+
+ for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k));
+ alloc_cursor < k.k->p.offset;
+ alloc_cursor++) {
+ ret = btree_trans_too_many_iters(trans);
+ if (ret) {
+ ob = ERR_PTR(ret);
+ break;
+ }
+
+ s->buckets_seen++;
+
+ ob = try_alloc_bucket(trans, ca, watermark,
+ alloc_cursor, s, k, cl);
+ if (ob) {
+ set_btree_iter_dontneed(&iter);
+ break;
+ }
+ }
+
+ if (ob || ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ ca->alloc_cursor = alloc_cursor;
+
+ if (!ob && ret)
+ ob = ERR_PTR(ret);
+
+ if (!ob && alloc_start > ca->mi.first_bucket) {
+ alloc_cursor = alloc_start = ca->mi.first_bucket;
+ goto again;
+ }
+
+ return ob;
+}
+
+/**
+ * bch2_bucket_alloc_trans - allocate a single bucket from a specific device
+ * @trans: transaction object
+ * @ca: device to allocate from
+ * @watermark: how important is this allocation?
+ * @cl: if not NULL, closure to be used to wait if buckets not available
+ * @usage:	for also returning the current device usage
+ *
+ * Returns: an open_bucket on success, or an ERR_PTR() on failure.
+ */
+static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
+ struct bch_dev *ca,
+ enum bch_watermark watermark,
+ struct closure *cl,
+ struct bch_dev_usage *usage)
+{
+ struct bch_fs *c = trans->c;
+ struct open_bucket *ob = NULL;
+ bool freespace = READ_ONCE(ca->mi.freespace_initialized);
+ u64 avail;
+ struct bucket_alloc_state s = { 0 };
+ bool waiting = false;
+again:
+ bch2_dev_usage_read_fast(ca, usage);
+ avail = dev_buckets_free(ca, *usage, watermark);
+
+ if (usage->d[BCH_DATA_need_discard].buckets > avail)
+ bch2_do_discards(c);
+
+ if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
+ bch2_do_gc_gens(c);
+
+ if (should_invalidate_buckets(ca, *usage))
+ bch2_do_invalidates(c);
+
+ if (!avail) {
+ if (cl && !waiting) {
+ closure_wait(&c->freelist_wait, cl);
+ waiting = true;
+ goto again;
+ }
+
+ track_event_change(&c->times[BCH_TIME_blocked_allocate],
+ &c->blocked_allocate, true);
+
+ ob = ERR_PTR(-BCH_ERR_freelist_empty);
+ goto err;
+ }
+
+ if (waiting)
+ closure_wake_up(&c->freelist_wait);
+alloc:
+ ob = likely(freespace)
+ ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl)
+ : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl);
+
+ if (s.skipped_need_journal_commit * 2 > avail)
+ bch2_journal_flush_async(&c->journal, NULL);
+
+ if (!ob && freespace && c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) {
+ freespace = false;
+ goto alloc;
+ }
+err:
+ if (!ob)
+ ob = ERR_PTR(-BCH_ERR_no_buckets_found);
+
+ if (!IS_ERR(ob))
+ trace_and_count(c, bucket_alloc, ca,
+ bch2_watermarks[watermark],
+ ob->bucket,
+ usage->d[BCH_DATA_free].buckets,
+ avail,
+ bch2_copygc_wait_amount(c),
+ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
+ &s,
+ cl == NULL,
+ "");
+ else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
+ trace_and_count(c, bucket_alloc_fail, ca,
+ bch2_watermarks[watermark],
+ 0,
+ usage->d[BCH_DATA_free].buckets,
+ avail,
+ bch2_copygc_wait_amount(c),
+ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
+ &s,
+ cl == NULL,
+ bch2_err_str(PTR_ERR(ob)));
+
+ return ob;
+}
+
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
+ enum bch_watermark watermark,
+ struct closure *cl)
+{
+ struct bch_dev_usage usage;
+ struct open_bucket *ob;
+
+ bch2_trans_do(c, NULL, NULL, 0,
+ PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
+ cl, &usage)));
+ return ob;
+}
+
+static int __dev_stripe_cmp(struct dev_stripe_state *stripe,
+ unsigned l, unsigned r)
+{
+ return ((stripe->next_alloc[l] > stripe->next_alloc[r]) -
+ (stripe->next_alloc[l] < stripe->next_alloc[r]));
+}
+
+#define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r)
+
+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
+ struct dev_stripe_state *stripe,
+ struct bch_devs_mask *devs)
+{
+ struct dev_alloc_list ret = { .nr = 0 };
+ unsigned i;
+
+ for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
+ ret.devs[ret.nr++] = i;
+
+ bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
+ return ret;
+}
+
+static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca,
+ struct dev_stripe_state *stripe,
+ struct bch_dev_usage *usage)
+{
+ u64 *v = stripe->next_alloc + ca->dev_idx;
+ u64 free_space = dev_buckets_available(ca, BCH_WATERMARK_normal);
+ u64 free_space_inv = free_space
+ ? div64_u64(1ULL << 48, free_space)
+ : 1ULL << 48;
+ u64 scale = *v / 4;
+
+ if (*v + free_space_inv >= *v)
+ *v += free_space_inv;
+ else
+ *v = U64_MAX;
+
+ for (v = stripe->next_alloc;
+ v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++)
+ *v = *v < scale ? 0 : *v - scale;
+}
+
+void bch2_dev_stripe_increment(struct bch_dev *ca,
+ struct dev_stripe_state *stripe)
+{
+ struct bch_dev_usage usage;
+
+ bch2_dev_usage_read_fast(ca, &usage);
+ bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
+}
+
+static int add_new_bucket(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct bch_devs_mask *devs_may_alloc,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ unsigned flags,
+ struct open_bucket *ob)
+{
+ unsigned durability =
+ bch_dev_bkey_exists(c, ob->dev)->mi.durability;
+
+ BUG_ON(*nr_effective >= nr_replicas);
+
+ __clear_bit(ob->dev, devs_may_alloc->d);
+ *nr_effective += durability;
+ *have_cache |= !durability;
+
+ ob_push(c, ptrs, ob);
+
+ if (*nr_effective >= nr_replicas)
+ return 1;
+ if (ob->ec)
+ return 1;
+ return 0;
+}
+
+int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
+ struct open_buckets *ptrs,
+ struct dev_stripe_state *stripe,
+ struct bch_devs_mask *devs_may_alloc,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ unsigned flags,
+ enum bch_data_type data_type,
+ enum bch_watermark watermark,
+ struct closure *cl)
+{
+ struct bch_fs *c = trans->c;
+ struct dev_alloc_list devs_sorted =
+ bch2_dev_alloc_list(c, stripe, devs_may_alloc);
+ unsigned dev;
+ struct bch_dev *ca;
+ int ret = -BCH_ERR_insufficient_devices;
+ unsigned i;
+
+ BUG_ON(*nr_effective >= nr_replicas);
+
+ for (i = 0; i < devs_sorted.nr; i++) {
+ struct bch_dev_usage usage;
+ struct open_bucket *ob;
+
+ dev = devs_sorted.devs[i];
+
+ rcu_read_lock();
+ ca = rcu_dereference(c->devs[dev]);
+ if (ca)
+ percpu_ref_get(&ca->ref);
+ rcu_read_unlock();
+
+ if (!ca)
+ continue;
+
+ if (!ca->mi.durability && *have_cache) {
+ percpu_ref_put(&ca->ref);
+ continue;
+ }
+
+ ob = bch2_bucket_alloc_trans(trans, ca, watermark, cl, &usage);
+ if (!IS_ERR(ob))
+ bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
+ percpu_ref_put(&ca->ref);
+
+ if (IS_ERR(ob)) {
+ ret = PTR_ERR(ob);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl)
+ break;
+ continue;
+ }
+
+ ob->data_type = data_type;
+
+ if (add_new_bucket(c, ptrs, devs_may_alloc,
+ nr_replicas, nr_effective,
+ have_cache, flags, ob)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/* Allocate from stripes: */
+
+/*
+ * if we can't allocate a new stripe because there are already too many
+ * partially filled stripes, force allocating from an existing stripe even when
+ * it's to a device we don't want:
+ */
+
+static int bucket_alloc_from_stripe(struct btree_trans *trans,
+ struct open_buckets *ptrs,
+ struct write_point *wp,
+ struct bch_devs_mask *devs_may_alloc,
+ u16 target,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ enum bch_watermark watermark,
+ unsigned flags,
+ struct closure *cl)
+{
+ struct bch_fs *c = trans->c;
+ struct dev_alloc_list devs_sorted;
+ struct ec_stripe_head *h;
+ struct open_bucket *ob;
+ unsigned i, ec_idx;
+ int ret = 0;
+
+ if (nr_replicas < 2)
+ return 0;
+
+ if (ec_open_bucket(c, ptrs))
+ return 0;
+
+ h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl);
+ if (IS_ERR(h))
+ return PTR_ERR(h);
+ if (!h)
+ return 0;
+
+ devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
+
+ for (i = 0; i < devs_sorted.nr; i++)
+ for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
+ if (!h->s->blocks[ec_idx])
+ continue;
+
+ ob = c->open_buckets + h->s->blocks[ec_idx];
+ if (ob->dev == devs_sorted.devs[i] &&
+ !test_and_set_bit(ec_idx, h->s->blocks_allocated))
+ goto got_bucket;
+ }
+ goto out_put_head;
+got_bucket:
+ ob->ec_idx = ec_idx;
+ ob->ec = h->s;
+ ec_stripe_new_get(h->s, STRIPE_REF_io);
+
+ ret = add_new_bucket(c, ptrs, devs_may_alloc,
+ nr_replicas, nr_effective,
+ have_cache, flags, ob);
+out_put_head:
+ bch2_ec_stripe_head_put(c, h);
+ return ret;
+}
+
+/* Sector allocator */
+
+static bool want_bucket(struct bch_fs *c,
+ struct write_point *wp,
+ struct bch_devs_mask *devs_may_alloc,
+ bool *have_cache, bool ec,
+ struct open_bucket *ob)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+
+ if (!test_bit(ob->dev, devs_may_alloc->d))
+ return false;
+
+ if (ob->data_type != wp->data_type)
+ return false;
+
+ if (!ca->mi.durability &&
+ (wp->data_type == BCH_DATA_btree || ec || *have_cache))
+ return false;
+
+ if (ec != (ob->ec != NULL))
+ return false;
+
+ return true;
+}
+
+static int bucket_alloc_set_writepoint(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct write_point *wp,
+ struct bch_devs_mask *devs_may_alloc,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ bool ec, unsigned flags)
+{
+ struct open_buckets ptrs_skip = { .nr = 0 };
+ struct open_bucket *ob;
+ unsigned i;
+ int ret = 0;
+
+ open_bucket_for_each(c, &wp->ptrs, ob, i) {
+ if (!ret && want_bucket(c, wp, devs_may_alloc,
+ have_cache, ec, ob))
+ ret = add_new_bucket(c, ptrs, devs_may_alloc,
+ nr_replicas, nr_effective,
+ have_cache, flags, ob);
+ else
+ ob_push(c, &ptrs_skip, ob);
+ }
+ wp->ptrs = ptrs_skip;
+
+ return ret;
+}
+
+static int bucket_alloc_set_partial(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct write_point *wp,
+ struct bch_devs_mask *devs_may_alloc,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache, bool ec,
+ enum bch_watermark watermark,
+ unsigned flags)
+{
+ int i, ret = 0;
+
+ if (!c->open_buckets_partial_nr)
+ return 0;
+
+ spin_lock(&c->freelist_lock);
+
+ if (!c->open_buckets_partial_nr)
+ goto unlock;
+
+ for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
+ struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
+
+ if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+ struct bch_dev_usage usage;
+ u64 avail;
+
+ bch2_dev_usage_read_fast(ca, &usage);
+ avail = dev_buckets_free(ca, usage, watermark);
+ if (!avail)
+ continue;
+
+ array_remove_item(c->open_buckets_partial,
+ c->open_buckets_partial_nr,
+ i);
+ ob->on_partial_list = false;
+
+ ret = add_new_bucket(c, ptrs, devs_may_alloc,
+ nr_replicas, nr_effective,
+ have_cache, flags, ob);
+ if (ret)
+ break;
+ }
+ }
+unlock:
+ spin_unlock(&c->freelist_lock);
+ return ret;
+}
+
+static int __open_bucket_add_buckets(struct btree_trans *trans,
+ struct open_buckets *ptrs,
+ struct write_point *wp,
+ struct bch_devs_list *devs_have,
+ u16 target,
+ bool erasure_code,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ enum bch_watermark watermark,
+ unsigned flags,
+ struct closure *_cl)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_devs_mask devs;
+ struct open_bucket *ob;
+ struct closure *cl = NULL;
+ unsigned i;
+ int ret;
+
+ devs = target_rw_devs(c, wp->data_type, target);
+
+ /* Don't allocate from devices we already have pointers to: */
+ darray_for_each(*devs_have, i)
+ __clear_bit(*i, devs.d);
+
+ open_bucket_for_each(c, ptrs, ob, i)
+ __clear_bit(ob->dev, devs.d);
+
+ if (erasure_code && ec_open_bucket(c, ptrs))
+ return 0;
+
+ ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
+ nr_replicas, nr_effective,
+ have_cache, erasure_code, flags);
+ if (ret)
+ return ret;
+
+ ret = bucket_alloc_set_partial(c, ptrs, wp, &devs,
+ nr_replicas, nr_effective,
+ have_cache, erasure_code, watermark, flags);
+ if (ret)
+ return ret;
+
+ if (erasure_code) {
+ ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
+ target,
+ nr_replicas, nr_effective,
+ have_cache,
+ watermark, flags, _cl);
+ } else {
+retry_blocking:
+ /*
+ * Try nonblocking first, so that if one device is full we'll try from
+ * other devices:
+ */
+ ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
+ nr_replicas, nr_effective, have_cache,
+ flags, wp->data_type, watermark, cl);
+ if (ret &&
+ !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
+ !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
+ !cl && _cl) {
+ cl = _cl;
+ goto retry_blocking;
+ }
+ }
+
+ return ret;
+}
+
+static int open_bucket_add_buckets(struct btree_trans *trans,
+ struct open_buckets *ptrs,
+ struct write_point *wp,
+ struct bch_devs_list *devs_have,
+ u16 target,
+ unsigned erasure_code,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ enum bch_watermark watermark,
+ unsigned flags,
+ struct closure *cl)
+{
+ int ret;
+
+ if (erasure_code) {
+ ret = __open_bucket_add_buckets(trans, ptrs, wp,
+ devs_have, target, erasure_code,
+ nr_replicas, nr_effective, have_cache,
+ watermark, flags, cl);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
+ bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
+ bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
+ return ret;
+ if (*nr_effective >= nr_replicas)
+ return 0;
+ }
+
+ ret = __open_bucket_add_buckets(trans, ptrs, wp,
+ devs_have, target, false,
+ nr_replicas, nr_effective, have_cache,
+ watermark, flags, cl);
+ return ret < 0 ? ret : 0;
+}
+
+/**
+ * should_drop_bucket - check if this open_bucket should go away
+ * @ob:	open_bucket to check
+ * @c: filesystem handle
+ * @ca: if set, we're killing buckets for a particular device
+ * @ec:		if true, we're shutting down erasure coding and killing all ec
+ *		open_buckets; if neither @ca nor @ec is set, every open_bucket
+ *		should be dropped
+ * Returns: true if we should kill this open_bucket
+ *
+ * We're killing open_buckets because we're shutting down a device, erasure
+ * coding, or the entire filesystem - check if this open_bucket matches:
+ */
+static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
+ struct bch_dev *ca, bool ec)
+{
+ if (ec) {
+ return ob->ec != NULL;
+ } else if (ca) {
+ bool drop = ob->dev == ca->dev_idx;
+ struct open_bucket *ob2;
+ unsigned i;
+
+ if (!drop && ob->ec) {
+ unsigned nr_blocks;
+
+ mutex_lock(&ob->ec->lock);
+ nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks;
+
+ for (i = 0; i < nr_blocks; i++) {
+ if (!ob->ec->blocks[i])
+ continue;
+
+ ob2 = c->open_buckets + ob->ec->blocks[i];
+ drop |= ob2->dev == ca->dev_idx;
+ }
+ mutex_unlock(&ob->ec->lock);
+ }
+
+ return drop;
+ } else {
+ return true;
+ }
+}
+
+static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
+ bool ec, struct write_point *wp)
+{
+ struct open_buckets ptrs = { .nr = 0 };
+ struct open_bucket *ob;
+ unsigned i;
+
+ mutex_lock(&wp->lock);
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ if (should_drop_bucket(ob, c, ca, ec))
+ bch2_open_bucket_put(c, ob);
+ else
+ ob_push(c, &ptrs, ob);
+ wp->ptrs = ptrs;
+ mutex_unlock(&wp->lock);
+}
+
+void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
+ bool ec)
+{
+ unsigned i;
+
+ /* Next, close write points that point to this device... */
+ for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
+ bch2_writepoint_stop(c, ca, ec, &c->write_points[i]);
+
+ bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point);
+ bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point);
+ bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
+
+ mutex_lock(&c->btree_reserve_cache_lock);
+ while (c->btree_reserve_cache_nr) {
+ struct btree_alloc *a =
+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
+
+ bch2_open_buckets_put(c, &a->ob);
+ }
+ mutex_unlock(&c->btree_reserve_cache_lock);
+
+ spin_lock(&c->freelist_lock);
+ i = 0;
+ while (i < c->open_buckets_partial_nr) {
+ struct open_bucket *ob =
+ c->open_buckets + c->open_buckets_partial[i];
+
+ if (should_drop_bucket(ob, c, ca, ec)) {
+ --c->open_buckets_partial_nr;
+ swap(c->open_buckets_partial[i],
+ c->open_buckets_partial[c->open_buckets_partial_nr]);
+ ob->on_partial_list = false;
+ spin_unlock(&c->freelist_lock);
+ bch2_open_bucket_put(c, ob);
+ spin_lock(&c->freelist_lock);
+ } else {
+ i++;
+ }
+ }
+ spin_unlock(&c->freelist_lock);
+
+ bch2_ec_stop_dev(c, ca);
+}
+
+static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
+ unsigned long write_point)
+{
+ unsigned hash =
+ hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
+
+ return &c->write_points_hash[hash];
+}
+
+static struct write_point *__writepoint_find(struct hlist_head *head,
+ unsigned long write_point)
+{
+ struct write_point *wp;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(wp, head, node)
+ if (wp->write_point == write_point)
+ goto out;
+ wp = NULL;
+out:
+ rcu_read_unlock();
+ return wp;
+}
+
+static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
+{
+ u64 stranded = c->write_points_nr * c->bucket_size_max;
+ u64 free = bch2_fs_usage_read_short(c).free;
+
+ return stranded * factor > free;
+}
+
+static bool try_increase_writepoints(struct bch_fs *c)
+{
+ struct write_point *wp;
+
+ if (c->write_points_nr == ARRAY_SIZE(c->write_points) ||
+ too_many_writepoints(c, 32))
+ return false;
+
+ wp = c->write_points + c->write_points_nr++;
+ hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
+ return true;
+}
+
+static bool try_decrease_writepoints(struct btree_trans *trans, unsigned old_nr)
+{
+ struct bch_fs *c = trans->c;
+ struct write_point *wp;
+ struct open_bucket *ob;
+ unsigned i;
+
+ mutex_lock(&c->write_points_hash_lock);
+ if (c->write_points_nr < old_nr) {
+ mutex_unlock(&c->write_points_hash_lock);
+ return true;
+ }
+
+ if (c->write_points_nr == 1 ||
+ !too_many_writepoints(c, 8)) {
+ mutex_unlock(&c->write_points_hash_lock);
+ return false;
+ }
+
+ wp = c->write_points + --c->write_points_nr;
+
+ hlist_del_rcu(&wp->node);
+ mutex_unlock(&c->write_points_hash_lock);
+
+ bch2_trans_mutex_lock_norelock(trans, &wp->lock);
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ open_bucket_free_unused(c, ob);
+ wp->ptrs.nr = 0;
+ mutex_unlock(&wp->lock);
+ return true;
+}
+
+static struct write_point *writepoint_find(struct btree_trans *trans,
+ unsigned long write_point)
+{
+ struct bch_fs *c = trans->c;
+ struct write_point *wp, *oldest;
+ struct hlist_head *head;
+
+ if (!(write_point & 1UL)) {
+ wp = (struct write_point *) write_point;
+ bch2_trans_mutex_lock_norelock(trans, &wp->lock);
+ return wp;
+ }
+
+ head = writepoint_hash(c, write_point);
+restart_find:
+ wp = __writepoint_find(head, write_point);
+ if (wp) {
+lock_wp:
+ bch2_trans_mutex_lock_norelock(trans, &wp->lock);
+ if (wp->write_point == write_point)
+ goto out;
+ mutex_unlock(&wp->lock);
+ goto restart_find;
+ }
+restart_find_oldest:
+ oldest = NULL;
+ for (wp = c->write_points;
+ wp < c->write_points + c->write_points_nr; wp++)
+ if (!oldest || time_before64(wp->last_used, oldest->last_used))
+ oldest = wp;
+
+ bch2_trans_mutex_lock_norelock(trans, &oldest->lock);
+ bch2_trans_mutex_lock_norelock(trans, &c->write_points_hash_lock);
+ if (oldest >= c->write_points + c->write_points_nr ||
+ try_increase_writepoints(c)) {
+ mutex_unlock(&c->write_points_hash_lock);
+ mutex_unlock(&oldest->lock);
+ goto restart_find_oldest;
+ }
+
+ wp = __writepoint_find(head, write_point);
+ if (wp && wp != oldest) {
+ mutex_unlock(&c->write_points_hash_lock);
+ mutex_unlock(&oldest->lock);
+ goto lock_wp;
+ }
+
+ wp = oldest;
+ hlist_del_rcu(&wp->node);
+ wp->write_point = write_point;
+ hlist_add_head_rcu(&wp->node, head);
+ mutex_unlock(&c->write_points_hash_lock);
+out:
+ wp->last_used = local_clock();
+ return wp;
+}
+
+static noinline void
+deallocate_extra_replicas(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct open_buckets *ptrs_no_use,
+ unsigned extra_replicas)
+{
+ struct open_buckets ptrs2 = { 0 };
+ struct open_bucket *ob;
+ unsigned i;
+
+ open_bucket_for_each(c, ptrs, ob, i) {
+ unsigned d = bch_dev_bkey_exists(c, ob->dev)->mi.durability;
+
+ if (d && d <= extra_replicas) {
+ extra_replicas -= d;
+ ob_push(c, ptrs_no_use, ob);
+ } else {
+ ob_push(c, &ptrs2, ob);
+ }
+ }
+
+ *ptrs = ptrs2;
+}
+
+/*
+ * Get us an open_bucket we can allocate from, return with it locked:
+ */
+int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
+ unsigned target,
+ unsigned erasure_code,
+ struct write_point_specifier write_point,
+ struct bch_devs_list *devs_have,
+ unsigned nr_replicas,
+ unsigned nr_replicas_required,
+ enum bch_watermark watermark,
+ unsigned flags,
+ struct closure *cl,
+ struct write_point **wp_ret)
+{
+ struct bch_fs *c = trans->c;
+ struct write_point *wp;
+ struct open_bucket *ob;
+ struct open_buckets ptrs;
+ unsigned nr_effective, write_points_nr;
+ bool have_cache;
+ int ret;
+ int i;
+
+ if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
+ erasure_code = false;
+
+ BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
+
+ BUG_ON(!nr_replicas || !nr_replicas_required);
+retry:
+ ptrs.nr = 0;
+ nr_effective = 0;
+ write_points_nr = c->write_points_nr;
+ have_cache = false;
+
+ *wp_ret = wp = writepoint_find(trans, write_point.v);
+
+ /* metadata may not allocate on cache devices: */
+ if (wp->data_type != BCH_DATA_user)
+ have_cache = true;
+
+ if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
+ target, erasure_code,
+ nr_replicas, &nr_effective,
+ &have_cache, watermark,
+ flags, NULL);
+ if (!ret ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto alloc_done;
+
+ /* Don't retry from all devices if we're out of open buckets: */
+ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
+ int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
+ target, erasure_code,
+ nr_replicas, &nr_effective,
+ &have_cache, watermark,
+ flags, cl);
+ if (!ret ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
+ goto alloc_done;
+ }
+
+ /*
+ * Only try to allocate cache (durability = 0 devices) from the
+ * specified target:
+ */
+ have_cache = true;
+
+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
+ 0, erasure_code,
+ nr_replicas, &nr_effective,
+ &have_cache, watermark,
+ flags, cl);
+ } else {
+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
+ target, erasure_code,
+ nr_replicas, &nr_effective,
+ &have_cache, watermark,
+ flags, cl);
+ }
+alloc_done:
+ BUG_ON(!ret && nr_effective < nr_replicas);
+
+ if (erasure_code && !ec_open_bucket(c, &ptrs))
+		pr_debug("failed to get ec bucket: ret %i", ret);
+
+ if (ret == -BCH_ERR_insufficient_devices &&
+ nr_effective >= nr_replicas_required)
+ ret = 0;
+
+ if (ret)
+ goto err;
+
+ if (nr_effective > nr_replicas)
+ deallocate_extra_replicas(c, &ptrs, &wp->ptrs, nr_effective - nr_replicas);
+
+ /* Free buckets we didn't use: */
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ open_bucket_free_unused(c, ob);
+
+ wp->ptrs = ptrs;
+
+ wp->sectors_free = UINT_MAX;
+
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ wp->sectors_free = min(wp->sectors_free, ob->sectors_free);
+
+ BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
+
+ return 0;
+err:
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ if (ptrs.nr < ARRAY_SIZE(ptrs.v))
+ ob_push(c, &ptrs, ob);
+ else
+ open_bucket_free_unused(c, ob);
+ wp->ptrs = ptrs;
+
+ mutex_unlock(&wp->lock);
+
+ if (bch2_err_matches(ret, BCH_ERR_freelist_empty) &&
+ try_decrease_writepoints(trans, write_points_nr))
+ goto retry;
+
+ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) ||
+ bch2_err_matches(ret, BCH_ERR_freelist_empty))
+ return cl
+ ? -BCH_ERR_bucket_alloc_blocked
+ : -BCH_ERR_ENOSPC_bucket_alloc;
+
+ return ret;
+}
+
+struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+
+ return (struct bch_extent_ptr) {
+ .type = 1 << BCH_EXTENT_ENTRY_ptr,
+ .gen = ob->gen,
+ .dev = ob->dev,
+ .offset = bucket_to_sector(ca, ob->bucket) +
+ ca->mi.bucket_size -
+ ob->sectors_free,
+ };
+}
+
+void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
+ struct bkey_i *k, unsigned sectors,
+ bool cached)
+{
+ bch2_alloc_sectors_append_ptrs_inlined(c, wp, k, sectors, cached);
+}
+
+/*
+ * Finished allocating from @wp: unlock it and drop our references to any
+ * open_buckets that are now full
+ */
+void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
+{
+ bch2_alloc_sectors_done_inlined(c, wp);
+}
+
+static inline void writepoint_init(struct write_point *wp,
+ enum bch_data_type type)
+{
+ mutex_init(&wp->lock);
+ wp->data_type = type;
+
+ INIT_WORK(&wp->index_update_work, bch2_write_point_do_index_updates);
+ INIT_LIST_HEAD(&wp->writes);
+ spin_lock_init(&wp->writes_lock);
+}
+
+void bch2_fs_allocator_foreground_init(struct bch_fs *c)
+{
+ struct open_bucket *ob;
+ struct write_point *wp;
+
+ mutex_init(&c->write_points_hash_lock);
+ c->write_points_nr = ARRAY_SIZE(c->write_points);
+
+	/* open bucket 0 is a sentinel NULL: */
+ spin_lock_init(&c->open_buckets[0].lock);
+
+ for (ob = c->open_buckets + 1;
+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
+ spin_lock_init(&ob->lock);
+ c->open_buckets_nr_free++;
+
+ ob->freelist = c->open_buckets_freelist;
+ c->open_buckets_freelist = ob - c->open_buckets;
+ }
+
+ writepoint_init(&c->btree_write_point, BCH_DATA_btree);
+ writepoint_init(&c->rebalance_write_point, BCH_DATA_user);
+ writepoint_init(&c->copygc_write_point, BCH_DATA_user);
+
+ for (wp = c->write_points;
+ wp < c->write_points + c->write_points_nr; wp++) {
+ writepoint_init(wp, BCH_DATA_user);
+
+ wp->last_used = local_clock();
+ wp->write_point = (unsigned long) wp;
+ hlist_add_head_rcu(&wp->node,
+ writepoint_hash(c, wp->write_point));
+ }
+}
+
+static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+ unsigned data_type = ob->data_type;
+ barrier(); /* READ_ONCE() doesn't work on bitfields */
+
+ prt_printf(out, "%zu ref %u %s %u:%llu gen %u allocated %u/%u",
+ ob - c->open_buckets,
+ atomic_read(&ob->pin),
+ data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
+ ob->dev, ob->bucket, ob->gen,
+ ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size);
+ if (ob->ec)
+ prt_printf(out, " ec idx %llu", ob->ec->idx);
+ if (ob->on_partial_list)
+ prt_str(out, " partial");
+ prt_newline(out);
+}
+
+void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct open_bucket *ob;
+
+ out->atomic++;
+
+ for (ob = c->open_buckets;
+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
+ ob++) {
+ spin_lock(&ob->lock);
+ if (ob->valid && !ob->on_partial_list)
+ bch2_open_bucket_to_text(out, c, ob);
+ spin_unlock(&ob->lock);
+ }
+
+ --out->atomic;
+}
+
+void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ unsigned i;
+
+ out->atomic++;
+ spin_lock(&c->freelist_lock);
+
+ for (i = 0; i < c->open_buckets_partial_nr; i++)
+ bch2_open_bucket_to_text(out, c,
+ c->open_buckets + c->open_buckets_partial[i]);
+
+ spin_unlock(&c->freelist_lock);
+ --out->atomic;
+}
+
+static const char * const bch2_write_point_states[] = {
+#define x(n) #n,
+ WRITE_POINT_STATES()
+#undef x
+ NULL
+};
+
+static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
+ struct write_point *wp)
+{
+ struct open_bucket *ob;
+ unsigned i;
+
+ prt_printf(out, "%lu: ", wp->write_point);
+ prt_human_readable_u64(out, wp->sectors_allocated);
+
+ prt_printf(out, " last wrote: ");
+ bch2_pr_time_units(out, sched_clock() - wp->last_used);
+
+ for (i = 0; i < WRITE_POINT_STATE_NR; i++) {
+ prt_printf(out, " %s: ", bch2_write_point_states[i]);
+ bch2_pr_time_units(out, wp->time[i]);
+ }
+
+ prt_newline(out);
+
+ printbuf_indent_add(out, 2);
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ bch2_open_bucket_to_text(out, c, ob);
+ printbuf_indent_sub(out, 2);
+}
+
+void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct write_point *wp;
+
+ prt_str(out, "Foreground write points\n");
+ for (wp = c->write_points;
+ wp < c->write_points + ARRAY_SIZE(c->write_points);
+ wp++)
+ bch2_write_point_to_text(out, c, wp);
+
+ prt_str(out, "Copygc write point\n");
+ bch2_write_point_to_text(out, c, &c->copygc_write_point);
+
+ prt_str(out, "Rebalance write point\n");
+ bch2_write_point_to_text(out, c, &c->rebalance_write_point);
+
+ prt_str(out, "Btree write point\n");
+ bch2_write_point_to_text(out, c, &c->btree_write_point);
+}
diff --git a/c_src/libbcachefs/alloc_foreground.h b/c_src/libbcachefs/alloc_foreground.h
new file mode 100644
index 00000000..7aaeec44
--- /dev/null
+++ b/c_src/libbcachefs/alloc_foreground.h
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ALLOC_FOREGROUND_H
+#define _BCACHEFS_ALLOC_FOREGROUND_H
+
+#include "bcachefs.h"
+#include "alloc_types.h"
+#include "extents.h"
+#include "sb-members.h"
+
+#include <linux/hash.h>
+
+struct bkey;
+struct bch_dev;
+struct bch_fs;
+struct bch_devs_list;
+
+extern const char * const bch2_watermarks[];
+
+void bch2_reset_alloc_cursors(struct bch_fs *);
+
+struct dev_alloc_list {
+ unsigned nr;
+ u8 devs[BCH_SB_MEMBERS_MAX];
+};
+
+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
+ struct dev_stripe_state *,
+ struct bch_devs_mask *);
+void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
+
+long bch2_bucket_alloc_new_fs(struct bch_dev *);
+
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
+ enum bch_watermark, struct closure *);
+
+static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
+ struct open_bucket *ob)
+{
+ BUG_ON(obs->nr >= ARRAY_SIZE(obs->v));
+
+ obs->v[obs->nr++] = ob - c->open_buckets;
+}
+
+#define open_bucket_for_each(_c, _obs, _ob, _i) \
+ for ((_i) = 0; \
+ (_i) < (_obs)->nr && \
+ ((_ob) = (_c)->open_buckets + (_obs)->v[_i], true); \
+ (_i)++)
+
+static inline struct open_bucket *ec_open_bucket(struct bch_fs *c,
+ struct open_buckets *obs)
+{
+ struct open_bucket *ob;
+ unsigned i;
+
+ open_bucket_for_each(c, obs, ob, i)
+ if (ob->ec)
+ return ob;
+
+ return NULL;
+}
+
+void bch2_open_bucket_write_error(struct bch_fs *,
+ struct open_buckets *, unsigned);
+
+void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
+
+static inline void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
+{
+ if (atomic_dec_and_test(&ob->pin))
+ __bch2_open_bucket_put(c, ob);
+}
+
+static inline void bch2_open_buckets_put(struct bch_fs *c,
+ struct open_buckets *ptrs)
+{
+ struct open_bucket *ob;
+ unsigned i;
+
+ open_bucket_for_each(c, ptrs, ob, i)
+ bch2_open_bucket_put(c, ob);
+ ptrs->nr = 0;
+}
+
+static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct write_point *wp)
+{
+ struct open_buckets ptrs = { .nr = 0 }, keep = { .nr = 0 };
+ struct open_bucket *ob;
+ unsigned i;
+
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
+ wp->ptrs = keep;
+
+ mutex_unlock(&wp->lock);
+
+ bch2_open_buckets_put(c, &ptrs);
+}
+
+static inline void bch2_open_bucket_get(struct bch_fs *c,
+ struct write_point *wp,
+ struct open_buckets *ptrs)
+{
+ struct open_bucket *ob;
+ unsigned i;
+
+ open_bucket_for_each(c, &wp->ptrs, ob, i) {
+ ob->data_type = wp->data_type;
+ atomic_inc(&ob->pin);
+ ob_push(c, ptrs, ob);
+ }
+}
+
+static inline open_bucket_idx_t *open_bucket_hashslot(struct bch_fs *c,
+ unsigned dev, u64 bucket)
+{
+ return c->open_buckets_hash +
+ (jhash_3words(dev, bucket, bucket >> 32, 0) &
+ (OPEN_BUCKETS_COUNT - 1));
+}
+
+static inline bool bch2_bucket_is_open(struct bch_fs *c, unsigned dev, u64 bucket)
+{
+ open_bucket_idx_t slot = *open_bucket_hashslot(c, dev, bucket);
+
+ while (slot) {
+ struct open_bucket *ob = &c->open_buckets[slot];
+
+ if (ob->dev == dev && ob->bucket == bucket)
+ return true;
+
+ slot = ob->hash;
+ }
+
+ return false;
+}
+
+static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64 bucket)
+{
+ bool ret;
+
+ if (bch2_bucket_is_open(c, dev, bucket))
+ return true;
+
+ spin_lock(&c->freelist_lock);
+ ret = bch2_bucket_is_open(c, dev, bucket);
+ spin_unlock(&c->freelist_lock);
+
+ return ret;
+}
+
+int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
+ struct dev_stripe_state *, struct bch_devs_mask *,
+ unsigned, unsigned *, bool *, unsigned,
+ enum bch_data_type, enum bch_watermark,
+ struct closure *);
+
+int bch2_alloc_sectors_start_trans(struct btree_trans *,
+ unsigned, unsigned,
+ struct write_point_specifier,
+ struct bch_devs_list *,
+ unsigned, unsigned,
+ enum bch_watermark,
+ unsigned,
+ struct closure *,
+ struct write_point **);
+
+struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *, struct open_bucket *);
+
+/*
+ * Append pointers to the space we just allocated to @k, and mark @sectors space
+ * as allocated out of @ob
+ */
+static inline void
+bch2_alloc_sectors_append_ptrs_inlined(struct bch_fs *c, struct write_point *wp,
+ struct bkey_i *k, unsigned sectors,
+ bool cached)
+{
+ struct open_bucket *ob;
+ unsigned i;
+
+ BUG_ON(sectors > wp->sectors_free);
+ wp->sectors_free -= sectors;
+ wp->sectors_allocated += sectors;
+
+ open_bucket_for_each(c, &wp->ptrs, ob, i) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+ struct bch_extent_ptr ptr = bch2_ob_ptr(c, ob);
+
+ ptr.cached = cached ||
+ (!ca->mi.durability &&
+ wp->data_type == BCH_DATA_user);
+
+ bch2_bkey_append_ptr(k, ptr);
+
+ BUG_ON(sectors > ob->sectors_free);
+ ob->sectors_free -= sectors;
+ }
+}
+
+void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
+ struct bkey_i *, unsigned, bool);
+void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
+
+void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *, bool);
+
+static inline struct write_point_specifier writepoint_hashed(unsigned long v)
+{
+ return (struct write_point_specifier) { .v = v | 1 };
+}
+
+static inline struct write_point_specifier writepoint_ptr(struct write_point *wp)
+{
+ return (struct write_point_specifier) { .v = (unsigned long) wp };
+}
+
+void bch2_fs_allocator_foreground_init(struct bch_fs *);
+
+void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *);
+void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
+
+void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
+
+#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
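
The inline helpers above encode the open-bucket reference discipline: bch2_open_bucket_get() takes a pin on every bucket in wp->ptrs while recording them in a caller-owned struct open_buckets, and bch2_open_buckets_put() drops those pins, falling through to __bch2_open_bucket_put() on the last reference. A minimal sketch of a caller following that discipline; use_bucket() is a hypothetical stand-in for the caller's own work, and wp->lock is assumed held while wp->ptrs is read (as in bch2_alloc_sectors_done_inlined() above):

    /* Illustrative sketch of the pin/put pattern; use_bucket() is hypothetical. */
    static void example_use_open_buckets(struct bch_fs *c, struct write_point *wp)
    {
            struct open_buckets ptrs = { .nr = 0 };
            struct open_bucket *ob;
            unsigned i;

            bch2_open_bucket_get(c, wp, &ptrs);     /* pins each bucket in wp->ptrs */

            open_bucket_for_each(c, &ptrs, ob, i)
                    use_bucket(c, ob);              /* hypothetical caller work */

            bch2_open_buckets_put(c, &ptrs);        /* drops pins; last ref frees */
    }
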
diff --git a/c_src/libbcachefs/alloc_types.h b/c_src/libbcachefs/alloc_types.h
new file mode 100644
index 00000000..b91b7a46
--- /dev/null
+++ b/c_src/libbcachefs/alloc_types.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ALLOC_TYPES_H
+#define _BCACHEFS_ALLOC_TYPES_H
+
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+
+#include "clock_types.h"
+#include "fifo.h"
+
+struct bucket_alloc_state {
+ u64 buckets_seen;
+ u64 skipped_open;
+ u64 skipped_need_journal_commit;
+ u64 skipped_nocow;
+ u64 skipped_nouse;
+};
+
+#define BCH_WATERMARKS() \
+ x(stripe) \
+ x(normal) \
+ x(copygc) \
+ x(btree) \
+ x(btree_copygc) \
+ x(reclaim)
+
+enum bch_watermark {
+#define x(name) BCH_WATERMARK_##name,
+ BCH_WATERMARKS()
+#undef x
+ BCH_WATERMARK_NR,
+};
+
+#define BCH_WATERMARK_BITS 3
+#define BCH_WATERMARK_MASK ~(~0U << BCH_WATERMARK_BITS)
+
+#define OPEN_BUCKETS_COUNT 1024
+
+#define WRITE_POINT_HASH_NR 32
+#define WRITE_POINT_MAX 32
+
+/*
+ * 0 is never a valid open_bucket_idx_t:
+ */
+typedef u16 open_bucket_idx_t;
+
+struct open_bucket {
+ spinlock_t lock;
+ atomic_t pin;
+ open_bucket_idx_t freelist;
+ open_bucket_idx_t hash;
+
+ /*
+ * When an open bucket has an ec_stripe attached, this is the index of
+ * the block in the stripe this open_bucket corresponds to:
+ */
+ u8 ec_idx;
+ enum bch_data_type data_type:6;
+ unsigned valid:1;
+ unsigned on_partial_list:1;
+
+ u8 dev;
+ u8 gen;
+ u32 sectors_free;
+ u64 bucket;
+ struct ec_stripe_new *ec;
+};
+
+#define OPEN_BUCKET_LIST_MAX 15
+
+struct open_buckets {
+ open_bucket_idx_t nr;
+ open_bucket_idx_t v[OPEN_BUCKET_LIST_MAX];
+};
+
+struct dev_stripe_state {
+ u64 next_alloc[BCH_SB_MEMBERS_MAX];
+};
+
+#define WRITE_POINT_STATES() \
+ x(stopped) \
+ x(waiting_io) \
+ x(waiting_work) \
+ x(running)
+
+enum write_point_state {
+#define x(n) WRITE_POINT_##n,
+ WRITE_POINT_STATES()
+#undef x
+ WRITE_POINT_STATE_NR
+};
+
+struct write_point {
+ struct {
+ struct hlist_node node;
+ struct mutex lock;
+ u64 last_used;
+ unsigned long write_point;
+ enum bch_data_type data_type;
+
+ /* calculated based on how many pointers we're actually going to use: */
+ unsigned sectors_free;
+
+ struct open_buckets ptrs;
+ struct dev_stripe_state stripe;
+
+ u64 sectors_allocated;
+ } __aligned(SMP_CACHE_BYTES);
+
+ struct {
+ struct work_struct index_update_work;
+
+ struct list_head writes;
+ spinlock_t writes_lock;
+
+ enum write_point_state state;
+ u64 last_state_change;
+ u64 time[WRITE_POINT_STATE_NR];
+ } __aligned(SMP_CACHE_BYTES);
+};
+
+struct write_point_specifier {
+ unsigned long v;
+};
+
+#endif /* _BCACHEFS_ALLOC_TYPES_H */
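
BCH_WATERMARKS() and WRITE_POINT_STATES() above are x-macro lists: each name is written once, and the list is expanded with different definitions of x() to generate the enums here and the matching string tables (bch2_watermarks[], bch2_write_point_states[]) elsewhere, so enumerators and their printable names cannot drift apart. A self-contained illustration of the pattern; the COLOURS() list is invented for the example:

    #include <stdio.h>

    #define COLOURS()   \
            x(red)      \
            x(green)    \
            x(blue)

    enum colour {
    #define x(n) COLOUR_##n,
            COLOURS()
    #undef x
            COLOUR_NR
    };

    static const char * const colour_names[] = {
    #define x(n) #n,
            COLOURS()
    #undef x
            NULL
    };

    int main(void)
    {
            for (int i = 0; i < COLOUR_NR; i++)
                    printf("%d = %s\n", i, colour_names[i]);
            return 0;
    }
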
diff --git a/c_src/libbcachefs/backpointers.c b/c_src/libbcachefs/backpointers.c
new file mode 100644
index 00000000..e358a2ff
--- /dev/null
+++ b/c_src/libbcachefs/backpointers.c
@@ -0,0 +1,873 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "bbpos.h"
+#include "alloc_background.h"
+#include "backpointers.h"
+#include "bkey_buf.h"
+#include "btree_cache.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_write_buffer.h"
+#include "error.h"
+
+#include <linux/mm.h>
+
+static bool extent_matches_bp(struct bch_fs *c,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k,
+ struct bpos bucket,
+ struct bch_backpointer bp)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ struct bpos bucket2;
+ struct bch_backpointer bp2;
+
+ if (p.ptr.cached)
+ continue;
+
+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
+ &bucket2, &bp2);
+ if (bpos_eq(bucket, bucket2) &&
+ !memcmp(&bp, &bp2, sizeof(bp)))
+ return true;
+ }
+
+ return false;
+}
+
+int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+ struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
+ int ret = 0;
+
+ bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
+ c, err,
+ backpointer_pos_wrong,
+ "backpointer at wrong pos");
+fsck_err:
+ return ret;
+}
+
+void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
+{
+ prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
+ bch2_btree_id_str(bp->btree_id),
+ bp->level,
+ (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
+ (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
+ bp->bucket_len);
+ bch2_bpos_to_text(out, bp->pos);
+}
+
+void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ prt_str(out, "bucket=");
+ bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
+ prt_str(out, " ");
+
+ bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
+}
+
+void bch2_backpointer_swab(struct bkey_s k)
+{
+ struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
+
+ bp.v->bucket_offset = swab40(bp.v->bucket_offset);
+ bp.v->bucket_len = swab32(bp.v->bucket_len);
+ bch2_bpos_swab(&bp.v->pos);
+}
+
+static noinline int backpointer_mod_err(struct btree_trans *trans,
+ struct bch_backpointer bp,
+ struct bkey_s_c bp_k,
+ struct bkey_s_c orig_k,
+ bool insert)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+
+ if (insert) {
+ prt_printf(&buf, "existing backpointer found when inserting ");
+ bch2_backpointer_to_text(&buf, &bp);
+ prt_newline(&buf);
+ printbuf_indent_add(&buf, 2);
+
+ prt_printf(&buf, "found ");
+ bch2_bkey_val_to_text(&buf, c, bp_k);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "for ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+
+ bch_err(c, "%s", buf.buf);
+ } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
+ prt_printf(&buf, "backpointer not found when deleting");
+ prt_newline(&buf);
+ printbuf_indent_add(&buf, 2);
+
+ prt_printf(&buf, "searching for ");
+ bch2_backpointer_to_text(&buf, &bp);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "got ");
+ bch2_bkey_val_to_text(&buf, c, bp_k);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "for ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+
+ bch_err(c, "%s", buf.buf);
+ }
+
+ printbuf_exit(&buf);
+
+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
+ bch2_inconsistent_error(c);
+ return -EIO;
+ } else {
+ return 0;
+ }
+}
+
+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
+ struct bpos bucket,
+ struct bch_backpointer bp,
+ struct bkey_s_c orig_k,
+ bool insert)
+{
+ struct btree_iter bp_iter;
+ struct bkey_s_c k;
+ struct bkey_i_backpointer *bp_k;
+ int ret;
+
+ bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
+ ret = PTR_ERR_OR_ZERO(bp_k);
+ if (ret)
+ return ret;
+
+ bkey_backpointer_init(&bp_k->k_i);
+ bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
+ bp_k->v = bp;
+
+ if (!insert) {
+ bp_k->k.type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(&bp_k->k, 0);
+ }
+
+ k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
+ bp_k->k.p,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_SLOTS|
+ BTREE_ITER_WITH_UPDATES);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (insert
+ ? k.k->type
+ : (k.k->type != KEY_TYPE_backpointer ||
+ memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
+ ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
+ if (ret)
+ goto err;
+ }
+
+ ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
+err:
+ bch2_trans_iter_exit(trans, &bp_iter);
+ return ret;
+}
+
+/*
+ * Find the next backpointer >= *bp_pos:
+ */
+int bch2_get_next_backpointer(struct btree_trans *trans,
+ struct bpos bucket, int gen,
+ struct bpos *bp_pos,
+ struct bch_backpointer *bp,
+ unsigned iter_flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
+ struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
+ struct bkey_s_c k;
+ int ret = 0;
+
+ if (bpos_ge(*bp_pos, bp_end_pos))
+ goto done;
+
+ if (gen >= 0) {
+ k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
+ bucket, BTREE_ITER_CACHED|iter_flags);
+ ret = bkey_err(k);
+ if (ret)
+ goto out;
+
+ if (k.k->type != KEY_TYPE_alloc_v4 ||
+ bkey_s_c_to_alloc_v4(k).v->gen != gen)
+ goto done;
+ }
+
+ *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));
+
+ for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
+ *bp_pos, iter_flags, k, ret) {
+ if (bpos_ge(k.k->p, bp_end_pos))
+ break;
+
+ *bp_pos = k.k->p;
+ *bp = *bkey_s_c_to_backpointer(k).v;
+ goto out;
+ }
+done:
+ *bp_pos = SPOS_MAX;
+out:
+ bch2_trans_iter_exit(trans, &bp_iter);
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ return ret;
+}
+
+static void backpointer_not_found(struct btree_trans *trans,
+ struct bpos bp_pos,
+ struct bch_backpointer bp,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
+
+ /*
+ * If we're using the btree write buffer, the backpointer we were
+ * looking at may have already been deleted - failure to find what it
+ * pointed to is not an error:
+ */
+ if (likely(!bch2_backpointers_no_use_write_buffer))
+ return;
+
+ prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
+ bp.level ? "btree node" : "extent");
+ prt_printf(&buf, "bucket: ");
+ bch2_bpos_to_text(&buf, bucket);
+ prt_printf(&buf, "\n ");
+
+ prt_printf(&buf, "backpointer pos: ");
+ bch2_bpos_to_text(&buf, bp_pos);
+ prt_printf(&buf, "\n ");
+
+ bch2_backpointer_to_text(&buf, &bp);
+ prt_printf(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
+ bch_err_ratelimited(c, "%s", buf.buf);
+ else
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+
+ printbuf_exit(&buf);
+}
+
+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bpos bp_pos,
+ struct bch_backpointer bp,
+ unsigned iter_flags)
+{
+ if (likely(!bp.level)) {
+ struct bch_fs *c = trans->c;
+ struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
+ struct bkey_s_c k;
+
+ bch2_trans_node_iter_init(trans, iter,
+ bp.btree_id,
+ bp.pos,
+ 0, 0,
+ iter_flags);
+ k = bch2_btree_iter_peek_slot(iter);
+ if (bkey_err(k)) {
+ bch2_trans_iter_exit(trans, iter);
+ return k;
+ }
+
+ if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
+ return k;
+
+ bch2_trans_iter_exit(trans, iter);
+ backpointer_not_found(trans, bp_pos, bp, k);
+ return bkey_s_c_null;
+ } else {
+ struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
+
+ if (IS_ERR_OR_NULL(b)) {
+ bch2_trans_iter_exit(trans, iter);
+ return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
+ }
+ return bkey_i_to_s_c(&b->key);
+ }
+}
+
+struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bpos bp_pos,
+ struct bch_backpointer bp)
+{
+ struct bch_fs *c = trans->c;
+ struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
+ struct btree *b;
+
+ BUG_ON(!bp.level);
+
+ bch2_trans_node_iter_init(trans, iter,
+ bp.btree_id,
+ bp.pos,
+ 0,
+ bp.level - 1,
+ 0);
+ b = bch2_btree_iter_peek_node(iter);
+ if (IS_ERR_OR_NULL(b))
+ goto err;
+
+ BUG_ON(b->c.level != bp.level - 1);
+
+ if (extent_matches_bp(c, bp.btree_id, bp.level,
+ bkey_i_to_s_c(&b->key),
+ bucket, bp))
+ return b;
+
+ if (btree_node_will_make_reachable(b)) {
+ b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
+ } else {
+ backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
+ b = NULL;
+ }
+err:
+ bch2_trans_iter_exit(trans, iter);
+ return b;
+}
+
+static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter alloc_iter = { NULL };
+ struct bkey_s_c alloc_k;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
+ backpointer_to_missing_device,
+ "backpointer for missing device:\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, bp_iter, 0);
+ goto out;
+ }
+
+ alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
+ bp_pos_to_bucket(c, k.k->p), 0);
+ ret = bkey_err(alloc_k);
+ if (ret)
+ goto out;
+
+ if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
+ backpointer_to_missing_alloc,
+ "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
+ alloc_iter.pos.inode, alloc_iter.pos.offset,
+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, bp_iter, 0);
+ goto out;
+ }
+out:
+fsck_err:
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+/* verify that every backpointer has a corresponding alloc key */
+int bch2_check_btree_backpointers(struct bch_fs *c)
+{
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter,
+ BTREE_ID_backpointers, POS_MIN, 0, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_check_btree_backpointer(trans, &iter, k)));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int check_bp_exists(struct btree_trans *trans,
+ struct bpos bucket,
+ struct bch_backpointer bp,
+ struct bkey_s_c orig_k,
+ struct bpos bucket_start,
+ struct bpos bucket_end,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter bp_iter = { NULL };
+ struct printbuf buf = PRINTBUF;
+ struct bkey_s_c bp_k;
+ struct bkey_buf tmp;
+ int ret;
+
+ bch2_bkey_buf_init(&tmp);
+
+ if (bpos_lt(bucket, bucket_start) ||
+ bpos_gt(bucket, bucket_end))
+ return 0;
+
+ if (!bch2_dev_bucket_exists(c, bucket))
+ goto missing;
+
+ bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp(c, bucket, bp.bucket_offset),
+ 0);
+ ret = bkey_err(bp_k);
+ if (ret)
+ goto err;
+
+ if (bp_k.k->type != KEY_TYPE_backpointer ||
+ memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
+ if (!bpos_eq(orig_k.k->p, last_flushed->k->k.p) ||
+ bkey_bytes(orig_k.k) != bkey_bytes(&last_flushed->k->k) ||
+ memcmp(orig_k.v, &last_flushed->k->v, bkey_val_bytes(orig_k.k))) {
+ bch2_bkey_buf_reassemble(&tmp, c, orig_k);
+
+ if (bp.level) {
+ bch2_trans_unlock(trans);
+ bch2_btree_interior_updates_flush(c);
+ }
+
+ ret = bch2_btree_write_buffer_flush_sync(trans);
+ if (ret)
+ goto err;
+
+ bch2_bkey_buf_copy(last_flushed, c, tmp.k);
+ ret = -BCH_ERR_transaction_restart_write_buffer_flush;
+ goto out;
+ }
+ goto missing;
+ }
+out:
+err:
+fsck_err:
+ bch2_trans_iter_exit(trans, &bp_iter);
+ bch2_bkey_buf_exit(&tmp, c);
+ printbuf_exit(&buf);
+ return ret;
+missing:
+ prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
+ bch2_btree_id_str(bp.btree_id), bp.level);
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ prt_printf(&buf, "\nbp pos ");
+ bch2_bpos_to_text(&buf, bp_iter.pos);
+
+ if (c->opts.reconstruct_alloc ||
+ fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
+ ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
+
+ goto out;
+}
+
+static int check_extent_to_backpointers(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bpos bucket_start,
+ struct bpos bucket_end,
+ struct bkey_buf *last_flushed,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ int ret;
+
+ ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ struct bpos bucket_pos;
+ struct bch_backpointer bp;
+
+ if (p.ptr.cached)
+ continue;
+
+ bch2_extent_ptr_to_bp(c, btree, level,
+ k, p, &bucket_pos, &bp);
+
+ ret = check_bp_exists(trans, bucket_pos, bp, k,
+ bucket_start, bucket_end,
+ last_flushed);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int check_btree_root_to_backpointers(struct btree_trans *trans,
+ enum btree_id btree_id,
+ struct bpos bucket_start,
+ struct bpos bucket_end,
+ struct bkey_buf *last_flushed,
+ int *level)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct btree *b;
+ struct bkey_s_c k;
+ int ret;
+retry:
+ bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
+ 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
+ b = bch2_btree_iter_peek_node(&iter);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto err;
+
+ if (b != btree_node_root(c, b)) {
+ bch2_trans_iter_exit(trans, &iter);
+ goto retry;
+ }
+
+ *level = b->c.level;
+
+ k = bkey_i_to_s_c(&b->key);
+ ret = check_extent_to_backpointers(trans, btree_id, b->c.level + 1,
+ bucket_start, bucket_end,
+ last_flushed, k);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
+{
+ return (struct bbpos) {
+ .btree = bp.btree_id,
+ .pos = bp.pos,
+ };
+}
+
+static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+{
+ struct sysinfo i;
+ u64 mem_bytes;
+
+ si_meminfo(&i);
+ mem_bytes = i.totalram * i.mem_unit;
+ return div_u64(mem_bytes >> 1, btree_bytes(c));
+}
+
+static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
+ unsigned btree_leaf_mask,
+ unsigned btree_interior_mask,
+ struct bbpos start, struct bbpos *end)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
+ enum btree_id btree;
+ int ret = 0;
+
+ for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
+ unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
+
+ if (!((1U << btree) & btree_leaf_mask) &&
+ !((1U << btree) & btree_interior_mask))
+ continue;
+
+ bch2_trans_node_iter_init(trans, &iter, btree,
+ btree == start.btree ? start.pos : POS_MIN,
+ 0, depth, 0);
+ /*
+ * for_each_btree_key_continue() doesn't check the return value
+ * from bch2_btree_iter_advance(), which is needed when
+ * iterating over interior nodes where we'll see keys at
+ * SPOS_MAX:
+ */
+ do {
+ k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
+ ret = bkey_err(k);
+ if (!k.k || ret)
+ break;
+
+ --btree_nodes;
+ if (!btree_nodes) {
+ *end = BBPOS(btree, k.k->p);
+ bch2_trans_iter_exit(trans, &iter);
+ return 0;
+ }
+ } while (bch2_btree_iter_advance(&iter));
+ bch2_trans_iter_exit(trans, &iter);
+ }
+
+ *end = BBPOS_MAX;
+ return ret;
+}
+
+static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
+ struct bpos bucket_start,
+ struct bpos bucket_end)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ enum btree_id btree_id;
+ struct bkey_s_c k;
+ struct bkey_buf last_flushed;
+ int ret = 0;
+
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) {
+ int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
+
+ ret = commit_do(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc,
+ check_btree_root_to_backpointers(trans, btree_id,
+ bucket_start, bucket_end,
+ &last_flushed, &level));
+ if (ret)
+ return ret;
+
+ while (level >= depth) {
+ bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
+ level,
+ BTREE_ITER_PREFETCH);
+ while (1) {
+ bch2_trans_begin(trans);
+ k = bch2_btree_iter_peek(&iter);
+ if (!k.k)
+ break;
+ ret = bkey_err(k) ?:
+ check_extent_to_backpointers(trans, btree_id, level,
+ bucket_start, bucket_end,
+ &last_flushed, k) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ ret = 0;
+ continue;
+ }
+ if (ret)
+ break;
+ if (bpos_eq(iter.pos, SPOS_MAX))
+ break;
+ bch2_btree_iter_advance(&iter);
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret)
+ return ret;
+
+ --level;
+ }
+ }
+
+ bch2_bkey_buf_exit(&last_flushed, c);
+ return 0;
+}
+
+static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
+ struct bpos bucket)
+{
+ return bch2_dev_exists2(c, bucket.inode)
+ ? bucket_pos_to_bp(c, bucket, 0)
+ : bucket;
+}
+
+static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
+ struct bpos start, struct bpos *end)
+{
+ struct btree_iter alloc_iter;
+ struct btree_iter bp_iter;
+ struct bkey_s_c alloc_k, bp_k;
+ size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
+ bool alloc_end = false, bp_end = false;
+ int ret = 0;
+
+ bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
+ start, 0, 1, 0);
+ bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
+ while (1) {
+ alloc_k = !alloc_end
+ ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
+ : bkey_s_c_null;
+ bp_k = !bp_end
+ ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
+ : bkey_s_c_null;
+
+ ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
+ if ((!alloc_k.k && !bp_k.k) || ret) {
+ *end = SPOS_MAX;
+ break;
+ }
+
+ --btree_nodes;
+ if (!btree_nodes) {
+ *end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
+ break;
+ }
+
+ if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
+ bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
+ if (!bch2_btree_iter_advance(&alloc_iter))
+ alloc_end = true;
+ } else {
+ if (!bch2_btree_iter_advance(&bp_iter))
+ bp_end = true;
+ }
+ }
+ bch2_trans_iter_exit(trans, &bp_iter);
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ return ret;
+}
+
+int bch2_check_extents_to_backpointers(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct bpos start = POS_MIN, end;
+ int ret;
+
+ while (1) {
+ ret = bch2_get_alloc_in_memory_pos(trans, start, &end);
+ if (ret)
+ break;
+
+ if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX))
+ bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
+ __func__, btree_nodes_fit_in_ram(c));
+
+ if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "check_extents_to_backpointers(): ");
+ bch2_bpos_to_text(&buf, start);
+ prt_str(&buf, "-");
+ bch2_bpos_to_text(&buf, end);
+
+ bch_verbose(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ ret = bch2_check_extents_to_backpointers_pass(trans, start, end);
+ if (ret || bpos_eq(end, SPOS_MAX))
+ break;
+
+ start = bpos_successor(end);
+ }
+ bch2_trans_put(trans);
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int check_one_backpointer(struct btree_trans *trans,
+ struct bbpos start,
+ struct bbpos end,
+ struct bkey_s_c_backpointer bp,
+ struct bpos *last_flushed_pos)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bbpos pos = bp_to_bbpos(*bp.v);
+ struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ if (bbpos_cmp(pos, start) < 0 ||
+ bbpos_cmp(pos, end) > 0)
+ return 0;
+
+ k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
+ ret = bkey_err(k);
+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+ return 0;
+ if (ret)
+ return ret;
+
+ if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
+ *last_flushed_pos = bp.k->p;
+ ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+ -BCH_ERR_transaction_restart_write_buffer_flush;
+ goto out;
+ }
+
+ if (fsck_err_on(!k.k, c,
+ backpointer_to_missing_ptr,
+ "backpointer for missing %s\n %s",
+ bp.v->level ? "btree node" : "extent",
+ (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
+ ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
+ goto out;
+ }
+out:
+fsck_err:
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
+ struct bbpos start,
+ struct bbpos end)
+{
+ struct bpos last_flushed_pos = SPOS_MAX;
+
+ return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
+ POS_MIN, BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_one_backpointer(trans, start, end,
+ bkey_s_c_to_backpointer(k),
+ &last_flushed_pos));
+}
+
+int bch2_check_backpointers_to_extents(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
+ int ret;
+
+ while (1) {
+ ret = bch2_get_btree_in_memory_pos(trans,
+ (1U << BTREE_ID_extents)|
+ (1U << BTREE_ID_reflink),
+ ~0,
+ start, &end);
+ if (ret)
+ break;
+
+ if (!bbpos_cmp(start, BBPOS_MIN) &&
+ bbpos_cmp(end, BBPOS_MAX))
+ bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
+ __func__, btree_nodes_fit_in_ram(c));
+
+ if (bbpos_cmp(start, BBPOS_MIN) ||
+ bbpos_cmp(end, BBPOS_MAX)) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "check_backpointers_to_extents(): ");
+ bch2_bbpos_to_text(&buf, start);
+ prt_str(&buf, "-");
+ bch2_bbpos_to_text(&buf, end);
+
+ bch_verbose(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
+ if (ret || !bbpos_cmp(end, BBPOS_MAX))
+ break;
+
+ start = bbpos_successor(end);
+ }
+ bch2_trans_put(trans);
+
+ bch_err_fn(c, ret);
+ return ret;
+}
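
Both bch2_check_extents_to_backpointers() and bch2_check_backpointers_to_extents() above share the same shape: size a window by how many btree nodes fit in half of system RAM (btree_nodes_fit_in_ram()), check just that window, then resume from the successor of the window's end until the whole keyspace is covered. Stripped of the btree machinery, the control flow is roughly this sketch; window_end(), check_window() and KEY_MAX are invented stand-ins:

    /* Illustrative sketch of the windowed fsck pass; helpers are hypothetical. */
    static int check_all_in_ram_sized_windows(void)
    {
            u64 start = 0, end;
            int ret = 0;

            while (1) {
                    end = window_end(start);        /* as far as fits in memory */

                    ret = check_window(start, end); /* one pass over [start, end] */
                    if (ret || end == KEY_MAX)
                            break;

                    start = end + 1;                /* resume where the window stopped */
            }

            return ret;
    }
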
diff --git a/c_src/libbcachefs/backpointers.h b/c_src/libbcachefs/backpointers.h
new file mode 100644
index 00000000..737e2396
--- /dev/null
+++ b/c_src/libbcachefs/backpointers.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
+#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
+
+#include "btree_iter.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "super.h"
+
+static inline u64 swab40(u64 x)
+{
+ return (((x & 0x00000000ffULL) << 32)|
+ ((x & 0x000000ff00ULL) << 16)|
+ ((x & 0x0000ff0000ULL) >> 0)|
+ ((x & 0x00ff000000ULL) >> 16)|
+ ((x & 0xff00000000ULL) >> 32));
+}
+
+int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *);
+void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+void bch2_backpointer_swab(struct bkey_s);
+
+#define bch2_bkey_ops_backpointer ((struct bkey_ops) { \
+ .key_invalid = bch2_backpointer_invalid, \
+ .val_to_text = bch2_backpointer_k_to_text, \
+ .swab = bch2_backpointer_swab, \
+ .min_val_size = 32, \
+})
+
+#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10
+
+/*
+ * Convert from pos in backpointer btree to pos of corresponding bucket in alloc
+ * btree:
+ */
+static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c,
+ struct bpos bp_pos)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, bp_pos.inode);
+ u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT;
+
+ return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
+}
+
+/*
+ * Convert from pos in alloc btree + bucket offset to pos in backpointer btree:
+ */
+static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
+ struct bpos bucket,
+ u64 bucket_offset)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
+ struct bpos ret;
+
+ ret = POS(bucket.inode,
+ (bucket_to_sector(ca, bucket.offset) <<
+ MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+
+ EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
+
+ return ret;
+}
+
+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bpos bucket,
+ struct bch_backpointer, struct bkey_s_c, bool);
+
+static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
+ struct bpos bucket,
+ struct bch_backpointer bp,
+ struct bkey_s_c orig_k,
+ bool insert)
+{
+ if (unlikely(bch2_backpointers_no_use_write_buffer))
+ return bch2_bucket_backpointer_mod_nowritebuffer(trans, bucket, bp, orig_k, insert);
+
+ struct bkey_i_backpointer bp_k;
+
+ bkey_backpointer_init(&bp_k.k_i);
+ bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
+ bp_k.v = bp;
+
+ if (!insert) {
+ bp_k.k.type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(&bp_k.k, 0);
+ }
+
+ return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
+}
+
+static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, struct extent_ptr_decoded p)
+{
+ return level ? BCH_DATA_btree :
+ p.has_ec ? BCH_DATA_stripe :
+ BCH_DATA_user;
+}
+
+static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, struct extent_ptr_decoded p,
+ struct bpos *bucket_pos, struct bch_backpointer *bp)
+{
+ enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+ s64 sectors = level ? btree_sectors(c) : k.k->size;
+ u32 bucket_offset;
+
+ *bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset);
+ *bp = (struct bch_backpointer) {
+ .btree_id = btree_id,
+ .level = level,
+ .data_type = data_type,
+ .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
+ p.crc.offset,
+ .bucket_len = ptr_disk_sectors(sectors, p),
+ .pos = k.k->p,
+ };
+}
+
+int bch2_get_next_backpointer(struct btree_trans *, struct bpos, int,
+ struct bpos *, struct bch_backpointer *, unsigned);
+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *,
+ struct bpos, struct bch_backpointer,
+ unsigned);
+struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *,
+ struct bpos, struct bch_backpointer);
+
+int bch2_check_btree_backpointers(struct bch_fs *);
+int bch2_check_extents_to_backpointers(struct bch_fs *);
+int bch2_check_backpointers_to_extents(struct bch_fs *);
+
+#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */
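
bucket_pos_to_bp() and bp_pos_to_bucket() above encode a backpointer's btree position as the bucket's starting device sector scaled up by 2^MAX_EXTENT_COMPRESS_RATIO_SHIFT, plus the already-scaled offset within the bucket and the compressed-extent offset from bch2_extent_ptr_to_bp(), so backpointers for heavily compressed extents can share a device sector without colliding. A standalone round-trip check of that arithmetic, assuming a hypothetical bucket size of 1024 sectors:

    /* Round-trip of the position encoding; the sizes and offsets are assumptions. */
    #include <assert.h>
    #include <stdint.h>

    #define SHIFT 10        /* MAX_EXTENT_COMPRESS_RATIO_SHIFT */

    int main(void)
    {
            uint64_t bucket_size   = 1024;  /* sectors per bucket (assumed) */
            uint64_t bucket        = 5;     /* alloc-btree bucket offset */
            /* sector 100 within the bucket, crc offset 3, as built by bch2_extent_ptr_to_bp() */
            uint64_t bucket_offset = ((uint64_t) 100 << SHIFT) + 3;

            /* bucket_pos_to_bp(): scale the bucket's first sector, add the offset */
            uint64_t bp_offset = ((bucket * bucket_size) << SHIFT) + bucket_offset;

            /* bp_pos_to_bucket(): drop the compression bits, divide by the bucket size */
            assert(bp_offset >> SHIFT == bucket * bucket_size + 100);
            assert((bp_offset >> SHIFT) / bucket_size == bucket);
            return 0;
    }
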
diff --git a/c_src/libbcachefs/bbpos.h b/c_src/libbcachefs/bbpos.h
new file mode 100644
index 00000000..be2edced
--- /dev/null
+++ b/c_src/libbcachefs/bbpos.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BBPOS_H
+#define _BCACHEFS_BBPOS_H
+
+#include "bbpos_types.h"
+#include "bkey_methods.h"
+#include "btree_cache.h"
+
+static inline int bbpos_cmp(struct bbpos l, struct bbpos r)
+{
+ return cmp_int(l.btree, r.btree) ?: bpos_cmp(l.pos, r.pos);
+}
+
+static inline struct bbpos bbpos_successor(struct bbpos pos)
+{
+ if (bpos_cmp(pos.pos, SPOS_MAX)) {
+ pos.pos = bpos_successor(pos.pos);
+ return pos;
+ }
+
+ if (pos.btree != BTREE_ID_NR) {
+ pos.btree++;
+ pos.pos = POS_MIN;
+ return pos;
+ }
+
+ BUG();
+}
+
+static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos)
+{
+ prt_str(out, bch2_btree_id_str(pos.btree));
+ prt_char(out, ':');
+ bch2_bpos_to_text(out, pos.pos);
+}
+
+#endif /* _BCACHEFS_BBPOS_H */
diff --git a/c_src/libbcachefs/bbpos_types.h b/c_src/libbcachefs/bbpos_types.h
new file mode 100644
index 00000000..5198e94c
--- /dev/null
+++ b/c_src/libbcachefs/bbpos_types.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BBPOS_TYPES_H
+#define _BCACHEFS_BBPOS_TYPES_H
+
+struct bbpos {
+ enum btree_id btree;
+ struct bpos pos;
+};
+
+static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
+{
+ return (struct bbpos) { btree, pos };
+}
+
+#define BBPOS_MIN BBPOS(0, POS_MIN)
+#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX)
+
+#endif /* _BCACHEFS_BBPOS_TYPES_H */
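
struct bbpos packs a btree id together with a position so the backpointers fsck passes can treat "every key in every btree" as one ordered range: bbpos_cmp() in bbpos.h compares the btree id first and the position second, and bbpos_successor() steps to the next position or rolls over to the start of the next btree. A tiny illustrative check of that ordering (BUG_ON() used only as an assertion here):

    /* Illustrative only: bbpos orders by btree id before position. */
    static void bbpos_ordering_example(void)
    {
            struct bbpos a = BBPOS(BTREE_ID_extents, POS(1, 0));
            struct bbpos b = BBPOS(BTREE_ID_reflink, POS(0, 0));

            BUG_ON(bbpos_cmp(a, b) >= 0);                   /* extents sorts first, position ignored */
            BUG_ON(bbpos_cmp(bbpos_successor(a), a) <= 0);  /* successor is strictly greater */
    }
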
diff --git a/c_src/libbcachefs/bcachefs.h b/c_src/libbcachefs/bcachefs.h
new file mode 100644
index 00000000..dac383e3
--- /dev/null
+++ b/c_src/libbcachefs/bcachefs.h
@@ -0,0 +1,1260 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_H
+#define _BCACHEFS_H
+
+/*
+ * SOME HIGH LEVEL CODE DOCUMENTATION:
+ *
+ * Bcache mostly works with cache sets, cache devices, and backing devices.
+ *
+ * Support for multiple cache devices hasn't quite been finished off yet, but
+ * it's about 95% plumbed through. A cache set and its cache devices are sort of
+ * like an md raid array and its component devices. Most of the code doesn't care
+ * about individual cache devices; the main abstraction is the cache set.
+ *
+ * Multiple cache devices are intended to give us the ability to mirror dirty
+ * cached data and metadata, without mirroring clean cached data.
+ *
+ * Backing devices are different, in that they have a lifetime independent of a
+ * cache set. When you register a newly formatted backing device it'll come up
+ * in passthrough mode, and then you can attach and detach a backing device from
+ * a cache set at runtime - while it's mounted and in use. Detaching implicitly
+ * invalidates any cached data for that backing device.
+ *
+ * A cache set can have multiple (many) backing devices attached to it.
+ *
+ * There's also flash only volumes - this is the reason for the distinction
+ * between struct cached_dev and struct bcache_device. A flash only volume
+ * works much like a bcache device that has a backing device, except the
+ * "cached" data is always dirty. The end result is that we get thin
+ * provisioning with very little additional code.
+ *
+ * Flash only volumes work but they're not production ready because the moving
+ * garbage collector needs more work. More on that later.
+ *
+ * BUCKETS/ALLOCATION:
+ *
+ * Bcache is primarily designed for caching, which means that in normal
+ * operation all of our available space will be allocated. Thus, we need an
+ * efficient way of deleting things from the cache so we can write new things to
+ * it.
+ *
+ * To do this, we first divide the cache device up into buckets. A bucket is the
+ * unit of allocation; they're typically around 1 mb - anywhere from 128k to 2M+
+ * works efficiently.
+ *
+ * Each bucket has a 16 bit priority, and an 8 bit generation associated with
+ * it. The gens and priorities for all the buckets are stored contiguously and
+ * packed on disk (in a linked list of buckets - aside from the superblock, all
+ * of bcache's metadata is stored in buckets).
+ *
+ * The priority is used to implement an LRU. We reset a bucket's priority when
+ * we allocate it or on a cache hit, and every so often we decrement the priority
+ * of each bucket. It could be used to implement something more sophisticated,
+ * if anyone ever gets around to it.
+ *
+ * The generation is used for invalidating buckets. Each pointer also has an 8
+ * bit generation embedded in it; for a pointer to be considered valid, its gen
+ * must match the gen of the bucket it points into. Thus, to reuse a bucket all
+ * we have to do is increment its gen (and write its new gen to disk; we batch
+ * this up).
+ *
+ * Bcache is entirely COW - we never write twice to a bucket, even buckets that
+ * contain metadata (including btree nodes).
+ *
+ * THE BTREE:
+ *
+ * Bcache is in large part designed around the btree.
+ *
+ * At a high level, the btree is just an index of key -> ptr tuples.
+ *
+ * Keys represent extents, and thus have a size field. Keys also have a variable
+ * number of pointers attached to them (potentially zero, which is handy for
+ * invalidating the cache).
+ *
+ * The key itself is an inode:offset pair. The inode number corresponds to a
+ * backing device or a flash only volume. The offset is the ending offset of the
+ * extent within the inode - not the starting offset; this makes lookups
+ * slightly more convenient.
+ *
+ * Pointers contain the cache device id, the offset on that device, and an 8 bit
+ * generation number. More on the gen later.
+ *
+ * Index lookups are not fully abstracted - cache lookups in particular are
+ * still somewhat mixed in with the btree code, but things are headed in that
+ * direction.
+ *
+ * Updates are fairly well abstracted, though. There are two different ways of
+ * updating the btree; insert and replace.
+ *
+ * BTREE_INSERT will just take a list of keys and insert them into the btree -
+ * overwriting (possibly only partially) any extents they overlap with. This is
+ * used to update the index after a write.
+ *
+ * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is
+ * overwriting a key that matches another given key. This is used for inserting
+ * data into the cache after a cache miss, and for background writeback, and for
+ * the moving garbage collector.
+ *
+ * There is no "delete" operation; deleting things from the index is
+ * accomplished either by invalidating pointers (by incrementing a bucket's
+ * gen) or by inserting a key with 0 pointers - which will overwrite anything
+ * previously present at that location in the index.
+ *
+ * This means that there are always stale/invalid keys in the btree. They're
+ * filtered out by the code that iterates through a btree node, and removed when
+ * a btree node is rewritten.
+ *
+ * BTREE NODES:
+ *
+ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
+ * free smaller than a bucket - so, that's how big our btree nodes are.
+ *
+ * (If buckets are really big we'll only use part of the bucket for a btree node
+ * - no less than 1/4th - but a bucket still contains no more than a single
+ * btree node. I'd actually like to change this, but for now we rely on the
+ * bucket's gen for deleting btree nodes when we rewrite/split a node.)
+ *
+ * Anyways, btree nodes are big - big enough to be inefficient with a textbook
+ * btree implementation.
+ *
+ * The way this is solved is that btree nodes are internally log structured; we
+ * can append new keys to an existing btree node without rewriting it. This
+ * means each set of keys we write is sorted, but the node is not.
+ *
+ * We maintain this log structure in memory - keeping 1Mb of keys sorted would
+ * be expensive, and we have to distinguish between the keys we have written and
+ * the keys we haven't. So to do a lookup in a btree node, we have to search
+ * each sorted set. But we do merge written sets together lazily, so the cost of
+ * these extra searches is quite low (normally most of the keys in a btree node
+ * will be in one big set, and then there'll be one or two sets that are much
+ * smaller).
+ *
+ * This log structure makes bcache's btree more of a hybrid between a
+ * conventional btree and a compacting data structure, with some of the
+ * advantages of both.
+ *
+ * GARBAGE COLLECTION:
+ *
+ * We can't just invalidate any bucket - it might contain dirty data or
+ * metadata. If it once contained dirty data, other writes might overwrite it
+ * later, leaving no valid pointers into that bucket in the index.
+ *
+ * Thus, the primary purpose of garbage collection is to find buckets to reuse.
+ * It also counts how much valid data each bucket currently contains, so that
+ * allocation can reuse buckets sooner when they've been mostly overwritten.
+ *
+ * It also does some things that are really internal to the btree
+ * implementation. If a btree node contains pointers that are stale by more than
+ * some threshold, it rewrites the btree node to avoid the bucket's generation
+ * wrapping around. It also merges adjacent btree nodes if they're empty enough.
+ *
+ * THE JOURNAL:
+ *
+ * Bcache's journal is not necessary for consistency; we always strictly
+ * order metadata writes so that the btree and everything else is consistent on
+ * disk in the event of an unclean shutdown, and in fact bcache had writeback
+ * caching (with recovery from unclean shutdown) before journalling was
+ * implemented.
+ *
+ * Rather, the journal is purely a performance optimization; we can't complete a
+ * write until we've updated the index on disk, otherwise the cache would be
+ * inconsistent in the event of an unclean shutdown. This means that without the
+ * journal, on random write workloads we constantly have to update all the leaf
+ * nodes in the btree, and those writes will be mostly empty (appending at most
+ * a few keys each) - highly inefficient in terms of amount of metadata writes,
+ * and it puts more strain on the various btree resorting/compacting code.
+ *
+ * The journal is just a log of keys we've inserted; on startup we just reinsert
+ * all the keys in the open journal entries. That means that when we're updating
+ * a node in the btree, we can wait until a 4k block of keys fills up before
+ * writing them out.
+ *
+ * For simplicity, we only journal updates to leaf nodes; updates to parent
+ * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth
+ * the complexity to deal with journalling them (in particular, journal replay)
+ * - updates to non leaf nodes just happen synchronously (see btree_split()).
+ */
+
+#undef pr_fmt
+#ifdef __KERNEL__
+#define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__
+#else
+#define pr_fmt(fmt) "%s() " fmt "\n", __func__
+#endif
+
+#include <linux/backing-dev-defs.h>
+#include <linux/bug.h>
+#include <linux/bio.h>
+#include <linux/closure.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/math64.h>
+#include <linux/mutex.h>
+#include <linux/percpu-refcount.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/refcount.h>
+#include <linux/rhashtable.h>
+#include <linux/rwsem.h>
+#include <linux/semaphore.h>
+#include <linux/seqlock.h>
+#include <linux/shrinker.h>
+#include <linux/srcu.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/zstd.h>
+
+#include "bcachefs_format.h"
+#include "errcode.h"
+#include "fifo.h"
+#include "nocow_locking_types.h"
+#include "opts.h"
+#include "recovery_types.h"
+#include "sb-errors_types.h"
+#include "seqmutex.h"
+#include "util.h"
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+#define BCH_WRITE_REF_DEBUG
+#endif
+
+#ifndef dynamic_fault
+#define dynamic_fault(...) 0
+#endif
+
+#define race_fault(...) dynamic_fault("bcachefs:race")
+
+#define count_event(_c, _name) this_cpu_inc((_c)->counters[BCH_COUNTER_##_name])
+
+#define trace_and_count(_c, _name, ...) \
+do { \
+ count_event(_c, _name); \
+ trace_##_name(__VA_ARGS__); \
+} while (0)
+
+#define bch2_fs_init_fault(name) \
+ dynamic_fault("bcachefs:bch_fs_init:" name)
+#define bch2_meta_read_fault(name) \
+ dynamic_fault("bcachefs:meta:read:" name)
+#define bch2_meta_write_fault(name) \
+ dynamic_fault("bcachefs:meta:write:" name)
+
+#ifdef __KERNEL__
+#define BCACHEFS_LOG_PREFIX
+#endif
+
+#ifdef BCACHEFS_LOG_PREFIX
+
+#define bch2_log_msg(_c, fmt) "bcachefs (%s): " fmt, ((_c)->name)
+#define bch2_fmt_dev(_ca, fmt) "bcachefs (%s): " fmt "\n", ((_ca)->name)
+#define bch2_fmt_dev_offset(_ca, _offset, fmt) "bcachefs (%s sector %llu): " fmt "\n", ((_ca)->name), (_offset)
+#define bch2_fmt_inum(_c, _inum, fmt) "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum)
+#define bch2_fmt_inum_offset(_c, _inum, _offset, fmt) \
+ "bcachefs (%s inum %llu offset %llu): " fmt "\n", ((_c)->name), (_inum), (_offset)
+
+#else
+
+#define bch2_log_msg(_c, fmt) fmt
+#define bch2_fmt_dev(_ca, fmt) "%s: " fmt "\n", ((_ca)->name)
+#define bch2_fmt_dev_offset(_ca, _offset, fmt) "%s sector %llu: " fmt "\n", ((_ca)->name), (_offset)
+#define bch2_fmt_inum(_c, _inum, fmt) "inum %llu: " fmt "\n", (_inum)
+#define bch2_fmt_inum_offset(_c, _inum, _offset, fmt) \
+ "inum %llu offset %llu: " fmt "\n", (_inum), (_offset)
+
+#endif
+
+#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n")
+
+__printf(2, 3)
+void __bch2_print(struct bch_fs *c, const char *fmt, ...);
+
+#define maybe_dev_to_fs(_c) _Generic((_c), \
+ struct bch_dev *: ((struct bch_dev *) (_c))->fs, \
+ struct bch_fs *: (_c))
+
+#define bch2_print(_c, ...) __bch2_print(maybe_dev_to_fs(_c), __VA_ARGS__)
+
+#define bch2_print_ratelimited(_c, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ \
+ if (__ratelimit(&_rs)) \
+ bch2_print(_c, __VA_ARGS__); \
+} while (0)
+
+#define bch_info(c, fmt, ...) \
+ bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_notice(c, fmt, ...) \
+ bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_warn(c, fmt, ...) \
+ bch2_print(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_warn_ratelimited(c, fmt, ...) \
+ bch2_print_ratelimited(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
+
+#define bch_err(c, fmt, ...) \
+ bch2_print(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_err_dev(ca, fmt, ...) \
+ bch2_print(c, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
+#define bch_err_dev_offset(ca, _offset, fmt, ...) \
+ bch2_print(c, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
+#define bch_err_inum(c, _inum, fmt, ...) \
+ bch2_print(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
+#define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \
+ bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
+
+#define bch_err_ratelimited(c, fmt, ...) \
+ bch2_print_ratelimited(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_err_dev_ratelimited(ca, fmt, ...) \
+ bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
+#define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \
+ bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__)
+#define bch_err_inum_ratelimited(c, _inum, fmt, ...) \
+ bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
+#define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \
+ bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
+
+static inline bool should_print_err(int err)
+{
+ return err && !bch2_err_matches(err, BCH_ERR_transaction_restart);
+}
+
+#define bch_err_fn(_c, _ret) \
+do { \
+ if (should_print_err(_ret)) \
+ bch_err(_c, "%s(): error %s", __func__, bch2_err_str(_ret));\
+} while (0)
+
+#define bch_err_fn_ratelimited(_c, _ret) \
+do { \
+ if (should_print_err(_ret)) \
+ bch_err_ratelimited(_c, "%s(): error %s", __func__, bch2_err_str(_ret));\
+} while (0)
+
+#define bch_err_msg(_c, _ret, _msg, ...) \
+do { \
+ if (should_print_err(_ret)) \
+ bch_err(_c, "%s(): error " _msg " %s", __func__, \
+ ##__VA_ARGS__, bch2_err_str(_ret)); \
+} while (0)
+
+#define bch_verbose(c, fmt, ...) \
+do { \
+ if ((c)->opts.verbose) \
+ bch_info(c, fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define pr_verbose_init(opts, fmt, ...) \
+do { \
+ if (opt_get(opts, verbose)) \
+ pr_info(fmt, ##__VA_ARGS__); \
+} while (0)
+
+/* Parameters that are useful for debugging, but should always be compiled in: */
+#define BCH_DEBUG_PARAMS_ALWAYS() \
+ BCH_DEBUG_PARAM(key_merging_disabled, \
+ "Disables merging of extents") \
+ BCH_DEBUG_PARAM(btree_gc_always_rewrite, \
+ "Causes mark and sweep to compact and rewrite every " \
+ "btree node it traverses") \
+ BCH_DEBUG_PARAM(btree_gc_rewrite_disabled, \
+ "Disables rewriting of btree nodes during mark and sweep")\
+ BCH_DEBUG_PARAM(btree_shrinker_disabled, \
+ "Disables the shrinker callback for the btree node cache")\
+ BCH_DEBUG_PARAM(verify_btree_ondisk, \
+ "Reread btree nodes at various points to verify the " \
+ "mergesort in the read path against modifications " \
+ "done in memory") \
+ BCH_DEBUG_PARAM(verify_all_btree_replicas, \
+ "When reading btree nodes, read all replicas and " \
+ "compare them") \
+ BCH_DEBUG_PARAM(backpointers_no_use_write_buffer, \
+ "Don't use the write buffer for backpointers, enabling "\
+ "extra runtime checks")
+
+/* Parameters that should only be compiled in debug mode: */
+#define BCH_DEBUG_PARAMS_DEBUG() \
+ BCH_DEBUG_PARAM(expensive_debug_checks, \
+ "Enables various runtime debugging checks that " \
+ "significantly affect performance") \
+ BCH_DEBUG_PARAM(debug_check_iterators, \
+ "Enables extra verification for btree iterators") \
+ BCH_DEBUG_PARAM(debug_check_btree_accounting, \
+ "Verify btree accounting for keys within a node") \
+ BCH_DEBUG_PARAM(journal_seq_verify, \
+ "Store the journal sequence number in the version " \
+ "number of every btree key, and verify that btree " \
+ "update ordering is preserved during recovery") \
+ BCH_DEBUG_PARAM(inject_invalid_keys, \
+ "Store the journal sequence number in the version " \
+ "number of every btree key, and verify that btree " \
+ "update ordering is preserved during recovery") \
+ BCH_DEBUG_PARAM(test_alloc_startup, \
+ "Force allocator startup to use the slowpath where it" \
+ "can't find enough free buckets without invalidating" \
+ "cached data") \
+ BCH_DEBUG_PARAM(force_reconstruct_read, \
+ "Force reads to use the reconstruct path, when reading" \
+ "from erasure coded extents") \
+ BCH_DEBUG_PARAM(test_restart_gc, \
+ "Test restarting mark and sweep gc when bucket gens change")
+
+#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG()
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALL()
+#else
+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS()
+#endif
+
+#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name;
+BCH_DEBUG_PARAMS()
+#undef BCH_DEBUG_PARAM
+
+#ifndef CONFIG_BCACHEFS_DEBUG
+#define BCH_DEBUG_PARAM(name, description) static const __maybe_unused bool bch2_##name;
+BCH_DEBUG_PARAMS_DEBUG()
+#undef BCH_DEBUG_PARAM
+#endif
+
+#define BCH_TIME_STATS() \
+ x(btree_node_mem_alloc) \
+ x(btree_node_split) \
+ x(btree_node_compact) \
+ x(btree_node_merge) \
+ x(btree_node_sort) \
+ x(btree_node_read) \
+ x(btree_node_read_done) \
+ x(btree_interior_update_foreground) \
+ x(btree_interior_update_total) \
+ x(btree_gc) \
+ x(data_write) \
+ x(data_read) \
+ x(data_promote) \
+ x(journal_flush_write) \
+ x(journal_noflush_write) \
+ x(journal_flush_seq) \
+ x(blocked_journal_low_on_space) \
+ x(blocked_journal_low_on_pin) \
+ x(blocked_journal_max_in_flight) \
+ x(blocked_allocate) \
+ x(blocked_allocate_open_bucket) \
+ x(blocked_write_buffer_full) \
+ x(nocow_lock_contended)
+
+enum bch_time_stats {
+#define x(name) BCH_TIME_##name,
+ BCH_TIME_STATS()
+#undef x
+ BCH_TIME_STAT_NR
+};
+
+#include "alloc_types.h"
+#include "btree_types.h"
+#include "btree_write_buffer_types.h"
+#include "buckets_types.h"
+#include "buckets_waiting_for_journal_types.h"
+#include "clock_types.h"
+#include "disk_groups_types.h"
+#include "ec_types.h"
+#include "journal_types.h"
+#include "keylist_types.h"
+#include "quota_types.h"
+#include "rebalance_types.h"
+#include "replicas_types.h"
+#include "subvolume_types.h"
+#include "super_types.h"
+#include "thread_with_file_types.h"
+
+/* Number of nodes btree coalesce will try to coalesce at once */
+#define GC_MERGE_NODES 4U
+
+/* Maximum number of nodes we might need to allocate atomically: */
+#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))
+
+/* Size of the freelist we allocate btree nodes from: */
+#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4)
+
+#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX)
+
+struct btree;
+
+enum gc_phase {
+ GC_PHASE_NOT_RUNNING,
+ GC_PHASE_START,
+ GC_PHASE_SB,
+
+ GC_PHASE_BTREE_stripes,
+ GC_PHASE_BTREE_extents,
+ GC_PHASE_BTREE_inodes,
+ GC_PHASE_BTREE_dirents,
+ GC_PHASE_BTREE_xattrs,
+ GC_PHASE_BTREE_alloc,
+ GC_PHASE_BTREE_quotas,
+ GC_PHASE_BTREE_reflink,
+ GC_PHASE_BTREE_subvolumes,
+ GC_PHASE_BTREE_snapshots,
+ GC_PHASE_BTREE_lru,
+ GC_PHASE_BTREE_freespace,
+ GC_PHASE_BTREE_need_discard,
+ GC_PHASE_BTREE_backpointers,
+ GC_PHASE_BTREE_bucket_gens,
+ GC_PHASE_BTREE_snapshot_trees,
+ GC_PHASE_BTREE_deleted_inodes,
+ GC_PHASE_BTREE_logged_ops,
+ GC_PHASE_BTREE_rebalance_work,
+
+ GC_PHASE_PENDING_DELETE,
+};
+
+struct gc_pos {
+ enum gc_phase phase;
+ struct bpos pos;
+ unsigned level;
+};
+
+struct reflink_gc {
+ u64 offset;
+ u32 size;
+ u32 refcount;
+};
+
+typedef GENRADIX(struct reflink_gc) reflink_gc_table;
+
+struct io_count {
+ u64 sectors[2][BCH_DATA_NR];
+};
+
+struct bch_dev {
+ struct kobject kobj;
+ struct percpu_ref ref;
+ struct completion ref_completion;
+ struct percpu_ref io_ref;
+ struct completion io_ref_completion;
+
+ struct bch_fs *fs;
+
+ u8 dev_idx;
+ /*
+ * Cached version of this device's member info from superblock
+ * Committed by bch2_write_super() -> bch_fs_mi_update()
+ */
+ struct bch_member_cpu mi;
+ atomic64_t errors[BCH_MEMBER_ERROR_NR];
+
+ __uuid_t uuid;
+ char name[BDEVNAME_SIZE];
+
+ struct bch_sb_handle disk_sb;
+ struct bch_sb *sb_read_scratch;
+ int sb_write_error;
+ dev_t dev;
+ atomic_t flush_seq;
+
+ struct bch_devs_mask self;
+
+ /* biosets used in cloned bios for writing multiple replicas */
+ struct bio_set replica_set;
+
+ /*
+ * Buckets:
+ * Per-bucket arrays are protected by c->mark_lock, bucket_lock and
+	 * gc_lock, for device resize - holding any one of them is sufficient for
+	 * access; or rcu_read_lock(), but only for ptr_stale():
+ */
+ struct bucket_array __rcu *buckets_gc;
+ struct bucket_gens __rcu *bucket_gens;
+ u8 *oldest_gen;
+ unsigned long *buckets_nouse;
+ struct rw_semaphore bucket_lock;
+
+ struct bch_dev_usage *usage_base;
+ struct bch_dev_usage __percpu *usage[JOURNAL_BUF_NR];
+ struct bch_dev_usage __percpu *usage_gc;
+
+ /* Allocator: */
+ u64 new_fs_bucket_idx;
+ u64 alloc_cursor;
+
+ unsigned nr_open_buckets;
+ unsigned nr_btree_reserve;
+
+ size_t inc_gen_needs_gc;
+ size_t inc_gen_really_needs_gc;
+ size_t buckets_waiting_on_journal;
+
+ atomic64_t rebalance_work;
+
+ struct journal_device journal;
+ u64 prev_journal_sector;
+
+ struct work_struct io_error_work;
+
+ /* The rest of this all shows up in sysfs */
+ atomic64_t cur_latency[2];
+ struct bch2_time_stats io_latency[2];
+
+#define CONGESTED_MAX 1024
+ atomic_t congested;
+ u64 congested_last;
+
+ struct io_count __percpu *io_done;
+};
+
+/*
+ * initial_gc_unfixed
+ * error
+ * topology error
+ */
+
+#define BCH_FS_FLAGS() \
+ x(started) \
+ x(may_go_rw) \
+ x(rw) \
+ x(was_rw) \
+ x(stopping) \
+ x(emergency_ro) \
+ x(going_ro) \
+ x(write_disable_complete) \
+ x(clean_shutdown) \
+ x(fsck_running) \
+ x(initial_gc_unfixed) \
+ x(need_another_gc) \
+ x(need_delete_dead_snapshots) \
+ x(error) \
+ x(topology_error) \
+ x(errors_fixed) \
+ x(errors_not_fixed)
+
+enum bch_fs_flags {
+#define x(n) BCH_FS_##n,
+ BCH_FS_FLAGS()
+#undef x
+};
+
+struct btree_debug {
+ unsigned id;
+};
+
+#define BCH_TRANSACTIONS_NR 128
+
+struct btree_transaction_stats {
+ struct bch2_time_stats duration;
+ struct bch2_time_stats lock_hold_times;
+ struct mutex lock;
+ unsigned nr_max_paths;
+ unsigned journal_entries_size;
+ unsigned max_mem;
+ char *max_paths_text;
+};
+
+struct bch_fs_pcpu {
+ u64 sectors_available;
+};
+
+struct journal_seq_blacklist_table {
+ size_t nr;
+ struct journal_seq_blacklist_table_entry {
+ u64 start;
+ u64 end;
+ bool dirty;
+ } entries[];
+};
+
+struct journal_keys {
+ struct journal_key {
+ u64 journal_seq;
+ u32 journal_offset;
+ enum btree_id btree_id:8;
+ unsigned level:8;
+ bool allocated;
+ bool overwritten;
+ struct bkey_i *k;
+ } *d;
+ /*
+ * Gap buffer: instead of all the empty space in the array being at the
+ * end of the buffer - from @nr to @size - the empty space is at @gap.
+ * This means that sequential insertions are O(n) instead of O(n^2).
+ */
+ size_t gap;
+ size_t nr;
+ size_t size;
+ atomic_t ref;
+ bool initial_ref_held;
+};
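The gap buffer described above keeps the unused slots at @gap instead of at the end of the array, so an in-order insertion only shifts elements when the gap has to move. A minimal, self-contained sketch of that insert path (illustrative only, with made-up names; not the bcachefs code):

#include <stddef.h>
#include <string.h>

struct gap_buf {
	int	*d;		/* nr elements stored in a buffer of size slots */
	size_t	gap, nr, size;	/* empty region is d[gap .. gap + (size - nr)) */
};

static void gap_buf_move_gap(struct gap_buf *b, size_t pos)
{
	size_t gap_len = b->size - b->nr;

	if (pos < b->gap)	/* shift elements in [pos, gap) up past the gap */
		memmove(b->d + pos + gap_len, b->d + pos,
			(b->gap - pos) * sizeof(b->d[0]));
	else if (pos > b->gap)	/* shift elements in [gap, pos) down into the gap */
		memmove(b->d + b->gap, b->d + b->gap + gap_len,
			(pos - b->gap) * sizeof(b->d[0]));
	b->gap = pos;
}

static void gap_buf_insert(struct gap_buf *b, size_t pos, int v)
{
	/* caller guarantees nr < size, i.e. the gap is nonempty */
	gap_buf_move_gap(b, pos);
	b->d[b->gap++] = v;
	b->nr++;
}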
+
+struct btree_trans_buf {
+ struct btree_trans *trans;
+};
+
+#define REPLICAS_DELTA_LIST_MAX (1U << 16)
+
+#define BCACHEFS_ROOT_SUBVOL_INUM \
+ ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO })
+
+#define BCH_WRITE_REFS() \
+ x(trans) \
+ x(write) \
+ x(promote) \
+ x(node_rewrite) \
+ x(stripe_create) \
+ x(stripe_delete) \
+ x(reflink) \
+ x(fallocate) \
+ x(discard) \
+ x(invalidate) \
+ x(delete_dead_snapshots) \
+ x(snapshot_delete_pagecache) \
+ x(sysfs) \
+ x(btree_write_buffer)
+
+enum bch_write_ref {
+#define x(n) BCH_WRITE_REF_##n,
+ BCH_WRITE_REFS()
+#undef x
+ BCH_WRITE_REF_NR,
+};
+
+struct bch_fs {
+ struct closure cl;
+
+ struct list_head list;
+ struct kobject kobj;
+ struct kobject counters_kobj;
+ struct kobject internal;
+ struct kobject opts_dir;
+ struct kobject time_stats;
+ unsigned long flags;
+
+ int minor;
+ struct device *chardev;
+ struct super_block *vfs_sb;
+ dev_t dev;
+ char name[40];
+ struct stdio_redirect *stdio;
+ struct task_struct *stdio_filter;
+
+ /* ro/rw, add/remove/resize devices: */
+ struct rw_semaphore state_lock;
+
+ /* Counts outstanding writes, for clean transition to read-only */
+#ifdef BCH_WRITE_REF_DEBUG
+ atomic_long_t writes[BCH_WRITE_REF_NR];
+#else
+ struct percpu_ref writes;
+#endif
+ /*
+	 * Analogous to c->writes, for asynchronous ops that don't necessarily
+ * need fs to be read-write
+ */
+ refcount_t ro_ref;
+ wait_queue_head_t ro_ref_wait;
+
+ struct work_struct read_only_work;
+
+ struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX];
+
+ struct bch_replicas_cpu replicas;
+ struct bch_replicas_cpu replicas_gc;
+ struct mutex replicas_gc_lock;
+ mempool_t replicas_delta_pool;
+
+ struct journal_entry_res btree_root_journal_res;
+ struct journal_entry_res replicas_journal_res;
+ struct journal_entry_res clock_journal_res;
+ struct journal_entry_res dev_usage_journal_res;
+
+ struct bch_disk_groups_cpu __rcu *disk_groups;
+
+ struct bch_opts opts;
+
+ /* Updated by bch2_sb_update():*/
+ struct {
+ __uuid_t uuid;
+ __uuid_t user_uuid;
+
+ u16 version;
+ u16 version_min;
+ u16 version_upgrade_complete;
+
+ u8 nr_devices;
+ u8 clean;
+
+ u8 encryption_type;
+
+ u64 time_base_lo;
+ u32 time_base_hi;
+ unsigned time_units_per_sec;
+ unsigned nsec_per_time_unit;
+ u64 features;
+ u64 compat;
+ unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
+ } sb;
+
+
+ struct bch_sb_handle disk_sb;
+
+ unsigned short block_bits; /* ilog2(block_size) */
+
+ u16 btree_foreground_merge_threshold;
+
+ struct closure sb_write;
+ struct mutex sb_lock;
+
+ /* snapshot.c: */
+ struct snapshot_table __rcu *snapshots;
+ size_t snapshot_table_size;
+ struct mutex snapshot_table_lock;
+ struct rw_semaphore snapshot_create_lock;
+
+ struct work_struct snapshot_delete_work;
+ struct work_struct snapshot_wait_for_pagecache_and_delete_work;
+ snapshot_id_list snapshots_unlinked;
+ struct mutex snapshots_unlinked_lock;
+
+ /* BTREE CACHE */
+ struct bio_set btree_bio;
+ struct workqueue_struct *io_complete_wq;
+
+ struct btree_root btree_roots_known[BTREE_ID_NR];
+ DARRAY(struct btree_root) btree_roots_extra;
+ struct mutex btree_root_lock;
+
+ struct btree_cache btree_cache;
+
+ /*
+ * Cache of allocated btree nodes - if we allocate a btree node and
+ * don't use it, if we free it that space can't be reused until going
+	 * _all_ the way through the allocator (which exposes us to a livelock
+	 * when allocating btree reserves fails halfway through) - instead, we
+ * can stick them here:
+ */
+ struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2];
+ unsigned btree_reserve_cache_nr;
+ struct mutex btree_reserve_cache_lock;
+
+ mempool_t btree_interior_update_pool;
+ struct list_head btree_interior_update_list;
+ struct list_head btree_interior_updates_unwritten;
+ struct mutex btree_interior_update_lock;
+ struct closure_waitlist btree_interior_update_wait;
+
+ struct workqueue_struct *btree_interior_update_worker;
+ struct work_struct btree_interior_update_work;
+
+ struct list_head pending_node_rewrites;
+ struct mutex pending_node_rewrites_lock;
+
+ /* btree_io.c: */
+ spinlock_t btree_write_error_lock;
+ struct btree_write_stats {
+ atomic64_t nr;
+ atomic64_t bytes;
+ } btree_write_stats[BTREE_WRITE_TYPE_NR];
+
+ /* btree_iter.c: */
+ struct seqmutex btree_trans_lock;
+ struct list_head btree_trans_list;
+ mempool_t btree_trans_pool;
+ mempool_t btree_trans_mem_pool;
+ struct btree_trans_buf __percpu *btree_trans_bufs;
+
+ struct srcu_struct btree_trans_barrier;
+ bool btree_trans_barrier_initialized;
+
+ struct btree_key_cache btree_key_cache;
+ unsigned btree_key_cache_btrees;
+
+ struct btree_write_buffer btree_write_buffer;
+
+ struct workqueue_struct *btree_update_wq;
+ struct workqueue_struct *btree_io_complete_wq;
+ /* copygc needs its own workqueue for index updates.. */
+ struct workqueue_struct *copygc_wq;
+ /*
+ * Use a dedicated wq for write ref holder tasks. Required to avoid
+ * dependency problems with other wq tasks that can block on ref
+ * draining, such as read-only transition.
+ */
+ struct workqueue_struct *write_ref_wq;
+
+ /* ALLOCATION */
+ struct bch_devs_mask rw_devs[BCH_DATA_NR];
+
+ u64 capacity; /* sectors */
+
+ /*
+ * When capacity _decreases_ (due to a disk being removed), we
+ * increment capacity_gen - this invalidates outstanding reservations
+ * and forces them to be revalidated
+ */
+ u32 capacity_gen;
+ unsigned bucket_size_max;
+
+ atomic64_t sectors_available;
+ struct mutex sectors_available_lock;
+
+ struct bch_fs_pcpu __percpu *pcpu;
+
+ struct percpu_rw_semaphore mark_lock;
+
+ seqcount_t usage_lock;
+ struct bch_fs_usage *usage_base;
+ struct bch_fs_usage __percpu *usage[JOURNAL_BUF_NR];
+ struct bch_fs_usage __percpu *usage_gc;
+ u64 __percpu *online_reserved;
+
+ /* single element mempool: */
+ struct mutex usage_scratch_lock;
+ struct bch_fs_usage_online *usage_scratch;
+
+ struct io_clock io_clock[2];
+
+ /* JOURNAL SEQ BLACKLIST */
+ struct journal_seq_blacklist_table *
+ journal_seq_blacklist_table;
+ struct work_struct journal_seq_blacklist_gc_work;
+
+ /* ALLOCATOR */
+ spinlock_t freelist_lock;
+ struct closure_waitlist freelist_wait;
+ u64 blocked_allocate;
+ u64 blocked_allocate_open_bucket;
+
+ open_bucket_idx_t open_buckets_freelist;
+ open_bucket_idx_t open_buckets_nr_free;
+ struct closure_waitlist open_buckets_wait;
+ struct open_bucket open_buckets[OPEN_BUCKETS_COUNT];
+ open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT];
+
+ open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT];
+ open_bucket_idx_t open_buckets_partial_nr;
+
+ struct write_point btree_write_point;
+ struct write_point rebalance_write_point;
+
+ struct write_point write_points[WRITE_POINT_MAX];
+ struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
+ struct mutex write_points_hash_lock;
+ unsigned write_points_nr;
+
+ struct buckets_waiting_for_journal buckets_waiting_for_journal;
+ struct work_struct discard_work;
+ struct work_struct invalidate_work;
+
+ /* GARBAGE COLLECTION */
+ struct task_struct *gc_thread;
+ atomic_t kick_gc;
+ unsigned long gc_count;
+
+ enum btree_id gc_gens_btree;
+ struct bpos gc_gens_pos;
+
+ /*
+ * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos]
+ * has been marked by GC.
+ *
+ * gc_cur_phase is a superset of btree_ids (BTREE_ID_extents etc.)
+ *
+ * Protected by gc_pos_lock. Only written to by GC thread, so GC thread
+ * can read without a lock.
+ */
+ seqcount_t gc_pos_lock;
+ struct gc_pos gc_pos;
+
+ /*
+ * The allocation code needs gc_mark in struct bucket to be correct, but
+ * it's not while a gc is in progress.
+ */
+ struct rw_semaphore gc_lock;
+ struct mutex gc_gens_lock;
+
+ /* IO PATH */
+ struct semaphore io_in_flight;
+ struct bio_set bio_read;
+ struct bio_set bio_read_split;
+ struct bio_set bio_write;
+ struct mutex bio_bounce_pages_lock;
+ mempool_t bio_bounce_pages;
+ struct bucket_nocow_lock_table
+ nocow_locks;
+ struct rhashtable promote_table;
+
+ mempool_t compression_bounce[2];
+ mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR];
+ mempool_t decompress_workspace;
+ size_t zstd_workspace_size;
+
+ struct crypto_shash *sha256;
+ struct crypto_sync_skcipher *chacha20;
+ struct crypto_shash *poly1305;
+
+ atomic64_t key_version;
+
+ mempool_t large_bkey_pool;
+
+ /* MOVE.C */
+ struct list_head moving_context_list;
+ struct mutex moving_context_lock;
+
+ /* REBALANCE */
+ struct bch_fs_rebalance rebalance;
+
+ /* COPYGC */
+ struct task_struct *copygc_thread;
+ struct write_point copygc_write_point;
+ s64 copygc_wait_at;
+ s64 copygc_wait;
+ bool copygc_running;
+ wait_queue_head_t copygc_running_wq;
+
+ /* STRIPES: */
+ GENRADIX(struct stripe) stripes;
+ GENRADIX(struct gc_stripe) gc_stripes;
+
+ struct hlist_head ec_stripes_new[32];
+ spinlock_t ec_stripes_new_lock;
+
+ ec_stripes_heap ec_stripes_heap;
+ struct mutex ec_stripes_heap_lock;
+
+ /* ERASURE CODING */
+ struct list_head ec_stripe_head_list;
+ struct mutex ec_stripe_head_lock;
+
+ struct list_head ec_stripe_new_list;
+ struct mutex ec_stripe_new_lock;
+ wait_queue_head_t ec_stripe_new_wait;
+
+ struct work_struct ec_stripe_create_work;
+ u64 ec_stripe_hint;
+
+ struct work_struct ec_stripe_delete_work;
+
+ struct bio_set ec_bioset;
+
+ /* REFLINK */
+ reflink_gc_table reflink_gc_table;
+ size_t reflink_gc_nr;
+
+ /* fs.c */
+ struct list_head vfs_inodes_list;
+ struct mutex vfs_inodes_lock;
+
+ /* VFS IO PATH - fs-io.c */
+ struct bio_set writepage_bioset;
+ struct bio_set dio_write_bioset;
+ struct bio_set dio_read_bioset;
+ struct bio_set nocow_flush_bioset;
+
+ /* QUOTAS */
+ struct bch_memquota_type quotas[QTYP_NR];
+
+ /* RECOVERY */
+ u64 journal_replay_seq_start;
+ u64 journal_replay_seq_end;
+ /*
+ * Two different uses:
+	 * "Has this fsck pass run?" - i.e. should this type of error trigger an
+	 * emergency read-only
+ * And, in certain situations fsck will rewind to an earlier pass: used
+ * for signaling to the toplevel code which pass we want to run now.
+ */
+ enum bch_recovery_pass curr_recovery_pass;
+ /* bitmap of explicitly enabled recovery passes: */
+ u64 recovery_passes_explicit;
+ /* bitmask of recovery passes that we actually ran */
+ u64 recovery_passes_complete;
+ /* never rewinds version of curr_recovery_pass */
+ enum bch_recovery_pass recovery_pass_done;
+ struct semaphore online_fsck_mutex;
+
+ /* DEBUG JUNK */
+ struct dentry *fs_debug_dir;
+ struct dentry *btree_debug_dir;
+ struct btree_debug btree_debug[BTREE_ID_NR];
+ struct btree *verify_data;
+ struct btree_node *verify_ondisk;
+ struct mutex verify_lock;
+
+ u64 *unused_inode_hints;
+ unsigned inode_shard_bits;
+
+ /*
+ * A btree node on disk could have too many bsets for an iterator to fit
+ * on the stack - have to dynamically allocate them
+ */
+ mempool_t fill_iter;
+
+ mempool_t btree_bounce_pool;
+
+ struct journal journal;
+ GENRADIX(struct journal_replay *) journal_entries;
+ u64 journal_entries_base_seq;
+ struct journal_keys journal_keys;
+ struct list_head journal_iters;
+
+ u64 last_bucket_seq_cleanup;
+
+ u64 counters_on_mount[BCH_COUNTER_NR];
+ u64 __percpu *counters;
+
+ unsigned btree_gc_periodic:1;
+ unsigned copy_gc_enabled:1;
+ bool promote_whole_extents;
+
+ struct bch2_time_stats times[BCH_TIME_STAT_NR];
+
+ struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
+
+ /* ERRORS */
+ struct list_head fsck_error_msgs;
+ struct mutex fsck_error_msgs_lock;
+ bool fsck_alloc_msgs_err;
+
+ bch_sb_errors_cpu fsck_error_counts;
+ struct mutex fsck_error_counts_lock;
+};
+
+extern struct wait_queue_head bch2_read_only_wait;
+
+static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
+{
+#ifdef BCH_WRITE_REF_DEBUG
+ atomic_long_inc(&c->writes[ref]);
+#else
+ percpu_ref_get(&c->writes);
+#endif
+}
+
+static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
+{
+#ifdef BCH_WRITE_REF_DEBUG
+ return !test_bit(BCH_FS_going_ro, &c->flags) &&
+ atomic_long_inc_not_zero(&c->writes[ref]);
+#else
+ return percpu_ref_tryget(&c->writes);
+#endif
+}
+
+static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
+{
+#ifdef BCH_WRITE_REF_DEBUG
+ return !test_bit(BCH_FS_going_ro, &c->flags) &&
+ atomic_long_inc_not_zero(&c->writes[ref]);
+#else
+ return percpu_ref_tryget_live(&c->writes);
+#endif
+}
+
+static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
+{
+#ifdef BCH_WRITE_REF_DEBUG
+ long v = atomic_long_dec_return(&c->writes[ref]);
+
+ BUG_ON(v < 0);
+ if (v)
+ return;
+ for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
+ if (atomic_long_read(&c->writes[i]))
+ return;
+
+ set_bit(BCH_FS_write_disable_complete, &c->flags);
+ wake_up(&bch2_read_only_wait);
+#else
+ percpu_ref_put(&c->writes);
+#endif
+}
+
+static inline bool bch2_ro_ref_tryget(struct bch_fs *c)
+{
+ if (test_bit(BCH_FS_stopping, &c->flags))
+ return false;
+
+ return refcount_inc_not_zero(&c->ro_ref);
+}
+
+static inline void bch2_ro_ref_put(struct bch_fs *c)
+{
+ if (refcount_dec_and_test(&c->ro_ref))
+ wake_up(&c->ro_ref_wait);
+}
+
+static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
+{
+#ifndef NO_BCACHEFS_FS
+ if (c->vfs_sb)
+ c->vfs_sb->s_bdi->ra_pages = ra_pages;
+#endif
+}
+
+static inline unsigned bucket_bytes(const struct bch_dev *ca)
+{
+ return ca->mi.bucket_size << 9;
+}
+
+static inline unsigned block_bytes(const struct bch_fs *c)
+{
+ return c->opts.block_size;
+}
+
+static inline unsigned block_sectors(const struct bch_fs *c)
+{
+ return c->opts.block_size >> 9;
+}
+
+static inline size_t btree_sectors(const struct bch_fs *c)
+{
+ return c->opts.btree_node_size >> 9;
+}
+
+static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree)
+{
+ return c->btree_key_cache_btrees & (1U << btree);
+}
+
+static inline struct timespec64 bch2_time_to_timespec(const struct bch_fs *c, s64 time)
+{
+ struct timespec64 t;
+ s32 rem;
+
+ time += c->sb.time_base_lo;
+
+ t.tv_sec = div_s64_rem(time, c->sb.time_units_per_sec, &rem);
+ t.tv_nsec = rem * c->sb.nsec_per_time_unit;
+ return t;
+}
+
+static inline s64 timespec_to_bch2_time(const struct bch_fs *c, struct timespec64 ts)
+{
+ return (ts.tv_sec * c->sb.time_units_per_sec +
+ (int) ts.tv_nsec / c->sb.nsec_per_time_unit) - c->sb.time_base_lo;
+}
+
+static inline s64 bch2_current_time(const struct bch_fs *c)
+{
+ struct timespec64 now;
+
+ ktime_get_coarse_real_ts64(&now);
+ return timespec_to_bch2_time(c, now);
+}
+
+static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
+{
+ return dev < c->sb.nr_devices && c->devs[dev];
+}
+
+static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
+{
+ struct stdio_redirect *stdio = c->stdio;
+
+ if (c->stdio_filter && c->stdio_filter != current)
+ stdio = NULL;
+ return stdio;
+}
+
+#define BKEY_PADDED_ONSTACK(key, pad) \
+ struct { struct bkey_i key; __u64 key ## _pad[pad]; }
+
+#endif /* _BCACHEFS_H */
diff --git a/c_src/libbcachefs/bcachefs_format.h b/c_src/libbcachefs/bcachefs_format.h
new file mode 100644
index 00000000..0d5ac418
--- /dev/null
+++ b/c_src/libbcachefs/bcachefs_format.h
@@ -0,0 +1,2451 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FORMAT_H
+#define _BCACHEFS_FORMAT_H
+
+/*
+ * bcachefs on disk data structures
+ *
+ * OVERVIEW:
+ *
+ * There are three main types of on disk data structures in bcachefs (this is
+ * reduced from 5 in bcache)
+ *
+ * - superblock
+ * - journal
+ * - btree
+ *
+ * The btree is the primary structure; most metadata exists as keys in the
+ * various btrees. There are only a small number of btrees, they're not
+ * sharded - we have one btree for extents, another for inodes, et cetera.
+ *
+ * SUPERBLOCK:
+ *
+ * The superblock contains the location of the journal, the list of devices in
+ * the filesystem, and in general any metadata we need in order to decide
+ * whether we can start a filesystem or prior to reading the journal/btree
+ * roots.
+ *
+ * The superblock is extensible, and most of the contents of the superblock are
+ * in variable length, type tagged fields; see struct bch_sb_field.
+ *
+ * Backup superblocks do not reside in a fixed location; also, superblocks do
+ * not have a fixed size. To locate backup superblocks we have struct
+ * bch_sb_layout; we store a copy of this inside every superblock, and also
+ * before the first superblock.
+ *
+ * JOURNAL:
+ *
+ * The journal primarily records btree updates in the order they occurred;
+ * journal replay consists of just iterating over all the keys in the open
+ * journal entries and re-inserting them into the btrees.
+ *
+ * The journal also contains entry types for the btree roots, and blacklisted
+ * journal sequence numbers (see journal_seq_blacklist.c).
+ *
+ * BTREE:
+ *
+ * bcachefs btrees are copy on write b+ trees, where nodes are big (typically
+ * 128k-256k) and log structured. We use struct btree_node for writing the first
+ * entry in a given node (offset 0), and struct btree_node_entry for all
+ * subsequent writes.
+ *
+ * After the header, btree node entries contain a list of keys in sorted order.
+ * Values are stored inline with the keys; since values are variable length (and
+ * keys effectively are variable length too, due to packing) we can't do random
+ * access without building up additional in memory tables in the btree node read
+ * path.
+ *
+ * BTREE KEYS (struct bkey):
+ *
+ * The various btrees share a common format for the key - so as to avoid
+ * switching in fastpath lookup/comparison code - but define their own
+ * structures for the key values.
+ *
+ * The size of a key/value pair is stored as a u8 in units of u64s, so the max
+ * size is just under 2k. The common part also contains a type tag for the
+ * value, and a format field indicating whether the key is packed or not (and
+ * also meant to allow adding new key fields in the future, if desired).
+ *
+ * bkeys, when stored within a btree node, may also be packed. In that case, the
+ * bkey_format in that node is used to unpack it. Packed bkeys mean that we can
+ * be generous with field sizes in the common part of the key format (64 bit
+ * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost.
+ */
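(As a quick check of the "just under 2k" figure mentioned above: the u8 size field counts u64s, so a combined key + value maxes out at 255 * 8 = 2040 bytes.)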
+
+#include <asm/types.h>
+#include <asm/byteorder.h>
+#include <linux/kernel.h>
+#include <linux/uuid.h>
+#include "vstructs.h"
+
+#ifdef __KERNEL__
+typedef uuid_t __uuid_t;
+#endif
+
+#define BITMASK(name, type, field, offset, end) \
+static const __maybe_unused unsigned name##_OFFSET = offset; \
+static const __maybe_unused unsigned name##_BITS = (end - offset); \
+ \
+static inline __u64 name(const type *k) \
+{ \
+ return (k->field >> offset) & ~(~0ULL << (end - offset)); \
+} \
+ \
+static inline void SET_##name(type *k, __u64 v) \
+{ \
+ k->field &= ~(~(~0ULL << (end - offset)) << offset); \
+ k->field |= (v & ~(~0ULL << (end - offset))) << offset; \
+}
+
+#define LE_BITMASK(_bits, name, type, field, offset, end) \
+static const __maybe_unused unsigned name##_OFFSET = offset; \
+static const __maybe_unused unsigned name##_BITS = (end - offset); \
+static const __maybe_unused __u##_bits name##_MAX = (1ULL << (end - offset)) - 1;\
+ \
+static inline __u64 name(const type *k) \
+{ \
+ return (__le##_bits##_to_cpu(k->field) >> offset) & \
+ ~(~0ULL << (end - offset)); \
+} \
+ \
+static inline void SET_##name(type *k, __u64 v) \
+{ \
+ __u##_bits new = __le##_bits##_to_cpu(k->field); \
+ \
+ new &= ~(~(~0ULL << (end - offset)) << offset); \
+ new |= (v & ~(~0ULL << (end - offset))) << offset; \
+ k->field = __cpu_to_le##_bits(new); \
+}
+
+#define LE16_BITMASK(n, t, f, o, e) LE_BITMASK(16, n, t, f, o, e)
+#define LE32_BITMASK(n, t, f, o, e) LE_BITMASK(32, n, t, f, o, e)
+#define LE64_BITMASK(n, t, f, o, e) LE_BITMASK(64, n, t, f, o, e)
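Each BITMASK()/LE_BITMASK() invocation above generates a getter, a SET_ setter, and _OFFSET/_BITS constants for one named bit range. A hypothetical use, with struct foo and FOO_STATE made up purely for illustration:

struct foo {
	__u64	flags;
};

BITMASK(FOO_STATE, struct foo, flags, 4, 8)

static inline void foo_bitmask_example(struct foo *f)
{
	SET_FOO_STATE(f, 3);	/* stores 3 in bits 4..7 of f->flags */
	(void) FOO_STATE(f);	/* reads the 4-bit field back: == 3 */
}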
+
+struct bkey_format {
+ __u8 key_u64s;
+ __u8 nr_fields;
+ /* One unused slot for now: */
+ __u8 bits_per_field[6];
+ __le64 field_offset[6];
+};
+
+/* Btree keys - all units are in sectors */
+
+struct bpos {
+ /*
+ * Word order matches machine byte order - btree code treats a bpos as a
+ * single large integer, for search/comparison purposes
+ *
+ * Note that wherever a bpos is embedded in another on disk data
+ * structure, it has to be byte swabbed when reading in metadata that
+ * wasn't written in native endian order:
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ __u32 snapshot;
+ __u64 offset;
+ __u64 inode;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ __u64 inode;
+ __u64 offset; /* Points to end of extent - sectors */
+ __u32 snapshot;
+#else
+#error edit for your odd byteorder.
+#endif
+} __packed
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+__aligned(4)
+#endif
+;
+
+#define KEY_INODE_MAX ((__u64)~0ULL)
+#define KEY_OFFSET_MAX ((__u64)~0ULL)
+#define KEY_SNAPSHOT_MAX ((__u32)~0U)
+#define KEY_SIZE_MAX ((__u32)~0U)
+
+static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot)
+{
+ return (struct bpos) {
+ .inode = inode,
+ .offset = offset,
+ .snapshot = snapshot,
+ };
+}
+
+#define POS_MIN SPOS(0, 0, 0)
+#define POS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, 0)
+#define SPOS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX)
+#define POS(_inode, _offset) SPOS(_inode, _offset, 0)
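Because the fields are laid out in machine word order, a bpos orders as a single large integer with inode most significant, then offset, then snapshot. A field-by-field comparison expresses the same ordering (illustrative only; the real comparison helpers operate on packed keys and live outside this header):

static inline int bpos_cmp_example(struct bpos l, struct bpos r)
{
	if (l.inode != r.inode)
		return l.inode < r.inode ? -1 : 1;
	if (l.offset != r.offset)
		return l.offset < r.offset ? -1 : 1;
	if (l.snapshot != r.snapshot)
		return l.snapshot < r.snapshot ? -1 : 1;
	return 0;
}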
+
+/* Empty placeholder struct, for container_of() */
+struct bch_val {
+ __u64 __nothing[0];
+};
+
+struct bversion {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ __u64 lo;
+ __u32 hi;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ __u32 hi;
+ __u64 lo;
+#endif
+} __packed __aligned(4);
+
+struct bkey {
+ /* Size of combined key and value, in u64s */
+ __u8 u64s;
+
+ /* Format of key (0 for format local to btree node) */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 format:7,
+ needs_whiteout:1;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u8 needs_whiteout:1,
+ format:7;
+#else
+#error edit for your odd byteorder.
+#endif
+
+ /* Type of the value */
+ __u8 type;
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ __u8 pad[1];
+
+ struct bversion version;
+ __u32 size; /* extent size, in sectors */
+ struct bpos p;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ struct bpos p;
+ __u32 size; /* extent size, in sectors */
+ struct bversion version;
+
+ __u8 pad[1];
+#endif
+} __packed __aligned(8);
+
+struct bkey_packed {
+ __u64 _data[0];
+
+ /* Size of combined key and value, in u64s */
+ __u8 u64s;
+
+ /* Format of key (0 for format local to btree node) */
+
+ /*
+ * XXX: next incompat on disk format change, switch format and
+ * needs_whiteout - bkey_packed() will be cheaper if format is the high
+ * bits of the bitfield
+ */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 format:7,
+ needs_whiteout:1;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u8 needs_whiteout:1,
+ format:7;
+#endif
+
+ /* Type of the value */
+ __u8 type;
+ __u8 key_start[0];
+
+ /*
+ * We copy bkeys with struct assignment in various places, and while
+ * that shouldn't be done with packed bkeys we can't disallow it in C,
+ * and it's legal to cast a bkey to a bkey_packed - so padding it out
+ * to the same size as struct bkey should hopefully be safest.
+ */
+ __u8 pad[sizeof(struct bkey) - 3];
+} __packed __aligned(8);
+
+typedef struct {
+ __le64 lo;
+ __le64 hi;
+} bch_le128;
+
+#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64))
+#define BKEY_U64s_MAX U8_MAX
+#define BKEY_VAL_U64s_MAX (BKEY_U64s_MAX - BKEY_U64s)
+
+#define KEY_PACKED_BITS_START 24
+
+#define KEY_FORMAT_LOCAL_BTREE 0
+#define KEY_FORMAT_CURRENT 1
+
+enum bch_bkey_fields {
+ BKEY_FIELD_INODE,
+ BKEY_FIELD_OFFSET,
+ BKEY_FIELD_SNAPSHOT,
+ BKEY_FIELD_SIZE,
+ BKEY_FIELD_VERSION_HI,
+ BKEY_FIELD_VERSION_LO,
+ BKEY_NR_FIELDS,
+};
+
+#define bkey_format_field(name, field) \
+ [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8)
+
+#define BKEY_FORMAT_CURRENT \
+((struct bkey_format) { \
+ .key_u64s = BKEY_U64s, \
+ .nr_fields = BKEY_NR_FIELDS, \
+ .bits_per_field = { \
+ bkey_format_field(INODE, p.inode), \
+ bkey_format_field(OFFSET, p.offset), \
+ bkey_format_field(SNAPSHOT, p.snapshot), \
+ bkey_format_field(SIZE, size), \
+ bkey_format_field(VERSION_HI, version.hi), \
+ bkey_format_field(VERSION_LO, version.lo), \
+ }, \
+})
+
+/* bkey with inline value */
+struct bkey_i {
+ __u64 _data[0];
+
+ struct bkey k;
+ struct bch_val v;
+};
+
+#define POS_KEY(_pos) \
+((struct bkey) { \
+ .u64s = BKEY_U64s, \
+ .format = KEY_FORMAT_CURRENT, \
+ .p = _pos, \
+})
+
+#define KEY(_inode, _offset, _size) \
+((struct bkey) { \
+ .u64s = BKEY_U64s, \
+ .format = KEY_FORMAT_CURRENT, \
+ .p = POS(_inode, _offset), \
+ .size = _size, \
+})
+
+static inline void bkey_init(struct bkey *k)
+{
+ *k = KEY(0, 0, 0);
+}
+
+#define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64))
+
+#define __BKEY_PADDED(key, pad) \
+ struct bkey_i key; __u64 key ## _pad[pad]
+
+/*
+ * - DELETED keys are used internally to mark keys that should be ignored but
+ * override keys in composition order. Their version number is ignored.
+ *
+ * - DISCARDED keys indicate that the data is all 0s because it has been
+ * discarded. DISCARDs may have a version; if the version is nonzero the key
+ * will be persistent, otherwise the key will be dropped whenever the btree
+ * node is rewritten (like DELETED keys).
+ *
+ * - ERROR: any read of the data returns a read error, as the data was lost due
+ * to a failing device. Like DISCARDED keys, they can be removed (overridden)
+ * by new writes or cluster-wide GC. Node repair can also overwrite them with
+ * the same or a more recent version number, but not with an older version
+ * number.
+ *
+ * - WHITEOUT: for hash table btrees
+ */
+#define BCH_BKEY_TYPES() \
+ x(deleted, 0) \
+ x(whiteout, 1) \
+ x(error, 2) \
+ x(cookie, 3) \
+ x(hash_whiteout, 4) \
+ x(btree_ptr, 5) \
+ x(extent, 6) \
+ x(reservation, 7) \
+ x(inode, 8) \
+ x(inode_generation, 9) \
+ x(dirent, 10) \
+ x(xattr, 11) \
+ x(alloc, 12) \
+ x(quota, 13) \
+ x(stripe, 14) \
+ x(reflink_p, 15) \
+ x(reflink_v, 16) \
+ x(inline_data, 17) \
+ x(btree_ptr_v2, 18) \
+ x(indirect_inline_data, 19) \
+ x(alloc_v2, 20) \
+ x(subvolume, 21) \
+ x(snapshot, 22) \
+ x(inode_v2, 23) \
+ x(alloc_v3, 24) \
+ x(set, 25) \
+ x(lru, 26) \
+ x(alloc_v4, 27) \
+ x(backpointer, 28) \
+ x(inode_v3, 29) \
+ x(bucket_gens, 30) \
+ x(snapshot_tree, 31) \
+ x(logged_op_truncate, 32) \
+ x(logged_op_finsert, 33)
+
+enum bch_bkey_type {
+#define x(name, nr) KEY_TYPE_##name = nr,
+ BCH_BKEY_TYPES()
+#undef x
+ KEY_TYPE_MAX,
+};
+
+struct bch_deleted {
+ struct bch_val v;
+};
+
+struct bch_whiteout {
+ struct bch_val v;
+};
+
+struct bch_error {
+ struct bch_val v;
+};
+
+struct bch_cookie {
+ struct bch_val v;
+ __le64 cookie;
+};
+
+struct bch_hash_whiteout {
+ struct bch_val v;
+};
+
+struct bch_set {
+ struct bch_val v;
+};
+
+/* Extents */
+
+/*
+ * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
+ * preceded by checksum/compression information (bch_extent_crc32 or
+ * bch_extent_crc64).
+ *
+ * One major determining factor in the format of extents is how we handle and
+ * represent extents that have been partially overwritten and thus trimmed:
+ *
+ * If an extent is not checksummed or compressed, when the extent is trimmed we
+ * don't have to remember the extent we originally allocated and wrote: we can
+ * merely adjust ptr->offset to point to the start of the data that is currently
+ * live. The size field in struct bkey records the current (live) size of the
+ * extent, and is also used to mean "size of region on disk that we point to" in
+ * this case.
+ *
+ * Thus an extent that is not checksummed or compressed will consist only of a
+ * list of bch_extent_ptrs, with none of the fields in
+ * bch_extent_crc32/bch_extent_crc64.
+ *
+ * When an extent is checksummed or compressed, it's not possible to read only
+ * the data that is currently live: we have to read the entire extent that was
+ * originally written, and then return only the part of the extent that is
+ * currently live.
+ *
+ * Thus, in addition to the current size of the extent in struct bkey, we need
+ * to store the size of the originally allocated space - this is the
+ * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
+ * when the extent is trimmed, instead of modifying the offset field of the
+ * pointer, we keep a second smaller offset field - "offset into the original
+ * extent of the currently live region".
+ *
+ * The other major determining factor is replication and data migration:
+ *
+ * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
+ * write, we will initially write all the replicas in the same format, with the
+ * same checksum type and compression format - however, when copygc runs later (or
+ * tiering/cache promotion, anything that moves data), it is not in general
+ * going to rewrite all the pointers at once - one of the replicas may be in a
+ * bucket on one device that has very little fragmentation while another lives
+ * in a bucket that has become heavily fragmented, and thus is being rewritten
+ * sooner than the rest.
+ *
+ * Thus it will only move a subset of the pointers (or in the case of
+ * tiering/cache promotion perhaps add a single pointer without dropping any
+ * current pointers), and if the extent has been partially overwritten it must
+ * write only the currently live portion (or copygc would not be able to reduce
+ * fragmentation!) - which necessitates a different bch_extent_crc format for
+ * the new pointer.
+ *
+ * But in the interests of space efficiency, we don't want to store one
+ * bch_extent_crc for each pointer if we don't have to.
+ *
+ * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
+ * bch_extent_ptrs appended arbitrarily one after the other. We determine the
+ * type of a given entry with a scheme similar to utf8 (except we're encoding a
+ * type, not a size), encoding the type in the position of the first set bit:
+ *
+ * bch_extent_crc32 - 0b1
+ * bch_extent_ptr - 0b10
+ * bch_extent_crc64 - 0b100
+ *
+ * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
+ * bch_extent_crc64 is the least constrained).
+ *
+ * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
+ * until the next bch_extent_crc32/64.
+ *
+ * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
+ * is neither checksummed nor compressed.
+ */
+
+/* 128 bits, sufficient for cryptographic MACs: */
+struct bch_csum {
+ __le64 lo;
+ __le64 hi;
+} __packed __aligned(8);
+
+#define BCH_EXTENT_ENTRY_TYPES() \
+ x(ptr, 0) \
+ x(crc32, 1) \
+ x(crc64, 2) \
+ x(crc128, 3) \
+ x(stripe_ptr, 4) \
+ x(rebalance, 5)
+#define BCH_EXTENT_ENTRY_MAX 6
+
+enum bch_extent_entry_type {
+#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
+ BCH_EXTENT_ENTRY_TYPES()
+#undef x
+};
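With the type encoded in the position of the first set bit, an entry's type can be recovered by finding the lowest set bit of its leading word - roughly as below (a sketch only; the in-tree helpers are defined outside this header and use the kernel's __ffs(), for which __builtin_ctzll stands in here):

static inline unsigned extent_entry_type_example(__u64 first_word)
{
	/* lowest set bit selects ptr/crc32/crc64/crc128/stripe_ptr/rebalance */
	return first_word ? (unsigned) __builtin_ctzll(first_word)
			  : BCH_EXTENT_ENTRY_MAX;
}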
+
+/* Compressed/uncompressed size are stored biased by 1: */
+struct bch_extent_crc32 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u32 type:2,
+ _compressed_size:7,
+ _uncompressed_size:7,
+ offset:7,
+ _unused:1,
+ csum_type:4,
+ compression_type:4;
+ __u32 csum;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u32 csum;
+ __u32 compression_type:4,
+ csum_type:4,
+ _unused:1,
+ offset:7,
+ _uncompressed_size:7,
+ _compressed_size:7,
+ type:2;
+#endif
+} __packed __aligned(8);
+
+#define CRC32_SIZE_MAX (1U << 7)
+#define CRC32_NONCE_MAX 0
+
+struct bch_extent_crc64 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u64 type:3,
+ _compressed_size:9,
+ _uncompressed_size:9,
+ offset:9,
+ nonce:10,
+ csum_type:4,
+ compression_type:4,
+ csum_hi:16;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u64 csum_hi:16,
+ compression_type:4,
+ csum_type:4,
+ nonce:10,
+ offset:9,
+ _uncompressed_size:9,
+ _compressed_size:9,
+ type:3;
+#endif
+ __u64 csum_lo;
+} __packed __aligned(8);
+
+#define CRC64_SIZE_MAX (1U << 9)
+#define CRC64_NONCE_MAX ((1U << 10) - 1)
+
+struct bch_extent_crc128 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u64 type:4,
+ _compressed_size:13,
+ _uncompressed_size:13,
+ offset:13,
+ nonce:13,
+ csum_type:4,
+ compression_type:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u64 compression_type:4,
+ csum_type:4,
+ nonce:13,
+ offset:13,
+ _uncompressed_size:13,
+ _compressed_size:13,
+ type:4;
+#endif
+ struct bch_csum csum;
+} __packed __aligned(8);
+
+#define CRC128_SIZE_MAX (1U << 13)
+#define CRC128_NONCE_MAX ((1U << 13) - 1)
+
+/*
+ * @reservation - pointer hasn't been written to, just reserved
+ */
+struct bch_extent_ptr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u64 type:1,
+ cached:1,
+ unused:1,
+ unwritten:1,
+ offset:44, /* 8 petabytes */
+ dev:8,
+ gen:8;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u64 gen:8,
+ dev:8,
+ offset:44,
+ unwritten:1,
+ unused:1,
+ cached:1,
+ type:1;
+#endif
+} __packed __aligned(8);
+
+struct bch_extent_stripe_ptr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u64 type:5,
+ block:8,
+ redundancy:4,
+ idx:47;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u64 idx:47,
+ redundancy:4,
+ block:8,
+ type:5;
+#endif
+};
+
+struct bch_extent_rebalance {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u64 type:6,
+ unused:34,
+ compression:8, /* enum bch_compression_opt */
+ target:16;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u64 target:16,
+ compression:8,
+ unused:34,
+ type:6;
+#endif
+};
+
+union bch_extent_entry {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64
+ unsigned long type;
+#elif __BITS_PER_LONG == 32
+ struct {
+ unsigned long pad;
+ unsigned long type;
+ };
+#else
+#error edit for your odd byteorder.
+#endif
+
+#define x(f, n) struct bch_extent_##f f;
+ BCH_EXTENT_ENTRY_TYPES()
+#undef x
+};
+
+struct bch_btree_ptr {
+ struct bch_val v;
+
+ __u64 _data[0];
+ struct bch_extent_ptr start[];
+} __packed __aligned(8);
+
+struct bch_btree_ptr_v2 {
+ struct bch_val v;
+
+ __u64 mem_ptr;
+ __le64 seq;
+ __le16 sectors_written;
+ __le16 flags;
+ struct bpos min_key;
+ __u64 _data[0];
+ struct bch_extent_ptr start[];
+} __packed __aligned(8);
+
+LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1);
+
+struct bch_extent {
+ struct bch_val v;
+
+ __u64 _data[0];
+ union bch_extent_entry start[];
+} __packed __aligned(8);
+
+struct bch_reservation {
+ struct bch_val v;
+
+ __le32 generation;
+ __u8 nr_replicas;
+ __u8 pad[3];
+} __packed __aligned(8);
+
+/* Maximum size (in u64s) a single pointer could be: */
+#define BKEY_EXTENT_PTR_U64s_MAX\
+ ((sizeof(struct bch_extent_crc128) + \
+ sizeof(struct bch_extent_ptr)) / sizeof(__u64))
+
+/* Maximum possible size of an entire extent value: */
+#define BKEY_EXTENT_VAL_U64s_MAX \
+ (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
+
+/* Maximum possible size of an entire extent, key + value: */
+#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
+
+/* Btree pointers don't carry around checksums: */
+#define BKEY_BTREE_PTR_VAL_U64s_MAX \
+ ((sizeof(struct bch_btree_ptr_v2) + \
+ sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
+#define BKEY_BTREE_PTR_U64s_MAX \
+ (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
+
+/* Inodes */
+
+#define BLOCKDEV_INODE_MAX 4096
+
+#define BCACHEFS_ROOT_INO 4096
+
+struct bch_inode {
+ struct bch_val v;
+
+ __le64 bi_hash_seed;
+ __le32 bi_flags;
+ __le16 bi_mode;
+ __u8 fields[];
+} __packed __aligned(8);
+
+struct bch_inode_v2 {
+ struct bch_val v;
+
+ __le64 bi_journal_seq;
+ __le64 bi_hash_seed;
+ __le64 bi_flags;
+ __le16 bi_mode;
+ __u8 fields[];
+} __packed __aligned(8);
+
+struct bch_inode_v3 {
+ struct bch_val v;
+
+ __le64 bi_journal_seq;
+ __le64 bi_hash_seed;
+ __le64 bi_flags;
+ __le64 bi_sectors;
+ __le64 bi_size;
+ __le64 bi_version;
+ __u8 fields[];
+} __packed __aligned(8);
+
+#define INODEv3_FIELDS_START_INITIAL 6
+#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64))
+
+struct bch_inode_generation {
+ struct bch_val v;
+
+ __le32 bi_generation;
+ __le32 pad;
+} __packed __aligned(8);
+
+/*
+ * bi_subvol and bi_parent_subvol are only set for subvolume roots:
+ */
+
+#define BCH_INODE_FIELDS_v2() \
+ x(bi_atime, 96) \
+ x(bi_ctime, 96) \
+ x(bi_mtime, 96) \
+ x(bi_otime, 96) \
+ x(bi_size, 64) \
+ x(bi_sectors, 64) \
+ x(bi_uid, 32) \
+ x(bi_gid, 32) \
+ x(bi_nlink, 32) \
+ x(bi_generation, 32) \
+ x(bi_dev, 32) \
+ x(bi_data_checksum, 8) \
+ x(bi_compression, 8) \
+ x(bi_project, 32) \
+ x(bi_background_compression, 8) \
+ x(bi_data_replicas, 8) \
+ x(bi_promote_target, 16) \
+ x(bi_foreground_target, 16) \
+ x(bi_background_target, 16) \
+ x(bi_erasure_code, 16) \
+ x(bi_fields_set, 16) \
+ x(bi_dir, 64) \
+ x(bi_dir_offset, 64) \
+ x(bi_subvol, 32) \
+ x(bi_parent_subvol, 32)
+
+#define BCH_INODE_FIELDS_v3() \
+ x(bi_atime, 96) \
+ x(bi_ctime, 96) \
+ x(bi_mtime, 96) \
+ x(bi_otime, 96) \
+ x(bi_uid, 32) \
+ x(bi_gid, 32) \
+ x(bi_nlink, 32) \
+ x(bi_generation, 32) \
+ x(bi_dev, 32) \
+ x(bi_data_checksum, 8) \
+ x(bi_compression, 8) \
+ x(bi_project, 32) \
+ x(bi_background_compression, 8) \
+ x(bi_data_replicas, 8) \
+ x(bi_promote_target, 16) \
+ x(bi_foreground_target, 16) \
+ x(bi_background_target, 16) \
+ x(bi_erasure_code, 16) \
+ x(bi_fields_set, 16) \
+ x(bi_dir, 64) \
+ x(bi_dir_offset, 64) \
+ x(bi_subvol, 32) \
+ x(bi_parent_subvol, 32) \
+ x(bi_nocow, 8)
+
+/* subset of BCH_INODE_FIELDS */
+#define BCH_INODE_OPTS() \
+ x(data_checksum, 8) \
+ x(compression, 8) \
+ x(project, 32) \
+ x(background_compression, 8) \
+ x(data_replicas, 8) \
+ x(promote_target, 16) \
+ x(foreground_target, 16) \
+ x(background_target, 16) \
+ x(erasure_code, 16) \
+ x(nocow, 8)
+
+enum inode_opt_id {
+#define x(name, ...) \
+ Inode_opt_##name,
+ BCH_INODE_OPTS()
+#undef x
+ Inode_opt_nr,
+};
+
+#define BCH_INODE_FLAGS() \
+ x(sync, 0) \
+ x(immutable, 1) \
+ x(append, 2) \
+ x(nodump, 3) \
+ x(noatime, 4) \
+ x(i_size_dirty, 5) \
+ x(i_sectors_dirty, 6) \
+ x(unlinked, 7) \
+ x(backptr_untrusted, 8)
+
+/* bits 20+ reserved for packed fields below: */
+
+enum bch_inode_flags {
+#define x(t, n) BCH_INODE_##t = 1U << n,
+ BCH_INODE_FLAGS()
+#undef x
+};
+
+enum __bch_inode_flags {
+#define x(t, n) __BCH_INODE_##t = n,
+ BCH_INODE_FLAGS()
+#undef x
+};
+
+LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
+LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
+LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
+
+LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
+LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31);
+
+LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24);
+LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31);
+
+LE64_BITMASK(INODEv3_FIELDS_START,
+ struct bch_inode_v3, bi_flags, 31, 36);
+LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52);
+
+/* Dirents */
+
+/*
+ * Dirents (and xattrs) have to implement string lookups; since our b-tree
+ * doesn't support arbitrary length strings for the key, we instead index by a
+ * 64 bit hash (currently truncated sha1) of the string, stored in the offset
+ * field of the key - using linear probing to resolve hash collisions. This also
+ * provides us with the readdir cookie posix requires.
+ *
+ * Linear probing requires us to use whiteouts for deletions, in the event of a
+ * collision:
+ */
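Concretely, the name hash becomes the offset field of the dirent's key, and lookups probe forward from there; hash_whiteout keys left behind by deletions keep later entries in the probe chain reachable. A rough sketch, with the probe loop as pseudocode and made-up helper names (not the real fs code):

static inline struct bpos dirent_hash_pos_example(__u64 dir_inum, __u64 name_hash)
{
	return POS(dir_inum, name_hash);
}

/*
 * lookup(dir, name):
 *	pos = dirent_hash_pos_example(dir, hash(name))
 *	for each key k at pos, pos + 1, ...:
 *		KEY_TYPE_hash_whiteout	-> deleted slot, keep probing
 *		KEY_TYPE_dirent		-> compare names; done if they match
 *		empty slot		-> not found, stop
 */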
+
+struct bch_dirent {
+ struct bch_val v;
+
+ /* Target inode number: */
+ union {
+ __le64 d_inum;
+ struct { /* DT_SUBVOL */
+ __le32 d_child_subvol;
+ __le32 d_parent_subvol;
+ };
+ };
+
+ /*
+ * Copy of mode bits 12-15 from the target inode - so userspace can get
+ * the filetype without having to do a stat()
+ */
+ __u8 d_type;
+
+ __u8 d_name[];
+} __packed __aligned(8);
+
+#define DT_SUBVOL 16
+#define BCH_DT_MAX 17
+
+#define BCH_NAME_MAX 512
+
+/* Xattrs */
+
+#define KEY_TYPE_XATTR_INDEX_USER 0
+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1
+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2
+#define KEY_TYPE_XATTR_INDEX_TRUSTED 3
+#define KEY_TYPE_XATTR_INDEX_SECURITY 4
+
+struct bch_xattr {
+ struct bch_val v;
+ __u8 x_type;
+ __u8 x_name_len;
+ __le16 x_val_len;
+ __u8 x_name[];
+} __packed __aligned(8);
+
+/* Bucket/allocation information: */
+
+struct bch_alloc {
+ struct bch_val v;
+ __u8 fields;
+ __u8 gen;
+ __u8 data[];
+} __packed __aligned(8);
+
+#define BCH_ALLOC_FIELDS_V1() \
+ x(read_time, 16) \
+ x(write_time, 16) \
+ x(data_type, 8) \
+ x(dirty_sectors, 16) \
+ x(cached_sectors, 16) \
+ x(oldest_gen, 8) \
+ x(stripe, 32) \
+ x(stripe_redundancy, 8)
+
+enum {
+#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
+ BCH_ALLOC_FIELDS_V1()
+#undef x
+};
+
+struct bch_alloc_v2 {
+ struct bch_val v;
+ __u8 nr_fields;
+ __u8 gen;
+ __u8 oldest_gen;
+ __u8 data_type;
+ __u8 data[];
+} __packed __aligned(8);
+
+#define BCH_ALLOC_FIELDS_V2() \
+ x(read_time, 64) \
+ x(write_time, 64) \
+ x(dirty_sectors, 32) \
+ x(cached_sectors, 32) \
+ x(stripe, 32) \
+ x(stripe_redundancy, 8)
+
+struct bch_alloc_v3 {
+ struct bch_val v;
+ __le64 journal_seq;
+ __le32 flags;
+ __u8 nr_fields;
+ __u8 gen;
+ __u8 oldest_gen;
+ __u8 data_type;
+ __u8 data[];
+} __packed __aligned(8);
+
+LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
+LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
+
+struct bch_alloc_v4 {
+ struct bch_val v;
+ __u64 journal_seq;
+ __u32 flags;
+ __u8 gen;
+ __u8 oldest_gen;
+ __u8 data_type;
+ __u8 stripe_redundancy;
+ __u32 dirty_sectors;
+ __u32 cached_sectors;
+ __u64 io_time[2];
+ __u32 stripe;
+ __u32 nr_external_backpointers;
+ __u64 fragmentation_lru;
+} __packed __aligned(8);
+
+#define BCH_ALLOC_V4_U64s_V0 6
+#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64))
+
+BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
+BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
+BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8)
+BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14)
+
+#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX 40
+
+struct bch_backpointer {
+ struct bch_val v;
+ __u8 btree_id;
+ __u8 level;
+ __u8 data_type;
+ __u64 bucket_offset:40;
+ __u32 bucket_len;
+ struct bpos pos;
+} __packed __aligned(8);
+
+#define KEY_TYPE_BUCKET_GENS_BITS 8
+#define KEY_TYPE_BUCKET_GENS_NR (1U << KEY_TYPE_BUCKET_GENS_BITS)
+#define KEY_TYPE_BUCKET_GENS_MASK (KEY_TYPE_BUCKET_GENS_NR - 1)
+
+struct bch_bucket_gens {
+ struct bch_val v;
+ u8 gens[KEY_TYPE_BUCKET_GENS_NR];
+} __packed __aligned(8);
+
+/* Quotas: */
+
+enum quota_types {
+ QTYP_USR = 0,
+ QTYP_GRP = 1,
+ QTYP_PRJ = 2,
+ QTYP_NR = 3,
+};
+
+enum quota_counters {
+ Q_SPC = 0,
+ Q_INO = 1,
+ Q_COUNTERS = 2,
+};
+
+struct bch_quota_counter {
+ __le64 hardlimit;
+ __le64 softlimit;
+};
+
+struct bch_quota {
+ struct bch_val v;
+ struct bch_quota_counter c[Q_COUNTERS];
+} __packed __aligned(8);
+
+/* Erasure coding */
+
+struct bch_stripe {
+ struct bch_val v;
+ __le16 sectors;
+ __u8 algorithm;
+ __u8 nr_blocks;
+ __u8 nr_redundant;
+
+ __u8 csum_granularity_bits;
+ __u8 csum_type;
+ __u8 pad;
+
+ struct bch_extent_ptr ptrs[];
+} __packed __aligned(8);
+
+/* Reflink: */
+
+struct bch_reflink_p {
+ struct bch_val v;
+ __le64 idx;
+ /*
+ * A reflink pointer might point to an indirect extent which is then
+ * later split (by copygc or rebalance). If we only pointed to part of
+ * the original indirect extent, and then one of the fragments is
+ * outside the range we point to, we'd leak a refcount: so when creating
+ * reflink pointers, we need to store pad values to remember the full
+ * range we were taking a reference on.
+ */
+ __le32 front_pad;
+ __le32 back_pad;
+} __packed __aligned(8);
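A sketch of the range the pads describe, assuming the live extent spans k->size sectors starting at idx (helper name made up; the actual refcount accounting lives in the C sources, not this header):

static inline void reflink_p_ref_range_example(const struct bkey *k,
					       const struct bch_reflink_p *p,
					       __u64 *start, __u64 *end)
{
	/* refcounts are held on this whole span of the reflink btree: */
	*start = __le64_to_cpu(p->idx) - __le32_to_cpu(p->front_pad);
	*end   = __le64_to_cpu(p->idx) + k->size + __le32_to_cpu(p->back_pad);
}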
+
+struct bch_reflink_v {
+ struct bch_val v;
+ __le64 refcount;
+ union bch_extent_entry start[0];
+ __u64 _data[];
+} __packed __aligned(8);
+
+struct bch_indirect_inline_data {
+ struct bch_val v;
+ __le64 refcount;
+ u8 data[];
+};
+
+/* Inline data */
+
+struct bch_inline_data {
+ struct bch_val v;
+ u8 data[];
+};
+
+/* Subvolumes: */
+
+#define SUBVOL_POS_MIN POS(0, 1)
+#define SUBVOL_POS_MAX POS(0, S32_MAX)
+#define BCACHEFS_ROOT_SUBVOL 1
+
+struct bch_subvolume {
+ struct bch_val v;
+ __le32 flags;
+ __le32 snapshot;
+ __le64 inode;
+ /*
+ * Snapshot subvolumes form a tree, separate from the snapshot nodes
+ * tree - if this subvolume is a snapshot, this is the ID of the
+ * subvolume it was created from:
+ */
+ __le32 parent;
+ __le32 pad;
+ bch_le128 otime;
+};
+
+LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1)
+/*
+ * We need to know whether a subvolume is a snapshot so we can know whether we
+ * can delete it (or whether it should just be rm -rf'd)
+ */
+LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2)
+LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3)
+
+/* Snapshots */
+
+struct bch_snapshot {
+ struct bch_val v;
+ __le32 flags;
+ __le32 parent;
+ __le32 children[2];
+ __le32 subvol;
+ /* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */
+ __le32 tree;
+ __le32 depth;
+ __le32 skip[3];
+};
+
+LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1)
+
+/* True if a subvolume points to this snapshot node: */
+LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2)
+
+/*
+ * Snapshot trees:
+ *
+ * The snapshot_trees btree gives us a persistent identifier for each tree of
+ * bch_snapshot nodes, and allows us to record and easily find the root/master
+ * subvolume that other snapshots were created from:
+ */
+struct bch_snapshot_tree {
+ struct bch_val v;
+ __le32 master_subvol;
+ __le32 root_snapshot;
+};
+
+/* LRU btree: */
+
+struct bch_lru {
+ struct bch_val v;
+ __le64 idx;
+} __packed __aligned(8);
+
+#define LRU_ID_STRIPES (1U << 16)
+
+/* Logged operations btree: */
+
+struct bch_logged_op_truncate {
+ struct bch_val v;
+ __le32 subvol;
+ __le32 pad;
+ __le64 inum;
+ __le64 new_i_size;
+};
+
+enum logged_op_finsert_state {
+ LOGGED_OP_FINSERT_start,
+ LOGGED_OP_FINSERT_shift_extents,
+ LOGGED_OP_FINSERT_finish,
+};
+
+struct bch_logged_op_finsert {
+ struct bch_val v;
+ __u8 state;
+ __u8 pad[3];
+ __le32 subvol;
+ __le64 inum;
+ __le64 dst_offset;
+ __le64 src_offset;
+ __le64 pos;
+};
+
+/* Optional/variable size superblock sections: */
+
+struct bch_sb_field {
+ __u64 _data[0];
+ __le32 u64s;
+ __le32 type;
+};
+
+#define BCH_SB_FIELDS() \
+ x(journal, 0) \
+ x(members_v1, 1) \
+ x(crypt, 2) \
+ x(replicas_v0, 3) \
+ x(quota, 4) \
+ x(disk_groups, 5) \
+ x(clean, 6) \
+ x(replicas, 7) \
+ x(journal_seq_blacklist, 8) \
+ x(journal_v2, 9) \
+ x(counters, 10) \
+ x(members_v2, 11) \
+ x(errors, 12) \
+ x(ext, 13) \
+ x(downgrade, 14)
+
+enum bch_sb_field_type {
+#define x(f, nr) BCH_SB_FIELD_##f = nr,
+ BCH_SB_FIELDS()
+#undef x
+ BCH_SB_FIELD_NR
+};
+
+/*
+ * Most superblock fields are replicated in all devices' superblocks - a few are
+ * not:
+ */
+#define BCH_SINGLE_DEVICE_SB_FIELDS \
+ ((1U << BCH_SB_FIELD_journal)| \
+ (1U << BCH_SB_FIELD_journal_v2))
+
+/* BCH_SB_FIELD_journal: */
+
+struct bch_sb_field_journal {
+ struct bch_sb_field field;
+ __le64 buckets[];
+};
+
+struct bch_sb_field_journal_v2 {
+ struct bch_sb_field field;
+
+ struct bch_sb_field_journal_v2_entry {
+ __le64 start;
+ __le64 nr;
+ } d[];
+};
+
+/* BCH_SB_FIELD_members_v1: */
+
+#define BCH_MIN_NR_NBUCKETS (1 << 6)
+
+#define BCH_IOPS_MEASUREMENTS() \
+ x(seqread, 0) \
+ x(seqwrite, 1) \
+ x(randread, 2) \
+ x(randwrite, 3)
+
+enum bch_iops_measurement {
+#define x(t, n) BCH_IOPS_##t = n,
+ BCH_IOPS_MEASUREMENTS()
+#undef x
+ BCH_IOPS_NR
+};
+
+#define BCH_MEMBER_ERROR_TYPES() \
+ x(read, 0) \
+ x(write, 1) \
+ x(checksum, 2)
+
+enum bch_member_error_type {
+#define x(t, n) BCH_MEMBER_ERROR_##t = n,
+ BCH_MEMBER_ERROR_TYPES()
+#undef x
+ BCH_MEMBER_ERROR_NR
+};
+
+struct bch_member {
+ __uuid_t uuid;
+ __le64 nbuckets; /* device size */
+ __le16 first_bucket; /* index of first bucket used */
+ __le16 bucket_size; /* sectors */
+ __le32 pad;
+ __le64 last_mount; /* time_t */
+
+ __le64 flags;
+ __le32 iops[4];
+ __le64 errors[BCH_MEMBER_ERROR_NR];
+ __le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
+ __le64 errors_reset_time;
+ __le64 seq;
+};
+
+#define BCH_MEMBER_V1_BYTES 56
+
+LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4)
+/* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
+LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags, 14, 15)
+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags, 15, 20)
+LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28)
+LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30)
+LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
+ struct bch_member, flags, 30, 31)
+
+#if 0
+LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
+LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
+#endif
+
+#define BCH_MEMBER_STATES() \
+ x(rw, 0) \
+ x(ro, 1) \
+ x(failed, 2) \
+ x(spare, 3)
+
+enum bch_member_state {
+#define x(t, n) BCH_MEMBER_STATE_##t = n,
+ BCH_MEMBER_STATES()
+#undef x
+ BCH_MEMBER_STATE_NR
+};
+
+struct bch_sb_field_members_v1 {
+ struct bch_sb_field field;
+ struct bch_member _members[]; //Members are now variable size
+};
+
+struct bch_sb_field_members_v2 {
+ struct bch_sb_field field;
+ __le16 member_bytes; //size of single member entry
+ u8 pad[6];
+ struct bch_member _members[];
+};
+
+/* BCH_SB_FIELD_crypt: */
+
+struct nonce {
+ __le32 d[4];
+};
+
+struct bch_key {
+ __le64 key[4];
+};
+
+#define BCH_KEY_MAGIC \
+ (((__u64) 'b' << 0)|((__u64) 'c' << 8)| \
+ ((__u64) 'h' << 16)|((__u64) '*' << 24)| \
+ ((__u64) '*' << 32)|((__u64) 'k' << 40)| \
+ ((__u64) 'e' << 48)|((__u64) 'y' << 56))
+
+struct bch_encrypted_key {
+ __le64 magic;
+ struct bch_key key;
+};
+
+/*
+ * If this field is present in the superblock, it stores an encryption key which
+ * is used to encrypt all other data/metadata. The key will normally be encrypted
+ * with the key userspace provides, but if encryption has been turned off we'll
+ * just store the master key unencrypted in the superblock so we can access the
+ * previously encrypted data.
+ */
+struct bch_sb_field_crypt {
+ struct bch_sb_field field;
+
+ __le64 flags;
+ __le64 kdf_flags;
+ struct bch_encrypted_key key;
+};
+
+LE64_BITMASK(BCH_CRYPT_KDF_TYPE, struct bch_sb_field_crypt, flags, 0, 4);
+
+enum bch_kdf_types {
+ BCH_KDF_SCRYPT = 0,
+ BCH_KDF_NR = 1,
+};
+
+/* stored as base 2 log of scrypt params: */
+LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16);
+LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
+LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
+
+/* BCH_SB_FIELD_replicas: */
+
+#define BCH_DATA_TYPES() \
+ x(free, 0) \
+ x(sb, 1) \
+ x(journal, 2) \
+ x(btree, 3) \
+ x(user, 4) \
+ x(cached, 5) \
+ x(parity, 6) \
+ x(stripe, 7) \
+ x(need_gc_gens, 8) \
+ x(need_discard, 9)
+
+enum bch_data_type {
+#define x(t, n) BCH_DATA_##t,
+ BCH_DATA_TYPES()
+#undef x
+ BCH_DATA_NR
+};
+
+static inline bool data_type_is_empty(enum bch_data_type type)
+{
+ switch (type) {
+ case BCH_DATA_free:
+ case BCH_DATA_need_gc_gens:
+ case BCH_DATA_need_discard:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool data_type_is_hidden(enum bch_data_type type)
+{
+ switch (type) {
+ case BCH_DATA_sb:
+ case BCH_DATA_journal:
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct bch_replicas_entry_v0 {
+ __u8 data_type;
+ __u8 nr_devs;
+ __u8 devs[];
+} __packed;
+
+struct bch_sb_field_replicas_v0 {
+ struct bch_sb_field field;
+ struct bch_replicas_entry_v0 entries[];
+} __packed __aligned(8);
+
+struct bch_replicas_entry_v1 {
+ __u8 data_type;
+ __u8 nr_devs;
+ __u8 nr_required;
+ __u8 devs[];
+} __packed;
+
+#define replicas_entry_bytes(_i) \
+ (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
+
+struct bch_sb_field_replicas {
+ struct bch_sb_field field;
+ struct bch_replicas_entry_v1 entries[];
+} __packed __aligned(8);
+
+/* BCH_SB_FIELD_quota: */
+
+struct bch_sb_quota_counter {
+ __le32 timelimit;
+ __le32 warnlimit;
+};
+
+struct bch_sb_quota_type {
+ __le64 flags;
+ struct bch_sb_quota_counter c[Q_COUNTERS];
+};
+
+struct bch_sb_field_quota {
+ struct bch_sb_field field;
+ struct bch_sb_quota_type q[QTYP_NR];
+} __packed __aligned(8);
+
+/* BCH_SB_FIELD_disk_groups: */
+
+#define BCH_SB_LABEL_SIZE 32
+
+struct bch_disk_group {
+ __u8 label[BCH_SB_LABEL_SIZE];
+ __le64 flags[2];
+} __packed __aligned(8);
+
+LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1)
+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6)
+LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24)
+
+struct bch_sb_field_disk_groups {
+ struct bch_sb_field field;
+ struct bch_disk_group entries[];
+} __packed __aligned(8);
+
+/* BCH_SB_FIELD_counters */
+
+#define BCH_PERSISTENT_COUNTERS() \
+ x(io_read, 0) \
+ x(io_write, 1) \
+ x(io_move, 2) \
+ x(bucket_invalidate, 3) \
+ x(bucket_discard, 4) \
+ x(bucket_alloc, 5) \
+ x(bucket_alloc_fail, 6) \
+ x(btree_cache_scan, 7) \
+ x(btree_cache_reap, 8) \
+ x(btree_cache_cannibalize, 9) \
+ x(btree_cache_cannibalize_lock, 10) \
+ x(btree_cache_cannibalize_lock_fail, 11) \
+ x(btree_cache_cannibalize_unlock, 12) \
+ x(btree_node_write, 13) \
+ x(btree_node_read, 14) \
+ x(btree_node_compact, 15) \
+ x(btree_node_merge, 16) \
+ x(btree_node_split, 17) \
+ x(btree_node_rewrite, 18) \
+ x(btree_node_alloc, 19) \
+ x(btree_node_free, 20) \
+ x(btree_node_set_root, 21) \
+ x(btree_path_relock_fail, 22) \
+ x(btree_path_upgrade_fail, 23) \
+ x(btree_reserve_get_fail, 24) \
+ x(journal_entry_full, 25) \
+ x(journal_full, 26) \
+ x(journal_reclaim_finish, 27) \
+ x(journal_reclaim_start, 28) \
+ x(journal_write, 29) \
+ x(read_promote, 30) \
+ x(read_bounce, 31) \
+ x(read_split, 33) \
+ x(read_retry, 32) \
+ x(read_reuse_race, 34) \
+ x(move_extent_read, 35) \
+ x(move_extent_write, 36) \
+ x(move_extent_finish, 37) \
+ x(move_extent_fail, 38) \
+ x(move_extent_start_fail, 39) \
+ x(copygc, 40) \
+ x(copygc_wait, 41) \
+ x(gc_gens_end, 42) \
+ x(gc_gens_start, 43) \
+ x(trans_blocked_journal_reclaim, 44) \
+ x(trans_restart_btree_node_reused, 45) \
+ x(trans_restart_btree_node_split, 46) \
+ x(trans_restart_fault_inject, 47) \
+ x(trans_restart_iter_upgrade, 48) \
+ x(trans_restart_journal_preres_get, 49) \
+ x(trans_restart_journal_reclaim, 50) \
+ x(trans_restart_journal_res_get, 51) \
+ x(trans_restart_key_cache_key_realloced, 52) \
+ x(trans_restart_key_cache_raced, 53) \
+ x(trans_restart_mark_replicas, 54) \
+ x(trans_restart_mem_realloced, 55) \
+ x(trans_restart_memory_allocation_failure, 56) \
+ x(trans_restart_relock, 57) \
+ x(trans_restart_relock_after_fill, 58) \
+ x(trans_restart_relock_key_cache_fill, 59) \
+ x(trans_restart_relock_next_node, 60) \
+ x(trans_restart_relock_parent_for_fill, 61) \
+ x(trans_restart_relock_path, 62) \
+ x(trans_restart_relock_path_intent, 63) \
+ x(trans_restart_too_many_iters, 64) \
+ x(trans_restart_traverse, 65) \
+ x(trans_restart_upgrade, 66) \
+ x(trans_restart_would_deadlock, 67) \
+ x(trans_restart_would_deadlock_write, 68) \
+ x(trans_restart_injected, 69) \
+ x(trans_restart_key_cache_upgrade, 70) \
+ x(trans_traverse_all, 71) \
+ x(transaction_commit, 72) \
+ x(write_super, 73) \
+ x(trans_restart_would_deadlock_recursion_limit, 74) \
+ x(trans_restart_write_buffer_flush, 75) \
+ x(trans_restart_split_race, 76) \
+ x(write_buffer_flush_slowpath, 77) \
+ x(write_buffer_flush_sync, 78)
+
+enum bch_persistent_counters {
+#define x(t, n, ...) BCH_COUNTER_##t,
+ BCH_PERSISTENT_COUNTERS()
+#undef x
+ BCH_COUNTER_NR
+};
+
+struct bch_sb_field_counters {
+ struct bch_sb_field field;
+ __le64 d[];
+};
+
+/*
+ * On clean shutdown, store btree roots and current journal sequence number in
+ * the superblock:
+ */
+struct jset_entry {
+ __le16 u64s;
+ __u8 btree_id;
+ __u8 level;
+ __u8 type; /* designates what this jset holds */
+ __u8 pad[3];
+
+ struct bkey_i start[0];
+ __u64 _data[];
+};
+
+struct bch_sb_field_clean {
+ struct bch_sb_field field;
+
+ __le32 flags;
+ __le16 _read_clock; /* no longer used */
+ __le16 _write_clock;
+ __le64 journal_seq;
+
+ struct jset_entry start[0];
+ __u64 _data[];
+};
+
+struct journal_seq_blacklist_entry {
+ __le64 start;
+ __le64 end;
+};
+
+struct bch_sb_field_journal_seq_blacklist {
+ struct bch_sb_field field;
+ struct journal_seq_blacklist_entry start[];
+};
+
+struct bch_sb_field_errors {
+ struct bch_sb_field field;
+ struct bch_sb_field_error_entry {
+ __le64 v;
+ __le64 last_error_time;
+ } entries[];
+};
+
+LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
+LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
+
+struct bch_sb_field_ext {
+ struct bch_sb_field field;
+ __le64 recovery_passes_required[2];
+ __le64 errors_silent[8];
+};
+
+struct bch_sb_field_downgrade_entry {
+ __le16 version;
+ __le64 recovery_passes[2];
+ __le16 nr_errors;
+ __le16 errors[] __counted_by(nr_errors);
+} __packed __aligned(2);
+
+struct bch_sb_field_downgrade {
+ struct bch_sb_field field;
+ struct bch_sb_field_downgrade_entry entries[];
+};
+
+/* Superblock: */
+
+/*
+ * New versioning scheme:
+ * One common version number for all on disk data structures - superblock, btree
+ * nodes, journal entries
+ */
+#define BCH_VERSION_MAJOR(_v) ((__u16) ((_v) >> 10))
+#define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10)))
+#define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0)
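For example, BCH_VERSION(1, 3) evaluates to (1 << 10) | 3 = 1027; BCH_VERSION_MAJOR(1027) recovers 1 and BCH_VERSION_MINOR(1027) recovers 3, since the minor number occupies the low 10 bits.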
+
+/*
+ * field 1: version name
+ * field 2: BCH_VERSION(major, minor)
+ * field 3: recovery passes required on upgrade
+ */
+#define BCH_METADATA_VERSIONS() \
+ x(bkey_renumber, BCH_VERSION(0, 10)) \
+ x(inode_btree_change, BCH_VERSION(0, 11)) \
+ x(snapshot, BCH_VERSION(0, 12)) \
+ x(inode_backpointers, BCH_VERSION(0, 13)) \
+ x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \
+ x(snapshot_2, BCH_VERSION(0, 15)) \
+ x(reflink_p_fix, BCH_VERSION(0, 16)) \
+ x(subvol_dirent, BCH_VERSION(0, 17)) \
+ x(inode_v2, BCH_VERSION(0, 18)) \
+ x(freespace, BCH_VERSION(0, 19)) \
+ x(alloc_v4, BCH_VERSION(0, 20)) \
+ x(new_data_types, BCH_VERSION(0, 21)) \
+ x(backpointers, BCH_VERSION(0, 22)) \
+ x(inode_v3, BCH_VERSION(0, 23)) \
+ x(unwritten_extents, BCH_VERSION(0, 24)) \
+ x(bucket_gens, BCH_VERSION(0, 25)) \
+ x(lru_v2, BCH_VERSION(0, 26)) \
+ x(fragmentation_lru, BCH_VERSION(0, 27)) \
+ x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \
+ x(snapshot_trees, BCH_VERSION(0, 29)) \
+ x(major_minor, BCH_VERSION(1, 0)) \
+ x(snapshot_skiplists, BCH_VERSION(1, 1)) \
+ x(deleted_inodes, BCH_VERSION(1, 2)) \
+ x(rebalance_work, BCH_VERSION(1, 3)) \
+ x(member_seq, BCH_VERSION(1, 4))
+
+enum bcachefs_metadata_version {
+ bcachefs_metadata_version_min = 9,
+#define x(t, n) bcachefs_metadata_version_##t = n,
+ BCH_METADATA_VERSIONS()
+#undef x
+ bcachefs_metadata_version_max
+};
+
+static const __maybe_unused
+unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work;
+
+#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
+
+#define BCH_SB_SECTOR 8
+#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */
+
+struct bch_sb_layout {
+ __uuid_t magic; /* bcachefs superblock UUID */
+ __u8 layout_type;
+ __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */
+ __u8 nr_superblocks;
+ __u8 pad[5];
+ __le64 sb_offset[61];
+} __packed __aligned(8);
+
+#define BCH_SB_LAYOUT_SECTOR 7
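sb_max_size_bits is the log2 of the maximum superblock size in 512-byte sectors; a value of 11, for example, allows superblocks of up to 2^11 * 512 bytes = 1 MiB.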
+
+/*
+ * @offset - sector where this sb was written
+ * @version - on disk format version
+ * @version_min - Oldest metadata version this filesystem contains; so we can
+ * safely drop compatibility code and refuse to mount filesystems
+ * we'd need it for
+ * @magic - identifies as a bcachefs superblock (BCHFS_MAGIC)
+ * @uuid - used for generating various magic numbers and identifying
+ * member devices, never changes
+ * @user_uuid - user visible UUID, may be changed
+ * @label - filesystem label
+ * @seq - identifies most recent superblock, incremented each time
+ * superblock is written
+ * @features - enabled incompatible features
+ */
+struct bch_sb {
+ struct bch_csum csum;
+ __le16 version;
+ __le16 version_min;
+ __le16 pad[2];
+ __uuid_t magic;
+ __uuid_t uuid;
+ __uuid_t user_uuid;
+ __u8 label[BCH_SB_LABEL_SIZE];
+ __le64 offset;
+ __le64 seq;
+
+ __le16 block_size;
+ __u8 dev_idx;
+ __u8 nr_devices;
+ __le32 u64s;
+
+ __le64 time_base_lo;
+ __le32 time_base_hi;
+ __le32 time_precision;
+
+ __le64 flags[7];
+ __le64 write_time;
+ __le64 features[2];
+ __le64 compat[2];
+
+ struct bch_sb_layout layout;
+
+ struct bch_sb_field start[0];
+ __le64 _data[];
+} __packed __aligned(8);
+
+/*
+ * Flags:
+ * BCH_SB_INITIALIZED - set on first mount
+ * BCH_SB_CLEAN - did we shut down cleanly? Just a hint, doesn't affect
+ * behaviour of mount/recovery path:
+ * BCH_SB_INODE_32BIT - limit inode numbers to 32 bits
+ * BCH_SB_128_BIT_MACS - 128 bit MACs instead of 80
+ * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides
+ * DATA/META_CSUM_TYPE. Also indicates encryption
+ * algorithm in use, if/when we get more than one
+ */
+
+LE16_BITMASK(BCH_SB_BLOCK_SIZE, struct bch_sb, block_size, 0, 16);
+
+LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1);
+LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2);
+LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8);
+LE64_BITMASK(BCH_SB_ERROR_ACTION, struct bch_sb, flags[0], 8, 12);
+
+LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE, struct bch_sb, flags[0], 12, 28);
+
+LE64_BITMASK(BCH_SB_GC_RESERVE, struct bch_sb, flags[0], 28, 33);
+LE64_BITMASK(BCH_SB_ROOT_RESERVE, struct bch_sb, flags[0], 33, 40);
+
+LE64_BITMASK(BCH_SB_META_CSUM_TYPE, struct bch_sb, flags[0], 40, 44);
+LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48);
+
+LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52);
+LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56);
+
+LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57);
+LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58);
+LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59);
+LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60);
+
+LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61);
+LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62);
+
+LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63);
+
+LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1], 4, 8);
+LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9);
+
+LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10);
+LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14);
+
+/*
+ * Max size of an extent that may require bouncing to read or write
+ * (checksummed, compressed): 64k
+ */
+LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS,
+ struct bch_sb, flags[1], 14, 20);
+
+LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24);
+LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28);
+
+LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40);
+LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52);
+LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64);
+
+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO,
+ struct bch_sb, flags[2], 0, 4);
+LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64);
+
+LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
+LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28);
+LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29);
+LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
+LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
+LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
+LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
+LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
+LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34);
+LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54);
+LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56);
+
+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_HI,struct bch_sb, flags[4], 56, 60);
+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
+ struct bch_sb, flags[4], 60, 64);
+
+LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
+ struct bch_sb, flags[5], 0, 16);
+
+static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
+{
+ return BCH_SB_COMPRESSION_TYPE_LO(sb) | (BCH_SB_COMPRESSION_TYPE_HI(sb) << 4);
+}
+
+static inline void SET_BCH_SB_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
+{
+ SET_BCH_SB_COMPRESSION_TYPE_LO(sb, v);
+ SET_BCH_SB_COMPRESSION_TYPE_HI(sb, v >> 4);
+}
+
+static inline __u64 BCH_SB_BACKGROUND_COMPRESSION_TYPE(const struct bch_sb *sb)
+{
+ return BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb) |
+ (BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb) << 4);
+}
+
+static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
+{
+ SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb, v);
+ SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb, v >> 4);
+}
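The compression options outgrew the 4-bit fields originally reserved for them, so each is split into a low nibble (in flags[1] or flags[2]) and a high nibble (in flags[4]); the accessors above reassemble them. For example, storing a value of 0x2f writes LO = 0xf and HI = 0x2, and reading yields 0xf | (0x2 << 4) = 0x2f again.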
+
+/*
+ * Features:
+ *
+ * journal_seq_blacklist_v3: gates BCH_SB_FIELD_journal_seq_blacklist
+ * reflink: gates KEY_TYPE_reflink
+ * inline_data: gates KEY_TYPE_inline_data
+ * new_siphash: gates BCH_STR_HASH_siphash
+ * new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE
+ */
+#define BCH_SB_FEATURES() \
+ x(lz4, 0) \
+ x(gzip, 1) \
+ x(zstd, 2) \
+ x(atomic_nlink, 3) \
+ x(ec, 4) \
+ x(journal_seq_blacklist_v3, 5) \
+ x(reflink, 6) \
+ x(new_siphash, 7) \
+ x(inline_data, 8) \
+ x(new_extent_overwrite, 9) \
+ x(incompressible, 10) \
+ x(btree_ptr_v2, 11) \
+ x(extents_above_btree_updates, 12) \
+ x(btree_updates_journalled, 13) \
+ x(reflink_inline_data, 14) \
+ x(new_varint, 15) \
+ x(journal_no_flush, 16) \
+ x(alloc_v2, 17) \
+ x(extents_across_btree_nodes, 18)
+
+#define BCH_SB_FEATURES_ALWAYS \
+ ((1ULL << BCH_FEATURE_new_extent_overwrite)| \
+ (1ULL << BCH_FEATURE_extents_above_btree_updates)|\
+ (1ULL << BCH_FEATURE_btree_updates_journalled)|\
+ (1ULL << BCH_FEATURE_alloc_v2)|\
+ (1ULL << BCH_FEATURE_extents_across_btree_nodes))
+
+#define BCH_SB_FEATURES_ALL \
+ (BCH_SB_FEATURES_ALWAYS| \
+ (1ULL << BCH_FEATURE_new_siphash)| \
+ (1ULL << BCH_FEATURE_btree_ptr_v2)| \
+ (1ULL << BCH_FEATURE_new_varint)| \
+ (1ULL << BCH_FEATURE_journal_no_flush))
+
+enum bch_sb_feature {
+#define x(f, n) BCH_FEATURE_##f,
+ BCH_SB_FEATURES()
+#undef x
+ BCH_FEATURE_NR,
+};
+
+#define BCH_SB_COMPAT() \
+ x(alloc_info, 0) \
+ x(alloc_metadata, 1) \
+ x(extents_above_btree_updates_done, 2) \
+ x(bformat_overflow_done, 3)
+
+enum bch_sb_compat {
+#define x(f, n) BCH_COMPAT_##f,
+ BCH_SB_COMPAT()
+#undef x
+ BCH_COMPAT_NR,
+};
+
+/* options: */
+
+#define BCH_VERSION_UPGRADE_OPTS() \
+ x(compatible, 0) \
+ x(incompatible, 1) \
+ x(none, 2)
+
+enum bch_version_upgrade_opts {
+#define x(t, n) BCH_VERSION_UPGRADE_##t = n,
+ BCH_VERSION_UPGRADE_OPTS()
+#undef x
+};
+
+#define BCH_REPLICAS_MAX 4U
+
+#define BCH_BKEY_PTRS_MAX 16U
+
+#define BCH_ERROR_ACTIONS() \
+ x(continue, 0) \
+ x(ro, 1) \
+ x(panic, 2)
+
+enum bch_error_actions {
+#define x(t, n) BCH_ON_ERROR_##t = n,
+ BCH_ERROR_ACTIONS()
+#undef x
+ BCH_ON_ERROR_NR
+};
+
+#define BCH_STR_HASH_TYPES() \
+ x(crc32c, 0) \
+ x(crc64, 1) \
+ x(siphash_old, 2) \
+ x(siphash, 3)
+
+enum bch_str_hash_type {
+#define x(t, n) BCH_STR_HASH_##t = n,
+ BCH_STR_HASH_TYPES()
+#undef x
+ BCH_STR_HASH_NR
+};
+
+#define BCH_STR_HASH_OPTS() \
+ x(crc32c, 0) \
+ x(crc64, 1) \
+ x(siphash, 2)
+
+enum bch_str_hash_opts {
+#define x(t, n) BCH_STR_HASH_OPT_##t = n,
+ BCH_STR_HASH_OPTS()
+#undef x
+ BCH_STR_HASH_OPT_NR
+};
+
+#define BCH_CSUM_TYPES() \
+ x(none, 0) \
+ x(crc32c_nonzero, 1) \
+ x(crc64_nonzero, 2) \
+ x(chacha20_poly1305_80, 3) \
+ x(chacha20_poly1305_128, 4) \
+ x(crc32c, 5) \
+ x(crc64, 6) \
+ x(xxhash, 7)
+
+enum bch_csum_type {
+#define x(t, n) BCH_CSUM_##t = n,
+ BCH_CSUM_TYPES()
+#undef x
+ BCH_CSUM_NR
+};
+
+static const __maybe_unused unsigned bch_crc_bytes[] = {
+ [BCH_CSUM_none] = 0,
+ [BCH_CSUM_crc32c_nonzero] = 4,
+ [BCH_CSUM_crc32c] = 4,
+ [BCH_CSUM_crc64_nonzero] = 8,
+ [BCH_CSUM_crc64] = 8,
+ [BCH_CSUM_xxhash] = 8,
+ [BCH_CSUM_chacha20_poly1305_80] = 10,
+ [BCH_CSUM_chacha20_poly1305_128] = 16,
+};
+
+static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type)
+{
+ switch (type) {
+ case BCH_CSUM_chacha20_poly1305_80:
+ case BCH_CSUM_chacha20_poly1305_128:
+ return true;
+ default:
+ return false;
+ }
+}
+
+#define BCH_CSUM_OPTS() \
+ x(none, 0) \
+ x(crc32c, 1) \
+ x(crc64, 2) \
+ x(xxhash, 3)
+
+enum bch_csum_opts {
+#define x(t, n) BCH_CSUM_OPT_##t = n,
+ BCH_CSUM_OPTS()
+#undef x
+ BCH_CSUM_OPT_NR
+};
+
+#define BCH_COMPRESSION_TYPES() \
+ x(none, 0) \
+ x(lz4_old, 1) \
+ x(gzip, 2) \
+ x(lz4, 3) \
+ x(zstd, 4) \
+ x(incompressible, 5)
+
+enum bch_compression_type {
+#define x(t, n) BCH_COMPRESSION_TYPE_##t = n,
+ BCH_COMPRESSION_TYPES()
+#undef x
+ BCH_COMPRESSION_TYPE_NR
+};
+
+#define BCH_COMPRESSION_OPTS() \
+ x(none, 0) \
+ x(lz4, 1) \
+ x(gzip, 2) \
+ x(zstd, 3)
+
+enum bch_compression_opts {
+#define x(t, n) BCH_COMPRESSION_OPT_##t = n,
+ BCH_COMPRESSION_OPTS()
+#undef x
+ BCH_COMPRESSION_OPT_NR
+};
+
+/*
+ * Magic numbers
+ *
+ * The various other data structures have their own magic numbers, which are
+ * xored with the first part of the cache set's UUID
+ */
+
+#define BCACHE_MAGIC \
+ UUID_INIT(0xc68573f6, 0x4e1a, 0x45ca, \
+ 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81)
+#define BCHFS_MAGIC \
+ UUID_INIT(0xc68573f6, 0x66ce, 0x90a9, \
+ 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef)
+
+#define BCACHEFS_STATFS_MAGIC 0xca451a4e
+
+#define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL)
+#define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL)
+
+static inline __le64 __bch2_sb_magic(struct bch_sb *sb)
+{
+ __le64 ret;
+
+ memcpy(&ret, &sb->uuid, sizeof(ret));
+ return ret;
+}
+
+static inline __u64 __jset_magic(struct bch_sb *sb)
+{
+ return __le64_to_cpu(__bch2_sb_magic(sb) ^ JSET_MAGIC);
+}
+
+static inline __u64 __bset_magic(struct bch_sb *sb)
+{
+ return __le64_to_cpu(__bch2_sb_magic(sb) ^ BSET_MAGIC);
+}
+
+/* Journal */
+
+#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
+
+#define BCH_JSET_ENTRY_TYPES() \
+ x(btree_keys, 0) \
+ x(btree_root, 1) \
+ x(prio_ptrs, 2) \
+ x(blacklist, 3) \
+ x(blacklist_v2, 4) \
+ x(usage, 5) \
+ x(data_usage, 6) \
+ x(clock, 7) \
+ x(dev_usage, 8) \
+ x(log, 9) \
+ x(overwrite, 10) \
+ x(write_buffer_keys, 11)
+
+enum {
+#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
+ BCH_JSET_ENTRY_TYPES()
+#undef x
+ BCH_JSET_ENTRY_NR
+};
+
+static inline bool jset_entry_is_key(struct jset_entry *e)
+{
+ switch (e->type) {
+ case BCH_JSET_ENTRY_btree_keys:
+ case BCH_JSET_ENTRY_btree_root:
+ case BCH_JSET_ENTRY_overwrite:
+ case BCH_JSET_ENTRY_write_buffer_keys:
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Journal sequence numbers can be blacklisted: bsets record the max sequence
+ * number of all the journal entries they contain updates for, so that on
+ * recovery we can ignore those bsets that contain index updates newer than what
+ * made it into the journal.
+ *
+ * This means that we can't reuse that journal_seq - we have to skip it, and
+ * then record that we skipped it so that the next time we crash and recover we
+ * don't think there was a missing journal entry.
+ */
+struct jset_entry_blacklist {
+ struct jset_entry entry;
+ __le64 seq;
+};
+
+struct jset_entry_blacklist_v2 {
+ struct jset_entry entry;
+ __le64 start;
+ __le64 end;
+};
+
+#define BCH_FS_USAGE_TYPES() \
+ x(reserved, 0) \
+ x(inodes, 1) \
+ x(key_version, 2)
+
+enum {
+#define x(f, nr) BCH_FS_USAGE_##f = nr,
+ BCH_FS_USAGE_TYPES()
+#undef x
+ BCH_FS_USAGE_NR
+};
+
+struct jset_entry_usage {
+ struct jset_entry entry;
+ __le64 v;
+} __packed;
+
+struct jset_entry_data_usage {
+ struct jset_entry entry;
+ __le64 v;
+ struct bch_replicas_entry_v1 r;
+} __packed;
+
+struct jset_entry_clock {
+ struct jset_entry entry;
+ __u8 rw;
+ __u8 pad[7];
+ __le64 time;
+} __packed;
+
+struct jset_entry_dev_usage_type {
+ __le64 buckets;
+ __le64 sectors;
+ __le64 fragmented;
+} __packed;
+
+struct jset_entry_dev_usage {
+ struct jset_entry entry;
+ __le32 dev;
+ __u32 pad;
+
+ __le64 _buckets_ec; /* No longer used */
+ __le64 _buckets_unavailable; /* No longer used */
+
+ struct jset_entry_dev_usage_type d[];
+};
+
+static inline unsigned jset_entry_dev_usage_nr_types(struct jset_entry_dev_usage *u)
+{
+ return (vstruct_bytes(&u->entry) - sizeof(struct jset_entry_dev_usage)) /
+ sizeof(struct jset_entry_dev_usage_type);
+}
+
+struct jset_entry_log {
+ struct jset_entry entry;
+ u8 d[];
+} __packed __aligned(8);
+
+/*
+ * On disk format for a journal entry:
+ * seq is monotonically increasing; every journal entry has its own unique
+ * sequence number.
+ *
+ * last_seq is the oldest journal entry that still has keys the btree hasn't
+ * flushed to disk yet.
+ *
+ * version is for on disk format changes.
+ */
+struct jset {
+ struct bch_csum csum;
+
+ __le64 magic;
+ __le64 seq;
+ __le32 version;
+ __le32 flags;
+
+ __le32 u64s; /* size of d[] in u64s */
+
+ __u8 encrypted_start[0];
+
+ __le16 _read_clock; /* no longer used */
+ __le16 _write_clock;
+
+ /* Sequence number of oldest dirty journal entry */
+ __le64 last_seq;
+
+ struct jset_entry start[0];
+ __u64 _data[];
+} __packed __aligned(8);
+
+LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
+LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
+LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6);
+
+#define BCH_JOURNAL_BUCKETS_MIN 8
+
+/* Btree: */
+
+enum btree_id_flags {
+ BTREE_ID_EXTENTS = BIT(0),
+ BTREE_ID_SNAPSHOTS = BIT(1),
+ BTREE_ID_SNAPSHOT_FIELD = BIT(2),
+ BTREE_ID_DATA = BIT(3),
+};
+
+#define BCH_BTREE_IDS() \
+ x(extents, 0, BTREE_ID_EXTENTS|BTREE_ID_SNAPSHOTS|BTREE_ID_DATA,\
+ BIT_ULL(KEY_TYPE_whiteout)| \
+ BIT_ULL(KEY_TYPE_error)| \
+ BIT_ULL(KEY_TYPE_cookie)| \
+ BIT_ULL(KEY_TYPE_extent)| \
+ BIT_ULL(KEY_TYPE_reservation)| \
+ BIT_ULL(KEY_TYPE_reflink_p)| \
+ BIT_ULL(KEY_TYPE_inline_data)) \
+ x(inodes, 1, BTREE_ID_SNAPSHOTS, \
+ BIT_ULL(KEY_TYPE_whiteout)| \
+ BIT_ULL(KEY_TYPE_inode)| \
+ BIT_ULL(KEY_TYPE_inode_v2)| \
+ BIT_ULL(KEY_TYPE_inode_v3)| \
+ BIT_ULL(KEY_TYPE_inode_generation)) \
+ x(dirents, 2, BTREE_ID_SNAPSHOTS, \
+ BIT_ULL(KEY_TYPE_whiteout)| \
+ BIT_ULL(KEY_TYPE_hash_whiteout)| \
+ BIT_ULL(KEY_TYPE_dirent)) \
+ x(xattrs, 3, BTREE_ID_SNAPSHOTS, \
+ BIT_ULL(KEY_TYPE_whiteout)| \
+ BIT_ULL(KEY_TYPE_cookie)| \
+ BIT_ULL(KEY_TYPE_hash_whiteout)| \
+ BIT_ULL(KEY_TYPE_xattr)) \
+ x(alloc, 4, 0, \
+ BIT_ULL(KEY_TYPE_alloc)| \
+ BIT_ULL(KEY_TYPE_alloc_v2)| \
+ BIT_ULL(KEY_TYPE_alloc_v3)| \
+ BIT_ULL(KEY_TYPE_alloc_v4)) \
+ x(quotas, 5, 0, \
+ BIT_ULL(KEY_TYPE_quota)) \
+ x(stripes, 6, 0, \
+ BIT_ULL(KEY_TYPE_stripe)) \
+ x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \
+ BIT_ULL(KEY_TYPE_reflink_v)| \
+ BIT_ULL(KEY_TYPE_indirect_inline_data)) \
+ x(subvolumes, 8, 0, \
+ BIT_ULL(KEY_TYPE_subvolume)) \
+ x(snapshots, 9, 0, \
+ BIT_ULL(KEY_TYPE_snapshot)) \
+ x(lru, 10, 0, \
+ BIT_ULL(KEY_TYPE_set)) \
+ x(freespace, 11, BTREE_ID_EXTENTS, \
+ BIT_ULL(KEY_TYPE_set)) \
+ x(need_discard, 12, 0, \
+ BIT_ULL(KEY_TYPE_set)) \
+ x(backpointers, 13, 0, \
+ BIT_ULL(KEY_TYPE_backpointer)) \
+ x(bucket_gens, 14, 0, \
+ BIT_ULL(KEY_TYPE_bucket_gens)) \
+ x(snapshot_trees, 15, 0, \
+ BIT_ULL(KEY_TYPE_snapshot_tree)) \
+ x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \
+ BIT_ULL(KEY_TYPE_set)) \
+ x(logged_ops, 17, 0, \
+ BIT_ULL(KEY_TYPE_logged_op_truncate)| \
+ BIT_ULL(KEY_TYPE_logged_op_finsert)) \
+ x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
+ BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))
+
+enum btree_id {
+#define x(name, nr, ...) BTREE_ID_##name = nr,
+ BCH_BTREE_IDS()
+#undef x
+ BTREE_ID_NR
+};
+
+#define BTREE_MAX_DEPTH 4U
+
+/* Btree nodes */
+
+/*
+ * Btree nodes
+ *
+ * On disk a btree node is a list/log of these; within each set the keys are
+ * sorted
+ */
+struct bset {
+ __le64 seq;
+
+ /*
+ * Highest journal entry this bset contains keys for.
+ * If on recovery we don't see that journal entry, this bset is ignored:
+ * this allows us to preserve the order of all index updates after a
+ * crash, since the journal records a total order of all index updates
+ * and anything that didn't make it to the journal doesn't get used.
+ */
+ __le64 journal_seq;
+
+ __le32 flags;
+ __le16 version;
+ __le16 u64s; /* count of d[] in u64s */
+
+ struct bkey_packed start[0];
+ __u64 _data[];
+} __packed __aligned(8);
+
+LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4);
+
+LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 4, 5);
+LE32_BITMASK(BSET_SEPARATE_WHITEOUTS,
+ struct bset, flags, 5, 6);
+
+/* Sector offset within the btree node: */
+LE32_BITMASK(BSET_OFFSET, struct bset, flags, 16, 32);
+
+struct btree_node {
+ struct bch_csum csum;
+ __le64 magic;
+
+ /* this flags field is encrypted, unlike bset->flags: */
+ __le64 flags;
+
+ /* Closed interval: */
+ struct bpos min_key;
+ struct bpos max_key;
+ struct bch_extent_ptr _ptr; /* not used anymore */
+ struct bkey_format format;
+
+ union {
+ struct bset keys;
+ struct {
+ __u8 pad[22];
+ __le16 u64s;
+ __u64 _data[0];
+
+ };
+ };
+} __packed __aligned(8);
+
+LE64_BITMASK(BTREE_NODE_ID_LO, struct btree_node, flags, 0, 4);
+LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8);
+LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE,
+ struct btree_node, flags, 8, 9);
+LE64_BITMASK(BTREE_NODE_ID_HI, struct btree_node, flags, 9, 25);
+/* 25-32 unused */
+LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64);
+
+static inline __u64 BTREE_NODE_ID(struct btree_node *n)
+{
+ return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4);
+}
+
+static inline void SET_BTREE_NODE_ID(struct btree_node *n, __u64 v)
+{
+ SET_BTREE_NODE_ID_LO(n, v);
+ SET_BTREE_NODE_ID_HI(n, v >> 4);
+}
+
+struct btree_node_entry {
+ struct bch_csum csum;
+
+ union {
+ struct bset keys;
+ struct {
+ __u8 pad[22];
+ __le16 u64s;
+ __u64 _data[0];
+ };
+ };
+} __packed __aligned(8);
+
+#endif /* _BCACHEFS_FORMAT_H */
diff --git a/c_src/libbcachefs/bcachefs_ioctl.h b/c_src/libbcachefs/bcachefs_ioctl.h
new file mode 100644
index 00000000..4b8fba75
--- /dev/null
+++ b/c_src/libbcachefs/bcachefs_ioctl.h
@@ -0,0 +1,412 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IOCTL_H
+#define _BCACHEFS_IOCTL_H
+
+#include <linux/uuid.h>
+#include <asm/ioctl.h>
+#include "bcachefs_format.h"
+
+/*
+ * Flags common to multiple ioctls:
+ */
+#define BCH_FORCE_IF_DATA_LOST (1 << 0)
+#define BCH_FORCE_IF_METADATA_LOST (1 << 1)
+#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2)
+#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
+
+#define BCH_FORCE_IF_LOST \
+ (BCH_FORCE_IF_DATA_LOST| \
+ BCH_FORCE_IF_METADATA_LOST)
+#define BCH_FORCE_IF_DEGRADED \
+ (BCH_FORCE_IF_DATA_DEGRADED| \
+ BCH_FORCE_IF_METADATA_DEGRADED)
+
+/*
+ * If cleared, ioctls that refer to a device pass it as a pointer to a pathname
+ * (e.g. /dev/sda1); if set, the dev field is the device's index within the
+ * filesystem:
+ */
+#define BCH_BY_INDEX (1 << 4)
+
+/*
+ * For BCH_IOCTL_READ_SUPER: get superblock of a specific device, not filesystem
+ * wide superblock:
+ */
+#define BCH_READ_DEV (1 << 5)
+
+/* global control dev: */
+
+/* These are currently broken, and probably unnecessary: */
+#if 0
+#define BCH_IOCTL_ASSEMBLE _IOW(0xbc, 1, struct bch_ioctl_assemble)
+#define BCH_IOCTL_INCREMENTAL _IOW(0xbc, 2, struct bch_ioctl_incremental)
+
+struct bch_ioctl_assemble {
+ __u32 flags;
+ __u32 nr_devs;
+ __u64 pad;
+ __u64 devs[];
+};
+
+struct bch_ioctl_incremental {
+ __u32 flags;
+ __u64 pad;
+ __u64 dev;
+};
+#endif
+
+/* filesystem ioctls: */
+
+#define BCH_IOCTL_QUERY_UUID _IOR(0xbc, 1, struct bch_ioctl_query_uuid)
+
+/* These only make sense when we also have incremental assembly */
+#if 0
+#define BCH_IOCTL_START _IOW(0xbc, 2, struct bch_ioctl_start)
+#define BCH_IOCTL_STOP _IO(0xbc, 3)
+#endif
+
+#define BCH_IOCTL_DISK_ADD _IOW(0xbc, 4, struct bch_ioctl_disk)
+#define BCH_IOCTL_DISK_REMOVE _IOW(0xbc, 5, struct bch_ioctl_disk)
+#define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk)
+#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk)
+#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state)
+#define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data)
+#define BCH_IOCTL_FS_USAGE _IOWR(0xbc, 11, struct bch_ioctl_fs_usage)
+#define BCH_IOCTL_DEV_USAGE _IOWR(0xbc, 11, struct bch_ioctl_dev_usage)
+#define BCH_IOCTL_READ_SUPER _IOW(0xbc, 12, struct bch_ioctl_read_super)
+#define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc, 13, struct bch_ioctl_disk_get_idx)
+#define BCH_IOCTL_DISK_RESIZE _IOW(0xbc, 14, struct bch_ioctl_disk_resize)
+#define BCH_IOCTL_DISK_RESIZE_JOURNAL _IOW(0xbc, 15, struct bch_ioctl_disk_resize_journal)
+
+#define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume)
+#define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume)
+
+#define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2)
+
+#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline)
+#define BCH_IOCTL_FSCK_ONLINE _IOW(0xbc, 20, struct bch_ioctl_fsck_online)
+
+/* ioctls below act on a particular file, not the filesystem as a whole: */
+
+#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *)
+
+/*
+ * BCH_IOCTL_QUERY_UUID: get filesystem UUID
+ *
+ * Returns user visible UUID, not internal UUID (which may not ever be changed);
+ * the filesystem's sysfs directory may be found under /sys/fs/bcachefs with
+ * this UUID.
+ */
+struct bch_ioctl_query_uuid {
+ __uuid_t uuid;
+};
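A minimal userspace sketch of issuing this ioctl; it assumes the file descriptor refers to the mounted filesystem (for instance, its mount point directory), and error handling is kept to a minimum:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    /* Print the user-visible UUID of the bcachefs filesystem mounted at @path. */
    static void print_fs_uuid(const char *path)
    {
            struct bch_ioctl_query_uuid u;
            int fd = open(path, O_RDONLY);

            if (fd < 0)
                    return;

            if (!ioctl(fd, BCH_IOCTL_QUERY_UUID, &u)) {
                    const unsigned char *b = (const unsigned char *) &u.uuid;

                    for (unsigned i = 0; i < sizeof(u.uuid); i++)
                            printf("%02x", b[i]);
                    printf("\n");
            }

            close(fd);
    }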
+
+#if 0
+struct bch_ioctl_start {
+ __u32 flags;
+ __u32 pad;
+};
+#endif
+
+/*
+ * BCH_IOCTL_DISK_ADD: add a new device to an existing filesystem
+ *
+ * The specified device must not be open or in use. On success, the new device
+ * will be an online member of the filesystem just like any other member.
+ *
+ * The device must first be prepared by userspace by formatting with a bcachefs
+ * superblock, which is only used for passing in superblock options/parameters
+ * for that device (in struct bch_member). The new device's superblock should
+ * not claim to be a member of any existing filesystem - UUIDs on it will be
+ * ignored.
+ */
+
+/*
+ * BCH_IOCTL_DISK_REMOVE: permanently remove a member device from a filesystem
+ *
+ * Any data present on @dev will be permanently deleted, and @dev will be
+ * removed from its slot in the filesystem's list of member devices. The device
+ * may be either online or offline.
+ *
+ * Will fail if removing @dev would leave us with insufficient read write devices
+ * or degraded/unavailable data, unless the appropriate BCH_FORCE_IF_* flags are
+ * set.
+ */
+
+/*
+ * BCH_IOCTL_DISK_ONLINE: given a disk that is already a member of a filesystem
+ * but is not open (e.g. because we started in degraded mode), bring it online
+ *
+ * all existing data on @dev will be available once the device is online,
+ * exactly as if @dev was present when the filesystem was first mounted
+ */
+
+/*
+ * BCH_IOCTL_DISK_OFFLINE: offline a disk, causing the kernel to close that
+ * block device, without removing it from the filesystem (so it can be brought
+ * back online later)
+ *
+ * Data present on @dev will be unavailable while @dev is offline (unless
+ * replicated), but will still be intact and untouched if @dev is brought back
+ * online
+ *
+ * Will fail (similarly to BCH_IOCTL_DISK_SET_STATE) if offlining @dev would
+ * leave us with insufficient read write devices or degraded/unavailable data,
+ * unless the appropriate BCH_FORCE_IF_* flags are set.
+ */
+
+struct bch_ioctl_disk {
+ __u32 flags;
+ __u32 pad;
+ __u64 dev;
+};
+
+/*
+ * BCH_IOCTL_DISK_SET_STATE: modify state of a member device of a filesystem
+ *
+ * @new_state - one of the bch_member_state states (rw, ro, failed,
+ * spare)
+ *
+ * Will refuse to change member state if we would then have insufficient devices
+ * to write to, or if it would result in degraded data (when @new_state is
+ * failed or spare) unless the appropriate BCH_FORCE_IF_* flags are set.
+ */
+struct bch_ioctl_disk_set_state {
+ __u32 flags;
+ __u8 new_state;
+ __u8 pad[3];
+ __u64 dev;
+};
+
+#define BCH_DATA_OPS() \
+ x(scrub, 0) \
+ x(rereplicate, 1) \
+ x(migrate, 2) \
+ x(rewrite_old_nodes, 3) \
+ x(drop_extra_replicas, 4)
+
+enum bch_data_ops {
+#define x(t, n) BCH_DATA_OP_##t = n,
+ BCH_DATA_OPS()
+#undef x
+ BCH_DATA_OP_NR
+};
+
+/*
+ * BCH_IOCTL_DATA: operations that walk and manipulate filesystem data (e.g.
+ * scrub, rereplicate, migrate).
+ *
+ * This ioctl kicks off a job in the background, and returns a file descriptor.
+ * Reading from the file descriptor returns a struct bch_ioctl_data_event,
+ * indicating current progress, and closing the file descriptor will stop the
+ * job. The file descriptor is O_CLOEXEC.
+ */
+struct bch_ioctl_data {
+ __u16 op;
+ __u8 start_btree;
+ __u8 end_btree;
+ __u32 flags;
+
+ struct bpos start_pos;
+ struct bpos end_pos;
+
+ union {
+ struct {
+ __u32 dev;
+ __u32 pad;
+ } migrate;
+ struct {
+ __u64 pad[8];
+ };
+ };
+} __packed __aligned(8);
+
+enum bch_data_event {
+ BCH_DATA_EVENT_PROGRESS = 0,
+ /* XXX: add an event for reporting errors */
+ BCH_DATA_EVENT_NR = 1,
+};
+
+struct bch_ioctl_data_progress {
+ __u8 data_type;
+ __u8 btree_id;
+ __u8 pad[2];
+ struct bpos pos;
+
+ __u64 sectors_done;
+ __u64 sectors_total;
+} __packed __aligned(8);
+
+struct bch_ioctl_data_event {
+ __u8 type;
+ __u8 pad[7];
+ union {
+ struct bch_ioctl_data_progress p;
+ __u64 pad2[15];
+ };
+} __packed __aligned(8);
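Putting these together, a hedged sketch of driving a data job from userspace: start it with BCH_IOCTL_DATA on a filesystem file descriptor, then read progress snapshots from the descriptor it returns. POS_MIN/POS_MAX, defined earlier in the format header, are used to cover the whole keyspace; the choice of rereplicate is only an example:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    /* Start a rereplicate pass over every btree and sample its progress once. */
    static void rereplicate_sample(int fs_fd)
    {
            struct bch_ioctl_data arg = {
                    .op             = BCH_DATA_OP_rereplicate,
                    .start_btree    = 0,
                    .end_btree      = BTREE_ID_NR,
                    .start_pos      = POS_MIN,
                    .end_pos        = POS_MAX,
            };
            struct bch_ioctl_data_event e;
            int job_fd = ioctl(fs_fd, BCH_IOCTL_DATA, &arg);

            if (job_fd < 0)
                    return;

            /* each read returns a snapshot of the job's current progress */
            if (read(job_fd, &e, sizeof(e)) == (ssize_t) sizeof(e) &&
                e.type == BCH_DATA_EVENT_PROGRESS)
                    printf("%llu/%llu sectors\n",
                           (unsigned long long) e.p.sectors_done,
                           (unsigned long long) e.p.sectors_total);

            /* closing the descriptor stops the job; a real tool would poll until done */
            close(job_fd);
    }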
+
+struct bch_replicas_usage {
+ __u64 sectors;
+ struct bch_replicas_entry_v1 r;
+} __packed;
+
+static inline struct bch_replicas_usage *
+replicas_usage_next(struct bch_replicas_usage *u)
+{
+ return (void *) u + replicas_entry_bytes(&u->r) + 8;
+}
+
+/*
+ * BCH_IOCTL_FS_USAGE: query filesystem disk space usage
+ *
+ * Returns disk space usage broken out by data type, number of replicas, and
+ * by component device
+ *
+ * @replica_entries_bytes - size, in bytes, allocated for replica usage entries
+ *
+ * On success, @replica_entries_bytes will be changed to indicate the number of
+ * bytes actually used.
+ *
+ * Returns -ERANGE if @replica_entries_bytes was too small
+ */
+struct bch_ioctl_fs_usage {
+ __u64 capacity;
+ __u64 used;
+ __u64 online_reserved;
+ __u64 persistent_reserved[BCH_REPLICAS_MAX];
+
+ __u32 replica_entries_bytes;
+ __u32 pad;
+
+ struct bch_replicas_usage replicas[];
+};
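A sketch of the -ERANGE convention described above, growing the buffer until the replica entries fit (the initial size and doubling strategy are assumptions for illustration):

    #include <errno.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>

    /* Query filesystem usage, retrying with a larger buffer on -ERANGE. */
    static struct bch_ioctl_fs_usage *fs_usage_read(int fs_fd)
    {
            unsigned replicas_bytes = 4096;

            while (1) {
                    struct bch_ioctl_fs_usage *u =
                            calloc(1, sizeof(*u) + replicas_bytes);

                    if (!u)
                            return NULL;

                    u->replica_entries_bytes = replicas_bytes;

                    if (!ioctl(fs_fd, BCH_IOCTL_FS_USAGE, u))
                            return u; /* replica_entries_bytes now holds the bytes used */

                    free(u);
                    if (errno != ERANGE)
                            return NULL;
                    replicas_bytes *= 2; /* entries didn't fit, try again with more room */
            }
    }

On success the variable-length replicas[] array can be walked from u->replicas for u->replica_entries_bytes bytes, stepping with replicas_usage_next().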
+
+/*
+ * BCH_IOCTL_DEV_USAGE: query device disk space usage
+ *
+ * Returns disk space usage broken out by data type - both by buckets and
+ * sectors.
+ */
+struct bch_ioctl_dev_usage {
+ __u64 dev;
+ __u32 flags;
+ __u8 state;
+ __u8 pad[7];
+
+ __u32 bucket_size;
+ __u64 nr_buckets;
+
+ __u64 buckets_ec;
+
+ struct bch_ioctl_dev_usage_type {
+ __u64 buckets;
+ __u64 sectors;
+ __u64 fragmented;
+ } d[10];
+};
+
+struct bch_ioctl_dev_usage_v2 {
+ __u64 dev;
+ __u32 flags;
+ __u8 state;
+ __u8 nr_data_types;
+ __u8 pad[6];
+
+ __u32 bucket_size;
+ __u64 nr_buckets;
+
+ struct bch_ioctl_dev_usage_type d[];
+};
+
+/*
+ * BCH_IOCTL_READ_SUPER: read filesystem superblock
+ *
+ * Equivalent to reading the superblock directly from the block device, except
+ * avoids racing with the kernel writing the superblock or having to figure out
+ * which block device to read
+ *
+ * @sb - buffer to read into
+ * @size - size of userspace allocated buffer
+ * @dev - device to read superblock for, if BCH_READ_DEV flag is
+ * specified
+ *
+ * Returns -ERANGE if buffer provided is too small
+ */
+struct bch_ioctl_read_super {
+ __u32 flags;
+ __u32 pad;
+ __u64 dev;
+ __u64 size;
+ __u64 sb;
+};
+
+/*
+ * BCH_IOCTL_DISK_GET_IDX: given a path to a block device, query the filesystem
+ * to determine whether the disk is an (online) member - if so, returns the device's index
+ *
+ * Returns -ENOENT if not found
+ */
+struct bch_ioctl_disk_get_idx {
+ __u64 dev;
+};
+
+/*
+ * BCH_IOCTL_DISK_RESIZE: resize filesystem on a device
+ *
+ * @dev - member to resize
+ * @nbuckets - new number of buckets
+ */
+struct bch_ioctl_disk_resize {
+ __u32 flags;
+ __u32 pad;
+ __u64 dev;
+ __u64 nbuckets;
+};
+
+/*
+ * BCH_IOCTL_DISK_RESIZE_JOURNAL: resize journal on a device
+ *
+ * @dev - member to resize
+ * @nbuckets - new number of buckets
+ */
+struct bch_ioctl_disk_resize_journal {
+ __u32 flags;
+ __u32 pad;
+ __u64 dev;
+ __u64 nbuckets;
+};
+
+struct bch_ioctl_subvolume {
+ __u32 flags;
+ __u32 dirfd;
+ __u16 mode;
+ __u16 pad[3];
+ __u64 dst_ptr;
+ __u64 src_ptr;
+};
+
+#define BCH_SUBVOL_SNAPSHOT_CREATE (1U << 0)
+#define BCH_SUBVOL_SNAPSHOT_RO (1U << 1)
+
+/*
+ * BCH_IOCTL_FSCK_OFFLINE: run fsck from the 'bcachefs fsck' userspace command,
+ * but with the kernel's implementation of fsck:
+ */
+struct bch_ioctl_fsck_offline {
+ __u64 flags;
+ __u64 opts; /* string */
+ __u64 nr_devs;
+ __u64 devs[] __counted_by(nr_devs);
+};
+
+/*
+ * BCH_IOCTL_FSCK_ONLINE: run fsck from the 'bcachefs fsck' userspace command,
+ * but with the kernel's implementation of fsck:
+ */
+struct bch_ioctl_fsck_online {
+ __u64 flags;
+ __u64 opts; /* string */
+};
+
+#endif /* _BCACHEFS_IOCTL_H */
diff --git a/c_src/libbcachefs/bkey.c b/c_src/libbcachefs/bkey.c
new file mode 100644
index 00000000..abdb0550
--- /dev/null
+++ b/c_src/libbcachefs/bkey.c
@@ -0,0 +1,1120 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey.h"
+#include "bkey_cmp.h"
+#include "bkey_methods.h"
+#include "bset.h"
+#include "util.h"
+
+const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT;
+
+void bch2_bkey_packed_to_binary_text(struct printbuf *out,
+ const struct bkey_format *f,
+ const struct bkey_packed *k)
+{
+ const u64 *p = high_word(f, k);
+ unsigned word_bits = 64 - high_bit_offset;
+ unsigned nr_key_bits = bkey_format_key_bits(f) + high_bit_offset;
+ u64 v = *p & (~0ULL >> high_bit_offset);
+
+ if (!nr_key_bits) {
+ prt_str(out, "(empty)");
+ return;
+ }
+
+ while (1) {
+ unsigned next_key_bits = nr_key_bits;
+
+ if (nr_key_bits < 64) {
+ v >>= 64 - nr_key_bits;
+ next_key_bits = 0;
+ } else {
+ next_key_bits -= 64;
+ }
+
+ bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits));
+
+ if (!next_key_bits)
+ break;
+
+ prt_char(out, ' ');
+
+ p = next_word(p);
+ v = *p;
+ word_bits = 64;
+ nr_key_bits = next_key_bits;
+ }
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
+ const struct bkey *unpacked,
+ const struct bkey_format *format)
+{
+ struct bkey tmp;
+
+ BUG_ON(bkeyp_val_u64s(format, packed) !=
+ bkey_val_u64s(unpacked));
+
+ BUG_ON(packed->u64s < bkeyp_key_u64s(format, packed));
+
+ tmp = __bch2_bkey_unpack_key(format, packed);
+
+ if (memcmp(&tmp, unpacked, sizeof(struct bkey))) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n",
+ format->key_u64s,
+ format->bits_per_field[0],
+ format->bits_per_field[1],
+ format->bits_per_field[2],
+ format->bits_per_field[3],
+ format->bits_per_field[4]);
+
+ prt_printf(&buf, "compiled unpack: ");
+ bch2_bkey_to_text(&buf, unpacked);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "c unpack: ");
+ bch2_bkey_to_text(&buf, &tmp);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "compiled unpack: ");
+ bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
+ (struct bkey_packed *) unpacked);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "c unpack: ");
+ bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
+ (struct bkey_packed *) &tmp);
+ prt_newline(&buf);
+
+ panic("%s", buf.buf);
+ }
+}
+
+#else
+static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed,
+ const struct bkey *unpacked,
+ const struct bkey_format *format) {}
+#endif
+
+struct pack_state {
+ const struct bkey_format *format;
+ unsigned bits; /* bits remaining in current word */
+ u64 w; /* current word */
+ u64 *p; /* pointer to next word */
+};
+
+__always_inline
+static struct pack_state pack_state_init(const struct bkey_format *format,
+ struct bkey_packed *k)
+{
+ u64 *p = high_word(format, k);
+
+ return (struct pack_state) {
+ .format = format,
+ .bits = 64 - high_bit_offset,
+ .w = 0,
+ .p = p,
+ };
+}
+
+__always_inline
+static void pack_state_finish(struct pack_state *state,
+ struct bkey_packed *k)
+{
+ EBUG_ON(state->p < k->_data);
+ EBUG_ON(state->p >= (u64 *) k->_data + state->format->key_u64s);
+
+ *state->p = state->w;
+}
+
+struct unpack_state {
+ const struct bkey_format *format;
+ unsigned bits; /* bits remaining in current word */
+ u64 w; /* current word */
+ const u64 *p; /* pointer to next word */
+};
+
+__always_inline
+static struct unpack_state unpack_state_init(const struct bkey_format *format,
+ const struct bkey_packed *k)
+{
+ const u64 *p = high_word(format, k);
+
+ return (struct unpack_state) {
+ .format = format,
+ .bits = 64 - high_bit_offset,
+ .w = *p << high_bit_offset,
+ .p = p,
+ };
+}
+
+__always_inline
+static u64 get_inc_field(struct unpack_state *state, unsigned field)
+{
+ unsigned bits = state->format->bits_per_field[field];
+ u64 v = 0, offset = le64_to_cpu(state->format->field_offset[field]);
+
+ if (bits >= state->bits) {
+ v = state->w >> (64 - bits);
+ bits -= state->bits;
+
+ state->p = next_word(state->p);
+ state->w = *state->p;
+ state->bits = 64;
+ }
+
+ /* avoid shift by 64 if bits is 0 - bits is never 64 here: */
+ v |= (state->w >> 1) >> (63 - bits);
+ state->w <<= bits;
+ state->bits -= bits;
+
+ return v + offset;
+}
+
+__always_inline
+static void __set_inc_field(struct pack_state *state, unsigned field, u64 v)
+{
+ unsigned bits = state->format->bits_per_field[field];
+
+ if (bits) {
+ if (bits > state->bits) {
+ bits -= state->bits;
+ /* avoid shift by 64 if bits is 64 - bits is never 0 here: */
+ state->w |= (v >> 1) >> (bits - 1);
+
+ *state->p = state->w;
+ state->p = next_word(state->p);
+ state->w = 0;
+ state->bits = 64;
+ }
+
+ state->bits -= bits;
+ state->w |= v << state->bits;
+ }
+}
+
+__always_inline
+static bool set_inc_field(struct pack_state *state, unsigned field, u64 v)
+{
+ unsigned bits = state->format->bits_per_field[field];
+ u64 offset = le64_to_cpu(state->format->field_offset[field]);
+
+ if (v < offset)
+ return false;
+
+ v -= offset;
+
+ if (fls64(v) > bits)
+ return false;
+
+ __set_inc_field(state, field, v);
+ return true;
+}
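Fields are stored as deltas from the format's field_offset, so small packed widths can still represent large absolute values. For illustration: with bits_per_field[BKEY_FIELD_INODE] = 8 and field_offset[BKEY_FIELD_INODE] = 4096, inode numbers 4096 through 4351 pack into 8 bits (set_inc_field() rejects anything outside that window), and get_inc_field() adds the 4096 back when unpacking.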
+
+/*
+ * Note: does NOT set out->format (we don't know what it should be here!)
+ *
+ * Also: doesn't work on extents - it doesn't preserve the invariant that
+ * if k is packed bkey_start_pos(k) will successfully pack
+ */
+static bool bch2_bkey_transform_key(const struct bkey_format *out_f,
+ struct bkey_packed *out,
+ const struct bkey_format *in_f,
+ const struct bkey_packed *in)
+{
+ struct pack_state out_s = pack_state_init(out_f, out);
+ struct unpack_state in_s = unpack_state_init(in_f, in);
+ u64 *w = out->_data;
+ unsigned i;
+
+ *w = 0;
+
+ for (i = 0; i < BKEY_NR_FIELDS; i++)
+ if (!set_inc_field(&out_s, i, get_inc_field(&in_s, i)))
+ return false;
+
+ /* Can't happen because the val would be too big to unpack: */
+ EBUG_ON(in->u64s - in_f->key_u64s + out_f->key_u64s > U8_MAX);
+
+ pack_state_finish(&out_s, out);
+ out->u64s = out_f->key_u64s + in->u64s - in_f->key_u64s;
+ out->needs_whiteout = in->needs_whiteout;
+ out->type = in->type;
+
+ return true;
+}
+
+bool bch2_bkey_transform(const struct bkey_format *out_f,
+ struct bkey_packed *out,
+ const struct bkey_format *in_f,
+ const struct bkey_packed *in)
+{
+ if (!bch2_bkey_transform_key(out_f, out, in_f, in))
+ return false;
+
+ memcpy_u64s((u64 *) out + out_f->key_u64s,
+ (u64 *) in + in_f->key_u64s,
+ (in->u64s - in_f->key_u64s));
+ return true;
+}
+
+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *format,
+ const struct bkey_packed *in)
+{
+ struct unpack_state state = unpack_state_init(format, in);
+ struct bkey out;
+
+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
+ EBUG_ON(in->u64s < format->key_u64s);
+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
+ EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX);
+
+ out.u64s = BKEY_U64s + in->u64s - format->key_u64s;
+ out.format = KEY_FORMAT_CURRENT;
+ out.needs_whiteout = in->needs_whiteout;
+ out.type = in->type;
+ out.pad[0] = 0;
+
+#define x(id, field) out.field = get_inc_field(&state, id);
+ bkey_fields()
+#undef x
+
+ return out;
+}
+
+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK
+struct bpos __bkey_unpack_pos(const struct bkey_format *format,
+ const struct bkey_packed *in)
+{
+ struct unpack_state state = unpack_state_init(format, in);
+ struct bpos out;
+
+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
+ EBUG_ON(in->u64s < format->key_u64s);
+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
+
+ out.inode = get_inc_field(&state, BKEY_FIELD_INODE);
+ out.offset = get_inc_field(&state, BKEY_FIELD_OFFSET);
+ out.snapshot = get_inc_field(&state, BKEY_FIELD_SNAPSHOT);
+
+ return out;
+}
+#endif
+
+/**
+ * bch2_bkey_pack_key -- pack just the key, not the value
+ * @out: packed result
+ * @in: key to pack
+ * @format: format of packed result
+ *
+ * Returns: true on success, false on failure
+ */
+bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
+ const struct bkey_format *format)
+{
+ struct pack_state state = pack_state_init(format, out);
+ u64 *w = out->_data;
+
+ EBUG_ON((void *) in == (void *) out);
+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
+ EBUG_ON(in->format != KEY_FORMAT_CURRENT);
+
+ *w = 0;
+
+#define x(id, field) if (!set_inc_field(&state, id, in->field)) return false;
+ bkey_fields()
+#undef x
+ pack_state_finish(&state, out);
+ out->u64s = format->key_u64s + in->u64s - BKEY_U64s;
+ out->format = KEY_FORMAT_LOCAL_BTREE;
+ out->needs_whiteout = in->needs_whiteout;
+ out->type = in->type;
+
+ bch2_bkey_pack_verify(out, in, format);
+ return true;
+}
+
+/**
+ * bch2_bkey_unpack -- unpack the key and the value
+ * @b: btree node of @src key (for packed format)
+ * @dst: unpacked result
+ * @src: packed input
+ */
+void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst,
+ const struct bkey_packed *src)
+{
+ __bkey_unpack_key(b, &dst->k, src);
+
+ memcpy_u64s(&dst->v,
+ bkeyp_val(&b->format, src),
+ bkeyp_val_u64s(&b->format, src));
+}
+
+/**
+ * bch2_bkey_pack -- pack the key and the value
+ * @dst: packed result
+ * @src: unpacked input
+ * @format: format of packed result
+ *
+ * Returns: true on success, false on failure
+ */
+bool bch2_bkey_pack(struct bkey_packed *dst, const struct bkey_i *src,
+ const struct bkey_format *format)
+{
+ struct bkey_packed tmp;
+
+ if (!bch2_bkey_pack_key(&tmp, &src->k, format))
+ return false;
+
+ memmove_u64s((u64 *) dst + format->key_u64s,
+ &src->v,
+ bkey_val_u64s(&src->k));
+ memcpy_u64s_small(dst, &tmp, format->key_u64s);
+
+ return true;
+}
+
+__always_inline
+static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v)
+{
+ unsigned bits = state->format->bits_per_field[field];
+ u64 offset = le64_to_cpu(state->format->field_offset[field]);
+ bool ret = true;
+
+ EBUG_ON(v < offset);
+ v -= offset;
+
+ if (fls64(v) > bits) {
+ v = ~(~0ULL << bits);
+ ret = false;
+ }
+
+ __set_inc_field(state, field, v);
+ return ret;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+static bool bkey_packed_successor(struct bkey_packed *out,
+ const struct btree *b,
+ struct bkey_packed k)
+{
+ const struct bkey_format *f = &b->format;
+ unsigned nr_key_bits = b->nr_key_bits;
+ unsigned first_bit, offset;
+ u64 *p;
+
+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
+
+ if (!nr_key_bits)
+ return false;
+
+ *out = k;
+
+ first_bit = high_bit_offset + nr_key_bits - 1;
+ p = nth_word(high_word(f, out), first_bit >> 6);
+ offset = 63 - (first_bit & 63);
+
+ while (nr_key_bits) {
+ unsigned bits = min(64 - offset, nr_key_bits);
+ u64 mask = (~0ULL >> (64 - bits)) << offset;
+
+ if ((*p & mask) != mask) {
+ *p += 1ULL << offset;
+ EBUG_ON(bch2_bkey_cmp_packed(b, out, &k) <= 0);
+ return true;
+ }
+
+ *p &= ~mask;
+ p = prev_word(p);
+ nr_key_bits -= bits;
+ offset = 0;
+ }
+
+ return false;
+}
+
+static bool bkey_format_has_too_big_fields(const struct bkey_format *f)
+{
+ for (unsigned i = 0; i < f->nr_fields; i++) {
+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
+ u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
+ u64 packed_max = f->bits_per_field[i]
+ ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
+ : 0;
+ u64 field_offset = le64_to_cpu(f->field_offset[i]);
+
+ if (packed_max + field_offset < packed_max ||
+ packed_max + field_offset > unpacked_max)
+ return true;
+ }
+
+ return false;
+}
+#endif
+
+/*
+ * Returns a packed key that compares <= in
+ *
+ * This is used in bset_search_tree(), where we need a packed pos in order to be
+ * able to compare against the keys in the auxiliary search tree - and it's
+ * legal to use a packed pos that isn't equivalent to the original pos,
+ * _provided_ it compares <= to the original pos.
+ */
+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out,
+ struct bpos in,
+ const struct btree *b)
+{
+ const struct bkey_format *f = &b->format;
+ struct pack_state state = pack_state_init(f, out);
+ u64 *w = out->_data;
+#ifdef CONFIG_BCACHEFS_DEBUG
+ struct bpos orig = in;
+#endif
+ bool exact = true;
+ unsigned i;
+
+ /*
+ * bch2_bkey_pack_key() will write to all of f->key_u64s, minus the 3
+ * byte header, but pack_pos() won't if the len/version fields are big
+ * enough - we need to make sure to zero them out:
+ */
+ for (i = 0; i < f->key_u64s; i++)
+ w[i] = 0;
+
+ if (unlikely(in.snapshot <
+ le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]))) {
+ if (!in.offset-- &&
+ !in.inode--)
+ return BKEY_PACK_POS_FAIL;
+ in.snapshot = KEY_SNAPSHOT_MAX;
+ exact = false;
+ }
+
+ if (unlikely(in.offset <
+ le64_to_cpu(f->field_offset[BKEY_FIELD_OFFSET]))) {
+ if (!in.inode--)
+ return BKEY_PACK_POS_FAIL;
+ in.offset = KEY_OFFSET_MAX;
+ in.snapshot = KEY_SNAPSHOT_MAX;
+ exact = false;
+ }
+
+ if (unlikely(in.inode <
+ le64_to_cpu(f->field_offset[BKEY_FIELD_INODE])))
+ return BKEY_PACK_POS_FAIL;
+
+ if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_INODE, in.inode))) {
+ in.offset = KEY_OFFSET_MAX;
+ in.snapshot = KEY_SNAPSHOT_MAX;
+ exact = false;
+ }
+
+ if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_OFFSET, in.offset))) {
+ in.snapshot = KEY_SNAPSHOT_MAX;
+ exact = false;
+ }
+
+ if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_SNAPSHOT, in.snapshot)))
+ exact = false;
+
+ pack_state_finish(&state, out);
+ out->u64s = f->key_u64s;
+ out->format = KEY_FORMAT_LOCAL_BTREE;
+ out->type = KEY_TYPE_deleted;
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+ if (exact) {
+ BUG_ON(bkey_cmp_left_packed(b, out, &orig));
+ } else {
+ struct bkey_packed successor;
+
+ BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0);
+ BUG_ON(bkey_packed_successor(&successor, b, *out) &&
+ bkey_cmp_left_packed(b, &successor, &orig) < 0 &&
+ !bkey_format_has_too_big_fields(f));
+ }
+#endif
+
+ return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER;
+}
+
+void bch2_bkey_format_init(struct bkey_format_state *s)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++)
+ s->field_min[i] = U64_MAX;
+
+ for (i = 0; i < ARRAY_SIZE(s->field_max); i++)
+ s->field_max[i] = 0;
+
+ /* Make sure we can store a size of 0: */
+ s->field_min[BKEY_FIELD_SIZE] = 0;
+}
+
+void bch2_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p)
+{
+ unsigned field = 0;
+
+ __bkey_format_add(s, field++, p.inode);
+ __bkey_format_add(s, field++, p.offset);
+ __bkey_format_add(s, field++, p.snapshot);
+}
+
+/*
+ * We don't want it to be possible for the packed format to represent fields
+ * bigger than a u64... that will cause confusion and issues (like with
+ * bkey_packed_successor())
+ */
+static void set_format_field(struct bkey_format *f, enum bch_bkey_fields i,
+ unsigned bits, u64 offset)
+{
+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
+ u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
+
+ bits = min(bits, unpacked_bits);
+
+ offset = bits == unpacked_bits ? 0 : min(offset, unpacked_max - ((1ULL << bits) - 1));
+
+ f->bits_per_field[i] = bits;
+ f->field_offset[i] = cpu_to_le64(offset);
+}
+
+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s)
+{
+ unsigned i, bits = KEY_PACKED_BITS_START;
+ struct bkey_format ret = {
+ .nr_fields = BKEY_NR_FIELDS,
+ };
+
+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++) {
+ s->field_min[i] = min(s->field_min[i], s->field_max[i]);
+
+ set_format_field(&ret, i,
+ fls64(s->field_max[i] - s->field_min[i]),
+ s->field_min[i]);
+
+ bits += ret.bits_per_field[i];
+ }
+
+ /* allow for extent merging: */
+ if (ret.bits_per_field[BKEY_FIELD_SIZE]) {
+ unsigned b = min(4U, 32U - ret.bits_per_field[BKEY_FIELD_SIZE]);
+
+ ret.bits_per_field[BKEY_FIELD_SIZE] += b;
+ bits += b;
+ }
+
+ ret.key_u64s = DIV_ROUND_UP(bits, 64);
+
+ /* if we have enough spare bits, round fields up to nearest byte */
+ bits = ret.key_u64s * 64 - bits;
+
+ for (i = 0; i < ARRAY_SIZE(ret.bits_per_field); i++) {
+ unsigned r = round_up(ret.bits_per_field[i], 8) -
+ ret.bits_per_field[i];
+
+ if (r <= bits) {
+ set_format_field(&ret, i,
+ ret.bits_per_field[i] + r,
+ le64_to_cpu(ret.field_offset[i]));
+ bits -= r;
+ }
+ }
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+ {
+ struct printbuf buf = PRINTBUF;
+
+ BUG_ON(bch2_bkey_format_invalid(NULL, &ret, 0, &buf));
+ printbuf_exit(&buf);
+ }
+#endif
+ return ret;
+}
+
+int bch2_bkey_format_invalid(struct bch_fs *c,
+ struct bkey_format *f,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ unsigned i, bits = KEY_PACKED_BITS_START;
+
+ if (f->nr_fields != BKEY_NR_FIELDS) {
+ prt_printf(err, "incorrect number of fields: got %u, should be %u",
+ f->nr_fields, BKEY_NR_FIELDS);
+ return -BCH_ERR_invalid;
+ }
+
+ /*
+ * Verify that the packed format can't represent fields larger than the
+ * unpacked format:
+ */
+ for (i = 0; i < f->nr_fields; i++) {
+ if (!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) {
+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
+ u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
+ u64 packed_max = f->bits_per_field[i]
+ ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
+ : 0;
+ u64 field_offset = le64_to_cpu(f->field_offset[i]);
+
+ if (packed_max + field_offset < packed_max ||
+ packed_max + field_offset > unpacked_max) {
+ prt_printf(err, "field %u too large: %llu + %llu > %llu",
+ i, packed_max, field_offset, unpacked_max);
+ return -BCH_ERR_invalid;
+ }
+ }
+
+ bits += f->bits_per_field[i];
+ }
+
+ if (f->key_u64s != DIV_ROUND_UP(bits, 64)) {
+ prt_printf(err, "incorrect key_u64s: got %u, should be %u",
+ f->key_u64s, DIV_ROUND_UP(bits, 64));
+ return -BCH_ERR_invalid;
+ }
+
+ return 0;
+}
+
+void bch2_bkey_format_to_text(struct printbuf *out, const struct bkey_format *f)
+{
+ prt_printf(out, "u64s %u fields ", f->key_u64s);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(f->bits_per_field); i++) {
+ if (i)
+ prt_str(out, ", ");
+ prt_printf(out, "%u:%llu",
+ f->bits_per_field[i],
+ le64_to_cpu(f->field_offset[i]));
+ }
+}
+
+/*
+ * Most significant differing bit
+ * Bits are indexed from 0 - return is [0, nr_key_bits)
+ */
+__pure
+unsigned bch2_bkey_greatest_differing_bit(const struct btree *b,
+ const struct bkey_packed *l_k,
+ const struct bkey_packed *r_k)
+{
+ const u64 *l = high_word(&b->format, l_k);
+ const u64 *r = high_word(&b->format, r_k);
+ unsigned nr_key_bits = b->nr_key_bits;
+ unsigned word_bits = 64 - high_bit_offset;
+ u64 l_v, r_v;
+
+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format));
+
+ /* for big endian, skip past header */
+ l_v = *l & (~0ULL >> high_bit_offset);
+ r_v = *r & (~0ULL >> high_bit_offset);
+
+ while (nr_key_bits) {
+ if (nr_key_bits < word_bits) {
+ l_v >>= word_bits - nr_key_bits;
+ r_v >>= word_bits - nr_key_bits;
+ nr_key_bits = 0;
+ } else {
+ nr_key_bits -= word_bits;
+ }
+
+ if (l_v != r_v)
+ return fls64(l_v ^ r_v) - 1 + nr_key_bits;
+
+ l = next_word(l);
+ r = next_word(r);
+
+ l_v = *l;
+ r_v = *r;
+ word_bits = 64;
+ }
+
+ return 0;
+}
+
+/*
+ * First set bit
+ * Bits are indexed from 0 - return is [0, nr_key_bits)
+ */
+__pure
+unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
+{
+ const u64 *p = high_word(&b->format, k);
+ unsigned nr_key_bits = b->nr_key_bits;
+ unsigned ret = 0, offset;
+
+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format));
+
+ offset = nr_key_bits;
+ while (offset > 64) {
+ p = next_word(p);
+ offset -= 64;
+ }
+
+ offset = 64 - offset;
+
+ while (nr_key_bits) {
+ unsigned bits = nr_key_bits + offset < 64
+ ? nr_key_bits
+ : 64 - offset;
+
+ u64 mask = (~0ULL >> (64 - bits)) << offset;
+
+ if (*p & mask)
+ return ret + __ffs64(*p & mask) - offset;
+
+ p = prev_word(p);
+ nr_key_bits -= bits;
+ ret += bits;
+ offset = 0;
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
+
+#define I(_x) (*(out)++ = (_x))
+#define I1(i0) I(i0)
+#define I2(i0, i1) (I1(i0), I(i1))
+#define I3(i0, i1, i2) (I2(i0, i1), I(i2))
+#define I4(i0, i1, i2, i3) (I3(i0, i1, i2), I(i3))
+#define I5(i0, i1, i2, i3, i4) (I4(i0, i1, i2, i3), I(i4))
+
+static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out,
+ enum bch_bkey_fields field,
+ unsigned dst_offset, unsigned dst_size,
+ bool *eax_zeroed)
+{
+ unsigned bits = format->bits_per_field[field];
+ u64 offset = le64_to_cpu(format->field_offset[field]);
+ unsigned i, byte, bit_offset, align, shl, shr;
+
+ if (!bits && !offset) {
+ if (!*eax_zeroed) {
+ /* xor eax, eax */
+ I2(0x31, 0xc0);
+ }
+
+ *eax_zeroed = true;
+ goto set_field;
+ }
+
+ if (!bits) {
+ /* just return offset: */
+
+ switch (dst_size) {
+ case 8:
+ if (offset > S32_MAX) {
+ /* mov [rdi + dst_offset], offset */
+ I3(0xc7, 0x47, dst_offset);
+ memcpy(out, &offset, 4);
+ out += 4;
+
+ I3(0xc7, 0x47, dst_offset + 4);
+ memcpy(out, (void *) &offset + 4, 4);
+ out += 4;
+ } else {
+ /* mov [rdi + dst_offset], offset */
+ /* sign extended */
+ I4(0x48, 0xc7, 0x47, dst_offset);
+ memcpy(out, &offset, 4);
+ out += 4;
+ }
+ break;
+ case 4:
+ /* mov [rdi + dst_offset], offset */
+ I3(0xc7, 0x47, dst_offset);
+ memcpy(out, &offset, 4);
+ out += 4;
+ break;
+ default:
+ BUG();
+ }
+
+ return out;
+ }
+
+ bit_offset = format->key_u64s * 64;
+ for (i = 0; i <= field; i++)
+ bit_offset -= format->bits_per_field[i];
+
+ byte = bit_offset / 8;
+ bit_offset -= byte * 8;
+
+ *eax_zeroed = false;
+
+ if (bit_offset == 0 && bits == 8) {
+ /* movzx eax, BYTE PTR [rsi + imm8] */
+ I4(0x0f, 0xb6, 0x46, byte);
+ } else if (bit_offset == 0 && bits == 16) {
+ /* movzx eax, WORD PTR [rsi + imm8] */
+ I4(0x0f, 0xb7, 0x46, byte);
+ } else if (bit_offset + bits <= 32) {
+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
+ byte -= align;
+ bit_offset += align * 8;
+
+ BUG_ON(bit_offset + bits > 32);
+
+ /* mov eax, [rsi + imm8] */
+ I3(0x8b, 0x46, byte);
+
+ if (bit_offset) {
+ /* shr eax, imm8 */
+ I3(0xc1, 0xe8, bit_offset);
+ }
+
+ if (bit_offset + bits < 32) {
+ unsigned mask = ~0U >> (32 - bits);
+
+ /* and eax, imm32 */
+ I1(0x25);
+ memcpy(out, &mask, 4);
+ out += 4;
+ }
+ } else if (bit_offset + bits <= 64) {
+ align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7);
+ byte -= align;
+ bit_offset += align * 8;
+
+ BUG_ON(bit_offset + bits > 64);
+
+ /* mov rax, [rsi + imm8] */
+ I4(0x48, 0x8b, 0x46, byte);
+
+ shl = 64 - bit_offset - bits;
+ shr = bit_offset + shl;
+
+ if (shl) {
+ /* shl rax, imm8 */
+ I4(0x48, 0xc1, 0xe0, shl);
+ }
+
+ if (shr) {
+ /* shr rax, imm8 */
+ I4(0x48, 0xc1, 0xe8, shr);
+ }
+ } else {
+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3);
+ byte -= align;
+ bit_offset += align * 8;
+
+ BUG_ON(bit_offset + bits > 96);
+
+ /* mov rax, [rsi + byte] */
+ I4(0x48, 0x8b, 0x46, byte);
+
+ /* mov edx, [rsi + byte + 8] */
+ I3(0x8b, 0x56, byte + 8);
+
+ /* bits from next word: */
+ shr = bit_offset + bits - 64;
+ BUG_ON(shr > bit_offset);
+
+ /* shr rax, bit_offset */
+ I4(0x48, 0xc1, 0xe8, shr);
+
+ /* shl rdx, imm8 */
+ I4(0x48, 0xc1, 0xe2, 64 - shr);
+
+ /* or rax, rdx */
+ I3(0x48, 0x09, 0xd0);
+
+ shr = bit_offset - shr;
+
+ if (shr) {
+ /* shr rax, imm8 */
+ I4(0x48, 0xc1, 0xe8, shr);
+ }
+ }
+
+ /* rax += offset: */
+ if (offset > S32_MAX) {
+ /* mov rdx, imm64 */
+ I2(0x48, 0xba);
+ memcpy(out, &offset, 8);
+ out += 8;
+ /* add %rdx, %rax */
+ I3(0x48, 0x01, 0xd0);
+ } else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) {
+ /* add rax, imm32 */
+ I2(0x48, 0x05);
+ memcpy(out, &offset, 4);
+ out += 4;
+ } else if (offset) {
+ /* add eax, imm32 */
+ I1(0x05);
+ memcpy(out, &offset, 4);
+ out += 4;
+ }
+set_field:
+ switch (dst_size) {
+ case 8:
+ /* mov [rdi + dst_offset], rax */
+ I4(0x48, 0x89, 0x47, dst_offset);
+ break;
+ case 4:
+ /* mov [rdi + dst_offset], eax */
+ I3(0x89, 0x47, dst_offset);
+ break;
+ default:
+ BUG();
+ }
+
+ return out;
+}
+
+int bch2_compile_bkey_format(const struct bkey_format *format, void *_out)
+{
+ bool eax_zeroed = false;
+ u8 *out = _out;
+
+ /*
+ * rdi: dst - unpacked key
+ * rsi: src - packed key
+ */
+
+ /* k->u64s, k->format, k->type */
+
+ /* mov eax, [rsi] */
+ I2(0x8b, 0x06);
+
+ /* add eax, BKEY_U64s - format->key_u64s */
+ I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0);
+
+ /* and eax, imm32: mask out k->pad: */
+ I5(0x25, 0xff, 0xff, 0xff, 0);
+
+ /* mov [rdi], eax */
+ I2(0x89, 0x07);
+
+#define x(id, field) \
+ out = compile_bkey_field(format, out, id, \
+ offsetof(struct bkey, field), \
+ sizeof(((struct bkey *) NULL)->field), \
+ &eax_zeroed);
+ bkey_fields()
+#undef x
+
+ /* retq */
+ I1(0xc3);
+
+ return (void *) out - _out;
+}
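+
+/*
+ * The buffer filled in above is invoked via the compiled_unpack_fn type
+ * declared in bkey.h: per the register comments at the top of the function,
+ * the generated code follows the usual x86-64 calling convention, with the
+ * destination bkey in rdi and the packed source key in rsi.
+ */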
+
+#else
+#endif
+
+__pure
+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l,
+ const struct bkey_packed *r,
+ const struct btree *b)
+{
+ return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
+}
+
+__pure __flatten
+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bpos *r)
+{
+ return bpos_cmp(bkey_unpack_pos_format_checked(b, l), *r);
+}
+
+__pure __flatten
+int bch2_bkey_cmp_packed(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r)
+{
+ return bch2_bkey_cmp_packed_inlined(b, l, r);
+}
+
+__pure __flatten
+int __bch2_bkey_cmp_left_packed(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bpos *r)
+{
+ const struct bkey *l_unpacked;
+
+ return unlikely(l_unpacked = packed_to_bkey_c(l))
+ ? bpos_cmp(l_unpacked->p, *r)
+ : __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
+}
+
+void bch2_bpos_swab(struct bpos *p)
+{
+ u8 *l = (u8 *) p;
+ u8 *h = ((u8 *) &p[1]) - 1;
+
+ while (l < h) {
+ swap(*l, *h);
+ l++;
+ --h;
+ }
+}
+
+void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k)
+{
+ const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current;
+ u8 *l = k->key_start;
+ u8 *h = (u8 *) (k->_data + f->key_u64s) - 1;
+
+ while (l < h) {
+ swap(*l, *h);
+ l++;
+ --h;
+ }
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_bkey_pack_test(void)
+{
+ struct bkey t = KEY(4134ULL, 1250629070527416633ULL, 0);
+ struct bkey_packed p;
+
+ struct bkey_format test_format = {
+ .key_u64s = 3,
+ .nr_fields = BKEY_NR_FIELDS,
+ .bits_per_field = {
+ 13,
+ 64,
+ 32,
+ },
+ };
+
+ struct unpack_state in_s =
+ unpack_state_init(&bch2_bkey_format_current, (void *) &t);
+ struct pack_state out_s = pack_state_init(&test_format, &p);
+ unsigned i;
+
+ for (i = 0; i < out_s.format->nr_fields; i++) {
+ u64 a, v = get_inc_field(&in_s, i);
+
+ switch (i) {
+#define x(id, field) case id: a = t.field; break;
+ bkey_fields()
+#undef x
+ default:
+ BUG();
+ }
+
+ if (a != v)
+ panic("got %llu actual %llu i %u\n", v, a, i);
+
+ if (!set_inc_field(&out_s, i, v))
+ panic("failed at %u\n", i);
+ }
+
+ BUG_ON(!bch2_bkey_pack_key(&p, &t, &test_format));
+}
+#endif
diff --git a/c_src/libbcachefs/bkey.h b/c_src/libbcachefs/bkey.h
new file mode 100644
index 00000000..831be018
--- /dev/null
+++ b/c_src/libbcachefs/bkey.h
@@ -0,0 +1,778 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_H
+#define _BCACHEFS_BKEY_H
+
+#include <linux/bug.h>
+#include "bcachefs_format.h"
+
+#include "btree_types.h"
+#include "util.h"
+#include "vstructs.h"
+
+enum bkey_invalid_flags {
+ BKEY_INVALID_WRITE = (1U << 0),
+ BKEY_INVALID_COMMIT = (1U << 1),
+ BKEY_INVALID_JOURNAL = (1U << 2),
+};
+
+#if 0
+
+/*
+ * compiled unpack functions are disabled, pending a new interface for
+ * dynamically allocating executable memory:
+ */
+
+#ifdef CONFIG_X86_64
+#define HAVE_BCACHEFS_COMPILED_UNPACK 1
+#endif
+#endif
+
+void bch2_bkey_packed_to_binary_text(struct printbuf *,
+ const struct bkey_format *,
+ const struct bkey_packed *);
+
+/* bkey with split value, const */
+struct bkey_s_c {
+ const struct bkey *k;
+ const struct bch_val *v;
+};
+
+/* bkey with split value */
+struct bkey_s {
+ union {
+ struct {
+ struct bkey *k;
+ struct bch_val *v;
+ };
+ struct bkey_s_c s_c;
+ };
+};
+
+#define bkey_p_next(_k) vstruct_next(_k)
+
+static inline struct bkey_i *bkey_next(struct bkey_i *k)
+{
+ return (struct bkey_i *) ((u64 *) k->_data + k->k.u64s);
+}
+
+#define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s)
+
+static inline size_t bkey_val_bytes(const struct bkey *k)
+{
+ return bkey_val_u64s(k) * sizeof(u64);
+}
+
+static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s)
+{
+ unsigned u64s = BKEY_U64s + val_u64s;
+
+ BUG_ON(u64s > U8_MAX);
+ k->u64s = u64s;
+}
+
+static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
+{
+ set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64)));
+}
+
+#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))
+
+#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted)
+
+#define bkey_whiteout(_k) \
+ ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)
+
+enum bkey_lr_packed {
+ BKEY_PACKED_BOTH,
+ BKEY_PACKED_RIGHT,
+ BKEY_PACKED_LEFT,
+ BKEY_PACKED_NONE,
+};
+
+#define bkey_lr_packed(_l, _r) \
+ ((_l)->format + ((_r)->format << 1))
+
+static inline void bkey_p_copy(struct bkey_packed *dst, const struct bkey_packed *src)
+{
+ memcpy_u64s_small(dst, src, src->u64s);
+}
+
+static inline void bkey_copy(struct bkey_i *dst, const struct bkey_i *src)
+{
+ memcpy_u64s_small(dst, src, src->k.u64s);
+}
+
+struct btree;
+
+__pure
+unsigned bch2_bkey_greatest_differing_bit(const struct btree *,
+ const struct bkey_packed *,
+ const struct bkey_packed *);
+__pure
+unsigned bch2_bkey_ffs(const struct btree *, const struct bkey_packed *);
+
+__pure
+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *,
+ const struct bkey_packed *,
+ const struct btree *);
+
+__pure
+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *,
+ const struct bkey_packed *,
+ const struct bpos *);
+
+__pure
+int bch2_bkey_cmp_packed(const struct btree *,
+ const struct bkey_packed *,
+ const struct bkey_packed *);
+
+__pure
+int __bch2_bkey_cmp_left_packed(const struct btree *,
+ const struct bkey_packed *,
+ const struct bpos *);
+
+static inline __pure
+int bkey_cmp_left_packed(const struct btree *b,
+ const struct bkey_packed *l, const struct bpos *r)
+{
+ return __bch2_bkey_cmp_left_packed(b, l, r);
+}
+
+/*
+ * The compiler generates better code when we pass bpos by ref, but it's often
+ * terribly convenient to pass it by val... as much as I hate c++, const ref
+ * would be nice here:
+ */
+__pure __flatten
+static inline int bkey_cmp_left_packed_byval(const struct btree *b,
+ const struct bkey_packed *l,
+ struct bpos r)
+{
+ return bkey_cmp_left_packed(b, l, &r);
+}
+
+static __always_inline bool bpos_eq(struct bpos l, struct bpos r)
+{
+ return !((l.inode ^ r.inode) |
+ (l.offset ^ r.offset) |
+ (l.snapshot ^ r.snapshot));
+}
+
+static __always_inline bool bpos_lt(struct bpos l, struct bpos r)
+{
+ return l.inode != r.inode ? l.inode < r.inode :
+ l.offset != r.offset ? l.offset < r.offset :
+ l.snapshot != r.snapshot ? l.snapshot < r.snapshot : false;
+}
+
+static __always_inline bool bpos_le(struct bpos l, struct bpos r)
+{
+ return l.inode != r.inode ? l.inode < r.inode :
+ l.offset != r.offset ? l.offset < r.offset :
+ l.snapshot != r.snapshot ? l.snapshot < r.snapshot : true;
+}
+
+static __always_inline bool bpos_gt(struct bpos l, struct bpos r)
+{
+ return bpos_lt(r, l);
+}
+
+static __always_inline bool bpos_ge(struct bpos l, struct bpos r)
+{
+ return bpos_le(r, l);
+}
+
+static __always_inline int bpos_cmp(struct bpos l, struct bpos r)
+{
+ return cmp_int(l.inode, r.inode) ?:
+ cmp_int(l.offset, r.offset) ?:
+ cmp_int(l.snapshot, r.snapshot);
+}
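+
+/*
+ * bpos ordering is lexicographic in (inode, offset, snapshot): for example,
+ * a position (1, 10, 2) sorts before (1, 11, 0). The bkey_*() comparisons
+ * below are the same, but with the snapshot field ignored.
+ */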
+
+static inline struct bpos bpos_min(struct bpos l, struct bpos r)
+{
+ return bpos_lt(l, r) ? l : r;
+}
+
+static inline struct bpos bpos_max(struct bpos l, struct bpos r)
+{
+ return bpos_gt(l, r) ? l : r;
+}
+
+static __always_inline bool bkey_eq(struct bpos l, struct bpos r)
+{
+ return !((l.inode ^ r.inode) |
+ (l.offset ^ r.offset));
+}
+
+static __always_inline bool bkey_lt(struct bpos l, struct bpos r)
+{
+ return l.inode != r.inode
+ ? l.inode < r.inode
+ : l.offset < r.offset;
+}
+
+static __always_inline bool bkey_le(struct bpos l, struct bpos r)
+{
+ return l.inode != r.inode
+ ? l.inode < r.inode
+ : l.offset <= r.offset;
+}
+
+static __always_inline bool bkey_gt(struct bpos l, struct bpos r)
+{
+ return bkey_lt(r, l);
+}
+
+static __always_inline bool bkey_ge(struct bpos l, struct bpos r)
+{
+ return bkey_le(r, l);
+}
+
+static __always_inline int bkey_cmp(struct bpos l, struct bpos r)
+{
+ return cmp_int(l.inode, r.inode) ?:
+ cmp_int(l.offset, r.offset);
+}
+
+static inline struct bpos bkey_min(struct bpos l, struct bpos r)
+{
+ return bkey_lt(l, r) ? l : r;
+}
+
+static inline struct bpos bkey_max(struct bpos l, struct bpos r)
+{
+ return bkey_gt(l, r) ? l : r;
+}
+
+void bch2_bpos_swab(struct bpos *);
+void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
+
+static __always_inline int bversion_cmp(struct bversion l, struct bversion r)
+{
+ return cmp_int(l.hi, r.hi) ?:
+ cmp_int(l.lo, r.lo);
+}
+
+#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 })
+#define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL })
+
+static __always_inline int bversion_zero(struct bversion v)
+{
+ return !bversion_cmp(v, ZERO_VERSION);
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+/* statement expressions confusing unlikely()? */
+#define bkey_packed(_k) \
+ ({ EBUG_ON((_k)->format > KEY_FORMAT_CURRENT); \
+ (_k)->format != KEY_FORMAT_CURRENT; })
+#else
+#define bkey_packed(_k) ((_k)->format != KEY_FORMAT_CURRENT)
+#endif
+
+/*
+ * It's safe to treat an unpacked bkey as a packed one, but not the reverse
+ */
+static inline struct bkey_packed *bkey_to_packed(struct bkey_i *k)
+{
+ return (struct bkey_packed *) k;
+}
+
+static inline const struct bkey_packed *bkey_to_packed_c(const struct bkey_i *k)
+{
+ return (const struct bkey_packed *) k;
+}
+
+static inline struct bkey_i *packed_to_bkey(struct bkey_packed *k)
+{
+ return bkey_packed(k) ? NULL : (struct bkey_i *) k;
+}
+
+static inline const struct bkey *packed_to_bkey_c(const struct bkey_packed *k)
+{
+ return bkey_packed(k) ? NULL : (const struct bkey *) k;
+}
+
+static inline unsigned bkey_format_key_bits(const struct bkey_format *format)
+{
+ return format->bits_per_field[BKEY_FIELD_INODE] +
+ format->bits_per_field[BKEY_FIELD_OFFSET] +
+ format->bits_per_field[BKEY_FIELD_SNAPSHOT];
+}
+
+static inline struct bpos bpos_successor(struct bpos p)
+{
+ if (!++p.snapshot &&
+ !++p.offset &&
+ !++p.inode)
+ BUG();
+
+ return p;
+}
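+
+/*
+ * For example, the successor of (inode 1, offset 2, snapshot U32_MAX) is
+ * (inode 1, offset 3, snapshot 0): the snapshot increment wraps to zero and
+ * carries into the offset field.
+ */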
+
+static inline struct bpos bpos_predecessor(struct bpos p)
+{
+ if (!p.snapshot-- &&
+ !p.offset-- &&
+ !p.inode--)
+ BUG();
+
+ return p;
+}
+
+static inline struct bpos bpos_nosnap_successor(struct bpos p)
+{
+ p.snapshot = 0;
+
+ if (!++p.offset &&
+ !++p.inode)
+ BUG();
+
+ return p;
+}
+
+static inline struct bpos bpos_nosnap_predecessor(struct bpos p)
+{
+ p.snapshot = 0;
+
+ if (!p.offset-- &&
+ !p.inode--)
+ BUG();
+
+ return p;
+}
+
+static inline u64 bkey_start_offset(const struct bkey *k)
+{
+ return k->p.offset - k->size;
+}
+
+static inline struct bpos bkey_start_pos(const struct bkey *k)
+{
+ return (struct bpos) {
+ .inode = k->p.inode,
+ .offset = bkey_start_offset(k),
+ .snapshot = k->p.snapshot,
+ };
+}
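+
+/*
+ * Extent keys are indexed by their end position: e.g. a key with
+ * p.offset == 100 and size == 20 covers offsets [80, 100), so
+ * bkey_start_offset() returns 80.
+ */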
+
+/* Packed helpers */
+
+static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
+ const struct bkey_packed *k)
+{
+ unsigned ret = bkey_packed(k) ? format->key_u64s : BKEY_U64s;
+
+ EBUG_ON(k->u64s < ret);
+ return ret;
+}
+
+static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
+ const struct bkey_packed *k)
+{
+ return bkeyp_key_u64s(format, k) * sizeof(u64);
+}
+
+static inline unsigned bkeyp_val_u64s(const struct bkey_format *format,
+ const struct bkey_packed *k)
+{
+ return k->u64s - bkeyp_key_u64s(format, k);
+}
+
+static inline size_t bkeyp_val_bytes(const struct bkey_format *format,
+ const struct bkey_packed *k)
+{
+ return bkeyp_val_u64s(format, k) * sizeof(u64);
+}
+
+static inline void set_bkeyp_val_u64s(const struct bkey_format *format,
+ struct bkey_packed *k, unsigned val_u64s)
+{
+ k->u64s = bkeyp_key_u64s(format, k) + val_u64s;
+}
+
+#define bkeyp_val(_format, _k) \
+ ((struct bch_val *) ((u64 *) (_k)->_data + bkeyp_key_u64s(_format, _k)))
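+
+/*
+ * A packed bkey is laid out as bkeyp_key_u64s() u64s of (possibly packed) key
+ * immediately followed by the value, so the value is found by advancing the
+ * key pointer by the packed key size, as bkeyp_val() does above.
+ */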
+
+extern const struct bkey_format bch2_bkey_format_current;
+
+bool bch2_bkey_transform(const struct bkey_format *,
+ struct bkey_packed *,
+ const struct bkey_format *,
+ const struct bkey_packed *);
+
+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *,
+ const struct bkey_packed *);
+
+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK
+struct bpos __bkey_unpack_pos(const struct bkey_format *,
+ const struct bkey_packed *);
+#endif
+
+bool bch2_bkey_pack_key(struct bkey_packed *, const struct bkey *,
+ const struct bkey_format *);
+
+enum bkey_pack_pos_ret {
+ BKEY_PACK_POS_EXACT,
+ BKEY_PACK_POS_SMALLER,
+ BKEY_PACK_POS_FAIL,
+};
+
+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *, struct bpos,
+ const struct btree *);
+
+static inline bool bkey_pack_pos(struct bkey_packed *out, struct bpos in,
+ const struct btree *b)
+{
+ return bch2_bkey_pack_pos_lossy(out, in, b) == BKEY_PACK_POS_EXACT;
+}
+
+void bch2_bkey_unpack(const struct btree *, struct bkey_i *,
+ const struct bkey_packed *);
+bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *,
+ const struct bkey_format *);
+
+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
+
+static inline void
+__bkey_unpack_key_format_checked(const struct btree *b,
+ struct bkey *dst,
+ const struct bkey_packed *src)
+{
+ if (IS_ENABLED(HAVE_BCACHEFS_COMPILED_UNPACK)) {
+ compiled_unpack_fn unpack_fn = b->aux_data;
+ unpack_fn(dst, src);
+
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
+ bch2_expensive_debug_checks) {
+ struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
+
+ BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
+ }
+ } else {
+ *dst = __bch2_bkey_unpack_key(&b->format, src);
+ }
+}
+
+static inline struct bkey
+bkey_unpack_key_format_checked(const struct btree *b,
+ const struct bkey_packed *src)
+{
+ struct bkey dst;
+
+ __bkey_unpack_key_format_checked(b, &dst, src);
+ return dst;
+}
+
+static inline void __bkey_unpack_key(const struct btree *b,
+ struct bkey *dst,
+ const struct bkey_packed *src)
+{
+ if (likely(bkey_packed(src)))
+ __bkey_unpack_key_format_checked(b, dst, src);
+ else
+ *dst = *packed_to_bkey_c(src);
+}
+
+/**
+ * bkey_unpack_key - unpack just the key, not the value
+ */
+static inline struct bkey bkey_unpack_key(const struct btree *b,
+ const struct bkey_packed *src)
+{
+ return likely(bkey_packed(src))
+ ? bkey_unpack_key_format_checked(b, src)
+ : *packed_to_bkey_c(src);
+}
+
+static inline struct bpos
+bkey_unpack_pos_format_checked(const struct btree *b,
+ const struct bkey_packed *src)
+{
+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
+ return bkey_unpack_key_format_checked(b, src).p;
+#else
+ return __bkey_unpack_pos(&b->format, src);
+#endif
+}
+
+static inline struct bpos bkey_unpack_pos(const struct btree *b,
+ const struct bkey_packed *src)
+{
+ return likely(bkey_packed(src))
+ ? bkey_unpack_pos_format_checked(b, src)
+ : packed_to_bkey_c(src)->p;
+}
+
+/* Disassembled bkeys */
+
+static inline struct bkey_s_c bkey_disassemble(const struct btree *b,
+ const struct bkey_packed *k,
+ struct bkey *u)
+{
+ __bkey_unpack_key(b, u, k);
+
+ return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
+}
+
+/* non const version: */
+static inline struct bkey_s __bkey_disassemble(const struct btree *b,
+ struct bkey_packed *k,
+ struct bkey *u)
+{
+ __bkey_unpack_key(b, u, k);
+
+ return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
+}
+
+static inline u64 bkey_field_max(const struct bkey_format *f,
+ enum bch_bkey_fields nr)
+{
+ return f->bits_per_field[nr] < 64
+ ? (le64_to_cpu(f->field_offset[nr]) +
+ ~(~0ULL << f->bits_per_field[nr]))
+ : U64_MAX;
+}
+
+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
+
+int bch2_compile_bkey_format(const struct bkey_format *, void *);
+
+#else
+
+static inline int bch2_compile_bkey_format(const struct bkey_format *format,
+ void *out) { return 0; }
+
+#endif
+
+static inline void bkey_reassemble(struct bkey_i *dst,
+ struct bkey_s_c src)
+{
+ dst->k = *src.k;
+ memcpy_u64s_small(&dst->v, src.v, bkey_val_u64s(src.k));
+}
+
+#define bkey_s_null ((struct bkey_s) { .k = NULL })
+#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL })
+
+#define bkey_s_err(err) ((struct bkey_s) { .k = ERR_PTR(err) })
+#define bkey_s_c_err(err) ((struct bkey_s_c) { .k = ERR_PTR(err) })
+
+static inline struct bkey_s bkey_to_s(struct bkey *k)
+{
+ return (struct bkey_s) { .k = k, .v = NULL };
+}
+
+static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k)
+{
+ return (struct bkey_s_c) { .k = k, .v = NULL };
+}
+
+static inline struct bkey_s bkey_i_to_s(struct bkey_i *k)
+{
+ return (struct bkey_s) { .k = &k->k, .v = &k->v };
+}
+
+static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k)
+{
+ return (struct bkey_s_c) { .k = &k->k, .v = &k->v };
+}
+
+/*
+ * For a given type of value (e.g. struct bch_extent), generates the types for
+ * bkey + bch_extent - inline, split, split const - and also all the conversion
+ * functions, which also check that the value is of the correct type.
+ *
+ * We use anonymous unions for upcasting - e.g. converting from a
+ * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion
+ * functions.
+ */
+#define x(name, ...) \
+struct bkey_i_##name { \
+ union { \
+ struct bkey k; \
+ struct bkey_i k_i; \
+ }; \
+ struct bch_##name v; \
+}; \
+ \
+struct bkey_s_c_##name { \
+ union { \
+ struct { \
+ const struct bkey *k; \
+ const struct bch_##name *v; \
+ }; \
+ struct bkey_s_c s_c; \
+ }; \
+}; \
+ \
+struct bkey_s_##name { \
+ union { \
+ struct { \
+ struct bkey *k; \
+ struct bch_##name *v; \
+ }; \
+ struct bkey_s_c_##name c; \
+ struct bkey_s s; \
+ struct bkey_s_c s_c; \
+ }; \
+}; \
+ \
+static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \
+{ \
+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \
+ return container_of(&k->k, struct bkey_i_##name, k); \
+} \
+ \
+static inline const struct bkey_i_##name * \
+bkey_i_to_##name##_c(const struct bkey_i *k) \
+{ \
+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \
+ return container_of(&k->k, struct bkey_i_##name, k); \
+} \
+ \
+static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \
+{ \
+ EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name); \
+ return (struct bkey_s_##name) { \
+ .k = k.k, \
+ .v = container_of(k.v, struct bch_##name, v), \
+ }; \
+} \
+ \
+static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\
+{ \
+ EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name); \
+ return (struct bkey_s_c_##name) { \
+ .k = k.k, \
+ .v = container_of(k.v, struct bch_##name, v), \
+ }; \
+} \
+ \
+static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\
+{ \
+ return (struct bkey_s_##name) { \
+ .k = &k->k, \
+ .v = &k->v, \
+ }; \
+} \
+ \
+static inline struct bkey_s_c_##name \
+name##_i_to_s_c(const struct bkey_i_##name *k) \
+{ \
+ return (struct bkey_s_c_##name) { \
+ .k = &k->k, \
+ .v = &k->v, \
+ }; \
+} \
+ \
+static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \
+{ \
+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \
+ return (struct bkey_s_##name) { \
+ .k = &k->k, \
+ .v = container_of(&k->v, struct bch_##name, v), \
+ }; \
+} \
+ \
+static inline struct bkey_s_c_##name \
+bkey_i_to_s_c_##name(const struct bkey_i *k) \
+{ \
+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \
+ return (struct bkey_s_c_##name) { \
+ .k = &k->k, \
+ .v = container_of(&k->v, struct bch_##name, v), \
+ }; \
+} \
+ \
+static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
+{ \
+ struct bkey_i_##name *k = \
+ container_of(&_k->k, struct bkey_i_##name, k); \
+ \
+ bkey_init(&k->k); \
+ memset(&k->v, 0, sizeof(k->v)); \
+ k->k.type = KEY_TYPE_##name; \
+ set_bkey_val_bytes(&k->k, sizeof(k->v)); \
+ \
+ return k; \
+}
+
+BCH_BKEY_TYPES();
+#undef x
+
+/* byte order helpers */
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+static inline unsigned high_word_offset(const struct bkey_format *f)
+{
+ return f->key_u64s - 1;
+}
+
+#define high_bit_offset 0
+#define nth_word(p, n) ((p) - (n))
+
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+
+static inline unsigned high_word_offset(const struct bkey_format *f)
+{
+ return 0;
+}
+
+#define high_bit_offset KEY_PACKED_BITS_START
+#define nth_word(p, n) ((p) + (n))
+
+#else
+#error edit for your odd byteorder.
+#endif
+
+#define high_word(f, k) ((u64 *) (k)->_data + high_word_offset(f))
+#define next_word(p) nth_word(p, 1)
+#define prev_word(p) nth_word(p, -1)
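+
+/*
+ * Packed keys are compared starting from their most significant word:
+ * high_word() points at it, and next_word() steps towards less significant
+ * words - decreasing addresses on little endian, increasing addresses on big
+ * endian.
+ */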
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_bkey_pack_test(void);
+#else
+static inline void bch2_bkey_pack_test(void) {}
+#endif
+
+#define bkey_fields() \
+ x(BKEY_FIELD_INODE, p.inode) \
+ x(BKEY_FIELD_OFFSET, p.offset) \
+ x(BKEY_FIELD_SNAPSHOT, p.snapshot) \
+ x(BKEY_FIELD_SIZE, size) \
+ x(BKEY_FIELD_VERSION_HI, version.hi) \
+ x(BKEY_FIELD_VERSION_LO, version.lo)
+
+struct bkey_format_state {
+ u64 field_min[BKEY_NR_FIELDS];
+ u64 field_max[BKEY_NR_FIELDS];
+};
+
+void bch2_bkey_format_init(struct bkey_format_state *);
+
+static inline void __bkey_format_add(struct bkey_format_state *s, unsigned field, u64 v)
+{
+ s->field_min[field] = min(s->field_min[field], v);
+ s->field_max[field] = max(s->field_max[field], v);
+}
+
+/*
+ * Changes @format so that @k can be successfully packed with @format
+ */
+static inline void bch2_bkey_format_add_key(struct bkey_format_state *s, const struct bkey *k)
+{
+#define x(id, field) __bkey_format_add(s, id, k->field);
+ bkey_fields()
+#undef x
+}
+
+void bch2_bkey_format_add_pos(struct bkey_format_state *, struct bpos);
+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *);
+int bch2_bkey_format_invalid(struct bch_fs *, struct bkey_format *,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_bkey_format_to_text(struct printbuf *, const struct bkey_format *);
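+
+/*
+ * A minimal sketch of building a format that can pack every key in a node
+ * (illustrative, not code from this header): feed the format state the node's
+ * bounds and each key, then finalize it:
+ *
+ *   struct bkey_format_state s;
+ *
+ *   bch2_bkey_format_init(&s);
+ *   bch2_bkey_format_add_pos(&s, b->data->min_key);
+ *   bch2_bkey_format_add_pos(&s, b->data->max_key);
+ *   ... bch2_bkey_format_add_key(&s, k) for each key in the node ...
+ *   struct bkey_format f = bch2_bkey_format_done(&s);
+ */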
+
+#endif /* _BCACHEFS_BKEY_H */
diff --git a/c_src/libbcachefs/bkey_buf.h b/c_src/libbcachefs/bkey_buf.h
new file mode 100644
index 00000000..a30c4ae8
--- /dev/null
+++ b/c_src/libbcachefs/bkey_buf.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_BUF_H
+#define _BCACHEFS_BKEY_BUF_H
+
+#include "bcachefs.h"
+#include "bkey.h"
+
+struct bkey_buf {
+ struct bkey_i *k;
+ u64 onstack[12];
+};
+
+static inline void bch2_bkey_buf_realloc(struct bkey_buf *s,
+ struct bch_fs *c, unsigned u64s)
+{
+ if (s->k == (void *) s->onstack &&
+ u64s > ARRAY_SIZE(s->onstack)) {
+ s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
+ memcpy(s->k, s->onstack, sizeof(s->onstack));
+ }
+}
+
+static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s,
+ struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ bch2_bkey_buf_realloc(s, c, k.k->u64s);
+ bkey_reassemble(s->k, k);
+}
+
+static inline void bch2_bkey_buf_copy(struct bkey_buf *s,
+ struct bch_fs *c,
+ struct bkey_i *src)
+{
+ bch2_bkey_buf_realloc(s, c, src->k.u64s);
+ bkey_copy(s->k, src);
+}
+
+static inline void bch2_bkey_buf_unpack(struct bkey_buf *s,
+ struct bch_fs *c,
+ struct btree *b,
+ struct bkey_packed *src)
+{
+ bch2_bkey_buf_realloc(s, c, BKEY_U64s +
+ bkeyp_val_u64s(&b->format, src));
+ bch2_bkey_unpack(b, s->k, src);
+}
+
+static inline void bch2_bkey_buf_init(struct bkey_buf *s)
+{
+ s->k = (void *) s->onstack;
+}
+
+static inline void bch2_bkey_buf_exit(struct bkey_buf *s, struct bch_fs *c)
+{
+ if (s->k != (void *) s->onstack)
+ mempool_free(s->k, &c->large_bkey_pool);
+ s->k = NULL;
+}
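+
+/*
+ * Typical usage (illustrative sketch): a bkey_buf holds a copy of a key in
+ * onstack[] when it fits, falling back to c->large_bkey_pool when it does
+ * not:
+ *
+ *   struct bkey_buf tmp;
+ *
+ *   bch2_bkey_buf_init(&tmp);
+ *   bch2_bkey_buf_reassemble(&tmp, c, k);
+ *   ... use tmp.k ...
+ *   bch2_bkey_buf_exit(&tmp, c);
+ */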
+
+#endif /* _BCACHEFS_BKEY_BUF_H */
diff --git a/c_src/libbcachefs/bkey_cmp.h b/c_src/libbcachefs/bkey_cmp.h
new file mode 100644
index 00000000..5f42a6e6
--- /dev/null
+++ b/c_src/libbcachefs/bkey_cmp.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_CMP_H
+#define _BCACHEFS_BKEY_CMP_H
+
+#include "bkey.h"
+
+#ifdef CONFIG_X86_64
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+ unsigned nr_key_bits)
+{
+ long d0, d1, d2, d3;
+ int cmp;
+
+ /* we shouldn't need asm for this, but gcc generates poor code here: */
+
+ asm(".intel_syntax noprefix;"
+ "xor eax, eax;"
+ "xor edx, edx;"
+ "1:;"
+ "mov r8, [rdi];"
+ "mov r9, [rsi];"
+ "sub ecx, 64;"
+ "jl 2f;"
+
+ "cmp r8, r9;"
+ "jnz 3f;"
+
+ "lea rdi, [rdi - 8];"
+ "lea rsi, [rsi - 8];"
+ "jmp 1b;"
+
+ "2:;"
+ "not ecx;"
+ "shr r8, 1;"
+ "shr r9, 1;"
+ "shr r8, cl;"
+ "shr r9, cl;"
+ "cmp r8, r9;"
+
+ "3:\n"
+ "seta al;"
+ "setb dl;"
+ "sub eax, edx;"
+ ".att_syntax prefix;"
+ : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
+ : "0" (l), "1" (r), "3" (nr_key_bits)
+ : "r8", "r9", "cc", "memory");
+
+ return cmp;
+}
+#else
+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
+ unsigned nr_key_bits)
+{
+ u64 l_v, r_v;
+
+ if (!nr_key_bits)
+ return 0;
+
+ /* for big endian, skip past header */
+ nr_key_bits += high_bit_offset;
+ l_v = *l & (~0ULL >> high_bit_offset);
+ r_v = *r & (~0ULL >> high_bit_offset);
+
+ while (1) {
+ if (nr_key_bits < 64) {
+ l_v >>= 64 - nr_key_bits;
+ r_v >>= 64 - nr_key_bits;
+ nr_key_bits = 0;
+ } else {
+ nr_key_bits -= 64;
+ }
+
+ if (!nr_key_bits || l_v != r_v)
+ break;
+
+ l = next_word(l);
+ r = next_word(r);
+
+ l_v = *l;
+ r_v = *r;
+ }
+
+ return cmp_int(l_v, r_v);
+}
+#endif
+
+static inline __pure __flatten
+int __bch2_bkey_cmp_packed_format_checked_inlined(const struct bkey_packed *l,
+ const struct bkey_packed *r,
+ const struct btree *b)
+{
+ const struct bkey_format *f = &b->format;
+ int ret;
+
+ EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
+
+ ret = __bkey_cmp_bits(high_word(f, l),
+ high_word(f, r),
+ b->nr_key_bits);
+
+ EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
+ bkey_unpack_pos(b, r)));
+ return ret;
+}
+
+static inline __pure __flatten
+int bch2_bkey_cmp_packed_inlined(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r)
+{
+ struct bkey unpacked;
+
+ if (likely(bkey_packed(l) && bkey_packed(r)))
+ return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
+
+ if (bkey_packed(l)) {
+ __bkey_unpack_key_format_checked(b, &unpacked, l);
+ l = (void *) &unpacked;
+ } else if (bkey_packed(r)) {
+ __bkey_unpack_key_format_checked(b, &unpacked, r);
+ r = (void *) &unpacked;
+ }
+
+ return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
+}
+
+#endif /* _BCACHEFS_BKEY_CMP_H */
diff --git a/c_src/libbcachefs/bkey_methods.c b/c_src/libbcachefs/bkey_methods.c
new file mode 100644
index 00000000..761f5e33
--- /dev/null
+++ b/c_src/libbcachefs/bkey_methods.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "backpointers.h"
+#include "bkey_methods.h"
+#include "btree_cache.h"
+#include "btree_types.h"
+#include "alloc_background.h"
+#include "dirent.h"
+#include "ec.h"
+#include "error.h"
+#include "extents.h"
+#include "inode.h"
+#include "io_misc.h"
+#include "lru.h"
+#include "quota.h"
+#include "reflink.h"
+#include "snapshot.h"
+#include "subvolume.h"
+#include "xattr.h"
+
+const char * const bch2_bkey_types[] = {
+#define x(name, nr) #name,
+ BCH_BKEY_TYPES()
+#undef x
+ NULL
+};
+
+static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags, struct printbuf *err)
+{
+ return 0;
+}
+
+#define bch2_bkey_ops_deleted ((struct bkey_ops) { \
+ .key_invalid = deleted_key_invalid, \
+})
+
+#define bch2_bkey_ops_whiteout ((struct bkey_ops) { \
+ .key_invalid = deleted_key_invalid, \
+})
+
+static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags, struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(bkey_val_bytes(k.k), c, err,
+ bkey_val_size_nonzero,
+ "incorrect value size (%zu != 0)",
+ bkey_val_bytes(k.k));
+fsck_err:
+ return ret;
+}
+
+#define bch2_bkey_ops_error ((struct bkey_ops) { \
+ .key_invalid = empty_val_key_invalid, \
+})
+
+static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags, struct printbuf *err)
+{
+ return 0;
+}
+
+#define bch2_bkey_ops_cookie ((struct bkey_ops) { \
+ .key_invalid = key_type_cookie_invalid, \
+ .min_val_size = 8, \
+})
+
+#define bch2_bkey_ops_hash_whiteout ((struct bkey_ops) {\
+ .key_invalid = empty_val_key_invalid, \
+})
+
+static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags, struct printbuf *err)
+{
+ return 0;
+}
+
+static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k);
+ unsigned datalen = bkey_inline_data_bytes(k.k);
+
+ prt_printf(out, "datalen %u: %*phN",
+ datalen, min(datalen, 32U), d.v->data);
+}
+
+#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \
+ .key_invalid = key_type_inline_data_invalid, \
+ .val_to_text = key_type_inline_data_to_text, \
+})
+
+static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
+{
+ bch2_key_resize(l.k, l.k->size + r.k->size);
+ return true;
+}
+
+#define bch2_bkey_ops_set ((struct bkey_ops) { \
+ .key_invalid = empty_val_key_invalid, \
+ .key_merge = key_type_set_merge, \
+})
+
+const struct bkey_ops bch2_bkey_ops[] = {
+#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
+ BCH_BKEY_TYPES()
+#undef x
+};
+
+const struct bkey_ops bch2_bkey_null_ops = {
+};
+
+int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
+ int ret = 0;
+
+ bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err,
+ bkey_val_size_too_small,
+ "bad val size (%zu < %u)",
+ bkey_val_bytes(k.k), ops->min_val_size);
+
+ if (!ops->key_invalid)
+ return 0;
+
+ ret = ops->key_invalid(c, k, flags, err);
+fsck_err:
+ return ret;
+}
+
+static u64 bch2_key_types_allowed[] = {
+ [BKEY_TYPE_btree] =
+ BIT_ULL(KEY_TYPE_deleted)|
+ BIT_ULL(KEY_TYPE_btree_ptr)|
+ BIT_ULL(KEY_TYPE_btree_ptr_v2),
+#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys,
+ BCH_BTREE_IDS()
+#undef x
+};
+
+const char *bch2_btree_node_type_str(enum btree_node_type type)
+{
+ return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1);
+}
+
+int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum btree_node_type type,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err,
+ bkey_u64s_too_small,
+ "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s);
+
+ if (type >= BKEY_TYPE_NR)
+ return 0;
+
+ bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
+ !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
+ bkey_invalid_type_for_btree,
+ "invalid key type for btree %s (%s)",
+ bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
+
+ if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
+ bkey_fsck_err_on(k.k->size == 0, c, err,
+ bkey_extent_size_zero,
+ "size == 0");
+
+ bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err,
+ bkey_extent_size_greater_than_offset,
+ "size greater than offset (%u > %llu)",
+ k.k->size, k.k->p.offset);
+ } else {
+ bkey_fsck_err_on(k.k->size, c, err,
+ bkey_size_nonzero,
+ "size != 0");
+ }
+
+ if (type != BKEY_TYPE_btree) {
+ enum btree_id btree = type - 1;
+
+ if (btree_type_has_snapshots(btree)) {
+ bkey_fsck_err_on(!k.k->p.snapshot, c, err,
+ bkey_snapshot_zero,
+ "snapshot == 0");
+ } else if (!btree_type_has_snapshot_field(btree)) {
+ bkey_fsck_err_on(k.k->p.snapshot, c, err,
+ bkey_snapshot_nonzero,
+ "nonzero snapshot");
+ } else {
+ /*
+ * btree uses snapshot field but it's not required to be
+ * nonzero
+ */
+ }
+
+ bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err,
+ bkey_at_pos_max,
+ "key at POS_MAX");
+ }
+fsck_err:
+ return ret;
+}
+
+int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum btree_node_type type,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ return __bch2_bkey_invalid(c, k, type, flags, err) ?:
+ bch2_bkey_val_invalid(c, k, flags, err);
+}
+
+int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b,
+ struct bkey_s_c k, struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err,
+ bkey_before_start_of_btree_node,
+ "key before start of btree node");
+
+ bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err,
+ bkey_after_end_of_btree_node,
+ "key past end of btree node");
+fsck_err:
+ return ret;
+}
+
+void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
+{
+ if (bpos_eq(pos, POS_MIN))
+ prt_printf(out, "POS_MIN");
+ else if (bpos_eq(pos, POS_MAX))
+ prt_printf(out, "POS_MAX");
+ else if (bpos_eq(pos, SPOS_MAX))
+ prt_printf(out, "SPOS_MAX");
+ else {
+ if (pos.inode == U64_MAX)
+ prt_printf(out, "U64_MAX");
+ else
+ prt_printf(out, "%llu", pos.inode);
+ prt_printf(out, ":");
+ if (pos.offset == U64_MAX)
+ prt_printf(out, "U64_MAX");
+ else
+ prt_printf(out, "%llu", pos.offset);
+ prt_printf(out, ":");
+ if (pos.snapshot == U32_MAX)
+ prt_printf(out, "U32_MAX");
+ else
+ prt_printf(out, "%u", pos.snapshot);
+ }
+}
+
+void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
+{
+ if (k) {
+ prt_printf(out, "u64s %u type ", k->u64s);
+
+ if (k->type < KEY_TYPE_MAX)
+ prt_printf(out, "%s ", bch2_bkey_types[k->type]);
+ else
+ prt_printf(out, "%u ", k->type);
+
+ bch2_bpos_to_text(out, k->p);
+
+ prt_printf(out, " len %u ver %llu", k->size, k->version.lo);
+ } else {
+ prt_printf(out, "(null)");
+ }
+}
+
+void bch2_val_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
+
+ if (likely(ops->val_to_text))
+ ops->val_to_text(out, c, k);
+}
+
+void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ bch2_bkey_to_text(out, k.k);
+
+ if (bkey_val_bytes(k.k)) {
+ prt_printf(out, ": ");
+ bch2_val_to_text(out, c, k);
+ }
+}
+
+void bch2_bkey_swab_val(struct bkey_s k)
+{
+ const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
+
+ if (ops->swab)
+ ops->swab(k);
+}
+
+bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
+{
+ const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
+
+ return ops->key_normalize
+ ? ops->key_normalize(c, k)
+ : false;
+}
+
+bool bch2_bkey_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
+{
+ const struct bkey_ops *ops = bch2_bkey_type_ops(l.k->type);
+
+ return ops->key_merge &&
+ bch2_bkey_maybe_mergable(l.k, r.k) &&
+ (u64) l.k->size + r.k->size <= KEY_SIZE_MAX &&
+ !bch2_key_merging_disabled &&
+ ops->key_merge(c, l, r);
+}
+
+static const struct old_bkey_type {
+ u8 btree_node_type;
+ u8 old;
+ u8 new;
+} bkey_renumber_table[] = {
+ {BKEY_TYPE_btree, 128, KEY_TYPE_btree_ptr },
+ {BKEY_TYPE_extents, 128, KEY_TYPE_extent },
+ {BKEY_TYPE_extents, 129, KEY_TYPE_extent },
+ {BKEY_TYPE_extents, 130, KEY_TYPE_reservation },
+ {BKEY_TYPE_inodes, 128, KEY_TYPE_inode },
+ {BKEY_TYPE_inodes, 130, KEY_TYPE_inode_generation },
+ {BKEY_TYPE_dirents, 128, KEY_TYPE_dirent },
+ {BKEY_TYPE_dirents, 129, KEY_TYPE_hash_whiteout },
+ {BKEY_TYPE_xattrs, 128, KEY_TYPE_xattr },
+ {BKEY_TYPE_xattrs, 129, KEY_TYPE_hash_whiteout },
+ {BKEY_TYPE_alloc, 128, KEY_TYPE_alloc },
+ {BKEY_TYPE_quotas, 128, KEY_TYPE_quota },
+};
+
+void bch2_bkey_renumber(enum btree_node_type btree_node_type,
+ struct bkey_packed *k,
+ int write)
+{
+ const struct old_bkey_type *i;
+
+ for (i = bkey_renumber_table;
+ i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table);
+ i++)
+ if (btree_node_type == i->btree_node_type &&
+ k->type == (write ? i->new : i->old)) {
+ k->type = write ? i->old : i->new;
+ break;
+ }
+}
+
+void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
+ unsigned version, unsigned big_endian,
+ int write,
+ struct bkey_format *f,
+ struct bkey_packed *k)
+{
+ const struct bkey_ops *ops;
+ struct bkey uk;
+ unsigned nr_compat = 5;
+ int i;
+
+ /*
+ * Do these operations in reverse order in the write path:
+ */
+
+ for (i = 0; i < nr_compat; i++)
+ switch (!write ? i : nr_compat - 1 - i) {
+ case 0:
+ if (big_endian != CPU_BIG_ENDIAN)
+ bch2_bkey_swab_key(f, k);
+ break;
+ case 1:
+ if (version < bcachefs_metadata_version_bkey_renumber)
+ bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
+ break;
+ case 2:
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_id == BTREE_ID_inodes) {
+ if (!bkey_packed(k)) {
+ struct bkey_i *u = packed_to_bkey(k);
+
+ swap(u->k.p.inode, u->k.p.offset);
+ } else if (f->bits_per_field[BKEY_FIELD_INODE] &&
+ f->bits_per_field[BKEY_FIELD_OFFSET]) {
+ struct bkey_format tmp = *f, *in = f, *out = &tmp;
+
+ swap(tmp.bits_per_field[BKEY_FIELD_INODE],
+ tmp.bits_per_field[BKEY_FIELD_OFFSET]);
+ swap(tmp.field_offset[BKEY_FIELD_INODE],
+ tmp.field_offset[BKEY_FIELD_OFFSET]);
+
+ if (!write)
+ swap(in, out);
+
+ uk = __bch2_bkey_unpack_key(in, k);
+ swap(uk.p.inode, uk.p.offset);
+ BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
+ }
+ }
+ break;
+ case 3:
+ if (version < bcachefs_metadata_version_snapshot &&
+ (level || btree_type_has_snapshots(btree_id))) {
+ struct bkey_i *u = packed_to_bkey(k);
+
+ if (u) {
+ u->k.p.snapshot = write
+ ? 0 : U32_MAX;
+ } else {
+ u64 min_packed = le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]);
+ u64 max_packed = min_packed +
+ ~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
+
+ uk = __bch2_bkey_unpack_key(f, k);
+ uk.p.snapshot = write
+ ? min_packed : min_t(u64, U32_MAX, max_packed);
+
+ BUG_ON(!bch2_bkey_pack_key(k, &uk, f));
+ }
+ }
+
+ break;
+ case 4: {
+ struct bkey_s u;
+
+ if (!bkey_packed(k)) {
+ u = bkey_i_to_s(packed_to_bkey(k));
+ } else {
+ uk = __bch2_bkey_unpack_key(f, k);
+ u.k = &uk;
+ u.v = bkeyp_val(f, k);
+ }
+
+ if (big_endian != CPU_BIG_ENDIAN)
+ bch2_bkey_swab_val(u);
+
+ ops = bch2_bkey_type_ops(k->type);
+
+ if (ops->compat)
+ ops->compat(btree_id, version, big_endian, write, u);
+ break;
+ }
+ default:
+ BUG();
+ }
+}
diff --git a/c_src/libbcachefs/bkey_methods.h b/c_src/libbcachefs/bkey_methods.h
new file mode 100644
index 00000000..ee822837
--- /dev/null
+++ b/c_src/libbcachefs/bkey_methods.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_METHODS_H
+#define _BCACHEFS_BKEY_METHODS_H
+
+#include "bkey.h"
+
+struct bch_fs;
+struct btree;
+struct btree_trans;
+struct bkey;
+enum btree_node_type;
+
+extern const char * const bch2_bkey_types[];
+extern const struct bkey_ops bch2_bkey_null_ops;
+
+/*
+ * key_invalid: checks validity of @k, returns 0 if good or -EINVAL if bad. If
+ * invalid, entire key will be deleted.
+ *
+ * When invalid, the error string is returned via @err. @flags indicates
+ * whether the key is being read or written; more aggressive checks can be
+ * enabled when it is being written.
+ */
+struct bkey_ops {
+ int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags, struct printbuf *err);
+ void (*val_to_text)(struct printbuf *, struct bch_fs *,
+ struct bkey_s_c);
+ void (*swab)(struct bkey_s);
+ bool (*key_normalize)(struct bch_fs *, struct bkey_s);
+ bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
+ int (*trigger)(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+ void (*compat)(enum btree_id id, unsigned version,
+ unsigned big_endian, int write,
+ struct bkey_s);
+
+ /* Size of value type when first created: */
+ unsigned min_val_size;
+};
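+
+/*
+ * Each key type provides its ops via a bch2_bkey_ops_<name> definition in
+ * bkey_methods.c, from which the bch2_bkey_ops[] table below is built; e.g.:
+ *
+ *   #define bch2_bkey_ops_cookie ((struct bkey_ops) {
+ *           .key_invalid    = key_type_cookie_invalid,
+ *           .min_val_size   = 8,
+ *   })
+ */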
+
+extern const struct bkey_ops bch2_bkey_ops[];
+
+static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type)
+{
+ return likely(type < KEY_TYPE_MAX)
+ ? &bch2_bkey_ops[type]
+ : &bch2_bkey_null_ops;
+}
+
+int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
+ enum bkey_invalid_flags, struct printbuf *);
+int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
+ enum bkey_invalid_flags, struct printbuf *);
+int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *,
+ struct bkey_s_c, struct printbuf *);
+
+void bch2_bpos_to_text(struct printbuf *, struct bpos);
+void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
+void bch2_val_to_text(struct printbuf *, struct bch_fs *,
+ struct bkey_s_c);
+void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *,
+ struct bkey_s_c);
+
+void bch2_bkey_swab_val(struct bkey_s);
+
+bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
+
+static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r)
+{
+ return l->type == r->type &&
+ !bversion_cmp(l->version, r->version) &&
+ bpos_eq(l->p, bkey_start_pos(r));
+}
+
+bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
+
+enum btree_update_flags {
+ __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
+ __BTREE_UPDATE_NOJOURNAL,
+ __BTREE_UPDATE_KEY_CACHE_RECLAIM,
+
+ __BTREE_TRIGGER_NORUN,
+ __BTREE_TRIGGER_TRANSACTIONAL,
+ __BTREE_TRIGGER_INSERT,
+ __BTREE_TRIGGER_OVERWRITE,
+ __BTREE_TRIGGER_GC,
+ __BTREE_TRIGGER_BUCKET_INVALIDATE,
+};
+
+#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
+#define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL)
+#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
+
+/* Don't run triggers at all */
+#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
+
+/*
+ * If set, we're running transactional triggers as part of a transaction commit:
+ * triggers may generate new updates
+ *
+ * If cleared, and either of BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE is set,
+ * we're running atomic triggers during a transaction commit: we have our
+ * journal reservation, we're holding btree node write locks, and we know the
+ * transaction is going to commit (returning an error here is a fatal error,
+ * causing us to go emergency read-only)
+ */
+#define BTREE_TRIGGER_TRANSACTIONAL (1U << __BTREE_TRIGGER_TRANSACTIONAL)
+
+/* @new is entering the btree */
+#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT)
+
+/* @old is leaving the btree */
+#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE)
+
+/* We're in gc/fsck: running triggers to recalculate e.g. disk usage */
+#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC)
+
+/* signal from bucket invalidate path to alloc trigger */
+#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
+
+static inline int bch2_key_trigger(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
+{
+ const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type);
+
+ return ops->trigger
+ ? ops->trigger(trans, btree, level, old, new, flags)
+ : 0;
+}
+
+static inline int bch2_key_trigger_old(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old, unsigned flags)
+{
+ struct bkey_i deleted;
+
+ bkey_init(&deleted.k);
+ deleted.k.p = old.k->p;
+
+ return bch2_key_trigger(trans, btree_id, level, old, bkey_i_to_s(&deleted),
+ BTREE_TRIGGER_OVERWRITE|flags);
+}
+
+static inline int bch2_key_trigger_new(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s new, unsigned flags)
+{
+ struct bkey_i deleted;
+
+ bkey_init(&deleted.k);
+ deleted.k.p = new.k->p;
+
+ return bch2_key_trigger(trans, btree_id, level, bkey_i_to_s_c(&deleted), new,
+ BTREE_TRIGGER_INSERT|flags);
+}
+
+void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
+
+void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned,
+ int, struct bkey_format *, struct bkey_packed *);
+
+static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id,
+ unsigned version, unsigned big_endian,
+ int write,
+ struct bkey_format *f,
+ struct bkey_packed *k)
+{
+ if (version < bcachefs_metadata_version_current ||
+ big_endian != CPU_BIG_ENDIAN)
+ __bch2_bkey_compat(level, btree_id, version,
+ big_endian, write, f, k);
+
+}
+
+#endif /* _BCACHEFS_BKEY_METHODS_H */
diff --git a/c_src/libbcachefs/bkey_sort.c b/c_src/libbcachefs/bkey_sort.c
new file mode 100644
index 00000000..bcca9e76
--- /dev/null
+++ b/c_src/libbcachefs/bkey_sort.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "bkey_buf.h"
+#include "bkey_cmp.h"
+#include "bkey_sort.h"
+#include "bset.h"
+#include "extents.h"
+
+typedef int (*sort_cmp_fn)(struct btree *,
+ struct bkey_packed *,
+ struct bkey_packed *);
+
+static inline bool sort_iter_end(struct sort_iter *iter)
+{
+ return !iter->used;
+}
+
+static inline void sort_iter_sift(struct sort_iter *iter, unsigned from,
+ sort_cmp_fn cmp)
+{
+ unsigned i;
+
+ for (i = from;
+ i + 1 < iter->used &&
+ cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
+ i++)
+ swap(iter->data[i], iter->data[i + 1]);
+}
+
+static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
+{
+ unsigned i = iter->used;
+
+ while (i--)
+ sort_iter_sift(iter, i, cmp);
+}
+
+static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
+{
+ return !sort_iter_end(iter) ? iter->data->k : NULL;
+}
+
+static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
+{
+ struct sort_iter_set *i = iter->data;
+
+ BUG_ON(!iter->used);
+
+ i->k = bkey_p_next(i->k);
+
+ BUG_ON(i->k > i->end);
+
+ if (i->k == i->end)
+ array_remove_item(iter->data, iter->used, 0);
+ else
+ sort_iter_sift(iter, 0, cmp);
+}
+
+static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
+ sort_cmp_fn cmp)
+{
+ struct bkey_packed *ret = sort_iter_peek(iter);
+
+ if (ret)
+ sort_iter_advance(iter, cmp);
+
+ return ret;
+}
+
+/*
+ * If keys compare equal, compare by pointer order:
+ */
+static inline int key_sort_fix_overlapping_cmp(struct btree *b,
+ struct bkey_packed *l,
+ struct bkey_packed *r)
+{
+ return bch2_bkey_cmp_packed(b, l, r) ?:
+ cmp_int((unsigned long) l, (unsigned long) r);
+}
+
+static inline bool should_drop_next_key(struct sort_iter *iter)
+{
+ /*
+ * key_sort_fix_overlapping_cmp() ensures that when keys compare equal the
+ * older key comes first; so if l->k compares equal to r->k then l->k is
+ * older and should be dropped.
+ */
+ return iter->used >= 2 &&
+ !bch2_bkey_cmp_packed(iter->b,
+ iter->data[0].k,
+ iter->data[1].k);
+}
+
+struct btree_nr_keys
+bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
+ struct sort_iter *iter)
+{
+ struct bkey_packed *out = dst->start;
+ struct bkey_packed *k;
+ struct btree_nr_keys nr;
+
+ memset(&nr, 0, sizeof(nr));
+
+ sort_iter_sort(iter, key_sort_fix_overlapping_cmp);
+
+ while ((k = sort_iter_peek(iter))) {
+ if (!bkey_deleted(k) &&
+ !should_drop_next_key(iter)) {
+ bkey_p_copy(out, k);
+ btree_keys_account_key_add(&nr, 0, out);
+ out = bkey_p_next(out);
+ }
+
+ sort_iter_advance(iter, key_sort_fix_overlapping_cmp);
+ }
+
+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
+ return nr;
+}
+
+/* Sort + repack in a new format: */
+struct btree_nr_keys
+bch2_sort_repack(struct bset *dst, struct btree *src,
+ struct btree_node_iter *src_iter,
+ struct bkey_format *out_f,
+ bool filter_whiteouts)
+{
+ struct bkey_format *in_f = &src->format;
+ struct bkey_packed *in, *out = vstruct_last(dst);
+ struct btree_nr_keys nr;
+ bool transform = memcmp(out_f, &src->format, sizeof(*out_f));
+
+ memset(&nr, 0, sizeof(nr));
+
+ while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
+ if (filter_whiteouts && bkey_deleted(in))
+ continue;
+
+ if (!transform)
+ bkey_p_copy(out, in);
+ else if (bch2_bkey_transform(out_f, out, bkey_packed(in)
+ ? in_f : &bch2_bkey_format_current, in))
+ out->format = KEY_FORMAT_LOCAL_BTREE;
+ else
+ bch2_bkey_unpack(src, (void *) out, in);
+
+ out->needs_whiteout = false;
+
+ btree_keys_account_key_add(&nr, 0, out);
+ out = bkey_p_next(out);
+ }
+
+ dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
+ return nr;
+}
+
+static inline int sort_keys_cmp(struct btree *b,
+ struct bkey_packed *l,
+ struct bkey_packed *r)
+{
+ return bch2_bkey_cmp_packed_inlined(b, l, r) ?:
+ (int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
+ (int) l->needs_whiteout - (int) r->needs_whiteout;
+}
+
+unsigned bch2_sort_keys(struct bkey_packed *dst,
+ struct sort_iter *iter,
+ bool filter_whiteouts)
+{
+ const struct bkey_format *f = &iter->b->format;
+ struct bkey_packed *in, *next, *out = dst;
+
+ sort_iter_sort(iter, sort_keys_cmp);
+
+ while ((in = sort_iter_next(iter, sort_keys_cmp))) {
+ bool needs_whiteout = false;
+
+ if (bkey_deleted(in) &&
+ (filter_whiteouts || !in->needs_whiteout))
+ continue;
+
+ while ((next = sort_iter_peek(iter)) &&
+ !bch2_bkey_cmp_packed_inlined(iter->b, in, next)) {
+ BUG_ON(in->needs_whiteout &&
+ next->needs_whiteout);
+ needs_whiteout |= in->needs_whiteout;
+ in = sort_iter_next(iter, sort_keys_cmp);
+ }
+
+ if (bkey_deleted(in)) {
+ memcpy_u64s_small(out, in, bkeyp_key_u64s(f, in));
+ set_bkeyp_val_u64s(f, out, 0);
+ } else {
+ bkey_p_copy(out, in);
+ }
+ out->needs_whiteout |= needs_whiteout;
+ out = bkey_p_next(out);
+ }
+
+ return (u64 *) out - (u64 *) dst;
+}
diff --git a/c_src/libbcachefs/bkey_sort.h b/c_src/libbcachefs/bkey_sort.h
new file mode 100644
index 00000000..7c0f0b16
--- /dev/null
+++ b/c_src/libbcachefs/bkey_sort.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_SORT_H
+#define _BCACHEFS_BKEY_SORT_H
+
+struct sort_iter {
+ struct btree *b;
+ unsigned used;
+ unsigned size;
+
+ struct sort_iter_set {
+ struct bkey_packed *k, *end;
+ } data[];
+};
+
+static inline void sort_iter_init(struct sort_iter *iter, struct btree *b, unsigned size)
+{
+ iter->b = b;
+ iter->used = 0;
+ iter->size = size;
+}
+
+struct sort_iter_stack {
+ struct sort_iter iter;
+ struct sort_iter_set sets[MAX_BSETS + 1];
+};
+
+static inline void sort_iter_stack_init(struct sort_iter_stack *iter, struct btree *b)
+{
+ sort_iter_init(&iter->iter, b, ARRAY_SIZE(iter->sets));
+}
+
+static inline void sort_iter_add(struct sort_iter *iter,
+ struct bkey_packed *k,
+ struct bkey_packed *end)
+{
+ BUG_ON(iter->used >= iter->size);
+
+ if (k != end)
+ iter->data[iter->used++] = (struct sort_iter_set) { k, end };
+}
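+
+/*
+ * A minimal usage sketch (assuming the btree_bkey_first()/btree_bkey_last()
+ * helpers from bset.h): initialize an iterator over the bsets to be merged,
+ * add each one, then call one of the sort functions below:
+ *
+ *   struct sort_iter_stack sort_iter;
+ *   struct bset_tree *t;
+ *
+ *   sort_iter_stack_init(&sort_iter, b);
+ *   for_each_bset(b, t)
+ *           sort_iter_add(&sort_iter.iter,
+ *                         btree_bkey_first(b, t),
+ *                         btree_bkey_last(b, t));
+ *   u64s = bch2_sort_keys(dst, &sort_iter.iter, filter_whiteouts);
+ */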
+
+struct btree_nr_keys
+bch2_key_sort_fix_overlapping(struct bch_fs *, struct bset *,
+ struct sort_iter *);
+
+struct btree_nr_keys
+bch2_sort_repack(struct bset *, struct btree *,
+ struct btree_node_iter *,
+ struct bkey_format *, bool);
+
+unsigned bch2_sort_keys(struct bkey_packed *,
+ struct sort_iter *, bool);
+
+#endif /* _BCACHEFS_BKEY_SORT_H */
diff --git a/c_src/libbcachefs/bset.c b/c_src/libbcachefs/bset.c
new file mode 100644
index 00000000..74bf8eb9
--- /dev/null
+++ b/c_src/libbcachefs/bset.c
@@ -0,0 +1,1598 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for working with individual keys, and sorted sets of keys within a
+ * btree node
+ *
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "btree_cache.h"
+#include "bset.h"
+#include "eytzinger.h"
+#include "trace.h"
+#include "util.h"
+
+#include <asm/unaligned.h>
+#include <linux/console.h>
+#include <linux/random.h>
+#include <linux/prefetch.h>
+
+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *,
+ struct btree *);
+
+static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
+{
+ unsigned n = ARRAY_SIZE(iter->data);
+
+ while (n && __btree_node_iter_set_end(iter, n - 1))
+ --n;
+
+ return n;
+}
+
+struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
+{
+ return bch2_bkey_to_bset_inlined(b, k);
+}
+
+/*
+ * There are never duplicate live keys in the btree - but including keys that
+ * have been flagged as deleted (and will be cleaned up later) we _will_ see
+ * duplicates.
+ *
+ * Thus the sort order is: usual key comparison first, but for keys that compare
+ * equal the deleted key(s) come first, and the (at most one) live version comes
+ * last.
+ *
+ * The main reason for this is insertion: to handle overwrites, we first iterate
+ * over keys that compare equal to our insert key, and then insert immediately
+ * prior to the first key greater than the key we're inserting - our insert
+ * position will be after all keys that compare equal to our insert key, which
+ * by the time we actually do the insert will all be deleted.
+ */
+
+void bch2_dump_bset(struct bch_fs *c, struct btree *b,
+ struct bset *i, unsigned set)
+{
+ struct bkey_packed *_k, *_n;
+ struct bkey uk, n;
+ struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+
+ if (!i->u64s)
+ return;
+
+ for (_k = i->start;
+ _k < vstruct_last(i);
+ _k = _n) {
+ _n = bkey_p_next(_k);
+
+ if (!_k->u64s) {
+ printk(KERN_ERR "block %u key %5zu - u64s 0? aieee!\n", set,
+ _k->_data - i->_data);
+ break;
+ }
+
+ k = bkey_disassemble(b, _k, &uk);
+
+ printbuf_reset(&buf);
+ if (c)
+ bch2_bkey_val_to_text(&buf, c, k);
+ else
+ bch2_bkey_to_text(&buf, k.k);
+ printk(KERN_ERR "block %u key %5zu: %s\n", set,
+ _k->_data - i->_data, buf.buf);
+
+ if (_n == vstruct_last(i))
+ continue;
+
+ n = bkey_unpack_key(b, _n);
+
+ if (bpos_lt(n.p, k.k->p)) {
+ printk(KERN_ERR "Key skipped backwards\n");
+ continue;
+ }
+
+ if (!bkey_deleted(k.k) && bpos_eq(n.p, k.k->p))
+ printk(KERN_ERR "Duplicate keys\n");
+ }
+
+ printbuf_exit(&buf);
+}
+
+void bch2_dump_btree_node(struct bch_fs *c, struct btree *b)
+{
+ struct bset_tree *t;
+
+ console_lock();
+ for_each_bset(b, t)
+ bch2_dump_bset(c, b, bset(b, t), t - b->set);
+ console_unlock();
+}
+
+void bch2_dump_btree_node_iter(struct btree *b,
+ struct btree_node_iter *iter)
+{
+ struct btree_node_iter_set *set;
+ struct printbuf buf = PRINTBUF;
+
+ printk(KERN_ERR "btree node iter with %u/%u sets:\n",
+ __btree_node_iter_used(iter), b->nsets);
+
+ btree_node_iter_for_each(iter, set) {
+ struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
+ struct bset_tree *t = bch2_bkey_to_bset(b, k);
+ struct bkey uk = bkey_unpack_key(b, k);
+
+ printbuf_reset(&buf);
+ bch2_bkey_to_text(&buf, &uk);
+ printk(KERN_ERR "set %zu key %u: %s\n",
+ t - b->set, set->k, buf.buf);
+ }
+
+ printbuf_exit(&buf);
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void __bch2_verify_btree_nr_keys(struct btree *b)
+{
+ struct bset_tree *t;
+ struct bkey_packed *k;
+ struct btree_nr_keys nr = { 0 };
+
+ for_each_bset(b, t)
+ bset_tree_for_each_key(b, t, k)
+ if (!bkey_deleted(k))
+ btree_keys_account_key_add(&nr, t - b->set, k);
+
+ BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
+}
+
+static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
+ struct btree *b)
+{
+ struct btree_node_iter iter = *_iter;
+ const struct bkey_packed *k, *n;
+
+ k = bch2_btree_node_iter_peek_all(&iter, b);
+ __bch2_btree_node_iter_advance(&iter, b);
+ n = bch2_btree_node_iter_peek_all(&iter, b);
+
+ bkey_unpack_key(b, k);
+
+ if (n &&
+ bkey_iter_cmp(b, k, n) > 0) {
+ struct btree_node_iter_set *set;
+ struct bkey ku = bkey_unpack_key(b, k);
+ struct bkey nu = bkey_unpack_key(b, n);
+ struct printbuf buf1 = PRINTBUF;
+ struct printbuf buf2 = PRINTBUF;
+
+ bch2_dump_btree_node(NULL, b);
+ bch2_bkey_to_text(&buf1, &ku);
+ bch2_bkey_to_text(&buf2, &nu);
+ printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n",
+ buf1.buf, buf2.buf);
+ printk(KERN_ERR "iter was:");
+
+ btree_node_iter_for_each(_iter, set) {
+ struct bkey_packed *k2 = __btree_node_offset_to_key(b, set->k);
+ struct bset_tree *t = bch2_bkey_to_bset(b, k2);
+ printk(" [%zi %zi]", t - b->set,
+ k2->_data - bset(b, t)->_data);
+ }
+ panic("\n");
+ }
+}
+
+void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
+ struct btree *b)
+{
+ struct btree_node_iter_set *set, *s2;
+ struct bkey_packed *k, *p;
+ struct bset_tree *t;
+
+ if (bch2_btree_node_iter_end(iter))
+ return;
+
+ /* Verify no duplicates: */
+ btree_node_iter_for_each(iter, set) {
+ BUG_ON(set->k > set->end);
+ btree_node_iter_for_each(iter, s2)
+ BUG_ON(set != s2 && set->end == s2->end);
+ }
+
+ /* Verify that set->end is correct: */
+ btree_node_iter_for_each(iter, set) {
+ for_each_bset(b, t)
+ if (set->end == t->end_offset)
+ goto found;
+ BUG();
+found:
+ BUG_ON(set->k < btree_bkey_first_offset(t) ||
+ set->k >= t->end_offset);
+ }
+
+ /* Verify iterator is sorted: */
+ btree_node_iter_for_each(iter, set)
+ BUG_ON(set != iter->data &&
+ btree_node_iter_cmp(b, set[-1], set[0]) > 0);
+
+ k = bch2_btree_node_iter_peek_all(iter, b);
+
+ for_each_bset(b, t) {
+ if (iter->data[0].end == t->end_offset)
+ continue;
+
+ p = bch2_bkey_prev_all(b, t,
+ bch2_btree_node_iter_bset_pos(iter, b, t));
+
+ BUG_ON(p && bkey_iter_cmp(b, k, p) < 0);
+ }
+}
+
+void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
+ struct bkey_packed *insert, unsigned clobber_u64s)
+{
+ struct bset_tree *t = bch2_bkey_to_bset(b, where);
+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
+ struct bkey_packed *next = (void *) ((u64 *) where->_data + clobber_u64s);
+ struct printbuf buf1 = PRINTBUF;
+ struct printbuf buf2 = PRINTBUF;
+#if 0
+ BUG_ON(prev &&
+ bkey_iter_cmp(b, prev, insert) > 0);
+#else
+ if (prev &&
+ bkey_iter_cmp(b, prev, insert) > 0) {
+ struct bkey k1 = bkey_unpack_key(b, prev);
+ struct bkey k2 = bkey_unpack_key(b, insert);
+
+ bch2_dump_btree_node(NULL, b);
+ bch2_bkey_to_text(&buf1, &k1);
+ bch2_bkey_to_text(&buf2, &k2);
+
+ panic("prev > insert:\n"
+ "prev key %s\n"
+ "insert key %s\n",
+ buf1.buf, buf2.buf);
+ }
+#endif
+#if 0
+ BUG_ON(next != btree_bkey_last(b, t) &&
+ bkey_iter_cmp(b, insert, next) > 0);
+#else
+ if (next != btree_bkey_last(b, t) &&
+ bkey_iter_cmp(b, insert, next) > 0) {
+ struct bkey k1 = bkey_unpack_key(b, insert);
+ struct bkey k2 = bkey_unpack_key(b, next);
+
+ bch2_dump_btree_node(NULL, b);
+ bch2_bkey_to_text(&buf1, &k1);
+ bch2_bkey_to_text(&buf2, &k2);
+
+ panic("insert > next:\n"
+ "insert key %s\n"
+ "next key %s\n",
+ buf1.buf, buf2.buf);
+ }
+#endif
+}
+
+#else
+
+static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
+ struct btree *b) {}
+
+#endif
+
+/* Auxiliary search trees */
+
+#define BFLOAT_FAILED_UNPACKED U8_MAX
+#define BFLOAT_FAILED U8_MAX
+
+struct bkey_float {
+ u8 exponent;
+ u8 key_offset;
+ u16 mantissa;
+};
+#define BKEY_MANTISSA_BITS 16
+
+static unsigned bkey_float_byte_offset(unsigned idx)
+{
+ return idx * sizeof(struct bkey_float);
+}
+
+struct ro_aux_tree {
+ u8 nothing[0];
+ struct bkey_float f[];
+};
+
+struct rw_aux_tree {
+ u16 offset;
+ struct bpos k;
+};
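+
+/*
+ * Both auxiliary lookup structures live in b->aux_data:
+ *
+ * ro_aux_tree: an eytzinger-ordered array of bkey_floats, followed by one byte
+ * per node giving the size (in u64s) of the key immediately preceding that
+ * node's key, so we can walk backwards to it.
+ *
+ * rw_aux_tree: a flat array with roughly one entry per cacheline's worth of
+ * keys, each storing an offset into the bset and the unpacked position of the
+ * key at that offset.
+ */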
+
+static unsigned bset_aux_tree_buf_end(const struct bset_tree *t)
+{
+ BUG_ON(t->aux_data_offset == U16_MAX);
+
+ switch (bset_aux_tree_type(t)) {
+ case BSET_NO_AUX_TREE:
+ return t->aux_data_offset;
+ case BSET_RO_AUX_TREE:
+ return t->aux_data_offset +
+ DIV_ROUND_UP(t->size * sizeof(struct bkey_float) +
+ t->size * sizeof(u8), 8);
+ case BSET_RW_AUX_TREE:
+ return t->aux_data_offset +
+ DIV_ROUND_UP(sizeof(struct rw_aux_tree) * t->size, 8);
+ default:
+ BUG();
+ }
+}
+
+static unsigned bset_aux_tree_buf_start(const struct btree *b,
+ const struct bset_tree *t)
+{
+ return t == b->set
+ ? DIV_ROUND_UP(b->unpack_fn_len, 8)
+ : bset_aux_tree_buf_end(t - 1);
+}
+
+static void *__aux_tree_base(const struct btree *b,
+ const struct bset_tree *t)
+{
+ return b->aux_data + t->aux_data_offset * 8;
+}
+
+static struct ro_aux_tree *ro_aux_tree_base(const struct btree *b,
+ const struct bset_tree *t)
+{
+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE);
+
+ return __aux_tree_base(b, t);
+}
+
+static u8 *ro_aux_tree_prev(const struct btree *b,
+ const struct bset_tree *t)
+{
+ EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE);
+
+ return __aux_tree_base(b, t) + bkey_float_byte_offset(t->size);
+}
+
+static struct bkey_float *bkey_float(const struct btree *b,
+ const struct bset_tree *t,
+ unsigned idx)
+{
+ return ro_aux_tree_base(b, t)->f + idx;
+}
+
+static void bset_aux_tree_verify(const struct btree *b)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+ const struct bset_tree *t;
+
+ for_each_bset(b, t) {
+ if (t->aux_data_offset == U16_MAX)
+ continue;
+
+ BUG_ON(t != b->set &&
+ t[-1].aux_data_offset == U16_MAX);
+
+ BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t));
+ BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b));
+ BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b));
+ }
+#endif
+}
+
+void bch2_btree_keys_init(struct btree *b)
+{
+ unsigned i;
+
+ b->nsets = 0;
+ memset(&b->nr, 0, sizeof(b->nr));
+
+ for (i = 0; i < MAX_BSETS; i++)
+ b->set[i].data_offset = U16_MAX;
+
+ bch2_bset_set_no_aux_tree(b, b->set);
+}
+
+/* Binary tree stuff for auxiliary search trees */
+
+/*
+ * Cacheline/offset <-> bkey pointer arithmetic:
+ *
+ * The ro aux tree is a binary search tree in an array; each node corresponds
+ * to a key in one cacheline in the bset (BSET_CACHELINE bytes).
+ *
+ * This means we don't have to store the full index of the key that a node in
+ * the binary tree points to; eytzinger1_to_inorder() gives us the cacheline,
+ * and then bkey_float->key_offset gives us the offset within that cacheline,
+ * in units of 8
+ * bytes.
+ *
+ * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to
+ * make this work.
+ *
+ * To construct the bfloat for an arbitrary key we need to know what the key
+ * immediately preceding it is: we have to check if the two keys differ in the
+ * bits we're going to store in bkey_float->mantissa. The prev table (see
+ * ro_aux_tree_prev()) stores the size of the previous key so we can walk
+ * backwards to it from the tree node's key.
+ */
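+
+/*
+ * Example: for a node j with __eytzinger1_to_inorder(j, ...) == 3 and
+ * bkey_float(b, t, j)->key_offset == 5, the key it points to starts at
+ * round_down(btree_bkey_first(b, t), L1_CACHE_BYTES) + 3 * BSET_CACHELINE +
+ * 5 * 8 bytes - exactly what cacheline_to_bkey() computes.
+ */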
+
+static inline void *bset_cacheline(const struct btree *b,
+ const struct bset_tree *t,
+ unsigned cacheline)
+{
+ return (void *) round_down((unsigned long) btree_bkey_first(b, t),
+ L1_CACHE_BYTES) +
+ cacheline * BSET_CACHELINE;
+}
+
+static struct bkey_packed *cacheline_to_bkey(const struct btree *b,
+ const struct bset_tree *t,
+ unsigned cacheline,
+ unsigned offset)
+{
+ return bset_cacheline(b, t, cacheline) + offset * 8;
+}
+
+static unsigned bkey_to_cacheline(const struct btree *b,
+ const struct bset_tree *t,
+ const struct bkey_packed *k)
+{
+ return ((void *) k - bset_cacheline(b, t, 0)) / BSET_CACHELINE;
+}
+
+static ssize_t __bkey_to_cacheline_offset(const struct btree *b,
+ const struct bset_tree *t,
+ unsigned cacheline,
+ const struct bkey_packed *k)
+{
+ return (u64 *) k - (u64 *) bset_cacheline(b, t, cacheline);
+}
+
+static unsigned bkey_to_cacheline_offset(const struct btree *b,
+ const struct bset_tree *t,
+ unsigned cacheline,
+ const struct bkey_packed *k)
+{
+ size_t m = __bkey_to_cacheline_offset(b, t, cacheline, k);
+
+ EBUG_ON(m > U8_MAX);
+ return m;
+}
+
+static inline struct bkey_packed *tree_to_bkey(const struct btree *b,
+ const struct bset_tree *t,
+ unsigned j)
+{
+ return cacheline_to_bkey(b, t,
+ __eytzinger1_to_inorder(j, t->size - 1, t->extra),
+ bkey_float(b, t, j)->key_offset);
+}
+
+static struct bkey_packed *tree_to_prev_bkey(const struct btree *b,
+ const struct bset_tree *t,
+ unsigned j)
+{
+ unsigned prev_u64s = ro_aux_tree_prev(b, t)[j];
+
+ return (void *) ((u64 *) tree_to_bkey(b, t, j)->_data - prev_u64s);
+}
+
+static struct rw_aux_tree *rw_aux_tree(const struct btree *b,
+ const struct bset_tree *t)
+{
+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE);
+
+ return __aux_tree_base(b, t);
+}
+
+/*
+ * For the write set - the one we're currently inserting keys into - we don't
+ * maintain a full search tree, we just keep a simple lookup table (the rw aux
+ * tree).
+ */
+static struct bkey_packed *rw_aux_to_bkey(const struct btree *b,
+ struct bset_tree *t,
+ unsigned j)
+{
+ return __btree_node_offset_to_key(b, rw_aux_tree(b, t)[j].offset);
+}
+
+static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t,
+ unsigned j, struct bkey_packed *k)
+{
+ EBUG_ON(k >= btree_bkey_last(b, t));
+
+ rw_aux_tree(b, t)[j] = (struct rw_aux_tree) {
+ .offset = __btree_node_key_to_offset(b, k),
+ .k = bkey_unpack_pos(b, k),
+ };
+}
+
+static void bch2_bset_verify_rw_aux_tree(struct btree *b,
+ struct bset_tree *t)
+{
+ struct bkey_packed *k = btree_bkey_first(b, t);
+ unsigned j = 0;
+
+ if (!bch2_expensive_debug_checks)
+ return;
+
+ BUG_ON(bset_has_ro_aux_tree(t));
+
+ if (!bset_has_rw_aux_tree(t))
+ return;
+
+ BUG_ON(t->size < 1);
+ BUG_ON(rw_aux_to_bkey(b, t, j) != k);
+
+ goto start;
+ while (1) {
+ if (rw_aux_to_bkey(b, t, j) == k) {
+ BUG_ON(!bpos_eq(rw_aux_tree(b, t)[j].k,
+ bkey_unpack_pos(b, k)));
+start:
+ if (++j == t->size)
+ break;
+
+ BUG_ON(rw_aux_tree(b, t)[j].offset <=
+ rw_aux_tree(b, t)[j - 1].offset);
+ }
+
+ k = bkey_p_next(k);
+ BUG_ON(k >= btree_bkey_last(b, t));
+ }
+}
+
+/* returns idx of first entry >= offset: */
+static unsigned rw_aux_tree_bsearch(struct btree *b,
+ struct bset_tree *t,
+ unsigned offset)
+{
+ unsigned bset_offs = offset - btree_bkey_first_offset(t);
+ unsigned bset_u64s = t->end_offset - btree_bkey_first_offset(t);
+ unsigned idx = bset_u64s ? bset_offs * t->size / bset_u64s : 0;
+
+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE);
+ EBUG_ON(!t->size);
+ EBUG_ON(idx > t->size);
+
+ while (idx < t->size &&
+ rw_aux_tree(b, t)[idx].offset < offset)
+ idx++;
+
+ while (idx &&
+ rw_aux_tree(b, t)[idx - 1].offset >= offset)
+ idx--;
+
+ EBUG_ON(idx < t->size &&
+ rw_aux_tree(b, t)[idx].offset < offset);
+ EBUG_ON(idx && rw_aux_tree(b, t)[idx - 1].offset >= offset);
+ EBUG_ON(idx + 1 < t->size &&
+ rw_aux_tree(b, t)[idx].offset ==
+ rw_aux_tree(b, t)[idx + 1].offset);
+
+ return idx;
+}
+
+static inline unsigned bkey_mantissa(const struct bkey_packed *k,
+ const struct bkey_float *f,
+ unsigned idx)
+{
+ u64 v;
+
+ EBUG_ON(!bkey_packed(k));
+
+ v = get_unaligned((u64 *) (((u8 *) k->_data) + (f->exponent >> 3)));
+
+ /*
+ * In little endian, we're shifting off low bits (and then the bits we
+ * want are at the low end), in big endian we're shifting off high bits
+ * (and then the bits we want are at the high end, so we shift them
+ * back down):
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ v >>= f->exponent & 7;
+#else
+ v >>= 64 - (f->exponent & 7) - BKEY_MANTISSA_BITS;
+#endif
+ return (u16) v;
+}
+
+static __always_inline void make_bfloat(struct btree *b, struct bset_tree *t,
+ unsigned j,
+ struct bkey_packed *min_key,
+ struct bkey_packed *max_key)
+{
+ struct bkey_float *f = bkey_float(b, t, j);
+ struct bkey_packed *m = tree_to_bkey(b, t, j);
+ struct bkey_packed *l = is_power_of_2(j)
+ ? min_key
+ : tree_to_prev_bkey(b, t, j >> ffs(j));
+ struct bkey_packed *r = is_power_of_2(j + 1)
+ ? max_key
+ : tree_to_bkey(b, t, j >> (ffz(j) + 1));
+ unsigned mantissa;
+ int shift, exponent, high_bit;
+
+ /*
+ * for failed bfloats, the lookup code falls back to comparing against
+ * the original key.
+ */
+
+ if (!bkey_packed(l) || !bkey_packed(r) || !bkey_packed(m) ||
+ !b->nr_key_bits) {
+ f->exponent = BFLOAT_FAILED_UNPACKED;
+ return;
+ }
+
+ /*
+ * The greatest differing bit of l and r is the first bit we must
+ * include in the bfloat mantissa we're creating in order to do
+ * comparisons - that bit always becomes the high bit of
+ * bfloat->mantissa, and thus the exponent we're calculating here is
+ * the position of what will become the low bit in bfloat->mantissa:
+ *
+ * Note that this may be negative - we may be running off the low end
+ * of the key: we handle this later:
+ */
+ high_bit = max(bch2_bkey_greatest_differing_bit(b, l, r),
+ min_t(unsigned, BKEY_MANTISSA_BITS, b->nr_key_bits) - 1);
+ exponent = high_bit - (BKEY_MANTISSA_BITS - 1);
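+
+	/*
+	 * e.g. with BKEY_MANTISSA_BITS == 16, high_bit == 40 gives
+	 * exponent == 25: the mantissa will hold key bits 25..40 inclusive.
+	 */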
+
+ /*
+ * Then we calculate the actual shift value, from the start of the key
+ * (k->_data), to get the key bits starting at exponent:
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ shift = (int) (b->format.key_u64s * 64 - b->nr_key_bits) + exponent;
+
+ EBUG_ON(shift + BKEY_MANTISSA_BITS > b->format.key_u64s * 64);
+#else
+ shift = high_bit_offset +
+ b->nr_key_bits -
+ exponent -
+ BKEY_MANTISSA_BITS;
+
+ EBUG_ON(shift < KEY_PACKED_BITS_START);
+#endif
+ EBUG_ON(shift < 0 || shift >= BFLOAT_FAILED);
+
+ f->exponent = shift;
+ mantissa = bkey_mantissa(m, f, j);
+
+ /*
+ * If we've got garbage bits, set them to all 1s - it's legal for the
+ * bfloat to compare larger than the original key, but not smaller:
+ */
+ if (exponent < 0)
+ mantissa |= ~(~0U << -exponent);
+
+ f->mantissa = mantissa;
+}
+
+/* bytes remaining - only valid for last bset: */
+static unsigned __bset_tree_capacity(const struct btree *b, const struct bset_tree *t)
+{
+ bset_aux_tree_verify(b);
+
+ return btree_aux_data_bytes(b) - t->aux_data_offset * sizeof(u64);
+}
+
+static unsigned bset_ro_tree_capacity(const struct btree *b, const struct bset_tree *t)
+{
+ return __bset_tree_capacity(b, t) /
+ (sizeof(struct bkey_float) + sizeof(u8));
+}
+
+static unsigned bset_rw_tree_capacity(const struct btree *b, const struct bset_tree *t)
+{
+ return __bset_tree_capacity(b, t) / sizeof(struct rw_aux_tree);
+}
+
+static noinline void __build_rw_aux_tree(struct btree *b, struct bset_tree *t)
+{
+ struct bkey_packed *k;
+
+ t->size = 1;
+ t->extra = BSET_RW_AUX_TREE_VAL;
+ rw_aux_tree(b, t)[0].offset =
+ __btree_node_key_to_offset(b, btree_bkey_first(b, t));
+
+ bset_tree_for_each_key(b, t, k) {
+ if (t->size == bset_rw_tree_capacity(b, t))
+ break;
+
+ if ((void *) k - (void *) rw_aux_to_bkey(b, t, t->size - 1) >
+ L1_CACHE_BYTES)
+ rw_aux_tree_set(b, t, t->size++, k);
+ }
+}
+
+static noinline void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
+{
+ struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t);
+ struct bkey_i min_key, max_key;
+ unsigned j, cacheline = 1;
+
+ t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)),
+ bset_ro_tree_capacity(b, t));
+retry:
+ if (t->size < 2) {
+ t->size = 0;
+ t->extra = BSET_NO_AUX_TREE_VAL;
+ return;
+ }
+
+ t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1;
+
+ /* First we figure out where the first key in each cacheline is */
+ eytzinger1_for_each(j, t->size - 1) {
+ while (bkey_to_cacheline(b, t, k) < cacheline)
+ prev = k, k = bkey_p_next(k);
+
+ if (k >= btree_bkey_last(b, t)) {
+ /* XXX: this path sucks */
+ t->size--;
+ goto retry;
+ }
+
+ ro_aux_tree_prev(b, t)[j] = prev->u64s;
+ bkey_float(b, t, j)->key_offset =
+ bkey_to_cacheline_offset(b, t, cacheline++, k);
+
+ EBUG_ON(tree_to_prev_bkey(b, t, j) != prev);
+ EBUG_ON(tree_to_bkey(b, t, j) != k);
+ }
+
+ while (k != btree_bkey_last(b, t))
+ prev = k, k = bkey_p_next(k);
+
+ if (!bkey_pack_pos(bkey_to_packed(&min_key), b->data->min_key, b)) {
+ bkey_init(&min_key.k);
+ min_key.k.p = b->data->min_key;
+ }
+
+ if (!bkey_pack_pos(bkey_to_packed(&max_key), b->data->max_key, b)) {
+ bkey_init(&max_key.k);
+ max_key.k.p = b->data->max_key;
+ }
+
+ /* Then we build the tree */
+ eytzinger1_for_each(j, t->size - 1)
+ make_bfloat(b, t, j,
+ bkey_to_packed(&min_key),
+ bkey_to_packed(&max_key));
+}
+
+static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
+{
+ struct bset_tree *i;
+
+ for (i = b->set; i != t; i++)
+ BUG_ON(bset_has_rw_aux_tree(i));
+
+ bch2_bset_set_no_aux_tree(b, t);
+
+ /* round up to next cacheline: */
+ t->aux_data_offset = round_up(bset_aux_tree_buf_start(b, t),
+ SMP_CACHE_BYTES / sizeof(u64));
+
+ bset_aux_tree_verify(b);
+}
+
+void bch2_bset_build_aux_tree(struct btree *b, struct bset_tree *t,
+ bool writeable)
+{
+ if (writeable
+ ? bset_has_rw_aux_tree(t)
+ : bset_has_ro_aux_tree(t))
+ return;
+
+ bset_alloc_tree(b, t);
+
+ if (!__bset_tree_capacity(b, t))
+ return;
+
+ if (writeable)
+ __build_rw_aux_tree(b, t);
+ else
+ __build_ro_aux_tree(b, t);
+
+ bset_aux_tree_verify(b);
+}
+
+void bch2_bset_init_first(struct btree *b, struct bset *i)
+{
+ struct bset_tree *t;
+
+ BUG_ON(b->nsets);
+
+ memset(i, 0, sizeof(*i));
+ get_random_bytes(&i->seq, sizeof(i->seq));
+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+
+ t = &b->set[b->nsets++];
+ set_btree_bset(b, t, i);
+}
+
+void bch2_bset_init_next(struct bch_fs *c, struct btree *b,
+ struct btree_node_entry *bne)
+{
+ struct bset *i = &bne->keys;
+ struct bset_tree *t;
+
+ BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c));
+ BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b)));
+ BUG_ON(b->nsets >= MAX_BSETS);
+
+ memset(i, 0, sizeof(*i));
+ i->seq = btree_bset_first(b)->seq;
+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+
+ t = &b->set[b->nsets++];
+ set_btree_bset(b, t, i);
+}
+
+/*
+ * find _some_ key in the same bset as @k that precedes @k - not necessarily the
+ * immediate predecessor:
+ */
+static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t,
+ struct bkey_packed *k)
+{
+ struct bkey_packed *p;
+ unsigned offset;
+ int j;
+
+ EBUG_ON(k < btree_bkey_first(b, t) ||
+ k > btree_bkey_last(b, t));
+
+ if (k == btree_bkey_first(b, t))
+ return NULL;
+
+ switch (bset_aux_tree_type(t)) {
+ case BSET_NO_AUX_TREE:
+ p = btree_bkey_first(b, t);
+ break;
+ case BSET_RO_AUX_TREE:
+ j = min_t(unsigned, t->size - 1, bkey_to_cacheline(b, t, k));
+
+ do {
+ p = j ? tree_to_bkey(b, t,
+ __inorder_to_eytzinger1(j--,
+ t->size - 1, t->extra))
+ : btree_bkey_first(b, t);
+ } while (p >= k);
+ break;
+ case BSET_RW_AUX_TREE:
+ offset = __btree_node_key_to_offset(b, k);
+ j = rw_aux_tree_bsearch(b, t, offset);
+ p = j ? rw_aux_to_bkey(b, t, j - 1)
+ : btree_bkey_first(b, t);
+ break;
+ }
+
+ return p;
+}
+
+struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
+ struct bset_tree *t,
+ struct bkey_packed *k,
+ unsigned min_key_type)
+{
+ struct bkey_packed *p, *i, *ret = NULL, *orig_k = k;
+
+ while ((p = __bkey_prev(b, t, k)) && !ret) {
+ for (i = p; i != k; i = bkey_p_next(i))
+ if (i->type >= min_key_type)
+ ret = i;
+
+ k = p;
+ }
+
+ if (bch2_expensive_debug_checks) {
+ BUG_ON(ret >= orig_k);
+
+ for (i = ret
+ ? bkey_p_next(ret)
+ : btree_bkey_first(b, t);
+ i != orig_k;
+ i = bkey_p_next(i))
+ BUG_ON(i->type >= min_key_type);
+ }
+
+ return ret;
+}
+
+/* Insert */
+
+static void bch2_bset_fix_lookup_table(struct btree *b,
+ struct bset_tree *t,
+ struct bkey_packed *_where,
+ unsigned clobber_u64s,
+ unsigned new_u64s)
+{
+ int shift = new_u64s - clobber_u64s;
+ unsigned l, j, where = __btree_node_key_to_offset(b, _where);
+
+ EBUG_ON(bset_has_ro_aux_tree(t));
+
+ if (!bset_has_rw_aux_tree(t))
+ return;
+
+ /* returns first entry >= where */
+ l = rw_aux_tree_bsearch(b, t, where);
+
+ if (!l) /* never delete first entry */
+ l++;
+ else if (l < t->size &&
+ where < t->end_offset &&
+ rw_aux_tree(b, t)[l].offset == where)
+ rw_aux_tree_set(b, t, l++, _where);
+
+ /* l now > where */
+
+ for (j = l;
+ j < t->size &&
+ rw_aux_tree(b, t)[j].offset < where + clobber_u64s;
+ j++)
+ ;
+
+ if (j < t->size &&
+ rw_aux_tree(b, t)[j].offset + shift ==
+ rw_aux_tree(b, t)[l - 1].offset)
+ j++;
+
+ memmove(&rw_aux_tree(b, t)[l],
+ &rw_aux_tree(b, t)[j],
+ (void *) &rw_aux_tree(b, t)[t->size] -
+ (void *) &rw_aux_tree(b, t)[j]);
+ t->size -= j - l;
+
+ for (j = l; j < t->size; j++)
+ rw_aux_tree(b, t)[j].offset += shift;
+
+ EBUG_ON(l < t->size &&
+ rw_aux_tree(b, t)[l].offset ==
+ rw_aux_tree(b, t)[l - 1].offset);
+
+ if (t->size < bset_rw_tree_capacity(b, t) &&
+ (l < t->size
+ ? rw_aux_tree(b, t)[l].offset
+ : t->end_offset) -
+ rw_aux_tree(b, t)[l - 1].offset >
+ L1_CACHE_BYTES / sizeof(u64)) {
+ struct bkey_packed *start = rw_aux_to_bkey(b, t, l - 1);
+ struct bkey_packed *end = l < t->size
+ ? rw_aux_to_bkey(b, t, l)
+ : btree_bkey_last(b, t);
+ struct bkey_packed *k = start;
+
+ while (1) {
+ k = bkey_p_next(k);
+ if (k == end)
+ break;
+
+ if ((void *) k - (void *) start >= L1_CACHE_BYTES) {
+ memmove(&rw_aux_tree(b, t)[l + 1],
+ &rw_aux_tree(b, t)[l],
+ (void *) &rw_aux_tree(b, t)[t->size] -
+ (void *) &rw_aux_tree(b, t)[l]);
+ t->size++;
+ rw_aux_tree_set(b, t, l, k);
+ break;
+ }
+ }
+ }
+
+ bch2_bset_verify_rw_aux_tree(b, t);
+ bset_aux_tree_verify(b);
+}
+
+void bch2_bset_insert(struct btree *b,
+ struct btree_node_iter *iter,
+ struct bkey_packed *where,
+ struct bkey_i *insert,
+ unsigned clobber_u64s)
+{
+ struct bkey_format *f = &b->format;
+ struct bset_tree *t = bset_tree_last(b);
+ struct bkey_packed packed, *src = bkey_to_packed(insert);
+
+ bch2_bset_verify_rw_aux_tree(b, t);
+ bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s);
+
+ if (bch2_bkey_pack_key(&packed, &insert->k, f))
+ src = &packed;
+
+ if (!bkey_deleted(&insert->k))
+ btree_keys_account_key_add(&b->nr, t - b->set, src);
+
+ if (src->u64s != clobber_u64s) {
+ u64 *src_p = (u64 *) where->_data + clobber_u64s;
+ u64 *dst_p = (u64 *) where->_data + src->u64s;
+
+ EBUG_ON((int) le16_to_cpu(bset(b, t)->u64s) <
+ (int) clobber_u64s - src->u64s);
+
+ memmove_u64s(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p);
+ le16_add_cpu(&bset(b, t)->u64s, src->u64s - clobber_u64s);
+ set_btree_bset_end(b, t);
+ }
+
+ memcpy_u64s_small(where, src,
+ bkeyp_key_u64s(f, src));
+ memcpy_u64s(bkeyp_val(f, where), &insert->v,
+ bkeyp_val_u64s(f, src));
+
+ if (src->u64s != clobber_u64s)
+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s);
+
+ bch2_verify_btree_nr_keys(b);
+}
+
+void bch2_bset_delete(struct btree *b,
+ struct bkey_packed *where,
+ unsigned clobber_u64s)
+{
+ struct bset_tree *t = bset_tree_last(b);
+ u64 *src_p = (u64 *) where->_data + clobber_u64s;
+ u64 *dst_p = where->_data;
+
+ bch2_bset_verify_rw_aux_tree(b, t);
+
+ EBUG_ON(le16_to_cpu(bset(b, t)->u64s) < clobber_u64s);
+
+ memmove_u64s_down(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p);
+ le16_add_cpu(&bset(b, t)->u64s, -clobber_u64s);
+ set_btree_bset_end(b, t);
+
+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, 0);
+}
+
+/* Lookup */
+
+__flatten
+static struct bkey_packed *bset_search_write_set(const struct btree *b,
+ struct bset_tree *t,
+ struct bpos *search)
+{
+ unsigned l = 0, r = t->size;
+
+ while (l + 1 != r) {
+ unsigned m = (l + r) >> 1;
+
+ if (bpos_lt(rw_aux_tree(b, t)[m].k, *search))
+ l = m;
+ else
+ r = m;
+ }
+
+ return rw_aux_to_bkey(b, t, l);
+}
+
+static inline void prefetch_four_cachelines(void *p)
+{
+#ifdef CONFIG_X86_64
+ asm("prefetcht0 (-127 + 64 * 0)(%0);"
+ "prefetcht0 (-127 + 64 * 1)(%0);"
+ "prefetcht0 (-127 + 64 * 2)(%0);"
+ "prefetcht0 (-127 + 64 * 3)(%0);"
+ :
+ : "r" (p + 127));
+#else
+ prefetch(p + L1_CACHE_BYTES * 0);
+ prefetch(p + L1_CACHE_BYTES * 1);
+ prefetch(p + L1_CACHE_BYTES * 2);
+ prefetch(p + L1_CACHE_BYTES * 3);
+#endif
+}
+
+static inline bool bkey_mantissa_bits_dropped(const struct btree *b,
+ const struct bkey_float *f,
+ unsigned idx)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ unsigned key_bits_start = b->format.key_u64s * 64 - b->nr_key_bits;
+
+ return f->exponent > key_bits_start;
+#else
+ unsigned key_bits_end = high_bit_offset + b->nr_key_bits;
+
+ return f->exponent + BKEY_MANTISSA_BITS < key_bits_end;
+#endif
+}
+
+__flatten
+static struct bkey_packed *bset_search_tree(const struct btree *b,
+ const struct bset_tree *t,
+ const struct bpos *search,
+ const struct bkey_packed *packed_search)
+{
+ struct ro_aux_tree *base = ro_aux_tree_base(b, t);
+ struct bkey_float *f;
+ struct bkey_packed *k;
+ unsigned inorder, n = 1, l, r;
+ int cmp;
+
+ do {
+ if (likely(n << 4 < t->size))
+ prefetch(&base->f[n << 4]);
+
+ f = &base->f[n];
+ if (unlikely(f->exponent >= BFLOAT_FAILED))
+ goto slowpath;
+
+ l = f->mantissa;
+ r = bkey_mantissa(packed_search, f, n);
+
+ if (unlikely(l == r) && bkey_mantissa_bits_dropped(b, f, n))
+ goto slowpath;
+
+ n = n * 2 + (l < r);
+ continue;
+slowpath:
+ k = tree_to_bkey(b, t, n);
+ cmp = bkey_cmp_p_or_unp(b, k, packed_search, search);
+ if (!cmp)
+ return k;
+
+ n = n * 2 + (cmp < 0);
+ } while (n < t->size);
+
+ inorder = __eytzinger1_to_inorder(n >> 1, t->size - 1, t->extra);
+
+ /*
+ * n would have been the node we recursed to - the low bit tells us if
+ * we recursed left or recursed right.
+ */
+ if (likely(!(n & 1))) {
+ --inorder;
+ if (unlikely(!inorder))
+ return btree_bkey_first(b, t);
+
+ f = &base->f[eytzinger1_prev(n >> 1, t->size - 1)];
+ }
+
+ return cacheline_to_bkey(b, t, inorder, f->key_offset);
+}
+
+static __always_inline __flatten
+struct bkey_packed *__bch2_bset_search(struct btree *b,
+ struct bset_tree *t,
+ struct bpos *search,
+ const struct bkey_packed *lossy_packed_search)
+{
+
+ /*
+	 * First we search for a cacheline, then we do a linear search within
+	 * that cacheline.
+	 *
+	 * To search for the cacheline, there are three different possibilities:
+ * * The set is too small to have a search tree, so we just do a linear
+ * search over the whole set.
+ * * The set is the one we're currently inserting into; keeping a full
+ * auxiliary search tree up to date would be too expensive, so we
+ * use a much simpler lookup table to do a binary search -
+ * bset_search_write_set().
+ * * Or we use the auxiliary search tree we constructed earlier -
+ * bset_search_tree()
+ */
+
+ switch (bset_aux_tree_type(t)) {
+ case BSET_NO_AUX_TREE:
+ return btree_bkey_first(b, t);
+ case BSET_RW_AUX_TREE:
+ return bset_search_write_set(b, t, search);
+ case BSET_RO_AUX_TREE:
+ return bset_search_tree(b, t, search, lossy_packed_search);
+ default:
+ BUG();
+ }
+}
+
+static __always_inline __flatten
+struct bkey_packed *bch2_bset_search_linear(struct btree *b,
+ struct bset_tree *t,
+ struct bpos *search,
+ struct bkey_packed *packed_search,
+ const struct bkey_packed *lossy_packed_search,
+ struct bkey_packed *m)
+{
+ if (lossy_packed_search)
+ while (m != btree_bkey_last(b, t) &&
+ bkey_iter_cmp_p_or_unp(b, m,
+ lossy_packed_search, search) < 0)
+ m = bkey_p_next(m);
+
+ if (!packed_search)
+ while (m != btree_bkey_last(b, t) &&
+ bkey_iter_pos_cmp(b, m, search) < 0)
+ m = bkey_p_next(m);
+
+ if (bch2_expensive_debug_checks) {
+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
+
+ BUG_ON(prev &&
+ bkey_iter_cmp_p_or_unp(b, prev,
+ packed_search, search) >= 0);
+ }
+
+ return m;
+}
+
+/* Btree node iterator */
+
+static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter,
+ struct btree *b,
+ const struct bkey_packed *k,
+ const struct bkey_packed *end)
+{
+ if (k != end) {
+ struct btree_node_iter_set *pos;
+
+ btree_node_iter_for_each(iter, pos)
+ ;
+
+ BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data));
+ *pos = (struct btree_node_iter_set) {
+ __btree_node_key_to_offset(b, k),
+ __btree_node_key_to_offset(b, end)
+ };
+ }
+}
+
+void bch2_btree_node_iter_push(struct btree_node_iter *iter,
+ struct btree *b,
+ const struct bkey_packed *k,
+ const struct bkey_packed *end)
+{
+ __bch2_btree_node_iter_push(iter, b, k, end);
+ bch2_btree_node_iter_sort(iter, b);
+}
+
+noinline __flatten __cold
+static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
+ struct btree *b, struct bpos *search)
+{
+ struct bkey_packed *k;
+
+ trace_bkey_pack_pos_fail(search);
+
+ bch2_btree_node_iter_init_from_start(iter, b);
+
+ while ((k = bch2_btree_node_iter_peek(iter, b)) &&
+ bkey_iter_pos_cmp(b, k, search) < 0)
+ bch2_btree_node_iter_advance(iter, b);
+}
+
+/**
+ * bch2_btree_node_iter_init - initialize a btree node iterator, starting from a
+ * given position
+ *
+ * @iter: iterator to initialize
+ * @b: btree node to search
+ * @search: search key
+ *
+ * Main entry point to the lookup code for individual btree nodes:
+ *
+ * NOTE:
+ *
+ * When you don't filter out deleted keys, btree nodes _do_ contain duplicate
+ * keys. This doesn't matter for most code, but it does matter for lookups.
+ *
+ * Consider some adjacent keys, including a string of equal keys:
+ * i j k k k k l m
+ *
+ * If you search for k, the lookup code isn't guaranteed to return you any
+ * specific k. The lookup code is conceptually doing a binary search and
+ * iterating backwards is very expensive so if the pivot happens to land at the
+ * last k that's what you'll get.
+ *
+ * This works out ok, but it's something to be aware of:
+ *
+ * - For non extents, we guarantee that the live key comes last - see
+ * btree_node_iter_cmp(), keys_out_of_order(). So the duplicates you don't
+ * see will only be deleted keys you don't care about.
+ *
+ * - For extents, deleted keys sort last (see the comment at the top of this
+ * file). But when you're searching for extents, you actually want the first
+ * key strictly greater than your search key - an extent that compares equal
+ * to the search key is going to have 0 sectors after the search key.
+ *
+ * But this does mean that we can't just search for
+ * bpos_successor(start_of_range) to get the first extent that overlaps with
+ * the range we want - if we're unlucky and there's an extent that ends
+ * exactly where we searched, then there could be a deleted key at the same
+ * position and we'd get that when we search instead of the preceding extent
+ * we needed.
+ *
+ * So we've got to search for start_of_range, then after the lookup iterate
+ * past any extents that compare equal to the position we searched for.
+ */
+__flatten
+void bch2_btree_node_iter_init(struct btree_node_iter *iter,
+ struct btree *b, struct bpos *search)
+{
+ struct bkey_packed p, *packed_search = NULL;
+ struct btree_node_iter_set *pos = iter->data;
+ struct bkey_packed *k[MAX_BSETS];
+ unsigned i;
+
+ EBUG_ON(bpos_lt(*search, b->data->min_key));
+ EBUG_ON(bpos_gt(*search, b->data->max_key));
+ bset_aux_tree_verify(b);
+
+ memset(iter, 0, sizeof(*iter));
+
+ switch (bch2_bkey_pack_pos_lossy(&p, *search, b)) {
+ case BKEY_PACK_POS_EXACT:
+ packed_search = &p;
+ break;
+ case BKEY_PACK_POS_SMALLER:
+ packed_search = NULL;
+ break;
+ case BKEY_PACK_POS_FAIL:
+ btree_node_iter_init_pack_failed(iter, b, search);
+ return;
+ }
+
+ for (i = 0; i < b->nsets; i++) {
+ k[i] = __bch2_bset_search(b, b->set + i, search, &p);
+ prefetch_four_cachelines(k[i]);
+ }
+
+ for (i = 0; i < b->nsets; i++) {
+ struct bset_tree *t = b->set + i;
+ struct bkey_packed *end = btree_bkey_last(b, t);
+
+ k[i] = bch2_bset_search_linear(b, t, search,
+ packed_search, &p, k[i]);
+ if (k[i] != end)
+ *pos++ = (struct btree_node_iter_set) {
+ __btree_node_key_to_offset(b, k[i]),
+ __btree_node_key_to_offset(b, end)
+ };
+ }
+
+ bch2_btree_node_iter_sort(iter, b);
+}
+
+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter,
+ struct btree *b)
+{
+ struct bset_tree *t;
+
+ memset(iter, 0, sizeof(*iter));
+
+ for_each_bset(b, t)
+ __bch2_btree_node_iter_push(iter, b,
+ btree_bkey_first(b, t),
+ btree_bkey_last(b, t));
+ bch2_btree_node_iter_sort(iter, b);
+}
+
+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *iter,
+ struct btree *b,
+ struct bset_tree *t)
+{
+ struct btree_node_iter_set *set;
+
+ btree_node_iter_for_each(iter, set)
+ if (set->end == t->end_offset)
+ return __btree_node_offset_to_key(b, set->k);
+
+ return btree_bkey_last(b, t);
+}
+
+static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter,
+ struct btree *b,
+ unsigned first)
+{
+ bool ret;
+
+ if ((ret = (btree_node_iter_cmp(b,
+ iter->data[first],
+ iter->data[first + 1]) > 0)))
+ swap(iter->data[first], iter->data[first + 1]);
+ return ret;
+}
+
+void bch2_btree_node_iter_sort(struct btree_node_iter *iter,
+ struct btree *b)
+{
+ /* unrolled bubble sort: */
+
+ if (!__btree_node_iter_set_end(iter, 2)) {
+ btree_node_iter_sort_two(iter, b, 0);
+ btree_node_iter_sort_two(iter, b, 1);
+ }
+
+ if (!__btree_node_iter_set_end(iter, 1))
+ btree_node_iter_sort_two(iter, b, 0);
+}
+
+void bch2_btree_node_iter_set_drop(struct btree_node_iter *iter,
+ struct btree_node_iter_set *set)
+{
+ struct btree_node_iter_set *last =
+ iter->data + ARRAY_SIZE(iter->data) - 1;
+
+ memmove(&set[0], &set[1], (void *) last - (void *) set);
+ *last = (struct btree_node_iter_set) { 0, 0 };
+}
+
+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
+ struct btree *b)
+{
+ iter->data->k += __bch2_btree_node_iter_peek_all(iter, b)->u64s;
+
+ EBUG_ON(iter->data->k > iter->data->end);
+
+ if (unlikely(__btree_node_iter_set_end(iter, 0))) {
+ /* avoid an expensive memmove call: */
+ iter->data[0] = iter->data[1];
+ iter->data[1] = iter->data[2];
+ iter->data[2] = (struct btree_node_iter_set) { 0, 0 };
+ return;
+ }
+
+ if (__btree_node_iter_set_end(iter, 1))
+ return;
+
+ if (!btree_node_iter_sort_two(iter, b, 0))
+ return;
+
+ if (__btree_node_iter_set_end(iter, 2))
+ return;
+
+ btree_node_iter_sort_two(iter, b, 1);
+}
+
+void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
+ struct btree *b)
+{
+ if (bch2_expensive_debug_checks) {
+ bch2_btree_node_iter_verify(iter, b);
+ bch2_btree_node_iter_next_check(iter, b);
+ }
+
+ __bch2_btree_node_iter_advance(iter, b);
+}
+
+/*
+ * Expensive:
+ */
+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter,
+ struct btree *b)
+{
+ struct bkey_packed *k, *prev = NULL;
+ struct btree_node_iter_set *set;
+ struct bset_tree *t;
+ unsigned end = 0;
+
+ if (bch2_expensive_debug_checks)
+ bch2_btree_node_iter_verify(iter, b);
+
+ for_each_bset(b, t) {
+ k = bch2_bkey_prev_all(b, t,
+ bch2_btree_node_iter_bset_pos(iter, b, t));
+ if (k &&
+ (!prev || bkey_iter_cmp(b, k, prev) > 0)) {
+ prev = k;
+ end = t->end_offset;
+ }
+ }
+
+ if (!prev)
+ return NULL;
+
+ /*
+ * We're manually memmoving instead of just calling sort() to ensure the
+ * prev we picked ends up in slot 0 - sort won't necessarily put it
+ * there because of duplicate deleted keys:
+ */
+ btree_node_iter_for_each(iter, set)
+ if (set->end == end)
+ goto found;
+
+ BUG_ON(set != &iter->data[__btree_node_iter_used(iter)]);
+found:
+ BUG_ON(set >= iter->data + ARRAY_SIZE(iter->data));
+
+ memmove(&iter->data[1],
+ &iter->data[0],
+ (void *) set - (void *) &iter->data[0]);
+
+ iter->data[0].k = __btree_node_key_to_offset(b, prev);
+ iter->data[0].end = end;
+
+ if (bch2_expensive_debug_checks)
+ bch2_btree_node_iter_verify(iter, b);
+ return prev;
+}
+
+struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *iter,
+ struct btree *b)
+{
+ struct bkey_packed *prev;
+
+ do {
+ prev = bch2_btree_node_iter_prev_all(iter, b);
+ } while (prev && bkey_deleted(prev));
+
+ return prev;
+}
+
+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter,
+ struct btree *b,
+ struct bkey *u)
+{
+ struct bkey_packed *k = bch2_btree_node_iter_peek(iter, b);
+
+ return k ? bkey_disassemble(b, k, u) : bkey_s_c_null;
+}
+
+/* Mergesort */
+
+void bch2_btree_keys_stats(const struct btree *b, struct bset_stats *stats)
+{
+ const struct bset_tree *t;
+
+ for_each_bset(b, t) {
+ enum bset_aux_tree_type type = bset_aux_tree_type(t);
+ size_t j;
+
+ stats->sets[type].nr++;
+ stats->sets[type].bytes += le16_to_cpu(bset(b, t)->u64s) *
+ sizeof(u64);
+
+ if (bset_has_ro_aux_tree(t)) {
+ stats->floats += t->size - 1;
+
+ for (j = 1; j < t->size; j++)
+ stats->failed +=
+ bkey_float(b, t, j)->exponent ==
+ BFLOAT_FAILED;
+ }
+ }
+}
+
+void bch2_bfloat_to_text(struct printbuf *out, struct btree *b,
+ struct bkey_packed *k)
+{
+ struct bset_tree *t = bch2_bkey_to_bset(b, k);
+ struct bkey uk;
+ unsigned j, inorder;
+
+ if (!bset_has_ro_aux_tree(t))
+ return;
+
+ inorder = bkey_to_cacheline(b, t, k);
+ if (!inorder || inorder >= t->size)
+ return;
+
+ j = __inorder_to_eytzinger1(inorder, t->size - 1, t->extra);
+ if (k != tree_to_bkey(b, t, j))
+ return;
+
+ switch (bkey_float(b, t, j)->exponent) {
+ case BFLOAT_FAILED:
+ uk = bkey_unpack_key(b, k);
+ prt_printf(out,
+ " failed unpacked at depth %u\n"
+ "\t",
+ ilog2(j));
+ bch2_bpos_to_text(out, uk.p);
+ prt_printf(out, "\n");
+ break;
+ }
+}
diff --git a/c_src/libbcachefs/bset.h b/c_src/libbcachefs/bset.h
new file mode 100644
index 00000000..632c2b8c
--- /dev/null
+++ b/c_src/libbcachefs/bset.h
@@ -0,0 +1,541 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BSET_H
+#define _BCACHEFS_BSET_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "bcachefs.h"
+#include "bkey.h"
+#include "bkey_methods.h"
+#include "btree_types.h"
+#include "util.h" /* for time_stats */
+#include "vstructs.h"
+
+/*
+ * BKEYS:
+ *
+ * A bkey contains a key, a size field, a variable number of pointers, and some
+ * ancillary flag bits.
+ *
+ * We use two different functions for validating bkeys, bkey_invalid() and
+ * bkey_deleted().
+ *
+ * The one exception to the rule that ptr_invalid() filters out invalid keys is
+ * that it also filters out keys of size 0 - these are keys that have been
+ * completely overwritten. It'd be safe to delete these in memory while leaving
+ * them on disk, just unnecessary work - so we filter them out when resorting
+ * instead.
+ *
+ * We can't filter out stale keys when we're resorting, because garbage
+ * collection needs to find them to ensure bucket gens don't wrap around -
+ * unless we're rewriting the btree node those stale keys still exist on disk.
+ *
+ * We also implement functions here for removing some number of sectors from the
+ * front or the back of a bkey - this is mainly used for fixing overlapping
+ * extents, by removing the overlapping sectors from the older key.
+ *
+ * BSETS:
+ *
+ * A bset is an array of bkeys laid out contiguously in memory in sorted order,
+ * along with a header. A btree node is made up of a number of these, written at
+ * different times.
+ *
+ * There could be many of them on disk, but we never allow there to be more than
+ * 4 in memory - we lazily resort as needed.
+ *
+ * We implement code here for creating and maintaining auxiliary search trees
+ * (described below) for searching an individual bset, and on top of that we
+ * implement a btree iterator.
+ *
+ * BTREE ITERATOR:
+ *
+ * Most of the code in bcache doesn't care about an individual bset - it needs
+ * to search entire btree nodes and iterate over them in sorted order.
+ *
+ * The btree iterator code serves both functions; it iterates through the keys
+ * in a btree node in sorted order, starting from either keys after a specific
+ * point (if you pass it a search key) or the start of the btree node.
+ *
+ * AUXILIARY SEARCH TREES:
+ *
+ * Since keys are variable length, we can't use a binary search on a bset - we
+ * wouldn't be able to find the start of the next key. But binary searches are
+ * slow anyways, due to terrible cache behaviour; bcache originally used binary
+ * searches and that code topped out at under 50k lookups/second.
+ *
+ * So we need to construct some sort of lookup table. Since we only insert keys
+ * into the last (unwritten) set, most of the keys within a given btree node are
+ * usually in sets that are mostly constant. We use two different types of
+ * lookup tables to take advantage of this.
+ *
+ * Both lookup tables share in common that they don't index every key in the
+ * set; they index one key every BSET_CACHELINE bytes, and then a linear search
+ * is used for the rest.
+ *
+ * For sets that have been written to disk and are no longer being inserted
+ * into, we construct a binary search tree in an array - traversing a binary
+ * search tree in an array gives excellent locality of reference and is very
+ * fast, since both children of any node are adjacent to each other in memory
+ * (and their grandchildren, and great grandchildren...) - this means
+ * prefetching can be used to great effect.
+ *
+ * It's quite useful performance wise to keep these nodes small - not just
+ * because they're more likely to be in L2, but also because we can prefetch
+ * more nodes on a single cacheline and thus prefetch more iterations in advance
+ * when traversing this tree.
+ *
+ * Nodes in the auxiliary search tree must contain both a key to compare against
+ * (we don't want to fetch the key from the set, that would defeat the purpose),
+ * and a pointer to the key. We use a few tricks to compress both of these.
+ *
+ * To compress the pointer, we take advantage of the fact that one node in the
+ * search tree corresponds to precisely BSET_CACHELINE bytes in the set. We have
+ * a function (eytzinger1_to_inorder()) that takes the index of a node in a
+ * binary tree and returns what its index would be in an inorder traversal, so
+ * we only have to
+ * store the low bits of the offset.
+ *
+ * The key is 84 bits (KEY_DEV + key->key, the offset on the device). To
+ * compress that, we take advantage of the fact that when we're traversing the
+ * search tree at every iteration we know that both our search key and the key
+ * we're looking for lie within some range - bounded by our previous
+ * comparisons. (We special case the start of a search so that this is true even
+ * at the root of the tree).
+ *
+ * So we know the key we're looking for is between a and b, and a and b don't
+ * differ higher than bit 50, we don't need to check anything higher than bit
+ * 50.
+ *
+ * We don't usually need the rest of the bits, either; we only need enough bits
+ * to partition the key range we're currently checking. Consider key n - the
+ * key our auxiliary search tree node corresponds to, and key p, the key
+ * immediately preceding n. The lowest bit we need to store in the auxiliary
+ * search tree is the highest bit that differs between n and p.
+ *
+ * Note that this could be bit 0 - we might sometimes need all 80 bits to do the
+ * comparison. But we'd really like our nodes in the auxiliary search tree to be
+ * of fixed size.
+ *
+ * The solution is to make them fixed size, and when we're constructing a node
+ * check if p and n differ in the bits we need them to. If they don't, we
+ * flag that node, and when doing lookups we fall back to comparing against the
+ * real key. As long as this doesn't happen too often (and it seems to reliably
+ * happen a bit less than 1% of the time), we win - even on failures, that key
+ * is then more likely to be in cache than if we were doing binary searches all
+ * the way, since we're touching so much less memory.
+ *
+ * The keys in the auxiliary search tree are stored in (software) floating
+ * point, with an exponent and a mantissa. The exponent needs to be big enough
+ * to address all the bits in the original key, but the number of bits in the
+ * mantissa is somewhat arbitrary; more bits just gets us fewer failures.
+ *
+ * In this implementation a node is 4 bytes: an 8 bit exponent, an 8 bit key
+ * offset in units of 8 bytes (keys are 8 byte aligned), and a 16 bit mantissa.
+ * We need one node per BSET_CACHELINE (256) bytes in the btree node, so the
+ * auxiliary search trees take up about 2% as much memory as the btree itself.
+ *
+ * Constructing these auxiliary search trees is moderately expensive, and we
+ * don't want to be constantly rebuilding the search tree for the last set
+ * whenever we insert another key into it. For the unwritten set, we use a much
+ * simpler lookup table - it's just a flat array, so index i in the lookup table
+ * corresponds to the i-th range of BSET_CACHELINE bytes in the set. Indexing
+ * within each byte range works the same as with the auxiliary search trees.
+ *
+ * These are much easier to keep up to date when we insert a key - we do it
+ * somewhat lazily; when we shift a key up we usually just increment the pointer
+ * to it, only when it would overflow do we go to the trouble of finding the
+ * first key in that range of bytes again.
+ */
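+
+/*
+ * Putting it together, bch2_btree_node_iter_init() does the following per
+ * bset: __bch2_bset_search() uses the aux tree (or lookup table) to find a
+ * candidate cacheline, bch2_bset_search_linear() then walks forward within
+ * that cacheline, and the btree node iterator merges the per-bset results in
+ * sorted order.
+ */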
+
+enum bset_aux_tree_type {
+ BSET_NO_AUX_TREE,
+ BSET_RO_AUX_TREE,
+ BSET_RW_AUX_TREE,
+};
+
+#define BSET_TREE_NR_TYPES 3
+
+#define BSET_NO_AUX_TREE_VAL (U16_MAX)
+#define BSET_RW_AUX_TREE_VAL (U16_MAX - 1)
+
+static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree *t)
+{
+ switch (t->extra) {
+ case BSET_NO_AUX_TREE_VAL:
+ EBUG_ON(t->size);
+ return BSET_NO_AUX_TREE;
+ case BSET_RW_AUX_TREE_VAL:
+ EBUG_ON(!t->size);
+ return BSET_RW_AUX_TREE;
+ default:
+ EBUG_ON(!t->size);
+ return BSET_RO_AUX_TREE;
+ }
+}
+
+/*
+ * BSET_CACHELINE was originally intended to match the hardware cacheline size -
+ * it used to be 64, but the lookup code touches slightly less memory with a
+ * larger value - it's currently 256.
+ *
+ * It defines the number of bytes (in struct bset) per struct bkey_float in
+ * the auxiliary search tree - when we're done searching the bset_float tree we
+ * have this many bytes left that we do a linear search over.
+ *
+ * Since (after level 5) every level of the bset_tree is on a new cacheline,
+ * we're touching one fewer cacheline in the bset tree in exchange for one more
+ * cacheline in the linear search - but the linear search might stop before it
+ * gets to the second cacheline.
+ */
+
+#define BSET_CACHELINE 256
+
+static inline size_t btree_keys_cachelines(const struct btree *b)
+{
+ return (1U << b->byte_order) / BSET_CACHELINE;
+}
+
+static inline size_t btree_aux_data_bytes(const struct btree *b)
+{
+ return btree_keys_cachelines(b) * 8;
+}
+
+static inline size_t btree_aux_data_u64s(const struct btree *b)
+{
+ return btree_aux_data_bytes(b) / sizeof(u64);
+}
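+
+/*
+ * Example: for a 256k btree node, btree_keys_cachelines() is 1024 and
+ * btree_aux_data_bytes() is 8k.
+ */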
+
+#define for_each_bset(_b, _t) \
+ for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)
+
+#define bset_tree_for_each_key(_b, _t, _k) \
+ for (_k = btree_bkey_first(_b, _t); \
+ _k != btree_bkey_last(_b, _t); \
+ _k = bkey_p_next(_k))
+
+static inline bool bset_has_ro_aux_tree(const struct bset_tree *t)
+{
+ return bset_aux_tree_type(t) == BSET_RO_AUX_TREE;
+}
+
+static inline bool bset_has_rw_aux_tree(struct bset_tree *t)
+{
+ return bset_aux_tree_type(t) == BSET_RW_AUX_TREE;
+}
+
+static inline void bch2_bset_set_no_aux_tree(struct btree *b,
+ struct bset_tree *t)
+{
+ BUG_ON(t < b->set);
+
+ for (; t < b->set + ARRAY_SIZE(b->set); t++) {
+ t->size = 0;
+ t->extra = BSET_NO_AUX_TREE_VAL;
+ t->aux_data_offset = U16_MAX;
+ }
+}
+
+static inline void btree_node_set_format(struct btree *b,
+ struct bkey_format f)
+{
+ int len;
+
+ b->format = f;
+ b->nr_key_bits = bkey_format_key_bits(&f);
+
+ len = bch2_compile_bkey_format(&b->format, b->aux_data);
+ BUG_ON(len < 0 || len > U8_MAX);
+
+ b->unpack_fn_len = len;
+
+ bch2_bset_set_no_aux_tree(b, b->set);
+}
+
+static inline struct bset *bset_next_set(struct btree *b,
+ unsigned block_bytes)
+{
+ struct bset *i = btree_bset_last(b);
+
+ EBUG_ON(!is_power_of_2(block_bytes));
+
+ return ((void *) i) + round_up(vstruct_bytes(i), block_bytes);
+}
+
+void bch2_btree_keys_init(struct btree *);
+
+void bch2_bset_init_first(struct btree *, struct bset *);
+void bch2_bset_init_next(struct bch_fs *, struct btree *,
+ struct btree_node_entry *);
+void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);
+
+void bch2_bset_insert(struct btree *, struct btree_node_iter *,
+ struct bkey_packed *, struct bkey_i *, unsigned);
+void bch2_bset_delete(struct btree *, struct bkey_packed *, unsigned);
+
+/* Bkey utility code */
+
+/* packed or unpacked */
+static inline int bkey_cmp_p_or_unp(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r_packed,
+ const struct bpos *r)
+{
+ EBUG_ON(r_packed && !bkey_packed(r_packed));
+
+ if (unlikely(!bkey_packed(l)))
+ return bpos_cmp(packed_to_bkey_c(l)->p, *r);
+
+ if (likely(r_packed))
+ return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b);
+
+ return __bch2_bkey_cmp_left_packed_format_checked(b, l, r);
+}
+
+static inline struct bset_tree *
+bch2_bkey_to_bset_inlined(struct btree *b, struct bkey_packed *k)
+{
+ unsigned offset = __btree_node_key_to_offset(b, k);
+ struct bset_tree *t;
+
+ for_each_bset(b, t)
+ if (offset <= t->end_offset) {
+ EBUG_ON(offset < btree_bkey_first_offset(t));
+ return t;
+ }
+
+ BUG();
+}
+
+struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *);
+
+struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *,
+ struct bkey_packed *, unsigned);
+
+static inline struct bkey_packed *
+bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
+{
+ return bch2_bkey_prev_filter(b, t, k, 0);
+}
+
+static inline struct bkey_packed *
+bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k)
+{
+ return bch2_bkey_prev_filter(b, t, k, 1);
+}
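+
+/*
+ * bch2_bkey_prev_all() returns any previous key, including deleted keys;
+ * bch2_bkey_prev() (min_key_type 1) skips over deleted keys.
+ */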
+
+/* Btree key iteration */
+
+void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
+ const struct bkey_packed *,
+ const struct bkey_packed *);
+void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *,
+ struct bpos *);
+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *,
+ struct btree *);
+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
+ struct btree *,
+ struct bset_tree *);
+
+void bch2_btree_node_iter_sort(struct btree_node_iter *, struct btree *);
+void bch2_btree_node_iter_set_drop(struct btree_node_iter *,
+ struct btree_node_iter_set *);
+void bch2_btree_node_iter_advance(struct btree_node_iter *, struct btree *);
+
+#define btree_node_iter_for_each(_iter, _set) \
+ for (_set = (_iter)->data; \
+ _set < (_iter)->data + ARRAY_SIZE((_iter)->data) && \
+ (_set)->k != (_set)->end; \
+ _set++)
+
+static inline bool __btree_node_iter_set_end(struct btree_node_iter *iter,
+ unsigned i)
+{
+ return iter->data[i].k == iter->data[i].end;
+}
+
+static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
+{
+ return __btree_node_iter_set_end(iter, 0);
+}
+
+/*
+ * When keys compare equal, deleted keys compare first:
+ *
+ * XXX: only need to compare pointers for keys that are both within a
+ * btree_node_iterator - we need to break ties for prev() to work correctly
+ */
+static inline int bkey_iter_cmp(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r)
+{
+ return bch2_bkey_cmp_packed(b, l, r)
+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l)
+ ?: cmp_int(l, r);
+}
+
+static inline int btree_node_iter_cmp(const struct btree *b,
+ struct btree_node_iter_set l,
+ struct btree_node_iter_set r)
+{
+ return bkey_iter_cmp(b,
+ __btree_node_offset_to_key(b, l.k),
+ __btree_node_offset_to_key(b, r.k));
+}
+
+/* These assume r (the search key) is not a deleted key: */
+static inline int bkey_iter_pos_cmp(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bpos *r)
+{
+ return bkey_cmp_left_packed(b, l, r)
+ ?: -((int) bkey_deleted(l));
+}
+
+static inline int bkey_iter_cmp_p_or_unp(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r_packed,
+ const struct bpos *r)
+{
+ return bkey_cmp_p_or_unp(b, l, r_packed, r)
+ ?: -((int) bkey_deleted(l));
+}
+
+static inline struct bkey_packed *
+__bch2_btree_node_iter_peek_all(struct btree_node_iter *iter,
+ struct btree *b)
+{
+ return __btree_node_offset_to_key(b, iter->data->k);
+}
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, struct btree *b)
+{
+ return !bch2_btree_node_iter_end(iter)
+ ? __btree_node_offset_to_key(b, iter->data->k)
+ : NULL;
+}
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b)
+{
+ struct bkey_packed *k;
+
+ while ((k = bch2_btree_node_iter_peek_all(iter, b)) &&
+ bkey_deleted(k))
+ bch2_btree_node_iter_advance(iter, b);
+
+ return k;
+}
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b)
+{
+ struct bkey_packed *ret = bch2_btree_node_iter_peek_all(iter, b);
+
+ if (ret)
+ bch2_btree_node_iter_advance(iter, b);
+
+ return ret;
+}
+
+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *,
+ struct btree *);
+struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *,
+ struct btree *);
+
+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
+ struct btree *,
+ struct bkey *);
+
+#define for_each_btree_node_key(b, k, iter) \
+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \
+ (k = bch2_btree_node_iter_peek((iter), (b))); \
+ bch2_btree_node_iter_advance(iter, b))
+
+#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \
+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \
+ (k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\
+ bch2_btree_node_iter_advance(iter, b))
+
+/* Accounting: */
+
+static inline void btree_keys_account_key(struct btree_nr_keys *n,
+ unsigned bset,
+ struct bkey_packed *k,
+ int sign)
+{
+ n->live_u64s += k->u64s * sign;
+ n->bset_u64s[bset] += k->u64s * sign;
+
+ if (bkey_packed(k))
+ n->packed_keys += sign;
+ else
+ n->unpacked_keys += sign;
+}
+
+static inline void btree_keys_account_val_delta(struct btree *b,
+ struct bkey_packed *k,
+ int delta)
+{
+ struct bset_tree *t = bch2_bkey_to_bset(b, k);
+
+ b->nr.live_u64s += delta;
+ b->nr.bset_u64s[t - b->set] += delta;
+}
+
+#define btree_keys_account_key_add(_nr, _bset_idx, _k) \
+ btree_keys_account_key(_nr, _bset_idx, _k, 1)
+#define btree_keys_account_key_drop(_nr, _bset_idx, _k) \
+ btree_keys_account_key(_nr, _bset_idx, _k, -1)
+
+#define btree_account_key_add(_b, _k) \
+ btree_keys_account_key(&(_b)->nr, \
+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, 1)
+#define btree_account_key_drop(_b, _k) \
+ btree_keys_account_key(&(_b)->nr, \
+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, -1)
+
+struct bset_stats {
+ struct {
+ size_t nr, bytes;
+ } sets[BSET_TREE_NR_TYPES];
+
+ size_t floats;
+ size_t failed;
+};
+
+void bch2_btree_keys_stats(const struct btree *, struct bset_stats *);
+void bch2_bfloat_to_text(struct printbuf *, struct btree *,
+ struct bkey_packed *);
+
+/* Debug stuff */
+
+void bch2_dump_bset(struct bch_fs *, struct btree *, struct bset *, unsigned);
+void bch2_dump_btree_node(struct bch_fs *, struct btree *);
+void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *);
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void __bch2_verify_btree_nr_keys(struct btree *);
+void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
+void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
+ struct bkey_packed *, unsigned);
+
+#else
+
+static inline void __bch2_verify_btree_nr_keys(struct btree *b) {}
+static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
+ struct btree *b) {}
+static inline void bch2_verify_insert_pos(struct btree *b,
+ struct bkey_packed *where,
+ struct bkey_packed *insert,
+ unsigned clobber_u64s) {}
+#endif
+
+static inline void bch2_verify_btree_nr_keys(struct btree *b)
+{
+ if (bch2_debug_check_btree_accounting)
+ __bch2_verify_btree_nr_keys(b);
+}
+
+#endif /* _BCACHEFS_BSET_H */
diff --git a/c_src/libbcachefs/btree_cache.c b/c_src/libbcachefs/btree_cache.c
new file mode 100644
index 00000000..8e2488a4
--- /dev/null
+++ b/c_src/libbcachefs/btree_cache.c
@@ -0,0 +1,1211 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_buf.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_locking.h"
+#include "debug.h"
+#include "errcode.h"
+#include "error.h"
+#include "journal.h"
+#include "trace.h"
+
+#include <linux/prefetch.h>
+#include <linux/sched/mm.h>
+
+const char * const bch2_btree_node_flags[] = {
+#define x(f) #f,
+ BTREE_FLAGS()
+#undef x
+ NULL
+};
+
+void bch2_recalc_btree_reserve(struct bch_fs *c)
+{
+ unsigned i, reserve = 16;
+
+ if (!c->btree_roots_known[0].b)
+ reserve += 8;
+
+ for (i = 0; i < btree_id_nr_alive(c); i++) {
+ struct btree_root *r = bch2_btree_id_root(c, i);
+
+ if (r->b)
+ reserve += min_t(unsigned, 1, r->b->c.level) * 8;
+ }
+
+ c->btree_cache.reserve = reserve;
+}
+
+static inline unsigned btree_cache_can_free(struct btree_cache *bc)
+{
+ return max_t(int, 0, bc->used - bc->reserve);
+}
+
+static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
+{
+ if (b->c.lock.readers)
+ list_move(&b->list, &bc->freed_pcpu);
+ else
+ list_move(&b->list, &bc->freed_nonpcpu);
+}
+
+static void btree_node_data_free(struct bch_fs *c, struct btree *b)
+{
+ struct btree_cache *bc = &c->btree_cache;
+
+ EBUG_ON(btree_node_write_in_flight(b));
+
+ clear_btree_node_just_written(b);
+
+ kvpfree(b->data, btree_bytes(c));
+ b->data = NULL;
+#ifdef __KERNEL__
+ kvfree(b->aux_data);
+#else
+ munmap(b->aux_data, btree_aux_data_bytes(b));
+#endif
+ b->aux_data = NULL;
+
+ bc->used--;
+
+ btree_node_to_freedlist(bc, b);
+}
+
+static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const struct btree *b = obj;
+ const u64 *v = arg->key;
+
+ return b->hash_val == *v ? 0 : 1;
+}
+
+static const struct rhashtable_params bch_btree_cache_params = {
+ .head_offset = offsetof(struct btree, hash),
+ .key_offset = offsetof(struct btree, hash_val),
+ .key_len = sizeof(u64),
+ .obj_cmpfn = bch2_btree_cache_cmp_fn,
+};
+
+static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
+{
+ BUG_ON(b->data || b->aux_data);
+
+ b->data = kvpmalloc(btree_bytes(c), gfp);
+ if (!b->data)
+ return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
+#ifdef __KERNEL__
+ b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp);
+#else
+ b->aux_data = mmap(NULL, btree_aux_data_bytes(b),
+ PROT_READ|PROT_WRITE|PROT_EXEC,
+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ if (b->aux_data == MAP_FAILED)
+ b->aux_data = NULL;
+#endif
+ if (!b->aux_data) {
+ kvpfree(b->data, btree_bytes(c));
+ b->data = NULL;
+ return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
+ }
+
+ return 0;
+}
+
+static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
+{
+ struct btree *b;
+
+ b = kzalloc(sizeof(struct btree), gfp);
+ if (!b)
+ return NULL;
+
+ bkey_btree_ptr_init(&b->key);
+ INIT_LIST_HEAD(&b->list);
+ INIT_LIST_HEAD(&b->write_blocked);
+ b->byte_order = ilog2(btree_bytes(c));
+ return b;
+}
+
+struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
+{
+ struct btree_cache *bc = &c->btree_cache;
+ struct btree *b;
+
+ b = __btree_node_mem_alloc(c, GFP_KERNEL);
+ if (!b)
+ return NULL;
+
+ if (btree_node_data_alloc(c, b, GFP_KERNEL)) {
+ kfree(b);
+ return NULL;
+ }
+
+ bch2_btree_lock_init(&b->c, 0);
+
+ bc->used++;
+ list_add(&b->list, &bc->freeable);
+ return b;
+}
+
+/* Btree in memory cache - hash table */
+
+void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
+{
+ int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
+
+ BUG_ON(ret);
+
+ /* Cause future lookups for this node to fail: */
+ b->hash_val = 0;
+}
+
+int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
+{
+ BUG_ON(b->hash_val);
+ b->hash_val = btree_ptr_hash_val(&b->key);
+
+ return rhashtable_lookup_insert_fast(&bc->table, &b->hash,
+ bch_btree_cache_params);
+}
+
+int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
+ unsigned level, enum btree_id id)
+{
+ int ret;
+
+ b->c.level = level;
+ b->c.btree_id = id;
+
+ mutex_lock(&bc->lock);
+ ret = __bch2_btree_node_hash_insert(bc, b);
+ if (!ret)
+ list_add_tail(&b->list, &bc->live);
+ mutex_unlock(&bc->lock);
+
+ return ret;
+}
+
+__flatten
+static inline struct btree *btree_cache_find(struct btree_cache *bc,
+ const struct bkey_i *k)
+{
+ u64 v = btree_ptr_hash_val(k);
+
+ return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params);
+}
+
+/*
+ * this version is for btree nodes that have already been freed (we're not
+ * reaping a real btree node)
+ */
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
+{
+ struct btree_cache *bc = &c->btree_cache;
+ int ret = 0;
+
+ lockdep_assert_held(&bc->lock);
+wait_on_io:
+ if (b->flags & ((1U << BTREE_NODE_dirty)|
+ (1U << BTREE_NODE_read_in_flight)|
+ (1U << BTREE_NODE_write_in_flight))) {
+ if (!flush)
+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
+
+ /* XXX: waiting on IO with btree cache lock held */
+ bch2_btree_node_wait_on_read(b);
+ bch2_btree_node_wait_on_write(b);
+ }
+
+ if (!six_trylock_intent(&b->c.lock))
+ return -BCH_ERR_ENOMEM_btree_node_reclaim;
+
+ if (!six_trylock_write(&b->c.lock))
+ goto out_unlock_intent;
+
+ /* recheck under lock */
+ if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
+ (1U << BTREE_NODE_write_in_flight))) {
+ if (!flush)
+ goto out_unlock;
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+ goto wait_on_io;
+ }
+
+ if (btree_node_noevict(b) ||
+ btree_node_write_blocked(b) ||
+ btree_node_will_make_reachable(b))
+ goto out_unlock;
+
+ if (btree_node_dirty(b)) {
+ if (!flush)
+ goto out_unlock;
+ /*
+ * Using the underscore version because we don't want to compact
+ * bsets after the write, since this node is about to be evicted
+ * - unless btree verify mode is enabled, since it runs out of
+ * the post write cleanup:
+ */
+ if (bch2_verify_btree_ondisk)
+ bch2_btree_node_write(c, b, SIX_LOCK_intent,
+ BTREE_WRITE_cache_reclaim);
+ else
+ __bch2_btree_node_write(c, b,
+ BTREE_WRITE_cache_reclaim);
+
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+ goto wait_on_io;
+ }
+out:
+ if (b->hash_val && !ret)
+ trace_and_count(c, btree_cache_reap, c, b);
+ return ret;
+out_unlock:
+ six_unlock_write(&b->c.lock);
+out_unlock_intent:
+ six_unlock_intent(&b->c.lock);
+ ret = -BCH_ERR_ENOMEM_btree_node_reclaim;
+ goto out;
+}
+
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
+{
+ return __btree_node_reclaim(c, b, false);
+}
+
+static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
+{
+ return __btree_node_reclaim(c, b, true);
+}
+
+static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct bch_fs *c = shrink->private_data;
+ struct btree_cache *bc = &c->btree_cache;
+ struct btree *b, *t;
+ unsigned long nr = sc->nr_to_scan;
+ unsigned long can_free = 0;
+ unsigned long freed = 0;
+ unsigned long touched = 0;
+ unsigned i, flags;
+ unsigned long ret = SHRINK_STOP;
+ bool trigger_writes = atomic_read(&bc->dirty) + nr >=
+ bc->used * 3 / 4;
+
+ if (bch2_btree_shrinker_disabled)
+ return SHRINK_STOP;
+
+ mutex_lock(&bc->lock);
+ flags = memalloc_nofs_save();
+
+ /*
+ * It's _really_ critical that we don't free too many btree nodes - we
+ * have to always leave ourselves a reserve. The reserve is how we
+ * guarantee that allocating memory for a new btree node can always
+ * succeed, so that inserting keys into the btree can always succeed and
+ * IO can always make forward progress:
+ */
+ can_free = btree_cache_can_free(bc);
+ nr = min_t(unsigned long, nr, can_free);
+
+ i = 0;
+ list_for_each_entry_safe(b, t, &bc->freeable, list) {
+ /*
+ * Leave a few nodes on the freeable list, so that a btree split
+ * won't have to hit the system allocator:
+ */
+ if (++i <= 3)
+ continue;
+
+ touched++;
+
+ if (touched >= nr)
+ goto out;
+
+ if (!btree_node_reclaim(c, b)) {
+ btree_node_data_free(c, b);
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+ freed++;
+ }
+ }
+restart:
+ list_for_each_entry_safe(b, t, &bc->live, list) {
+ touched++;
+
+ if (btree_node_accessed(b)) {
+ clear_btree_node_accessed(b);
+ } else if (!btree_node_reclaim(c, b)) {
+ freed++;
+ btree_node_data_free(c, b);
+
+ bch2_btree_node_hash_remove(bc, b);
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+
+ if (freed == nr)
+ goto out_rotate;
+ } else if (trigger_writes &&
+ btree_node_dirty(b) &&
+ !btree_node_will_make_reachable(b) &&
+ !btree_node_write_blocked(b) &&
+ six_trylock_read(&b->c.lock)) {
+ list_move(&bc->live, &b->list);
+ mutex_unlock(&bc->lock);
+ __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
+ six_unlock_read(&b->c.lock);
+ if (touched >= nr)
+ goto out_nounlock;
+ mutex_lock(&bc->lock);
+ goto restart;
+ }
+
+ if (touched >= nr)
+ break;
+ }
+out_rotate:
+ if (&t->list != &bc->live)
+ list_move_tail(&bc->live, &t->list);
+out:
+ mutex_unlock(&bc->lock);
+out_nounlock:
+ ret = freed;
+ memalloc_nofs_restore(flags);
+ trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret);
+ return ret;
+}
+
+static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct bch_fs *c = shrink->private_data;
+ struct btree_cache *bc = &c->btree_cache;
+
+ if (bch2_btree_shrinker_disabled)
+ return 0;
+
+ return btree_cache_can_free(bc);
+}
+
+void bch2_fs_btree_cache_exit(struct bch_fs *c)
+{
+ struct btree_cache *bc = &c->btree_cache;
+ struct btree *b;
+ unsigned i, flags;
+
+ shrinker_free(bc->shrink);
+
+ /* vfree() can allocate memory: */
+ flags = memalloc_nofs_save();
+ mutex_lock(&bc->lock);
+
+ if (c->verify_data)
+ list_move(&c->verify_data->list, &bc->live);
+
+ kvpfree(c->verify_ondisk, btree_bytes(c));
+
+ for (i = 0; i < btree_id_nr_alive(c); i++) {
+ struct btree_root *r = bch2_btree_id_root(c, i);
+
+ if (r->b)
+ list_add(&r->b->list, &bc->live);
+ }
+
+ list_splice(&bc->freeable, &bc->live);
+
+ while (!list_empty(&bc->live)) {
+ b = list_first_entry(&bc->live, struct btree, list);
+
+ BUG_ON(btree_node_read_in_flight(b) ||
+ btree_node_write_in_flight(b));
+
+ btree_node_data_free(c, b);
+ }
+
+ BUG_ON(!bch2_journal_error(&c->journal) &&
+ atomic_read(&c->btree_cache.dirty));
+
+ list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
+
+ while (!list_empty(&bc->freed_nonpcpu)) {
+ b = list_first_entry(&bc->freed_nonpcpu, struct btree, list);
+ list_del(&b->list);
+ six_lock_exit(&b->c.lock);
+ kfree(b);
+ }
+
+ mutex_unlock(&bc->lock);
+ memalloc_nofs_restore(flags);
+
+ if (bc->table_init_done)
+ rhashtable_destroy(&bc->table);
+}
+
+int bch2_fs_btree_cache_init(struct bch_fs *c)
+{
+ struct btree_cache *bc = &c->btree_cache;
+ struct shrinker *shrink;
+ unsigned i;
+ int ret = 0;
+
+ ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
+ if (ret)
+ goto err;
+
+ bc->table_init_done = true;
+
+ bch2_recalc_btree_reserve(c);
+
+ for (i = 0; i < bc->reserve; i++)
+ if (!__bch2_btree_node_mem_alloc(c))
+ goto err;
+
+ list_splice_init(&bc->live, &bc->freeable);
+
+ mutex_init(&c->verify_lock);
+
+ shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
+ if (!shrink)
+ goto err;
+ bc->shrink = shrink;
+ shrink->count_objects = bch2_btree_cache_count;
+ shrink->scan_objects = bch2_btree_cache_scan;
+ shrink->seeks = 4;
+ shrink->private_data = c;
+ shrinker_register(shrink);
+
+ return 0;
+err:
+ return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+}
+
+void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
+{
+ mutex_init(&bc->lock);
+ INIT_LIST_HEAD(&bc->live);
+ INIT_LIST_HEAD(&bc->freeable);
+ INIT_LIST_HEAD(&bc->freed_pcpu);
+ INIT_LIST_HEAD(&bc->freed_nonpcpu);
+}
+
+/*
+ * We can only have one thread cannibalizing other cached btree nodes at a time,
+ * or we'll deadlock. We use an open coded mutex to ensure that, which
+ * bch2_btree_cache_cannibalize_lock() takes. This means every time we unlock
+ * the root of the btree, we need to release this lock if we have it held.
+ */
+void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_cache *bc = &c->btree_cache;
+
+ if (bc->alloc_lock == current) {
+ trace_and_count(c, btree_cache_cannibalize_unlock, trans);
+ bc->alloc_lock = NULL;
+ closure_wake_up(&bc->alloc_wait);
+ }
+}
+
+int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_cache *bc = &c->btree_cache;
+ struct task_struct *old;
+
+ old = cmpxchg(&bc->alloc_lock, NULL, current);
+ if (old == NULL || old == current)
+ goto success;
+
+ if (!cl) {
+ trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
+ return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock;
+ }
+
+ closure_wait(&bc->alloc_wait, cl);
+
+ /* Try again, after adding ourselves to waitlist */
+ old = cmpxchg(&bc->alloc_lock, NULL, current);
+ if (old == NULL || old == current) {
+ /* We raced */
+ closure_wake_up(&bc->alloc_wait);
+ goto success;
+ }
+
+ trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
+ return -BCH_ERR_btree_cache_cannibalize_lock_blocked;
+
+success:
+ trace_and_count(c, btree_cache_cannibalize_lock, trans);
+ return 0;
+}
+
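+/*
+ * Illustrative only, not part of the original source: a sketch of how a
+ * caller that may need to steal a cached node brackets its allocations,
+ * assuming it owns a struct closure cl:
+ *
+ *	ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
+ *	if (!ret) {
+ *		b = bch2_btree_node_mem_alloc(trans, false);
+ *		...
+ *		bch2_btree_cache_cannibalize_unlock(trans);
+ *	}
+ *
+ * The unlock is a no-op unless the current task holds bc->alloc_lock, so it
+ * is safe to call unconditionally on the exit path.
+ */
+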
+static struct btree *btree_node_cannibalize(struct bch_fs *c)
+{
+ struct btree_cache *bc = &c->btree_cache;
+ struct btree *b;
+
+ list_for_each_entry_reverse(b, &bc->live, list)
+ if (!btree_node_reclaim(c, b))
+ return b;
+
+ while (1) {
+ list_for_each_entry_reverse(b, &bc->live, list)
+ if (!btree_node_write_and_reclaim(c, b))
+ return b;
+
+ /*
+ * Rare case: all nodes were intent-locked.
+ * Just busy-wait.
+ */
+ WARN_ONCE(1, "btree cache cannibalize failed\n");
+ cond_resched();
+ }
+}
+
+struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_cache *bc = &c->btree_cache;
+ struct list_head *freed = pcpu_read_locks
+ ? &bc->freed_pcpu
+ : &bc->freed_nonpcpu;
+ struct btree *b, *b2;
+ u64 start_time = local_clock();
+ unsigned flags;
+
+ flags = memalloc_nofs_save();
+ mutex_lock(&bc->lock);
+
+ /*
+ * We never free struct btree itself, just the memory that holds the on
+ * disk node. Check the freed list before allocating a new one:
+ */
+ list_for_each_entry(b, freed, list)
+ if (!btree_node_reclaim(c, b)) {
+ list_del_init(&b->list);
+ goto got_node;
+ }
+
+ b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
+ if (!b) {
+ mutex_unlock(&bc->lock);
+ bch2_trans_unlock(trans);
+ b = __btree_node_mem_alloc(c, GFP_KERNEL);
+ if (!b)
+ goto err;
+ mutex_lock(&bc->lock);
+ }
+
+ bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0);
+
+ BUG_ON(!six_trylock_intent(&b->c.lock));
+ BUG_ON(!six_trylock_write(&b->c.lock));
+got_node:
+
+ /*
+ * btree_free() doesn't free memory; it sticks the node on the end of
+ * the list. Check if there are any freed nodes there:
+ */
+ list_for_each_entry(b2, &bc->freeable, list)
+ if (!btree_node_reclaim(c, b2)) {
+ swap(b->data, b2->data);
+ swap(b->aux_data, b2->aux_data);
+ btree_node_to_freedlist(bc, b2);
+ six_unlock_write(&b2->c.lock);
+ six_unlock_intent(&b2->c.lock);
+ goto got_mem;
+ }
+
+ mutex_unlock(&bc->lock);
+
+ if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) {
+ bch2_trans_unlock(trans);
+ if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))
+ goto err;
+ }
+
+ mutex_lock(&bc->lock);
+ bc->used++;
+got_mem:
+ mutex_unlock(&bc->lock);
+
+ BUG_ON(btree_node_hashed(b));
+ BUG_ON(btree_node_dirty(b));
+ BUG_ON(btree_node_write_in_flight(b));
+out:
+ b->flags = 0;
+ b->written = 0;
+ b->nsets = 0;
+ b->sib_u64s[0] = 0;
+ b->sib_u64s[1] = 0;
+ b->whiteout_u64s = 0;
+ bch2_btree_keys_init(b);
+ set_btree_node_accessed(b);
+
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
+ start_time);
+
+ memalloc_nofs_restore(flags);
+ return b;
+err:
+ mutex_lock(&bc->lock);
+
+ /* Try to cannibalize another cached btree node: */
+ if (bc->alloc_lock == current) {
+ b2 = btree_node_cannibalize(c);
+ clear_btree_node_just_written(b2);
+ bch2_btree_node_hash_remove(bc, b2);
+
+ if (b) {
+ swap(b->data, b2->data);
+ swap(b->aux_data, b2->aux_data);
+ btree_node_to_freedlist(bc, b2);
+ six_unlock_write(&b2->c.lock);
+ six_unlock_intent(&b2->c.lock);
+ } else {
+ b = b2;
+ list_del_init(&b->list);
+ }
+
+ mutex_unlock(&bc->lock);
+
+ trace_and_count(c, btree_cache_cannibalize, trans);
+ goto out;
+ }
+
+ mutex_unlock(&bc->lock);
+ memalloc_nofs_restore(flags);
+ return ERR_PTR(-BCH_ERR_ENOMEM_btree_node_mem_alloc);
+}
+
+/* Slowpath, don't want it inlined into btree_iter_traverse() */
+static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
+ struct btree_path *path,
+ const struct bkey_i *k,
+ enum btree_id btree_id,
+ unsigned level,
+ enum six_lock_type lock_type,
+ bool sync)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_cache *bc = &c->btree_cache;
+ struct btree *b;
+ u32 seq;
+
+ BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
+ /*
+ * Parent node must be locked, else we could read in a btree node that's
+ * been freed:
+ */
+ if (path && !bch2_btree_node_relock(trans, path, level + 1)) {
+ trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock));
+ }
+
+ b = bch2_btree_node_mem_alloc(trans, level != 0);
+
+ if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) {
+ trans->memory_allocation_failure = true;
+ trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
+ }
+
+ if (IS_ERR(b))
+ return b;
+
+ bkey_copy(&b->key, k);
+ if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
+ /* raced with another fill: */
+
+ /* mark as unhashed... */
+ b->hash_val = 0;
+
+ mutex_lock(&bc->lock);
+ list_add(&b->list, &bc->freeable);
+ mutex_unlock(&bc->lock);
+
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+ return NULL;
+ }
+
+ set_btree_node_read_in_flight(b);
+
+ six_unlock_write(&b->c.lock);
+ seq = six_lock_seq(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+
+ /* Unlock before doing IO: */
+ if (path && sync)
+ bch2_trans_unlock_noassert(trans);
+
+ bch2_btree_node_read(trans, b, sync);
+
+ if (!sync)
+ return NULL;
+
+ if (path) {
+ int ret = bch2_trans_relock(trans) ?:
+ bch2_btree_path_relock_intent(trans, path);
+ if (ret) {
+ BUG_ON(!trans->restarted);
+ return ERR_PTR(ret);
+ }
+ }
+
+ if (!six_relock_type(&b->c.lock, lock_type, seq)) {
+ if (path)
+ trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
+ }
+
+ return b;
+}
+
+static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
+{
+ struct printbuf buf = PRINTBUF;
+
+ if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations)
+ return;
+
+ prt_printf(&buf,
+ "btree node header doesn't match ptr\n"
+ "btree %s level %u\n"
+ "ptr: ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ prt_printf(&buf, "\nheader: btree %s level %llu\n"
+ "min ",
+ bch2_btree_id_str(BTREE_NODE_ID(b->data)),
+ BTREE_NODE_LEVEL(b->data));
+ bch2_bpos_to_text(&buf, b->data->min_key);
+
+ prt_printf(&buf, "\nmax ");
+ bch2_bpos_to_text(&buf, b->data->max_key);
+
+ bch2_fs_inconsistent(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+}
+
+static inline void btree_check_header(struct bch_fs *c, struct btree *b)
+{
+ if (b->c.btree_id != BTREE_NODE_ID(b->data) ||
+ b->c.level != BTREE_NODE_LEVEL(b->data) ||
+ !bpos_eq(b->data->max_key, b->key.k.p) ||
+ (b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ !bpos_eq(b->data->min_key,
+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key)))
+ btree_bad_header(c, b);
+}
+
+static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
+ const struct bkey_i *k, unsigned level,
+ enum six_lock_type lock_type,
+ unsigned long trace_ip)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_cache *bc = &c->btree_cache;
+ struct btree *b;
+ struct bset_tree *t;
+ bool need_relock = false;
+ int ret;
+
+ EBUG_ON(level >= BTREE_MAX_DEPTH);
+retry:
+ b = btree_cache_find(bc, k);
+ if (unlikely(!b)) {
+ /*
+ * We must have the parent locked to call bch2_btree_node_fill(),
+ * else we could read in a btree node from disk that's been
+ * freed:
+ */
+ b = bch2_btree_node_fill(trans, path, k, path->btree_id,
+ level, lock_type, true);
+ need_relock = true;
+
+ /* We raced and found the btree node in the cache */
+ if (!b)
+ goto retry;
+
+ if (IS_ERR(b))
+ return b;
+ } else {
+ if (btree_node_read_locked(path, level + 1))
+ btree_node_unlock(trans, path, level + 1);
+
+ ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ERR_PTR(ret);
+
+ BUG_ON(ret);
+
+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
+ b->c.level != level ||
+ race_fault())) {
+ six_unlock_type(&b->c.lock, lock_type);
+ if (bch2_btree_node_relock(trans, path, level + 1))
+ goto retry;
+
+ trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
+ }
+
+ /* avoid atomic set bit if it's not needed: */
+ if (!btree_node_accessed(b))
+ set_btree_node_accessed(b);
+ }
+
+ if (unlikely(btree_node_read_in_flight(b))) {
+ u32 seq = six_lock_seq(&b->c.lock);
+
+ six_unlock_type(&b->c.lock, lock_type);
+ bch2_trans_unlock(trans);
+ need_relock = true;
+
+ bch2_btree_node_wait_on_read(b);
+
+ /*
+ * should_be_locked is not set on this path yet, so we need to
+ * relock it specifically:
+ */
+ if (!six_relock_type(&b->c.lock, lock_type, seq))
+ goto retry;
+ }
+
+ if (unlikely(need_relock)) {
+ ret = bch2_trans_relock(trans) ?:
+ bch2_btree_path_relock_intent(trans, path);
+ if (ret) {
+ six_unlock_type(&b->c.lock, lock_type);
+ return ERR_PTR(ret);
+ }
+ }
+
+ prefetch(b->aux_data);
+
+ for_each_bset(b, t) {
+ void *p = (u64 *) b->aux_data + t->aux_data_offset;
+
+ prefetch(p + L1_CACHE_BYTES * 0);
+ prefetch(p + L1_CACHE_BYTES * 1);
+ prefetch(p + L1_CACHE_BYTES * 2);
+ }
+
+ if (unlikely(btree_node_read_error(b))) {
+ six_unlock_type(&b->c.lock, lock_type);
+ return ERR_PTR(-EIO);
+ }
+
+ EBUG_ON(b->c.btree_id != path->btree_id);
+ EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+ btree_check_header(c, b);
+
+ return b;
+}
+
+/**
+ * bch2_btree_node_get - find a btree node in the cache and lock it, reading it
+ * in from disk if necessary.
+ *
+ * @trans: btree transaction object
+ * @path: btree_path being traversed
+ * @k: pointer to btree node (generally KEY_TYPE_btree_ptr_v2)
+ * @level: level of btree node being looked up (0 == leaf node)
+ * @lock_type: SIX_LOCK_read or SIX_LOCK_intent
+ * @trace_ip: ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek())
+ *
+ * The btree node will have either a read or an intent lock held, depending on
+ * the @lock_type parameter.
+ *
+ * Returns: btree node or ERR_PTR()
+ */
+struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
+ const struct bkey_i *k, unsigned level,
+ enum six_lock_type lock_type,
+ unsigned long trace_ip)
+{
+ struct bch_fs *c = trans->c;
+ struct btree *b;
+ struct bset_tree *t;
+ int ret;
+
+ EBUG_ON(level >= BTREE_MAX_DEPTH);
+
+ b = btree_node_mem_ptr(k);
+
+ /*
+ * Check b->hash_val _before_ calling btree_node_lock() - this might not
+ * be the node we want anymore, and trying to lock the wrong node could
+ * cause an unnecessary transaction restart:
+ */
+ if (unlikely(!c->opts.btree_node_mem_ptr_optimization ||
+ !b ||
+ b->hash_val != btree_ptr_hash_val(k)))
+ return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
+
+ if (btree_node_read_locked(path, level + 1))
+ btree_node_unlock(trans, path, level + 1);
+
+ ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ERR_PTR(ret);
+
+ BUG_ON(ret);
+
+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
+ b->c.level != level ||
+ race_fault())) {
+ six_unlock_type(&b->c.lock, lock_type);
+ if (bch2_btree_node_relock(trans, path, level + 1))
+ return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
+
+ trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
+ }
+
+ if (unlikely(btree_node_read_in_flight(b))) {
+ six_unlock_type(&b->c.lock, lock_type);
+ return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
+ }
+
+ prefetch(b->aux_data);
+
+ for_each_bset(b, t) {
+ void *p = (u64 *) b->aux_data + t->aux_data_offset;
+
+ prefetch(p + L1_CACHE_BYTES * 0);
+ prefetch(p + L1_CACHE_BYTES * 1);
+ prefetch(p + L1_CACHE_BYTES * 2);
+ }
+
+ /* avoid atomic set bit if it's not needed: */
+ if (!btree_node_accessed(b))
+ set_btree_node_accessed(b);
+
+ if (unlikely(btree_node_read_error(b))) {
+ six_unlock_type(&b->c.lock, lock_type);
+ return ERR_PTR(-EIO);
+ }
+
+ EBUG_ON(b->c.btree_id != path->btree_id);
+ EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+ btree_check_header(c, b);
+
+ return b;
+}
+
+struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
+ const struct bkey_i *k,
+ enum btree_id btree_id,
+ unsigned level,
+ bool nofill)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_cache *bc = &c->btree_cache;
+ struct btree *b;
+ struct bset_tree *t;
+ int ret;
+
+ EBUG_ON(level >= BTREE_MAX_DEPTH);
+
+ if (c->opts.btree_node_mem_ptr_optimization) {
+ b = btree_node_mem_ptr(k);
+ if (b)
+ goto lock_node;
+ }
+retry:
+ b = btree_cache_find(bc, k);
+ if (unlikely(!b)) {
+ if (nofill)
+ goto out;
+
+ b = bch2_btree_node_fill(trans, NULL, k, btree_id,
+ level, SIX_LOCK_read, true);
+
+ /* We raced and found the btree node in the cache */
+ if (!b)
+ goto retry;
+
+ if (IS_ERR(b) &&
+ !bch2_btree_cache_cannibalize_lock(trans, NULL))
+ goto retry;
+
+ if (IS_ERR(b))
+ goto out;
+ } else {
+lock_node:
+ ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read, _THIS_IP_);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ERR_PTR(ret);
+
+ BUG_ON(ret);
+
+ if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
+ b->c.btree_id != btree_id ||
+ b->c.level != level)) {
+ six_unlock_read(&b->c.lock);
+ goto retry;
+ }
+ }
+
+ /* XXX: waiting on IO with btree locks held: */
+ __bch2_btree_node_wait_on_read(b);
+
+ prefetch(b->aux_data);
+
+ for_each_bset(b, t) {
+ void *p = (u64 *) b->aux_data + t->aux_data_offset;
+
+ prefetch(p + L1_CACHE_BYTES * 0);
+ prefetch(p + L1_CACHE_BYTES * 1);
+ prefetch(p + L1_CACHE_BYTES * 2);
+ }
+
+ /* avoid atomic set bit if it's not needed: */
+ if (!btree_node_accessed(b))
+ set_btree_node_accessed(b);
+
+ if (unlikely(btree_node_read_error(b))) {
+ six_unlock_read(&b->c.lock);
+ b = ERR_PTR(-EIO);
+ goto out;
+ }
+
+ EBUG_ON(b->c.btree_id != btree_id);
+ EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+ btree_check_header(c, b);
+out:
+ bch2_btree_cache_cannibalize_unlock(trans);
+ return b;
+}
+
+int bch2_btree_node_prefetch(struct btree_trans *trans,
+ struct btree_path *path,
+ const struct bkey_i *k,
+ enum btree_id btree_id, unsigned level)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_cache *bc = &c->btree_cache;
+ struct btree *b;
+
+ BUG_ON(trans && !btree_node_locked(path, level + 1));
+ BUG_ON(level >= BTREE_MAX_DEPTH);
+
+ b = btree_cache_find(bc, k);
+ if (b)
+ return 0;
+
+ b = bch2_btree_node_fill(trans, path, k, btree_id,
+ level, SIX_LOCK_read, false);
+ return PTR_ERR_OR_ZERO(b);
+}
+
+void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_cache *bc = &c->btree_cache;
+ struct btree *b;
+
+ b = btree_cache_find(bc, k);
+ if (!b)
+ return;
+wait_on_io:
+ /* not allowed to wait on io with btree locks held: */
+
+ /* XXX we're called from btree_gc which will be holding other btree
+ * nodes locked
+ */
+ __bch2_btree_node_wait_on_read(b);
+ __bch2_btree_node_wait_on_write(b);
+
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+
+ if (btree_node_dirty(b)) {
+ __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+ goto wait_on_io;
+ }
+
+ BUG_ON(btree_node_dirty(b));
+
+ mutex_lock(&bc->lock);
+ btree_node_data_free(c, b);
+ bch2_btree_node_hash_remove(bc, b);
+ mutex_unlock(&bc->lock);
+
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+}
+
+const char *bch2_btree_id_str(enum btree_id btree)
+{
+ return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)";
+}
+
+void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
+{
+ prt_printf(out, "%s level %u/%u\n ",
+ bch2_btree_id_str(b->c.btree_id),
+ b->c.level,
+ bch2_btree_id_root(c, b->c.btree_id)->level);
+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+}
+
+void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
+{
+ struct bset_stats stats;
+
+ memset(&stats, 0, sizeof(stats));
+
+ bch2_btree_keys_stats(b, &stats);
+
+ prt_printf(out, "l %u ", b->c.level);
+ bch2_bpos_to_text(out, b->data->min_key);
+ prt_printf(out, " - ");
+ bch2_bpos_to_text(out, b->data->max_key);
+ prt_printf(out, ":\n"
+ " ptrs: ");
+ bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+ prt_newline(out);
+
+ prt_printf(out,
+ " format: ");
+ bch2_bkey_format_to_text(out, &b->format);
+
+ prt_printf(out,
+ " unpack fn len: %u\n"
+ " bytes used %zu/%zu (%zu%% full)\n"
+ " sib u64s: %u, %u (merge threshold %u)\n"
+ " nr packed keys %u\n"
+ " nr unpacked keys %u\n"
+ " floats %zu\n"
+ " failed unpacked %zu\n",
+ b->unpack_fn_len,
+ b->nr.live_u64s * sizeof(u64),
+ btree_bytes(c) - sizeof(struct btree_node),
+ b->nr.live_u64s * 100 / btree_max_u64s(c),
+ b->sib_u64s[0],
+ b->sib_u64s[1],
+ c->btree_foreground_merge_threshold,
+ b->nr.packed_keys,
+ b->nr.unpacked_keys,
+ stats.floats,
+ stats.failed);
+}
+
+void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
+{
+ prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used);
+ prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty));
+ prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock);
+}
diff --git a/c_src/libbcachefs/btree_cache.h b/c_src/libbcachefs/btree_cache.h
new file mode 100644
index 00000000..4e1af588
--- /dev/null
+++ b/c_src/libbcachefs/btree_cache.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_CACHE_H
+#define _BCACHEFS_BTREE_CACHE_H
+
+#include "bcachefs.h"
+#include "btree_types.h"
+#include "bkey_methods.h"
+
+extern const char * const bch2_btree_node_flags[];
+
+struct btree_iter;
+
+void bch2_recalc_btree_reserve(struct bch_fs *);
+
+void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
+int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
+int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
+ unsigned, enum btree_id);
+
+void bch2_btree_cache_cannibalize_unlock(struct btree_trans *);
+int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *);
+
+struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
+struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool);
+
+struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
+ const struct bkey_i *, unsigned,
+ enum six_lock_type, unsigned long);
+
+struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *,
+ enum btree_id, unsigned, bool);
+
+int bch2_btree_node_prefetch(struct btree_trans *, struct btree_path *,
+ const struct bkey_i *, enum btree_id, unsigned);
+
+void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *);
+
+void bch2_fs_btree_cache_exit(struct bch_fs *);
+int bch2_fs_btree_cache_init(struct bch_fs *);
+void bch2_fs_btree_cache_init_early(struct btree_cache *);
+
+static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
+{
+ switch (k->k.type) {
+ case KEY_TYPE_btree_ptr:
+ return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start);
+ case KEY_TYPE_btree_ptr_v2:
+ /*
+ * The cast/deref is only necessary to avoid sparse endianness
+ * warnings:
+ */
+ return *((u64 *) &bkey_i_to_btree_ptr_v2_c(k)->v.seq);
+ default:
+ return 0;
+ }
+}
+
+static inline struct btree *btree_node_mem_ptr(const struct bkey_i *k)
+{
+ return k->k.type == KEY_TYPE_btree_ptr_v2
+ ? (void *)(unsigned long)bkey_i_to_btree_ptr_v2_c(k)->v.mem_ptr
+ : NULL;
+}
+
+/* is btree node in hash table? */
+static inline bool btree_node_hashed(struct btree *b)
+{
+ return b->hash_val != 0;
+}
+
+#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \
+ for ((_tbl) = rht_dereference_rcu((_c)->btree_cache.table.tbl, \
+ &(_c)->btree_cache.table), \
+ _iter = 0; _iter < (_tbl)->size; _iter++) \
+ rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash)
+
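+/*
+ * Illustrative only, not part of the original header: for_each_cached_btree()
+ * walks the hash table under RCU, so a caller looks roughly like this
+ * (the loop body is hypothetical):
+ *
+ *	struct bucket_table *tbl;
+ *	struct rhash_head *pos;
+ *	struct btree *b;
+ *	unsigned i;
+ *
+ *	rcu_read_lock();
+ *	for_each_cached_btree(b, c, tbl, i, pos)
+ *		nr_nodes++;
+ *	rcu_read_unlock();
+ */
+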
+static inline size_t btree_bytes(struct bch_fs *c)
+{
+ return c->opts.btree_node_size;
+}
+
+static inline size_t btree_max_u64s(struct bch_fs *c)
+{
+ return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64);
+}
+
+static inline size_t btree_pages(struct bch_fs *c)
+{
+ return btree_bytes(c) / PAGE_SIZE;
+}
+
+static inline unsigned btree_blocks(struct bch_fs *c)
+{
+ return btree_sectors(c) >> c->block_bits;
+}
+
+#define BTREE_SPLIT_THRESHOLD(c) (btree_max_u64s(c) * 2 / 3)
+
+#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3)
+#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \
+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \
+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) >> 2))
+
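+/*
+ * Illustrative only: with the definitions above, a node is split once it is
+ * more than 2/3 full (in u64s), becomes a merge candidate below 1/3, and the
+ * hysteresis adds a further quarter of the merge threshold (1/3 + 1/12 = 5/12)
+ * so that nodes near the boundary don't oscillate between merging and
+ * splitting.
+ */
+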
+static inline unsigned btree_id_nr_alive(struct bch_fs *c)
+{
+ return BTREE_ID_NR + c->btree_roots_extra.nr;
+}
+
+static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned id)
+{
+ if (likely(id < BTREE_ID_NR)) {
+ return &c->btree_roots_known[id];
+ } else {
+ unsigned idx = id - BTREE_ID_NR;
+
+ EBUG_ON(idx >= c->btree_roots_extra.nr);
+ return &c->btree_roots_extra.data[idx];
+ }
+}
+
+static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
+{
+ return bch2_btree_id_root(c, b->c.btree_id)->b;
+}
+
+const char *bch2_btree_id_str(enum btree_id);
+void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
+void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
+void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *);
+
+#endif /* _BCACHEFS_BTREE_CACHE_H */
diff --git a/c_src/libbcachefs/btree_gc.c b/c_src/libbcachefs/btree_gc.c
new file mode 100644
index 00000000..49b4ade7
--- /dev/null
+++ b/c_src/libbcachefs/btree_gc.c
@@ -0,0 +1,2071 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright (C) 2014 Datera Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "bkey_methods.h"
+#include "bkey_buf.h"
+#include "btree_journal_iter.h"
+#include "btree_key_cache.h"
+#include "btree_locking.h"
+#include "btree_update_interior.h"
+#include "btree_io.h"
+#include "btree_gc.h"
+#include "buckets.h"
+#include "clock.h"
+#include "debug.h"
+#include "ec.h"
+#include "error.h"
+#include "extents.h"
+#include "journal.h"
+#include "keylist.h"
+#include "move.h"
+#include "recovery.h"
+#include "reflink.h"
+#include "replicas.h"
+#include "super-io.h"
+#include "trace.h"
+
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/sched/task.h>
+
+#define DROP_THIS_NODE 10
+#define DROP_PREV_NODE 11
+
+static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
+{
+ return (struct bkey_s) {{{
+ (struct bkey *) k.k,
+ (struct bch_val *) k.v
+ }}};
+}
+
+static bool should_restart_for_topology_repair(struct bch_fs *c)
+{
+ return c->opts.fix_errors != FSCK_FIX_no &&
+ !(c->recovery_passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology));
+}
+
+static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
+{
+ preempt_disable();
+ write_seqcount_begin(&c->gc_pos_lock);
+ c->gc_pos = new_pos;
+ write_seqcount_end(&c->gc_pos_lock);
+ preempt_enable();
+}
+
+static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
+{
+ BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
+ __gc_pos_set(c, new_pos);
+}
+
+/*
+ * Missing: if an interior btree node is empty, we need to do something -
+ * perhaps just kill it
+ */
+static int bch2_gc_check_topology(struct bch_fs *c,
+ struct btree *b,
+ struct bkey_buf *prev,
+ struct bkey_buf cur,
+ bool is_last)
+{
+ struct bpos node_start = b->data->min_key;
+ struct bpos node_end = b->data->max_key;
+ struct bpos expected_start = bkey_deleted(&prev->k->k)
+ ? node_start
+ : bpos_successor(prev->k->k.p);
+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ int ret = 0;
+
+ if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
+ struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);
+
+ if (!bpos_eq(expected_start, bp->v.min_key)) {
+ bch2_topology_error(c);
+
+ if (bkey_deleted(&prev->k->k)) {
+ prt_printf(&buf1, "start of node: ");
+ bch2_bpos_to_text(&buf1, node_start);
+ } else {
+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k));
+ }
+ bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k));
+
+ if (__fsck_err(c,
+ FSCK_CAN_FIX|
+ FSCK_CAN_IGNORE|
+ FSCK_NO_RATELIMIT,
+ btree_node_topology_bad_min_key,
+ "btree node with incorrect min_key at btree %s level %u:\n"
+ " prev %s\n"
+ " cur %s",
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
+ buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) {
+ bch_info(c, "Halting mark and sweep to start topology repair pass");
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
+ goto err;
+ } else {
+ set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
+ }
+ }
+ }
+
+ if (is_last && !bpos_eq(cur.k->k.p, node_end)) {
+ bch2_topology_error(c);
+
+ printbuf_reset(&buf1);
+ printbuf_reset(&buf2);
+
+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k));
+ bch2_bpos_to_text(&buf2, node_end);
+
+ if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT,
+ btree_node_topology_bad_max_key,
+ "btree node with incorrect max_key at btree %s level %u:\n"
+ " %s\n"
+ " expected %s",
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
+ buf1.buf, buf2.buf) &&
+ should_restart_for_topology_repair(c)) {
+ bch_info(c, "Halting mark and sweep to start topology repair pass");
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
+ goto err;
+ } else {
+ set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
+ }
+ }
+
+ bch2_bkey_buf_copy(prev, c, cur.k);
+err:
+fsck_err:
+ printbuf_exit(&buf2);
+ printbuf_exit(&buf1);
+ return ret;
+}
+
+static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
+{
+ switch (b->key.k.type) {
+ case KEY_TYPE_btree_ptr: {
+ struct bkey_i_btree_ptr *src = bkey_i_to_btree_ptr(&b->key);
+
+ dst->k.p = src->k.p;
+ dst->v.mem_ptr = 0;
+ dst->v.seq = b->data->keys.seq;
+ dst->v.sectors_written = 0;
+ dst->v.flags = 0;
+ dst->v.min_key = b->data->min_key;
+ set_bkey_val_bytes(&dst->k, sizeof(dst->v) + bkey_val_bytes(&src->k));
+ memcpy(dst->v.start, src->v.start, bkey_val_bytes(&src->k));
+ break;
+ }
+ case KEY_TYPE_btree_ptr_v2:
+ bkey_copy(&dst->k_i, &b->key);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static void bch2_btree_node_update_key_early(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_s_c old, struct bkey_i *new)
+{
+ struct bch_fs *c = trans->c;
+ struct btree *b;
+ struct bkey_buf tmp;
+ int ret;
+
+ bch2_bkey_buf_init(&tmp);
+ bch2_bkey_buf_reassemble(&tmp, c, old);
+
+ b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
+ if (!IS_ERR_OR_NULL(b)) {
+ mutex_lock(&c->btree_cache.lock);
+
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+ bkey_copy(&b->key, new);
+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
+ BUG_ON(ret);
+
+ mutex_unlock(&c->btree_cache.lock);
+ six_unlock_read(&b->c.lock);
+ }
+
+ bch2_bkey_buf_exit(&tmp, c);
+}
+
+static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
+{
+ struct bkey_i_btree_ptr_v2 *new;
+ int ret;
+
+ new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
+ if (!new)
+ return -BCH_ERR_ENOMEM_gc_repair_key;
+
+ btree_ptr_to_v2(b, new);
+ b->data->min_key = new_min;
+ new->v.min_key = new_min;
+ SET_BTREE_PTR_RANGE_UPDATED(&new->v, true);
+
+ ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i);
+ if (ret) {
+ kfree(new);
+ return ret;
+ }
+
+ bch2_btree_node_drop_keys_outside_node(b);
+ bkey_copy(&b->key, &new->k_i);
+ return 0;
+}
+
+static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
+{
+ struct bkey_i_btree_ptr_v2 *new;
+ int ret;
+
+ ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
+ if (ret)
+ return ret;
+
+ new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
+ if (!new)
+ return -BCH_ERR_ENOMEM_gc_repair_key;
+
+ btree_ptr_to_v2(b, new);
+ b->data->max_key = new_max;
+ new->k.p = new_max;
+ SET_BTREE_PTR_RANGE_UPDATED(&new->v, true);
+
+ ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i);
+ if (ret) {
+ kfree(new);
+ return ret;
+ }
+
+ bch2_btree_node_drop_keys_outside_node(b);
+
+ mutex_lock(&c->btree_cache.lock);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+ bkey_copy(&b->key, &new->k_i);
+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
+ BUG_ON(ret);
+ mutex_unlock(&c->btree_cache.lock);
+ return 0;
+}
+
+static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
+ struct btree *prev, struct btree *cur)
+{
+ struct bpos expected_start = !prev
+ ? b->data->min_key
+ : bpos_successor(prev->key.k.p);
+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ int ret = 0;
+
+ if (!prev) {
+ prt_printf(&buf1, "start of node: ");
+ bch2_bpos_to_text(&buf1, b->data->min_key);
+ } else {
+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key));
+ }
+
+ bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key));
+
+ if (prev &&
+ bpos_gt(expected_start, cur->data->min_key) &&
+ BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {
+ /* cur overwrites prev: */
+
+ if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key,
+ cur->data->min_key), c,
+ btree_node_topology_overwritten_by_next_node,
+ "btree node overwritten by next node at btree %s level %u:\n"
+ " node %s\n"
+ " next %s",
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
+ buf1.buf, buf2.buf)) {
+ ret = DROP_PREV_NODE;
+ goto out;
+ }
+
+ if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p,
+ bpos_predecessor(cur->data->min_key)), c,
+ btree_node_topology_bad_max_key,
+ "btree node with incorrect max_key at btree %s level %u:\n"
+ " node %s\n"
+ " next %s",
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
+ buf1.buf, buf2.buf))
+ ret = set_node_max(c, prev,
+ bpos_predecessor(cur->data->min_key));
+ } else {
+ /* prev overwrites cur: */
+
+ if (mustfix_fsck_err_on(bpos_ge(expected_start,
+ cur->data->max_key), c,
+ btree_node_topology_overwritten_by_prev_node,
+ "btree node overwritten by prev node at btree %s level %u:\n"
+ " prev %s\n"
+ " node %s",
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
+ buf1.buf, buf2.buf)) {
+ ret = DROP_THIS_NODE;
+ goto out;
+ }
+
+ if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c,
+ btree_node_topology_bad_min_key,
+ "btree node with incorrect min_key at btree %s level %u:\n"
+ " prev %s\n"
+ " node %s",
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
+ buf1.buf, buf2.buf))
+ ret = set_node_min(c, cur, expected_start);
+ }
+out:
+fsck_err:
+ printbuf_exit(&buf2);
+ printbuf_exit(&buf1);
+ return ret;
+}
+
+static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
+ struct btree *child)
+{
+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ int ret = 0;
+
+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key));
+ bch2_bpos_to_text(&buf2, b->key.k.p);
+
+ if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c,
+ btree_node_topology_bad_max_key,
+ "btree node with incorrect max_key at btree %s level %u:\n"
+ " %s\n"
+ " expected %s",
+ bch2_btree_id_str(b->c.btree_id), b->c.level,
+ buf1.buf, buf2.buf)) {
+ ret = set_node_max(c, child, b->key.k.p);
+ if (ret)
+ goto err;
+ }
+err:
+fsck_err:
+ printbuf_exit(&buf2);
+ printbuf_exit(&buf1);
+ return ret;
+}
+
+static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_and_journal_iter iter;
+ struct bkey_s_c k;
+ struct bkey_buf prev_k, cur_k;
+ struct btree *prev = NULL, *cur = NULL;
+ bool have_child, dropped_children = false;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ if (!b->c.level)
+ return 0;
+again:
+ prev = NULL;
+ have_child = dropped_children = false;
+ bch2_bkey_buf_init(&prev_k);
+ bch2_bkey_buf_init(&cur_k);
+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+
+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+ BUG_ON(bpos_lt(k.k->p, b->data->min_key));
+ BUG_ON(bpos_gt(k.k->p, b->data->max_key));
+
+ bch2_btree_and_journal_iter_advance(&iter);
+ bch2_bkey_buf_reassemble(&cur_k, c, k);
+
+ cur = bch2_btree_node_get_noiter(trans, cur_k.k,
+ b->c.btree_id, b->c.level - 1,
+ false);
+ ret = PTR_ERR_OR_ZERO(cur);
+
+ printbuf_reset(&buf);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
+
+ if (mustfix_fsck_err_on(ret == -EIO, c,
+ btree_node_unreadable,
+ "Topology repair: unreadable btree node at btree %s level %u:\n"
+ " %s",
+ bch2_btree_id_str(b->c.btree_id),
+ b->c.level - 1,
+ buf.buf)) {
+ bch2_btree_node_evict(trans, cur_k.k);
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, cur_k.k->k.p);
+ cur = NULL;
+ if (ret)
+ break;
+ continue;
+ }
+
+ bch_err_msg(c, ret, "getting btree node");
+ if (ret)
+ break;
+
+ ret = btree_repair_node_boundaries(c, b, prev, cur);
+
+ if (ret == DROP_THIS_NODE) {
+ six_unlock_read(&cur->c.lock);
+ bch2_btree_node_evict(trans, cur_k.k);
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, cur_k.k->k.p);
+ cur = NULL;
+ if (ret)
+ break;
+ continue;
+ }
+
+ if (prev)
+ six_unlock_read(&prev->c.lock);
+ prev = NULL;
+
+ if (ret == DROP_PREV_NODE) {
+ bch2_btree_node_evict(trans, prev_k.k);
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, prev_k.k->k.p);
+ if (ret)
+ break;
+
+ bch2_btree_and_journal_iter_exit(&iter);
+ bch2_bkey_buf_exit(&prev_k, c);
+ bch2_bkey_buf_exit(&cur_k, c);
+ goto again;
+ } else if (ret)
+ break;
+
+ prev = cur;
+ cur = NULL;
+ bch2_bkey_buf_copy(&prev_k, c, cur_k.k);
+ }
+
+ if (!ret && !IS_ERR_OR_NULL(prev)) {
+ BUG_ON(cur);
+ ret = btree_repair_node_end(c, b, prev);
+ }
+
+ if (!IS_ERR_OR_NULL(prev))
+ six_unlock_read(&prev->c.lock);
+ prev = NULL;
+ if (!IS_ERR_OR_NULL(cur))
+ six_unlock_read(&cur->c.lock);
+ cur = NULL;
+
+ if (ret)
+ goto err;
+
+ bch2_btree_and_journal_iter_exit(&iter);
+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+
+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+ bch2_bkey_buf_reassemble(&cur_k, c, k);
+ bch2_btree_and_journal_iter_advance(&iter);
+
+ cur = bch2_btree_node_get_noiter(trans, cur_k.k,
+ b->c.btree_id, b->c.level - 1,
+ false);
+ ret = PTR_ERR_OR_ZERO(cur);
+
+ bch_err_msg(c, ret, "getting btree node");
+ if (ret)
+ goto err;
+
+ ret = bch2_btree_repair_topology_recurse(trans, cur);
+ six_unlock_read(&cur->c.lock);
+ cur = NULL;
+
+ if (ret == DROP_THIS_NODE) {
+ bch2_btree_node_evict(trans, cur_k.k);
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, cur_k.k->k.p);
+ dropped_children = true;
+ }
+
+ if (ret)
+ goto err;
+
+ have_child = true;
+ }
+
+ printbuf_reset(&buf);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ if (mustfix_fsck_err_on(!have_child, c,
+ btree_node_topology_interior_node_empty,
+ "empty interior btree node at btree %s level %u\n"
+ " %s",
+ bch2_btree_id_str(b->c.btree_id),
+ b->c.level, buf.buf))
+ ret = DROP_THIS_NODE;
+err:
+fsck_err:
+ if (!IS_ERR_OR_NULL(prev))
+ six_unlock_read(&prev->c.lock);
+ if (!IS_ERR_OR_NULL(cur))
+ six_unlock_read(&cur->c.lock);
+
+ bch2_btree_and_journal_iter_exit(&iter);
+ bch2_bkey_buf_exit(&prev_k, c);
+ bch2_bkey_buf_exit(&cur_k, c);
+
+ if (!ret && dropped_children)
+ goto again;
+
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_check_topology(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree *b;
+ unsigned i;
+ int ret = 0;
+
+ for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+ struct btree_root *r = bch2_btree_id_root(c, i);
+
+ if (!r->alive)
+ continue;
+
+ b = r->b;
+ if (btree_node_fake(b))
+ continue;
+
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
+ ret = bch2_btree_repair_topology_recurse(trans, b);
+ six_unlock_read(&b->c.lock);
+
+ if (ret == DROP_THIS_NODE) {
+ bch_err(c, "empty btree root - repair unimplemented");
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ }
+ }
+
+ bch2_trans_put(trans);
+
+ return ret;
+}
+
+static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id,
+ unsigned level, bool is_root,
+ struct bkey_s_c *k)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(*k);
+ const union bch_extent_entry *entry_c;
+ struct extent_ptr_decoded p = { 0 };
+ bool do_update = false;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ /*
+ * XXX
+ * use check_bucket_ref here
+ */
+ bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+ struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
+
+ if (!g->gen_valid &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, ptr_to_missing_alloc_key,
+ "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
+ "while marking %s",
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
+ if (!p.ptr.cached) {
+ g->gen_valid = true;
+ g->gen = p.ptr.gen;
+ } else {
+ do_update = true;
+ }
+ }
+
+ if (gen_cmp(p.ptr.gen, g->gen) > 0 &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, ptr_gen_newer_than_bucket_gen,
+ "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
+ "while marking %s",
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen, g->gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
+ if (!p.ptr.cached) {
+ g->gen_valid = true;
+ g->gen = p.ptr.gen;
+ g->data_type = 0;
+ g->dirty_sectors = 0;
+ g->cached_sectors = 0;
+ set_bit(BCH_FS_need_another_gc, &c->flags);
+ } else {
+ do_update = true;
+ }
+ }
+
+ if (gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, ptr_gen_newer_than_bucket_gen,
+ "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
+ "while marking %s",
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
+ do_update = true;
+
+ if (!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0 &&
+ (c->opts.reconstruct_alloc ||
+ fsck_err(c, stale_dirty_ptr,
+ "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
+ "while marking %s",
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+ p.ptr.gen, g->gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
+ do_update = true;
+
+ if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
+ continue;
+
+ if (fsck_err_on(bucket_data_type(g->data_type) &&
+ bucket_data_type(g->data_type) != data_type, c,
+ ptr_bucket_data_type_mismatch,
+ "bucket %u:%zu different types of data in same bucket: %s, %s\n"
+ "while marking %s",
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ bch2_data_types[g->data_type],
+ bch2_data_types[data_type],
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
+ if (data_type == BCH_DATA_btree) {
+ g->data_type = data_type;
+ set_bit(BCH_FS_need_another_gc, &c->flags);
+ } else {
+ do_update = true;
+ }
+ }
+
+ if (p.has_ec) {
+ struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx);
+
+ if (fsck_err_on(!m || !m->alive, c,
+ ptr_to_missing_stripe,
+ "pointer to nonexistent stripe %llu\n"
+ "while marking %s",
+ (u64) p.ec.idx,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
+ do_update = true;
+
+ if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), c,
+ ptr_to_incorrect_stripe,
+ "pointer does not match stripe %llu\n"
+ "while marking %s",
+ (u64) p.ec.idx,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
+ do_update = true;
+ }
+ }
+
+ if (do_update) {
+ struct bkey_ptrs ptrs;
+ union bch_extent_entry *entry;
+ struct bch_extent_ptr *ptr;
+ struct bkey_i *new;
+
+ if (is_root) {
+ bch_err(c, "cannot update btree roots yet");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
+ if (!new) {
+ ret = -BCH_ERR_ENOMEM_gc_repair_key;
+ bch_err_msg(c, ret, "allocating new key");
+ goto err;
+ }
+
+ bkey_reassemble(new, *k);
+
+ if (level) {
+ /*
+ * We don't want to drop btree node pointers - if the
+ * btree node isn't there anymore, the read path will
+ * sort it out:
+ */
+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ struct bucket *g = PTR_GC_BUCKET(ca, ptr);
+
+ ptr->gen = g->gen;
+ }
+ } else {
+ bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ struct bucket *g = PTR_GC_BUCKET(ca, ptr);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
+
+ (ptr->cached &&
+ (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
+ (!ptr->cached &&
+ gen_cmp(ptr->gen, g->gen) < 0) ||
+ gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
+ (g->data_type &&
+ g->data_type != data_type);
+ }));
+again:
+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+ bkey_extent_entry_for_each(ptrs, entry) {
+ if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) {
+ struct gc_stripe *m = genradix_ptr(&c->gc_stripes,
+ entry->stripe_ptr.idx);
+ union bch_extent_entry *next_ptr;
+
+ bkey_extent_entry_for_each_from(ptrs, next_ptr, entry)
+ if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr)
+ goto found;
+ next_ptr = NULL;
+found:
+ if (!next_ptr) {
+ bch_err(c, "aieee, found stripe ptr with no data ptr");
+ continue;
+ }
+
+ if (!m || !m->alive ||
+ !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block],
+ &next_ptr->ptr,
+ m->sectors)) {
+ bch2_bkey_extent_entry_drop(new, entry);
+ goto again;
+ }
+ }
+ }
+ }
+
+ ret = bch2_journal_key_insert_take(c, btree_id, level, new);
+ if (ret) {
+ kfree(new);
+ goto err;
+ }
+
+ if (level)
+ bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
+
+ if (0) {
+ printbuf_reset(&buf);
+ bch2_bkey_val_to_text(&buf, c, *k);
+ bch_info(c, "updated %s", buf.buf);
+
+ printbuf_reset(&buf);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new));
+ bch_info(c, "new key %s", buf.buf);
+ }
+
+ *k = bkey_i_to_s_c(new);
+ }
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+/* marking of btree keys/nodes: */
+
+static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
+ unsigned level, bool is_root,
+ struct bkey_s_c *k,
+ bool initial)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey deleted = KEY(0, 0, 0);
+ struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
+ int ret = 0;
+
+ deleted.p = k->k->p;
+
+ if (initial) {
+ BUG_ON(bch2_journal_seq_verify &&
+ k->k->version.lo > atomic64_read(&c->journal.seq));
+
+ ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
+ if (ret)
+ goto err;
+
+ if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
+ bkey_version_in_future,
+ "key version number higher than recorded: %llu > %llu",
+ k->k->version.lo,
+ atomic64_read(&c->key_version)))
+ atomic64_set(&c->key_version, k->k->version.lo);
+ }
+
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
+fsck_err:
+err:
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_node_iter iter;
+ struct bkey unpacked;
+ struct bkey_s_c k;
+ struct bkey_buf prev, cur;
+ int ret = 0;
+
+ if (!btree_node_type_needs_gc(btree_node_type(b)))
+ return 0;
+
+ bch2_btree_node_iter_init_from_start(&iter, b);
+ bch2_bkey_buf_init(&prev);
+ bch2_bkey_buf_init(&cur);
+ bkey_init(&prev.k->k);
+
+ while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
+ &k, initial);
+ if (ret)
+ break;
+
+ bch2_btree_node_iter_advance(&iter, b);
+
+ if (b->c.level) {
+ bch2_bkey_buf_reassemble(&cur, c, k);
+
+ ret = bch2_gc_check_topology(c, b, &prev, cur,
+ bch2_btree_node_iter_end(&iter));
+ if (ret)
+ break;
+ }
+ }
+
+ bch2_bkey_buf_exit(&cur, c);
+ bch2_bkey_buf_exit(&prev, c);
+ return ret;
+}
+
+static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
+ bool initial, bool metadata_only)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct btree *b;
+ unsigned depth = metadata_only ? 1 : 0;
+ int ret = 0;
+
+ gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
+
+ __for_each_btree_node(trans, iter, btree_id, POS_MIN,
+ 0, depth, BTREE_ITER_PREFETCH, b, ret) {
+ bch2_verify_btree_nr_keys(b);
+
+ gc_pos_set(c, gc_pos_btree_node(b));
+
+ ret = btree_gc_mark_node(trans, b, initial);
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret)
+ return ret;
+
+ mutex_lock(&c->btree_root_lock);
+ b = bch2_btree_id_root(c, btree_id)->b;
+ if (!btree_node_fake(b)) {
+ struct bkey_s_c k = bkey_i_to_s_c(&b->key);
+
+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1,
+ true, &k, initial);
+ }
+ gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
+ mutex_unlock(&c->btree_root_lock);
+
+ return ret;
+}
+
+static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b,
+ unsigned target_depth)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_and_journal_iter iter;
+ struct bkey_s_c k;
+ struct bkey_buf cur, prev;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+ bch2_bkey_buf_init(&prev);
+ bch2_bkey_buf_init(&cur);
+ bkey_init(&prev.k->k);
+
+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+ BUG_ON(bpos_lt(k.k->p, b->data->min_key));
+ BUG_ON(bpos_gt(k.k->p, b->data->max_key));
+
+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
+ false, &k, true);
+ if (ret)
+ goto fsck_err;
+
+ if (b->c.level) {
+ bch2_bkey_buf_reassemble(&cur, c, k);
+ k = bkey_i_to_s_c(cur.k);
+
+ bch2_btree_and_journal_iter_advance(&iter);
+
+ ret = bch2_gc_check_topology(c, b,
+ &prev, cur,
+ !bch2_btree_and_journal_iter_peek(&iter).k);
+ if (ret)
+ goto fsck_err;
+ } else {
+ bch2_btree_and_journal_iter_advance(&iter);
+ }
+ }
+
+ if (b->c.level > target_depth) {
+ bch2_btree_and_journal_iter_exit(&iter);
+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+
+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+ struct btree *child;
+
+ bch2_bkey_buf_reassemble(&cur, c, k);
+ bch2_btree_and_journal_iter_advance(&iter);
+
+ child = bch2_btree_node_get_noiter(trans, cur.k,
+ b->c.btree_id, b->c.level - 1,
+ false);
+ ret = PTR_ERR_OR_ZERO(child);
+
+ if (ret == -EIO) {
+ bch2_topology_error(c);
+
+ if (__fsck_err(c,
+ FSCK_CAN_FIX|
+ FSCK_CAN_IGNORE|
+ FSCK_NO_RATELIMIT,
+ btree_node_read_error,
+ "Unreadable btree node at btree %s level %u:\n"
+ " %s",
+ bch2_btree_id_str(b->c.btree_id),
+ b->c.level - 1,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) &&
+ should_restart_for_topology_repair(c)) {
+ bch_info(c, "Halting mark and sweep to start topology repair pass");
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
+ goto fsck_err;
+ } else {
+ /* Continue marking when opted to not
+ * fix the error: */
+ ret = 0;
+ set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
+ continue;
+ }
+ } else if (ret) {
+ bch_err_msg(c, ret, "getting btree node");
+ break;
+ }
+
+ ret = bch2_gc_btree_init_recurse(trans, child,
+ target_depth);
+ six_unlock_read(&child->c.lock);
+
+ if (ret)
+ break;
+ }
+ }
+fsck_err:
+ bch2_bkey_buf_exit(&cur, c);
+ bch2_bkey_buf_exit(&prev, c);
+ bch2_btree_and_journal_iter_exit(&iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int bch2_gc_btree_init(struct btree_trans *trans,
+ enum btree_id btree_id,
+ bool metadata_only)
+{
+ struct bch_fs *c = trans->c;
+ struct btree *b;
+ unsigned target_depth = metadata_only ? 1 : 0;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ b = bch2_btree_id_root(c, btree_id)->b;
+
+ if (btree_node_fake(b))
+ return 0;
+
+ six_lock_read(&b->c.lock, NULL, NULL);
+ printbuf_reset(&buf);
+ bch2_bpos_to_text(&buf, b->data->min_key);
+ if (mustfix_fsck_err_on(!bpos_eq(b->data->min_key, POS_MIN), c,
+ btree_root_bad_min_key,
+ "btree root with incorrect min_key: %s", buf.buf)) {
+ bch_err(c, "repair unimplemented");
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto fsck_err;
+ }
+
+ printbuf_reset(&buf);
+ bch2_bpos_to_text(&buf, b->data->max_key);
+ if (mustfix_fsck_err_on(!bpos_eq(b->data->max_key, SPOS_MAX), c,
+ btree_root_bad_max_key,
+ "btree root with incorrect max_key: %s", buf.buf)) {
+ bch_err(c, "repair unimplemented");
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto fsck_err;
+ }
+
+ if (b->c.level >= target_depth)
+ ret = bch2_gc_btree_init_recurse(trans, b, target_depth);
+
+ if (!ret) {
+ struct bkey_s_c k = bkey_i_to_s_c(&b->key);
+
+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1, true,
+ &k, true);
+ }
+fsck_err:
+ six_unlock_read(&b->c.lock);
+
+ bch_err_fn(c, ret);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
+{
+ return (int) btree_id_to_gc_phase(l) -
+ (int) btree_id_to_gc_phase(r);
+}
+
+static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ enum btree_id ids[BTREE_ID_NR];
+ unsigned i;
+ int ret = 0;
+
+ for (i = 0; i < BTREE_ID_NR; i++)
+ ids[i] = i;
+ bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);
+
+ for (i = 0; i < BTREE_ID_NR && !ret; i++)
+ ret = initial
+ ? bch2_gc_btree_init(trans, ids[i], metadata_only)
+ : bch2_gc_btree(trans, ids[i], initial, metadata_only);
+
+ for (i = BTREE_ID_NR; i < btree_id_nr_alive(c) && !ret; i++) {
+ if (!bch2_btree_id_root(c, i)->alive)
+ continue;
+
+ ret = initial
+ ? bch2_gc_btree_init(trans, i, metadata_only)
+ : bch2_gc_btree(trans, i, initial, metadata_only);
+ }
+
+ bch2_trans_put(trans);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
+ u64 start, u64 end,
+ enum bch_data_type type,
+ unsigned flags)
+{
+ u64 b = sector_to_bucket(ca, start);
+
+ do {
+ unsigned sectors =
+ min_t(u64, bucket_to_sector(ca, b + 1), end) - start;
+
+ bch2_mark_metadata_bucket(c, ca, b, type, sectors,
+ gc_phase(GC_PHASE_SB), flags);
+ b++;
+ start += sectors;
+ } while (start < end);
+}
+
+static void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca,
+ unsigned flags)
+{
+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
+ unsigned i;
+ u64 b;
+
+ for (i = 0; i < layout->nr_superblocks; i++) {
+ u64 offset = le64_to_cpu(layout->sb_offset[i]);
+
+ if (offset == BCH_SB_SECTOR)
+ mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR,
+ BCH_DATA_sb, flags);
+
+ mark_metadata_sectors(c, ca, offset,
+ offset + (1 << layout->sb_max_size_bits),
+ BCH_DATA_sb, flags);
+ }
+
+ for (i = 0; i < ca->journal.nr; i++) {
+ b = ca->journal.buckets[i];
+ bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal,
+ ca->mi.bucket_size,
+ gc_phase(GC_PHASE_SB), flags);
+ }
+}
+
+static void bch2_mark_superblocks(struct bch_fs *c)
+{
+ mutex_lock(&c->sb_lock);
+ gc_pos_set(c, gc_phase(GC_PHASE_SB));
+
+ for_each_online_member(c, ca)
+ bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC);
+ mutex_unlock(&c->sb_lock);
+}
+
+#if 0
+/* Also see bch2_pending_btree_node_free_insert_done() */
+static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
+{
+ struct btree_update *as;
+ struct pending_btree_node_free *d;
+
+ mutex_lock(&c->btree_interior_update_lock);
+ gc_pos_set(c, gc_phase(GC_PHASE_PENDING_DELETE));
+
+ for_each_pending_btree_node_free(c, as, d)
+ if (d->index_update_done)
+ bch2_mark_key(c, bkey_i_to_s_c(&d->key), BTREE_TRIGGER_GC);
+
+ mutex_unlock(&c->btree_interior_update_lock);
+}
+#endif
+
+static void bch2_gc_free(struct bch_fs *c)
+{
+ genradix_free(&c->reflink_gc_table);
+ genradix_free(&c->gc_stripes);
+
+ for_each_member_device(c, ca) {
+ kvpfree(rcu_dereference_protected(ca->buckets_gc, 1),
+ sizeof(struct bucket_array) +
+ ca->mi.nbuckets * sizeof(struct bucket));
+ ca->buckets_gc = NULL;
+
+ free_percpu(ca->usage_gc);
+ ca->usage_gc = NULL;
+ }
+
+ free_percpu(c->usage_gc);
+ c->usage_gc = NULL;
+}
+
+static int bch2_gc_done(struct bch_fs *c,
+ bool initial, bool metadata_only)
+{
+ struct bch_dev *ca = NULL;
+ struct printbuf buf = PRINTBUF;
+ bool verify = !metadata_only &&
+ !c->opts.reconstruct_alloc &&
+ (!initial || (c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)));
+ unsigned i;
+ int ret = 0;
+
+ percpu_down_write(&c->mark_lock);
+
+#define copy_field(_err, _f, _msg, ...) \
+ if (dst->_f != src->_f && \
+ (!verify || \
+ fsck_err(c, _err, _msg ": got %llu, should be %llu" \
+ , ##__VA_ARGS__, dst->_f, src->_f))) \
+ dst->_f = src->_f
+#define copy_dev_field(_err, _f, _msg, ...) \
+ copy_field(_err, _f, "dev %u has wrong " _msg, ca->dev_idx, ##__VA_ARGS__)
+#define copy_fs_field(_err, _f, _msg, ...) \
+ copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__)
+
+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
+ bch2_fs_usage_acc_to_base(c, i);
+
+ __for_each_member_device(c, ca) {
+ struct bch_dev_usage *dst = ca->usage_base;
+ struct bch_dev_usage *src = (void *)
+ bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc,
+ dev_usage_u64s());
+
+ for (i = 0; i < BCH_DATA_NR; i++) {
+ copy_dev_field(dev_usage_buckets_wrong,
+ d[i].buckets, "%s buckets", bch2_data_types[i]);
+ copy_dev_field(dev_usage_sectors_wrong,
+ d[i].sectors, "%s sectors", bch2_data_types[i]);
+ copy_dev_field(dev_usage_fragmented_wrong,
+ d[i].fragmented, "%s fragmented", bch2_data_types[i]);
+ }
+ }
+
+ {
+ unsigned nr = fs_usage_u64s(c);
+ struct bch_fs_usage *dst = c->usage_base;
+ struct bch_fs_usage *src = (void *)
+ bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr);
+
+ copy_fs_field(fs_usage_hidden_wrong,
+ hidden, "hidden");
+ copy_fs_field(fs_usage_btree_wrong,
+ btree, "btree");
+
+ if (!metadata_only) {
+ copy_fs_field(fs_usage_data_wrong,
+ data, "data");
+ copy_fs_field(fs_usage_cached_wrong,
+ cached, "cached");
+ copy_fs_field(fs_usage_reserved_wrong,
+ reserved, "reserved");
+ copy_fs_field(fs_usage_nr_inodes_wrong,
+ nr_inodes,"nr_inodes");
+
+ for (i = 0; i < BCH_REPLICAS_MAX; i++)
+ copy_fs_field(fs_usage_persistent_reserved_wrong,
+ persistent_reserved[i],
+ "persistent_reserved[%i]", i);
+ }
+
+ for (i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry_v1 *e =
+ cpu_replicas_entry(&c->replicas, i);
+
+ if (metadata_only &&
+ (e->data_type == BCH_DATA_user ||
+ e->data_type == BCH_DATA_cached))
+ continue;
+
+ printbuf_reset(&buf);
+ bch2_replicas_entry_to_text(&buf, e);
+
+ copy_fs_field(fs_usage_replicas_wrong,
+ replicas[i], "%s", buf.buf);
+ }
+ }
+
+#undef copy_fs_field
+#undef copy_dev_field
+#undef copy_stripe_field
+#undef copy_field
+fsck_err:
+ if (ca)
+ percpu_ref_put(&ca->ref);
+ bch_err_fn(c, ret);
+
+ percpu_up_write(&c->mark_lock);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int bch2_gc_start(struct bch_fs *c)
+{
+ BUG_ON(c->usage_gc);
+
+ c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
+ sizeof(u64), GFP_KERNEL);
+ if (!c->usage_gc) {
+ bch_err(c, "error allocating c->usage_gc");
+ return -BCH_ERR_ENOMEM_gc_start;
+ }
+
+ for_each_member_device(c, ca) {
+ BUG_ON(ca->usage_gc);
+
+ ca->usage_gc = alloc_percpu(struct bch_dev_usage);
+ if (!ca->usage_gc) {
+ bch_err(c, "error allocating ca->usage_gc");
+ percpu_ref_put(&ca->ref);
+ return -BCH_ERR_ENOMEM_gc_start;
+ }
+
+ this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets,
+ ca->mi.nbuckets - ca->mi.first_bucket);
+ }
+
+ return 0;
+}
+
+static int bch2_gc_reset(struct bch_fs *c)
+{
+ for_each_member_device(c, ca) {
+ free_percpu(ca->usage_gc);
+ ca->usage_gc = NULL;
+ }
+
+ free_percpu(c->usage_gc);
+ c->usage_gc = NULL;
+
+ return bch2_gc_start(c);
+}
+
+/* returns true if not equal */
+static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
+ struct bch_alloc_v4 r)
+{
+ return l.gen != r.gen ||
+ l.oldest_gen != r.oldest_gen ||
+ l.data_type != r.data_type ||
+ l.dirty_sectors != r.dirty_sectors ||
+ l.cached_sectors != r.cached_sectors ||
+ l.stripe_redundancy != r.stripe_redundancy ||
+ l.stripe != r.stripe;
+}
+
+static int bch2_alloc_write_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ bool metadata_only)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
+ struct bucket gc, *b;
+ struct bkey_i_alloc_v4 *a;
+ struct bch_alloc_v4 old_convert, new;
+ const struct bch_alloc_v4 *old;
+ enum bch_data_type type;
+ int ret;
+
+ old = bch2_alloc_to_v4(k, &old_convert);
+ new = *old;
+
+ percpu_down_read(&c->mark_lock);
+ b = gc_bucket(ca, iter->pos.offset);
+
+ /*
+ * b->data_type doesn't yet include need_discard & need_gc_gen states -
+ * fix that here:
+ */
+ type = __alloc_data_type(b->dirty_sectors,
+ b->cached_sectors,
+ b->stripe,
+ *old,
+ b->data_type);
+ if (b->data_type != type) {
+ struct bch_dev_usage *u;
+
+ preempt_disable();
+ u = this_cpu_ptr(ca->usage_gc);
+ u->d[b->data_type].buckets--;
+ b->data_type = type;
+ u->d[b->data_type].buckets++;
+ preempt_enable();
+ }
+
+ gc = *b;
+ percpu_up_read(&c->mark_lock);
+
+ if (metadata_only &&
+ gc.data_type != BCH_DATA_sb &&
+ gc.data_type != BCH_DATA_journal &&
+ gc.data_type != BCH_DATA_btree)
+ return 0;
+
+ if (gen_after(old->gen, gc.gen))
+ return 0;
+
+ if (c->opts.reconstruct_alloc ||
+ fsck_err_on(new.data_type != gc.data_type, c,
+ alloc_key_data_type_wrong,
+ "bucket %llu:%llu gen %u has wrong data_type"
+ ": got %s, should be %s",
+ iter->pos.inode, iter->pos.offset,
+ gc.gen,
+ bch2_data_types[new.data_type],
+ bch2_data_types[gc.data_type]))
+ new.data_type = gc.data_type;
+
+#define copy_bucket_field(_errtype, _f) \
+ if (c->opts.reconstruct_alloc || \
+ fsck_err_on(new._f != gc._f, c, _errtype, \
+ "bucket %llu:%llu gen %u data type %s has wrong " #_f \
+ ": got %u, should be %u", \
+ iter->pos.inode, iter->pos.offset, \
+ gc.gen, \
+ bch2_data_types[gc.data_type], \
+ new._f, gc._f)) \
+ new._f = gc._f; \
+
+ copy_bucket_field(alloc_key_gen_wrong,
+ gen);
+ copy_bucket_field(alloc_key_dirty_sectors_wrong,
+ dirty_sectors);
+ copy_bucket_field(alloc_key_cached_sectors_wrong,
+ cached_sectors);
+ copy_bucket_field(alloc_key_stripe_wrong,
+ stripe);
+ copy_bucket_field(alloc_key_stripe_redundancy_wrong,
+ stripe_redundancy);
+#undef copy_bucket_field
+
+ if (!bch2_alloc_v4_cmp(*old, new))
+ return 0;
+
+ a = bch2_alloc_to_v4_mut(trans, k);
+ ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ return ret;
+
+ a->v = new;
+
+ /*
+ * The trigger normally makes sure this is set, but we're not running
+ * triggers:
+ */
+ if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
+ a->v.io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+
+ ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN);
+fsck_err:
+ return ret;
+}
+
+static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
+{
+ int ret = 0;
+
+ for_each_member_device(c, ca) {
+ ret = bch2_trans_run(c,
+ for_each_btree_key_upto_commit(trans, iter, BTREE_ID_alloc,
+ POS(ca->dev_idx, ca->mi.first_bucket),
+ POS(ca->dev_idx, ca->mi.nbuckets - 1),
+ BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
+ bch2_alloc_write_key(trans, &iter, k, metadata_only)));
+ if (ret) {
+ percpu_ref_put(&ca->ref);
+ break;
+ }
+ }
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
+{
+ for_each_member_device(c, ca) {
+ struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
+ ca->mi.nbuckets * sizeof(struct bucket),
+ GFP_KERNEL|__GFP_ZERO);
+ if (!buckets) {
+ percpu_ref_put(&ca->ref);
+ bch_err(c, "error allocating ca->buckets[gc]");
+ return -BCH_ERR_ENOMEM_gc_alloc_start;
+ }
+
+ buckets->first_bucket = ca->mi.first_bucket;
+ buckets->nbuckets = ca->mi.nbuckets;
+ rcu_assign_pointer(ca->buckets_gc, buckets);
+ }
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
+ struct bch_dev *ca = bch_dev_bkey_exists(c, k.k->p.inode);
+ struct bucket *g = gc_bucket(ca, k.k->p.offset);
+
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
+
+ g->gen_valid = 1;
+ g->gen = a->gen;
+
+ if (metadata_only &&
+ (a->data_type == BCH_DATA_user ||
+ a->data_type == BCH_DATA_cached ||
+ a->data_type == BCH_DATA_parity)) {
+ g->data_type = a->data_type;
+ g->dirty_sectors = a->dirty_sectors;
+ g->cached_sectors = a->cached_sectors;
+ g->stripe = a->stripe;
+ g->stripe_redundancy = a->stripe_redundancy;
+ }
+
+ 0;
+ })));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
+{
+ for_each_member_device(c, ca) {
+ struct bucket_array *buckets = gc_bucket_array(ca);
+ struct bucket *g;
+
+ for_each_bucket(g, buckets) {
+ if (metadata_only &&
+ (g->data_type == BCH_DATA_user ||
+ g->data_type == BCH_DATA_cached ||
+ g->data_type == BCH_DATA_parity))
+ continue;
+ g->data_type = 0;
+ g->dirty_sectors = 0;
+ g->cached_sectors = 0;
+ }
+ }
+}
+
+static int bch2_gc_write_reflink_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ size_t *idx)
+{
+ struct bch_fs *c = trans->c;
+ const __le64 *refcount = bkey_refcount_c(k);
+ struct printbuf buf = PRINTBUF;
+ struct reflink_gc *r;
+ int ret = 0;
+
+ if (!refcount)
+ return 0;
+
+ while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
+ r->offset < k.k->p.offset)
+ ++*idx;
+
+ if (!r ||
+ r->offset != k.k->p.offset ||
+ r->size != k.k->size) {
+ bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
+ return -EINVAL;
+ }
+
+ if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
+ reflink_v_refcount_wrong,
+ "reflink key has wrong refcount:\n"
+ " %s\n"
+ " should be %u",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
+ r->refcount)) {
+ struct bkey_i *new = bch2_bkey_make_mut(trans, iter, &k, 0);
+
+ ret = PTR_ERR_OR_ZERO(new);
+ if (ret)
+ return ret;
+
+ if (!r->refcount)
+ new->k.type = KEY_TYPE_deleted;
+ else
+ *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
+ }
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
+{
+ size_t idx = 0;
+
+ if (metadata_only)
+ return 0;
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter,
+ BTREE_ID_reflink, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_gc_write_reflink_key(trans, &iter, k, &idx)));
+ c->reflink_gc_nr = 0;
+ return ret;
+}
+
+static int bch2_gc_reflink_start(struct bch_fs *c,
+ bool metadata_only)
+{
+
+ if (metadata_only)
+ return 0;
+
+ c->reflink_gc_nr = 0;
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
+ const __le64 *refcount = bkey_refcount_c(k);
+
+ if (!refcount)
+ continue;
+
+ struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table,
+ c->reflink_gc_nr++, GFP_KERNEL);
+ if (!r) {
+ ret = -BCH_ERR_ENOMEM_gc_reflink_start;
+ break;
+ }
+
+ r->offset = k.k->p.offset;
+ r->size = k.k->size;
+ r->refcount = 0;
+ 0;
+ })));
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static void bch2_gc_reflink_reset(struct bch_fs *c, bool metadata_only)
+{
+ struct genradix_iter iter;
+ struct reflink_gc *r;
+
+ genradix_for_each(&c->reflink_gc_table, iter, r)
+ r->refcount = 0;
+}
+
+static int bch2_gc_write_stripes_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ const struct bch_stripe *s;
+ struct gc_stripe *m;
+ bool bad = false;
+ unsigned i;
+ int ret = 0;
+
+ if (k.k->type != KEY_TYPE_stripe)
+ return 0;
+
+ s = bkey_s_c_to_stripe(k).v;
+ m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
+
+ for (i = 0; i < s->nr_blocks; i++) {
+ u32 old = stripe_blockcount_get(s, i);
+ u32 new = (m ? m->block_sectors[i] : 0);
+
+ if (old != new) {
+ prt_printf(&buf, "stripe block %u has wrong sector count: got %u, should be %u\n",
+ i, old, new);
+ bad = true;
+ }
+ }
+
+ if (bad)
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ if (fsck_err_on(bad, c, stripe_sector_count_wrong,
+ "%s", buf.buf)) {
+ struct bkey_i_stripe *new;
+
+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+ ret = PTR_ERR_OR_ZERO(new);
+ if (ret)
+ return ret;
+
+ bkey_reassemble(&new->k_i, k);
+
+ for (i = 0; i < new->v.nr_blocks; i++)
+ stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
+
+ ret = bch2_trans_update(trans, iter, &new->k_i, 0);
+ }
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
+{
+ if (metadata_only)
+ return 0;
+
+ return bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter,
+ BTREE_ID_stripes, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_gc_write_stripes_key(trans, &iter, k)));
+}
+
+static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only)
+{
+ genradix_free(&c->gc_stripes);
+}
+
+/**
+ * bch2_gc - walk _all_ references to buckets, and recompute them:
+ *
+ * @c: filesystem object
+ * @initial: are we in recovery?
+ * @metadata_only: are we just checking metadata references, or everything?
+ *
+ * Returns: 0 on success, or standard errcode on failure
+ *
+ * Order matters here:
+ * - Concurrent GC relies on the fact that we have a total ordering for
+ * everything that GC walks - see gc_will_visit_node(),
+ * gc_will_visit_root()
+ *
+ * - also, references move around in the course of index updates and
+ * various other crap: everything needs to agree on the ordering
+ * references are allowed to move around in - e.g., we're allowed to
+ * start with a reference owned by an open_bucket (the allocator) and
+ * move it to the btree, but not the reverse.
+ *
+ * This is necessary to ensure that gc doesn't miss references that
+ * move around - if references move backwards in the ordering GC
+ * uses, GC could skip past them
+ */
+int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
+{
+ unsigned iter = 0;
+ int ret;
+
+ lockdep_assert_held(&c->state_lock);
+
+ down_write(&c->gc_lock);
+
+ bch2_btree_interior_updates_flush(c);
+
+ ret = bch2_gc_start(c) ?:
+ bch2_gc_alloc_start(c, metadata_only) ?:
+ bch2_gc_reflink_start(c, metadata_only);
+ if (ret)
+ goto out;
+again:
+ gc_pos_set(c, gc_phase(GC_PHASE_START));
+
+ bch2_mark_superblocks(c);
+
+ ret = bch2_gc_btrees(c, initial, metadata_only);
+
+ if (ret)
+ goto out;
+
+#if 0
+ bch2_mark_pending_btree_node_frees(c);
+#endif
+ c->gc_count++;
+
+ if (test_bit(BCH_FS_need_another_gc, &c->flags) ||
+ (!iter && bch2_test_restart_gc)) {
+ if (iter++ > 2) {
+ bch_info(c, "Unable to fix bucket gens, looping");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * XXX: make sure gens we fixed got saved
+ */
+ bch_info(c, "Second GC pass needed, restarting:");
+ clear_bit(BCH_FS_need_another_gc, &c->flags);
+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+
+ bch2_gc_stripes_reset(c, metadata_only);
+ bch2_gc_alloc_reset(c, metadata_only);
+ bch2_gc_reflink_reset(c, metadata_only);
+ ret = bch2_gc_reset(c);
+ if (ret)
+ goto out;
+
+ /* flush fsck errors, reset counters */
+ bch2_flush_fsck_errs(c);
+ goto again;
+ }
+out:
+ if (!ret) {
+ bch2_journal_block(&c->journal);
+
+ ret = bch2_gc_stripes_done(c, metadata_only) ?:
+ bch2_gc_reflink_done(c, metadata_only) ?:
+ bch2_gc_alloc_done(c, metadata_only) ?:
+ bch2_gc_done(c, initial, metadata_only);
+
+ bch2_journal_unblock(&c->journal);
+ }
+
+ percpu_down_write(&c->mark_lock);
+ /* Indicates that gc is no longer in progress: */
+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+
+ bch2_gc_free(c);
+ percpu_up_write(&c->mark_lock);
+
+ up_write(&c->gc_lock);
+
+ /*
+ * At startup, allocations can happen directly instead of via the
+ * allocator thread - issue wakeup in case they blocked on gc_lock:
+ */
+ closure_wake_up(&c->freelist_wait);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int gc_btree_gens_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bkey_i *u;
+ int ret;
+
+ percpu_down_read(&c->mark_lock);
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+ if (ptr_stale(ca, ptr) > 16) {
+ percpu_up_read(&c->mark_lock);
+ goto update;
+ }
+ }
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)];
+
+ if (gen_after(*gen, ptr->gen))
+ *gen = ptr->gen;
+ }
+ percpu_up_read(&c->mark_lock);
+ return 0;
+update:
+ u = bch2_bkey_make_mut(trans, iter, &k, 0);
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ return ret;
+
+ bch2_extent_normalize(c, bkey_i_to_s(u));
+ return 0;
+}
+
+static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode);
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
+ struct bkey_i_alloc_v4 *a_mut;
+ int ret;
+
+ if (a->oldest_gen == ca->oldest_gen[iter->pos.offset])
+ return 0;
+
+ a_mut = bch2_alloc_to_v4_mut(trans, k);
+ ret = PTR_ERR_OR_ZERO(a_mut);
+ if (ret)
+ return ret;
+
+ a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
+ a_mut->v.data_type = alloc_data_type(a_mut->v, a_mut->v.data_type);
+
+ return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
+}
+
+int bch2_gc_gens(struct bch_fs *c)
+{
+ u64 b, start_time = local_clock();
+ int ret;
+
+ /*
+ * Ideally we would be using state_lock and not gc_lock here, but that
+ * introduces a deadlock in the RO path - we currently take the state
+ * lock at the start of going RO, thus the gc thread may get stuck:
+ */
+ if (!mutex_trylock(&c->gc_gens_lock))
+ return 0;
+
+ trace_and_count(c, gc_gens_start, c);
+ down_read(&c->gc_lock);
+
+ for_each_member_device(c, ca) {
+ struct bucket_gens *gens = bucket_gens(ca);
+
+ BUG_ON(ca->oldest_gen);
+
+ ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL);
+ if (!ca->oldest_gen) {
+ percpu_ref_put(&ca->ref);
+ ret = -BCH_ERR_ENOMEM_gc_gens;
+ goto err;
+ }
+
+ for (b = gens->first_bucket;
+ b < gens->nbuckets; b++)
+ ca->oldest_gen[b] = gens->b[b];
+ }
+
+ for (unsigned i = 0; i < BTREE_ID_NR; i++)
+ if (btree_type_has_ptrs(i)) {
+ c->gc_gens_btree = i;
+ c->gc_gens_pos = POS_MIN;
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, i,
+ POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
+ k,
+ NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc,
+ gc_btree_gens_key(trans, &iter, k)));
+ if (ret)
+ goto err;
+ }
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
+ POS_MIN,
+ BTREE_ITER_PREFETCH,
+ k,
+ NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc,
+ bch2_alloc_write_oldest_gen(trans, &iter, k)));
+ if (ret)
+ goto err;
+
+ c->gc_gens_btree = 0;
+ c->gc_gens_pos = POS_MIN;
+
+ c->gc_count++;
+
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
+ trace_and_count(c, gc_gens_end, c);
+err:
+ for_each_member_device(c, ca) {
+ kvfree(ca->oldest_gen);
+ ca->oldest_gen = NULL;
+ }
+
+ up_read(&c->gc_lock);
+ mutex_unlock(&c->gc_gens_lock);
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int bch2_gc_thread(void *arg)
+{
+ struct bch_fs *c = arg;
+ struct io_clock *clock = &c->io_clock[WRITE];
+ unsigned long last = atomic64_read(&clock->now);
+ unsigned last_kick = atomic_read(&c->kick_gc);
+
+ set_freezable();
+
+ while (1) {
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (kthread_should_stop()) {
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+
+ if (atomic_read(&c->kick_gc) != last_kick)
+ break;
+
+ if (c->btree_gc_periodic) {
+ unsigned long next = last + c->capacity / 16;
+
+ if (atomic64_read(&clock->now) >= next)
+ break;
+
+ bch2_io_clock_schedule_timeout(clock, next);
+ } else {
+ schedule();
+ }
+
+ try_to_freeze();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ last = atomic64_read(&clock->now);
+ last_kick = atomic_read(&c->kick_gc);
+
+ /*
+ * Full gc is currently incompatible with btree key cache:
+ */
+#if 0
+ ret = bch2_gc(c, false, false);
+#else
+ bch2_gc_gens(c);
+#endif
+ debug_check_no_locks_held();
+ }
+
+ return 0;
+}
+
+void bch2_gc_thread_stop(struct bch_fs *c)
+{
+ struct task_struct *p;
+
+ p = c->gc_thread;
+ c->gc_thread = NULL;
+
+ if (p) {
+ kthread_stop(p);
+ put_task_struct(p);
+ }
+}
+
+int bch2_gc_thread_start(struct bch_fs *c)
+{
+ struct task_struct *p;
+
+ if (c->gc_thread)
+ return 0;
+
+ p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
+ if (IS_ERR(p)) {
+ bch_err_fn(c, PTR_ERR(p));
+ return PTR_ERR(p);
+ }
+
+ get_task_struct(p);
+ c->gc_thread = p;
+ wake_up_process(p);
+ return 0;
+}
diff --git a/c_src/libbcachefs/btree_gc.h b/c_src/libbcachefs/btree_gc.h
new file mode 100644
index 00000000..607575f8
--- /dev/null
+++ b/c_src/libbcachefs/btree_gc.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_GC_H
+#define _BCACHEFS_BTREE_GC_H
+
+#include "bkey.h"
+#include "btree_types.h"
+
+int bch2_check_topology(struct bch_fs *);
+int bch2_gc(struct bch_fs *, bool, bool);
+int bch2_gc_gens(struct bch_fs *);
+void bch2_gc_thread_stop(struct bch_fs *);
+int bch2_gc_thread_start(struct bch_fs *);
+
+/*
+ * For concurrent mark and sweep (with other index updates), we define a total
+ * ordering of _all_ references GC walks:
+ *
+ * Note that some references will have the same GC position as others - e.g.
+ * everything within the same btree node; in those cases we're relying on
+ * whatever locking exists for where those references live, i.e. the write lock
+ * on a btree node.
+ *
+ * That locking is also required to ensure GC doesn't pass the updater in
+ * between the updater adding/removing the reference and updating the GC marks;
+ * without that, we would at best double count sometimes.
+ *
+ * That part is important - whenever calling bch2_mark_pointers(), a lock _must_
+ * be held that prevents GC from passing the position the updater is at.
+ *
+ * (What about the start of gc, when we're clearing all the marks? GC clears the
+ * mark with the gc pos seqlock held, and bch_mark_bucket checks against the gc
+ * position inside its cmpxchg loop, so crap magically works).
+ */
+
+/* Position of (the start of) a gc phase: */
+static inline struct gc_pos gc_phase(enum gc_phase phase)
+{
+ return (struct gc_pos) {
+ .phase = phase,
+ .pos = POS_MIN,
+ .level = 0,
+ };
+}
+
+static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
+{
+ return cmp_int(l.phase, r.phase) ?:
+ bpos_cmp(l.pos, r.pos) ?:
+ cmp_int(l.level, r.level);
+}
+
+static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id)
+{
+ switch (id) {
+#define x(name, v, ...) case BTREE_ID_##name: return GC_PHASE_BTREE_##name;
+ BCH_BTREE_IDS()
+#undef x
+ default:
+ BUG();
+ }
+}
+
+static inline struct gc_pos gc_pos_btree(enum btree_id id,
+ struct bpos pos, unsigned level)
+{
+ return (struct gc_pos) {
+ .phase = btree_id_to_gc_phase(id),
+ .pos = pos,
+ .level = level,
+ };
+}
+
+/*
+ * GC position of the pointers within a btree node: note, _not_ for &b->key
+ * itself, which lives in the parent node:
+ */
+static inline struct gc_pos gc_pos_btree_node(struct btree *b)
+{
+ return gc_pos_btree(b->c.btree_id, b->key.k.p, b->c.level);
+}
+
+/*
+ * GC position of the pointer to a btree root: we don't use
+ * gc_pos_pointer_to_btree_node() here to avoid a potential race with
+ * btree_split() increasing the tree depth - the new root will have level > the
+ * old root and thus have a greater gc position than the old root, but that
+ * would be incorrect since once gc has marked the root it's not coming back.
+ */
+static inline struct gc_pos gc_pos_btree_root(enum btree_id id)
+{
+ return gc_pos_btree(id, SPOS_MAX, BTREE_MAX_DEPTH);
+}
+
+static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
+{
+ unsigned seq;
+ bool ret;
+
+ do {
+ seq = read_seqcount_begin(&c->gc_pos_lock);
+ ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
+ } while (read_seqcount_retry(&c->gc_pos_lock, seq));
+
+ return ret;
+}
+
+static inline void bch2_do_gc_gens(struct bch_fs *c)
+{
+ atomic_inc(&c->kick_gc);
+ if (c->gc_thread)
+ wake_up_process(c->gc_thread);
+}
+
+#endif /* _BCACHEFS_BTREE_GC_H */
diff --git a/c_src/libbcachefs/btree_io.c b/c_src/libbcachefs/btree_io.c
new file mode 100644
index 00000000..33db48e2
--- /dev/null
+++ b/c_src/libbcachefs/btree_io.c
@@ -0,0 +1,2349 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "bkey_sort.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "debug.h"
+#include "error.h"
+#include "extents.h"
+#include "io_write.h"
+#include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
+#include "recovery.h"
+#include "super-io.h"
+#include "trace.h"
+
+#include <linux/sched/mm.h>
+
+void bch2_btree_node_io_unlock(struct btree *b)
+{
+ EBUG_ON(!btree_node_write_in_flight(b));
+
+ clear_btree_node_write_in_flight_inner(b);
+ clear_btree_node_write_in_flight(b);
+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
+}
+
+void bch2_btree_node_io_lock(struct btree *b)
+{
+ bch2_assert_btree_nodes_not_locked();
+
+ wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight,
+ TASK_UNINTERRUPTIBLE);
+}
+
+void __bch2_btree_node_wait_on_read(struct btree *b)
+{
+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+ TASK_UNINTERRUPTIBLE);
+}
+
+void __bch2_btree_node_wait_on_write(struct btree *b)
+{
+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
+ TASK_UNINTERRUPTIBLE);
+}
+
+void bch2_btree_node_wait_on_read(struct btree *b)
+{
+ bch2_assert_btree_nodes_not_locked();
+
+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
+ TASK_UNINTERRUPTIBLE);
+}
+
+void bch2_btree_node_wait_on_write(struct btree *b)
+{
+ bch2_assert_btree_nodes_not_locked();
+
+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
+ TASK_UNINTERRUPTIBLE);
+}
+
+static void verify_no_dups(struct btree *b,
+ struct bkey_packed *start,
+ struct bkey_packed *end)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+ struct bkey_packed *k, *p;
+
+ if (start == end)
+ return;
+
+ for (p = start, k = bkey_p_next(start);
+ k != end;
+ p = k, k = bkey_p_next(k)) {
+ struct bkey l = bkey_unpack_key(b, p);
+ struct bkey r = bkey_unpack_key(b, k);
+
+ BUG_ON(bpos_ge(l.p, bkey_start_pos(&r)));
+ }
+#endif
+}
+
+static void set_needs_whiteout(struct bset *i, int v)
+{
+ struct bkey_packed *k;
+
+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k))
+ k->needs_whiteout = v;
+}
+
+static void btree_bounce_free(struct bch_fs *c, size_t size,
+ bool used_mempool, void *p)
+{
+ if (used_mempool)
+ mempool_free(p, &c->btree_bounce_pool);
+ else
+ vpfree(p, size);
+}
+
+static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
+ bool *used_mempool)
+{
+ unsigned flags = memalloc_nofs_save();
+ void *p;
+
+ BUG_ON(size > btree_bytes(c));
+
+ *used_mempool = false;
+ p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
+ if (!p) {
+ *used_mempool = true;
+ p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
+ }
+ memalloc_nofs_restore(flags);
+ return p;
+}
+
+static void sort_bkey_ptrs(const struct btree *bt,
+ struct bkey_packed **ptrs, unsigned nr)
+{
+ unsigned n = nr, a = nr / 2, b, c, d;
+
+ if (!a)
+ return;
+
+ /* Heap sort: see lib/sort.c: */
+ while (1) {
+ if (a)
+ a--;
+ else if (--n)
+ swap(ptrs[0], ptrs[n]);
+ else
+ break;
+
+ for (b = a; c = 2 * b + 1, (d = c + 1) < n;)
+ b = bch2_bkey_cmp_packed(bt,
+ ptrs[c],
+ ptrs[d]) >= 0 ? c : d;
+ if (d == n)
+ b = c;
+
+ while (b != a &&
+ bch2_bkey_cmp_packed(bt,
+ ptrs[a],
+ ptrs[b]) >= 0)
+ b = (b - 1) / 2;
+ c = b;
+ while (b != a) {
+ b = (b - 1) / 2;
+ swap(ptrs[b], ptrs[c]);
+ }
+ }
+}
+
+static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
+{
+ struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k;
+ bool used_mempool = false;
+ size_t bytes = b->whiteout_u64s * sizeof(u64);
+
+ if (!b->whiteout_u64s)
+ return;
+
+ new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);
+
+ ptrs = ptrs_end = ((void *) new_whiteouts + bytes);
+
+ for (k = unwritten_whiteouts_start(c, b);
+ k != unwritten_whiteouts_end(c, b);
+ k = bkey_p_next(k))
+ *--ptrs = k;
+
+ sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs);
+
+ k = new_whiteouts;
+
+ while (ptrs != ptrs_end) {
+ bkey_p_copy(k, *ptrs);
+ k = bkey_p_next(k);
+ ptrs++;
+ }
+
+ verify_no_dups(b, new_whiteouts,
+ (void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
+
+ memcpy_u64s(unwritten_whiteouts_start(c, b),
+ new_whiteouts, b->whiteout_u64s);
+
+ btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
+}
+
+static bool should_compact_bset(struct btree *b, struct bset_tree *t,
+ bool compacting, enum compact_mode mode)
+{
+ if (!bset_dead_u64s(b, t))
+ return false;
+
+ switch (mode) {
+ case COMPACT_LAZY:
+ return should_compact_bset_lazy(b, t) ||
+ (compacting && !bset_written(b, bset(b, t)));
+ case COMPACT_ALL:
+ return true;
+ default:
+ BUG();
+ }
+}
+
+static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode)
+{
+ struct bset_tree *t;
+ bool ret = false;
+
+ for_each_bset(b, t) {
+ struct bset *i = bset(b, t);
+ struct bkey_packed *k, *n, *out, *start, *end;
+ struct btree_node_entry *src = NULL, *dst = NULL;
+
+ if (t != b->set && !bset_written(b, i)) {
+ src = container_of(i, struct btree_node_entry, keys);
+ dst = max(write_block(b),
+ (void *) btree_bkey_last(b, t - 1));
+ }
+
+ if (src != dst)
+ ret = true;
+
+ if (!should_compact_bset(b, t, ret, mode)) {
+ if (src != dst) {
+ memmove(dst, src, sizeof(*src) +
+ le16_to_cpu(src->keys.u64s) *
+ sizeof(u64));
+ i = &dst->keys;
+ set_btree_bset(b, t, i);
+ }
+ continue;
+ }
+
+ start = btree_bkey_first(b, t);
+ end = btree_bkey_last(b, t);
+
+ if (src != dst) {
+ memmove(dst, src, sizeof(*src));
+ i = &dst->keys;
+ set_btree_bset(b, t, i);
+ }
+
+ out = i->start;
+
+ for (k = start; k != end; k = n) {
+ n = bkey_p_next(k);
+
+ if (!bkey_deleted(k)) {
+ bkey_p_copy(out, k);
+ out = bkey_p_next(out);
+ } else {
+ BUG_ON(k->needs_whiteout);
+ }
+ }
+
+ i->u64s = cpu_to_le16((u64 *) out - i->_data);
+ set_btree_bset_end(b, t);
+ bch2_bset_set_no_aux_tree(b, t);
+ ret = true;
+ }
+
+ bch2_verify_btree_nr_keys(b);
+
+ bch2_btree_build_aux_trees(b);
+
+ return ret;
+}
+
+bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
+ enum compact_mode mode)
+{
+ return bch2_drop_whiteouts(b, mode);
+}
+
+static void btree_node_sort(struct bch_fs *c, struct btree *b,
+ unsigned start_idx,
+ unsigned end_idx,
+ bool filter_whiteouts)
+{
+ struct btree_node *out;
+ struct sort_iter_stack sort_iter;
+ struct bset_tree *t;
+ struct bset *start_bset = bset(b, &b->set[start_idx]);
+ bool used_mempool = false;
+ u64 start_time, seq = 0;
+ unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1;
+ bool sorting_entire_node = start_idx == 0 &&
+ end_idx == b->nsets;
+
+ sort_iter_stack_init(&sort_iter, b);
+
+ for (t = b->set + start_idx;
+ t < b->set + end_idx;
+ t++) {
+ u64s += le16_to_cpu(bset(b, t)->u64s);
+ sort_iter_add(&sort_iter.iter,
+ btree_bkey_first(b, t),
+ btree_bkey_last(b, t));
+ }
+
+ bytes = sorting_entire_node
+ ? btree_bytes(c)
+ : __vstruct_bytes(struct btree_node, u64s);
+
+ out = btree_bounce_alloc(c, bytes, &used_mempool);
+
+ start_time = local_clock();
+
+ u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter, filter_whiteouts);
+
+ out->keys.u64s = cpu_to_le16(u64s);
+
+ BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes);
+
+ if (sorting_entire_node)
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
+ start_time);
+
+ /* Make sure we preserve bset journal_seq: */
+ for (t = b->set + start_idx; t < b->set + end_idx; t++)
+ seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq));
+ start_bset->journal_seq = cpu_to_le64(seq);
+
+ if (sorting_entire_node) {
+ u64s = le16_to_cpu(out->keys.u64s);
+
+ BUG_ON(bytes != btree_bytes(c));
+
+ /*
+ * Our temporary buffer is the same size as the btree node's
+ * buffer, we can just swap buffers instead of doing a big
+ * memcpy()
+ */
+ *out = *b->data;
+ out->keys.u64s = cpu_to_le16(u64s);
+ swap(out, b->data);
+ set_btree_bset(b, b->set, &b->data->keys);
+ } else {
+ start_bset->u64s = out->keys.u64s;
+ memcpy_u64s(start_bset->start,
+ out->keys.start,
+ le16_to_cpu(out->keys.u64s));
+ }
+
+ for (i = start_idx + 1; i < end_idx; i++)
+ b->nr.bset_u64s[start_idx] +=
+ b->nr.bset_u64s[i];
+
+ b->nsets -= shift;
+
+ for (i = start_idx + 1; i < b->nsets; i++) {
+ b->nr.bset_u64s[i] = b->nr.bset_u64s[i + shift];
+ b->set[i] = b->set[i + shift];
+ }
+
+ for (i = b->nsets; i < MAX_BSETS; i++)
+ b->nr.bset_u64s[i] = 0;
+
+ set_btree_bset_end(b, &b->set[start_idx]);
+ bch2_bset_set_no_aux_tree(b, &b->set[start_idx]);
+
+ btree_bounce_free(c, bytes, used_mempool, out);
+
+ bch2_verify_btree_nr_keys(b);
+}
+
+void bch2_btree_sort_into(struct bch_fs *c,
+ struct btree *dst,
+ struct btree *src)
+{
+ struct btree_nr_keys nr;
+ struct btree_node_iter src_iter;
+ u64 start_time = local_clock();
+
+ BUG_ON(dst->nsets != 1);
+
+ bch2_bset_set_no_aux_tree(dst, dst->set);
+
+ bch2_btree_node_iter_init_from_start(&src_iter, src);
+
+ nr = bch2_sort_repack(btree_bset_first(dst),
+ src, &src_iter,
+ &dst->format,
+ true);
+
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
+ start_time);
+
+ set_btree_bset_end(dst, dst->set);
+
+ dst->nr.live_u64s += nr.live_u64s;
+ dst->nr.bset_u64s[0] += nr.bset_u64s[0];
+ dst->nr.packed_keys += nr.packed_keys;
+ dst->nr.unpacked_keys += nr.unpacked_keys;
+
+ bch2_verify_btree_nr_keys(dst);
+}
+
+/*
+ * We're about to add another bset to the btree node, so if there are currently
+ * too many bsets, sort some of them together:
+ */
+static bool btree_node_compact(struct bch_fs *c, struct btree *b)
+{
+ unsigned unwritten_idx;
+ bool ret = false;
+
+ for (unwritten_idx = 0;
+ unwritten_idx < b->nsets;
+ unwritten_idx++)
+ if (!bset_written(b, bset(b, &b->set[unwritten_idx])))
+ break;
+
+ if (b->nsets - unwritten_idx > 1) {
+ btree_node_sort(c, b, unwritten_idx,
+ b->nsets, false);
+ ret = true;
+ }
+
+ if (unwritten_idx > 1) {
+ btree_node_sort(c, b, 0, unwritten_idx, false);
+ ret = true;
+ }
+
+ return ret;
+}
+
+void bch2_btree_build_aux_trees(struct btree *b)
+{
+ struct bset_tree *t;
+
+ for_each_bset(b, t)
+ bch2_bset_build_aux_tree(b, t,
+ !bset_written(b, bset(b, t)) &&
+ t == bset_tree_last(b));
+}
+
+/*
+ * If we have MAX_BSETS (3) bsets, should we sort them all down to just one?
+ *
+ * The first bset is going to be of similar order to the size of the node; the
+ * last bset is bounded by btree_write_set_buffer(), which is set to keep the
+ * memmove on insert from being too expensive: the middle bset should, ideally,
+ * be the geometric mean of the first and the last.
+ *
+ * Returns true if the middle bset is greater than that geometric mean:
+ */
+static inline bool should_compact_all(struct bch_fs *c, struct btree *b)
+{
+ unsigned mid_u64s_bits =
+ (ilog2(btree_max_u64s(c)) + BTREE_WRITE_SET_U64s_BITS) / 2;
+
+ return bset_u64s(&b->set[1]) > 1U << mid_u64s_bits;
+}
+
+/*
+ * bch2_btree_init_next - initialize a new (unwritten) bset that can then be
+ * inserted into
+ *
+ * Safe to call if there already is an unwritten bset - will only add a new bset
+ * if @b doesn't already have one.
+ *
+ * Returns true if we sorted (i.e. invalidated iterators)
+ */
+void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_node_entry *bne;
+ bool reinit_iter = false;
+
+ EBUG_ON(!six_lock_counts(&b->c.lock).n[SIX_LOCK_write]);
+ BUG_ON(bset_written(b, bset(b, &b->set[1])));
+ BUG_ON(btree_node_just_written(b));
+
+ if (b->nsets == MAX_BSETS &&
+ !btree_node_write_in_flight(b) &&
+ should_compact_all(c, b)) {
+ bch2_btree_node_write(c, b, SIX_LOCK_write,
+ BTREE_WRITE_init_next_bset);
+ reinit_iter = true;
+ }
+
+ if (b->nsets == MAX_BSETS &&
+ btree_node_compact(c, b))
+ reinit_iter = true;
+
+ BUG_ON(b->nsets >= MAX_BSETS);
+
+ bne = want_new_bset(c, b);
+ if (bne)
+ bch2_bset_init_next(c, b, bne);
+
+ bch2_btree_build_aux_trees(b);
+
+ if (reinit_iter)
+ bch2_trans_node_reinit_iter(trans, b);
+}
+
+static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
+ struct bch_dev *ca,
+ struct btree *b, struct bset *i,
+ unsigned offset, int write)
+{
+ prt_printf(out, bch2_log_msg(c, "%s"),
+ write == READ
+ ? "error validating btree node "
+ : "corrupt btree node before write ");
+ if (ca)
+ prt_printf(out, "on %s ", ca->name);
+ prt_printf(out, "at btree ");
+ bch2_btree_pos_to_text(out, c, b);
+
+ prt_printf(out, "\n node offset %u/%u",
+ b->written, btree_ptr_sectors_written(&b->key));
+ if (i)
+ prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
+ prt_str(out, ": ");
+}
+
+__printf(9, 10)
+static int __btree_err(int ret,
+ struct bch_fs *c,
+ struct bch_dev *ca,
+ struct btree *b,
+ struct bset *i,
+ int write,
+ bool have_retry,
+ enum bch_sb_error_id err_type,
+ const char *fmt, ...)
+{
+ struct printbuf out = PRINTBUF;
+ va_list args;
+
+ btree_err_msg(&out, c, ca, b, i, b->written, write);
+
+ va_start(args, fmt);
+ prt_vprintf(&out, fmt, args);
+ va_end(args);
+
+ if (write == WRITE) {
+ bch2_print_string_as_lines(KERN_ERR, out.buf);
+ ret = c->opts.errors == BCH_ON_ERROR_continue
+ ? 0
+ : -BCH_ERR_fsck_errors_not_fixed;
+ goto out;
+ }
+
+ if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
+ ret = -BCH_ERR_btree_node_read_err_fixable;
+ if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry)
+ ret = -BCH_ERR_btree_node_read_err_bad_node;
+
+ if (ret != -BCH_ERR_btree_node_read_err_fixable)
+ bch2_sb_error_count(c, err_type);
+
+ switch (ret) {
+ case -BCH_ERR_btree_node_read_err_fixable:
+ ret = bch2_fsck_err(c, FSCK_CAN_FIX, err_type, "%s", out.buf);
+ if (ret != -BCH_ERR_fsck_fix &&
+ ret != -BCH_ERR_fsck_ignore)
+ goto fsck_err;
+ ret = -BCH_ERR_fsck_fix;
+ break;
+ case -BCH_ERR_btree_node_read_err_want_retry:
+ case -BCH_ERR_btree_node_read_err_must_retry:
+ bch2_print_string_as_lines(KERN_ERR, out.buf);
+ break;
+ case -BCH_ERR_btree_node_read_err_bad_node:
+ bch2_print_string_as_lines(KERN_ERR, out.buf);
+ bch2_topology_error(c);
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: -EIO;
+ break;
+ case -BCH_ERR_btree_node_read_err_incompatible:
+ bch2_print_string_as_lines(KERN_ERR, out.buf);
+ ret = -BCH_ERR_fsck_errors_not_fixed;
+ break;
+ default:
+ BUG();
+ }
+out:
+fsck_err:
+ printbuf_exit(&out);
+ return ret;
+}
+
+#define btree_err(type, c, ca, b, i, _err_type, msg, ...) \
+({ \
+ int _ret = __btree_err(type, c, ca, b, i, write, have_retry, \
+ BCH_FSCK_ERR_##_err_type, \
+ msg, ##__VA_ARGS__); \
+ \
+ if (_ret != -BCH_ERR_fsck_fix) { \
+ ret = _ret; \
+ goto fsck_err; \
+ } \
+ \
+ *saw_error = true; \
+})
+
+#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false)
+
+/*
+ * When btree topology repair changes the start or end of a node, that might
+ * mean we have to drop keys that are no longer inside the node:
+ */
+__cold
+void bch2_btree_node_drop_keys_outside_node(struct btree *b)
+{
+ struct bset_tree *t;
+
+ for_each_bset(b, t) {
+ struct bset *i = bset(b, t);
+ struct bkey_packed *k;
+
+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k))
+ if (bkey_cmp_left_packed(b, k, &b->data->min_key) >= 0)
+ break;
+
+ if (k != i->start) {
+ unsigned shift = (u64 *) k - (u64 *) i->start;
+
+ memmove_u64s_down(i->start, k,
+ (u64 *) vstruct_end(i) - (u64 *) k);
+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift);
+ set_btree_bset_end(b, t);
+ }
+
+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k))
+ if (bkey_cmp_left_packed(b, k, &b->data->max_key) > 0)
+ break;
+
+ if (k != vstruct_last(i)) {
+ i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start);
+ set_btree_bset_end(b, t);
+ }
+ }
+
+ /*
+ * Always rebuild search trees: eytzinger search tree nodes directly
+ * depend on the values of min/max key:
+ */
+ bch2_bset_set_no_aux_tree(b, b->set);
+ bch2_btree_build_aux_trees(b);
+
+ struct bkey_s_c k;
+ struct bkey unpacked;
+ struct btree_node_iter iter;
+ for_each_btree_node_key_unpack(b, k, &iter, &unpacked) {
+ BUG_ON(bpos_lt(k.k->p, b->data->min_key));
+ BUG_ON(bpos_gt(k.k->p, b->data->max_key));
+ }
+}
+
+static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
+ struct btree *b, struct bset *i,
+ unsigned offset, unsigned sectors,
+ int write, bool have_retry, bool *saw_error)
+{
+ unsigned version = le16_to_cpu(i->version);
+ struct printbuf buf1 = PRINTBUF;
+ struct printbuf buf2 = PRINTBUF;
+ int ret = 0;
+
+ btree_err_on(!bch2_version_compatible(version),
+ -BCH_ERR_btree_node_read_err_incompatible,
+ c, ca, b, i,
+ btree_node_unsupported_version,
+ "unsupported bset version %u.%u",
+ BCH_VERSION_MAJOR(version),
+ BCH_VERSION_MINOR(version));
+
+ if (btree_err_on(version < c->sb.version_min,
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, NULL, b, i,
+ btree_node_bset_older_than_sb_min,
+ "bset version %u older than superblock version_min %u",
+ version, c->sb.version_min)) {
+ mutex_lock(&c->sb_lock);
+ c->disk_sb.sb->version_min = cpu_to_le16(version);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+
+ if (btree_err_on(BCH_VERSION_MAJOR(version) >
+ BCH_VERSION_MAJOR(c->sb.version),
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, NULL, b, i,
+ btree_node_bset_newer_than_sb,
+ "bset version %u newer than superblock version %u",
+ version, c->sb.version)) {
+ mutex_lock(&c->sb_lock);
+ c->disk_sb.sb->version = cpu_to_le16(version);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+
+ btree_err_on(BSET_SEPARATE_WHITEOUTS(i),
+ -BCH_ERR_btree_node_read_err_incompatible,
+ c, ca, b, i,
+ btree_node_unsupported_version,
+ "BSET_SEPARATE_WHITEOUTS no longer supported");
+
+ if (btree_err_on(offset + sectors > btree_sectors(c),
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, ca, b, i,
+ bset_past_end_of_btree_node,
+ "bset past end of btree node")) {
+ i->u64s = 0;
+ ret = 0;
+ goto out;
+ }
+
+ btree_err_on(offset && !i->u64s,
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, ca, b, i,
+ bset_empty,
+ "empty bset");
+
+ btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset,
+ -BCH_ERR_btree_node_read_err_want_retry,
+ c, ca, b, i,
+ bset_wrong_sector_offset,
+ "bset at wrong sector offset");
+
+ if (!offset) {
+ struct btree_node *bn =
+ container_of(i, struct btree_node, keys);
+ /* These indicate that we read the wrong btree node: */
+
+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+ struct bch_btree_ptr_v2 *bp =
+ &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+ /* XXX endianness */
+ btree_err_on(bp->seq != bn->keys.seq,
+ -BCH_ERR_btree_node_read_err_must_retry,
+ c, ca, b, NULL,
+ bset_bad_seq,
+ "incorrect sequence number (wrong btree node)");
+ }
+
+ btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id,
+ -BCH_ERR_btree_node_read_err_must_retry,
+ c, ca, b, i,
+ btree_node_bad_btree,
+ "incorrect btree id");
+
+ btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level,
+ -BCH_ERR_btree_node_read_err_must_retry,
+ c, ca, b, i,
+ btree_node_bad_level,
+ "incorrect level");
+
+ if (!write)
+ compat_btree_node(b->c.level, b->c.btree_id, version,
+ BSET_BIG_ENDIAN(i), write, bn);
+
+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+ struct bch_btree_ptr_v2 *bp =
+ &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+ if (BTREE_PTR_RANGE_UPDATED(bp)) {
+ b->data->min_key = bp->min_key;
+ b->data->max_key = b->key.k.p;
+ }
+
+ btree_err_on(!bpos_eq(b->data->min_key, bp->min_key),
+ -BCH_ERR_btree_node_read_err_must_retry,
+ c, ca, b, NULL,
+ btree_node_bad_min_key,
+ "incorrect min_key: got %s should be %s",
+ (printbuf_reset(&buf1),
+ bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf),
+ (printbuf_reset(&buf2),
+ bch2_bpos_to_text(&buf2, bp->min_key), buf2.buf));
+ }
+
+ btree_err_on(!bpos_eq(bn->max_key, b->key.k.p),
+ -BCH_ERR_btree_node_read_err_must_retry,
+ c, ca, b, i,
+ btree_node_bad_max_key,
+ "incorrect max key %s",
+ (printbuf_reset(&buf1),
+ bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf));
+
+ if (write)
+ compat_btree_node(b->c.level, b->c.btree_id, version,
+ BSET_BIG_ENDIAN(i), write, bn);
+
+ btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1),
+ -BCH_ERR_btree_node_read_err_bad_node,
+ c, ca, b, i,
+ btree_node_bad_format,
+ "invalid bkey format: %s\n %s", buf1.buf,
+ (printbuf_reset(&buf2),
+ bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf));
+ printbuf_reset(&buf1);
+
+ compat_bformat(b->c.level, b->c.btree_id, version,
+ BSET_BIG_ENDIAN(i), write,
+ &bn->format);
+ }
+out:
+fsck_err:
+ printbuf_exit(&buf2);
+ printbuf_exit(&buf1);
+ return ret;
+}
+
+static int bset_key_invalid(struct bch_fs *c, struct btree *b,
+ struct bkey_s_c k,
+ bool updated_range, int rw,
+ struct printbuf *err)
+{
+ return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?:
+ (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?:
+ (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0);
+}
+
+static bool __bkey_valid(struct bch_fs *c, struct btree *b,
+ struct bset *i, struct bkey_packed *k)
+{
+ if (bkey_p_next(k) > vstruct_last(i))
+ return false;
+
+ if (k->format > KEY_FORMAT_CURRENT)
+ return false;
+
+ struct printbuf buf = PRINTBUF;
+ struct bkey tmp;
+ struct bkey_s u = __bkey_disassemble(b, k, &tmp);
+ bool ret = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int validate_bset_keys(struct bch_fs *c, struct btree *b,
+ struct bset *i, int write,
+ bool have_retry, bool *saw_error)
+{
+ unsigned version = le16_to_cpu(i->version);
+ struct bkey_packed *k, *prev = NULL;
+ struct printbuf buf = PRINTBUF;
+ bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
+ int ret = 0;
+
+ for (k = i->start;
+ k != vstruct_last(i);) {
+ struct bkey_s u;
+ struct bkey tmp;
+ unsigned next_good_key;
+
+ if (btree_err_on(bkey_p_next(k) > vstruct_last(i),
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, NULL, b, i,
+ btree_node_bkey_past_bset_end,
+ "key extends past end of bset")) {
+ i->u64s = cpu_to_le16((u64 *) k - i->_data);
+ break;
+ }
+
+ if (btree_err_on(k->format > KEY_FORMAT_CURRENT,
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, NULL, b, i,
+ btree_node_bkey_bad_format,
+ "invalid bkey format %u", k->format))
+ goto drop_this_key;
+
+ /* XXX: validate k->u64s */
+ if (!write)
+ bch2_bkey_compat(b->c.level, b->c.btree_id, version,
+ BSET_BIG_ENDIAN(i), write,
+ &b->format, k);
+
+ u = __bkey_disassemble(b, k, &tmp);
+
+ printbuf_reset(&buf);
+ if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) {
+ printbuf_reset(&buf);
+ bset_key_invalid(c, b, u.s_c, updated_range, write, &buf);
+ prt_printf(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, u.s_c);
+
+ btree_err(-BCH_ERR_btree_node_read_err_fixable,
+ c, NULL, b, i,
+ btree_node_bad_bkey,
+ "invalid bkey: %s", buf.buf);
+ goto drop_this_key;
+ }
+
+ if (write)
+ bch2_bkey_compat(b->c.level, b->c.btree_id, version,
+ BSET_BIG_ENDIAN(i), write,
+ &b->format, k);
+
+ if (prev && bkey_iter_cmp(b, prev, k) > 0) {
+ struct bkey up = bkey_unpack_key(b, prev);
+
+ printbuf_reset(&buf);
+ prt_printf(&buf, "keys out of order: ");
+ bch2_bkey_to_text(&buf, &up);
+ prt_printf(&buf, " > ");
+ bch2_bkey_to_text(&buf, u.k);
+
+ if (btree_err(-BCH_ERR_btree_node_read_err_fixable,
+ c, NULL, b, i,
+ btree_node_bkey_out_of_order,
+ "%s", buf.buf))
+ goto drop_this_key;
+ }
+
+ prev = k;
+ k = bkey_p_next(k);
+ continue;
+drop_this_key:
+ next_good_key = k->u64s;
+
+ if (!next_good_key ||
+ (BSET_BIG_ENDIAN(i) == CPU_BIG_ENDIAN &&
+ version >= bcachefs_metadata_version_snapshot)) {
+ /*
+ * only do scanning if bch2_bkey_compat() has nothing to
+ * do
+ */
+
+ if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
+ for (next_good_key = 1;
+ next_good_key < (u64 *) vstruct_last(i) - (u64 *) k;
+ next_good_key++)
+ if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
+ goto got_good_key;
+
+ }
+
+ /*
+ * didn't find a good key, have to truncate the rest of
+ * the bset
+ */
+ next_good_key = (u64 *) vstruct_last(i) - (u64 *) k;
+ }
+got_good_key:
+ le16_add_cpu(&i->u64s, -next_good_key);
+ memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k);
+ }
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
+ struct btree *b, bool have_retry, bool *saw_error)
+{
+ struct btree_node_entry *bne;
+ struct sort_iter *iter;
+ struct btree_node *sorted;
+ struct bkey_packed *k;
+ struct bset *i;
+ bool used_mempool, blacklisted;
+ bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
+ unsigned u64s;
+ unsigned ptr_written = btree_ptr_sectors_written(&b->key);
+ struct printbuf buf = PRINTBUF;
+ int ret = 0, retry_read = 0, write = READ;
+ u64 start_time = local_clock();
+
+ b->version_ondisk = U16_MAX;
+ /* We might get called multiple times on read retry: */
+ b->written = 0;
+
+ iter = mempool_alloc(&c->fill_iter, GFP_NOFS);
+ sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2);
+
+ if (bch2_meta_read_fault("btree"))
+ btree_err(-BCH_ERR_btree_node_read_err_must_retry,
+ c, ca, b, NULL,
+ btree_node_fault_injected,
+ "dynamic fault");
+
+ btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c),
+ -BCH_ERR_btree_node_read_err_must_retry,
+ c, ca, b, NULL,
+ btree_node_bad_magic,
+ "bad magic: want %llx, got %llx",
+ bset_magic(c), le64_to_cpu(b->data->magic));
+
+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+ struct bch_btree_ptr_v2 *bp =
+ &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+ bch2_bpos_to_text(&buf, b->data->min_key);
+ prt_str(&buf, "-");
+ bch2_bpos_to_text(&buf, b->data->max_key);
+
+ btree_err_on(b->data->keys.seq != bp->seq,
+ -BCH_ERR_btree_node_read_err_must_retry,
+ c, ca, b, NULL,
+ btree_node_bad_seq,
+ "got wrong btree node (want %llx got %llx)\n"
+ "got btree %s level %llu pos %s",
+ bp->seq, b->data->keys.seq,
+ bch2_btree_id_str(BTREE_NODE_ID(b->data)),
+ BTREE_NODE_LEVEL(b->data),
+ buf.buf);
+ } else {
+ btree_err_on(!b->data->keys.seq,
+ -BCH_ERR_btree_node_read_err_must_retry,
+ c, ca, b, NULL,
+ btree_node_bad_seq,
+ "bad btree header: seq 0");
+ }
+
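+ /*
+ * Walk each bset in the node: the first lives in the btree_node
+ * header, subsequent ones in btree_node_entries. Verify checksums,
+ * decrypt, and validate each one:
+ */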
+ while (b->written < (ptr_written ?: btree_sectors(c))) {
+ unsigned sectors;
+ struct nonce nonce;
+ bool first = !b->written;
+ bool csum_bad;
+
+ if (!b->written) {
+ i = &b->data->keys;
+
+ btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
+ -BCH_ERR_btree_node_read_err_want_retry,
+ c, ca, b, i,
+ bset_unknown_csum,
+ "unknown checksum type %llu", BSET_CSUM_TYPE(i));
+
+ nonce = btree_nonce(i, b->written << 9);
+
+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
+ csum_bad = bch2_crc_cmp(b->data->csum, csum);
+ if (csum_bad)
+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
+
+ btree_err_on(csum_bad,
+ -BCH_ERR_btree_node_read_err_want_retry,
+ c, ca, b, i,
+ bset_bad_csum,
+ "%s",
+ (printbuf_reset(&buf),
+ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum),
+ buf.buf));
+
+ ret = bset_encrypt(c, i, b->written << 9);
+ if (bch2_fs_fatal_err_on(ret, c,
+ "error decrypting btree node: %i", ret))
+ goto fsck_err;
+
+ btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
+ !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
+ -BCH_ERR_btree_node_read_err_incompatible,
+ c, NULL, b, NULL,
+ btree_node_unsupported_version,
+ "btree node does not have NEW_EXTENT_OVERWRITE set");
+
+ sectors = vstruct_sectors(b->data, c->block_bits);
+ } else {
+ bne = write_block(b);
+ i = &bne->keys;
+
+ if (i->seq != b->data->keys.seq)
+ break;
+
+ btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
+ -BCH_ERR_btree_node_read_err_want_retry,
+ c, ca, b, i,
+ bset_unknown_csum,
+ "unknown checksum type %llu", BSET_CSUM_TYPE(i));
+
+ nonce = btree_nonce(i, b->written << 9);
+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+ csum_bad = bch2_crc_cmp(bne->csum, csum);
+ if (csum_bad)
+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
+
+ btree_err_on(csum_bad,
+ -BCH_ERR_btree_node_read_err_want_retry,
+ c, ca, b, i,
+ bset_bad_csum,
+ "%s",
+ (printbuf_reset(&buf),
+ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum),
+ buf.buf));
+
+ ret = bset_encrypt(c, i, b->written << 9);
+ if (bch2_fs_fatal_err_on(ret, c,
+ "error decrypting btree node: %i\n", ret))
+ goto fsck_err;
+
+ sectors = vstruct_sectors(bne, c->block_bits);
+ }
+
+ b->version_ondisk = min(b->version_ondisk,
+ le16_to_cpu(i->version));
+
+ ret = validate_bset(c, ca, b, i, b->written, sectors,
+ READ, have_retry, saw_error);
+ if (ret)
+ goto fsck_err;
+
+ if (!b->written)
+ btree_node_set_format(b, b->data->format);
+
+ ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error);
+ if (ret)
+ goto fsck_err;
+
+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
+
+ blacklisted = bch2_journal_seq_is_blacklisted(c,
+ le64_to_cpu(i->journal_seq),
+ true);
+
+ btree_err_on(blacklisted && first,
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, ca, b, i,
+ bset_blacklisted_journal_seq,
+ "first btree node bset has blacklisted journal seq (%llu)",
+ le64_to_cpu(i->journal_seq));
+
+ btree_err_on(blacklisted && ptr_written,
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, ca, b, i,
+ first_bset_blacklisted_journal_seq,
+ "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u",
+ le64_to_cpu(i->journal_seq),
+ b->written, b->written + sectors, ptr_written);
+
+ b->written += sectors;
+
+ if (blacklisted && !first)
+ continue;
+
+ sort_iter_add(iter,
+ vstruct_idx(i, 0),
+ vstruct_last(i));
+ }
+
+ if (ptr_written) {
+ btree_err_on(b->written < ptr_written,
+ -BCH_ERR_btree_node_read_err_want_retry,
+ c, ca, b, NULL,
+ btree_node_data_missing,
+ "btree node data missing: expected %u sectors, found %u",
+ ptr_written, b->written);
+ } else {
+ for (bne = write_block(b);
+ bset_byte_offset(b, bne) < btree_bytes(c);
+ bne = (void *) bne + block_bytes(c))
+ btree_err_on(bne->keys.seq == b->data->keys.seq &&
+ !bch2_journal_seq_is_blacklisted(c,
+ le64_to_cpu(bne->keys.journal_seq),
+ true),
+ -BCH_ERR_btree_node_read_err_want_retry,
+ c, ca, b, NULL,
+ btree_node_bset_after_end,
+ "found bset signature after last bset");
+ }
+
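+ /*
+ * Merge sort all the bsets we read into a single bset in a bounce
+ * buffer, then swap it into place:
+ */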
+ sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
+ sorted->keys.u64s = 0;
+
+ set_btree_bset(b, b->set, &b->data->keys);
+
+ b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter);
+
+ u64s = le16_to_cpu(sorted->keys.u64s);
+ *sorted = *b->data;
+ sorted->keys.u64s = cpu_to_le16(u64s);
+ swap(sorted, b->data);
+ set_btree_bset(b, b->set, &b->data->keys);
+ b->nsets = 1;
+
+ BUG_ON(b->nr.live_u64s != u64s);
+
+ btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
+
+ if (updated_range)
+ bch2_btree_node_drop_keys_outside_node(b);
+
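+ /* Second pass: validate key values, dropping any invalid keys: */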
+ i = &b->data->keys;
+ for (k = i->start; k != vstruct_last(i);) {
+ struct bkey tmp;
+ struct bkey_s u = __bkey_disassemble(b, k, &tmp);
+
+ printbuf_reset(&buf);
+
+ if (bch2_bkey_val_invalid(c, u.s_c, READ, &buf) ||
+ (bch2_inject_invalid_keys &&
+ !bversion_cmp(u.k->version, MAX_VERSION))) {
+ printbuf_reset(&buf);
+
+ prt_printf(&buf, "invalid bkey: ");
+ bch2_bkey_val_invalid(c, u.s_c, READ, &buf);
+ prt_printf(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, u.s_c);
+
+ btree_err(-BCH_ERR_btree_node_read_err_fixable,
+ c, NULL, b, i,
+ btree_node_bad_bkey,
+ "%s", buf.buf);
+
+ btree_keys_account_key_drop(&b->nr, 0, k);
+
+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
+ memmove_u64s_down(k, bkey_p_next(k),
+ (u64 *) vstruct_end(i) - (u64 *) k);
+ set_btree_bset_end(b, b->set);
+ continue;
+ }
+
+ if (u.k->type == KEY_TYPE_btree_ptr_v2) {
+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u);
+
+ bp.v->mem_ptr = 0;
+ }
+
+ k = bkey_p_next(k);
+ }
+
+ bch2_bset_build_aux_tree(b, b->set, false);
+
+ set_needs_whiteout(btree_bset_first(b), true);
+
+ btree_node_reset_sib_u64s(b);
+
+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
+ struct bch_dev *ca2 = bch_dev_bkey_exists(c, ptr->dev);
+
+ if (ca2->mi.state != BCH_MEMBER_STATE_rw)
+ set_btree_node_need_rewrite(b);
+ }
+
+ if (!ptr_written)
+ set_btree_node_need_rewrite(b);
+out:
+ mempool_free(iter, &c->fill_iter);
+ printbuf_exit(&buf);
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time);
+ return retry_read;
+fsck_err:
+ if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
+ ret == -BCH_ERR_btree_node_read_err_must_retry)
+ retry_read = 1;
+ else
+ set_btree_node_read_error(b);
+ goto out;
+}
+
+static void btree_node_read_work(struct work_struct *work)
+{
+ struct btree_read_bio *rb =
+ container_of(work, struct btree_read_bio, work);
+ struct bch_fs *c = rb->c;
+ struct btree *b = rb->b;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
+ struct bio *bio = &rb->bio;
+ struct bch_io_failures failed = { .nr = 0 };
+ struct printbuf buf = PRINTBUF;
+ bool saw_error = false;
+ bool retry = false;
+ bool can_retry;
+
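+ /* The initial read was already submitted by the caller - jump into the retry loop: */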
+ goto start;
+ while (1) {
+ retry = true;
+ bch_info(c, "retrying read");
+ ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
+ rb->have_ioref = bch2_dev_get_ioref(ca, READ);
+ bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
+ bio->bi_iter.bi_sector = rb->pick.ptr.offset;
+ bio->bi_iter.bi_size = btree_bytes(c);
+
+ if (rb->have_ioref) {
+ bio_set_dev(bio, ca->disk_sb.bdev);
+ submit_bio_wait(bio);
+ } else {
+ bio->bi_status = BLK_STS_REMOVED;
+ }
+start:
+ printbuf_reset(&buf);
+ bch2_btree_pos_to_text(&buf, c, b);
+ bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
+ "btree read error %s for %s",
+ bch2_blk_status_to_str(bio->bi_status), buf.buf);
+ if (rb->have_ioref)
+ percpu_ref_put(&ca->io_ref);
+ rb->have_ioref = false;
+
+ bch2_mark_io_failure(&failed, &rb->pick);
+
+ can_retry = bch2_bkey_pick_read_device(c,
+ bkey_i_to_s_c(&b->key),
+ &failed, &rb->pick) > 0;
+
+ if (!bio->bi_status &&
+ !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
+ if (retry)
+ bch_info(c, "retry success");
+ break;
+ }
+
+ saw_error = true;
+
+ if (!can_retry) {
+ set_btree_node_read_error(b);
+ break;
+ }
+ }
+
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
+ rb->start_time);
+ bio_put(&rb->bio);
+
+ if (saw_error && !btree_node_read_error(b)) {
+ printbuf_reset(&buf);
+ bch2_bpos_to_text(&buf, b->key.k.p);
+ bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
+ __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);
+
+ bch2_btree_node_rewrite_async(c, b);
+ }
+
+ printbuf_exit(&buf);
+ clear_btree_node_read_in_flight(b);
+ wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
+}
+
+static void btree_node_read_endio(struct bio *bio)
+{
+ struct btree_read_bio *rb =
+ container_of(bio, struct btree_read_bio, bio);
+ struct bch_fs *c = rb->c;
+
+ if (rb->have_ioref) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
+
+ bch2_latency_acct(ca, rb->start_time, READ);
+ }
+
+ queue_work(c->io_complete_wq, &rb->work);
+}
+
+struct btree_node_read_all {
+ struct closure cl;
+ struct bch_fs *c;
+ struct btree *b;
+ unsigned nr;
+ void *buf[BCH_REPLICAS_MAX];
+ struct bio *bio[BCH_REPLICAS_MAX];
+ blk_status_t err[BCH_REPLICAS_MAX];
+};
+
+static unsigned btree_node_sectors_written(struct bch_fs *c, void *data)
+{
+ struct btree_node *bn = data;
+ struct btree_node_entry *bne;
+ unsigned offset = 0;
+
+ if (le64_to_cpu(bn->magic) != bset_magic(c))
+ return 0;
+
+ while (offset < btree_sectors(c)) {
+ if (!offset) {
+ offset += vstruct_sectors(bn, c->block_bits);
+ } else {
+ bne = data + (offset << 9);
+ if (bne->keys.seq != bn->keys.seq)
+ break;
+ offset += vstruct_sectors(bne, c->block_bits);
+ }
+ }
+
+ return offset;
+}
+
+static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *data)
+{
+ struct btree_node *bn = data;
+ struct btree_node_entry *bne;
+
+ if (!offset)
+ return false;
+
+ while (offset < btree_sectors(c)) {
+ bne = data + (offset << 9);
+ if (bne->keys.seq == bn->keys.seq)
+ return true;
+ offset++;
+ }
+
+ return false;
+}
+
+static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
+{
+ closure_type(ra, struct btree_node_read_all, cl);
+ struct bch_fs *c = ra->c;
+ struct btree *b = ra->b;
+ struct printbuf buf = PRINTBUF;
+ bool dump_bset_maps = false;
+ bool have_retry = false;
+ int ret = 0, best = -1, write = READ;
+ unsigned i, written = 0, written2 = 0;
+ __le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2
+ ? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0;
+ bool _saw_error = false, *saw_error = &_saw_error;
+
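+ /*
+ * Pick the best replica: the one with the most sectors written whose
+ * magic and sequence number match:
+ */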
+ for (i = 0; i < ra->nr; i++) {
+ struct btree_node *bn = ra->buf[i];
+
+ if (ra->err[i])
+ continue;
+
+ if (le64_to_cpu(bn->magic) != bset_magic(c) ||
+ (seq && seq != bn->keys.seq))
+ continue;
+
+ if (best < 0) {
+ best = i;
+ written = btree_node_sectors_written(c, bn);
+ continue;
+ }
+
+ written2 = btree_node_sectors_written(c, ra->buf[i]);
+ if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable,
+ c, NULL, b, NULL,
+ btree_node_replicas_sectors_written_mismatch,
+ "btree node sectors written mismatch: %u != %u",
+ written, written2) ||
+ btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]),
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, NULL, b, NULL,
+ btree_node_bset_after_end,
+ "found bset signature after last bset") ||
+ btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9),
+ -BCH_ERR_btree_node_read_err_fixable,
+ c, NULL, b, NULL,
+ btree_node_replicas_data_mismatch,
+ "btree node replicas content mismatch"))
+ dump_bset_maps = true;
+
+ if (written2 > written) {
+ written = written2;
+ best = i;
+ }
+ }
+fsck_err:
+ if (dump_bset_maps) {
+ for (i = 0; i < ra->nr; i++) {
+ struct btree_node *bn = ra->buf[i];
+ struct btree_node_entry *bne = NULL;
+ unsigned offset = 0, sectors;
+ bool gap = false;
+
+ if (ra->err[i])
+ continue;
+
+ printbuf_reset(&buf);
+
+ while (offset < btree_sectors(c)) {
+ if (!offset) {
+ sectors = vstruct_sectors(bn, c->block_bits);
+ } else {
+ bne = ra->buf[i] + (offset << 9);
+ if (bne->keys.seq != bn->keys.seq)
+ break;
+ sectors = vstruct_sectors(bne, c->block_bits);
+ }
+
+ prt_printf(&buf, " %u-%u", offset, offset + sectors);
+ if (bne && bch2_journal_seq_is_blacklisted(c,
+ le64_to_cpu(bne->keys.journal_seq), false))
+ prt_printf(&buf, "*");
+ offset += sectors;
+ }
+
+ while (offset < btree_sectors(c)) {
+ bne = ra->buf[i] + (offset << 9);
+ if (bne->keys.seq == bn->keys.seq) {
+ if (!gap)
+ prt_printf(&buf, " GAP");
+ gap = true;
+
+ sectors = vstruct_sectors(bne, c->block_bits);
+ prt_printf(&buf, " %u-%u", offset, offset + sectors);
+ if (bch2_journal_seq_is_blacklisted(c,
+ le64_to_cpu(bne->keys.journal_seq), false))
+ prt_printf(&buf, "*");
+ }
+ offset++;
+ }
+
+ bch_err(c, "replica %u:%s", i, buf.buf);
+ }
+ }
+
+ if (best >= 0) {
+ memcpy(b->data, ra->buf[best], btree_bytes(c));
+ ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
+ } else {
+ ret = -1;
+ }
+
+ if (ret)
+ set_btree_node_read_error(b);
+ else if (*saw_error)
+ bch2_btree_node_rewrite_async(c, b);
+
+ for (i = 0; i < ra->nr; i++) {
+ mempool_free(ra->buf[i], &c->btree_bounce_pool);
+ bio_put(ra->bio[i]);
+ }
+
+ closure_debug_destroy(&ra->cl);
+ kfree(ra);
+ printbuf_exit(&buf);
+
+ clear_btree_node_read_in_flight(b);
+ wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
+}
+
+static void btree_node_read_all_replicas_endio(struct bio *bio)
+{
+ struct btree_read_bio *rb =
+ container_of(bio, struct btree_read_bio, bio);
+ struct bch_fs *c = rb->c;
+ struct btree_node_read_all *ra = rb->ra;
+
+ if (rb->have_ioref) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
+
+ bch2_latency_acct(ca, rb->start_time, READ);
+ }
+
+ ra->err[rb->idx] = bio->bi_status;
+ closure_put(&ra->cl);
+}
+
+/*
+ * XXX This allocates multiple times from the same mempools, and can deadlock
+ * under sufficient memory pressure (but is only a debug path)
+ */
+static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool sync)
+{
+ struct bkey_s_c k = bkey_i_to_s_c(&b->key);
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded pick;
+ struct btree_node_read_all *ra;
+ unsigned i;
+
+ ra = kzalloc(sizeof(*ra), GFP_NOFS);
+ if (!ra)
+ return -BCH_ERR_ENOMEM_btree_node_read_all_replicas;
+
+ closure_init(&ra->cl, NULL);
+ ra->c = c;
+ ra->b = b;
+ ra->nr = bch2_bkey_nr_ptrs(k);
+
+ for (i = 0; i < ra->nr; i++) {
+ ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
+ ra->bio[i] = bio_alloc_bioset(NULL,
+ buf_pages(ra->buf[i], btree_bytes(c)),
+ REQ_OP_READ|REQ_SYNC|REQ_META,
+ GFP_NOFS,
+ &c->btree_bio);
+ }
+
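+ /* Submit a read for each pointer; ra->cl tracks the outstanding bios: */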
+ i = 0;
+ bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+ struct btree_read_bio *rb =
+ container_of(ra->bio[i], struct btree_read_bio, bio);
+ rb->c = c;
+ rb->b = b;
+ rb->ra = ra;
+ rb->start_time = local_clock();
+ rb->have_ioref = bch2_dev_get_ioref(ca, READ);
+ rb->idx = i;
+ rb->pick = pick;
+ rb->bio.bi_iter.bi_sector = pick.ptr.offset;
+ rb->bio.bi_end_io = btree_node_read_all_replicas_endio;
+ bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c));
+
+ if (rb->have_ioref) {
+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
+ bio_sectors(&rb->bio));
+ bio_set_dev(&rb->bio, ca->disk_sb.bdev);
+
+ closure_get(&ra->cl);
+ submit_bio(&rb->bio);
+ } else {
+ ra->err[i] = BLK_STS_REMOVED;
+ }
+
+ i++;
+ }
+
+ if (sync) {
+ closure_sync(&ra->cl);
+ btree_node_read_all_replicas_done(&ra->cl.work);
+ } else {
+ continue_at(&ra->cl, btree_node_read_all_replicas_done,
+ c->io_complete_wq);
+ }
+
+ return 0;
+}
+
+void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
+ bool sync)
+{
+ struct bch_fs *c = trans->c;
+ struct extent_ptr_decoded pick;
+ struct btree_read_bio *rb;
+ struct bch_dev *ca;
+ struct bio *bio;
+ int ret;
+
+ trace_and_count(c, btree_node_read, trans, b);
+
+ if (bch2_verify_all_btree_replicas &&
+ !btree_node_read_all_replicas(c, b, sync))
+ return;
+
+ ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
+ NULL, &pick);
+
+ if (ret <= 0) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "btree node read error: no device to read from\n at ");
+ bch2_btree_pos_to_text(&buf, c, b);
+ bch_err(c, "%s", buf.buf);
+
+ if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
+ bch2_fatal_error(c);
+
+ set_btree_node_read_error(b);
+ clear_btree_node_read_in_flight(b);
+ wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
+ printbuf_exit(&buf);
+ return;
+ }
+
+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
+ bio = bio_alloc_bioset(NULL,
+ buf_pages(b->data, btree_bytes(c)),
+ REQ_OP_READ|REQ_SYNC|REQ_META,
+ GFP_NOFS,
+ &c->btree_bio);
+ rb = container_of(bio, struct btree_read_bio, bio);
+ rb->c = c;
+ rb->b = b;
+ rb->ra = NULL;
+ rb->start_time = local_clock();
+ rb->have_ioref = bch2_dev_get_ioref(ca, READ);
+ rb->pick = pick;
+ INIT_WORK(&rb->work, btree_node_read_work);
+ bio->bi_iter.bi_sector = pick.ptr.offset;
+ bio->bi_end_io = btree_node_read_endio;
+ bch2_bio_map(bio, b->data, btree_bytes(c));
+
+ if (rb->have_ioref) {
+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
+ bio_sectors(bio));
+ bio_set_dev(bio, ca->disk_sb.bdev);
+
+ if (sync) {
+ submit_bio_wait(bio);
+ bch2_latency_acct(ca, rb->start_time, READ);
+ btree_node_read_work(&rb->work);
+ } else {
+ submit_bio(bio);
+ }
+ } else {
+ bio->bi_status = BLK_STS_REMOVED;
+
+ if (sync)
+ btree_node_read_work(&rb->work);
+ else
+ queue_work(c->io_complete_wq, &rb->work);
+ }
+}
+
+static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
+ const struct bkey_i *k, unsigned level)
+{
+ struct bch_fs *c = trans->c;
+ struct closure cl;
+ struct btree *b;
+ int ret;
+
+ closure_init_stack(&cl);
+
+ do {
+ ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
+ closure_sync(&cl);
+ } while (ret);
+
+ b = bch2_btree_node_mem_alloc(trans, level != 0);
+ bch2_btree_cache_cannibalize_unlock(trans);
+
+ BUG_ON(IS_ERR(b));
+
+ bkey_copy(&b->key, k);
+ BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id));
+
+ set_btree_node_read_in_flight(b);
+
+ bch2_btree_node_read(trans, b, true);
+
+ if (btree_node_read_error(b)) {
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+ mutex_lock(&c->btree_cache.lock);
+ list_move(&b->list, &c->btree_cache.freeable);
+ mutex_unlock(&c->btree_cache.lock);
+
+ ret = -EIO;
+ goto err;
+ }
+
+ bch2_btree_set_root_for_read(c, b);
+err:
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+
+ return ret;
+}
+
+int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
+ const struct bkey_i *k, unsigned level)
+{
+ return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
+}
+
+static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
+ struct btree_write *w)
+{
+ unsigned long old, new, v = READ_ONCE(b->will_make_reachable);
+
+ do {
+ old = new = v;
+ if (!(old & 1))
+ break;
+
+ new &= ~1UL;
+ } while ((v = cmpxchg(&b->will_make_reachable, old, new)) != old);
+
+ if (old & 1)
+ closure_put(&((struct btree_update *) new)->cl);
+
+ bch2_journal_pin_drop(&c->journal, &w->journal);
+}
+
+static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
+{
+ struct btree_write *w = btree_prev_write(b);
+ unsigned long old, new, v;
+ unsigned type = 0;
+
+ bch2_btree_complete_write(c, b, w);
+
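+ /*
+ * If the node was redirtied and needs another write, start it now,
+ * keeping write_in_flight set; otherwise clear the in flight bits:
+ */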
+ v = READ_ONCE(b->flags);
+ do {
+ old = new = v;
+
+ if ((old & (1U << BTREE_NODE_dirty)) &&
+ (old & (1U << BTREE_NODE_need_write)) &&
+ !(old & (1U << BTREE_NODE_never_write)) &&
+ !(old & (1U << BTREE_NODE_write_blocked)) &&
+ !(old & (1U << BTREE_NODE_will_make_reachable))) {
+ new &= ~(1U << BTREE_NODE_dirty);
+ new &= ~(1U << BTREE_NODE_need_write);
+ new |= (1U << BTREE_NODE_write_in_flight);
+ new |= (1U << BTREE_NODE_write_in_flight_inner);
+ new |= (1U << BTREE_NODE_just_written);
+ new ^= (1U << BTREE_NODE_write_idx);
+
+ type = new & BTREE_WRITE_TYPE_MASK;
+ new &= ~BTREE_WRITE_TYPE_MASK;
+ } else {
+ new &= ~(1U << BTREE_NODE_write_in_flight);
+ new &= ~(1U << BTREE_NODE_write_in_flight_inner);
+ }
+ } while ((v = cmpxchg(&b->flags, old, new)) != old);
+
+ if (new & (1U << BTREE_NODE_write_in_flight))
+ __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type);
+ else
+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
+}
+
+static void btree_node_write_done(struct bch_fs *c, struct btree *b)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
+ __btree_node_write_done(c, b);
+ six_unlock_read(&b->c.lock);
+
+ bch2_trans_put(trans);
+}
+
+static void btree_node_write_work(struct work_struct *work)
+{
+ struct btree_write_bio *wbio =
+ container_of(work, struct btree_write_bio, work);
+ struct bch_fs *c = wbio->wbio.c;
+ struct btree *b = wbio->wbio.bio.bi_private;
+ struct bch_extent_ptr *ptr;
+ int ret = 0;
+
+ btree_bounce_free(c,
+ wbio->data_bytes,
+ wbio->wbio.used_mempool,
+ wbio->data);
+
+ bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), ptr,
+ bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
+
+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
+ ret = -BCH_ERR_btree_write_all_failed;
+ goto err;
+ }
+
+ if (wbio->wbio.first_btree_write) {
+ if (wbio->wbio.failed.nr) {
+
+ }
+ } else {
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
+ BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_journal_reclaim|
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_no_check_rw,
+ !wbio->wbio.failed.nr));
+ if (ret)
+ goto err;
+ }
+out:
+ bio_put(&wbio->wbio.bio);
+ btree_node_write_done(c, b);
+ return;
+err:
+ set_btree_node_noevict(b);
+ if (!bch2_err_matches(ret, EROFS))
+ bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret));
+ goto out;
+}
+
+static void btree_node_write_endio(struct bio *bio)
+{
+ struct bch_write_bio *wbio = to_wbio(bio);
+ struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
+ struct bch_write_bio *orig = parent ?: wbio;
+ struct btree_write_bio *wb = container_of(orig, struct btree_write_bio, wbio);
+ struct bch_fs *c = wbio->c;
+ struct btree *b = wbio->bio.bi_private;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev);
+ unsigned long flags;
+
+ if (wbio->have_ioref)
+ bch2_latency_acct(ca, wbio->submit_time, WRITE);
+
+ if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
+ "btree write error: %s",
+ bch2_blk_status_to_str(bio->bi_status)) ||
+ bch2_meta_write_fault("btree")) {
+ spin_lock_irqsave(&c->btree_write_error_lock, flags);
+ bch2_dev_list_add_dev(&orig->failed, wbio->dev);
+ spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
+ }
+
+ if (wbio->have_ioref)
+ percpu_ref_put(&ca->io_ref);
+
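+ /*
+ * Split (per replica) bios complete into their parent; only the
+ * original bio clears the in flight bit and queues the completion work:
+ */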
+ if (parent) {
+ bio_put(bio);
+ bio_endio(&parent->bio);
+ return;
+ }
+
+ clear_btree_node_write_in_flight_inner(b);
+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner);
+ INIT_WORK(&wb->work, btree_node_write_work);
+ queue_work(c->btree_io_complete_wq, &wb->work);
+}
+
+static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
+ struct bset *i, unsigned sectors)
+{
+ struct printbuf buf = PRINTBUF;
+ bool saw_error;
+ int ret;
+
+ ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key),
+ BKEY_TYPE_btree, WRITE, &buf);
+
+ if (ret)
+ bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf);
+ printbuf_exit(&buf);
+ if (ret)
+ return ret;
+
+ ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?:
+ validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error);
+ if (ret) {
+ bch2_inconsistent_error(c);
+ dump_stack();
+ }
+
+ return ret;
+}
+
+static void btree_write_submit(struct work_struct *work)
+{
+ struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work);
+ BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
+
+ bkey_copy(&tmp.k, &wbio->key);
+
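+ /*
+ * The key's pointers give the start of the node; offset them by
+ * sector_offset so this write lands at the right position within it:
+ */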
+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&tmp.k)), ptr)
+ ptr->offset += wbio->sector_offset;
+
+ bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree,
+ &tmp.k, false);
+}
+
+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
+{
+ struct btree_write_bio *wbio;
+ struct bset_tree *t;
+ struct bset *i;
+ struct btree_node *bn = NULL;
+ struct btree_node_entry *bne = NULL;
+ struct sort_iter_stack sort_iter;
+ struct nonce nonce;
+ unsigned bytes_to_write, sectors_to_write, bytes, u64s;
+ u64 seq = 0;
+ bool used_mempool;
+ unsigned long old, new;
+ bool validate_before_checksum = false;
+ enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
+ void *data;
+ int ret;
+
+ if (flags & BTREE_WRITE_ALREADY_STARTED)
+ goto do_write;
+
+ /*
+ * We may only have a read lock on the btree node - the dirty bit is our
+ * "lock" against racing with other threads that may be trying to start
+ * a write, we do a write iff we clear the dirty bit. Since setting the
+ * dirty bit requires a write lock, we can't race with other threads
+ * redirtying it:
+ */
+ do {
+ old = new = READ_ONCE(b->flags);
+
+ if (!(old & (1 << BTREE_NODE_dirty)))
+ return;
+
+ if ((flags & BTREE_WRITE_ONLY_IF_NEED) &&
+ !(old & (1 << BTREE_NODE_need_write)))
+ return;
+
+ if (old &
+ ((1 << BTREE_NODE_never_write)|
+ (1 << BTREE_NODE_write_blocked)))
+ return;
+
+ if (b->written &&
+ (old & (1 << BTREE_NODE_will_make_reachable)))
+ return;
+
+ if (old & (1 << BTREE_NODE_write_in_flight))
+ return;
+
+ if (flags & BTREE_WRITE_ONLY_IF_NEED)
+ type = new & BTREE_WRITE_TYPE_MASK;
+ new &= ~BTREE_WRITE_TYPE_MASK;
+
+ new &= ~(1 << BTREE_NODE_dirty);
+ new &= ~(1 << BTREE_NODE_need_write);
+ new |= (1 << BTREE_NODE_write_in_flight);
+ new |= (1 << BTREE_NODE_write_in_flight_inner);
+ new |= (1 << BTREE_NODE_just_written);
+ new ^= (1 << BTREE_NODE_write_idx);
+ } while (cmpxchg_acquire(&b->flags, old, new) != old);
+
+ if (new & (1U << BTREE_NODE_need_write))
+ return;
+do_write:
+ BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0));
+
+ atomic_dec(&c->btree_cache.dirty);
+
+ BUG_ON(btree_node_fake(b));
+ BUG_ON((b->will_make_reachable != 0) != !b->written);
+
+ BUG_ON(b->written >= btree_sectors(c));
+ BUG_ON(b->written & (block_sectors(c) - 1));
+ BUG_ON(bset_written(b, btree_bset_last(b)));
+ BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c));
+ BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format)));
+
+ bch2_sort_whiteouts(c, b);
+
+ sort_iter_stack_init(&sort_iter, b);
+
+ bytes = !b->written
+ ? sizeof(struct btree_node)
+ : sizeof(struct btree_node_entry);
+
+ bytes += b->whiteout_u64s * sizeof(u64);
+
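+ /* Accumulate the size of, and add sort iterators for, every unwritten bset: */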
+ for_each_bset(b, t) {
+ i = bset(b, t);
+
+ if (bset_written(b, i))
+ continue;
+
+ bytes += le16_to_cpu(i->u64s) * sizeof(u64);
+ sort_iter_add(&sort_iter.iter,
+ btree_bkey_first(b, t),
+ btree_bkey_last(b, t));
+ seq = max(seq, le64_to_cpu(i->journal_seq));
+ }
+
+ BUG_ON(b->written && !seq);
+
+ /* bch2_varint_decode may read up to 7 bytes past the end of the buffer: */
+ bytes += 8;
+
+ /* buffer must be a multiple of the block size */
+ bytes = round_up(bytes, block_bytes(c));
+
+ data = btree_bounce_alloc(c, bytes, &used_mempool);
+
+ if (!b->written) {
+ bn = data;
+ *bn = *b->data;
+ i = &bn->keys;
+ } else {
+ bne = data;
+ bne->keys = b->data->keys;
+ i = &bne->keys;
+ }
+
+ i->journal_seq = cpu_to_le64(seq);
+ i->u64s = 0;
+
+ sort_iter_add(&sort_iter.iter,
+ unwritten_whiteouts_start(c, b),
+ unwritten_whiteouts_end(c, b));
+ SET_BSET_SEPARATE_WHITEOUTS(i, false);
+
+ b->whiteout_u64s = 0;
+
+ u64s = bch2_sort_keys(i->start, &sort_iter.iter, false);
+ le16_add_cpu(&i->u64s, u64s);
+
+ BUG_ON(!b->written && i->u64s != b->data->keys.u64s);
+
+ set_needs_whiteout(i, false);
+
+ /* do we have data to write? */
+ if (b->written && !i->u64s)
+ goto nowrite;
+
+ bytes_to_write = vstruct_end(i) - data;
+ sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
+
+ if (!b->written &&
+ b->key.k.type == KEY_TYPE_btree_ptr_v2)
+ BUG_ON(btree_ptr_sectors_written(&b->key) != sectors_to_write);
+
+ memset(data + bytes_to_write, 0,
+ (sectors_to_write << 9) - bytes_to_write);
+
+ BUG_ON(b->written + sectors_to_write > btree_sectors(c));
+ BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
+ BUG_ON(i->seq != b->data->keys.seq);
+
+ i->version = cpu_to_le16(c->sb.version);
+ SET_BSET_OFFSET(i, b->written);
+ SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c));
+
+ if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)))
+ validate_before_checksum = true;
+
+ /* validate_bset will be modifying: */
+ if (le16_to_cpu(i->version) < bcachefs_metadata_version_current)
+ validate_before_checksum = true;
+
+ /* if we're going to be encrypting, check metadata validity first: */
+ if (validate_before_checksum &&
+ validate_bset_for_write(c, b, i, sectors_to_write))
+ goto err;
+
+ ret = bset_encrypt(c, i, b->written << 9);
+ if (bch2_fs_fatal_err_on(ret, c,
+ "error encrypting btree node: %i\n", ret))
+ goto err;
+
+ nonce = btree_nonce(i, b->written << 9);
+
+ if (bn)
+ bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn);
+ else
+ bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+
+ /* if we're not encrypting, check metadata after checksumming: */
+ if (!validate_before_checksum &&
+ validate_bset_for_write(c, b, i, sectors_to_write))
+ goto err;
+
+ /*
+ * We handle btree write errors by immediately halting the journal -
+ * after we've done that, we can't issue any subsequent btree writes
+ * because they might have pointers to new nodes that failed to write.
+ *
+ * Furthermore, there's no point in doing any more btree writes because
+ * with the journal stopped, we're never going to update the journal to
+ * reflect that those writes were done and the data flushed from the
+ * journal:
+ *
+ * Also on journal error, the pending write may have updates that were
+ * never journalled (interior nodes, see btree_update_nodes_written()) -
+ * it's critical that we don't do the write in that case otherwise we
+ * will have updates visible that weren't in the journal:
+ *
+ * Make sure to update b->written so bch2_btree_init_next() doesn't
+ * break:
+ */
+ if (bch2_journal_error(&c->journal) ||
+ c->opts.nochanges)
+ goto err;
+
+ trace_and_count(c, btree_node_write, b, bytes_to_write, sectors_to_write);
+
+ wbio = container_of(bio_alloc_bioset(NULL,
+ buf_pages(data, sectors_to_write << 9),
+ REQ_OP_WRITE|REQ_META,
+ GFP_NOFS,
+ &c->btree_bio),
+ struct btree_write_bio, wbio.bio);
+ wbio_init(&wbio->wbio.bio);
+ wbio->data = data;
+ wbio->data_bytes = bytes;
+ wbio->sector_offset = b->written;
+ wbio->wbio.c = c;
+ wbio->wbio.used_mempool = used_mempool;
+ wbio->wbio.first_btree_write = !b->written;
+ wbio->wbio.bio.bi_end_io = btree_node_write_endio;
+ wbio->wbio.bio.bi_private = b;
+
+ bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9);
+
+ bkey_copy(&wbio->key, &b->key);
+
+ b->written += sectors_to_write;
+
+ if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2)
+ bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
+ cpu_to_le16(b->written);
+
+ atomic64_inc(&c->btree_write_stats[type].nr);
+ atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
+
+ INIT_WORK(&wbio->work, btree_write_submit);
+ queue_work(c->io_complete_wq, &wbio->work);
+ return;
+err:
+ set_btree_node_noevict(b);
+ b->written += sectors_to_write;
+nowrite:
+ btree_bounce_free(c, bytes, used_mempool, data);
+ __btree_node_write_done(c, b);
+}
+
+/*
+ * Work that must be done with write lock held:
+ */
+bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
+{
+ bool invalidated_iter = false;
+ struct btree_node_entry *bne;
+ struct bset_tree *t;
+
+ if (!btree_node_just_written(b))
+ return false;
+
+ BUG_ON(b->whiteout_u64s);
+
+ clear_btree_node_just_written(b);
+
+ /*
+ * Note: immediately after write, bset_written() doesn't work - the
+ * amount of data we had to write after compaction might have been
+ * smaller than the offset of the last bset.
+ *
+ * However, we know that all bsets have been written here, as long as
+ * we're still holding the write lock:
+ */
+
+ /*
+ * XXX: decide if we really want to unconditionally sort down to a
+ * single bset:
+ */
+ if (b->nsets > 1) {
+ btree_node_sort(c, b, 0, b->nsets, true);
+ invalidated_iter = true;
+ } else {
+ invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL);
+ }
+
+ for_each_bset(b, t)
+ set_needs_whiteout(bset(b, t), true);
+
+ bch2_btree_verify(c, b);
+
+ /*
+ * If later we don't unconditionally sort down to a single bset, we have
+ * to ensure this is still true:
+ */
+ BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b));
+
+ bne = want_new_bset(c, b);
+ if (bne)
+ bch2_bset_init_next(c, b, bne);
+
+ bch2_btree_build_aux_trees(b);
+
+ return invalidated_iter;
+}
+
+/*
+ * Use this one if the node is intent locked:
+ */
+void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
+ enum six_lock_type lock_type_held,
+ unsigned flags)
+{
+ if (lock_type_held == SIX_LOCK_intent ||
+ (lock_type_held == SIX_LOCK_read &&
+ six_lock_tryupgrade(&b->c.lock))) {
+ __bch2_btree_node_write(c, b, flags);
+
+ /* don't cycle lock unnecessarily: */
+ if (btree_node_just_written(b) &&
+ six_trylock_write(&b->c.lock)) {
+ bch2_btree_post_write_cleanup(c, b);
+ six_unlock_write(&b->c.lock);
+ }
+
+ if (lock_type_held == SIX_LOCK_read)
+ six_lock_downgrade(&b->c.lock);
+ } else {
+ __bch2_btree_node_write(c, b, flags);
+ if (lock_type_held == SIX_LOCK_write &&
+ btree_node_just_written(b))
+ bch2_btree_post_write_cleanup(c, b);
+ }
+}
+
+static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
+{
+ struct bucket_table *tbl;
+ struct rhash_head *pos;
+ struct btree *b;
+ unsigned i;
+ bool ret = false;
+restart:
+ rcu_read_lock();
+ for_each_cached_btree(b, c, tbl, i, pos)
+ if (test_bit(flag, &b->flags)) {
+ rcu_read_unlock();
+ wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE);
+ ret = true;
+ goto restart;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+bool bch2_btree_flush_all_reads(struct bch_fs *c)
+{
+ return __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight);
+}
+
+bool bch2_btree_flush_all_writes(struct bch_fs *c)
+{
+ return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
+}
+
+static const char * const bch2_btree_write_types[] = {
+#define x(t, n) [n] = #t,
+ BCH_BTREE_WRITE_TYPES()
+ NULL
+};
+
+void bch2_btree_write_stats_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ printbuf_tabstop_push(out, 20);
+ printbuf_tabstop_push(out, 10);
+
+ prt_tab(out);
+ prt_str(out, "nr");
+ prt_tab(out);
+ prt_str(out, "size");
+ prt_newline(out);
+
+ for (unsigned i = 0; i < BTREE_WRITE_TYPE_NR; i++) {
+ u64 nr = atomic64_read(&c->btree_write_stats[i].nr);
+ u64 bytes = atomic64_read(&c->btree_write_stats[i].bytes);
+
+ prt_printf(out, "%s:", bch2_btree_write_types[i]);
+ prt_tab(out);
+ prt_u64(out, nr);
+ prt_tab(out);
+ prt_human_readable_u64(out, nr ? div64_u64(bytes, nr) : 0);
+ prt_newline(out);
+ }
+}
diff --git a/c_src/libbcachefs/btree_io.h b/c_src/libbcachefs/btree_io.h
new file mode 100644
index 00000000..e251cb6b
--- /dev/null
+++ b/c_src/libbcachefs/btree_io.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_IO_H
+#define _BCACHEFS_BTREE_IO_H
+
+#include "bkey_methods.h"
+#include "bset.h"
+#include "btree_locking.h"
+#include "checksum.h"
+#include "extents.h"
+#include "io_write_types.h"
+
+struct bch_fs;
+struct btree_write;
+struct btree;
+struct btree_iter;
+struct btree_node_read_all;
+
+static inline void set_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
+{
+ if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags))
+ atomic_inc(&c->btree_cache.dirty);
+}
+
+static inline void clear_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
+{
+ if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags))
+ atomic_dec(&c->btree_cache.dirty);
+}
+
+static inline unsigned btree_ptr_sectors_written(struct bkey_i *k)
+{
+ return k->k.type == KEY_TYPE_btree_ptr_v2
+ ? le16_to_cpu(bkey_i_to_btree_ptr_v2(k)->v.sectors_written)
+ : 0;
+}
+
+struct btree_read_bio {
+ struct bch_fs *c;
+ struct btree *b;
+ struct btree_node_read_all *ra;
+ u64 start_time;
+ unsigned have_ioref:1;
+ unsigned idx:7;
+ struct extent_ptr_decoded pick;
+ struct work_struct work;
+ struct bio bio;
+};
+
+struct btree_write_bio {
+ struct work_struct work;
+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
+ void *data;
+ unsigned data_bytes;
+ unsigned sector_offset;
+ struct bch_write_bio wbio;
+};
+
+void bch2_btree_node_io_unlock(struct btree *);
+void bch2_btree_node_io_lock(struct btree *);
+void __bch2_btree_node_wait_on_read(struct btree *);
+void __bch2_btree_node_wait_on_write(struct btree *);
+void bch2_btree_node_wait_on_read(struct btree *);
+void bch2_btree_node_wait_on_write(struct btree *);
+
+enum compact_mode {
+ COMPACT_LAZY,
+ COMPACT_ALL,
+};
+
+bool bch2_compact_whiteouts(struct bch_fs *, struct btree *,
+ enum compact_mode);
+
+static inline bool should_compact_bset_lazy(struct btree *b,
+ struct bset_tree *t)
+{
+ unsigned total_u64s = bset_u64s(t);
+ unsigned dead_u64s = bset_dead_u64s(b, t);
+
+ return dead_u64s > 64 && dead_u64s * 3 > total_u64s;
+}
+
+static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree *b)
+{
+ struct bset_tree *t;
+
+ for_each_bset(b, t)
+ if (should_compact_bset_lazy(b, t))
+ return bch2_compact_whiteouts(c, b, COMPACT_LAZY);
+
+ return false;
+}
+
+static inline struct nonce btree_nonce(struct bset *i, unsigned offset)
+{
+ return (struct nonce) {{
+ [0] = cpu_to_le32(offset),
+ [1] = ((__le32 *) &i->seq)[0],
+ [2] = ((__le32 *) &i->seq)[1],
+ [3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE,
+ }};
+}
+
+static inline int bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
+{
+ struct nonce nonce = btree_nonce(i, offset);
+ int ret;
+
+ if (!offset) {
+ struct btree_node *bn = container_of(i, struct btree_node, keys);
+ unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
+
+ ret = bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
+ &bn->flags, bytes);
+ if (ret)
+ return ret;
+
+ nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
+ }
+
+ return bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
+ vstruct_end(i) - (void *) i->_data);
+}
+
+void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *);
+
+void bch2_btree_node_drop_keys_outside_node(struct btree *);
+
+void bch2_btree_build_aux_trees(struct btree *);
+void bch2_btree_init_next(struct btree_trans *, struct btree *);
+
+int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
+ struct btree *, bool, bool *);
+void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
+int bch2_btree_root_read(struct bch_fs *, enum btree_id,
+ const struct bkey_i *, unsigned);
+
+bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
+
+enum btree_write_flags {
+ __BTREE_WRITE_ONLY_IF_NEED = BTREE_WRITE_TYPE_BITS,
+ __BTREE_WRITE_ALREADY_STARTED,
+};
+#define BTREE_WRITE_ONLY_IF_NEED BIT(__BTREE_WRITE_ONLY_IF_NEED)
+#define BTREE_WRITE_ALREADY_STARTED BIT(__BTREE_WRITE_ALREADY_STARTED)
+
+void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
+void bch2_btree_node_write(struct bch_fs *, struct btree *,
+ enum six_lock_type, unsigned);
+
+static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
+ enum six_lock_type lock_held)
+{
+ bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
+}
+
+bool bch2_btree_flush_all_reads(struct bch_fs *);
+bool bch2_btree_flush_all_writes(struct bch_fs *);
+
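+/*
+ * Fix up keys and formats written by older metadata versions: the inodes
+ * btree used to be keyed with inode and offset swapped, and keys written
+ * before snapshots need the snapshot field filled in when unpacking:
+ */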
+static inline void compat_bformat(unsigned level, enum btree_id btree_id,
+ unsigned version, unsigned big_endian,
+ int write, struct bkey_format *f)
+{
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_id == BTREE_ID_inodes) {
+ swap(f->bits_per_field[BKEY_FIELD_INODE],
+ f->bits_per_field[BKEY_FIELD_OFFSET]);
+ swap(f->field_offset[BKEY_FIELD_INODE],
+ f->field_offset[BKEY_FIELD_OFFSET]);
+ }
+
+ if (version < bcachefs_metadata_version_snapshot &&
+ (level || btree_type_has_snapshots(btree_id))) {
+ u64 max_packed =
+ ~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
+
+ f->field_offset[BKEY_FIELD_SNAPSHOT] = write
+ ? 0
+ : cpu_to_le64(U32_MAX - max_packed);
+ }
+}
+
+static inline void compat_bpos(unsigned level, enum btree_id btree_id,
+ unsigned version, unsigned big_endian,
+ int write, struct bpos *p)
+{
+ if (big_endian != CPU_BIG_ENDIAN)
+ bch2_bpos_swab(p);
+
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_id == BTREE_ID_inodes)
+ swap(p->inode, p->offset);
+}
+
+static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
+ unsigned version, unsigned big_endian,
+ int write,
+ struct btree_node *bn)
+{
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_id_is_extents(btree_id) &&
+ !bpos_eq(bn->min_key, POS_MIN) &&
+ write)
+ bn->min_key = bpos_nosnap_predecessor(bn->min_key);
+
+ if (version < bcachefs_metadata_version_snapshot &&
+ write)
+ bn->max_key.snapshot = 0;
+
+ compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
+ compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
+
+ if (version < bcachefs_metadata_version_snapshot &&
+ !write)
+ bn->max_key.snapshot = U32_MAX;
+
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_id_is_extents(btree_id) &&
+ !bpos_eq(bn->min_key, POS_MIN) &&
+ !write)
+ bn->min_key = bpos_nosnap_successor(bn->min_key);
+}
+
+void bch2_btree_write_stats_to_text(struct printbuf *, struct bch_fs *);
+
+#endif /* _BCACHEFS_BTREE_IO_H */
diff --git a/c_src/libbcachefs/btree_iter.c b/c_src/libbcachefs/btree_iter.c
new file mode 100644
index 00000000..fa298289
--- /dev/null
+++ b/c_src/libbcachefs/btree_iter.c
@@ -0,0 +1,3268 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "bkey_buf.h"
+#include "btree_cache.h"
+#include "btree_iter.h"
+#include "btree_journal_iter.h"
+#include "btree_key_cache.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+#include "debug.h"
+#include "error.h"
+#include "extents.h"
+#include "journal.h"
+#include "journal_io.h"
+#include "replicas.h"
+#include "snapshot.h"
+#include "trace.h"
+
+#include <linux/random.h>
+#include <linux/prefetch.h>
+
+static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
+static inline void btree_path_list_add(struct btree_trans *,
+ btree_path_idx_t, btree_path_idx_t);
+
+static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter)
+{
+#ifdef TRACK_PATH_ALLOCATED
+ return iter->ip_allocated;
+#else
+ return 0;
+#endif
+}
+
+static btree_path_idx_t btree_path_alloc(struct btree_trans *, btree_path_idx_t);
+static void bch2_trans_srcu_lock(struct btree_trans *);
+
+static inline int __btree_path_cmp(const struct btree_path *l,
+ enum btree_id r_btree_id,
+ bool r_cached,
+ struct bpos r_pos,
+ unsigned r_level)
+{
+ /*
+ * Must match lock ordering as defined by __bch2_btree_node_lock:
+ */
+ return cmp_int(l->btree_id, r_btree_id) ?:
+ cmp_int((int) l->cached, (int) r_cached) ?:
+ bpos_cmp(l->pos, r_pos) ?:
+ -cmp_int(l->level, r_level);
+}
+
+static inline int btree_path_cmp(const struct btree_path *l,
+ const struct btree_path *r)
+{
+ return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level);
+}
+
+static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
+{
+ /* Are we iterating over keys in all snapshots? */
+ if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
+ p = bpos_successor(p);
+ } else {
+ p = bpos_nosnap_successor(p);
+ p.snapshot = iter->snapshot;
+ }
+
+ return p;
+}
+
+static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
+{
+ /* Are we iterating over keys in all snapshots? */
+ if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
+ p = bpos_predecessor(p);
+ } else {
+ p = bpos_nosnap_predecessor(p);
+ p.snapshot = iter->snapshot;
+ }
+
+ return p;
+}
+
+static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
+{
+ struct bpos pos = iter->pos;
+
+ if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
+ !bkey_eq(pos, POS_MAX))
+ pos = bkey_successor(iter, pos);
+ return pos;
+}
+
+static inline bool btree_path_pos_before_node(struct btree_path *path,
+ struct btree *b)
+{
+ return bpos_lt(path->pos, b->data->min_key);
+}
+
+static inline bool btree_path_pos_after_node(struct btree_path *path,
+ struct btree *b)
+{
+ return bpos_gt(path->pos, b->key.k.p);
+}
+
+static inline bool btree_path_pos_in_node(struct btree_path *path,
+ struct btree *b)
+{
+ return path->btree_id == b->c.btree_id &&
+ !btree_path_pos_before_node(path, b) &&
+ !btree_path_pos_after_node(path, b);
+}
+
+/* Btree iterator: */
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+static void bch2_btree_path_verify_cached(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ struct bkey_cached *ck;
+ bool locked = btree_node_locked(path, 0);
+
+ if (!bch2_btree_node_relock(trans, path, 0))
+ return;
+
+ ck = (void *) path->l[0].b;
+ BUG_ON(ck->key.btree_id != path->btree_id ||
+ !bkey_eq(ck->key.pos, path->pos));
+
+ if (!locked)
+ btree_node_unlock(trans, path, 0);
+}
+
+static void bch2_btree_path_verify_level(struct btree_trans *trans,
+ struct btree_path *path, unsigned level)
+{
+ struct btree_path_level *l;
+ struct btree_node_iter tmp;
+ bool locked;
+ struct bkey_packed *p, *k;
+ struct printbuf buf1 = PRINTBUF;
+ struct printbuf buf2 = PRINTBUF;
+ struct printbuf buf3 = PRINTBUF;
+ const char *msg;
+
+ if (!bch2_debug_check_iterators)
+ return;
+
+ l = &path->l[level];
+ tmp = l->iter;
+ locked = btree_node_locked(path, level);
+
+ if (path->cached) {
+ if (!level)
+ bch2_btree_path_verify_cached(trans, path);
+ return;
+ }
+
+ if (!btree_path_node(path, level))
+ return;
+
+ if (!bch2_btree_node_relock_notrace(trans, path, level))
+ return;
+
+ BUG_ON(!btree_path_pos_in_node(path, l->b));
+
+ bch2_btree_node_iter_verify(&l->iter, l->b);
+
+ /*
+ * For interior nodes, the iterator will have skipped past deleted keys:
+ */
+ p = level
+ ? bch2_btree_node_iter_prev(&tmp, l->b)
+ : bch2_btree_node_iter_prev_all(&tmp, l->b);
+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
+
+ if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
+ msg = "before";
+ goto err;
+ }
+
+ if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
+ msg = "after";
+ goto err;
+ }
+
+ if (!locked)
+ btree_node_unlock(trans, path, level);
+ return;
+err:
+ bch2_bpos_to_text(&buf1, path->pos);
+
+ if (p) {
+ struct bkey uk = bkey_unpack_key(l->b, p);
+
+ bch2_bkey_to_text(&buf2, &uk);
+ } else {
+ prt_printf(&buf2, "(none)");
+ }
+
+ if (k) {
+ struct bkey uk = bkey_unpack_key(l->b, k);
+
+ bch2_bkey_to_text(&buf3, &uk);
+ } else {
+ prt_printf(&buf3, "(none)");
+ }
+
+ panic("path should be %s key at level %u:\n"
+ "path pos %s\n"
+ "prev key %s\n"
+ "cur key %s\n",
+ msg, level, buf1.buf, buf2.buf, buf3.buf);
+}
+
+static void bch2_btree_path_verify(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ struct bch_fs *c = trans->c;
+ unsigned i;
+
+ EBUG_ON(path->btree_id >= BTREE_ID_NR);
+
+ for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
+ if (!path->l[i].b) {
+ BUG_ON(!path->cached &&
+ bch2_btree_id_root(c, path->btree_id)->b->c.level > i);
+ break;
+ }
+
+ bch2_btree_path_verify_level(trans, path, i);
+ }
+
+ bch2_btree_path_verify_locks(path);
+}
+
+void bch2_trans_verify_paths(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ unsigned iter;
+
+ trans_for_each_path(trans, path, iter)
+ bch2_btree_path_verify(trans, path);
+}
+
+static void bch2_btree_iter_verify(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+
+ BUG_ON(iter->btree_id >= BTREE_ID_NR);
+
+ BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != btree_iter_path(trans, iter)->cached);
+
+ BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
+ (iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
+
+ BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
+ (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
+ !btree_type_has_snapshot_field(iter->btree_id));
+
+ if (iter->update_path)
+ bch2_btree_path_verify(trans, &trans->paths[iter->update_path]);
+ bch2_btree_path_verify(trans, btree_iter_path(trans, iter));
+}
+
+static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
+{
+ BUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
+ !iter->pos.snapshot);
+
+ BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
+ iter->pos.snapshot != iter->snapshot);
+
+ BUG_ON(bkey_lt(iter->pos, bkey_start_pos(&iter->k)) ||
+ bkey_gt(iter->pos, iter->k.p));
+}
+
+static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
+{
+ struct btree_trans *trans = iter->trans;
+ struct btree_iter copy;
+ struct bkey_s_c prev;
+ int ret = 0;
+
+ if (!bch2_debug_check_iterators)
+ return 0;
+
+ if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS))
+ return 0;
+
+ if (bkey_err(k) || !k.k)
+ return 0;
+
+ BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
+ iter->snapshot,
+ k.k->p.snapshot));
+
+ bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
+ BTREE_ITER_NOPRESERVE|
+ BTREE_ITER_ALL_SNAPSHOTS);
+ prev = bch2_btree_iter_prev(&copy);
+ if (!prev.k)
+ goto out;
+
+ ret = bkey_err(prev);
+ if (ret)
+ goto out;
+
+ if (bkey_eq(prev.k->p, k.k->p) &&
+ bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
+ prev.k->p.snapshot) > 0) {
+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+
+ bch2_bkey_to_text(&buf1, k.k);
+ bch2_bkey_to_text(&buf2, prev.k);
+
+ panic("iter snap %u\n"
+ "k %s\n"
+ "prev %s\n",
+ iter->snapshot,
+ buf1.buf, buf2.buf);
+ }
+out:
+ bch2_trans_iter_exit(trans, &copy);
+ return ret;
+}
+
+void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
+ struct bpos pos, bool key_cache)
+{
+ struct btree_path *path;
+ struct trans_for_each_path_inorder_iter iter;
+ struct printbuf buf = PRINTBUF;
+
+ btree_trans_sort_paths(trans);
+
+ trans_for_each_path_inorder(trans, path, iter) {
+ int cmp = cmp_int(path->btree_id, id) ?:
+ cmp_int(path->cached, key_cache);
+
+ if (cmp > 0)
+ break;
+ if (cmp < 0)
+ continue;
+
+ if (!btree_node_locked(path, 0) ||
+ !path->should_be_locked)
+ continue;
+
+ if (!key_cache) {
+ if (bkey_ge(pos, path->l[0].b->data->min_key) &&
+ bkey_le(pos, path->l[0].b->key.k.p))
+ return;
+ } else {
+ if (bkey_eq(pos, path->pos))
+ return;
+ }
+ }
+
+ bch2_dump_trans_paths_updates(trans);
+ bch2_bpos_to_text(&buf, pos);
+
+ panic("not locked: %s %s%s\n",
+ bch2_btree_id_str(id), buf.buf,
+ key_cache ? " cached" : "");
+}
+
+#else
+
+static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
+ struct btree_path *path, unsigned l) {}
+static inline void bch2_btree_path_verify(struct btree_trans *trans,
+ struct btree_path *path) {}
+static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
+static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
+static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }
+
+#endif
+
+/* Btree path: fixups after btree updates */
+
+static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
+ struct btree *b,
+ struct bset_tree *t,
+ struct bkey_packed *k)
+{
+ struct btree_node_iter_set *set;
+
+ btree_node_iter_for_each(iter, set)
+ if (set->end == t->end_offset) {
+ set->k = __btree_node_key_to_offset(b, k);
+ bch2_btree_node_iter_sort(iter, b);
+ return;
+ }
+
+ bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
+}
+
+static void __bch2_btree_path_fix_key_modified(struct btree_path *path,
+ struct btree *b,
+ struct bkey_packed *where)
+{
+ struct btree_path_level *l = &path->l[b->c.level];
+
+ if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
+ return;
+
+ if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0)
+ bch2_btree_node_iter_advance(&l->iter, l->b);
+}
+
+void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
+ struct btree *b,
+ struct bkey_packed *where)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ trans_for_each_path_with_node(trans, b, path, i) {
+ __bch2_btree_path_fix_key_modified(path, b, where);
+ bch2_btree_path_verify_level(trans, path, b->c.level);
+ }
+}
+
+static void __bch2_btree_node_iter_fix(struct btree_path *path,
+ struct btree *b,
+ struct btree_node_iter *node_iter,
+ struct bset_tree *t,
+ struct bkey_packed *where,
+ unsigned clobber_u64s,
+ unsigned new_u64s)
+{
+ const struct bkey_packed *end = btree_bkey_last(b, t);
+ struct btree_node_iter_set *set;
+ unsigned offset = __btree_node_key_to_offset(b, where);
+ int shift = new_u64s - clobber_u64s;
+ unsigned old_end = t->end_offset - shift;
+ unsigned orig_iter_pos = node_iter->data[0].k;
+ bool iter_current_key_modified =
+ orig_iter_pos >= offset &&
+ orig_iter_pos <= offset + clobber_u64s;
+
+ btree_node_iter_for_each(node_iter, set)
+ if (set->end == old_end)
+ goto found;
+
+ /* didn't find the bset in the iterator - might have to readd it: */
+ if (new_u64s &&
+ bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
+ bch2_btree_node_iter_push(node_iter, b, where, end);
+ goto fixup_done;
+ } else {
+ /* Iterator is after key that changed */
+ return;
+ }
+found:
+ set->end = t->end_offset;
+
+ /* Iterator hasn't gotten to the key that changed yet: */
+ if (set->k < offset)
+ return;
+
+ if (new_u64s &&
+ bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
+ set->k = offset;
+ } else if (set->k < offset + clobber_u64s) {
+ set->k = offset + new_u64s;
+ if (set->k == set->end)
+ bch2_btree_node_iter_set_drop(node_iter, set);
+ } else {
+ /* Iterator is after key that changed */
+ set->k = (int) set->k + shift;
+ return;
+ }
+
+ bch2_btree_node_iter_sort(node_iter, b);
+fixup_done:
+ if (node_iter->data[0].k != orig_iter_pos)
+ iter_current_key_modified = true;
+
+ /*
+ * When a new key is added, and the node iterator now points to that
+ * key, the iterator might have skipped past deleted keys that should
+ * come after the key the iterator now points to. We have to rewind to
+ * before those deleted keys - otherwise
+ * bch2_btree_node_iter_prev_all() breaks:
+ */
+ if (!bch2_btree_node_iter_end(node_iter) &&
+ iter_current_key_modified &&
+ b->c.level) {
+ struct bkey_packed *k, *k2, *p;
+
+ k = bch2_btree_node_iter_peek_all(node_iter, b);
+
+ for_each_bset(b, t) {
+ bool set_pos = false;
+
+ if (node_iter->data[0].end == t->end_offset)
+ continue;
+
+ k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);
+
+ while ((p = bch2_bkey_prev_all(b, t, k2)) &&
+ bkey_iter_cmp(b, k, p) < 0) {
+ k2 = p;
+ set_pos = true;
+ }
+
+ if (set_pos)
+ btree_node_iter_set_set_pos(node_iter,
+ b, t, k2);
+ }
+ }
+}
+
+void bch2_btree_node_iter_fix(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct btree_node_iter *node_iter,
+ struct bkey_packed *where,
+ unsigned clobber_u64s,
+ unsigned new_u64s)
+{
+ struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
+ struct btree_path *linked;
+ unsigned i;
+
+ if (node_iter != &path->l[b->c.level].iter) {
+ __bch2_btree_node_iter_fix(path, b, node_iter, t,
+ where, clobber_u64s, new_u64s);
+
+ if (bch2_debug_check_iterators)
+ bch2_btree_node_iter_verify(node_iter, b);
+ }
+
+ trans_for_each_path_with_node(trans, b, linked, i) {
+ __bch2_btree_node_iter_fix(linked, b,
+ &linked->l[b->c.level].iter, t,
+ where, clobber_u64s, new_u64s);
+ bch2_btree_path_verify_level(trans, linked, b->c.level);
+ }
+}
+
+/* Btree path level: pointer to a particular btree node and node iter */
+
+static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
+ struct btree_path_level *l,
+ struct bkey *u,
+ struct bkey_packed *k)
+{
+ if (unlikely(!k)) {
+ /*
+ * signal to bch2_btree_iter_peek_slot() that we're currently at
+ * a hole
+ */
+ u->type = KEY_TYPE_deleted;
+ return bkey_s_c_null;
+ }
+
+ return bkey_disassemble(l->b, k, u);
+}
+
+static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
+ struct btree_path_level *l,
+ struct bkey *u)
+{
+ return __btree_iter_unpack(c, l, u,
+ bch2_btree_node_iter_peek_all(&l->iter, l->b));
+}
+
+static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree_path_level *l,
+ struct bkey *u)
+{
+ struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
+ bch2_btree_node_iter_peek(&l->iter, l->b));
+
+ path->pos = k.k ? k.k->p : l->b->key.k.p;
+ trans->paths_sorted = false;
+ bch2_btree_path_verify_level(trans, path, l - path->l);
+ return k;
+}
+
+static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree_path_level *l,
+ struct bkey *u)
+{
+ struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
+ bch2_btree_node_iter_prev(&l->iter, l->b));
+
+ path->pos = k.k ? k.k->p : l->b->data->min_key;
+ trans->paths_sorted = false;
+ bch2_btree_path_verify_level(trans, path, l - path->l);
+ return k;
+}
+
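+/*
+ * Advance the node iterator until it points at a key >= path->pos; returns
+ * false (giving up) after @max_advance steps so the caller can reinitialize
+ * the iterator instead of stepping through a long run of keys.
+ */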
+static inline bool btree_path_advance_to_pos(struct btree_path *path,
+ struct btree_path_level *l,
+ int max_advance)
+{
+ struct bkey_packed *k;
+ int nr_advanced = 0;
+
+ while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
+ bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
+ if (max_advance > 0 && nr_advanced >= max_advance)
+ return false;
+
+ bch2_btree_node_iter_advance(&l->iter, l->b);
+ nr_advanced++;
+ }
+
+ return true;
+}
+
+static inline void __btree_path_level_init(struct btree_path *path,
+ unsigned level)
+{
+ struct btree_path_level *l = &path->l[level];
+
+ bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);
+
+ /*
+	 * Iterators to interior nodes should always be pointed at the first
+	 * non-whiteout:
+ */
+ if (level)
+ bch2_btree_node_iter_peek(&l->iter, l->b);
+}
+
+void bch2_btree_path_level_init(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b)
+{
+ BUG_ON(path->cached);
+
+ EBUG_ON(!btree_path_pos_in_node(path, b));
+
+ path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
+ path->l[b->c.level].b = b;
+ __btree_path_level_init(path, b->c.level);
+}
+
+/* Btree path: fixups after btree node updates: */
+
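+/*
+ * Pending updates in this transaction hold pointers to the keys they're
+ * overwriting (old_v); when a node is replaced or reinitialized those
+ * pointers would be stale, so refresh them from the new node (and from the
+ * journal keys, if journal replay hasn't finished):
+ */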
+static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, struct btree *b)
+{
+ struct bch_fs *c = trans->c;
+
+ trans_for_each_update(trans, i)
+ if (!i->cached &&
+ i->level == b->c.level &&
+ i->btree_id == b->c.btree_id &&
+ bpos_cmp(i->k->k.p, b->data->min_key) >= 0 &&
+ bpos_cmp(i->k->k.p, b->data->max_key) <= 0) {
+ i->old_v = bch2_btree_path_peek_slot(trans->paths + i->path, &i->old_k).v;
+
+ if (unlikely(trans->journal_replay_not_finished)) {
+ struct bkey_i *j_k =
+ bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
+ i->k->k.p);
+
+ if (j_k) {
+ i->old_k = j_k->k;
+ i->old_v = &j_k->v;
+ }
+ }
+ }
+}
+
+/*
+ * A btree node is being replaced - update all paths that point to it to
+ * point to the new node:
+ */
+void bch2_trans_node_add(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b)
+{
+ struct btree_path *prev;
+
+ BUG_ON(!btree_path_pos_in_node(path, b));
+
+ while ((prev = prev_btree_path(trans, path)) &&
+ btree_path_pos_in_node(prev, b))
+ path = prev;
+
+ for (;
+ path && btree_path_pos_in_node(path, b);
+ path = next_btree_path(trans, path))
+ if (path->uptodate == BTREE_ITER_UPTODATE && !path->cached) {
+ enum btree_node_locked_type t =
+ btree_lock_want(path, b->c.level);
+
+ if (t != BTREE_NODE_UNLOCKED) {
+ btree_node_unlock(trans, path, b->c.level);
+ six_lock_increment(&b->c.lock, (enum six_lock_type) t);
+ mark_btree_node_locked(trans, path, b->c.level, t);
+ }
+
+ bch2_btree_path_level_init(trans, path, b);
+ }
+
+ bch2_trans_revalidate_updates_in_node(trans, b);
+}
+
+/*
+ * A btree node has been modified in such a way as to invalidate iterators - fix
+ * them:
+ */
+void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ trans_for_each_path_with_node(trans, b, path, i)
+ __btree_path_level_init(path, b->c.level);
+
+ bch2_trans_revalidate_updates_in_node(trans, b);
+}
+
+/* Btree path: traverse, set_pos: */
+
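+/*
+ * Lock the btree root: the root can change while we're waiting for the lock
+ * (the lock attempt then fails with lock_fail_root_changed), so after taking
+ * the lock we recheck that we still hold the current root and retry if not.
+ */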
+static inline int btree_path_lock_root(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned depth_want,
+ unsigned long trace_ip)
+{
+ struct bch_fs *c = trans->c;
+ struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b;
+ enum six_lock_type lock_type;
+ unsigned i;
+ int ret;
+
+ EBUG_ON(path->nodes_locked);
+
+ while (1) {
+ b = READ_ONCE(*rootp);
+ path->level = READ_ONCE(b->c.level);
+
+ if (unlikely(path->level < depth_want)) {
+ /*
+ * the root is at a lower depth than the depth we want:
+			 * we got to the end of the btree, or we're walking nodes
+ * greater than some depth and there are no nodes >=
+ * that depth
+ */
+ path->level = depth_want;
+ for (i = path->level; i < BTREE_MAX_DEPTH; i++)
+ path->l[i].b = NULL;
+ return 1;
+ }
+
+ lock_type = __btree_lock_want(path, path->level);
+ ret = btree_node_lock(trans, path, &b->c,
+ path->level, lock_type, trace_ip);
+ if (unlikely(ret)) {
+ if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
+ continue;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ret;
+ BUG();
+ }
+
+ if (likely(b == READ_ONCE(*rootp) &&
+ b->c.level == path->level &&
+ !race_fault())) {
+ for (i = 0; i < path->level; i++)
+ path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root);
+ path->l[path->level].b = b;
+ for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
+ path->l[i].b = NULL;
+
+ mark_btree_node_locked(trans, path, path->level,
+ (enum btree_node_locked_type) lock_type);
+ bch2_btree_path_level_init(trans, path, b);
+ return 0;
+ }
+
+ six_unlock_type(&b->c.lock, lock_type);
+ }
+}
+
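+/*
+ * Start reads for the next few child nodes we expect to descend into, so
+ * they're already in the btree node cache when we get to them. The window is
+ * larger before the filesystem has finished starting up (up to 16 nodes
+ * instead of 2), presumably to speed up recovery's sequential btree walks.
+ */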
+noinline
+static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_path_level *l = path_l(path);
+ struct btree_node_iter node_iter = l->iter;
+ struct bkey_packed *k;
+ struct bkey_buf tmp;
+ unsigned nr = test_bit(BCH_FS_started, &c->flags)
+ ? (path->level > 1 ? 0 : 2)
+ : (path->level > 1 ? 1 : 16);
+ bool was_locked = btree_node_locked(path, path->level);
+ int ret = 0;
+
+ bch2_bkey_buf_init(&tmp);
+
+ while (nr-- && !ret) {
+ if (!bch2_btree_node_relock(trans, path, path->level))
+ break;
+
+ bch2_btree_node_iter_advance(&node_iter, l->b);
+ k = bch2_btree_node_iter_peek(&node_iter, l->b);
+ if (!k)
+ break;
+
+ bch2_bkey_buf_unpack(&tmp, c, l->b, k);
+ ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
+ path->level - 1);
+ }
+
+ if (!was_locked)
+ btree_node_unlock(trans, path, path->level);
+
+ bch2_bkey_buf_exit(&tmp, c);
+ return ret;
+}
+
+static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
+ struct btree_and_journal_iter *jiter)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c k;
+ struct bkey_buf tmp;
+ unsigned nr = test_bit(BCH_FS_started, &c->flags)
+ ? (path->level > 1 ? 0 : 2)
+ : (path->level > 1 ? 1 : 16);
+ bool was_locked = btree_node_locked(path, path->level);
+ int ret = 0;
+
+ bch2_bkey_buf_init(&tmp);
+
+ while (nr-- && !ret) {
+ if (!bch2_btree_node_relock(trans, path, path->level))
+ break;
+
+ bch2_btree_and_journal_iter_advance(jiter);
+ k = bch2_btree_and_journal_iter_peek(jiter);
+ if (!k.k)
+ break;
+
+ bch2_bkey_buf_reassemble(&tmp, c, k);
+ ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
+ path->level - 1);
+ }
+
+ if (!was_locked)
+ btree_node_unlock(trans, path, path->level);
+
+ bch2_bkey_buf_exit(&tmp, c);
+ return ret;
+}
+
+static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned plevel, struct btree *b)
+{
+ struct btree_path_level *l = &path->l[plevel];
+ bool locked = btree_node_locked(path, plevel);
+ struct bkey_packed *k;
+ struct bch_btree_ptr_v2 *bp;
+
+ if (!bch2_btree_node_relock(trans, path, plevel))
+ return;
+
+ k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
+ BUG_ON(k->type != KEY_TYPE_btree_ptr_v2);
+
+ bp = (void *) bkeyp_val(&l->b->format, k);
+ bp->mem_ptr = (unsigned long)b;
+
+ if (!locked)
+ btree_node_unlock(trans, path, plevel);
+}
+
+static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned flags,
+ struct bkey_buf *out)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_path_level *l = path_l(path);
+ struct btree_and_journal_iter jiter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ __bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
+
+ k = bch2_btree_and_journal_iter_peek(&jiter);
+
+ bch2_bkey_buf_reassemble(out, c, k);
+
+ if ((flags & BTREE_ITER_PREFETCH) &&
+ c->opts.btree_node_prefetch)
+ ret = btree_path_prefetch_j(trans, path, &jiter);
+
+ bch2_btree_and_journal_iter_exit(&jiter);
+ return ret;
+}
+
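+/*
+ * Descend one level: unpack the key for the child node at the current
+ * iterator position (from the node, or overlaid journal keys during replay),
+ * get and lock the child node, and point path->l[] at it.
+ */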
+static __always_inline int btree_path_down(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned flags,
+ unsigned long trace_ip)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_path_level *l = path_l(path);
+ struct btree *b;
+ unsigned level = path->level - 1;
+ enum six_lock_type lock_type = __btree_lock_want(path, level);
+ struct bkey_buf tmp;
+ int ret;
+
+ EBUG_ON(!btree_node_locked(path, path->level));
+
+ bch2_bkey_buf_init(&tmp);
+
+ if (unlikely(trans->journal_replay_not_finished)) {
+ ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
+ if (ret)
+ goto err;
+ } else {
+ bch2_bkey_buf_unpack(&tmp, c, l->b,
+ bch2_btree_node_iter_peek(&l->iter, l->b));
+
+ if ((flags & BTREE_ITER_PREFETCH) &&
+ c->opts.btree_node_prefetch) {
+ ret = btree_path_prefetch(trans, path);
+ if (ret)
+ goto err;
+ }
+ }
+
+ b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (unlikely(ret))
+ goto err;
+
+ if (likely(!trans->journal_replay_not_finished &&
+ tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
+ unlikely(b != btree_node_mem_ptr(tmp.k)))
+ btree_node_mem_ptr_set(trans, path, level + 1, b);
+
+ if (btree_node_read_locked(path, level + 1))
+ btree_node_unlock(trans, path, level + 1);
+
+ mark_btree_node_locked(trans, path, level,
+ (enum btree_node_locked_type) lock_type);
+ path->level = level;
+ bch2_btree_path_level_init(trans, path, b);
+
+ bch2_btree_path_verify_locks(path);
+err:
+ bch2_bkey_buf_exit(&tmp, c);
+ return ret;
+}
+
+static int bch2_btree_path_traverse_all(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_path *path;
+ unsigned long trace_ip = _RET_IP_;
+ unsigned i;
+ int ret = 0;
+
+ if (trans->in_traverse_all)
+ return -BCH_ERR_transaction_restart_in_traverse_all;
+
+ trans->in_traverse_all = true;
+retry_all:
+ trans->restarted = 0;
+ trans->last_restarted_ip = 0;
+
+ trans_for_each_path(trans, path, i)
+ path->should_be_locked = false;
+
+ btree_trans_sort_paths(trans);
+
+ bch2_trans_unlock(trans);
+ cond_resched();
+
+ if (unlikely(trans->memory_allocation_failure)) {
+ struct closure cl;
+
+ closure_init_stack(&cl);
+
+ do {
+ ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
+ closure_sync(&cl);
+ } while (ret);
+ }
+
+ /* Now, redo traversals in correct order: */
+ i = 0;
+ while (i < trans->nr_sorted) {
+ btree_path_idx_t idx = trans->sorted[i];
+
+ /*
+ * Traversing a path can cause another path to be added at about
+ * the same position:
+ */
+ if (trans->paths[idx].uptodate) {
+ __btree_path_get(&trans->paths[idx], false);
+ ret = bch2_btree_path_traverse_one(trans, idx, 0, _THIS_IP_);
+ __btree_path_put(&trans->paths[idx], false);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret, ENOMEM))
+ goto retry_all;
+ if (ret)
+ goto err;
+ } else {
+ i++;
+ }
+ }
+
+ /*
+ * We used to assert that all paths had been traversed here
+ * (path->uptodate < BTREE_ITER_NEED_TRAVERSE); however, since
+ * path->should_be_locked is not set yet, we might have unlocked and
+ * then failed to relock a path - that's fine.
+ */
+err:
+ bch2_btree_cache_cannibalize_unlock(trans);
+
+ trans->in_traverse_all = false;
+
+ trace_and_count(c, trans_traverse_all, trans, trace_ip);
+ return ret;
+}
+
+static inline bool btree_path_check_pos_in_node(struct btree_path *path,
+ unsigned l, int check_pos)
+{
+ if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
+ return false;
+ if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
+ return false;
+ return true;
+}
+
+static inline bool btree_path_good_node(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned l, int check_pos)
+{
+ return is_btree_node(path, l) &&
+ bch2_btree_node_relock(trans, path, l) &&
+ btree_path_check_pos_in_node(path, l, check_pos);
+}
+
+static void btree_path_set_level_down(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned new_level)
+{
+ unsigned l;
+
+ path->level = new_level;
+
+ for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
+ if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
+ btree_node_unlock(trans, path, l);
+
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ bch2_btree_path_verify(trans, path);
+}
+
+static noinline unsigned __btree_path_up_until_good_node(struct btree_trans *trans,
+ struct btree_path *path,
+ int check_pos)
+{
+ unsigned i, l = path->level;
+again:
+ while (btree_path_node(path, l) &&
+ !btree_path_good_node(trans, path, l, check_pos))
+ __btree_path_set_level_up(trans, path, l++);
+
+ /* If we need intent locks, take them too: */
+ for (i = l + 1;
+ i < path->locks_want && btree_path_node(path, i);
+ i++)
+ if (!bch2_btree_node_relock(trans, path, i)) {
+ while (l <= i)
+ __btree_path_set_level_up(trans, path, l++);
+ goto again;
+ }
+
+ return l;
+}
+
+static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
+ struct btree_path *path,
+ int check_pos)
+{
+ return likely(btree_node_locked(path, path->level) &&
+ btree_path_check_pos_in_node(path, path->level, check_pos))
+ ? path->level
+ : __btree_path_up_until_good_node(trans, path, check_pos);
+}
+
+/*
+ * This is the main state machine for walking down the btree - walks down to a
+ * specified depth
+ *
+ * Returns 0 on success, -EIO on error (error reading in a btree node).
+ *
+ * On error, caller (peek_node()/peek_key()) must return NULL; the error is
+ * stashed in the iterator and returned from bch2_trans_exit().
+ */
+int bch2_btree_path_traverse_one(struct btree_trans *trans,
+ btree_path_idx_t path_idx,
+ unsigned flags,
+ unsigned long trace_ip)
+{
+ struct btree_path *path = &trans->paths[path_idx];
+ unsigned depth_want = path->level;
+ int ret = -((int) trans->restarted);
+
+ if (unlikely(ret))
+ goto out;
+
+ if (unlikely(!trans->srcu_held))
+ bch2_trans_srcu_lock(trans);
+
+ /*
+ * Ensure we obey path->should_be_locked: if it's set, we can't unlock
+ * and re-traverse the path without a transaction restart:
+ */
+ if (path->should_be_locked) {
+ ret = bch2_btree_path_relock(trans, path, trace_ip);
+ goto out;
+ }
+
+ if (path->cached) {
+ ret = bch2_btree_path_traverse_cached(trans, path, flags);
+ goto out;
+ }
+
+ path = &trans->paths[path_idx];
+
+ if (unlikely(path->level >= BTREE_MAX_DEPTH))
+ goto out;
+
+ path->level = btree_path_up_until_good_node(trans, path, 0);
+
+ EBUG_ON(btree_path_node(path, path->level) &&
+ !btree_node_locked(path, path->level));
+
+ /*
+	 * Note: path->l[path->level].b may be temporarily NULL here - elsewhere
+	 * that would indicate we got to the end of the btree, but here it means
+	 * that relocking the root failed. It's critical that
+	 * btree_path_lock_root() comes next and that it can't fail:
+ */
+ while (path->level > depth_want) {
+ ret = btree_path_node(path, path->level)
+ ? btree_path_down(trans, path, flags, trace_ip)
+ : btree_path_lock_root(trans, path, depth_want, trace_ip);
+ if (unlikely(ret)) {
+ if (ret == 1) {
+ /*
+ * No nodes at this level - got to the end of
+ * the btree:
+ */
+ ret = 0;
+ goto out;
+ }
+
+ __bch2_btree_path_unlock(trans, path);
+ path->level = depth_want;
+ path->l[path->level].b = ERR_PTR(ret);
+ goto out;
+ }
+ }
+
+ path->uptodate = BTREE_ITER_UPTODATE;
+out:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
+ panic("ret %s (%i) trans->restarted %s (%i)\n",
+ bch2_err_str(ret), ret,
+ bch2_err_str(trans->restarted), trans->restarted);
+ bch2_btree_path_verify(trans, path);
+ return ret;
+}
+
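+/*
+ * Copying a path copies its pointers to btree nodes and its lock state, so
+ * we have to take an extra reference on every node lock the source path
+ * holds:
+ */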
+static inline void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
+ struct btree_path *src)
+{
+ unsigned i, offset = offsetof(struct btree_path, pos);
+
+ memcpy((void *) dst + offset,
+ (void *) src + offset,
+ sizeof(struct btree_path) - offset);
+
+ for (i = 0; i < BTREE_MAX_DEPTH; i++) {
+ unsigned t = btree_node_locked_type(dst, i);
+
+ if (t != BTREE_NODE_UNLOCKED)
+ six_lock_increment(&dst->l[i].b->c.lock, t);
+ }
+}
+
+static btree_path_idx_t btree_path_clone(struct btree_trans *trans, btree_path_idx_t src,
+ bool intent)
+{
+ btree_path_idx_t new = btree_path_alloc(trans, src);
+ btree_path_copy(trans, trans->paths + new, trans->paths + src);
+ __btree_path_get(trans->paths + new, intent);
+ return new;
+}
+
+__flatten
+btree_path_idx_t __bch2_btree_path_make_mut(struct btree_trans *trans,
+ btree_path_idx_t path, bool intent, unsigned long ip)
+{
+ __btree_path_put(trans->paths + path, intent);
+ path = btree_path_clone(trans, path, intent);
+ trans->paths[path].preserve = false;
+ return path;
+}
+
+btree_path_idx_t __must_check
+__bch2_btree_path_set_pos(struct btree_trans *trans,
+ btree_path_idx_t path_idx, struct bpos new_pos,
+ bool intent, unsigned long ip)
+{
+ int cmp = bpos_cmp(new_pos, trans->paths[path_idx].pos);
+
+ bch2_trans_verify_not_in_restart(trans);
+ EBUG_ON(!trans->paths[path_idx].ref);
+
+ path_idx = bch2_btree_path_make_mut(trans, path_idx, intent, ip);
+
+ struct btree_path *path = trans->paths + path_idx;
+ path->pos = new_pos;
+ trans->paths_sorted = false;
+
+ if (unlikely(path->cached)) {
+ btree_node_unlock(trans, path, 0);
+ path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ goto out;
+ }
+
+ unsigned level = btree_path_up_until_good_node(trans, path, cmp);
+
+ if (btree_path_node(path, level)) {
+ struct btree_path_level *l = &path->l[level];
+
+ BUG_ON(!btree_node_locked(path, level));
+ /*
+ * We might have to skip over many keys, or just a few: try
+ * advancing the node iterator, and if we have to skip over too
+ * many keys just reinit it (or if we're rewinding, since that
+ * is expensive).
+ */
+ if (cmp < 0 ||
+ !btree_path_advance_to_pos(path, l, 8))
+ bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);
+
+ /*
+		 * Iterators to interior nodes should always be pointed at the first
+		 * non-whiteout:
+ */
+ if (unlikely(level))
+ bch2_btree_node_iter_peek(&l->iter, l->b);
+ }
+
+ if (unlikely(level != path->level)) {
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ __bch2_btree_path_unlock(trans, path);
+ }
+out:
+ bch2_btree_path_verify(trans, path);
+ return path_idx;
+}
+
+/* Btree path: main interface: */
+
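+/*
+ * Look for an adjacent path (in sorted order) at the same position - i.e. a
+ * duplicate that could keep the position alive if this path is freed:
+ */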
+static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
+{
+ struct btree_path *sib;
+
+ sib = prev_btree_path(trans, path);
+ if (sib && !btree_path_cmp(sib, path))
+ return sib;
+
+ sib = next_btree_path(trans, path);
+ if (sib && !btree_path_cmp(sib, path))
+ return sib;
+
+ return NULL;
+}
+
+static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
+{
+ struct btree_path *sib;
+
+ sib = prev_btree_path(trans, path);
+ if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
+ return sib;
+
+ sib = next_btree_path(trans, path);
+ if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
+ return sib;
+
+ return NULL;
+}
+
+static inline void __bch2_path_free(struct btree_trans *trans, btree_path_idx_t path)
+{
+ __bch2_btree_path_unlock(trans, trans->paths + path);
+ btree_path_list_remove(trans, trans->paths + path);
+ __clear_bit(path, trans->paths_allocated);
+}
+
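+/*
+ * Drop a reference on a path; when the last reference goes away the path may
+ * be freed, if its position and locks don't need preserving or a duplicate
+ * path can take them over.
+ */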
+void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool intent)
+{
+ struct btree_path *path = trans->paths + path_idx, *dup;
+
+ if (!__btree_path_put(path, intent))
+ return;
+
+ dup = path->preserve
+ ? have_path_at_pos(trans, path)
+ : have_node_at_pos(trans, path);
+
+ if (!dup && !(!path->preserve && !is_btree_node(path, path->level)))
+ return;
+
+ if (path->should_be_locked &&
+ !trans->restarted &&
+ (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_)))
+ return;
+
+ if (dup) {
+ dup->preserve |= path->preserve;
+ dup->should_be_locked |= path->should_be_locked;
+ }
+
+ __bch2_path_free(trans, path_idx);
+}
+
+static void bch2_path_put_nokeep(struct btree_trans *trans, btree_path_idx_t path,
+ bool intent)
+{
+ if (!__btree_path_put(trans->paths + path, intent))
+ return;
+
+ __bch2_path_free(trans, path);
+}
+
+void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
+{
+ panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
+ trans->restart_count, restart_count,
+ (void *) trans->last_begin_ip);
+}
+
+void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans)
+{
+ panic("in transaction restart: %s, last restarted by %pS\n",
+ bch2_err_str(trans->restarted),
+ (void *) trans->last_restarted_ip);
+}
+
+noinline __cold
+void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
+{
+ prt_printf(buf, "transaction updates for %s journal seq %llu",
+ trans->fn, trans->journal_res.seq);
+ prt_newline(buf);
+ printbuf_indent_add(buf, 2);
+
+ trans_for_each_update(trans, i) {
+ struct bkey_s_c old = { &i->old_k, i->old_v };
+
+ prt_printf(buf, "update: btree=%s cached=%u %pS",
+ bch2_btree_id_str(i->btree_id),
+ i->cached,
+ (void *) i->ip_allocated);
+ prt_newline(buf);
+
+ prt_printf(buf, " old ");
+ bch2_bkey_val_to_text(buf, trans->c, old);
+ prt_newline(buf);
+
+ prt_printf(buf, " new ");
+ bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
+ prt_newline(buf);
+ }
+
+ for (struct jset_entry *e = trans->journal_entries;
+ e != btree_trans_journal_entries_top(trans);
+ e = vstruct_next(e))
+ bch2_journal_entry_to_text(buf, trans->c, e);
+
+ printbuf_indent_sub(buf, 2);
+}
+
+noinline __cold
+void bch2_dump_trans_updates(struct btree_trans *trans)
+{
+ struct printbuf buf = PRINTBUF;
+
+ bch2_trans_updates_to_text(&buf, trans);
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+}
+
+static void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx)
+{
+ struct btree_path *path = trans->paths + path_idx;
+
+ prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ",
+ path_idx, path->ref, path->intent_ref,
+ path->preserve ? 'P' : ' ',
+ path->should_be_locked ? 'S' : ' ',
+ bch2_btree_id_str(path->btree_id),
+ path->level);
+ bch2_bpos_to_text(out, path->pos);
+
+ prt_printf(out, " locks %u", path->nodes_locked);
+#ifdef TRACK_PATH_ALLOCATED
+ prt_printf(out, " %pS", (void *) path->ip_allocated);
+#endif
+ prt_newline(out);
+}
+
+static noinline __cold
+void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans,
+ bool nosort)
+{
+ struct trans_for_each_path_inorder_iter iter;
+
+ if (!nosort)
+ btree_trans_sort_paths(trans);
+
+ trans_for_each_path_idx_inorder(trans, iter)
+ bch2_btree_path_to_text(out, trans, iter.path_idx);
+}
+
+noinline __cold
+void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
+{
+ __bch2_trans_paths_to_text(out, trans, false);
+}
+
+static noinline __cold
+void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
+{
+ struct printbuf buf = PRINTBUF;
+
+ __bch2_trans_paths_to_text(&buf, trans, nosort);
+ bch2_trans_updates_to_text(&buf, trans);
+
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+}
+
+noinline __cold
+void bch2_dump_trans_paths_updates(struct btree_trans *trans)
+{
+ __bch2_dump_trans_paths_updates(trans, false);
+}
+
+noinline __cold
+static void bch2_trans_update_max_paths(struct btree_trans *trans)
+{
+ struct btree_transaction_stats *s = btree_trans_stats(trans);
+ struct printbuf buf = PRINTBUF;
+ size_t nr = bitmap_weight(trans->paths_allocated, trans->nr_paths);
+
+ bch2_trans_paths_to_text(&buf, trans);
+
+ if (!buf.allocation_failure) {
+ mutex_lock(&s->lock);
+ if (nr > s->nr_max_paths) {
+ s->nr_max_paths = nr;
+ swap(s->max_paths_text, buf.buf);
+ }
+ mutex_unlock(&s->lock);
+ }
+
+ printbuf_exit(&buf);
+
+ trans->nr_paths_max = nr;
+}
+
+noinline __cold
+int __bch2_btree_trans_too_many_iters(struct btree_trans *trans)
+{
+ if (trace_trans_restart_too_many_iters_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_trans_paths_to_text(&buf, trans);
+ trace_trans_restart_too_many_iters(trans, _THIS_IP_, buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ count_event(trans->c, trans_restart_too_many_iters);
+
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
+}
+
+static noinline void btree_path_overflow(struct btree_trans *trans)
+{
+ bch2_dump_trans_paths_updates(trans);
+ bch_err(trans->c, "trans path overflow");
+}
+
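+/*
+ * Grow the transaction's path tables: the allocation bitmap, the paths array,
+ * the sorted-index array and the update entries live in a single allocation,
+ * so we allocate a new buffer at double the size, copy everything over, and
+ * publish the new pointers with rcu_assign_pointer() before freeing the old
+ * buffer via kfree_rcu.
+ */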
+static noinline void btree_paths_realloc(struct btree_trans *trans)
+{
+ unsigned nr = trans->nr_paths * 2;
+
+ void *p = kzalloc(BITS_TO_LONGS(nr) * sizeof(unsigned long) +
+ sizeof(struct btree_trans_paths) +
+ nr * sizeof(struct btree_path) +
+ nr * sizeof(btree_path_idx_t) + 8 +
+ nr * sizeof(struct btree_insert_entry), GFP_KERNEL|__GFP_NOFAIL);
+
+ unsigned long *paths_allocated = p;
+ memcpy(paths_allocated, trans->paths_allocated, BITS_TO_LONGS(trans->nr_paths) * sizeof(unsigned long));
+ p += BITS_TO_LONGS(nr) * sizeof(unsigned long);
+
+ p += sizeof(struct btree_trans_paths);
+ struct btree_path *paths = p;
+ *trans_paths_nr(paths) = nr;
+ memcpy(paths, trans->paths, trans->nr_paths * sizeof(struct btree_path));
+ p += nr * sizeof(struct btree_path);
+
+ btree_path_idx_t *sorted = p;
+ memcpy(sorted, trans->sorted, trans->nr_sorted * sizeof(btree_path_idx_t));
+ p += nr * sizeof(btree_path_idx_t) + 8;
+
+ struct btree_insert_entry *updates = p;
+ memcpy(updates, trans->updates, trans->nr_paths * sizeof(struct btree_insert_entry));
+
+ unsigned long *old = trans->paths_allocated;
+
+ rcu_assign_pointer(trans->paths_allocated, paths_allocated);
+ rcu_assign_pointer(trans->paths, paths);
+ rcu_assign_pointer(trans->sorted, sorted);
+ rcu_assign_pointer(trans->updates, updates);
+
+ trans->nr_paths = nr;
+
+ if (old != trans->_paths_allocated)
+ kfree_rcu_mightsleep(old);
+}
+
+static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans,
+ btree_path_idx_t pos)
+{
+ btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, trans->nr_paths);
+
+ if (unlikely(idx == trans->nr_paths)) {
+ if (trans->nr_paths == BTREE_ITER_MAX) {
+ btree_path_overflow(trans);
+ return 0;
+ }
+
+ btree_paths_realloc(trans);
+ }
+
+ /*
+ * Do this before marking the new path as allocated, since it won't be
+ * initialized yet:
+ */
+ if (unlikely(idx > trans->nr_paths_max))
+ bch2_trans_update_max_paths(trans);
+
+ __set_bit(idx, trans->paths_allocated);
+
+ struct btree_path *path = &trans->paths[idx];
+ path->ref = 0;
+ path->intent_ref = 0;
+ path->nodes_locked = 0;
+
+ btree_path_list_add(trans, pos, idx);
+ trans->paths_sorted = false;
+ return idx;
+}
+
+btree_path_idx_t bch2_path_get(struct btree_trans *trans,
+ enum btree_id btree_id, struct bpos pos,
+ unsigned locks_want, unsigned level,
+ unsigned flags, unsigned long ip)
+{
+ struct btree_path *path;
+ bool cached = flags & BTREE_ITER_CACHED;
+ bool intent = flags & BTREE_ITER_INTENT;
+ struct trans_for_each_path_inorder_iter iter;
+ btree_path_idx_t path_pos = 0, path_idx;
+
+ bch2_trans_verify_not_in_restart(trans);
+ bch2_trans_verify_locks(trans);
+
+ btree_trans_sort_paths(trans);
+
+ trans_for_each_path_inorder(trans, path, iter) {
+ if (__btree_path_cmp(path,
+ btree_id,
+ cached,
+ pos,
+ level) > 0)
+ break;
+
+ path_pos = iter.path_idx;
+ }
+
+ if (path_pos &&
+ trans->paths[path_pos].cached == cached &&
+ trans->paths[path_pos].btree_id == btree_id &&
+ trans->paths[path_pos].level == level) {
+ __btree_path_get(trans->paths + path_pos, intent);
+ path_idx = bch2_btree_path_set_pos(trans, path_pos, pos, intent, ip);
+ path = trans->paths + path_idx;
+ } else {
+ path_idx = btree_path_alloc(trans, path_pos);
+ path = trans->paths + path_idx;
+
+ __btree_path_get(path, intent);
+ path->pos = pos;
+ path->btree_id = btree_id;
+ path->cached = cached;
+ path->uptodate = BTREE_ITER_NEED_TRAVERSE;
+ path->should_be_locked = false;
+ path->level = level;
+ path->locks_want = locks_want;
+ path->nodes_locked = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(path->l); i++)
+ path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
+#ifdef TRACK_PATH_ALLOCATED
+ path->ip_allocated = ip;
+#endif
+ trans->paths_sorted = false;
+ }
+
+ if (!(flags & BTREE_ITER_NOPRESERVE))
+ path->preserve = true;
+
+ if (path->intent_ref)
+ locks_want = max(locks_want, level + 1);
+
+ /*
+	 * If the path has locks_want greater than requested, we don't downgrade
+	 * it here - after a transaction restart (e.g. because a btree node split
+	 * needed to upgrade locks) we might be putting/getting the iterator
+	 * again.
+ * Downgrading iterators only happens via bch2_trans_downgrade(), after
+ * a successful transaction commit.
+ */
+
+ locks_want = min(locks_want, BTREE_MAX_DEPTH);
+ if (locks_want > path->locks_want)
+ bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);
+
+ return path_idx;
+}
+
+struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
+{
+ struct btree_path_level *l = path_l(path);
+ struct bkey_packed *_k;
+ struct bkey_s_c k;
+
+ if (unlikely(!l->b))
+ return bkey_s_c_null;
+
+ EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
+ EBUG_ON(!btree_node_locked(path, path->level));
+
+ if (!path->cached) {
+ _k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
+ k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
+
+ EBUG_ON(k.k && bkey_deleted(k.k) && bpos_eq(k.k->p, path->pos));
+
+ if (!k.k || !bpos_eq(path->pos, k.k->p))
+ goto hole;
+ } else {
+ struct bkey_cached *ck = (void *) path->l[0].b;
+
+ EBUG_ON(ck &&
+ (path->btree_id != ck->key.btree_id ||
+ !bkey_eq(path->pos, ck->key.pos)));
+ if (!ck || !ck->valid)
+ return bkey_s_c_null;
+
+ *u = ck->k->k;
+ k = bkey_i_to_s_c(ck->k);
+ }
+
+ return k;
+hole:
+ bkey_init(u);
+ u->p = path->pos;
+ return (struct bkey_s_c) { u, NULL };
+}
+
+/* Btree iterators: */
+
+int __must_check
+__bch2_btree_iter_traverse(struct btree_iter *iter)
+{
+ return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
+}
+
+int __must_check
+bch2_btree_iter_traverse(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+ int ret;
+
+ iter->path = bch2_btree_path_set_pos(trans, iter->path,
+ btree_iter_search_key(iter),
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+
+ ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
+ if (ret)
+ return ret;
+
+ btree_path_set_should_be_locked(trans->paths + iter->path);
+ return 0;
+}
+
+/* Iterate across nodes (leaf and interior nodes) */
+
+struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+ struct btree *b = NULL;
+ int ret;
+
+ EBUG_ON(trans->paths[iter->path].cached);
+ bch2_btree_iter_verify(iter);
+
+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
+ if (ret)
+ goto err;
+
+ struct btree_path *path = btree_iter_path(trans, iter);
+ b = btree_path_node(path, path->level);
+ if (!b)
+ goto out;
+
+ BUG_ON(bpos_lt(b->key.k.p, iter->pos));
+
+ bkey_init(&iter->k);
+ iter->k.p = iter->pos = b->key.k.p;
+
+ iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+ btree_path_set_should_be_locked(btree_iter_path(trans, iter));
+out:
+ bch2_btree_iter_verify_entry_exit(iter);
+ bch2_btree_iter_verify(iter);
+
+ return b;
+err:
+ b = ERR_PTR(ret);
+ goto out;
+}
+
+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
+{
+ struct btree *b;
+
+ while (b = bch2_btree_iter_peek_node(iter),
+ bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
+ bch2_trans_begin(iter->trans);
+
+ return b;
+}
+
+struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+ struct btree *b = NULL;
+ int ret;
+
+ EBUG_ON(trans->paths[iter->path].cached);
+ bch2_trans_verify_not_in_restart(trans);
+ bch2_btree_iter_verify(iter);
+
+ struct btree_path *path = btree_iter_path(trans, iter);
+
+ /* already at end? */
+ if (!btree_path_node(path, path->level))
+ return NULL;
+
+	/* got to the end? (no parent node - we were at the root) */
+ if (!btree_path_node(path, path->level + 1)) {
+ btree_path_set_level_up(trans, path);
+ return NULL;
+ }
+
+ if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
+ __bch2_btree_path_unlock(trans, path);
+ path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
+ path->l[path->level + 1].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ trace_and_count(trans->c, trans_restart_relock_next_node, trans, _THIS_IP_, path);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
+ goto err;
+ }
+
+ b = btree_path_node(path, path->level + 1);
+
+ if (bpos_eq(iter->pos, b->key.k.p)) {
+ __btree_path_set_level_up(trans, path, path->level++);
+ } else {
+ /*
+ * Haven't gotten to the end of the parent node: go back down to
+ * the next child node
+ */
+ iter->path = bch2_btree_path_set_pos(trans, iter->path,
+ bpos_successor(iter->pos),
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+
+ path = btree_iter_path(trans, iter);
+ btree_path_set_level_down(trans, path, iter->min_depth);
+
+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
+ if (ret)
+ goto err;
+
+ path = btree_iter_path(trans, iter);
+ b = path->l[path->level].b;
+ }
+
+ bkey_init(&iter->k);
+ iter->k.p = iter->pos = b->key.k.p;
+
+ iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+ btree_path_set_should_be_locked(btree_iter_path(trans, iter));
+ EBUG_ON(btree_iter_path(trans, iter)->uptodate);
+out:
+ bch2_btree_iter_verify_entry_exit(iter);
+ bch2_btree_iter_verify(iter);
+
+ return b;
+err:
+ b = ERR_PTR(ret);
+ goto out;
+}
+
+/* Iterate across keys (in leaf nodes only) */
+
+inline bool bch2_btree_iter_advance(struct btree_iter *iter)
+{
+ struct bpos pos = iter->k.p;
+ bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS
+ ? bpos_eq(pos, SPOS_MAX)
+ : bkey_eq(pos, SPOS_MAX));
+
+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
+ pos = bkey_successor(iter, pos);
+ bch2_btree_iter_set_pos(iter, pos);
+ return ret;
+}
+
+inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
+{
+ struct bpos pos = bkey_start_pos(&iter->k);
+ bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS
+ ? bpos_eq(pos, POS_MIN)
+ : bkey_eq(pos, POS_MIN));
+
+ if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
+ pos = bkey_predecessor(iter, pos);
+ bch2_btree_iter_set_pos(iter, pos);
+ return ret;
+}
+
+static noinline
+void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c *k)
+{
+ struct bpos end = path_l(btree_iter_path(trans, iter))->b->data->min_key;
+
+ trans_for_each_update(trans, i)
+ if (!i->key_cache_already_flushed &&
+ i->btree_id == iter->btree_id &&
+ bpos_le(i->k->k.p, iter->pos) &&
+ bpos_ge(i->k->k.p, k->k ? k->k->p : end)) {
+ iter->k = i->k->k;
+ *k = bkey_i_to_s_c(i->k);
+ }
+}
+
+static noinline
+void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c *k)
+{
+ struct btree_path *path = btree_iter_path(trans, iter);
+ struct bpos end = path_l(path)->b->key.k.p;
+
+ trans_for_each_update(trans, i)
+ if (!i->key_cache_already_flushed &&
+ i->btree_id == iter->btree_id &&
+ bpos_ge(i->k->k.p, path->pos) &&
+ bpos_le(i->k->k.p, k->k ? k->k->p : end)) {
+ iter->k = i->k->k;
+ *k = bkey_i_to_s_c(i->k);
+ }
+}
+
+static noinline
+void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c *k)
+{
+ trans_for_each_update(trans, i)
+ if (!i->key_cache_already_flushed &&
+ i->btree_id == iter->btree_id &&
+ bpos_eq(i->k->k.p, iter->pos)) {
+ iter->k = i->k->k;
+ *k = bkey_i_to_s_c(i->k);
+ }
+}
+
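+/*
+ * Until journal replay has finished, keys that only exist in the journal have
+ * to be overlaid on top of what's in the btree - these helpers look them up
+ * for iterators with BTREE_ITER_WITH_JOURNAL set:
+ */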
+static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bpos end_pos)
+{
+ struct btree_path *path = btree_iter_path(trans, iter);
+
+ return bch2_journal_keys_peek_upto(trans->c, iter->btree_id,
+ path->level,
+ path->pos,
+ end_pos,
+ &iter->journal_idx);
+}
+
+static noinline
+struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
+ struct btree_iter *iter)
+{
+ struct btree_path *path = btree_iter_path(trans, iter);
+ struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos);
+
+ if (k) {
+ iter->k = k->k;
+ return bkey_i_to_s_c(k);
+ } else {
+ return bkey_s_c_null;
+ }
+}
+
+static noinline
+struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct btree_path *path = btree_iter_path(trans, iter);
+ struct bkey_i *next_journal =
+ bch2_btree_journal_peek(trans, iter,
+ k.k ? k.k->p : path_l(path)->b->key.k.p);
+
+ if (next_journal) {
+ iter->k = next_journal->k;
+ k = bkey_i_to_s_c(next_journal);
+ }
+
+ return k;
+}
+
+/*
+ * Checks the btree key cache for a key at the given position and returns it
+ * if present, or bkey_s_c_null:
+ */
+static noinline
+struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
+{
+ struct btree_trans *trans = iter->trans;
+ struct bch_fs *c = trans->c;
+ struct bkey u;
+ struct bkey_s_c k;
+ int ret;
+
+ if ((iter->flags & BTREE_ITER_KEY_CACHE_FILL) &&
+ bpos_eq(iter->pos, pos))
+ return bkey_s_c_null;
+
+ if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
+ return bkey_s_c_null;
+
+ if (!iter->key_cache_path)
+ iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
+ iter->flags & BTREE_ITER_INTENT, 0,
+ iter->flags|BTREE_ITER_CACHED|
+ BTREE_ITER_CACHED_NOFILL,
+ _THIS_IP_);
+
+ iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+
+ ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
+ iter->flags|BTREE_ITER_CACHED) ?:
+ bch2_btree_path_relock(trans, btree_iter_path(trans, iter), _THIS_IP_);
+ if (unlikely(ret))
+ return bkey_s_c_err(ret);
+
+ btree_path_set_should_be_locked(trans->paths + iter->key_cache_path);
+
+ k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u);
+ if (k.k && !bkey_err(k)) {
+ iter->k = u;
+ k.k = &iter->k;
+ }
+ return k;
+}
+
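+/*
+ * Core of peek(): walk forward from @search_key, overlaying the key cache,
+ * journal keys and this transaction's pending updates on top of the btree as
+ * requested by the iterator flags, skipping whiteouts, until we find a live
+ * key or hit the end of the btree.
+ */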
+static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
+{
+ struct btree_trans *trans = iter->trans;
+ struct bkey_s_c k, k2;
+ int ret;
+
+ EBUG_ON(btree_iter_path(trans, iter)->cached);
+ bch2_btree_iter_verify(iter);
+
+ while (1) {
+ struct btree_path_level *l;
+
+ iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+
+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
+ if (unlikely(ret)) {
+ /* ensure that iter->k is consistent with iter->pos: */
+ bch2_btree_iter_set_pos(iter, iter->pos);
+ k = bkey_s_c_err(ret);
+ goto out;
+ }
+
+ struct btree_path *path = btree_iter_path(trans, iter);
+ l = path_l(path);
+
+ if (unlikely(!l->b)) {
+ /* No btree nodes at requested level: */
+ bch2_btree_iter_set_pos(iter, SPOS_MAX);
+ k = bkey_s_c_null;
+ goto out;
+ }
+
+ btree_path_set_should_be_locked(path);
+
+ k = btree_path_level_peek_all(trans->c, l, &iter->k);
+
+ if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
+ k.k &&
+ (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
+ k = k2;
+ ret = bkey_err(k);
+ if (ret) {
+ bch2_btree_iter_set_pos(iter, iter->pos);
+ goto out;
+ }
+ }
+
+ if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
+ k = btree_trans_peek_journal(trans, iter, k);
+
+ if (unlikely((iter->flags & BTREE_ITER_WITH_UPDATES) &&
+ trans->nr_updates))
+ bch2_btree_trans_peek_updates(trans, iter, &k);
+
+ if (k.k && bkey_deleted(k.k)) {
+ /*
+ * If we've got a whiteout, and it's after the search
+ * key, advance the search key to the whiteout instead
+ * of just after the whiteout - it might be a btree
+ * whiteout, with a real key at the same position, since
+			 * in the btree, deleted keys sort before non-deleted keys.
+ */
+ search_key = !bpos_eq(search_key, k.k->p)
+ ? k.k->p
+ : bpos_successor(k.k->p);
+ continue;
+ }
+
+ if (likely(k.k)) {
+ break;
+ } else if (likely(!bpos_eq(l->b->key.k.p, SPOS_MAX))) {
+ /* Advance to next leaf node: */
+ search_key = bpos_successor(l->b->key.k.p);
+ } else {
+ /* End of btree: */
+ bch2_btree_iter_set_pos(iter, SPOS_MAX);
+ k = bkey_s_c_null;
+ goto out;
+ }
+ }
+out:
+ bch2_btree_iter_verify(iter);
+
+ return k;
+}
+
+/**
+ * bch2_btree_iter_peek_upto() - returns first key greater than or equal to
+ * iterator's current position
+ * @iter: iterator to peek from
+ * @end: search limit: returns keys less than or equal to @end
+ *
+ * Returns: key if found, or an error extractable with bkey_err().
+ */
+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
+{
+ struct btree_trans *trans = iter->trans;
+ struct bpos search_key = btree_iter_search_key(iter);
+ struct bkey_s_c k;
+ struct bpos iter_pos;
+ int ret;
+
+ EBUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && bkey_eq(end, POS_MAX));
+
+ if (iter->update_path) {
+ bch2_path_put_nokeep(trans, iter->update_path,
+ iter->flags & BTREE_ITER_INTENT);
+ iter->update_path = 0;
+ }
+
+ bch2_btree_iter_verify_entry_exit(iter);
+
+ while (1) {
+ k = __bch2_btree_iter_peek(iter, search_key);
+ if (unlikely(!k.k))
+ goto end;
+ if (unlikely(bkey_err(k)))
+ goto out_no_locked;
+
+ /*
+ * We need to check against @end before FILTER_SNAPSHOTS because
+		 * if we get to a different inode than requested we might be
+ * seeing keys for a different snapshot tree that will all be
+ * filtered out.
+ *
+ * But we can't do the full check here, because bkey_start_pos()
+ * isn't monotonically increasing before FILTER_SNAPSHOTS, and
+ * that's what we check against in extents mode:
+ */
+ if (k.k->p.inode > end.inode)
+ goto end;
+
+ if (iter->update_path &&
+ !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) {
+ bch2_path_put_nokeep(trans, iter->update_path,
+ iter->flags & BTREE_ITER_INTENT);
+ iter->update_path = 0;
+ }
+
+ if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
+ (iter->flags & BTREE_ITER_INTENT) &&
+ !(iter->flags & BTREE_ITER_IS_EXTENTS) &&
+ !iter->update_path) {
+ struct bpos pos = k.k->p;
+
+ if (pos.snapshot < iter->snapshot) {
+ search_key = bpos_successor(k.k->p);
+ continue;
+ }
+
+ pos.snapshot = iter->snapshot;
+
+ /*
+ * advance, same as on exit for iter->path, but only up
+ * to snapshot
+ */
+ __btree_path_get(trans->paths + iter->path, iter->flags & BTREE_ITER_INTENT);
+ iter->update_path = iter->path;
+
+ iter->update_path = bch2_btree_path_set_pos(trans,
+ iter->update_path, pos,
+ iter->flags & BTREE_ITER_INTENT,
+ _THIS_IP_);
+ ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+ }
+
+ /*
+ * We can never have a key in a leaf node at POS_MAX, so
+ * we don't have to check these successor() calls:
+ */
+ if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
+ !bch2_snapshot_is_ancestor(trans->c,
+ iter->snapshot,
+ k.k->p.snapshot)) {
+ search_key = bpos_successor(k.k->p);
+ continue;
+ }
+
+ if (bkey_whiteout(k.k) &&
+ !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
+ search_key = bkey_successor(iter, k.k->p);
+ continue;
+ }
+
+ /*
+		 * iter->pos should be monotonically increasing, and always be
+ * equal to the key we just returned - except extents can
+ * straddle iter->pos:
+ */
+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+ iter_pos = k.k->p;
+ else
+ iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
+
+ if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
+ ? bkey_gt(iter_pos, end)
+ : bkey_ge(iter_pos, end)))
+ goto end;
+
+ break;
+ }
+
+ iter->pos = iter_pos;
+
+ iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+
+ btree_path_set_should_be_locked(btree_iter_path(trans, iter));
+out_no_locked:
+ if (iter->update_path) {
+ ret = bch2_btree_path_relock(trans, trans->paths + iter->update_path, _THIS_IP_);
+ if (unlikely(ret))
+ k = bkey_s_c_err(ret);
+ else
+ btree_path_set_should_be_locked(trans->paths + iter->update_path);
+ }
+
+ if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
+ iter->pos.snapshot = iter->snapshot;
+
+ ret = bch2_btree_iter_verify_ret(iter, k);
+ if (unlikely(ret)) {
+ bch2_btree_iter_set_pos(iter, iter->pos);
+ k = bkey_s_c_err(ret);
+ }
+
+ bch2_btree_iter_verify_entry_exit(iter);
+
+ return k;
+end:
+ bch2_btree_iter_set_pos(iter, end);
+ k = bkey_s_c_null;
+ goto out_no_locked;
+}
+
+/**
+ * bch2_btree_iter_next() - returns first key greater than iterator's current
+ * position
+ * @iter: iterator to peek from
+ *
+ * Returns: key if found, or an error extractable with bkey_err().
+ */
+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
+{
+ if (!bch2_btree_iter_advance(iter))
+ return bkey_s_c_null;
+
+ return bch2_btree_iter_peek(iter);
+}
+
+/**
+ * bch2_btree_iter_peek_prev() - returns first key less than or equal to
+ * iterator's current position
+ * @iter: iterator to peek from
+ *
+ * Returns: key if found, or an error extractable with bkey_err().
+ */
+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+ struct bpos search_key = iter->pos;
+ struct bkey_s_c k;
+ struct bkey saved_k;
+ const struct bch_val *saved_v;
+ btree_path_idx_t saved_path = 0;
+ int ret;
+
+ EBUG_ON(btree_iter_path(trans, iter)->cached ||
+ btree_iter_path(trans, iter)->level);
+
+ if (iter->flags & BTREE_ITER_WITH_JOURNAL)
+ return bkey_s_c_err(-EIO);
+
+ bch2_btree_iter_verify(iter);
+ bch2_btree_iter_verify_entry_exit(iter);
+
+ if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
+ search_key.snapshot = U32_MAX;
+
+ while (1) {
+ iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+
+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
+ if (unlikely(ret)) {
+ /* ensure that iter->k is consistent with iter->pos: */
+ bch2_btree_iter_set_pos(iter, iter->pos);
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
+ struct btree_path *path = btree_iter_path(trans, iter);
+
+ k = btree_path_level_peek(trans, path, &path->l[0], &iter->k);
+ if (!k.k ||
+ ((iter->flags & BTREE_ITER_IS_EXTENTS)
+ ? bpos_ge(bkey_start_pos(k.k), search_key)
+ : bpos_gt(k.k->p, search_key)))
+ k = btree_path_level_prev(trans, path, &path->l[0], &iter->k);
+
+ if (unlikely((iter->flags & BTREE_ITER_WITH_UPDATES) &&
+ trans->nr_updates))
+ bch2_btree_trans_peek_prev_updates(trans, iter, &k);
+
+ if (likely(k.k)) {
+ if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
+ if (k.k->p.snapshot == iter->snapshot)
+ goto got_key;
+
+ /*
+ * If we have a saved candidate, and we're no
+ * longer at the same _key_ (not pos), return
+ * that candidate
+ */
+ if (saved_path && !bkey_eq(k.k->p, saved_k.p)) {
+ bch2_path_put_nokeep(trans, iter->path,
+ iter->flags & BTREE_ITER_INTENT);
+ iter->path = saved_path;
+ saved_path = 0;
+ iter->k = saved_k;
+ k.v = saved_v;
+ goto got_key;
+ }
+
+ if (bch2_snapshot_is_ancestor(trans->c,
+ iter->snapshot,
+ k.k->p.snapshot)) {
+ if (saved_path)
+ bch2_path_put_nokeep(trans, saved_path,
+ iter->flags & BTREE_ITER_INTENT);
+ saved_path = btree_path_clone(trans, iter->path,
+ iter->flags & BTREE_ITER_INTENT);
+ path = btree_iter_path(trans, iter);
+ saved_k = *k.k;
+ saved_v = k.v;
+ }
+
+ search_key = bpos_predecessor(k.k->p);
+ continue;
+ }
+got_key:
+ if (bkey_whiteout(k.k) &&
+ !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
+ search_key = bkey_predecessor(iter, k.k->p);
+ if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
+ search_key.snapshot = U32_MAX;
+ continue;
+ }
+
+ btree_path_set_should_be_locked(path);
+ break;
+ } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) {
+ /* Advance to previous leaf node: */
+ search_key = bpos_predecessor(path->l[0].b->data->min_key);
+ } else {
+ /* Start of btree: */
+ bch2_btree_iter_set_pos(iter, POS_MIN);
+ k = bkey_s_c_null;
+ goto out_no_locked;
+ }
+ }
+
+ EBUG_ON(bkey_gt(bkey_start_pos(k.k), iter->pos));
+
+ /* Extents can straddle iter->pos: */
+ if (bkey_lt(k.k->p, iter->pos))
+ iter->pos = k.k->p;
+
+ if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
+ iter->pos.snapshot = iter->snapshot;
+out_no_locked:
+ if (saved_path)
+ bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
+
+ bch2_btree_iter_verify_entry_exit(iter);
+ bch2_btree_iter_verify(iter);
+
+ return k;
+}
+
+/**
+ * bch2_btree_iter_prev() - returns first key less than iterator's current
+ * position
+ * @iter: iterator to peek from
+ *
+ * Returns: key if found, or an error extractable with bkey_err().
+ */
+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
+{
+ if (!bch2_btree_iter_rewind(iter))
+ return bkey_s_c_null;
+
+ return bch2_btree_iter_peek_prev(iter);
+}
+
+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+ struct bpos search_key;
+ struct bkey_s_c k;
+ int ret;
+
+ bch2_btree_iter_verify(iter);
+ bch2_btree_iter_verify_entry_exit(iter);
+ EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE));
+
+ /* extents can't span inode numbers: */
+ if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
+ unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
+ if (iter->pos.inode == KEY_INODE_MAX)
+ return bkey_s_c_null;
+
+ bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
+ }
+
+ search_key = btree_iter_search_key(iter);
+ iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
+ iter->flags & BTREE_ITER_INTENT,
+ btree_iter_ip_allocated(iter));
+
+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
+ if ((iter->flags & BTREE_ITER_CACHED) ||
+ !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
+ k = bkey_s_c_null;
+
+ if (unlikely((iter->flags & BTREE_ITER_WITH_UPDATES) &&
+ trans->nr_updates)) {
+ bch2_btree_trans_peek_slot_updates(trans, iter, &k);
+ if (k.k)
+ goto out;
+ }
+
+ if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
+ (k = btree_trans_peek_slot_journal(trans, iter)).k)
+ goto out;
+
+ if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
+ (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
+ if (!bkey_err(k))
+ iter->k = *k.k;
+ /* We're not returning a key from iter->path: */
+ goto out_no_locked;
+ }
+
+ k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k);
+ if (unlikely(!k.k))
+ goto out_no_locked;
+ } else {
+ struct bpos next;
+ struct bpos end = iter->pos;
+
+ if (iter->flags & BTREE_ITER_IS_EXTENTS)
+ end.offset = U64_MAX;
+
+ EBUG_ON(btree_iter_path(trans, iter)->level);
+
+ if (iter->flags & BTREE_ITER_INTENT) {
+ struct btree_iter iter2;
+
+ bch2_trans_copy_iter(&iter2, iter);
+ k = bch2_btree_iter_peek_upto(&iter2, end);
+
+ if (k.k && !bkey_err(k)) {
+ iter->k = iter2.k;
+ k.k = &iter->k;
+ }
+ bch2_trans_iter_exit(trans, &iter2);
+ } else {
+ struct bpos pos = iter->pos;
+
+ k = bch2_btree_iter_peek_upto(iter, end);
+ if (unlikely(bkey_err(k)))
+ bch2_btree_iter_set_pos(iter, pos);
+ else
+ iter->pos = pos;
+ }
+
+ if (unlikely(bkey_err(k)))
+ goto out_no_locked;
+
+ next = k.k ? bkey_start_pos(k.k) : POS_MAX;
+
+ if (bkey_lt(iter->pos, next)) {
+ bkey_init(&iter->k);
+ iter->k.p = iter->pos;
+
+ if (iter->flags & BTREE_ITER_IS_EXTENTS) {
+ bch2_key_resize(&iter->k,
+ min_t(u64, KEY_SIZE_MAX,
+ (next.inode == iter->pos.inode
+ ? next.offset
+ : KEY_OFFSET_MAX) -
+ iter->pos.offset));
+ EBUG_ON(!iter->k.size);
+ }
+
+ k = (struct bkey_s_c) { &iter->k, NULL };
+ }
+ }
+out:
+ btree_path_set_should_be_locked(btree_iter_path(trans, iter));
+out_no_locked:
+ bch2_btree_iter_verify_entry_exit(iter);
+ bch2_btree_iter_verify(iter);
+ ret = bch2_btree_iter_verify_ret(iter, k);
+ if (unlikely(ret))
+ return bkey_s_c_err(ret);
+
+ return k;
+}
+
+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
+{
+ if (!bch2_btree_iter_advance(iter))
+ return bkey_s_c_null;
+
+ return bch2_btree_iter_peek_slot(iter);
+}
+
+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
+{
+ if (!bch2_btree_iter_rewind(iter))
+ return bkey_s_c_null;
+
+ return bch2_btree_iter_peek_slot(iter);
+}
+
+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
+{
+ struct bkey_s_c k;
+
+ while (btree_trans_too_many_iters(iter->trans) ||
+ (k = bch2_btree_iter_peek_type(iter, iter->flags),
+ bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
+ bch2_trans_begin(iter->trans);
+
+ return k;
+}
+
+/* new transactional stuff: */
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ BUG_ON(trans->nr_sorted != bitmap_weight(trans->paths_allocated, trans->nr_paths) - 1);
+
+ trans_for_each_path(trans, path, i) {
+ BUG_ON(path->sorted_idx >= trans->nr_sorted);
+ BUG_ON(trans->sorted[path->sorted_idx] != i);
+ }
+
+ for (i = 0; i < trans->nr_sorted; i++) {
+ unsigned idx = trans->sorted[i];
+
+ BUG_ON(!test_bit(idx, trans->paths_allocated));
+ BUG_ON(trans->paths[idx].sorted_idx != i);
+ }
+}
+
+static void btree_trans_verify_sorted(struct btree_trans *trans)
+{
+ struct btree_path *path, *prev = NULL;
+ struct trans_for_each_path_inorder_iter iter;
+
+ if (!bch2_debug_check_iterators)
+ return;
+
+ trans_for_each_path_inorder(trans, path, iter) {
+ if (prev && btree_path_cmp(prev, path) > 0) {
+ __bch2_dump_trans_paths_updates(trans, true);
+ panic("trans paths out of order!\n");
+ }
+ prev = path;
+ }
+}
+#else
+static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
+static inline void btree_trans_verify_sorted(struct btree_trans *trans) {}
+#endif
+
+void __bch2_btree_trans_sort_paths(struct btree_trans *trans)
+{
+ int i, l = 0, r = trans->nr_sorted, inc = 1;
+ bool swapped;
+
+ btree_trans_verify_sorted_refs(trans);
+
+ if (trans->paths_sorted)
+ goto out;
+
+ /*
+ * Cocktail shaker sort: this is efficient because iterators will be
+	 * Cocktail shaker sort: this is efficient because the paths will already be
+ */
+ do {
+ swapped = false;
+
+ for (i = inc > 0 ? l : r - 2;
+ i + 1 < r && i >= l;
+ i += inc) {
+ if (btree_path_cmp(trans->paths + trans->sorted[i],
+ trans->paths + trans->sorted[i + 1]) > 0) {
+ swap(trans->sorted[i], trans->sorted[i + 1]);
+ trans->paths[trans->sorted[i]].sorted_idx = i;
+ trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1;
+ swapped = true;
+ }
+ }
+
+ if (inc > 0)
+ --r;
+ else
+ l++;
+ inc = -inc;
+ } while (swapped);
+
+ trans->paths_sorted = true;
+out:
+ btree_trans_verify_sorted(trans);
+}
+
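+/*
+ * Keep trans->sorted (path indices in sorted order) in sync as paths are
+ * freed and allocated: removing or inserting an index shifts the tail of the
+ * array, then each affected path's sorted_idx back-pointer is updated.
+ */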
+static inline void btree_path_list_remove(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ EBUG_ON(path->sorted_idx >= trans->nr_sorted);
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ trans->nr_sorted--;
+ memmove_u64s_down_small(trans->sorted + path->sorted_idx,
+ trans->sorted + path->sorted_idx + 1,
+ DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx,
+ sizeof(u64) / sizeof(btree_path_idx_t)));
+#else
+ array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
+#endif
+ for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++)
+ trans->paths[trans->sorted[i]].sorted_idx = i;
+}
+
+static inline void btree_path_list_add(struct btree_trans *trans,
+ btree_path_idx_t pos,
+ btree_path_idx_t path_idx)
+{
+ struct btree_path *path = trans->paths + path_idx;
+
+ path->sorted_idx = pos ? trans->paths[pos].sorted_idx + 1 : trans->nr_sorted;
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
+ trans->sorted + path->sorted_idx,
+ DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx,
+ sizeof(u64) / sizeof(btree_path_idx_t)));
+ trans->nr_sorted++;
+ trans->sorted[path->sorted_idx] = path_idx;
+#else
+ array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path_idx);
+#endif
+
+ for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++)
+ trans->paths[trans->sorted[i]].sorted_idx = i;
+
+ btree_trans_verify_sorted_refs(trans);
+}
+
+void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
+{
+ if (iter->update_path)
+ bch2_path_put_nokeep(trans, iter->update_path,
+ iter->flags & BTREE_ITER_INTENT);
+ if (iter->path)
+ bch2_path_put(trans, iter->path,
+ iter->flags & BTREE_ITER_INTENT);
+ if (iter->key_cache_path)
+ bch2_path_put(trans, iter->key_cache_path,
+ iter->flags & BTREE_ITER_INTENT);
+ iter->path = 0;
+ iter->update_path = 0;
+ iter->key_cache_path = 0;
+ iter->trans = NULL;
+}
+
+void bch2_trans_iter_init_outlined(struct btree_trans *trans,
+ struct btree_iter *iter,
+ enum btree_id btree_id, struct bpos pos,
+ unsigned flags)
+{
+ bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
+ bch2_btree_iter_flags(trans, btree_id, flags),
+ _RET_IP_);
+}
+
+void bch2_trans_node_iter_init(struct btree_trans *trans,
+ struct btree_iter *iter,
+ enum btree_id btree_id,
+ struct bpos pos,
+ unsigned locks_want,
+ unsigned depth,
+ unsigned flags)
+{
+ flags |= BTREE_ITER_NOT_EXTENTS;
+ flags |= __BTREE_ITER_ALL_SNAPSHOTS;
+ flags |= BTREE_ITER_ALL_SNAPSHOTS;
+
+ bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth,
+ __bch2_btree_iter_flags(trans, btree_id, flags),
+ _RET_IP_);
+
+ iter->min_depth = depth;
+
+ struct btree_path *path = btree_iter_path(trans, iter);
+ BUG_ON(path->locks_want < min(locks_want, BTREE_MAX_DEPTH));
+ BUG_ON(path->level != depth);
+ BUG_ON(iter->min_depth != depth);
+}
+
+void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
+{
+ struct btree_trans *trans = src->trans;
+
+ *dst = *src;
+ if (src->path)
+ __btree_path_get(trans->paths + src->path, src->flags & BTREE_ITER_INTENT);
+ if (src->update_path)
+ __btree_path_get(trans->paths + src->update_path, src->flags & BTREE_ITER_INTENT);
+ dst->key_cache_path = 0;
+}
+
+void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
+{
+ struct bch_fs *c = trans->c;
+ unsigned new_top = trans->mem_top + size;
+ unsigned old_bytes = trans->mem_bytes;
+ unsigned new_bytes = roundup_pow_of_two(new_top);
+ int ret;
+ void *new_mem;
+ void *p;
+
+ WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
+
+ struct btree_transaction_stats *s = btree_trans_stats(trans);
+ s->max_mem = max(s->max_mem, new_bytes);
+
+ new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+ if (unlikely(!new_mem)) {
+ bch2_trans_unlock(trans);
+
+ new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
+ if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
+ new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
+ new_bytes = BTREE_TRANS_MEM_MAX;
+ kfree(trans->mem);
+ }
+
+ if (!new_mem)
+ return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+
+ trans->mem = new_mem;
+ trans->mem_bytes = new_bytes;
+
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ trans->mem = new_mem;
+ trans->mem_bytes = new_bytes;
+
+ if (old_bytes) {
+ trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
+ }
+
+ p = trans->mem + trans->mem_top;
+ trans->mem_top += size;
+ memset(p, 0, size);
+ return p;
+}
+
+static inline void check_srcu_held_too_long(struct btree_trans *trans)
+{
+ WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
+ "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
+ (jiffies - trans->srcu_lock_time) / HZ);
+}
+
+void bch2_trans_srcu_unlock(struct btree_trans *trans)
+{
+ if (trans->srcu_held) {
+ struct bch_fs *c = trans->c;
+ struct btree_path *path;
+ unsigned i;
+
+ trans_for_each_path(trans, path, i)
+ if (path->cached && !btree_node_locked(path, 0))
+ path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+
+ check_srcu_held_too_long(trans);
+ srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+ trans->srcu_held = false;
+ }
+}
+
+static void bch2_trans_srcu_lock(struct btree_trans *trans)
+{
+ if (!trans->srcu_held) {
+ trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
+ trans->srcu_lock_time = jiffies;
+ trans->srcu_held = true;
+ }
+}
+
+/**
+ * bch2_trans_begin() - reset a transaction after an interrupted attempt
+ * @trans: transaction to reset
+ *
+ * Returns: current restart counter, to be used with trans_was_restarted()
+ *
+ * While iterating over or updating nodes, an attempt to lock a btree node may
+ * return BCH_ERR_transaction_restart when the trylock fails. When this
+ * occurs, bch2_trans_begin() should be called and the transaction retried.
+ */
+u32 bch2_trans_begin(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ unsigned i;
+ u64 now;
+
+ bch2_trans_reset_updates(trans);
+
+ trans->restart_count++;
+ trans->mem_top = 0;
+ trans->journal_entries = NULL;
+
+ trans_for_each_path(trans, path, i) {
+ path->should_be_locked = false;
+
+ /*
+ * If the transaction wasn't restarted, we're presuming to be
+		 * doing something new: don't keep iterators except the ones
+		 * that are in use - except for the subvolumes btree:
+ */
+ if (!trans->restarted && path->btree_id != BTREE_ID_subvolumes)
+ path->preserve = false;
+
+ /*
+ * XXX: we probably shouldn't be doing this if the transaction
+ * was restarted, but currently we still overflow transaction
+ * iterators if we do that
+ */
+ if (!path->ref && !path->preserve)
+ __bch2_path_free(trans, i);
+ else
+ path->preserve = false;
+ }
+
+ now = local_clock();
+
+ if (!IS_ENABLED(CONFIG_BCACHEFS_NO_LATENCY_ACCT) &&
+ time_after64(now, trans->last_begin_time + 10))
+ __bch2_time_stats_update(&btree_trans_stats(trans)->duration,
+ trans->last_begin_time, now);
+
+ if (!trans->restarted &&
+ (need_resched() ||
+ time_after64(now, trans->last_begin_time + BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS))) {
+ drop_locks_do(trans, (cond_resched(), 0));
+ now = local_clock();
+ }
+ trans->last_begin_time = now;
+
+ if (unlikely(trans->srcu_held &&
+ time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
+ bch2_trans_srcu_unlock(trans);
+
+ trans->last_begin_ip = _RET_IP_;
+ if (trans->restarted) {
+ bch2_btree_path_traverse_all(trans);
+ trans->notrace_relock_fail = false;
+ }
+
+ return trans->restart_count;
+}
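+
+/*
+ * Illustrative restart loop (a sketch, not part of this patch): callers
+ * typically wrap their work like this, or use the lockrestart_do() helper in
+ * btree_iter.h which encodes the same pattern; do_work() is a hypothetical
+ * callback returning 0 or a bch2 error code.
+ *
+ *	do {
+ *		bch2_trans_begin(trans);
+ *		ret = do_work(trans);
+ *	} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
+ */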
+
+const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR] = { "(unknown)" };
+
+unsigned bch2_trans_get_fn_idx(const char *fn)
+{
+ for (unsigned i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
+ if (!bch2_btree_transaction_fns[i] ||
+ bch2_btree_transaction_fns[i] == fn) {
+ bch2_btree_transaction_fns[i] = fn;
+ return i;
+ }
+
+ pr_warn_once("BCH_TRANSACTIONS_NR not big enough!");
+ return 0;
+}
+
+struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
+ __acquires(&c->btree_trans_barrier)
+{
+ struct btree_trans *trans;
+
+ if (IS_ENABLED(__KERNEL__)) {
+ trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL);
+ if (trans) {
+ memset(trans, 0, offsetof(struct btree_trans, list));
+ goto got_trans;
+ }
+ }
+
+ trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
+ memset(trans, 0, sizeof(*trans));
+ closure_init_stack(&trans->ref);
+
+ seqmutex_lock(&c->btree_trans_lock);
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+ struct btree_trans *pos;
+ pid_t pid = current->pid;
+
+ trans->locking_wait.task = current;
+
+ list_for_each_entry(pos, &c->btree_trans_list, list) {
+ struct task_struct *pos_task = READ_ONCE(pos->locking_wait.task);
+ /*
+ * We'd much prefer to be stricter here and completely
+ * disallow multiple btree_trans in the same thread -
+ * but the data move path calls bch2_write when we
+ * already have a btree_trans initialized.
+ */
+ BUG_ON(pos_task &&
+ pid == pos_task->pid &&
+ bch2_trans_locked(pos));
+
+ if (pos_task && pid < pos_task->pid) {
+ list_add_tail(&trans->list, &pos->list);
+ goto list_add_done;
+ }
+ }
+ }
+ list_add_tail(&trans->list, &c->btree_trans_list);
+list_add_done:
+ seqmutex_unlock(&c->btree_trans_lock);
+got_trans:
+ trans->c = c;
+ trans->last_begin_time = local_clock();
+ trans->fn_idx = fn_idx;
+ trans->locking_wait.task = current;
+ trans->journal_replay_not_finished =
+ unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) &&
+ atomic_inc_not_zero(&c->journal_keys.ref);
+ trans->nr_paths = ARRAY_SIZE(trans->_paths);
+ trans->paths_allocated = trans->_paths_allocated;
+ trans->sorted = trans->_sorted;
+ trans->paths = trans->_paths;
+ trans->updates = trans->_updates;
+
+ *trans_paths_nr(trans->paths) = BTREE_ITER_INITIAL;
+
+ trans->paths_allocated[0] = 1;
+
+ if (fn_idx < BCH_TRANSACTIONS_NR) {
+ trans->fn = bch2_btree_transaction_fns[fn_idx];
+
+ struct btree_transaction_stats *s = &c->btree_transaction_stats[fn_idx];
+
+ if (s->max_mem) {
+ unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem);
+
+ trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL);
+ if (likely(trans->mem))
+ trans->mem_bytes = expected_mem_bytes;
+ }
+
+ trans->nr_paths_max = s->nr_max_paths;
+ trans->journal_entries_size = s->journal_entries_size;
+ }
+
+ trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+ trans->srcu_lock_time = jiffies;
+ trans->srcu_held = true;
+ return trans;
+}
+
+static void check_btree_paths_leaked(struct btree_trans *trans)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+ struct bch_fs *c = trans->c;
+ struct btree_path *path;
+ unsigned i;
+
+ trans_for_each_path(trans, path, i)
+ if (path->ref)
+ goto leaked;
+ return;
+leaked:
+ bch_err(c, "btree paths leaked from %s!", trans->fn);
+ trans_for_each_path(trans, path, i)
+ if (path->ref)
+ printk(KERN_ERR " btree %s %pS\n",
+ bch2_btree_id_str(path->btree_id),
+ (void *) path->ip_allocated);
+ /* Be noisy about this: */
+ bch2_fatal_error(c);
+#endif
+}
+
+void bch2_trans_put(struct btree_trans *trans)
+ __releases(&c->btree_trans_barrier)
+{
+ struct bch_fs *c = trans->c;
+
+ bch2_trans_unlock(trans);
+
+ trans_for_each_update(trans, i)
+ __btree_path_put(trans->paths + i->path, true);
+ trans->nr_updates = 0;
+ trans->locking_wait.task = NULL;
+
+ check_btree_paths_leaked(trans);
+
+ if (trans->srcu_held) {
+ check_srcu_held_too_long(trans);
+ srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+ }
+
+ if (trans->fs_usage_deltas) {
+ if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) ==
+ REPLICAS_DELTA_LIST_MAX)
+ mempool_free(trans->fs_usage_deltas,
+ &c->replicas_delta_pool);
+ else
+ kfree(trans->fs_usage_deltas);
+ }
+
+ if (unlikely(trans->journal_replay_not_finished))
+ bch2_journal_keys_put(c);
+
+ unsigned long *paths_allocated = trans->paths_allocated;
+ trans->paths_allocated = NULL;
+ trans->paths = NULL;
+
+ if (paths_allocated != trans->_paths_allocated)
+ kfree_rcu_mightsleep(paths_allocated);
+
+ if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
+ mempool_free(trans->mem, &c->btree_trans_mem_pool);
+ else
+ kfree(trans->mem);
+
+ /* Userspace doesn't have a real percpu implementation: */
+ if (IS_ENABLED(__KERNEL__))
+ trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
+
+ if (trans) {
+ closure_sync(&trans->ref);
+
+ seqmutex_lock(&c->btree_trans_lock);
+ list_del(&trans->list);
+ seqmutex_unlock(&c->btree_trans_lock);
+
+ mempool_free(trans, &c->btree_trans_pool);
+ }
+}
+
+static void __maybe_unused
+bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
+ struct btree_bkey_cached_common *b)
+{
+ struct six_lock_count c = six_lock_counts(&b->lock);
+ struct task_struct *owner;
+ pid_t pid;
+
+ rcu_read_lock();
+ owner = READ_ONCE(b->lock.owner);
+ pid = owner ? owner->pid : 0;
+ rcu_read_unlock();
+
+ prt_tab(out);
+ prt_printf(out, "%px %c l=%u %s:", b, b->cached ? 'c' : 'b',
+ b->level, bch2_btree_id_str(b->btree_id));
+ bch2_bpos_to_text(out, btree_node_pos(b));
+
+ prt_tab(out);
+ prt_printf(out, " locks %u:%u:%u held by pid %u",
+ c.n[0], c.n[1], c.n[2], pid);
+}
+
+void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
+{
+ struct btree_bkey_cached_common *b;
+ static char lock_types[] = { 'r', 'i', 'w' };
+ struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+ unsigned l, idx;
+
+ /* before rcu_read_lock(): */
+ bch2_printbuf_make_room(out, 4096);
+
+ if (!out->nr_tabstops) {
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 32);
+ }
+
+ prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn);
+
+ /* trans->paths is rcu protected vs. freeing */
+ rcu_read_lock();
+ out->atomic++;
+
+ struct btree_path *paths = rcu_dereference(trans->paths);
+ if (!paths)
+ goto out;
+
+ unsigned long *paths_allocated = trans_paths_allocated(paths);
+
+ trans_for_each_path_idx_from(paths_allocated, *trans_paths_nr(paths), idx, 1) {
+ struct btree_path *path = paths + idx;
+ if (!path->nodes_locked)
+ continue;
+
+ prt_printf(out, " path %u %c l=%u %s:",
+ idx,
+ path->cached ? 'c' : 'b',
+ path->level,
+ bch2_btree_id_str(path->btree_id));
+ bch2_bpos_to_text(out, path->pos);
+ prt_newline(out);
+
+ for (l = 0; l < BTREE_MAX_DEPTH; l++) {
+ if (btree_node_locked(path, l) &&
+ !IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) {
+ prt_printf(out, " %c l=%u ",
+ lock_types[btree_node_locked_type(path, l)], l);
+ bch2_btree_bkey_cached_common_to_text(out, b);
+ prt_newline(out);
+ }
+ }
+ }
+
+ b = READ_ONCE(trans->locking);
+ if (b) {
+ prt_printf(out, " blocked for %lluus on",
+ div_u64(local_clock() - trans->locking_wait.start_time,
+ 1000));
+ prt_newline(out);
+ prt_printf(out, " %c", lock_types[trans->locking_wait.lock_want]);
+ bch2_btree_bkey_cached_common_to_text(out, b);
+ prt_newline(out);
+ }
+out:
+ --out->atomic;
+ rcu_read_unlock();
+}
+
+void bch2_fs_btree_iter_exit(struct bch_fs *c)
+{
+ struct btree_transaction_stats *s;
+ struct btree_trans *trans;
+ int cpu;
+
+ if (c->btree_trans_bufs)
+ for_each_possible_cpu(cpu) {
+ struct btree_trans *trans =
+ per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;
+
+ if (trans) {
+ closure_sync(&trans->ref);
+
+ seqmutex_lock(&c->btree_trans_lock);
+ list_del(&trans->list);
+ seqmutex_unlock(&c->btree_trans_lock);
+ }
+ kfree(trans);
+ }
+ free_percpu(c->btree_trans_bufs);
+
+ trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list);
+ if (trans)
+ panic("%s leaked btree_trans\n", trans->fn);
+
+ for (s = c->btree_transaction_stats;
+ s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
+ s++) {
+ kfree(s->max_paths_text);
+ bch2_time_stats_exit(&s->lock_hold_times);
+ }
+
+ if (c->btree_trans_barrier_initialized)
+ cleanup_srcu_struct(&c->btree_trans_barrier);
+ mempool_exit(&c->btree_trans_mem_pool);
+ mempool_exit(&c->btree_trans_pool);
+}
+
+void bch2_fs_btree_iter_init_early(struct bch_fs *c)
+{
+ struct btree_transaction_stats *s;
+
+ for (s = c->btree_transaction_stats;
+ s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
+ s++) {
+ bch2_time_stats_init(&s->duration);
+ bch2_time_stats_init(&s->lock_hold_times);
+ mutex_init(&s->lock);
+ }
+
+ INIT_LIST_HEAD(&c->btree_trans_list);
+ seqmutex_init(&c->btree_trans_lock);
+}
+
+int bch2_fs_btree_iter_init(struct bch_fs *c)
+{
+ int ret;
+
+ c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf);
+ if (!c->btree_trans_bufs)
+ return -ENOMEM;
+
+ ret = mempool_init_kmalloc_pool(&c->btree_trans_pool, 1,
+ sizeof(struct btree_trans)) ?:
+ mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
+ BTREE_TRANS_MEM_MAX) ?:
+ init_srcu_struct(&c->btree_trans_barrier);
+ if (!ret)
+ c->btree_trans_barrier_initialized = true;
+ return ret;
+}
diff --git a/c_src/libbcachefs/btree_iter.h b/c_src/libbcachefs/btree_iter.h
new file mode 100644
index 00000000..da2b74fa
--- /dev/null
+++ b/c_src/libbcachefs/btree_iter.h
@@ -0,0 +1,879 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_ITER_H
+#define _BCACHEFS_BTREE_ITER_H
+
+#include "bset.h"
+#include "btree_types.h"
+#include "trace.h"
+
+static inline int __bkey_err(const struct bkey *k)
+{
+ return PTR_ERR_OR_ZERO(k);
+}
+
+#define bkey_err(_k) __bkey_err((_k).k)
+
+static inline void __btree_path_get(struct btree_path *path, bool intent)
+{
+ path->ref++;
+ path->intent_ref += intent;
+}
+
+static inline bool __btree_path_put(struct btree_path *path, bool intent)
+{
+ EBUG_ON(!path->ref);
+ EBUG_ON(!path->intent_ref && intent);
+ path->intent_ref -= intent;
+ return --path->ref == 0;
+}
+
+static inline void btree_path_set_dirty(struct btree_path *path,
+ enum btree_path_uptodate u)
+{
+ path->uptodate = max_t(unsigned, path->uptodate, u);
+}
+
+static inline struct btree *btree_path_node(struct btree_path *path,
+ unsigned level)
+{
+ return level < BTREE_MAX_DEPTH ? path->l[level].b : NULL;
+}
+
+static inline bool btree_node_lock_seq_matches(const struct btree_path *path,
+ const struct btree *b, unsigned level)
+{
+ return path->l[level].lock_seq == six_lock_seq(&b->c.lock);
+}
+
+static inline struct btree *btree_node_parent(struct btree_path *path,
+ struct btree *b)
+{
+ return btree_path_node(path, b->c.level + 1);
+}
+
+/* Iterate over paths within a transaction: */
+
+void __bch2_btree_trans_sort_paths(struct btree_trans *);
+
+static inline void btree_trans_sort_paths(struct btree_trans *trans)
+{
+ if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
+ trans->paths_sorted)
+ return;
+ __bch2_btree_trans_sort_paths(trans);
+}
+
+static inline unsigned long *trans_paths_nr(struct btree_path *paths)
+{
+ return &container_of(paths, struct btree_trans_paths, paths[0])->nr_paths;
+}
+
+static inline unsigned long *trans_paths_allocated(struct btree_path *paths)
+{
+ unsigned long *v = trans_paths_nr(paths);
+ return v - BITS_TO_LONGS(*v);
+}
+
+#define trans_for_each_path_idx_from(_paths_allocated, _nr, _idx, _start)\
+ for (_idx = _start; \
+ (_idx = find_next_bit(_paths_allocated, _nr, _idx)) < _nr; \
+ _idx++)
+
+static inline struct btree_path *
+__trans_next_path(struct btree_trans *trans, unsigned *idx)
+{
+ unsigned long *w = trans->paths_allocated + *idx / BITS_PER_LONG;
+ /*
+ * Open coded find_next_bit(), because
+	 *  - this is a fast path, we can't afford the function call
+	 *  - and we know that nr_paths is a multiple of BITS_PER_LONG
+ */
+ while (*idx < trans->nr_paths) {
+ unsigned long v = *w >> (*idx & (BITS_PER_LONG - 1));
+ if (v) {
+ *idx += __ffs(v);
+ return trans->paths + *idx;
+ }
+
+ *idx += BITS_PER_LONG;
+ *idx &= ~(BITS_PER_LONG - 1);
+ w++;
+ }
+
+ return NULL;
+}
+
+/*
+ * This version is intended to be safe for use on a btree_trans that is owned by
+ * another thread, e.g. for bch2_btree_trans_to_text().
+ */
+#define trans_for_each_path_from(_trans, _path, _idx, _start) \
+ for (_idx = _start; \
+ (_path = __trans_next_path((_trans), &_idx)); \
+ _idx++)
+
+#define trans_for_each_path(_trans, _path, _idx) \
+ trans_for_each_path_from(_trans, _path, _idx, 1)
+
+static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path)
+{
+ unsigned idx = path ? path->sorted_idx + 1 : 0;
+
+ EBUG_ON(idx > trans->nr_sorted);
+
+ return idx < trans->nr_sorted
+ ? trans->paths + trans->sorted[idx]
+ : NULL;
+}
+
+static inline struct btree_path *prev_btree_path(struct btree_trans *trans, struct btree_path *path)
+{
+ unsigned idx = path ? path->sorted_idx : trans->nr_sorted;
+
+ return idx
+ ? trans->paths + trans->sorted[idx - 1]
+ : NULL;
+}
+
+#define trans_for_each_path_idx_inorder(_trans, _iter) \
+ for (_iter = (struct trans_for_each_path_inorder_iter) { 0 }; \
+ (_iter.path_idx = trans->sorted[_iter.sorted_idx], \
+ _iter.sorted_idx < (_trans)->nr_sorted); \
+ _iter.sorted_idx++)
+
+struct trans_for_each_path_inorder_iter {
+ btree_path_idx_t sorted_idx;
+ btree_path_idx_t path_idx;
+};
+
+#define trans_for_each_path_inorder(_trans, _path, _iter) \
+ for (_iter = (struct trans_for_each_path_inorder_iter) { 0 }; \
+ (_iter.path_idx = trans->sorted[_iter.sorted_idx], \
+ _path = (_trans)->paths + _iter.path_idx, \
+ _iter.sorted_idx < (_trans)->nr_sorted); \
+ _iter.sorted_idx++)
+
+#define trans_for_each_path_inorder_reverse(_trans, _path, _i) \
+ for (_i = trans->nr_sorted - 1; \
+ ((_path) = (_trans)->paths + trans->sorted[_i]), (_i) >= 0;\
+ --_i)
+
+static inline bool __path_has_node(const struct btree_path *path,
+ const struct btree *b)
+{
+ return path->l[b->c.level].b == b &&
+ btree_node_lock_seq_matches(path, b, b->c.level);
+}
+
+static inline struct btree_path *
+__trans_next_path_with_node(struct btree_trans *trans, struct btree *b,
+ unsigned *idx)
+{
+ struct btree_path *path;
+
+ while ((path = __trans_next_path(trans, idx)) &&
+ !__path_has_node(path, b))
+ (*idx)++;
+
+ return path;
+}
+
+#define trans_for_each_path_with_node(_trans, _b, _path, _iter) \
+ for (_iter = 1; \
+ (_path = __trans_next_path_with_node((_trans), (_b), &_iter));\
+ _iter++)
+
+btree_path_idx_t __bch2_btree_path_make_mut(struct btree_trans *, btree_path_idx_t,
+ bool, unsigned long);
+
+static inline btree_path_idx_t __must_check
+bch2_btree_path_make_mut(struct btree_trans *trans,
+ btree_path_idx_t path, bool intent,
+ unsigned long ip)
+{
+ if (trans->paths[path].ref > 1 ||
+ trans->paths[path].preserve)
+ path = __bch2_btree_path_make_mut(trans, path, intent, ip);
+ trans->paths[path].should_be_locked = false;
+ return path;
+}
+
+btree_path_idx_t __must_check
+__bch2_btree_path_set_pos(struct btree_trans *, btree_path_idx_t,
+ struct bpos, bool, unsigned long);
+
+static inline btree_path_idx_t __must_check
+bch2_btree_path_set_pos(struct btree_trans *trans,
+ btree_path_idx_t path, struct bpos new_pos,
+ bool intent, unsigned long ip)
+{
+ return !bpos_eq(new_pos, trans->paths[path].pos)
+ ? __bch2_btree_path_set_pos(trans, path, new_pos, intent, ip)
+ : path;
+}
+
+int __must_check bch2_btree_path_traverse_one(struct btree_trans *,
+ btree_path_idx_t,
+ unsigned, unsigned long);
+
+static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
+ btree_path_idx_t path, unsigned flags)
+{
+ if (trans->paths[path].uptodate < BTREE_ITER_NEED_RELOCK)
+ return 0;
+
+ return bch2_btree_path_traverse_one(trans, path, flags, _RET_IP_);
+}
+
+btree_path_idx_t bch2_path_get(struct btree_trans *, enum btree_id, struct bpos,
+ unsigned, unsigned, unsigned, unsigned long);
+struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *);
+
+/*
+ * bch2_btree_path_peek_slot() for a cached iterator might return a key in a
+ * different snapshot:
+ */
+static inline struct bkey_s_c bch2_btree_path_peek_slot_exact(struct btree_path *path, struct bkey *u)
+{
+ struct bkey_s_c k = bch2_btree_path_peek_slot(path, u);
+
+ if (k.k && bpos_eq(path->pos, k.k->p))
+ return k;
+
+ bkey_init(u);
+ u->p = path->pos;
+ return (struct bkey_s_c) { u, NULL };
+}
+
+struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *,
+ struct btree_iter *, struct bpos);
+
+void bch2_btree_path_level_init(struct btree_trans *, struct btree_path *, struct btree *);
+
+int __bch2_trans_mutex_lock(struct btree_trans *, struct mutex *);
+
+static inline int bch2_trans_mutex_lock(struct btree_trans *trans, struct mutex *lock)
+{
+ return mutex_trylock(lock)
+ ? 0
+ : __bch2_trans_mutex_lock(trans, lock);
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_trans_verify_paths(struct btree_trans *);
+void bch2_assert_pos_locked(struct btree_trans *, enum btree_id,
+ struct bpos, bool);
+#else
+static inline void bch2_trans_verify_paths(struct btree_trans *trans) {}
+static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
+ struct bpos pos, bool key_cache) {}
+#endif
+
+void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
+ struct btree *, struct bkey_packed *);
+void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *,
+ struct btree *, struct btree_node_iter *,
+ struct bkey_packed *, unsigned, unsigned);
+
+int bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *);
+
+void bch2_path_put(struct btree_trans *, btree_path_idx_t, bool);
+
+int bch2_trans_relock(struct btree_trans *);
+int bch2_trans_relock_notrace(struct btree_trans *);
+void bch2_trans_unlock(struct btree_trans *);
+void bch2_trans_unlock_long(struct btree_trans *);
+bool bch2_trans_locked(struct btree_trans *);
+
+static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count)
+{
+ return restart_count != trans->restart_count
+ ? -BCH_ERR_transaction_restart_nested
+ : 0;
+}
+
+void __noreturn bch2_trans_restart_error(struct btree_trans *, u32);
+
+static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans,
+ u32 restart_count)
+{
+ if (trans_was_restarted(trans, restart_count))
+ bch2_trans_restart_error(trans, restart_count);
+}
+
+void __noreturn bch2_trans_in_restart_error(struct btree_trans *);
+
+static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans)
+{
+ if (trans->restarted)
+ bch2_trans_in_restart_error(trans);
+}
+
+__always_inline
+static int btree_trans_restart_nounlock(struct btree_trans *trans, int err)
+{
+ BUG_ON(err <= 0);
+ BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart));
+
+ trans->restarted = err;
+ trans->last_restarted_ip = _THIS_IP_;
+ return -err;
+}
+
+__always_inline
+static int btree_trans_restart(struct btree_trans *trans, int err)
+{
+ btree_trans_restart_nounlock(trans, err);
+ return -err;
+}
+
+bool bch2_btree_node_upgrade(struct btree_trans *,
+ struct btree_path *, unsigned);
+
+void __bch2_btree_path_downgrade(struct btree_trans *, struct btree_path *, unsigned);
+
+static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ unsigned new_locks_want = path->level + !!path->intent_ref;
+
+ if (path->locks_want > new_locks_want)
+ __bch2_btree_path_downgrade(trans, path, new_locks_want);
+}
+
+void bch2_trans_downgrade(struct btree_trans *);
+
+void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct btree *);
+void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *);
+
+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
+int __must_check bch2_btree_iter_traverse(struct btree_iter *);
+
+struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *);
+struct btree *bch2_btree_iter_next_node(struct btree_iter *);
+
+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
+
+static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+{
+ return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
+}
+
+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
+
+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *);
+
+bool bch2_btree_iter_advance(struct btree_iter *);
+bool bch2_btree_iter_rewind(struct btree_iter *);
+
+static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+{
+ iter->k.type = KEY_TYPE_deleted;
+ iter->k.p.inode = iter->pos.inode = new_pos.inode;
+ iter->k.p.offset = iter->pos.offset = new_pos.offset;
+ iter->k.p.snapshot = iter->pos.snapshot = new_pos.snapshot;
+ iter->k.size = 0;
+}
+
+static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+{
+ struct btree_trans *trans = iter->trans;
+
+ if (unlikely(iter->update_path))
+ bch2_path_put(trans, iter->update_path,
+ iter->flags & BTREE_ITER_INTENT);
+ iter->update_path = 0;
+
+ if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
+ new_pos.snapshot = iter->snapshot;
+
+ __bch2_btree_iter_set_pos(iter, new_pos);
+}
+
+static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *iter)
+{
+ BUG_ON(!(iter->flags & BTREE_ITER_IS_EXTENTS));
+ iter->pos = bkey_start_pos(&iter->k);
+}
+
+static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot)
+{
+ struct bpos pos = iter->pos;
+
+ iter->snapshot = snapshot;
+ pos.snapshot = snapshot;
+ bch2_btree_iter_set_pos(iter, pos);
+}
+
+void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
+
+static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans,
+ unsigned btree_id,
+ unsigned flags)
+{
+ if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) &&
+ btree_id_is_extents(btree_id))
+ flags |= BTREE_ITER_IS_EXTENTS;
+
+ if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
+ !btree_type_has_snapshot_field(btree_id))
+ flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
+
+ if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) &&
+ btree_type_has_snapshots(btree_id))
+ flags |= BTREE_ITER_FILTER_SNAPSHOTS;
+
+ if (trans->journal_replay_not_finished)
+ flags |= BTREE_ITER_WITH_JOURNAL;
+
+ return flags;
+}
+
+static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans,
+ unsigned btree_id,
+ unsigned flags)
+{
+ if (!btree_id_cached(trans->c, btree_id)) {
+ flags &= ~BTREE_ITER_CACHED;
+ flags &= ~BTREE_ITER_WITH_KEY_CACHE;
+ } else if (!(flags & BTREE_ITER_CACHED))
+ flags |= BTREE_ITER_WITH_KEY_CACHE;
+
+ return __bch2_btree_iter_flags(trans, btree_id, flags);
+}
+
+static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned btree_id, struct bpos pos,
+ unsigned locks_want,
+ unsigned depth,
+ unsigned flags,
+ unsigned long ip)
+{
+ iter->trans = trans;
+ iter->update_path = 0;
+ iter->key_cache_path = 0;
+ iter->btree_id = btree_id;
+ iter->min_depth = 0;
+ iter->flags = flags;
+ iter->snapshot = pos.snapshot;
+ iter->pos = pos;
+ iter->k = POS_KEY(pos);
+ iter->journal_idx = 0;
+#ifdef CONFIG_BCACHEFS_DEBUG
+ iter->ip_allocated = ip;
+#endif
+ iter->path = bch2_path_get(trans, btree_id, iter->pos,
+ locks_want, depth, flags, ip);
+}
+
+void bch2_trans_iter_init_outlined(struct btree_trans *, struct btree_iter *,
+ enum btree_id, struct bpos, unsigned);
+
+static inline void bch2_trans_iter_init(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned btree_id, struct bpos pos,
+ unsigned flags)
+{
+ if (__builtin_constant_p(btree_id) &&
+ __builtin_constant_p(flags))
+ bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
+ bch2_btree_iter_flags(trans, btree_id, flags),
+ _THIS_IP_);
+ else
+ bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags);
+}
+
+void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *,
+ enum btree_id, struct bpos,
+ unsigned, unsigned, unsigned);
+void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
+
+static inline void set_btree_iter_dontneed(struct btree_iter *iter)
+{
+ struct btree_trans *trans = iter->trans;
+
+ if (!trans->restarted)
+ btree_iter_path(trans, iter)->preserve = false;
+}
+
+void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
+
+static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
+{
+ size = roundup(size, 8);
+
+ if (likely(trans->mem_top + size <= trans->mem_bytes)) {
+ void *p = trans->mem + trans->mem_top;
+
+ trans->mem_top += size;
+ memset(p, 0, size);
+ return p;
+ } else {
+ return __bch2_trans_kmalloc(trans, size);
+ }
+}
+
+static inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
+{
+ size = round_up(size, 8);
+
+ if (likely(trans->mem_top + size <= trans->mem_bytes)) {
+ void *p = trans->mem + trans->mem_top;
+
+ trans->mem_top += size;
+ return p;
+ } else {
+ return __bch2_trans_kmalloc(trans, size);
+ }
+}
+
+static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned btree_id, struct bpos pos,
+ unsigned flags, unsigned type)
+{
+ struct bkey_s_c k;
+
+ bch2_trans_iter_init(trans, iter, btree_id, pos, flags);
+ k = bch2_btree_iter_peek_slot(iter);
+
+ if (!bkey_err(k) && type && k.k->type != type)
+ k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch);
+ if (unlikely(bkey_err(k)))
+ bch2_trans_iter_exit(trans, iter);
+ return k;
+}
+
+static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned btree_id, struct bpos pos,
+ unsigned flags)
+{
+ return __bch2_bkey_get_iter(trans, iter, btree_id, pos, flags, 0);
+}
+
+#define bch2_bkey_get_iter_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\
+ bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \
+ _btree_id, _pos, _flags, KEY_TYPE_##_type))
+
+static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
+ unsigned btree_id, struct bpos pos,
+ unsigned flags, unsigned type,
+ unsigned val_size, void *val)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type);
+ ret = bkey_err(k);
+ if (!ret) {
+ unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size);
+
+ memcpy(val, k.v, b);
+ if (unlikely(b < sizeof(*val)))
+ memset((void *) val + b, 0, sizeof(*val) - b);
+ bch2_trans_iter_exit(trans, &iter);
+ }
+
+ return ret;
+}
+
+#define bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, _type, _val)\
+ __bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, \
+ KEY_TYPE_##_type, sizeof(*_val), _val)
+
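+/*
+ * Illustrative usage (a sketch, not part of this patch): reading a single
+ * typed value by position; the btree, key type and subvolid shown here are
+ * only examples.
+ *
+ *	struct bch_subvolume subvol;
+ *	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes,
+ *					  POS(0, subvolid), 0,
+ *					  subvolume, &subvol);
+ */
+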
+void bch2_trans_srcu_unlock(struct btree_trans *);
+
+u32 bch2_trans_begin(struct btree_trans *);
+
+/*
+ * XXX
+ * this does not handle transaction restarts from bch2_btree_iter_next_node()
+ * correctly
+ */
+#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \
+ _locks_want, _depth, _flags, _b, _ret) \
+ for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \
+ _start, _locks_want, _depth, _flags); \
+ (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \
+ !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \
+ (_b) = bch2_btree_iter_next_node(&(_iter)))
+
+#define for_each_btree_node(_trans, _iter, _btree_id, _start, \
+ _flags, _b, _ret) \
+ __for_each_btree_node(_trans, _iter, _btree_id, _start, \
+ 0, 0, _flags, _b, _ret)
+
+static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter,
+ unsigned flags)
+{
+ return flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) :
+ bch2_btree_iter_peek_prev(iter);
+}
+
+static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
+ unsigned flags)
+{
+ return flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) :
+ bch2_btree_iter_peek(iter);
+}
+
+static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,
+ struct bpos end,
+ unsigned flags)
+{
+ if (!(flags & BTREE_ITER_SLOTS))
+ return bch2_btree_iter_peek_upto(iter, end);
+
+ if (bkey_gt(iter->pos, end))
+ return bkey_s_c_null;
+
+ return bch2_btree_iter_peek_slot(iter);
+}
+
+int __bch2_btree_trans_too_many_iters(struct btree_trans *);
+
+static inline int btree_trans_too_many_iters(struct btree_trans *trans)
+{
+ if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8)
+ return __bch2_btree_trans_too_many_iters(trans);
+
+ return 0;
+}
+
+/*
+ * goto instead of loop, so that when used inside for_each_btree_key2()
+ * break/continue work correctly
+ */
+#define lockrestart_do(_trans, _do) \
+({ \
+ __label__ transaction_restart; \
+ u32 _restart_count; \
+ int _ret2; \
+transaction_restart: \
+ _restart_count = bch2_trans_begin(_trans); \
+ _ret2 = (_do); \
+ \
+ if (bch2_err_matches(_ret2, BCH_ERR_transaction_restart)) \
+ goto transaction_restart; \
+ \
+ if (!_ret2) \
+ bch2_trans_verify_not_restarted(_trans, _restart_count);\
+ _ret2; \
+})
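+
+/*
+ * Illustrative usage (a sketch, not part of this patch): retrying one unit of
+ * work across transaction restarts; lookup_one_key() is a hypothetical helper
+ * returning 0 or a bch2 error code.
+ *
+ *	ret = lockrestart_do(trans, lookup_one_key(trans, pos));
+ */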
+
+/*
+ * nested_lockrestart_do(), nested_commit_do():
+ *
+ * These are like lockrestart_do() and commit_do(), with two differences:
+ *
+ * - We don't call bch2_trans_begin() unless we had a transaction restart
+ * - We return -BCH_ERR_transaction_restart_nested if we succeeded after a
+ * transaction restart
+ */
+#define nested_lockrestart_do(_trans, _do) \
+({ \
+ u32 _restart_count, _orig_restart_count; \
+ int _ret2; \
+ \
+ _restart_count = _orig_restart_count = (_trans)->restart_count; \
+ \
+ while (bch2_err_matches(_ret2 = (_do), BCH_ERR_transaction_restart))\
+ _restart_count = bch2_trans_begin(_trans); \
+ \
+ if (!_ret2) \
+ bch2_trans_verify_not_restarted(_trans, _restart_count);\
+ \
+ _ret2 ?: trans_was_restarted(_trans, _restart_count); \
+})
+
+#define for_each_btree_key_upto(_trans, _iter, _btree_id, \
+ _start, _end, _flags, _k, _do) \
+({ \
+ struct btree_iter _iter; \
+ struct bkey_s_c _k; \
+ int _ret3 = 0; \
+ \
+ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
+ (_start), (_flags)); \
+ \
+ do { \
+ _ret3 = lockrestart_do(_trans, ({ \
+ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), \
+ _end, (_flags)); \
+ if (!(_k).k) \
+ break; \
+ \
+ bkey_err(_k) ?: (_do); \
+ })); \
+ } while (!_ret3 && bch2_btree_iter_advance(&(_iter))); \
+ \
+ bch2_trans_iter_exit((_trans), &(_iter)); \
+ _ret3; \
+})
+
+#define for_each_btree_key(_trans, _iter, _btree_id, \
+ _start, _flags, _k, _do) \
+ for_each_btree_key_upto(_trans, _iter, _btree_id, _start, \
+ SPOS_MAX, _flags, _k, _do)
+
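+/*
+ * Illustrative usage (a sketch, not part of this patch): walking every key in
+ * a btree, with transaction restarts handled by the macro; count_key() is a
+ * hypothetical callback returning 0 or a bch2 error code.
+ *
+ *	struct btree_trans *trans = bch2_trans_get(c);
+ *
+ *	int ret = for_each_btree_key(trans, iter, BTREE_ID_xattrs, POS_MIN,
+ *				     BTREE_ITER_PREFETCH, k, count_key(k));
+ *
+ *	bch2_trans_put(trans);
+ */
+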
+#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \
+ _start, _flags, _k, _do) \
+({ \
+ struct btree_iter _iter; \
+ struct bkey_s_c _k; \
+ int _ret3 = 0; \
+ \
+ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
+ (_start), (_flags)); \
+ \
+ do { \
+ _ret3 = lockrestart_do(_trans, ({ \
+ (_k) = bch2_btree_iter_peek_prev_type(&(_iter), \
+ (_flags)); \
+ if (!(_k).k) \
+ break; \
+ \
+ bkey_err(_k) ?: (_do); \
+ })); \
+ } while (!_ret3 && bch2_btree_iter_rewind(&(_iter))); \
+ \
+ bch2_trans_iter_exit((_trans), &(_iter)); \
+ _ret3; \
+})
+
+#define for_each_btree_key_commit(_trans, _iter, _btree_id, \
+ _start, _iter_flags, _k, \
+ _disk_res, _journal_seq, _commit_flags,\
+ _do) \
+ for_each_btree_key(_trans, _iter, _btree_id, _start, _iter_flags, _k,\
+ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
+ (_journal_seq), (_commit_flags)))
+
+#define for_each_btree_key_reverse_commit(_trans, _iter, _btree_id, \
+ _start, _iter_flags, _k, \
+ _disk_res, _journal_seq, _commit_flags,\
+ _do) \
+ for_each_btree_key_reverse(_trans, _iter, _btree_id, _start, _iter_flags, _k,\
+ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
+ (_journal_seq), (_commit_flags)))
+
+#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \
+ _start, _end, _iter_flags, _k, \
+ _disk_res, _journal_seq, _commit_flags,\
+ _do) \
+ for_each_btree_key_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\
+ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
+ (_journal_seq), (_commit_flags)))
+
+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
+
+static inline struct bkey_s_c
+__bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
+ struct btree_iter *iter, unsigned flags)
+{
+ struct bkey_s_c k;
+
+ while (btree_trans_too_many_iters(trans) ||
+ (k = bch2_btree_iter_peek_type(iter, flags),
+ bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
+ bch2_trans_begin(trans);
+
+ return k;
+}
+
+#define for_each_btree_key_old(_trans, _iter, _btree_id, \
+ _start, _flags, _k, _ret) \
+ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
+ (_start), (_flags)); \
+ (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
+ !((_ret) = bkey_err(_k)) && (_k).k; \
+ bch2_btree_iter_advance(&(_iter)))
+
+#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \
+ _start, _end, _flags, _k, _ret) \
+ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
+ (_start), (_flags)); \
+ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\
+ !((_ret) = bkey_err(_k)) && (_k).k; \
+ bch2_btree_iter_advance(&(_iter)))
+
+#define for_each_btree_key_upto_continue_norestart(_iter, _end, _flags, _k, _ret)\
+ for (; \
+ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags), \
+ !((_ret) = bkey_err(_k)) && (_k).k; \
+ bch2_btree_iter_advance(&(_iter)))
+
+#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \
+ _start, _flags, _k, _ret) \
+ for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, _start,\
+ SPOS_MAX, _flags, _k, _ret)
+
+#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \
+ for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret)
+
+#define drop_locks_do(_trans, _do) \
+({ \
+ bch2_trans_unlock(_trans); \
+ _do ?: bch2_trans_relock(_trans); \
+})
+
+#define allocate_dropping_locks_errcode(_trans, _do) \
+({ \
+ gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \
+ int _ret = _do; \
+ \
+ if (bch2_err_matches(_ret, ENOMEM)) { \
+ _gfp = GFP_KERNEL; \
+ _ret = drop_locks_do(trans, _do); \
+ } \
+ _ret; \
+})
+
+#define allocate_dropping_locks(_trans, _ret, _do) \
+({ \
+ gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \
+ typeof(_do) _p = _do; \
+ \
+ _ret = 0; \
+ if (unlikely(!_p)) { \
+ _gfp = GFP_KERNEL; \
+ _ret = drop_locks_do(trans, ((_p = _do), 0)); \
+ } \
+ _p; \
+})
+
+void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
+void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
+void bch2_dump_trans_updates(struct btree_trans *);
+void bch2_dump_trans_paths_updates(struct btree_trans *);
+
+struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
+void bch2_trans_put(struct btree_trans *);
+
+extern const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
+unsigned bch2_trans_get_fn_idx(const char *);
+
+#define bch2_trans_get(_c) \
+({ \
+ static unsigned trans_fn_idx; \
+ \
+ if (unlikely(!trans_fn_idx)) \
+ trans_fn_idx = bch2_trans_get_fn_idx(__func__); \
+ __bch2_trans_get(_c, trans_fn_idx); \
+})
+
+void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);
+
+void bch2_fs_btree_iter_exit(struct bch_fs *);
+void bch2_fs_btree_iter_init_early(struct bch_fs *);
+int bch2_fs_btree_iter_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_BTREE_ITER_H */
diff --git a/c_src/libbcachefs/btree_journal_iter.c b/c_src/libbcachefs/btree_journal_iter.c
new file mode 100644
index 00000000..719a94a8
--- /dev/null
+++ b/c_src/libbcachefs/btree_journal_iter.c
@@ -0,0 +1,556 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bset.h"
+#include "btree_journal_iter.h"
+#include "journal_io.h"
+
+#include <linux/sort.h>
+
+/*
+ * For managing keys we read from the journal: until journal replay finishes,
+ * normal btree lookups need to be able to find and return keys from the
+ * journal where they overwrite what's in the btree, so we have a special
+ * iterator and operations for the regular btree iter code to use:
+ */
+
+static int __journal_key_cmp(enum btree_id l_btree_id,
+ unsigned l_level,
+ struct bpos l_pos,
+ const struct journal_key *r)
+{
+ return (cmp_int(l_btree_id, r->btree_id) ?:
+ cmp_int(l_level, r->level) ?:
+ bpos_cmp(l_pos, r->k->k.p));
+}
+
+static int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
+{
+ return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
+}
+
+static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx)
+{
+ size_t gap_size = keys->size - keys->nr;
+
+ if (idx >= keys->gap)
+ idx += gap_size;
+ return idx;
+}
+
+static inline struct journal_key *idx_to_key(struct journal_keys *keys, size_t idx)
+{
+ return keys->d + idx_to_pos(keys, idx);
+}
+
+static size_t __bch2_journal_key_search(struct journal_keys *keys,
+ enum btree_id id, unsigned level,
+ struct bpos pos)
+{
+ size_t l = 0, r = keys->nr, m;
+
+ while (l < r) {
+ m = l + ((r - l) >> 1);
+ if (__journal_key_cmp(id, level, pos, idx_to_key(keys, m)) > 0)
+ l = m + 1;
+ else
+ r = m;
+ }
+
+ BUG_ON(l < keys->nr &&
+ __journal_key_cmp(id, level, pos, idx_to_key(keys, l)) > 0);
+
+ BUG_ON(l &&
+ __journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0);
+
+ return l;
+}
+
+static size_t bch2_journal_key_search(struct journal_keys *keys,
+ enum btree_id id, unsigned level,
+ struct bpos pos)
+{
+ return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos));
+}
+
+/* Returns first non-overwritten key >= search key: */
+struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id,
+ unsigned level, struct bpos pos,
+ struct bpos end_pos, size_t *idx)
+{
+ struct journal_keys *keys = &c->journal_keys;
+ unsigned iters = 0;
+ struct journal_key *k;
+
+ BUG_ON(*idx > keys->nr);
+search:
+ if (!*idx)
+ *idx = __bch2_journal_key_search(keys, btree_id, level, pos);
+
+ while (*idx &&
+ __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
+ --(*idx);
+ iters++;
+ if (iters == 10) {
+ *idx = 0;
+ goto search;
+ }
+ }
+
+ while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
+ if (__journal_key_cmp(btree_id, level, end_pos, k) < 0)
+ return NULL;
+
+ if (k->overwritten) {
+ (*idx)++;
+ continue;
+ }
+
+ if (__journal_key_cmp(btree_id, level, pos, k) <= 0)
+ return k->k;
+
+ (*idx)++;
+ iters++;
+ if (iters == 10) {
+ *idx = 0;
+ goto search;
+ }
+ }
+
+ return NULL;
+}
+
+struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id,
+ unsigned level, struct bpos pos)
+{
+ size_t idx = 0;
+
+ return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
+}
+
+static void journal_iters_fix(struct bch_fs *c)
+{
+ struct journal_keys *keys = &c->journal_keys;
+ /* The key we just inserted is immediately before the gap: */
+ size_t gap_end = keys->gap + (keys->size - keys->nr);
+ struct btree_and_journal_iter *iter;
+
+ /*
+ * If an iterator points one after the key we just inserted, decrement
+ * the iterator so it points at the key we just inserted - if the
+ * decrement was unnecessary, bch2_btree_and_journal_iter_peek() will
+ * handle that:
+ */
+ list_for_each_entry(iter, &c->journal_iters, journal.list)
+ if (iter->journal.idx == gap_end)
+ iter->journal.idx = keys->gap - 1;
+}
+
+static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap)
+{
+ struct journal_keys *keys = &c->journal_keys;
+ struct journal_iter *iter;
+ size_t gap_size = keys->size - keys->nr;
+
+ list_for_each_entry(iter, &c->journal_iters, list) {
+ if (iter->idx > old_gap)
+ iter->idx -= gap_size;
+ if (iter->idx >= new_gap)
+ iter->idx += gap_size;
+ }
+}
+
+int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
+ unsigned level, struct bkey_i *k)
+{
+ struct journal_key n = {
+ .btree_id = id,
+ .level = level,
+ .k = k,
+ .allocated = true,
+ /*
+ * Ensure these keys are done last by journal replay, to unblock
+ * journal reclaim:
+ */
+ .journal_seq = U32_MAX,
+ };
+ struct journal_keys *keys = &c->journal_keys;
+ size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
+
+ BUG_ON(test_bit(BCH_FS_rw, &c->flags));
+
+ if (idx < keys->size &&
+ journal_key_cmp(&n, &keys->d[idx]) == 0) {
+ if (keys->d[idx].allocated)
+ kfree(keys->d[idx].k);
+ keys->d[idx] = n;
+ return 0;
+ }
+
+ if (idx > keys->gap)
+ idx -= keys->size - keys->nr;
+
+ if (keys->nr == keys->size) {
+ struct journal_keys new_keys = {
+ .nr = keys->nr,
+ .size = max_t(size_t, keys->size, 8) * 2,
+ };
+
+ new_keys.d = kvmalloc_array(new_keys.size, sizeof(new_keys.d[0]), GFP_KERNEL);
+ if (!new_keys.d) {
+ bch_err(c, "%s: error allocating new key array (size %zu)",
+ __func__, new_keys.size);
+ return -BCH_ERR_ENOMEM_journal_key_insert;
+ }
+
+ /* Since @keys was full, there was no gap: */
+ memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
+ kvfree(keys->d);
+ keys->d = new_keys.d;
+ keys->nr = new_keys.nr;
+ keys->size = new_keys.size;
+
+ /* And now the gap is at the end: */
+ keys->gap = keys->nr;
+ }
+
+ journal_iters_move_gap(c, keys->gap, idx);
+
+ move_gap(keys->d, keys->nr, keys->size, keys->gap, idx);
+ keys->gap = idx;
+
+ keys->nr++;
+ keys->d[keys->gap++] = n;
+
+ journal_iters_fix(c);
+
+ return 0;
+}
+
+/*
+ * Can only be used from the recovery thread while we're still RO - can't be
+ * used once we've gone RW, as journal_keys is at that point used by multiple
+ * threads:
+ */
+int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
+ unsigned level, struct bkey_i *k)
+{
+ struct bkey_i *n;
+ int ret;
+
+ n = kmalloc(bkey_bytes(&k->k), GFP_KERNEL);
+ if (!n)
+ return -BCH_ERR_ENOMEM_journal_key_insert;
+
+ bkey_copy(n, k);
+ ret = bch2_journal_key_insert_take(c, id, level, n);
+ if (ret)
+ kfree(n);
+ return ret;
+}
+
+int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
+ unsigned level, struct bpos pos)
+{
+ struct bkey_i whiteout;
+
+ bkey_init(&whiteout.k);
+ whiteout.k.p = pos;
+
+ return bch2_journal_key_insert(c, id, level, &whiteout);
+}
+
+void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
+ unsigned level, struct bpos pos)
+{
+ struct journal_keys *keys = &c->journal_keys;
+ size_t idx = bch2_journal_key_search(keys, btree, level, pos);
+
+ if (idx < keys->size &&
+ keys->d[idx].btree_id == btree &&
+ keys->d[idx].level == level &&
+ bpos_eq(keys->d[idx].k->k.p, pos))
+ keys->d[idx].overwritten = true;
+}
+
+static void bch2_journal_iter_advance(struct journal_iter *iter)
+{
+ if (iter->idx < iter->keys->size) {
+ iter->idx++;
+ if (iter->idx == iter->keys->gap)
+ iter->idx += iter->keys->size - iter->keys->nr;
+ }
+}
+
+static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
+{
+ struct journal_key *k = iter->keys->d + iter->idx;
+
+ while (k < iter->keys->d + iter->keys->size &&
+ k->btree_id == iter->btree_id &&
+ k->level == iter->level) {
+ if (!k->overwritten)
+ return bkey_i_to_s_c(k->k);
+
+ bch2_journal_iter_advance(iter);
+ k = iter->keys->d + iter->idx;
+ }
+
+ return bkey_s_c_null;
+}
+
+static void bch2_journal_iter_exit(struct journal_iter *iter)
+{
+ list_del(&iter->list);
+}
+
+static void bch2_journal_iter_init(struct bch_fs *c,
+ struct journal_iter *iter,
+ enum btree_id id, unsigned level,
+ struct bpos pos)
+{
+ iter->btree_id = id;
+ iter->level = level;
+ iter->keys = &c->journal_keys;
+ iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
+}
+
+static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
+{
+ return bch2_btree_node_iter_peek_unpack(&iter->node_iter,
+ iter->b, &iter->unpacked);
+}
+
+static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter)
+{
+ bch2_btree_node_iter_advance(&iter->node_iter, iter->b);
+}
+
+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
+{
+ if (bpos_eq(iter->pos, SPOS_MAX))
+ iter->at_end = true;
+ else
+ iter->pos = bpos_successor(iter->pos);
+}
+
+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
+{
+ struct bkey_s_c btree_k, journal_k, ret;
+again:
+ if (iter->at_end)
+ return bkey_s_c_null;
+
+ while ((btree_k = bch2_journal_iter_peek_btree(iter)).k &&
+ bpos_lt(btree_k.k->p, iter->pos))
+ bch2_journal_iter_advance_btree(iter);
+
+ while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
+ bpos_lt(journal_k.k->p, iter->pos))
+ bch2_journal_iter_advance(&iter->journal);
+
+ ret = journal_k.k &&
+ (!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p))
+ ? journal_k
+ : btree_k;
+
+ if (ret.k && iter->b && bpos_gt(ret.k->p, iter->b->data->max_key))
+ ret = bkey_s_c_null;
+
+ if (ret.k) {
+ iter->pos = ret.k->p;
+ if (bkey_deleted(ret.k)) {
+ bch2_btree_and_journal_iter_advance(iter);
+ goto again;
+ }
+ } else {
+ iter->pos = SPOS_MAX;
+ iter->at_end = true;
+ }
+
+ return ret;
+}
+
+void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
+{
+ bch2_journal_iter_exit(&iter->journal);
+}
+
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
+ struct bch_fs *c,
+ struct btree *b,
+ struct btree_node_iter node_iter,
+ struct bpos pos)
+{
+ memset(iter, 0, sizeof(*iter));
+
+ iter->b = b;
+ iter->node_iter = node_iter;
+ bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos);
+ INIT_LIST_HEAD(&iter->journal.list);
+ iter->pos = b->data->min_key;
+ iter->at_end = false;
+}
+
+/*
+ * this version is used by btree_gc before the filesystem has gone RW and
+ * multithreaded, so it uses the journal_iters list:
+ */
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
+ struct bch_fs *c,
+ struct btree *b)
+{
+ struct btree_node_iter node_iter;
+
+ bch2_btree_node_iter_init_from_start(&node_iter, b);
+ __bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key);
+ list_add(&iter->journal.list, &c->journal_iters);
+}
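+
+/*
+ * Illustrative usage (a sketch, not part of this patch): walking a btree node
+ * with journal keys overlaid, as the gc/recovery paths do; process_key() is a
+ * hypothetical callback.
+ *
+ *	struct btree_and_journal_iter iter;
+ *	struct bkey_s_c k;
+ *
+ *	bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+ *	while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+ *		process_key(k);
+ *		bch2_btree_and_journal_iter_advance(&iter);
+ *	}
+ *	bch2_btree_and_journal_iter_exit(&iter);
+ */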
+
+/* sort and dedup all keys in the journal: */
+
+void bch2_journal_entries_free(struct bch_fs *c)
+{
+ struct journal_replay **i;
+ struct genradix_iter iter;
+
+ genradix_for_each(&c->journal_entries, iter, i)
+ if (*i)
+ kvpfree(*i, offsetof(struct journal_replay, j) +
+ vstruct_bytes(&(*i)->j));
+ genradix_free(&c->journal_entries);
+}
+
+/*
+ * When keys compare equal, oldest compares first:
+ */
+static int journal_sort_key_cmp(const void *_l, const void *_r)
+{
+ const struct journal_key *l = _l;
+ const struct journal_key *r = _r;
+
+ return journal_key_cmp(l, r) ?:
+ cmp_int(l->journal_seq, r->journal_seq) ?:
+ cmp_int(l->journal_offset, r->journal_offset);
+}
+
+void bch2_journal_keys_put(struct bch_fs *c)
+{
+ struct journal_keys *keys = &c->journal_keys;
+ struct journal_key *i;
+
+ BUG_ON(atomic_read(&keys->ref) <= 0);
+
+ if (!atomic_dec_and_test(&keys->ref))
+ return;
+
+ move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
+ keys->gap = keys->nr;
+
+ for (i = keys->d; i < keys->d + keys->nr; i++)
+ if (i->allocated)
+ kfree(i->k);
+
+ kvfree(keys->d);
+ keys->d = NULL;
+ keys->nr = keys->gap = keys->size = 0;
+
+ bch2_journal_entries_free(c);
+}
+
+static void __journal_keys_sort(struct journal_keys *keys)
+{
+ struct journal_key *src, *dst;
+
+ sort(keys->d, keys->nr, sizeof(keys->d[0]), journal_sort_key_cmp, NULL);
+
+ src = dst = keys->d;
+ while (src < keys->d + keys->nr) {
+ while (src + 1 < keys->d + keys->nr &&
+ !journal_key_cmp(src, src + 1))
+ src++;
+
+ *dst++ = *src++;
+ }
+
+ keys->nr = dst - keys->d;
+}
+
+int bch2_journal_keys_sort(struct bch_fs *c)
+{
+ struct genradix_iter iter;
+ struct journal_replay *i, **_i;
+ struct jset_entry *entry;
+ struct bkey_i *k;
+ struct journal_keys *keys = &c->journal_keys;
+ size_t nr_keys = 0, nr_read = 0;
+
+ genradix_for_each(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
+
+ for_each_jset_key(k, entry, &i->j)
+ nr_keys++;
+ }
+
+ if (!nr_keys)
+ return 0;
+
+ keys->size = roundup_pow_of_two(nr_keys);
+
+ keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
+ if (!keys->d) {
+ bch_err(c, "Failed to allocate buffer for sorted journal keys (%zu keys); trying slowpath",
+ nr_keys);
+
+ do {
+ keys->size >>= 1;
+ keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
+ } while (!keys->d && keys->size > nr_keys / 8);
+
+ if (!keys->d) {
+ bch_err(c, "Failed to allocate %zu size buffer for sorted journal keys; exiting",
+ keys->size);
+ return -BCH_ERR_ENOMEM_journal_keys_sort;
+ }
+ }
+
+ genradix_for_each(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
+
+ cond_resched();
+
+ for_each_jset_key(k, entry, &i->j) {
+ if (keys->nr == keys->size) {
+ __journal_keys_sort(keys);
+
+ if (keys->nr > keys->size * 7 / 8) {
+ bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu/%zu",
+ keys->nr, keys->size, nr_read, nr_keys);
+ return -BCH_ERR_ENOMEM_journal_keys_sort;
+ }
+ }
+
+ keys->d[keys->nr++] = (struct journal_key) {
+ .btree_id = entry->btree_id,
+ .level = entry->level,
+ .k = k,
+ .journal_seq = le64_to_cpu(i->j.seq),
+ .journal_offset = k->_data - i->j._data,
+ };
+
+ nr_read++;
+ }
+ }
+
+ __journal_keys_sort(keys);
+ keys->gap = keys->nr;
+
+ bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_keys, keys->nr);
+ return 0;
+}
diff --git a/c_src/libbcachefs/btree_journal_iter.h b/c_src/libbcachefs/btree_journal_iter.h
new file mode 100644
index 00000000..8ca4c100
--- /dev/null
+++ b/c_src/libbcachefs/btree_journal_iter.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_JOURNAL_ITER_H
+#define _BCACHEFS_BTREE_JOURNAL_ITER_H
+
+struct journal_iter {
+ struct list_head list;
+ enum btree_id btree_id;
+ unsigned level;
+ size_t idx;
+ struct journal_keys *keys;
+};
+
+/*
+ * Iterate over keys in the btree, with keys from the journal overlaid on top:
+ */
+
+struct btree_and_journal_iter {
+ struct btree *b;
+ struct btree_node_iter node_iter;
+ struct bkey unpacked;
+
+ struct journal_iter journal;
+ struct bpos pos;
+ bool at_end;
+};
+
+struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id,
+ unsigned, struct bpos, struct bpos, size_t *);
+struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id,
+ unsigned, struct bpos);
+
+int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
+ unsigned, struct bkey_i *);
+int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
+ unsigned, struct bkey_i *);
+int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
+ unsigned, struct bpos);
+void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id,
+ unsigned, struct bpos);
+
+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
+
+void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
+ struct bch_fs *, struct btree *,
+ struct btree_node_iter, struct bpos);
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
+ struct bch_fs *,
+ struct btree *);
+
+void bch2_journal_keys_put(struct bch_fs *);
+
+static inline void bch2_journal_keys_put_initial(struct bch_fs *c)
+{
+ if (c->journal_keys.initial_ref_held)
+ bch2_journal_keys_put(c);
+ c->journal_keys.initial_ref_held = false;
+}
+
+void bch2_journal_entries_free(struct bch_fs *);
+
+int bch2_journal_keys_sort(struct bch_fs *);
+
+#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */
diff --git a/c_src/libbcachefs/btree_key_cache.c b/c_src/libbcachefs/btree_key_cache.c
new file mode 100644
index 00000000..74e52fd2
--- /dev/null
+++ b/c_src/libbcachefs/btree_key_cache.c
@@ -0,0 +1,1067 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_cache.h"
+#include "btree_iter.h"
+#include "btree_key_cache.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+#include "errcode.h"
+#include "error.h"
+#include "journal.h"
+#include "journal_reclaim.h"
+#include "trace.h"
+
+#include <linux/sched/mm.h>
+
+static inline bool btree_uses_pcpu_readers(enum btree_id id)
+{
+ return id == BTREE_ID_subvolumes;
+}
+
+static struct kmem_cache *bch2_key_cache;
+
+static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const struct bkey_cached *ck = obj;
+ const struct bkey_cached_key *key = arg->key;
+
+ return ck->key.btree_id != key->btree_id ||
+ !bpos_eq(ck->key.pos, key->pos);
+}
+
+static const struct rhashtable_params bch2_btree_key_cache_params = {
+ .head_offset = offsetof(struct bkey_cached, hash),
+ .key_offset = offsetof(struct bkey_cached, key),
+ .key_len = sizeof(struct bkey_cached_key),
+ .obj_cmpfn = bch2_btree_key_cache_cmp_fn,
+};
+
+__flatten
+inline struct bkey_cached *
+bch2_btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
+{
+ struct bkey_cached_key key = {
+ .btree_id = btree_id,
+ .pos = pos,
+ };
+
+ return rhashtable_lookup_fast(&c->btree_key_cache.table, &key,
+ bch2_btree_key_cache_params);
+}
+
+static bool bkey_cached_lock_for_evict(struct bkey_cached *ck)
+{
+ if (!six_trylock_intent(&ck->c.lock))
+ return false;
+
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ six_unlock_intent(&ck->c.lock);
+ return false;
+ }
+
+ if (!six_trylock_write(&ck->c.lock)) {
+ six_unlock_intent(&ck->c.lock);
+ return false;
+ }
+
+ return true;
+}
+
+static void bkey_cached_evict(struct btree_key_cache *c,
+ struct bkey_cached *ck)
+{
+ BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash,
+ bch2_btree_key_cache_params));
+ memset(&ck->key, ~0, sizeof(ck->key));
+
+ atomic_long_dec(&c->nr_keys);
+}
+
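+/*
+ * Freed entries can't be returned to the slab until every btree transaction
+ * that might still see them has finished: stamp them with an SRCU grace
+ * period cookie here, which the shrinker checks with
+ * poll_state_synchronize_srcu() before actually freeing:
+ */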
+static void bkey_cached_free(struct btree_key_cache *bc,
+ struct bkey_cached *ck)
+{
+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
+
+ BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
+
+ ck->btree_trans_barrier_seq =
+ start_poll_synchronize_srcu(&c->btree_trans_barrier);
+
+ if (ck->c.lock.readers) {
+ list_move_tail(&ck->list, &bc->freed_pcpu);
+ bc->nr_freed_pcpu++;
+ } else {
+ list_move_tail(&ck->list, &bc->freed_nonpcpu);
+ bc->nr_freed_nonpcpu++;
+ }
+ atomic_long_inc(&bc->nr_freed);
+
+ kfree(ck->k);
+ ck->k = NULL;
+ ck->u64s = 0;
+
+ six_unlock_write(&ck->c.lock);
+ six_unlock_intent(&ck->c.lock);
+}
+
+#ifdef __KERNEL__
+static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
+ struct bkey_cached *ck)
+{
+ struct bkey_cached *pos;
+
+ bc->nr_freed_nonpcpu++;
+
+ list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
+ if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
+ pos->btree_trans_barrier_seq)) {
+ list_move(&ck->list, &pos->list);
+ return;
+ }
+ }
+
+ list_move(&ck->list, &bc->freed_nonpcpu);
+}
+#endif
+
+static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
+ struct bkey_cached *ck)
+{
+ BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
+
+ if (!ck->c.lock.readers) {
+#ifdef __KERNEL__
+ struct btree_key_cache_freelist *f;
+ bool freed = false;
+
+ preempt_disable();
+ f = this_cpu_ptr(bc->pcpu_freed);
+
+ if (f->nr < ARRAY_SIZE(f->objs)) {
+ f->objs[f->nr++] = ck;
+ freed = true;
+ }
+ preempt_enable();
+
+ if (!freed) {
+ mutex_lock(&bc->lock);
+ preempt_disable();
+ f = this_cpu_ptr(bc->pcpu_freed);
+
+ while (f->nr > ARRAY_SIZE(f->objs) / 2) {
+ struct bkey_cached *ck2 = f->objs[--f->nr];
+
+ __bkey_cached_move_to_freelist_ordered(bc, ck2);
+ }
+ preempt_enable();
+
+ __bkey_cached_move_to_freelist_ordered(bc, ck);
+ mutex_unlock(&bc->lock);
+ }
+#else
+ mutex_lock(&bc->lock);
+ list_move_tail(&ck->list, &bc->freed_nonpcpu);
+ bc->nr_freed_nonpcpu++;
+ mutex_unlock(&bc->lock);
+#endif
+ } else {
+ mutex_lock(&bc->lock);
+ list_move_tail(&ck->list, &bc->freed_pcpu);
+ mutex_unlock(&bc->lock);
+ }
+}
+
+static void bkey_cached_free_fast(struct btree_key_cache *bc,
+ struct bkey_cached *ck)
+{
+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
+
+ ck->btree_trans_barrier_seq =
+ start_poll_synchronize_srcu(&c->btree_trans_barrier);
+
+ list_del_init(&ck->list);
+ atomic_long_inc(&bc->nr_freed);
+
+ kfree(ck->k);
+ ck->k = NULL;
+ ck->u64s = 0;
+
+ bkey_cached_move_to_freelist(bc, ck);
+
+ six_unlock_write(&ck->c.lock);
+ six_unlock_intent(&ck->c.lock);
+}
+
+static struct bkey_cached *
+bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
+ bool *was_new)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_key_cache *bc = &c->btree_key_cache;
+ struct bkey_cached *ck = NULL;
+ bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id);
+ int ret;
+
+ if (!pcpu_readers) {
+#ifdef __KERNEL__
+ struct btree_key_cache_freelist *f;
+
+ preempt_disable();
+ f = this_cpu_ptr(bc->pcpu_freed);
+ if (f->nr)
+ ck = f->objs[--f->nr];
+ preempt_enable();
+
+ if (!ck) {
+ mutex_lock(&bc->lock);
+ preempt_disable();
+ f = this_cpu_ptr(bc->pcpu_freed);
+
+ while (!list_empty(&bc->freed_nonpcpu) &&
+ f->nr < ARRAY_SIZE(f->objs) / 2) {
+ ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
+ list_del_init(&ck->list);
+ bc->nr_freed_nonpcpu--;
+ f->objs[f->nr++] = ck;
+ }
+
+ ck = f->nr ? f->objs[--f->nr] : NULL;
+ preempt_enable();
+ mutex_unlock(&bc->lock);
+ }
+#else
+ mutex_lock(&bc->lock);
+ if (!list_empty(&bc->freed_nonpcpu)) {
+ ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
+ list_del_init(&ck->list);
+ bc->nr_freed_nonpcpu--;
+ }
+ mutex_unlock(&bc->lock);
+#endif
+ } else {
+ mutex_lock(&bc->lock);
+ if (!list_empty(&bc->freed_pcpu)) {
+ ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list);
+ list_del_init(&ck->list);
+ }
+ mutex_unlock(&bc->lock);
+ }
+
+ if (ck) {
+ ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent, _THIS_IP_);
+ if (unlikely(ret)) {
+ bkey_cached_move_to_freelist(bc, ck);
+ return ERR_PTR(ret);
+ }
+
+ path->l[0].b = (void *) ck;
+ path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
+
+ ret = bch2_btree_node_lock_write(trans, path, &ck->c);
+ if (unlikely(ret)) {
+ btree_node_unlock(trans, path, 0);
+ bkey_cached_move_to_freelist(bc, ck);
+ return ERR_PTR(ret);
+ }
+
+ return ck;
+ }
+
+ ck = allocate_dropping_locks(trans, ret,
+ kmem_cache_zalloc(bch2_key_cache, _gfp));
+ if (ret) {
+ kmem_cache_free(bch2_key_cache, ck);
+ return ERR_PTR(ret);
+ }
+
+ if (!ck)
+ return NULL;
+
+ INIT_LIST_HEAD(&ck->list);
+ bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
+
+ ck->c.cached = true;
+ BUG_ON(!six_trylock_intent(&ck->c.lock));
+ BUG_ON(!six_trylock_write(&ck->c.lock));
+ *was_new = true;
+ return ck;
+}
+
+static struct bkey_cached *
+bkey_cached_reuse(struct btree_key_cache *c)
+{
+ struct bucket_table *tbl;
+ struct rhash_head *pos;
+ struct bkey_cached *ck;
+ unsigned i;
+
+ mutex_lock(&c->lock);
+ rcu_read_lock();
+ tbl = rht_dereference_rcu(c->table.tbl, &c->table);
+ for (i = 0; i < tbl->size; i++)
+ rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
+ bkey_cached_lock_for_evict(ck)) {
+ bkey_cached_evict(c, ck);
+ goto out;
+ }
+ }
+ ck = NULL;
+out:
+ rcu_read_unlock();
+ mutex_unlock(&c->lock);
+ return ck;
+}
+
+static struct bkey_cached *
+btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_key_cache *bc = &c->btree_key_cache;
+ struct bkey_cached *ck;
+ bool was_new = false;
+
+ ck = bkey_cached_alloc(trans, path, &was_new);
+ if (IS_ERR(ck))
+ return ck;
+
+ if (unlikely(!ck)) {
+ ck = bkey_cached_reuse(bc);
+ if (unlikely(!ck)) {
+ bch_err(c, "error allocating memory for key cache item, btree %s",
+ bch2_btree_id_str(path->btree_id));
+ return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create);
+ }
+
+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
+ }
+
+ ck->c.level = 0;
+ ck->c.btree_id = path->btree_id;
+ ck->key.btree_id = path->btree_id;
+ ck->key.pos = path->pos;
+ ck->valid = false;
+ ck->flags = 1U << BKEY_CACHED_ACCESSED;
+
+ if (unlikely(rhashtable_lookup_insert_fast(&bc->table,
+ &ck->hash,
+ bch2_btree_key_cache_params))) {
+ /* We raced with another fill: */
+
+ if (likely(was_new)) {
+ six_unlock_write(&ck->c.lock);
+ six_unlock_intent(&ck->c.lock);
+ kfree(ck);
+ } else {
+ bkey_cached_free_fast(bc, ck);
+ }
+
+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
+ return NULL;
+ }
+
+ atomic_long_inc(&bc->nr_keys);
+
+ six_unlock_write(&ck->c.lock);
+
+ return ck;
+}
+
+static int btree_key_cache_fill(struct btree_trans *trans,
+ struct btree_path *ck_path,
+ struct bkey_cached *ck)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ unsigned new_u64s = 0;
+ struct bkey_i *new_k = NULL;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, ck->key.btree_id, ck->key.pos,
+ BTREE_ITER_KEY_CACHE_FILL|
+ BTREE_ITER_CACHED_NOFILL);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (!bch2_btree_node_relock(trans, ck_path, 0)) {
+ trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
+ goto err;
+ }
+
+ /*
+ * bch2_varint_decode can read past the end of the buffer by at
+ * most 7 bytes (it won't be used):
+ */
+ new_u64s = k.k->u64s + 1;
+
+ /*
+ * Allocate some extra space so that the transaction commit path is less
+ * likely to have to reallocate, since that requires a transaction
+ * restart:
+ */
+ new_u64s = min(256U, (new_u64s * 3) / 2);
+
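+	/*
+	 * Try a nonblocking allocation first; if that fails we have to drop
+	 * transaction locks for a GFP_KERNEL allocation, then check that we
+	 * can still relock before using it:
+	 */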
+ if (new_u64s > ck->u64s) {
+ new_u64s = roundup_pow_of_two(new_u64s);
+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN);
+ if (!new_k) {
+ bch2_trans_unlock(trans);
+
+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
+ if (!new_k) {
+ bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
+ bch2_btree_id_str(ck->key.btree_id), new_u64s);
+ ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
+ goto err;
+ }
+
+ if (!bch2_btree_node_relock(trans, ck_path, 0)) {
+ kfree(new_k);
+ trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
+ goto err;
+ }
+
+ ret = bch2_trans_relock(trans);
+ if (ret) {
+ kfree(new_k);
+ goto err;
+ }
+ }
+ }
+
+ ret = bch2_btree_node_lock_write(trans, ck_path, &ck_path->l[0].b->c);
+ if (ret) {
+ kfree(new_k);
+ goto err;
+ }
+
+ if (new_k) {
+ kfree(ck->k);
+ ck->u64s = new_u64s;
+ ck->k = new_k;
+ }
+
+ bkey_reassemble(ck->k, k);
+ ck->valid = true;
+ bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
+
+ /* We're not likely to need this iterator again: */
+ set_btree_iter_dontneed(&iter);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static noinline int
+bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_cached *ck;
+ int ret = 0;
+
+ BUG_ON(path->level);
+
+ path->l[1].b = NULL;
+
+ if (bch2_btree_node_relock_notrace(trans, path, 0)) {
+ ck = (void *) path->l[0].b;
+ goto fill;
+ }
+retry:
+ ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
+ if (!ck) {
+ ck = btree_key_cache_create(trans, path);
+ ret = PTR_ERR_OR_ZERO(ck);
+ if (ret)
+ goto err;
+ if (!ck)
+ goto retry;
+
+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
+ path->locks_want = 1;
+ } else {
+ enum six_lock_type lock_want = __btree_lock_want(path, 0);
+
+ ret = btree_node_lock(trans, path, (void *) ck, 0,
+ lock_want, _THIS_IP_);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto err;
+
+ BUG_ON(ret);
+
+ if (ck->key.btree_id != path->btree_id ||
+ !bpos_eq(ck->key.pos, path->pos)) {
+ six_unlock_type(&ck->c.lock, lock_want);
+ goto retry;
+ }
+
+ mark_btree_node_locked(trans, path, 0,
+ (enum btree_node_locked_type) lock_want);
+ }
+
+ path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
+ path->l[0].b = (void *) ck;
+fill:
+ path->uptodate = BTREE_ITER_UPTODATE;
+
+ if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) {
+ /*
+ * Using the underscore version because we haven't set
+ * path->uptodate yet:
+ */
+ if (!path->locks_want &&
+ !__bch2_btree_path_upgrade(trans, path, 1, NULL)) {
+ trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade);
+ goto err;
+ }
+
+ ret = btree_key_cache_fill(trans, path, ck);
+ if (ret)
+ goto err;
+
+ ret = bch2_btree_path_relock(trans, path, _THIS_IP_);
+ if (ret)
+ goto err;
+
+ path->uptodate = BTREE_ITER_UPTODATE;
+ }
+
+ if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
+ set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
+
+ BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
+ BUG_ON(path->uptodate);
+
+ return ret;
+err:
+ path->uptodate = BTREE_ITER_NEED_TRAVERSE;
+ if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ btree_node_unlock(trans, path, 0);
+ path->l[0].b = ERR_PTR(ret);
+ }
+ return ret;
+}
+
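+/*
+ * Fast path for traversing to a key in the key cache; falls back to the
+ * slowpath above whenever the key isn't present in the cache yet or hasn't
+ * been filled in from the btree:
+ */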
+int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_cached *ck;
+ int ret = 0;
+
+ EBUG_ON(path->level);
+
+ path->l[1].b = NULL;
+
+ if (bch2_btree_node_relock_notrace(trans, path, 0)) {
+ ck = (void *) path->l[0].b;
+ goto fill;
+ }
+retry:
+ ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
+ if (!ck) {
+ return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
+ } else {
+ enum six_lock_type lock_want = __btree_lock_want(path, 0);
+
+ ret = btree_node_lock(trans, path, (void *) ck, 0,
+ lock_want, _THIS_IP_);
+ EBUG_ON(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart));
+
+ if (ret)
+ return ret;
+
+ if (ck->key.btree_id != path->btree_id ||
+ !bpos_eq(ck->key.pos, path->pos)) {
+ six_unlock_type(&ck->c.lock, lock_want);
+ goto retry;
+ }
+
+ mark_btree_node_locked(trans, path, 0,
+ (enum btree_node_locked_type) lock_want);
+ }
+
+ path->l[0].lock_seq = six_lock_seq(&ck->c.lock);
+ path->l[0].b = (void *) ck;
+fill:
+ if (!ck->valid)
+ return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
+
+ if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
+ set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
+
+ path->uptodate = BTREE_ITER_UPTODATE;
+ EBUG_ON(!ck->valid);
+ EBUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
+
+ return ret;
+}
+
+static int btree_key_cache_flush_pos(struct btree_trans *trans,
+ struct bkey_cached_key key,
+ u64 journal_seq,
+ unsigned commit_flags,
+ bool evict)
+{
+ struct bch_fs *c = trans->c;
+ struct journal *j = &c->journal;
+ struct btree_iter c_iter, b_iter;
+ struct bkey_cached *ck = NULL;
+ int ret;
+
+ bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos,
+ BTREE_ITER_SLOTS|
+ BTREE_ITER_INTENT|
+ BTREE_ITER_ALL_SNAPSHOTS);
+ bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos,
+ BTREE_ITER_CACHED|
+ BTREE_ITER_INTENT);
+ b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE;
+
+ ret = bch2_btree_iter_traverse(&c_iter);
+ if (ret)
+ goto out;
+
+ ck = (void *) btree_iter_path(trans, &c_iter)->l[0].b;
+ if (!ck)
+ goto out;
+
+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ if (evict)
+ goto evict;
+ goto out;
+ }
+
+ BUG_ON(!ck->valid);
+
+ if (journal_seq && ck->journal.seq != journal_seq)
+ goto out;
+
+ trans->journal_res.seq = ck->journal.seq;
+
+ /*
+ * If we're at the end of the journal, we really want to free up space
+ * in the journal right away - we don't want to pin that old journal
+ * sequence number with a new btree node write, we want to re-journal
+ * the update
+ */
+ if (ck->journal.seq == journal_last_seq(j))
+ commit_flags |= BCH_WATERMARK_reclaim;
+
+ if (ck->journal.seq != journal_last_seq(j) ||
+ j->watermark == BCH_WATERMARK_stripe)
+ commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
+
+ ret = bch2_btree_iter_traverse(&b_iter) ?:
+ bch2_trans_update(trans, &b_iter, ck->k,
+ BTREE_UPDATE_KEY_CACHE_RECLAIM|
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+ BTREE_TRIGGER_NORUN) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc|
+ commit_flags);
+
+ bch2_fs_fatal_err_on(ret &&
+ !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
+ !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
+ !bch2_journal_error(j), c,
+ "error flushing key cache: %s", bch2_err_str(ret));
+ if (ret)
+ goto out;
+
+ bch2_journal_pin_drop(j, &ck->journal);
+
+ struct btree_path *path = btree_iter_path(trans, &c_iter);
+ BUG_ON(!btree_node_locked(path, 0));
+
+ if (!evict) {
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+ atomic_long_dec(&c->btree_key_cache.nr_dirty);
+ }
+ } else {
+ struct btree_path *path2;
+ unsigned i;
+evict:
+ trans_for_each_path(trans, path2, i)
+ if (path2 != path)
+ __bch2_btree_path_unlock(trans, path2);
+
+ bch2_btree_node_lock_write_nofail(trans, path, &ck->c);
+
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+ atomic_long_dec(&c->btree_key_cache.nr_dirty);
+ }
+
+ mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
+ bkey_cached_evict(&c->btree_key_cache, ck);
+ bkey_cached_free_fast(&c->btree_key_cache, ck);
+ }
+out:
+ bch2_trans_iter_exit(trans, &b_iter);
+ bch2_trans_iter_exit(trans, &c_iter);
+ return ret;
+}
+
+int bch2_btree_key_cache_journal_flush(struct journal *j,
+ struct journal_entry_pin *pin, u64 seq)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct bkey_cached *ck =
+ container_of(pin, struct bkey_cached, journal);
+ struct bkey_cached_key key;
+ struct btree_trans *trans = bch2_trans_get(c);
+ int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+ int ret = 0;
+
+ btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read);
+ key = ck->key;
+
+ if (ck->journal.seq != seq ||
+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ six_unlock_read(&ck->c.lock);
+ goto unlock;
+ }
+
+ if (ck->seq != seq) {
+ bch2_journal_pin_update(&c->journal, ck->seq, &ck->journal,
+ bch2_btree_key_cache_journal_flush);
+ six_unlock_read(&ck->c.lock);
+ goto unlock;
+ }
+ six_unlock_read(&ck->c.lock);
+
+ ret = lockrestart_do(trans,
+ btree_key_cache_flush_pos(trans, key, seq,
+ BCH_TRANS_COMMIT_journal_reclaim, false));
+unlock:
+ srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+
+ bch2_trans_put(trans);
+ return ret;
+}
+
+bool bch2_btree_insert_key_cached(struct btree_trans *trans,
+ unsigned flags,
+ struct btree_insert_entry *insert_entry)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_cached *ck = (void *) (trans->paths + insert_entry->path)->l[0].b;
+ struct bkey_i *insert = insert_entry->k;
+ bool kick_reclaim = false;
+
+ BUG_ON(insert->k.u64s > ck->u64s);
+
+ bkey_copy(ck->k, insert);
+ ck->valid = true;
+
+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
+ set_bit(BKEY_CACHED_DIRTY, &ck->flags);
+ atomic_long_inc(&c->btree_key_cache.nr_dirty);
+
+ if (bch2_nr_btree_keys_need_flush(c))
+ kick_reclaim = true;
+ }
+
+ /*
+ * To minimize lock contention, we only add the journal pin here and
+ * defer pin updates to the flush callback via ->seq. Be careful not to
+ * update ->seq on nojournal commits because we don't want to update the
+ * pin to a seq that doesn't include journal updates on disk. Otherwise
+ * we risk losing the update after a crash.
+ *
+ * The only exception is if the pin is not active in the first place. We
+ * have to add the pin because journal reclaim drives key cache
+ * flushing. The flush callback will not proceed unless ->seq matches
+ * the latest pin, so make sure it starts with a consistent value.
+ */
+ if (!(insert_entry->flags & BTREE_UPDATE_NOJOURNAL) ||
+ !journal_pin_active(&ck->journal)) {
+ ck->seq = trans->journal_res.seq;
+ }
+ bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
+ &ck->journal, bch2_btree_key_cache_journal_flush);
+
+ if (kick_reclaim)
+ journal_reclaim_kick(&c->journal);
+ return true;
+}
+
+void bch2_btree_key_cache_drop(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_cached *ck = (void *) path->l[0].b;
+
+ BUG_ON(!ck->valid);
+
+ /*
+ * We just did an update to the btree, bypassing the key cache: the key
+ * cache key is now stale and must be dropped, even if dirty:
+ */
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+ atomic_long_dec(&c->btree_key_cache.nr_dirty);
+ bch2_journal_pin_drop(&c->journal, &ck->journal);
+ }
+
+ ck->valid = false;
+}
+
+static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct bch_fs *c = shrink->private_data;
+ struct btree_key_cache *bc = &c->btree_key_cache;
+ struct bucket_table *tbl;
+ struct bkey_cached *ck, *t;
+ size_t scanned = 0, freed = 0, nr = sc->nr_to_scan;
+ unsigned start, flags;
+ int srcu_idx;
+
+ mutex_lock(&bc->lock);
+ srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+ flags = memalloc_nofs_save();
+
+ /*
+ * Newest freed entries are at the end of the list - once we hit one
+ * that's too new to be freed, we can bail out:
+ */
+ scanned += bc->nr_freed_nonpcpu;
+
+ list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
+ if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
+ ck->btree_trans_barrier_seq))
+ break;
+
+ list_del(&ck->list);
+ six_lock_exit(&ck->c.lock);
+ kmem_cache_free(bch2_key_cache, ck);
+ atomic_long_dec(&bc->nr_freed);
+ freed++;
+ bc->nr_freed_nonpcpu--;
+ }
+
+ if (scanned >= nr)
+ goto out;
+
+ scanned += bc->nr_freed_pcpu;
+
+ list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
+ if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
+ ck->btree_trans_barrier_seq))
+ break;
+
+ list_del(&ck->list);
+ six_lock_exit(&ck->c.lock);
+ kmem_cache_free(bch2_key_cache, ck);
+ atomic_long_dec(&bc->nr_freed);
+ freed++;
+ bc->nr_freed_pcpu--;
+ }
+
+ if (scanned >= nr)
+ goto out;
+
+ rcu_read_lock();
+ tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
+ if (bc->shrink_iter >= tbl->size)
+ bc->shrink_iter = 0;
+ start = bc->shrink_iter;
+
+ do {
+ struct rhash_head *pos, *next;
+
+ pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter));
+
+ while (!rht_is_a_nulls(pos)) {
+ next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
+ ck = container_of(pos, struct bkey_cached, hash);
+
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
+ goto next;
+
+ if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
+ clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
+ else if (bkey_cached_lock_for_evict(ck)) {
+ bkey_cached_evict(bc, ck);
+ bkey_cached_free(bc, ck);
+ }
+
+ scanned++;
+ if (scanned >= nr)
+ break;
+next:
+ pos = next;
+ }
+
+ bc->shrink_iter++;
+ if (bc->shrink_iter >= tbl->size)
+ bc->shrink_iter = 0;
+ } while (scanned < nr && bc->shrink_iter != start);
+
+ rcu_read_unlock();
+out:
+ memalloc_nofs_restore(flags);
+ srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+ mutex_unlock(&bc->lock);
+
+ return freed;
+}
+
+static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct bch_fs *c = shrink->private_data;
+ struct btree_key_cache *bc = &c->btree_key_cache;
+ long nr = atomic_long_read(&bc->nr_keys) -
+ atomic_long_read(&bc->nr_dirty);
+
+ return max(0L, nr);
+}
+
+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
+{
+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
+ struct bucket_table *tbl;
+ struct bkey_cached *ck, *n;
+ struct rhash_head *pos;
+ LIST_HEAD(items);
+ unsigned i;
+#ifdef __KERNEL__
+ int cpu;
+#endif
+
+ shrinker_free(bc->shrink);
+
+ mutex_lock(&bc->lock);
+
+ /*
+ * The loop is needed to guard against racing with rehash:
+ */
+ while (atomic_long_read(&bc->nr_keys)) {
+ rcu_read_lock();
+ tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
+ if (tbl)
+ for (i = 0; i < tbl->size; i++)
+ rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
+ bkey_cached_evict(bc, ck);
+ list_add(&ck->list, &items);
+ }
+ rcu_read_unlock();
+ }
+
+#ifdef __KERNEL__
+ for_each_possible_cpu(cpu) {
+ struct btree_key_cache_freelist *f =
+ per_cpu_ptr(bc->pcpu_freed, cpu);
+
+ for (i = 0; i < f->nr; i++) {
+ ck = f->objs[i];
+ list_add(&ck->list, &items);
+ }
+ }
+#endif
+
+ BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu);
+ BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu);
+
+ list_splice(&bc->freed_pcpu, &items);
+ list_splice(&bc->freed_nonpcpu, &items);
+
+ mutex_unlock(&bc->lock);
+
+ list_for_each_entry_safe(ck, n, &items, list) {
+ cond_resched();
+
+ list_del(&ck->list);
+ kfree(ck->k);
+ six_lock_exit(&ck->c.lock);
+ kmem_cache_free(bch2_key_cache, ck);
+ }
+
+ if (atomic_long_read(&bc->nr_dirty) &&
+ !bch2_journal_error(&c->journal) &&
+ test_bit(BCH_FS_was_rw, &c->flags))
+ panic("btree key cache shutdown error: nr_dirty nonzero (%li)\n",
+ atomic_long_read(&bc->nr_dirty));
+
+ if (atomic_long_read(&bc->nr_keys))
+ panic("btree key cache shutdown error: nr_keys nonzero (%li)\n",
+ atomic_long_read(&bc->nr_keys));
+
+ if (bc->table_init_done)
+ rhashtable_destroy(&bc->table);
+
+ free_percpu(bc->pcpu_freed);
+}
+
+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
+{
+ mutex_init(&c->lock);
+ INIT_LIST_HEAD(&c->freed_pcpu);
+ INIT_LIST_HEAD(&c->freed_nonpcpu);
+}
+
+int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
+{
+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
+ struct shrinker *shrink;
+
+#ifdef __KERNEL__
+ bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist);
+ if (!bc->pcpu_freed)
+ return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+#endif
+
+ if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params))
+ return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+
+ bc->table_init_done = true;
+
+ shrink = shrinker_alloc(0, "%s-btree_key_cache", c->name);
+ if (!shrink)
+ return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+ bc->shrink = shrink;
+ shrink->seeks = 0;
+ shrink->count_objects = bch2_btree_key_cache_count;
+ shrink->scan_objects = bch2_btree_key_cache_scan;
+ shrink->private_data = c;
+ shrinker_register(shrink);
+ return 0;
+}
+
+void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
+{
+ prt_printf(out, "nr_freed:\t%lu", atomic_long_read(&c->nr_freed));
+ prt_newline(out);
+ prt_printf(out, "nr_keys:\t%lu", atomic_long_read(&c->nr_keys));
+ prt_newline(out);
+ prt_printf(out, "nr_dirty:\t%lu", atomic_long_read(&c->nr_dirty));
+ prt_newline(out);
+}
+
+void bch2_btree_key_cache_exit(void)
+{
+ kmem_cache_destroy(bch2_key_cache);
+}
+
+int __init bch2_btree_key_cache_init(void)
+{
+ bch2_key_cache = KMEM_CACHE(bkey_cached, SLAB_RECLAIM_ACCOUNT);
+ if (!bch2_key_cache)
+ return -ENOMEM;
+
+ return 0;
+}
diff --git a/c_src/libbcachefs/btree_key_cache.h b/c_src/libbcachefs/btree_key_cache.h
new file mode 100644
index 00000000..e6b2cd0d
--- /dev/null
+++ b/c_src/libbcachefs/btree_key_cache.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_KEY_CACHE_H
+#define _BCACHEFS_BTREE_KEY_CACHE_H
+
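+/*
+ * Dirty key accounting: bch2_nr_btree_keys_need_flush() returns how far
+ * nr_dirty is above the soft limit (1024 + half the cached keys), and is used
+ * to kick journal reclaim; bch2_btree_key_cache_must_wait() checks the hard
+ * limit (4096 + three quarters of the cached keys):
+ */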
+static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
+{
+ size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
+ size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
+ size_t max_dirty = 1024 + nr_keys / 2;
+
+ return max_t(ssize_t, 0, nr_dirty - max_dirty);
+}
+
+static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
+{
+ size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
+ size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
+ size_t max_dirty = 4096 + (nr_keys * 3) / 4;
+
+ return nr_dirty > max_dirty;
+}
+
+int bch2_btree_key_cache_journal_flush(struct journal *,
+ struct journal_entry_pin *, u64);
+
+struct bkey_cached *
+bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
+
+int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *,
+ unsigned);
+
+bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned,
+ struct btree_insert_entry *);
+void bch2_btree_key_cache_drop(struct btree_trans *,
+ struct btree_path *);
+
+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *);
+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *);
+int bch2_fs_btree_key_cache_init(struct btree_key_cache *);
+
+void bch2_btree_key_cache_to_text(struct printbuf *, struct btree_key_cache *);
+
+void bch2_btree_key_cache_exit(void);
+int __init bch2_btree_key_cache_init(void);
+
+#endif /* _BCACHEFS_BTREE_KEY_CACHE_H */
diff --git a/c_src/libbcachefs/btree_key_cache_types.h b/c_src/libbcachefs/btree_key_cache_types.h
new file mode 100644
index 00000000..290e4e57
--- /dev/null
+++ b/c_src/libbcachefs/btree_key_cache_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+#define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+
+struct btree_key_cache_freelist {
+ struct bkey_cached *objs[16];
+ unsigned nr;
+};
+
+struct btree_key_cache {
+ struct mutex lock;
+ struct rhashtable table;
+ bool table_init_done;
+
+ struct list_head freed_pcpu;
+ size_t nr_freed_pcpu;
+ struct list_head freed_nonpcpu;
+ size_t nr_freed_nonpcpu;
+
+ struct shrinker *shrink;
+ unsigned shrink_iter;
+ struct btree_key_cache_freelist __percpu *pcpu_freed;
+
+ atomic_long_t nr_freed;
+ atomic_long_t nr_keys;
+ atomic_long_t nr_dirty;
+};
+
+struct bkey_cached_key {
+ u32 btree_id;
+ struct bpos pos;
+} __packed __aligned(4);
+
+#endif /* _BCACHEFS_BTREE_KEY_CACHE_TYPES_H */
diff --git a/c_src/libbcachefs/btree_locking.c b/c_src/libbcachefs/btree_locking.c
new file mode 100644
index 00000000..2d1c95c4
--- /dev/null
+++ b/c_src/libbcachefs/btree_locking.c
@@ -0,0 +1,868 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_locking.h"
+#include "btree_types.h"
+
+static struct lock_class_key bch2_btree_node_lock_key;
+
+void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
+ enum six_lock_init_flags flags)
+{
+ __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
+ lockdep_set_novalidate_class(&b->lock);
+}
+
+#ifdef CONFIG_LOCKDEP
+void bch2_assert_btree_nodes_not_locked(void)
+{
+#if 0
+ //Re-enable when lock_class_is_held() is merged:
+ BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+#endif
+}
+#endif
+
+/* Btree node locking: */
+
+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
+ struct btree_path *skip,
+ struct btree_bkey_cached_common *b,
+ unsigned level)
+{
+ struct btree_path *path;
+ struct six_lock_count ret;
+ unsigned i;
+
+ memset(&ret, 0, sizeof(ret));
+
+ if (IS_ERR_OR_NULL(b))
+ return ret;
+
+ trans_for_each_path(trans, path, i)
+ if (path != skip && &path->l[level].b->c == b) {
+ int t = btree_node_locked_type(path, level);
+
+ if (t != BTREE_NODE_UNLOCKED)
+ ret.n[t]++;
+ }
+
+ return ret;
+}
+
+/* unlock */
+
+void bch2_btree_node_unlock_write(struct btree_trans *trans,
+ struct btree_path *path, struct btree *b)
+{
+ bch2_btree_node_unlock_write_inlined(trans, path, b);
+}
+
+/* lock */
+
+/*
+ * @trans wants to lock @b with type @type
+ */
+struct trans_waiting_for_lock {
+ struct btree_trans *trans;
+ struct btree_bkey_cached_common *node_want;
+ enum six_lock_type lock_want;
+
+	/* for iterating over held locks: */
+ u8 path_idx;
+ u8 level;
+ u64 lock_start_time;
+};
+
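+/*
+ * The chain of transactions we're walking while checking for a lock cycle;
+ * it holds at most 8 entries, and lock_graph_descend() aborts the search
+ * (typically with a transaction restart) if the chain would grow deeper:
+ */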
+struct lock_graph {
+ struct trans_waiting_for_lock g[8];
+ unsigned nr;
+};
+
+static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
+{
+ struct trans_waiting_for_lock *i;
+
+ prt_printf(out, "Found lock cycle (%u entries):", g->nr);
+ prt_newline(out);
+
+ for (i = g->g; i < g->g + g->nr; i++) {
+ struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
+ if (!task)
+ continue;
+
+ bch2_btree_trans_to_text(out, i->trans);
+ bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1);
+ }
+}
+
+static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
+{
+ struct trans_waiting_for_lock *i;
+
+ for (i = g->g; i != g->g + g->nr; i++) {
+ struct task_struct *task = i->trans->locking_wait.task;
+ if (i != g->g)
+ prt_str(out, "<- ");
+ prt_printf(out, "%u ", task ?task->pid : 0);
+ }
+ prt_newline(out);
+}
+
+static void lock_graph_up(struct lock_graph *g)
+{
+ closure_put(&g->g[--g->nr].trans->ref);
+}
+
+static noinline void lock_graph_pop_all(struct lock_graph *g)
+{
+ while (g->nr)
+ lock_graph_up(g);
+}
+
+static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
+{
+ g->g[g->nr++] = (struct trans_waiting_for_lock) {
+ .trans = trans,
+ .node_want = trans->locking,
+ .lock_want = trans->locking_wait.lock_want,
+ };
+}
+
+static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
+{
+ closure_get(&trans->ref);
+ __lock_graph_down(g, trans);
+}
+
+static bool lock_graph_remove_non_waiters(struct lock_graph *g)
+{
+ struct trans_waiting_for_lock *i;
+
+ for (i = g->g + 1; i < g->g + g->nr; i++)
+ if (i->trans->locking != i->node_want ||
+ i->trans->locking_wait.start_time != i[-1].lock_start_time) {
+ while (g->g + g->nr > i)
+ lock_graph_up(g);
+ return true;
+ }
+
+ return false;
+}
+
+static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+
+ count_event(c, trans_restart_would_deadlock);
+
+ if (trace_trans_restart_would_deadlock_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ buf.atomic++;
+ print_cycle(&buf, g);
+
+ trace_trans_restart_would_deadlock(trans, buf.buf);
+ printbuf_exit(&buf);
+ }
+}
+
+static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
+{
+ if (i == g->g) {
+ trace_would_deadlock(g, i->trans);
+ return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
+ } else {
+ i->trans->lock_must_abort = true;
+ wake_up_process(i->trans->locking_wait.task);
+ return 0;
+ }
+}
+
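+/*
+ * Ranks how good a candidate this transaction is for aborting to break a
+ * lock cycle: 0 means it must not be aborted, and break_cycle() picks the
+ * waiter with the highest preference:
+ */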
+static int btree_trans_abort_preference(struct btree_trans *trans)
+{
+ if (trans->lock_may_not_fail)
+ return 0;
+ if (trans->locking_wait.lock_want == SIX_LOCK_write)
+ return 1;
+ if (!trans->in_traverse_all)
+ return 2;
+ return 3;
+}
+
+static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
+{
+ struct trans_waiting_for_lock *i, *abort = NULL;
+ unsigned best = 0, pref;
+ int ret;
+
+ if (lock_graph_remove_non_waiters(g))
+ return 0;
+
+ /* Only checking, for debugfs: */
+ if (cycle) {
+ print_cycle(cycle, g);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = g->g; i < g->g + g->nr; i++) {
+ pref = btree_trans_abort_preference(i->trans);
+ if (pref > best) {
+ abort = i;
+ best = pref;
+ }
+ }
+
+ if (unlikely(!best)) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks"));
+
+ for (i = g->g; i < g->g + g->nr; i++) {
+ struct btree_trans *trans = i->trans;
+
+ bch2_btree_trans_to_text(&buf, trans);
+
+ prt_printf(&buf, "backtrace:");
+ prt_newline(&buf);
+ printbuf_indent_add(&buf, 2);
+ bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2);
+ printbuf_indent_sub(&buf, 2);
+ prt_newline(&buf);
+ }
+
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ BUG();
+ }
+
+ ret = abort_lock(g, abort);
+out:
+ if (ret)
+ while (g->nr)
+ lock_graph_up(g);
+ return ret;
+}
+
+static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
+ struct printbuf *cycle)
+{
+ struct btree_trans *orig_trans = g->g->trans;
+ struct trans_waiting_for_lock *i;
+
+ for (i = g->g; i < g->g + g->nr; i++)
+ if (i->trans == trans) {
+ closure_put(&trans->ref);
+ return break_cycle(g, cycle);
+ }
+
+ if (g->nr == ARRAY_SIZE(g->g)) {
+ closure_put(&trans->ref);
+
+ if (orig_trans->lock_may_not_fail)
+ return 0;
+
+ while (g->nr)
+ lock_graph_up(g);
+
+ if (cycle)
+ return 0;
+
+ trace_and_count(trans->c, trans_restart_would_deadlock_recursion_limit, trans, _RET_IP_);
+ return btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit);
+ }
+
+ __lock_graph_down(g, trans);
+ return 0;
+}
+
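+/*
+ * With SIX_LOCK_read = 0, intent = 1 and write = 2, the only compatible
+ * combinations are read/read and read/intent - i.e. the two types conflict
+ * whenever their sum exceeds 1:
+ */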
+static bool lock_type_conflicts(enum six_lock_type t1, enum six_lock_type t2)
+{
+ return t1 + t2 > 1;
+}
+
+int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
+{
+ struct lock_graph g;
+ struct trans_waiting_for_lock *top;
+ struct btree_bkey_cached_common *b;
+ btree_path_idx_t path_idx;
+ int ret = 0;
+
+ g.nr = 0;
+
+ if (trans->lock_must_abort) {
+ if (cycle)
+ return -1;
+
+ trace_would_deadlock(&g, trans);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
+ }
+
+ lock_graph_down(&g, trans);
+
+ /* trans->paths is rcu protected vs. freeing */
+ rcu_read_lock();
+ if (cycle)
+ cycle->atomic++;
+next:
+ if (!g.nr)
+ goto out;
+
+ top = &g.g[g.nr - 1];
+
+ struct btree_path *paths = rcu_dereference(top->trans->paths);
+ if (!paths)
+ goto up;
+
+ unsigned long *paths_allocated = trans_paths_allocated(paths);
+
+ trans_for_each_path_idx_from(paths_allocated, *trans_paths_nr(paths),
+ path_idx, top->path_idx) {
+ struct btree_path *path = paths + path_idx;
+ if (!path->nodes_locked)
+ continue;
+
+ if (path_idx != top->path_idx) {
+ top->path_idx = path_idx;
+ top->level = 0;
+ top->lock_start_time = 0;
+ }
+
+ for (;
+ top->level < BTREE_MAX_DEPTH;
+ top->level++, top->lock_start_time = 0) {
+ int lock_held = btree_node_locked_type(path, top->level);
+
+ if (lock_held == BTREE_NODE_UNLOCKED)
+ continue;
+
+ b = &READ_ONCE(path->l[top->level].b)->c;
+
+ if (IS_ERR_OR_NULL(b)) {
+ /*
+ * If we get here, it means we raced with the
+ * other thread updating its btree_path
+ * structures - which means it can't be blocked
+ * waiting on a lock:
+ */
+ if (!lock_graph_remove_non_waiters(&g)) {
+ /*
+ * If lock_graph_remove_non_waiters()
+ * didn't do anything, it must be
+ * because we're being called by debugfs
+ * checking for lock cycles, which
+ * invokes us on btree_transactions that
+ * aren't actually waiting on anything.
+ * Just bail out:
+ */
+ lock_graph_pop_all(&g);
+ }
+
+ goto next;
+ }
+
+ if (list_empty_careful(&b->lock.wait_list))
+ continue;
+
+ raw_spin_lock(&b->lock.wait_lock);
+ list_for_each_entry(trans, &b->lock.wait_list, locking_wait.list) {
+ BUG_ON(b != trans->locking);
+
+ if (top->lock_start_time &&
+ time_after_eq64(top->lock_start_time, trans->locking_wait.start_time))
+ continue;
+
+ top->lock_start_time = trans->locking_wait.start_time;
+
+ /* Don't check for self deadlock: */
+ if (trans == top->trans ||
+ !lock_type_conflicts(lock_held, trans->locking_wait.lock_want))
+ continue;
+
+ closure_get(&trans->ref);
+ raw_spin_unlock(&b->lock.wait_lock);
+
+ ret = lock_graph_descend(&g, trans, cycle);
+ if (ret)
+ goto out;
+ goto next;
+ }
+ raw_spin_unlock(&b->lock.wait_lock);
+ }
+ }
+up:
+ if (g.nr > 1 && cycle)
+ print_chain(cycle, &g);
+ lock_graph_up(&g);
+ goto next;
+out:
+ if (cycle)
+ --cycle->atomic;
+ rcu_read_unlock();
+ return ret;
+}
+
+int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
+{
+ struct btree_trans *trans = p;
+
+ return bch2_check_for_deadlock(trans, NULL);
+}
+
+int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *path,
+ struct btree_bkey_cached_common *b,
+ bool lock_may_not_fail)
+{
+ int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read];
+ int ret;
+
+ /*
+ * Must drop our read locks before calling six_lock_write() -
+ * six_unlock() won't do wakeups until the reader count
+ * goes to 0, and it's safe because we have the node intent
+ * locked:
+ */
+ six_lock_readers_add(&b->lock, -readers);
+ ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write,
+ lock_may_not_fail, _RET_IP_);
+ six_lock_readers_add(&b->lock, readers);
+
+ if (ret)
+ mark_btree_node_locked_noreset(path, b->level, BTREE_NODE_INTENT_LOCKED);
+
+ return ret;
+}
+
+void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree_bkey_cached_common *b)
+{
+ struct btree_path *linked;
+ unsigned i, iter;
+ int ret;
+
+ /*
+ * XXX BIG FAT NOTICE
+ *
+ * Drop all read locks before taking a write lock:
+ *
+ * This is a hack, because bch2_btree_node_lock_write_nofail() is a
+ * hack - but by dropping read locks first, this should never fail, and
+ * we only use this in code paths where whatever read locks we've
+ * already taken are no longer needed:
+ */
+
+ trans_for_each_path(trans, linked, iter) {
+ if (!linked->nodes_locked)
+ continue;
+
+ for (i = 0; i < BTREE_MAX_DEPTH; i++)
+ if (btree_node_read_locked(linked, i)) {
+ btree_node_unlock(trans, linked, i);
+ btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
+ }
+ }
+
+ ret = __btree_node_lock_write(trans, path, b, true);
+ BUG_ON(ret);
+}
+
+/* relock */
+
+static inline bool btree_path_get_locks(struct btree_trans *trans,
+ struct btree_path *path,
+ bool upgrade,
+ struct get_locks_fail *f)
+{
+ unsigned l = path->level;
+ int fail_idx = -1;
+
+ do {
+ if (!btree_path_node(path, l))
+ break;
+
+ if (!(upgrade
+ ? bch2_btree_node_upgrade(trans, path, l)
+ : bch2_btree_node_relock(trans, path, l))) {
+ fail_idx = l;
+
+ if (f) {
+ f->l = l;
+ f->b = path->l[l].b;
+ }
+ }
+
+ l++;
+ } while (l < path->locks_want);
+
+ /*
+ * When we fail to get a lock, we have to ensure that any child nodes
+ * can't be relocked so bch2_btree_path_traverse has to walk back up to
+ * the node that we failed to relock:
+ */
+ if (fail_idx >= 0) {
+ __bch2_btree_path_unlock(trans, path);
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+
+ do {
+ path->l[fail_idx].b = upgrade
+ ? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
+ : ERR_PTR(-BCH_ERR_no_btree_node_relock);
+ --fail_idx;
+ } while (fail_idx >= 0);
+ }
+
+ if (path->uptodate == BTREE_ITER_NEED_RELOCK)
+ path->uptodate = BTREE_ITER_UPTODATE;
+
+ bch2_trans_verify_locks(trans);
+
+ return path->uptodate < BTREE_ITER_NEED_RELOCK;
+}
+
+bool __bch2_btree_node_relock(struct btree_trans *trans,
+ struct btree_path *path, unsigned level,
+ bool trace)
+{
+ struct btree *b = btree_path_node(path, level);
+ int want = __btree_lock_want(path, level);
+
+ if (race_fault())
+ goto fail;
+
+ if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
+ (btree_node_lock_seq_matches(path, b, level) &&
+ btree_node_lock_increment(trans, &b->c, level, want))) {
+ mark_btree_node_locked(trans, path, level, want);
+ return true;
+ }
+fail:
+ if (trace && !trans->notrace_relock_fail)
+ trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level);
+ return false;
+}
+
+/* upgrade */
+
+bool bch2_btree_node_upgrade(struct btree_trans *trans,
+ struct btree_path *path, unsigned level)
+{
+ struct btree *b = path->l[level].b;
+ struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level);
+
+ if (!is_btree_node(path, level))
+ return false;
+
+ switch (btree_lock_want(path, level)) {
+ case BTREE_NODE_UNLOCKED:
+ BUG_ON(btree_node_locked(path, level));
+ return true;
+ case BTREE_NODE_READ_LOCKED:
+ BUG_ON(btree_node_intent_locked(path, level));
+ return bch2_btree_node_relock(trans, path, level);
+ case BTREE_NODE_INTENT_LOCKED:
+ break;
+ case BTREE_NODE_WRITE_LOCKED:
+ BUG();
+ }
+
+ if (btree_node_intent_locked(path, level))
+ return true;
+
+ if (race_fault())
+ return false;
+
+ if (btree_node_locked(path, level)) {
+ bool ret;
+
+ six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]);
+ ret = six_lock_tryupgrade(&b->c.lock);
+ six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]);
+
+ if (ret)
+ goto success;
+ } else {
+ if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
+ goto success;
+ }
+
+ /*
+ * Do we already have an intent lock via another path? If so, just bump
+ * lock count:
+ */
+ if (btree_node_lock_seq_matches(path, b, level) &&
+ btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) {
+ btree_node_unlock(trans, path, level);
+ goto success;
+ }
+
+ trace_and_count(trans->c, btree_path_upgrade_fail, trans, _RET_IP_, path, level);
+ return false;
+success:
+ mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
+ return true;
+}
+
+/* Btree path locking: */
+
+/*
+ * Only for btree_cache.c - only relocks intent locks
+ */
+int bch2_btree_path_relock_intent(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ unsigned l;
+
+ for (l = path->level;
+ l < path->locks_want && btree_path_node(path, l);
+ l++) {
+ if (!bch2_btree_node_relock(trans, path, l)) {
+ __bch2_btree_path_unlock(trans, path);
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ trace_and_count(trans->c, trans_restart_relock_path_intent, trans, _RET_IP_, path);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
+ }
+ }
+
+ return 0;
+}
+
+__flatten
+bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
+ struct btree_path *path, unsigned long trace_ip)
+{
+ struct get_locks_fail f;
+
+ return btree_path_get_locks(trans, path, false, &f);
+}
+
+int __bch2_btree_path_relock(struct btree_trans *trans,
+ struct btree_path *path, unsigned long trace_ip)
+{
+ if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
+ trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
+ }
+
+ return 0;
+}
+
+bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned new_locks_want,
+ struct get_locks_fail *f)
+{
+ EBUG_ON(path->locks_want >= new_locks_want);
+
+ path->locks_want = new_locks_want;
+
+ return btree_path_get_locks(trans, path, true, f);
+}
+
+bool __bch2_btree_path_upgrade(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned new_locks_want,
+ struct get_locks_fail *f)
+{
+ if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f))
+ return true;
+
+ /*
+ * XXX: this is ugly - we'd prefer to not be mucking with other
+ * iterators in the btree_trans here.
+ *
+ * On failure to upgrade the iterator, setting iter->locks_want and
+ * calling get_locks() is sufficient to make bch2_btree_path_traverse()
+ * get the locks we want on transaction restart.
+ *
+ * But if this iterator was a clone, on transaction restart what we did
+ * to this iterator isn't going to be preserved.
+ *
+ * Possibly we could add an iterator field for the parent iterator when
+ * an iterator is a copy - for now, we'll just upgrade any other
+ * iterators with the same btree id.
+ *
+ * The code below used to be needed to ensure ancestor nodes get locked
+ * before interior nodes - now that's handled by
+ * bch2_btree_path_traverse_all().
+ */
+ if (!path->cached && !trans->in_traverse_all) {
+ struct btree_path *linked;
+ unsigned i;
+
+ trans_for_each_path(trans, linked, i)
+ if (linked != path &&
+ linked->cached == path->cached &&
+ linked->btree_id == path->btree_id &&
+ linked->locks_want < new_locks_want) {
+ linked->locks_want = new_locks_want;
+ btree_path_get_locks(trans, linked, true, NULL);
+ }
+ }
+
+ return false;
+}
+
+void __bch2_btree_path_downgrade(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned new_locks_want)
+{
+ unsigned l, old_locks_want = path->locks_want;
+
+ if (trans->restarted)
+ return;
+
+ EBUG_ON(path->locks_want < new_locks_want);
+
+ path->locks_want = new_locks_want;
+
+ while (path->nodes_locked &&
+ (l = btree_path_highest_level_locked(path)) >= path->locks_want) {
+ if (l > path->level) {
+ btree_node_unlock(trans, path, l);
+ } else {
+ if (btree_node_intent_locked(path, l)) {
+ six_lock_downgrade(&path->l[l].b->c.lock);
+ mark_btree_node_locked_noreset(path, l, BTREE_NODE_READ_LOCKED);
+ }
+ break;
+ }
+ }
+
+ bch2_btree_path_verify_locks(path);
+
+ trace_path_downgrade(trans, _RET_IP_, path, old_locks_want);
+}
+
+/* Btree transaction locking: */
+
+void bch2_trans_downgrade(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ if (trans->restarted)
+ return;
+
+ trans_for_each_path(trans, path, i)
+ bch2_btree_path_downgrade(trans, path);
+}
+
+int bch2_trans_relock(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ if (unlikely(trans->restarted))
+ return -((int) trans->restarted);
+
+ trans_for_each_path(trans, path, i)
+ if (path->should_be_locked &&
+ !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) {
+ trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
+ }
+ return 0;
+}
+
+int bch2_trans_relock_notrace(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ if (unlikely(trans->restarted))
+ return -((int) trans->restarted);
+
+ trans_for_each_path(trans, path, i)
+ if (path->should_be_locked &&
+ !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) {
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
+ }
+ return 0;
+}
+
+void bch2_trans_unlock_noassert(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ trans_for_each_path(trans, path, i)
+ __bch2_btree_path_unlock(trans, path);
+}
+
+void bch2_trans_unlock(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ trans_for_each_path(trans, path, i)
+ __bch2_btree_path_unlock(trans, path);
+}
+
+void bch2_trans_unlock_long(struct btree_trans *trans)
+{
+ bch2_trans_unlock(trans);
+ bch2_trans_srcu_unlock(trans);
+}
+
+bool bch2_trans_locked(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ trans_for_each_path(trans, path, i)
+ if (path->nodes_locked)
+ return true;
+ return false;
+}
+
+int __bch2_trans_mutex_lock(struct btree_trans *trans,
+ struct mutex *lock)
+{
+ int ret = drop_locks_do(trans, (mutex_lock(lock), 0));
+
+ if (ret)
+ mutex_unlock(lock);
+ return ret;
+}
+
+/* Debug */
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void bch2_btree_path_verify_locks(struct btree_path *path)
+{
+ unsigned l;
+
+ if (!path->nodes_locked) {
+ BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
+ btree_path_node(path, path->level));
+ return;
+ }
+
+ for (l = 0; l < BTREE_MAX_DEPTH; l++) {
+ int want = btree_lock_want(path, l);
+ int have = btree_node_locked_type(path, l);
+
+ BUG_ON(!is_btree_node(path, l) && have != BTREE_NODE_UNLOCKED);
+
+ BUG_ON(is_btree_node(path, l) &&
+ (want == BTREE_NODE_UNLOCKED ||
+ have != BTREE_NODE_WRITE_LOCKED) &&
+ want != have);
+ }
+}
+
+void bch2_trans_verify_locks(struct btree_trans *trans)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ trans_for_each_path(trans, path, i)
+ bch2_btree_path_verify_locks(path);
+}
+
+#endif
diff --git a/c_src/libbcachefs/btree_locking.h b/c_src/libbcachefs/btree_locking.h
new file mode 100644
index 00000000..cc5500a9
--- /dev/null
+++ b/c_src/libbcachefs/btree_locking.h
@@ -0,0 +1,431 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_LOCKING_H
+#define _BCACHEFS_BTREE_LOCKING_H
+
+/*
+ * Only for internal btree use:
+ *
+ * The btree iterator tracks what locks it wants to take, and what locks it
+ * currently has - here we have wrappers for locking/unlocking btree nodes and
+ * updating the iterator state
+ */
+
+#include "btree_iter.h"
+#include "six.h"
+
+void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags);
+
+#ifdef CONFIG_LOCKDEP
+void bch2_assert_btree_nodes_not_locked(void);
+#else
+static inline void bch2_assert_btree_nodes_not_locked(void) {}
+#endif
+
+void bch2_trans_unlock_noassert(struct btree_trans *);
+
+static inline bool is_btree_node(struct btree_path *path, unsigned l)
+{
+ return l < BTREE_MAX_DEPTH && !IS_ERR_OR_NULL(path->l[l].b);
+}
+
+static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans)
+{
+ return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats)
+ ? &trans->c->btree_transaction_stats[trans->fn_idx]
+ : NULL;
+}
+
+/* matches six lock types */
+enum btree_node_locked_type {
+ BTREE_NODE_UNLOCKED = -1,
+ BTREE_NODE_READ_LOCKED = SIX_LOCK_read,
+ BTREE_NODE_INTENT_LOCKED = SIX_LOCK_intent,
+ BTREE_NODE_WRITE_LOCKED = SIX_LOCK_write,
+};
+
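+/*
+ * path->nodes_locked packs the lock state as two bits per btree level:
+ * the stored value is locked_type + 1, so 0 = unlocked, 1 = read,
+ * 2 = intent, 3 = write, and a zeroed path reads as all-unlocked:
+ */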
+static inline int btree_node_locked_type(struct btree_path *path,
+ unsigned level)
+{
+ return BTREE_NODE_UNLOCKED + ((path->nodes_locked >> (level << 1)) & 3);
+}
+
+static inline bool btree_node_write_locked(struct btree_path *path, unsigned l)
+{
+ return btree_node_locked_type(path, l) == BTREE_NODE_WRITE_LOCKED;
+}
+
+static inline bool btree_node_intent_locked(struct btree_path *path, unsigned l)
+{
+ return btree_node_locked_type(path, l) == BTREE_NODE_INTENT_LOCKED;
+}
+
+static inline bool btree_node_read_locked(struct btree_path *path, unsigned l)
+{
+ return btree_node_locked_type(path, l) == BTREE_NODE_READ_LOCKED;
+}
+
+static inline bool btree_node_locked(struct btree_path *path, unsigned level)
+{
+ return btree_node_locked_type(path, level) != BTREE_NODE_UNLOCKED;
+}
+
+static inline void mark_btree_node_locked_noreset(struct btree_path *path,
+ unsigned level,
+ enum btree_node_locked_type type)
+{
+ /* relying on this to avoid a branch */
+ BUILD_BUG_ON(SIX_LOCK_read != 0);
+ BUILD_BUG_ON(SIX_LOCK_intent != 1);
+
+ path->nodes_locked &= ~(3U << (level << 1));
+ path->nodes_locked |= (type + 1) << (level << 1);
+}
+
+static inline void mark_btree_node_unlocked(struct btree_path *path,
+ unsigned level)
+{
+ EBUG_ON(btree_node_write_locked(path, level));
+ mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED);
+}
+
+static inline void mark_btree_node_locked(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned level,
+ enum btree_node_locked_type type)
+{
+ mark_btree_node_locked_noreset(path, level, (enum btree_node_locked_type) type);
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ path->l[level].lock_taken_time = local_clock();
+#endif
+}
+
+static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
+{
+ return level < path->locks_want
+ ? SIX_LOCK_intent
+ : SIX_LOCK_read;
+}
+
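+/*
+ * Lock type we want at a given level: nothing below the path's level, intent
+ * locks from the path's level up to locks_want, a read lock at the path's
+ * level if it isn't already covered by locks_want, and nothing above that:
+ */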
+static inline enum btree_node_locked_type
+btree_lock_want(struct btree_path *path, int level)
+{
+ if (level < path->level)
+ return BTREE_NODE_UNLOCKED;
+ if (level < path->locks_want)
+ return BTREE_NODE_INTENT_LOCKED;
+ if (level == path->level)
+ return BTREE_NODE_READ_LOCKED;
+ return BTREE_NODE_UNLOCKED;
+}
+
+static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
+ struct btree_path *path, unsigned level)
+{
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ __bch2_time_stats_update(&btree_trans_stats(trans)->lock_hold_times,
+ path->l[level].lock_taken_time,
+ local_clock());
+#endif
+}
+
+/* unlock: */
+
+static inline void btree_node_unlock(struct btree_trans *trans,
+ struct btree_path *path, unsigned level)
+{
+ int lock_type = btree_node_locked_type(path, level);
+
+ EBUG_ON(level >= BTREE_MAX_DEPTH);
+
+ if (lock_type != BTREE_NODE_UNLOCKED) {
+ six_unlock_type(&path->l[level].b->c.lock, lock_type);
+ btree_trans_lock_hold_time_update(trans, path, level);
+ }
+ mark_btree_node_unlocked(path, level);
+}
+
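+/*
+ * nodes_locked uses two bits per level, so halving the bit index from
+ * __ffs()/__fls() recovers the btree level:
+ */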
+static inline int btree_path_lowest_level_locked(struct btree_path *path)
+{
+ return __ffs(path->nodes_locked) >> 1;
+}
+
+static inline int btree_path_highest_level_locked(struct btree_path *path)
+{
+ return __fls(path->nodes_locked) >> 1;
+}
+
+static inline void __bch2_btree_path_unlock(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK);
+
+ while (path->nodes_locked)
+ btree_node_unlock(trans, path, btree_path_lowest_level_locked(path));
+}
+
+/*
+ * Updates the saved lock sequence number, so that bch2_btree_node_relock() will
+ * succeed:
+ */
+static inline void
+bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path,
+ struct btree *b)
+{
+ struct btree_path *linked;
+ unsigned i;
+
+ EBUG_ON(path->l[b->c.level].b != b);
+ EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock));
+ EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write);
+
+ mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
+
+ trans_for_each_path_with_node(trans, b, linked, i)
+ linked->l[b->c.level].lock_seq++;
+
+ six_unlock_write(&b->c.lock);
+}
+
+void bch2_btree_node_unlock_write(struct btree_trans *,
+ struct btree_path *, struct btree *);
+
+int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
+
+/* lock: */
+
+static inline int __btree_node_lock_nopath(struct btree_trans *trans,
+ struct btree_bkey_cached_common *b,
+ enum six_lock_type type,
+ bool lock_may_not_fail,
+ unsigned long ip)
+{
+ int ret;
+
+ trans->lock_may_not_fail = lock_may_not_fail;
+ trans->lock_must_abort = false;
+ trans->locking = b;
+
+ ret = six_lock_ip_waiter(&b->lock, type, &trans->locking_wait,
+ bch2_six_check_for_deadlock, trans, ip);
+ WRITE_ONCE(trans->locking, NULL);
+ WRITE_ONCE(trans->locking_wait.start_time, 0);
+ return ret;
+}
+
+static inline int __must_check
+btree_node_lock_nopath(struct btree_trans *trans,
+ struct btree_bkey_cached_common *b,
+ enum six_lock_type type,
+ unsigned long ip)
+{
+ return __btree_node_lock_nopath(trans, b, type, false, ip);
+}
+
+static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
+ struct btree_bkey_cached_common *b,
+ enum six_lock_type type)
+{
+ int ret = __btree_node_lock_nopath(trans, b, type, true, _THIS_IP_);
+
+ BUG_ON(ret);
+}
+
+/*
+ * Lock a btree node if we already have it locked on one of our linked
+ * iterators:
+ */
+static inline bool btree_node_lock_increment(struct btree_trans *trans,
+ struct btree_bkey_cached_common *b,
+ unsigned level,
+ enum btree_node_locked_type want)
+{
+ struct btree_path *path;
+ unsigned i;
+
+ trans_for_each_path(trans, path, i)
+ if (&path->l[level].b->c == b &&
+ btree_node_locked_type(path, level) >= want) {
+ six_lock_increment(&b->lock, (enum six_lock_type) want);
+ return true;
+ }
+
+ return false;
+}
+
+static inline int btree_node_lock(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree_bkey_cached_common *b,
+ unsigned level,
+ enum six_lock_type type,
+ unsigned long ip)
+{
+ int ret = 0;
+
+ EBUG_ON(level >= BTREE_MAX_DEPTH);
+
+ if (likely(six_trylock_type(&b->lock, type)) ||
+ btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) ||
+ !(ret = btree_node_lock_nopath(trans, b, type, btree_path_ip_allocated(path)))) {
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ path->l[b->level].lock_taken_time = local_clock();
+#endif
+ }
+
+ return ret;
+}
+
+int __bch2_btree_node_lock_write(struct btree_trans *, struct btree_path *,
+ struct btree_bkey_cached_common *b, bool);
+
+static inline int __btree_node_lock_write(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree_bkey_cached_common *b,
+ bool lock_may_not_fail)
+{
+ EBUG_ON(&path->l[b->level].b->c != b);
+ EBUG_ON(path->l[b->level].lock_seq != six_lock_seq(&b->lock));
+ EBUG_ON(!btree_node_intent_locked(path, b->level));
+
+ /*
+ * six locks are unfair, and read locks block while a thread wants a
+ * write lock: thus, we need to tell the cycle detector we have a write
+ * lock _before_ taking the lock:
+ */
+ mark_btree_node_locked_noreset(path, b->level, BTREE_NODE_WRITE_LOCKED);
+
+ return likely(six_trylock_write(&b->lock))
+ ? 0
+ : __bch2_btree_node_lock_write(trans, path, b, lock_may_not_fail);
+}
+
+static inline int __must_check
+bch2_btree_node_lock_write(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree_bkey_cached_common *b)
+{
+ return __btree_node_lock_write(trans, path, b, false);
+}
+
+void bch2_btree_node_lock_write_nofail(struct btree_trans *,
+ struct btree_path *,
+ struct btree_bkey_cached_common *);
+
+/* relock: */
+
+bool bch2_btree_path_relock_norestart(struct btree_trans *,
+ struct btree_path *, unsigned long);
+int __bch2_btree_path_relock(struct btree_trans *,
+ struct btree_path *, unsigned long);
+
+static inline int bch2_btree_path_relock(struct btree_trans *trans,
+ struct btree_path *path, unsigned long trace_ip)
+{
+ return btree_node_locked(path, path->level)
+ ? 0
+ : __bch2_btree_path_relock(trans, path, trace_ip);
+}
+
+bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned, bool trace);
+
+static inline bool bch2_btree_node_relock(struct btree_trans *trans,
+ struct btree_path *path, unsigned level)
+{
+ EBUG_ON(btree_node_locked(path, level) &&
+ !btree_node_write_locked(path, level) &&
+ btree_node_locked_type(path, level) != __btree_lock_want(path, level));
+
+ return likely(btree_node_locked(path, level)) ||
+ (!IS_ERR_OR_NULL(path->l[level].b) &&
+ __bch2_btree_node_relock(trans, path, level, true));
+}
+
+static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
+ struct btree_path *path, unsigned level)
+{
+ EBUG_ON(btree_node_locked(path, level) &&
+ !btree_node_write_locked(path, level) &&
+ btree_node_locked_type(path, level) != __btree_lock_want(path, level));
+
+ return likely(btree_node_locked(path, level)) ||
+ (!IS_ERR_OR_NULL(path->l[level].b) &&
+ __bch2_btree_node_relock(trans, path, level, false));
+}
+
+/* upgrade */
+
+struct get_locks_fail {
+ unsigned l;
+ struct btree *b;
+};
+
+bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
+ struct btree_path *, unsigned,
+ struct get_locks_fail *);
+
+bool __bch2_btree_path_upgrade(struct btree_trans *,
+ struct btree_path *, unsigned,
+ struct get_locks_fail *);
+
+static inline int bch2_btree_path_upgrade(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned new_locks_want)
+{
+ struct get_locks_fail f;
+ unsigned old_locks_want = path->locks_want;
+
+ new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
+
+ if (path->locks_want < new_locks_want
+ ? __bch2_btree_path_upgrade(trans, path, new_locks_want, &f)
+ : path->uptodate == BTREE_ITER_UPTODATE)
+ return 0;
+
+ trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path,
+ old_locks_want, new_locks_want, &f);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
+}
+
+/* misc: */
+
+static inline void btree_path_set_should_be_locked(struct btree_path *path)
+{
+ EBUG_ON(!btree_node_locked(path, path->level));
+ EBUG_ON(path->uptodate);
+
+ path->should_be_locked = true;
+}
+
+static inline void __btree_path_set_level_up(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned l)
+{
+ btree_node_unlock(trans, path, l);
+ path->l[l].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
+}
+
+static inline void btree_path_set_level_up(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ __btree_path_set_level_up(trans, path, path->level++);
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+}
+
+/* debug */
+
+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *,
+ struct btree_path *,
+ struct btree_bkey_cached_common *b,
+ unsigned);
+
+int bch2_check_for_deadlock(struct btree_trans *, struct printbuf *);
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_btree_path_verify_locks(struct btree_path *);
+void bch2_trans_verify_locks(struct btree_trans *);
+#else
+static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
+static inline void bch2_trans_verify_locks(struct btree_trans *trans) {}
+#endif
+
+#endif /* _BCACHEFS_BTREE_LOCKING_H */
diff --git a/c_src/libbcachefs/btree_trans_commit.c b/c_src/libbcachefs/btree_trans_commit.c
new file mode 100644
index 00000000..80505554
--- /dev/null
+++ b/c_src/libbcachefs/btree_trans_commit.c
@@ -0,0 +1,1132 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_gc.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_journal_iter.h"
+#include "btree_key_cache.h"
+#include "btree_update_interior.h"
+#include "btree_write_buffer.h"
+#include "buckets.h"
+#include "errcode.h"
+#include "error.h"
+#include "journal.h"
+#include "journal_io.h"
+#include "journal_reclaim.h"
+#include "replicas.h"
+#include "snapshot.h"
+
+#include <linux/prefetch.h>
+
+static void verify_update_old_key(struct btree_trans *trans, struct btree_insert_entry *i)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+ struct bch_fs *c = trans->c;
+ struct bkey u;
+ struct bkey_s_c k = bch2_btree_path_peek_slot_exact(trans->paths + i->path, &u);
+
+ if (unlikely(trans->journal_replay_not_finished)) {
+ struct bkey_i *j_k =
+ bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p);
+
+ if (j_k)
+ k = bkey_i_to_s_c(j_k);
+ }
+
+ u = *k.k;
+ u.needs_whiteout = i->old_k.needs_whiteout;
+
+ BUG_ON(memcmp(&i->old_k, &u, sizeof(struct bkey)));
+ BUG_ON(i->old_v != k.v);
+#endif
+}
+
+static inline struct btree_path_level *insert_l(struct btree_trans *trans, struct btree_insert_entry *i)
+{
+ return (trans->paths + i->path)->l + i->level;
+}
+
+static inline bool same_leaf_as_prev(struct btree_trans *trans,
+ struct btree_insert_entry *i)
+{
+ return i != trans->updates &&
+ insert_l(trans, &i[0])->b == insert_l(trans, &i[-1])->b;
+}
+
+static inline bool same_leaf_as_next(struct btree_trans *trans,
+ struct btree_insert_entry *i)
+{
+ return i + 1 < trans->updates + trans->nr_updates &&
+ insert_l(trans, &i[0])->b == insert_l(trans, &i[1])->b;
+}
+
+inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b)
+{
+ struct bch_fs *c = trans->c;
+
+ if (unlikely(btree_node_just_written(b)) &&
+ bch2_btree_post_write_cleanup(c, b))
+ bch2_trans_node_reinit_iter(trans, b);
+
+ /*
+ * If the last bset has been written, or if it's gotten too big - start
+ * a new bset to insert into:
+ */
+ if (want_new_bset(c, b))
+ bch2_btree_init_next(trans, b);
+}
+
+static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
+{
+ while (--i >= trans->updates) {
+ if (same_leaf_as_prev(trans, i))
+ continue;
+
+ bch2_btree_node_unlock_write(trans, trans->paths + i->path, insert_l(trans, i)->b);
+ }
+
+ trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
+}
+
+static inline int bch2_trans_lock_write(struct btree_trans *trans)
+{
+ EBUG_ON(trans->write_locked);
+
+ trans_for_each_update(trans, i) {
+ if (same_leaf_as_prev(trans, i))
+ continue;
+
+ if (bch2_btree_node_lock_write(trans, trans->paths + i->path, &insert_l(trans, i)->b->c))
+ return trans_lock_write_fail(trans, i);
+
+ if (!i->cached)
+ bch2_btree_node_prep_for_write(trans, trans->paths + i->path, insert_l(trans, i)->b);
+ }
+
+ trans->write_locked = true;
+ return 0;
+}
+
+static inline void bch2_trans_unlock_write(struct btree_trans *trans)
+{
+ if (likely(trans->write_locked)) {
+ trans_for_each_update(trans, i)
+ if (!same_leaf_as_prev(trans, i))
+ bch2_btree_node_unlock_write_inlined(trans,
+ trans->paths + i->path, insert_l(trans, i)->b);
+ trans->write_locked = false;
+ }
+}
+
+/* Inserting into a given leaf node (last stage of insert): */
+
+/* Handle overwrites and do insert, for non extents: */
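+/* Returns false if @insert is a deletion and there was no matching key to delete: */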
+bool bch2_btree_bset_insert_key(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct btree_node_iter *node_iter,
+ struct bkey_i *insert)
+{
+ struct bkey_packed *k;
+ unsigned clobber_u64s = 0, new_u64s = 0;
+
+ EBUG_ON(btree_node_just_written(b));
+ EBUG_ON(bset_written(b, btree_bset_last(b)));
+ EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
+ EBUG_ON(bpos_lt(insert->k.p, b->data->min_key));
+ EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
+ EBUG_ON(insert->k.u64s >
+ bch_btree_keys_u64s_remaining(trans->c, b));
+ EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));
+
+ k = bch2_btree_node_iter_peek_all(node_iter, b);
+ if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
+ k = NULL;
+
+ /* @k is the key being overwritten/deleted, if any: */
+ EBUG_ON(k && bkey_deleted(k));
+
+ /* Deleting, but not found? nothing to do: */
+ if (bkey_deleted(&insert->k) && !k)
+ return false;
+
+ if (bkey_deleted(&insert->k)) {
+ /* Deleting: */
+ btree_account_key_drop(b, k);
+ k->type = KEY_TYPE_deleted;
+
+ if (k->needs_whiteout)
+ push_whiteout(trans->c, b, insert->k.p);
+ k->needs_whiteout = false;
+
+ if (k >= btree_bset_last(b)->start) {
+ clobber_u64s = k->u64s;
+ bch2_bset_delete(b, k, clobber_u64s);
+ goto fix_iter;
+ } else {
+ bch2_btree_path_fix_key_modified(trans, b, k);
+ }
+
+ return true;
+ }
+
+ if (k) {
+ /* Overwriting: */
+ btree_account_key_drop(b, k);
+ k->type = KEY_TYPE_deleted;
+
+ insert->k.needs_whiteout = k->needs_whiteout;
+ k->needs_whiteout = false;
+
+ if (k >= btree_bset_last(b)->start) {
+ clobber_u64s = k->u64s;
+ goto overwrite;
+ } else {
+ bch2_btree_path_fix_key_modified(trans, b, k);
+ }
+ }
+
+ k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b));
+overwrite:
+ bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
+ new_u64s = k->u64s;
+fix_iter:
+ if (clobber_u64s != new_u64s)
+ bch2_btree_node_iter_fix(trans, path, b, node_iter, k,
+ clobber_u64s, new_u64s);
+ return true;
+}
+
+static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
+ unsigned i, u64 seq)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct btree_write *w = container_of(pin, struct btree_write, journal);
+ struct btree *b = container_of(w, struct btree, writes[i]);
+ struct btree_trans *trans = bch2_trans_get(c);
+ unsigned long old, new, v;
+ unsigned idx = w - b->writes;
+
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
+ v = READ_ONCE(b->flags);
+
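+	/*
+	 * If the node is still dirty and this write slot still pins @seq, flag
+	 * it as needing a journal reclaim write before writing it out:
+	 */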
+ do {
+ old = new = v;
+
+ if (!(old & (1 << BTREE_NODE_dirty)) ||
+ !!(old & (1 << BTREE_NODE_write_idx)) != idx ||
+ w->journal.seq != seq)
+ break;
+
+ new &= ~BTREE_WRITE_TYPE_MASK;
+ new |= BTREE_WRITE_journal_reclaim;
+ new |= 1 << BTREE_NODE_need_write;
+ } while ((v = cmpxchg(&b->flags, old, new)) != old);
+
+ btree_node_write_if_need(c, b, SIX_LOCK_read);
+ six_unlock_read(&b->c.lock);
+
+ bch2_trans_put(trans);
+ return 0;
+}
+
+int bch2_btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
+{
+ return __btree_node_flush(j, pin, 0, seq);
+}
+
+int bch2_btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
+{
+ return __btree_node_flush(j, pin, 1, seq);
+}
+
+inline void bch2_btree_add_journal_pin(struct bch_fs *c,
+ struct btree *b, u64 seq)
+{
+ struct btree_write *w = btree_current_write(b);
+
+ bch2_journal_pin_add(&c->journal, seq, &w->journal,
+ btree_node_write_idx(b) == 0
+ ? bch2_btree_node_flush0
+ : bch2_btree_node_flush1);
+}
+
+/**
+ * bch2_btree_insert_key_leaf() - insert a key into a leaf node
+ * @trans: btree transaction object
+ * @path: path pointing to @insert's pos
+ * @insert: key to insert
+ * @journal_seq: sequence number of journal reservation
+ */
+inline void bch2_btree_insert_key_leaf(struct btree_trans *trans,
+ struct btree_path *path,
+ struct bkey_i *insert,
+ u64 journal_seq)
+{
+ struct bch_fs *c = trans->c;
+ struct btree *b = path_l(path)->b;
+ struct bset_tree *t = bset_tree_last(b);
+ struct bset *i = bset(b, t);
+ int old_u64s = bset_u64s(t);
+ int old_live_u64s = b->nr.live_u64s;
+ int live_u64s_added, u64s_added;
+
+ if (unlikely(!bch2_btree_bset_insert_key(trans, path, b,
+ &path_l(path)->iter, insert)))
+ return;
+
+ i->journal_seq = cpu_to_le64(max(journal_seq, le64_to_cpu(i->journal_seq)));
+
+ bch2_btree_add_journal_pin(c, b, journal_seq);
+
+ if (unlikely(!btree_node_dirty(b))) {
+ EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
+ set_btree_node_dirty_acct(c, b);
+ }
+
+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
+ u64s_added = (int) bset_u64s(t) - old_u64s;
+
+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);
+
+ if (u64s_added > live_u64s_added &&
+ bch2_maybe_compact_whiteouts(c, b))
+ bch2_trans_node_reinit_iter(trans, b);
+}
+
+/* Cached btree updates: */
+
+/* Normal update interface: */
+
+static inline void btree_insert_entry_checks(struct btree_trans *trans,
+ struct btree_insert_entry *i)
+{
+ struct btree_path *path = trans->paths + i->path;
+
+ BUG_ON(!bpos_eq(i->k->k.p, path->pos));
+ BUG_ON(i->cached != path->cached);
+ BUG_ON(i->level != path->level);
+ BUG_ON(i->btree_id != path->btree_id);
+ EBUG_ON(!i->level &&
+ btree_type_has_snapshots(i->btree_id) &&
+ !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
+ test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
+ i->k->k.p.snapshot &&
+ bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
+}
+
+static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
+ unsigned flags)
+{
+ return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
+ trans->journal_u64s, flags);
+}
+
+#define JSET_ENTRY_LOG_U64s 4
+
+static noinline void journal_transaction_name(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct journal *j = &c->journal;
+ struct jset_entry *entry =
+ bch2_journal_add_entry(j, &trans->journal_res,
+ BCH_JSET_ENTRY_log, 0, 0,
+ JSET_ENTRY_LOG_U64s);
+ struct jset_entry_log *l =
+ container_of(entry, struct jset_entry_log, entry);
+
+ strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64));
+}
+
+static inline int btree_key_can_insert(struct btree_trans *trans,
+ struct btree *b, unsigned u64s)
+{
+ struct bch_fs *c = trans->c;
+
+ if (!bch2_btree_node_insert_fits(c, b, u64s))
+ return -BCH_ERR_btree_insert_btree_node_full;
+
+ return 0;
+}
+
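+/*
+ * The fast path couldn't resize the key cache buffer with GFP_NOWAIT: drop
+ * locks so we can do a blocking allocation, then relock and swap in the new
+ * buffer:
+ */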
+noinline static int
+btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
+ struct btree_path *path, unsigned new_u64s)
+{
+ struct bkey_cached *ck = (void *) path->l[0].b;
+ struct bkey_i *new_k;
+ int ret;
+
+ bch2_trans_unlock_write(trans);
+ bch2_trans_unlock(trans);
+
+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
+ if (!new_k) {
+ bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
+ bch2_btree_id_str(path->btree_id), new_u64s);
+ return -BCH_ERR_ENOMEM_btree_key_cache_insert;
+ }
+
+ ret = bch2_trans_relock(trans) ?:
+ bch2_trans_lock_write(trans);
+ if (unlikely(ret)) {
+ kfree(new_k);
+ return ret;
+ }
+
+ memcpy(new_k, ck->k, ck->u64s * sizeof(u64));
+
+ trans_for_each_update(trans, i)
+ if (i->old_v == &ck->k->v)
+ i->old_v = &new_k->v;
+
+ kfree(ck->k);
+ ck->u64s = new_u64s;
+ ck->k = new_k;
+ return 0;
+}
+
+static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags,
+ struct btree_path *path, unsigned u64s)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_cached *ck = (void *) path->l[0].b;
+ unsigned new_u64s;
+ struct bkey_i *new_k;
+
+ EBUG_ON(path->level);
+
+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
+ bch2_btree_key_cache_must_wait(c) &&
+ !(flags & BCH_TRANS_COMMIT_journal_reclaim))
+ return -BCH_ERR_btree_insert_need_journal_reclaim;
+
+ /*
+ * bch2_varint_decode can read past the end of the buffer by at most 7
+ * bytes (it won't be used):
+ */
+ u64s += 1;
+
+ if (u64s <= ck->u64s)
+ return 0;
+
+ new_u64s = roundup_pow_of_two(u64s);
+ new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
+ if (unlikely(!new_k))
+ return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);
+
+ trans_for_each_update(trans, i)
+ if (i->old_v == &ck->k->v)
+ i->old_v = &new_k->v;
+
+ ck->u64s = new_u64s;
+ ck->k = new_k;
+ return 0;
+}
+
+/* Triggers: */
+
+static int run_one_mem_trigger(struct btree_trans *trans,
+ struct btree_insert_entry *i,
+ unsigned flags)
+{
+ struct bkey_s_c old = { &i->old_k, i->old_v };
+ struct bkey_i *new = i->k;
+ const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type);
+ const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type);
+ int ret;
+
+ verify_update_old_key(trans, i);
+
+ if (unlikely(flags & BTREE_TRIGGER_NORUN))
+ return 0;
+
+ if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)))
+ return 0;
+
+ if (old_ops->trigger == new_ops->trigger) {
+ ret = bch2_key_trigger(trans, i->btree_id, i->level,
+ old, bkey_i_to_s(new),
+ BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
+ } else {
+ ret = bch2_key_trigger_new(trans, i->btree_id, i->level,
+ bkey_i_to_s(new), flags) ?:
+ bch2_key_trigger_old(trans, i->btree_id, i->level,
+ old, flags);
+ }
+
+ return ret;
+}
+
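+/*
+ * Returns a negative error code, 0 if there was nothing to do, or 1 if a
+ * trigger was run - the caller keeps looping until all triggers have run:
+ */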
+static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i,
+ bool overwrite)
+{
+ /*
+ * Transactional triggers create new btree_insert_entries, so we can't
+ * pass them a pointer to a btree_insert_entry, that memory is going to
+ * move:
+ */
+ struct bkey old_k = i->old_k;
+ struct bkey_s_c old = { &old_k, i->old_v };
+ const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type);
+ const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type);
+ unsigned flags = i->flags|BTREE_TRIGGER_TRANSACTIONAL;
+
+ verify_update_old_key(trans, i);
+
+ if ((i->flags & BTREE_TRIGGER_NORUN) ||
+ !(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)))
+ return 0;
+
+ if (!i->insert_trigger_run &&
+ !i->overwrite_trigger_run &&
+ old_ops->trigger == new_ops->trigger) {
+ i->overwrite_trigger_run = true;
+ i->insert_trigger_run = true;
+ return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k),
+ BTREE_TRIGGER_INSERT|
+ BTREE_TRIGGER_OVERWRITE|flags) ?: 1;
+ } else if (overwrite && !i->overwrite_trigger_run) {
+ i->overwrite_trigger_run = true;
+ return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1;
+ } else if (!overwrite && !i->insert_trigger_run) {
+ i->insert_trigger_run = true;
+ return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 1;
+ } else {
+ return 0;
+ }
+}
+
+static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
+ struct btree_insert_entry *btree_id_start)
+{
+ struct btree_insert_entry *i;
+ bool trans_trigger_run;
+ int ret, overwrite;
+
+ for (overwrite = 1; overwrite >= 0; --overwrite) {
+
+ /*
+ * Running triggers will append more updates to the list of updates as
+ * we're walking it:
+ */
+ do {
+ trans_trigger_run = false;
+
+ for (i = btree_id_start;
+ i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
+ i++) {
+ if (i->btree_id != btree_id)
+ continue;
+
+ ret = run_one_trans_trigger(trans, i, overwrite);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ trans_trigger_run = true;
+ }
+ } while (trans_trigger_run);
+ }
+
+ return 0;
+}
+
+static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
+{
+ struct btree_insert_entry *btree_id_start = trans->updates;
+ unsigned btree_id = 0;
+ int ret = 0;
+
+	/*
+ * For a given btree, this algorithm runs insert triggers before
+ * overwrite triggers: this is so that when extents are being moved
+ * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before
+ * they are re-added.
+ */
+ for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) {
+ if (btree_id == BTREE_ID_alloc)
+ continue;
+
+ while (btree_id_start < trans->updates + trans->nr_updates &&
+ btree_id_start->btree_id < btree_id)
+ btree_id_start++;
+
+ ret = run_btree_triggers(trans, btree_id, btree_id_start);
+ if (ret)
+ return ret;
+ }
+
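+	/* Triggers on the alloc btree run last, after every other btree: */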
+ trans_for_each_update(trans, i) {
+ if (i->btree_id > BTREE_ID_alloc)
+ break;
+ if (i->btree_id == BTREE_ID_alloc) {
+ ret = run_btree_triggers(trans, BTREE_ID_alloc, i);
+ if (ret)
+ return ret;
+ break;
+ }
+ }
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+ trans_for_each_update(trans, i)
+ BUG_ON(!(i->flags & BTREE_TRIGGER_NORUN) &&
+ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) &&
+ (!i->insert_trigger_run || !i->overwrite_trigger_run));
+#endif
+ return 0;
+}
+
+static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
+ trans_for_each_update(trans, i) {
+ /*
+ * XXX: synchronization of cached update triggers with gc
+ * XXX: synchronization of interior node updates with gc
+ */
+ BUG_ON(i->cached || i->level);
+
+ if (gc_visited(c, gc_pos_btree_node(insert_l(trans, i)->b))) {
+ ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static inline int
+bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
+ struct btree_insert_entry **stopped_at,
+ unsigned long trace_ip)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_trans_commit_hook *h;
+ unsigned u64s = 0;
+ int ret;
+
+ if (race_fault()) {
+ trace_and_count(c, trans_restart_fault_inject, trans, trace_ip);
+ return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject);
+ }
+
+ /*
+ * Check if the insert will fit in the leaf node with the write lock
+ * held, otherwise another thread could write the node changing the
+ * amount of space available:
+ */
+
+ prefetch(&trans->c->journal.flags);
+
+ trans_for_each_update(trans, i) {
+		/* Multiple inserts might go to the same leaf: */
+ if (!same_leaf_as_prev(trans, i))
+ u64s = 0;
+
+ u64s += i->k->k.u64s;
+ ret = !i->cached
+ ? btree_key_can_insert(trans, insert_l(trans, i)->b, u64s)
+ : btree_key_can_insert_cached(trans, flags, trans->paths + i->path, u64s);
+ if (ret) {
+ *stopped_at = i;
+ return ret;
+ }
+
+ i->k->k.needs_whiteout = false;
+ }
+
+ /*
+ * Don't get journal reservation until after we know insert will
+ * succeed:
+ */
+ if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
+ ret = bch2_trans_journal_res_get(trans,
+ (flags & BCH_WATERMARK_MASK)|
+ JOURNAL_RES_GET_NONBLOCK);
+ if (ret)
+ return ret;
+
+ if (unlikely(trans->journal_transaction_names))
+ journal_transaction_name(trans);
+ }
+
+ /*
+ * Not allowed to fail after we've gotten our journal reservation - we
+ * have to use it:
+ */
+
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
+ !(flags & BCH_TRANS_COMMIT_no_journal_res)) {
+ if (bch2_journal_seq_verify)
+ trans_for_each_update(trans, i)
+ i->k->k.version.lo = trans->journal_res.seq;
+ else if (bch2_inject_invalid_keys)
+ trans_for_each_update(trans, i)
+ i->k->k.version = MAX_VERSION;
+ }
+
+ if (trans->fs_usage_deltas &&
+ bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
+ return -BCH_ERR_btree_insert_need_mark_replicas;
+
+ h = trans->hooks;
+ while (h) {
+ ret = h->fn(trans, h);
+ if (ret)
+ goto revert_fs_usage;
+ h = h->next;
+ }
+
+ trans_for_each_update(trans, i)
+ if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) {
+ ret = run_one_mem_trigger(trans, i, i->flags);
+ if (ret)
+ goto fatal_err;
+ }
+
+ if (unlikely(c->gc_pos.phase)) {
+ ret = bch2_trans_commit_run_gc_triggers(trans);
+ if (ret)
+ goto fatal_err;
+ }
+
+ if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
+ struct journal *j = &c->journal;
+ struct jset_entry *entry;
+
+ trans_for_each_update(trans, i) {
+ if (i->key_cache_already_flushed)
+ continue;
+
+ if (i->flags & BTREE_UPDATE_NOJOURNAL)
+ continue;
+
+ verify_update_old_key(trans, i);
+
+ if (trans->journal_transaction_names) {
+ entry = bch2_journal_add_entry(j, &trans->journal_res,
+ BCH_JSET_ENTRY_overwrite,
+ i->btree_id, i->level,
+ i->old_k.u64s);
+ bkey_reassemble((struct bkey_i *) entry->start,
+ (struct bkey_s_c) { &i->old_k, i->old_v });
+ }
+
+ entry = bch2_journal_add_entry(j, &trans->journal_res,
+ BCH_JSET_ENTRY_btree_keys,
+ i->btree_id, i->level,
+ i->k->k.u64s);
+ bkey_copy((struct bkey_i *) entry->start, i->k);
+ }
+
+ memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
+ trans->journal_entries,
+ trans->journal_entries_u64s);
+
+ trans->journal_res.offset += trans->journal_entries_u64s;
+ trans->journal_res.u64s -= trans->journal_entries_u64s;
+
+ if (trans->journal_seq)
+ *trans->journal_seq = trans->journal_res.seq;
+ }
+
+ trans_for_each_update(trans, i) {
+ struct btree_path *path = trans->paths + i->path;
+
+ if (!i->cached) {
+ bch2_btree_insert_key_leaf(trans, path, i->k, trans->journal_res.seq);
+ } else if (!i->key_cache_already_flushed)
+ bch2_btree_insert_key_cached(trans, flags, i);
+ else {
+ bch2_btree_key_cache_drop(trans, path);
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ }
+ }
+
+ return 0;
+fatal_err:
+ bch2_fatal_error(c);
+revert_fs_usage:
+ if (trans->fs_usage_deltas)
+ bch2_trans_fs_usage_revert(trans, trans->fs_usage_deltas);
+ return ret;
+}
+
+static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
+{
+ trans_for_each_update(trans, i)
+ bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p);
+}
+
+static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans,
+ enum bkey_invalid_flags flags,
+ struct btree_insert_entry *i,
+ struct printbuf *err)
+{
+ struct bch_fs *c = trans->c;
+
+ printbuf_reset(err);
+ prt_printf(err, "invalid bkey on insert from %s -> %ps",
+ trans->fn, (void *) i->ip_allocated);
+ prt_newline(err);
+ printbuf_indent_add(err, 2);
+
+ bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k));
+ prt_newline(err);
+
+ bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err);
+ bch2_print_string_as_lines(KERN_ERR, err->buf);
+
+ bch2_inconsistent_error(c);
+ bch2_dump_trans_updates(trans);
+
+ return -EINVAL;
+}
+
+static noinline int bch2_trans_commit_journal_entry_invalid(struct btree_trans *trans,
+ struct jset_entry *i)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "invalid bkey on insert from %s", trans->fn);
+ prt_newline(&buf);
+ printbuf_indent_add(&buf, 2);
+
+ bch2_journal_entry_to_text(&buf, c, i);
+ prt_newline(&buf);
+
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+
+ bch2_inconsistent_error(c);
+ bch2_dump_trans_updates(trans);
+
+ return -EINVAL;
+}
+
+static int bch2_trans_commit_journal_pin_flush(struct journal *j,
+ struct journal_entry_pin *_pin, u64 seq)
+{
+ return 0;
+}
+
+/*
+ * Get journal reservation, take write locks, and attempt to do btree update(s):
+ */
+static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags,
+ struct btree_insert_entry **stopped_at,
+ unsigned long trace_ip)
+{
+ struct bch_fs *c = trans->c;
+ int ret = 0, u64s_delta = 0;
+
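+	/*
+	 * When a commit shrinks (or doesn't grow) a leaf node, check whether it
+	 * should be merged with a sibling before we take write locks:
+	 */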
+ trans_for_each_update(trans, i) {
+ if (i->cached)
+ continue;
+
+ u64s_delta += !bkey_deleted(&i->k->k) ? i->k->k.u64s : 0;
+ u64s_delta -= i->old_btree_u64s;
+
+ if (!same_leaf_as_next(trans, i)) {
+ if (u64s_delta <= 0) {
+ ret = bch2_foreground_maybe_merge(trans, i->path,
+ i->level, flags);
+ if (unlikely(ret))
+ return ret;
+ }
+
+ u64s_delta = 0;
+ }
+ }
+
+ ret = bch2_trans_lock_write(trans);
+ if (unlikely(ret))
+ return ret;
+
+ ret = bch2_trans_commit_write_locked(trans, flags, stopped_at, trace_ip);
+
+ if (!ret && unlikely(trans->journal_replay_not_finished))
+ bch2_drop_overwrites_from_journal(trans);
+
+ bch2_trans_unlock_write(trans);
+
+ if (!ret && trans->journal_pin)
+ bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
+ trans->journal_pin,
+ bch2_trans_commit_journal_pin_flush);
+
+ /*
+ * Drop journal reservation after dropping write locks, since dropping
+ * the journal reservation may kick off a journal write:
+ */
+ if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res)))
+ bch2_journal_res_put(&c->journal, &trans->journal_res);
+
+ return ret;
+}
+
+static int journal_reclaim_wait_done(struct bch_fs *c)
+{
+ int ret = bch2_journal_error(&c->journal) ?:
+ !bch2_btree_key_cache_must_wait(c);
+
+ if (!ret)
+ journal_reclaim_kick(&c->journal);
+ return ret;
+}
+
+static noinline
+int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
+ struct btree_insert_entry *i,
+ int ret, unsigned long trace_ip)
+{
+ struct bch_fs *c = trans->c;
+
+ switch (ret) {
+ case -BCH_ERR_btree_insert_btree_node_full:
+ ret = bch2_btree_split_leaf(trans, i->path, flags);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ trace_and_count(c, trans_restart_btree_node_split, trans,
+ trace_ip, trans->paths + i->path);
+ break;
+ case -BCH_ERR_btree_insert_need_mark_replicas:
+ ret = drop_locks_do(trans,
+ bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas));
+ break;
+ case -BCH_ERR_journal_res_get_blocked:
+ /*
+ * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
+ * flag
+ */
+ if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
+ (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) {
+ ret = -BCH_ERR_journal_reclaim_would_deadlock;
+ break;
+ }
+
+ ret = drop_locks_do(trans,
+ bch2_trans_journal_res_get(trans,
+ (flags & BCH_WATERMARK_MASK)|
+ JOURNAL_RES_GET_CHECK));
+ break;
+ case -BCH_ERR_btree_insert_need_journal_reclaim:
+ bch2_trans_unlock(trans);
+
+ trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip);
+
+ wait_event_freezable(c->journal.reclaim_wait,
+ (ret = journal_reclaim_wait_done(c)));
+ if (ret < 0)
+ break;
+
+ ret = bch2_trans_relock(trans);
+ break;
+ default:
+ BUG_ON(ret >= 0);
+ break;
+ }
+
+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
+
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) &&
+ (flags & BCH_TRANS_COMMIT_no_enospc), c,
+ "%s: incorrectly got %s\n", __func__, bch2_err_str(ret));
+
+ return ret;
+}
+
+static noinline int
+bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ int ret;
+
+ if (likely(!(flags & BCH_TRANS_COMMIT_lazy_rw)) ||
+ test_bit(BCH_FS_started, &c->flags))
+ return -BCH_ERR_erofs_trans_commit;
+
+ ret = drop_locks_do(trans, bch2_fs_read_write_early(c));
+ if (ret)
+ return ret;
+
+ bch2_write_ref_get(c, BCH_WRITE_REF_trans);
+ return 0;
+}
+
+/*
+ * This is for updates done in the early part of fsck - btree_gc - before we've
+ * gone RW. We only add the new key to the list of keys for journal replay to
+ * do.
+ */
+static noinline int
+do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
+ trans_for_each_update(trans, i) {
+ ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
+{
+ struct btree_insert_entry *errored_at = NULL;
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
+ if (!trans->nr_updates &&
+ !trans->journal_entries_u64s)
+ goto out_reset;
+
+ ret = bch2_trans_commit_run_triggers(trans);
+ if (ret)
+ goto out_reset;
+
+ trans_for_each_update(trans, i) {
+ struct printbuf buf = PRINTBUF;
+ enum bkey_invalid_flags invalid_flags = 0;
+
+ if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
+ invalid_flags |= BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT;
+
+ if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k),
+ i->bkey_type, invalid_flags, &buf)))
+ ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf);
+ btree_insert_entry_checks(trans, i);
+ printbuf_exit(&buf);
+
+ if (ret)
+ return ret;
+ }
+
+ for (struct jset_entry *i = trans->journal_entries;
+ i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+ i = vstruct_next(i)) {
+ if (!jset_entry_is_key(i))
+ continue;
+
+ enum bkey_invalid_flags invalid_flags = 0;
+
+ if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
+ invalid_flags |= BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT;
+
+ if (unlikely(bch2_journal_entry_validate(c, NULL, i,
+ bcachefs_metadata_version_current,
+ CPU_BIG_ENDIAN, invalid_flags)))
+ ret = bch2_trans_commit_journal_entry_invalid(trans, i);
+
+ if (ret)
+ return ret;
+ }
+
+ if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
+ ret = do_bch2_trans_commit_to_journal_replay(trans);
+ goto out_reset;
+ }
+
+ if (!(flags & BCH_TRANS_COMMIT_no_check_rw) &&
+ unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) {
+ ret = bch2_trans_commit_get_rw_cold(trans, flags);
+ if (ret)
+ goto out_reset;
+ }
+
+ EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
+
+ trans->journal_u64s = trans->journal_entries_u64s;
+ trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
+ if (trans->journal_transaction_names)
+ trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
+
+ trans_for_each_update(trans, i) {
+ struct btree_path *path = trans->paths + i->path;
+
+ EBUG_ON(!path->should_be_locked);
+
+ ret = bch2_btree_path_upgrade(trans, path, i->level + 1);
+ if (unlikely(ret))
+ goto out;
+
+ EBUG_ON(!btree_node_intent_locked(path, i->level));
+
+ if (i->key_cache_already_flushed)
+ continue;
+
+ if (i->flags & BTREE_UPDATE_NOJOURNAL)
+ continue;
+
+ /* we're going to journal the key being updated: */
+ trans->journal_u64s += jset_u64s(i->k->k.u64s);
+
+ /* and we're also going to log the overwrite: */
+ if (trans->journal_transaction_names)
+ trans->journal_u64s += jset_u64s(i->old_k.u64s);
+ }
+
+ if (trans->extra_disk_res) {
+ ret = bch2_disk_reservation_add(c, trans->disk_res,
+ trans->extra_disk_res,
+ (flags & BCH_TRANS_COMMIT_no_enospc)
+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
+ if (ret)
+ goto err;
+ }
+retry:
+ errored_at = NULL;
+ bch2_trans_verify_not_in_restart(trans);
+ if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res)))
+ memset(&trans->journal_res, 0, sizeof(trans->journal_res));
+
+ ret = do_bch2_trans_commit(trans, flags, &errored_at, _RET_IP_);
+
+ /* make sure we didn't drop or screw up locks: */
+ bch2_trans_verify_locks(trans);
+
+ if (ret)
+ goto err;
+
+ trace_and_count(c, transaction_commit, trans, _RET_IP_);
+out:
+ if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw)))
+ bch2_write_ref_put(c, BCH_WRITE_REF_trans);
+out_reset:
+ if (!ret)
+ bch2_trans_downgrade(trans);
+ bch2_trans_reset_updates(trans);
+
+ return ret;
+err:
+ ret = bch2_trans_commit_error(trans, flags, errored_at, ret, _RET_IP_);
+ if (ret)
+ goto out;
+
+ /*
+ * We might have done another transaction commit in the error path -
+ * i.e. btree write buffer flush - which will have made use of
+ * trans->journal_res, but with BCH_TRANS_COMMIT_no_journal_res that is
+ * how the journal sequence number to pin is passed in - so we must
+ * restart:
+ */
+ if (flags & BCH_TRANS_COMMIT_no_journal_res) {
+ ret = -BCH_ERR_transaction_restart_nested;
+ goto out;
+ }
+
+ goto retry;
+}
diff --git a/c_src/libbcachefs/btree_types.h b/c_src/libbcachefs/btree_types.h
new file mode 100644
index 00000000..d5303070
--- /dev/null
+++ b/c_src/libbcachefs/btree_types.h
@@ -0,0 +1,741 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_TYPES_H
+#define _BCACHEFS_BTREE_TYPES_H
+
+#include <linux/list.h>
+#include <linux/rhashtable.h>
+
+#include "btree_key_cache_types.h"
+#include "buckets_types.h"
+#include "darray.h"
+#include "errcode.h"
+#include "journal_types.h"
+#include "replicas_types.h"
+#include "six.h"
+
+struct open_bucket;
+struct btree_update;
+struct btree_trans;
+
+#define MAX_BSETS 3U
+
+struct btree_nr_keys {
+
+ /*
+ * Amount of live metadata (i.e. size of node after a compaction) in
+ * units of u64s
+ */
+ u16 live_u64s;
+ u16 bset_u64s[MAX_BSETS];
+
+ /* live keys only: */
+ u16 packed_keys;
+ u16 unpacked_keys;
+};
+
+struct bset_tree {
+ /*
+ * We construct a binary tree in an array as if the array
+ * started at 1, so that things line up on the same cachelines
+ * better: see comments in bset.c at cacheline_to_bkey() for
+ * details
+ */
+
+ /* size of the binary tree and prev array */
+ u16 size;
+
+ /* function of size - precalculated for to_inorder() */
+ u16 extra;
+
+ u16 data_offset;
+ u16 aux_data_offset;
+ u16 end_offset;
+};
+
+struct btree_write {
+ struct journal_entry_pin journal;
+};
+
+struct btree_alloc {
+ struct open_buckets ob;
+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
+};
+
+struct btree_bkey_cached_common {
+ struct six_lock lock;
+ u8 level;
+ u8 btree_id;
+ bool cached;
+};
+
+struct btree {
+ struct btree_bkey_cached_common c;
+
+ struct rhash_head hash;
+ u64 hash_val;
+
+ unsigned long flags;
+ u16 written;
+ u8 nsets;
+ u8 nr_key_bits;
+ u16 version_ondisk;
+
+ struct bkey_format format;
+
+ struct btree_node *data;
+ void *aux_data;
+
+ /*
+ * Sets of sorted keys - the real btree node - plus a binary search tree
+ *
+ * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
+ * to the memory we have allocated for this btree node. Additionally,
+ * set[0]->data points to the entire btree node as it exists on disk.
+ */
+ struct bset_tree set[MAX_BSETS];
+
+ struct btree_nr_keys nr;
+ u16 sib_u64s[2];
+ u16 whiteout_u64s;
+ u8 byte_order;
+ u8 unpack_fn_len;
+
+ struct btree_write writes[2];
+
+ /* Key/pointer for this btree node */
+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
+
+ /*
+ * XXX: add a delete sequence number, so when bch2_btree_node_relock()
+ * fails because the lock sequence number has changed - i.e. the
+ * contents were modified - we can still relock the node if it's still
+ * the one we want, without redoing the traversal
+ */
+
+ /*
+ * For asynchronous splits/interior node updates:
+ * When we do a split, we allocate new child nodes and update the parent
+ * node to point to them: we update the parent in memory immediately,
+ * but then we must wait until the children have been written out before
+ * the update to the parent can be written - this is a list of the
+ * btree_updates that are blocking this node from being
+ * written:
+ */
+ struct list_head write_blocked;
+
+ /*
+ * Also for asynchronous splits/interior node updates:
+ * If a btree node isn't reachable yet, we don't want to kick off
+ * another write - because that write also won't yet be reachable and
+ * marking it as completed before it's reachable would be incorrect:
+ */
+ unsigned long will_make_reachable;
+
+ struct open_buckets ob;
+
+ /* lru list */
+ struct list_head list;
+};
+
+struct btree_cache {
+ struct rhashtable table;
+ bool table_init_done;
+ /*
+ * We never free a struct btree, except on shutdown - we just put it on
+ * the btree_cache_freed list and reuse it later. This simplifies the
+ * code, and it doesn't cost us much memory as the memory usage is
+ * dominated by buffers that hold the actual btree node data and those
+ * can be freed - and the number of struct btrees allocated is
+ * effectively bounded.
+ *
+	 * btree_cache_freeable is effectively a small cache - we use it because
+ * high order page allocations can be rather expensive, and it's quite
+ * common to delete and allocate btree nodes in quick succession. It
+ * should never grow past ~2-3 nodes in practice.
+ */
+ struct mutex lock;
+ struct list_head live;
+ struct list_head freeable;
+ struct list_head freed_pcpu;
+ struct list_head freed_nonpcpu;
+
+ /* Number of elements in live + freeable lists */
+ unsigned used;
+ unsigned reserve;
+ atomic_t dirty;
+ struct shrinker *shrink;
+
+ /*
+ * If we need to allocate memory for a new btree node and that
+ * allocation fails, we can cannibalize another node in the btree cache
+ * to satisfy the allocation - lock to guarantee only one thread does
+ * this at a time:
+ */
+ struct task_struct *alloc_lock;
+ struct closure_waitlist alloc_wait;
+};
+
+struct btree_node_iter {
+ struct btree_node_iter_set {
+ u16 k, end;
+ } data[MAX_BSETS];
+};
+
+/*
+ * Iterate over all possible positions, synthesizing deleted keys for holes:
+ */
+static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0;
+/*
+ * Indicates that intent locks should be taken on leaf nodes, because we expect
+ * to be doing updates:
+ */
+static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 1;
+/*
+ * Causes the btree iterator code to prefetch additional btree nodes from disk:
+ */
+static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 2;
+/*
+ * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
+ * @pos or the first key strictly greater than @pos
+ */
+static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 3;
+static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 4;
+static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 5;
+static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 6;
+static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 7;
+static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 8;
+static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 9;
+static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
+static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 11;
+static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 12;
+static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 13;
+static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 14;
+#define __BTREE_ITER_FLAGS_END 15
+
+enum btree_path_uptodate {
+ BTREE_ITER_UPTODATE = 0,
+ BTREE_ITER_NEED_RELOCK = 1,
+ BTREE_ITER_NEED_TRAVERSE = 2,
+};
+
+#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG)
+#define TRACK_PATH_ALLOCATED
+#endif
+
+typedef u16 btree_path_idx_t;
+
+struct btree_path {
+ btree_path_idx_t sorted_idx;
+ u8 ref;
+ u8 intent_ref;
+
+ /* btree_iter_copy starts here: */
+ struct bpos pos;
+
+ enum btree_id btree_id:5;
+ bool cached:1;
+ bool preserve:1;
+ enum btree_path_uptodate uptodate:2;
+ /*
+ * When true, failing to relock this path will cause the transaction to
+ * restart:
+ */
+ bool should_be_locked:1;
+ unsigned level:3,
+ locks_want:3;
+ u8 nodes_locked;
+
+ struct btree_path_level {
+ struct btree *b;
+ struct btree_node_iter iter;
+ u32 lock_seq;
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ u64 lock_taken_time;
+#endif
+ } l[BTREE_MAX_DEPTH];
+#ifdef TRACK_PATH_ALLOCATED
+ unsigned long ip_allocated;
+#endif
+};
+
+static inline struct btree_path_level *path_l(struct btree_path *path)
+{
+ return path->l + path->level;
+}
+
+static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
+{
+#ifdef TRACK_PATH_ALLOCATED
+ return path->ip_allocated;
+#else
+ return _THIS_IP_;
+#endif
+}
+
+/*
+ * @pos - iterator's current position
+ * @level - current btree depth
+ * @locks_want - btree level below which we start taking intent locks
+ * @nodes_locked - per-level lock state, tracked in struct btree_path
+ */
+struct btree_iter {
+ struct btree_trans *trans;
+ btree_path_idx_t path;
+ btree_path_idx_t update_path;
+ btree_path_idx_t key_cache_path;
+
+ enum btree_id btree_id:8;
+ u8 min_depth;
+
+ /* btree_iter_copy starts here: */
+ u16 flags;
+
+ /* When we're filtering by snapshot, the snapshot ID we're looking for: */
+ unsigned snapshot;
+
+ struct bpos pos;
+ /*
+ * Current unpacked key - so that bch2_btree_iter_next()/
+ * bch2_btree_iter_next_slot() can correctly advance pos.
+ */
+ struct bkey k;
+
+ /* BTREE_ITER_WITH_JOURNAL: */
+ size_t journal_idx;
+#ifdef TRACK_PATH_ALLOCATED
+ unsigned long ip_allocated;
+#endif
+};
+
+#define BKEY_CACHED_ACCESSED 0
+#define BKEY_CACHED_DIRTY 1
+
+struct bkey_cached {
+ struct btree_bkey_cached_common c;
+
+ unsigned long flags;
+ u16 u64s;
+ bool valid;
+ u32 btree_trans_barrier_seq;
+ struct bkey_cached_key key;
+
+ struct rhash_head hash;
+ struct list_head list;
+
+ struct journal_entry_pin journal;
+ u64 seq;
+
+ struct bkey_i *k;
+};
+
+static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
+{
+ return !b->cached
+ ? container_of(b, struct btree, c)->key.k.p
+ : container_of(b, struct bkey_cached, c)->key.pos;
+}
+
+struct btree_insert_entry {
+ unsigned flags;
+ u8 bkey_type;
+ enum btree_id btree_id:8;
+ u8 level:4;
+ bool cached:1;
+ bool insert_trigger_run:1;
+ bool overwrite_trigger_run:1;
+ bool key_cache_already_flushed:1;
+ /*
+ * @old_k may be a key from the journal; @old_btree_u64s always refers
+ * to the size of the key being overwritten in the btree:
+ */
+ u8 old_btree_u64s;
+ btree_path_idx_t path;
+ struct bkey_i *k;
+ /* key being overwritten: */
+ struct bkey old_k;
+ const struct bch_val *old_v;
+ unsigned long ip_allocated;
+};
+
+#define BTREE_ITER_INITIAL 64
+#define BTREE_ITER_MAX (1U << 10)
+
+struct btree_trans_commit_hook;
+typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
+
+struct btree_trans_commit_hook {
+ btree_trans_commit_hook_fn *fn;
+ struct btree_trans_commit_hook *next;
+};
+
+#define BTREE_TRANS_MEM_MAX (1U << 16)
+
+#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000
+
+struct btree_trans_paths {
+ unsigned long nr_paths;
+ struct btree_path paths[];
+};
+
+struct btree_trans {
+ struct bch_fs *c;
+
+ unsigned long *paths_allocated;
+ struct btree_path *paths;
+ btree_path_idx_t *sorted;
+ struct btree_insert_entry *updates;
+
+ void *mem;
+ unsigned mem_top;
+ unsigned mem_bytes;
+
+ btree_path_idx_t nr_sorted;
+ btree_path_idx_t nr_paths;
+ btree_path_idx_t nr_paths_max;
+ u8 fn_idx;
+ u8 nr_updates;
+ u8 lock_must_abort;
+ bool lock_may_not_fail:1;
+ bool srcu_held:1;
+ bool used_mempool:1;
+ bool in_traverse_all:1;
+ bool paths_sorted:1;
+ bool memory_allocation_failure:1;
+ bool journal_transaction_names:1;
+ bool journal_replay_not_finished:1;
+ bool notrace_relock_fail:1;
+ bool write_locked:1;
+ enum bch_errcode restarted:16;
+ u32 restart_count;
+
+ u64 last_begin_time;
+ unsigned long last_begin_ip;
+ unsigned long last_restarted_ip;
+ unsigned long srcu_lock_time;
+
+ const char *fn;
+ struct btree_bkey_cached_common *locking;
+ struct six_lock_waiter locking_wait;
+ int srcu_idx;
+
+ /* update path: */
+ u16 journal_entries_u64s;
+ u16 journal_entries_size;
+ struct jset_entry *journal_entries;
+
+ struct btree_trans_commit_hook *hooks;
+ struct journal_entry_pin *journal_pin;
+
+ struct journal_res journal_res;
+ u64 *journal_seq;
+ struct disk_reservation *disk_res;
+ unsigned journal_u64s;
+ unsigned extra_disk_res; /* XXX kill */
+ struct replicas_delta_list *fs_usage_deltas;
+
+ /* Entries before this are zeroed out on every bch2_trans_get() call */
+
+ struct list_head list;
+ struct closure ref;
+
+ unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_INITIAL)];
+ struct btree_trans_paths trans_paths;
+ struct btree_path _paths[BTREE_ITER_INITIAL];
+ btree_path_idx_t _sorted[BTREE_ITER_INITIAL + 4];
+ struct btree_insert_entry _updates[BTREE_ITER_INITIAL];
+};
+
+static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter)
+{
+ return trans->paths + iter->path;
+}
+
+static inline struct btree_path *btree_iter_key_cache_path(struct btree_trans *trans, struct btree_iter *iter)
+{
+ return iter->key_cache_path
+ ? trans->paths + iter->key_cache_path
+ : NULL;
+}
+
+#define BCH_BTREE_WRITE_TYPES() \
+ x(initial, 0) \
+ x(init_next_bset, 1) \
+ x(cache_reclaim, 2) \
+ x(journal_reclaim, 3) \
+ x(interior, 4)
+
+enum btree_write_type {
+#define x(t, n) BTREE_WRITE_##t,
+ BCH_BTREE_WRITE_TYPES()
+#undef x
+ BTREE_WRITE_TYPE_NR,
+};
+
+#define BTREE_WRITE_TYPE_MASK (roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
+#define BTREE_WRITE_TYPE_BITS ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR))
+
+#define BTREE_FLAGS() \
+ x(read_in_flight) \
+ x(read_error) \
+ x(dirty) \
+ x(need_write) \
+ x(write_blocked) \
+ x(will_make_reachable) \
+ x(noevict) \
+ x(write_idx) \
+ x(accessed) \
+ x(write_in_flight) \
+ x(write_in_flight_inner) \
+ x(just_written) \
+ x(dying) \
+ x(fake) \
+ x(need_rewrite) \
+ x(never_write)
+
+enum btree_flags {
+ /* First bits for btree node write type */
+ BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1,
+#define x(flag) BTREE_NODE_##flag,
+ BTREE_FLAGS()
+#undef x
+};
+
+#define x(flag) \
+static inline bool btree_node_ ## flag(struct btree *b) \
+{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
+ \
+static inline void set_btree_node_ ## flag(struct btree *b) \
+{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \
+ \
+static inline void clear_btree_node_ ## flag(struct btree *b) \
+{ clear_bit(BTREE_NODE_ ## flag, &b->flags); }
+
+BTREE_FLAGS()
+#undef x
+
+static inline struct btree_write *btree_current_write(struct btree *b)
+{
+ return b->writes + btree_node_write_idx(b);
+}
+
+static inline struct btree_write *btree_prev_write(struct btree *b)
+{
+ return b->writes + (btree_node_write_idx(b) ^ 1);
+}
+
+static inline struct bset_tree *bset_tree_last(struct btree *b)
+{
+ EBUG_ON(!b->nsets);
+ return b->set + b->nsets - 1;
+}
+
+static inline void *
+__btree_node_offset_to_ptr(const struct btree *b, u16 offset)
+{
+ return (void *) ((u64 *) b->data + 1 + offset);
+}
+
+static inline u16
+__btree_node_ptr_to_offset(const struct btree *b, const void *p)
+{
+ u16 ret = (u64 *) p - 1 - (u64 *) b->data;
+
+ EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
+ return ret;
+}
+
+static inline struct bset *bset(const struct btree *b,
+ const struct bset_tree *t)
+{
+ return __btree_node_offset_to_ptr(b, t->data_offset);
+}
+
+static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t)
+{
+ t->end_offset =
+ __btree_node_ptr_to_offset(b, vstruct_last(bset(b, t)));
+}
+
+static inline void set_btree_bset(struct btree *b, struct bset_tree *t,
+ const struct bset *i)
+{
+ t->data_offset = __btree_node_ptr_to_offset(b, i);
+ set_btree_bset_end(b, t);
+}
+
+static inline struct bset *btree_bset_first(struct btree *b)
+{
+ return bset(b, b->set);
+}
+
+static inline struct bset *btree_bset_last(struct btree *b)
+{
+ return bset(b, bset_tree_last(b));
+}
+
+static inline u16
+__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k)
+{
+ return __btree_node_ptr_to_offset(b, k);
+}
+
+static inline struct bkey_packed *
+__btree_node_offset_to_key(const struct btree *b, u16 k)
+{
+ return __btree_node_offset_to_ptr(b, k);
+}
+
+static inline unsigned btree_bkey_first_offset(const struct bset_tree *t)
+{
+ return t->data_offset + offsetof(struct bset, _data) / sizeof(u64);
+}
+
+#define btree_bkey_first(_b, _t) \
+({ \
+ EBUG_ON(bset(_b, _t)->start != \
+ __btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\
+ \
+ bset(_b, _t)->start; \
+})
+
+#define btree_bkey_last(_b, _t) \
+({ \
+ EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) != \
+ vstruct_last(bset(_b, _t))); \
+ \
+ __btree_node_offset_to_key(_b, (_t)->end_offset); \
+})
+
+static inline unsigned bset_u64s(struct bset_tree *t)
+{
+ return t->end_offset - t->data_offset -
+ sizeof(struct bset) / sizeof(u64);
+}
+
+static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t)
+{
+ return bset_u64s(t) - b->nr.bset_u64s[t - b->set];
+}
+
+static inline unsigned bset_byte_offset(struct btree *b, void *i)
+{
+ return i - (void *) b->data;
+}
+
+enum btree_node_type {
+ BKEY_TYPE_btree,
+#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1,
+ BCH_BTREE_IDS()
+#undef x
+ BKEY_TYPE_NR
+};
+
+/* Type of a key in btree @id at level @level: */
+static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
+{
+ return level ? BKEY_TYPE_btree : (unsigned) id + 1;
+}
+
+/* Type of keys @b contains: */
+static inline enum btree_node_type btree_node_type(struct btree *b)
+{
+ return __btree_node_type(b->c.level, b->c.btree_id);
+}
+
+const char *bch2_btree_node_type_str(enum btree_node_type);
+
+#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
+ (BIT_ULL(BKEY_TYPE_extents)| \
+ BIT_ULL(BKEY_TYPE_alloc)| \
+ BIT_ULL(BKEY_TYPE_inodes)| \
+ BIT_ULL(BKEY_TYPE_stripes)| \
+ BIT_ULL(BKEY_TYPE_reflink)| \
+ BIT_ULL(BKEY_TYPE_btree))
+
+#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \
+ (BIT_ULL(BKEY_TYPE_alloc)| \
+ BIT_ULL(BKEY_TYPE_inodes)| \
+ BIT_ULL(BKEY_TYPE_stripes)| \
+ BIT_ULL(BKEY_TYPE_snapshots))
+
+#define BTREE_NODE_TYPE_HAS_TRIGGERS \
+ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
+ BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
+
+static inline bool btree_node_type_needs_gc(enum btree_node_type type)
+{
+ return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type);
+}
+
+static inline bool btree_node_type_is_extents(enum btree_node_type type)
+{
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
+ BCH_BTREE_IDS()
+#undef x
+ ;
+
+ return (1U << type) & mask;
+}
+
+static inline bool btree_id_is_extents(enum btree_id btree)
+{
+ return btree_node_type_is_extents(__btree_node_type(0, btree));
+}
+
+static inline bool btree_type_has_snapshots(enum btree_id id)
+{
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
+ BCH_BTREE_IDS()
+#undef x
+ ;
+
+ return (1U << id) & mask;
+}
+
+static inline bool btree_type_has_snapshot_field(enum btree_id id)
+{
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
+ BCH_BTREE_IDS()
+#undef x
+ ;
+
+ return (1U << id) & mask;
+}
+
+static inline bool btree_type_has_ptrs(enum btree_id id)
+{
+ const unsigned mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
+ BCH_BTREE_IDS()
+#undef x
+ ;
+
+ return (1U << id) & mask;
+}
+
+struct btree_root {
+ struct btree *b;
+
+ /* On disk root - see async splits: */
+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
+ u8 level;
+ u8 alive;
+ s8 error;
+};
+
+enum btree_gc_coalesce_fail_reason {
+ BTREE_GC_COALESCE_FAIL_RESERVE_GET,
+ BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,
+ BTREE_GC_COALESCE_FAIL_FORMAT_FITS,
+};
+
+enum btree_node_sibling {
+ btree_prev_sib,
+ btree_next_sib,
+};
+
+#endif /* _BCACHEFS_BTREE_TYPES_H */
diff --git a/c_src/libbcachefs/btree_update.c b/c_src/libbcachefs/btree_update.c
new file mode 100644
index 00000000..c3ff365a
--- /dev/null
+++ b/c_src/libbcachefs/btree_update.c
@@ -0,0 +1,873 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_update.h"
+#include "btree_iter.h"
+#include "btree_journal_iter.h"
+#include "btree_locking.h"
+#include "buckets.h"
+#include "debug.h"
+#include "errcode.h"
+#include "error.h"
+#include "extents.h"
+#include "keylist.h"
+#include "snapshot.h"
+#include "trace.h"
+
+static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
+ const struct btree_insert_entry *r)
+{
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ cmp_int(l->cached, r->cached) ?:
+ -cmp_int(l->level, r->level) ?:
+ bpos_cmp(l->k->k.p, r->k->k.p);
+}
+
+static int __must_check
+bch2_trans_update_by_path(struct btree_trans *, btree_path_idx_t,
+ struct bkey_i *, enum btree_update_flags,
+ unsigned long ip);
+
+static noinline int extent_front_merge(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct bkey_i **insert,
+ enum btree_update_flags flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i *update;
+ int ret;
+
+ update = bch2_bkey_make_mut_noupdate(trans, k);
+ ret = PTR_ERR_OR_ZERO(update);
+ if (ret)
+ return ret;
+
+ if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
+ return 0;
+
+ ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?:
+ bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ return 0;
+
+ ret = bch2_btree_delete_at(trans, iter, flags);
+ if (ret)
+ return ret;
+
+ *insert = update;
+ return 0;
+}
+
+static noinline int extent_back_merge(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i *insert,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ int ret;
+
+ ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
+ bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ return 0;
+
+ bch2_bkey_merge(c, bkey_i_to_s(insert), k);
+ return 0;
+}
+
+/*
+ * When deleting, check if we need to emit a whiteout (because we're overwriting
+ * something in an ancestor snapshot)
+ */
+static int need_whiteout_for_snapshot(struct btree_trans *trans,
+ enum btree_id btree_id, struct bpos pos)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ u32 snapshot = pos.snapshot;
+ int ret;
+
+ if (!bch2_snapshot_parent(trans->c, pos.snapshot))
+ return 0;
+
+ pos.snapshot++;
+
+ for_each_btree_key_norestart(trans, iter, btree_id, pos,
+ BTREE_ITER_ALL_SNAPSHOTS|
+ BTREE_ITER_NOPRESERVE, k, ret) {
+ if (!bkey_eq(k.k->p, pos))
+ break;
+
+ if (bch2_snapshot_is_ancestor(trans->c, snapshot,
+ k.k->p.snapshot)) {
+ ret = !bkey_whiteout(k.k);
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
+int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
+ enum btree_id id,
+ struct bpos old_pos,
+ struct bpos new_pos)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter old_iter, new_iter = { NULL };
+ struct bkey_s_c old_k, new_k;
+ snapshot_id_list s;
+ struct bkey_i *update;
+ int ret = 0;
+
+ if (!bch2_snapshot_has_children(c, old_pos.snapshot))
+ return 0;
+
+ darray_init(&s);
+
+ bch2_trans_iter_init(trans, &old_iter, id, old_pos,
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_ALL_SNAPSHOTS);
+ while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
+ !(ret = bkey_err(old_k)) &&
+ bkey_eq(old_pos, old_k.k->p)) {
+ struct bpos whiteout_pos =
+ SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);
+
+ if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
+ snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
+ continue;
+
+ new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos,
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_INTENT);
+ ret = bkey_err(new_k);
+ if (ret)
+ break;
+
+ if (new_k.k->type == KEY_TYPE_deleted) {
+ update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+ ret = PTR_ERR_OR_ZERO(update);
+ if (ret)
+ break;
+
+ bkey_init(&update->k);
+ update->k.p = whiteout_pos;
+ update->k.type = KEY_TYPE_whiteout;
+
+ ret = bch2_trans_update(trans, &new_iter, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ }
+ bch2_trans_iter_exit(trans, &new_iter);
+
+ ret = snapshot_list_add(c, &s, old_k.k->p.snapshot);
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &new_iter);
+ bch2_trans_iter_exit(trans, &old_iter);
+ darray_exit(&s);
+
+ return ret;
+}
+
+int bch2_trans_update_extent_overwrite(struct btree_trans *trans,
+ struct btree_iter *iter,
+ enum btree_update_flags flags,
+ struct bkey_s_c old,
+ struct bkey_s_c new)
+{
+ enum btree_id btree_id = iter->btree_id;
+ struct bkey_i *update;
+ struct bpos new_start = bkey_start_pos(new.k);
+ unsigned front_split = bkey_lt(bkey_start_pos(old.k), new_start);
+ unsigned back_split = bkey_gt(old.k->p, new.k->p);
+ unsigned middle_split = (front_split || back_split) &&
+ old.k->p.snapshot != new.k->p.snapshot;
+ unsigned nr_splits = front_split + back_split + middle_split;
+ int ret = 0, compressed_sectors;
+
+ /*
+ * If we're going to be splitting a compressed extent, note it
+ * so that __bch2_trans_commit() can increase our disk
+ * reservation:
+ */
+ if (nr_splits > 1 &&
+ (compressed_sectors = bch2_bkey_sectors_compressed(old)))
+ trans->extra_disk_res += compressed_sectors * (nr_splits - 1);
+
+ if (front_split) {
+ update = bch2_bkey_make_mut_noupdate(trans, old);
+ if ((ret = PTR_ERR_OR_ZERO(update)))
+ return ret;
+
+ bch2_cut_back(new_start, update);
+
+ ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
+ old.k->p, update->k.p) ?:
+ bch2_btree_insert_nonextent(trans, btree_id, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
+ if (ret)
+ return ret;
+ }
+
+ /* If we're overwriting in a different snapshot - middle split: */
+ if (middle_split) {
+ update = bch2_bkey_make_mut_noupdate(trans, old);
+ if ((ret = PTR_ERR_OR_ZERO(update)))
+ return ret;
+
+ bch2_cut_front(new_start, update);
+ bch2_cut_back(new.k->p, update);
+
+ ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
+ old.k->p, update->k.p) ?:
+ bch2_btree_insert_nonextent(trans, btree_id, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
+ if (ret)
+ return ret;
+ }
+
+ if (bkey_le(old.k->p, new.k->p)) {
+ update = bch2_trans_kmalloc(trans, sizeof(*update));
+ if ((ret = PTR_ERR_OR_ZERO(update)))
+ return ret;
+
+ bkey_init(&update->k);
+ update->k.p = old.k->p;
+ update->k.p.snapshot = new.k->p.snapshot;
+
+ if (new.k->p.snapshot != old.k->p.snapshot) {
+ update->k.type = KEY_TYPE_whiteout;
+ } else if (btree_type_has_snapshots(btree_id)) {
+ ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ update->k.type = KEY_TYPE_whiteout;
+ }
+
+ ret = bch2_btree_insert_nonextent(trans, btree_id, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
+ if (ret)
+ return ret;
+ }
+
+ if (back_split) {
+ update = bch2_bkey_make_mut_noupdate(trans, old);
+ if ((ret = PTR_ERR_OR_ZERO(update)))
+ return ret;
+
+ bch2_cut_front(new.k->p, update);
+
+ ret = bch2_trans_update_by_path(trans, iter->path, update,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+ flags, _RET_IP_);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
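As a rough worked example of the reservation bump above (numbers purely illustrative): overwriting the middle of a compressed extent in the same snapshot gives front_split = back_split = 1 and middle_split = 0, so nr_splits = 2; if the old extent carries 8 compressed sectors, extra_disk_res grows by 8 * (2 - 1) = 8 sectors, roughly one extra copy's worth of the compressed payload for the additional fragment.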
+
+static int bch2_trans_update_extent(struct btree_trans *trans,
+ struct btree_iter *orig_iter,
+ struct bkey_i *insert,
+ enum btree_update_flags flags)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ enum btree_id btree_id = orig_iter->btree_id;
+ int ret = 0;
+
+ bch2_trans_iter_init(trans, &iter, btree_id, bkey_start_pos(&insert->k),
+ BTREE_ITER_INTENT|
+ BTREE_ITER_WITH_UPDATES|
+ BTREE_ITER_NOT_EXTENTS);
+ k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
+ if ((ret = bkey_err(k)))
+ goto err;
+ if (!k.k)
+ goto out;
+
+ if (bkey_eq(k.k->p, bkey_start_pos(&insert->k))) {
+ if (bch2_bkey_maybe_mergable(k.k, &insert->k)) {
+ ret = extent_front_merge(trans, &iter, k, &insert, flags);
+ if (ret)
+ goto err;
+ }
+
+ goto next;
+ }
+
+ while (bkey_gt(insert->k.p, bkey_start_pos(k.k))) {
+ bool done = bkey_lt(insert->k.p, k.k->p);
+
+ ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert));
+ if (ret)
+ goto err;
+
+ if (done)
+ goto out;
+next:
+ bch2_btree_iter_advance(&iter);
+ k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
+ if ((ret = bkey_err(k)))
+ goto err;
+ if (!k.k)
+ goto out;
+ }
+
+ if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
+ ret = extent_back_merge(trans, &iter, insert, k);
+ if (ret)
+ goto err;
+ }
+out:
+ if (!bkey_deleted(&insert->k))
+ ret = bch2_btree_insert_nonextent(trans, btree_id, insert, flags);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
+static noinline int flush_new_cached_update(struct btree_trans *trans,
+ struct btree_insert_entry *i,
+ enum btree_update_flags flags,
+ unsigned long ip)
+{
+ struct bkey k;
+ int ret;
+
+ btree_path_idx_t path_idx =
+ bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0,
+ BTREE_ITER_INTENT, _THIS_IP_);
+ ret = bch2_btree_path_traverse(trans, path_idx, 0);
+ if (ret)
+ goto out;
+
+ struct btree_path *btree_path = trans->paths + path_idx;
+
+ /*
+ * The old key in the insert entry might actually refer to an existing
+ * key in the btree that has been deleted from cache and not yet
+ * flushed. Check for this and skip the flush so we don't run triggers
+ * against a stale key.
+ */
+ bch2_btree_path_peek_slot_exact(btree_path, &k);
+ if (!bkey_deleted(&k))
+ goto out;
+
+ i->key_cache_already_flushed = true;
+ i->flags |= BTREE_TRIGGER_NORUN;
+
+ btree_path_set_should_be_locked(btree_path);
+ ret = bch2_trans_update_by_path(trans, path_idx, i->k, flags, ip);
+out:
+ bch2_path_put(trans, path_idx, true);
+ return ret;
+}
+
+static int __must_check
+bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
+ struct bkey_i *k, enum btree_update_flags flags,
+ unsigned long ip)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_insert_entry *i, n;
+ int cmp;
+
+ struct btree_path *path = trans->paths + path_idx;
+ EBUG_ON(!path->should_be_locked);
+ EBUG_ON(trans->nr_updates >= trans->nr_paths);
+ EBUG_ON(!bpos_eq(k->k.p, path->pos));
+
+ n = (struct btree_insert_entry) {
+ .flags = flags,
+ .bkey_type = __btree_node_type(path->level, path->btree_id),
+ .btree_id = path->btree_id,
+ .level = path->level,
+ .cached = path->cached,
+ .path = path_idx,
+ .k = k,
+ .ip_allocated = ip,
+ };
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+ trans_for_each_update(trans, i)
+ BUG_ON(i != trans->updates &&
+ btree_insert_entry_cmp(i - 1, i) >= 0);
+#endif
+
+ /*
+ * Pending updates are kept sorted: first, find position of new update,
+ * then delete/trim any updates the new update overwrites:
+ */
+ for (i = trans->updates; i < trans->updates + trans->nr_updates; i++) {
+ cmp = btree_insert_entry_cmp(&n, i);
+ if (cmp <= 0)
+ break;
+ }
+
+ if (!cmp && i < trans->updates + trans->nr_updates) {
+ EBUG_ON(i->insert_trigger_run || i->overwrite_trigger_run);
+
+ bch2_path_put(trans, i->path, true);
+ i->flags = n.flags;
+ i->cached = n.cached;
+ i->k = n.k;
+ i->path = n.path;
+ i->ip_allocated = n.ip_allocated;
+ } else {
+ array_insert_item(trans->updates, trans->nr_updates,
+ i - trans->updates, n);
+
+ i->old_v = bch2_btree_path_peek_slot_exact(path, &i->old_k).v;
+ i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0;
+
+ if (unlikely(trans->journal_replay_not_finished)) {
+ struct bkey_i *j_k =
+ bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p);
+
+ if (j_k) {
+ i->old_k = j_k->k;
+ i->old_v = &j_k->v;
+ }
+ }
+ }
+
+ __btree_path_get(trans->paths + i->path, true);
+
+ /*
+ * If a key is present in the key cache, it must also exist in the
+ * btree - this is necessary for cache coherency. When iterating over
+ * a btree that's cached in the key cache, the btree iter code checks
+ * the key cache - but the key has to exist in the btree for that to
+ * work:
+ */
+ if (path->cached && bkey_deleted(&i->old_k))
+ return flush_new_cached_update(trans, i, flags, ip);
+
+ return 0;
+}
+
+static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct btree_path *path)
+{
+ struct btree_path *key_cache_path = btree_iter_key_cache_path(trans, iter);
+
+ if (!key_cache_path ||
+ !key_cache_path->should_be_locked ||
+ !bpos_eq(key_cache_path->pos, iter->pos)) {
+ struct bkey_cached *ck;
+ int ret;
+
+ if (!iter->key_cache_path)
+ iter->key_cache_path =
+ bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_CACHED, _THIS_IP_);
+
+ iter->key_cache_path =
+ bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
+ iter->flags & BTREE_ITER_INTENT,
+ _THIS_IP_);
+
+ ret = bch2_btree_path_traverse(trans, iter->key_cache_path, BTREE_ITER_CACHED);
+ if (unlikely(ret))
+ return ret;
+
+ ck = (void *) trans->paths[iter->key_cache_path].l[0].b;
+
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
+ }
+
+ btree_path_set_should_be_locked(trans->paths + iter->key_cache_path);
+ }
+
+ return 0;
+}
+
+int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_i *k, enum btree_update_flags flags)
+{
+ btree_path_idx_t path_idx = iter->update_path ?: iter->path;
+ int ret;
+
+ if (iter->flags & BTREE_ITER_IS_EXTENTS)
+ return bch2_trans_update_extent(trans, iter, k, flags);
+
+ if (bkey_deleted(&k->k) &&
+ !(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
+ (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
+ ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
+ if (unlikely(ret < 0))
+ return ret;
+
+ if (ret)
+ k->k.type = KEY_TYPE_whiteout;
+ }
+
+ /*
+ * Ensure that updates to cached btrees go to the key cache:
+ */
+ struct btree_path *path = trans->paths + path_idx;
+ if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
+ !path->cached &&
+ !path->level &&
+ btree_id_cached(trans->c, path->btree_id)) {
+ ret = bch2_trans_update_get_key_cache(trans, iter, path);
+ if (ret)
+ return ret;
+
+ path_idx = iter->key_cache_path;
+ }
+
+ return bch2_trans_update_by_path(trans, path_idx, k, flags, _RET_IP_);
+}
+
+int bch2_btree_insert_clone_trans(struct btree_trans *trans,
+ enum btree_id btree,
+ struct bkey_i *k)
+{
+ struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k));
+ int ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ bkey_copy(n, k);
+ return bch2_btree_insert_trans(trans, btree, n, 0);
+}
+
+struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
+{
+ unsigned new_top = trans->journal_entries_u64s + u64s;
+ unsigned old_size = trans->journal_entries_size;
+
+ if (new_top > trans->journal_entries_size) {
+ trans->journal_entries_size = roundup_pow_of_two(new_top);
+
+ btree_trans_stats(trans)->journal_entries_size = trans->journal_entries_size;
+ }
+
+ struct jset_entry *n =
+ bch2_trans_kmalloc_nomemzero(trans,
+ trans->journal_entries_size * sizeof(u64));
+ if (IS_ERR(n))
+ return ERR_CAST(n);
+
+ if (trans->journal_entries)
+ memcpy(n, trans->journal_entries, old_size * sizeof(u64));
+ trans->journal_entries = n;
+
+ struct jset_entry *e = btree_trans_journal_entries_top(trans);
+ trans->journal_entries_u64s = new_top;
+ return e;
+}
+
+int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
+ enum btree_id btree, struct bpos end)
+{
+ struct bkey_s_c k;
+ int ret = 0;
+
+ bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_INTENT);
+ k = bch2_btree_iter_prev(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ bch2_btree_iter_advance(iter);
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ BUG_ON(k.k->type != KEY_TYPE_deleted);
+
+ if (bkey_gt(k.k->p, end)) {
+ ret = -BCH_ERR_ENOSPC_btree_slot;
+ goto err;
+ }
+
+ return 0;
+err:
+ bch2_trans_iter_exit(trans, iter);
+ return ret;
+}
+
+void bch2_trans_commit_hook(struct btree_trans *trans,
+ struct btree_trans_commit_hook *h)
+{
+ h->next = trans->hooks;
+ trans->hooks = h;
+}
+
+int bch2_btree_insert_nonextent(struct btree_trans *trans,
+ enum btree_id btree, struct bkey_i *k,
+ enum btree_update_flags flags)
+{
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, btree, k->k.p,
+ BTREE_ITER_CACHED|
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, k, flags);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
+ struct bkey_i *k, enum btree_update_flags flags)
+{
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
+ BTREE_ITER_CACHED|
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, k, flags);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+/**
+ * bch2_btree_insert - insert a key into the btree specified by @id
+ * @c: pointer to struct bch_fs
+ * @id: btree to insert into
+ * @k: key to insert
+ * @disk_res: must be non-NULL whenever inserting or potentially
+ * splitting data extents
+ * @flags: transaction commit flags
+ *
+ * Returns: 0 on success, error code on failure
+ */
+int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
+ struct disk_reservation *disk_res, int flags)
+{
+ return bch2_trans_do(c, disk_res, NULL, flags,
+ bch2_btree_insert_trans(trans, id, k, 0));
+}
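A minimal usage sketch of this wrapper (the btree id, position and key type are placeholders for illustration; a NULL disk reservation is only acceptable here because no data extents are created or split):

	struct bkey_i k;

	bkey_init(&k.k);
	k.k.type = KEY_TYPE_set;	/* value-less key, as in bch2_btree_bit_mod() */
	k.k.p	 = POS(0, 123);		/* hypothetical position */

	int ret = bch2_btree_insert(c, btree_id, &k, NULL, 0);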
+
+int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
+ unsigned len, unsigned update_flags)
+{
+ struct bkey_i *k;
+
+ k = bch2_trans_kmalloc(trans, sizeof(*k));
+ if (IS_ERR(k))
+ return PTR_ERR(k);
+
+ bkey_init(&k->k);
+ k->k.p = iter->pos;
+ bch2_key_resize(&k->k, len);
+ return bch2_trans_update(trans, iter, k, update_flags);
+}
+
+int bch2_btree_delete_at(struct btree_trans *trans,
+ struct btree_iter *iter, unsigned update_flags)
+{
+ return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
+}
+
+int bch2_btree_delete(struct btree_trans *trans,
+ enum btree_id btree, struct bpos pos,
+ unsigned update_flags)
+{
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, btree, pos,
+ BTREE_ITER_CACHED|
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_btree_delete_at(trans, &iter, update_flags);
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
+int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
+ struct bpos start, struct bpos end,
+ unsigned update_flags,
+ u64 *journal_seq)
+{
+ u32 restart_count = trans->restart_count;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
+ while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) {
+ struct disk_reservation disk_res =
+ bch2_disk_reservation_init(trans->c, 0);
+ struct bkey_i delete;
+
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ bkey_init(&delete.k);
+
+ /*
+ * This could probably be more efficient for extents:
+ */
+
+ /*
+ * For extents, iter.pos won't necessarily be the same as
+ * bkey_start_pos(k.k) (for non-extents they always will be the
+ * same). It's important that we delete starting from iter.pos
+ * because the range we want to delete could start in the middle
+ * of k.
+ *
+ * (bch2_btree_iter_peek() does guarantee that iter.pos >=
+ * bkey_start_pos(k.k)).
+ */
+ delete.k.p = iter.pos;
+
+ if (iter.flags & BTREE_ITER_IS_EXTENTS)
+ bch2_key_resize(&delete.k,
+ bpos_min(end, k.k->p).offset -
+ iter.pos.offset);
+
+ ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
+ bch2_trans_commit(trans, &disk_res, journal_seq,
+ BCH_TRANS_COMMIT_no_enospc);
+ bch2_disk_reservation_put(trans->c, &disk_res);
+err:
+ /*
+ * the bch2_trans_begin() call is in a weird place because we
+ * need to call it after every transaction commit, to avoid path
+ * overflow, but don't want to call it if the delete operation
+ * is a no-op and we have no work to do:
+ */
+ bch2_trans_begin(trans);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret ?: trans_was_restarted(trans, restart_count);
+}
+
+/*
+ * bch2_btree_delete_range - delete everything within a given range
+ *
+ * Range is a half open interval - [start, end)
+ */
+int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
+ struct bpos start, struct bpos end,
+ unsigned update_flags,
+ u64 *journal_seq)
+{
+ int ret = bch2_trans_run(c,
+ bch2_btree_delete_range_trans(trans, id, start, end,
+ update_flags, journal_seq));
+ if (ret == -BCH_ERR_transaction_restart_nested)
+ ret = 0;
+ return ret;
+}
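A hedged usage sketch (btree id and positions are illustrative), deleting every key for inode 42 using the half-open convention documented above:

	int ret = bch2_btree_delete_range(c, btree_id,
					  POS(42, 0), POS(42, U64_MAX),
					  0, NULL);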
+
+int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
+ struct bpos pos, bool set)
+{
+ struct bkey_i k;
+
+ bkey_init(&k.k);
+ k.k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
+ k.k.p = pos;
+
+ return bch2_trans_update_buffered(trans, btree, &k);
+}
+
+static int __bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf, unsigned u64s)
+{
+ struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s));
+ int ret = PTR_ERR_OR_ZERO(e);
+ if (ret)
+ return ret;
+
+ struct jset_entry_log *l = container_of(e, struct jset_entry_log, entry);
+ journal_entry_init(e, BCH_JSET_ENTRY_log, 0, 1, u64s);
+ memcpy(l->d, buf->buf, buf->pos);
+ return 0;
+}
+
+__printf(3, 0)
+static int
+__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
+ va_list args)
+{
+ struct printbuf buf = PRINTBUF;
+ prt_vprintf(&buf, fmt, args);
+
+ unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
+ prt_chars(&buf, '\0', u64s * sizeof(u64) - buf.pos);
+
+ int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
+ if (ret)
+ goto err;
+
+ if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
+ ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s));
+ if (ret)
+ goto err;
+
+ struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries);
+ journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s);
+ memcpy(l->d, buf.buf, buf.pos);
+ c->journal.early_journal_entries.nr += jset_u64s(u64s);
+ } else {
+ ret = bch2_trans_do(c, NULL, NULL,
+ BCH_TRANS_COMMIT_lazy_rw|commit_flags,
+ __bch2_trans_log_msg(trans, &buf, u64s));
+ }
+err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+__printf(2, 3)
+int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = __bch2_fs_log_msg(c, 0, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+/*
+ * Use for logging messages during recovery to enable reserved space and avoid
+ * blocking.
+ */
+__printf(2, 3)
+int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = __bch2_fs_log_msg(c, BCH_WATERMARK_reclaim, fmt, args);
+ va_end(args);
+ return ret;
+}
diff --git a/c_src/libbcachefs/btree_update.h b/c_src/libbcachefs/btree_update.h
new file mode 100644
index 00000000..b9382b7b
--- /dev/null
+++ b/c_src/libbcachefs/btree_update.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_UPDATE_H
+#define _BCACHEFS_BTREE_UPDATE_H
+
+#include "btree_iter.h"
+#include "journal.h"
+
+struct bch_fs;
+struct btree;
+
+void bch2_btree_node_prep_for_write(struct btree_trans *,
+ struct btree_path *, struct btree *);
+bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
+ struct btree *, struct btree_node_iter *,
+ struct bkey_i *);
+
+int bch2_btree_node_flush0(struct journal *, struct journal_entry_pin *, u64);
+int bch2_btree_node_flush1(struct journal *, struct journal_entry_pin *, u64);
+void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
+
+void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
+ struct bkey_i *, u64);
+
+#define BCH_TRANS_COMMIT_FLAGS() \
+ x(no_enospc, "don't check for enospc") \
+ x(no_check_rw, "don't attempt to take a ref on c->writes") \
+ x(lazy_rw, "go read-write if we haven't yet - only for use in recovery") \
+ x(no_journal_res, "don't take a journal reservation, instead " \
+ "pin journal entry referred to by trans->journal_res.seq") \
+ x(journal_reclaim, "operation required for journal reclaim; may return error " \
+ "instead of deadlocking if BCH_WATERMARK_reclaim not specified")\
+
+enum __bch_trans_commit_flags {
+ /* First bits for bch_watermark: */
+ __BCH_TRANS_COMMIT_FLAGS_START = BCH_WATERMARK_BITS,
+#define x(n, ...) __BCH_TRANS_COMMIT_##n,
+ BCH_TRANS_COMMIT_FLAGS()
+#undef x
+};
+
+enum bch_trans_commit_flags {
+#define x(n, ...) BCH_TRANS_COMMIT_##n = BIT(__BCH_TRANS_COMMIT_##n),
+ BCH_TRANS_COMMIT_FLAGS()
+#undef x
+};
+
+int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
+ unsigned, unsigned);
+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
+int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);
+
+int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
+ struct bkey_i *, enum btree_update_flags);
+
+int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *,
+ enum btree_update_flags);
+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
+ struct disk_reservation *, int flags);
+
+int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
+ struct bpos, struct bpos, unsigned, u64 *);
+int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
+ struct bpos, struct bpos, unsigned, u64 *);
+
+int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
+
+static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
+ enum btree_id btree, struct bpos pos)
+{
+ return bch2_btree_bit_mod(trans, btree, pos, false);
+}
+
+int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
+ struct bpos, struct bpos);
+
+/*
+ * For use when splitting extents in existing snapshots:
+ *
+ * If @old_pos is an interior snapshot node, iterate over descendant snapshot
+ * nodes: for every descendant snapshot in which @old_pos is overwritten and
+ * not visible, emit a whiteout at @new_pos.
+ */
+static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
+ enum btree_id btree,
+ struct bpos old_pos,
+ struct bpos new_pos)
+{
+ if (!btree_type_has_snapshots(btree) ||
+ bkey_eq(old_pos, new_pos))
+ return 0;
+
+ return __bch2_insert_snapshot_whiteouts(trans, btree, old_pos, new_pos);
+}
+
+int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *,
+ enum btree_update_flags,
+ struct bkey_s_c, struct bkey_s_c);
+
+int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *,
+ enum btree_id, struct bpos);
+
+int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
+ struct bkey_i *, enum btree_update_flags);
+
+struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned);
+
+static inline struct jset_entry *btree_trans_journal_entries_top(struct btree_trans *trans)
+{
+ return (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+}
+
+static inline struct jset_entry *
+bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
+{
+ if (!trans->journal_entries ||
+ trans->journal_entries_u64s + u64s > trans->journal_entries_size)
+ return __bch2_trans_jset_entry_alloc(trans, u64s);
+
+ struct jset_entry *e = btree_trans_journal_entries_top(trans);
+ trans->journal_entries_u64s += u64s;
+ return e;
+}
+
+int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);
+
+static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
+ enum btree_id btree,
+ struct bkey_i *k)
+{
+ if (unlikely(trans->journal_replay_not_finished))
+ return bch2_btree_insert_clone_trans(trans, btree, k);
+
+ struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s));
+ int ret = PTR_ERR_OR_ZERO(e);
+ if (ret)
+ return ret;
+
+ journal_entry_init(e, BCH_JSET_ENTRY_write_buffer_keys, btree, 0, k->k.u64s);
+ bkey_copy(e->start, k);
+ return 0;
+}
+
+void bch2_trans_commit_hook(struct btree_trans *,
+ struct btree_trans_commit_hook *);
+int __bch2_trans_commit(struct btree_trans *, unsigned);
+
+__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
+__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
+
+/**
+ * bch2_trans_commit - insert keys at given iterator positions
+ *
+ * This is the main entry point for btree updates.
+ *
+ * Return values:
+ * -EROFS: filesystem read only
+ * -EIO: journal or btree node IO error
+ */
+static inline int bch2_trans_commit(struct btree_trans *trans,
+ struct disk_reservation *disk_res,
+ u64 *journal_seq,
+ unsigned flags)
+{
+ trans->disk_res = disk_res;
+ trans->journal_seq = journal_seq;
+
+ return __bch2_trans_commit(trans, flags);
+}
+
+#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \
+ lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
+ (_journal_seq), (_flags)))
+
+#define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \
+ nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
+ (_journal_seq), (_flags)))
+
+#define bch2_trans_run(_c, _do) \
+({ \
+ struct btree_trans *trans = bch2_trans_get(_c); \
+ int _ret = (_do); \
+ bch2_trans_put(trans); \
+ _ret; \
+})
+
+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
+ bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do))
+
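To make the composition concrete, bch2_trans_do() is just bch2_trans_run() wrapped around commit_do(); written out long-hand (btree id and key are placeholders), the sketch below is equivalent to bch2_trans_do(c, NULL, NULL, 0, bch2_btree_insert_trans(trans, btree_id, &k, 0)):

	struct btree_trans *trans = bch2_trans_get(c);
	int ret = commit_do(trans, NULL, NULL, 0,
			    bch2_btree_insert_trans(trans, btree_id, &k, 0));
	bch2_trans_put(trans);
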
+#define trans_for_each_update(_trans, _i) \
+ for (struct btree_insert_entry *_i = (_trans)->updates; \
+ (_i) < (_trans)->updates + (_trans)->nr_updates; \
+ (_i)++)
+
+static inline void bch2_trans_reset_updates(struct btree_trans *trans)
+{
+ trans_for_each_update(trans, i)
+ bch2_path_put(trans, i->path, true);
+
+ trans->nr_updates = 0;
+ trans->journal_entries_u64s = 0;
+ trans->hooks = NULL;
+ trans->extra_disk_res = 0;
+
+ if (trans->fs_usage_deltas) {
+ trans->fs_usage_deltas->used = 0;
+ memset((void *) trans->fs_usage_deltas +
+ offsetof(struct replicas_delta_list, memset_start), 0,
+ (void *) &trans->fs_usage_deltas->memset_end -
+ (void *) &trans->fs_usage_deltas->memset_start);
+ }
+}
+
+static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
+ unsigned type, unsigned min_bytes)
+{
+ unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k));
+ struct bkey_i *mut;
+
+ if (type && k.k->type != type)
+ return ERR_PTR(-ENOENT);
+
+ mut = bch2_trans_kmalloc_nomemzero(trans, bytes);
+ if (!IS_ERR(mut)) {
+ bkey_reassemble(mut, k);
+
+ if (unlikely(bytes > bkey_bytes(k.k))) {
+ memset((void *) mut + bkey_bytes(k.k), 0,
+ bytes - bkey_bytes(k.k));
+ mut->k.u64s = DIV_ROUND_UP(bytes, sizeof(u64));
+ }
+ }
+ return mut;
+}
+
+static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
+{
+ return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0);
+}
+
+#define bch2_bkey_make_mut_noupdate_typed(_trans, _k, _type) \
+ bkey_i_to_##_type(__bch2_bkey_make_mut_noupdate(_trans, _k, \
+ KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))
+
+static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c *k, unsigned flags,
+ unsigned type, unsigned min_bytes)
+{
+ struct bkey_i *mut = __bch2_bkey_make_mut_noupdate(trans, *k, type, min_bytes);
+ int ret;
+
+ if (IS_ERR(mut))
+ return mut;
+
+ ret = bch2_trans_update(trans, iter, mut, flags);
+ if (ret)
+ return ERR_PTR(ret);
+
+ *k = bkey_i_to_s_c(mut);
+ return mut;
+}
+
+static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c *k, unsigned flags)
+{
+ return __bch2_bkey_make_mut(trans, iter, k, flags, 0, 0);
+}
+
+#define bch2_bkey_make_mut_typed(_trans, _iter, _k, _flags, _type) \
+ bkey_i_to_##_type(__bch2_bkey_make_mut(_trans, _iter, _k, _flags,\
+ KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))
+
+static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned btree_id, struct bpos pos,
+ unsigned flags, unsigned type, unsigned min_bytes)
+{
+ struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter,
+ btree_id, pos, flags|BTREE_ITER_INTENT, type);
+ struct bkey_i *ret = IS_ERR(k.k)
+ ? ERR_CAST(k.k)
+ : __bch2_bkey_make_mut_noupdate(trans, k, 0, min_bytes);
+ if (IS_ERR(ret))
+ bch2_trans_iter_exit(trans, iter);
+ return ret;
+}
+
+static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned btree_id, struct bpos pos,
+ unsigned flags)
+{
+ return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0);
+}
+
+static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned btree_id, struct bpos pos,
+ unsigned flags, unsigned type, unsigned min_bytes)
+{
+ struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter,
+ btree_id, pos, flags|BTREE_ITER_INTENT, type, min_bytes);
+ int ret;
+
+ if (IS_ERR(mut))
+ return mut;
+
+ ret = bch2_trans_update(trans, iter, mut, flags);
+ if (ret) {
+ bch2_trans_iter_exit(trans, iter);
+ return ERR_PTR(ret);
+ }
+
+ return mut;
+}
+
+static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned btree_id, struct bpos pos,
+ unsigned flags, unsigned min_bytes)
+{
+ return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes);
+}
+
+static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans,
+ struct btree_iter *iter,
+ unsigned btree_id, struct bpos pos,
+ unsigned flags)
+{
+ return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0);
+}
+
+#define bch2_bkey_get_mut_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\
+ bkey_i_to_##_type(__bch2_bkey_get_mut(_trans, _iter, \
+ _btree_id, _pos, _flags, \
+ KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))
+
+static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter,
+ unsigned flags, unsigned type, unsigned val_size)
+{
+ struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k) + val_size);
+ int ret;
+
+ if (IS_ERR(k))
+ return k;
+
+ bkey_init(&k->k);
+ k->k.p = iter->pos;
+ k->k.type = type;
+ set_bkey_val_bytes(&k->k, val_size);
+
+ ret = bch2_trans_update(trans, iter, k, flags);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+ return k;
+}
+
+#define bch2_bkey_alloc(_trans, _iter, _flags, _type) \
+ bkey_i_to_##_type(__bch2_bkey_alloc(_trans, _iter, _flags, \
+ KEY_TYPE_##_type, sizeof(struct bch_##_type)))
+
+#endif /* _BCACHEFS_BTREE_UPDATE_H */
diff --git a/c_src/libbcachefs/btree_update_interior.c b/c_src/libbcachefs/btree_update_interior.c
new file mode 100644
index 00000000..44f9dfa2
--- /dev/null
+++ b/c_src/libbcachefs/btree_update_interior.c
@@ -0,0 +1,2496 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_methods.h"
+#include "btree_cache.h"
+#include "btree_gc.h"
+#include "btree_journal_iter.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_locking.h"
+#include "buckets.h"
+#include "clock.h"
+#include "error.h"
+#include "extents.h"
+#include "journal.h"
+#include "journal_reclaim.h"
+#include "keylist.h"
+#include "replicas.h"
+#include "super-io.h"
+#include "trace.h"
+
+#include <linux/random.h>
+
+static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
+ btree_path_idx_t, struct btree *,
+ struct keylist *, unsigned);
+static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
+
+static btree_path_idx_t get_unlocked_mut_path(struct btree_trans *trans,
+ enum btree_id btree_id,
+ unsigned level,
+ struct bpos pos)
+{
+ btree_path_idx_t path_idx = bch2_path_get(trans, btree_id, pos, level + 1, level,
+ BTREE_ITER_NOPRESERVE|
+ BTREE_ITER_INTENT, _RET_IP_);
+ path_idx = bch2_btree_path_make_mut(trans, path_idx, true, _RET_IP_);
+
+ struct btree_path *path = trans->paths + path_idx;
+ bch2_btree_path_downgrade(trans, path);
+ __bch2_btree_path_unlock(trans, path);
+ return path_idx;
+}
+
+/* Debug code: */
+
+/*
+ * Verify that child nodes correctly span parent node's range:
+ */
+static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+ struct bpos next_node = b->data->min_key;
+ struct btree_node_iter iter;
+ struct bkey_s_c k;
+ struct bkey_s_c_btree_ptr_v2 bp;
+ struct bkey unpacked;
+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+
+ BUG_ON(!b->c.level);
+
+ if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+ return;
+
+ bch2_btree_node_iter_init_from_start(&iter, b);
+
+ while (1) {
+ k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked);
+ if (k.k->type != KEY_TYPE_btree_ptr_v2)
+ break;
+ bp = bkey_s_c_to_btree_ptr_v2(k);
+
+ if (!bpos_eq(next_node, bp.v->min_key)) {
+ bch2_dump_btree_node(c, b);
+ bch2_bpos_to_text(&buf1, next_node);
+ bch2_bpos_to_text(&buf2, bp.v->min_key);
+ panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf);
+ }
+
+ bch2_btree_node_iter_advance(&iter, b);
+
+ if (bch2_btree_node_iter_end(&iter)) {
+ if (!bpos_eq(k.k->p, b->key.k.p)) {
+ bch2_dump_btree_node(c, b);
+ bch2_bpos_to_text(&buf1, b->key.k.p);
+ bch2_bpos_to_text(&buf2, k.k->p);
+ panic("expected end %s got %s\n", buf1.buf, buf2.buf);
+ }
+ break;
+ }
+
+ next_node = bpos_successor(k.k->p);
+ }
+#endif
+}
+
+/* Calculate ideal packed bkey format for new btree nodes: */
+
+static void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
+{
+ struct bkey_packed *k;
+ struct bset_tree *t;
+ struct bkey uk;
+
+ for_each_bset(b, t)
+ bset_tree_for_each_key(b, t, k)
+ if (!bkey_deleted(k)) {
+ uk = bkey_unpack_key(b, k);
+ bch2_bkey_format_add_key(s, &uk);
+ }
+}
+
+static struct bkey_format bch2_btree_calc_format(struct btree *b)
+{
+ struct bkey_format_state s;
+
+ bch2_bkey_format_init(&s);
+ bch2_bkey_format_add_pos(&s, b->data->min_key);
+ bch2_bkey_format_add_pos(&s, b->data->max_key);
+ __bch2_btree_calc_format(&s, b);
+
+ return bch2_bkey_format_done(&s);
+}
+
+static size_t btree_node_u64s_with_format(struct btree_nr_keys nr,
+ struct bkey_format *old_f,
+ struct bkey_format *new_f)
+{
+ /* stupid integer promotion rules */
+ ssize_t delta =
+ (((int) new_f->key_u64s - old_f->key_u64s) *
+ (int) nr.packed_keys) +
+ (((int) new_f->key_u64s - BKEY_U64s) *
+ (int) nr.unpacked_keys);
+
+ BUG_ON(delta + nr.live_u64s < 0);
+
+ return nr.live_u64s + delta;
+}
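A quick worked example of the delta above (numbers purely illustrative): with 100 packed keys, 10 unpacked keys, old key_u64s = 3, new key_u64s = 2 and BKEY_U64s taken as 5, delta = (2 - 3) * 100 + (2 - 5) * 10 = -130, so the repacked node would need 130 fewer u64s than nr.live_u64s.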
+
+/**
+ * bch2_btree_node_format_fits - check if we could rewrite node with a new format
+ *
+ * @c: filesystem handle
+ * @b: btree node to rewrite
+ * @nr: number of keys for new node (i.e. b->nr)
+ * @new_f: bkey format to translate keys to
+ *
+ * Returns: true if all re-packed keys will be able to fit in a new node.
+ *
+ * Assumes all keys will successfully pack with the new format.
+ */
+static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
+ struct btree_nr_keys nr,
+ struct bkey_format *new_f)
+{
+ size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f);
+
+ return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c);
+}
+
+/* Btree node freeing/allocation: */
+
+static void __btree_node_free(struct btree_trans *trans, struct btree *b)
+{
+ struct bch_fs *c = trans->c;
+
+ trace_and_count(c, btree_node_free, trans, b);
+
+ BUG_ON(btree_node_write_blocked(b));
+ BUG_ON(btree_node_dirty(b));
+ BUG_ON(btree_node_need_write(b));
+ BUG_ON(b == btree_node_root(c, b));
+ BUG_ON(b->ob.nr);
+ BUG_ON(!list_empty(&b->write_blocked));
+ BUG_ON(b->will_make_reachable);
+
+ clear_btree_node_noevict(b);
+
+ mutex_lock(&c->btree_cache.lock);
+ list_move(&b->list, &c->btree_cache.freeable);
+ mutex_unlock(&c->btree_cache.lock);
+}
+
+static void bch2_btree_node_free_inmem(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b)
+{
+ struct bch_fs *c = trans->c;
+ unsigned i, level = b->c.level;
+
+ bch2_btree_node_lock_write_nofail(trans, path, &b->c);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+ __btree_node_free(trans, b);
+ six_unlock_write(&b->c.lock);
+ mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
+
+ trans_for_each_path(trans, path, i)
+ if (path->l[level].b == b) {
+ btree_node_unlock(trans, path, level);
+ path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
+ }
+}
+
+static void bch2_btree_node_free_never_used(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree *b)
+{
+ struct bch_fs *c = as->c;
+ struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
+ struct btree_path *path;
+ unsigned i, level = b->c.level;
+
+ BUG_ON(!list_empty(&b->write_blocked));
+ BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));
+
+ b->will_make_reachable = 0;
+ closure_put(&as->cl);
+
+ clear_btree_node_will_make_reachable(b);
+ clear_btree_node_accessed(b);
+ clear_btree_node_dirty_acct(c, b);
+ clear_btree_node_need_write(b);
+
+ mutex_lock(&c->btree_cache.lock);
+ list_del_init(&b->list);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+ mutex_unlock(&c->btree_cache.lock);
+
+ BUG_ON(p->nr >= ARRAY_SIZE(p->b));
+ p->b[p->nr++] = b;
+
+ six_unlock_intent(&b->c.lock);
+
+ trans_for_each_path(trans, path, i)
+ if (path->l[level].b == b) {
+ btree_node_unlock(trans, path, level);
+ path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
+ }
+}
+
+static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
+ struct disk_reservation *res,
+ struct closure *cl,
+ bool interior_node,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct write_point *wp;
+ struct btree *b;
+ BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
+ struct open_buckets obs = { .nr = 0 };
+ struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
+ enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
+ unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim
+ ? BTREE_NODE_RESERVE
+ : 0;
+ int ret;
+
+ mutex_lock(&c->btree_reserve_cache_lock);
+ if (c->btree_reserve_cache_nr > nr_reserve) {
+ struct btree_alloc *a =
+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
+
+ obs = a->ob;
+ bkey_copy(&tmp.k, &a->k);
+ mutex_unlock(&c->btree_reserve_cache_lock);
+ goto mem_alloc;
+ }
+ mutex_unlock(&c->btree_reserve_cache_lock);
+
+retry:
+ ret = bch2_alloc_sectors_start_trans(trans,
+ c->opts.metadata_target ?:
+ c->opts.foreground_target,
+ 0,
+ writepoint_ptr(&c->btree_write_point),
+ &devs_have,
+ res->nr_replicas,
+ c->opts.metadata_replicas_required,
+ watermark, 0, cl, &wp);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+
+ if (wp->sectors_free < btree_sectors(c)) {
+ struct open_bucket *ob;
+ unsigned i;
+
+ open_bucket_for_each(c, &wp->ptrs, ob, i)
+ if (ob->sectors_free < btree_sectors(c))
+ ob->sectors_free = 0;
+
+ bch2_alloc_sectors_done(c, wp);
+ goto retry;
+ }
+
+ bkey_btree_ptr_v2_init(&tmp.k);
+ bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false);
+
+ bch2_open_bucket_get(c, wp, &obs);
+ bch2_alloc_sectors_done(c, wp);
+mem_alloc:
+ b = bch2_btree_node_mem_alloc(trans, interior_node);
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+
+ /* we hold cannibalize_lock: */
+ BUG_ON(IS_ERR(b));
+ BUG_ON(b->ob.nr);
+
+ bkey_copy(&b->key, &tmp.k);
+ b->ob = obs;
+
+ return b;
+}
+
+static struct btree *bch2_btree_node_alloc(struct btree_update *as,
+ struct btree_trans *trans,
+ unsigned level)
+{
+ struct bch_fs *c = as->c;
+ struct btree *b;
+ struct prealloc_nodes *p = &as->prealloc_nodes[!!level];
+ int ret;
+
+ BUG_ON(level >= BTREE_MAX_DEPTH);
+ BUG_ON(!p->nr);
+
+ b = p->b[--p->nr];
+
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+
+ set_btree_node_accessed(b);
+ set_btree_node_dirty_acct(c, b);
+ set_btree_node_need_write(b);
+
+ bch2_bset_init_first(b, &b->data->keys);
+ b->c.level = level;
+ b->c.btree_id = as->btree_id;
+ b->version_ondisk = c->sb.version;
+
+ memset(&b->nr, 0, sizeof(b->nr));
+ b->data->magic = cpu_to_le64(bset_magic(c));
+ memset(&b->data->_ptr, 0, sizeof(b->data->_ptr));
+ b->data->flags = 0;
+ SET_BTREE_NODE_ID(b->data, as->btree_id);
+ SET_BTREE_NODE_LEVEL(b->data, level);
+
+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+ struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
+
+ bp->v.mem_ptr = 0;
+ bp->v.seq = b->data->keys.seq;
+ bp->v.sectors_written = 0;
+ }
+
+ SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
+
+ bch2_btree_build_aux_trees(b);
+
+ ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
+ BUG_ON(ret);
+
+ trace_and_count(c, btree_node_alloc, trans, b);
+ bch2_increment_clock(c, btree_sectors(c), WRITE);
+ return b;
+}
+
+static void btree_set_min(struct btree *b, struct bpos pos)
+{
+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2)
+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos;
+ b->data->min_key = pos;
+}
+
+static void btree_set_max(struct btree *b, struct bpos pos)
+{
+ b->key.k.p = pos;
+ b->data->max_key = pos;
+}
+
+static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree *b)
+{
+ struct btree *n = bch2_btree_node_alloc(as, trans, b->c.level);
+ struct bkey_format format = bch2_btree_calc_format(b);
+
+ /*
+ * The keys might expand with the new format - if they wouldn't fit in
+ * the btree node anymore, use the old format for now:
+ */
+ if (!bch2_btree_node_format_fits(as->c, b, b->nr, &format))
+ format = b->format;
+
+ SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
+
+ btree_set_min(n, b->data->min_key);
+ btree_set_max(n, b->data->max_key);
+
+ n->data->format = format;
+ btree_node_set_format(n, format);
+
+ bch2_btree_sort_into(as->c, n, b);
+
+ btree_node_reset_sib_u64s(n);
+ return n;
+}
+
+static struct btree *__btree_root_alloc(struct btree_update *as,
+ struct btree_trans *trans, unsigned level)
+{
+ struct btree *b = bch2_btree_node_alloc(as, trans, level);
+
+ btree_set_min(b, POS_MIN);
+ btree_set_max(b, SPOS_MAX);
+ b->data->format = bch2_btree_calc_format(b);
+
+ btree_node_set_format(b, b->data->format);
+ bch2_btree_build_aux_trees(b);
+
+ return b;
+}
+
+static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *trans)
+{
+ struct bch_fs *c = as->c;
+ struct prealloc_nodes *p;
+
+ for (p = as->prealloc_nodes;
+ p < as->prealloc_nodes + ARRAY_SIZE(as->prealloc_nodes);
+ p++) {
+ while (p->nr) {
+ struct btree *b = p->b[--p->nr];
+
+ mutex_lock(&c->btree_reserve_cache_lock);
+
+ if (c->btree_reserve_cache_nr <
+ ARRAY_SIZE(c->btree_reserve_cache)) {
+ struct btree_alloc *a =
+ &c->btree_reserve_cache[c->btree_reserve_cache_nr++];
+
+ a->ob = b->ob;
+ b->ob.nr = 0;
+ bkey_copy(&a->k, &b->key);
+ } else {
+ bch2_open_buckets_put(c, &b->ob);
+ }
+
+ mutex_unlock(&c->btree_reserve_cache_lock);
+
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+ __btree_node_free(trans, b);
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+ }
+ }
+}
+
+static int bch2_btree_reserve_get(struct btree_trans *trans,
+ struct btree_update *as,
+ unsigned nr_nodes[2],
+ unsigned flags,
+ struct closure *cl)
+{
+ struct btree *b;
+ unsigned interior;
+ int ret = 0;
+
+ BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
+
+ /*
+ * Protects reaping from the btree node cache and using the btree node
+ * open bucket reserve:
+ */
+ ret = bch2_btree_cache_cannibalize_lock(trans, cl);
+ if (ret)
+ return ret;
+
+ for (interior = 0; interior < 2; interior++) {
+ struct prealloc_nodes *p = as->prealloc_nodes + interior;
+
+ while (p->nr < nr_nodes[interior]) {
+ b = __bch2_btree_node_alloc(trans, &as->disk_res, cl,
+ interior, flags);
+ if (IS_ERR(b)) {
+ ret = PTR_ERR(b);
+ goto err;
+ }
+
+ p->b[p->nr++] = b;
+ }
+ }
+err:
+ bch2_btree_cache_cannibalize_unlock(trans);
+ return ret;
+}
+
+/* Asynchronous interior node update machinery */
+
+static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans)
+{
+ struct bch_fs *c = as->c;
+
+ if (as->took_gc_lock)
+ up_read(&c->gc_lock);
+ as->took_gc_lock = false;
+
+ bch2_journal_pin_drop(&c->journal, &as->journal);
+ bch2_journal_pin_flush(&c->journal, &as->journal);
+ bch2_disk_reservation_put(c, &as->disk_res);
+ bch2_btree_reserve_put(as, trans);
+
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
+ as->start_time);
+
+ mutex_lock(&c->btree_interior_update_lock);
+ list_del(&as->unwritten_list);
+ list_del(&as->list);
+
+ closure_debug_destroy(&as->cl);
+ mempool_free(as, &c->btree_interior_update_pool);
+
+ /*
+ * Have to do the wakeup with btree_interior_update_lock still held,
+ * since being on btree_interior_update_list is our ref on @c:
+ */
+ closure_wake_up(&c->btree_interior_update_wait);
+
+ mutex_unlock(&c->btree_interior_update_lock);
+}
+
+static void btree_update_add_key(struct btree_update *as,
+ struct keylist *keys, struct btree *b)
+{
+ struct bkey_i *k = &b->key;
+
+ BUG_ON(bch2_keylist_u64s(keys) + k->k.u64s >
+ ARRAY_SIZE(as->_old_keys));
+
+ bkey_copy(keys->top, k);
+ bkey_i_to_btree_ptr_v2(keys->top)->v.mem_ptr = b->c.level + 1;
+
+ bch2_keylist_push(keys);
+}
+
+/*
+ * The transactional part of an interior btree node update, where we journal the
+ * update we did to the interior node and update alloc info:
+ */
+static int btree_update_nodes_written_trans(struct btree_trans *trans,
+ struct btree_update *as)
+{
+ struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, as->journal_u64s);
+ int ret = PTR_ERR_OR_ZERO(e);
+ if (ret)
+ return ret;
+
+ memcpy(e, as->journal_entries, as->journal_u64s * sizeof(u64));
+
+ trans->journal_pin = &as->journal;
+
+ for_each_keylist_key(&as->old_keys, k) {
+ unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
+
+ ret = bch2_key_trigger_old(trans, as->btree_id, level, bkey_i_to_s_c(k),
+ BTREE_TRIGGER_TRANSACTIONAL);
+ if (ret)
+ return ret;
+ }
+
+ for_each_keylist_key(&as->new_keys, k) {
+ unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
+
+ ret = bch2_key_trigger_new(trans, as->btree_id, level, bkey_i_to_s(k),
+ BTREE_TRIGGER_TRANSACTIONAL);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void btree_update_nodes_written(struct btree_update *as)
+{
+ struct bch_fs *c = as->c;
+ struct btree *b;
+ struct btree_trans *trans = bch2_trans_get(c);
+ u64 journal_seq = 0;
+ unsigned i;
+ int ret;
+
+ /*
+ * If we're already in an error state, it might be because a btree node
+ * was never written, and we might be trying to free that same btree
+ * node here, but it won't have been marked as allocated and we'll see
+ * spurious disk usage inconsistencies in the transactional part below
+ * if we don't skip it:
+ */
+ ret = bch2_journal_error(&c->journal);
+ if (ret)
+ goto err;
+
+ /*
+ * Wait for any in flight writes to finish before we free the old nodes
+ * on disk:
+ */
+ for (i = 0; i < as->nr_old_nodes; i++) {
+ __le64 seq;
+
+ b = as->old_nodes[i];
+
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
+ seq = b->data ? b->data->keys.seq : 0;
+ six_unlock_read(&b->c.lock);
+
+ if (seq == as->old_nodes_seq[i])
+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
+ TASK_UNINTERRUPTIBLE);
+ }
+
+ /*
+ * We did an update to a parent node where the pointers we added pointed
+ * to child nodes that weren't written yet: now, the child nodes have
+ * been written so we can write out the update to the interior node.
+ */
+
+ /*
+ * We can't call into journal reclaim here: we'd block on the journal
+ * reclaim lock, but we may need to release the open buckets we have
+ * pinned in order for other btree updates to make forward progress, and
+ * journal reclaim does btree updates when flushing bkey_cached entries,
+ * which may require allocations as well.
+ */
+ ret = commit_do(trans, &as->disk_res, &journal_seq,
+ BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_journal_reclaim,
+ btree_update_nodes_written_trans(trans, as));
+ bch2_trans_unlock(trans);
+
+ bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
+ "%s(): error %s", __func__, bch2_err_str(ret));
+err:
+ if (as->b) {
+
+ b = as->b;
+ btree_path_idx_t path_idx = get_unlocked_mut_path(trans,
+ as->btree_id, b->c.level, b->key.k.p);
+ struct btree_path *path = trans->paths + path_idx;
+ /*
+ * @b is the node we did the final insert into:
+ *
+ * On failure to get a journal reservation, we still have to
+ * unblock the write and allow most of the write path to happen
+ * so that shutdown works, but the i->journal_seq mechanism
+ * won't work to prevent the btree write from being visible (we
+ * didn't get a journal sequence number) - instead
+ * __bch2_btree_node_write() doesn't do the actual write if
+ * we're in journal error state:
+ */
+
+ /*
+ * Ensure transaction is unlocked before using
+ * btree_node_lock_nopath() (the use of which is always suspect,
+ * we need to work on removing this in the future)
+ *
+ * It should be, but get_unlocked_mut_path() -> bch2_path_get()
+ * calls bch2_path_upgrade(), before we call path_make_mut(), so
+ * we may rarely end up with a locked path besides the one we
+ * have here:
+ */
+ bch2_trans_unlock(trans);
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path, b->c.level, BTREE_NODE_INTENT_LOCKED);
+ path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
+ path->l[b->c.level].b = b;
+
+ bch2_btree_node_lock_write_nofail(trans, path, &b->c);
+
+ mutex_lock(&c->btree_interior_update_lock);
+
+ list_del(&as->write_blocked_list);
+ if (list_empty(&b->write_blocked))
+ clear_btree_node_write_blocked(b);
+
+ /*
+ * Node might have been freed, recheck under
+ * btree_interior_update_lock:
+ */
+ if (as->b == b) {
+ BUG_ON(!b->c.level);
+ BUG_ON(!btree_node_dirty(b));
+
+ if (!ret) {
+ struct bset *last = btree_bset_last(b);
+
+ last->journal_seq = cpu_to_le64(
+ max(journal_seq,
+ le64_to_cpu(last->journal_seq)));
+
+ bch2_btree_add_journal_pin(c, b, journal_seq);
+ } else {
+ /*
+ * If we didn't get a journal sequence number we
+ * can't write this btree node, because recovery
+ * won't know to ignore this write:
+ */
+ set_btree_node_never_write(b);
+ }
+ }
+
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED);
+ six_unlock_write(&b->c.lock);
+
+ btree_node_write_if_need(c, b, SIX_LOCK_intent);
+ btree_node_unlock(trans, path, b->c.level);
+ bch2_path_put(trans, path_idx, true);
+ }
+
+ bch2_journal_pin_drop(&c->journal, &as->journal);
+
+ mutex_lock(&c->btree_interior_update_lock);
+ for (i = 0; i < as->nr_new_nodes; i++) {
+ b = as->new_nodes[i];
+
+ BUG_ON(b->will_make_reachable != (unsigned long) as);
+ b->will_make_reachable = 0;
+ clear_btree_node_will_make_reachable(b);
+ }
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ for (i = 0; i < as->nr_new_nodes; i++) {
+ b = as->new_nodes[i];
+
+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
+ btree_node_write_if_need(c, b, SIX_LOCK_read);
+ six_unlock_read(&b->c.lock);
+ }
+
+ for (i = 0; i < as->nr_open_buckets; i++)
+ bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
+
+ bch2_btree_update_free(as, trans);
+ bch2_trans_put(trans);
+}
+
+static void btree_interior_update_work(struct work_struct *work)
+{
+ struct bch_fs *c =
+ container_of(work, struct bch_fs, btree_interior_update_work);
+ struct btree_update *as;
+
+ while (1) {
+ mutex_lock(&c->btree_interior_update_lock);
+ as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
+ struct btree_update, unwritten_list);
+ if (as && !as->nodes_written)
+ as = NULL;
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ if (!as)
+ break;
+
+ btree_update_nodes_written(as);
+ }
+}
+
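+/*
+ * Runs when the last reference on @as->cl is dropped, i.e. when every new node
+ * this update created has finished writing: mark the update as ready and kick
+ * btree_interior_update_work to finish it.
+ */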
+static CLOSURE_CALLBACK(btree_update_set_nodes_written)
+{
+ closure_type(as, struct btree_update, cl);
+ struct bch_fs *c = as->c;
+
+ mutex_lock(&c->btree_interior_update_lock);
+ as->nodes_written = true;
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
+}
+
+/*
+ * We're updating @b with pointers to nodes that haven't finished writing yet:
+ * block @b from being written until @as completes
+ */
+static void btree_update_updated_node(struct btree_update *as, struct btree *b)
+{
+ struct bch_fs *c = as->c;
+
+ mutex_lock(&c->btree_interior_update_lock);
+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+
+ BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(!btree_node_dirty(b));
+ BUG_ON(!b->c.level);
+
+ as->mode = BTREE_INTERIOR_UPDATING_NODE;
+ as->b = b;
+
+ set_btree_node_write_blocked(b);
+ list_add(&as->write_blocked_list, &b->write_blocked);
+
+ mutex_unlock(&c->btree_interior_update_lock);
+}
+
+static int bch2_update_reparent_journal_pin_flush(struct journal *j,
+ struct journal_entry_pin *_pin, u64 seq)
+{
+ return 0;
+}
+
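+/*
+ * @child was blocked on a node that @as is about to free: redirect it to wait
+ * on @as instead, and copy its journal pin to @as so the pinned journal
+ * entries stay pinned until this update completes.
+ */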
+static void btree_update_reparent(struct btree_update *as,
+ struct btree_update *child)
+{
+ struct bch_fs *c = as->c;
+
+ lockdep_assert_held(&c->btree_interior_update_lock);
+
+ child->b = NULL;
+ child->mode = BTREE_INTERIOR_UPDATING_AS;
+
+ bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal,
+ bch2_update_reparent_journal_pin_flush);
+}
+
+static void btree_update_updated_root(struct btree_update *as, struct btree *b)
+{
+ struct bkey_i *insert = &b->key;
+ struct bch_fs *c = as->c;
+
+ BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+
+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
+ ARRAY_SIZE(as->journal_entries));
+
+ as->journal_u64s +=
+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s],
+ BCH_JSET_ENTRY_btree_root,
+ b->c.btree_id, b->c.level,
+ insert, insert->k.u64s);
+
+ mutex_lock(&c->btree_interior_update_lock);
+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+
+ as->mode = BTREE_INTERIOR_UPDATING_ROOT;
+ mutex_unlock(&c->btree_interior_update_lock);
+}
+
+/*
+ * bch2_btree_update_add_new_node:
+ *
+ * This causes @as to wait on @b to be written, before it gets to
+ * bch2_btree_update_nodes_written
+ *
+ * Additionally, it sets b->will_make_reachable to prevent any additional writes
+ * to @b from happening besides the first until @b is reachable on disk
+ *
+ * And it adds @b to the list of @as's new nodes, so that we can update sector
+ * counts in bch2_btree_update_nodes_written:
+ */
+static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b)
+{
+ struct bch_fs *c = as->c;
+
+ closure_get(&as->cl);
+
+ mutex_lock(&c->btree_interior_update_lock);
+ BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes));
+ BUG_ON(b->will_make_reachable);
+
+ as->new_nodes[as->nr_new_nodes++] = b;
+ b->will_make_reachable = 1UL|(unsigned long) as;
+ set_btree_node_will_make_reachable(b);
+
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ btree_update_add_key(as, &as->new_keys, b);
+
+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+ unsigned bytes = vstruct_end(&b->data->keys) - (void *) b->data;
+ unsigned sectors = round_up(bytes, block_bytes(c)) >> 9;
+
+ bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
+ cpu_to_le16(sectors);
+ }
+}
+
+/*
+ * If @b was a new node (not yet reachable on disk), drop it from the list of
+ * nodes its btree_update was going to make reachable, and release the ref it
+ * held on that update:
+ */
+static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b)
+{
+ struct btree_update *as;
+ unsigned long v;
+ unsigned i;
+
+ mutex_lock(&c->btree_interior_update_lock);
+ /*
+ * When b->will_make_reachable != 0, it owns a ref on as->cl that's
+ * dropped when it gets written by bch2_btree_complete_write - the
+ * xchg() is for synchronization with bch2_btree_complete_write:
+ */
+ v = xchg(&b->will_make_reachable, 0);
+ clear_btree_node_will_make_reachable(b);
+ as = (struct btree_update *) (v & ~1UL);
+
+ if (!as) {
+ mutex_unlock(&c->btree_interior_update_lock);
+ return;
+ }
+
+ for (i = 0; i < as->nr_new_nodes; i++)
+ if (as->new_nodes[i] == b)
+ goto found;
+
+ BUG();
+found:
+ array_remove_item(as->new_nodes, as->nr_new_nodes, i);
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ if (v & 1)
+ closure_put(&as->cl);
+}
+
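+/*
+ * Transfer the open bucket references (for the space newly allocated to @b)
+ * from the node to the btree_update; they're released in
+ * btree_update_nodes_written() once the update completes.
+ */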
+static void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b)
+{
+ while (b->ob.nr)
+ as->open_buckets[as->nr_open_buckets++] =
+ b->ob.v[--b->ob.nr];
+}
+
+static int bch2_btree_update_will_free_node_journal_pin_flush(struct journal *j,
+ struct journal_entry_pin *_pin, u64 seq)
+{
+ return 0;
+}
+
+/*
+ * @b is being split/rewritten: it may have pointers to not-yet-written btree
+ * nodes and thus outstanding btree_updates - redirect @b's
+ * btree_updates to point to this btree_update:
+ */
+static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
+ struct btree *b)
+{
+ struct bch_fs *c = as->c;
+ struct btree_update *p, *n;
+ struct btree_write *w;
+
+ set_btree_node_dying(b);
+
+ if (btree_node_fake(b))
+ return;
+
+ mutex_lock(&c->btree_interior_update_lock);
+
+ /*
+ * Does this node have any btree_update operations preventing
+ * it from being written?
+ *
+ * If so, redirect them to point to this btree_update: we can
+ * write out our new nodes, but we won't make them visible until those
+ * operations complete
+ */
+ list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) {
+ list_del_init(&p->write_blocked_list);
+ btree_update_reparent(as, p);
+
+ /*
+ * for flush_held_btree_writes() waiting on updates to flush or
+ * nodes to be writeable:
+ */
+ closure_wake_up(&c->btree_interior_update_wait);
+ }
+
+ clear_btree_node_dirty_acct(c, b);
+ clear_btree_node_need_write(b);
+ clear_btree_node_write_blocked(b);
+
+ /*
+ * Does this node have unwritten data that has a pin on the journal?
+ *
+ * If so, transfer that pin to the btree_update operation -
+ * note that if we're freeing multiple nodes, we only need to keep the
+ * oldest pin of any of the nodes we're freeing. We'll release the pin
+ * when the new nodes are persistent and reachable on disk:
+ */
+ w = btree_current_write(b);
+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal,
+ bch2_btree_update_will_free_node_journal_pin_flush);
+ bch2_journal_pin_drop(&c->journal, &w->journal);
+
+ w = btree_prev_write(b);
+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal,
+ bch2_btree_update_will_free_node_journal_pin_flush);
+ bch2_journal_pin_drop(&c->journal, &w->journal);
+
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ /*
+ * Is this a node that isn't reachable on disk yet?
+ *
+ * Nodes that aren't reachable yet have writes blocked until they're
+ * reachable - now that we've cancelled any pending writes and moved
+ * things waiting on that write to wait on this update, we can drop this
+ * node from the list of nodes that the other update is making
+ * reachable, prior to freeing it:
+ */
+ btree_update_drop_new_node(c, b);
+
+ btree_update_add_key(as, &as->old_keys, b);
+
+ as->old_nodes[as->nr_old_nodes] = b;
+ as->old_nodes_seq[as->nr_old_nodes] = b->data->keys.seq;
+ as->nr_old_nodes++;
+}
+
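+/*
+ * All in-memory changes for this update are complete: drop the gc lock, return
+ * unused pre-allocated nodes to the reserve, and arrange for
+ * btree_update_set_nodes_written() to run once the last reference on @as->cl
+ * (held by each unwritten new node) is dropped.
+ */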
+static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *trans)
+{
+ struct bch_fs *c = as->c;
+ u64 start_time = as->start_time;
+
+ BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
+
+ if (as->took_gc_lock)
+ up_read(&as->c->gc_lock);
+ as->took_gc_lock = false;
+
+ bch2_btree_reserve_put(as, trans);
+
+ continue_at(&as->cl, btree_update_set_nodes_written,
+ as->c->btree_interior_update_worker);
+
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_foreground],
+ start_time);
+}
+
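+/*
+ * Begin an interior update: upgrade locks far enough up the tree for the worst
+ * case this update might propagate, take the gc lock, and reserve disk space
+ * and pre-allocated btree nodes before making any modifications.
+ */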
+static struct btree_update *
+bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
+ unsigned level, bool split, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_update *as;
+ u64 start_time = local_clock();
+ int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc)
+ ? BCH_DISK_RESERVATION_NOFAIL : 0;
+ unsigned nr_nodes[2] = { 0, 0 };
+ unsigned update_level = level;
+ enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
+ int ret = 0;
+ u32 restart_count = trans->restart_count;
+
+ BUG_ON(!path->should_be_locked);
+
+ if (watermark == BCH_WATERMARK_copygc)
+ watermark = BCH_WATERMARK_btree_copygc;
+ if (watermark < BCH_WATERMARK_btree)
+ watermark = BCH_WATERMARK_btree;
+
+ flags &= ~BCH_WATERMARK_MASK;
+ flags |= watermark;
+
+ if (!(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
+ watermark < c->journal.watermark) {
+ struct journal_res res = { 0 };
+
+ ret = drop_locks_do(trans,
+ bch2_journal_res_get(&c->journal, &res, 1,
+ watermark|JOURNAL_RES_GET_CHECK));
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ while (1) {
+ nr_nodes[!!update_level] += 1 + split;
+ update_level++;
+
+ ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (!btree_path_node(path, update_level)) {
+ /* Allocating new root? */
+ nr_nodes[1] += split;
+ update_level = BTREE_MAX_DEPTH;
+ break;
+ }
+
+ /*
+ * Always check for space for two keys, even if we won't have to
+ * split at prior level - it might have been a merge instead:
+ */
+ if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
+ BKEY_BTREE_PTR_U64s_MAX * 2))
+ break;
+
+ split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
+ }
+
+ if (!down_read_trylock(&c->gc_lock)) {
+ ret = drop_locks_do(trans, (down_read(&c->gc_lock), 0));
+ if (ret) {
+ up_read(&c->gc_lock);
+ return ERR_PTR(ret);
+ }
+ }
+
+ as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
+ memset(as, 0, sizeof(*as));
+ closure_init(&as->cl, NULL);
+ as->c = c;
+ as->start_time = start_time;
+ as->mode = BTREE_INTERIOR_NO_UPDATE;
+ as->took_gc_lock = true;
+ as->btree_id = path->btree_id;
+ as->update_level = update_level;
+ INIT_LIST_HEAD(&as->list);
+ INIT_LIST_HEAD(&as->unwritten_list);
+ INIT_LIST_HEAD(&as->write_blocked_list);
+ bch2_keylist_init(&as->old_keys, as->_old_keys);
+ bch2_keylist_init(&as->new_keys, as->_new_keys);
+ bch2_keylist_init(&as->parent_keys, as->inline_keys);
+
+ mutex_lock(&c->btree_interior_update_lock);
+ list_add_tail(&as->list, &c->btree_interior_update_list);
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ /*
+	 * We don't want to allocate if we're in an error state: that can cause
+ * deadlock on emergency shutdown due to open buckets getting stuck in
+ * the btree_reserve_cache after allocator shutdown has cleared it out.
+ * This check needs to come after adding us to the btree_interior_update
+ * list but before calling bch2_btree_reserve_get, to synchronize with
+ * __bch2_fs_read_only().
+ */
+ ret = bch2_journal_error(&c->journal);
+ if (ret)
+ goto err;
+
+ ret = bch2_disk_reservation_get(c, &as->disk_res,
+ (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
+ c->opts.metadata_replicas,
+ disk_res_flags);
+ if (ret)
+ goto err;
+
+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL);
+ if (bch2_err_matches(ret, ENOSPC) ||
+ bch2_err_matches(ret, ENOMEM)) {
+ struct closure cl;
+
+ /*
+ * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK
+ * flag
+ */
+ if (bch2_err_matches(ret, ENOSPC) &&
+ (flags & BCH_TRANS_COMMIT_journal_reclaim) &&
+ watermark != BCH_WATERMARK_reclaim) {
+ ret = -BCH_ERR_journal_reclaim_would_deadlock;
+ goto err;
+ }
+
+ closure_init_stack(&cl);
+
+ do {
+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
+
+ bch2_trans_unlock(trans);
+ closure_sync(&cl);
+ } while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
+ }
+
+ if (ret) {
+ trace_and_count(c, btree_reserve_get_fail, trans->fn,
+ _RET_IP_, nr_nodes[0] + nr_nodes[1], ret);
+ goto err;
+ }
+
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ goto err;
+
+ bch2_trans_verify_not_restarted(trans, restart_count);
+ return as;
+err:
+ bch2_btree_update_free(as, trans);
+ if (!bch2_err_matches(ret, ENOSPC) &&
+ !bch2_err_matches(ret, EROFS))
+ bch_err_fn_ratelimited(c, ret);
+ return ERR_PTR(ret);
+}
+
+/* Btree root updates: */
+
+static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
+{
+ /* Root nodes cannot be reaped */
+ mutex_lock(&c->btree_cache.lock);
+ list_del_init(&b->list);
+ mutex_unlock(&c->btree_cache.lock);
+
+ mutex_lock(&c->btree_root_lock);
+ BUG_ON(btree_node_root(c, b) &&
+ (b->c.level < btree_node_root(c, b)->c.level ||
+ !btree_node_dying(btree_node_root(c, b))));
+
+ bch2_btree_id_root(c, b->c.btree_id)->b = b;
+ mutex_unlock(&c->btree_root_lock);
+
+ bch2_recalc_btree_reserve(c);
+}
+
+static void bch2_btree_set_root(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b)
+{
+ struct bch_fs *c = as->c;
+ struct btree *old;
+
+ trace_and_count(c, btree_node_set_root, trans, b);
+
+ old = btree_node_root(c, b);
+
+ /*
+ * Ensure no one is using the old root while we switch to the
+ * new root:
+ */
+ bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+
+ bch2_btree_set_root_inmem(c, b);
+
+ btree_update_updated_root(as, b);
+
+ /*
+ * Unlock old root after new root is visible:
+ *
+ * The new root isn't persistent, but that's ok: we still have
+ * an intent lock on the new root, and any updates that would
+ * depend on the new root would have to update the new root.
+ */
+ bch2_btree_node_unlock_write(trans, path, old);
+}
+
+/* Interior node updates: */
+
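+/*
+ * Insert a single btree node pointer into interior node @b at the position
+ * given by @node_iter: the key is validated, recorded in @as's journal
+ * entries, then inserted into the node, which is marked dirty and as needing
+ * an interior write.
+ */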
+static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct btree_node_iter *node_iter,
+ struct bkey_i *insert)
+{
+ struct bch_fs *c = as->c;
+ struct bkey_packed *k;
+ struct printbuf buf = PRINTBUF;
+ unsigned long old, new, v;
+
+ BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
+ !btree_ptr_sectors_written(insert));
+
+ if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
+ bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
+
+ if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
+ btree_node_type(b), WRITE, &buf) ?:
+ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "inserting invalid bkey\n ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+ prt_printf(&buf, "\n ");
+ bch2_bkey_invalid(c, bkey_i_to_s_c(insert),
+ btree_node_type(b), WRITE, &buf);
+ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf);
+
+ bch2_fs_inconsistent(c, "%s", buf.buf);
+ dump_stack();
+ }
+
+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
+ ARRAY_SIZE(as->journal_entries));
+
+ as->journal_u64s +=
+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s],
+ BCH_JSET_ENTRY_btree_keys,
+ b->c.btree_id, b->c.level,
+ insert, insert->k.u64s);
+
+ while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
+ bkey_iter_pos_cmp(b, k, &insert->k.p) < 0)
+ bch2_btree_node_iter_advance(node_iter, b);
+
+ bch2_btree_bset_insert_key(trans, path, b, node_iter, insert);
+ set_btree_node_dirty_acct(c, b);
+
+ v = READ_ONCE(b->flags);
+ do {
+ old = new = v;
+
+ new &= ~BTREE_WRITE_TYPE_MASK;
+ new |= BTREE_WRITE_interior;
+ new |= 1 << BTREE_NODE_need_write;
+ } while ((v = cmpxchg(&b->flags, old, new)) != old);
+
+ printbuf_exit(&buf);
+}
+
+static void
+__bch2_btree_insert_keys_interior(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct btree_node_iter node_iter,
+ struct keylist *keys)
+{
+ struct bkey_i *insert = bch2_keylist_front(keys);
+ struct bkey_packed *k;
+
+ BUG_ON(btree_node_type(b) != BKEY_TYPE_btree);
+
+ while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) &&
+ (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0))
+ ;
+
+ while (!bch2_keylist_empty(keys)) {
+ insert = bch2_keylist_front(keys);
+
+ if (bpos_gt(insert->k.p, b->key.k.p))
+ break;
+
+ bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert);
+ bch2_keylist_pop_front(keys);
+ }
+}
+
+/*
+ * Distribute the keys from @b between the two new nodes: n[0] gets the lower
+ * part of the key space, n[1] the higher part
+ */
+static void __btree_split_node(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree *b,
+ struct btree *n[2])
+{
+ struct bkey_packed *k;
+ struct bpos n1_pos = POS_MIN;
+ struct btree_node_iter iter;
+ struct bset *bsets[2];
+ struct bkey_format_state format[2];
+ struct bkey_packed *out[2];
+ struct bkey uk;
+ unsigned u64s, n1_u64s = (b->nr.live_u64s * 3) / 5;
+ struct { unsigned nr_keys, val_u64s; } nr_keys[2];
+ int i;
+
+ memset(&nr_keys, 0, sizeof(nr_keys));
+
+ for (i = 0; i < 2; i++) {
+ BUG_ON(n[i]->nsets != 1);
+
+ bsets[i] = btree_bset_first(n[i]);
+ out[i] = bsets[i]->start;
+
+ SET_BTREE_NODE_SEQ(n[i]->data, BTREE_NODE_SEQ(b->data) + 1);
+ bch2_bkey_format_init(&format[i]);
+ }
+
+ u64s = 0;
+ for_each_btree_node_key(b, k, &iter) {
+ if (bkey_deleted(k))
+ continue;
+
+ i = u64s >= n1_u64s;
+ u64s += k->u64s;
+ uk = bkey_unpack_key(b, k);
+ if (!i)
+ n1_pos = uk.p;
+ bch2_bkey_format_add_key(&format[i], &uk);
+
+ nr_keys[i].nr_keys++;
+ nr_keys[i].val_u64s += bkeyp_val_u64s(&b->format, k);
+ }
+
+ btree_set_min(n[0], b->data->min_key);
+ btree_set_max(n[0], n1_pos);
+ btree_set_min(n[1], bpos_successor(n1_pos));
+ btree_set_max(n[1], b->data->max_key);
+
+ for (i = 0; i < 2; i++) {
+ bch2_bkey_format_add_pos(&format[i], n[i]->data->min_key);
+ bch2_bkey_format_add_pos(&format[i], n[i]->data->max_key);
+
+ n[i]->data->format = bch2_bkey_format_done(&format[i]);
+
+ unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s +
+ nr_keys[i].val_u64s;
+ if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c))
+ n[i]->data->format = b->format;
+
+ btree_node_set_format(n[i], n[i]->data->format);
+ }
+
+ u64s = 0;
+ for_each_btree_node_key(b, k, &iter) {
+ if (bkey_deleted(k))
+ continue;
+
+ i = u64s >= n1_u64s;
+ u64s += k->u64s;
+
+ if (bch2_bkey_transform(&n[i]->format, out[i], bkey_packed(k)
+ ? &b->format: &bch2_bkey_format_current, k))
+ out[i]->format = KEY_FORMAT_LOCAL_BTREE;
+ else
+ bch2_bkey_unpack(b, (void *) out[i], k);
+
+ out[i]->needs_whiteout = false;
+
+ btree_keys_account_key_add(&n[i]->nr, 0, out[i]);
+ out[i] = bkey_p_next(out[i]);
+ }
+
+ for (i = 0; i < 2; i++) {
+ bsets[i]->u64s = cpu_to_le16((u64 *) out[i] - bsets[i]->_data);
+
+ BUG_ON(!bsets[i]->u64s);
+
+ set_btree_bset_end(n[i], n[i]->set);
+
+ btree_node_reset_sib_u64s(n[i]);
+
+ bch2_verify_btree_nr_keys(n[i]);
+
+ if (b->c.level)
+ btree_node_interior_verify(as->c, n[i]);
+ }
+}
+
+/*
+ * For updates to interior nodes, we've got to do the insert before we split
+ * because the stuff we're inserting has to be inserted atomically. Post split,
+ * the keys might have to go in different nodes and the insert would no longer
+ * be atomic.
+ *
+ * Worse, if the insert is from btree node coalescing and we do the insert after
+ * the split (and pick the pivot), the pivot we pick might fall between nodes
+ * that were coalesced, and thus in the middle of a child node post coalescing:
+ */
+static void btree_split_insert_keys(struct btree_update *as,
+ struct btree_trans *trans,
+ btree_path_idx_t path_idx,
+ struct btree *b,
+ struct keylist *keys)
+{
+ struct btree_path *path = trans->paths + path_idx;
+
+ if (!bch2_keylist_empty(keys) &&
+ bpos_le(bch2_keylist_front(keys)->k.p, b->data->max_key)) {
+ struct btree_node_iter node_iter;
+
+ bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);
+
+ __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
+
+ btree_node_interior_verify(as->c, b);
+ }
+}
+
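+/*
+ * Split @b if it exceeds the split threshold, otherwise rewrite it into a
+ * single compacted replacement: allocate the new node(s), move the keys
+ * across, insert the new pointers into the parent (allocating a new root when
+ * @b was the root and the depth grows), then free @b.
+ */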
+static int btree_split(struct btree_update *as, struct btree_trans *trans,
+ btree_path_idx_t path, struct btree *b,
+ struct keylist *keys, unsigned flags)
+{
+ struct bch_fs *c = as->c;
+ struct btree *parent = btree_node_parent(trans->paths + path, b);
+ struct btree *n1, *n2 = NULL, *n3 = NULL;
+ btree_path_idx_t path1 = 0, path2 = 0;
+ u64 start_time = local_clock();
+ int ret = 0;
+
+ BUG_ON(!parent && (b != btree_node_root(c, b)));
+ BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1));
+
+ bch2_btree_interior_update_will_free_node(as, b);
+
+ if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
+ struct btree *n[2];
+
+ trace_and_count(c, btree_node_split, trans, b);
+
+ n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level);
+ n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level);
+
+ __btree_split_node(as, trans, b, n);
+
+ if (keys) {
+ btree_split_insert_keys(as, trans, path, n1, keys);
+ btree_split_insert_keys(as, trans, path, n2, keys);
+ BUG_ON(!bch2_keylist_empty(keys));
+ }
+
+ bch2_btree_build_aux_trees(n2);
+ bch2_btree_build_aux_trees(n1);
+
+ bch2_btree_update_add_new_node(as, n1);
+ bch2_btree_update_add_new_node(as, n2);
+ six_unlock_write(&n2->c.lock);
+ six_unlock_write(&n1->c.lock);
+
+ path1 = get_unlocked_mut_path(trans, as->btree_id, n1->c.level, n1->key.k.p);
+ six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, trans->paths + path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
+ bch2_btree_path_level_init(trans, trans->paths + path1, n1);
+
+ path2 = get_unlocked_mut_path(trans, as->btree_id, n2->c.level, n2->key.k.p);
+ six_lock_increment(&n2->c.lock, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, trans->paths + path2, n2->c.level, BTREE_NODE_INTENT_LOCKED);
+ bch2_btree_path_level_init(trans, trans->paths + path2, n2);
+
+		/*
+		 * Note that on recursive splits, parent_keys == keys, so we
+		 * can't start adding new keys to parent_keys before emptying it
+		 * out (which we did with btree_split_insert_keys() above)
+		 */
+ bch2_keylist_add(&as->parent_keys, &n1->key);
+ bch2_keylist_add(&as->parent_keys, &n2->key);
+
+ if (!parent) {
+ /* Depth increases, make a new root */
+ n3 = __btree_root_alloc(as, trans, b->c.level + 1);
+
+ bch2_btree_update_add_new_node(as, n3);
+ six_unlock_write(&n3->c.lock);
+
+ trans->paths[path2].locks_want++;
+ BUG_ON(btree_node_locked(trans->paths + path2, n3->c.level));
+ six_lock_increment(&n3->c.lock, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, trans->paths + path2, n3->c.level, BTREE_NODE_INTENT_LOCKED);
+ bch2_btree_path_level_init(trans, trans->paths + path2, n3);
+
+ n3->sib_u64s[0] = U16_MAX;
+ n3->sib_u64s[1] = U16_MAX;
+
+ btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
+ }
+ } else {
+ trace_and_count(c, btree_node_compact, trans, b);
+
+ n1 = bch2_btree_node_alloc_replacement(as, trans, b);
+
+ if (keys) {
+ btree_split_insert_keys(as, trans, path, n1, keys);
+ BUG_ON(!bch2_keylist_empty(keys));
+ }
+
+ bch2_btree_build_aux_trees(n1);
+ bch2_btree_update_add_new_node(as, n1);
+ six_unlock_write(&n1->c.lock);
+
+ path1 = get_unlocked_mut_path(trans, as->btree_id, n1->c.level, n1->key.k.p);
+ six_lock_increment(&n1->c.lock, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, trans->paths + path1, n1->c.level, BTREE_NODE_INTENT_LOCKED);
+ bch2_btree_path_level_init(trans, trans->paths + path1, n1);
+
+ if (parent)
+ bch2_keylist_add(&as->parent_keys, &n1->key);
+ }
+
+ /* New nodes all written, now make them visible: */
+
+ if (parent) {
+ /* Split a non root node */
+ ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+ if (ret)
+ goto err;
+ } else if (n3) {
+ bch2_btree_set_root(as, trans, trans->paths + path, n3);
+ } else {
+ /* Root filled up but didn't need to be split */
+ bch2_btree_set_root(as, trans, trans->paths + path, n1);
+ }
+
+ if (n3) {
+ bch2_btree_update_get_open_buckets(as, n3);
+ bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+ }
+ if (n2) {
+ bch2_btree_update_get_open_buckets(as, n2);
+ bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+ }
+ bch2_btree_update_get_open_buckets(as, n1);
+ bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
+
+ /*
+ * The old node must be freed (in memory) _before_ unlocking the new
+ * nodes - else another thread could re-acquire a read lock on the old
+ * node after another thread has locked and updated the new node, thus
+ * seeing stale data:
+ */
+ bch2_btree_node_free_inmem(trans, trans->paths + path, b);
+
+ if (n3)
+ bch2_trans_node_add(trans, trans->paths + path, n3);
+ if (n2)
+ bch2_trans_node_add(trans, trans->paths + path2, n2);
+ bch2_trans_node_add(trans, trans->paths + path1, n1);
+
+ if (n3)
+ six_unlock_intent(&n3->c.lock);
+ if (n2)
+ six_unlock_intent(&n2->c.lock);
+ six_unlock_intent(&n1->c.lock);
+out:
+ if (path2) {
+ __bch2_btree_path_unlock(trans, trans->paths + path2);
+ bch2_path_put(trans, path2, true);
+ }
+ if (path1) {
+ __bch2_btree_path_unlock(trans, trans->paths + path1);
+ bch2_path_put(trans, path1, true);
+ }
+
+ bch2_trans_verify_locks(trans);
+
+ bch2_time_stats_update(&c->times[n2
+ ? BCH_TIME_btree_node_split
+ : BCH_TIME_btree_node_compact],
+ start_time);
+ return ret;
+err:
+ if (n3)
+ bch2_btree_node_free_never_used(as, trans, n3);
+ if (n2)
+ bch2_btree_node_free_never_used(as, trans, n2);
+ bch2_btree_node_free_never_used(as, trans, n1);
+ goto out;
+}
+
+static void
+bch2_btree_insert_keys_interior(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct keylist *keys)
+{
+ struct btree_path *linked;
+ unsigned i;
+
+ __bch2_btree_insert_keys_interior(as, trans, path, b,
+ path->l[b->c.level].iter, keys);
+
+ btree_update_updated_node(as, b);
+
+ trans_for_each_path_with_node(trans, b, linked, i)
+ bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
+
+ bch2_trans_verify_paths(trans);
+}
+
+/**
+ * bch2_btree_insert_node - insert bkeys into a given btree node
+ *
+ * @as: btree_update object
+ * @trans: btree_trans object
+ * @path_idx: path that points to current node
+ * @b: node to insert keys into
+ * @keys: list of keys to insert
+ * @flags: transaction commit flags
+ *
+ * Returns: 0 on success, typically transaction restart error on failure
+ *
+ * Inserts as many keys as it can into a given btree node, splitting it if full.
+ * If a split occurred, this function will return early. This can only happen
+ * for leaf nodes -- inserts into interior nodes have to be atomic.
+ */
+static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
+ btree_path_idx_t path_idx, struct btree *b,
+ struct keylist *keys, unsigned flags)
+{
+ struct bch_fs *c = as->c;
+ struct btree_path *path = trans->paths + path_idx;
+ int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
+ int old_live_u64s = b->nr.live_u64s;
+ int live_u64s_added, u64s_added;
+ int ret;
+
+ lockdep_assert_held(&c->gc_lock);
+ BUG_ON(!btree_node_intent_locked(path, b->c.level));
+ BUG_ON(!b->c.level);
+ BUG_ON(!as || as->b);
+ bch2_verify_keylist_sorted(keys);
+
+ ret = bch2_btree_node_lock_write(trans, path, &b->c);
+ if (ret)
+ return ret;
+
+ bch2_btree_node_prep_for_write(trans, path, b);
+
+ if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
+ bch2_btree_node_unlock_write(trans, path, b);
+ goto split;
+ }
+
+ btree_node_interior_verify(c, b);
+
+ bch2_btree_insert_keys_interior(as, trans, path, b, keys);
+
+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
+ u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
+
+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);
+
+ if (u64s_added > live_u64s_added &&
+ bch2_maybe_compact_whiteouts(c, b))
+ bch2_trans_node_reinit_iter(trans, b);
+
+ bch2_btree_node_unlock_write(trans, path, b);
+
+ btree_node_interior_verify(c, b);
+ return 0;
+split:
+ /*
+ * We could attempt to avoid the transaction restart, by calling
+ * bch2_btree_path_upgrade() and allocating more nodes:
+ */
+ if (b->c.level >= as->update_level) {
+ trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
+ }
+
+ return btree_split(as, trans, path_idx, b, keys, flags);
+}
+
+int bch2_btree_split_leaf(struct btree_trans *trans,
+ btree_path_idx_t path,
+ unsigned flags)
+{
+ /* btree_split & merge may both cause paths array to be reallocated */
+
+ struct btree *b = path_l(trans->paths + path)->b;
+ struct btree_update *as;
+ unsigned l;
+ int ret = 0;
+
+ as = bch2_btree_update_start(trans, trans->paths + path,
+ trans->paths[path].level,
+ true, flags);
+ if (IS_ERR(as))
+ return PTR_ERR(as);
+
+ ret = btree_split(as, trans, path, b, NULL, flags);
+ if (ret) {
+ bch2_btree_update_free(as, trans);
+ return ret;
+ }
+
+ bch2_btree_update_done(as, trans);
+
+ for (l = trans->paths[path].level + 1;
+ btree_node_intent_locked(&trans->paths[path], l) && !ret;
+ l++)
+ ret = bch2_foreground_maybe_merge(trans, path, l, flags);
+
+ return ret;
+}
+
+int __bch2_foreground_maybe_merge(struct btree_trans *trans,
+ btree_path_idx_t path,
+ unsigned level,
+ unsigned flags,
+ enum btree_node_sibling sib)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_update *as;
+ struct bkey_format_state new_s;
+ struct bkey_format new_f;
+ struct bkey_i delete;
+ struct btree *b, *m, *n, *prev, *next, *parent;
+ struct bpos sib_pos;
+ size_t sib_u64s;
+ enum btree_id btree = trans->paths[path].btree_id;
+ btree_path_idx_t sib_path = 0, new_path = 0;
+ u64 start_time = local_clock();
+ int ret = 0;
+
+ BUG_ON(!trans->paths[path].should_be_locked);
+ BUG_ON(!btree_node_locked(&trans->paths[path], level));
+
+ b = trans->paths[path].l[level].b;
+
+ if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) ||
+ (sib == btree_next_sib && bpos_eq(b->data->max_key, SPOS_MAX))) {
+ b->sib_u64s[sib] = U16_MAX;
+ return 0;
+ }
+
+ sib_pos = sib == btree_prev_sib
+ ? bpos_predecessor(b->data->min_key)
+ : bpos_successor(b->data->max_key);
+
+ sib_path = bch2_path_get(trans, btree, sib_pos,
+ U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_);
+ ret = bch2_btree_path_traverse(trans, sib_path, false);
+ if (ret)
+ goto err;
+
+ btree_path_set_should_be_locked(trans->paths + sib_path);
+
+ m = trans->paths[sib_path].l[level].b;
+
+ if (btree_node_parent(trans->paths + path, b) !=
+ btree_node_parent(trans->paths + sib_path, m)) {
+ b->sib_u64s[sib] = U16_MAX;
+ goto out;
+ }
+
+ if (sib == btree_prev_sib) {
+ prev = m;
+ next = b;
+ } else {
+ prev = b;
+ next = m;
+ }
+
+ if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) {
+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+
+ bch2_bpos_to_text(&buf1, prev->data->max_key);
+ bch2_bpos_to_text(&buf2, next->data->min_key);
+ bch_err(c,
+ "%s(): btree topology error:\n"
+ " prev ends at %s\n"
+ " next starts at %s",
+ __func__, buf1.buf, buf2.buf);
+ printbuf_exit(&buf1);
+ printbuf_exit(&buf2);
+ bch2_topology_error(c);
+ ret = -EIO;
+ goto err;
+ }
+
+ bch2_bkey_format_init(&new_s);
+ bch2_bkey_format_add_pos(&new_s, prev->data->min_key);
+ __bch2_btree_calc_format(&new_s, prev);
+ __bch2_btree_calc_format(&new_s, next);
+ bch2_bkey_format_add_pos(&new_s, next->data->max_key);
+ new_f = bch2_bkey_format_done(&new_s);
+
+ sib_u64s = btree_node_u64s_with_format(b->nr, &b->format, &new_f) +
+ btree_node_u64s_with_format(m->nr, &m->format, &new_f);
+
+ if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) {
+ sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
+ sib_u64s /= 2;
+ sib_u64s += BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
+ }
+
+ sib_u64s = min(sib_u64s, btree_max_u64s(c));
+ sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1);
+ b->sib_u64s[sib] = sib_u64s;
+
+ if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
+ goto out;
+
+ parent = btree_node_parent(trans->paths + path, b);
+ as = bch2_btree_update_start(trans, trans->paths + path, level, false,
+ BCH_TRANS_COMMIT_no_enospc|flags);
+ ret = PTR_ERR_OR_ZERO(as);
+ if (ret)
+ goto err;
+
+ trace_and_count(c, btree_node_merge, trans, b);
+
+ bch2_btree_interior_update_will_free_node(as, b);
+ bch2_btree_interior_update_will_free_node(as, m);
+
+ n = bch2_btree_node_alloc(as, trans, b->c.level);
+
+ SET_BTREE_NODE_SEQ(n->data,
+ max(BTREE_NODE_SEQ(b->data),
+ BTREE_NODE_SEQ(m->data)) + 1);
+
+ btree_set_min(n, prev->data->min_key);
+ btree_set_max(n, next->data->max_key);
+
+ n->data->format = new_f;
+ btree_node_set_format(n, new_f);
+
+ bch2_btree_sort_into(c, n, prev);
+ bch2_btree_sort_into(c, n, next);
+
+ bch2_btree_build_aux_trees(n);
+ bch2_btree_update_add_new_node(as, n);
+ six_unlock_write(&n->c.lock);
+
+ new_path = get_unlocked_mut_path(trans, btree, n->c.level, n->key.k.p);
+ six_lock_increment(&n->c.lock, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, trans->paths + new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
+ bch2_btree_path_level_init(trans, trans->paths + new_path, n);
+
+ bkey_init(&delete.k);
+ delete.k.p = prev->key.k.p;
+ bch2_keylist_add(&as->parent_keys, &delete);
+ bch2_keylist_add(&as->parent_keys, &n->key);
+
+ bch2_trans_verify_paths(trans);
+
+ ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+ if (ret)
+ goto err_free_update;
+
+ bch2_trans_verify_paths(trans);
+
+ bch2_btree_update_get_open_buckets(as, n);
+ bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
+
+ bch2_btree_node_free_inmem(trans, trans->paths + path, b);
+ bch2_btree_node_free_inmem(trans, trans->paths + sib_path, m);
+
+ bch2_trans_node_add(trans, trans->paths + path, n);
+
+ bch2_trans_verify_paths(trans);
+
+ six_unlock_intent(&n->c.lock);
+
+ bch2_btree_update_done(as, trans);
+
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
+out:
+err:
+ if (new_path)
+ bch2_path_put(trans, new_path, true);
+ bch2_path_put(trans, sib_path, true);
+ bch2_trans_verify_locks(trans);
+ return ret;
+err_free_update:
+ bch2_btree_node_free_never_used(as, trans, n);
+ bch2_btree_update_free(as, trans);
+ goto out;
+}
+
+int bch2_btree_node_rewrite(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct btree *b,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct btree *n, *parent;
+ struct btree_update *as;
+ btree_path_idx_t new_path = 0;
+ int ret;
+
+ flags |= BCH_TRANS_COMMIT_no_enospc;
+
+ struct btree_path *path = btree_iter_path(trans, iter);
+ parent = btree_node_parent(path, b);
+ as = bch2_btree_update_start(trans, path, b->c.level, false, flags);
+ ret = PTR_ERR_OR_ZERO(as);
+ if (ret)
+ goto out;
+
+ bch2_btree_interior_update_will_free_node(as, b);
+
+ n = bch2_btree_node_alloc_replacement(as, trans, b);
+
+ bch2_btree_build_aux_trees(n);
+ bch2_btree_update_add_new_node(as, n);
+ six_unlock_write(&n->c.lock);
+
+ new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p);
+ six_lock_increment(&n->c.lock, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, trans->paths + new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
+ bch2_btree_path_level_init(trans, trans->paths + new_path, n);
+
+ trace_and_count(c, btree_node_rewrite, trans, b);
+
+ if (parent) {
+ bch2_keylist_add(&as->parent_keys, &n->key);
+ ret = bch2_btree_insert_node(as, trans, iter->path,
+ parent, &as->parent_keys, flags);
+ if (ret)
+ goto err;
+ } else {
+ bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n);
+ }
+
+ bch2_btree_update_get_open_buckets(as, n);
+ bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
+
+ bch2_btree_node_free_inmem(trans, btree_iter_path(trans, iter), b);
+
+ bch2_trans_node_add(trans, trans->paths + iter->path, n);
+ six_unlock_intent(&n->c.lock);
+
+ bch2_btree_update_done(as, trans);
+out:
+ if (new_path)
+ bch2_path_put(trans, new_path, true);
+ bch2_trans_downgrade(trans);
+ return ret;
+err:
+ bch2_btree_node_free_never_used(as, trans, n);
+ bch2_btree_update_free(as, trans);
+ goto out;
+}
+
+struct async_btree_rewrite {
+ struct bch_fs *c;
+ struct work_struct work;
+ struct list_head list;
+ enum btree_id btree_id;
+ unsigned level;
+ struct bpos pos;
+ __le64 seq;
+};
+
+static int async_btree_node_rewrite_trans(struct btree_trans *trans,
+ struct async_btree_rewrite *a)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct btree *b;
+ int ret;
+
+ bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos,
+ BTREE_MAX_DEPTH, a->level, 0);
+ b = bch2_btree_iter_peek_node(&iter);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ goto out;
+
+ if (!b || b->data->keys.seq != a->seq) {
+ struct printbuf buf = PRINTBUF;
+
+ if (b)
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ else
+			prt_str(&buf, "(null)");
+		bch_info(c, "%s: node to rewrite not found, searching for seq %llu, got\n%s",
+ __func__, a->seq, buf.buf);
+ printbuf_exit(&buf);
+ goto out;
+ }
+
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
+static void async_btree_node_rewrite_work(struct work_struct *work)
+{
+ struct async_btree_rewrite *a =
+ container_of(work, struct async_btree_rewrite, work);
+ struct bch_fs *c = a->c;
+ int ret;
+
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ async_btree_node_rewrite_trans(trans, a));
+ bch_err_fn(c, ret);
+ bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
+ kfree(a);
+}
+
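+/*
+ * Queue an asynchronous rewrite of @b: the node is identified by btree id,
+ * level, pos and sequence number so the worker can look it up again and skip
+ * the rewrite if it has since been freed or replaced. If the filesystem isn't
+ * writable yet, the request is queued until bch2_do_pending_node_rewrites().
+ */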
+void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
+{
+ struct async_btree_rewrite *a;
+ int ret;
+
+ a = kmalloc(sizeof(*a), GFP_NOFS);
+ if (!a) {
+ bch_err(c, "%s: error allocating memory", __func__);
+ return;
+ }
+
+ a->c = c;
+ a->btree_id = b->c.btree_id;
+ a->level = b->c.level;
+ a->pos = b->key.k.p;
+ a->seq = b->data->keys.seq;
+ INIT_WORK(&a->work, async_btree_node_rewrite_work);
+
+ if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
+ mutex_lock(&c->pending_node_rewrites_lock);
+ list_add(&a->list, &c->pending_node_rewrites);
+ mutex_unlock(&c->pending_node_rewrites_lock);
+ return;
+ }
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
+ if (test_bit(BCH_FS_started, &c->flags)) {
+ bch_err(c, "%s: error getting c->writes ref", __func__);
+ kfree(a);
+ return;
+ }
+
+ ret = bch2_fs_read_write_early(c);
+ bch_err_msg(c, ret, "going read-write");
+ if (ret) {
+ kfree(a);
+ return;
+ }
+
+ bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
+ }
+
+ queue_work(c->btree_interior_update_worker, &a->work);
+}
+
+void bch2_do_pending_node_rewrites(struct bch_fs *c)
+{
+ struct async_btree_rewrite *a, *n;
+
+ mutex_lock(&c->pending_node_rewrites_lock);
+ list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) {
+ list_del(&a->list);
+
+ bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
+ queue_work(c->btree_interior_update_worker, &a->work);
+ }
+ mutex_unlock(&c->pending_node_rewrites_lock);
+}
+
+void bch2_free_pending_node_rewrites(struct bch_fs *c)
+{
+ struct async_btree_rewrite *a, *n;
+
+ mutex_lock(&c->pending_node_rewrites_lock);
+ list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) {
+ list_del(&a->list);
+
+ kfree(a);
+ }
+ mutex_unlock(&c->pending_node_rewrites_lock);
+}
+
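+/*
+ * Change the key (pointer) for a btree node: run triggers for the old and new
+ * keys, update the pointer in the parent node (or the journalled btree root
+ * entry if @b is the root), commit, then update @b's in-memory key, re-hashing
+ * it in the btree node cache if the hash changed.
+ */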
+static int __bch2_btree_node_update_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct btree *b, struct btree *new_hash,
+ struct bkey_i *new_key,
+ unsigned commit_flags,
+ bool skip_triggers)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter2 = { NULL };
+ struct btree *parent;
+ int ret;
+
+ if (!skip_triggers) {
+ ret = bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
+ bkey_i_to_s_c(&b->key),
+ BTREE_TRIGGER_TRANSACTIONAL) ?:
+ bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
+ bkey_i_to_s(new_key),
+ BTREE_TRIGGER_TRANSACTIONAL);
+ if (ret)
+ return ret;
+ }
+
+ if (new_hash) {
+ bkey_copy(&new_hash->key, new_key);
+ ret = bch2_btree_node_hash_insert(&c->btree_cache,
+ new_hash, b->c.level, b->c.btree_id);
+ BUG_ON(ret);
+ }
+
+ parent = btree_node_parent(btree_iter_path(trans, iter), b);
+ if (parent) {
+ bch2_trans_copy_iter(&iter2, iter);
+
+ iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
+ iter2.flags & BTREE_ITER_INTENT,
+ _THIS_IP_);
+
+ struct btree_path *path2 = btree_iter_path(trans, &iter2);
+ BUG_ON(path2->level != b->c.level);
+ BUG_ON(!bpos_eq(path2->pos, new_key->k.p));
+
+ btree_path_set_level_up(trans, path2);
+
+ trans->paths_sorted = false;
+
+ ret = bch2_btree_iter_traverse(&iter2) ?:
+ bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN);
+ if (ret)
+ goto err;
+ } else {
+ BUG_ON(btree_node_root(c, b) != b);
+
+ struct jset_entry *e = bch2_trans_jset_entry_alloc(trans,
+ jset_u64s(new_key->k.u64s));
+ ret = PTR_ERR_OR_ZERO(e);
+ if (ret)
+ return ret;
+
+ journal_entry_set(e,
+ BCH_JSET_ENTRY_btree_root,
+ b->c.btree_id, b->c.level,
+ new_key, new_key->k.u64s);
+ }
+
+ ret = bch2_trans_commit(trans, NULL, NULL, commit_flags);
+ if (ret)
+ goto err;
+
+ bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c);
+
+ if (new_hash) {
+ mutex_lock(&c->btree_cache.lock);
+ bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+ bkey_copy(&b->key, new_key);
+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
+ BUG_ON(ret);
+ mutex_unlock(&c->btree_cache.lock);
+ } else {
+ bkey_copy(&b->key, new_key);
+ }
+
+ bch2_btree_node_unlock_write(trans, btree_iter_path(trans, iter), b);
+out:
+ bch2_trans_iter_exit(trans, &iter2);
+ return ret;
+err:
+ if (new_hash) {
+ mutex_lock(&c->btree_cache.lock);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+ mutex_unlock(&c->btree_cache.lock);
+ }
+ goto out;
+}
+
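+/*
+ * Wrapper around __bch2_btree_node_update_key(): upgrades the path to take an
+ * intent lock on the parent, and if the new key would hash differently,
+ * allocates a spare in-memory node that temporarily holds the new hash table
+ * entry while the update is committed.
+ */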
+int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *iter,
+ struct btree *b, struct bkey_i *new_key,
+ unsigned commit_flags, bool skip_triggers)
+{
+ struct bch_fs *c = trans->c;
+ struct btree *new_hash = NULL;
+ struct btree_path *path = btree_iter_path(trans, iter);
+ struct closure cl;
+ int ret = 0;
+
+ ret = bch2_btree_path_upgrade(trans, path, b->c.level + 1);
+ if (ret)
+ return ret;
+
+ closure_init_stack(&cl);
+
+ /*
+ * check btree_ptr_hash_val() after @b is locked by
+ * btree_iter_traverse():
+ */
+ if (btree_ptr_hash_val(new_key) != b->hash_val) {
+ ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
+ if (ret) {
+ ret = drop_locks_do(trans, (closure_sync(&cl), 0));
+ if (ret)
+ return ret;
+ }
+
+ new_hash = bch2_btree_node_mem_alloc(trans, false);
+ }
+
+ path->intent_ref++;
+ ret = __bch2_btree_node_update_key(trans, iter, b, new_hash, new_key,
+ commit_flags, skip_triggers);
+ --path->intent_ref;
+
+ if (new_hash) {
+ mutex_lock(&c->btree_cache.lock);
+ list_move(&new_hash->list, &c->btree_cache.freeable);
+ mutex_unlock(&c->btree_cache.lock);
+
+ six_unlock_write(&new_hash->c.lock);
+ six_unlock_intent(&new_hash->c.lock);
+ }
+ closure_sync(&cl);
+ bch2_btree_cache_cannibalize_unlock(trans);
+ return ret;
+}
+
+int bch2_btree_node_update_key_get_iter(struct btree_trans *trans,
+ struct btree *b, struct bkey_i *new_key,
+ unsigned commit_flags, bool skip_triggers)
+{
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p,
+ BTREE_MAX_DEPTH, b->c.level,
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&iter);
+ if (ret)
+ goto out;
+
+ /* has node been freed? */
+ if (btree_iter_path(trans, &iter)->l[b->c.level].b != b) {
+ /* node has been freed: */
+ BUG_ON(!btree_node_dying(b));
+ goto out;
+ }
+
+ BUG_ON(!btree_node_hashed(b));
+
+ struct bch_extent_ptr *ptr;
+ bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr,
+ !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev));
+
+ ret = bch2_btree_node_update_key(trans, &iter, b, new_key,
+ commit_flags, skip_triggers);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+/* Init code: */
+
+/*
+ * Only for filesystem bringup, when first reading the btree roots or allocating
+ * btree roots when initializing a new filesystem:
+ */
+void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
+{
+ BUG_ON(btree_node_root(c, b));
+
+ bch2_btree_set_root_inmem(c, b);
+}
+
+static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
+{
+ struct bch_fs *c = trans->c;
+ struct closure cl;
+ struct btree *b;
+ int ret;
+
+ closure_init_stack(&cl);
+
+ do {
+ ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
+ closure_sync(&cl);
+ } while (ret);
+
+ b = bch2_btree_node_mem_alloc(trans, false);
+ bch2_btree_cache_cannibalize_unlock(trans);
+
+ set_btree_node_fake(b);
+ set_btree_node_need_rewrite(b);
+ b->c.level = 0;
+ b->c.btree_id = id;
+
+ bkey_btree_ptr_init(&b->key);
+ b->key.k.p = SPOS_MAX;
+ *((u64 *) bkey_i_to_btree_ptr(&b->key)->v.start) = U64_MAX - id;
+
+ bch2_bset_init_first(b, &b->data->keys);
+ bch2_btree_build_aux_trees(b);
+
+ b->data->flags = 0;
+ btree_set_min(b, POS_MIN);
+ btree_set_max(b, SPOS_MAX);
+ b->data->format = bch2_btree_calc_format(b);
+ btree_node_set_format(b, b->data->format);
+
+ ret = bch2_btree_node_hash_insert(&c->btree_cache, b,
+ b->c.level, b->c.btree_id);
+ BUG_ON(ret);
+
+ bch2_btree_set_root_inmem(c, b);
+
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+ return 0;
+}
+
+void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
+{
+ bch2_trans_run(c, __bch2_btree_root_alloc(trans, id));
+}
+
+void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct btree_update *as;
+
+ mutex_lock(&c->btree_interior_update_lock);
+ list_for_each_entry(as, &c->btree_interior_update_list, list)
+ prt_printf(out, "%p m %u w %u r %u j %llu\n",
+ as,
+ as->mode,
+ as->nodes_written,
+ closure_nr_remaining(&as->cl),
+ as->journal.seq);
+ mutex_unlock(&c->btree_interior_update_lock);
+}
+
+static bool bch2_btree_interior_updates_pending(struct bch_fs *c)
+{
+ bool ret;
+
+ mutex_lock(&c->btree_interior_update_lock);
+ ret = !list_empty(&c->btree_interior_update_list);
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ return ret;
+}
+
+bool bch2_btree_interior_updates_flush(struct bch_fs *c)
+{
+ bool ret = bch2_btree_interior_updates_pending(c);
+
+ if (ret)
+ closure_wait_event(&c->btree_interior_update_wait,
+ !bch2_btree_interior_updates_pending(c));
+ return ret;
+}
+
+void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry)
+{
+ struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);
+
+ mutex_lock(&c->btree_root_lock);
+
+ r->level = entry->level;
+ r->alive = true;
+ bkey_copy(&r->key, (struct bkey_i *) entry->start);
+
+ mutex_unlock(&c->btree_root_lock);
+}
+
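+/*
+ * Append a BCH_JSET_ENTRY_btree_root entry for every live btree root whose id
+ * isn't set in @skip, returning the new end of the entry list.
+ */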
+struct jset_entry *
+bch2_btree_roots_to_journal_entries(struct bch_fs *c,
+ struct jset_entry *end,
+ unsigned long skip)
+{
+ unsigned i;
+
+ mutex_lock(&c->btree_root_lock);
+
+ for (i = 0; i < btree_id_nr_alive(c); i++) {
+ struct btree_root *r = bch2_btree_id_root(c, i);
+
+ if (r->alive && !test_bit(i, &skip)) {
+ journal_entry_set(end, BCH_JSET_ENTRY_btree_root,
+ i, r->level, &r->key, r->key.k.u64s);
+ end = vstruct_next(end);
+ }
+ }
+
+ mutex_unlock(&c->btree_root_lock);
+
+ return end;
+}
+
+void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
+{
+ if (c->btree_interior_update_worker)
+ destroy_workqueue(c->btree_interior_update_worker);
+ mempool_exit(&c->btree_interior_update_pool);
+}
+
+void bch2_fs_btree_interior_update_init_early(struct bch_fs *c)
+{
+ mutex_init(&c->btree_reserve_cache_lock);
+ INIT_LIST_HEAD(&c->btree_interior_update_list);
+ INIT_LIST_HEAD(&c->btree_interior_updates_unwritten);
+ mutex_init(&c->btree_interior_update_lock);
+ INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work);
+
+ INIT_LIST_HEAD(&c->pending_node_rewrites);
+ mutex_init(&c->pending_node_rewrites_lock);
+}
+
+int bch2_fs_btree_interior_update_init(struct bch_fs *c)
+{
+ c->btree_interior_update_worker =
+ alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
+ if (!c->btree_interior_update_worker)
+ return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
+
+ if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
+ sizeof(struct btree_update)))
+ return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;
+
+ return 0;
+}
diff --git a/c_src/libbcachefs/btree_update_interior.h b/c_src/libbcachefs/btree_update_interior.h
new file mode 100644
index 00000000..adfc6208
--- /dev/null
+++ b/c_src/libbcachefs/btree_update_interior.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H
+#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H
+
+#include "btree_cache.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+
+#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES)
+
+#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
+
+/*
+ * Tracks an in progress split/rewrite of a btree node and the update to the
+ * parent node:
+ *
+ * When we split/rewrite a node, we do all the updates in memory without
+ * waiting for any writes to complete - we allocate the new node(s) and update
+ * the parent node, possibly recursively up to the root.
+ *
+ * The end result is that we have one or more new nodes being written -
+ * possibly several, if there were multiple splits - and then a write (updating
+ * an interior node) which will make all these new nodes visible.
+ *
+ * Additionally, as we split/rewrite nodes we free the old nodes - but the old
+ * nodes can't be freed (their space on disk can't be reclaimed) until the
+ * update to the interior node that makes the new node visible completes -
+ * until then, the old nodes are still reachable on disk.
+ *
+ */
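+/*
+ * Rough lifecycle (sketch): bch2_btree_update_start() allocates the
+ * btree_update and reserves nodes; btree_split()/bch2_btree_node_rewrite()
+ * allocate and fill new nodes and update the parent; bch2_btree_update_done()
+ * hands off to btree_update_set_nodes_written() once the new nodes' writes
+ * complete, and btree_update_nodes_written() then does the transactional
+ * accounting, writes the blocked parent node, and frees the update.
+ */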
+struct btree_update {
+ struct closure cl;
+ struct bch_fs *c;
+ u64 start_time;
+
+ struct list_head list;
+ struct list_head unwritten_list;
+
+ /* What kind of update are we doing? */
+ enum {
+ BTREE_INTERIOR_NO_UPDATE,
+ BTREE_INTERIOR_UPDATING_NODE,
+ BTREE_INTERIOR_UPDATING_ROOT,
+ BTREE_INTERIOR_UPDATING_AS,
+ } mode;
+
+ unsigned nodes_written:1;
+ unsigned took_gc_lock:1;
+
+ enum btree_id btree_id;
+ unsigned update_level;
+
+ struct disk_reservation disk_res;
+
+ /*
+ * BTREE_INTERIOR_UPDATING_NODE:
+ * The update that made the new nodes visible was a regular update to an
+ * existing interior node - @b. We can't write out the update to @b
+ * until the new nodes we created are finished writing, so we block @b
+ * from writing by putting this btree_interior update on the
+ * @b->write_blocked list with @write_blocked_list:
+ */
+ struct btree *b;
+ struct list_head write_blocked_list;
+
+ /*
+ * We may be freeing nodes that were dirty, and thus had journal entries
+ * pinned: we need to transfer the oldest of those pins to the
+ * btree_update operation, and release it when the new node(s)
+ * are all persistent and reachable:
+ */
+ struct journal_entry_pin journal;
+
+ /* Preallocated nodes we reserve when we start the update: */
+ struct prealloc_nodes {
+ struct btree *b[BTREE_UPDATE_NODES_MAX];
+ unsigned nr;
+ } prealloc_nodes[2];
+
+ /* Nodes being freed: */
+ struct keylist old_keys;
+ u64 _old_keys[BTREE_UPDATE_NODES_MAX *
+ BKEY_BTREE_PTR_U64s_MAX];
+
+ /* Nodes being added: */
+ struct keylist new_keys;
+ u64 _new_keys[BTREE_UPDATE_NODES_MAX *
+ BKEY_BTREE_PTR_U64s_MAX];
+
+ /* New nodes, that will be made reachable by this update: */
+ struct btree *new_nodes[BTREE_UPDATE_NODES_MAX];
+ unsigned nr_new_nodes;
+
+ struct btree *old_nodes[BTREE_UPDATE_NODES_MAX];
+ __le64 old_nodes_seq[BTREE_UPDATE_NODES_MAX];
+ unsigned nr_old_nodes;
+
+ open_bucket_idx_t open_buckets[BTREE_UPDATE_NODES_MAX *
+ BCH_REPLICAS_MAX];
+ open_bucket_idx_t nr_open_buckets;
+
+ unsigned journal_u64s;
+ u64 journal_entries[BTREE_UPDATE_JOURNAL_RES];
+
+ /* Only here to reduce stack usage on recursive splits: */
+ struct keylist parent_keys;
+ /*
+ * Enough room for btree_split's keys without realloc - btree node
+	 * pointers never have crc/compression info, so we only need to account
+	 * for the pointers for three keys
+ */
+ u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
+};
+
+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
+ struct btree_trans *,
+ struct btree *,
+ struct bkey_format);
+
+int bch2_btree_split_leaf(struct btree_trans *, btree_path_idx_t, unsigned);
+
+int __bch2_foreground_maybe_merge(struct btree_trans *, btree_path_idx_t,
+ unsigned, unsigned, enum btree_node_sibling);
+
+static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans,
+ btree_path_idx_t path_idx,
+ unsigned level, unsigned flags,
+ enum btree_node_sibling sib)
+{
+ struct btree_path *path = trans->paths + path_idx;
+ struct btree *b;
+
+ EBUG_ON(!btree_node_locked(path, level));
+
+ b = path->l[level].b;
+ if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold)
+ return 0;
+
+ return __bch2_foreground_maybe_merge(trans, path_idx, level, flags, sib);
+}
+
+static inline int bch2_foreground_maybe_merge(struct btree_trans *trans,
+ btree_path_idx_t path,
+ unsigned level,
+ unsigned flags)
+{
+ return bch2_foreground_maybe_merge_sibling(trans, path, level, flags,
+ btree_prev_sib) ?:
+ bch2_foreground_maybe_merge_sibling(trans, path, level, flags,
+ btree_next_sib);
+}
+
+int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
+ struct btree *, unsigned);
+void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
+int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
+ struct btree *, struct bkey_i *,
+ unsigned, bool);
+int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *,
+ struct bkey_i *, unsigned, bool);
+
+void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
+void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
+
+static inline unsigned btree_update_reserve_required(struct bch_fs *c,
+ struct btree *b)
+{
+ unsigned depth = btree_node_root(c, b)->c.level + 1;
+
+ /*
+ * Number of nodes we might have to allocate in a worst case btree
+ * split operation - we split all the way up to the root, then allocate
+ * a new root, unless we're already at max depth:
+ */
+ if (depth < BTREE_MAX_DEPTH)
+ return (depth - b->c.level) * 2 + 1;
+ else
+ return (depth - b->c.level) * 2 - 1;
+}
+
+static inline void btree_node_reset_sib_u64s(struct btree *b)
+{
+ b->sib_u64s[0] = b->nr.live_u64s;
+ b->sib_u64s[1] = b->nr.live_u64s;
+}
+
+static inline void *btree_data_end(struct bch_fs *c, struct btree *b)
+{
+ return (void *) b->data + btree_bytes(c);
+}
+
+static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c,
+ struct btree *b)
+{
+ return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s);
+}
+
+static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c,
+ struct btree *b)
+{
+ return btree_data_end(c, b);
+}
+
+static inline void *write_block(struct btree *b)
+{
+ return (void *) b->data + (b->written << 9);
+}
+
+static inline bool __btree_addr_written(struct btree *b, void *p)
+{
+ return p < write_block(b);
+}
+
+static inline bool bset_written(struct btree *b, struct bset *i)
+{
+ return __btree_addr_written(b, i);
+}
+
+static inline bool bkey_written(struct btree *b, struct bkey_packed *k)
+{
+ return __btree_addr_written(b, k);
+}
+
+static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
+ struct btree *b,
+ void *end)
+{
+ ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
+ b->whiteout_u64s;
+ ssize_t total = c->opts.btree_node_size >> 3;
+
+ /* Always leave one extra u64 for bch2_varint_decode: */
+ used++;
+
+ return total - used;
+}
+
+static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
+ struct btree *b)
+{
+ ssize_t remaining = __bch_btree_u64s_remaining(c, b,
+ btree_bkey_last(b, bset_tree_last(b)));
+
+ BUG_ON(remaining < 0);
+
+ if (bset_written(b, btree_bset_last(b)))
+ return 0;
+
+ return remaining;
+}
+
+#define BTREE_WRITE_SET_U64s_BITS 9
+
+static inline unsigned btree_write_set_buffer(struct btree *b)
+{
+ /*
+ * Could buffer up larger amounts of keys for btrees with larger keys,
+ * pending benchmarking:
+ */
+ return 8 << BTREE_WRITE_SET_U64s_BITS;
+}
+
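+/*
+ * Decide whether to start appending to a new bset: we do so once the current
+ * (last) bset has been written out, or once it has grown past the write-set
+ * buffer, provided the node still has enough space left for a new bset to be
+ * worthwhile.
+ */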
+static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
+ struct btree *b)
+{
+ struct bset_tree *t = bset_tree_last(b);
+ struct btree_node_entry *bne = max(write_block(b),
+ (void *) btree_bkey_last(b, bset_tree_last(b)));
+ ssize_t remaining_space =
+ __bch_btree_u64s_remaining(c, b, bne->keys.start);
+
+ if (unlikely(bset_written(b, bset(b, t)))) {
+ if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
+ return bne;
+ } else {
+ if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&
+ remaining_space > (ssize_t) (btree_write_set_buffer(b) >> 3))
+ return bne;
+ }
+
+ return NULL;
+}
+
+static inline void push_whiteout(struct bch_fs *c, struct btree *b,
+ struct bpos pos)
+{
+ struct bkey_packed k;
+
+ BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);
+ EBUG_ON(btree_node_just_written(b));
+
+ if (!bkey_pack_pos(&k, pos, b)) {
+ struct bkey *u = (void *) &k;
+
+ bkey_init(u);
+ u->p = pos;
+ }
+
+ k.needs_whiteout = true;
+
+ b->whiteout_u64s += k.u64s;
+ bkey_p_copy(unwritten_whiteouts_start(c, b), &k);
+}
+
+/*
+ * write lock must be held on @b (else the dirty bset that we were going to
+ * insert into could be written out from under us)
+ */
+static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
+ struct btree *b, unsigned u64s)
+{
+ if (unlikely(btree_node_need_rewrite(b)))
+ return false;
+
+ return u64s <= bch_btree_keys_u64s_remaining(c, b);
+}
+
+void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);
+
+bool bch2_btree_interior_updates_flush(struct bch_fs *);
+
+void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *);
+struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
+ struct jset_entry *, unsigned long);
+
+void bch2_do_pending_node_rewrites(struct bch_fs *);
+void bch2_free_pending_node_rewrites(struct bch_fs *);
+
+void bch2_fs_btree_interior_update_exit(struct bch_fs *);
+void bch2_fs_btree_interior_update_init_early(struct bch_fs *);
+int bch2_fs_btree_interior_update_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */
diff --git a/c_src/libbcachefs/btree_write_buffer.c b/c_src/libbcachefs/btree_write_buffer.c
new file mode 100644
index 00000000..5c1169c7
--- /dev/null
+++ b/c_src/libbcachefs/btree_write_buffer.c
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_write_buffer.h"
+#include "error.h"
+#include "journal.h"
+#include "journal_io.h"
+#include "journal_reclaim.h"
+
+#include <linux/prefetch.h>
+
+static int bch2_btree_write_buffer_journal_flush(struct journal *,
+ struct journal_entry_pin *, u64);
+
+static int bch2_journal_keys_to_write_buffer(struct bch_fs *, struct journal_buf *);
+
+static inline bool __wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
+{
+ return (cmp_int(l->hi, r->hi) ?:
+ cmp_int(l->mi, r->mi) ?:
+ cmp_int(l->lo, r->lo)) >= 0;
+}
+
+static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_key_ref *r)
+{
+#ifdef CONFIG_X86_64
+ int cmp;
+
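+	/*
+	 * Same comparison as __wb_key_ref_cmp(), done as one 192-bit unsigned
+	 * subtract: a sub/sbb chain over lo, mi, hi, with the "ae" (above or
+	 * equal, i.e. no borrow) condition giving us l >= r. The EBUG_ON below
+	 * checks it against the portable version.
+	 */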
+ asm("mov (%[l]), %%rax;"
+ "sub (%[r]), %%rax;"
+ "mov 8(%[l]), %%rax;"
+ "sbb 8(%[r]), %%rax;"
+ "mov 16(%[l]), %%rax;"
+ "sbb 16(%[r]), %%rax;"
+ : "=@ccae" (cmp)
+ : [l] "r" (l), [r] "r" (r)
+ : "rax", "cc");
+
+ EBUG_ON(cmp != __wb_key_ref_cmp(l, r));
+ return cmp;
+#else
+ return __wb_key_ref_cmp(l, r);
+#endif
+}
+
+/* Compare excluding idx, the low 24 bits: */
+static inline bool wb_key_eq(const void *_l, const void *_r)
+{
+ const struct wb_key_ref *l = _l;
+ const struct wb_key_ref *r = _r;
+
+ return !((l->hi ^ r->hi)|
+ (l->mi ^ r->mi)|
+ ((l->lo >> 24) ^ (r->lo >> 24)));
+}
+
+static noinline void wb_sort(struct wb_key_ref *base, size_t num)
+{
+ size_t n = num, a = num / 2;
+
+ if (!a) /* num < 2 || size == 0 */
+ return;
+
+ for (;;) {
+ size_t b, c, d;
+
+ if (a) /* Building heap: sift down --a */
+ --a;
+ else if (--n) /* Sorting: Extract root to --n */
+ swap(base[0], base[n]);
+ else /* Sort complete */
+ break;
+
+ /*
+ * Sift element at "a" down into heap. This is the
+ * "bottom-up" variant, which significantly reduces
+ * calls to cmp_func(): we find the sift-down path all
+ * the way to the leaves (one compare per level), then
+ * backtrack to find where to insert the target element.
+ *
+ * Because elements tend to sift down close to the leaves,
+ * this uses fewer compares than doing two per level
+ * on the way down. (A bit more than half as many on
+ * average, 3/4 worst-case.)
+ */
+ for (b = a; c = 2*b + 1, (d = c + 1) < n;)
+ b = wb_key_ref_cmp(base + c, base + d) ? c : d;
+ if (d == n) /* Special case last leaf with no sibling */
+ b = c;
+
+ /* Now backtrack from "b" to the correct location for "a" */
+ while (b != a && wb_key_ref_cmp(base + a, base + b))
+ b = (b - 1) / 2;
+ c = b; /* Where "a" belongs */
+ while (b != a) { /* Shift it into place */
+ b = (b - 1) / 2;
+ swap(base[b], base[c]);
+ }
+ }
+}
+
+static noinline int wb_flush_one_slowpath(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct btree_write_buffered_key *wb)
+{
+ struct btree_path *path = btree_iter_path(trans, iter);
+
+ bch2_btree_node_unlock_write(trans, path, path->l[0].b);
+
+ trans->journal_res.seq = wb->journal_seq;
+
+ return bch2_trans_update(trans, iter, &wb->k,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_journal_res|
+ BCH_TRANS_COMMIT_journal_reclaim);
+}
+
+static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *iter,
+ struct btree_write_buffered_key *wb,
+ bool *write_locked, size_t *fast)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_path *path;
+ int ret;
+
+ EBUG_ON(!wb->journal_seq);
+ EBUG_ON(!c->btree_write_buffer.flushing.pin.seq);
+ EBUG_ON(c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
+
+ ret = bch2_btree_iter_traverse(iter);
+ if (ret)
+ return ret;
+
+ /*
+ * We can't clone a path that has write locks: unshare it now, before
+ * set_pos and traverse():
+ */
+ if (btree_iter_path(trans, iter)->ref > 1)
+ iter->path = __bch2_btree_path_make_mut(trans, iter->path, true, _THIS_IP_);
+
+ path = btree_iter_path(trans, iter);
+
+ if (!*write_locked) {
+ ret = bch2_btree_node_lock_write(trans, path, &path->l[0].b->c);
+ if (ret)
+ return ret;
+
+ bch2_btree_node_prep_for_write(trans, path, path->l[0].b);
+ *write_locked = true;
+ }
+
+ if (unlikely(!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s))) {
+ *write_locked = false;
+ return wb_flush_one_slowpath(trans, iter, wb);
+ }
+
+ bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq);
+ (*fast)++;
+ return 0;
+}
+
+/*
+ * Update a btree with a write buffered key using the journal seq of the
+ * original write buffer insert.
+ *
+ * It is not safe to rejournal the key once it has been inserted into the write
+ * buffer because that may break recovery ordering. For example, the key may
+ * have already been modified in the active write buffer in a seq that comes
+ * before the current transaction. If we were to journal this key again and
+ * crash, recovery would process updates in the wrong order.
+ */
+static int
+btree_write_buffered_insert(struct btree_trans *trans,
+ struct btree_write_buffered_key *wb)
+{
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, wb->btree, bkey_start_pos(&wb->k.k),
+ BTREE_ITER_CACHED|BTREE_ITER_INTENT);
+
+ trans->journal_res.seq = wb->journal_seq;
+
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, &wb->k,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static void move_keys_from_inc_to_flushing(struct btree_write_buffer *wb)
+{
+ struct bch_fs *c = container_of(wb, struct bch_fs, btree_write_buffer);
+ struct journal *j = &c->journal;
+
+ if (!wb->inc.keys.nr)
+ return;
+
+ bch2_journal_pin_add(j, wb->inc.keys.data[0].journal_seq, &wb->flushing.pin,
+ bch2_btree_write_buffer_journal_flush);
+
+ darray_resize(&wb->flushing.keys, min_t(size_t, 1U << 20, wb->flushing.keys.nr + wb->inc.keys.nr));
+ darray_resize(&wb->sorted, wb->flushing.keys.size);
+
+ if (!wb->flushing.keys.nr && wb->sorted.size >= wb->inc.keys.nr) {
+ swap(wb->flushing.keys, wb->inc.keys);
+ goto out;
+ }
+
+ size_t nr = min(darray_room(wb->flushing.keys),
+ wb->sorted.size - wb->flushing.keys.nr);
+ nr = min(nr, wb->inc.keys.nr);
+
+ memcpy(&darray_top(wb->flushing.keys),
+ wb->inc.keys.data,
+ sizeof(wb->inc.keys.data[0]) * nr);
+
+ memmove(wb->inc.keys.data,
+ wb->inc.keys.data + nr,
+ sizeof(wb->inc.keys.data[0]) * (wb->inc.keys.nr - nr));
+
+ wb->flushing.keys.nr += nr;
+ wb->inc.keys.nr -= nr;
+out:
+ if (!wb->inc.keys.nr)
+ bch2_journal_pin_drop(j, &wb->inc.pin);
+ else
+ bch2_journal_pin_update(j, wb->inc.keys.data[0].journal_seq, &wb->inc.pin,
+ bch2_btree_write_buffer_journal_flush);
+
+ if (j->watermark) {
+ spin_lock(&j->lock);
+ bch2_journal_set_watermark(j);
+ spin_unlock(&j->lock);
+ }
+
+ BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
+}
+
+static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct journal *j = &c->journal;
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+ struct btree_iter iter = { NULL };
+ size_t skipped = 0, fast = 0, slowpath = 0;
+ bool write_locked = false;
+ int ret = 0;
+
+ bch2_trans_unlock(trans);
+ bch2_trans_begin(trans);
+
+ mutex_lock(&wb->inc.lock);
+ move_keys_from_inc_to_flushing(wb);
+ mutex_unlock(&wb->inc.lock);
+
+ for (size_t i = 0; i < wb->flushing.keys.nr; i++) {
+ wb->sorted.data[i].idx = i;
+ wb->sorted.data[i].btree = wb->flushing.keys.data[i].btree;
+ memcpy(&wb->sorted.data[i].pos, &wb->flushing.keys.data[i].k.k.p, sizeof(struct bpos));
+ }
+ wb->sorted.nr = wb->flushing.keys.nr;
+
+ /*
+ * We first sort so that we can detect and skip redundant updates, and
+ * then we attempt to flush in sorted btree order, as this is most
+ * efficient.
+ *
+	 * However, since we're not flushing in the order they appear in the
+	 * journal, we won't be able to drop our journal pin until everything is
+	 * flushed - which means this could deadlock the journal if we weren't
+	 * passing BCH_TRANS_COMMIT_journal_reclaim. That flag causes the update to fail
+ * if it would block taking a journal reservation.
+ *
+ * If that happens, simply skip the key so we can optimistically insert
+ * as many keys as possible in the fast path.
+ */
+ wb_sort(wb->sorted.data, wb->sorted.nr);
+
+ darray_for_each(wb->sorted, i) {
+ struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
+
+ for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
+ prefetch(&wb->flushing.keys.data[n->idx]);
+
+ BUG_ON(!k->journal_seq);
+
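+		/*
+		 * Identical keys (same btree and position) sort adjacently:
+		 * keep only the most recent update, and carry the oldest
+		 * journal seq forward so the journal pin still covers it.
+		 */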
+ if (i + 1 < &darray_top(wb->sorted) &&
+ wb_key_eq(i, i + 1)) {
+ struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
+
+ skipped++;
+ n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
+ k->journal_seq = 0;
+ continue;
+ }
+
+ if (write_locked) {
+ struct btree_path *path = btree_iter_path(trans, &iter);
+
+ if (path->btree_id != i->btree ||
+ bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) {
+ bch2_btree_node_unlock_write(trans, path, path->l[0].b);
+ write_locked = false;
+ }
+ }
+
+ if (!iter.path || iter.btree_id != k->btree) {
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_iter_init(trans, &iter, k->btree, k->k.k.p,
+ BTREE_ITER_INTENT|BTREE_ITER_ALL_SNAPSHOTS);
+ }
+
+ bch2_btree_iter_set_pos(&iter, k->k.k.p);
+ btree_iter_path(trans, &iter)->preserve = false;
+
+ do {
+ if (race_fault()) {
+ ret = -BCH_ERR_journal_reclaim_would_deadlock;
+ break;
+ }
+
+ ret = wb_flush_one(trans, &iter, k, &write_locked, &fast);
+ if (!write_locked)
+ bch2_trans_begin(trans);
+ } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
+
+ if (!ret) {
+ k->journal_seq = 0;
+ } else if (ret == -BCH_ERR_journal_reclaim_would_deadlock) {
+ slowpath++;
+ ret = 0;
+ } else
+ break;
+ }
+
+ if (write_locked) {
+ struct btree_path *path = btree_iter_path(trans, &iter);
+ bch2_btree_node_unlock_write(trans, path, path->l[0].b);
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret)
+ goto err;
+
+ if (slowpath) {
+ /*
+ * Flush in the order they were present in the journal, so that
+ * we can release journal pins:
+		 * The fast path zeroed the journal seq of keys that were
+		 * successfully flushed, so we can skip those here.
+ */
+ trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);
+
+ darray_for_each(wb->flushing.keys, i) {
+ if (!i->journal_seq)
+ continue;
+
+ bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
+ bch2_btree_write_buffer_journal_flush);
+
+ bch2_trans_begin(trans);
+
+ ret = commit_do(trans, NULL, NULL,
+ BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_no_journal_res|
+ BCH_TRANS_COMMIT_journal_reclaim,
+ btree_write_buffered_insert(trans, i));
+ if (ret)
+ goto err;
+ }
+ }
+err:
+ bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
+ trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
+ bch2_journal_pin_drop(j, &wb->flushing.pin);
+ wb->flushing.keys.nr = 0;
+ return ret;
+}
+
+static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq)
+{
+ struct journal *j = &c->journal;
+ struct journal_buf *buf;
+ int ret = 0;
+
+ while (!ret && (buf = bch2_next_write_buffer_flush_journal_buf(j, seq))) {
+ ret = bch2_journal_keys_to_write_buffer(c, buf);
+ mutex_unlock(&j->buf_lock);
+ }
+
+ return ret;
+}
+
+static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+ int ret = 0, fetch_from_journal_err;
+
+ do {
+ bch2_trans_unlock(trans);
+
+ fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq);
+
+ /*
+ * On memory allocation failure, bch2_btree_write_buffer_flush_locked()
+ * is not guaranteed to empty wb->inc:
+ */
+ mutex_lock(&wb->flushing.lock);
+ ret = bch2_btree_write_buffer_flush_locked(trans);
+ mutex_unlock(&wb->flushing.lock);
+ } while (!ret &&
+ (fetch_from_journal_err ||
+ (wb->inc.pin.seq && wb->inc.pin.seq <= seq) ||
+ (wb->flushing.pin.seq && wb->flushing.pin.seq <= seq)));
+
+ return ret;
+}
+
+static int bch2_btree_write_buffer_journal_flush(struct journal *j,
+ struct journal_entry_pin *_pin, u64 seq)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
+ return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq));
+}
+
+int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+
+ trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_);
+
+ return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal));
+}
+
+int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+ int ret = 0;
+
+ if (mutex_trylock(&wb->flushing.lock)) {
+ ret = bch2_btree_write_buffer_flush_locked(trans);
+ mutex_unlock(&wb->flushing.lock);
+ }
+
+ return ret;
+}
+
+int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer))
+ return -BCH_ERR_erofs_no_writes;
+
+ int ret = bch2_btree_write_buffer_flush_nocheck_rw(trans);
+ bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+ return ret;
+}
+
+static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
+{
+ struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+ int ret;
+
+ mutex_lock(&wb->flushing.lock);
+ do {
+ ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans));
+ } while (!ret && bch2_btree_write_buffer_should_flush(c));
+ mutex_unlock(&wb->flushing.lock);
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+}
+
+int bch2_journal_key_to_wb_slowpath(struct bch_fs *c,
+ struct journal_keys_to_wb *dst,
+ enum btree_id btree, struct bkey_i *k)
+{
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+ int ret;
+retry:
+ ret = darray_make_room_gfp(&dst->wb->keys, 1, GFP_KERNEL);
+ if (!ret && dst->wb == &wb->flushing)
+ ret = darray_resize(&wb->sorted, wb->flushing.keys.size);
+
+ if (unlikely(ret)) {
+ if (dst->wb == &c->btree_write_buffer.flushing) {
+ mutex_unlock(&dst->wb->lock);
+ dst->wb = &c->btree_write_buffer.inc;
+ bch2_journal_pin_add(&c->journal, dst->seq, &dst->wb->pin,
+ bch2_btree_write_buffer_journal_flush);
+ goto retry;
+ }
+
+ return ret;
+ }
+
+ dst->room = darray_room(dst->wb->keys);
+ if (dst->wb == &wb->flushing)
+ dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr);
+ BUG_ON(!dst->room);
+ BUG_ON(!dst->seq);
+
+ struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
+ wb_k->journal_seq = dst->seq;
+ wb_k->btree = btree;
+ bkey_copy(&wb_k->k, k);
+ dst->wb->keys.nr++;
+ dst->room--;
+ return 0;
+}
+
+void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_keys_to_wb *dst, u64 seq)
+{
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+ if (mutex_trylock(&wb->flushing.lock)) {
+ mutex_lock(&wb->inc.lock);
+ move_keys_from_inc_to_flushing(wb);
+
+ /*
+ * Attempt to skip wb->inc, and add keys directly to
+ * wb->flushing, saving us a copy later:
+ */
+
+ if (!wb->inc.keys.nr) {
+ dst->wb = &wb->flushing;
+ } else {
+ mutex_unlock(&wb->flushing.lock);
+ dst->wb = &wb->inc;
+ }
+ } else {
+ mutex_lock(&wb->inc.lock);
+ dst->wb = &wb->inc;
+ }
+
+ dst->room = darray_room(dst->wb->keys);
+ if (dst->wb == &wb->flushing)
+ dst->room = min(dst->room, wb->sorted.size - wb->flushing.keys.nr);
+ dst->seq = seq;
+
+ bch2_journal_pin_add(&c->journal, seq, &dst->wb->pin,
+ bch2_btree_write_buffer_journal_flush);
+}
+
+void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
+{
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+ if (!dst->wb->keys.nr)
+ bch2_journal_pin_drop(&c->journal, &dst->wb->pin);
+
+ if (bch2_btree_write_buffer_should_flush(c) &&
+ __bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer) &&
+ !queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work))
+ bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
+
+ if (dst->wb == &wb->flushing)
+ mutex_unlock(&wb->flushing.lock);
+ mutex_unlock(&wb->inc.lock);
+}
+
+static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf)
+{
+ struct journal_keys_to_wb dst;
+ struct jset_entry *entry;
+ struct bkey_i *k;
+ int ret = 0;
+
+ bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq));
+
+ for_each_jset_entry_type(entry, buf->data, BCH_JSET_ENTRY_write_buffer_keys) {
+ jset_entry_for_each_key(entry, k) {
+ ret = bch2_journal_key_to_wb(c, &dst, entry->btree_id, k);
+ if (ret)
+ goto out;
+ }
+
+ entry->type = BCH_JSET_ENTRY_btree_keys;
+ }
+
+ buf->need_flush_to_write_buffer = false;
+out:
+ bch2_journal_keys_to_write_buffer_end(c, &dst);
+ return ret;
+}
+
+static int wb_keys_resize(struct btree_write_buffer_keys *wb, size_t new_size)
+{
+ if (wb->keys.size >= new_size)
+ return 0;
+
+ if (!mutex_trylock(&wb->lock))
+ return -EINTR;
+
+ int ret = darray_resize(&wb->keys, new_size);
+ mutex_unlock(&wb->lock);
+ return ret;
+}
+
+int bch2_btree_write_buffer_resize(struct bch_fs *c, size_t new_size)
+{
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+ return wb_keys_resize(&wb->flushing, new_size) ?:
+ wb_keys_resize(&wb->inc, new_size);
+}
+
+void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
+{
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+ BUG_ON((wb->inc.keys.nr || wb->flushing.keys.nr) &&
+ !bch2_journal_error(&c->journal));
+
+ darray_exit(&wb->sorted);
+ darray_exit(&wb->flushing.keys);
+ darray_exit(&wb->inc.keys);
+}
+
+int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
+{
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+ mutex_init(&wb->inc.lock);
+ mutex_init(&wb->flushing.lock);
+ INIT_WORK(&wb->flush_work, bch2_btree_write_buffer_flush_work);
+
+ /* Will be resized by journal as needed: */
+ unsigned initial_size = 1 << 16;
+
+ return darray_make_room(&wb->inc.keys, initial_size) ?:
+ darray_make_room(&wb->flushing.keys, initial_size) ?:
+ darray_make_room(&wb->sorted, initial_size);
+}
diff --git a/c_src/libbcachefs/btree_write_buffer.h b/c_src/libbcachefs/btree_write_buffer.h
new file mode 100644
index 00000000..eebcd2b1
--- /dev/null
+++ b/c_src/libbcachefs/btree_write_buffer.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H
+#define _BCACHEFS_BTREE_WRITE_BUFFER_H
+
+#include "bkey.h"
+
+static inline bool bch2_btree_write_buffer_should_flush(struct bch_fs *c)
+{
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+ return wb->inc.keys.nr + wb->flushing.keys.nr > wb->inc.keys.size / 4;
+}
+
+static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c)
+{
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+
+ return wb->inc.keys.nr > wb->inc.keys.size * 3 / 4;
+}
+
+struct btree_trans;
+int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
+int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
+int bch2_btree_write_buffer_tryflush(struct btree_trans *);
+
+struct journal_keys_to_wb {
+ struct btree_write_buffer_keys *wb;
+ size_t room;
+ u64 seq;
+};
+
+int bch2_journal_key_to_wb_slowpath(struct bch_fs *,
+ struct journal_keys_to_wb *,
+ enum btree_id, struct bkey_i *);
+
+static inline int bch2_journal_key_to_wb(struct bch_fs *c,
+ struct journal_keys_to_wb *dst,
+ enum btree_id btree, struct bkey_i *k)
+{
+ EBUG_ON(!dst->seq);
+
+ if (unlikely(!dst->room))
+ return bch2_journal_key_to_wb_slowpath(c, dst, btree, k);
+
+ struct btree_write_buffered_key *wb_k = &darray_top(dst->wb->keys);
+ wb_k->journal_seq = dst->seq;
+ wb_k->btree = btree;
+ bkey_copy(&wb_k->k, k);
+ dst->wb->keys.nr++;
+ dst->room--;
+ return 0;
+}
+
+void bch2_journal_keys_to_write_buffer_start(struct bch_fs *, struct journal_keys_to_wb *, u64);
+void bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_to_wb *);
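+
+/*
+ * Typical usage (see bch2_journal_keys_to_write_buffer()): _start() picks a
+ * destination buffer and takes the lock and journal pin, then
+ * bch2_journal_key_to_wb() is called once per key, then _end() releases:
+ *
+ *	bch2_journal_keys_to_write_buffer_start(c, &dst, seq);
+ *	ret = bch2_journal_key_to_wb(c, &dst, btree_id, k);
+ *	bch2_journal_keys_to_write_buffer_end(c, &dst);
+ */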
+
+int bch2_btree_write_buffer_resize(struct bch_fs *, size_t);
+void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
+int bch2_fs_btree_write_buffer_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_H */
diff --git a/c_src/libbcachefs/btree_write_buffer_types.h b/c_src/libbcachefs/btree_write_buffer_types.h
new file mode 100644
index 00000000..9b9433de
--- /dev/null
+++ b/c_src/libbcachefs/btree_write_buffer_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
+#define _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H
+
+#include "darray.h"
+#include "journal_types.h"
+
+#define BTREE_WRITE_BUFERED_VAL_U64s_MAX 4
+#define BTREE_WRITE_BUFERED_U64s_MAX (BKEY_U64s + BTREE_WRITE_BUFERED_VAL_U64s_MAX)
+
+struct wb_key_ref {
+union {
+ struct {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ unsigned idx:24;
+ u8 pos[sizeof(struct bpos)];
+ enum btree_id btree:8;
+#else
+ enum btree_id btree:8;
+ u8 pos[sizeof(struct bpos)];
+ unsigned idx:24;
+#endif
+ } __packed;
+ struct {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ u64 lo;
+ u64 mi;
+ u64 hi;
+#else
+ u64 hi;
+ u64 mi;
+ u64 lo;
+#endif
+ };
+};
+};
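+
+/*
+ * The two union members overlay the same 24 bytes: (btree, pos, idx) can be
+ * compared as three u64s (hi, mi, lo), with idx (the index into the flushing
+ * keys array) in the low 24 bits so that it only acts as a tiebreaker -
+ * wb_key_eq() masks it off entirely.
+ */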
+
+struct btree_write_buffered_key {
+ enum btree_id btree:8;
+ u64 journal_seq:56;
+ __BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX);
+};
+
+struct btree_write_buffer_keys {
+ DARRAY(struct btree_write_buffered_key) keys;
+ struct journal_entry_pin pin;
+ struct mutex lock;
+};
+
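+/*
+ * Double buffering: new keys are appended to @inc under its own lock and
+ * moved to @flushing when a flush runs, so writers can keep adding keys while
+ * a flush is in progress; @sorted holds wb_key_refs used to sort @flushing
+ * before the actual btree updates.
+ */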
+struct btree_write_buffer {
+ DARRAY(struct wb_key_ref) sorted;
+ struct btree_write_buffer_keys inc;
+ struct btree_write_buffer_keys flushing;
+ struct work_struct flush_work;
+};
+
+#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */
diff --git a/c_src/libbcachefs/buckets.c b/c_src/libbcachefs/buckets.c
new file mode 100644
index 00000000..67b7e796
--- /dev/null
+++ b/c_src/libbcachefs/buckets.c
@@ -0,0 +1,1413 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for manipulating bucket marks for garbage collection.
+ *
+ * Copyright 2014 Datera, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "backpointers.h"
+#include "bset.h"
+#include "btree_gc.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "buckets_waiting_for_journal.h"
+#include "ec.h"
+#include "error.h"
+#include "inode.h"
+#include "movinggc.h"
+#include "recovery.h"
+#include "reflink.h"
+#include "replicas.h"
+#include "subvolume.h"
+#include "trace.h"
+
+#include <linux/preempt.h>
+
+static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage,
+ enum bch_data_type data_type,
+ s64 sectors)
+{
+ switch (data_type) {
+ case BCH_DATA_btree:
+ fs_usage->btree += sectors;
+ break;
+ case BCH_DATA_user:
+ case BCH_DATA_parity:
+ fs_usage->data += sectors;
+ break;
+ case BCH_DATA_cached:
+ fs_usage->cached += sectors;
+ break;
+ default:
+ break;
+ }
+}
+
+void bch2_fs_usage_initialize(struct bch_fs *c)
+{
+ percpu_down_write(&c->mark_lock);
+ struct bch_fs_usage *usage = c->usage_base;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(c->usage); i++)
+ bch2_fs_usage_acc_to_base(c, i);
+
+ for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++)
+ usage->reserved += usage->persistent_reserved[i];
+
+ for (unsigned i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry_v1 *e =
+ cpu_replicas_entry(&c->replicas, i);
+
+ fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]);
+ }
+
+ for_each_member_device(c, ca) {
+ struct bch_dev_usage dev = bch2_dev_usage_read(ca);
+
+ usage->hidden += (dev.d[BCH_DATA_sb].buckets +
+ dev.d[BCH_DATA_journal].buckets) *
+ ca->mi.bucket_size;
+ }
+
+ percpu_up_write(&c->mark_lock);
+}
+
+static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
+ unsigned journal_seq,
+ bool gc)
+{
+ BUG_ON(!gc && !journal_seq);
+
+ return this_cpu_ptr(gc
+ ? ca->usage_gc
+ : ca->usage[journal_seq & JOURNAL_BUF_MASK]);
+}
+
+void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
+{
+ struct bch_fs *c = ca->fs;
+ unsigned seq, i, u64s = dev_usage_u64s();
+
+ do {
+ seq = read_seqcount_begin(&c->usage_lock);
+ memcpy(usage, ca->usage_base, u64s * sizeof(u64));
+ for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
+ acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage[i], u64s);
+ } while (read_seqcount_retry(&c->usage_lock, seq));
+}
+
+u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v)
+{
+ ssize_t offset = v - (u64 *) c->usage_base;
+ unsigned i, seq;
+ u64 ret;
+
+ BUG_ON(offset < 0 || offset >= fs_usage_u64s(c));
+ percpu_rwsem_assert_held(&c->mark_lock);
+
+ do {
+ seq = read_seqcount_begin(&c->usage_lock);
+ ret = *v;
+
+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
+ ret += percpu_u64_get((u64 __percpu *) c->usage[i] + offset);
+ } while (read_seqcount_retry(&c->usage_lock, seq));
+
+ return ret;
+}
+
+struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c)
+{
+ struct bch_fs_usage_online *ret;
+ unsigned nr_replicas = READ_ONCE(c->replicas.nr);
+ unsigned seq, i;
+retry:
+ ret = kmalloc(__fs_usage_online_u64s(nr_replicas) * sizeof(u64), GFP_KERNEL);
+ if (unlikely(!ret))
+ return NULL;
+
+ percpu_down_read(&c->mark_lock);
+
+ if (nr_replicas != c->replicas.nr) {
+ nr_replicas = c->replicas.nr;
+ percpu_up_read(&c->mark_lock);
+ kfree(ret);
+ goto retry;
+ }
+
+ ret->online_reserved = percpu_u64_get(c->online_reserved);
+
+ do {
+ seq = read_seqcount_begin(&c->usage_lock);
+ unsafe_memcpy(&ret->u, c->usage_base,
+ __fs_usage_u64s(nr_replicas) * sizeof(u64),
+ "embedded variable length struct");
+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
+ acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i],
+ __fs_usage_u64s(nr_replicas));
+ } while (read_seqcount_retry(&c->usage_lock, seq));
+
+ return ret;
+}
+
+void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
+{
+ unsigned u64s = fs_usage_u64s(c);
+
+ BUG_ON(idx >= ARRAY_SIZE(c->usage));
+
+ preempt_disable();
+ write_seqcount_begin(&c->usage_lock);
+
+ acc_u64s_percpu((u64 *) c->usage_base,
+ (u64 __percpu *) c->usage[idx], u64s);
+ percpu_memset(c->usage[idx], 0, u64s * sizeof(u64));
+
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, NULL) {
+ u64s = dev_usage_u64s();
+
+ acc_u64s_percpu((u64 *) ca->usage_base,
+ (u64 __percpu *) ca->usage[idx], u64s);
+ percpu_memset(ca->usage[idx], 0, u64s * sizeof(u64));
+ }
+ rcu_read_unlock();
+
+ write_seqcount_end(&c->usage_lock);
+ preempt_enable();
+}
+
+void bch2_fs_usage_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_fs_usage_online *fs_usage)
+{
+ unsigned i;
+
+ prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity);
+
+ prt_printf(out, "hidden:\t\t\t\t%llu\n",
+ fs_usage->u.hidden);
+ prt_printf(out, "data:\t\t\t\t%llu\n",
+ fs_usage->u.data);
+ prt_printf(out, "cached:\t\t\t\t%llu\n",
+ fs_usage->u.cached);
+ prt_printf(out, "reserved:\t\t\t%llu\n",
+ fs_usage->u.reserved);
+ prt_printf(out, "nr_inodes:\t\t\t%llu\n",
+ fs_usage->u.nr_inodes);
+ prt_printf(out, "online reserved:\t\t%llu\n",
+ fs_usage->online_reserved);
+
+ for (i = 0;
+ i < ARRAY_SIZE(fs_usage->u.persistent_reserved);
+ i++) {
+ prt_printf(out, "%u replicas:\n", i + 1);
+ prt_printf(out, "\treserved:\t\t%llu\n",
+ fs_usage->u.persistent_reserved[i]);
+ }
+
+ for (i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry_v1 *e =
+ cpu_replicas_entry(&c->replicas, i);
+
+ prt_printf(out, "\t");
+ bch2_replicas_entry_to_text(out, e);
+ prt_printf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
+ }
+}
+
+static u64 reserve_factor(u64 r)
+{
+ return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR);
+}
+
+u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage)
+{
+ return min(fs_usage->u.hidden +
+ fs_usage->u.btree +
+ fs_usage->u.data +
+ reserve_factor(fs_usage->u.reserved +
+ fs_usage->online_reserved),
+ c->capacity);
+}
+
+static struct bch_fs_usage_short
+__bch2_fs_usage_read_short(struct bch_fs *c)
+{
+ struct bch_fs_usage_short ret;
+ u64 data, reserved;
+
+ ret.capacity = c->capacity -
+ bch2_fs_usage_read_one(c, &c->usage_base->hidden);
+
+ data = bch2_fs_usage_read_one(c, &c->usage_base->data) +
+ bch2_fs_usage_read_one(c, &c->usage_base->btree);
+ reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) +
+ percpu_u64_get(c->online_reserved);
+
+ ret.used = min(ret.capacity, data + reserve_factor(reserved));
+ ret.free = ret.capacity - ret.used;
+
+ ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes);
+
+ return ret;
+}
+
+struct bch_fs_usage_short
+bch2_fs_usage_read_short(struct bch_fs *c)
+{
+ struct bch_fs_usage_short ret;
+
+ percpu_down_read(&c->mark_lock);
+ ret = __bch2_fs_usage_read_short(c);
+ percpu_up_read(&c->mark_lock);
+
+ return ret;
+}
+
+void bch2_dev_usage_init(struct bch_dev *ca)
+{
+ ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
+}
+
+void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
+{
+ prt_tab(out);
+ prt_str(out, "buckets");
+ prt_tab_rjust(out);
+ prt_str(out, "sectors");
+ prt_tab_rjust(out);
+ prt_str(out, "fragmented");
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ for (unsigned i = 0; i < BCH_DATA_NR; i++) {
+ prt_str(out, bch2_data_types[i]);
+ prt_tab(out);
+ prt_u64(out, usage->d[i].buckets);
+ prt_tab_rjust(out);
+ prt_u64(out, usage->d[i].sectors);
+ prt_tab_rjust(out);
+ prt_u64(out, usage->d[i].fragmented);
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+}
+
+void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
+ const struct bch_alloc_v4 *old,
+ const struct bch_alloc_v4 *new,
+ u64 journal_seq, bool gc)
+{
+ struct bch_fs_usage *fs_usage;
+ struct bch_dev_usage *u;
+
+ preempt_disable();
+ fs_usage = fs_usage_ptr(c, journal_seq, gc);
+
+ if (data_type_is_hidden(old->data_type))
+ fs_usage->hidden -= ca->mi.bucket_size;
+ if (data_type_is_hidden(new->data_type))
+ fs_usage->hidden += ca->mi.bucket_size;
+
+ u = dev_usage_ptr(ca, journal_seq, gc);
+
+ u->d[old->data_type].buckets--;
+ u->d[new->data_type].buckets++;
+
+ u->d[old->data_type].sectors -= bch2_bucket_sectors_dirty(*old);
+ u->d[new->data_type].sectors += bch2_bucket_sectors_dirty(*new);
+
+ u->d[BCH_DATA_cached].sectors += new->cached_sectors;
+ u->d[BCH_DATA_cached].sectors -= old->cached_sectors;
+
+ u->d[old->data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, *old);
+ u->d[new->data_type].fragmented += bch2_bucket_sectors_fragmented(ca, *new);
+
+ preempt_enable();
+}
+
+static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
+{
+ return (struct bch_alloc_v4) {
+ .gen = b.gen,
+ .data_type = b.data_type,
+ .dirty_sectors = b.dirty_sectors,
+ .cached_sectors = b.cached_sectors,
+ .stripe = b.stripe,
+ };
+}
+
+void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
+ struct bucket *old, struct bucket *new)
+{
+ struct bch_alloc_v4 old_a = bucket_m_to_alloc(*old);
+ struct bch_alloc_v4 new_a = bucket_m_to_alloc(*new);
+
+ bch2_dev_usage_update(c, ca, &old_a, &new_a, 0, true);
+}
+
+static inline int __update_replicas(struct bch_fs *c,
+ struct bch_fs_usage *fs_usage,
+ struct bch_replicas_entry_v1 *r,
+ s64 sectors)
+{
+ int idx = bch2_replicas_entry_idx(c, r);
+
+ if (idx < 0)
+ return -1;
+
+ fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
+ fs_usage->replicas[idx] += sectors;
+ return 0;
+}
+
+int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k,
+ struct bch_replicas_entry_v1 *r, s64 sectors,
+ unsigned journal_seq, bool gc)
+{
+ struct bch_fs_usage *fs_usage;
+ int idx, ret = 0;
+ struct printbuf buf = PRINTBUF;
+
+ percpu_down_read(&c->mark_lock);
+
+ idx = bch2_replicas_entry_idx(c, r);
+ if (idx < 0 &&
+ fsck_err(c, ptr_to_missing_replicas_entry,
+ "no replicas entry\n while marking %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ percpu_up_read(&c->mark_lock);
+ ret = bch2_mark_replicas(c, r);
+ percpu_down_read(&c->mark_lock);
+
+ if (ret)
+ goto err;
+ idx = bch2_replicas_entry_idx(c, r);
+ }
+ if (idx < 0) {
+ ret = -1;
+ goto err;
+ }
+
+ preempt_disable();
+ fs_usage = fs_usage_ptr(c, journal_seq, gc);
+ fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
+ fs_usage->replicas[idx] += sectors;
+ preempt_enable();
+err:
+fsck_err:
+ percpu_up_read(&c->mark_lock);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static inline int update_cached_sectors(struct bch_fs *c,
+ struct bkey_s_c k,
+ unsigned dev, s64 sectors,
+ unsigned journal_seq, bool gc)
+{
+ struct bch_replicas_padded r;
+
+ bch2_replicas_entry_cached(&r.e, dev);
+
+ return bch2_update_replicas(c, k, &r.e, sectors, journal_seq, gc);
+}
+
+static int __replicas_deltas_realloc(struct btree_trans *trans, unsigned more,
+ gfp_t gfp)
+{
+ struct replicas_delta_list *d = trans->fs_usage_deltas;
+ unsigned new_size = d ? (d->size + more) * 2 : 128;
+ unsigned alloc_size = sizeof(*d) + new_size;
+
+ WARN_ON_ONCE(alloc_size > REPLICAS_DELTA_LIST_MAX);
+
+ if (!d || d->used + more > d->size) {
+ d = krealloc(d, alloc_size, gfp|__GFP_ZERO);
+
+ if (unlikely(!d)) {
+ if (alloc_size > REPLICAS_DELTA_LIST_MAX)
+ return -ENOMEM;
+
+ d = mempool_alloc(&trans->c->replicas_delta_pool, gfp);
+ if (!d)
+ return -ENOMEM;
+
+ memset(d, 0, REPLICAS_DELTA_LIST_MAX);
+
+ if (trans->fs_usage_deltas)
+ memcpy(d, trans->fs_usage_deltas,
+ trans->fs_usage_deltas->size + sizeof(*d));
+
+ new_size = REPLICAS_DELTA_LIST_MAX - sizeof(*d);
+ kfree(trans->fs_usage_deltas);
+ }
+
+ d->size = new_size;
+ trans->fs_usage_deltas = d;
+ }
+
+ return 0;
+}
+
+int bch2_replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
+{
+ return allocate_dropping_locks_errcode(trans,
+ __replicas_deltas_realloc(trans, more, _gfp));
+}
+
+int bch2_update_replicas_list(struct btree_trans *trans,
+ struct bch_replicas_entry_v1 *r,
+ s64 sectors)
+{
+ struct replicas_delta_list *d;
+ struct replicas_delta *n;
+ unsigned b;
+ int ret;
+
+ if (!sectors)
+ return 0;
+
+ b = replicas_entry_bytes(r) + 8;
+ ret = bch2_replicas_deltas_realloc(trans, b);
+ if (ret)
+ return ret;
+
+ d = trans->fs_usage_deltas;
+ n = (void *) d->d + d->used;
+ n->delta = sectors;
+ unsafe_memcpy((void *) n + offsetof(struct replicas_delta, r),
+ r, replicas_entry_bytes(r),
+		      "flexible array member embedded in struct with padding");
+ bch2_replicas_entry_sort(&n->r);
+ d->used += b;
+ return 0;
+}
+
+int bch2_update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64 sectors)
+{
+ struct bch_replicas_padded r;
+
+ bch2_replicas_entry_cached(&r.e, dev);
+
+ return bch2_update_replicas_list(trans, &r.e, sectors);
+}
+
+int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
+ size_t b, enum bch_data_type data_type,
+ unsigned sectors, struct gc_pos pos,
+ unsigned flags)
+{
+ struct bucket old, new, *g;
+ int ret = 0;
+
+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
+ BUG_ON(data_type != BCH_DATA_sb &&
+ data_type != BCH_DATA_journal);
+
+ /*
+ * Backup superblock might be past the end of our normal usable space:
+ */
+ if (b >= ca->mi.nbuckets)
+ return 0;
+
+ percpu_down_read(&c->mark_lock);
+ g = gc_bucket(ca, b);
+
+ bucket_lock(g);
+ old = *g;
+
+ if (bch2_fs_inconsistent_on(g->data_type &&
+ g->data_type != data_type, c,
+ "different types of data in same bucket: %s, %s",
+ bch2_data_types[g->data_type],
+ bch2_data_types[data_type])) {
+ ret = -EIO;
+ goto err;
+ }
+
+ if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > bucket size",
+ ca->dev_idx, b, g->gen,
+ bch2_data_types[g->data_type ?: data_type],
+ g->dirty_sectors, sectors)) {
+ ret = -EIO;
+ goto err;
+ }
+
+ g->data_type = data_type;
+ g->dirty_sectors += sectors;
+ new = *g;
+err:
+ bucket_unlock(g);
+ if (!ret)
+ bch2_dev_usage_update_m(c, ca, &old, &new);
+ percpu_up_read(&c->mark_lock);
+ return ret;
+}
+
+int bch2_check_bucket_ref(struct btree_trans *trans,
+ struct bkey_s_c k,
+ const struct bch_extent_ptr *ptr,
+ s64 sectors, enum bch_data_type ptr_data_type,
+ u8 b_gen, u8 bucket_data_type,
+ u32 bucket_sectors)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ if (bucket_data_type == BCH_DATA_cached)
+ bucket_data_type = BCH_DATA_user;
+
+ if ((bucket_data_type == BCH_DATA_stripe && ptr_data_type == BCH_DATA_user) ||
+ (bucket_data_type == BCH_DATA_user && ptr_data_type == BCH_DATA_stripe))
+ bucket_data_type = ptr_data_type = BCH_DATA_stripe;
+
+ if (gen_after(ptr->gen, b_gen)) {
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen,
+ "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
+ "while marking %s",
+ ptr->dev, bucket_nr, b_gen,
+ bch2_data_types[bucket_data_type ?: ptr_data_type],
+ ptr->gen,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EIO;
+ goto err;
+ }
+
+ if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ BCH_FSCK_ERR_ptr_too_stale,
+ "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
+ "while marking %s",
+ ptr->dev, bucket_nr, b_gen,
+ bch2_data_types[bucket_data_type ?: ptr_data_type],
+ ptr->gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EIO;
+ goto err;
+ }
+
+ if (b_gen != ptr->gen && !ptr->cached) {
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ BCH_FSCK_ERR_stale_dirty_ptr,
+ "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
+ "while marking %s",
+ ptr->dev, bucket_nr, b_gen,
+ *bucket_gen(ca, bucket_nr),
+ bch2_data_types[bucket_data_type ?: ptr_data_type],
+ ptr->gen,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EIO;
+ goto err;
+ }
+
+ if (b_gen != ptr->gen) {
+ ret = 1;
+ goto out;
+ }
+
+ if (!data_type_is_empty(bucket_data_type) &&
+ ptr_data_type &&
+ bucket_data_type != ptr_data_type) {
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ BCH_FSCK_ERR_ptr_bucket_data_type_mismatch,
+ "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
+ "while marking %s",
+ ptr->dev, bucket_nr, b_gen,
+ bch2_data_types[bucket_data_type],
+ bch2_data_types[ptr_data_type],
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EIO;
+ goto err;
+ }
+
+ if ((u64) bucket_sectors + sectors > U32_MAX) {
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ BCH_FSCK_ERR_bucket_sector_count_overflow,
+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
+ "while marking %s",
+ ptr->dev, bucket_nr, b_gen,
+ bch2_data_types[bucket_data_type ?: ptr_data_type],
+ bucket_sectors, sectors,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EIO;
+ goto err;
+ }
+out:
+ printbuf_exit(&buf);
+ return ret;
+err:
+ bch2_dump_trans_updates(trans);
+ goto out;
+}
+
+void bch2_trans_fs_usage_revert(struct btree_trans *trans,
+ struct replicas_delta_list *deltas)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_fs_usage *dst;
+ struct replicas_delta *d, *top = (void *) deltas->d + deltas->used;
+ s64 added = 0;
+ unsigned i;
+
+ percpu_down_read(&c->mark_lock);
+ preempt_disable();
+ dst = fs_usage_ptr(c, trans->journal_res.seq, false);
+
+ /* revert changes: */
+ for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
+ switch (d->r.data_type) {
+ case BCH_DATA_btree:
+ case BCH_DATA_user:
+ case BCH_DATA_parity:
+ added += d->delta;
+ }
+ BUG_ON(__update_replicas(c, dst, &d->r, -d->delta));
+ }
+
+ dst->nr_inodes -= deltas->nr_inodes;
+
+ for (i = 0; i < BCH_REPLICAS_MAX; i++) {
+ added -= deltas->persistent_reserved[i];
+ dst->reserved -= deltas->persistent_reserved[i];
+ dst->persistent_reserved[i] -= deltas->persistent_reserved[i];
+ }
+
+ if (added > 0) {
+ trans->disk_res->sectors += added;
+ this_cpu_add(*c->online_reserved, added);
+ }
+
+ preempt_enable();
+ percpu_up_read(&c->mark_lock);
+}
+
+int bch2_trans_fs_usage_apply(struct btree_trans *trans,
+ struct replicas_delta_list *deltas)
+{
+ struct bch_fs *c = trans->c;
+ static int warned_disk_usage = 0;
+ bool warn = false;
+ u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
+ struct replicas_delta *d, *d2;
+ struct replicas_delta *top = (void *) deltas->d + deltas->used;
+ struct bch_fs_usage *dst;
+ s64 added = 0, should_not_have_added;
+ unsigned i;
+
+ percpu_down_read(&c->mark_lock);
+ preempt_disable();
+ dst = fs_usage_ptr(c, trans->journal_res.seq, false);
+
+ for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
+ switch (d->r.data_type) {
+ case BCH_DATA_btree:
+ case BCH_DATA_user:
+ case BCH_DATA_parity:
+ added += d->delta;
+ }
+
+ if (__update_replicas(c, dst, &d->r, d->delta))
+ goto need_mark;
+ }
+
+ dst->nr_inodes += deltas->nr_inodes;
+
+ for (i = 0; i < BCH_REPLICAS_MAX; i++) {
+ added += deltas->persistent_reserved[i];
+ dst->reserved += deltas->persistent_reserved[i];
+ dst->persistent_reserved[i] += deltas->persistent_reserved[i];
+ }
+
+ /*
+ * Not allowed to reduce sectors_available except by getting a
+ * reservation:
+ */
+ should_not_have_added = added - (s64) disk_res_sectors;
+ if (unlikely(should_not_have_added > 0)) {
+ u64 old, new, v = atomic64_read(&c->sectors_available);
+
+ do {
+ old = v;
+ new = max_t(s64, 0, old - should_not_have_added);
+ } while ((v = atomic64_cmpxchg(&c->sectors_available,
+ old, new)) != old);
+
+ added -= should_not_have_added;
+ warn = true;
+ }
+
+ if (added > 0) {
+ trans->disk_res->sectors -= added;
+ this_cpu_sub(*c->online_reserved, added);
+ }
+
+ preempt_enable();
+ percpu_up_read(&c->mark_lock);
+
+ if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
+ bch2_trans_inconsistent(trans,
+			"disk usage increased %lli more than %llu sectors reserved",
+ should_not_have_added, disk_res_sectors);
+ return 0;
+need_mark:
+ /* revert changes: */
+ for (d2 = deltas->d; d2 != d; d2 = replicas_delta_next(d2))
+ BUG_ON(__update_replicas(c, dst, &d2->r, -d2->delta));
+
+ preempt_enable();
+ percpu_up_read(&c->mark_lock);
+ return -1;
+}
+
+/* KEY_TYPE_extent: */
+
+static int __mark_pointer(struct btree_trans *trans,
+ struct bkey_s_c k,
+ const struct bch_extent_ptr *ptr,
+ s64 sectors, enum bch_data_type ptr_data_type,
+ u8 bucket_gen, u8 *bucket_data_type,
+ u32 *dirty_sectors, u32 *cached_sectors)
+{
+ u32 *dst_sectors = !ptr->cached
+ ? dirty_sectors
+ : cached_sectors;
+ int ret = bch2_check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
+ bucket_gen, *bucket_data_type, *dst_sectors);
+
+ if (ret)
+ return ret;
+
+ *dst_sectors += sectors;
+
+ if (!*dirty_sectors && !*cached_sectors)
+ *bucket_data_type = 0;
+ else if (*bucket_data_type != BCH_DATA_stripe)
+ *bucket_data_type = ptr_data_type;
+
+ return 0;
+}
+
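+/*
+ * Pointer triggers run in one of two modes: BTREE_TRIGGER_TRANSACTIONAL
+ * updates the bucket's alloc btree entry (and, for dirty pointers, its
+ * backpointer) within the transaction, while BTREE_TRIGGER_GC updates the
+ * in-memory gc bucket counters directly.
+ */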
+static int bch2_trigger_pointer(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, struct extent_ptr_decoded p,
+ s64 *sectors,
+ unsigned flags)
+{
+ bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
+ struct bpos bucket;
+ struct bch_backpointer bp;
+
+ bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
+ *sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);
+
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ struct btree_iter iter;
+ struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &iter, bucket);
+ int ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ return ret;
+
+ ret = __mark_pointer(trans, k, &p.ptr, *sectors, bp.data_type,
+ a->v.gen, &a->v.data_type,
+ &a->v.dirty_sectors, &a->v.cached_sectors) ?:
+ bch2_trans_update(trans, &iter, &a->k_i, 0);
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret)
+ return ret;
+
+ if (!p.ptr.cached) {
+ ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (flags & BTREE_TRIGGER_GC) {
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+ enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+
+ percpu_down_read(&c->mark_lock);
+ struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+ bucket_lock(g);
+ struct bucket old = *g;
+
+ u8 bucket_data_type = g->data_type;
+ int ret = __mark_pointer(trans, k, &p.ptr, *sectors,
+ data_type, g->gen,
+ &bucket_data_type,
+ &g->dirty_sectors,
+ &g->cached_sectors);
+ if (ret) {
+ bucket_unlock(g);
+ percpu_up_read(&c->mark_lock);
+ return ret;
+ }
+
+ g->data_type = bucket_data_type;
+ struct bucket new = *g;
+ bucket_unlock(g);
+ bch2_dev_usage_update_m(c, ca, &old, &new);
+ percpu_up_read(&c->mark_lock);
+ }
+
+ return 0;
+}
+
+static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
+ struct bkey_s_c k,
+ struct extent_ptr_decoded p,
+ enum bch_data_type data_type,
+ s64 sectors, unsigned flags)
+{
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ struct btree_iter iter;
+ struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_stripes, POS(0, p.ec.idx),
+ BTREE_ITER_WITH_UPDATES, stripe);
+ int ret = PTR_ERR_OR_ZERO(s);
+ if (unlikely(ret)) {
+ bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans,
+ "pointer to nonexistent stripe %llu",
+ (u64) p.ec.idx);
+ goto err;
+ }
+
+ if (!bch2_ptr_matches_stripe(&s->v, p)) {
+ bch2_trans_inconsistent(trans,
+ "stripe pointer doesn't match stripe %llu",
+ (u64) p.ec.idx);
+ ret = -EIO;
+ goto err;
+ }
+
+ stripe_blockcount_set(&s->v, p.ec.block,
+ stripe_blockcount_get(&s->v, p.ec.block) +
+ sectors);
+
+ struct bch_replicas_padded r;
+ bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
+ r.e.data_type = data_type;
+ ret = bch2_update_replicas_list(trans, &r.e, sectors);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+ }
+
+ if (flags & BTREE_TRIGGER_GC) {
+ struct bch_fs *c = trans->c;
+
+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
+
+ struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL);
+ if (!m) {
+ bch_err(c, "error allocating memory for gc_stripes, idx %llu",
+ (u64) p.ec.idx);
+ return -BCH_ERR_ENOMEM_mark_stripe_ptr;
+ }
+
+ mutex_lock(&c->ec_stripes_heap_lock);
+
+ if (!m || !m->alive) {
+ mutex_unlock(&c->ec_stripes_heap_lock);
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s",
+ (u64) p.ec.idx, buf.buf);
+ printbuf_exit(&buf);
+ bch2_inconsistent_error(c);
+ return -EIO;
+ }
+
+ m->block_sectors[p.ec.block] += sectors;
+
+ struct bch_replicas_padded r = m->r;
+ mutex_unlock(&c->ec_stripes_heap_lock);
+
+ r.e.data_type = data_type;
+ bch2_update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true);
+ }
+
+ return 0;
+}
+
+static int __trigger_extent(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, unsigned flags)
+{
+ bool gc = flags & BTREE_TRIGGER_GC;
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct bch_replicas_padded r;
+ enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
+ ? BCH_DATA_btree
+ : BCH_DATA_user;
+ s64 dirty_sectors = 0;
+ int ret = 0;
+
+ r.e.data_type = data_type;
+ r.e.nr_devs = 0;
+ r.e.nr_required = 1;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ s64 disk_sectors;
+ ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags);
+ if (ret < 0)
+ return ret;
+
+ bool stale = ret > 0;
+
+ if (p.ptr.cached) {
+ if (!stale) {
+ ret = !gc
+ ? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors)
+ : update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true);
+ bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors",
+ __func__);
+ if (ret)
+ return ret;
+ }
+ } else if (!p.has_ec) {
+ dirty_sectors += disk_sectors;
+ r.e.devs[r.e.nr_devs++] = p.ptr.dev;
+ } else {
+ ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
+ if (ret)
+ return ret;
+
+ /*
+ * There may be other dirty pointers in this extent, but
+			 * if so, they're not required for mounting as long as we
+			 * have an erasure coded pointer in this extent:
+ */
+ r.e.nr_required = 0;
+ }
+ }
+
+ if (r.e.nr_devs) {
+ ret = !gc
+ ? bch2_update_replicas_list(trans, &r.e, dirty_sectors)
+ : bch2_update_replicas(c, k, &r.e, dirty_sectors, 0, true);
+ if (unlikely(ret && gc)) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int bch2_trigger_extent(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
+{
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ struct bch_fs *c = trans->c;
+ int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) -
+ (int) bch2_bkey_needs_rebalance(c, old);
+
+ if (mod) {
+ int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))
+ return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree_id, level, old, new, flags);
+
+ return 0;
+}
+
+/* KEY_TYPE_reservation */
+
+static int __trigger_reservation(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+ s64 sectors = (s64) k.k->size * replicas;
+
+ if (flags & BTREE_TRIGGER_OVERWRITE)
+ sectors = -sectors;
+
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ int ret = bch2_replicas_deltas_realloc(trans, 0);
+ if (ret)
+ return ret;
+
+ struct replicas_delta_list *d = trans->fs_usage_deltas;
+ replicas = min(replicas, ARRAY_SIZE(d->persistent_reserved));
+
+ d->persistent_reserved[replicas - 1] += sectors;
+ }
+
+ if (flags & BTREE_TRIGGER_GC) {
+ percpu_down_read(&c->mark_lock);
+ preempt_disable();
+
+ struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc);
+
+ replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved));
+ fs_usage->reserved += sectors;
+ fs_usage->persistent_reserved[replicas - 1] += sectors;
+
+ preempt_enable();
+ percpu_up_read(&c->mark_lock);
+ }
+
+ return 0;
+}
+
+int bch2_trigger_reservation(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
+{
+ return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags);
+}
+
+/* Mark superblocks: */
+
+static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
+ struct bch_dev *ca, size_t b,
+ enum bch_data_type type,
+ unsigned sectors)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_i_alloc_v4 *a;
+ int ret = 0;
+
+ /*
+ * Backup superblock might be past the end of our normal usable space:
+ */
+ if (b >= ca->mi.nbuckets)
+ return 0;
+
+ a = bch2_trans_start_alloc_update(trans, &iter, POS(ca->dev_idx, b));
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+
+ if (a->v.data_type && type && a->v.data_type != type) {
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ BCH_FSCK_ERR_bucket_metadata_type_mismatch,
+ "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
+ "while marking %s",
+ iter.pos.inode, iter.pos.offset, a->v.gen,
+ bch2_data_types[a->v.data_type],
+ bch2_data_types[type],
+ bch2_data_types[type]);
+ ret = -EIO;
+ goto err;
+ }
+
+ if (a->v.data_type != type ||
+ a->v.dirty_sectors != sectors) {
+ a->v.data_type = type;
+ a->v.dirty_sectors = sectors;
+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
+ struct bch_dev *ca, size_t b,
+ enum bch_data_type type,
+ unsigned sectors)
+{
+ return commit_do(trans, NULL, NULL, 0,
+ __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
+}
+
+static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans,
+ struct bch_dev *ca,
+ u64 start, u64 end,
+ enum bch_data_type type,
+ u64 *bucket, unsigned *bucket_sectors)
+{
+ do {
+ u64 b = sector_to_bucket(ca, start);
+ unsigned sectors =
+ min_t(u64, bucket_to_sector(ca, b + 1), end) - start;
+
+ if (b != *bucket && *bucket_sectors) {
+ int ret = bch2_trans_mark_metadata_bucket(trans, ca, *bucket,
+ type, *bucket_sectors);
+ if (ret)
+ return ret;
+
+ *bucket_sectors = 0;
+ }
+
+ *bucket = b;
+ *bucket_sectors += sectors;
+ start += sectors;
+ } while (start < end);
+
+ return 0;
+}
+
+static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
+ struct bch_dev *ca)
+{
+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
+ u64 bucket = 0;
+ unsigned i, bucket_sectors = 0;
+ int ret;
+
+ for (i = 0; i < layout->nr_superblocks; i++) {
+ u64 offset = le64_to_cpu(layout->sb_offset[i]);
+
+ if (offset == BCH_SB_SECTOR) {
+ ret = bch2_trans_mark_metadata_sectors(trans, ca,
+ 0, BCH_SB_SECTOR,
+ BCH_DATA_sb, &bucket, &bucket_sectors);
+ if (ret)
+ return ret;
+ }
+
+ ret = bch2_trans_mark_metadata_sectors(trans, ca, offset,
+ offset + (1 << layout->sb_max_size_bits),
+ BCH_DATA_sb, &bucket, &bucket_sectors);
+ if (ret)
+ return ret;
+ }
+
+ if (bucket_sectors) {
+ ret = bch2_trans_mark_metadata_bucket(trans, ca,
+ bucket, BCH_DATA_sb, bucket_sectors);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < ca->journal.nr; i++) {
+ ret = bch2_trans_mark_metadata_bucket(trans, ca,
+ ca->journal.buckets[i],
+ BCH_DATA_journal, ca->mi.bucket_size);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
+{
+ int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(trans, ca));
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+int bch2_trans_mark_dev_sbs(struct bch_fs *c)
+{
+ for_each_online_member(c, ca) {
+ int ret = bch2_trans_mark_dev_sb(c, ca);
+ if (ret) {
+ percpu_ref_put(&ca->ref);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/* Disk reservations: */
+
+#define SECTORS_CACHE 1024
+
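+/*
+ * Slow path for disk reservations: each CPU keeps a small cache of sectors
+ * carved out of the global c->sectors_available counter. When the cache
+ * can't cover a request we pull the request plus SECTORS_CACHE more from the
+ * global counter with a cmpxchg loop; if even that fails, we recompute what's
+ * actually free under sectors_available_lock and only then return -ENOSPC
+ * (unless BCH_DISK_RESERVATION_NOFAIL is set).
+ */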
+int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
+ u64 sectors, int flags)
+{
+ struct bch_fs_pcpu *pcpu;
+ u64 old, v, get;
+ s64 sectors_available;
+ int ret;
+
+ percpu_down_read(&c->mark_lock);
+ preempt_disable();
+ pcpu = this_cpu_ptr(c->pcpu);
+
+ if (sectors <= pcpu->sectors_available)
+ goto out;
+
+ v = atomic64_read(&c->sectors_available);
+ do {
+ old = v;
+ get = min((u64) sectors + SECTORS_CACHE, old);
+
+ if (get < sectors) {
+ preempt_enable();
+ goto recalculate;
+ }
+ } while ((v = atomic64_cmpxchg(&c->sectors_available,
+ old, old - get)) != old);
+
+ pcpu->sectors_available += get;
+
+out:
+ pcpu->sectors_available -= sectors;
+ this_cpu_add(*c->online_reserved, sectors);
+ res->sectors += sectors;
+
+ preempt_enable();
+ percpu_up_read(&c->mark_lock);
+ return 0;
+
+recalculate:
+ mutex_lock(&c->sectors_available_lock);
+
+ percpu_u64_set(&c->pcpu->sectors_available, 0);
+ sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);
+
+ if (sectors <= sectors_available ||
+ (flags & BCH_DISK_RESERVATION_NOFAIL)) {
+ atomic64_set(&c->sectors_available,
+ max_t(s64, 0, sectors_available - sectors));
+ this_cpu_add(*c->online_reserved, sectors);
+ res->sectors += sectors;
+ ret = 0;
+ } else {
+ atomic64_set(&c->sectors_available, sectors_available);
+ ret = -BCH_ERR_ENOSPC_disk_reservation;
+ }
+
+ mutex_unlock(&c->sectors_available_lock);
+ percpu_up_read(&c->mark_lock);
+
+ return ret;
+}
+
+/* Startup/shutdown: */
+
+static void bucket_gens_free_rcu(struct rcu_head *rcu)
+{
+ struct bucket_gens *buckets =
+ container_of(rcu, struct bucket_gens, rcu);
+
+ kvpfree(buckets, sizeof(*buckets) + buckets->nbuckets);
+}
+
+int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
+{
+ struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL;
+ unsigned long *buckets_nouse = NULL;
+ bool resize = ca->bucket_gens != NULL;
+ int ret;
+
+ if (!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
+ GFP_KERNEL|__GFP_ZERO))) {
+ ret = -BCH_ERR_ENOMEM_bucket_gens;
+ goto err;
+ }
+
+ if ((c->opts.buckets_nouse &&
+ !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
+ sizeof(unsigned long),
+ GFP_KERNEL|__GFP_ZERO)))) {
+ ret = -BCH_ERR_ENOMEM_buckets_nouse;
+ goto err;
+ }
+
+ bucket_gens->first_bucket = ca->mi.first_bucket;
+ bucket_gens->nbuckets = nbuckets;
+
+ if (resize) {
+ down_write(&c->gc_lock);
+ down_write(&ca->bucket_lock);
+ percpu_down_write(&c->mark_lock);
+ }
+
+ old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
+
+ if (resize) {
+ size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets);
+
+ memcpy(bucket_gens->b,
+ old_bucket_gens->b,
+ n);
+ if (buckets_nouse)
+ memcpy(buckets_nouse,
+ ca->buckets_nouse,
+ BITS_TO_LONGS(n) * sizeof(unsigned long));
+ }
+
+ rcu_assign_pointer(ca->bucket_gens, bucket_gens);
+ bucket_gens = old_bucket_gens;
+
+ swap(ca->buckets_nouse, buckets_nouse);
+
+ nbuckets = ca->mi.nbuckets;
+
+ if (resize) {
+ percpu_up_write(&c->mark_lock);
+ up_write(&ca->bucket_lock);
+ up_write(&c->gc_lock);
+ }
+
+ ret = 0;
+err:
+ kvpfree(buckets_nouse,
+ BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
+ if (bucket_gens)
+ call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);
+
+ return ret;
+}
+
+void bch2_dev_buckets_free(struct bch_dev *ca)
+{
+ unsigned i;
+
+ kvpfree(ca->buckets_nouse,
+ BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
+ kvpfree(rcu_dereference_protected(ca->bucket_gens, 1),
+ sizeof(struct bucket_gens) + ca->mi.nbuckets);
+
+ for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
+ free_percpu(ca->usage[i]);
+ kfree(ca->usage_base);
+}
+
+int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
+{
+ unsigned i;
+
+ ca->usage_base = kzalloc(sizeof(struct bch_dev_usage), GFP_KERNEL);
+ if (!ca->usage_base)
+ return -BCH_ERR_ENOMEM_usage_init;
+
+ for (i = 0; i < ARRAY_SIZE(ca->usage); i++) {
+ ca->usage[i] = alloc_percpu(struct bch_dev_usage);
+ if (!ca->usage[i])
+ return -BCH_ERR_ENOMEM_usage_init;
+ }
+
+ return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
+}
diff --git a/c_src/libbcachefs/buckets.h b/c_src/libbcachefs/buckets.h
new file mode 100644
index 00000000..2c95cc5d
--- /dev/null
+++ b/c_src/libbcachefs/buckets.h
@@ -0,0 +1,461 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Code for manipulating bucket marks for garbage collection.
+ *
+ * Copyright 2014 Datera, Inc.
+ */
+
+#ifndef _BUCKETS_H
+#define _BUCKETS_H
+
+#include "buckets_types.h"
+#include "extents.h"
+#include "sb-members.h"
+
+static inline size_t sector_to_bucket(const struct bch_dev *ca, sector_t s)
+{
+ return div_u64(s, ca->mi.bucket_size);
+}
+
+static inline sector_t bucket_to_sector(const struct bch_dev *ca, size_t b)
+{
+ return ((sector_t) b) * ca->mi.bucket_size;
+}
+
+static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s)
+{
+ u32 remainder;
+
+ div_u64_rem(s, ca->mi.bucket_size, &remainder);
+ return remainder;
+}
+
+static inline size_t sector_to_bucket_and_offset(const struct bch_dev *ca, sector_t s,
+ u32 *offset)
+{
+ return div_u64_rem(s, ca->mi.bucket_size, offset);
+}
+
+#define for_each_bucket(_b, _buckets) \
+ for (_b = (_buckets)->b + (_buckets)->first_bucket; \
+ _b < (_buckets)->b + (_buckets)->nbuckets; _b++)
+
+/*
+ * Ugly hack alert:
+ *
+ * We need to cram a spinlock in a single byte, because that's what we have left
+ * in struct bucket, and we care about the size of these - during fsck, we need
+ * in-memory state for every single bucket on every device.
+ *
+ * We used to do
+ *   while (xchg(&b->lock, 1)) cpu_relax();
+ * but it turns out not all architectures support xchg on a single byte.
+ *
+ * So now we use bit_spin_lock(), with fun games since we can't burn a whole
+ * ulong for this - we just need to make sure the lock bit always ends up in the
+ * first byte.
+ */
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define BUCKET_LOCK_BITNR 0
+#else
+#define BUCKET_LOCK_BITNR (BITS_PER_LONG - 1)
+#endif
+
+union ulong_byte_assert {
+ ulong ulong;
+ u8 byte;
+};
+
+static inline void bucket_unlock(struct bucket *b)
+{
+ BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte);
+
+ clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock);
+ wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR);
+}
+
+static inline void bucket_lock(struct bucket *b)
+{
+ wait_on_bit_lock((void *) &b->lock, BUCKET_LOCK_BITNR,
+ TASK_UNINTERRUPTIBLE);
+}
+
+static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca)
+{
+ return rcu_dereference_check(ca->buckets_gc,
+ !ca->fs ||
+ percpu_rwsem_is_held(&ca->fs->mark_lock) ||
+ lockdep_is_held(&ca->fs->gc_lock) ||
+ lockdep_is_held(&ca->bucket_lock));
+}
+
+static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
+{
+ struct bucket_array *buckets = gc_bucket_array(ca);
+
+ BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
+ return buckets->b + b;
+}
+
+static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
+{
+ return rcu_dereference_check(ca->bucket_gens,
+ !ca->fs ||
+ percpu_rwsem_is_held(&ca->fs->mark_lock) ||
+ lockdep_is_held(&ca->fs->gc_lock) ||
+ lockdep_is_held(&ca->bucket_lock));
+}
+
+static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
+{
+ struct bucket_gens *gens = bucket_gens(ca);
+
+ BUG_ON(b < gens->first_bucket || b >= gens->nbuckets);
+ return gens->b + b;
+}
+
+static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
+ const struct bch_extent_ptr *ptr)
+{
+ return sector_to_bucket(ca, ptr->offset);
+}
+
+static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c,
+ const struct bch_extent_ptr *ptr)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+ return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
+}
+
+static inline struct bpos PTR_BUCKET_POS_OFFSET(const struct bch_fs *c,
+ const struct bch_extent_ptr *ptr,
+ u32 *bucket_offset)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+ return POS(ptr->dev, sector_to_bucket_and_offset(ca, ptr->offset, bucket_offset));
+}
+
+static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca,
+ const struct bch_extent_ptr *ptr)
+{
+ return gc_bucket(ca, PTR_BUCKET_NR(ca, ptr));
+}
+
+static inline enum bch_data_type ptr_data_type(const struct bkey *k,
+ const struct bch_extent_ptr *ptr)
+{
+ if (bkey_is_btree_ptr(k))
+ return BCH_DATA_btree;
+
+ return ptr->cached ? BCH_DATA_cached : BCH_DATA_user;
+}
+
+static inline s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
+{
+ EBUG_ON(sectors < 0);
+
+ return crc_is_compressed(p.crc)
+ ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
+ p.crc.uncompressed_size)
+ : sectors;
+}
+
+static inline int gen_cmp(u8 a, u8 b)
+{
+ return (s8) (a - b);
+}
+
+static inline int gen_after(u8 a, u8 b)
+{
+ int r = gen_cmp(a, b);
+
+ return r > 0 ? r : 0;
+}
+
+/**
+ * ptr_stale() - check if a pointer points into a bucket that has been
+ * invalidated.
+ *
+ * Returns: how many generations the bucket's gen is ahead of @ptr's gen,
+ * or 0 if the pointer is still valid.
+ */
+static inline u8 ptr_stale(struct bch_dev *ca,
+ const struct bch_extent_ptr *ptr)
+{
+ u8 ret;
+
+ rcu_read_lock();
+ ret = gen_after(*bucket_gen(ca, PTR_BUCKET_NR(ca, ptr)), ptr->gen);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/* Device usage: */
+
+void bch2_dev_usage_read_fast(struct bch_dev *, struct bch_dev_usage *);
+static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+{
+ struct bch_dev_usage ret;
+
+ bch2_dev_usage_read_fast(ca, &ret);
+ return ret;
+}
+
+void bch2_dev_usage_init(struct bch_dev *);
+void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *);
+
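+/*
+ * How many buckets a given watermark must leave free: the switch cases fall
+ * through deliberately, so each watermark accumulates the reserves of every
+ * case below it - e.g. reclaim reserves nothing, while stripe reserves the
+ * sum of everything.
+ */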
+static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
+{
+ s64 reserved = 0;
+
+ switch (watermark) {
+ case BCH_WATERMARK_NR:
+ BUG();
+ case BCH_WATERMARK_stripe:
+ reserved += ca->mi.nbuckets >> 6;
+ fallthrough;
+ case BCH_WATERMARK_normal:
+ reserved += ca->mi.nbuckets >> 6;
+ fallthrough;
+ case BCH_WATERMARK_copygc:
+ reserved += ca->nr_btree_reserve;
+ fallthrough;
+ case BCH_WATERMARK_btree:
+ reserved += ca->nr_btree_reserve;
+ fallthrough;
+ case BCH_WATERMARK_btree_copygc:
+ case BCH_WATERMARK_reclaim:
+ break;
+ }
+
+ return reserved;
+}
+
+static inline u64 dev_buckets_free(struct bch_dev *ca,
+ struct bch_dev_usage usage,
+ enum bch_watermark watermark)
+{
+ return max_t(s64, 0,
+ usage.d[BCH_DATA_free].buckets -
+ ca->nr_open_buckets -
+ bch2_dev_buckets_reserved(ca, watermark));
+}
+
+static inline u64 __dev_buckets_available(struct bch_dev *ca,
+ struct bch_dev_usage usage,
+ enum bch_watermark watermark)
+{
+ return max_t(s64, 0,
+ usage.d[BCH_DATA_free].buckets
+ + usage.d[BCH_DATA_cached].buckets
+ + usage.d[BCH_DATA_need_gc_gens].buckets
+ + usage.d[BCH_DATA_need_discard].buckets
+ - ca->nr_open_buckets
+ - bch2_dev_buckets_reserved(ca, watermark));
+}
+
+static inline u64 dev_buckets_available(struct bch_dev *ca,
+ enum bch_watermark watermark)
+{
+ return __dev_buckets_available(ca, bch2_dev_usage_read(ca), watermark);
+}
+
+/* Filesystem usage: */
+
+static inline unsigned __fs_usage_u64s(unsigned nr_replicas)
+{
+ return sizeof(struct bch_fs_usage) / sizeof(u64) + nr_replicas;
+}
+
+static inline unsigned fs_usage_u64s(struct bch_fs *c)
+{
+ return __fs_usage_u64s(READ_ONCE(c->replicas.nr));
+}
+
+static inline unsigned __fs_usage_online_u64s(unsigned nr_replicas)
+{
+ return sizeof(struct bch_fs_usage_online) / sizeof(u64) + nr_replicas;
+}
+
+static inline unsigned fs_usage_online_u64s(struct bch_fs *c)
+{
+ return __fs_usage_online_u64s(READ_ONCE(c->replicas.nr));
+}
+
+static inline unsigned dev_usage_u64s(void)
+{
+ return sizeof(struct bch_dev_usage) / sizeof(u64);
+}
+
+u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *);
+
+struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *);
+
+void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned);
+
+void bch2_fs_usage_to_text(struct printbuf *,
+ struct bch_fs *, struct bch_fs_usage_online *);
+
+u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *);
+
+struct bch_fs_usage_short
+bch2_fs_usage_read_short(struct bch_fs *);
+
+void bch2_dev_usage_update(struct bch_fs *, struct bch_dev *,
+ const struct bch_alloc_v4 *,
+ const struct bch_alloc_v4 *, u64, bool);
+void bch2_dev_usage_update_m(struct bch_fs *, struct bch_dev *,
+ struct bucket *, struct bucket *);
+
+/* key/bucket marking: */
+
+static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
+ unsigned journal_seq,
+ bool gc)
+{
+ percpu_rwsem_assert_held(&c->mark_lock);
+ BUG_ON(!gc && !journal_seq);
+
+ return this_cpu_ptr(gc
+ ? c->usage_gc
+ : c->usage[journal_seq & JOURNAL_BUF_MASK]);
+}
+
+int bch2_update_replicas(struct bch_fs *, struct bkey_s_c,
+ struct bch_replicas_entry_v1 *, s64,
+ unsigned, bool);
+int bch2_update_replicas_list(struct btree_trans *,
+ struct bch_replicas_entry_v1 *, s64);
+int bch2_update_cached_sectors_list(struct btree_trans *, unsigned, s64);
+int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned);
+
+void bch2_fs_usage_initialize(struct bch_fs *);
+
+int bch2_check_bucket_ref(struct btree_trans *, struct bkey_s_c,
+ const struct bch_extent_ptr *,
+ s64, enum bch_data_type, u8, u8, u32);
+
+int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
+ size_t, enum bch_data_type, unsigned,
+ struct gc_pos, unsigned);
+
+int bch2_trigger_extent(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+
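+/*
+ * Run a trigger for both halves of an update: once for the key being
+ * overwritten (with BTREE_TRIGGER_INSERT cleared) and once for the key being
+ * inserted (with BTREE_TRIGGER_OVERWRITE cleared), skipping deleted keys.
+ */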
+#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
+({ \
+ int ret = 0; \
+ \
+ if (_old.k->type) \
+ ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \
+ if (!ret && _new.k->type) \
+ ret = _fn(_trans, _btree_id, _level, _new.s_c, _flags & ~BTREE_TRIGGER_OVERWRITE);\
+ ret; \
+})
+
+void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *);
+int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
+
+int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,
+ size_t, enum bch_data_type, unsigned);
+int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *);
+int bch2_trans_mark_dev_sbs(struct bch_fs *);
+
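+/*
+ * Check whether bucket @b on @ca overlaps any superblock in the device's sb
+ * layout; bucket 0 is always treated as a superblock bucket.
+ */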
+static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b)
+{
+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
+ u64 b_offset = bucket_to_sector(ca, b);
+ u64 b_end = bucket_to_sector(ca, b + 1);
+ unsigned i;
+
+ if (!b)
+ return true;
+
+ for (i = 0; i < layout->nr_superblocks; i++) {
+ u64 offset = le64_to_cpu(layout->sb_offset[i]);
+ u64 end = offset + (1 << layout->sb_max_size_bits);
+
+ if (!(offset >= b_end || end <= b_offset))
+ return true;
+ }
+
+ return false;
+}
+
+/* disk reservations: */
+
+static inline void bch2_disk_reservation_put(struct bch_fs *c,
+ struct disk_reservation *res)
+{
+ if (res->sectors) {
+ this_cpu_sub(*c->online_reserved, res->sectors);
+ res->sectors = 0;
+ }
+}
+
+#define BCH_DISK_RESERVATION_NOFAIL (1 << 0)
+
+int __bch2_disk_reservation_add(struct bch_fs *,
+ struct disk_reservation *,
+ u64, int);
+
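+/*
+ * Fast path: in-kernel builds try to take the reservation out of this CPU's
+ * cached sectors_available with a this_cpu_cmpxchg() loop, only falling back
+ * to __bch2_disk_reservation_add() when the cache is too small; userspace
+ * builds always take the slow path.
+ */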
+static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
+ u64 sectors, int flags)
+{
+#ifdef __KERNEL__
+ u64 old, new;
+
+ do {
+ old = this_cpu_read(c->pcpu->sectors_available);
+ if (sectors > old)
+ return __bch2_disk_reservation_add(c, res, sectors, flags);
+
+ new = old - sectors;
+ } while (this_cpu_cmpxchg(c->pcpu->sectors_available, old, new) != old);
+
+ this_cpu_add(*c->online_reserved, sectors);
+ res->sectors += sectors;
+ return 0;
+#else
+ return __bch2_disk_reservation_add(c, res, sectors, flags);
+#endif
+}
+
+static inline struct disk_reservation
+bch2_disk_reservation_init(struct bch_fs *c, unsigned nr_replicas)
+{
+ return (struct disk_reservation) {
+ .sectors = 0,
+#if 0
+ /* not used yet: */
+ .gen = c->capacity_gen,
+#endif
+ .nr_replicas = nr_replicas,
+ };
+}
+
+static inline int bch2_disk_reservation_get(struct bch_fs *c,
+ struct disk_reservation *res,
+ u64 sectors, unsigned nr_replicas,
+ int flags)
+{
+ *res = bch2_disk_reservation_init(c, nr_replicas);
+
+ return bch2_disk_reservation_add(c, res, sectors * nr_replicas, flags);
+}
+
+#define RESERVE_FACTOR 6
+
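+/*
+ * Scale raw free space down by 2^RESERVE_FACTOR / (2^RESERVE_FACTOR + 1),
+ * i.e. return r * 64/65, holding back roughly 1.5% of free space.
+ */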
+static inline u64 avail_factor(u64 r)
+{
+ return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1);
+}
+
+int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64);
+void bch2_dev_buckets_free(struct bch_dev *);
+int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *);
+
+#endif /* _BUCKETS_H */
diff --git a/c_src/libbcachefs/buckets_types.h b/c_src/libbcachefs/buckets_types.h
new file mode 100644
index 00000000..783f7101
--- /dev/null
+++ b/c_src/libbcachefs/buckets_types.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BUCKETS_TYPES_H
+#define _BUCKETS_TYPES_H
+
+#include "bcachefs_format.h"
+#include "util.h"
+
+#define BUCKET_JOURNAL_SEQ_BITS 16
+
+struct bucket {
+ u8 lock;
+ u8 gen_valid:1;
+ u8 data_type:7;
+ u8 gen;
+ u8 stripe_redundancy;
+ u32 stripe;
+ u32 dirty_sectors;
+ u32 cached_sectors;
+};
+
+struct bucket_array {
+ struct rcu_head rcu;
+ u16 first_bucket;
+ size_t nbuckets;
+ struct bucket b[];
+};
+
+struct bucket_gens {
+ struct rcu_head rcu;
+ u16 first_bucket;
+ size_t nbuckets;
+ u8 b[];
+};
+
+struct bch_dev_usage {
+ struct {
+ u64 buckets;
+ u64 sectors; /* _compressed_ sectors: */
+ /*
+ * XXX
+ * Why do we have this? Isn't it just buckets * bucket_size -
+ * sectors?
+ */
+ u64 fragmented;
+ } d[BCH_DATA_NR];
+};
+
+struct bch_fs_usage {
+ /* all fields are in units of 512 byte sectors: */
+ u64 hidden;
+ u64 btree;
+ u64 data;
+ u64 cached;
+ u64 reserved;
+ u64 nr_inodes;
+
+ /* XXX: add stats for compression ratio */
+#if 0
+ u64 uncompressed;
+ u64 compressed;
+#endif
+
+ /* broken out: */
+
+ u64 persistent_reserved[BCH_REPLICAS_MAX];
+ u64 replicas[];
+};
+
+struct bch_fs_usage_online {
+ u64 online_reserved;
+ struct bch_fs_usage u;
+};
+
+struct bch_fs_usage_short {
+ u64 capacity;
+ u64 used;
+ u64 free;
+ u64 nr_inodes;
+};
+
+/*
+ * A reservation for space on disk:
+ */
+struct disk_reservation {
+ u64 sectors;
+ u32 gen;
+ unsigned nr_replicas;
+};
+
+#endif /* _BUCKETS_TYPES_H */
diff --git a/c_src/libbcachefs/buckets_waiting_for_journal.c b/c_src/libbcachefs/buckets_waiting_for_journal.c
new file mode 100644
index 00000000..ec1b636e
--- /dev/null
+++ b/c_src/libbcachefs/buckets_waiting_for_journal.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "buckets_waiting_for_journal.h"
+#include <linux/hash.h>
+#include <linux/random.h>
+
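+/*
+ * Small open-addressed hash table keyed by (dev, bucket), storing the journal
+ * sequence number each bucket is waiting on; entries whose sequence number
+ * has already been flushed count as empty and may be overwritten.
+ */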
+static inline struct bucket_hashed *
+bucket_hash(struct buckets_waiting_for_journal_table *t,
+ unsigned hash_seed_idx, u64 dev_bucket)
+{
+ return t->d + hash_64(dev_bucket ^ t->hash_seeds[hash_seed_idx], t->bits);
+}
+
+static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_t bits)
+{
+ unsigned i;
+
+ t->bits = bits;
+ for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++)
+ get_random_bytes(&t->hash_seeds[i], sizeof(t->hash_seeds[i]));
+ memset(t->d, 0, sizeof(t->d[0]) << t->bits);
+}
+
+bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
+ u64 flushed_seq,
+ unsigned dev, u64 bucket)
+{
+ struct buckets_waiting_for_journal_table *t;
+ u64 dev_bucket = (u64) dev << 56 | bucket;
+ bool ret = false;
+ unsigned i;
+
+ mutex_lock(&b->lock);
+ t = b->t;
+
+ for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
+ struct bucket_hashed *h = bucket_hash(t, i, dev_bucket);
+
+ if (h->dev_bucket == dev_bucket) {
+ ret = h->journal_seq > flushed_seq;
+ break;
+ }
+ }
+
+ mutex_unlock(&b->lock);
+
+ return ret;
+}
+
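+/*
+ * Cuckoo-style insert: a key may live in any of its three hashed slots. If
+ * none is free (or stale), evict one, reinsert the evictee elsewhere and
+ * repeat, up to 10 times; returns false if the table needs to be grown and
+ * rehashed.
+ */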
+static bool bucket_table_insert(struct buckets_waiting_for_journal_table *t,
+ struct bucket_hashed *new,
+ u64 flushed_seq)
+{
+ struct bucket_hashed *last_evicted = NULL;
+ unsigned tries, i;
+
+ for (tries = 0; tries < 10; tries++) {
+ struct bucket_hashed *old, *victim = NULL;
+
+ for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
+ old = bucket_hash(t, i, new->dev_bucket);
+
+ if (old->dev_bucket == new->dev_bucket ||
+ old->journal_seq <= flushed_seq) {
+ *old = *new;
+ return true;
+ }
+
+ if (last_evicted != old)
+ victim = old;
+ }
+
+ /* hashed to same slot 3 times: */
+ if (!victim)
+ break;
+
+ /* Failed to find an empty slot: */
+ swap(*new, *victim);
+ last_evicted = victim;
+ }
+
+ return false;
+}
+
+int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
+ u64 flushed_seq,
+ unsigned dev, u64 bucket,
+ u64 journal_seq)
+{
+ struct buckets_waiting_for_journal_table *t, *n;
+ struct bucket_hashed tmp, new = {
+ .dev_bucket = (u64) dev << 56 | bucket,
+ .journal_seq = journal_seq,
+ };
+ size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0;
+ int ret = 0;
+
+ mutex_lock(&b->lock);
+
+ if (likely(bucket_table_insert(b->t, &new, flushed_seq)))
+ goto out;
+
+ t = b->t;
+ size = 1UL << t->bits;
+ for (i = 0; i < size; i++)
+ nr_elements += t->d[i].journal_seq > flushed_seq;
+
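+	/* Grow the table if it would be more than a third full: */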
+ new_bits = t->bits + (nr_elements * 3 > size);
+
+ n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL);
+ if (!n) {
+ ret = -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set;
+ goto out;
+ }
+
+retry_rehash:
+ nr_rehashes++;
+ bucket_table_init(n, new_bits);
+
+ tmp = new;
+ BUG_ON(!bucket_table_insert(n, &tmp, flushed_seq));
+
+ for (i = 0; i < 1UL << t->bits; i++) {
+ if (t->d[i].journal_seq <= flushed_seq)
+ continue;
+
+ tmp = t->d[i];
+ if (!bucket_table_insert(n, &tmp, flushed_seq))
+ goto retry_rehash;
+ }
+
+ b->t = n;
+ kvfree(t);
+
+ pr_debug("took %zu rehashes, table at %zu/%lu elements",
+ nr_rehashes, nr_elements, 1UL << b->t->bits);
+out:
+ mutex_unlock(&b->lock);
+
+ return ret;
+}
+
+void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *c)
+{
+ struct buckets_waiting_for_journal *b = &c->buckets_waiting_for_journal;
+
+ kvfree(b->t);
+}
+
+#define INITIAL_TABLE_BITS 3
+
+int bch2_fs_buckets_waiting_for_journal_init(struct bch_fs *c)
+{
+ struct buckets_waiting_for_journal *b = &c->buckets_waiting_for_journal;
+
+ mutex_init(&b->lock);
+
+ b->t = kvmalloc(sizeof(*b->t) +
+ (sizeof(b->t->d[0]) << INITIAL_TABLE_BITS), GFP_KERNEL);
+ if (!b->t)
+ return -BCH_ERR_ENOMEM_buckets_waiting_for_journal_init;
+
+ bucket_table_init(b->t, INITIAL_TABLE_BITS);
+ return 0;
+}
diff --git a/c_src/libbcachefs/buckets_waiting_for_journal.h b/c_src/libbcachefs/buckets_waiting_for_journal.h
new file mode 100644
index 00000000..d2ae19cb
--- /dev/null
+++ b/c_src/libbcachefs/buckets_waiting_for_journal.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BUCKETS_WAITING_FOR_JOURNAL_H
+#define _BUCKETS_WAITING_FOR_JOURNAL_H
+
+#include "buckets_waiting_for_journal_types.h"
+
+bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
+ u64, unsigned, u64);
+int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
+ u64, unsigned, u64, u64);
+
+void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *);
+int bch2_fs_buckets_waiting_for_journal_init(struct bch_fs *);
+
+#endif /* _BUCKETS_WAITING_FOR_JOURNAL_H */
diff --git a/c_src/libbcachefs/buckets_waiting_for_journal_types.h b/c_src/libbcachefs/buckets_waiting_for_journal_types.h
new file mode 100644
index 00000000..e593db06
--- /dev/null
+++ b/c_src/libbcachefs/buckets_waiting_for_journal_types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H
+#define _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H
+
+#include <linux/siphash.h>
+
+struct bucket_hashed {
+ u64 dev_bucket;
+ u64 journal_seq;
+};
+
+struct buckets_waiting_for_journal_table {
+ unsigned bits;
+ u64 hash_seeds[3];
+ struct bucket_hashed d[];
+};
+
+struct buckets_waiting_for_journal {
+ struct mutex lock;
+ struct buckets_waiting_for_journal_table *t;
+};
+
+#endif /* _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H */
diff --git a/c_src/libbcachefs/chardev.c b/c_src/libbcachefs/chardev.c
new file mode 100644
index 00000000..226b39c1
--- /dev/null
+++ b/c_src/libbcachefs/chardev.c
@@ -0,0 +1,999 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_CHARDEV
+
+#include "bcachefs.h"
+#include "bcachefs_ioctl.h"
+#include "buckets.h"
+#include "chardev.h"
+#include "journal.h"
+#include "move.h"
+#include "recovery.h"
+#include "replicas.h"
+#include "super.h"
+#include "super-io.h"
+#include "thread_with_file.h"
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/major.h>
+#include <linux/sched/task.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+__must_check
+static int copy_to_user_errcode(void __user *to, const void *from, unsigned long n)
+{
+ return copy_to_user(to, from, n) ? -EFAULT : 0;
+}
+
+/* returns with ref on ca->ref */
+static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
+ unsigned flags)
+{
+ struct bch_dev *ca;
+
+ if (flags & BCH_BY_INDEX) {
+ if (dev >= c->sb.nr_devices)
+ return ERR_PTR(-EINVAL);
+
+ rcu_read_lock();
+ ca = rcu_dereference(c->devs[dev]);
+ if (ca)
+ percpu_ref_get(&ca->ref);
+ rcu_read_unlock();
+
+ if (!ca)
+ return ERR_PTR(-EINVAL);
+ } else {
+ char *path;
+
+ path = strndup_user((const char __user *)
+ (unsigned long) dev, PATH_MAX);
+ if (IS_ERR(path))
+ return ERR_CAST(path);
+
+ ca = bch2_dev_lookup(c, path);
+ kfree(path);
+ }
+
+ return ca;
+}
+
+#if 0
+static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
+{
+ struct bch_ioctl_assemble arg;
+ struct bch_fs *c;
+ u64 *user_devs = NULL;
+ char **devs = NULL;
+ unsigned i;
+ int ret = -EFAULT;
+
+ if (copy_from_user(&arg, user_arg, sizeof(arg)))
+ return -EFAULT;
+
+ if (arg.flags || arg.pad)
+ return -EINVAL;
+
+ user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL);
+ if (!user_devs)
+ return -ENOMEM;
+
+ devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL);
+
+ if (copy_from_user(user_devs, user_arg->devs,
+ sizeof(u64) * arg.nr_devs))
+ goto err;
+
+ for (i = 0; i < arg.nr_devs; i++) {
+ devs[i] = strndup_user((const char __user *)(unsigned long)
+ user_devs[i],
+ PATH_MAX);
+		ret = PTR_ERR_OR_ZERO(devs[i]);
+ if (ret)
+ goto err;
+ }
+
+ c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
+ ret = PTR_ERR_OR_ZERO(c);
+ if (!ret)
+ closure_put(&c->cl);
+err:
+ if (devs)
+ for (i = 0; i < arg.nr_devs; i++)
+ kfree(devs[i]);
+ kfree(devs);
+ return ret;
+}
+
+static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg)
+{
+ struct bch_ioctl_incremental arg;
+ const char *err;
+ char *path;
+	char *path;
+	int ret;
+ if (copy_from_user(&arg, user_arg, sizeof(arg)))
+ return -EFAULT;
+
+ if (arg.flags || arg.pad)
+ return -EINVAL;
+
+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
+ ret = PTR_ERR_OR_ZERO(path);
+ if (ret)
+ return ret;
+
+ err = bch2_fs_open_incremental(path);
+ kfree(path);
+
+ if (err) {
+ pr_err("Could not register bcachefs devices: %s", err);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
+
+struct fsck_thread {
+ struct thread_with_stdio thr;
+ struct bch_fs *c;
+ char **devs;
+ size_t nr_devs;
+ struct bch_opts opts;
+};
+
+static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
+{
+ struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
+ if (thr->devs)
+ for (size_t i = 0; i < thr->nr_devs; i++)
+ kfree(thr->devs[i]);
+ kfree(thr->devs);
+ kfree(thr);
+}
+
+static int bch2_fsck_offline_thread_fn(void *arg)
+{
+ struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
+ struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
+
+ thr->thr.thr.ret = PTR_ERR_OR_ZERO(c);
+ if (!thr->thr.thr.ret)
+ bch2_fs_stop(c);
+
+ thread_with_stdio_done(&thr->thr);
+ return 0;
+}
+
+static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
+{
+ struct bch_ioctl_fsck_offline arg;
+ struct fsck_thread *thr = NULL;
+ u64 *devs = NULL;
+ long ret = 0;
+
+ if (copy_from_user(&arg, user_arg, sizeof(arg)))
+ return -EFAULT;
+
+ if (arg.flags)
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
+ !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
+ !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ thr->opts = bch2_opts_empty();
+ thr->nr_devs = arg.nr_devs;
+
+ if (copy_from_user(devs, &user_arg->devs[0],
+ array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ for (size_t i = 0; i < arg.nr_devs; i++) {
+ thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
+ ret = PTR_ERR_OR_ZERO(thr->devs[i]);
+ if (ret)
+ goto err;
+ }
+
+ if (arg.opts) {
+ char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
+
+ ret = PTR_ERR_OR_ZERO(optstr) ?:
+ bch2_parse_mount_opts(NULL, &thr->opts, optstr);
+ kfree(optstr);
+
+ if (ret)
+ goto err;
+ }
+
+ opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
+
+ ret = bch2_run_thread_with_stdio(&thr->thr,
+ bch2_fsck_thread_exit,
+ bch2_fsck_offline_thread_fn);
+err:
+ if (ret < 0) {
+ if (thr)
+ bch2_fsck_thread_exit(&thr->thr);
+ pr_err("ret %s", bch2_err_str(ret));
+ }
+ kfree(devs);
+ return ret;
+}
+
+static long bch2_global_ioctl(unsigned cmd, void __user *arg)
+{
+ long ret;
+
+ switch (cmd) {
+#if 0
+ case BCH_IOCTL_ASSEMBLE:
+ return bch2_ioctl_assemble(arg);
+ case BCH_IOCTL_INCREMENTAL:
+ return bch2_ioctl_incremental(arg);
+#endif
+ case BCH_IOCTL_FSCK_OFFLINE: {
+ ret = bch2_ioctl_fsck_offline(arg);
+ break;
+ }
+ default:
+ ret = -ENOTTY;
+ break;
+ }
+
+ if (ret < 0)
+ ret = bch2_err_class(ret);
+ return ret;
+}
+
+static long bch2_ioctl_query_uuid(struct bch_fs *c,
+ struct bch_ioctl_query_uuid __user *user_arg)
+{
+ return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid,
+ sizeof(c->sb.user_uuid));
+}
+
+#if 0
+static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (arg.flags || arg.pad)
+ return -EINVAL;
+
+ return bch2_fs_start(c);
+}
+
+static long bch2_ioctl_stop(struct bch_fs *c)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ bch2_fs_stop(c);
+ return 0;
+}
+#endif
+
+static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
+{
+ char *path;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (arg.flags || arg.pad)
+ return -EINVAL;
+
+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
+ ret = PTR_ERR_OR_ZERO(path);
+ if (ret)
+ return ret;
+
+ ret = bch2_dev_add(c, path);
+ kfree(path);
+
+ return ret;
+}
+
+static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg)
+{
+ struct bch_dev *ca;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
+ BCH_FORCE_IF_METADATA_LOST|
+ BCH_FORCE_IF_DEGRADED|
+ BCH_BY_INDEX)) ||
+ arg.pad)
+ return -EINVAL;
+
+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ if (IS_ERR(ca))
+ return PTR_ERR(ca);
+
+ return bch2_dev_remove(c, ca, arg.flags);
+}
+
+static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
+{
+ char *path;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (arg.flags || arg.pad)
+ return -EINVAL;
+
+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
+ ret = PTR_ERR_OR_ZERO(path);
+ if (ret)
+ return ret;
+
+ ret = bch2_dev_online(c, path);
+ kfree(path);
+ return ret;
+}
+
+static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg)
+{
+ struct bch_dev *ca;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
+ BCH_FORCE_IF_METADATA_LOST|
+ BCH_FORCE_IF_DEGRADED|
+ BCH_BY_INDEX)) ||
+ arg.pad)
+ return -EINVAL;
+
+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ if (IS_ERR(ca))
+ return PTR_ERR(ca);
+
+ ret = bch2_dev_offline(c, ca, arg.flags);
+ percpu_ref_put(&ca->ref);
+ return ret;
+}
+
+static long bch2_ioctl_disk_set_state(struct bch_fs *c,
+ struct bch_ioctl_disk_set_state arg)
+{
+ struct bch_dev *ca;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
+ BCH_FORCE_IF_METADATA_LOST|
+ BCH_FORCE_IF_DEGRADED|
+ BCH_BY_INDEX)) ||
+ arg.pad[0] || arg.pad[1] || arg.pad[2] ||
+ arg.new_state >= BCH_MEMBER_STATE_NR)
+ return -EINVAL;
+
+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ if (IS_ERR(ca))
+ return PTR_ERR(ca);
+
+ ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
+ if (ret)
+ bch_err(c, "Error setting device state: %s", bch2_err_str(ret));
+
+ percpu_ref_put(&ca->ref);
+ return ret;
+}
+
+struct bch_data_ctx {
+ struct thread_with_file thr;
+
+ struct bch_fs *c;
+ struct bch_ioctl_data arg;
+ struct bch_move_stats stats;
+};
+
+static int bch2_data_thread(void *arg)
+{
+ struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
+
+ ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
+ ctx->stats.data_type = U8_MAX;
+ return 0;
+}
+
+static int bch2_data_job_release(struct inode *inode, struct file *file)
+{
+ struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
+
+ bch2_thread_with_file_exit(&ctx->thr);
+ kfree(ctx);
+ return 0;
+}
+
+static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
+ struct bch_fs *c = ctx->c;
+ struct bch_ioctl_data_event e = {
+ .type = BCH_DATA_EVENT_PROGRESS,
+ .p.data_type = ctx->stats.data_type,
+ .p.btree_id = ctx->stats.pos.btree,
+ .p.pos = ctx->stats.pos.pos,
+ .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
+ .p.sectors_total = bch2_fs_usage_read_short(c).used,
+ };
+
+ if (len < sizeof(e))
+ return -EINVAL;
+
+ return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e);
+}
+
+static const struct file_operations bcachefs_data_ops = {
+ .release = bch2_data_job_release,
+ .read = bch2_data_job_read,
+ .llseek = no_llseek,
+};
+
+static long bch2_ioctl_data(struct bch_fs *c,
+ struct bch_ioctl_data arg)
+{
+ struct bch_data_ctx *ctx;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (arg.op >= BCH_DATA_OP_NR || arg.flags)
+ return -EINVAL;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->c = c;
+ ctx->arg = arg;
+
+ ret = bch2_run_thread_with_file(&ctx->thr,
+ &bcachefs_data_ops,
+ bch2_data_thread);
+ if (ret < 0)
+ kfree(ctx);
+ return ret;
+}
+
+static long bch2_ioctl_fs_usage(struct bch_fs *c,
+ struct bch_ioctl_fs_usage __user *user_arg)
+{
+ struct bch_ioctl_fs_usage *arg = NULL;
+ struct bch_replicas_usage *dst_e, *dst_end;
+ struct bch_fs_usage_online *src;
+ u32 replica_entries_bytes;
+ unsigned i;
+ int ret = 0;
+
+ if (!test_bit(BCH_FS_started, &c->flags))
+ return -EINVAL;
+
+ if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
+ return -EFAULT;
+
+ arg = kzalloc(size_add(sizeof(*arg), replica_entries_bytes), GFP_KERNEL);
+ if (!arg)
+ return -ENOMEM;
+
+ src = bch2_fs_usage_read(c);
+ if (!src) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ arg->capacity = c->capacity;
+ arg->used = bch2_fs_sectors_used(c, src);
+ arg->online_reserved = src->online_reserved;
+
+ for (i = 0; i < BCH_REPLICAS_MAX; i++)
+ arg->persistent_reserved[i] = src->u.persistent_reserved[i];
+
+ dst_e = arg->replicas;
+ dst_end = (void *) arg->replicas + replica_entries_bytes;
+
+ for (i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry_v1 *src_e =
+ cpu_replicas_entry(&c->replicas, i);
+
+ /* check that we have enough space for one replicas entry */
+ if (dst_e + 1 > dst_end) {
+ ret = -ERANGE;
+ break;
+ }
+
+ dst_e->sectors = src->u.replicas[i];
+ dst_e->r = *src_e;
+
+ /* recheck after setting nr_devs: */
+ if (replicas_usage_next(dst_e) > dst_end) {
+ ret = -ERANGE;
+ break;
+ }
+
+ memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs);
+
+ dst_e = replicas_usage_next(dst_e);
+ }
+
+ arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas;
+
+ percpu_up_read(&c->mark_lock);
+ kfree(src);
+
+ if (ret)
+ goto err;
+
+ ret = copy_to_user_errcode(user_arg, arg,
+ sizeof(*arg) + arg->replica_entries_bytes);
+err:
+ kfree(arg);
+ return ret;
+}
+
+/* obsolete, didn't allow for new data types: */
+static long bch2_ioctl_dev_usage(struct bch_fs *c,
+ struct bch_ioctl_dev_usage __user *user_arg)
+{
+ struct bch_ioctl_dev_usage arg;
+ struct bch_dev_usage src;
+ struct bch_dev *ca;
+ unsigned i;
+
+ if (!test_bit(BCH_FS_started, &c->flags))
+ return -EINVAL;
+
+ if (copy_from_user(&arg, user_arg, sizeof(arg)))
+ return -EFAULT;
+
+ if ((arg.flags & ~BCH_BY_INDEX) ||
+ arg.pad[0] ||
+ arg.pad[1] ||
+ arg.pad[2])
+ return -EINVAL;
+
+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ if (IS_ERR(ca))
+ return PTR_ERR(ca);
+
+ src = bch2_dev_usage_read(ca);
+
+ arg.state = ca->mi.state;
+ arg.bucket_size = ca->mi.bucket_size;
+ arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
+
+ for (i = 0; i < BCH_DATA_NR; i++) {
+ arg.d[i].buckets = src.d[i].buckets;
+ arg.d[i].sectors = src.d[i].sectors;
+ arg.d[i].fragmented = src.d[i].fragmented;
+ }
+
+ percpu_ref_put(&ca->ref);
+
+ return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
+}
+
+static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
+ struct bch_ioctl_dev_usage_v2 __user *user_arg)
+{
+ struct bch_ioctl_dev_usage_v2 arg;
+ struct bch_dev_usage src;
+ struct bch_dev *ca;
+ int ret = 0;
+
+ if (!test_bit(BCH_FS_started, &c->flags))
+ return -EINVAL;
+
+ if (copy_from_user(&arg, user_arg, sizeof(arg)))
+ return -EFAULT;
+
+ if ((arg.flags & ~BCH_BY_INDEX) ||
+ arg.pad[0] ||
+ arg.pad[1] ||
+ arg.pad[2])
+ return -EINVAL;
+
+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ if (IS_ERR(ca))
+ return PTR_ERR(ca);
+
+ src = bch2_dev_usage_read(ca);
+
+ arg.state = ca->mi.state;
+ arg.bucket_size = ca->mi.bucket_size;
+ arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR);
+ arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
+
+ ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
+ if (ret)
+ goto err;
+
+ for (unsigned i = 0; i < arg.nr_data_types; i++) {
+ struct bch_ioctl_dev_usage_type t = {
+ .buckets = src.d[i].buckets,
+ .sectors = src.d[i].sectors,
+ .fragmented = src.d[i].fragmented,
+ };
+
+ ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
+ if (ret)
+ goto err;
+ }
+err:
+ percpu_ref_put(&ca->ref);
+ return ret;
+}
+
+static long bch2_ioctl_read_super(struct bch_fs *c,
+ struct bch_ioctl_read_super arg)
+{
+ struct bch_dev *ca = NULL;
+ struct bch_sb *sb;
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) ||
+ arg.pad)
+ return -EINVAL;
+
+ mutex_lock(&c->sb_lock);
+
+ if (arg.flags & BCH_READ_DEV) {
+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
+
+ if (IS_ERR(ca)) {
+ ret = PTR_ERR(ca);
+ goto err;
+ }
+
+ sb = ca->disk_sb.sb;
+ } else {
+ sb = c->disk_sb.sb;
+ }
+
+ if (vstruct_bytes(sb) > arg.size) {
+ ret = -ERANGE;
+ goto err;
+ }
+
+ ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb,
+ vstruct_bytes(sb));
+err:
+ if (!IS_ERR_OR_NULL(ca))
+ percpu_ref_put(&ca->ref);
+ mutex_unlock(&c->sb_lock);
+ return ret;
+}
+
+static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
+ struct bch_ioctl_disk_get_idx arg)
+{
+ dev_t dev = huge_decode_dev(arg.dev);
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!dev)
+ return -EINVAL;
+
+ for_each_online_member(c, ca)
+ if (ca->dev == dev) {
+ percpu_ref_put(&ca->io_ref);
+ return ca->dev_idx;
+ }
+
+ return -BCH_ERR_ENOENT_dev_idx_not_found;
+}
+
+static long bch2_ioctl_disk_resize(struct bch_fs *c,
+ struct bch_ioctl_disk_resize arg)
+{
+ struct bch_dev *ca;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((arg.flags & ~BCH_BY_INDEX) ||
+ arg.pad)
+ return -EINVAL;
+
+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ if (IS_ERR(ca))
+ return PTR_ERR(ca);
+
+ ret = bch2_dev_resize(c, ca, arg.nbuckets);
+
+ percpu_ref_put(&ca->ref);
+ return ret;
+}
+
+static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
+ struct bch_ioctl_disk_resize_journal arg)
+{
+ struct bch_dev *ca;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((arg.flags & ~BCH_BY_INDEX) ||
+ arg.pad)
+ return -EINVAL;
+
+ if (arg.nbuckets > U32_MAX)
+ return -EINVAL;
+
+ ca = bch2_device_lookup(c, arg.dev, arg.flags);
+ if (IS_ERR(ca))
+ return PTR_ERR(ca);
+
+ ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets);
+
+ percpu_ref_put(&ca->ref);
+ return ret;
+}
+
+static int bch2_fsck_online_thread_fn(void *arg)
+{
+ struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
+ struct bch_fs *c = thr->c;
+
+ c->stdio_filter = current;
+ c->stdio = &thr->thr.stdio;
+
+ /*
+ * XXX: can we figure out a way to do this without mucking with c->opts?
+ */
+ unsigned old_fix_errors = c->opts.fix_errors;
+ if (opt_defined(thr->opts, fix_errors))
+ c->opts.fix_errors = thr->opts.fix_errors;
+ else
+ c->opts.fix_errors = FSCK_FIX_ask;
+
+ c->opts.fsck = true;
+ set_bit(BCH_FS_fsck_running, &c->flags);
+
+ c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
+ int ret = bch2_run_online_recovery_passes(c);
+
+ clear_bit(BCH_FS_fsck_running, &c->flags);
+ bch_err_fn(c, ret);
+
+ c->stdio = NULL;
+ c->stdio_filter = NULL;
+ c->opts.fix_errors = old_fix_errors;
+
+ thread_with_stdio_done(&thr->thr);
+
+ up(&c->online_fsck_mutex);
+ bch2_ro_ref_put(c);
+ return 0;
+}
+
+static long bch2_ioctl_fsck_online(struct bch_fs *c,
+ struct bch_ioctl_fsck_online arg)
+{
+ struct fsck_thread *thr = NULL;
+ long ret = 0;
+
+ if (arg.flags)
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!bch2_ro_ref_tryget(c))
+ return -EROFS;
+
+ if (down_trylock(&c->online_fsck_mutex)) {
+ bch2_ro_ref_put(c);
+ return -EAGAIN;
+ }
+
+ thr = kzalloc(sizeof(*thr), GFP_KERNEL);
+ if (!thr) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ thr->c = c;
+ thr->opts = bch2_opts_empty();
+
+ if (arg.opts) {
+ char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
+
+ ret = PTR_ERR_OR_ZERO(optstr) ?:
+ bch2_parse_mount_opts(c, &thr->opts, optstr);
+ kfree(optstr);
+
+ if (ret)
+ goto err;
+ }
+
+ ret = bch2_run_thread_with_stdio(&thr->thr,
+ bch2_fsck_thread_exit,
+ bch2_fsck_online_thread_fn);
+err:
+ if (ret < 0) {
+ bch_err_fn(c, ret);
+ if (thr)
+ bch2_fsck_thread_exit(&thr->thr);
+ up(&c->online_fsck_mutex);
+ bch2_ro_ref_put(c);
+ }
+ return ret;
+}
+
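+/*
+ * Helper for bch2_fs_ioctl(): copy the ioctl argument struct in from
+ * userspace, dispatch to the matching bch2_ioctl_##_name() handler, then jump
+ * to the common error-class conversion at out.
+ */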
+#define BCH_IOCTL(_name, _argtype) \
+do { \
+ _argtype i; \
+ \
+ if (copy_from_user(&i, arg, sizeof(i))) \
+ return -EFAULT; \
+ ret = bch2_ioctl_##_name(c, i); \
+ goto out; \
+} while (0)
+
+long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
+{
+ long ret;
+
+ switch (cmd) {
+ case BCH_IOCTL_QUERY_UUID:
+ return bch2_ioctl_query_uuid(c, arg);
+ case BCH_IOCTL_FS_USAGE:
+ return bch2_ioctl_fs_usage(c, arg);
+ case BCH_IOCTL_DEV_USAGE:
+ return bch2_ioctl_dev_usage(c, arg);
+ case BCH_IOCTL_DEV_USAGE_V2:
+ return bch2_ioctl_dev_usage_v2(c, arg);
+#if 0
+ case BCH_IOCTL_START:
+ BCH_IOCTL(start, struct bch_ioctl_start);
+ case BCH_IOCTL_STOP:
+ return bch2_ioctl_stop(c);
+#endif
+ case BCH_IOCTL_READ_SUPER:
+ BCH_IOCTL(read_super, struct bch_ioctl_read_super);
+ case BCH_IOCTL_DISK_GET_IDX:
+ BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
+ }
+
+ if (!test_bit(BCH_FS_started, &c->flags))
+ return -EINVAL;
+
+ switch (cmd) {
+ case BCH_IOCTL_DISK_ADD:
+ BCH_IOCTL(disk_add, struct bch_ioctl_disk);
+ case BCH_IOCTL_DISK_REMOVE:
+ BCH_IOCTL(disk_remove, struct bch_ioctl_disk);
+ case BCH_IOCTL_DISK_ONLINE:
+ BCH_IOCTL(disk_online, struct bch_ioctl_disk);
+ case BCH_IOCTL_DISK_OFFLINE:
+ BCH_IOCTL(disk_offline, struct bch_ioctl_disk);
+ case BCH_IOCTL_DISK_SET_STATE:
+ BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state);
+ case BCH_IOCTL_DATA:
+ BCH_IOCTL(data, struct bch_ioctl_data);
+ case BCH_IOCTL_DISK_RESIZE:
+ BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
+ case BCH_IOCTL_DISK_RESIZE_JOURNAL:
+ BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal);
+ case BCH_IOCTL_FSCK_ONLINE:
+ BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online);
+ default:
+ return -ENOTTY;
+ }
+out:
+ if (ret < 0)
+ ret = bch2_err_class(ret);
+ return ret;
+}
+
+static DEFINE_IDR(bch_chardev_minor);
+
+static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v)
+{
+ unsigned minor = iminor(file_inode(filp));
+ struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL;
+ void __user *arg = (void __user *) v;
+
+ return c
+ ? bch2_fs_ioctl(c, cmd, arg)
+ : bch2_global_ioctl(cmd, arg);
+}
+
+static const struct file_operations bch_chardev_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = bch2_chardev_ioctl,
+ .open = nonseekable_open,
+};
+
+static int bch_chardev_major;
+static struct class *bch_chardev_class;
+static struct device *bch_chardev;
+
+void bch2_fs_chardev_exit(struct bch_fs *c)
+{
+ if (!IS_ERR_OR_NULL(c->chardev))
+ device_unregister(c->chardev);
+ if (c->minor >= 0)
+ idr_remove(&bch_chardev_minor, c->minor);
+}
+
+int bch2_fs_chardev_init(struct bch_fs *c)
+{
+ c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL);
+ if (c->minor < 0)
+ return c->minor;
+
+ c->chardev = device_create(bch_chardev_class, NULL,
+ MKDEV(bch_chardev_major, c->minor), c,
+ "bcachefs%u-ctl", c->minor);
+ if (IS_ERR(c->chardev))
+ return PTR_ERR(c->chardev);
+
+ return 0;
+}
+
+void bch2_chardev_exit(void)
+{
+ if (!IS_ERR_OR_NULL(bch_chardev_class))
+ device_destroy(bch_chardev_class,
+ MKDEV(bch_chardev_major, U8_MAX));
+ if (!IS_ERR_OR_NULL(bch_chardev_class))
+ class_destroy(bch_chardev_class);
+ if (bch_chardev_major > 0)
+ unregister_chrdev(bch_chardev_major, "bcachefs");
+}
+
+int __init bch2_chardev_init(void)
+{
+ bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops);
+ if (bch_chardev_major < 0)
+ return bch_chardev_major;
+
+ bch_chardev_class = class_create("bcachefs");
+ if (IS_ERR(bch_chardev_class))
+ return PTR_ERR(bch_chardev_class);
+
+ bch_chardev = device_create(bch_chardev_class, NULL,
+ MKDEV(bch_chardev_major, U8_MAX),
+ NULL, "bcachefs-ctl");
+ if (IS_ERR(bch_chardev))
+ return PTR_ERR(bch_chardev);
+
+ return 0;
+}
+
+#endif /* NO_BCACHEFS_CHARDEV */
diff --git a/c_src/libbcachefs/chardev.h b/c_src/libbcachefs/chardev.h
new file mode 100644
index 00000000..0f563ca5
--- /dev/null
+++ b/c_src/libbcachefs/chardev.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_CHARDEV_H
+#define _BCACHEFS_CHARDEV_H
+
+#ifndef NO_BCACHEFS_FS
+
+long bch2_fs_ioctl(struct bch_fs *, unsigned, void __user *);
+
+void bch2_fs_chardev_exit(struct bch_fs *);
+int bch2_fs_chardev_init(struct bch_fs *);
+
+void bch2_chardev_exit(void);
+int __init bch2_chardev_init(void);
+
+#else
+
+static inline long bch2_fs_ioctl(struct bch_fs *c,
+ unsigned cmd, void __user * arg)
+{
+ return -ENOTTY;
+}
+
+static inline void bch2_fs_chardev_exit(struct bch_fs *c) {}
+static inline int bch2_fs_chardev_init(struct bch_fs *c) { return 0; }
+
+static inline void bch2_chardev_exit(void) {}
+static inline int __init bch2_chardev_init(void) { return 0; }
+
+#endif /* NO_BCACHEFS_FS */
+
+#endif /* _BCACHEFS_CHARDEV_H */
diff --git a/c_src/libbcachefs/checksum.c b/c_src/libbcachefs/checksum.c
new file mode 100644
index 00000000..3c761ad6
--- /dev/null
+++ b/c_src/libbcachefs/checksum.c
@@ -0,0 +1,804 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "checksum.h"
+#include "errcode.h"
+#include "super.h"
+#include "super-io.h"
+
+#include <linux/crc32c.h>
+#include <linux/crypto.h>
+#include <linux/xxhash.h>
+#include <linux/key.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/hash.h>
+#include <crypto/poly1305.h>
+#include <crypto/skcipher.h>
+#include <keys/user-type.h>
+
+/*
+ * struct bch2_checksum_state is an abstraction of the checksum state calculated over different pages.
+ * It features page merging without having the checksum algorithm lose its state.
+ * For native checksum algorithms (like crc), a default seed value will do.
+ * For hash-like algorithms, a state needs to be stored.
+ */
+
+struct bch2_checksum_state {
+ union {
+ u64 seed;
+ struct xxh64_state h64state;
+ };
+ unsigned int type;
+};
+
+static void bch2_checksum_init(struct bch2_checksum_state *state)
+{
+ switch (state->type) {
+ case BCH_CSUM_none:
+ case BCH_CSUM_crc32c:
+ case BCH_CSUM_crc64:
+ state->seed = 0;
+ break;
+ case BCH_CSUM_crc32c_nonzero:
+ state->seed = U32_MAX;
+ break;
+ case BCH_CSUM_crc64_nonzero:
+ state->seed = U64_MAX;
+ break;
+ case BCH_CSUM_xxhash:
+ xxh64_reset(&state->h64state, 0);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static u64 bch2_checksum_final(const struct bch2_checksum_state *state)
+{
+ switch (state->type) {
+ case BCH_CSUM_none:
+ case BCH_CSUM_crc32c:
+ case BCH_CSUM_crc64:
+ return state->seed;
+ case BCH_CSUM_crc32c_nonzero:
+ return state->seed ^ U32_MAX;
+ case BCH_CSUM_crc64_nonzero:
+ return state->seed ^ U64_MAX;
+ case BCH_CSUM_xxhash:
+ return xxh64_digest(&state->h64state);
+ default:
+ BUG();
+ }
+}
+
+static void bch2_checksum_update(struct bch2_checksum_state *state, const void *data, size_t len)
+{
+ switch (state->type) {
+ case BCH_CSUM_none:
+ return;
+ case BCH_CSUM_crc32c_nonzero:
+ case BCH_CSUM_crc32c:
+ state->seed = crc32c(state->seed, data, len);
+ break;
+ case BCH_CSUM_crc64_nonzero:
+ case BCH_CSUM_crc64:
+ state->seed = crc64_be(state->seed, data, len);
+ break;
+ case BCH_CSUM_xxhash:
+ xxh64_update(&state->h64state, data, len);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm,
+ struct nonce nonce,
+ struct scatterlist *sg, size_t len)
+{
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
+ int ret;
+
+ skcipher_request_set_sync_tfm(req, tfm);
+ skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
+
+ ret = crypto_skcipher_encrypt(req);
+ if (ret)
+ pr_err("got error %i from crypto_skcipher_encrypt()", ret);
+
+ return ret;
+}
+
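+/*
+ * Encrypt a linear buffer in place: physically contiguous buffers go through
+ * a single scatterlist entry, while vmalloc buffers have to be split into one
+ * scatterlist entry per page.
+ */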
+static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
+ struct nonce nonce,
+ void *buf, size_t len)
+{
+ if (!is_vmalloc_addr(buf)) {
+ struct scatterlist sg;
+
+ sg_init_table(&sg, 1);
+ sg_set_page(&sg,
+ is_vmalloc_addr(buf)
+ ? vmalloc_to_page(buf)
+ : virt_to_page(buf),
+ len, offset_in_page(buf));
+ return do_encrypt_sg(tfm, nonce, &sg, len);
+ } else {
+ unsigned pages = buf_pages(buf, len);
+ struct scatterlist *sg;
+ size_t orig_len = len;
+ int ret, i;
+
+ sg = kmalloc_array(pages, sizeof(*sg), GFP_KERNEL);
+ if (!sg)
+ return -BCH_ERR_ENOMEM_do_encrypt;
+
+ sg_init_table(sg, pages);
+
+ for (i = 0; i < pages; i++) {
+ unsigned offset = offset_in_page(buf);
+ unsigned pg_len = min_t(size_t, len, PAGE_SIZE - offset);
+
+ sg_set_page(sg + i, vmalloc_to_page(buf), pg_len, offset);
+ buf += pg_len;
+ len -= pg_len;
+ }
+
+ ret = do_encrypt_sg(tfm, nonce, sg, orig_len);
+ kfree(sg);
+ return ret;
+ }
+}
+
+int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
+ void *buf, size_t len)
+{
+ struct crypto_sync_skcipher *chacha20 =
+ crypto_alloc_sync_skcipher("chacha20", 0, 0);
+ int ret;
+
+ ret = PTR_ERR_OR_ZERO(chacha20);
+ if (ret) {
+ pr_err("error requesting chacha20 cipher: %s", bch2_err_str(ret));
+ return ret;
+ }
+
+ ret = crypto_skcipher_setkey(&chacha20->base,
+ (void *) key, sizeof(*key));
+ if (ret) {
+ pr_err("error from crypto_skcipher_setkey(): %s", bch2_err_str(ret));
+ goto err;
+ }
+
+ ret = do_encrypt(chacha20, nonce, buf, len);
+err:
+ crypto_free_sync_skcipher(chacha20);
+ return ret;
+}
+
+static int gen_poly_key(struct bch_fs *c, struct shash_desc *desc,
+ struct nonce nonce)
+{
+ u8 key[POLY1305_KEY_SIZE];
+ int ret;
+
+ nonce.d[3] ^= BCH_NONCE_POLY;
+
+ memset(key, 0, sizeof(key));
+ ret = do_encrypt(c->chacha20, nonce, key, sizeof(key));
+ if (ret)
+ return ret;
+
+ desc->tfm = c->poly1305;
+ crypto_shash_init(desc);
+ crypto_shash_update(desc, key, sizeof(key));
+ return 0;
+}
+
+struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type,
+ struct nonce nonce, const void *data, size_t len)
+{
+ switch (type) {
+ case BCH_CSUM_none:
+ case BCH_CSUM_crc32c_nonzero:
+ case BCH_CSUM_crc64_nonzero:
+ case BCH_CSUM_crc32c:
+ case BCH_CSUM_xxhash:
+ case BCH_CSUM_crc64: {
+ struct bch2_checksum_state state;
+
+ state.type = type;
+
+ bch2_checksum_init(&state);
+ bch2_checksum_update(&state, data, len);
+
+ return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) };
+ }
+
+ case BCH_CSUM_chacha20_poly1305_80:
+ case BCH_CSUM_chacha20_poly1305_128: {
+ SHASH_DESC_ON_STACK(desc, c->poly1305);
+ u8 digest[POLY1305_DIGEST_SIZE];
+ struct bch_csum ret = { 0 };
+
+ gen_poly_key(c, desc, nonce);
+
+ crypto_shash_update(desc, data, len);
+ crypto_shash_final(desc, digest);
+
+ memcpy(&ret, digest, bch_crc_bytes[type]);
+ return ret;
+ }
+ default:
+ BUG();
+ }
+}
+
+int bch2_encrypt(struct bch_fs *c, unsigned type,
+ struct nonce nonce, void *data, size_t len)
+{
+ if (!bch2_csum_type_is_encryption(type))
+ return 0;
+
+ return do_encrypt(c->chacha20, nonce, data, len);
+}
+
+static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
+ struct nonce nonce, struct bio *bio,
+ struct bvec_iter *iter)
+{
+ struct bio_vec bv;
+
+ switch (type) {
+ case BCH_CSUM_none:
+ return (struct bch_csum) { 0 };
+ case BCH_CSUM_crc32c_nonzero:
+ case BCH_CSUM_crc64_nonzero:
+ case BCH_CSUM_crc32c:
+ case BCH_CSUM_xxhash:
+ case BCH_CSUM_crc64: {
+ struct bch2_checksum_state state;
+
+ state.type = type;
+ bch2_checksum_init(&state);
+
+#ifdef CONFIG_HIGHMEM
+ __bio_for_each_segment(bv, bio, *iter, *iter) {
+ void *p = kmap_local_page(bv.bv_page) + bv.bv_offset;
+
+ bch2_checksum_update(&state, p, bv.bv_len);
+ kunmap_local(p);
+ }
+#else
+ __bio_for_each_bvec(bv, bio, *iter, *iter)
+ bch2_checksum_update(&state, page_address(bv.bv_page) + bv.bv_offset,
+ bv.bv_len);
+#endif
+ return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) };
+ }
+
+ case BCH_CSUM_chacha20_poly1305_80:
+ case BCH_CSUM_chacha20_poly1305_128: {
+ SHASH_DESC_ON_STACK(desc, c->poly1305);
+ u8 digest[POLY1305_DIGEST_SIZE];
+ struct bch_csum ret = { 0 };
+
+ gen_poly_key(c, desc, nonce);
+
+#ifdef CONFIG_HIGHMEM
+ __bio_for_each_segment(bv, bio, *iter, *iter) {
+ void *p = kmap_local_page(bv.bv_page) + bv.bv_offset;
+
+ crypto_shash_update(desc, p, bv.bv_len);
+ kunmap_local(p);
+ }
+#else
+ __bio_for_each_bvec(bv, bio, *iter, *iter)
+ crypto_shash_update(desc,
+ page_address(bv.bv_page) + bv.bv_offset,
+ bv.bv_len);
+#endif
+ crypto_shash_final(desc, digest);
+
+ memcpy(&ret, digest, bch_crc_bytes[type]);
+ return ret;
+ }
+ default:
+ BUG();
+ }
+}
+
+struct bch_csum bch2_checksum_bio(struct bch_fs *c, unsigned type,
+ struct nonce nonce, struct bio *bio)
+{
+ struct bvec_iter iter = bio->bi_iter;
+
+ return __bch2_checksum_bio(c, type, nonce, bio, &iter);
+}
+
+int __bch2_encrypt_bio(struct bch_fs *c, unsigned type,
+ struct nonce nonce, struct bio *bio)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ struct scatterlist sgl[16], *sg = sgl;
+ size_t bytes = 0;
+ int ret = 0;
+
+ if (!bch2_csum_type_is_encryption(type))
+ return 0;
+
+ sg_init_table(sgl, ARRAY_SIZE(sgl));
+
+ bio_for_each_segment(bv, bio, iter) {
+ if (sg == sgl + ARRAY_SIZE(sgl)) {
+ sg_mark_end(sg - 1);
+
+ ret = do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
+ if (ret)
+ return ret;
+
+ nonce = nonce_add(nonce, bytes);
+ bytes = 0;
+
+ sg_init_table(sgl, ARRAY_SIZE(sgl));
+ sg = sgl;
+ }
+
+ sg_set_page(sg++, bv.bv_page, bv.bv_len, bv.bv_offset);
+ bytes += bv.bv_len;
+ }
+
+ sg_mark_end(sg - 1);
+ return do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
+}
+
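+/*
+ * Combine checksum @a over one region with checksum @b over the @b_len bytes
+ * that follow it: extend @a by checksumming @b_len zero bytes (seeded with
+ * @a), then xor in @b. Only valid for the types bch2_checksum_mergeable()
+ * reports as mergeable.
+ */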
+struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a,
+ struct bch_csum b, size_t b_len)
+{
+ struct bch2_checksum_state state;
+
+ state.type = type;
+ bch2_checksum_init(&state);
+ state.seed = le64_to_cpu(a.lo);
+
+ BUG_ON(!bch2_checksum_mergeable(type));
+
+ while (b_len) {
+ unsigned page_len = min_t(unsigned, b_len, PAGE_SIZE);
+
+ bch2_checksum_update(&state,
+ page_address(ZERO_PAGE(0)), page_len);
+ b_len -= page_len;
+ }
+ a.lo = cpu_to_le64(bch2_checksum_final(&state));
+ a.lo ^= b.lo;
+ a.hi ^= b.hi;
+ return a;
+}
+
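+/*
+ * Re-checksum a bio that was previously checksummed as a single extent,
+ * splitting it into up to three ranges (@len_a, @len_b, and the remainder).
+ * The new per-range checksums are verified against @crc_old, either by
+ * merging them (when the checksum type is unchanged and mergeable) or by
+ * recomputing the old checksum over the whole bio, before @crc_a/@crc_b are
+ * filled in.
+ */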
+int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
+ struct bversion version,
+ struct bch_extent_crc_unpacked crc_old,
+ struct bch_extent_crc_unpacked *crc_a,
+ struct bch_extent_crc_unpacked *crc_b,
+ unsigned len_a, unsigned len_b,
+ unsigned new_csum_type)
+{
+ struct bvec_iter iter = bio->bi_iter;
+ struct nonce nonce = extent_nonce(version, crc_old);
+ struct bch_csum merged = { 0 };
+ struct crc_split {
+ struct bch_extent_crc_unpacked *crc;
+ unsigned len;
+ unsigned csum_type;
+ struct bch_csum csum;
+ } splits[3] = {
+ { crc_a, len_a, new_csum_type, { 0 }},
+ { crc_b, len_b, new_csum_type, { 0 } },
+ { NULL, bio_sectors(bio) - len_a - len_b, new_csum_type, { 0 } },
+ }, *i;
+ bool mergeable = crc_old.csum_type == new_csum_type &&
+ bch2_checksum_mergeable(new_csum_type);
+ unsigned crc_nonce = crc_old.nonce;
+
+ BUG_ON(len_a + len_b > bio_sectors(bio));
+ BUG_ON(crc_old.uncompressed_size != bio_sectors(bio));
+ BUG_ON(crc_is_compressed(crc_old));
+ BUG_ON(bch2_csum_type_is_encryption(crc_old.csum_type) !=
+ bch2_csum_type_is_encryption(new_csum_type));
+
+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) {
+ iter.bi_size = i->len << 9;
+ if (mergeable || i->crc)
+ i->csum = __bch2_checksum_bio(c, i->csum_type,
+ nonce, bio, &iter);
+ else
+ bio_advance_iter(bio, &iter, i->len << 9);
+ nonce = nonce_add(nonce, i->len << 9);
+ }
+
+ if (mergeable)
+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++)
+ merged = bch2_checksum_merge(new_csum_type, merged,
+ i->csum, i->len << 9);
+ else
+ merged = bch2_checksum_bio(c, crc_old.csum_type,
+ extent_nonce(version, crc_old), bio);
+
+ if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
+ bch_err(c, "checksum error in %s() (memory corruption or bug?)\n"
+ "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)",
+ __func__,
+ crc_old.csum.hi,
+ crc_old.csum.lo,
+ merged.hi,
+ merged.lo,
+ bch2_csum_types[crc_old.csum_type],
+ bch2_csum_types[new_csum_type]);
+ return -EIO;
+ }
+
+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) {
+ if (i->crc)
+ *i->crc = (struct bch_extent_crc_unpacked) {
+ .csum_type = i->csum_type,
+ .compression_type = crc_old.compression_type,
+ .compressed_size = i->len,
+ .uncompressed_size = i->len,
+ .offset = 0,
+ .live_size = i->len,
+ .nonce = crc_nonce,
+ .csum = i->csum,
+ };
+
+ if (bch2_csum_type_is_encryption(new_csum_type))
+ crc_nonce += i->len;
+ }
+
+ return 0;
+}
+
+/* BCH_SB_FIELD_crypt: */
+
+static int bch2_sb_crypt_validate(struct bch_sb *sb,
+ struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_crypt *crypt = field_to_type(f, crypt);
+
+ if (vstruct_bytes(&crypt->field) < sizeof(*crypt)) {
+ prt_printf(err, "wrong size (got %zu should be %zu)",
+ vstruct_bytes(&crypt->field), sizeof(*crypt));
+ return -BCH_ERR_invalid_sb_crypt;
+ }
+
+ if (BCH_CRYPT_KDF_TYPE(crypt)) {
+ prt_printf(err, "bad kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt));
+ return -BCH_ERR_invalid_sb_crypt;
+ }
+
+ return 0;
+}
+
+static void bch2_sb_crypt_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_crypt *crypt = field_to_type(f, crypt);
+
+ prt_printf(out, "KFD: %llu", BCH_CRYPT_KDF_TYPE(crypt));
+ prt_newline(out);
+ prt_printf(out, "scrypt n: %llu", BCH_KDF_SCRYPT_N(crypt));
+ prt_newline(out);
+ prt_printf(out, "scrypt r: %llu", BCH_KDF_SCRYPT_R(crypt));
+ prt_newline(out);
+ prt_printf(out, "scrypt p: %llu", BCH_KDF_SCRYPT_P(crypt));
+ prt_newline(out);
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
+ .validate = bch2_sb_crypt_validate,
+ .to_text = bch2_sb_crypt_to_text,
+};
+
+#ifdef __KERNEL__
+static int __bch2_request_key(char *key_description, struct bch_key *key)
+{
+ struct key *keyring_key;
+ const struct user_key_payload *ukp;
+ int ret;
+
+ keyring_key = request_key(&key_type_user, key_description, NULL);
+ if (IS_ERR(keyring_key))
+ return PTR_ERR(keyring_key);
+
+ down_read(&keyring_key->sem);
+ ukp = dereference_key_locked(keyring_key);
+ if (ukp->datalen == sizeof(*key)) {
+ memcpy(key, ukp->data, ukp->datalen);
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+ up_read(&keyring_key->sem);
+ key_put(keyring_key);
+
+ return ret;
+}
+#else
+#include <keyutils.h>
+
+static int __bch2_request_key(char *key_description, struct bch_key *key)
+{
+ key_serial_t key_id;
+
+ key_id = request_key("user", key_description, NULL,
+ KEY_SPEC_SESSION_KEYRING);
+ if (key_id >= 0)
+ goto got_key;
+
+ key_id = request_key("user", key_description, NULL,
+ KEY_SPEC_USER_KEYRING);
+ if (key_id >= 0)
+ goto got_key;
+
+ key_id = request_key("user", key_description, NULL,
+ KEY_SPEC_USER_SESSION_KEYRING);
+ if (key_id >= 0)
+ goto got_key;
+
+ return -errno;
+got_key:
+
+ if (keyctl_read(key_id, (void *) key, sizeof(*key)) != sizeof(*key))
+ return -1;
+
+ return 0;
+}
+
+#include "../crypto.h"
+#endif
+
+int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
+{
+ struct printbuf key_description = PRINTBUF;
+ int ret;
+
+ prt_printf(&key_description, "bcachefs:");
+ pr_uuid(&key_description, sb->user_uuid.b);
+
+ ret = __bch2_request_key(key_description.buf, key);
+ printbuf_exit(&key_description);
+
+#ifndef __KERNEL__
+ if (ret) {
+ char *passphrase = read_passphrase("Enter passphrase: ");
+ struct bch_encrypted_key sb_key;
+
+ bch2_passphrase_check(sb, passphrase,
+ key, &sb_key);
+ ret = 0;
+ }
+#endif
+
+ /* stash with memfd, pass memfd fd to mount */
+
+ return ret;
+}
+
+#ifndef __KERNEL__
+int bch2_revoke_key(struct bch_sb *sb)
+{
+ key_serial_t key_id;
+ struct printbuf key_description = PRINTBUF;
+
+ prt_printf(&key_description, "bcachefs:");
+ pr_uuid(&key_description, sb->user_uuid.b);
+
+ key_id = request_key("user", key_description.buf, NULL, KEY_SPEC_USER_KEYRING);
+ printbuf_exit(&key_description);
+ if (key_id < 0)
+ return errno;
+
+ keyctl_revoke(key_id);
+
+ return 0;
+}
+#endif
+
+int bch2_decrypt_sb_key(struct bch_fs *c,
+ struct bch_sb_field_crypt *crypt,
+ struct bch_key *key)
+{
+ struct bch_encrypted_key sb_key = crypt->key;
+ struct bch_key user_key;
+ int ret = 0;
+
+ /* is key encrypted? */
+ if (!bch2_key_is_encrypted(&sb_key))
+ goto out;
+
+ ret = bch2_request_key(c->disk_sb.sb, &user_key);
+ if (ret) {
+ bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret));
+ goto err;
+ }
+
+ /* decrypt real key: */
+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c),
+ &sb_key, sizeof(sb_key));
+ if (ret)
+ goto err;
+
+ if (bch2_key_is_encrypted(&sb_key)) {
+ bch_err(c, "incorrect encryption key");
+ ret = -EINVAL;
+ goto err;
+ }
+out:
+ *key = sb_key.key;
+err:
+ memzero_explicit(&sb_key, sizeof(sb_key));
+ memzero_explicit(&user_key, sizeof(user_key));
+ return ret;
+}
+
+static int bch2_alloc_ciphers(struct bch_fs *c)
+{
+ int ret;
+
+ if (!c->chacha20)
+ c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
+ ret = PTR_ERR_OR_ZERO(c->chacha20);
+
+ if (ret) {
+ bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret));
+ return ret;
+ }
+
+ if (!c->poly1305)
+ c->poly1305 = crypto_alloc_shash("poly1305", 0, 0);
+ ret = PTR_ERR_OR_ZERO(c->poly1305);
+
+ if (ret) {
+ bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret));
+ return ret;
+ }
+
+ return 0;
+}
+
+int bch2_disable_encryption(struct bch_fs *c)
+{
+ struct bch_sb_field_crypt *crypt;
+ struct bch_key key;
+ int ret = -EINVAL;
+
+ mutex_lock(&c->sb_lock);
+
+ crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
+ if (!crypt)
+ goto out;
+
+ /* is key encrypted? */
+ ret = 0;
+ if (bch2_key_is_encrypted(&crypt->key))
+ goto out;
+
+ ret = bch2_decrypt_sb_key(c, crypt, &key);
+ if (ret)
+ goto out;
+
+ crypt->key.magic = cpu_to_le64(BCH_KEY_MAGIC);
+ crypt->key.key = key;
+
+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0);
+ bch2_write_super(c);
+out:
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
+}
+
+int bch2_enable_encryption(struct bch_fs *c, bool keyed)
+{
+ struct bch_encrypted_key key;
+ struct bch_key user_key;
+ struct bch_sb_field_crypt *crypt;
+ int ret = -EINVAL;
+
+ mutex_lock(&c->sb_lock);
+
+ /* Do we already have an encryption key? */
+ if (bch2_sb_field_get(c->disk_sb.sb, crypt))
+ goto err;
+
+ ret = bch2_alloc_ciphers(c);
+ if (ret)
+ goto err;
+
+ key.magic = cpu_to_le64(BCH_KEY_MAGIC);
+ get_random_bytes(&key.key, sizeof(key.key));
+
+ if (keyed) {
+ ret = bch2_request_key(c->disk_sb.sb, &user_key);
+ if (ret) {
+ bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret));
+ goto err;
+ }
+
+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c),
+ &key, sizeof(key));
+ if (ret)
+ goto err;
+ }
+
+ ret = crypto_skcipher_setkey(&c->chacha20->base,
+ (void *) &key.key, sizeof(key.key));
+ if (ret)
+ goto err;
+
+ crypt = bch2_sb_field_resize(&c->disk_sb, crypt,
+ sizeof(*crypt) / sizeof(u64));
+ if (!crypt) {
+ ret = -BCH_ERR_ENOSPC_sb_crypt;
+ goto err;
+ }
+
+ crypt->key = key;
+
+ /* write superblock */
+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1);
+ bch2_write_super(c);
+err:
+ mutex_unlock(&c->sb_lock);
+ memzero_explicit(&user_key, sizeof(user_key));
+ memzero_explicit(&key, sizeof(key));
+ return ret;
+}
+
+void bch2_fs_encryption_exit(struct bch_fs *c)
+{
+ if (!IS_ERR_OR_NULL(c->poly1305))
+ crypto_free_shash(c->poly1305);
+ if (!IS_ERR_OR_NULL(c->chacha20))
+ crypto_free_sync_skcipher(c->chacha20);
+ if (!IS_ERR_OR_NULL(c->sha256))
+ crypto_free_shash(c->sha256);
+}
+
+int bch2_fs_encryption_init(struct bch_fs *c)
+{
+ struct bch_sb_field_crypt *crypt;
+ struct bch_key key;
+ int ret = 0;
+
+ c->sha256 = crypto_alloc_shash("sha256", 0, 0);
+ ret = PTR_ERR_OR_ZERO(c->sha256);
+ if (ret) {
+ bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret));
+ goto out;
+ }
+
+ crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
+ if (!crypt)
+ goto out;
+
+ ret = bch2_alloc_ciphers(c);
+ if (ret)
+ goto out;
+
+ ret = bch2_decrypt_sb_key(c, crypt, &key);
+ if (ret)
+ goto out;
+
+ ret = crypto_skcipher_setkey(&c->chacha20->base,
+ (void *) &key.key, sizeof(key.key));
+ if (ret)
+ goto out;
+out:
+ memzero_explicit(&key, sizeof(key));
+ return ret;
+}
diff --git a/c_src/libbcachefs/checksum.h b/c_src/libbcachefs/checksum.h
new file mode 100644
index 00000000..1b8c2c10
--- /dev/null
+++ b/c_src/libbcachefs/checksum.h
@@ -0,0 +1,236 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_CHECKSUM_H
+#define _BCACHEFS_CHECKSUM_H
+
+#include "bcachefs.h"
+#include "extents_types.h"
+#include "super-io.h"
+
+#include <linux/crc64.h>
+#include <crypto/chacha.h>
+
+static inline bool bch2_checksum_mergeable(unsigned type)
+{
+ switch (type) {
+ case BCH_CSUM_none:
+ case BCH_CSUM_crc32c:
+ case BCH_CSUM_crc64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum,
+ struct bch_csum, size_t);
+
+#define BCH_NONCE_EXTENT cpu_to_le32(1 << 28)
+#define BCH_NONCE_BTREE cpu_to_le32(2 << 28)
+#define BCH_NONCE_JOURNAL cpu_to_le32(3 << 28)
+#define BCH_NONCE_PRIO cpu_to_le32(4 << 28)
+#define BCH_NONCE_POLY cpu_to_le32(1 << 31)
+
+struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce,
+ const void *, size_t);
+
+/*
+ * This is used for various on disk data structures - bch_sb, prio_set, bset,
+ * jset: The checksum is _always_ the first field of these structs
+ */
+#define csum_vstruct(_c, _type, _nonce, _i) \
+({ \
+ const void *_start = ((const void *) (_i)) + sizeof((_i)->csum);\
+ \
+ bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\
+})
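+/*
+ * Example usage (illustrative): checksumming a struct whose csum field comes
+ * first, e.g.
+ *
+ *	i->csum = csum_vstruct(c, BCH_CSUM_crc32c, null_nonce(), i);
+ */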
+
+static inline void bch2_csum_to_text(struct printbuf *out,
+ enum bch_csum_type type,
+ struct bch_csum csum)
+{
+ const u8 *p = (u8 *) &csum;
+ unsigned bytes = type < BCH_CSUM_NR ? bch_crc_bytes[type] : 16;
+
+ for (unsigned i = 0; i < bytes; i++)
+ prt_hex_byte(out, p[i]);
+}
+
+static inline void bch2_csum_err_msg(struct printbuf *out,
+ enum bch_csum_type type,
+ struct bch_csum expected,
+ struct bch_csum got)
+{
+ prt_printf(out, "checksum error: got ");
+ bch2_csum_to_text(out, type, got);
+ prt_str(out, " should be ");
+ bch2_csum_to_text(out, type, expected);
+ prt_printf(out, " type %s", bch2_csum_types[type]);
+}
+
+int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
+int bch2_request_key(struct bch_sb *, struct bch_key *);
+#ifndef __KERNEL__
+int bch2_revoke_key(struct bch_sb *);
+#endif
+
+int bch2_encrypt(struct bch_fs *, unsigned, struct nonce,
+ void *data, size_t);
+
+struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned,
+ struct nonce, struct bio *);
+
+int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion,
+ struct bch_extent_crc_unpacked,
+ struct bch_extent_crc_unpacked *,
+ struct bch_extent_crc_unpacked *,
+ unsigned, unsigned, unsigned);
+
+int __bch2_encrypt_bio(struct bch_fs *, unsigned,
+ struct nonce, struct bio *);
+
+static inline int bch2_encrypt_bio(struct bch_fs *c, unsigned type,
+ struct nonce nonce, struct bio *bio)
+{
+ return bch2_csum_type_is_encryption(type)
+ ? __bch2_encrypt_bio(c, type, nonce, bio)
+ : 0;
+}
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_crypt;
+
+int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *,
+ struct bch_key *);
+
+int bch2_disable_encryption(struct bch_fs *);
+int bch2_enable_encryption(struct bch_fs *, bool);
+
+void bch2_fs_encryption_exit(struct bch_fs *);
+int bch2_fs_encryption_init(struct bch_fs *);
+
+static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type,
+ bool data)
+{
+ switch (type) {
+ case BCH_CSUM_OPT_none:
+ return BCH_CSUM_none;
+ case BCH_CSUM_OPT_crc32c:
+ return data ? BCH_CSUM_crc32c : BCH_CSUM_crc32c_nonzero;
+ case BCH_CSUM_OPT_crc64:
+ return data ? BCH_CSUM_crc64 : BCH_CSUM_crc64_nonzero;
+ case BCH_CSUM_OPT_xxhash:
+ return BCH_CSUM_xxhash;
+ default:
+ BUG();
+ }
+}
+
+static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c,
+ struct bch_io_opts opts)
+{
+ if (opts.nocow)
+ return 0;
+
+ if (c->sb.encryption_type)
+ return c->opts.wide_macs
+ ? BCH_CSUM_chacha20_poly1305_128
+ : BCH_CSUM_chacha20_poly1305_80;
+
+ return bch2_csum_opt_to_type(opts.data_checksum, true);
+}
+
+static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c)
+{
+ if (c->sb.encryption_type)
+ return BCH_CSUM_chacha20_poly1305_128;
+
+ return bch2_csum_opt_to_type(c->opts.metadata_checksum, false);
+}
+
+static inline bool bch2_checksum_type_valid(const struct bch_fs *c,
+ unsigned type)
+{
+ if (type >= BCH_CSUM_NR)
+ return false;
+
+ if (bch2_csum_type_is_encryption(type) && !c->chacha20)
+ return false;
+
+ return true;
+}
+
+/* returns true if not equal */
+static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r)
+{
+ /*
+ * XXX: need some way of preventing the compiler from optimizing this
+ * into a form that isn't constant time..
+ */
+ return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0;
+}
+
+/* for skipping ahead and encrypting/decrypting at an offset: */
+static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
+{
+ EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1));
+
+ le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE);
+ return nonce;
+}
+
+static inline struct nonce null_nonce(void)
+{
+ struct nonce ret;
+
+ memset(&ret, 0, sizeof(ret));
+ return ret;
+}
+
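+/*
+ * Per-extent nonce: word 0 carries the uncompressed size (only for compressed
+ * extents), words 1-2 the extent version, word 3 the high version bits plus
+ * the compression type, xored with BCH_NONCE_EXTENT; the result is then
+ * advanced by crc.nonce sectors so split extents get distinct keystream
+ * offsets.
+ */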
+static inline struct nonce extent_nonce(struct bversion version,
+ struct bch_extent_crc_unpacked crc)
+{
+ unsigned compression_type = crc_is_compressed(crc)
+ ? crc.compression_type
+ : 0;
+ unsigned size = compression_type ? crc.uncompressed_size : 0;
+ struct nonce nonce = (struct nonce) {{
+ [0] = cpu_to_le32(size << 22),
+ [1] = cpu_to_le32(version.lo),
+ [2] = cpu_to_le32(version.lo >> 32),
+ [3] = cpu_to_le32(version.hi|
+ (compression_type << 24))^BCH_NONCE_EXTENT,
+ }};
+
+ return nonce_add(nonce, crc.nonce << 9);
+}
+
+static inline bool bch2_key_is_encrypted(struct bch_encrypted_key *key)
+{
+ return le64_to_cpu(key->magic) != BCH_KEY_MAGIC;
+}
+
+static inline struct nonce __bch2_sb_key_nonce(struct bch_sb *sb)
+{
+ __le64 magic = __bch2_sb_magic(sb);
+
+ return (struct nonce) {{
+ [0] = 0,
+ [1] = 0,
+ [2] = ((__le32 *) &magic)[0],
+ [3] = ((__le32 *) &magic)[1],
+ }};
+}
+
+static inline struct nonce bch2_sb_key_nonce(struct bch_fs *c)
+{
+ __le64 magic = bch2_sb_magic(c);
+
+ return (struct nonce) {{
+ [0] = 0,
+ [1] = 0,
+ [2] = ((__le32 *) &magic)[0],
+ [3] = ((__le32 *) &magic)[1],
+ }};
+}
+
+#endif /* _BCACHEFS_CHECKSUM_H */
diff --git a/c_src/libbcachefs/clock.c b/c_src/libbcachefs/clock.c
new file mode 100644
index 00000000..f4188909
--- /dev/null
+++ b/c_src/libbcachefs/clock.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "clock.h"
+
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/preempt.h>
+
+static inline long io_timer_cmp(io_timer_heap *h,
+ struct io_timer *l,
+ struct io_timer *r)
+{
+ return l->expire - r->expire;
+}
+
+void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
+{
+ size_t i;
+
+ spin_lock(&clock->timer_lock);
+
+ if (time_after_eq((unsigned long) atomic64_read(&clock->now),
+ timer->expire)) {
+ spin_unlock(&clock->timer_lock);
+ timer->fn(timer);
+ return;
+ }
+
+ for (i = 0; i < clock->timers.used; i++)
+ if (clock->timers.data[i] == timer)
+ goto out;
+
+ BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL));
+out:
+ spin_unlock(&clock->timer_lock);
+}
+
+void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
+{
+ size_t i;
+
+ spin_lock(&clock->timer_lock);
+
+ for (i = 0; i < clock->timers.used; i++)
+ if (clock->timers.data[i] == timer) {
+ heap_del(&clock->timers, i, io_timer_cmp, NULL);
+ break;
+ }
+
+ spin_unlock(&clock->timer_lock);
+}
+
+struct io_clock_wait {
+ struct io_timer io_timer;
+ struct timer_list cpu_timer;
+ struct task_struct *task;
+ int expired;
+};
+
+static void io_clock_wait_fn(struct io_timer *timer)
+{
+ struct io_clock_wait *wait = container_of(timer,
+ struct io_clock_wait, io_timer);
+
+ wait->expired = 1;
+ wake_up_process(wait->task);
+}
+
+static void io_clock_cpu_timeout(struct timer_list *timer)
+{
+ struct io_clock_wait *wait = container_of(timer,
+ struct io_clock_wait, cpu_timer);
+
+ wait->expired = 1;
+ wake_up_process(wait->task);
+}
+
+void bch2_io_clock_schedule_timeout(struct io_clock *clock, unsigned long until)
+{
+ struct io_clock_wait wait;
+
+ /* XXX: calculate sleep time rigorously */
+ wait.io_timer.expire = until;
+ wait.io_timer.fn = io_clock_wait_fn;
+ wait.task = current;
+ wait.expired = 0;
+ bch2_io_timer_add(clock, &wait.io_timer);
+
+ schedule();
+
+ bch2_io_timer_del(clock, &wait.io_timer);
+}
+
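+/*
+ * Sleep until the IO clock reaches @io_until, a CPU-time timeout of
+ * @cpu_timeout jiffies expires, or (for kthreads) kthread_should_stop()
+ * becomes true, whichever happens first.
+ */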
+void bch2_kthread_io_clock_wait(struct io_clock *clock,
+ unsigned long io_until,
+ unsigned long cpu_timeout)
+{
+ bool kthread = (current->flags & PF_KTHREAD) != 0;
+ struct io_clock_wait wait;
+
+ wait.io_timer.expire = io_until;
+ wait.io_timer.fn = io_clock_wait_fn;
+ wait.task = current;
+ wait.expired = 0;
+ bch2_io_timer_add(clock, &wait.io_timer);
+
+ timer_setup_on_stack(&wait.cpu_timer, io_clock_cpu_timeout, 0);
+
+ if (cpu_timeout != MAX_SCHEDULE_TIMEOUT)
+ mod_timer(&wait.cpu_timer, cpu_timeout + jiffies);
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (kthread && kthread_should_stop())
+ break;
+
+ if (wait.expired)
+ break;
+
+ schedule();
+ try_to_freeze();
+ }
+
+ __set_current_state(TASK_RUNNING);
+ del_timer_sync(&wait.cpu_timer);
+ destroy_timer_on_stack(&wait.cpu_timer);
+ bch2_io_timer_del(clock, &wait.io_timer);
+}
+
+static struct io_timer *get_expired_timer(struct io_clock *clock,
+ unsigned long now)
+{
+ struct io_timer *ret = NULL;
+
+ spin_lock(&clock->timer_lock);
+
+ if (clock->timers.used &&
+ time_after_eq(now, clock->timers.data[0]->expire))
+ heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
+
+ spin_unlock(&clock->timer_lock);
+
+ return ret;
+}
+
+void __bch2_increment_clock(struct io_clock *clock, unsigned sectors)
+{
+ struct io_timer *timer;
+ unsigned long now = atomic64_add_return(sectors, &clock->now);
+
+ while ((timer = get_expired_timer(clock, now)))
+ timer->fn(timer);
+}
+
+void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
+{
+ unsigned long now;
+ unsigned i;
+
+ out->atomic++;
+ spin_lock(&clock->timer_lock);
+ now = atomic64_read(&clock->now);
+
+ for (i = 0; i < clock->timers.used; i++)
+ prt_printf(out, "%ps:\t%li\n",
+ clock->timers.data[i]->fn,
+ clock->timers.data[i]->expire - now);
+ spin_unlock(&clock->timer_lock);
+ --out->atomic;
+}
+
+void bch2_io_clock_exit(struct io_clock *clock)
+{
+ free_heap(&clock->timers);
+ free_percpu(clock->pcpu_buf);
+}
+
+int bch2_io_clock_init(struct io_clock *clock)
+{
+ atomic64_set(&clock->now, 0);
+ spin_lock_init(&clock->timer_lock);
+
+ clock->max_slop = IO_CLOCK_PCPU_SECTORS * num_possible_cpus();
+
+ clock->pcpu_buf = alloc_percpu(*clock->pcpu_buf);
+ if (!clock->pcpu_buf)
+ return -BCH_ERR_ENOMEM_io_clock_init;
+
+ if (!init_heap(&clock->timers, NR_IO_TIMERS, GFP_KERNEL))
+ return -BCH_ERR_ENOMEM_io_clock_init;
+
+ return 0;
+}
diff --git a/c_src/libbcachefs/clock.h b/c_src/libbcachefs/clock.h
new file mode 100644
index 00000000..70a0f743
--- /dev/null
+++ b/c_src/libbcachefs/clock.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_CLOCK_H
+#define _BCACHEFS_CLOCK_H
+
+void bch2_io_timer_add(struct io_clock *, struct io_timer *);
+void bch2_io_timer_del(struct io_clock *, struct io_timer *);
+void bch2_kthread_io_clock_wait(struct io_clock *, unsigned long,
+ unsigned long);
+
+void __bch2_increment_clock(struct io_clock *, unsigned);
+
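+/*
+ * Fast path for advancing the IO clock: sectors accumulate in a percpu
+ * counter and are only folded into the shared atomic clock (which may fire
+ * expired timers) once IO_CLOCK_PCPU_SECTORS have built up.
+ */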
+static inline void bch2_increment_clock(struct bch_fs *c, unsigned sectors,
+ int rw)
+{
+ struct io_clock *clock = &c->io_clock[rw];
+
+ if (unlikely(this_cpu_add_return(*clock->pcpu_buf, sectors) >=
+ IO_CLOCK_PCPU_SECTORS))
+ __bch2_increment_clock(clock, this_cpu_xchg(*clock->pcpu_buf, 0));
+}
+
+void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long);
+
+#define bch2_kthread_wait_event_ioclock_timeout(condition, clock, timeout)\
+({ \
+ long __ret = timeout; \
+ might_sleep(); \
+ if (!___wait_cond_timeout(condition)) \
+ __ret = __wait_event_timeout(wq, condition, timeout); \
+ __ret; \
+})
+
+void bch2_io_timers_to_text(struct printbuf *, struct io_clock *);
+
+void bch2_io_clock_exit(struct io_clock *);
+int bch2_io_clock_init(struct io_clock *);
+
+#endif /* _BCACHEFS_CLOCK_H */
diff --git a/c_src/libbcachefs/clock_types.h b/c_src/libbcachefs/clock_types.h
new file mode 100644
index 00000000..5fae0012
--- /dev/null
+++ b/c_src/libbcachefs/clock_types.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_CLOCK_TYPES_H
+#define _BCACHEFS_CLOCK_TYPES_H
+
+#include "util.h"
+
+#define NR_IO_TIMERS (BCH_SB_MEMBERS_MAX * 3)
+
+/*
+ * Clocks/timers in units of sectors of IO:
+ *
+ * Note - they use percpu batching, so they're only approximate.
+ */
+
+struct io_timer;
+typedef void (*io_timer_fn)(struct io_timer *);
+
+struct io_timer {
+ io_timer_fn fn;
+ unsigned long expire;
+};
+
+/* Amount to buffer up on a percpu counter */
+#define IO_CLOCK_PCPU_SECTORS 128
+
+typedef HEAP(struct io_timer *) io_timer_heap;
+
+struct io_clock {
+ atomic64_t now;
+ u16 __percpu *pcpu_buf;
+ unsigned max_slop;
+
+ spinlock_t timer_lock;
+ io_timer_heap timers;
+};
+
+#endif /* _BCACHEFS_CLOCK_TYPES_H */
diff --git a/c_src/libbcachefs/compress.c b/c_src/libbcachefs/compress.c
new file mode 100644
index 00000000..33df8cf8
--- /dev/null
+++ b/c_src/libbcachefs/compress.c
@@ -0,0 +1,728 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "checksum.h"
+#include "compress.h"
+#include "extents.h"
+#include "super-io.h"
+
+#include <linux/lz4.h>
+#include <linux/zlib.h>
+#include <linux/zstd.h>
+
+/* Bounce buffer: */
+struct bbuf {
+ void *b;
+ enum {
+ BB_NONE,
+ BB_VMAP,
+ BB_KMALLOC,
+ BB_MEMPOOL,
+ } type;
+ int rw;
+};
+
+static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
+{
+ void *b;
+
+ BUG_ON(size > c->opts.encoded_extent_max);
+
+ b = kmalloc(size, GFP_NOFS|__GFP_NOWARN);
+ if (b)
+ return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
+
+ b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS);
+ if (b)
+ return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
+
+ BUG();
+}
+
+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ void *expected_start = NULL;
+
+ __bio_for_each_bvec(bv, bio, iter, start) {
+ if (expected_start &&
+ expected_start != page_address(bv.bv_page) + bv.bv_offset)
+ return false;
+
+ expected_start = page_address(bv.bv_page) +
+ bv.bv_offset + bv.bv_len;
+ }
+
+ return true;
+}
+
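+/*
+ * Get a linear mapping of a bio's data: use the pages directly when they're
+ * physically contiguous and not highmem, vmap() them when they can be mapped
+ * contiguously, and otherwise fall back to a bounce buffer (copying the data
+ * in for reads).
+ */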
+static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
+ struct bvec_iter start, int rw)
+{
+ struct bbuf ret;
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ unsigned nr_pages = 0;
+ struct page *stack_pages[16];
+ struct page **pages = NULL;
+ void *data;
+
+ BUG_ON(start.bi_size > c->opts.encoded_extent_max);
+
+ if (!PageHighMem(bio_iter_page(bio, start)) &&
+ bio_phys_contig(bio, start))
+ return (struct bbuf) {
+ .b = page_address(bio_iter_page(bio, start)) +
+ bio_iter_offset(bio, start),
+ .type = BB_NONE, .rw = rw
+ };
+
+ /* check if we can map the pages contiguously: */
+ __bio_for_each_segment(bv, bio, iter, start) {
+ if (iter.bi_size != start.bi_size &&
+ bv.bv_offset)
+ goto bounce;
+
+ if (bv.bv_len < iter.bi_size &&
+ bv.bv_offset + bv.bv_len < PAGE_SIZE)
+ goto bounce;
+
+ nr_pages++;
+ }
+
+ BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);
+
+ pages = nr_pages > ARRAY_SIZE(stack_pages)
+ ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
+ : stack_pages;
+ if (!pages)
+ goto bounce;
+
+ nr_pages = 0;
+ __bio_for_each_segment(bv, bio, iter, start)
+ pages[nr_pages++] = bv.bv_page;
+
+ data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+ if (pages != stack_pages)
+ kfree(pages);
+
+ if (data)
+ return (struct bbuf) {
+ .b = data + bio_iter_offset(bio, start),
+ .type = BB_VMAP, .rw = rw
+ };
+bounce:
+ ret = __bounce_alloc(c, start.bi_size, rw);
+
+ if (rw == READ)
+ memcpy_from_bio(ret.b, bio, start);
+
+ return ret;
+}
+
+static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
+{
+ return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
+}
+
+static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
+{
+ switch (buf.type) {
+ case BB_NONE:
+ break;
+ case BB_VMAP:
+ vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
+ break;
+ case BB_KMALLOC:
+ kfree(buf.b);
+ break;
+ case BB_MEMPOOL:
+ mempool_free(buf.b, &c->compression_bounce[buf.rw]);
+ break;
+ }
+}
+
+static inline void zlib_set_workspace(z_stream *strm, void *workspace)
+{
+#ifdef __KERNEL__
+ strm->workspace = workspace;
+#endif
+}
+
+static int __bio_uncompress(struct bch_fs *c, struct bio *src,
+ void *dst_data, struct bch_extent_crc_unpacked crc)
+{
+ struct bbuf src_data = { NULL };
+ size_t src_len = src->bi_iter.bi_size;
+ size_t dst_len = crc.uncompressed_size << 9;
+ void *workspace;
+ int ret;
+
+ src_data = bio_map_or_bounce(c, src, READ);
+
+ switch (crc.compression_type) {
+ case BCH_COMPRESSION_TYPE_lz4_old:
+ case BCH_COMPRESSION_TYPE_lz4:
+ ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
+ src_len, dst_len, dst_len);
+ if (ret != dst_len)
+ goto err;
+ break;
+ case BCH_COMPRESSION_TYPE_gzip: {
+ z_stream strm = {
+ .next_in = src_data.b,
+ .avail_in = src_len,
+ .next_out = dst_data,
+ .avail_out = dst_len,
+ };
+
+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
+
+ zlib_set_workspace(&strm, workspace);
+ zlib_inflateInit2(&strm, -MAX_WBITS);
+ ret = zlib_inflate(&strm, Z_FINISH);
+
+ mempool_free(workspace, &c->decompress_workspace);
+
+ if (ret != Z_STREAM_END)
+ goto err;
+ break;
+ }
+ case BCH_COMPRESSION_TYPE_zstd: {
+ ZSTD_DCtx *ctx;
+ size_t real_src_len = le32_to_cpup(src_data.b);
+
+ if (real_src_len > src_len - 4)
+ goto err;
+
+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
+ ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
+
+ ret = zstd_decompress_dctx(ctx,
+ dst_data, dst_len,
+ src_data.b + 4, real_src_len);
+
+ mempool_free(workspace, &c->decompress_workspace);
+
+ if (ret != dst_len)
+ goto err;
+ break;
+ }
+ default:
+ BUG();
+ }
+ ret = 0;
+out:
+ bio_unmap_or_unbounce(c, src_data);
+ return ret;
+err:
+ ret = -EIO;
+ goto out;
+}
+
+int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
+ struct bch_extent_crc_unpacked *crc)
+{
+ struct bbuf data = { NULL };
+ size_t dst_len = crc->uncompressed_size << 9;
+
+ /* bio must own its pages: */
+ BUG_ON(!bio->bi_vcnt);
+ BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
+
+ if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
+ crc->compressed_size << 9 > c->opts.encoded_extent_max) {
+ bch_err(c, "error rewriting existing data: extent too big");
+ return -EIO;
+ }
+
+ data = __bounce_alloc(c, dst_len, WRITE);
+
+ if (__bio_uncompress(c, bio, data.b, *crc)) {
+ if (!c->opts.no_data_io)
+ bch_err(c, "error rewriting existing data: decompression error");
+ bio_unmap_or_unbounce(c, data);
+ return -EIO;
+ }
+
+ /*
+ * XXX: don't have a good way to assert that the bio was allocated with
+ * enough space, we depend on bch2_move_extent doing the right thing
+ */
+ bio->bi_iter.bi_size = crc->live_size << 9;
+
+ memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));
+
+ crc->csum_type = 0;
+ crc->compression_type = 0;
+ crc->compressed_size = crc->live_size;
+ crc->uncompressed_size = crc->live_size;
+ crc->offset = 0;
+ crc->csum = (struct bch_csum) { 0, 0 };
+
+ bio_unmap_or_unbounce(c, data);
+ return 0;
+}
+
+int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
+ struct bio *dst, struct bvec_iter dst_iter,
+ struct bch_extent_crc_unpacked crc)
+{
+ struct bbuf dst_data = { NULL };
+ size_t dst_len = crc.uncompressed_size << 9;
+ int ret;
+
+ if (crc.uncompressed_size << 9 > c->opts.encoded_extent_max ||
+ crc.compressed_size << 9 > c->opts.encoded_extent_max)
+ return -EIO;
+
+ dst_data = dst_len == dst_iter.bi_size
+ ? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
+ : __bounce_alloc(c, dst_len, WRITE);
+
+ ret = __bio_uncompress(c, src, dst_data.b, crc);
+ if (ret)
+ goto err;
+
+ if (dst_data.type != BB_NONE &&
+ dst_data.type != BB_VMAP)
+ memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
+err:
+ bio_unmap_or_unbounce(c, dst_data);
+ return ret;
+}
+
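+/*
+ * Returns the number of bytes written to @dst on success, a non-positive
+ * value otherwise; for plain LZ4 a negative return is (negated) how much of
+ * @src would have fit in @dst_len.
+ */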
+static int attempt_compress(struct bch_fs *c,
+ void *workspace,
+ void *dst, size_t dst_len,
+ void *src, size_t src_len,
+ struct bch_compression_opt compression)
+{
+ enum bch_compression_type compression_type =
+ __bch2_compression_opt_to_type[compression.type];
+
+ switch (compression_type) {
+ case BCH_COMPRESSION_TYPE_lz4:
+ if (compression.level < LZ4HC_MIN_CLEVEL) {
+ int len = src_len;
+ int ret = LZ4_compress_destSize(
+ src, dst,
+ &len, dst_len,
+ workspace);
+ if (len < src_len)
+ return -len;
+
+ return ret;
+ } else {
+ int ret = LZ4_compress_HC(
+ src, dst,
+ src_len, dst_len,
+ compression.level,
+ workspace);
+
+ return ret ?: -1;
+ }
+ case BCH_COMPRESSION_TYPE_gzip: {
+ z_stream strm = {
+ .next_in = src,
+ .avail_in = src_len,
+ .next_out = dst,
+ .avail_out = dst_len,
+ };
+
+ zlib_set_workspace(&strm, workspace);
+ zlib_deflateInit2(&strm,
+ compression.level
+ ? clamp_t(unsigned, compression.level,
+ Z_BEST_SPEED, Z_BEST_COMPRESSION)
+ : Z_DEFAULT_COMPRESSION,
+ Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
+ Z_DEFAULT_STRATEGY);
+
+ if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
+ return 0;
+
+ if (zlib_deflateEnd(&strm) != Z_OK)
+ return 0;
+
+ return strm.total_out;
+ }
+ case BCH_COMPRESSION_TYPE_zstd: {
+ /*
+ * rescale:
+ * zstd max compression level is 22, our max level is 15
+ */
+ unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
+ ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
+ ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);
+
+ /*
+ * ZSTD requires that when we decompress we pass in the exact
+ * compressed size - rounding it up to the nearest sector
+ * doesn't work, so we use the first 4 bytes of the buffer for
+ * that.
+ *
+ * Additionally, the ZSTD code seems to have a bug where it will
+ * write just past the end of the buffer - so subtract a fudge
+ * factor (7 bytes) from the dst buffer size to account for
+ * that.
+ */
+ size_t len = zstd_compress_cctx(ctx,
+ dst + 4, dst_len - 4 - 7,
+ src, src_len,
+ &params);
+ if (zstd_is_error(len))
+ return 0;
+
+ *((__le32 *) dst) = cpu_to_le32(len);
+ return len + 4;
+ }
+ default:
+ BUG();
+ }
+}
+
+static unsigned __bio_compress(struct bch_fs *c,
+ struct bio *dst, size_t *dst_len,
+ struct bio *src, size_t *src_len,
+ struct bch_compression_opt compression)
+{
+ struct bbuf src_data = { NULL }, dst_data = { NULL };
+ void *workspace;
+ enum bch_compression_type compression_type =
+ __bch2_compression_opt_to_type[compression.type];
+ unsigned pad;
+ int ret = 0;
+
+ BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
+ BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));
+
+ /* If it's only one block, don't bother trying to compress: */
+ if (src->bi_iter.bi_size <= c->opts.block_size)
+ return BCH_COMPRESSION_TYPE_incompressible;
+
+ dst_data = bio_map_or_bounce(c, dst, WRITE);
+ src_data = bio_map_or_bounce(c, src, READ);
+
+ workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS);
+
+ *src_len = src->bi_iter.bi_size;
+ *dst_len = dst->bi_iter.bi_size;
+
+ /*
+ * XXX: this algorithm sucks when the compression code doesn't tell us
+ * how much would fit, like LZ4 does:
+ */
+ while (1) {
+ if (*src_len <= block_bytes(c)) {
+ ret = -1;
+ break;
+ }
+
+ ret = attempt_compress(c, workspace,
+ dst_data.b, *dst_len,
+ src_data.b, *src_len,
+ compression);
+ if (ret > 0) {
+ *dst_len = ret;
+ ret = 0;
+ break;
+ }
+
+ /* Didn't fit: should we retry with a smaller amount? */
+ if (*src_len <= *dst_len) {
+ ret = -1;
+ break;
+ }
+
+ /*
+ * If ret is negative, it's a hint as to how much data would fit
+ */
+ BUG_ON(-ret >= *src_len);
+
+ if (ret < 0)
+ *src_len = -ret;
+ else
+ *src_len -= (*src_len - *dst_len) / 2;
+ *src_len = round_down(*src_len, block_bytes(c));
+ }
+
+ mempool_free(workspace, &c->compress_workspace[compression_type]);
+
+ if (ret)
+ goto err;
+
+ /* Didn't get smaller: */
+ if (round_up(*dst_len, block_bytes(c)) >= *src_len)
+ goto err;
+
+ pad = round_up(*dst_len, block_bytes(c)) - *dst_len;
+
+ memset(dst_data.b + *dst_len, 0, pad);
+ *dst_len += pad;
+
+ if (dst_data.type != BB_NONE &&
+ dst_data.type != BB_VMAP)
+ memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
+
+ BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
+ BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
+ BUG_ON(*dst_len & (block_bytes(c) - 1));
+ BUG_ON(*src_len & (block_bytes(c) - 1));
+ ret = compression_type;
+out:
+ bio_unmap_or_unbounce(c, src_data);
+ bio_unmap_or_unbounce(c, dst_data);
+ return ret;
+err:
+ ret = BCH_COMPRESSION_TYPE_incompressible;
+ goto out;
+}
+
+unsigned bch2_bio_compress(struct bch_fs *c,
+ struct bio *dst, size_t *dst_len,
+ struct bio *src, size_t *src_len,
+ unsigned compression_opt)
+{
+ unsigned orig_dst = dst->bi_iter.bi_size;
+ unsigned orig_src = src->bi_iter.bi_size;
+ unsigned compression_type;
+
+	/* Don't consume more than encoded_extent_max from @src: */
+ src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
+ c->opts.encoded_extent_max);
+ /* Don't generate a bigger output than input: */
+ dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
+
+ compression_type =
+ __bio_compress(c, dst, dst_len, src, src_len,
+ bch2_compression_decode(compression_opt));
+
+ dst->bi_iter.bi_size = orig_dst;
+ src->bi_iter.bi_size = orig_src;
+ return compression_type;
+}
+
+static int __bch2_fs_compress_init(struct bch_fs *, u64);
+
+#define BCH_FEATURE_none 0
+
+static const unsigned bch2_compression_opt_to_feature[] = {
+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
+ BCH_COMPRESSION_OPTS()
+#undef x
+};
+
+#undef BCH_FEATURE_none
+
+static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
+{
+ int ret = 0;
+
+ if ((c->sb.features & f) == f)
+ return 0;
+
+ mutex_lock(&c->sb_lock);
+
+ if ((c->sb.features & f) == f) {
+ mutex_unlock(&c->sb_lock);
+ return 0;
+ }
+
+ ret = __bch2_fs_compress_init(c, c->sb.features|f);
+ if (ret) {
+ mutex_unlock(&c->sb_lock);
+ return ret;
+ }
+
+ c->disk_sb.sb->features[0] |= cpu_to_le64(f);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ return 0;
+}
+
+int bch2_check_set_has_compressed_data(struct bch_fs *c,
+ unsigned compression_opt)
+{
+ unsigned compression_type = bch2_compression_decode(compression_opt).type;
+
+ BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
+
+ return compression_type
+ ? __bch2_check_set_has_compressed_data(c,
+ 1ULL << bch2_compression_opt_to_feature[compression_type])
+ : 0;
+}
+
+void bch2_fs_compress_exit(struct bch_fs *c)
+{
+ unsigned i;
+
+ mempool_exit(&c->decompress_workspace);
+ for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
+ mempool_exit(&c->compress_workspace[i]);
+ mempool_exit(&c->compression_bounce[WRITE]);
+ mempool_exit(&c->compression_bounce[READ]);
+}
+
+static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
+{
+ size_t decompress_workspace_size = 0;
+ ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
+ c->opts.encoded_extent_max);
+
+ c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams);
+
+ struct {
+ unsigned feature;
+ enum bch_compression_type type;
+ size_t compress_workspace;
+ size_t decompress_workspace;
+ } compression_types[] = {
+ { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
+ max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS),
+ 0 },
+ { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
+ zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
+ zlib_inflate_workspacesize(), },
+ { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
+ c->zstd_workspace_size,
+ zstd_dctx_workspace_bound() },
+ }, *i;
+ bool have_compressed = false;
+
+ for (i = compression_types;
+ i < compression_types + ARRAY_SIZE(compression_types);
+ i++)
+ have_compressed |= (features & (1 << i->feature)) != 0;
+
+ if (!have_compressed)
+ return 0;
+
+ if (!mempool_initialized(&c->compression_bounce[READ]) &&
+ mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
+ 1, c->opts.encoded_extent_max))
+ return -BCH_ERR_ENOMEM_compression_bounce_read_init;
+
+ if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
+ mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
+ 1, c->opts.encoded_extent_max))
+ return -BCH_ERR_ENOMEM_compression_bounce_write_init;
+
+ for (i = compression_types;
+ i < compression_types + ARRAY_SIZE(compression_types);
+ i++) {
+ decompress_workspace_size =
+ max(decompress_workspace_size, i->decompress_workspace);
+
+ if (!(features & (1 << i->feature)))
+ continue;
+
+ if (mempool_initialized(&c->compress_workspace[i->type]))
+ continue;
+
+ if (mempool_init_kvpmalloc_pool(
+ &c->compress_workspace[i->type],
+ 1, i->compress_workspace))
+ return -BCH_ERR_ENOMEM_compression_workspace_init;
+ }
+
+ if (!mempool_initialized(&c->decompress_workspace) &&
+ mempool_init_kvpmalloc_pool(&c->decompress_workspace,
+ 1, decompress_workspace_size))
+ return -BCH_ERR_ENOMEM_decompression_workspace_init;
+
+ return 0;
+}
+
+static u64 compression_opt_to_feature(unsigned v)
+{
+ unsigned type = bch2_compression_decode(v).type;
+
+ return BIT_ULL(bch2_compression_opt_to_feature[type]);
+}
+
+int bch2_fs_compress_init(struct bch_fs *c)
+{
+ u64 f = c->sb.features;
+
+ f |= compression_opt_to_feature(c->opts.compression);
+ f |= compression_opt_to_feature(c->opts.background_compression);
+
+ return __bch2_fs_compress_init(c, f);
+}
+
+int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
+ struct printbuf *err)
+{
+ char *val = kstrdup(_val, GFP_KERNEL);
+ char *p = val, *type_str, *level_str;
+ struct bch_compression_opt opt = { 0 };
+ int ret;
+
+ if (!val)
+ return -ENOMEM;
+
+ type_str = strsep(&p, ":");
+ level_str = p;
+
+ ret = match_string(bch2_compression_opts, -1, type_str);
+ if (ret < 0 && err)
+ prt_str(err, "invalid compression type");
+ if (ret < 0)
+ goto err;
+
+ opt.type = ret;
+
+ if (level_str) {
+ unsigned level;
+
+ ret = kstrtouint(level_str, 10, &level);
+ if (!ret && !opt.type && level)
+ ret = -EINVAL;
+ if (!ret && level > 15)
+ ret = -EINVAL;
+ if (ret < 0 && err)
+ prt_str(err, "invalid compression level");
+ if (ret < 0)
+ goto err;
+
+ opt.level = level;
+ }
+
+ *res = bch2_compression_encode(opt);
+err:
+ kfree(val);
+ return ret;
+}
+
+void bch2_compression_opt_to_text(struct printbuf *out, u64 v)
+{
+ struct bch_compression_opt opt = bch2_compression_decode(v);
+
+ if (opt.type < BCH_COMPRESSION_OPT_NR)
+ prt_str(out, bch2_compression_opts[opt.type]);
+ else
+ prt_printf(out, "(unknown compression opt %u)", opt.type);
+ if (opt.level)
+ prt_printf(out, ":%u", opt.level);
+}
+
+void bch2_opt_compression_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_sb *sb,
+ u64 v)
+{
+ return bch2_compression_opt_to_text(out, v);
+}
+
+int bch2_opt_compression_validate(u64 v, struct printbuf *err)
+{
+ if (!bch2_compression_opt_valid(v)) {
+ prt_printf(err, "invalid compression opt %llu", v);
+ return -BCH_ERR_invalid_sb_opt_compression;
+ }
+
+ return 0;
+}
diff --git a/c_src/libbcachefs/compress.h b/c_src/libbcachefs/compress.h
new file mode 100644
index 00000000..607fd5e2
--- /dev/null
+++ b/c_src/libbcachefs/compress.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_COMPRESS_H
+#define _BCACHEFS_COMPRESS_H
+
+#include "extents_types.h"
+
+static const unsigned __bch2_compression_opt_to_type[] = {
+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t,
+ BCH_COMPRESSION_OPTS()
+#undef x
+};
+
+struct bch_compression_opt {
+ u8 type:4,
+ level:4;
+};
+
+static inline struct bch_compression_opt __bch2_compression_decode(unsigned v)
+{
+ return (struct bch_compression_opt) {
+ .type = v & 15,
+ .level = v >> 4,
+ };
+}
+
+static inline bool bch2_compression_opt_valid(unsigned v)
+{
+ struct bch_compression_opt opt = __bch2_compression_decode(v);
+
+ return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level);
+}
+
+static inline struct bch_compression_opt bch2_compression_decode(unsigned v)
+{
+ return bch2_compression_opt_valid(v)
+ ? __bch2_compression_decode(v)
+ : (struct bch_compression_opt) { 0 };
+}
+
+static inline unsigned bch2_compression_encode(struct bch_compression_opt opt)
+{
+ return opt.type|(opt.level << 4);
+}
+
+static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
+{
+ return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
+}
+
+int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
+ struct bch_extent_crc_unpacked *);
+int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
+ struct bvec_iter, struct bch_extent_crc_unpacked);
+unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *,
+ struct bio *, size_t *, unsigned);
+
+int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned);
+void bch2_fs_compress_exit(struct bch_fs *);
+int bch2_fs_compress_init(struct bch_fs *);
+
+void bch2_compression_opt_to_text(struct printbuf *, u64);
+
+int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
+void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
+int bch2_opt_compression_validate(u64, struct printbuf *);
+
+#define bch2_opt_compression (struct bch_opt_fn) { \
+ .parse = bch2_opt_compression_parse, \
+ .to_text = bch2_opt_compression_to_text, \
+ .validate = bch2_opt_compression_validate, \
+}
+
+#endif /* _BCACHEFS_COMPRESS_H */
diff --git a/c_src/libbcachefs/counters.c b/c_src/libbcachefs/counters.c
new file mode 100644
index 00000000..02a996e0
--- /dev/null
+++ b/c_src/libbcachefs/counters.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "super-io.h"
+#include "counters.h"
+
+/* BCH_SB_FIELD_counters */
+
+static const char * const bch2_counter_names[] = {
+#define x(t, n, ...) (#t),
+ BCH_PERSISTENT_COUNTERS()
+#undef x
+ NULL
+};
+
+static size_t bch2_sb_counter_nr_entries(struct bch_sb_field_counters *ctrs)
+{
+ if (!ctrs)
+ return 0;
+
+ return (__le64 *) vstruct_end(&ctrs->field) - &ctrs->d[0];
+}
+
+static int bch2_sb_counters_validate(struct bch_sb *sb,
+ struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ return 0;
+}
+
+static void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_counters *ctrs = field_to_type(f, counters);
+ unsigned int i;
+ unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
+
+ for (i = 0; i < nr; i++) {
+ if (i < BCH_COUNTER_NR)
+ prt_printf(out, "%s ", bch2_counter_names[i]);
+ else
+ prt_printf(out, "(unknown)");
+
+ prt_tab(out);
+ prt_printf(out, "%llu", le64_to_cpu(ctrs->d[i]));
+ prt_newline(out);
+ }
+}
+
+int bch2_sb_counters_to_cpu(struct bch_fs *c)
+{
+ struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters);
+ unsigned int i;
+ unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
+ u64 val = 0;
+
+ for (i = 0; i < BCH_COUNTER_NR; i++)
+ c->counters_on_mount[i] = 0;
+
+ for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++) {
+ val = le64_to_cpu(ctrs->d[i]);
+ percpu_u64_set(&c->counters[i], val);
+ c->counters_on_mount[i] = val;
+ }
+ return 0;
+}
+
+int bch2_sb_counters_from_cpu(struct bch_fs *c)
+{
+ struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters);
+ struct bch_sb_field_counters *ret;
+ unsigned int i;
+ unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
+
+ if (nr < BCH_COUNTER_NR) {
+ ret = bch2_sb_field_resize(&c->disk_sb, counters,
+ sizeof(*ctrs) / sizeof(u64) + BCH_COUNTER_NR);
+
+ if (ret) {
+ ctrs = ret;
+ nr = bch2_sb_counter_nr_entries(ctrs);
+ }
+ }
+
+ for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++)
+ ctrs->d[i] = cpu_to_le64(percpu_u64_get(&c->counters[i]));
+ return 0;
+}
+
+void bch2_fs_counters_exit(struct bch_fs *c)
+{
+ free_percpu(c->counters);
+}
+
+int bch2_fs_counters_init(struct bch_fs *c)
+{
+ c->counters = __alloc_percpu(sizeof(u64) * BCH_COUNTER_NR, sizeof(u64));
+ if (!c->counters)
+ return -BCH_ERR_ENOMEM_fs_counters_init;
+
+ return bch2_sb_counters_to_cpu(c);
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_counters = {
+ .validate = bch2_sb_counters_validate,
+ .to_text = bch2_sb_counters_to_text,
+};
diff --git a/c_src/libbcachefs/counters.h b/c_src/libbcachefs/counters.h
new file mode 100644
index 00000000..4778aa19
--- /dev/null
+++ b/c_src/libbcachefs/counters.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_COUNTERS_H
+#define _BCACHEFS_COUNTERS_H
+
+#include "bcachefs.h"
+#include "super-io.h"
+
+
+int bch2_sb_counters_to_cpu(struct bch_fs *);
+int bch2_sb_counters_from_cpu(struct bch_fs *);
+
+void bch2_fs_counters_exit(struct bch_fs *);
+int bch2_fs_counters_init(struct bch_fs *);
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_counters;
+
+#endif // _BCACHEFS_COUNTERS_H
diff --git a/c_src/libbcachefs/darray.c b/c_src/libbcachefs/darray.c
new file mode 100644
index 00000000..ac35b8b7
--- /dev/null
+++ b/c_src/libbcachefs/darray.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include "darray.h"
+
+int __bch2_darray_resize(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
+{
+ if (new_size > d->size) {
+ new_size = roundup_pow_of_two(new_size);
+
+ void *data = kvmalloc_array(new_size, element_size, gfp);
+ if (!data)
+ return -ENOMEM;
+
+ memcpy(data, d->data, d->size * element_size);
+ if (d->data != d->preallocated)
+ kvfree(d->data);
+ d->data = data;
+ d->size = new_size;
+ }
+
+ return 0;
+}
diff --git a/c_src/libbcachefs/darray.h b/c_src/libbcachefs/darray.h
new file mode 100644
index 00000000..4b340d13
--- /dev/null
+++ b/c_src/libbcachefs/darray.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DARRAY_H
+#define _BCACHEFS_DARRAY_H
+
+/*
+ * Dynamic arrays:
+ *
+ * Inspired by CCAN's darray
+ */
+
+#include <linux/slab.h>
+
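+/*
+ * Minimal usage sketch (illustrative):
+ *
+ *	DARRAY(int) nums;
+ *
+ *	darray_init(&nums);
+ *	if (!darray_push(&nums, 42))
+ *		darray_for_each(nums, i)
+ *			pr_info("%i\n", *i);
+ *	darray_exit(&nums);
+ */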
+#define DARRAY_PREALLOCATED(_type, _nr) \
+struct { \
+ size_t nr, size; \
+ _type *data; \
+ _type preallocated[_nr]; \
+}
+
+#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
+
+typedef DARRAY(char) darray_char;
+typedef DARRAY(char *) darray_str;
+
+int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t);
+
+static inline int __darray_resize(darray_char *d, size_t element_size,
+ size_t new_size, gfp_t gfp)
+{
+ return unlikely(new_size > d->size)
+ ? __bch2_darray_resize(d, element_size, new_size, gfp)
+ : 0;
+}
+
+#define darray_resize_gfp(_d, _new_size, _gfp) \
+ unlikely(__darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp))
+
+#define darray_resize(_d, _new_size) \
+ darray_resize_gfp(_d, _new_size, GFP_KERNEL)
+
+static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more, gfp_t gfp)
+{
+ return __darray_resize(d, t_size, d->nr + more, gfp);
+}
+
+#define darray_make_room_gfp(_d, _more, _gfp) \
+ __darray_make_room((darray_char *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
+
+#define darray_make_room(_d, _more) \
+ darray_make_room_gfp(_d, _more, GFP_KERNEL)
+
+#define darray_room(_d) ((_d).size - (_d).nr)
+
+#define darray_top(_d) ((_d).data[(_d).nr])
+
+#define darray_push_gfp(_d, _item, _gfp) \
+({ \
+ int _ret = darray_make_room_gfp((_d), 1, _gfp); \
+ \
+ if (!_ret) \
+ (_d)->data[(_d)->nr++] = (_item); \
+ _ret; \
+})
+
+#define darray_push(_d, _item) darray_push_gfp(_d, _item, GFP_KERNEL)
+
+#define darray_pop(_d) ((_d)->data[--(_d)->nr])
+
+#define darray_first(_d) ((_d).data[0])
+#define darray_last(_d) ((_d).data[(_d).nr - 1])
+
+#define darray_insert_item(_d, pos, _item) \
+({ \
+ size_t _pos = (pos); \
+ int _ret = darray_make_room((_d), 1); \
+ \
+ if (!_ret) \
+ array_insert_item((_d)->data, (_d)->nr, _pos, (_item)); \
+ _ret; \
+})
+
+#define darray_remove_item(_d, _pos) \
+ array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data)
+
+#define __darray_for_each(_d, _i) \
+ for ((_i) = (_d).data; _i < (_d).data + (_d).nr; _i++)
+
+#define darray_for_each(_d, _i) \
+ for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++)
+
+#define darray_for_each_reverse(_d, _i) \
+ for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i)
+
+#define darray_init(_d) \
+do { \
+ (_d)->nr = 0; \
+ (_d)->size = ARRAY_SIZE((_d)->preallocated); \
+ (_d)->data = (_d)->size ? (_d)->preallocated : NULL; \
+} while (0)
+
+#define darray_exit(_d) \
+do { \
+ if (!ARRAY_SIZE((_d)->preallocated) || \
+ (_d)->data != (_d)->preallocated) \
+ kvfree((_d)->data); \
+ darray_init(_d); \
+} while (0)
+
+#endif /* _BCACHEFS_DARRAY_H */
diff --git a/c_src/libbcachefs/data_update.c b/c_src/libbcachefs/data_update.c
new file mode 100644
index 00000000..6f13477f
--- /dev/null
+++ b/c_src/libbcachefs/data_update.c
@@ -0,0 +1,665 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "data_update.h"
+#include "ec.h"
+#include "error.h"
+#include "extents.h"
+#include "io_write.h"
+#include "keylist.h"
+#include "move.h"
+#include "nocow_locking.h"
+#include "rebalance.h"
+#include "subvolume.h"
+#include "trace.h"
+
+static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k)
+{
+ if (trace_move_extent_finish_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ trace_move_extent_finish(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+}
+
+static void trace_move_extent_fail2(struct data_update *m,
+ struct bkey_s_c new,
+ struct bkey_s_c wrote,
+ struct bkey_i *insert,
+ const char *msg)
+{
+ struct bch_fs *c = m->op.c;
+ struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
+ const union bch_extent_entry *entry;
+ struct bch_extent_ptr *ptr;
+ struct extent_ptr_decoded p;
+ struct printbuf buf = PRINTBUF;
+ unsigned i, rewrites_found = 0;
+
+ if (!trace_move_extent_fail_enabled())
+ return;
+
+ prt_str(&buf, msg);
+
+ if (insert) {
+ i = 0;
+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
+ if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+ (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
+ !ptr->cached)
+ rewrites_found |= 1U << i;
+ i++;
+ }
+ }
+
+ prt_printf(&buf, "\nrewrite ptrs: %u%u%u%u",
+ (m->data_opts.rewrite_ptrs & (1 << 0)) != 0,
+ (m->data_opts.rewrite_ptrs & (1 << 1)) != 0,
+ (m->data_opts.rewrite_ptrs & (1 << 2)) != 0,
+ (m->data_opts.rewrite_ptrs & (1 << 3)) != 0);
+
+ prt_printf(&buf, "\nrewrites found: %u%u%u%u",
+ (rewrites_found & (1 << 0)) != 0,
+ (rewrites_found & (1 << 1)) != 0,
+ (rewrites_found & (1 << 2)) != 0,
+ (rewrites_found & (1 << 3)) != 0);
+
+ prt_str(&buf, "\nold: ");
+ bch2_bkey_val_to_text(&buf, c, old);
+
+ prt_str(&buf, "\nnew: ");
+ bch2_bkey_val_to_text(&buf, c, new);
+
+ prt_str(&buf, "\nwrote: ");
+ bch2_bkey_val_to_text(&buf, c, wrote);
+
+ if (insert) {
+ prt_str(&buf, "\ninsert: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+ }
+
+ trace_move_extent_fail(c, buf.buf);
+ printbuf_exit(&buf);
+}
+
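+/*
+ * Index-update half of a data update (move/rewrite), run once the new copy
+ * has been written: drop the rewritten pointers from the existing extent,
+ * resolve conflicts with racing writes, trim excess replicas, then append the
+ * newly written pointers and commit.
+ */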
+static int __bch2_data_update_index_update(struct btree_trans *trans,
+ struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+ struct btree_iter iter;
+ struct data_update *m =
+ container_of(op, struct data_update, op);
+ struct keylist *keys = &op->insert_keys;
+ struct bkey_buf _new, _insert;
+ int ret = 0;
+
+ bch2_bkey_buf_init(&_new);
+ bch2_bkey_buf_init(&_insert);
+ bch2_bkey_buf_realloc(&_insert, c, U8_MAX);
+
+ bch2_trans_iter_init(trans, &iter, m->btree_id,
+ bkey_start_pos(&bch2_keylist_front(keys)->k),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+ while (1) {
+ struct bkey_s_c k;
+ struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
+ struct bkey_i *insert = NULL;
+ struct bkey_i_extent *new;
+ const union bch_extent_entry *entry_c;
+ union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct bch_extent_ptr *ptr;
+ const struct bch_extent_ptr *ptr_c;
+ struct bpos next_pos;
+ bool should_check_enospc;
+ s64 i_sectors_delta = 0, disk_sectors_delta = 0;
+ unsigned rewrites_found = 0, durability, i;
+
+ bch2_trans_begin(trans);
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ new = bkey_i_to_extent(bch2_keylist_front(keys));
+
+ if (!bch2_extents_match(k, old)) {
+ trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i),
+ NULL, "no match:");
+ goto nowork;
+ }
+
+ bkey_reassemble(_insert.k, k);
+ insert = _insert.k;
+
+ bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
+ new = bkey_i_to_extent(_new.k);
+ bch2_cut_front(iter.pos, &new->k_i);
+
+ bch2_cut_front(iter.pos, insert);
+ bch2_cut_back(new->k.p, insert);
+ bch2_cut_back(insert->k.p, &new->k_i);
+
+ /*
+ * @old: extent that we read from
+ * @insert: key that we're going to update, initialized from
+ * extent currently in btree - same as @old unless we raced with
+ * other updates
+ * @new: extent with new pointers that we'll be adding to @insert
+ *
+	 * First, drop rewrite_ptrs from @new:
+ */
+ i = 0;
+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry_c) {
+ if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+ (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
+ !ptr->cached) {
+ bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
+ rewrites_found |= 1U << i;
+ }
+ i++;
+ }
+
+ if (m->data_opts.rewrite_ptrs &&
+ !rewrites_found &&
+ bch2_bkey_durability(c, k) >= m->op.opts.data_replicas) {
+ trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "no rewrites found:");
+ goto nowork;
+ }
+
+ /*
+ * A replica that we just wrote might conflict with a replica
+ * that we want to keep, due to racing with another move:
+ */
+restart_drop_conflicting_replicas:
+ extent_for_each_ptr(extent_i_to_s(new), ptr)
+ if ((ptr_c = bch2_bkey_has_device_c(bkey_i_to_s_c(insert), ptr->dev)) &&
+ !ptr_c->cached) {
+ bch2_bkey_drop_ptr_noerror(bkey_i_to_s(&new->k_i), ptr);
+ goto restart_drop_conflicting_replicas;
+ }
+
+ if (!bkey_val_u64s(&new->k)) {
+ trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:");
+ goto nowork;
+ }
+
+ /* Now, drop pointers that conflict with what we just wrote: */
+ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
+ if ((ptr = bch2_bkey_has_device(bkey_i_to_s(insert), p.ptr.dev)))
+ bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr);
+
+ durability = bch2_bkey_durability(c, bkey_i_to_s_c(insert)) +
+ bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i));
+
+ /* Now, drop excess replicas: */
+restart_drop_extra_replicas:
+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) {
+ unsigned ptr_durability = bch2_extent_ptr_durability(c, &p);
+
+ if (!p.ptr.cached &&
+ durability - ptr_durability >= m->op.opts.data_replicas) {
+ durability -= ptr_durability;
+
+ bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
+ goto restart_drop_extra_replicas;
+ }
+ }
+
+ /* Finally, add the pointers we just wrote: */
+ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
+ bch2_extent_ptr_decoded_append(insert, &p);
+
+ bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
+ bch2_extent_normalize(c, bkey_i_to_s(insert));
+
+ ret = bch2_sum_sector_overwrites(trans, &iter, insert,
+ &should_check_enospc,
+ &i_sectors_delta,
+ &disk_sectors_delta);
+ if (ret)
+ goto err;
+
+ if (disk_sectors_delta > (s64) op->res.sectors) {
+ ret = bch2_disk_reservation_add(c, &op->res,
+ disk_sectors_delta - op->res.sectors,
+ !should_check_enospc
+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
+ if (ret)
+ goto out;
+ }
+
+ next_pos = insert->k.p;
+
+ /*
+ * Check for nonce offset inconsistency:
+ * This is debug code - we've been seeing this bug rarely, and
+ * it's been hard to reproduce, so this should give us some more
+ * information when it does occur:
+ */
+ struct printbuf err = PRINTBUF;
+ int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err);
+ printbuf_exit(&err);
+
+ if (invalid) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "about to insert invalid key in data update path");
+ prt_str(&buf, "\nold: ");
+ bch2_bkey_val_to_text(&buf, c, old);
+ prt_str(&buf, "\nk: ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, "\nnew: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+
+ bch2_fatal_error(c);
+ goto out;
+ }
+
+ if (trace_data_update_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "\nold: ");
+ bch2_bkey_val_to_text(&buf, c, old);
+ prt_str(&buf, "\nk: ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, "\nnew: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+ trace_data_update(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
+ k.k->p, bkey_start_pos(&insert->k)) ?:
+ bch2_insert_snapshot_whiteouts(trans, m->btree_id,
+ k.k->p, insert->k.p) ?:
+ bch2_bkey_set_needs_rebalance(c, insert,
+ op->opts.background_target,
+ op->opts.background_compression) ?:
+ bch2_trans_update(trans, &iter, insert,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ bch2_trans_commit(trans, &op->res,
+ NULL,
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc|
+ m->data_opts.btree_insert_flags);
+ if (!ret) {
+ bch2_btree_iter_set_pos(&iter, next_pos);
+
+ this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size);
+ trace_move_extent_finish2(c, bkey_i_to_s_c(&new->k_i));
+ }
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+ if (ret)
+ break;
+next:
+ while (bkey_ge(iter.pos, bch2_keylist_front(keys)->k.p)) {
+ bch2_keylist_pop_front(keys);
+ if (bch2_keylist_empty(keys))
+ goto out;
+ }
+ continue;
+nowork:
+ if (m->stats) {
+ BUG_ON(k.k->p.offset <= iter.pos.offset);
+ atomic64_inc(&m->stats->keys_raced);
+ atomic64_add(k.k->p.offset - iter.pos.offset,
+ &m->stats->sectors_raced);
+ }
+
+ count_event(c, move_extent_fail);
+
+ bch2_btree_iter_advance(&iter);
+ goto next;
+ }
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_bkey_buf_exit(&_insert, c);
+ bch2_bkey_buf_exit(&_new, c);
+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
+ return ret;
+}
+
+int bch2_data_update_index_update(struct bch_write_op *op)
+{
+ return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op));
+}
+
+void bch2_data_update_read_done(struct data_update *m,
+ struct bch_extent_crc_unpacked crc)
+{
+ /* write bio must own pages: */
+ BUG_ON(!m->op.wbio.bio.bi_vcnt);
+
+ m->op.crc = crc;
+ m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
+
+ closure_call(&m->op.cl, bch2_write, NULL, NULL);
+}
+
+void bch2_data_update_exit(struct data_update *update)
+{
+ struct bch_fs *c = update->op.c;
+ struct bkey_ptrs_c ptrs =
+ bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k));
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (c->opts.nocow_enabled)
+ bch2_bucket_nocow_unlock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, ptr), 0);
+ percpu_ref_put(&bch_dev_bkey_exists(c, ptr->dev)->ref);
+ }
+
+ bch2_bkey_buf_exit(&update->k, c);
+ bch2_disk_reservation_put(c, &update->op.res);
+ bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
+}
+
+static void bch2_update_unwritten_extent(struct btree_trans *trans,
+ struct data_update *update)
+{
+ struct bch_fs *c = update->op.c;
+ struct bio *bio = &update->op.wbio.bio;
+ struct bkey_i_extent *e;
+ struct write_point *wp;
+ struct closure cl;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ closure_init_stack(&cl);
+ bch2_keylist_init(&update->op.insert_keys, update->op.inline_keys);
+
+ while (bio_sectors(bio)) {
+ unsigned sectors = bio_sectors(bio);
+
+ bch2_trans_iter_init(trans, &iter, update->btree_id, update->op.pos,
+ BTREE_ITER_SLOTS);
+ ret = lockrestart_do(trans, ({
+ k = bch2_btree_iter_peek_slot(&iter);
+ bkey_err(k);
+ }));
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret || !bch2_extents_match(k, bkey_i_to_s_c(update->k.k)))
+ break;
+
+ e = bkey_extent_init(update->op.insert_keys.top);
+ e->k.p = update->op.pos;
+
+ ret = bch2_alloc_sectors_start_trans(trans,
+ update->op.target,
+ false,
+ update->op.write_point,
+ &update->op.devs_have,
+ update->op.nr_replicas,
+ update->op.nr_replicas,
+ update->op.watermark,
+ 0, &cl, &wp);
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
+ bch2_trans_unlock(trans);
+ closure_sync(&cl);
+ continue;
+ }
+
+ bch_err_fn_ratelimited(c, ret);
+
+ if (ret)
+ return;
+
+ sectors = min(sectors, wp->sectors_free);
+
+ bch2_key_resize(&e->k, sectors);
+
+ bch2_open_bucket_get(c, wp, &update->op.open_buckets);
+ bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
+ bch2_alloc_sectors_done(c, wp);
+
+ bio_advance(bio, sectors << 9);
+ update->op.pos.offset += sectors;
+
+ extent_for_each_ptr(extent_i_to_s(e), ptr)
+ ptr->unwritten = true;
+ bch2_keylist_push(&update->op.insert_keys);
+
+ ret = __bch2_data_update_index_update(trans, &update->op);
+
+ bch2_open_buckets_put(c, &update->op.open_buckets);
+
+ if (ret)
+ break;
+ }
+
+ if (closure_nr_remaining(&cl) != 1) {
+ bch2_trans_unlock(trans);
+ closure_sync(&cl);
+ }
+}
+
+int bch2_extent_drop_ptrs(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct data_update_opts data_opts)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i *n;
+ int ret;
+
+ n = bch2_bkey_make_mut_noupdate(trans, k);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ while (data_opts.kill_ptrs) {
+ unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+ struct bch_extent_ptr *ptr;
+
+ bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
+ data_opts.kill_ptrs ^= 1U << drop;
+ }
+
+ /*
+ * If the new extent no longer has any pointers, bch2_extent_normalize()
+ * will do the appropriate thing with it (turning it into a
+ * KEY_TYPE_error key, or just a discard if it was a cached extent)
+ */
+ bch2_extent_normalize(c, bkey_i_to_s(n));
+
+ /*
+ * Since we're not inserting through an extent iterator
+ * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+ * we aren't using the extent overwrite path to delete, we're
+ * just using the normal key deletion path:
+ */
+ if (bkey_deleted(&n->k) && !(iter->flags & BTREE_ITER_IS_EXTENTS))
+ n->k.size = 0;
+
+ return bch2_trans_relock(trans) ?:
+ bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+}
+
+int bch2_data_update_init(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct moving_context *ctxt,
+ struct data_update *m,
+ struct write_point_specifier wp,
+ struct bch_io_opts io_opts,
+ struct data_update_opts data_opts,
+ enum btree_id btree_id,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
+ unsigned ptrs_locked = 0;
+ int ret = 0;
+
+ bch2_bkey_buf_init(&m->k);
+ bch2_bkey_buf_reassemble(&m->k, c, k);
+ m->btree_id = btree_id;
+ m->data_opts = data_opts;
+ m->ctxt = ctxt;
+ m->stats = ctxt ? ctxt->stats : NULL;
+
+ bch2_write_op_init(&m->op, c, io_opts);
+ m->op.pos = bkey_start_pos(k.k);
+ m->op.version = k.k->version;
+ m->op.target = data_opts.target;
+ m->op.write_point = wp;
+ m->op.nr_replicas = 0;
+ m->op.flags |= BCH_WRITE_PAGES_STABLE|
+ BCH_WRITE_PAGES_OWNED|
+ BCH_WRITE_DATA_ENCODED|
+ BCH_WRITE_MOVE|
+ m->data_opts.write_flags;
+ m->op.compression_opt = io_opts.background_compression ?: io_opts.compression;
+ m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
+
+ bkey_for_each_ptr(ptrs, ptr)
+ percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
+
+ unsigned durability_have = 0, durability_removing = 0;
+
+ i = 0;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ bool locked;
+
+ if (((1U << i) & m->data_opts.rewrite_ptrs)) {
+ BUG_ON(p.ptr.cached);
+
+ if (crc_is_compressed(p.crc))
+ reserve_sectors += k.k->size;
+
+ m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
+ durability_removing += bch2_extent_ptr_desired_durability(c, &p);
+ } else if (!p.ptr.cached &&
+ !((1U << i) & m->data_opts.kill_ptrs)) {
+ bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+ durability_have += bch2_extent_ptr_durability(c, &p);
+ }
+
+ /*
+ * op->csum_type is normally initialized from the fs/file's
+ * current options - but if an extent is encrypted, we require
+ * that it stays encrypted:
+ */
+ if (bch2_csum_type_is_encryption(p.crc.csum_type)) {
+ m->op.nonce = p.crc.nonce + p.crc.offset;
+ m->op.csum_type = p.crc.csum_type;
+ }
+
+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
+ m->op.incompressible = true;
+
+ if (c->opts.nocow_enabled) {
+ if (ctxt) {
+ move_ctxt_wait_event(ctxt,
+ (locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0)) ||
+ (!atomic_read(&ctxt->read_sectors) &&
+ !atomic_read(&ctxt->write_sectors)));
+
+ if (!locked)
+ bch2_bucket_nocow_lock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0);
+ } else {
+ if (!bch2_bucket_nocow_trylock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0)) {
+ ret = -BCH_ERR_nocow_lock_blocked;
+ goto err;
+ }
+ }
+ ptrs_locked |= (1U << i);
+ }
+
+ i++;
+ }
+
+ /*
+ * If current extent durability is less than io_opts.data_replicas,
+ * we're not trying to rereplicate the extent up to data_replicas here -
+ * unless extra_replicas was specified
+ *
+ * Increasing replication is an explicit operation triggered by
+ * rereplicate, currently, so that users don't get an unexpected -ENOSPC
+ */
+ if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) &&
+ durability_have >= io_opts.data_replicas) {
+ m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
+ m->data_opts.rewrite_ptrs = 0;
+ /* if iter == NULL, it's just a promote */
+ if (iter)
+ ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts);
+ goto done;
+ }
+
+ m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+ m->data_opts.extra_replicas;
+ m->op.nr_replicas_required = m->op.nr_replicas;
+
+ BUG_ON(!m->op.nr_replicas);
+
+ if (reserve_sectors) {
+ ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
+ m->data_opts.extra_replicas
+ ? 0
+ : BCH_DISK_RESERVATION_NOFAIL);
+ if (ret)
+ goto err;
+ }
+
+ if (bkey_extent_is_unwritten(k)) {
+ bch2_update_unwritten_extent(trans, m);
+ goto done;
+ }
+
+ return 0;
+err:
+ i = 0;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if ((1U << i) & ptrs_locked)
+ bch2_bucket_nocow_unlock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0);
+ percpu_ref_put(&bch_dev_bkey_exists(c, p.ptr.dev)->ref);
+ i++;
+ }
+
+ bch2_bkey_buf_exit(&m->k, c);
+ bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
+ return ret;
+done:
+ bch2_data_update_exit(m);
+ return ret ?: -BCH_ERR_data_update_done;
+}
+
+void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ unsigned i = 0;
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ if ((opts->rewrite_ptrs & (1U << i)) && ptr->cached) {
+ opts->kill_ptrs |= 1U << i;
+ opts->rewrite_ptrs ^= 1U << i;
+ }
+
+ i++;
+ }
+}
diff --git a/c_src/libbcachefs/data_update.h b/c_src/libbcachefs/data_update.h
new file mode 100644
index 00000000..991095bb
--- /dev/null
+++ b/c_src/libbcachefs/data_update.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BCACHEFS_DATA_UPDATE_H
+#define _BCACHEFS_DATA_UPDATE_H
+
+#include "bkey_buf.h"
+#include "io_write_types.h"
+
+struct moving_context;
+
+struct data_update_opts {
+ unsigned rewrite_ptrs;
+ unsigned kill_ptrs;
+ u16 target;
+ u8 extra_replicas;
+ unsigned btree_insert_flags;
+ unsigned write_flags;
+};
+
+struct data_update {
+ /* extent being updated: */
+ enum btree_id btree_id;
+ struct bkey_buf k;
+ struct data_update_opts data_opts;
+ struct moving_context *ctxt;
+ struct bch_move_stats *stats;
+ struct bch_write_op op;
+};
+
+int bch2_data_update_index_update(struct bch_write_op *);
+
+void bch2_data_update_read_done(struct data_update *,
+ struct bch_extent_crc_unpacked);
+
+int bch2_extent_drop_ptrs(struct btree_trans *,
+ struct btree_iter *,
+ struct bkey_s_c,
+ struct data_update_opts);
+
+void bch2_data_update_exit(struct data_update *);
+int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
+ struct moving_context *,
+ struct data_update *,
+ struct write_point_specifier,
+ struct bch_io_opts, struct data_update_opts,
+ enum btree_id, struct bkey_s_c);
+void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *);
+
+#endif /* _BCACHEFS_DATA_UPDATE_H */
diff --git a/c_src/libbcachefs/debug.c b/c_src/libbcachefs/debug.c
new file mode 100644
index 00000000..d6418948
--- /dev/null
+++ b/c_src/libbcachefs/debug.c
@@ -0,0 +1,935 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Assorted bcachefs debug code
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_locking.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "debug.h"
+#include "error.h"
+#include "extents.h"
+#include "fsck.h"
+#include "inode.h"
+#include "super.h"
+
+#include <linux/console.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+
+static struct dentry *bch_debug;
+
+static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
+ struct extent_ptr_decoded pick)
+{
+ struct btree *v = c->verify_data;
+ struct btree_node *n_ondisk = c->verify_ondisk;
+ struct btree_node *n_sorted = c->verify_data->data;
+ struct bset *sorted, *inmemory = &b->data->keys;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+ struct bio *bio;
+ bool failed = false, saw_error = false;
+
+ if (!bch2_dev_get_ioref(ca, READ))
+ return false;
+
+ bio = bio_alloc_bioset(ca->disk_sb.bdev,
+ buf_pages(n_sorted, btree_bytes(c)),
+ REQ_OP_READ|REQ_META,
+ GFP_NOFS,
+ &c->btree_bio);
+ bio->bi_iter.bi_sector = pick.ptr.offset;
+ bch2_bio_map(bio, n_sorted, btree_bytes(c));
+
+ submit_bio_wait(bio);
+
+ bio_put(bio);
+ percpu_ref_put(&ca->io_ref);
+
+ memcpy(n_ondisk, n_sorted, btree_bytes(c));
+
+ v->written = 0;
+ if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
+ return false;
+
+ n_sorted = c->verify_data->data;
+ sorted = &n_sorted->keys;
+
+ if (inmemory->u64s != sorted->u64s ||
+ memcmp(inmemory->start,
+ sorted->start,
+ vstruct_end(inmemory) - (void *) inmemory->start)) {
+ unsigned offset = 0, sectors;
+ struct bset *i;
+ unsigned j;
+
+ console_lock();
+
+ printk(KERN_ERR "*** in memory:\n");
+ bch2_dump_bset(c, b, inmemory, 0);
+
+ printk(KERN_ERR "*** read back in:\n");
+ bch2_dump_bset(c, v, sorted, 0);
+
+ while (offset < v->written) {
+ if (!offset) {
+ i = &n_ondisk->keys;
+ sectors = vstruct_blocks(n_ondisk, c->block_bits) <<
+ c->block_bits;
+ } else {
+ struct btree_node_entry *bne =
+ (void *) n_ondisk + (offset << 9);
+ i = &bne->keys;
+
+ sectors = vstruct_blocks(bne, c->block_bits) <<
+ c->block_bits;
+ }
+
+ printk(KERN_ERR "*** on disk block %u:\n", offset);
+ bch2_dump_bset(c, b, i, offset);
+
+ offset += sectors;
+ }
+
+ for (j = 0; j < le16_to_cpu(inmemory->u64s); j++)
+ if (inmemory->_data[j] != sorted->_data[j])
+ break;
+
+ console_unlock();
+ bch_err(c, "verify failed at key %u", j);
+
+ failed = true;
+ }
+
+ if (v->written != b->written) {
+ bch_err(c, "written wrong: expected %u, got %u",
+ b->written, v->written);
+ failed = true;
+ }
+
+ return failed;
+}
+
+void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
+{
+ struct bkey_ptrs_c ptrs;
+ struct extent_ptr_decoded p;
+ const union bch_extent_entry *entry;
+ struct btree *v;
+ struct bset *inmemory = &b->data->keys;
+ struct bkey_packed *k;
+ bool failed = false;
+
+ if (c->opts.nochanges)
+ return;
+
+ bch2_btree_node_io_lock(b);
+ mutex_lock(&c->verify_lock);
+
+ if (!c->verify_ondisk) {
+ c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
+ if (!c->verify_ondisk)
+ goto out;
+ }
+
+ if (!c->verify_data) {
+ c->verify_data = __bch2_btree_node_mem_alloc(c);
+ if (!c->verify_data)
+ goto out;
+
+ list_del_init(&c->verify_data->list);
+ }
+
+ BUG_ON(b->nsets != 1);
+
+ for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k))
+ if (k->type == KEY_TYPE_btree_ptr_v2)
+ ((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0;
+
+ v = c->verify_data;
+ bkey_copy(&v->key, &b->key);
+ v->c.level = b->c.level;
+ v->c.btree_id = b->c.btree_id;
+ bch2_btree_keys_init(v);
+
+ ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key));
+ bkey_for_each_ptr_decode(&b->key.k, ptrs, p, entry)
+ failed |= bch2_btree_verify_replica(c, b, p);
+
+ if (failed) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+		bch2_fs_fatal_error(c, "btree node verify failed for: %s\n", buf.buf);
+ printbuf_exit(&buf);
+ }
+out:
+ mutex_unlock(&c->verify_lock);
+ bch2_btree_node_io_unlock(b);
+}
+
+void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
+ const struct btree *b)
+{
+ struct btree_node *n_ondisk = NULL;
+ struct extent_ptr_decoded pick;
+ struct bch_dev *ca;
+ struct bio *bio = NULL;
+ unsigned offset = 0;
+ int ret;
+
+ if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) {
+ prt_printf(out, "error getting device to read from: invalid device\n");
+ return;
+ }
+
+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+ if (!bch2_dev_get_ioref(ca, READ)) {
+ prt_printf(out, "error getting device to read from: not online\n");
+ return;
+ }
+
+ n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
+ if (!n_ondisk) {
+ prt_printf(out, "memory allocation failure\n");
+ goto out;
+ }
+
+ bio = bio_alloc_bioset(ca->disk_sb.bdev,
+ buf_pages(n_ondisk, btree_bytes(c)),
+ REQ_OP_READ|REQ_META,
+ GFP_NOFS,
+ &c->btree_bio);
+ bio->bi_iter.bi_sector = pick.ptr.offset;
+ bch2_bio_map(bio, n_ondisk, btree_bytes(c));
+
+ ret = submit_bio_wait(bio);
+ if (ret) {
+ prt_printf(out, "IO error reading btree node: %s\n", bch2_err_str(ret));
+ goto out;
+ }
+
+ while (offset < btree_sectors(c)) {
+ struct bset *i;
+ struct nonce nonce;
+ struct bch_csum csum;
+ struct bkey_packed *k;
+ unsigned sectors;
+
+ if (!offset) {
+ i = &n_ondisk->keys;
+
+ if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
+ prt_printf(out, "unknown checksum type at offset %u: %llu\n",
+ offset, BSET_CSUM_TYPE(i));
+ goto out;
+ }
+
+ nonce = btree_nonce(i, offset << 9);
+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk);
+
+ if (bch2_crc_cmp(csum, n_ondisk->csum)) {
+ prt_printf(out, "invalid checksum\n");
+ goto out;
+ }
+
+ bset_encrypt(c, i, offset << 9);
+
+ sectors = vstruct_sectors(n_ondisk, c->block_bits);
+ } else {
+ struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9);
+
+ i = &bne->keys;
+
+ if (i->seq != n_ondisk->keys.seq)
+ break;
+
+ if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
+ prt_printf(out, "unknown checksum type at offset %u: %llu\n",
+ offset, BSET_CSUM_TYPE(i));
+ goto out;
+ }
+
+ nonce = btree_nonce(i, offset << 9);
+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+
+ if (bch2_crc_cmp(csum, bne->csum)) {
+				prt_printf(out, "invalid checksum\n");
+ goto out;
+ }
+
+ bset_encrypt(c, i, offset << 9);
+
+ sectors = vstruct_sectors(bne, c->block_bits);
+ }
+
+ prt_printf(out, " offset %u version %u, journal seq %llu\n",
+ offset,
+ le16_to_cpu(i->version),
+ le64_to_cpu(i->journal_seq));
+ offset += sectors;
+
+ printbuf_indent_add(out, 4);
+
+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) {
+ struct bkey u;
+
+ bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u));
+ prt_newline(out);
+ }
+
+ printbuf_indent_sub(out, 4);
+ }
+out:
+ if (bio)
+ bio_put(bio);
+ kvpfree(n_ondisk, btree_bytes(c));
+ percpu_ref_put(&ca->io_ref);
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+/* XXX: bch_fs refcounting */
+
+struct dump_iter {
+ struct bch_fs *c;
+ enum btree_id id;
+ struct bpos from;
+ struct bpos prev_node;
+ u64 iter;
+
+ struct printbuf buf;
+
+ char __user *ubuf; /* destination user buffer */
+ size_t size; /* size of requested read */
+ ssize_t ret; /* bytes read so far */
+};
+
+static ssize_t flush_buf(struct dump_iter *i)
+{
+ if (i->buf.pos) {
+ size_t bytes = min_t(size_t, i->buf.pos, i->size);
+ int copied = bytes - copy_to_user(i->ubuf, i->buf.buf, bytes);
+
+ i->ret += copied;
+ i->ubuf += copied;
+ i->size -= copied;
+ i->buf.pos -= copied;
+ memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos);
+
+ if (copied != bytes)
+ return -EFAULT;
+ }
+
+ return i->size ? 0 : i->ret;
+}
+
+static int bch2_dump_open(struct inode *inode, struct file *file)
+{
+ struct btree_debug *bd = inode->i_private;
+ struct dump_iter *i;
+
+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
+ if (!i)
+ return -ENOMEM;
+
+ file->private_data = i;
+ i->from = POS_MIN;
+ i->iter = 0;
+ i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]);
+ i->id = bd->id;
+ i->buf = PRINTBUF;
+
+ return 0;
+}
+
+static int bch2_dump_release(struct inode *inode, struct file *file)
+{
+ struct dump_iter *i = file->private_data;
+
+ printbuf_exit(&i->buf);
+ kfree(i);
+ return 0;
+}
+
+static ssize_t bch2_read_btree(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ return flush_buf(i) ?:
+ bch2_trans_run(i->c,
+ for_each_btree_key(trans, iter, i->id, i->from,
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
+ bch2_bkey_val_to_text(&i->buf, i->c, k);
+ prt_newline(&i->buf);
+ bch2_trans_unlock(trans);
+ i->from = bpos_successor(iter.pos);
+ flush_buf(i);
+ }))) ?:
+ i->ret;
+}
+
+static const struct file_operations btree_debug_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_read_btree,
+};
+
+static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ struct btree *b;
+ ssize_t ret;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ ret = flush_buf(i);
+ if (ret)
+ return ret;
+
+ if (bpos_eq(SPOS_MAX, i->from))
+ return i->ret;
+
+ trans = bch2_trans_get(i->c);
+retry:
+ bch2_trans_begin(trans);
+
+ for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) {
+ bch2_btree_node_to_text(&i->buf, i->c, b);
+ i->from = !bpos_eq(SPOS_MAX, b->key.k.p)
+ ? bpos_successor(b->key.k.p)
+ : b->key.k.p;
+
+ ret = drop_locks_do(trans, flush_buf(i));
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_put(trans);
+
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
+}
+
+static const struct file_operations btree_format_debug_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_read_btree_formats,
+};
+
+static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ return flush_buf(i) ?:
+ bch2_trans_run(i->c,
+ for_each_btree_key(trans, iter, i->id, i->from,
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
+ struct btree_path_level *l =
+ &btree_iter_path(trans, &iter)->l[0];
+ struct bkey_packed *_k =
+ bch2_btree_node_iter_peek(&l->iter, l->b);
+
+ if (bpos_gt(l->b->key.k.p, i->prev_node)) {
+ bch2_btree_node_to_text(&i->buf, i->c, l->b);
+ i->prev_node = l->b->key.k.p;
+ }
+
+ bch2_bfloat_to_text(&i->buf, l->b, _k);
+ bch2_trans_unlock(trans);
+ i->from = bpos_successor(iter.pos);
+ flush_buf(i);
+ }))) ?:
+ i->ret;
+}
+
+static const struct file_operations bfloat_failed_debug_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_read_bfloat_failed,
+};
+
+static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
+ struct btree *b)
+{
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 32);
+
+ prt_printf(out, "%px btree=%s l=%u ",
+ b,
+ bch2_btree_id_str(b->c.btree_id),
+ b->c.level);
+ prt_newline(out);
+
+ printbuf_indent_add(out, 2);
+
+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+ prt_newline(out);
+
+ prt_printf(out, "flags: ");
+ prt_tab(out);
+ prt_bitflags(out, bch2_btree_node_flags, b->flags);
+ prt_newline(out);
+
+ prt_printf(out, "pcpu read locks: ");
+ prt_tab(out);
+ prt_printf(out, "%u", b->c.lock.readers != NULL);
+ prt_newline(out);
+
+ prt_printf(out, "written:");
+ prt_tab(out);
+ prt_printf(out, "%u", b->written);
+ prt_newline(out);
+
+ prt_printf(out, "writes blocked:");
+ prt_tab(out);
+ prt_printf(out, "%u", !list_empty_careful(&b->write_blocked));
+ prt_newline(out);
+
+ prt_printf(out, "will make reachable:");
+ prt_tab(out);
+ prt_printf(out, "%lx", b->will_make_reachable);
+ prt_newline(out);
+
+ prt_printf(out, "journal pin %px:", &b->writes[0].journal);
+ prt_tab(out);
+ prt_printf(out, "%llu", b->writes[0].journal.seq);
+ prt_newline(out);
+
+ prt_printf(out, "journal pin %px:", &b->writes[1].journal);
+ prt_tab(out);
+ prt_printf(out, "%llu", b->writes[1].journal.seq);
+ prt_newline(out);
+
+ printbuf_indent_sub(out, 2);
+}
+
+static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ bool done = false;
+ ssize_t ret = 0;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ do {
+ struct bucket_table *tbl;
+ struct rhash_head *pos;
+ struct btree *b;
+
+ ret = flush_buf(i);
+ if (ret)
+ return ret;
+
+ rcu_read_lock();
+ i->buf.atomic++;
+ tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
+ &c->btree_cache.table);
+ if (i->iter < tbl->size) {
+ rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
+ bch2_cached_btree_node_to_text(&i->buf, c, b);
+ i->iter++;
+ } else {
+ done = true;
+ }
+ --i->buf.atomic;
+ rcu_read_unlock();
+ } while (!done);
+
+ if (i->buf.allocation_failure)
+ ret = -ENOMEM;
+
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
+}
+
+static const struct file_operations cached_btree_nodes_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_cached_btree_nodes_read,
+};
+
+static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ struct btree_trans *trans;
+ ssize_t ret = 0;
+ u32 seq;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+restart:
+ seqmutex_lock(&c->btree_trans_lock);
+ list_for_each_entry(trans, &c->btree_trans_list, list) {
+ struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+
+ if (!task || task->pid <= i->iter)
+ continue;
+
+ closure_get(&trans->ref);
+ seq = seqmutex_seq(&c->btree_trans_lock);
+ seqmutex_unlock(&c->btree_trans_lock);
+
+ ret = flush_buf(i);
+ if (ret) {
+ closure_put(&trans->ref);
+ goto unlocked;
+ }
+
+ bch2_btree_trans_to_text(&i->buf, trans);
+
+ prt_printf(&i->buf, "backtrace:");
+ prt_newline(&i->buf);
+ printbuf_indent_add(&i->buf, 2);
+ bch2_prt_task_backtrace(&i->buf, task, 0);
+ printbuf_indent_sub(&i->buf, 2);
+ prt_newline(&i->buf);
+
+ i->iter = task->pid;
+
+ closure_put(&trans->ref);
+
+ if (!seqmutex_relock(&c->btree_trans_lock, seq))
+ goto restart;
+ }
+ seqmutex_unlock(&c->btree_trans_lock);
+unlocked:
+ if (i->buf.allocation_failure)
+ ret = -ENOMEM;
+
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
+}
+
+static const struct file_operations btree_transactions_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_btree_transactions_read,
+};
+
+static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ bool done = false;
+ int err;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ do {
+ err = flush_buf(i);
+ if (err)
+ return err;
+
+ if (!i->size)
+ break;
+
+ done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
+ i->iter++;
+ } while (!done);
+
+ if (i->buf.allocation_failure)
+ return -ENOMEM;
+
+ return i->ret;
+}
+
+static const struct file_operations journal_pins_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_journal_pins_read,
+};
+
+static int btree_transaction_stats_open(struct inode *inode, struct file *file)
+{
+ struct bch_fs *c = inode->i_private;
+ struct dump_iter *i;
+
+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
+
+ if (!i)
+ return -ENOMEM;
+
+ i->iter = 1;
+ i->c = c;
+ i->buf = PRINTBUF;
+ file->private_data = i;
+
+ return 0;
+}
+
+static int btree_transaction_stats_release(struct inode *inode, struct file *file)
+{
+ struct dump_iter *i = file->private_data;
+
+ printbuf_exit(&i->buf);
+ kfree(i);
+
+ return 0;
+}
+
+static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ int err;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ while (1) {
+ struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];
+
+ err = flush_buf(i);
+ if (err)
+ return err;
+
+ if (!i->size)
+ break;
+
+ if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) ||
+ !bch2_btree_transaction_fns[i->iter])
+ break;
+
+ prt_printf(&i->buf, "%s: ", bch2_btree_transaction_fns[i->iter]);
+ prt_newline(&i->buf);
+ printbuf_indent_add(&i->buf, 2);
+
+ mutex_lock(&s->lock);
+
+ prt_printf(&i->buf, "Max mem used: %u", s->max_mem);
+ prt_newline(&i->buf);
+
+ prt_printf(&i->buf, "Transaction duration:");
+ prt_newline(&i->buf);
+
+ printbuf_indent_add(&i->buf, 2);
+ bch2_time_stats_to_text(&i->buf, &s->duration);
+ printbuf_indent_sub(&i->buf, 2);
+
+ if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
+ prt_printf(&i->buf, "Lock hold times:");
+ prt_newline(&i->buf);
+
+ printbuf_indent_add(&i->buf, 2);
+ bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
+ printbuf_indent_sub(&i->buf, 2);
+ }
+
+ if (s->max_paths_text) {
+ prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths);
+ prt_newline(&i->buf);
+
+ printbuf_indent_add(&i->buf, 2);
+ prt_str_indented(&i->buf, s->max_paths_text);
+ printbuf_indent_sub(&i->buf, 2);
+ }
+
+ mutex_unlock(&s->lock);
+
+ printbuf_indent_sub(&i->buf, 2);
+ prt_newline(&i->buf);
+ i->iter++;
+ }
+
+ if (i->buf.allocation_failure)
+ return -ENOMEM;
+
+ return i->ret;
+}
+
+static const struct file_operations btree_transaction_stats_op = {
+ .owner = THIS_MODULE,
+ .open = btree_transaction_stats_open,
+ .release = btree_transaction_stats_release,
+ .read = btree_transaction_stats_read,
+};
+
+static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ struct btree_trans *trans;
+ ssize_t ret = 0;
+ u32 seq;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ if (i->iter)
+ goto out;
+restart:
+ seqmutex_lock(&c->btree_trans_lock);
+ list_for_each_entry(trans, &c->btree_trans_list, list) {
+ struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+
+ if (!task || task->pid <= i->iter)
+ continue;
+
+ closure_get(&trans->ref);
+ seq = seqmutex_seq(&c->btree_trans_lock);
+ seqmutex_unlock(&c->btree_trans_lock);
+
+ ret = flush_buf(i);
+ if (ret) {
+ closure_put(&trans->ref);
+ goto out;
+ }
+
+ bch2_check_for_deadlock(trans, &i->buf);
+
+ i->iter = task->pid;
+
+ closure_put(&trans->ref);
+
+ if (!seqmutex_relock(&c->btree_trans_lock, seq))
+ goto restart;
+ }
+ seqmutex_unlock(&c->btree_trans_lock);
+out:
+ if (i->buf.allocation_failure)
+ ret = -ENOMEM;
+
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
+}
+
+static const struct file_operations btree_deadlock_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_btree_deadlock_read,
+};
+
+void bch2_fs_debug_exit(struct bch_fs *c)
+{
+ if (!IS_ERR_OR_NULL(c->fs_debug_dir))
+ debugfs_remove_recursive(c->fs_debug_dir);
+}
+
+void bch2_fs_debug_init(struct bch_fs *c)
+{
+ struct btree_debug *bd;
+ char name[100];
+
+ if (IS_ERR_OR_NULL(bch_debug))
+ return;
+
+ snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
+ c->fs_debug_dir = debugfs_create_dir(name, bch_debug);
+ if (IS_ERR_OR_NULL(c->fs_debug_dir))
+ return;
+
+ debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir,
+ c->btree_debug, &cached_btree_nodes_ops);
+
+ debugfs_create_file("btree_transactions", 0400, c->fs_debug_dir,
+ c->btree_debug, &btree_transactions_ops);
+
+ debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
+ c->btree_debug, &journal_pins_ops);
+
+ debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
+ c, &btree_transaction_stats_op);
+
+ debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir,
+ c->btree_debug, &btree_deadlock_ops);
+
+ c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
+ if (IS_ERR_OR_NULL(c->btree_debug_dir))
+ return;
+
+ for (bd = c->btree_debug;
+ bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
+ bd++) {
+ bd->id = bd - c->btree_debug;
+ debugfs_create_file(bch2_btree_id_str(bd->id),
+ 0400, c->btree_debug_dir, bd,
+ &btree_debug_ops);
+
+ snprintf(name, sizeof(name), "%s-formats",
+ bch2_btree_id_str(bd->id));
+
+ debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
+ &btree_format_debug_ops);
+
+ snprintf(name, sizeof(name), "%s-bfloat-failed",
+ bch2_btree_id_str(bd->id));
+
+ debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
+ &bfloat_failed_debug_ops);
+ }
+}
+
+#endif
+
+void bch2_debug_exit(void)
+{
+ if (!IS_ERR_OR_NULL(bch_debug))
+ debugfs_remove_recursive(bch_debug);
+}
+
+int __init bch2_debug_init(void)
+{
+ bch_debug = debugfs_create_dir("bcachefs", NULL);
+ return 0;
+}
diff --git a/c_src/libbcachefs/debug.h b/c_src/libbcachefs/debug.h
new file mode 100644
index 00000000..2c37143b
--- /dev/null
+++ b/c_src/libbcachefs/debug.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DEBUG_H
+#define _BCACHEFS_DEBUG_H
+
+#include "bcachefs.h"
+
+struct bio;
+struct btree;
+struct bch_fs;
+
+void __bch2_btree_verify(struct bch_fs *, struct btree *);
+void bch2_btree_node_ondisk_to_text(struct printbuf *, struct bch_fs *,
+ const struct btree *);
+
+static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
+{
+ if (bch2_verify_btree_ondisk)
+ __bch2_btree_verify(c, b);
+}
+
+#ifdef CONFIG_DEBUG_FS
+void bch2_fs_debug_exit(struct bch_fs *);
+void bch2_fs_debug_init(struct bch_fs *);
+#else
+static inline void bch2_fs_debug_exit(struct bch_fs *c) {}
+static inline void bch2_fs_debug_init(struct bch_fs *c) {}
+#endif
+
+void bch2_debug_exit(void);
+int bch2_debug_init(void);
+
+#endif /* _BCACHEFS_DEBUG_H */
diff --git a/c_src/libbcachefs/dirent.c b/c_src/libbcachefs/dirent.c
new file mode 100644
index 00000000..4ae1e9f0
--- /dev/null
+++ b/c_src/libbcachefs/dirent.c
@@ -0,0 +1,603 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_buf.h"
+#include "bkey_methods.h"
+#include "btree_update.h"
+#include "extents.h"
+#include "dirent.h"
+#include "fs.h"
+#include "keylist.h"
+#include "str_hash.h"
+#include "subvolume.h"
+
+#include <linux/dcache.h>
+
+static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
+{
+ unsigned bkey_u64s = bkey_val_u64s(d.k);
+ unsigned bkey_bytes = bkey_u64s * sizeof(u64);
+ u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1];
+#if CPU_BIG_ENDIAN
+ unsigned trailing_nuls = last_u64 ? __builtin_ctzll(last_u64) / 8 : 64 / 8;
+#else
+ unsigned trailing_nuls = last_u64 ? __builtin_clzll(last_u64) / 8 : 64 / 8;
+#endif
+
+ return bkey_bytes -
+ offsetof(struct bch_dirent, d_name) -
+ trailing_nuls;
+}
+
+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
+{
+ return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+}
+
+static u64 bch2_dirent_hash(const struct bch_hash_info *info,
+ const struct qstr *name)
+{
+ struct bch_str_hash_ctx ctx;
+
+ bch2_str_hash_init(&ctx, info);
+ bch2_str_hash_update(&ctx, info, name->name, name->len);
+
+ /* [0,2) reserved for dots */
+ return max_t(u64, bch2_str_hash_end(&ctx, info), 2);
+}
+
+static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
+{
+ return bch2_dirent_hash(info, key);
+}
+
+static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
+{
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+ struct qstr name = bch2_dirent_get_name(d);
+
+ return bch2_dirent_hash(info, &name);
+}
+
+static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
+{
+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
+ const struct qstr l_name = bch2_dirent_get_name(l);
+ const struct qstr *r_name = _r;
+
+ return !qstr_eq(l_name, *r_name);
+}
+
+static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
+{
+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
+ struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
+ const struct qstr l_name = bch2_dirent_get_name(l);
+ const struct qstr r_name = bch2_dirent_get_name(r);
+
+ return !qstr_eq(l_name, r_name);
+}
+
+static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k)
+{
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+
+ if (d.v->d_type == DT_SUBVOL)
+ return le32_to_cpu(d.v->d_parent_subvol) == inum.subvol;
+ return true;
+}
+
+const struct bch_hash_desc bch2_dirent_hash_desc = {
+ .btree_id = BTREE_ID_dirents,
+ .key_type = KEY_TYPE_dirent,
+ .hash_key = dirent_hash_key,
+ .hash_bkey = dirent_hash_bkey,
+ .cmp_key = dirent_cmp_key,
+ .cmp_bkey = dirent_cmp_bkey,
+ .is_visible = dirent_is_visible,
+};
+
+int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+ struct qstr d_name = bch2_dirent_get_name(d);
+ int ret = 0;
+
+ bkey_fsck_err_on(!d_name.len, c, err,
+ dirent_empty_name,
+ "empty name");
+
+ bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err,
+ dirent_val_too_big,
+ "value too big (%zu > %u)",
+ bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
+
+ /*
+ * Check new keys don't exceed the max length
+ * (older keys may be larger.)
+ */
+ bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err,
+ dirent_name_too_long,
+ "dirent name too big (%u > %u)",
+ d_name.len, BCH_NAME_MAX);
+
+ bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err,
+ dirent_name_embedded_nul,
+ "dirent has stray data after name's NUL");
+
+ bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) ||
+ (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err,
+ dirent_name_dot_or_dotdot,
+ "invalid name");
+
+ bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err,
+ dirent_name_has_slash,
+ "name with /");
+
+ bkey_fsck_err_on(d.v->d_type != DT_SUBVOL &&
+ le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err,
+ dirent_to_itself,
+ "dirent points to own directory");
+fsck_err:
+ return ret;
+}
+
+void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+ struct qstr d_name = bch2_dirent_get_name(d);
+
+ prt_printf(out, "%.*s -> %llu type %s",
+ d_name.len,
+ d_name.name,
+ d.v->d_type != DT_SUBVOL
+ ? le64_to_cpu(d.v->d_inum)
+ : le32_to_cpu(d.v->d_child_subvol),
+ bch2_d_type_str(d.v->d_type));
+}
+
+static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
+ subvol_inum dir, u8 type,
+ const struct qstr *name, u64 dst)
+{
+ struct bkey_i_dirent *dirent;
+ unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);
+
+ if (name->len > BCH_NAME_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ BUG_ON(u64s > U8_MAX);
+
+ dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
+ if (IS_ERR(dirent))
+ return dirent;
+
+ bkey_dirent_init(&dirent->k_i);
+ dirent->k.u64s = u64s;
+
+ if (type != DT_SUBVOL) {
+ dirent->v.d_inum = cpu_to_le64(dst);
+ } else {
+ dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol);
+ dirent->v.d_child_subvol = cpu_to_le32(dst);
+ }
+
+ dirent->v.d_type = type;
+
+ memcpy(dirent->v.d_name, name->name, name->len);
+ memset(dirent->v.d_name + name->len, 0,
+ bkey_val_bytes(&dirent->k) -
+ offsetof(struct bch_dirent, d_name) -
+ name->len);
+
+ EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
+
+ return dirent;
+}
+
+int bch2_dirent_create_snapshot(struct btree_trans *trans,
+ u64 dir, u32 snapshot,
+ const struct bch_hash_info *hash_info,
+ u8 type, const struct qstr *name, u64 dst_inum,
+ u64 *dir_offset,
+ bch_str_hash_flags_t str_hash_flags)
+{
+ subvol_inum zero_inum = { 0 };
+ struct bkey_i_dirent *dirent;
+ int ret;
+
+ dirent = dirent_create_key(trans, zero_inum, type, name, dst_inum);
+ ret = PTR_ERR_OR_ZERO(dirent);
+ if (ret)
+ return ret;
+
+ dirent->k.p.inode = dir;
+ dirent->k.p.snapshot = snapshot;
+
+ ret = bch2_hash_set_snapshot(trans, bch2_dirent_hash_desc, hash_info,
+ zero_inum, snapshot,
+ &dirent->k_i, str_hash_flags,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ *dir_offset = dirent->k.p.offset;
+
+ return ret;
+}
+
+int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
+ const struct bch_hash_info *hash_info,
+ u8 type, const struct qstr *name, u64 dst_inum,
+ u64 *dir_offset,
+ bch_str_hash_flags_t str_hash_flags)
+{
+ struct bkey_i_dirent *dirent;
+ int ret;
+
+ dirent = dirent_create_key(trans, dir, type, name, dst_inum);
+ ret = PTR_ERR_OR_ZERO(dirent);
+ if (ret)
+ return ret;
+
+ ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
+ dir, &dirent->k_i, str_hash_flags);
+ *dir_offset = dirent->k.p.offset;
+
+ return ret;
+}
+
+static void dirent_copy_target(struct bkey_i_dirent *dst,
+ struct bkey_s_c_dirent src)
+{
+ dst->v.d_inum = src.v->d_inum;
+ dst->v.d_type = src.v->d_type;
+}
+
+int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
+ struct bkey_s_c_dirent d, subvol_inum *target)
+{
+ struct bch_subvolume s;
+ int ret = 0;
+
+ if (d.v->d_type == DT_SUBVOL &&
+ le32_to_cpu(d.v->d_parent_subvol) != dir.subvol)
+ return 1;
+
+ if (likely(d.v->d_type != DT_SUBVOL)) {
+ target->subvol = dir.subvol;
+ target->inum = le64_to_cpu(d.v->d_inum);
+ } else {
+ target->subvol = le32_to_cpu(d.v->d_child_subvol);
+
+ ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_CACHED, &s);
+
+ target->inum = le64_to_cpu(s.inode);
+ }
+
+ return ret;
+}
+
+int bch2_dirent_rename(struct btree_trans *trans,
+ subvol_inum src_dir, struct bch_hash_info *src_hash,
+ subvol_inum dst_dir, struct bch_hash_info *dst_hash,
+ const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset,
+ const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset,
+ enum bch_rename_mode mode)
+{
+ struct btree_iter src_iter = { NULL };
+ struct btree_iter dst_iter = { NULL };
+ struct bkey_s_c old_src, old_dst = bkey_s_c_null;
+ struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
+ struct bpos dst_pos =
+ POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
+ unsigned src_type = 0, dst_type = 0, src_update_flags = 0;
+ int ret = 0;
+
+ if (src_dir.subvol != dst_dir.subvol)
+ return -EXDEV;
+
+ memset(src_inum, 0, sizeof(*src_inum));
+ memset(dst_inum, 0, sizeof(*dst_inum));
+
+ /* Lookup src: */
+ ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
+ src_hash, src_dir, src_name,
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto out;
+
+ old_src = bch2_btree_iter_peek_slot(&src_iter);
+ ret = bkey_err(old_src);
+ if (ret)
+ goto out;
+
+ ret = bch2_dirent_read_target(trans, src_dir,
+ bkey_s_c_to_dirent(old_src), src_inum);
+ if (ret)
+ goto out;
+
+ src_type = bkey_s_c_to_dirent(old_src).v->d_type;
+
+ if (src_type == DT_SUBVOL && mode == BCH_RENAME_EXCHANGE)
+ return -EOPNOTSUPP;
+
+
+ /* Lookup dst: */
+ if (mode == BCH_RENAME) {
+ /*
+ * Note that we're _not_ checking if the target already exists -
+ * we're relying on the VFS to do that check for us for
+ * correctness:
+ */
+ ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
+ dst_hash, dst_dir, dst_name);
+ if (ret)
+ goto out;
+ } else {
+ ret = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
+ dst_hash, dst_dir, dst_name,
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto out;
+
+ old_dst = bch2_btree_iter_peek_slot(&dst_iter);
+ ret = bkey_err(old_dst);
+ if (ret)
+ goto out;
+
+ ret = bch2_dirent_read_target(trans, dst_dir,
+ bkey_s_c_to_dirent(old_dst), dst_inum);
+ if (ret)
+ goto out;
+
+ dst_type = bkey_s_c_to_dirent(old_dst).v->d_type;
+
+ if (dst_type == DT_SUBVOL)
+ return -EOPNOTSUPP;
+ }
+
+ if (mode != BCH_RENAME_EXCHANGE)
+ *src_offset = dst_iter.pos.offset;
+
+ /* Create new dst key: */
+ new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0);
+ ret = PTR_ERR_OR_ZERO(new_dst);
+ if (ret)
+ goto out;
+
+ dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src));
+ new_dst->k.p = dst_iter.pos;
+
+ /* Create new src key: */
+ if (mode == BCH_RENAME_EXCHANGE) {
+ new_src = dirent_create_key(trans, src_dir, 0, src_name, 0);
+ ret = PTR_ERR_OR_ZERO(new_src);
+ if (ret)
+ goto out;
+
+ dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst));
+ new_src->k.p = src_iter.pos;
+ } else {
+ new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+ ret = PTR_ERR_OR_ZERO(new_src);
+ if (ret)
+ goto out;
+
+ bkey_init(&new_src->k);
+ new_src->k.p = src_iter.pos;
+
+ if (bkey_le(dst_pos, src_iter.pos) &&
+ bkey_lt(src_iter.pos, dst_iter.pos)) {
+ /*
+ * We have a hash collision for the new dst key,
+ * and new_src - the key we're deleting - is between
+ * new_dst's hashed slot and the slot we're going to be
+ * inserting it into - oops. This will break the hash
+ * table if we don't deal with it:
+ */
+ if (mode == BCH_RENAME) {
+ /*
+ * If we're not overwriting, we can just insert
+ * new_dst at the src position:
+ */
+ new_src = new_dst;
+ new_src->k.p = src_iter.pos;
+ goto out_set_src;
+ } else {
+ /* If we're overwriting, we can't insert new_dst
+ * at a different slot because it has to
+ * overwrite old_dst - just make sure to use a
+ * whiteout when deleting src:
+ */
+ new_src->k.type = KEY_TYPE_hash_whiteout;
+ }
+ } else {
+ /* Check if we need a whiteout to delete src: */
+ ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc,
+ src_hash, &src_iter);
+ if (ret < 0)
+ goto out;
+
+ if (ret)
+ new_src->k.type = KEY_TYPE_hash_whiteout;
+ }
+ }
+
+ ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
+ if (ret)
+ goto out;
+out_set_src:
+
+ /*
+ * If we're deleting a subvolume, we need to really delete the dirent,
+ * not just emit a whiteout in the current snapshot:
+ */
+ if (src_type == DT_SUBVOL) {
+ bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
+ ret = bch2_btree_iter_traverse(&src_iter);
+ if (ret)
+ goto out;
+
+ new_src->k.p = src_iter.pos;
+ src_update_flags |= BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE;
+ }
+
+ ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
+ if (ret)
+ goto out;
+
+ if (mode == BCH_RENAME_EXCHANGE)
+ *src_offset = new_src->k.p.offset;
+ *dst_offset = new_dst->k.p.offset;
+out:
+ bch2_trans_iter_exit(trans, &src_iter);
+ bch2_trans_iter_exit(trans, &dst_iter);
+ return ret;
+}
+
+int __bch2_dirent_lookup_trans(struct btree_trans *trans,
+ struct btree_iter *iter,
+ subvol_inum dir,
+ const struct bch_hash_info *hash_info,
+ const struct qstr *name, subvol_inum *inum,
+ unsigned flags)
+{
+ struct bkey_s_c k;
+ struct bkey_s_c_dirent d;
+ u32 snapshot;
+ int ret;
+
+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
+ if (ret)
+ return ret;
+
+ ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
+ hash_info, dir, name, flags);
+ if (ret)
+ return ret;
+
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ d = bkey_s_c_to_dirent(k);
+
+ ret = bch2_dirent_read_target(trans, dir, d, inum);
+ if (ret > 0)
+ ret = -ENOENT;
+err:
+ if (ret)
+ bch2_trans_iter_exit(trans, iter);
+
+ return ret;
+}
+
+u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
+ const struct bch_hash_info *hash_info,
+ const struct qstr *name, subvol_inum *inum)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter = { NULL };
+
+ int ret = lockrestart_do(trans,
+ __bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ return ret;
+}
+
+int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
+ SPOS(dir, 0, snapshot),
+ POS(dir, U64_MAX), 0, k, ret)
+ if (k.k->type == KEY_TYPE_dirent) {
+ ret = -ENOTEMPTY;
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
+int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
+{
+ u32 snapshot;
+
+ return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?:
+ bch2_empty_dir_snapshot(trans, dir.inum, snapshot);
+}
+
+int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_s_c_dirent dirent;
+ subvol_inum target;
+ u32 snapshot;
+ struct bkey_buf sk;
+ struct qstr name;
+ int ret;
+
+ bch2_bkey_buf_init(&sk);
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
+ SPOS(inum.inum, ctx->pos, snapshot),
+ POS(inum.inum, U64_MAX), 0, k, ret) {
+ if (k.k->type != KEY_TYPE_dirent)
+ continue;
+
+ dirent = bkey_s_c_to_dirent(k);
+
+ ret = bch2_dirent_read_target(trans, inum, dirent, &target);
+ if (ret < 0)
+ break;
+ if (ret)
+ continue;
+
+ /* dir_emit() can fault and block: */
+ bch2_bkey_buf_reassemble(&sk, c, k);
+ dirent = bkey_i_to_s_c_dirent(sk.k);
+ bch2_trans_unlock(trans);
+
+ name = bch2_dirent_get_name(dirent);
+
+ ctx->pos = dirent.k->p.offset;
+ if (!dir_emit(ctx, name.name,
+ name.len,
+ target.inum,
+ vfs_d_type(dirent.v->d_type)))
+ break;
+ ctx->pos = dirent.k->p.offset + 1;
+
+ /*
+		 * read_target looks up subvolumes; we can overflow paths if the
+ * directory has many subvolumes in it
+ */
+ ret = btree_trans_too_many_iters(trans);
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_put(trans);
+ bch2_bkey_buf_exit(&sk, c);
+
+ return ret;
+}
diff --git a/c_src/libbcachefs/dirent.h b/c_src/libbcachefs/dirent.h
new file mode 100644
index 00000000..21ffeb78
--- /dev/null
+++ b/c_src/libbcachefs/dirent.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DIRENT_H
+#define _BCACHEFS_DIRENT_H
+
+#include "str_hash.h"
+
+enum bkey_invalid_flags;
+extern const struct bch_hash_desc bch2_dirent_hash_desc;
+
+int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_dirent ((struct bkey_ops) { \
+ .key_invalid = bch2_dirent_invalid, \
+ .val_to_text = bch2_dirent_to_text, \
+ .min_val_size = 16, \
+})
+
+struct qstr;
+struct file;
+struct dir_context;
+struct bch_fs;
+struct bch_hash_info;
+struct bch_inode_info;
+
+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);
+
+static inline unsigned dirent_val_u64s(unsigned len)
+{
+ return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len,
+ sizeof(u64));
+}
+
+int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
+ struct bkey_s_c_dirent, subvol_inum *);
+
+int bch2_dirent_create_snapshot(struct btree_trans *, u64, u32,
+ const struct bch_hash_info *, u8,
+ const struct qstr *, u64, u64 *,
+ bch_str_hash_flags_t);
+int bch2_dirent_create(struct btree_trans *, subvol_inum,
+ const struct bch_hash_info *, u8,
+ const struct qstr *, u64, u64 *,
+ bch_str_hash_flags_t);
+
+static inline unsigned vfs_d_type(unsigned type)
+{
+ return type == DT_SUBVOL ? DT_DIR : type;
+}
+
+enum bch_rename_mode {
+ BCH_RENAME,
+ BCH_RENAME_OVERWRITE,
+ BCH_RENAME_EXCHANGE,
+};
+
+int bch2_dirent_rename(struct btree_trans *,
+ subvol_inum, struct bch_hash_info *,
+ subvol_inum, struct bch_hash_info *,
+ const struct qstr *, subvol_inum *, u64 *,
+ const struct qstr *, subvol_inum *, u64 *,
+ enum bch_rename_mode);
+
+int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *,
+ subvol_inum, const struct bch_hash_info *,
+ const struct qstr *, subvol_inum *, unsigned);
+u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum,
+ const struct bch_hash_info *,
+ const struct qstr *, subvol_inum *);
+
+int bch2_empty_dir_snapshot(struct btree_trans *, u64, u32);
+int bch2_empty_dir_trans(struct btree_trans *, subvol_inum);
+int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *);
+
+#endif /* _BCACHEFS_DIRENT_H */
diff --git a/c_src/libbcachefs/disk_groups.c b/c_src/libbcachefs/disk_groups.c
new file mode 100644
index 00000000..06a7df52
--- /dev/null
+++ b/c_src/libbcachefs/disk_groups.c
@@ -0,0 +1,617 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "disk_groups.h"
+#include "sb-members.h"
+#include "super-io.h"
+
+#include <linux/sort.h>
+
+static int group_cmp(const void *_l, const void *_r)
+{
+ const struct bch_disk_group *l = _l;
+ const struct bch_disk_group *r = _r;
+
+ return ((BCH_GROUP_DELETED(l) > BCH_GROUP_DELETED(r)) -
+ (BCH_GROUP_DELETED(l) < BCH_GROUP_DELETED(r))) ?:
+ ((BCH_GROUP_PARENT(l) > BCH_GROUP_PARENT(r)) -
+ (BCH_GROUP_PARENT(l) < BCH_GROUP_PARENT(r))) ?:
+ strncmp(l->label, r->label, sizeof(l->label));
+}
+
+static int bch2_sb_disk_groups_validate(struct bch_sb *sb,
+ struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_disk_groups *groups =
+ field_to_type(f, disk_groups);
+ struct bch_disk_group *g, *sorted = NULL;
+ unsigned nr_groups = disk_groups_nr(groups);
+ unsigned i, len;
+ int ret = 0;
+
+ for (i = 0; i < sb->nr_devices; i++) {
+ struct bch_member m = bch2_sb_member_get(sb, i);
+ unsigned group_id;
+
+ if (!BCH_MEMBER_GROUP(&m))
+ continue;
+
+ group_id = BCH_MEMBER_GROUP(&m) - 1;
+
+ if (group_id >= nr_groups) {
+ prt_printf(err, "disk %u has invalid label %u (have %u)",
+ i, group_id, nr_groups);
+ return -BCH_ERR_invalid_sb_disk_groups;
+ }
+
+ if (BCH_GROUP_DELETED(&groups->entries[group_id])) {
+ prt_printf(err, "disk %u has deleted label %u", i, group_id);
+ return -BCH_ERR_invalid_sb_disk_groups;
+ }
+ }
+
+ if (!nr_groups)
+ return 0;
+
+ for (i = 0; i < nr_groups; i++) {
+ g = groups->entries + i;
+
+ if (BCH_GROUP_DELETED(g))
+ continue;
+
+ len = strnlen(g->label, sizeof(g->label));
+ if (!len) {
+ prt_printf(err, "label %u empty", i);
+ return -BCH_ERR_invalid_sb_disk_groups;
+ }
+ }
+
+ sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL);
+ if (!sorted)
+ return -BCH_ERR_ENOMEM_disk_groups_validate;
+
+ memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted));
+ sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL);
+
+ for (g = sorted; g + 1 < sorted + nr_groups; g++)
+ if (!BCH_GROUP_DELETED(g) &&
+ !group_cmp(&g[0], &g[1])) {
+ prt_printf(err, "duplicate label %llu.%.*s",
+ BCH_GROUP_PARENT(g),
+ (int) sizeof(g->label), g->label);
+ ret = -BCH_ERR_invalid_sb_disk_groups;
+ goto err;
+ }
+err:
+ kfree(sorted);
+ return ret;
+}
+
+void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ out->atomic++;
+ rcu_read_lock();
+
+ struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
+ if (!g)
+ goto out;
+
+ for (unsigned i = 0; i < g->nr; i++) {
+ if (i)
+ prt_printf(out, " ");
+
+ if (g->entries[i].deleted) {
+ prt_printf(out, "[deleted]");
+ continue;
+ }
+
+ prt_printf(out, "[parent %d devs", g->entries[i].parent);
+ for_each_member_device_rcu(c, ca, &g->entries[i].devs)
+ prt_printf(out, " %s", ca->name);
+ prt_printf(out, "]");
+ }
+
+out:
+ rcu_read_unlock();
+ out->atomic--;
+}
+
+static void bch2_sb_disk_groups_to_text(struct printbuf *out,
+ struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_disk_groups *groups =
+ field_to_type(f, disk_groups);
+ struct bch_disk_group *g;
+ unsigned nr_groups = disk_groups_nr(groups);
+
+ for (g = groups->entries;
+ g < groups->entries + nr_groups;
+ g++) {
+ if (g != groups->entries)
+ prt_printf(out, " ");
+
+ if (BCH_GROUP_DELETED(g))
+ prt_printf(out, "[deleted]");
+ else
+ prt_printf(out, "[parent %llu name %s]",
+ BCH_GROUP_PARENT(g), g->label);
+ }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = {
+ .validate = bch2_sb_disk_groups_validate,
+ .to_text = bch2_sb_disk_groups_to_text
+};
+
+int bch2_sb_disk_groups_to_cpu(struct bch_fs *c)
+{
+ struct bch_sb_field_disk_groups *groups;
+ struct bch_disk_groups_cpu *cpu_g, *old_g;
+ unsigned i, g, nr_groups;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ groups = bch2_sb_field_get(c->disk_sb.sb, disk_groups);
+ nr_groups = disk_groups_nr(groups);
+
+ if (!groups)
+ return 0;
+
+ cpu_g = kzalloc(struct_size(cpu_g, entries, nr_groups), GFP_KERNEL);
+ if (!cpu_g)
+ return -BCH_ERR_ENOMEM_disk_groups_to_cpu;
+
+ cpu_g->nr = nr_groups;
+
+ for (i = 0; i < nr_groups; i++) {
+ struct bch_disk_group *src = &groups->entries[i];
+ struct bch_disk_group_cpu *dst = &cpu_g->entries[i];
+
+ dst->deleted = BCH_GROUP_DELETED(src);
+ dst->parent = BCH_GROUP_PARENT(src);
+ memcpy(dst->label, src->label, sizeof(dst->label));
+ }
+
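+	/*
+	 * Add each device to the device mask of its group and of every
+	 * ancestor of that group:
+	 */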
+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
+ struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, i);
+ struct bch_disk_group_cpu *dst;
+
+ if (!bch2_member_exists(&m))
+ continue;
+
+ g = BCH_MEMBER_GROUP(&m);
+ while (g) {
+ dst = &cpu_g->entries[g - 1];
+ __set_bit(i, dst->devs.d);
+ g = dst->parent;
+ }
+ }
+
+ old_g = rcu_dereference_protected(c->disk_groups,
+ lockdep_is_held(&c->sb_lock));
+ rcu_assign_pointer(c->disk_groups, cpu_g);
+ if (old_g)
+ kfree_rcu(old_g, rcu);
+
+ return 0;
+}
+
+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned target)
+{
+ struct target t = target_decode(target);
+ struct bch_devs_mask *devs;
+
+ rcu_read_lock();
+
+ switch (t.type) {
+ case TARGET_NULL:
+ devs = NULL;
+ break;
+ case TARGET_DEV: {
+ struct bch_dev *ca = t.dev < c->sb.nr_devices
+ ? rcu_dereference(c->devs[t.dev])
+ : NULL;
+ devs = ca ? &ca->self : NULL;
+ break;
+ }
+ case TARGET_GROUP: {
+ struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
+
+ devs = g && t.group < g->nr && !g->entries[t.group].deleted
+ ? &g->entries[t.group].devs
+ : NULL;
+ break;
+ }
+ default:
+ BUG();
+ }
+
+ rcu_read_unlock();
+
+ return devs;
+}
+
+bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target)
+{
+ struct target t = target_decode(target);
+
+ switch (t.type) {
+ case TARGET_NULL:
+ return false;
+ case TARGET_DEV:
+ return dev == t.dev;
+ case TARGET_GROUP: {
+ struct bch_disk_groups_cpu *g;
+ const struct bch_devs_mask *m;
+ bool ret;
+
+ rcu_read_lock();
+ g = rcu_dereference(c->disk_groups);
+ m = g && t.group < g->nr && !g->entries[t.group].deleted
+ ? &g->entries[t.group].devs
+ : NULL;
+
+ ret = m ? test_bit(dev, m->d) : false;
+ rcu_read_unlock();
+
+ return ret;
+ }
+ default:
+ BUG();
+ }
+}
+
+static int __bch2_disk_group_find(struct bch_sb_field_disk_groups *groups,
+ unsigned parent,
+ const char *name, unsigned namelen)
+{
+ unsigned i, nr_groups = disk_groups_nr(groups);
+
+ if (!namelen || namelen > BCH_SB_LABEL_SIZE)
+ return -EINVAL;
+
+ for (i = 0; i < nr_groups; i++) {
+ struct bch_disk_group *g = groups->entries + i;
+
+ if (BCH_GROUP_DELETED(g))
+ continue;
+
+ if (!BCH_GROUP_DELETED(g) &&
+ BCH_GROUP_PARENT(g) == parent &&
+ strnlen(g->label, sizeof(g->label)) == namelen &&
+ !memcmp(name, g->label, namelen))
+ return i;
+ }
+
+ return -1;
+}
+
+static int __bch2_disk_group_add(struct bch_sb_handle *sb, unsigned parent,
+ const char *name, unsigned namelen)
+{
+ struct bch_sb_field_disk_groups *groups =
+ bch2_sb_field_get(sb->sb, disk_groups);
+ unsigned i, nr_groups = disk_groups_nr(groups);
+ struct bch_disk_group *g;
+
+ if (!namelen || namelen > BCH_SB_LABEL_SIZE)
+ return -EINVAL;
+
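+	/* Reuse the first deleted entry, if any; otherwise the field is resized below: */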
+ for (i = 0;
+ i < nr_groups && !BCH_GROUP_DELETED(&groups->entries[i]);
+ i++)
+ ;
+
+ if (i == nr_groups) {
+ unsigned u64s =
+ (sizeof(struct bch_sb_field_disk_groups) +
+ sizeof(struct bch_disk_group) * (nr_groups + 1)) /
+ sizeof(u64);
+
+ groups = bch2_sb_field_resize(sb, disk_groups, u64s);
+ if (!groups)
+ return -BCH_ERR_ENOSPC_disk_label_add;
+
+ nr_groups = disk_groups_nr(groups);
+ }
+
+ BUG_ON(i >= nr_groups);
+
+ g = &groups->entries[i];
+
+ memcpy(g->label, name, namelen);
+ if (namelen < sizeof(g->label))
+ g->label[namelen] = '\0';
+ SET_BCH_GROUP_DELETED(g, 0);
+ SET_BCH_GROUP_PARENT(g, parent);
+ SET_BCH_GROUP_DATA_ALLOWED(g, ~0);
+
+ return i;
+}
+
+int bch2_disk_path_find(struct bch_sb_handle *sb, const char *name)
+{
+ struct bch_sb_field_disk_groups *groups =
+ bch2_sb_field_get(sb->sb, disk_groups);
+ int v = -1;
+
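+	/*
+	 * Walk the dot-separated path, looking up each component as a child of
+	 * the group found so far (parent is encoded as index + 1, 0 = root):
+	 */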
+ do {
+ const char *next = strchrnul(name, '.');
+ unsigned len = next - name;
+
+ if (*next == '.')
+ next++;
+
+ v = __bch2_disk_group_find(groups, v + 1, name, len);
+ name = next;
+ } while (*name && v >= 0);
+
+ return v;
+}
+
+int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name)
+{
+ struct bch_sb_field_disk_groups *groups;
+ unsigned parent = 0;
+ int v = -1;
+
+ do {
+ const char *next = strchrnul(name, '.');
+ unsigned len = next - name;
+
+ if (*next == '.')
+ next++;
+
+ groups = bch2_sb_field_get(sb->sb, disk_groups);
+
+ v = __bch2_disk_group_find(groups, parent, name, len);
+ if (v < 0)
+ v = __bch2_disk_group_add(sb, parent, name, len);
+ if (v < 0)
+ return v;
+
+ parent = v + 1;
+ name = next;
+ } while (*name && v >= 0);
+
+ return v;
+}
+
+void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
+{
+ struct bch_disk_groups_cpu *groups;
+ struct bch_disk_group_cpu *g;
+ unsigned nr = 0;
+ u16 path[32];
+
+ out->atomic++;
+ rcu_read_lock();
+ groups = rcu_dereference(c->disk_groups);
+ if (!groups)
+ goto invalid;
+
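+	/*
+	 * Walk up the parent chain recording the path, then print it from the
+	 * root down:
+	 */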
+ while (1) {
+ if (nr == ARRAY_SIZE(path))
+ goto invalid;
+
+ if (v >= groups->nr)
+ goto invalid;
+
+ g = groups->entries + v;
+
+ if (g->deleted)
+ goto invalid;
+
+ path[nr++] = v;
+
+ if (!g->parent)
+ break;
+
+ v = g->parent - 1;
+ }
+
+ while (nr) {
+ v = path[--nr];
+ g = groups->entries + v;
+
+ prt_printf(out, "%.*s", (int) sizeof(g->label), g->label);
+ if (nr)
+ prt_printf(out, ".");
+ }
+out:
+ rcu_read_unlock();
+ out->atomic--;
+ return;
+invalid:
+ prt_printf(out, "invalid label %u", v);
+ goto out;
+}
+
+void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
+{
+ struct bch_sb_field_disk_groups *groups =
+ bch2_sb_field_get(sb, disk_groups);
+ struct bch_disk_group *g;
+ unsigned nr = 0;
+ u16 path[32];
+
+ while (1) {
+ if (nr == ARRAY_SIZE(path))
+ goto inval;
+
+ if (v >= disk_groups_nr(groups))
+ goto inval;
+
+ g = groups->entries + v;
+
+ if (BCH_GROUP_DELETED(g))
+ goto inval;
+
+ path[nr++] = v;
+
+ if (!BCH_GROUP_PARENT(g))
+ break;
+
+ v = BCH_GROUP_PARENT(g) - 1;
+ }
+
+ while (nr) {
+ v = path[--nr];
+ g = groups->entries + v;
+
+ prt_printf(out, "%.*s", (int) sizeof(g->label), g->label);
+ if (nr)
+ prt_printf(out, ".");
+ }
+ return;
+inval:
+ prt_printf(out, "invalid label %u", v);
+}
+
+int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
+{
+ struct bch_member *mi;
+ int ret, v = -1;
+
+ if (!strlen(name) || !strcmp(name, "none"))
+ return 0;
+
+ v = bch2_disk_path_find_or_create(&c->disk_sb, name);
+ if (v < 0)
+ return v;
+
+ ret = bch2_sb_disk_groups_to_cpu(c);
+ if (ret)
+ return ret;
+
+ mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ SET_BCH_MEMBER_GROUP(mi, v + 1);
+ return 0;
+}
+
+int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name)
+{
+ int ret;
+
+ mutex_lock(&c->sb_lock);
+ ret = __bch2_dev_group_set(c, ca, name) ?:
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
+}
+
+int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res,
+ struct printbuf *err)
+{
+ struct bch_dev *ca;
+ int g;
+
+ if (!val)
+ return -EINVAL;
+
+ if (!c)
+ return 0;
+
+ if (!strlen(val) || !strcmp(val, "none")) {
+ *res = 0;
+ return 0;
+ }
+
+ /* Is it a device? */
+ ca = bch2_dev_lookup(c, val);
+ if (!IS_ERR(ca)) {
+ *res = dev_to_target(ca->dev_idx);
+ percpu_ref_put(&ca->ref);
+ return 0;
+ }
+
+ mutex_lock(&c->sb_lock);
+ g = bch2_disk_path_find(&c->disk_sb, val);
+ mutex_unlock(&c->sb_lock);
+
+ if (g >= 0) {
+ *res = group_to_target(g);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
+{
+ struct target t = target_decode(v);
+
+ switch (t.type) {
+ case TARGET_NULL:
+ prt_printf(out, "none");
+ break;
+ case TARGET_DEV: {
+ struct bch_dev *ca;
+
+ out->atomic++;
+ rcu_read_lock();
+ ca = t.dev < c->sb.nr_devices
+ ? rcu_dereference(c->devs[t.dev])
+ : NULL;
+
+ if (ca && percpu_ref_tryget(&ca->io_ref)) {
+ prt_printf(out, "/dev/%s", ca->name);
+ percpu_ref_put(&ca->io_ref);
+ } else if (ca) {
+ prt_printf(out, "offline device %u", t.dev);
+ } else {
+ prt_printf(out, "invalid device %u", t.dev);
+ }
+
+ rcu_read_unlock();
+ out->atomic--;
+ break;
+ }
+ case TARGET_GROUP:
+ bch2_disk_path_to_text(out, c, t.group);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
+{
+ struct target t = target_decode(v);
+
+ switch (t.type) {
+ case TARGET_NULL:
+ prt_printf(out, "none");
+ break;
+ case TARGET_DEV: {
+ struct bch_member m = bch2_sb_member_get(sb, t.dev);
+
+ if (bch2_dev_exists(sb, t.dev)) {
+ prt_printf(out, "Device ");
+ pr_uuid(out, m.uuid.b);
+ prt_printf(out, " (%u)", t.dev);
+ } else {
+ prt_printf(out, "Bad device %u", t.dev);
+ }
+ break;
+ }
+ case TARGET_GROUP:
+ bch2_disk_path_to_text_sb(out, sb, t.group);
+ break;
+ default:
+ BUG();
+ }
+}
+
+void bch2_opt_target_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_sb *sb,
+ u64 v)
+{
+ if (c)
+ bch2_target_to_text(out, c, v);
+ else
+ bch2_target_to_text_sb(out, sb, v);
+}
diff --git a/c_src/libbcachefs/disk_groups.h b/c_src/libbcachefs/disk_groups.h
new file mode 100644
index 00000000..441826ff
--- /dev/null
+++ b/c_src/libbcachefs/disk_groups.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DISK_GROUPS_H
+#define _BCACHEFS_DISK_GROUPS_H
+
+#include "disk_groups_types.h"
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups;
+
+static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups)
+{
+ return groups
+ ? (vstruct_end(&groups->field) -
+ (void *) &groups->entries[0]) / sizeof(struct bch_disk_group)
+ : 0;
+}
+
+struct target {
+ enum {
+ TARGET_NULL,
+ TARGET_DEV,
+ TARGET_GROUP,
+ } type;
+ union {
+ unsigned dev;
+ unsigned group;
+ };
+};
+
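+/*
+ * A target is encoded in a single integer: 0 means no target, values in
+ * [TARGET_DEV_START, TARGET_GROUP_START) refer to a device by index, and
+ * values >= TARGET_GROUP_START refer to a disk group (label):
+ */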
+#define TARGET_DEV_START 1
+#define TARGET_GROUP_START (256 + TARGET_DEV_START)
+
+static inline u16 dev_to_target(unsigned dev)
+{
+ return TARGET_DEV_START + dev;
+}
+
+static inline u16 group_to_target(unsigned group)
+{
+ return TARGET_GROUP_START + group;
+}
+
+static inline struct target target_decode(unsigned target)
+{
+ if (target >= TARGET_GROUP_START)
+ return (struct target) {
+ .type = TARGET_GROUP,
+ .group = target - TARGET_GROUP_START
+ };
+
+ if (target >= TARGET_DEV_START)
+ return (struct target) {
+ .type = TARGET_DEV,
+ .group = target - TARGET_DEV_START
+ };
+
+ return (struct target) { .type = TARGET_NULL };
+}
+
+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned);
+
+static inline struct bch_devs_mask target_rw_devs(struct bch_fs *c,
+ enum bch_data_type data_type,
+ u16 target)
+{
+ struct bch_devs_mask devs = c->rw_devs[data_type];
+ const struct bch_devs_mask *t = bch2_target_to_mask(c, target);
+
+ if (t)
+ bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
+ return devs;
+}
+
+static inline bool bch2_target_accepts_data(struct bch_fs *c,
+ enum bch_data_type data_type,
+ u16 target)
+{
+ struct bch_devs_mask rw_devs = target_rw_devs(c, data_type, target);
+ return !bitmap_empty(rw_devs.d, BCH_SB_MEMBERS_MAX);
+}
+
+bool bch2_dev_in_target(struct bch_fs *, unsigned, unsigned);
+
+int bch2_disk_path_find(struct bch_sb_handle *, const char *);
+
+/* Exported for userspace bcachefs-tools: */
+int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *);
+
+void bch2_disk_path_to_text(struct printbuf *, struct bch_fs *, unsigned);
+void bch2_disk_path_to_text_sb(struct printbuf *, struct bch_sb *, unsigned);
+
+void bch2_target_to_text(struct printbuf *out, struct bch_fs *, unsigned);
+
+int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
+void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
+
+#define bch2_opt_target (struct bch_opt_fn) { \
+ .parse = bch2_opt_target_parse, \
+ .to_text = bch2_opt_target_to_text, \
+}
+
+int bch2_sb_disk_groups_to_cpu(struct bch_fs *);
+
+int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);
+int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *);
+
+const char *bch2_sb_validate_disk_groups(struct bch_sb *,
+ struct bch_sb_field *);
+
+void bch2_disk_groups_to_text(struct printbuf *, struct bch_fs *);
+
+#endif /* _BCACHEFS_DISK_GROUPS_H */
diff --git a/c_src/libbcachefs/disk_groups_types.h b/c_src/libbcachefs/disk_groups_types.h
new file mode 100644
index 00000000..a54ef085
--- /dev/null
+++ b/c_src/libbcachefs/disk_groups_types.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DISK_GROUPS_TYPES_H
+#define _BCACHEFS_DISK_GROUPS_TYPES_H
+
+struct bch_disk_group_cpu {
+ bool deleted;
+ u16 parent;
+ u8 label[BCH_SB_LABEL_SIZE];
+ struct bch_devs_mask devs;
+};
+
+struct bch_disk_groups_cpu {
+ struct rcu_head rcu;
+ unsigned nr;
+ struct bch_disk_group_cpu entries[] __counted_by(nr);
+};
+
+#endif /* _BCACHEFS_DISK_GROUPS_TYPES_H */
diff --git a/c_src/libbcachefs/ec.c b/c_src/libbcachefs/ec.c
new file mode 100644
index 00000000..d802bc63
--- /dev/null
+++ b/c_src/libbcachefs/ec.c
@@ -0,0 +1,2259 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* erasure coding */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "backpointers.h"
+#include "bkey_buf.h"
+#include "bset.h"
+#include "btree_gc.h"
+#include "btree_update.h"
+#include "btree_write_buffer.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "error.h"
+#include "io_read.h"
+#include "keylist.h"
+#include "recovery.h"
+#include "replicas.h"
+#include "super-io.h"
+#include "util.h"
+
+#include <linux/sort.h>
+
+#ifdef __KERNEL__
+
+#include <linux/raid/pq.h>
+#include <linux/raid/xor.h>
+
+static void raid5_recov(unsigned disks, unsigned failed_idx,
+ size_t size, void **data)
+{
+ unsigned i = 2, nr;
+
+ BUG_ON(failed_idx >= disks);
+
+ swap(data[0], data[failed_idx]);
+ memcpy(data[0], data[1], size);
+
+ while (i < disks) {
+ nr = min_t(unsigned, disks - i, MAX_XOR_BLOCKS);
+ xor_blocks(nr, size, data[0], data + i);
+ i += nr;
+ }
+
+ swap(data[0], data[failed_idx]);
+}
+
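+/*
+ * Generate parity: p is the xor of the data blocks; with two redundancy
+ * blocks the raid6 syndrome is generated as well:
+ */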
+static void raid_gen(int nd, int np, size_t size, void **v)
+{
+ if (np >= 1)
+ raid5_recov(nd + np, nd, size, v);
+ if (np >= 2)
+ raid6_call.gen_syndrome(nd + np, size, v);
+ BUG_ON(np > 2);
+}
+
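+/*
+ * Reconstruct failed blocks: nr is the number of failures, ir[] their block
+ * indices, nd/np the number of data/parity blocks:
+ */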
+static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v)
+{
+ switch (nr) {
+ case 0:
+ break;
+ case 1:
+ if (ir[0] < nd + 1)
+ raid5_recov(nd + 1, ir[0], size, v);
+ else
+ raid6_call.gen_syndrome(nd + np, size, v);
+ break;
+ case 2:
+ if (ir[1] < nd) {
+ /* data+data failure. */
+ raid6_2data_recov(nd + np, size, ir[0], ir[1], v);
+ } else if (ir[0] < nd) {
+ /* data + p/q failure */
+
+ if (ir[1] == nd) /* data + p failure */
+ raid6_datap_recov(nd + np, size, ir[0], v);
+ else { /* data + q failure */
+ raid5_recov(nd + 1, ir[0], size, v);
+ raid6_call.gen_syndrome(nd + np, size, v);
+ }
+ } else {
+ raid_gen(nd, np, size, v);
+ }
+ break;
+ default:
+ BUG();
+ }
+}
+
+#else
+
+#include <raid/raid.h>
+
+#endif
+
+struct ec_bio {
+ struct bch_dev *ca;
+ struct ec_stripe_buf *buf;
+ size_t idx;
+ struct bio bio;
+};
+
+/* Stripes btree keys: */
+
+int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+ int ret = 0;
+
+ bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) ||
+ bpos_gt(k.k->p, POS(0, U32_MAX)), c, err,
+ stripe_pos_bad,
+ "stripe at bad pos");
+
+ bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err,
+ stripe_val_size_bad,
+ "incorrect value size (%zu < %u)",
+ bkey_val_u64s(k.k), stripe_val_u64s(s));
+
+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
+fsck_err:
+ return ret;
+}
+
+void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+ unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
+
+ prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
+ s->algorithm,
+ le16_to_cpu(s->sectors),
+ nr_data,
+ s->nr_redundant,
+ s->csum_type,
+ 1U << s->csum_granularity_bits);
+
+ for (i = 0; i < s->nr_blocks; i++) {
+ const struct bch_extent_ptr *ptr = s->ptrs + i;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ u32 offset;
+ u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
+
+ prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
+ if (i < nr_data)
+ prt_printf(out, "#%u", stripe_blockcount_get(s, i));
+ prt_printf(out, " gen %u", ptr->gen);
+ if (ptr_stale(ca, ptr))
+ prt_printf(out, " stale");
+ }
+}
+
+/* Triggers: */
+
+static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
+ struct bkey_s_c_stripe s,
+ unsigned idx, bool deleting)
+{
+ struct bch_fs *c = trans->c;
+ const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
+ struct btree_iter iter;
+ struct bkey_i_alloc_v4 *a;
+ enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant
+ ? BCH_DATA_parity : 0;
+ s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0;
+ int ret = 0;
+
+ if (deleting)
+ sectors = -sectors;
+
+ a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr));
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+
+ ret = bch2_check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
+ a->v.gen, a->v.data_type,
+ a->v.dirty_sectors);
+ if (ret)
+ goto err;
+
+ if (!deleting) {
+ if (bch2_trans_inconsistent_on(a->v.stripe ||
+ a->v.stripe_redundancy, trans,
+ "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
+ iter.pos.inode, iter.pos.offset, a->v.gen,
+ bch2_data_types[a->v.data_type],
+ a->v.dirty_sectors,
+ a->v.stripe, s.k->p.offset)) {
+ ret = -EIO;
+ goto err;
+ }
+
+ if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans,
+ "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
+ iter.pos.inode, iter.pos.offset, a->v.gen,
+ bch2_data_types[a->v.data_type],
+ a->v.dirty_sectors,
+ s.k->p.offset)) {
+ ret = -EIO;
+ goto err;
+ }
+
+ a->v.stripe = s.k->p.offset;
+ a->v.stripe_redundancy = s.v->nr_redundant;
+ a->v.data_type = BCH_DATA_stripe;
+ } else {
+ if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
+ a->v.stripe_redundancy != s.v->nr_redundant, trans,
+ "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
+ iter.pos.inode, iter.pos.offset, a->v.gen,
+ s.k->p.offset, a->v.stripe)) {
+ ret = -EIO;
+ goto err;
+ }
+
+ a->v.stripe = 0;
+ a->v.stripe_redundancy = 0;
+ a->v.data_type = alloc_data_type(a->v, BCH_DATA_user);
+ }
+
+ a->v.dirty_sectors += sectors;
+ if (data_type)
+ a->v.data_type = !deleting ? data_type : 0;
+
+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+ if (ret)
+ goto err;
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int mark_stripe_bucket(struct btree_trans *trans,
+ struct bkey_s_c k,
+ unsigned ptr_idx,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+ unsigned nr_data = s->nr_blocks - s->nr_redundant;
+ bool parity = ptr_idx >= nr_data;
+ enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
+ s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
+ const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ struct bucket old, new, *g;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
+
+ /* * XXX doesn't handle deletion */
+
+ percpu_down_read(&c->mark_lock);
+ g = PTR_GC_BUCKET(ca, ptr);
+
+ if (g->dirty_sectors ||
+ (g->stripe && g->stripe != k.k->p.offset)) {
+ bch2_fs_inconsistent(c,
+ "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
+ ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EINVAL;
+ goto err;
+ }
+
+ bucket_lock(g);
+ old = *g;
+
+ ret = bch2_check_bucket_ref(trans, k, ptr, sectors, data_type,
+ g->gen, g->data_type,
+ g->dirty_sectors);
+ if (ret)
+ goto err;
+
+ g->data_type = data_type;
+ g->dirty_sectors += sectors;
+
+ g->stripe = k.k->p.offset;
+ g->stripe_redundancy = s->nr_redundant;
+ new = *g;
+err:
+ bucket_unlock(g);
+ if (!ret)
+ bch2_dev_usage_update_m(c, ca, &old, &new);
+ percpu_up_read(&c->mark_lock);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_trigger_stripe(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old, struct bkey_s _new,
+ unsigned flags)
+{
+ struct bkey_s_c new = _new.s_c;
+ struct bch_fs *c = trans->c;
+ u64 idx = new.k->p.offset;
+ const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
+ ? bkey_s_c_to_stripe(old).v : NULL;
+ const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
+ ? bkey_s_c_to_stripe(new).v : NULL;
+
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ /*
+ * If the pointers aren't changing, we don't need to do anything:
+ */
+ if (new_s && old_s &&
+ new_s->nr_blocks == old_s->nr_blocks &&
+ new_s->nr_redundant == old_s->nr_redundant &&
+ !memcmp(old_s->ptrs, new_s->ptrs,
+ new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
+ return 0;
+
+ BUG_ON(new_s && old_s &&
+ (new_s->nr_blocks != old_s->nr_blocks ||
+ new_s->nr_redundant != old_s->nr_redundant));
+
+ if (new_s) {
+ s64 sectors = le16_to_cpu(new_s->sectors);
+
+ struct bch_replicas_padded r;
+ bch2_bkey_to_replicas(&r.e, new);
+ int ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
+ if (ret)
+ return ret;
+ }
+
+ if (old_s) {
+ s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
+
+ struct bch_replicas_padded r;
+ bch2_bkey_to_replicas(&r.e, old);
+ int ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
+ if (ret)
+ return ret;
+ }
+
+ unsigned nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks;
+ for (unsigned i = 0; i < nr_blocks; i++) {
+ if (new_s && old_s &&
+ !memcmp(&new_s->ptrs[i],
+ &old_s->ptrs[i],
+ sizeof(new_s->ptrs[i])))
+ continue;
+
+ if (new_s) {
+ int ret = bch2_trans_mark_stripe_bucket(trans,
+ bkey_s_c_to_stripe(new), i, false);
+ if (ret)
+ return ret;
+ }
+
+ if (old_s) {
+ int ret = bch2_trans_mark_stripe_bucket(trans,
+ bkey_s_c_to_stripe(old), i, true);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+
+ if (!(flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))) {
+ struct stripe *m = genradix_ptr(&c->stripes, idx);
+
+ if (!m) {
+ struct printbuf buf1 = PRINTBUF;
+ struct printbuf buf2 = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf1, c, old);
+ bch2_bkey_val_to_text(&buf2, c, new);
+ bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n"
+ "old %s\n"
+ "new %s", idx, buf1.buf, buf2.buf);
+ printbuf_exit(&buf2);
+ printbuf_exit(&buf1);
+ bch2_inconsistent_error(c);
+ return -1;
+ }
+
+ if (!new_s) {
+ bch2_stripes_heap_del(c, m, idx);
+
+ memset(m, 0, sizeof(*m));
+ } else {
+ m->sectors = le16_to_cpu(new_s->sectors);
+ m->algorithm = new_s->algorithm;
+ m->nr_blocks = new_s->nr_blocks;
+ m->nr_redundant = new_s->nr_redundant;
+ m->blocks_nonempty = 0;
+
+ for (unsigned i = 0; i < new_s->nr_blocks; i++)
+ m->blocks_nonempty += !!stripe_blockcount_get(new_s, i);
+
+ if (!old_s)
+ bch2_stripes_heap_insert(c, m, idx);
+ else
+ bch2_stripes_heap_update(c, m, idx);
+ }
+ }
+
+ if (flags & BTREE_TRIGGER_GC) {
+ struct gc_stripe *m =
+ genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
+
+ if (!m) {
+ bch_err(c, "error allocating memory for gc_stripes, idx %llu",
+ idx);
+ return -BCH_ERR_ENOMEM_mark_stripe;
+ }
+ /*
+ * This will be wrong when we bring back runtime gc: we should
+ * be unmarking the old key and then marking the new key
+ */
+ m->alive = true;
+ m->sectors = le16_to_cpu(new_s->sectors);
+ m->nr_blocks = new_s->nr_blocks;
+ m->nr_redundant = new_s->nr_redundant;
+
+ for (unsigned i = 0; i < new_s->nr_blocks; i++)
+ m->ptrs[i] = new_s->ptrs[i];
+
+ bch2_bkey_to_replicas(&m->r.e, new);
+
+ /*
+ * gc recalculates this field from stripe ptr
+ * references:
+ */
+ memset(m->block_sectors, 0, sizeof(m->block_sectors));
+
+ for (unsigned i = 0; i < new_s->nr_blocks; i++) {
+ int ret = mark_stripe_bucket(trans, new, i, flags);
+ if (ret)
+ return ret;
+ }
+
+ int ret = bch2_update_replicas(c, new, &m->r.e,
+ ((s64) m->sectors * m->nr_redundant),
+ 0, true);
+ if (ret) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, new);
+ bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
+ printbuf_exit(&buf);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/* Returns the matching extent pointer; the stripe block number is returned via *block: */
+static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s,
+ struct bkey_s_c k, unsigned *block)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
+
+ bkey_for_each_ptr(ptrs, ptr)
+ for (i = 0; i < nr_data; i++)
+ if (__bch2_ptr_matches_stripe(&s->ptrs[i], ptr,
+ le16_to_cpu(s->sectors))) {
+ *block = i;
+ return ptr;
+ }
+
+ return NULL;
+}
+
+static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_extent: {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ const union bch_extent_entry *entry;
+
+ extent_for_each_entry(e, entry)
+ if (extent_entry_type(entry) ==
+ BCH_EXTENT_ENTRY_stripe_ptr &&
+ entry->stripe_ptr.idx == idx)
+ return true;
+
+ break;
+ }
+ }
+
+ return false;
+}
+
+/* Stripe bufs: */
+
+static void ec_stripe_buf_exit(struct ec_stripe_buf *buf)
+{
+ if (buf->key.k.type == KEY_TYPE_stripe) {
+ struct bkey_i_stripe *s = bkey_i_to_stripe(&buf->key);
+ unsigned i;
+
+ for (i = 0; i < s->v.nr_blocks; i++) {
+ kvpfree(buf->data[i], buf->size << 9);
+ buf->data[i] = NULL;
+ }
+ }
+}
+
+/* XXX: this is a non-mempoolified memory allocation: */
+static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
+ unsigned offset, unsigned size)
+{
+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
+ unsigned csum_granularity = 1U << v->csum_granularity_bits;
+ unsigned end = offset + size;
+ unsigned i;
+
+ BUG_ON(end > le16_to_cpu(v->sectors));
+
+ offset = round_down(offset, csum_granularity);
+ end = min_t(unsigned, le16_to_cpu(v->sectors),
+ round_up(end, csum_granularity));
+
+ buf->offset = offset;
+ buf->size = end - offset;
+
+ memset(buf->valid, 0xFF, sizeof(buf->valid));
+
+ for (i = 0; i < v->nr_blocks; i++) {
+ buf->data[i] = kvpmalloc(buf->size << 9, GFP_KERNEL);
+ if (!buf->data[i])
+ goto err;
+ }
+
+ return 0;
+err:
+ ec_stripe_buf_exit(buf);
+ return -BCH_ERR_ENOMEM_stripe_buf;
+}
+
+/* Checksumming: */
+
+static struct bch_csum ec_block_checksum(struct ec_stripe_buf *buf,
+ unsigned block, unsigned offset)
+{
+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
+ unsigned csum_granularity = 1 << v->csum_granularity_bits;
+ unsigned end = buf->offset + buf->size;
+ unsigned len = min(csum_granularity, end - offset);
+
+ BUG_ON(offset >= end);
+ BUG_ON(offset < buf->offset);
+ BUG_ON(offset & (csum_granularity - 1));
+ BUG_ON(offset + len != le16_to_cpu(v->sectors) &&
+ (len & (csum_granularity - 1)));
+
+ return bch2_checksum(NULL, v->csum_type,
+ null_nonce(),
+ buf->data[block] + ((offset - buf->offset) << 9),
+ len << 9);
+}
+
+static void ec_generate_checksums(struct ec_stripe_buf *buf)
+{
+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
+ unsigned i, j, csums_per_device = stripe_csums_per_device(v);
+
+ if (!v->csum_type)
+ return;
+
+ BUG_ON(buf->offset);
+ BUG_ON(buf->size != le16_to_cpu(v->sectors));
+
+ for (i = 0; i < v->nr_blocks; i++)
+ for (j = 0; j < csums_per_device; j++)
+ stripe_csum_set(v, i, j,
+ ec_block_checksum(buf, i, j << v->csum_granularity_bits));
+}
+
+static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
+{
+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
+ unsigned csum_granularity = 1 << v->csum_granularity_bits;
+ unsigned i;
+
+ if (!v->csum_type)
+ return;
+
+ for (i = 0; i < v->nr_blocks; i++) {
+ unsigned offset = buf->offset;
+ unsigned end = buf->offset + buf->size;
+
+ if (!test_bit(i, buf->valid))
+ continue;
+
+ while (offset < end) {
+ unsigned j = offset >> v->csum_granularity_bits;
+ unsigned len = min(csum_granularity, end - offset);
+ struct bch_csum want = stripe_csum_get(v, i, j);
+ struct bch_csum got = ec_block_checksum(buf, i, offset);
+
+ if (bch2_crc_cmp(want, got)) {
+ struct printbuf err = PRINTBUF;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev);
+
+ prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n",
+ want.hi, want.lo,
+ got.hi, got.lo,
+ bch2_csum_types[v->csum_type]);
+ prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i);
+ bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
+ bch_err_ratelimited(ca, "%s", err.buf);
+ printbuf_exit(&err);
+
+ clear_bit(i, buf->valid);
+
+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
+ break;
+ }
+
+ offset += len;
+ }
+ }
+}
+
+/* Erasure coding: */
+
+static void ec_generate_ec(struct ec_stripe_buf *buf)
+{
+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
+ unsigned nr_data = v->nr_blocks - v->nr_redundant;
+ unsigned bytes = le16_to_cpu(v->sectors) << 9;
+
+ raid_gen(nr_data, v->nr_redundant, bytes, buf->data);
+}
+
+static unsigned ec_nr_failed(struct ec_stripe_buf *buf)
+{
+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
+
+ return v->nr_blocks - bitmap_weight(buf->valid, v->nr_blocks);
+}
+
+static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
+{
+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
+ unsigned i, failed[BCH_BKEY_PTRS_MAX], nr_failed = 0;
+ unsigned nr_data = v->nr_blocks - v->nr_redundant;
+ unsigned bytes = buf->size << 9;
+
+ if (ec_nr_failed(buf) > v->nr_redundant) {
+ bch_err_ratelimited(c,
+ "error doing reconstruct read: unable to read enough blocks");
+ return -1;
+ }
+
+ for (i = 0; i < nr_data; i++)
+ if (!test_bit(i, buf->valid))
+ failed[nr_failed++] = i;
+
+ raid_rec(nr_failed, failed, nr_data, v->nr_redundant, bytes, buf->data);
+ return 0;
+}
+
+/* IO: */
+
+static void ec_block_endio(struct bio *bio)
+{
+ struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio);
+ struct bch_stripe *v = &bkey_i_to_stripe(&ec_bio->buf->key)->v;
+ struct bch_extent_ptr *ptr = &v->ptrs[ec_bio->idx];
+ struct bch_dev *ca = ec_bio->ca;
+ struct closure *cl = bio->bi_private;
+
+ if (bch2_dev_io_err_on(bio->bi_status, ca,
+ bio_data_dir(bio)
+ ? BCH_MEMBER_ERROR_write
+ : BCH_MEMBER_ERROR_read,
+ "erasure coding %s error: %s",
+ bio_data_dir(bio) ? "write" : "read",
+ bch2_blk_status_to_str(bio->bi_status)))
+ clear_bit(ec_bio->idx, ec_bio->buf->valid);
+
+ if (ptr_stale(ca, ptr)) {
+ bch_err_ratelimited(ca->fs,
+ "error %s stripe: stale pointer after io",
+ bio_data_dir(bio) == READ ? "reading from" : "writing to");
+ clear_bit(ec_bio->idx, ec_bio->buf->valid);
+ }
+
+ bio_put(&ec_bio->bio);
+ percpu_ref_put(&ca->io_ref);
+ closure_put(cl);
+}
+
+static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
+ blk_opf_t opf, unsigned idx, struct closure *cl)
+{
+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
+ unsigned offset = 0, bytes = buf->size << 9;
+ struct bch_extent_ptr *ptr = &v->ptrs[idx];
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ enum bch_data_type data_type = idx < v->nr_blocks - v->nr_redundant
+ ? BCH_DATA_user
+ : BCH_DATA_parity;
+ int rw = op_is_write(opf);
+
+ if (ptr_stale(ca, ptr)) {
+ bch_err_ratelimited(c,
+ "error %s stripe: stale pointer",
+ rw == READ ? "reading from" : "writing to");
+ clear_bit(idx, buf->valid);
+ return;
+ }
+
+ if (!bch2_dev_get_ioref(ca, rw)) {
+ clear_bit(idx, buf->valid);
+ return;
+ }
+
+ this_cpu_add(ca->io_done->sectors[rw][data_type], buf->size);
+
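+	/* Issue the I/O in chunks of at most BIO_MAX_VECS pages each: */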
+ while (offset < bytes) {
+ unsigned nr_iovecs = min_t(size_t, BIO_MAX_VECS,
+ DIV_ROUND_UP(bytes, PAGE_SIZE));
+ unsigned b = min_t(size_t, bytes - offset,
+ nr_iovecs << PAGE_SHIFT);
+ struct ec_bio *ec_bio;
+
+ ec_bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev,
+ nr_iovecs,
+ opf,
+ GFP_KERNEL,
+ &c->ec_bioset),
+ struct ec_bio, bio);
+
+ ec_bio->ca = ca;
+ ec_bio->buf = buf;
+ ec_bio->idx = idx;
+
+ ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9);
+ ec_bio->bio.bi_end_io = ec_block_endio;
+ ec_bio->bio.bi_private = cl;
+
+ bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
+
+ closure_get(cl);
+ percpu_ref_get(&ca->io_ref);
+
+ submit_bio(&ec_bio->bio);
+
+ offset += b;
+ }
+
+ percpu_ref_put(&ca->io_ref);
+}
+
+static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
+ struct ec_stripe_buf *stripe)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes,
+ POS(0, idx), BTREE_ITER_SLOTS);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+ if (k.k->type != KEY_TYPE_stripe) {
+ ret = -ENOENT;
+ goto err;
+ }
+ bkey_reassemble(&stripe->key, k);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+/* recovery read path: */
+int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio)
+{
+ struct bch_fs *c = trans->c;
+ struct ec_stripe_buf *buf;
+ struct closure cl;
+ struct bch_stripe *v;
+ unsigned i, offset;
+ int ret = 0;
+
+ closure_init_stack(&cl);
+
+ BUG_ON(!rbio->pick.has_ec);
+
+ buf = kzalloc(sizeof(*buf), GFP_NOFS);
+ if (!buf)
+ return -BCH_ERR_ENOMEM_ec_read_extent;
+
+ ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf));
+ if (ret) {
+ bch_err_ratelimited(c,
+ "error doing reconstruct read: error %i looking up stripe", ret);
+ kfree(buf);
+ return -EIO;
+ }
+
+ v = &bkey_i_to_stripe(&buf->key)->v;
+
+ if (!bch2_ptr_matches_stripe(v, rbio->pick)) {
+ bch_err_ratelimited(c,
+ "error doing reconstruct read: pointer doesn't match stripe");
+ ret = -EIO;
+ goto err;
+ }
+
+ offset = rbio->bio.bi_iter.bi_sector - v->ptrs[rbio->pick.ec.block].offset;
+ if (offset + bio_sectors(&rbio->bio) > le16_to_cpu(v->sectors)) {
+ bch_err_ratelimited(c,
+ "error doing reconstruct read: read is bigger than stripe");
+ ret = -EIO;
+ goto err;
+ }
+
+ ret = ec_stripe_buf_init(buf, offset, bio_sectors(&rbio->bio));
+ if (ret)
+ goto err;
+
+ for (i = 0; i < v->nr_blocks; i++)
+ ec_block_io(c, buf, REQ_OP_READ, i, &cl);
+
+ closure_sync(&cl);
+
+ if (ec_nr_failed(buf) > v->nr_redundant) {
+ bch_err_ratelimited(c,
+ "error doing reconstruct read: unable to read enough blocks");
+ ret = -EIO;
+ goto err;
+ }
+
+ ec_validate_checksums(c, buf);
+
+ ret = ec_do_recov(c, buf);
+ if (ret)
+ goto err;
+
+ memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter,
+ buf->data[rbio->pick.ec.block] + ((offset - buf->offset) << 9));
+err:
+ ec_stripe_buf_exit(buf);
+ kfree(buf);
+ return ret;
+}
+
+/* stripe bucket accounting: */
+
+static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
+{
+ ec_stripes_heap n, *h = &c->ec_stripes_heap;
+
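+	/* Make sure the stripes heap is large enough to hold an entry for idx: */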
+ if (idx >= h->size) {
+ if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp))
+ return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
+
+ mutex_lock(&c->ec_stripes_heap_lock);
+ if (n.size > h->size) {
+ memcpy(n.data, h->data, h->used * sizeof(h->data[0]));
+ n.used = h->used;
+ swap(*h, n);
+ }
+ mutex_unlock(&c->ec_stripes_heap_lock);
+
+ free_heap(&n);
+ }
+
+ if (!genradix_ptr_alloc(&c->stripes, idx, gfp))
+ return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
+
+ if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING &&
+ !genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
+ return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
+
+ return 0;
+}
+
+static int ec_stripe_mem_alloc(struct btree_trans *trans,
+ struct btree_iter *iter)
+{
+ return allocate_dropping_locks_errcode(trans,
+ __ec_stripe_mem_alloc(trans->c, iter->pos.offset, _gfp));
+}
+
+/*
+ * Hash table of open stripes:
+ * Stripes that are being created or modified are kept in a hash table, so that
+ * stripe deletion can skip them.
+ */
+
+static bool __bch2_stripe_is_open(struct bch_fs *c, u64 idx)
+{
+ unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
+ struct ec_stripe_new *s;
+
+ hlist_for_each_entry(s, &c->ec_stripes_new[hash], hash)
+ if (s->idx == idx)
+ return true;
+ return false;
+}
+
+static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx)
+{
+ bool ret = false;
+
+ spin_lock(&c->ec_stripes_new_lock);
+ ret = __bch2_stripe_is_open(c, idx);
+ spin_unlock(&c->ec_stripes_new_lock);
+
+ return ret;
+}
+
+static bool bch2_try_open_stripe(struct bch_fs *c,
+ struct ec_stripe_new *s,
+ u64 idx)
+{
+ bool ret;
+
+ spin_lock(&c->ec_stripes_new_lock);
+ ret = !__bch2_stripe_is_open(c, idx);
+ if (ret) {
+ unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new)));
+
+ s->idx = idx;
+ hlist_add_head(&s->hash, &c->ec_stripes_new[hash]);
+ }
+ spin_unlock(&c->ec_stripes_new_lock);
+
+ return ret;
+}
+
+static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s)
+{
+ BUG_ON(!s->idx);
+
+ spin_lock(&c->ec_stripes_new_lock);
+ hlist_del_init(&s->hash);
+ spin_unlock(&c->ec_stripes_new_lock);
+
+ s->idx = 0;
+}
+
+/* Heap of all existing stripes, ordered by blocks_nonempty */
+
+static u64 stripe_idx_to_delete(struct bch_fs *c)
+{
+ ec_stripes_heap *h = &c->ec_stripes_heap;
+
+ lockdep_assert_held(&c->ec_stripes_heap_lock);
+
+ if (h->used &&
+ h->data[0].blocks_nonempty == 0 &&
+ !bch2_stripe_is_open(c, h->data[0].idx))
+ return h->data[0].idx;
+
+ return 0;
+}
+
+static inline int ec_stripes_heap_cmp(ec_stripes_heap *h,
+ struct ec_stripe_heap_entry l,
+ struct ec_stripe_heap_entry r)
+{
+ return ((l.blocks_nonempty > r.blocks_nonempty) -
+ (l.blocks_nonempty < r.blocks_nonempty));
+}
+
+static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
+ size_t i)
+{
+ struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
+
+ genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i;
+}
+
+static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
+{
+ ec_stripes_heap *h = &c->ec_stripes_heap;
+ struct stripe *m = genradix_ptr(&c->stripes, idx);
+
+ BUG_ON(m->heap_idx >= h->used);
+ BUG_ON(h->data[m->heap_idx].idx != idx);
+}
+
+void bch2_stripes_heap_del(struct bch_fs *c,
+ struct stripe *m, size_t idx)
+{
+ mutex_lock(&c->ec_stripes_heap_lock);
+ heap_verify_backpointer(c, idx);
+
+ heap_del(&c->ec_stripes_heap, m->heap_idx,
+ ec_stripes_heap_cmp,
+ ec_stripes_heap_set_backpointer);
+ mutex_unlock(&c->ec_stripes_heap_lock);
+}
+
+void bch2_stripes_heap_insert(struct bch_fs *c,
+ struct stripe *m, size_t idx)
+{
+ mutex_lock(&c->ec_stripes_heap_lock);
+ BUG_ON(heap_full(&c->ec_stripes_heap));
+
+ heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
+ .idx = idx,
+ .blocks_nonempty = m->blocks_nonempty,
+ }),
+ ec_stripes_heap_cmp,
+ ec_stripes_heap_set_backpointer);
+
+ heap_verify_backpointer(c, idx);
+ mutex_unlock(&c->ec_stripes_heap_lock);
+}
+
+void bch2_stripes_heap_update(struct bch_fs *c,
+ struct stripe *m, size_t idx)
+{
+ ec_stripes_heap *h = &c->ec_stripes_heap;
+ bool do_deletes;
+ size_t i;
+
+ mutex_lock(&c->ec_stripes_heap_lock);
+ heap_verify_backpointer(c, idx);
+
+ h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
+
+ i = m->heap_idx;
+ heap_sift_up(h, i, ec_stripes_heap_cmp,
+ ec_stripes_heap_set_backpointer);
+ heap_sift_down(h, i, ec_stripes_heap_cmp,
+ ec_stripes_heap_set_backpointer);
+
+ heap_verify_backpointer(c, idx);
+
+ do_deletes = stripe_idx_to_delete(c) != 0;
+ mutex_unlock(&c->ec_stripes_heap_lock);
+
+ if (do_deletes)
+ bch2_do_stripe_deletes(c);
+}
+
+/* stripe deletion */
+
+static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_s_c_stripe s;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes, POS(0, idx),
+ BTREE_ITER_INTENT);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type != KEY_TYPE_stripe) {
+ bch2_fs_inconsistent(c, "attempting to delete nonexistent stripe %llu", idx);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ s = bkey_s_c_to_stripe(k);
+ for (unsigned i = 0; i < s.v->nr_blocks; i++)
+ if (stripe_blockcount_get(s.v, i)) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch2_fs_inconsistent(c, "attempting to delete nonempty stripe %s", buf.buf);
+ printbuf_exit(&buf);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = bch2_btree_delete_at(trans, &iter, 0);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static void ec_stripe_delete_work(struct work_struct *work)
+{
+ struct bch_fs *c =
+ container_of(work, struct bch_fs, ec_stripe_delete_work);
+
+ while (1) {
+ mutex_lock(&c->ec_stripes_heap_lock);
+ u64 idx = stripe_idx_to_delete(c);
+ mutex_unlock(&c->ec_stripes_heap_lock);
+
+ if (!idx)
+ break;
+
+ int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ ec_stripe_delete(trans, idx));
+ bch_err_fn(c, ret);
+ if (ret)
+ break;
+ }
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
+}
+
+void bch2_do_stripe_deletes(struct bch_fs *c)
+{
+ if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
+ !queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
+ bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
+}
+
+/* stripe creation: */
+
+static int ec_stripe_key_update(struct btree_trans *trans,
+ struct bkey_i_stripe *new,
+ bool create)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes,
+ new->k.p, BTREE_ITER_INTENT);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type != (create ? KEY_TYPE_deleted : KEY_TYPE_stripe)) {
+ bch2_fs_inconsistent(c, "error %s stripe: got existing key type %s",
+ create ? "creating" : "updating",
+ bch2_bkey_types[k.k->type]);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (k.k->type == KEY_TYPE_stripe) {
+ const struct bch_stripe *old = bkey_s_c_to_stripe(k).v;
+ unsigned i;
+
+ if (old->nr_blocks != new->v.nr_blocks) {
+ bch_err(c, "error updating stripe: nr_blocks does not match");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ for (i = 0; i < new->v.nr_blocks; i++) {
+ unsigned v = stripe_blockcount_get(old, i);
+
+ BUG_ON(v &&
+ (old->ptrs[i].dev != new->v.ptrs[i].dev ||
+ old->ptrs[i].gen != new->v.ptrs[i].gen ||
+ old->ptrs[i].offset != new->v.ptrs[i].offset));
+
+ stripe_blockcount_set(&new->v, i, v);
+ }
+ }
+
+ ret = bch2_trans_update(trans, &iter, &new->k_i, 0);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int ec_stripe_update_extent(struct btree_trans *trans,
+ struct bpos bucket, u8 gen,
+ struct ec_stripe_buf *s,
+ struct bpos *bp_pos)
+{
+ struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
+ struct bch_fs *c = trans->c;
+ struct bch_backpointer bp;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ const struct bch_extent_ptr *ptr_c;
+ struct bch_extent_ptr *ptr, *ec_ptr = NULL;
+ struct bch_extent_stripe_ptr stripe_ptr;
+ struct bkey_i *n;
+ int ret, dev, block;
+
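+	/*
+	 * Look up the next extent in this bucket via its backpointer; if it
+	 * belongs to this stripe, rewrite it to carry a stripe pointer:
+	 */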
+ ret = bch2_get_next_backpointer(trans, bucket, gen,
+ bp_pos, &bp, BTREE_ITER_CACHED);
+ if (ret)
+ return ret;
+ if (bpos_eq(*bp_pos, SPOS_MAX))
+ return 0;
+
+ if (bp.level) {
+ struct printbuf buf = PRINTBUF;
+ struct btree_iter node_iter;
+ struct btree *b;
+
+ b = bch2_backpointer_get_node(trans, &node_iter, *bp_pos, bp);
+ bch2_trans_iter_exit(trans, &node_iter);
+
+ if (!b)
+ return 0;
+
+ prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b);
+ bch2_backpointer_to_text(&buf, &bp);
+
+ bch2_fs_inconsistent(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return -EIO;
+ }
+
+ k = bch2_backpointer_get_key(trans, &iter, *bp_pos, bp, BTREE_ITER_INTENT);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+ if (!k.k) {
+ /*
+ * extent no longer exists - we could flush the btree
+ * write buffer and retry to verify, but no need:
+ */
+ return 0;
+ }
+
+ if (extent_has_stripe_ptr(k, s->key.k.p.offset))
+ goto out;
+
+ ptr_c = bkey_matches_stripe(v, k, &block);
+ /*
+ * It doesn't generally make sense to erasure code cached ptrs:
+ * XXX: should we be incrementing a counter?
+ */
+ if (!ptr_c || ptr_c->cached)
+ goto out;
+
+ dev = v->ptrs[block].dev;
+
+ n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(stripe_ptr));
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ goto out;
+
+ bkey_reassemble(n, k);
+
+ bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev);
+ ec_ptr = bch2_bkey_has_device(bkey_i_to_s(n), dev);
+ BUG_ON(!ec_ptr);
+
+ stripe_ptr = (struct bch_extent_stripe_ptr) {
+ .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr,
+ .block = block,
+ .redundancy = v->nr_redundant,
+ .idx = s->key.k.p.offset,
+ };
+
+ __extent_entry_insert(n,
+ (union bch_extent_entry *) ec_ptr,
+ (union bch_extent_entry *) &stripe_ptr);
+
+ ret = bch2_trans_update(trans, &iter, n, 0);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_buf *s,
+ unsigned block)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
+ struct bch_extent_ptr bucket = v->ptrs[block];
+ struct bpos bucket_pos = PTR_BUCKET_POS(c, &bucket);
+ struct bpos bp_pos = POS_MIN;
+ int ret = 0;
+
+ while (1) {
+ ret = commit_do(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc,
+ ec_stripe_update_extent(trans, bucket_pos, bucket.gen,
+ s, &bp_pos));
+ if (ret)
+ break;
+ if (bkey_eq(bp_pos, POS_MAX))
+ break;
+
+ bp_pos = bpos_nosnap_successor(bp_pos);
+ }
+
+ return ret;
+}
+
+static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
+ unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
+ int ret = 0;
+
+ ret = bch2_btree_write_buffer_flush_sync(trans);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < nr_data; i++) {
+ ret = ec_stripe_update_bucket(trans, s, i);
+ if (ret)
+ break;
+ }
+err:
+ bch2_trans_put(trans);
+
+ return ret;
+}
+
+static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
+ struct ec_stripe_new *s,
+ unsigned block,
+ struct open_bucket *ob)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+ unsigned offset = ca->mi.bucket_size - ob->sectors_free;
+ int ret;
+
+ if (!bch2_dev_get_ioref(ca, WRITE)) {
+ s->err = -BCH_ERR_erofs_no_writes;
+ return;
+ }
+
+ memset(s->new_stripe.data[block] + (offset << 9),
+ 0,
+ ob->sectors_free << 9);
+
+ ret = blkdev_issue_zeroout(ca->disk_sb.bdev,
+ ob->bucket * ca->mi.bucket_size + offset,
+ ob->sectors_free,
+ GFP_KERNEL, 0);
+
+ percpu_ref_put(&ca->io_ref);
+
+ if (ret)
+ s->err = ret;
+}
+
+void bch2_ec_stripe_new_free(struct bch_fs *c, struct ec_stripe_new *s)
+{
+ if (s->idx)
+ bch2_stripe_close(c, s);
+ kfree(s);
+}
+
+/*
+ * data buckets of new stripe all written: create the stripe
+ */
+static void ec_stripe_create(struct ec_stripe_new *s)
+{
+ struct bch_fs *c = s->c;
+ struct open_bucket *ob;
+ struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
+ unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
+ int ret;
+
+ BUG_ON(s->h->s == s);
+
+ closure_sync(&s->iodone);
+
+ if (!s->err) {
+ for (i = 0; i < nr_data; i++)
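+	/* In-memory accounting: update c->stripes and the stripes heap: */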
+ if (s->blocks[i]) {
+ ob = c->open_buckets + s->blocks[i];
+
+ if (ob->sectors_free)
+ zero_out_rest_of_ec_bucket(c, s, i, ob);
+ }
+ }
+
+ if (s->err) {
+ if (!bch2_err_matches(s->err, EROFS))
+ bch_err(c, "error creating stripe: error writing data buckets");
+ goto err;
+ }
+
+ if (s->have_existing_stripe) {
+ ec_validate_checksums(c, &s->existing_stripe);
+
+ if (ec_do_recov(c, &s->existing_stripe)) {
+ bch_err(c, "error creating stripe: error reading existing stripe");
+ goto err;
+ }
+
+ for (i = 0; i < nr_data; i++)
+ if (stripe_blockcount_get(&bkey_i_to_stripe(&s->existing_stripe.key)->v, i))
+ swap(s->new_stripe.data[i],
+ s->existing_stripe.data[i]);
+
+ ec_stripe_buf_exit(&s->existing_stripe);
+ }
+
+ BUG_ON(!s->allocated);
+ BUG_ON(!s->idx);
+
+ ec_generate_ec(&s->new_stripe);
+
+ ec_generate_checksums(&s->new_stripe);
+
+ /* write p/q: */
+ for (i = nr_data; i < v->nr_blocks; i++)
+ ec_block_io(c, &s->new_stripe, REQ_OP_WRITE, i, &s->iodone);
+ closure_sync(&s->iodone);
+
+ if (ec_nr_failed(&s->new_stripe)) {
+ bch_err(c, "error creating stripe: error writing redundancy buckets");
+ goto err;
+ }
+
+ ret = bch2_trans_do(c, &s->res, NULL,
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc,
+ ec_stripe_key_update(trans,
+ bkey_i_to_stripe(&s->new_stripe.key),
+ !s->have_existing_stripe));
+ bch_err_msg(c, ret, "creating stripe key");
+	if (ret)
+		goto err;
+
+ ret = ec_stripe_update_extents(c, &s->new_stripe);
+ bch_err_msg(c, ret, "error updating extents");
+ if (ret)
+ goto err;
+err:
+ bch2_disk_reservation_put(c, &s->res);
+
+ for (i = 0; i < v->nr_blocks; i++)
+ if (s->blocks[i]) {
+ ob = c->open_buckets + s->blocks[i];
+
+ if (i < nr_data) {
+ ob->ec = NULL;
+ __bch2_open_bucket_put(c, ob);
+ } else {
+ bch2_open_bucket_put(c, ob);
+ }
+ }
+
+ mutex_lock(&c->ec_stripe_new_lock);
+ list_del(&s->list);
+ mutex_unlock(&c->ec_stripe_new_lock);
+ wake_up(&c->ec_stripe_new_wait);
+
+ ec_stripe_buf_exit(&s->existing_stripe);
+ ec_stripe_buf_exit(&s->new_stripe);
+ closure_debug_destroy(&s->iodone);
+
+ ec_stripe_new_put(c, s, STRIPE_REF_stripe);
+}
+
+static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c)
+{
+ struct ec_stripe_new *s;
+
+ mutex_lock(&c->ec_stripe_new_lock);
+ list_for_each_entry(s, &c->ec_stripe_new_list, list)
+ if (!atomic_read(&s->ref[STRIPE_REF_io]))
+ goto out;
+ s = NULL;
+out:
+ mutex_unlock(&c->ec_stripe_new_lock);
+
+ return s;
+}
+
+static void ec_stripe_create_work(struct work_struct *work)
+{
+ struct bch_fs *c = container_of(work,
+ struct bch_fs, ec_stripe_create_work);
+ struct ec_stripe_new *s;
+
+ while ((s = get_pending_stripe(c)))
+ ec_stripe_create(s);
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
+}
+
+void bch2_ec_do_stripe_creates(struct bch_fs *c)
+{
+ bch2_write_ref_get(c, BCH_WRITE_REF_stripe_create);
+
+ if (!queue_work(system_long_wq, &c->ec_stripe_create_work))
+ bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
+}
+
+static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
+{
+ struct ec_stripe_new *s = h->s;
+
+ BUG_ON(!s->allocated && !s->err);
+
+ h->s = NULL;
+ s->pending = true;
+
+ mutex_lock(&c->ec_stripe_new_lock);
+ list_add(&s->list, &c->ec_stripe_new_list);
+ mutex_unlock(&c->ec_stripe_new_lock);
+
+ ec_stripe_new_put(c, s, STRIPE_REF_io);
+}
+
+void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
+{
+ struct ec_stripe_new *s = ob->ec;
+
+ s->err = -EIO;
+}
+
+void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
+{
+ struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs);
+ struct bch_dev *ca;
+ unsigned offset;
+
+ if (!ob)
+ return NULL;
+
+ BUG_ON(!ob->ec->new_stripe.data[ob->ec_idx]);
+
+ ca = bch_dev_bkey_exists(c, ob->dev);
+ offset = ca->mi.bucket_size - ob->sectors_free;
+
+ return ob->ec->new_stripe.data[ob->ec_idx] + (offset << 9);
+}
+
+static int unsigned_cmp(const void *_l, const void *_r)
+{
+ unsigned l = *((const unsigned *) _l);
+ unsigned r = *((const unsigned *) _r);
+
+ return cmp_int(l, r);
+}
+
+/* pick most common bucket size: */
+static unsigned pick_blocksize(struct bch_fs *c,
+ struct bch_devs_mask *devs)
+{
+ unsigned nr = 0, sizes[BCH_SB_MEMBERS_MAX];
+ struct {
+ unsigned nr, size;
+ } cur = { 0, 0 }, best = { 0, 0 };
+
+ for_each_member_device_rcu(c, ca, devs)
+ sizes[nr++] = ca->mi.bucket_size;
+
+ sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL);
+
+ for (unsigned i = 0; i < nr; i++) {
+ if (sizes[i] != cur.size) {
+ if (cur.nr > best.nr)
+ best = cur;
+
+ cur.nr = 0;
+ cur.size = sizes[i];
+ }
+
+ cur.nr++;
+ }
+
+ if (cur.nr > best.nr)
+ best = cur;
+
+ return best.size;
+}
+
+static bool may_create_new_stripe(struct bch_fs *c)
+{
+ return false;
+}
+
+static void ec_stripe_key_init(struct bch_fs *c,
+ struct bkey_i *k,
+ unsigned nr_data,
+ unsigned nr_parity,
+ unsigned stripe_size)
+{
+ struct bkey_i_stripe *s = bkey_stripe_init(k);
+ unsigned u64s;
+
+ s->v.sectors = cpu_to_le16(stripe_size);
+ s->v.algorithm = 0;
+ s->v.nr_blocks = nr_data + nr_parity;
+ s->v.nr_redundant = nr_parity;
+ s->v.csum_granularity_bits = ilog2(c->opts.encoded_extent_max >> 9);
+ s->v.csum_type = BCH_CSUM_crc32c;
+ s->v.pad = 0;
+
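+	/*
+	 * If the checksums would make the key too big, use a coarser checksum
+	 * granularity:
+	 */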
+ while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
+ BUG_ON(1 << s->v.csum_granularity_bits >=
+ le16_to_cpu(s->v.sectors) ||
+ s->v.csum_granularity_bits == U8_MAX);
+ s->v.csum_granularity_bits++;
+ }
+
+ set_bkey_val_u64s(&s->k, u64s);
+}
+
+static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
+{
+ struct ec_stripe_new *s;
+
+ lockdep_assert_held(&h->lock);
+
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -BCH_ERR_ENOMEM_ec_new_stripe_alloc;
+
+ mutex_init(&s->lock);
+ closure_init(&s->iodone, NULL);
+ atomic_set(&s->ref[STRIPE_REF_stripe], 1);
+ atomic_set(&s->ref[STRIPE_REF_io], 1);
+ s->c = c;
+ s->h = h;
+ s->nr_data = min_t(unsigned, h->nr_active_devs,
+ BCH_BKEY_PTRS_MAX) - h->redundancy;
+ s->nr_parity = h->redundancy;
+
+ ec_stripe_key_init(c, &s->new_stripe.key,
+ s->nr_data, s->nr_parity, h->blocksize);
+
+ h->s = s;
+ return 0;
+}
+
+static struct ec_stripe_head *
+ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
+ unsigned algo, unsigned redundancy,
+ enum bch_watermark watermark)
+{
+ struct ec_stripe_head *h;
+
+ h = kzalloc(sizeof(*h), GFP_KERNEL);
+ if (!h)
+ return NULL;
+
+ mutex_init(&h->lock);
+ BUG_ON(!mutex_trylock(&h->lock));
+
+ h->target = target;
+ h->algo = algo;
+ h->redundancy = redundancy;
+ h->watermark = watermark;
+
+ rcu_read_lock();
+ h->devs = target_rw_devs(c, BCH_DATA_user, target);
+
+ for_each_member_device_rcu(c, ca, &h->devs)
+ if (!ca->mi.durability)
+ __clear_bit(ca->dev_idx, h->devs.d);
+
+ h->blocksize = pick_blocksize(c, &h->devs);
+
+ for_each_member_device_rcu(c, ca, &h->devs)
+ if (ca->mi.bucket_size == h->blocksize)
+ h->nr_active_devs++;
+
+ rcu_read_unlock();
+
+ /*
+ * If we only have redundancy + 1 devices, we're better off with just
+ * replication:
+ */
+ if (h->nr_active_devs < h->redundancy + 2)
+ bch_err(c, "insufficient devices available to create stripe (have %u, need %u) - mismatched bucket sizes?",
+ h->nr_active_devs, h->redundancy + 2);
+
+ list_add(&h->list, &c->ec_stripe_head_list);
+ return h;
+}
+
+void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
+{
+ if (h->s &&
+ h->s->allocated &&
+ bitmap_weight(h->s->blocks_allocated,
+ h->s->nr_data) == h->s->nr_data)
+ ec_stripe_set_pending(c, h);
+
+ mutex_unlock(&h->lock);
+}
+
+static struct ec_stripe_head *
+__bch2_ec_stripe_head_get(struct btree_trans *trans,
+ unsigned target,
+ unsigned algo,
+ unsigned redundancy,
+ enum bch_watermark watermark)
+{
+ struct bch_fs *c = trans->c;
+ struct ec_stripe_head *h;
+ int ret;
+
+ if (!redundancy)
+ return NULL;
+
+ ret = bch2_trans_mutex_lock(trans, &c->ec_stripe_head_lock);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (test_bit(BCH_FS_going_ro, &c->flags)) {
+ h = ERR_PTR(-BCH_ERR_erofs_no_writes);
+ goto found;
+ }
+
+ list_for_each_entry(h, &c->ec_stripe_head_list, list)
+ if (h->target == target &&
+ h->algo == algo &&
+ h->redundancy == redundancy &&
+ h->watermark == watermark) {
+ ret = bch2_trans_mutex_lock(trans, &h->lock);
+ if (ret)
+ h = ERR_PTR(ret);
+ goto found;
+ }
+
+ h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark);
+found:
+ if (!IS_ERR_OR_NULL(h) &&
+ h->nr_active_devs < h->redundancy + 2) {
+ mutex_unlock(&h->lock);
+ h = NULL;
+ }
+ mutex_unlock(&c->ec_stripe_head_lock);
+ return h;
+}
+
+static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h,
+ enum bch_watermark watermark, struct closure *cl)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_devs_mask devs = h->devs;
+ struct open_bucket *ob;
+ struct open_buckets buckets;
+ struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
+ unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
+ bool have_cache = true;
+ int ret = 0;
+
+ BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity);
+ BUG_ON(v->nr_redundant != h->s->nr_parity);
+
+ for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
+ __clear_bit(v->ptrs[i].dev, devs.d);
+ if (i < h->s->nr_data)
+ nr_have_data++;
+ else
+ nr_have_parity++;
+ }
+
+ BUG_ON(nr_have_data > h->s->nr_data);
+ BUG_ON(nr_have_parity > h->s->nr_parity);
+
+ buckets.nr = 0;
+ if (nr_have_parity < h->s->nr_parity) {
+ ret = bch2_bucket_alloc_set_trans(trans, &buckets,
+ &h->parity_stripe,
+ &devs,
+ h->s->nr_parity,
+ &nr_have_parity,
+ &have_cache, 0,
+ BCH_DATA_parity,
+ watermark,
+ cl);
+
+ open_bucket_for_each(c, &buckets, ob, i) {
+ j = find_next_zero_bit(h->s->blocks_gotten,
+ h->s->nr_data + h->s->nr_parity,
+ h->s->nr_data);
+ BUG_ON(j >= h->s->nr_data + h->s->nr_parity);
+
+ h->s->blocks[j] = buckets.v[i];
+ v->ptrs[j] = bch2_ob_ptr(c, ob);
+ __set_bit(j, h->s->blocks_gotten);
+ }
+
+ if (ret)
+ return ret;
+ }
+
+ buckets.nr = 0;
+ if (nr_have_data < h->s->nr_data) {
+ ret = bch2_bucket_alloc_set_trans(trans, &buckets,
+ &h->block_stripe,
+ &devs,
+ h->s->nr_data,
+ &nr_have_data,
+ &have_cache, 0,
+ BCH_DATA_user,
+ watermark,
+ cl);
+
+ open_bucket_for_each(c, &buckets, ob, i) {
+ j = find_next_zero_bit(h->s->blocks_gotten,
+ h->s->nr_data, 0);
+ BUG_ON(j >= h->s->nr_data);
+
+ h->s->blocks[j] = buckets.v[i];
+ v->ptrs[j] = bch2_ob_ptr(c, ob);
+ __set_bit(j, h->s->blocks_gotten);
+ }
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* XXX: doesn't obey target: */
+static s64 get_existing_stripe(struct bch_fs *c,
+ struct ec_stripe_head *head)
+{
+ ec_stripes_heap *h = &c->ec_stripes_heap;
+ struct stripe *m;
+ size_t heap_idx;
+ u64 stripe_idx;
+ s64 ret = -1;
+
+ if (may_create_new_stripe(c))
+ return -1;
+
+ mutex_lock(&c->ec_stripes_heap_lock);
+ for (heap_idx = 0; heap_idx < h->used; heap_idx++) {
+ /* No blocks worth reusing, stripe will just be deleted: */
+ if (!h->data[heap_idx].blocks_nonempty)
+ continue;
+
+ stripe_idx = h->data[heap_idx].idx;
+
+ m = genradix_ptr(&c->stripes, stripe_idx);
+
+ if (m->algorithm == head->algo &&
+ m->nr_redundant == head->redundancy &&
+ m->sectors == head->blocksize &&
+ m->blocks_nonempty < m->nr_blocks - m->nr_redundant &&
+ bch2_try_open_stripe(c, head->s, stripe_idx)) {
+ ret = stripe_idx;
+ break;
+ }
+ }
+ mutex_unlock(&c->ec_stripes_heap_lock);
+ return ret;
+}
+
+static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
+ struct bch_stripe *existing_v;
+ unsigned i;
+ s64 idx;
+ int ret;
+
+ /*
+ * If we can't allocate a new stripe, and there are no stripes with empty
+ * blocks for us to reuse, that means we have to wait on copygc:
+ */
+ idx = get_existing_stripe(c, h);
+ if (idx < 0)
+ return -BCH_ERR_stripe_alloc_blocked;
+
+ ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
+ if (ret) {
+ bch2_stripe_close(c, h->s);
+ if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch2_fs_fatal_error(c, "error reading stripe key: %s", bch2_err_str(ret));
+ return ret;
+ }
+
+ existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v;
+
+ BUG_ON(existing_v->nr_redundant != h->s->nr_parity);
+ h->s->nr_data = existing_v->nr_blocks -
+ existing_v->nr_redundant;
+
+ ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize);
+ if (ret) {
+ bch2_stripe_close(c, h->s);
+ return ret;
+ }
+
+ BUG_ON(h->s->existing_stripe.size != h->blocksize);
+ BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
+
+ /*
+ * Free buckets we initially allocated - they might conflict with
+ * blocks from the stripe we're reusing:
+ */
+ for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) {
+ bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
+ h->s->blocks[i] = 0;
+ }
+ memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
+ memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
+
+ for (i = 0; i < existing_v->nr_blocks; i++) {
+ if (stripe_blockcount_get(existing_v, i)) {
+ __set_bit(i, h->s->blocks_gotten);
+ __set_bit(i, h->s->blocks_allocated);
+ }
+
+ ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
+ }
+
+ bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key);
+ h->s->have_existing_stripe = true;
+
+ return 0;
+}
+
+static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bpos min_pos = POS(0, 1);
+ struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
+ int ret;
+
+ if (!h->s->res.sectors) {
+ ret = bch2_disk_reservation_get(c, &h->s->res,
+ h->blocksize,
+ h->s->nr_parity,
+ BCH_DISK_RESERVATION_NOFAIL);
+ if (ret)
+ return ret;
+ }
+
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
+ if (bkey_gt(k.k->p, POS(0, U32_MAX))) {
+ if (start_pos.offset) {
+ start_pos = min_pos;
+ bch2_btree_iter_set_pos(&iter, start_pos);
+ continue;
+ }
+
+ ret = -BCH_ERR_ENOSPC_stripe_create;
+ break;
+ }
+
+ if (bkey_deleted(k.k) &&
+ bch2_try_open_stripe(c, h->s, k.k->p.offset))
+ break;
+ }
+
+ c->ec_stripe_hint = iter.pos.offset;
+
+ if (ret)
+ goto err;
+
+ ret = ec_stripe_mem_alloc(trans, &iter);
+ if (ret) {
+ bch2_stripe_close(c, h->s);
+ goto err;
+ }
+
+ h->s->new_stripe.key.k.p = iter.pos;
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+err:
+ bch2_disk_reservation_put(c, &h->s->res);
+ goto out;
+}
+
+struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
+ unsigned target,
+ unsigned algo,
+ unsigned redundancy,
+ enum bch_watermark watermark,
+ struct closure *cl)
+{
+ struct bch_fs *c = trans->c;
+ struct ec_stripe_head *h;
+ bool waiting = false;
+ int ret;
+
+ h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark);
+ if (IS_ERR_OR_NULL(h))
+ return h;
+
+ if (!h->s) {
+ ret = ec_new_stripe_alloc(c, h);
+ if (ret) {
+ bch_err(c, "failed to allocate new stripe");
+ goto err;
+ }
+ }
+
+ if (h->s->allocated)
+ goto allocated;
+
+ if (h->s->have_existing_stripe)
+ goto alloc_existing;
+
+ /* First, try to allocate a full stripe: */
+ ret = new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?:
+ __bch2_ec_stripe_head_reserve(trans, h);
+ if (!ret)
+ goto allocate_buf;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret, ENOMEM))
+ goto err;
+
+ /*
+ * Not enough buckets available for a full stripe: we must reuse an
+ * existing stripe:
+ */
+ while (1) {
+ ret = __bch2_ec_stripe_head_reuse(trans, h);
+ if (!ret)
+ break;
+ if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
+ goto err;
+
+ if (watermark == BCH_WATERMARK_copygc) {
+ ret = new_stripe_alloc_buckets(trans, h, watermark, NULL) ?:
+ __bch2_ec_stripe_head_reserve(trans, h);
+ if (ret)
+ goto err;
+ goto allocate_buf;
+ }
+
+ /* XXX freelist_wait? */
+ closure_wait(&c->freelist_wait, cl);
+ waiting = true;
+ }
+
+ if (waiting)
+ closure_wake_up(&c->freelist_wait);
+alloc_existing:
+ /*
+ * Retry allocating buckets, with the watermark for this
+ * particular write:
+ */
+ ret = new_stripe_alloc_buckets(trans, h, watermark, cl);
+ if (ret)
+ goto err;
+
+allocate_buf:
+ ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize);
+ if (ret)
+ goto err;
+
+ h->s->allocated = true;
+allocated:
+ BUG_ON(!h->s->idx);
+ BUG_ON(!h->s->new_stripe.data[0]);
+ BUG_ON(trans->restarted);
+ return h;
+err:
+ bch2_ec_stripe_head_put(c, h);
+ return ERR_PTR(ret);
+}
+
+static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
+{
+ struct ec_stripe_head *h;
+ struct open_bucket *ob;
+ unsigned i;
+
+ mutex_lock(&c->ec_stripe_head_lock);
+ list_for_each_entry(h, &c->ec_stripe_head_list, list) {
+ mutex_lock(&h->lock);
+ if (!h->s)
+ goto unlock;
+
+ if (!ca)
+ goto found;
+
+ for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) {
+ if (!h->s->blocks[i])
+ continue;
+
+ ob = c->open_buckets + h->s->blocks[i];
+ if (ob->dev == ca->dev_idx)
+ goto found;
+ }
+ goto unlock;
+found:
+ h->s->err = -BCH_ERR_erofs_no_writes;
+ ec_stripe_set_pending(c, h);
+unlock:
+ mutex_unlock(&h->lock);
+ }
+ mutex_unlock(&c->ec_stripe_head_lock);
+}
+
+void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
+{
+ __bch2_ec_stop(c, ca);
+}
+
+void bch2_fs_ec_stop(struct bch_fs *c)
+{
+ __bch2_ec_stop(c, NULL);
+}
+
+static bool bch2_fs_ec_flush_done(struct bch_fs *c)
+{
+ bool ret;
+
+ mutex_lock(&c->ec_stripe_new_lock);
+ ret = list_empty(&c->ec_stripe_new_list);
+ mutex_unlock(&c->ec_stripe_new_lock);
+
+ return ret;
+}
+
+void bch2_fs_ec_flush(struct bch_fs *c)
+{
+ wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c));
+}
+
+int bch2_stripes_read(struct bch_fs *c)
+{
+ int ret = bch2_trans_run(c,
+ for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN,
+ BTREE_ITER_PREFETCH, k, ({
+ if (k.k->type != KEY_TYPE_stripe)
+ continue;
+
+ ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
+ if (ret)
+ break;
+
+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+
+ struct stripe *m = genradix_ptr(&c->stripes, k.k->p.offset);
+ m->sectors = le16_to_cpu(s->sectors);
+ m->algorithm = s->algorithm;
+ m->nr_blocks = s->nr_blocks;
+ m->nr_redundant = s->nr_redundant;
+ m->blocks_nonempty = 0;
+
+ for (unsigned i = 0; i < s->nr_blocks; i++)
+ m->blocks_nonempty += !!stripe_blockcount_get(s, i);
+
+ bch2_stripes_heap_insert(c, m, k.k->p.offset);
+ 0;
+ })));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ ec_stripes_heap *h = &c->ec_stripes_heap;
+ struct stripe *m;
+ size_t i;
+
+ mutex_lock(&c->ec_stripes_heap_lock);
+ for (i = 0; i < min_t(size_t, h->used, 50); i++) {
+ m = genradix_ptr(&c->stripes, h->data[i].idx);
+
+ prt_printf(out, "%zu %u/%u+%u", h->data[i].idx,
+ h->data[i].blocks_nonempty,
+ m->nr_blocks - m->nr_redundant,
+ m->nr_redundant);
+ if (bch2_stripe_is_open(c, h->data[i].idx))
+ prt_str(out, " open");
+ prt_newline(out);
+ }
+ mutex_unlock(&c->ec_stripes_heap_lock);
+}
+
+void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct ec_stripe_head *h;
+ struct ec_stripe_new *s;
+
+ mutex_lock(&c->ec_stripe_head_lock);
+ list_for_each_entry(h, &c->ec_stripe_head_list, list) {
+ prt_printf(out, "target %u algo %u redundancy %u %s:\n",
+ h->target, h->algo, h->redundancy,
+ bch2_watermarks[h->watermark]);
+
+ if (h->s)
+ prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n",
+ h->s->idx, h->s->nr_data, h->s->nr_parity,
+ bitmap_weight(h->s->blocks_allocated,
+ h->s->nr_data));
+ }
+ mutex_unlock(&c->ec_stripe_head_lock);
+
+ prt_printf(out, "in flight:\n");
+
+ mutex_lock(&c->ec_stripe_new_lock);
+ list_for_each_entry(s, &c->ec_stripe_new_list, list) {
+ prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n",
+ s->idx, s->nr_data, s->nr_parity,
+ atomic_read(&s->ref[STRIPE_REF_io]),
+ atomic_read(&s->ref[STRIPE_REF_stripe]),
+ bch2_watermarks[s->h->watermark]);
+ }
+ mutex_unlock(&c->ec_stripe_new_lock);
+}
+
+void bch2_fs_ec_exit(struct bch_fs *c)
+{
+ struct ec_stripe_head *h;
+ unsigned i;
+
+ while (1) {
+ mutex_lock(&c->ec_stripe_head_lock);
+ h = list_first_entry_or_null(&c->ec_stripe_head_list,
+ struct ec_stripe_head, list);
+ if (h)
+ list_del(&h->list);
+ mutex_unlock(&c->ec_stripe_head_lock);
+ if (!h)
+ break;
+
+ if (h->s) {
+ for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++)
+ BUG_ON(h->s->blocks[i]);
+
+ kfree(h->s);
+ }
+ kfree(h);
+ }
+
+ BUG_ON(!list_empty(&c->ec_stripe_new_list));
+
+ free_heap(&c->ec_stripes_heap);
+ genradix_free(&c->stripes);
+ bioset_exit(&c->ec_bioset);
+}
+
+void bch2_fs_ec_init_early(struct bch_fs *c)
+{
+ spin_lock_init(&c->ec_stripes_new_lock);
+ mutex_init(&c->ec_stripes_heap_lock);
+
+ INIT_LIST_HEAD(&c->ec_stripe_head_list);
+ mutex_init(&c->ec_stripe_head_lock);
+
+ INIT_LIST_HEAD(&c->ec_stripe_new_list);
+ mutex_init(&c->ec_stripe_new_lock);
+ init_waitqueue_head(&c->ec_stripe_new_wait);
+
+ INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work);
+ INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work);
+}
+
+int bch2_fs_ec_init(struct bch_fs *c)
+{
+ return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
+ BIOSET_NEED_BVECS);
+}
diff --git a/c_src/libbcachefs/ec.h b/c_src/libbcachefs/ec.h
new file mode 100644
index 00000000..f4369b02
--- /dev/null
+++ b/c_src/libbcachefs/ec.h
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EC_H
+#define _BCACHEFS_EC_H
+
+#include "ec_types.h"
+#include "buckets_types.h"
+#include "extents_types.h"
+
+enum bkey_invalid_flags;
+
+int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
+ struct bkey_s_c);
+int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+
+#define bch2_bkey_ops_stripe ((struct bkey_ops) { \
+ .key_invalid = bch2_stripe_invalid, \
+ .val_to_text = bch2_stripe_to_text, \
+ .swab = bch2_ptr_swab, \
+ .trigger = bch2_trigger_stripe, \
+ .min_val_size = 8, \
+})
+
+static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
+{
+ return DIV_ROUND_UP(le16_to_cpu(s->sectors),
+ 1 << s->csum_granularity_bits);
+}
+
+static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
+ unsigned dev, unsigned csum_idx)
+{
+ unsigned csum_bytes = bch_crc_bytes[s->csum_type];
+
+ return sizeof(struct bch_stripe) +
+ sizeof(struct bch_extent_ptr) * s->nr_blocks +
+ (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes;
+}
+
+static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s,
+ unsigned idx)
+{
+ return stripe_csum_offset(s, s->nr_blocks, 0) +
+ sizeof(u16) * idx;
+}
+
+static inline unsigned stripe_blockcount_get(const struct bch_stripe *s,
+ unsigned idx)
+{
+ return le16_to_cpup((void *) s + stripe_blockcount_offset(s, idx));
+}
+
+static inline void stripe_blockcount_set(struct bch_stripe *s,
+ unsigned idx, unsigned v)
+{
+ __le16 *p = (void *) s + stripe_blockcount_offset(s, idx);
+
+ *p = cpu_to_le16(v);
+}
+
+static inline unsigned stripe_val_u64s(const struct bch_stripe *s)
+{
+ return DIV_ROUND_UP(stripe_blockcount_offset(s, s->nr_blocks),
+ sizeof(u64));
+}
+
+static inline void *stripe_csum(struct bch_stripe *s,
+ unsigned block, unsigned csum_idx)
+{
+ EBUG_ON(block >= s->nr_blocks);
+ EBUG_ON(csum_idx >= stripe_csums_per_device(s));
+
+ return (void *) s + stripe_csum_offset(s, block, csum_idx);
+}
+
+static inline struct bch_csum stripe_csum_get(struct bch_stripe *s,
+ unsigned block, unsigned csum_idx)
+{
+ struct bch_csum csum = { 0 };
+
+ memcpy(&csum, stripe_csum(s, block, csum_idx), bch_crc_bytes[s->csum_type]);
+ return csum;
+}
+
+static inline void stripe_csum_set(struct bch_stripe *s,
+ unsigned block, unsigned csum_idx,
+ struct bch_csum csum)
+{
+ memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]);
+}
+
+static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr,
+ const struct bch_extent_ptr *data_ptr,
+ unsigned sectors)
+{
+ return data_ptr->dev == stripe_ptr->dev &&
+ data_ptr->gen == stripe_ptr->gen &&
+ data_ptr->offset >= stripe_ptr->offset &&
+ data_ptr->offset < stripe_ptr->offset + sectors;
+}
+
+static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s,
+ struct extent_ptr_decoded p)
+{
+ unsigned nr_data = s->nr_blocks - s->nr_redundant;
+
+ BUG_ON(!p.has_ec);
+
+ if (p.ec.block >= nr_data)
+ return false;
+
+ return __bch2_ptr_matches_stripe(&s->ptrs[p.ec.block], &p.ptr,
+ le16_to_cpu(s->sectors));
+}
+
+static inline bool bch2_ptr_matches_stripe_m(const struct gc_stripe *m,
+ struct extent_ptr_decoded p)
+{
+ unsigned nr_data = m->nr_blocks - m->nr_redundant;
+
+ BUG_ON(!p.has_ec);
+
+ if (p.ec.block >= nr_data)
+ return false;
+
+ return __bch2_ptr_matches_stripe(&m->ptrs[p.ec.block], &p.ptr,
+ m->sectors);
+}
+
+struct bch_read_bio;
+
+struct ec_stripe_buf {
+ /* might not be buffering the entire stripe: */
+ unsigned offset;
+ unsigned size;
+ unsigned long valid[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
+
+ void *data[BCH_BKEY_PTRS_MAX];
+
+ __BKEY_PADDED(key, 255);
+};
+
+struct ec_stripe_head;
+
+enum ec_stripe_ref {
+ STRIPE_REF_io,
+ STRIPE_REF_stripe,
+ STRIPE_REF_NR
+};
+
+struct ec_stripe_new {
+ struct bch_fs *c;
+ struct ec_stripe_head *h;
+ struct mutex lock;
+ struct list_head list;
+
+ struct hlist_node hash;
+ u64 idx;
+
+ struct closure iodone;
+
+ atomic_t ref[STRIPE_REF_NR];
+
+ int err;
+
+ u8 nr_data;
+ u8 nr_parity;
+ bool allocated;
+ bool pending;
+ bool have_existing_stripe;
+
+ unsigned long blocks_gotten[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
+ unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
+ open_bucket_idx_t blocks[BCH_BKEY_PTRS_MAX];
+ struct disk_reservation res;
+
+ struct ec_stripe_buf new_stripe;
+ struct ec_stripe_buf existing_stripe;
+};
+
+struct ec_stripe_head {
+ struct list_head list;
+ struct mutex lock;
+
+ unsigned target;
+ unsigned algo;
+ unsigned redundancy;
+ enum bch_watermark watermark;
+
+ struct bch_devs_mask devs;
+ unsigned nr_active_devs;
+
+ unsigned blocksize;
+
+ struct dev_stripe_state block_stripe;
+ struct dev_stripe_state parity_stripe;
+
+ struct ec_stripe_new *s;
+};
+
+int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *);
+
+void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *);
+
+void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *);
+
+int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);
+
+void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *);
+struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *,
+ unsigned, unsigned, unsigned,
+ enum bch_watermark, struct closure *);
+
+void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
+void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
+void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
+
+void bch2_do_stripe_deletes(struct bch_fs *);
+void bch2_ec_do_stripe_creates(struct bch_fs *);
+void bch2_ec_stripe_new_free(struct bch_fs *, struct ec_stripe_new *);
+
+static inline void ec_stripe_new_get(struct ec_stripe_new *s,
+ enum ec_stripe_ref ref)
+{
+ atomic_inc(&s->ref[ref]);
+}
+
+static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
+ enum ec_stripe_ref ref)
+{
+ BUG_ON(atomic_read(&s->ref[ref]) <= 0);
+
+ if (atomic_dec_and_test(&s->ref[ref]))
+ switch (ref) {
+ case STRIPE_REF_stripe:
+ bch2_ec_stripe_new_free(c, s);
+ break;
+ case STRIPE_REF_io:
+ bch2_ec_do_stripe_creates(c);
+ break;
+ default:
+ BUG();
+ }
+}
+
+void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
+void bch2_fs_ec_stop(struct bch_fs *);
+void bch2_fs_ec_flush(struct bch_fs *);
+
+int bch2_stripes_read(struct bch_fs *);
+
+void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *);
+void bch2_new_stripes_to_text(struct printbuf *, struct bch_fs *);
+
+void bch2_fs_ec_exit(struct bch_fs *);
+void bch2_fs_ec_init_early(struct bch_fs *);
+int bch2_fs_ec_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_EC_H */
diff --git a/c_src/libbcachefs/ec_types.h b/c_src/libbcachefs/ec_types.h
new file mode 100644
index 00000000..976426da
--- /dev/null
+++ b/c_src/libbcachefs/ec_types.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EC_TYPES_H
+#define _BCACHEFS_EC_TYPES_H
+
+#include "bcachefs_format.h"
+
+struct bch_replicas_padded {
+ struct bch_replicas_entry_v1 e;
+ u8 pad[BCH_BKEY_PTRS_MAX];
+};
+
+struct stripe {
+ size_t heap_idx;
+ u16 sectors;
+ u8 algorithm;
+ u8 nr_blocks;
+ u8 nr_redundant;
+ u8 blocks_nonempty;
+};
+
+struct gc_stripe {
+ u16 sectors;
+
+ u8 nr_blocks;
+ u8 nr_redundant;
+
+ unsigned alive:1; /* does a corresponding key exist in stripes btree? */
+ u16 block_sectors[BCH_BKEY_PTRS_MAX];
+ struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX];
+
+ struct bch_replicas_padded r;
+};
+
+struct ec_stripe_heap_entry {
+ size_t idx;
+ unsigned blocks_nonempty;
+};
+
+typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap;
+
+#endif /* _BCACHEFS_EC_TYPES_H */
diff --git a/c_src/libbcachefs/errcode.c b/c_src/libbcachefs/errcode.c
new file mode 100644
index 00000000..d260ff9b
--- /dev/null
+++ b/c_src/libbcachefs/errcode.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "errcode.h"
+
+#include <linux/errname.h>
+
+static const char * const bch2_errcode_strs[] = {
+#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = #err,
+ BCH_ERRCODES()
+#undef x
+ NULL
+};
+
+static unsigned bch2_errcode_parents[] = {
+#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = class,
+ BCH_ERRCODES()
+#undef x
+};
+
+const char *bch2_err_str(int err)
+{
+ const char *errstr;
+
+ err = abs(err);
+
+ BUG_ON(err >= BCH_ERR_MAX);
+
+ if (err >= BCH_ERR_START)
+ errstr = bch2_errcode_strs[err - BCH_ERR_START];
+ else if (err)
+ errstr = errname(err);
+ else
+ errstr = "(No error)";
+ return errstr ?: "(Invalid error)";
+}
+
+bool __bch2_err_matches(int err, int class)
+{
+ err = abs(err);
+ class = abs(class);
+
+ BUG_ON(err >= BCH_ERR_MAX);
+ BUG_ON(class >= BCH_ERR_MAX);
+
+ while (err >= BCH_ERR_START && err != class)
+ err = bch2_errcode_parents[err - BCH_ERR_START];
+
+ return err == class;
+}
+
+int __bch2_err_class(int err)
+{
+ err = -err;
+ BUG_ON((unsigned) err >= BCH_ERR_MAX);
+
+ while (err >= BCH_ERR_START && bch2_errcode_parents[err - BCH_ERR_START])
+ err = bch2_errcode_parents[err - BCH_ERR_START];
+
+ return -err;
+}
+
+const char *bch2_blk_status_to_str(blk_status_t status)
+{
+ if (status == BLK_STS_REMOVED)
+ return "device removed";
+ return blk_status_to_str(status);
+}
diff --git a/c_src/libbcachefs/errcode.h b/c_src/libbcachefs/errcode.h
new file mode 100644
index 00000000..8c40c206
--- /dev/null
+++ b/c_src/libbcachefs/errcode.h
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ERRCODE_H
+#define _BCACHEFS_ERRCODE_H
+
+#define BCH_ERRCODES() \
+ x(ERANGE, ERANGE_option_too_small) \
+ x(ERANGE, ERANGE_option_too_big) \
+ x(ENOMEM, ENOMEM_stripe_buf) \
+ x(ENOMEM, ENOMEM_replicas_table) \
+ x(ENOMEM, ENOMEM_cpu_replicas) \
+ x(ENOMEM, ENOMEM_replicas_gc) \
+ x(ENOMEM, ENOMEM_disk_groups_validate) \
+ x(ENOMEM, ENOMEM_disk_groups_to_cpu) \
+ x(ENOMEM, ENOMEM_mark_snapshot) \
+ x(ENOMEM, ENOMEM_mark_stripe) \
+ x(ENOMEM, ENOMEM_mark_stripe_ptr) \
+ x(ENOMEM, ENOMEM_btree_key_cache_create) \
+ x(ENOMEM, ENOMEM_btree_key_cache_fill) \
+ x(ENOMEM, ENOMEM_btree_key_cache_insert) \
+ x(ENOMEM, ENOMEM_trans_kmalloc) \
+ x(ENOMEM, ENOMEM_trans_log_msg) \
+ x(ENOMEM, ENOMEM_do_encrypt) \
+ x(ENOMEM, ENOMEM_ec_read_extent) \
+ x(ENOMEM, ENOMEM_ec_stripe_mem_alloc) \
+ x(ENOMEM, ENOMEM_ec_new_stripe_alloc) \
+ x(ENOMEM, ENOMEM_fs_btree_cache_init) \
+ x(ENOMEM, ENOMEM_fs_btree_key_cache_init) \
+ x(ENOMEM, ENOMEM_fs_counters_init) \
+ x(ENOMEM, ENOMEM_fs_btree_write_buffer_init) \
+ x(ENOMEM, ENOMEM_io_clock_init) \
+ x(ENOMEM, ENOMEM_blacklist_table_init) \
+ x(ENOMEM, ENOMEM_sb_realloc_injected) \
+ x(ENOMEM, ENOMEM_sb_bio_realloc) \
+ x(ENOMEM, ENOMEM_sb_buf_realloc) \
+ x(ENOMEM, ENOMEM_sb_journal_validate) \
+ x(ENOMEM, ENOMEM_sb_journal_v2_validate) \
+ x(ENOMEM, ENOMEM_journal_entry_add) \
+ x(ENOMEM, ENOMEM_journal_read_buf_realloc) \
+ x(ENOMEM, ENOMEM_btree_interior_update_worker_init)\
+ x(ENOMEM, ENOMEM_btree_interior_update_pool_init) \
+ x(ENOMEM, ENOMEM_bio_read_init) \
+ x(ENOMEM, ENOMEM_bio_read_split_init) \
+ x(ENOMEM, ENOMEM_bio_write_init) \
+ x(ENOMEM, ENOMEM_bio_bounce_pages_init) \
+ x(ENOMEM, ENOMEM_writepage_bioset_init) \
+ x(ENOMEM, ENOMEM_dio_read_bioset_init) \
+ x(ENOMEM, ENOMEM_dio_write_bioset_init) \
+ x(ENOMEM, ENOMEM_nocow_flush_bioset_init) \
+ x(ENOMEM, ENOMEM_promote_table_init) \
+ x(ENOMEM, ENOMEM_compression_bounce_read_init) \
+ x(ENOMEM, ENOMEM_compression_bounce_write_init) \
+ x(ENOMEM, ENOMEM_compression_workspace_init) \
+ x(ENOMEM, ENOMEM_decompression_workspace_init) \
+ x(ENOMEM, ENOMEM_bucket_gens) \
+ x(ENOMEM, ENOMEM_buckets_nouse) \
+ x(ENOMEM, ENOMEM_usage_init) \
+ x(ENOMEM, ENOMEM_btree_node_read_all_replicas) \
+ x(ENOMEM, ENOMEM_btree_node_reclaim) \
+ x(ENOMEM, ENOMEM_btree_node_mem_alloc) \
+ x(ENOMEM, ENOMEM_btree_cache_cannibalize_lock) \
+ x(ENOMEM, ENOMEM_buckets_waiting_for_journal_init)\
+ x(ENOMEM, ENOMEM_buckets_waiting_for_journal_set) \
+ x(ENOMEM, ENOMEM_set_nr_journal_buckets) \
+ x(ENOMEM, ENOMEM_dev_journal_init) \
+ x(ENOMEM, ENOMEM_journal_pin_fifo) \
+ x(ENOMEM, ENOMEM_journal_buf) \
+ x(ENOMEM, ENOMEM_gc_start) \
+ x(ENOMEM, ENOMEM_gc_alloc_start) \
+ x(ENOMEM, ENOMEM_gc_reflink_start) \
+ x(ENOMEM, ENOMEM_gc_gens) \
+ x(ENOMEM, ENOMEM_gc_repair_key) \
+ x(ENOMEM, ENOMEM_fsck_extent_ends_at) \
+ x(ENOMEM, ENOMEM_fsck_add_nlink) \
+ x(ENOMEM, ENOMEM_journal_key_insert) \
+ x(ENOMEM, ENOMEM_journal_keys_sort) \
+ x(ENOMEM, ENOMEM_read_superblock_clean) \
+ x(ENOMEM, ENOMEM_fs_alloc) \
+ x(ENOMEM, ENOMEM_fs_name_alloc) \
+ x(ENOMEM, ENOMEM_fs_other_alloc) \
+ x(ENOMEM, ENOMEM_dev_alloc) \
+ x(ENOSPC, ENOSPC_disk_reservation) \
+ x(ENOSPC, ENOSPC_bucket_alloc) \
+ x(ENOSPC, ENOSPC_disk_label_add) \
+ x(ENOSPC, ENOSPC_stripe_create) \
+ x(ENOSPC, ENOSPC_inode_create) \
+ x(ENOSPC, ENOSPC_str_hash_create) \
+ x(ENOSPC, ENOSPC_snapshot_create) \
+ x(ENOSPC, ENOSPC_subvolume_create) \
+ x(ENOSPC, ENOSPC_sb) \
+ x(ENOSPC, ENOSPC_sb_journal) \
+ x(ENOSPC, ENOSPC_sb_journal_seq_blacklist) \
+ x(ENOSPC, ENOSPC_sb_quota) \
+ x(ENOSPC, ENOSPC_sb_replicas) \
+ x(ENOSPC, ENOSPC_sb_members) \
+ x(ENOSPC, ENOSPC_sb_members_v2) \
+ x(ENOSPC, ENOSPC_sb_crypt) \
+ x(ENOSPC, ENOSPC_sb_downgrade) \
+ x(ENOSPC, ENOSPC_btree_slot) \
+ x(ENOSPC, ENOSPC_snapshot_tree) \
+ x(ENOENT, ENOENT_bkey_type_mismatch) \
+ x(ENOENT, ENOENT_str_hash_lookup) \
+ x(ENOENT, ENOENT_str_hash_set_must_replace) \
+ x(ENOENT, ENOENT_inode) \
+ x(ENOENT, ENOENT_not_subvol) \
+ x(ENOENT, ENOENT_not_directory) \
+ x(ENOENT, ENOENT_directory_dead) \
+ x(ENOENT, ENOENT_subvolume) \
+ x(ENOENT, ENOENT_snapshot_tree) \
+ x(ENOENT, ENOENT_dirent_doesnt_match_inode) \
+ x(ENOENT, ENOENT_dev_not_found) \
+ x(ENOENT, ENOENT_dev_idx_not_found) \
+ x(0, open_buckets_empty) \
+ x(0, freelist_empty) \
+ x(BCH_ERR_freelist_empty, no_buckets_found) \
+ x(0, transaction_restart) \
+ x(BCH_ERR_transaction_restart, transaction_restart_fault_inject) \
+ x(BCH_ERR_transaction_restart, transaction_restart_relock) \
+ x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \
+ x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \
+ x(BCH_ERR_transaction_restart, transaction_restart_relock_after_fill) \
+ x(BCH_ERR_transaction_restart, transaction_restart_too_many_iters) \
+ x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \
+ x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \
+ x(BCH_ERR_transaction_restart, transaction_restart_fill_mem_alloc_fail)\
+ x(BCH_ERR_transaction_restart, transaction_restart_mem_realloced) \
+ x(BCH_ERR_transaction_restart, transaction_restart_in_traverse_all) \
+ x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock) \
+ x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\
+ x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\
+ x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \
+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \
+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \
+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \
+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\
+ x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \
+ x(BCH_ERR_transaction_restart, transaction_restart_split_race) \
+ x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \
+ x(BCH_ERR_transaction_restart, transaction_restart_nested) \
+ x(0, no_btree_node) \
+ x(BCH_ERR_no_btree_node, no_btree_node_relock) \
+ x(BCH_ERR_no_btree_node, no_btree_node_upgrade) \
+ x(BCH_ERR_no_btree_node, no_btree_node_drop) \
+ x(BCH_ERR_no_btree_node, no_btree_node_lock_root) \
+ x(BCH_ERR_no_btree_node, no_btree_node_up) \
+ x(BCH_ERR_no_btree_node, no_btree_node_down) \
+ x(BCH_ERR_no_btree_node, no_btree_node_init) \
+ x(BCH_ERR_no_btree_node, no_btree_node_cached) \
+ x(BCH_ERR_no_btree_node, no_btree_node_srcu_reset) \
+ x(0, btree_insert_fail) \
+ x(BCH_ERR_btree_insert_fail, btree_insert_btree_node_full) \
+ x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \
+ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \
+ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \
+ x(0, backpointer_to_overwritten_btree_node) \
+ x(0, lock_fail_root_changed) \
+ x(0, journal_reclaim_would_deadlock) \
+ x(EINVAL, fsck) \
+ x(BCH_ERR_fsck, fsck_fix) \
+ x(BCH_ERR_fsck, fsck_ignore) \
+ x(BCH_ERR_fsck, fsck_errors_not_fixed) \
+ x(BCH_ERR_fsck, fsck_repair_unimplemented) \
+ x(BCH_ERR_fsck, fsck_repair_impossible) \
+ x(0, restart_recovery) \
+ x(0, data_update_done) \
+ x(EINVAL, device_state_not_allowed) \
+ x(EINVAL, member_info_missing) \
+ x(EINVAL, mismatched_block_size) \
+ x(EINVAL, block_size_too_small) \
+ x(EINVAL, bucket_size_too_small) \
+ x(EINVAL, device_size_too_small) \
+ x(EINVAL, device_not_a_member_of_filesystem) \
+ x(EINVAL, device_has_been_removed) \
+ x(EINVAL, device_splitbrain) \
+ x(EINVAL, device_already_online) \
+ x(EINVAL, insufficient_devices_to_start) \
+ x(EINVAL, invalid) \
+ x(EINVAL, internal_fsck_err) \
+ x(EINVAL, opt_parse_error) \
+ x(EROFS, erofs_trans_commit) \
+ x(EROFS, erofs_no_writes) \
+ x(EROFS, erofs_journal_err) \
+ x(EROFS, erofs_sb_err) \
+ x(EROFS, erofs_unfixed_errors) \
+ x(EROFS, erofs_norecovery) \
+ x(EROFS, erofs_nochanges) \
+ x(EROFS, insufficient_devices) \
+ x(0, operation_blocked) \
+ x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \
+ x(BCH_ERR_operation_blocked, journal_res_get_blocked) \
+ x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \
+ x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \
+ x(BCH_ERR_operation_blocked, stripe_alloc_blocked) \
+ x(BCH_ERR_invalid, invalid_sb) \
+ x(BCH_ERR_invalid_sb, invalid_sb_magic) \
+ x(BCH_ERR_invalid_sb, invalid_sb_version) \
+ x(BCH_ERR_invalid_sb, invalid_sb_features) \
+ x(BCH_ERR_invalid_sb, invalid_sb_too_big) \
+ x(BCH_ERR_invalid_sb, invalid_sb_csum_type) \
+ x(BCH_ERR_invalid_sb, invalid_sb_csum) \
+ x(BCH_ERR_invalid_sb, invalid_sb_block_size) \
+ x(BCH_ERR_invalid_sb, invalid_sb_uuid) \
+ x(BCH_ERR_invalid_sb, invalid_sb_too_many_members) \
+ x(BCH_ERR_invalid_sb, invalid_sb_dev_idx) \
+ x(BCH_ERR_invalid_sb, invalid_sb_time_precision) \
+ x(BCH_ERR_invalid_sb, invalid_sb_field_size) \
+ x(BCH_ERR_invalid_sb, invalid_sb_layout) \
+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_type) \
+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_nr_superblocks) \
+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_superblocks_overlap) \
+ x(BCH_ERR_invalid_sb, invalid_sb_members_missing) \
+ x(BCH_ERR_invalid_sb, invalid_sb_members) \
+ x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \
+ x(BCH_ERR_invalid_sb, invalid_sb_replicas) \
+ x(BCH_ERR_invalid_sb, invalid_replicas_entry) \
+ x(BCH_ERR_invalid_sb, invalid_sb_journal) \
+ x(BCH_ERR_invalid_sb, invalid_sb_journal_seq_blacklist) \
+ x(BCH_ERR_invalid_sb, invalid_sb_crypt) \
+ x(BCH_ERR_invalid_sb, invalid_sb_clean) \
+ x(BCH_ERR_invalid_sb, invalid_sb_quota) \
+ x(BCH_ERR_invalid_sb, invalid_sb_errors) \
+ x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \
+ x(BCH_ERR_invalid_sb, invalid_sb_ext) \
+ x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \
+ x(BCH_ERR_invalid, invalid_bkey) \
+ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
+ x(EIO, btree_node_read_err) \
+ x(EIO, sb_not_downgraded) \
+ x(EIO, btree_write_all_failed) \
+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_bad_node) \
+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_incompatible) \
+ x(0, nopromote) \
+ x(BCH_ERR_nopromote, nopromote_may_not) \
+ x(BCH_ERR_nopromote, nopromote_already_promoted) \
+ x(BCH_ERR_nopromote, nopromote_unwritten) \
+ x(BCH_ERR_nopromote, nopromote_congested) \
+ x(BCH_ERR_nopromote, nopromote_in_flight) \
+ x(BCH_ERR_nopromote, nopromote_no_writes) \
+ x(BCH_ERR_nopromote, nopromote_enomem)
+
+enum bch_errcode {
+ BCH_ERR_START = 2048,
+#define x(class, err) BCH_ERR_##err,
+ BCH_ERRCODES()
+#undef x
+ BCH_ERR_MAX
+};
+
+const char *bch2_err_str(int);
+bool __bch2_err_matches(int, int);
+
+static inline bool _bch2_err_matches(int err, int class)
+{
+ return err < 0 && __bch2_err_matches(err, class);
+}
+
+#define bch2_err_matches(_err, _class) \
+({ \
+ BUILD_BUG_ON(!__builtin_constant_p(_class)); \
+ unlikely(_bch2_err_matches(_err, _class)); \
+})
+
+int __bch2_err_class(int);
+
+static inline long bch2_err_class(long err)
+{
+ return err < 0 ? __bch2_err_class(err) : err;
+}
+
+#define BLK_STS_REMOVED ((__force blk_status_t)128)
+
+const char *bch2_blk_status_to_str(blk_status_t);
+
+#endif /* _BCACHEFS_ERRCODE_H */
diff --git a/c_src/libbcachefs/error.c b/c_src/libbcachefs/error.c
new file mode 100644
index 00000000..d32c8beb
--- /dev/null
+++ b/c_src/libbcachefs/error.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "error.h"
+#include "super.h"
+#include "thread_with_file.h"
+
+#define FSCK_ERR_RATELIMIT_NR 10
+
+bool bch2_inconsistent_error(struct bch_fs *c)
+{
+ set_bit(BCH_FS_error, &c->flags);
+
+ switch (c->opts.errors) {
+ case BCH_ON_ERROR_continue:
+ return false;
+ case BCH_ON_ERROR_ro:
+ if (bch2_fs_emergency_read_only(c))
+ bch_err(c, "inconsistency detected - emergency read only");
+ return true;
+ case BCH_ON_ERROR_panic:
+ panic(bch2_fmt(c, "panic after error"));
+ return true;
+ default:
+ BUG();
+ }
+}
+
+void bch2_topology_error(struct bch_fs *c)
+{
+ set_bit(BCH_FS_topology_error, &c->flags);
+ if (!test_bit(BCH_FS_fsck_running, &c->flags))
+ bch2_inconsistent_error(c);
+}
+
+void bch2_fatal_error(struct bch_fs *c)
+{
+ if (bch2_fs_emergency_read_only(c))
+ bch_err(c, "fatal error - emergency read only");
+}
+
+void bch2_io_error_work(struct work_struct *work)
+{
+ struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
+ struct bch_fs *c = ca->fs;
+ bool dev;
+
+ down_write(&c->state_lock);
+ dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro,
+ BCH_FORCE_IF_DEGRADED);
+ if (dev
+ ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
+ BCH_FORCE_IF_DEGRADED)
+ : bch2_fs_emergency_read_only(c))
+ bch_err(ca,
+ "too many IO errors, setting %s RO",
+ dev ? "device" : "filesystem");
+ up_write(&c->state_lock);
+}
+
+void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type)
+{
+ atomic64_inc(&ca->errors[type]);
+ //queue_work(system_long_wq, &ca->io_error_work);
+}
+
+enum ask_yn {
+ YN_NO,
+ YN_YES,
+ YN_ALLNO,
+ YN_ALLYES,
+};
+
+static enum ask_yn parse_yn_response(char *buf)
+{
+ buf = strim(buf);
+
+ if (strlen(buf) == 1)
+ switch (buf[0]) {
+ case 'n':
+ return YN_NO;
+ case 'y':
+ return YN_YES;
+ case 'N':
+ return YN_ALLNO;
+ case 'Y':
+ return YN_ALLYES;
+ }
+ return -1;
+}
+
+#ifdef __KERNEL__
+static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
+{
+ struct stdio_redirect *stdio = c->stdio;
+
+ if (c->stdio_filter && c->stdio_filter != current)
+ stdio = NULL;
+
+ if (!stdio)
+ return YN_NO;
+
+ char buf[100];
+ int ret;
+
+ do {
+ bch2_print(c, " (y,n, or Y,N for all errors of this type) ");
+
+ int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1);
+ if (r < 0)
+ return YN_NO;
+ buf[r] = '\0';
+ } while ((ret = parse_yn_response(buf)) < 0);
+
+ return ret;
+}
+#else
+
+#include "tools-util.h"
+
+static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
+{
+ char *buf = NULL;
+ size_t buflen = 0;
+ int ret;
+
+ do {
+ fputs(" (y,n, or Y,N for all errors of this type) ", stdout);
+ fflush(stdout);
+
+ if (getline(&buf, &buflen, stdin) < 0)
+ die("error reading from standard input");
+ } while ((ret = parse_yn_response(buf)) < 0);
+
+ free(buf);
+ return ret;
+}
+
+#endif
+
+static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
+{
+ struct fsck_err_state *s;
+
+ if (!test_bit(BCH_FS_fsck_running, &c->flags))
+ return NULL;
+
+ list_for_each_entry(s, &c->fsck_error_msgs, list)
+ if (s->fmt == fmt) {
+ /*
+ * move it to the head of the list: repeated fsck errors
+ * are common
+ */
+ list_move(&s->list, &c->fsck_error_msgs);
+ return s;
+ }
+
+ s = kzalloc(sizeof(*s), GFP_NOFS);
+ if (!s) {
+ if (!c->fsck_alloc_msgs_err)
+ bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
+ c->fsck_alloc_msgs_err = true;
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&s->list);
+ s->fmt = fmt;
+ list_add(&s->list, &c->fsck_error_msgs);
+ return s;
+}
+
+int bch2_fsck_err(struct bch_fs *c,
+ enum bch_fsck_flags flags,
+ enum bch_sb_error_id err,
+ const char *fmt, ...)
+{
+ struct fsck_err_state *s = NULL;
+ va_list args;
+ bool print = true, suppressing = false, inconsistent = false;
+ struct printbuf buf = PRINTBUF, *out = &buf;
+ int ret = -BCH_ERR_fsck_ignore;
+
+ if ((flags & FSCK_CAN_FIX) &&
+ test_bit(err, c->sb.errors_silent))
+ return -BCH_ERR_fsck_fix;
+
+ bch2_sb_error_count(c, err);
+
+ va_start(args, fmt);
+ prt_vprintf(out, fmt, args);
+ va_end(args);
+
+ mutex_lock(&c->fsck_error_msgs_lock);
+ s = fsck_err_get(c, fmt);
+ if (s) {
+ /*
+ * We may be called multiple times for the same error on
+ * transaction restart - this memoizes instead of asking the user
+ * multiple times for the same error:
+ */
+ if (s->last_msg && !strcmp(buf.buf, s->last_msg)) {
+ ret = s->ret;
+ mutex_unlock(&c->fsck_error_msgs_lock);
+ printbuf_exit(&buf);
+ return ret;
+ }
+
+ kfree(s->last_msg);
+ s->last_msg = kstrdup(buf.buf, GFP_KERNEL);
+
+ if (c->opts.ratelimit_errors &&
+ !(flags & FSCK_NO_RATELIMIT) &&
+ s->nr >= FSCK_ERR_RATELIMIT_NR) {
+ if (s->nr == FSCK_ERR_RATELIMIT_NR)
+ suppressing = true;
+ else
+ print = false;
+ }
+
+ s->nr++;
+ }
+
+#ifdef BCACHEFS_LOG_PREFIX
+ if (!strncmp(fmt, "bcachefs:", 9))
+ prt_printf(out, bch2_log_msg(c, ""));
+#endif
+
+ if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
+ if (c->opts.errors != BCH_ON_ERROR_continue ||
+ !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
+ prt_str(out, ", shutting down");
+ inconsistent = true;
+ ret = -BCH_ERR_fsck_errors_not_fixed;
+ } else if (flags & FSCK_CAN_FIX) {
+ prt_str(out, ", fixing");
+ ret = -BCH_ERR_fsck_fix;
+ } else {
+ prt_str(out, ", continuing");
+ ret = -BCH_ERR_fsck_ignore;
+ }
+ } else if (c->opts.fix_errors == FSCK_FIX_exit) {
+ prt_str(out, ", exiting");
+ ret = -BCH_ERR_fsck_errors_not_fixed;
+ } else if (flags & FSCK_CAN_FIX) {
+ int fix = s && s->fix
+ ? s->fix
+ : c->opts.fix_errors;
+
+ if (fix == FSCK_FIX_ask) {
+ int ask;
+
+ prt_str(out, ": fix?");
+ if (bch2_fs_stdio_redirect(c))
+ bch2_print(c, "%s", out->buf);
+ else
+ bch2_print_string_as_lines(KERN_ERR, out->buf);
+ print = false;
+
+ ask = bch2_fsck_ask_yn(c);
+
+ if (ask >= YN_ALLNO && s)
+ s->fix = ask == YN_ALLNO
+ ? FSCK_FIX_no
+ : FSCK_FIX_yes;
+
+ ret = ask & 1
+ ? -BCH_ERR_fsck_fix
+ : -BCH_ERR_fsck_ignore;
+ } else if (fix == FSCK_FIX_yes ||
+ (c->opts.nochanges &&
+ !(flags & FSCK_CAN_IGNORE))) {
+ prt_str(out, ", fixing");
+ ret = -BCH_ERR_fsck_fix;
+ } else {
+ prt_str(out, ", not fixing");
+ }
+ } else if (flags & FSCK_NEED_FSCK) {
+ prt_str(out, " (run fsck to correct)");
+ } else {
+ prt_str(out, " (repair unimplemented)");
+ }
+
+ if (ret == -BCH_ERR_fsck_ignore &&
+ (c->opts.fix_errors == FSCK_FIX_exit ||
+ !(flags & FSCK_CAN_IGNORE)))
+ ret = -BCH_ERR_fsck_errors_not_fixed;
+
+ if (print) {
+ if (bch2_fs_stdio_redirect(c))
+ bch2_print(c, "%s\n", out->buf);
+ else
+ bch2_print_string_as_lines(KERN_ERR, out->buf);
+ }
+
+ if (test_bit(BCH_FS_fsck_running, &c->flags) &&
+ (ret != -BCH_ERR_fsck_fix &&
+ ret != -BCH_ERR_fsck_ignore))
+ bch_err(c, "Unable to continue, halting");
+ else if (suppressing)
+ bch_err(c, "Ratelimiting new instances of previous error");
+
+ if (s)
+ s->ret = ret;
+
+ mutex_unlock(&c->fsck_error_msgs_lock);
+
+ printbuf_exit(&buf);
+
+ if (inconsistent)
+ bch2_inconsistent_error(c);
+
+ if (ret == -BCH_ERR_fsck_fix) {
+ set_bit(BCH_FS_errors_fixed, &c->flags);
+ } else {
+ set_bit(BCH_FS_errors_not_fixed, &c->flags);
+ set_bit(BCH_FS_error, &c->flags);
+ }
+
+ return ret;
+}
+
+void bch2_flush_fsck_errs(struct bch_fs *c)
+{
+ struct fsck_err_state *s, *n;
+
+ mutex_lock(&c->fsck_error_msgs_lock);
+
+ list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
+ if (s->ratelimited && s->last_msg)
+ bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);
+
+ list_del(&s->list);
+ kfree(s->last_msg);
+ kfree(s);
+ }
+
+ mutex_unlock(&c->fsck_error_msgs_lock);
+}
diff --git a/c_src/libbcachefs/error.h b/c_src/libbcachefs/error.h
new file mode 100644
index 00000000..fec17d13
--- /dev/null
+++ b/c_src/libbcachefs/error.h
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ERROR_H
+#define _BCACHEFS_ERROR_H
+
+#include <linux/list.h>
+#include <linux/printk.h>
+#include "sb-errors.h"
+
+struct bch_dev;
+struct bch_fs;
+struct work_struct;
+
+/*
+ * XXX: separate out errors that indicate on disk data is inconsistent, and flag
+ * superblock as such
+ */
+
+/* Error messages: */
+
+/*
+ * Inconsistency errors: The on disk data is inconsistent. If these occur during
+ * initial recovery, they don't indicate a bug in the running code - we walk all
+ * the metadata before modifying anything. If they occur at runtime, they
+ * indicate either a bug in the running code or (less likely) data is being
+ * silently corrupted under us.
+ *
+ * XXX: audit all inconsistent errors and make sure they're all recoverable, in
+ * BCH_ON_ERROR_CONTINUE mode
+ */
+
+bool bch2_inconsistent_error(struct bch_fs *);
+
+void bch2_topology_error(struct bch_fs *);
+
+#define bch2_fs_inconsistent(c, ...) \
+({ \
+ bch_err(c, __VA_ARGS__); \
+ bch2_inconsistent_error(c); \
+})
+
+#define bch2_fs_inconsistent_on(cond, c, ...) \
+({ \
+ bool _ret = unlikely(!!(cond)); \
+ \
+ if (_ret) \
+ bch2_fs_inconsistent(c, __VA_ARGS__); \
+ _ret; \
+})
+
+/*
+ * Later we might want to mark only the particular device inconsistent, not the
+ * entire filesystem:
+ */
+
+#define bch2_dev_inconsistent(ca, ...) \
+do { \
+ bch_err(ca, __VA_ARGS__); \
+ bch2_inconsistent_error((ca)->fs); \
+} while (0)
+
+#define bch2_dev_inconsistent_on(cond, ca, ...) \
+({ \
+ bool _ret = unlikely(!!(cond)); \
+ \
+ if (_ret) \
+ bch2_dev_inconsistent(ca, __VA_ARGS__); \
+ _ret; \
+})
+
+/*
+ * When a transaction update discovers or is causing a fs inconsistency, it's
+ * helpful to also dump the pending updates:
+ */
+#define bch2_trans_inconsistent(trans, ...) \
+({ \
+ bch_err(trans->c, __VA_ARGS__); \
+ bch2_dump_trans_updates(trans); \
+ bch2_inconsistent_error(trans->c); \
+})
+
+#define bch2_trans_inconsistent_on(cond, trans, ...) \
+({ \
+ bool _ret = unlikely(!!(cond)); \
+ \
+ if (_ret) \
+ bch2_trans_inconsistent(trans, __VA_ARGS__); \
+ _ret; \
+})
+
+/*
+ * Fsck errors: inconsistency errors we detect at mount time, and should ideally
+ * be able to repair:
+ */
+
+struct fsck_err_state {
+ struct list_head list;
+ const char *fmt;
+ u64 nr;
+ bool ratelimited;
+ int ret;
+ int fix;
+ char *last_msg;
+};
+
+enum bch_fsck_flags {
+ FSCK_CAN_FIX = 1 << 0,
+ FSCK_CAN_IGNORE = 1 << 1,
+ FSCK_NEED_FSCK = 1 << 2,
+ FSCK_NO_RATELIMIT = 1 << 3,
+};
+
+#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)
+
+__printf(4, 5) __cold
+int bch2_fsck_err(struct bch_fs *,
+ enum bch_fsck_flags,
+ enum bch_sb_error_id,
+ const char *, ...);
+void bch2_flush_fsck_errs(struct bch_fs *);
+
+#define __fsck_err(c, _flags, _err_type, ...) \
+({ \
+ int _ret = bch2_fsck_err(c, _flags, BCH_FSCK_ERR_##_err_type, \
+ __VA_ARGS__); \
+ \
+ if (_ret != -BCH_ERR_fsck_fix && \
+ _ret != -BCH_ERR_fsck_ignore) { \
+ ret = _ret; \
+ goto fsck_err; \
+ } \
+ \
+ _ret == -BCH_ERR_fsck_fix; \
+})
+
+/* These macros return true if error should be fixed: */
+
+/* XXX: mark in superblock that filesystem contains errors, if we ignore: */
+
+#define __fsck_err_on(cond, c, _flags, _err_type, ...) \
+ (unlikely(cond) ? __fsck_err(c, _flags, _err_type, __VA_ARGS__) : false)
+
+#define need_fsck_err_on(cond, c, _err_type, ...) \
+ __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__)
+
+#define need_fsck_err(c, _err_type, ...) \
+ __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__)
+
+#define mustfix_fsck_err(c, _err_type, ...) \
+ __fsck_err(c, FSCK_CAN_FIX, _err_type, __VA_ARGS__)
+
+#define mustfix_fsck_err_on(cond, c, _err_type, ...) \
+ __fsck_err_on(cond, c, FSCK_CAN_FIX, _err_type, __VA_ARGS__)
+
+#define fsck_err(c, _err_type, ...) \
+ __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__)
+
+#define fsck_err_on(cond, c, _err_type, ...) \
+ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__)
+
+__printf(4, 0)
+static inline void bch2_bkey_fsck_err(struct bch_fs *c,
+ struct printbuf *err_msg,
+ enum bch_sb_error_id err_type,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ prt_vprintf(err_msg, fmt, args);
+ va_end(args);
+}
+
+#define bkey_fsck_err(c, _err_msg, _err_type, ...) \
+do { \
+ prt_printf(_err_msg, __VA_ARGS__); \
+ bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \
+ ret = -BCH_ERR_invalid_bkey; \
+ goto fsck_err; \
+} while (0)
+
+#define bkey_fsck_err_on(cond, ...) \
+do { \
+ if (unlikely(cond)) \
+ bkey_fsck_err(__VA_ARGS__); \
+} while (0)
+
+/*
+ * Fatal errors: these don't indicate a bug, but we can't continue running in RW
+ * mode - pretty much just due to metadata IO errors:
+ */
+
+void bch2_fatal_error(struct bch_fs *);
+
+#define bch2_fs_fatal_error(c, ...) \
+do { \
+ bch_err(c, __VA_ARGS__); \
+ bch2_fatal_error(c); \
+} while (0)
+
+#define bch2_fs_fatal_err_on(cond, c, ...) \
+({ \
+ bool _ret = unlikely(!!(cond)); \
+ \
+ if (_ret) \
+ bch2_fs_fatal_error(c, __VA_ARGS__); \
+ _ret; \
+})
+
+/*
+ * IO errors: either recoverable metadata IO (because we have replicas), or data
+ * IO - we need to log it and print out a message, but we don't (necessarily)
+ * want to shut down the fs:
+ */
+
+void bch2_io_error_work(struct work_struct *);
+
+/* Does the error handling without logging a message */
+void bch2_io_error(struct bch_dev *, enum bch_member_error_type);
+
+#define bch2_dev_io_err_on(cond, ca, _type, ...) \
+({ \
+ bool _ret = (cond); \
+ \
+ if (_ret) { \
+ bch_err_dev_ratelimited(ca, __VA_ARGS__); \
+ bch2_io_error(ca, _type); \
+ } \
+ _ret; \
+})
+
+#define bch2_dev_inum_io_err_on(cond, ca, _type, ...) \
+({ \
+ bool _ret = (cond); \
+ \
+ if (_ret) { \
+ bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \
+ bch2_io_error(ca, _type); \
+ } \
+ _ret; \
+})
+
+#endif /* _BCACHEFS_ERROR_H */
diff --git a/c_src/libbcachefs/extent_update.c b/c_src/libbcachefs/extent_update.c
new file mode 100644
index 00000000..b9033bb4
--- /dev/null
+++ b/c_src/libbcachefs/extent_update.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "debug.h"
+#include "extents.h"
+#include "extent_update.h"
+
+/*
+ * This counts the number of iterators to the alloc & ec btrees we'll need
+ * inserting/removing this extent:
+ */
+static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ unsigned ret = 0, lru = 0;
+
+ bkey_extent_entry_for_each(ptrs, entry) {
+ switch (__extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ /* Might also be updating LRU btree */
+ if (entry->ptr.cached)
+ lru++;
+
+ fallthrough;
+ case BCH_EXTENT_ENTRY_stripe_ptr:
+ ret++;
+ }
+ }
+
+ /*
+ * Updating keys in the alloc btree may also update keys in the
+ * freespace or discard btrees:
+ */
+ return lru + ret * 2;
+}
+
+static int count_iters_for_insert(struct btree_trans *trans,
+ struct bkey_s_c k,
+ unsigned offset,
+ struct bpos *end,
+ unsigned *nr_iters,
+ unsigned max_iters)
+{
+ int ret = 0, ret2 = 0;
+
+ if (*nr_iters >= max_iters) {
+ *end = bpos_min(*end, k.k->p);
+ ret = 1;
+ }
+
+ switch (k.k->type) {
+ case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v:
+ *nr_iters += bch2_bkey_nr_alloc_ptrs(k);
+
+ if (*nr_iters >= max_iters) {
+ *end = bpos_min(*end, k.k->p);
+ ret = 1;
+ }
+
+ break;
+ case KEY_TYPE_reflink_p: {
+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+ u64 idx = le64_to_cpu(p.v->idx);
+ unsigned sectors = bpos_min(*end, p.k->p).offset -
+ bkey_start_offset(p.k);
+ struct btree_iter iter;
+ struct bkey_s_c r_k;
+
+ for_each_btree_key_norestart(trans, iter,
+ BTREE_ID_reflink, POS(0, idx + offset),
+ BTREE_ITER_SLOTS, r_k, ret2) {
+ if (bkey_ge(bkey_start_pos(r_k.k), POS(0, idx + sectors)))
+ break;
+
+ /* extent_update_to_keys(), for the reflink_v update */
+ *nr_iters += 1;
+
+ *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);
+
+ if (*nr_iters >= max_iters) {
+ struct bpos pos = bkey_start_pos(k.k);
+ pos.offset += min_t(u64, k.k->size,
+ r_k.k->p.offset - idx);
+
+ *end = bpos_min(*end, pos);
+ ret = 1;
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ break;
+ }
+ }
+
+ return ret2 ?: ret;
+}
+
+#define EXTENT_ITERS_MAX (BTREE_ITER_INITIAL / 3)
+
+int bch2_extent_atomic_end(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i *insert,
+ struct bpos *end)
+{
+ struct btree_iter copy;
+ struct bkey_s_c k;
+ unsigned nr_iters = 0;
+ int ret;
+
+ ret = bch2_btree_iter_traverse(iter);
+ if (ret)
+ return ret;
+
+ *end = insert->k.p;
+
+ /* extent_update_to_keys(): */
+ nr_iters += 1;
+
+ ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
+ &nr_iters, EXTENT_ITERS_MAX / 2);
+ if (ret < 0)
+ return ret;
+
+ bch2_trans_copy_iter(&copy, iter);
+
+ for_each_btree_key_upto_continue_norestart(copy, insert->k.p, 0, k, ret) {
+ unsigned offset = 0;
+
+ if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k)))
+ offset = bkey_start_offset(&insert->k) -
+ bkey_start_offset(k.k);
+
+ /* extent_handle_overwrites(): */
+ switch (bch2_extent_overlap(&insert->k, k.k)) {
+ case BCH_EXTENT_OVERLAP_ALL:
+ case BCH_EXTENT_OVERLAP_FRONT:
+ nr_iters += 1;
+ break;
+ case BCH_EXTENT_OVERLAP_BACK:
+ case BCH_EXTENT_OVERLAP_MIDDLE:
+ nr_iters += 2;
+ break;
+ }
+
+ ret = count_iters_for_insert(trans, k, offset, end,
+ &nr_iters, EXTENT_ITERS_MAX);
+ if (ret)
+ break;
+ }
+
+ bch2_trans_iter_exit(trans, &copy);
+ return ret < 0 ? ret : 0;
+}
+
+int bch2_extent_trim_atomic(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i *k)
+{
+ struct bpos end;
+ int ret;
+
+ ret = bch2_extent_atomic_end(trans, iter, k, &end);
+ if (ret)
+ return ret;
+
+ bch2_cut_back(end, k);
+ return 0;
+}
diff --git a/c_src/libbcachefs/extent_update.h b/c_src/libbcachefs/extent_update.h
new file mode 100644
index 00000000..6f5cf449
--- /dev/null
+++ b/c_src/libbcachefs/extent_update.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EXTENT_UPDATE_H
+#define _BCACHEFS_EXTENT_UPDATE_H
+
+#include "bcachefs.h"
+
+int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *,
+ struct bkey_i *, struct bpos *);
+int bch2_extent_trim_atomic(struct btree_trans *, struct btree_iter *,
+ struct bkey_i *);
+
+#endif /* _BCACHEFS_EXTENT_UPDATE_H */
diff --git a/c_src/libbcachefs/extents.c b/c_src/libbcachefs/extents.c
new file mode 100644
index 00000000..82ec056f
--- /dev/null
+++ b/c_src/libbcachefs/extents.c
@@ -0,0 +1,1507 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * Code for managing the extent btree and dynamically updating the writeback
+ * dirty sector count.
+ */
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "btree_gc.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "compress.h"
+#include "debug.h"
+#include "disk_groups.h"
+#include "error.h"
+#include "extents.h"
+#include "inode.h"
+#include "journal.h"
+#include "replicas.h"
+#include "super.h"
+#include "super-io.h"
+#include "trace.h"
+#include "util.h"
+
+static unsigned bch2_crc_field_size_max[] = {
+ [BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
+ [BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
+ [BCH_EXTENT_ENTRY_crc128] = CRC128_SIZE_MAX,
+};
+
+static void bch2_extent_crc_pack(union bch_extent_crc *,
+ struct bch_extent_crc_unpacked,
+ enum bch_extent_entry_type);
+
+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
+ unsigned dev)
+{
+ struct bch_dev_io_failures *i;
+
+ for (i = f->devs; i < f->devs + f->nr; i++)
+ if (i->dev == dev)
+ return i;
+
+ return NULL;
+}
+
+void bch2_mark_io_failure(struct bch_io_failures *failed,
+ struct extent_ptr_decoded *p)
+{
+ struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+
+ if (!f) {
+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+ f = &failed->devs[failed->nr++];
+ f->dev = p->ptr.dev;
+ f->idx = p->idx;
+ f->nr_failed = 1;
+ f->nr_retries = 0;
+ } else if (p->idx != f->idx) {
+ f->idx = p->idx;
+ f->nr_failed = 1;
+ f->nr_retries = 0;
+ } else {
+ f->nr_failed++;
+ }
+}
+
+/*
+ * returns true if p1 is better than p2:
+ */
+static inline bool ptr_better(struct bch_fs *c,
+ const struct extent_ptr_decoded p1,
+ const struct extent_ptr_decoded p2)
+{
+ if (likely(!p1.idx && !p2.idx)) {
+ struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
+ struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+
+ u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
+ u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
+
+ /* Pick at random, biased in favor of the faster device: */
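+		/* (p1 wins with probability ~l2 / (l1 + l2), i.e. more often when l1 is lower) */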
+
+ return bch2_rand_range(l1 + l2) > l1;
+ }
+
+ if (bch2_force_reconstruct_read)
+ return p1.idx > p2.idx;
+
+ return p1.idx < p2.idx;
+}
+
+/*
+ * Picks a non-stale pointer to read from, preferring pointers to devices that
+ * haven't already failed for this read (per @failed, which may be NULL).
+ *
+ * Returns 1 if a pointer was picked, 0 if the extent is a hole or unwritten,
+ * and -EIO if there are dirty pointers but none of them could be read.
+ */
+int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
+ struct bch_io_failures *failed,
+ struct extent_ptr_decoded *pick)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct bch_dev_io_failures *f;
+ struct bch_dev *ca;
+ int ret = 0;
+
+ if (k.k->type == KEY_TYPE_error)
+ return -EIO;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ /*
+ * Unwritten extent: no need to actually read, treat it as a
+ * hole and return 0s:
+ */
+ if (p.ptr.unwritten)
+ return 0;
+
+ ca = bch_dev_bkey_exists(c, p.ptr.dev);
+
+ /*
+ * If there are any dirty pointers it's an error if we can't
+ * read:
+ */
+ if (!ret && !p.ptr.cached)
+ ret = -EIO;
+
+ if (p.ptr.cached && ptr_stale(ca, &p.ptr))
+ continue;
+
+ f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+ if (f)
+ p.idx = f->nr_failed < f->nr_retries
+ ? f->idx
+ : f->idx + 1;
+
+ if (!p.idx &&
+ !bch2_dev_is_readable(ca))
+ p.idx++;
+
+ if (bch2_force_reconstruct_read &&
+ !p.idx && p.has_ec)
+ p.idx++;
+
+ if (p.idx >= (unsigned) p.has_ec + 1)
+ continue;
+
+ if (ret > 0 && !ptr_better(c, p, *pick))
+ continue;
+
+ *pick = p;
+ ret = 1;
+ }
+
+ return ret;
+}
+
+/* KEY_TYPE_btree_ptr: */
+
+int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err,
+ btree_ptr_val_too_big,
+ "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX);
+
+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
+fsck_err:
+ return ret;
+}
+
+void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ bch2_bkey_ptrs_to_text(out, c, k);
+}
+
+int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err,
+ btree_ptr_v2_val_too_big,
+ "value too big (%zu > %zu)",
+ bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
+
+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
+fsck_err:
+ return ret;
+}
+
+void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
+
+ prt_printf(out, "seq %llx written %u min_key %s",
+ le64_to_cpu(bp.v->seq),
+ le16_to_cpu(bp.v->sectors_written),
+ BTREE_PTR_RANGE_UPDATED(bp.v) ? "R " : "");
+
+ bch2_bpos_to_text(out, bp.v->min_key);
+ prt_printf(out, " ");
+ bch2_bkey_ptrs_to_text(out, c, k);
+}
+
+void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
+ unsigned big_endian, int write,
+ struct bkey_s k)
+{
+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k);
+
+ compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key);
+
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_id_is_extents(btree_id) &&
+ !bkey_eq(bp.v->min_key, POS_MIN))
+ bp.v->min_key = write
+ ? bpos_nosnap_predecessor(bp.v->min_key)
+ : bpos_nosnap_successor(bp.v->min_key);
+}
+
+/* KEY_TYPE_extent: */
+
+bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
+{
+ struct bkey_ptrs l_ptrs = bch2_bkey_ptrs(l);
+ struct bkey_ptrs_c r_ptrs = bch2_bkey_ptrs_c(r);
+ union bch_extent_entry *en_l;
+ const union bch_extent_entry *en_r;
+ struct extent_ptr_decoded lp, rp;
+ bool use_right_ptr;
+ struct bch_dev *ca;
+
+ en_l = l_ptrs.start;
+ en_r = r_ptrs.start;
+ while (en_l < l_ptrs.end && en_r < r_ptrs.end) {
+ if (extent_entry_type(en_l) != extent_entry_type(en_r))
+ return false;
+
+ en_l = extent_entry_next(en_l);
+ en_r = extent_entry_next(en_r);
+ }
+
+ if (en_l < l_ptrs.end || en_r < r_ptrs.end)
+ return false;
+
+ en_l = l_ptrs.start;
+ en_r = r_ptrs.start;
+ lp.crc = bch2_extent_crc_unpack(l.k, NULL);
+ rp.crc = bch2_extent_crc_unpack(r.k, NULL);
+
+ while (__bkey_ptr_next_decode(l.k, l_ptrs.end, lp, en_l) &&
+ __bkey_ptr_next_decode(r.k, r_ptrs.end, rp, en_r)) {
+ if (lp.ptr.offset + lp.crc.offset + lp.crc.live_size !=
+ rp.ptr.offset + rp.crc.offset ||
+ lp.ptr.dev != rp.ptr.dev ||
+ lp.ptr.gen != rp.ptr.gen ||
+ lp.ptr.unwritten != rp.ptr.unwritten ||
+ lp.has_ec != rp.has_ec)
+ return false;
+
+ /* Extents may not straddle buckets: */
+ ca = bch_dev_bkey_exists(c, lp.ptr.dev);
+ if (PTR_BUCKET_NR(ca, &lp.ptr) != PTR_BUCKET_NR(ca, &rp.ptr))
+ return false;
+
+ if (lp.has_ec != rp.has_ec ||
+ (lp.has_ec &&
+ (lp.ec.block != rp.ec.block ||
+ lp.ec.redundancy != rp.ec.redundancy ||
+ lp.ec.idx != rp.ec.idx)))
+ return false;
+
+ if (lp.crc.compression_type != rp.crc.compression_type ||
+ lp.crc.nonce != rp.crc.nonce)
+ return false;
+
+ if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <=
+ lp.crc.uncompressed_size) {
+ /* can use left extent's crc entry */
+ } else if (lp.crc.live_size <= rp.crc.offset) {
+ /* can use right extent's crc entry */
+ } else {
+ /* check if checksums can be merged: */
+ if (lp.crc.csum_type != rp.crc.csum_type ||
+ lp.crc.nonce != rp.crc.nonce ||
+ crc_is_compressed(lp.crc) ||
+ !bch2_checksum_mergeable(lp.crc.csum_type))
+ return false;
+
+ if (lp.crc.offset + lp.crc.live_size != lp.crc.compressed_size ||
+ rp.crc.offset)
+ return false;
+
+ if (lp.crc.csum_type &&
+ lp.crc.uncompressed_size +
+ rp.crc.uncompressed_size > (c->opts.encoded_extent_max >> 9))
+ return false;
+ }
+
+ en_l = extent_entry_next(en_l);
+ en_r = extent_entry_next(en_r);
+ }
+
+ en_l = l_ptrs.start;
+ en_r = r_ptrs.start;
+ while (en_l < l_ptrs.end && en_r < r_ptrs.end) {
+ if (extent_entry_is_crc(en_l)) {
+ struct bch_extent_crc_unpacked crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
+ struct bch_extent_crc_unpacked crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
+
+ if (crc_l.uncompressed_size + crc_r.uncompressed_size >
+ bch2_crc_field_size_max[extent_entry_type(en_l)])
+ return false;
+ }
+
+ en_l = extent_entry_next(en_l);
+ en_r = extent_entry_next(en_r);
+ }
+
+ use_right_ptr = false;
+ en_l = l_ptrs.start;
+ en_r = r_ptrs.start;
+ while (en_l < l_ptrs.end) {
+ if (extent_entry_type(en_l) == BCH_EXTENT_ENTRY_ptr &&
+ use_right_ptr)
+ en_l->ptr = en_r->ptr;
+
+ if (extent_entry_is_crc(en_l)) {
+ struct bch_extent_crc_unpacked crc_l =
+ bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
+ struct bch_extent_crc_unpacked crc_r =
+ bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
+
+ use_right_ptr = false;
+
+ if (crc_l.offset + crc_l.live_size + crc_r.live_size <=
+ crc_l.uncompressed_size) {
+ /* can use left extent's crc entry */
+ } else if (crc_l.live_size <= crc_r.offset) {
+ /* can use right extent's crc entry */
+ crc_r.offset -= crc_l.live_size;
+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_r,
+ extent_entry_type(en_l));
+ use_right_ptr = true;
+ } else {
+ crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
+ crc_l.csum,
+ crc_r.csum,
+ crc_r.uncompressed_size << 9);
+
+ crc_l.uncompressed_size += crc_r.uncompressed_size;
+ crc_l.compressed_size += crc_r.compressed_size;
+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
+ extent_entry_type(en_l));
+ }
+ }
+
+ en_l = extent_entry_next(en_l);
+ en_r = extent_entry_next(en_r);
+ }
+
+ bch2_key_resize(l.k, l.k->size + r.k->size);
+ return true;
+}
+
+/* KEY_TYPE_reservation: */
+
+int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+ int ret = 0;
+
+ bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err,
+ reservation_key_nr_replicas_invalid,
+ "invalid nr_replicas (%u)", r.v->nr_replicas);
+fsck_err:
+ return ret;
+}
+
+void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+
+ prt_printf(out, "generation %u replicas %u",
+ le32_to_cpu(r.v->generation),
+ r.v->nr_replicas);
+}
+
+bool bch2_reservation_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
+{
+ struct bkey_s_reservation l = bkey_s_to_reservation(_l);
+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(_r);
+
+ if (l.v->generation != r.v->generation ||
+ l.v->nr_replicas != r.v->nr_replicas)
+ return false;
+
+ bch2_key_resize(l.k, l.k->size + r.k->size);
+ return true;
+}
+
+/* Extent checksum entries: */
+
+/* returns true if not equal */
+static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
+ struct bch_extent_crc_unpacked r)
+{
+ return (l.csum_type != r.csum_type ||
+ l.compression_type != r.compression_type ||
+ l.compressed_size != r.compressed_size ||
+ l.uncompressed_size != r.uncompressed_size ||
+ l.offset != r.offset ||
+ l.live_size != r.live_size ||
+ l.nonce != r.nonce ||
+ bch2_crc_cmp(l.csum, r.csum));
+}
+
+static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
+ struct bch_extent_crc_unpacked n)
+{
+ return !crc_is_compressed(u) &&
+ u.csum_type &&
+ u.uncompressed_size > u.live_size &&
+ bch2_csum_type_is_encryption(u.csum_type) ==
+ bch2_csum_type_is_encryption(n.csum_type);
+}
+
+bool bch2_can_narrow_extent_crcs(struct bkey_s_c k,
+ struct bch_extent_crc_unpacked n)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *i;
+
+ if (!n.csum_type)
+ return false;
+
+ bkey_for_each_crc(k.k, ptrs, crc, i)
+ if (can_narrow_crc(crc, n))
+ return true;
+
+ return false;
+}
+
+/*
+ * We're writing another replica for this extent, so while we've got the data in
+ * memory we'll be computing a new checksum for the currently live data.
+ *
+ * If there are other replicas we aren't moving, and they are checksummed but
+ * not compressed, we can modify them to point to only the data that is
+ * currently live (so that readers won't have to bounce) while we've got the
+ * checksum we need:
+ */
+bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n)
+{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+ struct bch_extent_crc_unpacked u;
+ struct extent_ptr_decoded p;
+ union bch_extent_entry *i;
+ bool ret = false;
+
+ /* Find a checksum entry that covers only live data: */
+ if (!n.csum_type) {
+ bkey_for_each_crc(&k->k, ptrs, u, i)
+ if (!crc_is_compressed(u) &&
+ u.csum_type &&
+ u.live_size == u.uncompressed_size) {
+ n = u;
+ goto found;
+ }
+ return false;
+ }
+found:
+ BUG_ON(crc_is_compressed(n));
+ BUG_ON(n.offset);
+ BUG_ON(n.live_size != k->k.size);
+
+restart_narrow_pointers:
+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+
+ bkey_for_each_ptr_decode(&k->k, ptrs, p, i)
+ if (can_narrow_crc(p.crc, n)) {
+ bch2_bkey_drop_ptr_noerror(bkey_i_to_s(k), &i->ptr);
+ p.ptr.offset += p.crc.offset;
+ p.crc = n;
+ bch2_extent_ptr_decoded_append(k, &p);
+ ret = true;
+ goto restart_narrow_pointers;
+ }
+
+ return ret;
+}
+
+static void bch2_extent_crc_pack(union bch_extent_crc *dst,
+ struct bch_extent_crc_unpacked src,
+ enum bch_extent_entry_type type)
+{
+#define set_common_fields(_dst, _src) \
+ _dst.type = 1 << type; \
+ _dst.csum_type = _src.csum_type, \
+ _dst.compression_type = _src.compression_type, \
+ _dst._compressed_size = _src.compressed_size - 1, \
+ _dst._uncompressed_size = _src.uncompressed_size - 1, \
+ _dst.offset = _src.offset
+
+ switch (type) {
+ case BCH_EXTENT_ENTRY_crc32:
+ set_common_fields(dst->crc32, src);
+ dst->crc32.csum = (u32 __force) *((__le32 *) &src.csum.lo);
+ break;
+ case BCH_EXTENT_ENTRY_crc64:
+ set_common_fields(dst->crc64, src);
+ dst->crc64.nonce = src.nonce;
+ dst->crc64.csum_lo = (u64 __force) src.csum.lo;
+ dst->crc64.csum_hi = (u64 __force) *((__le16 *) &src.csum.hi);
+ break;
+ case BCH_EXTENT_ENTRY_crc128:
+ set_common_fields(dst->crc128, src);
+ dst->crc128.nonce = src.nonce;
+ dst->crc128.csum = src.csum;
+ break;
+ default:
+ BUG();
+ }
+#undef set_common_fields
+}
+
+void bch2_extent_crc_append(struct bkey_i *k,
+ struct bch_extent_crc_unpacked new)
+{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+ union bch_extent_crc *crc = (void *) ptrs.end;
+ enum bch_extent_entry_type type;
+
+ if (bch_crc_bytes[new.csum_type] <= 4 &&
+ new.uncompressed_size <= CRC32_SIZE_MAX &&
+ new.nonce <= CRC32_NONCE_MAX)
+ type = BCH_EXTENT_ENTRY_crc32;
+ else if (bch_crc_bytes[new.csum_type] <= 10 &&
+ new.uncompressed_size <= CRC64_SIZE_MAX &&
+ new.nonce <= CRC64_NONCE_MAX)
+ type = BCH_EXTENT_ENTRY_crc64;
+ else if (bch_crc_bytes[new.csum_type] <= 16 &&
+ new.uncompressed_size <= CRC128_SIZE_MAX &&
+ new.nonce <= CRC128_NONCE_MAX)
+ type = BCH_EXTENT_ENTRY_crc128;
+ else
+ BUG();
+
+ bch2_extent_crc_pack(crc, new, type);
+
+ k->k.u64s += extent_entry_u64s(ptrs.end);
+
+ EBUG_ON(bkey_val_u64s(&k->k) > BKEY_EXTENT_VAL_U64s_MAX);
+}
+
+/* Generic code for keys with pointers: */
+
+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k)
+{
+ return bch2_bkey_devs(k).nr;
+}
+
+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c k)
+{
+ return k.k->type == KEY_TYPE_reservation
+ ? bkey_s_c_to_reservation(k).v->nr_replicas
+ : bch2_bkey_dirty_devs(k).nr;
+}
+
+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c k)
+{
+ unsigned ret = 0;
+
+ if (k.k->type == KEY_TYPE_reservation) {
+ ret = bkey_s_c_to_reservation(k).v->nr_replicas;
+ } else {
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ ret += !p.ptr.cached && !crc_is_compressed(p.crc);
+ }
+
+ return ret;
+}
+
+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned ret = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ if (!p.ptr.cached && crc_is_compressed(p.crc))
+ ret += p.crc.compressed_size;
+
+ return ret;
+}
+
+bool bch2_bkey_is_incompressible(struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct bch_extent_crc_unpacked crc;
+
+ bkey_for_each_crc(k.k, ptrs, crc, entry)
+ if (crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
+ return true;
+ return false;
+}
+
+unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p = { 0 };
+ unsigned replicas = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.ptr.cached)
+ continue;
+
+ if (p.has_ec)
+ replicas += p.ec.redundancy;
+
+ replicas++;
+
+ }
+
+ return replicas;
+}
+
+static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p)
+{
+ if (p->ptr.cached)
+ return 0;
+
+ return p->has_ec
+ ? p->ec.redundancy + 1
+ : ca->mi.durability;
+}
+
+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
+
+ return __extent_ptr_durability(ca, p);
+}
+
+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+{
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
+
+ if (ca->mi.state == BCH_MEMBER_STATE_failed)
+ return 0;
+
+ return __extent_ptr_durability(ca, p);
+}
+
+unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned durability = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ durability += bch2_extent_ptr_durability(c, &p);
+
+ return durability;
+}
+
+static unsigned bch2_bkey_durability_safe(struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned durability = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ if (p.ptr.dev < c->sb.nr_devices && c->devs[p.ptr.dev])
+ durability += bch2_extent_ptr_durability(c, &p);
+
+ return durability;
+}
+
+void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry)
+{
+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
+ union bch_extent_entry *next = extent_entry_next(entry);
+
+ memmove_u64s(entry, next, (u64 *) end - (u64 *) next);
+ k->k.u64s -= extent_entry_u64s(entry);
+}
+
+void bch2_extent_ptr_decoded_append(struct bkey_i *k,
+ struct extent_ptr_decoded *p)
+{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+ struct bch_extent_crc_unpacked crc =
+ bch2_extent_crc_unpack(&k->k, NULL);
+ union bch_extent_entry *pos;
+
+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
+ pos = ptrs.start;
+ goto found;
+ }
+
+ bkey_for_each_crc(&k->k, ptrs, crc, pos)
+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) {
+ pos = extent_entry_next(pos);
+ goto found;
+ }
+
+ bch2_extent_crc_append(k, p->crc);
+ pos = bkey_val_end(bkey_i_to_s(k));
+found:
+ p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
+ __extent_entry_insert(k, pos, to_entry(&p->ptr));
+
+ if (p->has_ec) {
+ p->ec.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr;
+ __extent_entry_insert(k, pos, to_entry(&p->ec));
+ }
+}
+
+static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
+ union bch_extent_entry *entry)
+{
+ union bch_extent_entry *i = ptrs.start;
+
+ if (i == entry)
+ return NULL;
+
+ while (extent_entry_next(i) != entry)
+ i = extent_entry_next(i);
+ return i;
+}
+
+/*
+ * Returns pointer to the next entry after the one being dropped:
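+ *
+ * Also drops any crc or stripe_ptr entries that applied only to the dropped
+ * pointer.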
+ */
+union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
+ struct bch_extent_ptr *ptr)
+{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+ union bch_extent_entry *entry = to_entry(ptr), *next;
+ union bch_extent_entry *ret = entry;
+ bool drop_crc = true;
+
+ EBUG_ON(ptr < &ptrs.start->ptr ||
+ ptr >= &ptrs.end->ptr);
+ EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
+
+ for (next = extent_entry_next(entry);
+ next != ptrs.end;
+ next = extent_entry_next(next)) {
+ if (extent_entry_is_crc(next)) {
+ break;
+ } else if (extent_entry_is_ptr(next)) {
+ drop_crc = false;
+ break;
+ }
+ }
+
+ extent_entry_drop(k, entry);
+
+ while ((entry = extent_entry_prev(ptrs, entry))) {
+ if (extent_entry_is_ptr(entry))
+ break;
+
+ if ((extent_entry_is_crc(entry) && drop_crc) ||
+ extent_entry_is_stripe_ptr(entry)) {
+ ret = (void *) ret - extent_entry_bytes(entry);
+ extent_entry_drop(k, entry);
+ }
+ }
+
+ return ret;
+}
+
+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
+ struct bch_extent_ptr *ptr)
+{
+ bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr;
+ union bch_extent_entry *ret =
+ bch2_bkey_drop_ptr_noerror(k, ptr);
+
+ /*
+	 * If we deleted all the dirty pointers and there are still cached
+ * pointers, we could set the cached pointers to dirty if they're not
+ * stale - but to do that correctly we'd need to grab an open_bucket
+ * reference so that we don't race with bucket reuse:
+ */
+ if (have_dirty &&
+ !bch2_bkey_dirty_devs(k.s_c).nr) {
+ k.k->type = KEY_TYPE_error;
+ set_bkey_val_u64s(k.k, 0);
+ ret = NULL;
+ } else if (!bch2_bkey_nr_ptrs(k.s_c)) {
+ k.k->type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(k.k, 0);
+ ret = NULL;
+ }
+
+ return ret;
+}
+
+void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
+{
+ struct bch_extent_ptr *ptr;
+
+ bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev);
+}
+
+void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
+{
+ struct bch_extent_ptr *ptr = bch2_bkey_has_device(k, dev);
+
+ if (ptr)
+ bch2_bkey_drop_ptr_noerror(k, ptr);
+}
+
+const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(ptrs, ptr)
+ if (ptr->dev == dev)
+ return ptr;
+
+ return NULL;
+}
+
+bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(ptrs, ptr)
+ if (bch2_dev_in_target(c, ptr->dev, target) &&
+ (!ptr->cached ||
+ !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)))
+ return true;
+
+ return false;
+}
+
+bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k,
+ struct bch_extent_ptr m, u64 offset)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ if (p.ptr.dev == m.dev &&
+ p.ptr.gen == m.gen &&
+ (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(k.k) ==
+ (s64) m.offset - offset)
+ return true;
+
+ return false;
+}
+
+/*
+ * Returns true if two extents refer to the same data:
+ */
+bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
+{
+ if (k1.k->type != k2.k->type)
+ return false;
+
+ if (bkey_extent_is_direct_data(k1.k)) {
+ struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(k1);
+ struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
+ const union bch_extent_entry *entry1, *entry2;
+ struct extent_ptr_decoded p1, p2;
+
+ if (bkey_extent_is_unwritten(k1) != bkey_extent_is_unwritten(k2))
+ return false;
+
+ bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
+ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
+ if (p1.ptr.dev == p2.ptr.dev &&
+ p1.ptr.gen == p2.ptr.gen &&
+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+ return true;
+
+ return false;
+ } else {
+ /* KEY_TYPE_deleted, etc. */
+ return true;
+ }
+}
+
+struct bch_extent_ptr *
+bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bkey_s k2)
+{
+ struct bkey_ptrs ptrs2 = bch2_bkey_ptrs(k2);
+ union bch_extent_entry *entry2;
+ struct extent_ptr_decoded p2;
+
+ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
+ if (p1.ptr.dev == p2.ptr.dev &&
+ p1.ptr.gen == p2.ptr.gen &&
+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+ return &entry2->ptr;
+
+ return NULL;
+}
+
+void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
+{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+ union bch_extent_entry *entry;
+ union bch_extent_entry *ec = NULL;
+
+ bkey_extent_entry_for_each(ptrs, entry) {
+ if (&entry->ptr == ptr) {
+ ptr->cached = true;
+ if (ec)
+ extent_entry_drop(k, ec);
+ return;
+ }
+
+ if (extent_entry_is_stripe_ptr(entry))
+ ec = entry;
+ else if (extent_entry_is_ptr(entry))
+ ec = NULL;
+ }
+
+ BUG();
+}
+
+/*
+ * bch2_extent_normalize - clean up an extent, dropping stale pointers etc.
+ *
+ * Returns true if @k should be dropped entirely
+ *
+ * For existing keys, only called when btree nodes are being rewritten, not when
+ * they're merely being compacted/resorted in memory.
+ */
+bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
+{
+ struct bch_extent_ptr *ptr;
+
+ bch2_bkey_drop_ptrs(k, ptr,
+ ptr->cached &&
+ ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
+
+ return bkey_deleted(k.k);
+}
+
+void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ bool first = true;
+
+ if (c)
+ prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k));
+
+ bkey_extent_entry_for_each(ptrs, entry) {
+ if (!first)
+ prt_printf(out, " ");
+
+ switch (__extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr: {
+ const struct bch_extent_ptr *ptr = entry_to_ptr(entry);
+ struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+ ? bch_dev_bkey_exists(c, ptr->dev)
+ : NULL;
+
+ if (!ca) {
+ prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
+ (u64) ptr->offset, ptr->gen,
+ ptr->cached ? " cached" : "");
+ } else {
+ u32 offset;
+ u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
+
+ prt_printf(out, "ptr: %u:%llu:%u gen %u",
+ ptr->dev, b, offset, ptr->gen);
+ if (ptr->cached)
+ prt_str(out, " cached");
+ if (ptr->unwritten)
+ prt_str(out, " unwritten");
+ if (ca && ptr_stale(ca, ptr))
+ prt_printf(out, " stale");
+ }
+ break;
+ }
+ case BCH_EXTENT_ENTRY_crc32:
+ case BCH_EXTENT_ENTRY_crc64:
+ case BCH_EXTENT_ENTRY_crc128: {
+ struct bch_extent_crc_unpacked crc =
+ bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
+
+ prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
+ crc.compressed_size,
+ crc.uncompressed_size,
+ crc.offset, crc.nonce,
+ bch2_csum_types[crc.csum_type],
+ bch2_compression_types[crc.compression_type]);
+ break;
+ }
+ case BCH_EXTENT_ENTRY_stripe_ptr: {
+ const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr;
+
+ prt_printf(out, "ec: idx %llu block %u",
+ (u64) ec->idx, ec->block);
+ break;
+ }
+ case BCH_EXTENT_ENTRY_rebalance: {
+ const struct bch_extent_rebalance *r = &entry->rebalance;
+
+ prt_str(out, "rebalance: target ");
+ if (c)
+ bch2_target_to_text(out, c, r->target);
+ else
+ prt_printf(out, "%u", r->target);
+ prt_str(out, " compression ");
+ bch2_compression_opt_to_text(out, r->compression);
+ break;
+ }
+ default:
+ prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
+ return;
+ }
+
+ first = false;
+ }
+}
+
+static int extent_ptr_invalid(struct bch_fs *c,
+ struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ const struct bch_extent_ptr *ptr,
+ unsigned size_ondisk,
+ bool metadata,
+ struct printbuf *err)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ u64 bucket;
+ u32 bucket_offset;
+ struct bch_dev *ca;
+ int ret = 0;
+
+ if (!bch2_dev_exists2(c, ptr->dev)) {
+ /*
+ * If we're in the write path this key might have already been
+ * overwritten, and we could be seeing a device that doesn't
+ * exist anymore due to racing with device removal:
+ */
+ if (flags & BKEY_INVALID_WRITE)
+ return 0;
+
+ bkey_fsck_err(c, err, ptr_to_invalid_device,
+ "pointer to invalid device (%u)", ptr->dev);
+ }
+
+ ca = bch_dev_bkey_exists(c, ptr->dev);
+ bkey_for_each_ptr(ptrs, ptr2)
+ bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err,
+ ptr_to_duplicate_device,
+ "multiple pointers to same device (%u)", ptr->dev);
+
+ bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset);
+
+ bkey_fsck_err_on(bucket >= ca->mi.nbuckets, c, err,
+ ptr_after_last_bucket,
+ "pointer past last bucket (%llu > %llu)", bucket, ca->mi.nbuckets);
+ bkey_fsck_err_on(ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket), c, err,
+ ptr_before_first_bucket,
+ "pointer before first bucket (%llu < %u)", bucket, ca->mi.first_bucket);
+ bkey_fsck_err_on(bucket_offset + size_ondisk > ca->mi.bucket_size, c, err,
+ ptr_spans_multiple_buckets,
+ "pointer spans multiple buckets (%u + %u > %u)",
+ bucket_offset, size_ondisk, ca->mi.bucket_size);
+fsck_err:
+ return ret;
+}
+
+int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct bch_extent_crc_unpacked crc;
+ unsigned size_ondisk = k.k->size;
+ unsigned nonce = UINT_MAX;
+ unsigned nr_ptrs = 0;
+ bool have_written = false, have_unwritten = false, have_ec = false, crc_since_last_ptr = false;
+ int ret = 0;
+
+ if (bkey_is_btree_ptr(k.k))
+ size_ondisk = btree_sectors(c);
+
+ bkey_extent_entry_for_each(ptrs, entry) {
+ bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err,
+ extent_ptrs_invalid_entry,
+ "invalid extent entry type (got %u, max %u)",
+ __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX);
+
+ bkey_fsck_err_on(bkey_is_btree_ptr(k.k) &&
+ !extent_entry_is_ptr(entry), c, err,
+ btree_ptr_has_non_ptr,
+ "has non ptr field");
+
+ switch (extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ ret = extent_ptr_invalid(c, k, flags, &entry->ptr,
+ size_ondisk, false, err);
+ if (ret)
+ return ret;
+
+ bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err,
+ ptr_cached_and_erasure_coded,
+ "cached, erasure coded ptr");
+
+ if (!entry->ptr.unwritten)
+ have_written = true;
+ else
+ have_unwritten = true;
+
+ have_ec = false;
+ crc_since_last_ptr = false;
+ nr_ptrs++;
+ break;
+ case BCH_EXTENT_ENTRY_crc32:
+ case BCH_EXTENT_ENTRY_crc64:
+ case BCH_EXTENT_ENTRY_crc128:
+ crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
+
+ bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err,
+ ptr_crc_uncompressed_size_too_small,
+ "checksum offset + key size > uncompressed size");
+ bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err,
+ ptr_crc_csum_type_unknown,
+ "invalid checksum type");
+ bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err,
+ ptr_crc_compression_type_unknown,
+ "invalid compression type");
+
+ if (bch2_csum_type_is_encryption(crc.csum_type)) {
+ if (nonce == UINT_MAX)
+ nonce = crc.offset + crc.nonce;
+ else if (nonce != crc.offset + crc.nonce)
+ bkey_fsck_err(c, err, ptr_crc_nonce_mismatch,
+ "incorrect nonce");
+ }
+
+ bkey_fsck_err_on(crc_since_last_ptr, c, err,
+ ptr_crc_redundant,
+ "redundant crc entry");
+ crc_since_last_ptr = true;
+
+ bkey_fsck_err_on(crc_is_encoded(crc) &&
+ (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) &&
+ (flags & (BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT)), c, err,
+ ptr_crc_uncompressed_size_too_big,
+ "too large encoded extent");
+
+ size_ondisk = crc.compressed_size;
+ break;
+ case BCH_EXTENT_ENTRY_stripe_ptr:
+ bkey_fsck_err_on(have_ec, c, err,
+ ptr_stripe_redundant,
+ "redundant stripe entry");
+ have_ec = true;
+ break;
+ case BCH_EXTENT_ENTRY_rebalance: {
+ const struct bch_extent_rebalance *r = &entry->rebalance;
+
+ if (!bch2_compression_opt_valid(r->compression)) {
+ struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
+ prt_printf(err, "invalid compression opt %u:%u",
+ opt.type, opt.level);
+ return -BCH_ERR_invalid_bkey;
+ }
+ break;
+ }
+ }
+ }
+
+ bkey_fsck_err_on(!nr_ptrs, c, err,
+ extent_ptrs_no_ptrs,
+ "no ptrs");
+ bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err,
+ extent_ptrs_too_many_ptrs,
+ "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX);
+ bkey_fsck_err_on(have_written && have_unwritten, c, err,
+ extent_ptrs_written_and_unwritten,
+ "extent with unwritten and written ptrs");
+ bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err,
+ extent_ptrs_unwritten,
+ "has unwritten ptrs");
+ bkey_fsck_err_on(crc_since_last_ptr, c, err,
+ extent_ptrs_redundant_crc,
+ "redundant crc entry");
+ bkey_fsck_err_on(have_ec, c, err,
+ extent_ptrs_redundant_stripe,
+ "redundant stripe entry");
+fsck_err:
+ return ret;
+}
+
+void bch2_ptr_swab(struct bkey_s k)
+{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+ union bch_extent_entry *entry;
+ u64 *d;
+
+ for (d = (u64 *) ptrs.start;
+ d != (u64 *) ptrs.end;
+ d++)
+ *d = swab64(*d);
+
+ for (entry = ptrs.start;
+ entry < ptrs.end;
+ entry = extent_entry_next(entry)) {
+ switch (extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ break;
+ case BCH_EXTENT_ENTRY_crc32:
+ entry->crc32.csum = swab32(entry->crc32.csum);
+ break;
+ case BCH_EXTENT_ENTRY_crc64:
+ entry->crc64.csum_hi = swab16(entry->crc64.csum_hi);
+ entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
+ break;
+ case BCH_EXTENT_ENTRY_crc128:
+ entry->crc128.csum.hi = (__force __le64)
+ swab64((__force u64) entry->crc128.csum.hi);
+ entry->crc128.csum.lo = (__force __le64)
+ swab64((__force u64) entry->crc128.csum.lo);
+ break;
+ case BCH_EXTENT_ENTRY_stripe_ptr:
+ break;
+ case BCH_EXTENT_ENTRY_rebalance:
+ break;
+ }
+ }
+}
+
+const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+
+ bkey_extent_entry_for_each(ptrs, entry)
+ if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance)
+ return &entry->rebalance;
+
+ return NULL;
+}
+
+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
+ unsigned target, unsigned compression)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ unsigned rewrite_ptrs = 0;
+
+ if (compression) {
+ unsigned compression_type = bch2_compression_opt_to_type(compression);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned i = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
+ p.ptr.unwritten) {
+ rewrite_ptrs = 0;
+ goto incompressible;
+ }
+
+ if (!p.ptr.cached && p.crc.compression_type != compression_type)
+ rewrite_ptrs |= 1U << i;
+ i++;
+ }
+ }
+incompressible:
+ if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) {
+ unsigned i = 0;
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target))
+ rewrite_ptrs |= 1U << i;
+ i++;
+ }
+ }
+
+ return rewrite_ptrs;
+}
+
+bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
+{
+ const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
+
+ /*
+ * If it's an indirect extent, we don't delete the rebalance entry when
+ * done so that we know what options were applied - check if it still
+ * needs work done:
+ */
+ if (r &&
+ k.k->type == KEY_TYPE_reflink_v &&
+ !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression))
+ r = NULL;
+
+ return r != NULL;
+}
+
+int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
+ unsigned target, unsigned compression)
+{
+ struct bkey_s k = bkey_i_to_s(_k);
+ struct bch_extent_rebalance *r;
+ bool needs_rebalance;
+
+ if (!bkey_extent_is_direct_data(k.k))
+ return 0;
+
+ /* get existing rebalance entry: */
+ r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c);
+ if (r) {
+ if (k.k->type == KEY_TYPE_reflink_v) {
+ /*
+ * indirect extents: existing options take precedence,
+ * so that we don't move extents back and forth if
+ * they're referenced by different inodes with different
+ * options:
+ */
+ if (r->target)
+ target = r->target;
+ if (r->compression)
+ compression = r->compression;
+ }
+
+ r->target = target;
+ r->compression = compression;
+ }
+
+ needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression);
+
+ if (needs_rebalance && !r) {
+ union bch_extent_entry *new = bkey_val_end(k);
+
+ new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance;
+ new->rebalance.compression = compression;
+ new->rebalance.target = target;
+ new->rebalance.unused = 0;
+ k.k->u64s += extent_entry_u64s(new);
+ } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) {
+ /*
+ * For indirect extents, don't delete the rebalance entry when
+ * we're finished so that we know we specifically moved it or
+ * compressed it to its current location/compression type
+ */
+ extent_entry_drop(k, (union bch_extent_entry *) r);
+ }
+
+ return 0;
+}
+
+/* Generic extent code: */
+
+int bch2_cut_front_s(struct bpos where, struct bkey_s k)
+{
+ unsigned new_val_u64s = bkey_val_u64s(k.k);
+ int val_u64s_delta;
+ u64 sub;
+
+ if (bkey_le(where, bkey_start_pos(k.k)))
+ return 0;
+
+ EBUG_ON(bkey_gt(where, k.k->p));
+
+ sub = where.offset - bkey_start_offset(k.k);
+
+ k.k->size -= sub;
+
+ if (!k.k->size) {
+ k.k->type = KEY_TYPE_deleted;
+ new_val_u64s = 0;
+ }
+
+ switch (k.k->type) {
+ case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v: {
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+ union bch_extent_entry *entry;
+ bool seen_crc = false;
+
+ bkey_extent_entry_for_each(ptrs, entry) {
+ switch (extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ if (!seen_crc)
+ entry->ptr.offset += sub;
+ break;
+ case BCH_EXTENT_ENTRY_crc32:
+ entry->crc32.offset += sub;
+ break;
+ case BCH_EXTENT_ENTRY_crc64:
+ entry->crc64.offset += sub;
+ break;
+ case BCH_EXTENT_ENTRY_crc128:
+ entry->crc128.offset += sub;
+ break;
+ case BCH_EXTENT_ENTRY_stripe_ptr:
+ break;
+ case BCH_EXTENT_ENTRY_rebalance:
+ break;
+ }
+
+ if (extent_entry_is_crc(entry))
+ seen_crc = true;
+ }
+
+ break;
+ }
+ case KEY_TYPE_reflink_p: {
+ struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k);
+
+ le64_add_cpu(&p.v->idx, sub);
+ break;
+ }
+ case KEY_TYPE_inline_data:
+ case KEY_TYPE_indirect_inline_data: {
+ void *p = bkey_inline_data_p(k);
+ unsigned bytes = bkey_inline_data_bytes(k.k);
+
+ sub = min_t(u64, sub << 9, bytes);
+
+ memmove(p, p + sub, bytes - sub);
+
+ new_val_u64s -= sub >> 3;
+ break;
+ }
+ }
+
+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s;
+ BUG_ON(val_u64s_delta < 0);
+
+ set_bkey_val_u64s(k.k, new_val_u64s);
+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
+ return -val_u64s_delta;
+}
+
+int bch2_cut_back_s(struct bpos where, struct bkey_s k)
+{
+ unsigned new_val_u64s = bkey_val_u64s(k.k);
+ int val_u64s_delta;
+ u64 len = 0;
+
+ if (bkey_ge(where, k.k->p))
+ return 0;
+
+ EBUG_ON(bkey_lt(where, bkey_start_pos(k.k)));
+
+ len = where.offset - bkey_start_offset(k.k);
+
+ k.k->p.offset = where.offset;
+ k.k->size = len;
+
+ if (!len) {
+ k.k->type = KEY_TYPE_deleted;
+ new_val_u64s = 0;
+ }
+
+ switch (k.k->type) {
+ case KEY_TYPE_inline_data:
+ case KEY_TYPE_indirect_inline_data:
+ new_val_u64s = (bkey_inline_data_offset(k.k) +
+ min(bkey_inline_data_bytes(k.k), k.k->size << 9)) >> 3;
+ break;
+ }
+
+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s;
+ BUG_ON(val_u64s_delta < 0);
+
+ set_bkey_val_u64s(k.k, new_val_u64s);
+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
+ return -val_u64s_delta;
+}
diff --git a/c_src/libbcachefs/extents.h b/c_src/libbcachefs/extents.h
new file mode 100644
index 00000000..a855c94d
--- /dev/null
+++ b/c_src/libbcachefs/extents.h
@@ -0,0 +1,757 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EXTENTS_H
+#define _BCACHEFS_EXTENTS_H
+
+#include "bcachefs.h"
+#include "bkey.h"
+#include "extents_types.h"
+
+struct bch_fs;
+struct btree_trans;
+enum bkey_invalid_flags;
+
+/* extent entries: */
+
+#define extent_entry_last(_e) \
+ ((typeof(&(_e).v->start[0])) bkey_val_end(_e))
+
+#define entry_to_ptr(_entry) \
+({ \
+ EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \
+ \
+ __builtin_choose_expr( \
+ type_is_exact(_entry, const union bch_extent_entry *), \
+ (const struct bch_extent_ptr *) (_entry), \
+ (struct bch_extent_ptr *) (_entry)); \
+})
+
+/* downcast, preserves const */
+#define to_entry(_entry) \
+({ \
+ BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \
+ !type_is(_entry, struct bch_extent_ptr *) && \
+ !type_is(_entry, struct bch_extent_stripe_ptr *)); \
+ \
+ __builtin_choose_expr( \
+ (type_is_exact(_entry, const union bch_extent_crc *) || \
+ type_is_exact(_entry, const struct bch_extent_ptr *) ||\
+ type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\
+ (const union bch_extent_entry *) (_entry), \
+ (union bch_extent_entry *) (_entry)); \
+})
+
+#define extent_entry_next(_entry) \
+ ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry)))
+
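+/*
+ * The entry type field is a one-hot bit (1 << BCH_EXTENT_ENTRY_*), so __ffs()
+ * recovers the enum value; a zero type field maps to BCH_EXTENT_ENTRY_MAX
+ * (i.e. unknown/invalid):
+ */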
+static inline unsigned
+__extent_entry_type(const union bch_extent_entry *e)
+{
+ return e->type ? __ffs(e->type) : BCH_EXTENT_ENTRY_MAX;
+}
+
+static inline enum bch_extent_entry_type
+extent_entry_type(const union bch_extent_entry *e)
+{
+ int ret = __ffs(e->type);
+
+ EBUG_ON(ret < 0 || ret >= BCH_EXTENT_ENTRY_MAX);
+
+ return ret;
+}
+
+static inline size_t extent_entry_bytes(const union bch_extent_entry *entry)
+{
+ switch (extent_entry_type(entry)) {
+#define x(f, n) \
+ case BCH_EXTENT_ENTRY_##f: \
+ return sizeof(struct bch_extent_##f);
+ BCH_EXTENT_ENTRY_TYPES()
+#undef x
+ default:
+ BUG();
+ }
+}
+
+static inline size_t extent_entry_u64s(const union bch_extent_entry *entry)
+{
+ return extent_entry_bytes(entry) / sizeof(u64);
+}
+
+static inline void __extent_entry_insert(struct bkey_i *k,
+ union bch_extent_entry *dst,
+ union bch_extent_entry *new)
+{
+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
+
+ memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
+ dst, (u64 *) end - (u64 *) dst);
+ k->k.u64s += extent_entry_u64s(new);
+ memcpy_u64s_small(dst, new, extent_entry_u64s(new));
+}
+
+static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
+{
+ union bch_extent_entry *next = extent_entry_next(entry);
+
+ /* stripes have ptrs, but their layout doesn't work with this code */
+ BUG_ON(k.k->type == KEY_TYPE_stripe);
+
+ memmove_u64s_down(entry, next,
+ (u64 *) bkey_val_end(k) - (u64 *) next);
+ k.k->u64s -= (u64 *) next - (u64 *) entry;
+}
+
+static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
+{
+ return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
+}
+
+static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e)
+{
+ return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
+}
+
+static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
+{
+ switch (extent_entry_type(e)) {
+ case BCH_EXTENT_ENTRY_crc32:
+ case BCH_EXTENT_ENTRY_crc64:
+ case BCH_EXTENT_ENTRY_crc128:
+ return true;
+ default:
+ return false;
+ }
+}
+
+union bch_extent_crc {
+ u8 type;
+ struct bch_extent_crc32 crc32;
+ struct bch_extent_crc64 crc64;
+ struct bch_extent_crc128 crc128;
+};
+
+#define __entry_to_crc(_entry) \
+ __builtin_choose_expr( \
+ type_is_exact(_entry, const union bch_extent_entry *), \
+ (const union bch_extent_crc *) (_entry), \
+ (union bch_extent_crc *) (_entry))
+
+#define entry_to_crc(_entry) \
+({ \
+ EBUG_ON((_entry) && !extent_entry_is_crc(_entry)); \
+ \
+ __entry_to_crc(_entry); \
+})
+
+static inline struct bch_extent_crc_unpacked
+bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
+{
+#define common_fields(_crc) \
+ .csum_type = _crc.csum_type, \
+ .compression_type = _crc.compression_type, \
+ .compressed_size = _crc._compressed_size + 1, \
+ .uncompressed_size = _crc._uncompressed_size + 1, \
+ .offset = _crc.offset, \
+ .live_size = k->size
+
+ if (!crc)
+ return (struct bch_extent_crc_unpacked) {
+ .compressed_size = k->size,
+ .uncompressed_size = k->size,
+ .live_size = k->size,
+ };
+
+ switch (extent_entry_type(to_entry(crc))) {
+ case BCH_EXTENT_ENTRY_crc32: {
+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
+ common_fields(crc->crc32),
+ };
+
+ *((__le32 *) &ret.csum.lo) = (__le32 __force) crc->crc32.csum;
+ return ret;
+ }
+ case BCH_EXTENT_ENTRY_crc64: {
+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
+ common_fields(crc->crc64),
+ .nonce = crc->crc64.nonce,
+ .csum.lo = (__force __le64) crc->crc64.csum_lo,
+ };
+
+ *((__le16 *) &ret.csum.hi) = (__le16 __force) crc->crc64.csum_hi;
+
+ return ret;
+ }
+ case BCH_EXTENT_ENTRY_crc128: {
+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
+ common_fields(crc->crc128),
+ .nonce = crc->crc128.nonce,
+ .csum = crc->crc128.csum,
+ };
+
+ return ret;
+ }
+ default:
+ BUG();
+ }
+#undef common_fields
+}
+
+static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc)
+{
+ return (crc.compression_type != BCH_COMPRESSION_TYPE_none &&
+ crc.compression_type != BCH_COMPRESSION_TYPE_incompressible);
+}
+
+static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc)
+{
+ return crc.csum_type != BCH_CSUM_none || crc_is_compressed(crc);
+}
+
+/* bkey_ptrs: generically over any key type that has ptrs */
+
+struct bkey_ptrs_c {
+ const union bch_extent_entry *start;
+ const union bch_extent_entry *end;
+};
+
+struct bkey_ptrs {
+ union bch_extent_entry *start;
+ union bch_extent_entry *end;
+};
+
+static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr: {
+ struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k);
+
+ return (struct bkey_ptrs_c) {
+ to_entry(&e.v->start[0]),
+ to_entry(extent_entry_last(e))
+ };
+ }
+ case KEY_TYPE_extent: {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+
+ return (struct bkey_ptrs_c) {
+ e.v->start,
+ extent_entry_last(e)
+ };
+ }
+ case KEY_TYPE_stripe: {
+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+ return (struct bkey_ptrs_c) {
+ to_entry(&s.v->ptrs[0]),
+ to_entry(&s.v->ptrs[s.v->nr_blocks]),
+ };
+ }
+ case KEY_TYPE_reflink_v: {
+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+ return (struct bkey_ptrs_c) {
+ r.v->start,
+ bkey_val_end(r),
+ };
+ }
+ case KEY_TYPE_btree_ptr_v2: {
+ struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k);
+
+ return (struct bkey_ptrs_c) {
+ to_entry(&e.v->start[0]),
+ to_entry(extent_entry_last(e))
+ };
+ }
+ default:
+ return (struct bkey_ptrs_c) { NULL, NULL };
+ }
+}
+
+static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k)
+{
+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c);
+
+ return (struct bkey_ptrs) {
+ (void *) p.start,
+ (void *) p.end
+ };
+}
+
+#define __bkey_extent_entry_for_each_from(_start, _end, _entry) \
+ for ((_entry) = (_start); \
+ (_entry) < (_end); \
+ (_entry) = extent_entry_next(_entry))
+
+#define __bkey_ptr_next(_ptr, _end) \
+({ \
+ typeof(_end) _entry; \
+ \
+ __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \
+ if (extent_entry_is_ptr(_entry)) \
+ break; \
+ \
+ _entry < (_end) ? entry_to_ptr(_entry) : NULL; \
+})
+
+#define bkey_extent_entry_for_each_from(_p, _entry, _start) \
+ __bkey_extent_entry_for_each_from(_start, (_p).end, _entry)
+
+#define bkey_extent_entry_for_each(_p, _entry) \
+ bkey_extent_entry_for_each_from(_p, _entry, _p.start)
+
+#define __bkey_for_each_ptr(_start, _end, _ptr) \
+ for (typeof(_start) (_ptr) = (_start); \
+ ((_ptr) = __bkey_ptr_next(_ptr, _end)); \
+ (_ptr)++)
+
+#define bkey_ptr_next(_p, _ptr) \
+ __bkey_ptr_next(_ptr, (_p).end)
+
+#define bkey_for_each_ptr(_p, _ptr) \
+ __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr)
+
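+/*
+ * Advance to the next ptr entry, accumulating the most recently seen crc and
+ * stripe_ptr entries into the decoded pointer along the way:
+ */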
+#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry) \
+({ \
+ __label__ out; \
+ \
+ (_ptr).idx = 0; \
+ (_ptr).has_ec = false; \
+ \
+ __bkey_extent_entry_for_each_from(_entry, _end, _entry) \
+ switch (extent_entry_type(_entry)) { \
+ case BCH_EXTENT_ENTRY_ptr: \
+ (_ptr).ptr = _entry->ptr; \
+ goto out; \
+ case BCH_EXTENT_ENTRY_crc32: \
+ case BCH_EXTENT_ENTRY_crc64: \
+ case BCH_EXTENT_ENTRY_crc128: \
+ (_ptr).crc = bch2_extent_crc_unpack(_k, \
+ entry_to_crc(_entry)); \
+ break; \
+ case BCH_EXTENT_ENTRY_stripe_ptr: \
+ (_ptr).ec = _entry->stripe_ptr; \
+ (_ptr).has_ec = true; \
+ break; \
+ default: \
+ /* nothing */ \
+ break; \
+ } \
+out: \
+ _entry < (_end); \
+})
+
+#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry) \
+ for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL), \
+ (_entry) = _start; \
+ __bkey_ptr_next_decode(_k, _end, _ptr, _entry); \
+ (_entry) = extent_entry_next(_entry))
+
+#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry) \
+ __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \
+ _ptr, _entry)
+
+#define bkey_crc_next(_k, _start, _end, _crc, _iter) \
+({ \
+ __bkey_extent_entry_for_each_from(_iter, _end, _iter) \
+ if (extent_entry_is_crc(_iter)) { \
+ (_crc) = bch2_extent_crc_unpack(_k, \
+ entry_to_crc(_iter)); \
+ break; \
+ } \
+ \
+ (_iter) < (_end); \
+})
+
+#define __bkey_for_each_crc(_k, _start, _end, _crc, _iter) \
+ for ((_crc) = bch2_extent_crc_unpack(_k, NULL), \
+ (_iter) = (_start); \
+ bkey_crc_next(_k, _start, _end, _crc, _iter); \
+ (_iter) = extent_entry_next(_iter))
+
+#define bkey_for_each_crc(_k, _p, _crc, _iter) \
+ __bkey_for_each_crc(_k, (_p).start, (_p).end, _crc, _iter)
+
+/* Iterate over pointers in KEY_TYPE_extent: */
+
+#define extent_for_each_entry_from(_e, _entry, _start) \
+ __bkey_extent_entry_for_each_from(_start, \
+ extent_entry_last(_e), _entry)
+
+#define extent_for_each_entry(_e, _entry) \
+ extent_for_each_entry_from(_e, _entry, (_e).v->start)
+
+#define extent_ptr_next(_e, _ptr) \
+ __bkey_ptr_next(_ptr, extent_entry_last(_e))
+
+#define extent_for_each_ptr(_e, _ptr) \
+ __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr)
+
+#define extent_for_each_ptr_decode(_e, _ptr, _entry) \
+ __bkey_for_each_ptr_decode((_e).k, (_e).v->start, \
+ extent_entry_last(_e), _ptr, _entry)
+
+/* utility code common to all keys with pointers: */
+
+void bch2_mark_io_failure(struct bch_io_failures *,
+ struct extent_ptr_decoded *);
+int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
+ struct bch_io_failures *,
+ struct extent_ptr_decoded *);
+
+/* KEY_TYPE_btree_ptr: */
+
+int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
+ struct bkey_s_c);
+
+int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
+ int, struct bkey_s);
+
+#define bch2_bkey_ops_btree_ptr ((struct bkey_ops) { \
+ .key_invalid = bch2_btree_ptr_invalid, \
+ .val_to_text = bch2_btree_ptr_to_text, \
+ .swab = bch2_ptr_swab, \
+ .trigger = bch2_trigger_extent, \
+})
+
+#define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \
+ .key_invalid = bch2_btree_ptr_v2_invalid, \
+ .val_to_text = bch2_btree_ptr_v2_to_text, \
+ .swab = bch2_ptr_swab, \
+ .compat = bch2_btree_ptr_v2_compat, \
+ .trigger = bch2_trigger_extent, \
+ .min_val_size = 40, \
+})
+
+/* KEY_TYPE_extent: */
+
+bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
+
+#define bch2_bkey_ops_extent ((struct bkey_ops) { \
+ .key_invalid = bch2_bkey_ptrs_invalid, \
+ .val_to_text = bch2_bkey_ptrs_to_text, \
+ .swab = bch2_ptr_swab, \
+ .key_normalize = bch2_extent_normalize, \
+ .key_merge = bch2_extent_merge, \
+ .trigger = bch2_trigger_extent, \
+})
+
+/* KEY_TYPE_reservation: */
+
+int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
+
+#define bch2_bkey_ops_reservation ((struct bkey_ops) { \
+ .key_invalid = bch2_reservation_invalid, \
+ .val_to_text = bch2_reservation_to_text, \
+ .key_merge = bch2_reservation_merge, \
+ .trigger = bch2_trigger_reservation, \
+ .min_val_size = 8, \
+})
+
+/* Extent checksum entries: */
+
+bool bch2_can_narrow_extent_crcs(struct bkey_s_c,
+ struct bch_extent_crc_unpacked);
+bool bch2_bkey_narrow_crcs(struct bkey_i *, struct bch_extent_crc_unpacked);
+void bch2_extent_crc_append(struct bkey_i *,
+ struct bch_extent_crc_unpacked);
+
+/* Generic code for keys with pointers: */
+
+static inline bool bkey_is_btree_ptr(const struct bkey *k)
+{
+ switch (k->type) {
+ case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_btree_ptr_v2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool bkey_extent_is_direct_data(const struct bkey *k)
+{
+ switch (k->type) {
+ case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_btree_ptr_v2:
+ case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool bkey_extent_is_inline_data(const struct bkey *k)
+{
+ return k->type == KEY_TYPE_inline_data ||
+ k->type == KEY_TYPE_indirect_inline_data;
+}
+
+static inline unsigned bkey_inline_data_offset(const struct bkey *k)
+{
+ switch (k->type) {
+ case KEY_TYPE_inline_data:
+ return sizeof(struct bch_inline_data);
+ case KEY_TYPE_indirect_inline_data:
+ return sizeof(struct bch_indirect_inline_data);
+ default:
+ BUG();
+ }
+}
+
+static inline unsigned bkey_inline_data_bytes(const struct bkey *k)
+{
+ return bkey_val_bytes(k) - bkey_inline_data_offset(k);
+}
+
+#define bkey_inline_data_p(_k) (((void *) (_k).v) + bkey_inline_data_offset((_k).k))
+
+static inline bool bkey_extent_is_data(const struct bkey *k)
+{
+ return bkey_extent_is_direct_data(k) ||
+ bkey_extent_is_inline_data(k) ||
+ k->type == KEY_TYPE_reflink_p;
+}
+
+/*
+ * Should extent be counted under inode->i_sectors?
+ */
+static inline bool bkey_extent_is_allocation(const struct bkey *k)
+{
+ switch (k->type) {
+ case KEY_TYPE_extent:
+ case KEY_TYPE_reservation:
+ case KEY_TYPE_reflink_p:
+ case KEY_TYPE_reflink_v:
+ case KEY_TYPE_inline_data:
+ case KEY_TYPE_indirect_inline_data:
+ case KEY_TYPE_error:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool bkey_extent_is_unwritten(struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(ptrs, ptr)
+ if (ptr->unwritten)
+ return true;
+ return false;
+}
+
+static inline bool bkey_extent_is_reservation(struct bkey_s_c k)
+{
+ return k.k->type == KEY_TYPE_reservation ||
+ bkey_extent_is_unwritten(k);
+}
+
+static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
+{
+ struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(p, ptr)
+ ret.data[ret.nr++] = ptr->dev;
+
+ return ret;
+}
+
+static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k)
+{
+ struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(p, ptr)
+ if (!ptr->cached)
+ ret.data[ret.nr++] = ptr->dev;
+
+ return ret;
+}
+
+static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
+{
+ struct bch_devs_list ret = (struct bch_devs_list) { 0 };
+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(p, ptr)
+ if (ptr->cached)
+ ret.data[ret.nr++] = ptr->dev;
+
+ return ret;
+}
+
+static inline unsigned bch2_bkey_ptr_data_type(struct bkey_s_c k, const struct bch_extent_ptr *ptr)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_btree_ptr_v2:
+ return BCH_DATA_btree;
+ case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v:
+ return BCH_DATA_user;
+ case KEY_TYPE_stripe: {
+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+ BUG_ON(ptr < s.v->ptrs ||
+ ptr >= s.v->ptrs + s.v->nr_blocks);
+
+ return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
+ ? BCH_DATA_parity
+ : BCH_DATA_user;
+ }
+ default:
+ BUG();
+ }
+}
+
+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
+bool bch2_bkey_is_incompressible(struct bkey_s_c);
+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
+
+unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *, struct extent_ptr_decoded *);
+unsigned bch2_extent_ptr_durability(struct bch_fs *, struct extent_ptr_decoded *);
+unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
+
+void bch2_bkey_drop_device(struct bkey_s, unsigned);
+void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned);
+
+const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c, unsigned);
+
+static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsigned dev)
+{
+ return (void *) bch2_bkey_has_device_c(k.s_c, dev);
+}
+
+bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
+
+void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *);
+
+static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr ptr)
+{
+ struct bch_extent_ptr *dest;
+
+ EBUG_ON(bch2_bkey_has_device(bkey_i_to_s(k), ptr.dev));
+
+ switch (k->k.type) {
+ case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_btree_ptr_v2:
+ case KEY_TYPE_extent:
+ EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
+
+ ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
+ dest = (struct bch_extent_ptr *)((void *) &k->v + bkey_val_bytes(&k->k));
+ *dest = ptr;
+ k->k.u64s++;
+ break;
+ default:
+ BUG();
+ }
+}
+
+void bch2_extent_ptr_decoded_append(struct bkey_i *,
+ struct extent_ptr_decoded *);
+union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s,
+ struct bch_extent_ptr *);
+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
+ struct bch_extent_ptr *);
+
+#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \
+do { \
+ struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \
+ \
+ _ptr = &_ptrs.start->ptr; \
+ \
+ while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \
+ if (_cond) { \
+ _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \
+ _ptrs = bch2_bkey_ptrs(_k); \
+ continue; \
+ } \
+ \
+ (_ptr)++; \
+ } \
+} while (0)
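+
+/*
+ * Example (see bch2_extent_normalize() in extents.c) - drop all stale cached
+ * pointers:
+ *
+ *   bch2_bkey_drop_ptrs(k, ptr,
+ *	ptr->cached && ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
+ */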
+
+bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
+ struct bch_extent_ptr, u64);
+bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
+struct bch_extent_ptr *
+bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
+
+void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
+
+bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
+ struct bkey_s_c);
+int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+
+void bch2_ptr_swab(struct bkey_s);
+
+const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c);
+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c,
+ unsigned, unsigned);
+bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c);
+
+int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *,
+ unsigned, unsigned);
+
+/* Generic extent code: */
+
+enum bch_extent_overlap {
+ BCH_EXTENT_OVERLAP_ALL = 0,
+ BCH_EXTENT_OVERLAP_BACK = 1,
+ BCH_EXTENT_OVERLAP_FRONT = 2,
+ BCH_EXTENT_OVERLAP_MIDDLE = 3,
+};
+
+/* Returns how k overlaps with m */
+static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
+ const struct bkey *m)
+{
+ int cmp1 = bkey_lt(k->p, m->p);
+ int cmp2 = bkey_gt(bkey_start_pos(k), bkey_start_pos(m));
+
+ return (cmp1 << 1) + cmp2;
+}
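+
+/*
+ * Worked example (illustrative, not from the original source): the two
+ * comparisons pack directly into the enum values above. With m spanning
+ * sectors [10, 20):
+ *
+ *	k = [10, 20) -> cmp1 = 0, cmp2 = 0 -> 0 = BCH_EXTENT_OVERLAP_ALL
+ *	k = [15, 20) -> cmp1 = 0, cmp2 = 1 -> 1 = BCH_EXTENT_OVERLAP_BACK
+ *	k = [10, 15) -> cmp1 = 1, cmp2 = 0 -> 2 = BCH_EXTENT_OVERLAP_FRONT
+ *	k = [12, 18) -> cmp1 = 1, cmp2 = 1 -> 3 = BCH_EXTENT_OVERLAP_MIDDLE
+ */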
+
+int bch2_cut_front_s(struct bpos, struct bkey_s);
+int bch2_cut_back_s(struct bpos, struct bkey_s);
+
+static inline void bch2_cut_front(struct bpos where, struct bkey_i *k)
+{
+ bch2_cut_front_s(where, bkey_i_to_s(k));
+}
+
+static inline void bch2_cut_back(struct bpos where, struct bkey_i *k)
+{
+ bch2_cut_back_s(where, bkey_i_to_s(k));
+}
+
+/**
+ * bch2_key_resize - adjust size of @k
+ *
+ * bkey_start_offset(k) is preserved; only where the extent ends is modified
+ */
+static inline void bch2_key_resize(struct bkey *k, unsigned new_size)
+{
+ k->p.offset -= k->size;
+ k->p.offset += new_size;
+ k->size = new_size;
+}
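+
+/*
+ * Illustrative example (not from the original source): for an extent with
+ * bkey_start_offset(k) == 10, k->p.offset == 20 and k->size == 10, calling
+ * bch2_key_resize(k, 5) leaves the start at 10 and moves the end to 15:
+ * k->p.offset becomes 20 - 10 + 5 = 15 and k->size becomes 5.
+ */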
+
+#endif /* _BCACHEFS_EXTENTS_H */
diff --git a/c_src/libbcachefs/extents_types.h b/c_src/libbcachefs/extents_types.h
new file mode 100644
index 00000000..43d6c341
--- /dev/null
+++ b/c_src/libbcachefs/extents_types.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EXTENTS_TYPES_H
+#define _BCACHEFS_EXTENTS_TYPES_H
+
+#include "bcachefs_format.h"
+
+struct bch_extent_crc_unpacked {
+ u32 compressed_size;
+ u32 uncompressed_size;
+ u32 live_size;
+
+ u8 csum_type;
+ u8 compression_type;
+
+ u16 offset;
+
+ u16 nonce;
+
+ struct bch_csum csum;
+};
+
+struct extent_ptr_decoded {
+ unsigned idx;
+ bool has_ec;
+ struct bch_extent_crc_unpacked crc;
+ struct bch_extent_ptr ptr;
+ struct bch_extent_stripe_ptr ec;
+};
+
+struct bch_io_failures {
+ u8 nr;
+ struct bch_dev_io_failures {
+ u8 dev;
+ u8 idx;
+ u8 nr_failed;
+ u8 nr_retries;
+ } devs[BCH_REPLICAS_MAX];
+};
+
+#endif /* _BCACHEFS_EXTENTS_TYPES_H */
diff --git a/c_src/libbcachefs/eytzinger.h b/c_src/libbcachefs/eytzinger.h
new file mode 100644
index 00000000..05429c96
--- /dev/null
+++ b/c_src/libbcachefs/eytzinger.h
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _EYTZINGER_H
+#define _EYTZINGER_H
+
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "util.h"
+
+/*
+ * Traversal for trees in eytzinger layout - a full binary tree laid out in an
+ * array.
+ */
+
+/*
+ * One-based indexing version:
+ *
+ * With one-based indexing, each level of the tree starts at a power of two -
+ * good for cacheline alignment:
+ */
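+
+/*
+ * Illustrative example (not from the original source): for a one-based
+ * eytzinger array of size 7, node i has children 2i and 2i + 1, so the array
+ * stores the tree level by level:
+ *
+ *	index:  1  2  3  4  5  6  7
+ *	depth:  0  1  1  2  2  2  2
+ *
+ * An in-order walk visits 4 2 5 1 6 3 7, which is exactly the sequence
+ * produced by eytzinger1_first(7) followed by repeated eytzinger1_next(),
+ * i.e. what eytzinger1_for_each() below iterates over:
+ *
+ *	unsigned i;
+ *	eytzinger1_for_each(i, 7)
+ *		printf("%u ", i);	// prints 4 2 5 1 6 3 7
+ */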
+
+static inline unsigned eytzinger1_child(unsigned i, unsigned child)
+{
+ EBUG_ON(child > 1);
+
+ return (i << 1) + child;
+}
+
+static inline unsigned eytzinger1_left_child(unsigned i)
+{
+ return eytzinger1_child(i, 0);
+}
+
+static inline unsigned eytzinger1_right_child(unsigned i)
+{
+ return eytzinger1_child(i, 1);
+}
+
+static inline unsigned eytzinger1_first(unsigned size)
+{
+ return rounddown_pow_of_two(size);
+}
+
+static inline unsigned eytzinger1_last(unsigned size)
+{
+ return rounddown_pow_of_two(size + 1) - 1;
+}
+
+/*
+ * eytzinger1_next() and eytzinger1_prev() have the nice properties that
+ *
+ * eytzinger1_next(0) == eytzinger1_first()
+ * eytzinger1_prev(0) == eytzinger1_last()
+ *
+ * eytzinger1_prev(eytzinger1_first()) == 0
+ * eytzinger1_next(eytzinger1_last()) == 0
+ */
+
+static inline unsigned eytzinger1_next(unsigned i, unsigned size)
+{
+ EBUG_ON(i > size);
+
+ if (eytzinger1_right_child(i) <= size) {
+ i = eytzinger1_right_child(i);
+
+ i <<= __fls(size + 1) - __fls(i);
+ i >>= i > size;
+ } else {
+ i >>= ffz(i) + 1;
+ }
+
+ return i;
+}
+
+static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
+{
+ EBUG_ON(i > size);
+
+ if (eytzinger1_left_child(i) <= size) {
+ i = eytzinger1_left_child(i) + 1;
+
+ i <<= __fls(size + 1) - __fls(i);
+ i -= 1;
+ i >>= i > size;
+ } else {
+ i >>= __ffs(i) + 1;
+ }
+
+ return i;
+}
+
+static inline unsigned eytzinger1_extra(unsigned size)
+{
+ return (size + 1 - rounddown_pow_of_two(size)) << 1;
+}
+
+static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
+ unsigned extra)
+{
+ unsigned b = __fls(i);
+ unsigned shift = __fls(size) - b;
+ int s;
+
+ EBUG_ON(!i || i > size);
+
+ i ^= 1U << b;
+ i <<= 1;
+ i |= 1;
+ i <<= shift;
+
+ /*
+ * sign bit trick:
+ *
+ * if (i > extra)
+ * i -= (i - extra) >> 1;
+ */
+ s = extra - i;
+ i += (s >> 1) & (s >> 31);
+
+ return i;
+}
+
+static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
+ unsigned extra)
+{
+ unsigned shift;
+ int s;
+
+ EBUG_ON(!i || i > size);
+
+ /*
+ * sign bit trick:
+ *
+ * if (i > extra)
+ * i += i - extra;
+ */
+ s = extra - i;
+ i -= s & (s >> 31);
+
+ shift = __ffs(i);
+
+ i >>= shift + 1;
+ i |= 1U << (__fls(size) - shift);
+
+ return i;
+}
+
+static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size)
+{
+ return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size));
+}
+
+static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
+{
+ return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size));
+}
+
+#define eytzinger1_for_each(_i, _size) \
+ for ((_i) = eytzinger1_first((_size)); \
+ (_i) != 0; \
+ (_i) = eytzinger1_next((_i), (_size)))
+
+/* Zero based indexing version: */
+
+static inline unsigned eytzinger0_child(unsigned i, unsigned child)
+{
+ EBUG_ON(child > 1);
+
+ return (i << 1) + 1 + child;
+}
+
+static inline unsigned eytzinger0_left_child(unsigned i)
+{
+ return eytzinger0_child(i, 0);
+}
+
+static inline unsigned eytzinger0_right_child(unsigned i)
+{
+ return eytzinger0_child(i, 1);
+}
+
+static inline unsigned eytzinger0_first(unsigned size)
+{
+ return eytzinger1_first(size) - 1;
+}
+
+static inline unsigned eytzinger0_last(unsigned size)
+{
+ return eytzinger1_last(size) - 1;
+}
+
+static inline unsigned eytzinger0_next(unsigned i, unsigned size)
+{
+ return eytzinger1_next(i + 1, size) - 1;
+}
+
+static inline unsigned eytzinger0_prev(unsigned i, unsigned size)
+{
+ return eytzinger1_prev(i + 1, size) - 1;
+}
+
+static inline unsigned eytzinger0_extra(unsigned size)
+{
+ return eytzinger1_extra(size);
+}
+
+static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size,
+ unsigned extra)
+{
+ return __eytzinger1_to_inorder(i + 1, size, extra) - 1;
+}
+
+static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size,
+ unsigned extra)
+{
+ return __inorder_to_eytzinger1(i + 1, size, extra) - 1;
+}
+
+static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size)
+{
+ return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size));
+}
+
+static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
+{
+ return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size));
+}
+
+#define eytzinger0_for_each(_i, _size) \
+ for ((_i) = eytzinger0_first((_size)); \
+ (_i) != -1; \
+ (_i) = eytzinger0_next((_i), (_size)))
+
+typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
+
+/* return greatest node <= @search, or -1 if not found */
+static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
+ eytzinger_cmp_fn cmp, const void *search)
+{
+ unsigned i, n = 0;
+
+ if (!nr)
+ return -1;
+
+ do {
+ i = n;
+ n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
+ } while (n < nr);
+
+ if (n & 1) {
+ /* @i was greater than @search, return previous node: */
+
+ if (i == eytzinger0_first(nr))
+ return -1;
+
+ return eytzinger0_prev(i, nr);
+ } else {
+ return i;
+ }
+}
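+
+/*
+ * Illustrative usage sketch (not from the original source; the array, nr and
+ * comparator below are hypothetical): finding the greatest element <= a key
+ * in an array of u32s that has been put in eytzinger0 order (e.g. with
+ * eytzinger0_sort()):
+ *
+ *	static int cmp_u32(const void *l, const void *r, size_t size)
+ *	{
+ *		u32 a = *(const u32 *) l, b = *(const u32 *) r;
+ *
+ *		return a < b ? -1 : a > b ? 1 : 0;
+ *	}
+ *
+ *	u32 key = 42;
+ *	ssize_t idx = eytzinger0_find_le(vals, nr, sizeof(vals[0]),
+ *					 cmp_u32, &key);
+ *	if (idx >= 0)
+ *		// vals[idx] is the greatest element <= key
+ */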
+
+#define eytzinger0_find(base, nr, size, _cmp, search) \
+({ \
+ void *_base = (base); \
+ void *_search = (search); \
+ size_t _nr = (nr); \
+ size_t _size = (size); \
+ size_t _i = 0; \
+ int _res; \
+ \
+ while (_i < _nr && \
+ (_res = _cmp(_search, _base + _i * _size, _size))) \
+ _i = eytzinger0_child(_i, _res > 0); \
+ _i; \
+})
+
+void eytzinger0_sort(void *, size_t, size_t,
+ int (*cmp_func)(const void *, const void *, size_t),
+ void (*swap_func)(void *, void *, size_t));
+
+#endif /* _EYTZINGER_H */
diff --git a/c_src/libbcachefs/fifo.h b/c_src/libbcachefs/fifo.h
new file mode 100644
index 00000000..66b945be
--- /dev/null
+++ b/c_src/libbcachefs/fifo.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FIFO_H
+#define _BCACHEFS_FIFO_H
+
+#include "util.h"
+
+#define FIFO(type) \
+struct { \
+ size_t front, back, size, mask; \
+ type *data; \
+}
+
+#define DECLARE_FIFO(type, name) FIFO(type) name
+
+#define fifo_buf_size(fifo) \
+ ((fifo)->size \
+ ? roundup_pow_of_two((fifo)->size) * sizeof((fifo)->data[0]) \
+ : 0)
+
+#define init_fifo(fifo, _size, _gfp) \
+({ \
+ (fifo)->front = (fifo)->back = 0; \
+ (fifo)->size = (_size); \
+ (fifo)->mask = (fifo)->size \
+ ? roundup_pow_of_two((fifo)->size) - 1 \
+ : 0; \
+ (fifo)->data = kvpmalloc(fifo_buf_size(fifo), (_gfp)); \
+})
+
+#define free_fifo(fifo) \
+do { \
+ kvpfree((fifo)->data, fifo_buf_size(fifo)); \
+ (fifo)->data = NULL; \
+} while (0)
+
+#define fifo_swap(l, r) \
+do { \
+ swap((l)->front, (r)->front); \
+ swap((l)->back, (r)->back); \
+ swap((l)->size, (r)->size); \
+ swap((l)->mask, (r)->mask); \
+ swap((l)->data, (r)->data); \
+} while (0)
+
+#define fifo_move(dest, src) \
+do { \
+ typeof(*((dest)->data)) _t; \
+ while (!fifo_full(dest) && \
+ fifo_pop(src, _t)) \
+ fifo_push(dest, _t); \
+} while (0)
+
+#define fifo_used(fifo) (((fifo)->back - (fifo)->front))
+#define fifo_free(fifo) ((fifo)->size - fifo_used(fifo))
+
+#define fifo_empty(fifo) ((fifo)->front == (fifo)->back)
+#define fifo_full(fifo) (fifo_used(fifo) == (fifo)->size)
+
+#define fifo_peek_front(fifo) ((fifo)->data[(fifo)->front & (fifo)->mask])
+#define fifo_peek_back(fifo) ((fifo)->data[((fifo)->back - 1) & (fifo)->mask])
+
+#define fifo_entry_idx_abs(fifo, p) \
+ ((((p) >= &fifo_peek_front(fifo) \
+ ? (fifo)->front : (fifo)->back) & ~(fifo)->mask) + \
+ (((p) - (fifo)->data)))
+
+#define fifo_entry_idx(fifo, p) (((p) - &fifo_peek_front(fifo)) & (fifo)->mask)
+#define fifo_idx_entry(fifo, i) ((fifo)->data[((fifo)->front + (i)) & (fifo)->mask])
+
+#define fifo_push_back_ref(f) \
+ (fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask])
+
+#define fifo_push_front_ref(f) \
+ (fifo_full((f)) ? NULL : &(f)->data[--(f)->front & (f)->mask])
+
+#define fifo_push_back(fifo, new) \
+({ \
+ typeof((fifo)->data) _r = fifo_push_back_ref(fifo); \
+ if (_r) \
+ *_r = (new); \
+ _r != NULL; \
+})
+
+#define fifo_push_front(fifo, new) \
+({ \
+ typeof((fifo)->data) _r = fifo_push_front_ref(fifo); \
+ if (_r) \
+ *_r = (new); \
+ _r != NULL; \
+})
+
+#define fifo_pop_front(fifo, i) \
+({ \
+ bool _r = !fifo_empty((fifo)); \
+ if (_r) \
+ (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask]; \
+ _r; \
+})
+
+#define fifo_pop_back(fifo, i) \
+({ \
+ bool _r = !fifo_empty((fifo)); \
+ if (_r) \
+ (i) = (fifo)->data[--(fifo)->back & (fifo)->mask]; \
+ _r; \
+})
+
+#define fifo_push_ref(fifo) fifo_push_back_ref(fifo)
+#define fifo_push(fifo, i) fifo_push_back(fifo, (i))
+#define fifo_pop(fifo, i) fifo_pop_front(fifo, (i))
+#define fifo_peek(fifo) fifo_peek_front(fifo)
+
+#define fifo_for_each_entry(_entry, _fifo, _iter) \
+ for (typecheck(typeof((_fifo)->front), _iter), \
+ (_iter) = (_fifo)->front; \
+ ((_iter != (_fifo)->back) && \
+ (_entry = (_fifo)->data[(_iter) & (_fifo)->mask], true)); \
+ (_iter)++)
+
+#define fifo_for_each_entry_ptr(_ptr, _fifo, _iter) \
+ for (typecheck(typeof((_fifo)->front), _iter), \
+ (_iter) = (_fifo)->front; \
+ ((_iter != (_fifo)->back) && \
+ (_ptr = &(_fifo)->data[(_iter) & (_fifo)->mask], true)); \
+ (_iter)++)
+
+#endif /* _BCACHEFS_FIFO_H */
diff --git a/c_src/libbcachefs/fs-common.c b/c_src/libbcachefs/fs-common.c
new file mode 100644
index 00000000..1c1ea0f0
--- /dev/null
+++ b/c_src/libbcachefs/fs-common.c
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "acl.h"
+#include "btree_update.h"
+#include "dirent.h"
+#include "fs-common.h"
+#include "inode.h"
+#include "subvolume.h"
+#include "xattr.h"
+
+#include <linux/posix_acl.h>
+
+static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode)
+{
+ return S_ISDIR(inode->bi_mode) && !inode->bi_subvol;
+}
+
+int bch2_create_trans(struct btree_trans *trans,
+ subvol_inum dir,
+ struct bch_inode_unpacked *dir_u,
+ struct bch_inode_unpacked *new_inode,
+ const struct qstr *name,
+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
+ struct posix_acl *default_acl,
+ struct posix_acl *acl,
+ subvol_inum snapshot_src,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter dir_iter = { NULL };
+ struct btree_iter inode_iter = { NULL };
+ subvol_inum new_inum = dir;
+ u64 now = bch2_current_time(c);
+ u64 cpu = raw_smp_processor_id();
+ u64 dir_target;
+ u32 snapshot;
+ unsigned dir_type = mode_to_type(mode);
+ int ret;
+
+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ if (!(flags & BCH_CREATE_SNAPSHOT)) {
+ /* Normal create path - allocate a new inode: */
+ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
+
+ if (flags & BCH_CREATE_TMPFILE)
+ new_inode->bi_flags |= BCH_INODE_unlinked;
+
+ ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
+ if (ret)
+ goto err;
+
+ snapshot_src = (subvol_inum) { 0 };
+ } else {
+ /*
+ * Creating a snapshot - we're not allocating a new inode, but
+ * we do have to lookup the root inode of the subvolume we're
+ * snapshotting and update it (in the new snapshot):
+ */
+
+ if (!snapshot_src.inum) {
+ /* Inode wasn't specified, just snapshot: */
+ struct bch_subvolume s;
+
+ ret = bch2_subvolume_get(trans, snapshot_src.subvol, true,
+ BTREE_ITER_CACHED, &s);
+ if (ret)
+ goto err;
+
+ snapshot_src.inum = le64_to_cpu(s.inode);
+ }
+
+ ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src,
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ if (new_inode->bi_subvol != snapshot_src.subvol) {
+ /* Not a subvolume root: */
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * If we're not root, we have to own the subvolume being
+ * snapshotted:
+ */
+ if (uid && new_inode->bi_uid != uid) {
+ ret = -EPERM;
+ goto err;
+ }
+
+ flags |= BCH_CREATE_SUBVOL;
+ }
+
+ new_inum.inum = new_inode->bi_inum;
+ dir_target = new_inode->bi_inum;
+
+ if (flags & BCH_CREATE_SUBVOL) {
+ u32 new_subvol, dir_snapshot;
+
+ ret = bch2_subvolume_create(trans, new_inode->bi_inum,
+ snapshot_src.subvol,
+ &new_subvol, &snapshot,
+ (flags & BCH_CREATE_SNAPSHOT_RO) != 0);
+ if (ret)
+ goto err;
+
+ new_inode->bi_parent_subvol = dir.subvol;
+ new_inode->bi_subvol = new_subvol;
+ new_inum.subvol = new_subvol;
+ dir_target = new_subvol;
+ dir_type = DT_SUBVOL;
+
+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot);
+ if (ret)
+ goto err;
+
+ bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot);
+ ret = bch2_btree_iter_traverse(&dir_iter);
+ if (ret)
+ goto err;
+ }
+
+ if (!(flags & BCH_CREATE_SNAPSHOT)) {
+ if (default_acl) {
+ ret = bch2_set_acl_trans(trans, new_inum, new_inode,
+ default_acl, ACL_TYPE_DEFAULT);
+ if (ret)
+ goto err;
+ }
+
+ if (acl) {
+ ret = bch2_set_acl_trans(trans, new_inum, new_inode,
+ acl, ACL_TYPE_ACCESS);
+ if (ret)
+ goto err;
+ }
+ }
+
+ if (!(flags & BCH_CREATE_TMPFILE)) {
+ struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u);
+ u64 dir_offset;
+
+ if (is_subdir_for_nlink(new_inode))
+ dir_u->bi_nlink++;
+ dir_u->bi_mtime = dir_u->bi_ctime = now;
+
+ ret = bch2_inode_write(trans, &dir_iter, dir_u);
+ if (ret)
+ goto err;
+
+ ret = bch2_dirent_create(trans, dir, &dir_hash,
+ dir_type,
+ name,
+ dir_target,
+ &dir_offset,
+ BCH_HASH_SET_MUST_CREATE);
+ if (ret)
+ goto err;
+
+ new_inode->bi_dir = dir_u->bi_inum;
+ new_inode->bi_dir_offset = dir_offset;
+ }
+
+ inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
+ bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
+
+ ret = bch2_btree_iter_traverse(&inode_iter) ?:
+ bch2_inode_write(trans, &inode_iter, new_inode);
+err:
+ bch2_trans_iter_exit(trans, &inode_iter);
+ bch2_trans_iter_exit(trans, &dir_iter);
+ return ret;
+}
+
+int bch2_link_trans(struct btree_trans *trans,
+ subvol_inum dir, struct bch_inode_unpacked *dir_u,
+ subvol_inum inum, struct bch_inode_unpacked *inode_u,
+ const struct qstr *name)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter dir_iter = { NULL };
+ struct btree_iter inode_iter = { NULL };
+ struct bch_hash_info dir_hash;
+ u64 now = bch2_current_time(c);
+ u64 dir_offset = 0;
+ int ret;
+
+ if (dir.subvol != inum.subvol)
+ return -EXDEV;
+
+ ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ inode_u->bi_ctime = now;
+ ret = bch2_inode_nlink_inc(inode_u);
+ if (ret)
+ return ret;
+
+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ if (bch2_reinherit_attrs(inode_u, dir_u)) {
+ ret = -EXDEV;
+ goto err;
+ }
+
+ dir_u->bi_mtime = dir_u->bi_ctime = now;
+
+ dir_hash = bch2_hash_info_init(c, dir_u);
+
+ ret = bch2_dirent_create(trans, dir, &dir_hash,
+ mode_to_type(inode_u->bi_mode),
+ name, inum.inum, &dir_offset,
+ BCH_HASH_SET_MUST_CREATE);
+ if (ret)
+ goto err;
+
+ inode_u->bi_dir = dir.inum;
+ inode_u->bi_dir_offset = dir_offset;
+
+ ret = bch2_inode_write(trans, &dir_iter, dir_u) ?:
+ bch2_inode_write(trans, &inode_iter, inode_u);
+err:
+ bch2_trans_iter_exit(trans, &dir_iter);
+ bch2_trans_iter_exit(trans, &inode_iter);
+ return ret;
+}
+
+int bch2_unlink_trans(struct btree_trans *trans,
+ subvol_inum dir,
+ struct bch_inode_unpacked *dir_u,
+ struct bch_inode_unpacked *inode_u,
+ const struct qstr *name,
+ bool deleting_snapshot)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter dir_iter = { NULL };
+ struct btree_iter dirent_iter = { NULL };
+ struct btree_iter inode_iter = { NULL };
+ struct bch_hash_info dir_hash;
+ subvol_inum inum;
+ u64 now = bch2_current_time(c);
+ struct bkey_s_c k;
+ int ret;
+
+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ dir_hash = bch2_hash_info_init(c, dir_u);
+
+ ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
+ name, &inum, BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum,
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) {
+ ret = bch2_empty_dir_trans(trans, inum);
+ if (ret)
+ goto err;
+ }
+
+ if (deleting_snapshot && !inode_u->bi_subvol) {
+ ret = -BCH_ERR_ENOENT_not_subvol;
+ goto err;
+ }
+
+ if (deleting_snapshot || inode_u->bi_subvol) {
+ ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
+ if (ret)
+ goto err;
+
+ k = bch2_btree_iter_peek_slot(&dirent_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ /*
+ * If we're deleting a subvolume, we need to really delete the
+ * dirent, not just emit a whiteout in the current snapshot:
+ */
+ bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot);
+ ret = bch2_btree_iter_traverse(&dirent_iter);
+ if (ret)
+ goto err;
+ } else {
+ bch2_inode_nlink_dec(trans, inode_u);
+ }
+
+ if (inode_u->bi_dir == dirent_iter.pos.inode &&
+ inode_u->bi_dir_offset == dirent_iter.pos.offset) {
+ inode_u->bi_dir = 0;
+ inode_u->bi_dir_offset = 0;
+ }
+
+ dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
+ dir_u->bi_nlink -= is_subdir_for_nlink(inode_u);
+
+ ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
+ &dir_hash, &dirent_iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ bch2_inode_write(trans, &dir_iter, dir_u) ?:
+ bch2_inode_write(trans, &inode_iter, inode_u);
+err:
+ bch2_trans_iter_exit(trans, &inode_iter);
+ bch2_trans_iter_exit(trans, &dirent_iter);
+ bch2_trans_iter_exit(trans, &dir_iter);
+ return ret;
+}
+
+bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
+ struct bch_inode_unpacked *src_u)
+{
+ u64 src, dst;
+ unsigned id;
+ bool ret = false;
+
+ for (id = 0; id < Inode_opt_nr; id++) {
+ /* Skip attributes that were explicitly set on this inode */
+ if (dst_u->bi_fields_set & (1 << id))
+ continue;
+
+ src = bch2_inode_opt_get(src_u, id);
+ dst = bch2_inode_opt_get(dst_u, id);
+
+ if (src == dst)
+ continue;
+
+ bch2_inode_opt_set(dst_u, id, src);
+ ret = true;
+ }
+
+ return ret;
+}
+
+int bch2_rename_trans(struct btree_trans *trans,
+ subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u,
+ subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u,
+ struct bch_inode_unpacked *src_inode_u,
+ struct bch_inode_unpacked *dst_inode_u,
+ const struct qstr *src_name,
+ const struct qstr *dst_name,
+ enum bch_rename_mode mode)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter src_dir_iter = { NULL };
+ struct btree_iter dst_dir_iter = { NULL };
+ struct btree_iter src_inode_iter = { NULL };
+ struct btree_iter dst_inode_iter = { NULL };
+ struct bch_hash_info src_hash, dst_hash;
+ subvol_inum src_inum, dst_inum;
+ u64 src_offset, dst_offset;
+ u64 now = bch2_current_time(c);
+ int ret;
+
+ ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir,
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ src_hash = bch2_hash_info_init(c, src_dir_u);
+
+ if (dst_dir.inum != src_dir.inum ||
+ dst_dir.subvol != src_dir.subvol) {
+ ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir,
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ dst_hash = bch2_hash_info_init(c, dst_dir_u);
+ } else {
+ dst_dir_u = src_dir_u;
+ dst_hash = src_hash;
+ }
+
+ ret = bch2_dirent_rename(trans,
+ src_dir, &src_hash,
+ dst_dir, &dst_hash,
+ src_name, &src_inum, &src_offset,
+ dst_name, &dst_inum, &dst_offset,
+ mode);
+ if (ret)
+ goto err;
+
+ ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inum,
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+
+ if (dst_inum.inum) {
+ ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inum,
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto err;
+ }
+
+ src_inode_u->bi_dir = dst_dir_u->bi_inum;
+ src_inode_u->bi_dir_offset = dst_offset;
+
+ if (mode == BCH_RENAME_EXCHANGE) {
+ dst_inode_u->bi_dir = src_dir_u->bi_inum;
+ dst_inode_u->bi_dir_offset = src_offset;
+ }
+
+ if (mode == BCH_RENAME_OVERWRITE &&
+ dst_inode_u->bi_dir == dst_dir_u->bi_inum &&
+ dst_inode_u->bi_dir_offset == src_offset) {
+ dst_inode_u->bi_dir = 0;
+ dst_inode_u->bi_dir_offset = 0;
+ }
+
+ if (mode == BCH_RENAME_OVERWRITE) {
+ if (S_ISDIR(src_inode_u->bi_mode) !=
+ S_ISDIR(dst_inode_u->bi_mode)) {
+ ret = -ENOTDIR;
+ goto err;
+ }
+
+ if (S_ISDIR(dst_inode_u->bi_mode) &&
+ bch2_empty_dir_trans(trans, dst_inum)) {
+ ret = -ENOTEMPTY;
+ goto err;
+ }
+ }
+
+ if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
+ S_ISDIR(src_inode_u->bi_mode)) {
+ ret = -EXDEV;
+ goto err;
+ }
+
+ if (mode == BCH_RENAME_EXCHANGE &&
+ bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
+ S_ISDIR(dst_inode_u->bi_mode)) {
+ ret = -EXDEV;
+ goto err;
+ }
+
+ if (is_subdir_for_nlink(src_inode_u)) {
+ src_dir_u->bi_nlink--;
+ dst_dir_u->bi_nlink++;
+ }
+
+ if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
+ dst_dir_u->bi_nlink--;
+ src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
+ }
+
+ if (mode == BCH_RENAME_OVERWRITE)
+ bch2_inode_nlink_dec(trans, dst_inode_u);
+
+ src_dir_u->bi_mtime = now;
+ src_dir_u->bi_ctime = now;
+
+ if (src_dir.inum != dst_dir.inum) {
+ dst_dir_u->bi_mtime = now;
+ dst_dir_u->bi_ctime = now;
+ }
+
+ src_inode_u->bi_ctime = now;
+
+ if (dst_inum.inum)
+ dst_inode_u->bi_ctime = now;
+
+ ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
+ (src_dir.inum != dst_dir.inum
+ ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
+ : 0) ?:
+ bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
+ (dst_inum.inum
+ ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
+ : 0);
+err:
+ bch2_trans_iter_exit(trans, &dst_inode_iter);
+ bch2_trans_iter_exit(trans, &src_inode_iter);
+ bch2_trans_iter_exit(trans, &dst_dir_iter);
+ bch2_trans_iter_exit(trans, &src_dir_iter);
+ return ret;
+}
diff --git a/c_src/libbcachefs/fs-common.h b/c_src/libbcachefs/fs-common.h
new file mode 100644
index 00000000..dde23785
--- /dev/null
+++ b/c_src/libbcachefs/fs-common.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_COMMON_H
+#define _BCACHEFS_FS_COMMON_H
+
+struct posix_acl;
+
+#define BCH_CREATE_TMPFILE (1U << 0)
+#define BCH_CREATE_SUBVOL (1U << 1)
+#define BCH_CREATE_SNAPSHOT (1U << 2)
+#define BCH_CREATE_SNAPSHOT_RO (1U << 3)
+
+int bch2_create_trans(struct btree_trans *, subvol_inum,
+ struct bch_inode_unpacked *,
+ struct bch_inode_unpacked *,
+ const struct qstr *,
+ uid_t, gid_t, umode_t, dev_t,
+ struct posix_acl *,
+ struct posix_acl *,
+ subvol_inum, unsigned);
+
+int bch2_link_trans(struct btree_trans *,
+ subvol_inum, struct bch_inode_unpacked *,
+ subvol_inum, struct bch_inode_unpacked *,
+ const struct qstr *);
+
+int bch2_unlink_trans(struct btree_trans *, subvol_inum,
+ struct bch_inode_unpacked *,
+ struct bch_inode_unpacked *,
+ const struct qstr *, bool);
+
+int bch2_rename_trans(struct btree_trans *,
+ subvol_inum, struct bch_inode_unpacked *,
+ subvol_inum, struct bch_inode_unpacked *,
+ struct bch_inode_unpacked *,
+ struct bch_inode_unpacked *,
+ const struct qstr *,
+ const struct qstr *,
+ enum bch_rename_mode);
+
+bool bch2_reinherit_attrs(struct bch_inode_unpacked *,
+ struct bch_inode_unpacked *);
+
+#endif /* _BCACHEFS_FS_COMMON_H */
diff --git a/c_src/libbcachefs/fs-io-buffered.c b/c_src/libbcachefs/fs-io-buffered.c
new file mode 100644
index 00000000..73c12e56
--- /dev/null
+++ b/c_src/libbcachefs/fs-io-buffered.c
@@ -0,0 +1,1100 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_buf.h"
+#include "fs-io.h"
+#include "fs-io-buffered.h"
+#include "fs-io-direct.h"
+#include "fs-io-pagecache.h"
+#include "io_read.h"
+#include "io_write.h"
+
+#include <linux/backing-dev.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+
+static inline bool bio_full(struct bio *bio, unsigned len)
+{
+ if (bio->bi_vcnt >= bio->bi_max_vecs)
+ return true;
+ if (bio->bi_iter.bi_size > UINT_MAX - len)
+ return true;
+ return false;
+}
+
+/* readpage(s): */
+
+static void bch2_readpages_end_io(struct bio *bio)
+{
+ struct folio_iter fi;
+
+ bio_for_each_folio_all(fi, bio) {
+ if (!bio->bi_status) {
+ folio_mark_uptodate(fi.folio);
+ } else {
+ folio_clear_uptodate(fi.folio);
+ folio_set_error(fi.folio);
+ }
+ folio_unlock(fi.folio);
+ }
+
+ bio_put(bio);
+}
+
+struct readpages_iter {
+ struct address_space *mapping;
+ unsigned idx;
+ folios folios;
+};
+
+static int readpages_iter_init(struct readpages_iter *iter,
+ struct readahead_control *ractl)
+{
+ struct folio *folio;
+
+ *iter = (struct readpages_iter) { ractl->mapping };
+
+ while ((folio = __readahead_folio(ractl))) {
+ if (!bch2_folio_create(folio, GFP_KERNEL) ||
+ darray_push(&iter->folios, folio)) {
+ bch2_folio_release(folio);
+ ractl->_nr_pages += folio_nr_pages(folio);
+ ractl->_index -= folio_nr_pages(folio);
+ return iter->folios.nr ? 0 : -ENOMEM;
+ }
+
+ folio_put(folio);
+ }
+
+ return 0;
+}
+
+static inline struct folio *readpage_iter_peek(struct readpages_iter *iter)
+{
+ if (iter->idx >= iter->folios.nr)
+ return NULL;
+ return iter->folios.data[iter->idx];
+}
+
+static inline void readpage_iter_advance(struct readpages_iter *iter)
+{
+ iter->idx++;
+}
+
+static bool extent_partial_reads_expensive(struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *i;
+
+ bkey_for_each_crc(k.k, ptrs, crc, i)
+ if (crc.csum_type || crc.compression_type)
+ return true;
+ return false;
+}
+
+static int readpage_bio_extend(struct btree_trans *trans,
+ struct readpages_iter *iter,
+ struct bio *bio,
+ unsigned sectors_this_extent,
+ bool get_more)
+{
+ /* Don't hold btree locks while allocating memory: */
+ bch2_trans_unlock(trans);
+
+ while (bio_sectors(bio) < sectors_this_extent &&
+ bio->bi_vcnt < bio->bi_max_vecs) {
+ struct folio *folio = readpage_iter_peek(iter);
+ int ret;
+
+ if (folio) {
+ readpage_iter_advance(iter);
+ } else {
+ pgoff_t folio_offset = bio_end_sector(bio) >> PAGE_SECTORS_SHIFT;
+
+ if (!get_more)
+ break;
+
+ folio = xa_load(&iter->mapping->i_pages, folio_offset);
+ if (folio && !xa_is_value(folio))
+ break;
+
+ folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0);
+ if (!folio)
+ break;
+
+ if (!__bch2_folio_create(folio, GFP_KERNEL)) {
+ folio_put(folio);
+ break;
+ }
+
+ ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_KERNEL);
+ if (ret) {
+ __bch2_folio_release(folio);
+ folio_put(folio);
+ break;
+ }
+
+ folio_put(folio);
+ }
+
+ BUG_ON(folio_sector(folio) != bio_end_sector(bio));
+
+ BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0));
+ }
+
+ return bch2_trans_relock(trans);
+}
+
+static void bchfs_read(struct btree_trans *trans,
+ struct bch_read_bio *rbio,
+ subvol_inum inum,
+ struct readpages_iter *readpages_iter)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_buf sk;
+ int flags = BCH_READ_RETRY_IF_STALE|
+ BCH_READ_MAY_PROMOTE;
+ u32 snapshot;
+ int ret = 0;
+
+ rbio->c = c;
+ rbio->start_time = local_clock();
+ rbio->subvol = inum.subvol;
+
+ bch2_bkey_buf_init(&sk);
+retry:
+ bch2_trans_begin(trans);
+ iter = (struct btree_iter) { NULL };
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot),
+ BTREE_ITER_SLOTS);
+ while (1) {
+ struct bkey_s_c k;
+ unsigned bytes, sectors, offset_into_extent;
+ enum btree_id data_btree = BTREE_ID_extents;
+
+ /*
+ * read_extent -> io_time_reset may cause a transaction restart
+ * without returning an error, we need to check for that here:
+ */
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ break;
+
+ bch2_btree_iter_set_pos(&iter,
+ POS(inum.inum, rbio->bio.bi_iter.bi_sector));
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ break;
+
+ offset_into_extent = iter.pos.offset -
+ bkey_start_offset(k.k);
+ sectors = k.k->size - offset_into_extent;
+
+ bch2_bkey_buf_reassemble(&sk, c, k);
+
+ ret = bch2_read_indirect_extent(trans, &data_btree,
+ &offset_into_extent, &sk);
+ if (ret)
+ break;
+
+ k = bkey_i_to_s_c(sk.k);
+
+ sectors = min(sectors, k.k->size - offset_into_extent);
+
+ if (readpages_iter) {
+ ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors,
+ extent_partial_reads_expensive(k));
+ if (ret)
+ break;
+ }
+
+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
+ swap(rbio->bio.bi_iter.bi_size, bytes);
+
+ if (rbio->bio.bi_iter.bi_size == bytes)
+ flags |= BCH_READ_LAST_FRAGMENT;
+
+ bch2_bio_page_state_set(&rbio->bio, k);
+
+ bch2_read_extent(trans, rbio, iter.pos,
+ data_btree, k, offset_into_extent, flags);
+
+ if (flags & BCH_READ_LAST_FRAGMENT)
+ break;
+
+ swap(rbio->bio.bi_iter.bi_size, bytes);
+ bio_advance(&rbio->bio, bytes);
+
+ ret = btree_trans_too_many_iters(trans);
+ if (ret)
+ break;
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ if (ret) {
+ bch_err_inum_offset_ratelimited(c,
+ iter.pos.inode,
+ iter.pos.offset << 9,
+ "read error %i from btree lookup", ret);
+ rbio->bio.bi_status = BLK_STS_IOERR;
+ bio_endio(&rbio->bio);
+ }
+
+ bch2_bkey_buf_exit(&sk, c);
+}
+
+void bch2_readahead(struct readahead_control *ractl)
+{
+ struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_io_opts opts;
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct folio *folio;
+ struct readpages_iter readpages_iter;
+
+ bch2_inode_opts_get(&opts, c, &inode->ei_inode);
+
+ int ret = readpages_iter_init(&readpages_iter, ractl);
+ if (ret)
+ return;
+
+ bch2_pagecache_add_get(inode);
+
+ while ((folio = readpage_iter_peek(&readpages_iter))) {
+ unsigned n = min_t(unsigned,
+ readpages_iter.folios.nr -
+ readpages_iter.idx,
+ BIO_MAX_VECS);
+ struct bch_read_bio *rbio =
+ rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ,
+ GFP_KERNEL, &c->bio_read),
+ opts);
+
+ readpage_iter_advance(&readpages_iter);
+
+ rbio->bio.bi_iter.bi_sector = folio_sector(folio);
+ rbio->bio.bi_end_io = bch2_readpages_end_io;
+ BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
+
+ bchfs_read(trans, rbio, inode_inum(inode),
+ &readpages_iter);
+ bch2_trans_unlock(trans);
+ }
+
+ bch2_pagecache_add_put(inode);
+
+ bch2_trans_put(trans);
+ darray_exit(&readpages_iter.folios);
+}
+
+static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio,
+ subvol_inum inum, struct folio *folio)
+{
+ bch2_folio_create(folio, __GFP_NOFAIL);
+
+ rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
+ rbio->bio.bi_iter.bi_sector = folio_sector(folio);
+ BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
+
+ bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0));
+}
+
+static void bch2_read_single_folio_end_io(struct bio *bio)
+{
+ complete(bio->bi_private);
+}
+
+int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
+{
+ struct bch_inode_info *inode = to_bch_ei(mapping->host);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_read_bio *rbio;
+ struct bch_io_opts opts;
+ int ret;
+ DECLARE_COMPLETION_ONSTACK(done);
+
+ bch2_inode_opts_get(&opts, c, &inode->ei_inode);
+
+ rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read),
+ opts);
+ rbio->bio.bi_private = &done;
+ rbio->bio.bi_end_io = bch2_read_single_folio_end_io;
+
+ __bchfs_readfolio(c, rbio, inode_inum(inode), folio);
+ wait_for_completion(&done);
+
+ ret = blk_status_to_errno(rbio->bio.bi_status);
+ bio_put(&rbio->bio);
+
+ if (ret < 0)
+ return ret;
+
+ folio_mark_uptodate(folio);
+ return 0;
+}
+
+int bch2_read_folio(struct file *file, struct folio *folio)
+{
+ int ret;
+
+ ret = bch2_read_single_folio(folio, folio->mapping);
+ folio_unlock(folio);
+ return bch2_err_class(ret);
+}
+
+/* writepages: */
+
+struct bch_writepage_io {
+ struct bch_inode_info *inode;
+
+ /* must be last: */
+ struct bch_write_op op;
+};
+
+struct bch_writepage_state {
+ struct bch_writepage_io *io;
+ struct bch_io_opts opts;
+ struct bch_folio_sector *tmp;
+ unsigned tmp_sectors;
+};
+
+static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c,
+ struct bch_inode_info *inode)
+{
+ struct bch_writepage_state ret = { 0 };
+
+ bch2_inode_opts_get(&ret.opts, c, &inode->ei_inode);
+ return ret;
+}
+
+/*
+ * Determine when a writepage io is full. We have to limit writepage bios to a
+ * single page per bvec (i.e. 1MB with 4k pages) because that is the limit to
+ * what the bounce path in bch2_write_extent() can handle. In theory we could
+ * loosen this restriction for non-bounce I/O, but we don't have that context
+ * here. Ideally, this limit could be raised and made configurable once the
+ * bounce path is enhanced to accommodate larger source bios.
+ */
+static inline bool bch_io_full(struct bch_writepage_io *io, unsigned len)
+{
+ struct bio *bio = &io->op.wbio.bio;
+ return bio_full(bio, len) ||
+ (bio->bi_iter.bi_size + len > BIO_MAX_VECS * PAGE_SIZE);
+}
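+
+/*
+ * Illustrative arithmetic (not from the original source): with 4k pages and
+ * BIO_MAX_VECS == 256, the size check above caps a writepage bio at
+ * 256 * 4096 bytes = 1MB, the limit described in the comment above.
+ */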
+
+static void bch2_writepage_io_done(struct bch_write_op *op)
+{
+ struct bch_writepage_io *io =
+ container_of(op, struct bch_writepage_io, op);
+ struct bch_fs *c = io->op.c;
+ struct bio *bio = &io->op.wbio.bio;
+ struct folio_iter fi;
+ unsigned i;
+
+ if (io->op.error) {
+ set_bit(EI_INODE_ERROR, &io->inode->ei_flags);
+
+ bio_for_each_folio_all(fi, bio) {
+ struct bch_folio *s;
+
+ folio_set_error(fi.folio);
+ mapping_set_error(fi.folio->mapping, -EIO);
+
+ s = __bch2_folio(fi.folio);
+ spin_lock(&s->lock);
+ for (i = 0; i < folio_sectors(fi.folio); i++)
+ s->s[i].nr_replicas = 0;
+ spin_unlock(&s->lock);
+ }
+ }
+
+ if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) {
+ bio_for_each_folio_all(fi, bio) {
+ struct bch_folio *s;
+
+ s = __bch2_folio(fi.folio);
+ spin_lock(&s->lock);
+ for (i = 0; i < folio_sectors(fi.folio); i++)
+ s->s[i].nr_replicas = 0;
+ spin_unlock(&s->lock);
+ }
+ }
+
+ /*
+ * racing with fallocate can cause us to add fewer sectors than
+ * expected - but we shouldn't add more sectors than expected:
+ */
+ WARN_ON_ONCE(io->op.i_sectors_delta > 0);
+
+ /*
+ * (error (due to going RO) halfway through a page can screw that up
+ * slightly)
+ * XXX wtf?
+ BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS);
+ */
+
+ /*
+ * PageWriteback is effectively our ref on the inode - fixup i_blocks
+ * before calling end_page_writeback:
+ */
+ bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta);
+
+ bio_for_each_folio_all(fi, bio) {
+ struct bch_folio *s = __bch2_folio(fi.folio);
+
+ if (atomic_dec_and_test(&s->write_count))
+ folio_end_writeback(fi.folio);
+ }
+
+ bio_put(&io->op.wbio.bio);
+}
+
+static void bch2_writepage_do_io(struct bch_writepage_state *w)
+{
+ struct bch_writepage_io *io = w->io;
+
+ w->io = NULL;
+ closure_call(&io->op.cl, bch2_write, NULL, NULL);
+}
+
+/*
+ * Allocate a new bch_writepage_io for writeback starting at @sector; the
+ * caller appends folios to an existing io when possible and only calls this
+ * when a new one is needed:
+ */
+static void bch2_writepage_io_alloc(struct bch_fs *c,
+ struct writeback_control *wbc,
+ struct bch_writepage_state *w,
+ struct bch_inode_info *inode,
+ u64 sector,
+ unsigned nr_replicas)
+{
+ struct bch_write_op *op;
+
+ w->io = container_of(bio_alloc_bioset(NULL, BIO_MAX_VECS,
+ REQ_OP_WRITE,
+ GFP_KERNEL,
+ &c->writepage_bioset),
+ struct bch_writepage_io, op.wbio.bio);
+
+ w->io->inode = inode;
+ op = &w->io->op;
+ bch2_write_op_init(op, c, w->opts);
+ op->target = w->opts.foreground_target;
+ op->nr_replicas = nr_replicas;
+ op->res.nr_replicas = nr_replicas;
+ op->write_point = writepoint_hashed(inode->ei_last_dirtied);
+ op->subvol = inode->ei_subvol;
+ op->pos = POS(inode->v.i_ino, sector);
+ op->end_io = bch2_writepage_io_done;
+ op->devs_need_flush = &inode->ei_devs_need_flush;
+ op->wbio.bio.bi_iter.bi_sector = sector;
+ op->wbio.bio.bi_opf = wbc_to_write_flags(wbc);
+}
+
+static int __bch2_writepage(struct folio *folio,
+ struct writeback_control *wbc,
+ void *data)
+{
+ struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_writepage_state *w = data;
+ struct bch_folio *s;
+ unsigned i, offset, f_sectors, nr_replicas_this_write = U32_MAX;
+ loff_t i_size = i_size_read(&inode->v);
+ int ret;
+
+ EBUG_ON(!folio_test_uptodate(folio));
+
+ /* Is the folio fully inside i_size? */
+ if (folio_end_pos(folio) <= i_size)
+ goto do_io;
+
+ /* Is the folio fully outside i_size? (truncate in progress) */
+ if (folio_pos(folio) >= i_size) {
+ folio_unlock(folio);
+ return 0;
+ }
+
+ /*
+ * The folio straddles i_size. It must be zeroed out on each and every
+ * writepage invocation because it may be mmapped. "A file is mapped
+ * in multiples of the folio size. For a file that is not a multiple of
+ * the folio size, the remaining memory is zeroed when mapped, and
+ * writes to that region are not written out to the file."
+ */
+ folio_zero_segment(folio,
+ i_size - folio_pos(folio),
+ folio_size(folio));
+do_io:
+ f_sectors = folio_sectors(folio);
+ s = bch2_folio(folio);
+
+ if (f_sectors > w->tmp_sectors) {
+ kfree(w->tmp);
+ w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), __GFP_NOFAIL);
+ w->tmp_sectors = f_sectors;
+ }
+
+ /*
+ * Things get really hairy with errors during writeback:
+ */
+ ret = bch2_get_folio_disk_reservation(c, inode, folio, false);
+ BUG_ON(ret);
+
+ /* Before unlocking the page, get copy of reservations: */
+ spin_lock(&s->lock);
+ memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors);
+
+ for (i = 0; i < f_sectors; i++) {
+ if (s->s[i].state < SECTOR_dirty)
+ continue;
+
+ nr_replicas_this_write =
+ min_t(unsigned, nr_replicas_this_write,
+ s->s[i].nr_replicas +
+ s->s[i].replicas_reserved);
+ }
+
+ for (i = 0; i < f_sectors; i++) {
+ if (s->s[i].state < SECTOR_dirty)
+ continue;
+
+ s->s[i].nr_replicas = w->opts.compression
+ ? 0 : nr_replicas_this_write;
+
+ s->s[i].replicas_reserved = 0;
+ bch2_folio_sector_set(folio, s, i, SECTOR_allocated);
+ }
+ spin_unlock(&s->lock);
+
+ BUG_ON(atomic_read(&s->write_count));
+ atomic_set(&s->write_count, 1);
+
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio);
+
+ folio_unlock(folio);
+
+ offset = 0;
+ while (1) {
+ unsigned sectors = 0, dirty_sectors = 0, reserved_sectors = 0;
+ u64 sector;
+
+ while (offset < f_sectors &&
+ w->tmp[offset].state < SECTOR_dirty)
+ offset++;
+
+ if (offset == f_sectors)
+ break;
+
+ while (offset + sectors < f_sectors &&
+ w->tmp[offset + sectors].state >= SECTOR_dirty) {
+ reserved_sectors += w->tmp[offset + sectors].replicas_reserved;
+ dirty_sectors += w->tmp[offset + sectors].state == SECTOR_dirty;
+ sectors++;
+ }
+ BUG_ON(!sectors);
+
+ sector = folio_sector(folio) + offset;
+
+ if (w->io &&
+ (w->io->op.res.nr_replicas != nr_replicas_this_write ||
+ bch_io_full(w->io, sectors << 9) ||
+ bio_end_sector(&w->io->op.wbio.bio) != sector))
+ bch2_writepage_do_io(w);
+
+ if (!w->io)
+ bch2_writepage_io_alloc(c, wbc, w, inode, sector,
+ nr_replicas_this_write);
+
+ atomic_inc(&s->write_count);
+
+ BUG_ON(inode != w->io->inode);
+ BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio,
+ sectors << 9, offset << 9));
+
+ /* Check for writing past i_size: */
+ WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) >
+ round_up(i_size, block_bytes(c)) &&
+ !test_bit(BCH_FS_emergency_ro, &c->flags),
+ "writing past i_size: %llu > %llu (unrounded %llu)\n",
+ bio_end_sector(&w->io->op.wbio.bio) << 9,
+ round_up(i_size, block_bytes(c)),
+ i_size);
+
+ w->io->op.res.sectors += reserved_sectors;
+ w->io->op.i_sectors_delta -= dirty_sectors;
+ w->io->op.new_i_size = i_size;
+
+ offset += sectors;
+ }
+
+ if (atomic_dec_and_test(&s->write_count))
+ folio_end_writeback(folio);
+
+ return 0;
+}
+
+int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+ struct bch_fs *c = mapping->host->i_sb->s_fs_info;
+ struct bch_writepage_state w =
+ bch_writepage_state_init(c, to_bch_ei(mapping->host));
+ struct blk_plug plug;
+ int ret;
+
+ blk_start_plug(&plug);
+ ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w);
+ if (w.io)
+ bch2_writepage_do_io(&w);
+ blk_finish_plug(&plug);
+ kfree(w.tmp);
+ return bch2_err_class(ret);
+}
+
+/* buffered writes: */
+
+int bch2_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len,
+ struct page **pagep, void **fsdata)
+{
+ struct bch_inode_info *inode = to_bch_ei(mapping->host);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch2_folio_reservation *res;
+ struct folio *folio;
+ unsigned offset;
+ int ret = -ENOMEM;
+
+ res = kmalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ bch2_folio_reservation_init(c, inode, res);
+ *fsdata = res;
+
+ bch2_pagecache_add_get(inode);
+
+ folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT,
+ FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR_OR_NULL(folio))
+ goto err_unlock;
+
+ offset = pos - folio_pos(folio);
+ len = min_t(size_t, len, folio_end_pos(folio) - pos);
+
+ if (folio_test_uptodate(folio))
+ goto out;
+
+ /* If we're writing entire folio, don't need to read it in first: */
+ if (!offset && len == folio_size(folio))
+ goto out;
+
+ if (!offset && pos + len >= inode->v.i_size) {
+ folio_zero_segment(folio, len, folio_size(folio));
+ flush_dcache_folio(folio);
+ goto out;
+ }
+
+ if (folio_pos(folio) >= inode->v.i_size) {
+ folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio));
+ flush_dcache_folio(folio);
+ goto out;
+ }
+readpage:
+ ret = bch2_read_single_folio(folio, mapping);
+ if (ret)
+ goto err;
+out:
+ ret = bch2_folio_set(c, inode_inum(inode), &folio, 1);
+ if (ret)
+ goto err;
+
+ ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len);
+ if (ret) {
+ if (!folio_test_uptodate(folio)) {
+ /*
+ * If the folio hasn't been read in, we won't know if we
+ * actually need a reservation - we don't actually need
+ * to read here, we just need to check if the folio is
+ * fully backed by uncompressed data:
+ */
+ goto readpage;
+ }
+
+ goto err;
+ }
+
+ *pagep = &folio->page;
+ return 0;
+err:
+ folio_unlock(folio);
+ folio_put(folio);
+ *pagep = NULL;
+err_unlock:
+ bch2_pagecache_add_put(inode);
+ kfree(res);
+ *fsdata = NULL;
+ return bch2_err_class(ret);
+}
+
+int bch2_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct bch_inode_info *inode = to_bch_ei(mapping->host);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch2_folio_reservation *res = fsdata;
+ struct folio *folio = page_folio(page);
+ unsigned offset = pos - folio_pos(folio);
+
+ lockdep_assert_held(&inode->v.i_rwsem);
+ BUG_ON(offset + copied > folio_size(folio));
+
+ if (unlikely(copied < len && !folio_test_uptodate(folio))) {
+ /*
+ * The folio needs to be read in, but that would destroy
+ * our partial write - simplest thing is to just force
+ * userspace to redo the write:
+ */
+ folio_zero_range(folio, 0, folio_size(folio));
+ flush_dcache_folio(folio);
+ copied = 0;
+ }
+
+ spin_lock(&inode->v.i_lock);
+ if (pos + copied > inode->v.i_size)
+ i_size_write(&inode->v, pos + copied);
+ spin_unlock(&inode->v.i_lock);
+
+ if (copied) {
+ if (!folio_test_uptodate(folio))
+ folio_mark_uptodate(folio);
+
+ bch2_set_folio_dirty(c, inode, folio, res, offset, copied);
+
+ inode->ei_last_dirtied = (unsigned long) current;
+ }
+
+ folio_unlock(folio);
+ folio_put(folio);
+ bch2_pagecache_add_put(inode);
+
+ bch2_folio_reservation_put(c, inode, res);
+ kfree(res);
+
+ return copied;
+}
+
+static noinline void folios_trunc(folios *fs, struct folio **fi)
+{
+ while (fs->data + fs->nr > fi) {
+ struct folio *f = darray_pop(fs);
+
+ folio_unlock(f);
+ folio_put(f);
+ }
+}
+
+static int __bch2_buffered_write(struct bch_inode_info *inode,
+ struct address_space *mapping,
+ struct iov_iter *iter,
+ loff_t pos, unsigned len)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch2_folio_reservation res;
+ folios fs;
+ struct folio *f;
+ unsigned copied = 0, f_offset, f_copied;
+ u64 end = pos + len, f_pos, f_len;
+ loff_t last_folio_pos = inode->v.i_size;
+ int ret = 0;
+
+ BUG_ON(!len);
+
+ bch2_folio_reservation_init(c, inode, &res);
+ darray_init(&fs);
+
+ ret = bch2_filemap_get_contig_folios_d(mapping, pos, end,
+ FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT,
+ mapping_gfp_mask(mapping),
+ &fs);
+ if (ret)
+ goto out;
+
+ BUG_ON(!fs.nr);
+
+ f = darray_first(fs);
+ if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
+ ret = bch2_read_single_folio(f, mapping);
+ if (ret)
+ goto out;
+ }
+
+ f = darray_last(fs);
+ end = min(end, folio_end_pos(f));
+ last_folio_pos = folio_pos(f);
+ if (end != folio_end_pos(f) && !folio_test_uptodate(f)) {
+ if (end >= inode->v.i_size) {
+ folio_zero_range(f, 0, folio_size(f));
+ } else {
+ ret = bch2_read_single_folio(f, mapping);
+ if (ret)
+ goto out;
+ }
+ }
+
+ ret = bch2_folio_set(c, inode_inum(inode), fs.data, fs.nr);
+ if (ret)
+ goto out;
+
+ f_pos = pos;
+ f_offset = pos - folio_pos(darray_first(fs));
+ darray_for_each(fs, fi) {
+ f = *fi;
+ f_len = min(end, folio_end_pos(f)) - f_pos;
+
+ /*
+ * XXX: per POSIX and fstests generic/275, on -ENOSPC we're
+ * supposed to write as much as we have disk space for.
+ *
+ * On failure here we should still write out a partial page if
+ * we aren't completely out of disk space - we don't do that
+ * yet:
+ */
+ ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len);
+ if (unlikely(ret)) {
+ folios_trunc(&fs, fi);
+ if (!fs.nr)
+ goto out;
+
+ end = min(end, folio_end_pos(darray_last(fs)));
+ break;
+ }
+
+ f_pos = folio_end_pos(f);
+ f_offset = 0;
+ }
+
+ if (mapping_writably_mapped(mapping))
+ darray_for_each(fs, fi)
+ flush_dcache_folio(*fi);
+
+ f_pos = pos;
+ f_offset = pos - folio_pos(darray_first(fs));
+ darray_for_each(fs, fi) {
+ f = *fi;
+ f_len = min(end, folio_end_pos(f)) - f_pos;
+ f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
+ if (!f_copied) {
+ folios_trunc(&fs, fi);
+ break;
+ }
+
+ if (!folio_test_uptodate(f) &&
+ f_copied != folio_size(f) &&
+ pos + copied + f_copied < inode->v.i_size) {
+ iov_iter_revert(iter, f_copied);
+ folio_zero_range(f, 0, folio_size(f));
+ folios_trunc(&fs, fi);
+ break;
+ }
+
+ flush_dcache_folio(f);
+ copied += f_copied;
+
+ if (f_copied != f_len) {
+ folios_trunc(&fs, fi + 1);
+ break;
+ }
+
+ f_pos = folio_end_pos(f);
+ f_offset = 0;
+ }
+
+ if (!copied)
+ goto out;
+
+ end = pos + copied;
+
+ spin_lock(&inode->v.i_lock);
+ if (end > inode->v.i_size)
+ i_size_write(&inode->v, end);
+ spin_unlock(&inode->v.i_lock);
+
+ f_pos = pos;
+ f_offset = pos - folio_pos(darray_first(fs));
+ darray_for_each(fs, fi) {
+ f = *fi;
+ f_len = min(end, folio_end_pos(f)) - f_pos;
+
+ if (!folio_test_uptodate(f))
+ folio_mark_uptodate(f);
+
+ bch2_set_folio_dirty(c, inode, f, &res, f_offset, f_len);
+
+ f_pos = folio_end_pos(f);
+ f_offset = 0;
+ }
+
+ inode->ei_last_dirtied = (unsigned long) current;
+out:
+ darray_for_each(fs, fi) {
+ folio_unlock(*fi);
+ folio_put(*fi);
+ }
+
+ /*
+ * If the last folio added to the mapping starts beyond current EOF, we
+ * performed a short write but left around at least one post-EOF folio.
+ * Clean up the mapping before we return.
+ */
+ if (last_folio_pos >= inode->v.i_size)
+ truncate_pagecache(&inode->v, inode->v.i_size);
+
+ darray_exit(&fs);
+ bch2_folio_reservation_put(c, inode, &res);
+
+ return copied ?: ret;
+}
+
+static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct bch_inode_info *inode = file_bch_inode(file);
+ loff_t pos = iocb->ki_pos;
+ ssize_t written = 0;
+ int ret = 0;
+
+ bch2_pagecache_add_get(inode);
+
+ do {
+ unsigned offset = pos & (PAGE_SIZE - 1);
+ unsigned bytes = iov_iter_count(iter);
+again:
+ /*
+ * Bring in the user page that we will copy from _first_.
+ * Otherwise there's a nasty deadlock on copying from the
+ * same page as we're writing to, without it being marked
+ * up-to-date.
+ *
+ * Not only is this an optimisation, but it is also required
+ * to check that the address is actually valid, when atomic
+ * usercopies are used, below.
+ */
+ if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
+ bytes = min_t(unsigned long, iov_iter_count(iter),
+ PAGE_SIZE - offset);
+
+ if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
+ ret = -EFAULT;
+ break;
+ }
+ }
+
+ if (unlikely(fatal_signal_pending(current))) {
+ ret = -EINTR;
+ break;
+ }
+
+ ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
+ if (unlikely(ret < 0))
+ break;
+
+ cond_resched();
+
+ if (unlikely(ret == 0)) {
+ /*
+ * If we were unable to copy any data at all, we must
+ * fall back to a single segment length write.
+ *
+ * If we didn't fallback here, we could livelock
+ * because not all segments in the iov can be copied at
+ * once without a pagefault.
+ */
+ bytes = min_t(unsigned long, PAGE_SIZE - offset,
+ iov_iter_single_seg_count(iter));
+ goto again;
+ }
+ pos += ret;
+ written += ret;
+ ret = 0;
+
+ balance_dirty_pages_ratelimited(mapping);
+ } while (iov_iter_count(iter));
+
+ bch2_pagecache_add_put(inode);
+
+ return written ? written : ret;
+}
+
+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct bch_inode_info *inode = file_bch_inode(file);
+ ssize_t ret;
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ ret = bch2_direct_write(iocb, from);
+ goto out;
+ }
+
+ inode_lock(&inode->v);
+
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto unlock;
+
+ ret = file_remove_privs(file);
+ if (ret)
+ goto unlock;
+
+ ret = file_update_time(file);
+ if (ret)
+ goto unlock;
+
+ ret = bch2_buffered_write(iocb, from);
+ if (likely(ret > 0))
+ iocb->ki_pos += ret;
+unlock:
+ inode_unlock(&inode->v);
+
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+out:
+ return bch2_err_class(ret);
+}
+
+void bch2_fs_fs_io_buffered_exit(struct bch_fs *c)
+{
+ bioset_exit(&c->writepage_bioset);
+}
+
+int bch2_fs_fs_io_buffered_init(struct bch_fs *c)
+{
+ if (bioset_init(&c->writepage_bioset,
+ 4, offsetof(struct bch_writepage_io, op.wbio.bio),
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_writepage_bioset_init;
+
+ return 0;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/c_src/libbcachefs/fs-io-buffered.h b/c_src/libbcachefs/fs-io-buffered.h
new file mode 100644
index 00000000..a6126ff7
--- /dev/null
+++ b/c_src/libbcachefs/fs-io-buffered.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_IO_BUFFERED_H
+#define _BCACHEFS_FS_IO_BUFFERED_H
+
+#ifndef NO_BCACHEFS_FS
+
+int bch2_read_single_folio(struct folio *, struct address_space *);
+int bch2_read_folio(struct file *, struct folio *);
+
+int bch2_writepages(struct address_space *, struct writeback_control *);
+void bch2_readahead(struct readahead_control *);
+
+int bch2_write_begin(struct file *, struct address_space *, loff_t,
+ unsigned, struct page **, void **);
+int bch2_write_end(struct file *, struct address_space *, loff_t,
+ unsigned, unsigned, struct page *, void *);
+
+ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *);
+
+void bch2_fs_fs_io_buffered_exit(struct bch_fs *);
+int bch2_fs_fs_io_buffered_init(struct bch_fs *);
+#else
+static inline void bch2_fs_fs_io_buffered_exit(struct bch_fs *c) {}
+static inline int bch2_fs_fs_io_buffered_init(struct bch_fs *c) { return 0; }
+#endif
+
+#endif /* _BCACHEFS_FS_IO_BUFFERED_H */
diff --git a/c_src/libbcachefs/fs-io-direct.c b/c_src/libbcachefs/fs-io-direct.c
new file mode 100644
index 00000000..fdd57c57
--- /dev/null
+++ b/c_src/libbcachefs/fs-io-direct.c
@@ -0,0 +1,674 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "fs.h"
+#include "fs-io.h"
+#include "fs-io-direct.h"
+#include "fs-io-pagecache.h"
+#include "io_read.h"
+#include "io_write.h"
+
+#include <linux/kthread.h>
+#include <linux/pagemap.h>
+#include <linux/prefetch.h>
+#include <linux/task_io_accounting_ops.h>
+
+/* O_DIRECT reads */
+
+struct dio_read {
+ struct closure cl;
+ struct kiocb *req;
+ long ret;
+ bool should_dirty;
+ struct bch_read_bio rbio;
+};
+
+static void bio_check_or_release(struct bio *bio, bool check_dirty)
+{
+ if (check_dirty) {
+ bio_check_pages_dirty(bio);
+ } else {
+ bio_release_pages(bio, false);
+ bio_put(bio);
+ }
+}
+
+static CLOSURE_CALLBACK(bch2_dio_read_complete)
+{
+ closure_type(dio, struct dio_read, cl);
+
+ dio->req->ki_complete(dio->req, dio->ret);
+ bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
+}
+
+static void bch2_direct_IO_read_endio(struct bio *bio)
+{
+ struct dio_read *dio = bio->bi_private;
+
+ if (bio->bi_status)
+ dio->ret = blk_status_to_errno(bio->bi_status);
+
+ closure_put(&dio->cl);
+}
+
+static void bch2_direct_IO_read_split_endio(struct bio *bio)
+{
+ struct dio_read *dio = bio->bi_private;
+ bool should_dirty = dio->should_dirty;
+
+ bch2_direct_IO_read_endio(bio);
+ bio_check_or_release(bio, should_dirty);
+}
+
+static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
+{
+ struct file *file = req->ki_filp;
+ struct bch_inode_info *inode = file_bch_inode(file);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_io_opts opts;
+ struct dio_read *dio;
+ struct bio *bio;
+ loff_t offset = req->ki_pos;
+ bool sync = is_sync_kiocb(req);
+ size_t shorten;
+ ssize_t ret;
+
+ bch2_inode_opts_get(&opts, c, &inode->ei_inode);
+
+ ret = min_t(loff_t, iter->count,
+ max_t(loff_t, 0, i_size_read(&inode->v) - offset));
+
+ if (!ret)
+ return ret;
+
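+	/*
+	 * Clamp the read at EOF, rounded up to the filesystem block size; the
+	 * clamp is undone once all the bios have been issued:
+	 */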
+ shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c));
+ iter->count -= shorten;
+
+ bio = bio_alloc_bioset(NULL,
+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
+ REQ_OP_READ,
+ GFP_KERNEL,
+ &c->dio_read_bioset);
+
+ bio->bi_end_io = bch2_direct_IO_read_endio;
+
+ dio = container_of(bio, struct dio_read, rbio.bio);
+ closure_init(&dio->cl, NULL);
+
+ /*
+ * this is a _really_ horrible hack just to avoid an atomic sub at the
+ * end:
+ */
+ if (!sync) {
+ set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL);
+ atomic_set(&dio->cl.remaining,
+ CLOSURE_REMAINING_INITIALIZER -
+ CLOSURE_RUNNING +
+ CLOSURE_DESTRUCTOR);
+ } else {
+ atomic_set(&dio->cl.remaining,
+ CLOSURE_REMAINING_INITIALIZER + 1);
+ dio->cl.closure_get_happened = true;
+ }
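+	/*
+	 * Either way the effect is the same: nothing completes until the last
+	 * outstanding bio drops its ref - asynchronously that runs
+	 * bch2_dio_read_complete(), synchronously it wakes closure_sync()
+	 * below before we read dio->ret.
+	 */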
+
+ dio->req = req;
+ dio->ret = ret;
+ /*
+ * This is one of the sketchier things I've encountered: we have to skip
+	 * the dirtying of requests that are internal to the kernel (i.e. from
+ * loopback), because we'll deadlock on page_lock.
+ */
+ dio->should_dirty = iter_is_iovec(iter);
+
+ goto start;
+ while (iter->count) {
+ bio = bio_alloc_bioset(NULL,
+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
+ REQ_OP_READ,
+ GFP_KERNEL,
+ &c->bio_read);
+ bio->bi_end_io = bch2_direct_IO_read_split_endio;
+start:
+ bio->bi_opf = REQ_OP_READ|REQ_SYNC;
+ bio->bi_iter.bi_sector = offset >> 9;
+ bio->bi_private = dio;
+
+ ret = bio_iov_iter_get_pages(bio, iter);
+ if (ret < 0) {
+ /* XXX: fault inject this path */
+ bio->bi_status = BLK_STS_RESOURCE;
+ bio_endio(bio);
+ break;
+ }
+
+ offset += bio->bi_iter.bi_size;
+
+ if (dio->should_dirty)
+ bio_set_pages_dirty(bio);
+
+ if (iter->count)
+ closure_get(&dio->cl);
+
+ bch2_read(c, rbio_init(bio, opts), inode_inum(inode));
+ }
+
+ iter->count += shorten;
+
+ if (sync) {
+ closure_sync(&dio->cl);
+ closure_debug_destroy(&dio->cl);
+ ret = dio->ret;
+ bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
+ return ret;
+ } else {
+ return -EIOCBQUEUED;
+ }
+}
+
+ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ struct bch_inode_info *inode = file_bch_inode(file);
+ struct address_space *mapping = file->f_mapping;
+ size_t count = iov_iter_count(iter);
+ ssize_t ret;
+
+ if (!count)
+ return 0; /* skip atime */
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ struct blk_plug plug;
+
+ if (unlikely(mapping->nrpages)) {
+ ret = filemap_write_and_wait_range(mapping,
+ iocb->ki_pos,
+ iocb->ki_pos + count - 1);
+ if (ret < 0)
+ goto out;
+ }
+
+ file_accessed(file);
+
+ blk_start_plug(&plug);
+ ret = bch2_direct_IO_read(iocb, iter);
+ blk_finish_plug(&plug);
+
+ if (ret >= 0)
+ iocb->ki_pos += ret;
+ } else {
+ bch2_pagecache_add_get(inode);
+ ret = generic_file_read_iter(iocb, iter);
+ bch2_pagecache_add_put(inode);
+ }
+out:
+ return bch2_err_class(ret);
+}
+
+/* O_DIRECT writes */
+
+struct dio_write {
+ struct kiocb *req;
+ struct address_space *mapping;
+ struct bch_inode_info *inode;
+ struct mm_struct *mm;
+ const struct iovec *iov;
+ unsigned loop:1,
+ extending:1,
+ sync:1,
+ flush:1;
+ struct quota_res quota_res;
+ u64 written;
+
+ struct iov_iter iter;
+ struct iovec inline_vecs[2];
+
+ /* must be last: */
+ struct bch_write_op op;
+};
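+/*
+ * Like dio_read, dio_writes come out of c->dio_write_bioset with the bio
+ * embedded at offsetof(struct dio_write, op.wbio.bio), hence op being last.
+ */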
+
+static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum,
+ u64 offset, u64 size,
+ unsigned nr_replicas, bool compressed)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ u64 end = offset + size;
+ u32 snapshot;
+ bool ret = true;
+ int err;
+retry:
+ bch2_trans_begin(trans);
+
+ err = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (err)
+ goto err;
+
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
+ SPOS(inum.inum, offset, snapshot),
+ BTREE_ITER_SLOTS, k, err) {
+ if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end)))
+ break;
+
+ if (k.k->p.snapshot != snapshot ||
+ nr_replicas > bch2_bkey_replicas(c, k) ||
+ (!compressed && bch2_bkey_sectors_compressed(k))) {
+ ret = false;
+ break;
+ }
+ }
+
+ offset = iter.pos.offset;
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ if (bch2_err_matches(err, BCH_ERR_transaction_restart))
+ goto retry;
+ bch2_trans_put(trans);
+
+ return err ? false : ret;
+}
+
+static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio)
+{
+ struct bch_fs *c = dio->op.c;
+ struct bch_inode_info *inode = dio->inode;
+ struct bio *bio = &dio->op.wbio.bio;
+
+ return bch2_check_range_allocated(c, inode_inum(inode),
+ dio->op.pos.offset, bio_sectors(bio),
+ dio->op.opts.data_replicas,
+ dio->op.opts.compression != 0);
+}
+
+static void bch2_dio_write_loop_async(struct bch_write_op *);
+static __always_inline long bch2_dio_write_done(struct dio_write *dio);
+
+/*
+ * We're going to return -EIOCBQUEUED, but we haven't finished consuming the
+ * iov_iter yet, so we need to stash a copy of the iovec: it might be on the
+ * caller's stack, we're not guaranteed that it will live for the duration of
+ * the IO:
+ */
+static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
+{
+ struct iovec *iov = dio->inline_vecs;
+
+ /*
+ * iov_iter has a single embedded iovec - nothing to do:
+ */
+ if (iter_is_ubuf(&dio->iter))
+ return 0;
+
+ /*
+ * We don't currently handle non-iovec iov_iters here - return an error,
+ * and we'll fall back to doing the IO synchronously:
+ */
+ if (!iter_is_iovec(&dio->iter))
+ return -1;
+
+ if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
+ dio->iov = iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
+ GFP_KERNEL);
+ if (unlikely(!iov))
+ return -ENOMEM;
+ }
+
+ memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov));
+ dio->iter.__iov = iov;
+ return 0;
+}
+
+static CLOSURE_CALLBACK(bch2_dio_write_flush_done)
+{
+ closure_type(dio, struct dio_write, op.cl);
+ struct bch_fs *c = dio->op.c;
+
+ closure_debug_destroy(cl);
+
+ dio->op.error = bch2_journal_error(&c->journal);
+
+ bch2_dio_write_done(dio);
+}
+
+static noinline void bch2_dio_write_flush(struct dio_write *dio)
+{
+ struct bch_fs *c = dio->op.c;
+ struct bch_inode_unpacked inode;
+ int ret;
+
+ dio->flush = 0;
+
+ closure_init(&dio->op.cl, NULL);
+
+ if (!dio->op.error) {
+ ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode);
+ if (ret) {
+ dio->op.error = ret;
+ } else {
+ bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq,
+ &dio->op.cl);
+ bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl);
+ }
+ }
+
+ if (dio->sync) {
+ closure_sync(&dio->op.cl);
+ closure_debug_destroy(&dio->op.cl);
+ } else {
+ continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL);
+ }
+}
+
+static __always_inline long bch2_dio_write_done(struct dio_write *dio)
+{
+ struct kiocb *req = dio->req;
+ struct bch_inode_info *inode = dio->inode;
+ bool sync = dio->sync;
+ long ret;
+
+ if (unlikely(dio->flush)) {
+ bch2_dio_write_flush(dio);
+ if (!sync)
+ return -EIOCBQUEUED;
+ }
+
+ bch2_pagecache_block_put(inode);
+
+ kfree(dio->iov);
+
+ ret = dio->op.error ?: ((long) dio->written << 9);
+ bio_put(&dio->op.wbio.bio);
+
+ /* inode->i_dio_count is our ref on inode and thus bch_fs */
+ inode_dio_end(&inode->v);
+
+ if (ret < 0)
+ ret = bch2_err_class(ret);
+
+ if (!sync) {
+ req->ki_complete(req, ret);
+ ret = -EIOCBQUEUED;
+ }
+ return ret;
+}
+
+static __always_inline void bch2_dio_write_end(struct dio_write *dio)
+{
+ struct bch_fs *c = dio->op.c;
+ struct kiocb *req = dio->req;
+ struct bch_inode_info *inode = dio->inode;
+ struct bio *bio = &dio->op.wbio.bio;
+
+ req->ki_pos += (u64) dio->op.written << 9;
+ dio->written += dio->op.written;
+
+ if (dio->extending) {
+ spin_lock(&inode->v.i_lock);
+ if (req->ki_pos > inode->v.i_size)
+ i_size_write(&inode->v, req->ki_pos);
+ spin_unlock(&inode->v.i_lock);
+ }
+
+ if (dio->op.i_sectors_delta || dio->quota_res.sectors) {
+ mutex_lock(&inode->ei_quota_lock);
+ __bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta);
+ __bch2_quota_reservation_put(c, inode, &dio->quota_res);
+ mutex_unlock(&inode->ei_quota_lock);
+ }
+
+ bio_release_pages(bio, false);
+
+ if (unlikely(dio->op.error))
+ set_bit(EI_INODE_ERROR, &inode->ei_flags);
+}
+
+static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
+{
+ struct bch_fs *c = dio->op.c;
+ struct kiocb *req = dio->req;
+ struct address_space *mapping = dio->mapping;
+ struct bch_inode_info *inode = dio->inode;
+ struct bch_io_opts opts;
+ struct bio *bio = &dio->op.wbio.bio;
+ unsigned unaligned, iter_count;
+ bool sync = dio->sync, dropped_locks;
+ long ret;
+
+ bch2_inode_opts_get(&opts, c, &inode->ei_inode);
+
+ while (1) {
+ iter_count = dio->iter.count;
+
+ EBUG_ON(current->faults_disabled_mapping);
+ current->faults_disabled_mapping = mapping;
+
+ ret = bio_iov_iter_get_pages(bio, &dio->iter);
+
+ dropped_locks = fdm_dropped_locks();
+
+ current->faults_disabled_mapping = NULL;
+
+ /*
+ * If the fault handler returned an error but also signalled
+ * that it dropped & retook ei_pagecache_lock, we just need to
+ * re-shoot down the page cache and retry:
+ */
+ if (dropped_locks && ret)
+ ret = 0;
+
+ if (unlikely(ret < 0))
+ goto err;
+
+ if (unlikely(dropped_locks)) {
+ ret = bch2_write_invalidate_inode_pages_range(mapping,
+ req->ki_pos,
+ req->ki_pos + iter_count - 1);
+ if (unlikely(ret))
+ goto err;
+
+ if (!bio->bi_iter.bi_size)
+ continue;
+ }
+
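+		/*
+		 * Trim the bio to a filesystem block boundary and push the
+		 * unaligned tail back into the iter; it'll be picked up on the
+		 * next pass, or we bail out below if we got less than a block:
+		 */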
+ unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
+ bio->bi_iter.bi_size -= unaligned;
+ iov_iter_revert(&dio->iter, unaligned);
+
+ if (!bio->bi_iter.bi_size) {
+ /*
+ * bio_iov_iter_get_pages was only able to get <
+ * blocksize worth of pages:
+ */
+ ret = -EFAULT;
+ goto err;
+ }
+
+ bch2_write_op_init(&dio->op, c, opts);
+ dio->op.end_io = sync
+ ? NULL
+ : bch2_dio_write_loop_async;
+ dio->op.target = dio->op.opts.foreground_target;
+ dio->op.write_point = writepoint_hashed((unsigned long) current);
+ dio->op.nr_replicas = dio->op.opts.data_replicas;
+ dio->op.subvol = inode->ei_subvol;
+ dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
+ dio->op.devs_need_flush = &inode->ei_devs_need_flush;
+
+ if (sync)
+ dio->op.flags |= BCH_WRITE_SYNC;
+ dio->op.flags |= BCH_WRITE_CHECK_ENOSPC;
+
+ ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
+ bio_sectors(bio), true);
+ if (unlikely(ret))
+ goto err;
+
+ ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio),
+ dio->op.opts.data_replicas, 0);
+ if (unlikely(ret) &&
+ !bch2_dio_write_check_allocated(dio))
+ goto err;
+
+ task_io_account_write(bio->bi_iter.bi_size);
+
+ if (unlikely(dio->iter.count) &&
+ !dio->sync &&
+ !dio->loop &&
+ bch2_dio_write_copy_iov(dio))
+ dio->sync = sync = true;
+
+ dio->loop = true;
+ closure_call(&dio->op.cl, bch2_write, NULL, NULL);
+
+ if (!sync)
+ return -EIOCBQUEUED;
+
+ bch2_dio_write_end(dio);
+
+ if (likely(!dio->iter.count) || dio->op.error)
+ break;
+
+ bio_reset(bio, NULL, REQ_OP_WRITE);
+ }
+out:
+ return bch2_dio_write_done(dio);
+err:
+ dio->op.error = ret;
+
+ bio_release_pages(bio, false);
+
+ bch2_quota_reservation_put(c, inode, &dio->quota_res);
+ goto out;
+}
+
+static noinline __cold void bch2_dio_write_continue(struct dio_write *dio)
+{
+ struct mm_struct *mm = dio->mm;
+
+ bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE);
+
+ if (mm)
+ kthread_use_mm(mm);
+ bch2_dio_write_loop(dio);
+ if (mm)
+ kthread_unuse_mm(mm);
+}
+
+static void bch2_dio_write_loop_async(struct bch_write_op *op)
+{
+ struct dio_write *dio = container_of(op, struct dio_write, op);
+
+ bch2_dio_write_end(dio);
+
+ if (likely(!dio->iter.count) || dio->op.error)
+ bch2_dio_write_done(dio);
+ else
+ bch2_dio_write_continue(dio);
+}
+
+ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
+{
+ struct file *file = req->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct bch_inode_info *inode = file_bch_inode(file);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct dio_write *dio;
+ struct bio *bio;
+ bool locked = true, extending;
+ ssize_t ret;
+
+ prefetch(&c->opts);
+ prefetch((void *) &c->opts + 64);
+ prefetch(&inode->ei_inode);
+ prefetch((void *) &inode->ei_inode + 64);
+
+ inode_lock(&inode->v);
+
+ ret = generic_write_checks(req, iter);
+ if (unlikely(ret <= 0))
+ goto err;
+
+ ret = file_remove_privs(file);
+ if (unlikely(ret))
+ goto err;
+
+ ret = file_update_time(file);
+ if (unlikely(ret))
+ goto err;
+
+ if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
+ goto err;
+
+ inode_dio_begin(&inode->v);
+ bch2_pagecache_block_get(inode);
+
+ extending = req->ki_pos + iter->count > inode->v.i_size;
+ if (!extending) {
+ inode_unlock(&inode->v);
+ locked = false;
+ }
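+	/*
+	 * Size-extending writes keep the inode lock held and (below) are
+	 * forced synchronous, so the i_size update happens before we return:
+	 */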
+
+ bio = bio_alloc_bioset(NULL,
+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
+ REQ_OP_WRITE,
+ GFP_KERNEL,
+ &c->dio_write_bioset);
+ dio = container_of(bio, struct dio_write, op.wbio.bio);
+ dio->req = req;
+ dio->mapping = mapping;
+ dio->inode = inode;
+ dio->mm = current->mm;
+ dio->iov = NULL;
+ dio->loop = false;
+ dio->extending = extending;
+ dio->sync = is_sync_kiocb(req) || extending;
+ dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
+ dio->quota_res.sectors = 0;
+ dio->written = 0;
+ dio->iter = *iter;
+ dio->op.c = c;
+
+ if (unlikely(mapping->nrpages)) {
+ ret = bch2_write_invalidate_inode_pages_range(mapping,
+ req->ki_pos,
+ req->ki_pos + iter->count - 1);
+ if (unlikely(ret))
+ goto err_put_bio;
+ }
+
+ ret = bch2_dio_write_loop(dio);
+err:
+ if (locked)
+ inode_unlock(&inode->v);
+ return ret;
+err_put_bio:
+ bch2_pagecache_block_put(inode);
+ bio_put(bio);
+ inode_dio_end(&inode->v);
+ goto err;
+}
+
+void bch2_fs_fs_io_direct_exit(struct bch_fs *c)
+{
+ bioset_exit(&c->dio_write_bioset);
+ bioset_exit(&c->dio_read_bioset);
+}
+
+int bch2_fs_fs_io_direct_init(struct bch_fs *c)
+{
+ if (bioset_init(&c->dio_read_bioset,
+ 4, offsetof(struct dio_read, rbio.bio),
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_dio_read_bioset_init;
+
+ if (bioset_init(&c->dio_write_bioset,
+ 4, offsetof(struct dio_write, op.wbio.bio),
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_dio_write_bioset_init;
+
+ return 0;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/c_src/libbcachefs/fs-io-direct.h b/c_src/libbcachefs/fs-io-direct.h
new file mode 100644
index 00000000..814621ec
--- /dev/null
+++ b/c_src/libbcachefs/fs-io-direct.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_IO_DIRECT_H
+#define _BCACHEFS_FS_IO_DIRECT_H
+
+#ifndef NO_BCACHEFS_FS
+ssize_t bch2_direct_write(struct kiocb *, struct iov_iter *);
+ssize_t bch2_read_iter(struct kiocb *, struct iov_iter *);
+
+void bch2_fs_fs_io_direct_exit(struct bch_fs *);
+int bch2_fs_fs_io_direct_init(struct bch_fs *);
+#else
+static inline void bch2_fs_fs_io_direct_exit(struct bch_fs *c) {}
+static inline int bch2_fs_fs_io_direct_init(struct bch_fs *c) { return 0; }
+#endif
+
+#endif /* _BCACHEFS_FS_IO_DIRECT_H */
diff --git a/c_src/libbcachefs/fs-io-pagecache.c b/c_src/libbcachefs/fs-io-pagecache.c
new file mode 100644
index 00000000..ff664fd0
--- /dev/null
+++ b/c_src/libbcachefs/fs-io-pagecache.c
@@ -0,0 +1,791 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "btree_iter.h"
+#include "extents.h"
+#include "fs-io.h"
+#include "fs-io-pagecache.h"
+#include "subvolume.h"
+
+#include <linux/pagevec.h>
+#include <linux/writeback.h>
+
+int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
+ loff_t start, u64 end,
+ fgf_t fgp_flags, gfp_t gfp,
+ folios *fs)
+{
+ struct folio *f;
+ u64 pos = start;
+ int ret = 0;
+
+ while (pos < end) {
+ if ((u64) pos >= (u64) start + (1ULL << 20))
+ fgp_flags &= ~FGP_CREAT;
+
+ ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
+ if (ret)
+ break;
+
+ f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
+ if (IS_ERR_OR_NULL(f))
+ break;
+
+ BUG_ON(fs->nr && folio_pos(f) != pos);
+
+ pos = folio_end_pos(f);
+ darray_push(fs, f);
+ }
+
+ if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
+ ret = -ENOMEM;
+
+ return fs->nr ? 0 : ret;
+}
+
+/* pagecache_block must be held */
+int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
+ loff_t start, loff_t end)
+{
+ int ret;
+
+ /*
+ * XXX: the way this is currently implemented, we can spin if a process
+ * is continually redirtying a specific page
+ */
+ do {
+ if (!mapping->nrpages)
+ return 0;
+
+ ret = filemap_write_and_wait_range(mapping, start, end);
+ if (ret)
+ break;
+
+ if (!mapping->nrpages)
+ return 0;
+
+ ret = invalidate_inode_pages2_range(mapping,
+ start >> PAGE_SHIFT,
+ end >> PAGE_SHIFT);
+ } while (ret == -EBUSY);
+
+ return ret;
+}
+
+#if 0
+/* Useful for debug tracing: */
+static const char * const bch2_folio_sector_states[] = {
+#define x(n) #n,
+ BCH_FOLIO_SECTOR_STATE()
+#undef x
+ NULL
+};
+#endif
+
+static inline enum bch_folio_sector_state
+folio_sector_dirty(enum bch_folio_sector_state state)
+{
+ switch (state) {
+ case SECTOR_unallocated:
+ return SECTOR_dirty;
+ case SECTOR_reserved:
+ return SECTOR_dirty_reserved;
+ default:
+ return state;
+ }
+}
+
+static inline enum bch_folio_sector_state
+folio_sector_undirty(enum bch_folio_sector_state state)
+{
+ switch (state) {
+ case SECTOR_dirty:
+ return SECTOR_unallocated;
+ case SECTOR_dirty_reserved:
+ return SECTOR_reserved;
+ default:
+ return state;
+ }
+}
+
+static inline enum bch_folio_sector_state
+folio_sector_reserve(enum bch_folio_sector_state state)
+{
+ switch (state) {
+ case SECTOR_unallocated:
+ return SECTOR_reserved;
+ case SECTOR_dirty:
+ return SECTOR_dirty_reserved;
+ default:
+ return state;
+ }
+}
+
+/* for newly allocated folios: */
+struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
+{
+ struct bch_folio *s;
+
+ s = kzalloc(sizeof(*s) +
+ sizeof(struct bch_folio_sector) *
+ folio_sectors(folio), gfp);
+ if (!s)
+ return NULL;
+
+ spin_lock_init(&s->lock);
+ folio_attach_private(folio, s);
+ return s;
+}
+
+struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
+{
+ return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
+}
+
+static unsigned bkey_to_sector_state(struct bkey_s_c k)
+{
+ if (bkey_extent_is_reservation(k))
+ return SECTOR_reserved;
+ if (bkey_extent_is_allocation(k.k))
+ return SECTOR_allocated;
+ return SECTOR_unallocated;
+}
+
+static void __bch2_folio_set(struct folio *folio,
+ unsigned pg_offset, unsigned pg_len,
+ unsigned nr_ptrs, unsigned state)
+{
+ struct bch_folio *s = bch2_folio(folio);
+ unsigned i, sectors = folio_sectors(folio);
+
+ BUG_ON(pg_offset >= sectors);
+ BUG_ON(pg_offset + pg_len > sectors);
+
+ spin_lock(&s->lock);
+
+ for (i = pg_offset; i < pg_offset + pg_len; i++) {
+ s->s[i].nr_replicas = nr_ptrs;
+ bch2_folio_sector_set(folio, s, i, state);
+ }
+
+ if (i == sectors)
+ s->uptodate = true;
+
+ spin_unlock(&s->lock);
+}
+
+/*
+ * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
+ * extents btree:
+ */
+int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
+ struct folio **fs, unsigned nr_folios)
+{
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bch_folio *s;
+ u64 offset = folio_sector(fs[0]);
+ unsigned folio_idx;
+ u32 snapshot;
+ bool need_set = false;
+ int ret;
+
+ for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
+ s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ need_set |= !s->uptodate;
+ }
+
+ if (!need_set)
+ return 0;
+
+ folio_idx = 0;
+ trans = bch2_trans_get(c);
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
+ SPOS(inum.inum, offset, snapshot),
+ BTREE_ITER_SLOTS, k, ret) {
+ unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
+ unsigned state = bkey_to_sector_state(k);
+
+ while (folio_idx < nr_folios) {
+ struct folio *folio = fs[folio_idx];
+ u64 folio_start = folio_sector(folio);
+ u64 folio_end = folio_end_sector(folio);
+ unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
+ folio_start;
+ unsigned folio_len = min(k.k->p.offset, folio_end) -
+ folio_offset - folio_start;
+
+ BUG_ON(k.k->p.offset < folio_start);
+ BUG_ON(bkey_start_offset(k.k) > folio_end);
+
+ if (!bch2_folio(folio)->uptodate)
+ __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);
+
+ if (k.k->p.offset < folio_end)
+ break;
+ folio_idx++;
+ }
+
+ if (folio_idx == nr_folios)
+ break;
+ }
+
+ offset = iter.pos.offset;
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+ bch2_trans_put(trans);
+
+ return ret;
+}
+
+void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
+{
+ struct bvec_iter iter;
+ struct folio_vec fv;
+ unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
+ ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
+ unsigned state = bkey_to_sector_state(k);
+
+ bio_for_each_folio(fv, bio, iter)
+ __bch2_folio_set(fv.fv_folio,
+ fv.fv_offset >> 9,
+ fv.fv_len >> 9,
+ nr_ptrs, state);
+}
+
+void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
+ u64 start, u64 end)
+{
+ pgoff_t index = start >> PAGE_SECTORS_SHIFT;
+ pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
+ struct folio_batch fbatch;
+ unsigned i, j;
+
+ if (end <= start)
+ return;
+
+ folio_batch_init(&fbatch);
+
+ while (filemap_get_folios(inode->v.i_mapping,
+ &index, end_index, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+ u64 folio_start = folio_sector(folio);
+ u64 folio_end = folio_end_sector(folio);
+ unsigned folio_offset = max(start, folio_start) - folio_start;
+ unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
+ struct bch_folio *s;
+
+ BUG_ON(end <= folio_start);
+
+ folio_lock(folio);
+ s = bch2_folio(folio);
+
+ if (s) {
+ spin_lock(&s->lock);
+ for (j = folio_offset; j < folio_offset + folio_len; j++)
+ s->s[j].nr_replicas = 0;
+ spin_unlock(&s->lock);
+ }
+
+ folio_unlock(folio);
+ }
+ folio_batch_release(&fbatch);
+ cond_resched();
+ }
+}
+
+void bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
+ u64 start, u64 end)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ pgoff_t index = start >> PAGE_SECTORS_SHIFT;
+ pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
+ struct folio_batch fbatch;
+ s64 i_sectors_delta = 0;
+ unsigned i, j;
+
+ if (end <= start)
+ return;
+
+ folio_batch_init(&fbatch);
+
+ while (filemap_get_folios(inode->v.i_mapping,
+ &index, end_index, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+ u64 folio_start = folio_sector(folio);
+ u64 folio_end = folio_end_sector(folio);
+ unsigned folio_offset = max(start, folio_start) - folio_start;
+ unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
+ struct bch_folio *s;
+
+ BUG_ON(end <= folio_start);
+
+ folio_lock(folio);
+ s = bch2_folio(folio);
+
+ if (s) {
+ spin_lock(&s->lock);
+ for (j = folio_offset; j < folio_offset + folio_len; j++) {
+ i_sectors_delta -= s->s[j].state == SECTOR_dirty;
+ bch2_folio_sector_set(folio, s, j,
+ folio_sector_reserve(s->s[j].state));
+ }
+ spin_unlock(&s->lock);
+ }
+
+ folio_unlock(folio);
+ }
+ folio_batch_release(&fbatch);
+ cond_resched();
+ }
+
+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
+}
+
+static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
+ unsigned nr_replicas)
+{
+ return max(0, (int) nr_replicas -
+ s->nr_replicas -
+ s->replicas_reserved);
+}
+
+int bch2_get_folio_disk_reservation(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct folio *folio, bool check_enospc)
+{
+ struct bch_folio *s = bch2_folio_create(folio, 0);
+ unsigned nr_replicas = inode_nr_replicas(c, inode);
+ struct disk_reservation disk_res = { 0 };
+ unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
+ int ret;
+
+ if (!s)
+ return -ENOMEM;
+
+ for (i = 0; i < sectors; i++)
+ disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
+
+ if (!disk_res_sectors)
+ return 0;
+
+ ret = bch2_disk_reservation_get(c, &disk_res,
+ disk_res_sectors, 1,
+ !check_enospc
+ ? BCH_DISK_RESERVATION_NOFAIL
+ : 0);
+ if (unlikely(ret))
+ return ret;
+
+ for (i = 0; i < sectors; i++)
+ s->s[i].replicas_reserved +=
+ sectors_to_reserve(&s->s[i], nr_replicas);
+
+ return 0;
+}
+
+void bch2_folio_reservation_put(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct bch2_folio_reservation *res)
+{
+ bch2_disk_reservation_put(c, &res->disk);
+ bch2_quota_reservation_put(c, inode, &res->quota);
+}
+
+int bch2_folio_reservation_get(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct folio *folio,
+ struct bch2_folio_reservation *res,
+ unsigned offset, unsigned len)
+{
+ struct bch_folio *s = bch2_folio_create(folio, 0);
+ unsigned i, disk_sectors = 0, quota_sectors = 0;
+ int ret;
+
+ if (!s)
+ return -ENOMEM;
+
+ BUG_ON(!s->uptodate);
+
+ for (i = round_down(offset, block_bytes(c)) >> 9;
+ i < round_up(offset + len, block_bytes(c)) >> 9;
+ i++) {
+ disk_sectors += sectors_to_reserve(&s->s[i],
+ res->disk.nr_replicas);
+ quota_sectors += s->s[i].state == SECTOR_unallocated;
+ }
+
+ if (disk_sectors) {
+ ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
+ if (unlikely(ret))
+ return ret;
+ }
+
+ if (quota_sectors) {
+ ret = bch2_quota_reservation_add(c, inode, &res->quota,
+ quota_sectors, true);
+ if (unlikely(ret)) {
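+			/*
+			 * Only unwind the disk sectors added by this call;
+			 * whatever the caller had already reserved in
+			 * res->disk is left intact:
+			 */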
+ struct disk_reservation tmp = {
+ .sectors = disk_sectors
+ };
+
+ bch2_disk_reservation_put(c, &tmp);
+ res->disk.sectors -= disk_sectors;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void bch2_clear_folio_bits(struct folio *folio)
+{
+ struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_folio *s = bch2_folio(folio);
+ struct disk_reservation disk_res = { 0 };
+ int i, sectors = folio_sectors(folio), dirty_sectors = 0;
+
+ if (!s)
+ return;
+
+ EBUG_ON(!folio_test_locked(folio));
+ EBUG_ON(folio_test_writeback(folio));
+
+ for (i = 0; i < sectors; i++) {
+ disk_res.sectors += s->s[i].replicas_reserved;
+ s->s[i].replicas_reserved = 0;
+
+ dirty_sectors -= s->s[i].state == SECTOR_dirty;
+ bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
+ }
+
+ bch2_disk_reservation_put(c, &disk_res);
+
+ bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);
+
+ bch2_folio_release(folio);
+}
+
+void bch2_set_folio_dirty(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct folio *folio,
+ struct bch2_folio_reservation *res,
+ unsigned offset, unsigned len)
+{
+ struct bch_folio *s = bch2_folio(folio);
+ unsigned i, dirty_sectors = 0;
+
+ WARN_ON((u64) folio_pos(folio) + offset + len >
+ round_up((u64) i_size_read(&inode->v), block_bytes(c)));
+
+ BUG_ON(!s->uptodate);
+
+ spin_lock(&s->lock);
+
+ for (i = round_down(offset, block_bytes(c)) >> 9;
+ i < round_up(offset + len, block_bytes(c)) >> 9;
+ i++) {
+ unsigned sectors = sectors_to_reserve(&s->s[i],
+ res->disk.nr_replicas);
+
+ /*
+ * This can happen if we race with the error path in
+ * bch2_writepage_io_done():
+ */
+ sectors = min_t(unsigned, sectors, res->disk.sectors);
+
+ s->s[i].replicas_reserved += sectors;
+ res->disk.sectors -= sectors;
+
+ dirty_sectors += s->s[i].state == SECTOR_unallocated;
+
+ bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
+ }
+
+ spin_unlock(&s->lock);
+
+ bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);
+
+ if (!folio_test_dirty(folio))
+ filemap_dirty_folio(inode->v.i_mapping, folio);
+}
+
+vm_fault_t bch2_page_fault(struct vm_fault *vmf)
+{
+ struct file *file = vmf->vma->vm_file;
+ struct address_space *mapping = file->f_mapping;
+ struct address_space *fdm = faults_disabled_mapping();
+ struct bch_inode_info *inode = file_bch_inode(file);
+ vm_fault_t ret;
+
+ if (fdm == mapping)
+ return VM_FAULT_SIGBUS;
+
+ /* Lock ordering: */
+ if (fdm > mapping) {
+ struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
+
+ if (bch2_pagecache_add_tryget(inode))
+ goto got_lock;
+
+ bch2_pagecache_block_put(fdm_host);
+
+ bch2_pagecache_add_get(inode);
+ bch2_pagecache_add_put(inode);
+
+ bch2_pagecache_block_get(fdm_host);
+
+ /* Signal that lock has been dropped: */
+ set_fdm_dropped_locks();
+ return VM_FAULT_SIGBUS;
+ }
+
+ bch2_pagecache_add_get(inode);
+got_lock:
+ ret = filemap_fault(vmf);
+ bch2_pagecache_add_put(inode);
+
+ return ret;
+}
+
+vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
+{
+ struct folio *folio = page_folio(vmf->page);
+ struct file *file = vmf->vma->vm_file;
+ struct bch_inode_info *inode = file_bch_inode(file);
+ struct address_space *mapping = file->f_mapping;
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch2_folio_reservation res;
+ unsigned len;
+ loff_t isize;
+ vm_fault_t ret;
+
+ bch2_folio_reservation_init(c, inode, &res);
+
+ sb_start_pagefault(inode->v.i_sb);
+ file_update_time(file);
+
+ /*
+ * Not strictly necessary, but helps avoid dio writes livelocking in
+ * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
+ * a bch2_write_invalidate_inode_pages_range() that works without dropping
+	 * the page lock before invalidating the page
+ */
+ bch2_pagecache_add_get(inode);
+
+ folio_lock(folio);
+ isize = i_size_read(&inode->v);
+
+ if (folio->mapping != mapping || folio_pos(folio) >= isize) {
+ folio_unlock(folio);
+ ret = VM_FAULT_NOPAGE;
+ goto out;
+ }
+
+ len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));
+
+ if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
+ bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
+ folio_unlock(folio);
+ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+
+ bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
+ bch2_folio_reservation_put(c, inode, &res);
+
+ folio_wait_stable(folio);
+ ret = VM_FAULT_LOCKED;
+out:
+ bch2_pagecache_add_put(inode);
+ sb_end_pagefault(inode->v.i_sb);
+
+ return ret;
+}
+
+void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
+{
+ if (offset || length < folio_size(folio))
+ return;
+
+ bch2_clear_folio_bits(folio);
+}
+
+bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
+{
+ if (folio_test_dirty(folio) || folio_test_writeback(folio))
+ return false;
+
+ bch2_clear_folio_bits(folio);
+ return true;
+}
+
+/* fseek: */
+
+static int folio_data_offset(struct folio *folio, loff_t pos,
+ unsigned min_replicas)
+{
+ struct bch_folio *s = bch2_folio(folio);
+ unsigned i, sectors = folio_sectors(folio);
+
+ if (s)
+ for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
+ if (s->s[i].state >= SECTOR_dirty &&
+ s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
+ return i << SECTOR_SHIFT;
+
+ return -1;
+}
+
+loff_t bch2_seek_pagecache_data(struct inode *vinode,
+ loff_t start_offset,
+ loff_t end_offset,
+ unsigned min_replicas,
+ bool nonblock)
+{
+ struct folio_batch fbatch;
+ pgoff_t start_index = start_offset >> PAGE_SHIFT;
+ pgoff_t end_index = end_offset >> PAGE_SHIFT;
+ pgoff_t index = start_index;
+ unsigned i;
+ loff_t ret;
+ int offset;
+
+ folio_batch_init(&fbatch);
+
+ while (filemap_get_folios(vinode->i_mapping,
+ &index, end_index, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+
+ if (!nonblock) {
+ folio_lock(folio);
+ } else if (!folio_trylock(folio)) {
+ folio_batch_release(&fbatch);
+ return -EAGAIN;
+ }
+
+ offset = folio_data_offset(folio,
+ max(folio_pos(folio), start_offset),
+ min_replicas);
+ if (offset >= 0) {
+ ret = clamp(folio_pos(folio) + offset,
+ start_offset, end_offset);
+ folio_unlock(folio);
+ folio_batch_release(&fbatch);
+ return ret;
+ }
+ folio_unlock(folio);
+ }
+ folio_batch_release(&fbatch);
+ cond_resched();
+ }
+
+ return end_offset;
+}
+
+/*
+ * Search for a hole in a folio.
+ *
+ * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
+ * code to indicate a pagecache hole exists at the returned offset. Otherwise
+ * return 0 if the folio is filled with data, or an error code. This function
+ * can return -EAGAIN if nonblock is specified.
+ */
+static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
+ unsigned min_replicas, bool nonblock)
+{
+ struct folio *folio;
+ struct bch_folio *s;
+ unsigned i, sectors;
+ int ret = -ENOENT;
+
+ folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
+ FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+
+ s = bch2_folio(folio);
+ if (!s)
+ goto unlock;
+
+ sectors = folio_sectors(folio);
+ for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
+ if (s->s[i].state < SECTOR_dirty ||
+ s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
+ *offset = max(*offset,
+ folio_pos(folio) + (i << SECTOR_SHIFT));
+ goto unlock;
+ }
+
+ *offset = folio_end_pos(folio);
+ ret = 0;
+unlock:
+ folio_unlock(folio);
+ folio_put(folio);
+ return ret;
+}
+
+loff_t bch2_seek_pagecache_hole(struct inode *vinode,
+ loff_t start_offset,
+ loff_t end_offset,
+ unsigned min_replicas,
+ bool nonblock)
+{
+ struct address_space *mapping = vinode->i_mapping;
+ loff_t offset = start_offset;
+ loff_t ret = 0;
+
+ while (!ret && offset < end_offset)
+ ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);
+
+ if (ret && ret != -ENOENT)
+ return ret;
+ return min(offset, end_offset);
+}
+
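+/*
+ * Unlike the helpers above, which take byte offsets, hole_start/hole_end here
+ * are in 512-byte sectors - hence the << 9 / >> 9 conversions:
+ */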
+int bch2_clamp_data_hole(struct inode *inode,
+ u64 *hole_start,
+ u64 *hole_end,
+ unsigned min_replicas,
+ bool nonblock)
+{
+ loff_t ret;
+
+ ret = bch2_seek_pagecache_hole(inode,
+ *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
+ if (ret < 0)
+ return ret;
+
+ *hole_start = ret;
+
+ if (*hole_start == *hole_end)
+ return 0;
+
+ ret = bch2_seek_pagecache_data(inode,
+ *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
+ if (ret < 0)
+ return ret;
+
+ *hole_end = ret;
+ return 0;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/c_src/libbcachefs/fs-io-pagecache.h b/c_src/libbcachefs/fs-io-pagecache.h
new file mode 100644
index 00000000..27f712ae
--- /dev/null
+++ b/c_src/libbcachefs/fs-io-pagecache.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_IO_PAGECACHE_H
+#define _BCACHEFS_FS_IO_PAGECACHE_H
+
+#include <linux/pagemap.h>
+
+typedef DARRAY(struct folio *) folios;
+
+int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t,
+ u64, fgf_t, gfp_t, folios *);
+int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t);
+
+/*
+ * Use u64 for the end pos and sector helpers because if the folio covers the
+ * max supported range of the mapping, the start offset of the next folio
+ * overflows loff_t. This breaks much of the range based processing in the
+ * buffered write path.
+ */
+static inline u64 folio_end_pos(struct folio *folio)
+{
+ return folio_pos(folio) + folio_size(folio);
+}
+
+static inline size_t folio_sectors(struct folio *folio)
+{
+ return PAGE_SECTORS << folio_order(folio);
+}
+
+static inline loff_t folio_sector(struct folio *folio)
+{
+ return folio_pos(folio) >> 9;
+}
+
+static inline u64 folio_end_sector(struct folio *folio)
+{
+ return folio_end_pos(folio) >> 9;
+}
+
+#define BCH_FOLIO_SECTOR_STATE() \
+ x(unallocated) \
+ x(reserved) \
+ x(dirty) \
+ x(dirty_reserved) \
+ x(allocated)
+
+enum bch_folio_sector_state {
+#define x(n) SECTOR_##n,
+ BCH_FOLIO_SECTOR_STATE()
+#undef x
+};
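+/*
+ * i.e. the enum expands to SECTOR_unallocated, SECTOR_reserved, SECTOR_dirty,
+ * SECTOR_dirty_reserved, SECTOR_allocated - the ordering matters, since code
+ * elsewhere tests state >= SECTOR_dirty to mean "this sector has data".
+ */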
+
+struct bch_folio_sector {
+ /* Uncompressed, fully allocated replicas (or on disk reservation): */
+ unsigned nr_replicas:4;
+
+	/* Owns a PAGE_SECTORS * replicas_reserved sized in-memory reservation: */
+ unsigned replicas_reserved:4;
+
+ /* i_sectors: */
+ enum bch_folio_sector_state state:8;
+};
+
+struct bch_folio {
+ spinlock_t lock;
+ atomic_t write_count;
+ /*
+ * Is the sector state up to date with the btree?
+ * (Not the data itself)
+ */
+ bool uptodate;
+ struct bch_folio_sector s[];
+};
+
+/* Helper for when we need to add debug instrumentation: */
+static inline void bch2_folio_sector_set(struct folio *folio,
+ struct bch_folio *s,
+ unsigned i, unsigned n)
+{
+ s->s[i].state = n;
+}
+
+/* file offset (to folio offset) to bch_folio_sector index */
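+/* e.g. pos 8704 in a folio starting at byte 8192 maps to sector index (8704 - 8192) >> 9 == 1 */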
+static inline int folio_pos_to_s(struct folio *folio, loff_t pos)
+{
+ u64 f_offset = pos - folio_pos(folio);
+
+ BUG_ON(pos < folio_pos(folio) || pos >= folio_end_pos(folio));
+ return f_offset >> SECTOR_SHIFT;
+}
+
+/* for newly allocated folios: */
+static inline void __bch2_folio_release(struct folio *folio)
+{
+ kfree(folio_detach_private(folio));
+}
+
+static inline void bch2_folio_release(struct folio *folio)
+{
+ EBUG_ON(!folio_test_locked(folio));
+ __bch2_folio_release(folio);
+}
+
+static inline struct bch_folio *__bch2_folio(struct folio *folio)
+{
+ return folio_has_private(folio)
+ ? (struct bch_folio *) folio_get_private(folio)
+ : NULL;
+}
+
+static inline struct bch_folio *bch2_folio(struct folio *folio)
+{
+ EBUG_ON(!folio_test_locked(folio));
+
+ return __bch2_folio(folio);
+}
+
+struct bch_folio *__bch2_folio_create(struct folio *, gfp_t);
+struct bch_folio *bch2_folio_create(struct folio *, gfp_t);
+
+struct bch2_folio_reservation {
+ struct disk_reservation disk;
+ struct quota_res quota;
+};
+
+static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode)
+{
+ /* XXX: this should not be open coded */
+ return inode->ei_inode.bi_data_replicas
+ ? inode->ei_inode.bi_data_replicas - 1
+ : c->opts.data_replicas;
+}
+
+static inline void bch2_folio_reservation_init(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct bch2_folio_reservation *res)
+{
+ memset(res, 0, sizeof(*res));
+
+ res->disk.nr_replicas = inode_nr_replicas(c, inode);
+}
+
+int bch2_folio_set(struct bch_fs *, subvol_inum, struct folio **, unsigned);
+void bch2_bio_page_state_set(struct bio *, struct bkey_s_c);
+
+void bch2_mark_pagecache_unallocated(struct bch_inode_info *, u64, u64);
+void bch2_mark_pagecache_reserved(struct bch_inode_info *, u64, u64);
+
+int bch2_get_folio_disk_reservation(struct bch_fs *,
+ struct bch_inode_info *,
+ struct folio *, bool);
+
+void bch2_folio_reservation_put(struct bch_fs *,
+ struct bch_inode_info *,
+ struct bch2_folio_reservation *);
+int bch2_folio_reservation_get(struct bch_fs *,
+ struct bch_inode_info *,
+ struct folio *,
+ struct bch2_folio_reservation *,
+ unsigned, unsigned);
+
+void bch2_set_folio_dirty(struct bch_fs *,
+ struct bch_inode_info *,
+ struct folio *,
+ struct bch2_folio_reservation *,
+ unsigned, unsigned);
+
+vm_fault_t bch2_page_fault(struct vm_fault *);
+vm_fault_t bch2_page_mkwrite(struct vm_fault *);
+void bch2_invalidate_folio(struct folio *, size_t, size_t);
+bool bch2_release_folio(struct folio *, gfp_t);
+
+loff_t bch2_seek_pagecache_data(struct inode *, loff_t, loff_t, unsigned, bool);
+loff_t bch2_seek_pagecache_hole(struct inode *, loff_t, loff_t, unsigned, bool);
+int bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned, bool);
+
+#endif /* _BCACHEFS_FS_IO_PAGECACHE_H */
diff --git a/c_src/libbcachefs/fs-io.c b/c_src/libbcachefs/fs-io.c
new file mode 100644
index 00000000..98bd5bab
--- /dev/null
+++ b/c_src/libbcachefs/fs-io.c
@@ -0,0 +1,1078 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "clock.h"
+#include "error.h"
+#include "extents.h"
+#include "extent_update.h"
+#include "fs.h"
+#include "fs-io.h"
+#include "fs-io-buffered.h"
+#include "fs-io-pagecache.h"
+#include "fsck.h"
+#include "inode.h"
+#include "journal.h"
+#include "io_misc.h"
+#include "keylist.h"
+#include "quota.h"
+#include "reflink.h"
+#include "trace.h"
+
+#include <linux/aio.h>
+#include <linux/backing-dev.h>
+#include <linux/falloc.h>
+#include <linux/migrate.h>
+#include <linux/mmu_context.h>
+#include <linux/pagevec.h>
+#include <linux/rmap.h>
+#include <linux/sched/signal.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/uio.h>
+
+#include <trace/events/writeback.h>
+
+struct nocow_flush {
+ struct closure *cl;
+ struct bch_dev *ca;
+ struct bio bio;
+};
+
+static void nocow_flush_endio(struct bio *_bio)
+{
+ struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio);
+
+ closure_put(bio->cl);
+ percpu_ref_put(&bio->ca->io_ref);
+ bio_put(&bio->bio);
+}
+
+void bch2_inode_flush_nocow_writes_async(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct closure *cl)
+{
+ struct nocow_flush *bio;
+ struct bch_dev *ca;
+ struct bch_devs_mask devs;
+ unsigned dev;
+
+ dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX);
+ if (dev == BCH_SB_MEMBERS_MAX)
+ return;
+
+ devs = inode->ei_devs_need_flush;
+ memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
+
+ for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) {
+ rcu_read_lock();
+ ca = rcu_dereference(c->devs[dev]);
+ if (ca && !percpu_ref_tryget(&ca->io_ref))
+ ca = NULL;
+ rcu_read_unlock();
+
+ if (!ca)
+ continue;
+
+ bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0,
+ REQ_OP_FLUSH,
+ GFP_KERNEL,
+ &c->nocow_flush_bioset),
+ struct nocow_flush, bio);
+ bio->cl = cl;
+ bio->ca = ca;
+ bio->bio.bi_end_io = nocow_flush_endio;
+ closure_bio_submit(&bio->bio, cl);
+ }
+}
+
+static int bch2_inode_flush_nocow_writes(struct bch_fs *c,
+ struct bch_inode_info *inode)
+{
+ struct closure cl;
+
+ closure_init_stack(&cl);
+ bch2_inode_flush_nocow_writes_async(c, inode, &cl);
+ closure_sync(&cl);
+
+ return 0;
+}
+
+/* i_size updates: */
+
+struct inode_new_size {
+ loff_t new_size;
+ u64 now;
+ unsigned fields;
+};
+
+static int inode_set_size(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ void *p)
+{
+ struct inode_new_size *s = p;
+
+ bi->bi_size = s->new_size;
+ if (s->fields & ATTR_ATIME)
+ bi->bi_atime = s->now;
+ if (s->fields & ATTR_MTIME)
+ bi->bi_mtime = s->now;
+ if (s->fields & ATTR_CTIME)
+ bi->bi_ctime = s->now;
+
+ return 0;
+}
+
+int __must_check bch2_write_inode_size(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ loff_t new_size, unsigned fields)
+{
+ struct inode_new_size s = {
+ .new_size = new_size,
+ .now = bch2_current_time(c),
+ .fields = fields,
+ };
+
+ return bch2_write_inode(c, inode, inode_set_size, &s, fields);
+}
+
+void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
+ struct quota_res *quota_res, s64 sectors)
+{
+ bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c,
+ "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
+ inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
+ inode->ei_inode.bi_sectors);
+ inode->v.i_blocks += sectors;
+
+#ifdef CONFIG_BCACHEFS_QUOTA
+ if (quota_res &&
+ !test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags) &&
+ sectors > 0) {
+ BUG_ON(sectors > quota_res->sectors);
+ BUG_ON(sectors > inode->ei_quota_reserved);
+
+ quota_res->sectors -= sectors;
+ inode->ei_quota_reserved -= sectors;
+ } else {
+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN);
+ }
+#endif
+}
+
+/* fsync: */
+
+/*
+ * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an
+ * insert trigger: look up the btree inode instead
+ */
+static int bch2_flush_inode(struct bch_fs *c,
+ struct bch_inode_info *inode)
+{
+ struct bch_inode_unpacked u;
+ int ret;
+
+ if (c->opts.journal_flush_disabled)
+ return 0;
+
+ ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u);
+ if (ret)
+ return ret;
+
+ return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
+ bch2_inode_flush_nocow_writes(c, inode);
+}
+
+int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct bch_inode_info *inode = file_bch_inode(file);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ int ret;
+
+ ret = file_write_and_wait_range(file, start, end);
+ if (ret)
+ goto out;
+ ret = sync_inode_metadata(&inode->v, 1);
+ if (ret)
+ goto out;
+ ret = bch2_flush_inode(c, inode);
+out:
+ return bch2_err_class(ret);
+}
+
+/* truncate: */
+
+static inline int range_has_data(struct bch_fs *c, u32 subvol,
+ struct bpos start,
+ struct bpos end)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, subvol, &start.snapshot);
+ if (ret)
+ goto err;
+
+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents, start, end, 0, k, ret)
+ if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) {
+ ret = 1;
+ break;
+ }
+ start = iter.pos;
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_put(trans);
+ return ret;
+}
+
+static int __bch2_truncate_folio(struct bch_inode_info *inode,
+ pgoff_t index, loff_t start, loff_t end)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct address_space *mapping = inode->v.i_mapping;
+ struct bch_folio *s;
+ unsigned start_offset;
+ unsigned end_offset;
+ unsigned i;
+ struct folio *folio;
+ s64 i_sectors_delta = 0;
+ int ret = 0;
+ u64 end_pos;
+
+ folio = filemap_lock_folio(mapping, index);
+ if (IS_ERR_OR_NULL(folio)) {
+ /*
+ * XXX: we're doing two index lookups when we end up reading the
+ * folio
+ */
+ ret = range_has_data(c, inode->ei_subvol,
+ POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)),
+ POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS));
+ if (ret <= 0)
+ return ret;
+
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK|FGP_CREAT, GFP_KERNEL);
+ if (IS_ERR_OR_NULL(folio)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ BUG_ON(start >= folio_end_pos(folio));
+ BUG_ON(end <= folio_pos(folio));
+
+ start_offset = max(start, folio_pos(folio)) - folio_pos(folio);
+ end_offset = min_t(u64, end, folio_end_pos(folio)) - folio_pos(folio);
+
+ /* Folio boundary? Nothing to do */
+ if (start_offset == 0 &&
+ end_offset == folio_size(folio)) {
+ ret = 0;
+ goto unlock;
+ }
+
+ s = bch2_folio_create(folio, 0);
+ if (!s) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ if (!folio_test_uptodate(folio)) {
+ ret = bch2_read_single_folio(folio, mapping);
+ if (ret)
+ goto unlock;
+ }
+
+ ret = bch2_folio_set(c, inode_inum(inode), &folio, 1);
+ if (ret)
+ goto unlock;
+
+ for (i = round_up(start_offset, block_bytes(c)) >> 9;
+ i < round_down(end_offset, block_bytes(c)) >> 9;
+ i++) {
+ s->s[i].nr_replicas = 0;
+
+ i_sectors_delta -= s->s[i].state == SECTOR_dirty;
+ bch2_folio_sector_set(folio, s, i, SECTOR_unallocated);
+ }
+
+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
+
+ /*
+ * Caller needs to know whether this folio will be written out by
+ * writeback - doing an i_size update if necessary - or whether it will
+ * be responsible for the i_size update.
+ *
+ * Note that we shouldn't ever see a folio beyond EOF, but check and
+	 * warn if so. This has been observed due to a failure to clean up
+	 * folios after a short write, and there's still a chance reclaim will
+	 * fix things up.
+ */
+ WARN_ON_ONCE(folio_pos(folio) >= inode->v.i_size);
+ end_pos = folio_end_pos(folio);
+ if (inode->v.i_size > folio_pos(folio))
+ end_pos = min_t(u64, inode->v.i_size, end_pos);
+ ret = s->s[folio_pos_to_s(folio, end_pos - 1)].state >= SECTOR_dirty;
+
+ folio_zero_segment(folio, start_offset, end_offset);
+
+ /*
+ * Bit of a hack - we don't want truncate to fail due to -ENOSPC.
+ *
+	 * XXX: because we aren't currently tracking whether the folio has actual
+	 * data in it (vs. just 0s, or only partially written) this is wrong. ick.
+ */
+ BUG_ON(bch2_get_folio_disk_reservation(c, inode, folio, false));
+
+ /*
+ * This removes any writeable userspace mappings; we need to force
+ * .page_mkwrite to be called again before any mmapped writes, to
+ * redirty the full page:
+ */
+ folio_mkclean(folio);
+ filemap_dirty_folio(mapping, folio);
+unlock:
+ folio_unlock(folio);
+ folio_put(folio);
+out:
+ return ret;
+}
+
+static int bch2_truncate_folio(struct bch_inode_info *inode, loff_t from)
+{
+ return __bch2_truncate_folio(inode, from >> PAGE_SHIFT,
+ from, ANYSINT_MAX(loff_t));
+}
+
+static int bch2_truncate_folios(struct bch_inode_info *inode,
+ loff_t start, loff_t end)
+{
+ int ret = __bch2_truncate_folio(inode, start >> PAGE_SHIFT,
+ start, end);
+
+ if (ret >= 0 &&
+ start >> PAGE_SHIFT != end >> PAGE_SHIFT)
+ ret = __bch2_truncate_folio(inode,
+ (end - 1) >> PAGE_SHIFT,
+ start, end);
+ return ret;
+}
+
+static int bch2_extend(struct mnt_idmap *idmap,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *inode_u,
+ struct iattr *iattr)
+{
+ struct address_space *mapping = inode->v.i_mapping;
+ int ret;
+
+ /*
+ * sync appends:
+ *
+ * this has to be done _before_ extending i_size:
+ */
+ ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX);
+ if (ret)
+ return ret;
+
+ truncate_setsize(&inode->v, iattr->ia_size);
+
+ return bch2_setattr_nonsize(idmap, inode, iattr);
+}
+
+int bchfs_truncate(struct mnt_idmap *idmap,
+ struct bch_inode_info *inode, struct iattr *iattr)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct address_space *mapping = inode->v.i_mapping;
+ struct bch_inode_unpacked inode_u;
+ s64 i_sectors_delta = 0;
+ int ret = 0;
+
+ /*
+	 * If the truncate call will change the size of the file, the
+ * cmtimes should be updated. If the size will not change, we
+ * do not need to update the cmtimes.
+ */
+ if (iattr->ia_size != inode->v.i_size) {
+ if (!(iattr->ia_valid & ATTR_MTIME))
+ ktime_get_coarse_real_ts64(&iattr->ia_mtime);
+ if (!(iattr->ia_valid & ATTR_CTIME))
+ ktime_get_coarse_real_ts64(&iattr->ia_ctime);
+ iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME;
+ }
+
+ inode_dio_wait(&inode->v);
+ bch2_pagecache_block_get(inode);
+
+ ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u);
+ if (ret)
+ goto err;
+
+ /*
+	 * check this before the next assertion; on filesystem error our normal
+ * invariants are a bit broken (truncate has to truncate the page cache
+ * before the inode).
+ */
+ ret = bch2_journal_error(&c->journal);
+ if (ret)
+ goto err;
+
+ WARN_ONCE(!test_bit(EI_INODE_ERROR, &inode->ei_flags) &&
+ inode->v.i_size < inode_u.bi_size,
+ "truncate spotted in mem i_size < btree i_size: %llu < %llu\n",
+ (u64) inode->v.i_size, inode_u.bi_size);
+
+ if (iattr->ia_size > inode->v.i_size) {
+ ret = bch2_extend(idmap, inode, &inode_u, iattr);
+ goto err;
+ }
+
+ iattr->ia_valid &= ~ATTR_SIZE;
+
+ ret = bch2_truncate_folio(inode, iattr->ia_size);
+ if (unlikely(ret < 0))
+ goto err;
+
+ truncate_setsize(&inode->v, iattr->ia_size);
+
+ /*
+ * When extending, we're going to write the new i_size to disk
+ * immediately so we need to flush anything above the current on disk
+ * i_size first:
+ *
+ * Also, when extending we need to flush the page that i_size currently
+ * straddles - if it's mapped to userspace, we need to ensure that
+ * userspace has to redirty it and call .mkwrite -> set_page_dirty
+ * again to allocate the part of the page that was extended.
+ */
+ if (iattr->ia_size > inode_u.bi_size)
+ ret = filemap_write_and_wait_range(mapping,
+ inode_u.bi_size,
+ iattr->ia_size - 1);
+ else if (iattr->ia_size & (PAGE_SIZE - 1))
+ ret = filemap_write_and_wait_range(mapping,
+ round_down(iattr->ia_size, PAGE_SIZE),
+ iattr->ia_size - 1);
+ if (ret)
+ goto err;
+
+ ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta);
+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
+
+ if (unlikely(ret)) {
+ /*
+ * If we error here, VFS caches are now inconsistent with btree
+ */
+ set_bit(EI_INODE_ERROR, &inode->ei_flags);
+ goto err;
+ }
+
+ bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks &&
+ !bch2_journal_error(&c->journal), c,
+ "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
+ inode->v.i_ino, (u64) inode->v.i_blocks,
+ inode->ei_inode.bi_sectors);
+
+ ret = bch2_setattr_nonsize(idmap, inode, iattr);
+err:
+ bch2_pagecache_block_put(inode);
+ return bch2_err_class(ret);
+}
+
+/* fallocate: */
+
+static int inode_update_times_fn(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi, void *p)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+ bi->bi_mtime = bi->bi_ctime = bch2_current_time(c);
+ return 0;
+}
+
+static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ u64 end = offset + len;
+ u64 block_start = round_up(offset, block_bytes(c));
+ u64 block_end = round_down(end, block_bytes(c));
+ bool truncated_last_page;
+ int ret = 0;
+
+ ret = bch2_truncate_folios(inode, offset, end);
+ if (unlikely(ret < 0))
+ goto err;
+
+ truncated_last_page = ret;
+
+ truncate_pagecache_range(&inode->v, offset, end - 1);
+
+ if (block_start < block_end) {
+ s64 i_sectors_delta = 0;
+
+ ret = bch2_fpunch(c, inode_inum(inode),
+ block_start >> 9, block_end >> 9,
+ &i_sectors_delta);
+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
+ }
+
+ mutex_lock(&inode->ei_update_lock);
+ if (end >= inode->v.i_size && !truncated_last_page) {
+ ret = bch2_write_inode_size(c, inode, inode->v.i_size,
+ ATTR_MTIME|ATTR_CTIME);
+ } else {
+ ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
+ ATTR_MTIME|ATTR_CTIME);
+ }
+ mutex_unlock(&inode->ei_update_lock);
+err:
+ return ret;
+}
+
+static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
+ loff_t offset, loff_t len,
+ bool insert)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct address_space *mapping = inode->v.i_mapping;
+ s64 i_sectors_delta = 0;
+ int ret = 0;
+
+ if ((offset | len) & (block_bytes(c) - 1))
+ return -EINVAL;
+
+ if (insert) {
+ if (offset >= inode->v.i_size)
+ return -EINVAL;
+ } else {
+ if (offset + len >= inode->v.i_size)
+ return -EINVAL;
+ }
+
+ ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
+ if (ret)
+ return ret;
+
+ if (insert)
+ i_size_write(&inode->v, inode->v.i_size + len);
+
+ ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9,
+ insert, &i_sectors_delta);
+ if (!ret && !insert)
+ i_size_write(&inode->v, inode->v.i_size - len);
+ bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
+
+ return ret;
+}
+
+static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
+ u64 start_sector, u64 end_sector)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bpos end_pos = POS(inode->v.i_ino, end_sector);
+ struct bch_io_opts opts;
+ int ret = 0;
+
+ bch2_inode_opts_get(&opts, c, &inode->ei_inode);
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ POS(inode->v.i_ino, start_sector),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+ while (!ret && bkey_lt(iter.pos, end_pos)) {
+ s64 i_sectors_delta = 0;
+ struct quota_res quota_res = { 0 };
+ struct bkey_s_c k;
+ unsigned sectors;
+ bool is_allocation;
+ u64 hole_start, hole_end;
+ u32 snapshot;
+
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans,
+ inode->ei_subvol, &snapshot);
+ if (ret)
+ goto bkey_err;
+
+ bch2_btree_iter_set_snapshot(&iter, snapshot);
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ if ((ret = bkey_err(k)))
+ goto bkey_err;
+
+ hole_start = iter.pos.offset;
+ hole_end = bpos_min(k.k->p, end_pos).offset;
+ is_allocation = bkey_extent_is_allocation(k.k);
+
+ /* already reserved */
+ if (bkey_extent_is_reservation(k) &&
+ bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
+ bch2_btree_iter_advance(&iter);
+ continue;
+ }
+
+ if (bkey_extent_is_data(k.k) &&
+ !(mode & FALLOC_FL_ZERO_RANGE)) {
+ bch2_btree_iter_advance(&iter);
+ continue;
+ }
+
+ if (!(mode & FALLOC_FL_ZERO_RANGE)) {
+ /*
+ * Lock ordering - can't be holding btree locks while
+ * blocking on a folio lock:
+ */
+ if (bch2_clamp_data_hole(&inode->v,
+ &hole_start,
+ &hole_end,
+ opts.data_replicas, true))
+ ret = drop_locks_do(trans,
+ (bch2_clamp_data_hole(&inode->v,
+ &hole_start,
+ &hole_end,
+ opts.data_replicas, false), 0));
+ bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
+
+ if (ret)
+ goto bkey_err;
+
+ if (hole_start == hole_end)
+ continue;
+ }
+
+ sectors = hole_end - hole_start;
+
+ if (!is_allocation) {
+ ret = bch2_quota_reservation_add(c, inode,
+ &quota_res, sectors, true);
+ if (unlikely(ret))
+ goto bkey_err;
+ }
+
+ ret = bch2_extent_fallocate(trans, inode_inum(inode), &iter,
+ sectors, opts, &i_sectors_delta,
+ writepoint_hashed((unsigned long) current));
+ if (ret)
+ goto bkey_err;
+
+ bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
+
+ drop_locks_do(trans,
+ (bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
+bkey_err:
+ bch2_quota_reservation_put(c, inode, &quota_res);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+ }
+
+ if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) {
+ struct quota_res quota_res = { 0 };
+ s64 i_sectors_delta = 0;
+
+ bch2_fpunch_at(trans, &iter, inode_inum(inode),
+ end_sector, &i_sectors_delta);
+ bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
+ bch2_quota_reservation_put(c, inode, &quota_res);
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ return ret;
+}
+
+static long bchfs_fallocate(struct bch_inode_info *inode, int mode,
+ loff_t offset, loff_t len)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ u64 end = offset + len;
+ u64 block_start = round_down(offset, block_bytes(c));
+ u64 block_end = round_up(end, block_bytes(c));
+ bool truncated_last_page = false;
+ int ret, ret2 = 0;
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) {
+ ret = inode_newsize_ok(&inode->v, end);
+ if (ret)
+ return ret;
+ }
+
+ if (mode & FALLOC_FL_ZERO_RANGE) {
+ ret = bch2_truncate_folios(inode, offset, end);
+ if (unlikely(ret < 0))
+ return ret;
+
+ truncated_last_page = ret;
+
+ truncate_pagecache_range(&inode->v, offset, end - 1);
+
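+ /*
+  * bch2_truncate_folios() above dealt with any partial blocks at the start
+  * and end of the range; only whole blocks are passed to __bchfs_fallocate():
+  */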
+ block_start = round_up(offset, block_bytes(c));
+ block_end = round_down(end, block_bytes(c));
+ }
+
+ ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9);
+
+ /*
+ * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update,
+ * so that the VFS cache i_size is consistent with the btree i_size:
+ */
+ if (ret &&
+ !(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)))
+ return ret;
+
+ if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size)
+ end = inode->v.i_size;
+
+ if (end >= inode->v.i_size &&
+ (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) ||
+ !(mode & FALLOC_FL_KEEP_SIZE))) {
+ spin_lock(&inode->v.i_lock);
+ i_size_write(&inode->v, end);
+ spin_unlock(&inode->v.i_lock);
+
+ mutex_lock(&inode->ei_update_lock);
+ ret2 = bch2_write_inode_size(c, inode, end, 0);
+ mutex_unlock(&inode->ei_update_lock);
+ }
+
+ return ret ?: ret2;
+}
+
+long bch2_fallocate_dispatch(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ struct bch_inode_info *inode = file_bch_inode(file);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ long ret;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate))
+ return -EROFS;
+
+ inode_lock(&inode->v);
+ inode_dio_wait(&inode->v);
+ bch2_pagecache_block_get(inode);
+
+ ret = file_modified(file);
+ if (ret)
+ goto err;
+
+ if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)))
+ ret = bchfs_fallocate(inode, mode, offset, len);
+ else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
+ ret = bchfs_fpunch(inode, offset, len);
+ else if (mode == FALLOC_FL_INSERT_RANGE)
+ ret = bchfs_fcollapse_finsert(inode, offset, len, true);
+ else if (mode == FALLOC_FL_COLLAPSE_RANGE)
+ ret = bchfs_fcollapse_finsert(inode, offset, len, false);
+ else
+ ret = -EOPNOTSUPP;
+err:
+ bch2_pagecache_block_put(inode);
+ inode_unlock(&inode->v);
+ bch2_write_ref_put(c, BCH_WRITE_REF_fallocate);
+
+ return bch2_err_class(ret);
+}
+
+/*
+ * Take a quota reservation for unallocated blocks in a given file range
+ * Does not check pagecache
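+ * (only sectors not already allocated in the extents btree are reserved)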
+ */
+static int quota_reserve_range(struct bch_inode_info *inode,
+ struct quota_res *res,
+ u64 start, u64 end)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ u32 snapshot;
+ u64 sectors = end - start;
+ u64 pos = start;
+ int ret;
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ SPOS(inode->v.i_ino, pos, snapshot), 0);
+
+ while (!(ret = btree_trans_too_many_iters(trans)) &&
+ (k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k &&
+ !(ret = bkey_err(k))) {
+ if (bkey_extent_is_allocation(k.k)) {
+ u64 s = min(end, k.k->p.offset) -
+ max(start, bkey_start_offset(k.k));
+ BUG_ON(s > sectors);
+ sectors -= s;
+ }
+ bch2_btree_iter_advance(&iter);
+ }
+ pos = iter.pos.offset;
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_put(trans);
+
+ return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true);
+}
+
+loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
+ struct file *file_dst, loff_t pos_dst,
+ loff_t len, unsigned remap_flags)
+{
+ struct bch_inode_info *src = file_bch_inode(file_src);
+ struct bch_inode_info *dst = file_bch_inode(file_dst);
+ struct bch_fs *c = src->v.i_sb->s_fs_info;
+ struct quota_res quota_res = { 0 };
+ s64 i_sectors_delta = 0;
+ u64 aligned_len;
+ loff_t ret = 0;
+
+ if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
+ return -EINVAL;
+
+ if (remap_flags & REMAP_FILE_DEDUP)
+ return -EOPNOTSUPP;
+
+ if ((pos_src & (block_bytes(c) - 1)) ||
+ (pos_dst & (block_bytes(c) - 1)))
+ return -EINVAL;
+
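+ /* overlapping ranges within the same file aren't supported: */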
+ if (src == dst &&
+ abs(pos_src - pos_dst) < len)
+ return -EINVAL;
+
+ lock_two_nondirectories(&src->v, &dst->v);
+ bch2_lock_inodes(INODE_PAGECACHE_BLOCK, src, dst);
+
+ inode_dio_wait(&src->v);
+ inode_dio_wait(&dst->v);
+
+ ret = generic_remap_file_range_prep(file_src, pos_src,
+ file_dst, pos_dst,
+ &len, remap_flags);
+ if (ret < 0 || len == 0)
+ goto err;
+
+ aligned_len = round_up((u64) len, block_bytes(c));
+
+ ret = bch2_write_invalidate_inode_pages_range(dst->v.i_mapping,
+ pos_dst, pos_dst + len - 1);
+ if (ret)
+ goto err;
+
+ ret = quota_reserve_range(dst, &quota_res, pos_dst >> 9,
+ (pos_dst + aligned_len) >> 9);
+ if (ret)
+ goto err;
+
+ file_update_time(file_dst);
+
+ bch2_mark_pagecache_unallocated(src, pos_src >> 9,
+ (pos_src + aligned_len) >> 9);
+
+ ret = bch2_remap_range(c,
+ inode_inum(dst), pos_dst >> 9,
+ inode_inum(src), pos_src >> 9,
+ aligned_len >> 9,
+ pos_dst + len, &i_sectors_delta);
+ if (ret < 0)
+ goto err;
+
+ /*
+ * due to alignment, we might have remapped slightly more than requested
+ */
+ ret = min((u64) ret << 9, (u64) len);
+
+ bch2_i_sectors_acct(c, dst, &quota_res, i_sectors_delta);
+
+ spin_lock(&dst->v.i_lock);
+ if (pos_dst + ret > dst->v.i_size)
+ i_size_write(&dst->v, pos_dst + ret);
+ spin_unlock(&dst->v.i_lock);
+
+ if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) ||
+ IS_SYNC(file_inode(file_dst)))
+ ret = bch2_flush_inode(c, dst);
+err:
+ bch2_quota_reservation_put(c, dst, &quota_res);
+ bch2_unlock_inodes(INODE_PAGECACHE_BLOCK, src, dst);
+ unlock_two_nondirectories(&src->v, &dst->v);
+
+ return bch2_err_class(ret);
+}
+
+/* fseek: */
+
+static loff_t bch2_seek_data(struct file *file, u64 offset)
+{
+ struct bch_inode_info *inode = file_bch_inode(file);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ subvol_inum inum = inode_inum(inode);
+ u64 isize, next_data = MAX_LFS_FILESIZE;
+ u32 snapshot;
+ int ret;
+
+ isize = i_size_read(&inode->v);
+ if (offset >= isize)
+ return -ENXIO;
+
+ trans = bch2_trans_get(c);
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents,
+ SPOS(inode->v.i_ino, offset >> 9, snapshot),
+ POS(inode->v.i_ino, U64_MAX),
+ 0, k, ret) {
+ if (bkey_extent_is_data(k.k)) {
+ next_data = max(offset, bkey_start_offset(k.k) << 9);
+ break;
+ } else if (k.k->p.offset >> 9 > isize)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_put(trans);
+ if (ret)
+ return ret;
+
+ if (next_data > offset)
+ next_data = bch2_seek_pagecache_data(&inode->v,
+ offset, next_data, 0, false);
+
+ if (next_data >= isize)
+ return -ENXIO;
+
+ return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
+}
+
+static loff_t bch2_seek_hole(struct file *file, u64 offset)
+{
+ struct bch_inode_info *inode = file_bch_inode(file);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ subvol_inum inum = inode_inum(inode);
+ u64 isize, next_hole = MAX_LFS_FILESIZE;
+ u32 snapshot;
+ int ret;
+
+ isize = i_size_read(&inode->v);
+ if (offset >= isize)
+ return -ENXIO;
+
+ trans = bch2_trans_get(c);
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto err;
+
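+ /*
+  * Iterate over extent slots (BTREE_ITER_SLOTS includes holes); a hole in
+  * the btree only counts if the pagecache doesn't have data covering it:
+  */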
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
+ SPOS(inode->v.i_ino, offset >> 9, snapshot),
+ BTREE_ITER_SLOTS, k, ret) {
+ if (k.k->p.inode != inode->v.i_ino) {
+ next_hole = bch2_seek_pagecache_hole(&inode->v,
+ offset, MAX_LFS_FILESIZE, 0, false);
+ break;
+ } else if (!bkey_extent_is_data(k.k)) {
+ next_hole = bch2_seek_pagecache_hole(&inode->v,
+ max(offset, bkey_start_offset(k.k) << 9),
+ k.k->p.offset << 9, 0, false);
+
+ if (next_hole < k.k->p.offset << 9)
+ break;
+ } else {
+ offset = max(offset, bkey_start_offset(k.k) << 9);
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_put(trans);
+ if (ret)
+ return ret;
+
+ if (next_hole > isize)
+ next_hole = isize;
+
+ return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE);
+}
+
+loff_t bch2_llseek(struct file *file, loff_t offset, int whence)
+{
+ loff_t ret;
+
+ switch (whence) {
+ case SEEK_SET:
+ case SEEK_CUR:
+ case SEEK_END:
+ ret = generic_file_llseek(file, offset, whence);
+ break;
+ case SEEK_DATA:
+ ret = bch2_seek_data(file, offset);
+ break;
+ case SEEK_HOLE:
+ ret = bch2_seek_hole(file, offset);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return bch2_err_class(ret);
+}
+
+void bch2_fs_fsio_exit(struct bch_fs *c)
+{
+ bioset_exit(&c->nocow_flush_bioset);
+}
+
+int bch2_fs_fsio_init(struct bch_fs *c)
+{
+ if (bioset_init(&c->nocow_flush_bioset,
+ 1, offsetof(struct nocow_flush, bio), 0))
+ return -BCH_ERR_ENOMEM_nocow_flush_bioset_init;
+
+ return 0;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/c_src/libbcachefs/fs-io.h b/c_src/libbcachefs/fs-io.h
new file mode 100644
index 00000000..ca70346e
--- /dev/null
+++ b/c_src/libbcachefs/fs-io.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_IO_H
+#define _BCACHEFS_FS_IO_H
+
+#ifndef NO_BCACHEFS_FS
+
+#include "buckets.h"
+#include "fs.h"
+#include "io_write_types.h"
+#include "quota.h"
+
+#include <linux/uio.h>
+
+struct folio_vec {
+ struct folio *fv_folio;
+ size_t fv_offset;
+ size_t fv_len;
+};
+
+static inline struct folio_vec biovec_to_foliovec(struct bio_vec bv)
+{
+
+ struct folio *folio = page_folio(bv.bv_page);
+ size_t offset = (folio_page_idx(folio, bv.bv_page) << PAGE_SHIFT) +
+ bv.bv_offset;
+ size_t len = min_t(size_t, folio_size(folio) - offset, bv.bv_len);
+
+ return (struct folio_vec) {
+ .fv_folio = folio,
+ .fv_offset = offset,
+ .fv_len = len,
+ };
+}
+
+static inline struct folio_vec bio_iter_iovec_folio(struct bio *bio,
+ struct bvec_iter iter)
+{
+ return biovec_to_foliovec(bio_iter_iovec(bio, iter));
+}
+
+#define __bio_for_each_folio(bvl, bio, iter, start) \
+ for (iter = (start); \
+ (iter).bi_size && \
+ ((bvl = bio_iter_iovec_folio((bio), (iter))), 1); \
+ bio_advance_iter_single((bio), &(iter), (bvl).fv_len))
+
+/**
+ * bio_for_each_folio - iterate over folios within a bio
+ *
+ * Like other non-_all versions, this iterates over what bio->bi_iter currently
+ * points to. This version is for drivers, where the bio may have previously
+ * been split or cloned.
+ */
+#define bio_for_each_folio(bvl, bio, iter) \
+ __bio_for_each_folio(bvl, bio, iter, (bio)->bi_iter)
+
+struct quota_res {
+ u64 sectors;
+};
+
+#ifdef CONFIG_BCACHEFS_QUOTA
+
+static inline void __bch2_quota_reservation_put(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct quota_res *res)
+{
+ BUG_ON(res->sectors > inode->ei_quota_reserved);
+
+ bch2_quota_acct(c, inode->ei_qid, Q_SPC,
+ -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC);
+ inode->ei_quota_reserved -= res->sectors;
+ res->sectors = 0;
+}
+
+static inline void bch2_quota_reservation_put(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct quota_res *res)
+{
+ if (res->sectors) {
+ mutex_lock(&inode->ei_quota_lock);
+ __bch2_quota_reservation_put(c, inode, res);
+ mutex_unlock(&inode->ei_quota_lock);
+ }
+}
+
+static inline int bch2_quota_reservation_add(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct quota_res *res,
+ u64 sectors,
+ bool check_enospc)
+{
+ int ret;
+
+ if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags))
+ return 0;
+
+ mutex_lock(&inode->ei_quota_lock);
+ ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors,
+ check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK);
+ if (likely(!ret)) {
+ inode->ei_quota_reserved += sectors;
+ res->sectors += sectors;
+ }
+ mutex_unlock(&inode->ei_quota_lock);
+
+ return ret;
+}
+
+#else
+
+static inline void __bch2_quota_reservation_put(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct quota_res *res) {}
+
+static inline void bch2_quota_reservation_put(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct quota_res *res) {}
+
+static inline int bch2_quota_reservation_add(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct quota_res *res,
+ unsigned sectors,
+ bool check_enospc)
+{
+ return 0;
+}
+
+#endif
+
+void __bch2_i_sectors_acct(struct bch_fs *, struct bch_inode_info *,
+ struct quota_res *, s64);
+
+static inline void bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
+ struct quota_res *quota_res, s64 sectors)
+{
+ if (sectors) {
+ mutex_lock(&inode->ei_quota_lock);
+ __bch2_i_sectors_acct(c, inode, quota_res, sectors);
+ mutex_unlock(&inode->ei_quota_lock);
+ }
+}
+
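+/*
+ * The low bit of current->faults_disabled_mapping is borrowed as a flag
+ * recording that locks were dropped while handling a page fault; the
+ * helpers below pack and unpack that bit:
+ */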
+static inline struct address_space *faults_disabled_mapping(void)
+{
+ return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL);
+}
+
+static inline void set_fdm_dropped_locks(void)
+{
+ current->faults_disabled_mapping =
+ (void *) (((unsigned long) current->faults_disabled_mapping)|1);
+}
+
+static inline bool fdm_dropped_locks(void)
+{
+ return ((unsigned long) current->faults_disabled_mapping) & 1;
+}
+
+void bch2_inode_flush_nocow_writes_async(struct bch_fs *,
+ struct bch_inode_info *, struct closure *);
+
+int __must_check bch2_write_inode_size(struct bch_fs *,
+ struct bch_inode_info *,
+ loff_t, unsigned);
+
+int bch2_fsync(struct file *, loff_t, loff_t, int);
+
+int bchfs_truncate(struct mnt_idmap *,
+ struct bch_inode_info *, struct iattr *);
+long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
+
+loff_t bch2_remap_file_range(struct file *, loff_t, struct file *,
+ loff_t, loff_t, unsigned);
+
+loff_t bch2_llseek(struct file *, loff_t, int);
+
+void bch2_fs_fsio_exit(struct bch_fs *);
+int bch2_fs_fsio_init(struct bch_fs *);
+#else
+static inline void bch2_fs_fsio_exit(struct bch_fs *c) {}
+static inline int bch2_fs_fsio_init(struct bch_fs *c) { return 0; }
+#endif
+
+#endif /* _BCACHEFS_FS_IO_H */
diff --git a/c_src/libbcachefs/fs-ioctl.c b/c_src/libbcachefs/fs-ioctl.c
new file mode 100644
index 00000000..e0a19a73
--- /dev/null
+++ b/c_src/libbcachefs/fs-ioctl.c
@@ -0,0 +1,562 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "chardev.h"
+#include "dirent.h"
+#include "fs.h"
+#include "fs-common.h"
+#include "fs-ioctl.h"
+#include "quota.h"
+
+#include <linux/compat.h>
+#include <linux/fsnotify.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/security.h>
+#include <linux/writeback.h>
+
+#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32)
+#define FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */
+#define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
+#define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
+
+struct flags_set {
+ unsigned mask;
+ unsigned flags;
+
+ unsigned projid;
+
+ bool set_projinherit;
+ bool projinherit;
+};
+
+static int bch2_inode_flags_set(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ void *p)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ /*
+ * We're relying on btree locking here for exclusion with other ioctl
+ * calls - use the flags in the btree (@bi), not inode->i_flags:
+ */
+ struct flags_set *s = p;
+ unsigned newflags = s->flags;
+ unsigned oldflags = bi->bi_flags & s->mask;
+
+ if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) &&
+ !capable(CAP_LINUX_IMMUTABLE))
+ return -EPERM;
+
+ if (!S_ISREG(bi->bi_mode) &&
+ !S_ISDIR(bi->bi_mode) &&
+ (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
+ return -EINVAL;
+
+ if (s->set_projinherit) {
+ bi->bi_fields_set &= ~(1 << Inode_opt_project);
+ bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
+ }
+
+ bi->bi_flags &= ~s->mask;
+ bi->bi_flags |= newflags;
+
+ bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
+ return 0;
+}
+
+static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg)
+{
+ unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags);
+
+ return put_user(flags, arg);
+}
+
+static int bch2_ioc_setflags(struct bch_fs *c,
+ struct file *file,
+ struct bch_inode_info *inode,
+ void __user *arg)
+{
+ struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) };
+ unsigned uflags;
+ int ret;
+
+ if (get_user(uflags, (int __user *) arg))
+ return -EFAULT;
+
+ s.flags = map_flags_rev(bch_flags_to_uflags, uflags);
+ if (uflags)
+ return -EOPNOTSUPP;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ inode_lock(&inode->v);
+ if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
+ ret = -EACCES;
+ goto setflags_out;
+ }
+
+ mutex_lock(&inode->ei_update_lock);
+ ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+ bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
+ ATTR_CTIME);
+ mutex_unlock(&inode->ei_update_lock);
+
+setflags_out:
+ inode_unlock(&inode->v);
+ mnt_drop_write_file(file);
+ return ret;
+}
+
+static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
+ struct fsxattr __user *arg)
+{
+ struct fsxattr fa = { 0 };
+
+ fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
+
+ if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
+ fa.fsx_xflags |= FS_XFLAG_PROJINHERIT;
+
+ fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
+
+ if (copy_to_user(arg, &fa, sizeof(fa)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int fssetxattr_inode_update_fn(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ void *p)
+{
+ struct flags_set *s = p;
+
+ if (s->projid != bi->bi_project) {
+ bi->bi_fields_set |= 1U << Inode_opt_project;
+ bi->bi_project = s->projid;
+ }
+
+ return bch2_inode_flags_set(trans, inode, bi, p);
+}
+
+static int bch2_ioc_fssetxattr(struct bch_fs *c,
+ struct file *file,
+ struct bch_inode_info *inode,
+ struct fsxattr __user *arg)
+{
+ struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) };
+ struct fsxattr fa;
+ int ret;
+
+ if (copy_from_user(&fa, arg, sizeof(fa)))
+ return -EFAULT;
+
+ s.set_projinherit = true;
+ s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0;
+ fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
+
+ s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
+ if (fa.fsx_xflags)
+ return -EOPNOTSUPP;
+
+ if (fa.fsx_projid >= U32_MAX)
+ return -EINVAL;
+
+ /*
+ * inode fields accessible via the xattr interface are stored with a +1
+ * bias, so that 0 means unset:
+ */
+ s.projid = fa.fsx_projid + 1;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ inode_lock(&inode->v);
+ if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
+ ret = -EACCES;
+ goto err;
+ }
+
+ mutex_lock(&inode->ei_update_lock);
+ ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+ bch2_set_projid(c, inode, fa.fsx_projid) ?:
+ bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
+ ATTR_CTIME);
+ mutex_unlock(&inode->ei_update_lock);
+err:
+ inode_unlock(&inode->v);
+ mnt_drop_write_file(file);
+ return ret;
+}
+
+static int bch2_reinherit_attrs_fn(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ void *p)
+{
+ struct bch_inode_info *dir = p;
+
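+ /*
+  * bch2_reinherit_attrs() returns true if it changed anything; returning
+  * nonzero here aborts the inode update when there's nothing to do:
+  */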
+ return !bch2_reinherit_attrs(bi, &dir->ei_inode);
+}
+
+static int bch2_ioc_reinherit_attrs(struct bch_fs *c,
+ struct file *file,
+ struct bch_inode_info *src,
+ const char __user *name)
+{
+ struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode);
+ struct bch_inode_info *dst;
+ struct inode *vinode = NULL;
+ char *kname = NULL;
+ struct qstr qstr;
+ int ret = 0;
+ subvol_inum inum;
+
+ kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL);
+ if (!kname)
+ return -ENOMEM;
+
+ ret = strncpy_from_user(kname, name, BCH_NAME_MAX);
+ if (unlikely(ret < 0))
+ goto err1;
+
+ qstr.len = ret;
+ qstr.name = kname;
+
+ ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum);
+ if (ret)
+ goto err1;
+
+ vinode = bch2_vfs_inode_get(c, inum);
+ ret = PTR_ERR_OR_ZERO(vinode);
+ if (ret)
+ goto err1;
+
+ dst = to_bch_ei(vinode);
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ goto err2;
+
+ bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst);
+
+ if (inode_attr_changing(src, dst, Inode_opt_project)) {
+ ret = bch2_fs_quota_transfer(c, dst,
+ src->ei_qid,
+ 1 << QTYP_PRJ,
+ KEY_TYPE_QUOTA_PREALLOC);
+ if (ret)
+ goto err3;
+ }
+
+ ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0);
+err3:
+ bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst);
+
+ /* return true if we did work */
+ if (ret >= 0)
+ ret = !ret;
+
+ mnt_drop_write_file(file);
+err2:
+ iput(vinode);
+err1:
+ kfree(kname);
+
+ return ret;
+}
+
+static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
+{
+ u32 flags;
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (get_user(flags, arg))
+ return -EFAULT;
+
+ bch_notice(c, "shutdown by ioctl type %u", flags);
+
+ switch (flags) {
+ case FSOP_GOING_FLAGS_DEFAULT:
+ ret = freeze_bdev(c->vfs_sb->s_bdev);
+ if (ret)
+ break;
+ bch2_journal_flush(&c->journal);
+ bch2_fs_emergency_read_only(c);
+ thaw_bdev(c->vfs_sb->s_bdev);
+ break;
+ case FSOP_GOING_FLAGS_LOGFLUSH:
+ bch2_journal_flush(&c->journal);
+ fallthrough;
+ case FSOP_GOING_FLAGS_NOLOGFLUSH:
+ bch2_fs_emergency_read_only(c);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
+ struct bch_ioctl_subvolume arg)
+{
+ struct inode *dir;
+ struct bch_inode_info *inode;
+ struct user_namespace *s_user_ns;
+ struct dentry *dst_dentry;
+ struct path src_path, dst_path;
+ int how = LOOKUP_FOLLOW;
+ int error;
+ subvol_inum snapshot_src = { 0 };
+ unsigned lookup_flags = 0;
+ unsigned create_flags = BCH_CREATE_SUBVOL;
+
+ if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE|
+ BCH_SUBVOL_SNAPSHOT_RO))
+ return -EINVAL;
+
+ if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
+ (arg.src_ptr ||
+ (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)))
+ return -EINVAL;
+
+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
+ create_flags |= BCH_CREATE_SNAPSHOT;
+
+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)
+ create_flags |= BCH_CREATE_SNAPSHOT_RO;
+
+ /* why do we need this lock? */
+ down_read(&c->vfs_sb->s_umount);
+
+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
+ sync_inodes_sb(c->vfs_sb);
+retry:
+ if (arg.src_ptr) {
+ error = user_path_at(arg.dirfd,
+ (const char __user *)(unsigned long)arg.src_ptr,
+ how, &src_path);
+ if (error)
+ goto err1;
+
+ if (src_path.dentry->d_sb->s_fs_info != c) {
+ path_put(&src_path);
+ error = -EXDEV;
+ goto err1;
+ }
+
+ snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode));
+ }
+
+ dst_dentry = user_path_create(arg.dirfd,
+ (const char __user *)(unsigned long)arg.dst_ptr,
+ &dst_path, lookup_flags);
+ error = PTR_ERR_OR_ZERO(dst_dentry);
+ if (error)
+ goto err2;
+
+ if (dst_dentry->d_sb->s_fs_info != c) {
+ error = -EXDEV;
+ goto err3;
+ }
+
+ if (dst_dentry->d_inode) {
+ error = -EEXIST;
+ goto err3;
+ }
+
+ dir = dst_path.dentry->d_inode;
+ if (IS_DEADDIR(dir)) {
+ error = -BCH_ERR_ENOENT_directory_dead;
+ goto err3;
+ }
+
+ s_user_ns = dir->i_sb->s_user_ns;
+ if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
+ !kgid_has_mapping(s_user_ns, current_fsgid())) {
+ error = -EOVERFLOW;
+ goto err3;
+ }
+
+ error = inode_permission(file_mnt_idmap(filp),
+ dir, MAY_WRITE | MAY_EXEC);
+ if (error)
+ goto err3;
+
+ if (!IS_POSIXACL(dir))
+ arg.mode &= ~current_umask();
+
+ error = security_path_mkdir(&dst_path, dst_dentry, arg.mode);
+ if (error)
+ goto err3;
+
+ if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
+ !arg.src_ptr)
+ snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol;
+
+ inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
+ dst_dentry, arg.mode|S_IFDIR,
+ 0, snapshot_src, create_flags);
+ error = PTR_ERR_OR_ZERO(inode);
+ if (error)
+ goto err3;
+
+ d_instantiate(dst_dentry, &inode->v);
+ fsnotify_mkdir(dir, dst_dentry);
+err3:
+ done_path_create(&dst_path, dst_dentry);
+err2:
+ if (arg.src_ptr)
+ path_put(&src_path);
+
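+ /* on -ESTALE from the path lookup, retry once with LOOKUP_REVAL: */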
+ if (retry_estale(error, lookup_flags)) {
+ lookup_flags |= LOOKUP_REVAL;
+ goto retry;
+ }
+err1:
+ up_read(&c->vfs_sb->s_umount);
+
+ return error;
+}
+
+static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
+ struct bch_ioctl_subvolume arg)
+{
+ down_write(&c->snapshot_create_lock);
+ long ret = __bch2_ioctl_subvolume_create(c, filp, arg);
+ up_write(&c->snapshot_create_lock);
+
+ return ret;
+}
+
+static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
+ struct bch_ioctl_subvolume arg)
+{
+ struct path path;
+ struct inode *dir;
+ int ret = 0;
+
+ if (arg.flags)
+ return -EINVAL;
+
+ ret = user_path_at(arg.dirfd,
+ (const char __user *)(unsigned long)arg.dst_ptr,
+ LOOKUP_FOLLOW, &path);
+ if (ret)
+ return ret;
+
+ if (path.dentry->d_sb->s_fs_info != c) {
+ ret = -EXDEV;
+ goto err;
+ }
+
+ dir = path.dentry->d_parent->d_inode;
+
+ ret = __bch2_unlink(dir, path.dentry, true);
+ if (ret)
+ goto err;
+
+ fsnotify_rmdir(dir, path.dentry);
+ d_delete(path.dentry);
+err:
+ path_put(&path);
+ return ret;
+}
+
+long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+ struct bch_inode_info *inode = file_bch_inode(file);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ long ret;
+
+ switch (cmd) {
+ case FS_IOC_GETFLAGS:
+ ret = bch2_ioc_getflags(inode, (int __user *) arg);
+ break;
+
+ case FS_IOC_SETFLAGS:
+ ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg);
+ break;
+
+ case FS_IOC_FSGETXATTR:
+ ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg);
+ break;
+
+ case FS_IOC_FSSETXATTR:
+ ret = bch2_ioc_fssetxattr(c, file, inode,
+ (void __user *) arg);
+ break;
+
+ case BCHFS_IOC_REINHERIT_ATTRS:
+ ret = bch2_ioc_reinherit_attrs(c, file, inode,
+ (void __user *) arg);
+ break;
+
+ case FS_IOC_GETVERSION:
+ ret = -ENOTTY;
+ break;
+
+ case FS_IOC_SETVERSION:
+ ret = -ENOTTY;
+ break;
+
+ case FS_IOC_GOINGDOWN:
+ ret = bch2_ioc_goingdown(c, (u32 __user *) arg);
+ break;
+
+ case BCH_IOCTL_SUBVOLUME_CREATE: {
+ struct bch_ioctl_subvolume i;
+
+ ret = copy_from_user(&i, (void __user *) arg, sizeof(i))
+ ? -EFAULT
+ : bch2_ioctl_subvolume_create(c, file, i);
+ break;
+ }
+
+ case BCH_IOCTL_SUBVOLUME_DESTROY: {
+ struct bch_ioctl_subvolume i;
+
+ ret = copy_from_user(&i, (void __user *) arg, sizeof(i))
+ ? -EFAULT
+ : bch2_ioctl_subvolume_destroy(c, file, i);
+ break;
+ }
+
+ default:
+ ret = bch2_fs_ioctl(c, cmd, (void __user *) arg);
+ break;
+ }
+
+ return bch2_err_class(ret);
+}
+
+#ifdef CONFIG_COMPAT
+long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+ /* These are just misnamed, they actually get/put from/to user an int */
+ switch (cmd) {
+ case FS_IOC32_GETFLAGS:
+ cmd = FS_IOC_GETFLAGS;
+ break;
+ case FS_IOC32_SETFLAGS:
+ cmd = FS_IOC_SETFLAGS;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+ return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
+}
+#endif
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/c_src/libbcachefs/fs-ioctl.h b/c_src/libbcachefs/fs-ioctl.h
new file mode 100644
index 00000000..d30f9bb0
--- /dev/null
+++ b/c_src/libbcachefs/fs-ioctl.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_IOCTL_H
+#define _BCACHEFS_FS_IOCTL_H
+
+/* Inode flags: */
+
+/* bcachefs inode flags -> vfs inode flags: */
+static const __maybe_unused unsigned bch_flags_to_vfs[] = {
+ [__BCH_INODE_sync] = S_SYNC,
+ [__BCH_INODE_immutable] = S_IMMUTABLE,
+ [__BCH_INODE_append] = S_APPEND,
+ [__BCH_INODE_noatime] = S_NOATIME,
+};
+
+/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
+static const __maybe_unused unsigned bch_flags_to_uflags[] = {
+ [__BCH_INODE_sync] = FS_SYNC_FL,
+ [__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
+ [__BCH_INODE_append] = FS_APPEND_FL,
+ [__BCH_INODE_nodump] = FS_NODUMP_FL,
+ [__BCH_INODE_noatime] = FS_NOATIME_FL,
+};
+
+/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
+static const __maybe_unused unsigned bch_flags_to_xflags[] = {
+ [__BCH_INODE_sync] = FS_XFLAG_SYNC,
+ [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
+ [__BCH_INODE_append] = FS_XFLAG_APPEND,
+ [__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
+ [__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
+ //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
+};
+
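+/*
+ * Helpers for converting between bcachefs inode flag bit numbers and the
+ * ioctl flag words in the tables above: map_flags() maps a bcachefs flags
+ * word to ioctl flags, map_flags_rev() maps ioctl flags back to bcachefs
+ * flag bits (clearing the bits it consumed from _in, so anything left over
+ * is unsupported), and map_defined() is the mask of bcachefs flag bits that
+ * have a mapping defined.
+ */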
+#define set_flags(_map, _in, _out) \
+do { \
+ unsigned _i; \
+ \
+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
+ if ((_in) & (1 << _i)) \
+ (_out) |= _map[_i]; \
+ else \
+ (_out) &= ~_map[_i]; \
+} while (0)
+
+#define map_flags(_map, _in) \
+({ \
+ unsigned _out = 0; \
+ \
+ set_flags(_map, _in, _out); \
+ _out; \
+})
+
+#define map_flags_rev(_map, _in) \
+({ \
+ unsigned _i, _out = 0; \
+ \
+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
+ if ((_in) & _map[_i]) { \
+ (_out) |= 1 << _i; \
+ (_in) &= ~_map[_i]; \
+ } \
+ (_out); \
+})
+
+#define map_defined(_map) \
+({ \
+ unsigned _in = ~0; \
+ \
+ map_flags_rev(_map, _in); \
+})
+
+/* Set VFS inode flags from bcachefs inode: */
+static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
+{
+ set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
+}
+
+long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long);
+long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long);
+
+#endif /* _BCACHEFS_FS_IOCTL_H */
diff --git a/c_src/libbcachefs/fs.c b/c_src/libbcachefs/fs.c
new file mode 100644
index 00000000..da117576
--- /dev/null
+++ b/c_src/libbcachefs/fs.c
@@ -0,0 +1,1976 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "acl.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "chardev.h"
+#include "dirent.h"
+#include "errcode.h"
+#include "extents.h"
+#include "fs.h"
+#include "fs-common.h"
+#include "fs-io.h"
+#include "fs-ioctl.h"
+#include "fs-io-buffered.h"
+#include "fs-io-direct.h"
+#include "fs-io-pagecache.h"
+#include "fsck.h"
+#include "inode.h"
+#include "io_read.h"
+#include "journal.h"
+#include "keylist.h"
+#include "quota.h"
+#include "snapshot.h"
+#include "super.h"
+#include "xattr.h"
+
+#include <linux/aio.h>
+#include <linux/backing-dev.h>
+#include <linux/exportfs.h>
+#include <linux/fiemap.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/posix_acl.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include <linux/string.h>
+#include <linux/xattr.h>
+
+static struct kmem_cache *bch2_inode_cache;
+
+static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
+ struct bch_inode_info *,
+ struct bch_inode_unpacked *,
+ struct bch_subvolume *);
+
+void bch2_inode_update_after_write(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ unsigned fields)
+{
+ struct bch_fs *c = trans->c;
+
+ BUG_ON(bi->bi_inum != inode->v.i_ino);
+
+ bch2_assert_pos_locked(trans, BTREE_ID_inodes,
+ POS(0, bi->bi_inum),
+ c->opts.inodes_use_key_cache);
+
+ set_nlink(&inode->v, bch2_inode_nlink_get(bi));
+ i_uid_write(&inode->v, bi->bi_uid);
+ i_gid_write(&inode->v, bi->bi_gid);
+ inode->v.i_mode = bi->bi_mode;
+
+ if (fields & ATTR_ATIME)
+ inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime));
+ if (fields & ATTR_MTIME)
+ inode_set_mtime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_mtime));
+ if (fields & ATTR_CTIME)
+ inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime));
+
+ inode->ei_inode = *bi;
+
+ bch2_inode_flags_to_vfs(inode);
+}
+
+int __must_check bch2_write_inode(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ inode_set_fn set,
+ void *p, unsigned fields)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter = { NULL };
+ struct bch_inode_unpacked inode_u;
+ int ret;
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode),
+ BTREE_ITER_INTENT) ?:
+ (set ? set(trans, inode, &inode_u, p) : 0) ?:
+ bch2_inode_write(trans, &iter, &inode_u) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+
+ /*
+ * the btree node lock protects inode->ei_inode, not ei_update_lock;
+ * this is important for inode updates via bchfs_write_index_update
+ */
+ if (!ret)
+ bch2_inode_update_after_write(trans, inode, &inode_u, fields);
+
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
+ "inode %u:%llu not found when updating",
+ inode_inum(inode).subvol,
+ inode_inum(inode).inum);
+
+ bch2_trans_put(trans);
+ return ret < 0 ? ret : 0;
+}
+
+int bch2_fs_quota_transfer(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct bch_qid new_qid,
+ unsigned qtypes,
+ enum quota_acct_mode mode)
+{
+ unsigned i;
+ int ret;
+
+ qtypes &= enabled_qtypes(c);
+
+ for (i = 0; i < QTYP_NR; i++)
+ if (new_qid.q[i] == inode->ei_qid.q[i])
+ qtypes &= ~(1U << i);
+
+ if (!qtypes)
+ return 0;
+
+ mutex_lock(&inode->ei_quota_lock);
+
+ ret = bch2_quota_transfer(c, qtypes, new_qid,
+ inode->ei_qid,
+ inode->v.i_blocks +
+ inode->ei_quota_reserved,
+ mode);
+ if (!ret)
+ for (i = 0; i < QTYP_NR; i++)
+ if (qtypes & (1 << i))
+ inode->ei_qid.q[i] = new_qid.q[i];
+
+ mutex_unlock(&inode->ei_quota_lock);
+
+ return ret;
+}
+
+static int bch2_iget5_test(struct inode *vinode, void *p)
+{
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+ subvol_inum *inum = p;
+
+ return inode->ei_subvol == inum->subvol &&
+ inode->ei_inode.bi_inum == inum->inum;
+}
+
+static int bch2_iget5_set(struct inode *vinode, void *p)
+{
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+ subvol_inum *inum = p;
+
+ inode->v.i_ino = inum->inum;
+ inode->ei_subvol = inum->subvol;
+ inode->ei_inode.bi_inum = inum->inum;
+ return 0;
+}
+
+static unsigned bch2_inode_hash(subvol_inum inum)
+{
+ return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
+}
+
+struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
+{
+ struct bch_inode_unpacked inode_u;
+ struct bch_inode_info *inode;
+ struct btree_trans *trans;
+ struct bch_subvolume subvol;
+ int ret;
+
+ inode = to_bch_ei(iget5_locked(c->vfs_sb,
+ bch2_inode_hash(inum),
+ bch2_iget5_test,
+ bch2_iget5_set,
+ &inum));
+ if (unlikely(!inode))
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->v.i_state & I_NEW))
+ return &inode->v;
+
+ trans = bch2_trans_get(c);
+ ret = lockrestart_do(trans,
+ bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
+ bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
+
+ if (!ret)
+ bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
+ bch2_trans_put(trans);
+
+ if (ret) {
+ iget_failed(&inode->v);
+ return ERR_PTR(bch2_err_class(ret));
+ }
+
+ mutex_lock(&c->vfs_inodes_lock);
+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+ mutex_unlock(&c->vfs_inodes_lock);
+
+ unlock_new_inode(&inode->v);
+
+ return &inode->v;
+}
+
+struct bch_inode_info *
+__bch2_create(struct mnt_idmap *idmap,
+ struct bch_inode_info *dir, struct dentry *dentry,
+ umode_t mode, dev_t rdev, subvol_inum snapshot_src,
+ unsigned flags)
+{
+ struct bch_fs *c = dir->v.i_sb->s_fs_info;
+ struct btree_trans *trans;
+ struct bch_inode_unpacked dir_u;
+ struct bch_inode_info *inode, *old;
+ struct bch_inode_unpacked inode_u;
+ struct posix_acl *default_acl = NULL, *acl = NULL;
+ subvol_inum inum;
+ struct bch_subvolume subvol;
+ u64 journal_seq = 0;
+ int ret;
+
+ /*
+ * preallocate acls + vfs inode before btree transaction, so that
+ * nothing can fail after the transaction succeeds:
+ */
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+ ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
+ if (ret)
+ return ERR_PTR(ret);
+#endif
+ inode = to_bch_ei(new_inode(c->vfs_sb));
+ if (unlikely(!inode)) {
+ inode = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+
+ bch2_inode_init_early(c, &inode_u);
+
+ if (!(flags & BCH_CREATE_TMPFILE))
+ mutex_lock(&dir->ei_update_lock);
+
+ trans = bch2_trans_get(c);
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?:
+ bch2_create_trans(trans,
+ inode_inum(dir), &dir_u, &inode_u,
+ !(flags & BCH_CREATE_TMPFILE)
+ ? &dentry->d_name : NULL,
+ from_kuid(i_user_ns(&dir->v), current_fsuid()),
+ from_kgid(i_user_ns(&dir->v), current_fsgid()),
+ mode, rdev,
+ default_acl, acl, snapshot_src, flags) ?:
+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
+ KEY_TYPE_QUOTA_PREALLOC);
+ if (unlikely(ret))
+ goto err_before_quota;
+
+ inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
+ inum.inum = inode_u.bi_inum;
+
+ ret = bch2_subvolume_get(trans, inum.subvol, true,
+ BTREE_ITER_WITH_UPDATES, &subvol) ?:
+ bch2_trans_commit(trans, NULL, &journal_seq, 0);
+ if (unlikely(ret)) {
+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
+ KEY_TYPE_QUOTA_WARN);
+err_before_quota:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+ goto err_trans;
+ }
+
+ if (!(flags & BCH_CREATE_TMPFILE)) {
+ bch2_inode_update_after_write(trans, dir, &dir_u,
+ ATTR_MTIME|ATTR_CTIME);
+ mutex_unlock(&dir->ei_update_lock);
+ }
+
+ bch2_iget5_set(&inode->v, &inum);
+ bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
+
+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
+ set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
+
+ /*
+ * we must insert the new inode into the inode cache before calling
+ * bch2_trans_exit() and dropping locks, else we could race with another
+ * thread pulling the inode in and modifying it:
+ */
+
+ inode->v.i_state |= I_CREATING;
+
+ old = to_bch_ei(inode_insert5(&inode->v,
+ bch2_inode_hash(inum),
+ bch2_iget5_test,
+ bch2_iget5_set,
+ &inum));
+ BUG_ON(!old);
+
+ if (unlikely(old != inode)) {
+ /*
+ * We raced, another process pulled the new inode into cache
+ * before us:
+ */
+ make_bad_inode(&inode->v);
+ iput(&inode->v);
+
+ inode = old;
+ } else {
+ mutex_lock(&c->vfs_inodes_lock);
+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+ mutex_unlock(&c->vfs_inodes_lock);
+ /*
+ * we really don't want insert_inode_locked2() to be setting
+ * I_NEW...
+ */
+ unlock_new_inode(&inode->v);
+ }
+
+ bch2_trans_put(trans);
+err:
+ posix_acl_release(default_acl);
+ posix_acl_release(acl);
+ return inode;
+err_trans:
+ if (!(flags & BCH_CREATE_TMPFILE))
+ mutex_unlock(&dir->ei_update_lock);
+
+ bch2_trans_put(trans);
+ make_bad_inode(&inode->v);
+ iput(&inode->v);
+ inode = ERR_PTR(ret);
+ goto err;
+}
+
+/* methods */
+
+static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
+ unsigned int flags)
+{
+ struct bch_fs *c = vdir->i_sb->s_fs_info;
+ struct bch_inode_info *dir = to_bch_ei(vdir);
+ struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
+ struct inode *vinode = NULL;
+ subvol_inum inum = { .subvol = 1 };
+ int ret;
+
+ ret = bch2_dirent_lookup(c, inode_inum(dir), &hash,
+ &dentry->d_name, &inum);
+
+ if (!ret)
+ vinode = bch2_vfs_inode_get(c, inum);
+
+ return d_splice_alias(vinode, dentry);
+}
+
+static int bch2_mknod(struct mnt_idmap *idmap,
+ struct inode *vdir, struct dentry *dentry,
+ umode_t mode, dev_t rdev)
+{
+ struct bch_inode_info *inode =
+ __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
+ (subvol_inum) { 0 }, 0);
+
+ if (IS_ERR(inode))
+ return bch2_err_class(PTR_ERR(inode));
+
+ d_instantiate(dentry, &inode->v);
+ return 0;
+}
+
+static int bch2_create(struct mnt_idmap *idmap,
+ struct inode *vdir, struct dentry *dentry,
+ umode_t mode, bool excl)
+{
+ return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
+}
+
+static int __bch2_link(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct bch_inode_info *dir,
+ struct dentry *dentry)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct bch_inode_unpacked dir_u, inode_u;
+ int ret;
+
+ mutex_lock(&inode->ei_update_lock);
+
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_link_trans(trans,
+ inode_inum(dir), &dir_u,
+ inode_inum(inode), &inode_u,
+ &dentry->d_name));
+
+ if (likely(!ret)) {
+ bch2_inode_update_after_write(trans, dir, &dir_u,
+ ATTR_MTIME|ATTR_CTIME);
+ bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME);
+ }
+
+ bch2_trans_put(trans);
+ mutex_unlock(&inode->ei_update_lock);
+ return ret;
+}
+
+static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
+ struct dentry *dentry)
+{
+ struct bch_fs *c = vdir->i_sb->s_fs_info;
+ struct bch_inode_info *dir = to_bch_ei(vdir);
+ struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
+ int ret;
+
+ lockdep_assert_held(&inode->v.i_rwsem);
+
+ ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+ bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+ __bch2_link(c, inode, dir, dentry);
+ if (unlikely(ret))
+ return ret;
+
+ ihold(&inode->v);
+ d_instantiate(dentry, &inode->v);
+ return 0;
+}
+
+int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
+ bool deleting_snapshot)
+{
+ struct bch_fs *c = vdir->i_sb->s_fs_info;
+ struct bch_inode_info *dir = to_bch_ei(vdir);
+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+ struct bch_inode_unpacked dir_u, inode_u;
+ struct btree_trans *trans = bch2_trans_get(c);
+ int ret;
+
+ bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
+
+ ret = commit_do(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc,
+ bch2_unlink_trans(trans,
+ inode_inum(dir), &dir_u,
+ &inode_u, &dentry->d_name,
+ deleting_snapshot));
+ if (unlikely(ret))
+ goto err;
+
+ bch2_inode_update_after_write(trans, dir, &dir_u,
+ ATTR_MTIME|ATTR_CTIME);
+ bch2_inode_update_after_write(trans, inode, &inode_u,
+ ATTR_MTIME);
+
+ if (inode_u.bi_subvol) {
+ /*
+ * Subvolume deletion is asynchronous, but we still want to tell
+ * the VFS that it's been deleted here:
+ */
+ set_nlink(&inode->v, 0);
+ }
+err:
+ bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
+ bch2_trans_put(trans);
+
+ return ret;
+}
+
+static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
+{
+ struct bch_inode_info *dir = to_bch_ei(vdir);
+ struct bch_fs *c = dir->v.i_sb->s_fs_info;
+
+ return bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+ __bch2_unlink(vdir, dentry, false);
+}
+
+static int bch2_symlink(struct mnt_idmap *idmap,
+ struct inode *vdir, struct dentry *dentry,
+ const char *symname)
+{
+ struct bch_fs *c = vdir->i_sb->s_fs_info;
+ struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
+ int ret;
+
+ inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
+ (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
+ if (IS_ERR(inode))
+ return bch2_err_class(PTR_ERR(inode));
+
+ inode_lock(&inode->v);
+ ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
+ inode_unlock(&inode->v);
+
+ if (unlikely(ret))
+ goto err;
+
+ ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
+ if (unlikely(ret))
+ goto err;
+
+ ret = __bch2_link(c, inode, dir, dentry);
+ if (unlikely(ret))
+ goto err;
+
+ d_instantiate(dentry, &inode->v);
+ return 0;
+err:
+ iput(&inode->v);
+ return ret;
+}
+
+static int bch2_mkdir(struct mnt_idmap *idmap,
+ struct inode *vdir, struct dentry *dentry, umode_t mode)
+{
+ return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
+}
+
+static int bch2_rename2(struct mnt_idmap *idmap,
+ struct inode *src_vdir, struct dentry *src_dentry,
+ struct inode *dst_vdir, struct dentry *dst_dentry,
+ unsigned flags)
+{
+ struct bch_fs *c = src_vdir->i_sb->s_fs_info;
+ struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
+ struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
+ struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
+ struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
+ struct bch_inode_unpacked dst_dir_u, src_dir_u;
+ struct bch_inode_unpacked src_inode_u, dst_inode_u;
+ struct btree_trans *trans;
+ enum bch_rename_mode mode = flags & RENAME_EXCHANGE
+ ? BCH_RENAME_EXCHANGE
+ : dst_dentry->d_inode
+ ? BCH_RENAME_OVERWRITE : BCH_RENAME;
+ int ret;
+
+ if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
+ return -EINVAL;
+
+ if (mode == BCH_RENAME_OVERWRITE) {
+ ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
+ 0, LLONG_MAX);
+ if (ret)
+ return ret;
+ }
+
+ trans = bch2_trans_get(c);
+
+ bch2_lock_inodes(INODE_UPDATE_LOCK,
+ src_dir,
+ dst_dir,
+ src_inode,
+ dst_inode);
+
+ ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:
+ bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);
+ if (ret)
+ goto err;
+
+ if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
+ ret = bch2_fs_quota_transfer(c, src_inode,
+ dst_dir->ei_qid,
+ 1 << QTYP_PRJ,
+ KEY_TYPE_QUOTA_PREALLOC);
+ if (ret)
+ goto err;
+ }
+
+ if (mode == BCH_RENAME_EXCHANGE &&
+ inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
+ ret = bch2_fs_quota_transfer(c, dst_inode,
+ src_dir->ei_qid,
+ 1 << QTYP_PRJ,
+ KEY_TYPE_QUOTA_PREALLOC);
+ if (ret)
+ goto err;
+ }
+
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_rename_trans(trans,
+ inode_inum(src_dir), &src_dir_u,
+ inode_inum(dst_dir), &dst_dir_u,
+ &src_inode_u,
+ &dst_inode_u,
+ &src_dentry->d_name,
+ &dst_dentry->d_name,
+ mode));
+ if (unlikely(ret))
+ goto err;
+
+ BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
+ BUG_ON(dst_inode &&
+ dst_inode->v.i_ino != dst_inode_u.bi_inum);
+
+ bch2_inode_update_after_write(trans, src_dir, &src_dir_u,
+ ATTR_MTIME|ATTR_CTIME);
+
+ if (src_dir != dst_dir)
+ bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u,
+ ATTR_MTIME|ATTR_CTIME);
+
+ bch2_inode_update_after_write(trans, src_inode, &src_inode_u,
+ ATTR_CTIME);
+
+ if (dst_inode)
+ bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u,
+ ATTR_CTIME);
+err:
+ bch2_trans_put(trans);
+
+ bch2_fs_quota_transfer(c, src_inode,
+ bch_qid(&src_inode->ei_inode),
+ 1 << QTYP_PRJ,
+ KEY_TYPE_QUOTA_NOCHECK);
+ if (dst_inode)
+ bch2_fs_quota_transfer(c, dst_inode,
+ bch_qid(&dst_inode->ei_inode),
+ 1 << QTYP_PRJ,
+ KEY_TYPE_QUOTA_NOCHECK);
+
+ bch2_unlock_inodes(INODE_UPDATE_LOCK,
+ src_dir,
+ dst_dir,
+ src_inode,
+ dst_inode);
+
+ return ret;
+}
+
+static void bch2_setattr_copy(struct mnt_idmap *idmap,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ struct iattr *attr)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ unsigned int ia_valid = attr->ia_valid;
+
+ if (ia_valid & ATTR_UID)
+ bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
+ if (ia_valid & ATTR_GID)
+ bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
+
+ if (ia_valid & ATTR_SIZE)
+ bi->bi_size = attr->ia_size;
+
+ if (ia_valid & ATTR_ATIME)
+ bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
+ if (ia_valid & ATTR_MTIME)
+ bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
+ if (ia_valid & ATTR_CTIME)
+ bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
+
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = attr->ia_mode;
+ kgid_t gid = ia_valid & ATTR_GID
+ ? attr->ia_gid
+ : inode->v.i_gid;
+
+ if (!in_group_p(gid) &&
+ !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
+ mode &= ~S_ISGID;
+ bi->bi_mode = mode;
+ }
+}
+
+int bch2_setattr_nonsize(struct mnt_idmap *idmap,
+ struct bch_inode_info *inode,
+ struct iattr *attr)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_qid qid;
+ struct btree_trans *trans;
+ struct btree_iter inode_iter = { NULL };
+ struct bch_inode_unpacked inode_u;
+ struct posix_acl *acl = NULL;
+ int ret;
+
+ mutex_lock(&inode->ei_update_lock);
+
+ qid = inode->ei_qid;
+
+ if (attr->ia_valid & ATTR_UID)
+ qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
+
+ if (attr->ia_valid & ATTR_GID)
+ qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
+
+ ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
+ KEY_TYPE_QUOTA_PREALLOC);
+ if (ret)
+ goto err;
+
+ trans = bch2_trans_get(c);
+retry:
+ bch2_trans_begin(trans);
+ kfree(acl);
+ acl = NULL;
+
+ ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
+ BTREE_ITER_INTENT);
+ if (ret)
+ goto btree_err;
+
+ bch2_setattr_copy(idmap, inode, &inode_u, attr);
+
+ if (attr->ia_valid & ATTR_MODE) {
+ ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u,
+ inode_u.bi_mode, &acl);
+ if (ret)
+ goto btree_err;
+ }
+
+ ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+btree_err:
+ bch2_trans_iter_exit(trans, &inode_iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+ if (unlikely(ret))
+ goto err_trans;
+
+ bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid);
+
+ if (acl)
+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
+err_trans:
+ bch2_trans_put(trans);
+err:
+ mutex_unlock(&inode->ei_update_lock);
+
+ return bch2_err_class(ret);
+}
+
+static int bch2_getattr(struct mnt_idmap *idmap,
+ const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned query_flags)
+{
+ struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+ stat->dev = inode->v.i_sb->s_dev;
+ stat->ino = inode->v.i_ino;
+ stat->mode = inode->v.i_mode;
+ stat->nlink = inode->v.i_nlink;
+ stat->uid = inode->v.i_uid;
+ stat->gid = inode->v.i_gid;
+ stat->rdev = inode->v.i_rdev;
+ stat->size = i_size_read(&inode->v);
+ stat->atime = inode_get_atime(&inode->v);
+ stat->mtime = inode_get_mtime(&inode->v);
+ stat->ctime = inode_get_ctime(&inode->v);
+ stat->blksize = block_bytes(c);
+ stat->blocks = inode->v.i_blocks;
+
+ if (request_mask & STATX_BTIME) {
+ stat->result_mask |= STATX_BTIME;
+ stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
+ }
+
+ if (inode->ei_inode.bi_flags & BCH_INODE_immutable)
+ stat->attributes |= STATX_ATTR_IMMUTABLE;
+ stat->attributes_mask |= STATX_ATTR_IMMUTABLE;
+
+ if (inode->ei_inode.bi_flags & BCH_INODE_append)
+ stat->attributes |= STATX_ATTR_APPEND;
+ stat->attributes_mask |= STATX_ATTR_APPEND;
+
+ if (inode->ei_inode.bi_flags & BCH_INODE_nodump)
+ stat->attributes |= STATX_ATTR_NODUMP;
+ stat->attributes_mask |= STATX_ATTR_NODUMP;
+
+ return 0;
+}
+
+static int bch2_setattr(struct mnt_idmap *idmap,
+ struct dentry *dentry, struct iattr *iattr)
+{
+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ int ret;
+
+ lockdep_assert_held(&inode->v.i_rwsem);
+
+ ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+ setattr_prepare(idmap, dentry, iattr);
+ if (ret)
+ return ret;
+
+ return iattr->ia_valid & ATTR_SIZE
+ ? bchfs_truncate(idmap, inode, iattr)
+ : bch2_setattr_nonsize(idmap, inode, iattr);
+}
+
+static int bch2_tmpfile(struct mnt_idmap *idmap,
+ struct inode *vdir, struct file *file, umode_t mode)
+{
+ struct bch_inode_info *inode =
+ __bch2_create(idmap, to_bch_ei(vdir),
+ file->f_path.dentry, mode, 0,
+ (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
+
+ if (IS_ERR(inode))
+ return bch2_err_class(PTR_ERR(inode));
+
+ d_mark_tmpfile(file, &inode->v);
+ d_instantiate(file->f_path.dentry, &inode->v);
+ return finish_open_simple(file, 0);
+}
+
+static int bch2_fill_extent(struct bch_fs *c,
+ struct fiemap_extent_info *info,
+ struct bkey_s_c k, unsigned flags)
+{
+ if (bkey_extent_is_direct_data(k.k)) {
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ int ret;
+
+ if (k.k->type == KEY_TYPE_reflink_v)
+ flags |= FIEMAP_EXTENT_SHARED;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ int flags2 = 0;
+ u64 offset = p.ptr.offset;
+
+ if (p.ptr.unwritten)
+ flags2 |= FIEMAP_EXTENT_UNWRITTEN;
+
+ if (p.crc.compression_type)
+ flags2 |= FIEMAP_EXTENT_ENCODED;
+ else
+ offset += p.crc.offset;
+
+ if ((offset & (block_sectors(c) - 1)) ||
+ (k.k->size & (block_sectors(c) - 1)))
+ flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
+
+ ret = fiemap_fill_next_extent(info,
+ bkey_start_offset(k.k) << 9,
+ offset << 9,
+ k.k->size << 9, flags|flags2);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+ } else if (bkey_extent_is_inline_data(k.k)) {
+ return fiemap_fill_next_extent(info,
+ bkey_start_offset(k.k) << 9,
+ 0, k.k->size << 9,
+ flags|
+ FIEMAP_EXTENT_DATA_INLINE);
+ } else if (k.k->type == KEY_TYPE_reservation) {
+ return fiemap_fill_next_extent(info,
+ bkey_start_offset(k.k) << 9,
+ 0, k.k->size << 9,
+ flags|
+ FIEMAP_EXTENT_DELALLOC|
+ FIEMAP_EXTENT_UNWRITTEN);
+ } else {
+ BUG();
+ }
+}
+
+static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
+ u64 start, u64 len)
+{
+ struct bch_fs *c = vinode->i_sb->s_fs_info;
+ struct bch_inode_info *ei = to_bch_ei(vinode);
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_buf cur, prev;
+ struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
+ unsigned offset_into_extent, sectors;
+ bool have_extent = false;
+ u32 snapshot;
+ int ret = 0;
+
+ ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
+ if (ret)
+ return ret;
+
+ if (start + len < start)
+ return -EINVAL;
+
+ start >>= 9;
+
+ bch2_bkey_buf_init(&cur);
+ bch2_bkey_buf_init(&prev);
+ trans = bch2_trans_get(c);
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ SPOS(ei->v.i_ino, start, snapshot), 0);
+
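+ /*
+  * Extents are reported one behind (buffered in prev) so that the last one
+  * can be flagged FIEMAP_EXTENT_LAST after the loop:
+  */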
+ while (!(ret = btree_trans_too_many_iters(trans)) &&
+ (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
+ !(ret = bkey_err(k))) {
+ enum btree_id data_btree = BTREE_ID_extents;
+
+ if (!bkey_extent_is_data(k.k) &&
+ k.k->type != KEY_TYPE_reservation) {
+ bch2_btree_iter_advance(&iter);
+ continue;
+ }
+
+ offset_into_extent = iter.pos.offset -
+ bkey_start_offset(k.k);
+ sectors = k.k->size - offset_into_extent;
+
+ bch2_bkey_buf_reassemble(&cur, c, k);
+
+ ret = bch2_read_indirect_extent(trans, &data_btree,
+ &offset_into_extent, &cur);
+ if (ret)
+ break;
+
+ k = bkey_i_to_s_c(cur.k);
+ bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
+
+ sectors = min(sectors, k.k->size - offset_into_extent);
+
+ bch2_cut_front(POS(k.k->p.inode,
+ bkey_start_offset(k.k) +
+ offset_into_extent),
+ cur.k);
+ bch2_key_resize(&cur.k->k, sectors);
+ cur.k->k.p = iter.pos;
+ cur.k->k.p.offset += cur.k->k.size;
+
+ if (have_extent) {
+ bch2_trans_unlock(trans);
+ ret = bch2_fill_extent(c, info,
+ bkey_i_to_s_c(prev.k), 0);
+ if (ret)
+ break;
+ }
+
+ bkey_copy(prev.k, cur.k);
+ have_extent = true;
+
+ bch2_btree_iter_set_pos(&iter,
+ POS(iter.pos.inode, iter.pos.offset + sectors));
+ }
+ start = iter.pos.offset;
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ if (!ret && have_extent) {
+ bch2_trans_unlock(trans);
+ ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
+ FIEMAP_EXTENT_LAST);
+ }
+
+ bch2_trans_put(trans);
+ bch2_bkey_buf_exit(&cur, c);
+ bch2_bkey_buf_exit(&prev, c);
+ return ret < 0 ? ret : 0;
+}
+
+static const struct vm_operations_struct bch_vm_ops = {
+ .fault = bch2_page_fault,
+ .map_pages = filemap_map_pages,
+ .page_mkwrite = bch2_page_mkwrite,
+};
+
+static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ file_accessed(file);
+
+ vma->vm_ops = &bch_vm_ops;
+ return 0;
+}
+
+/* Directories: */
+
+static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ return generic_file_llseek_size(file, offset, whence,
+ S64_MAX, S64_MAX);
+}
+
+static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
+{
+ struct bch_inode_info *inode = file_bch_inode(file);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+ if (!dir_emit_dots(file, ctx))
+ return 0;
+
+ int ret = bch2_readdir(c, inode_inum(inode), ctx);
+
+ bch_err_fn(c, ret);
+ return bch2_err_class(ret);
+}
+
+static int bch2_open(struct inode *vinode, struct file *file)
+{
+ if (file->f_flags & (O_WRONLY|O_RDWR)) {
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+ int ret = bch2_subvol_is_ro(c, inode->ei_subvol);
+ if (ret)
+ return ret;
+ }
+
+ return generic_file_open(vinode, file);
+}
+
+static const struct file_operations bch_file_operations = {
+ .open = bch2_open,
+ .llseek = bch2_llseek,
+ .read_iter = bch2_read_iter,
+ .write_iter = bch2_write_iter,
+ .mmap = bch2_mmap,
+ .fsync = bch2_fsync,
+ .splice_read = filemap_splice_read,
+ .splice_write = iter_file_splice_write,
+ .fallocate = bch2_fallocate_dispatch,
+ .unlocked_ioctl = bch2_fs_file_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = bch2_compat_fs_ioctl,
+#endif
+ .remap_file_range = bch2_remap_file_range,
+};
+
+static const struct inode_operations bch_file_inode_operations = {
+ .getattr = bch2_getattr,
+ .setattr = bch2_setattr,
+ .fiemap = bch2_fiemap,
+ .listxattr = bch2_xattr_list,
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+ .get_acl = bch2_get_acl,
+ .set_acl = bch2_set_acl,
+#endif
+};
+
+static const struct inode_operations bch_dir_inode_operations = {
+ .lookup = bch2_lookup,
+ .create = bch2_create,
+ .link = bch2_link,
+ .unlink = bch2_unlink,
+ .symlink = bch2_symlink,
+ .mkdir = bch2_mkdir,
+ .rmdir = bch2_unlink,
+ .mknod = bch2_mknod,
+ .rename = bch2_rename2,
+ .getattr = bch2_getattr,
+ .setattr = bch2_setattr,
+ .tmpfile = bch2_tmpfile,
+ .listxattr = bch2_xattr_list,
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+ .get_acl = bch2_get_acl,
+ .set_acl = bch2_set_acl,
+#endif
+};
+
+static const struct file_operations bch_dir_file_operations = {
+ .llseek = bch2_dir_llseek,
+ .read = generic_read_dir,
+ .iterate_shared = bch2_vfs_readdir,
+ .fsync = bch2_fsync,
+ .unlocked_ioctl = bch2_fs_file_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = bch2_compat_fs_ioctl,
+#endif
+};
+
+static const struct inode_operations bch_symlink_inode_operations = {
+ .get_link = page_get_link,
+ .getattr = bch2_getattr,
+ .setattr = bch2_setattr,
+ .listxattr = bch2_xattr_list,
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+ .get_acl = bch2_get_acl,
+ .set_acl = bch2_set_acl,
+#endif
+};
+
+static const struct inode_operations bch_special_inode_operations = {
+ .getattr = bch2_getattr,
+ .setattr = bch2_setattr,
+ .listxattr = bch2_xattr_list,
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+ .get_acl = bch2_get_acl,
+ .set_acl = bch2_set_acl,
+#endif
+};
+
+static const struct address_space_operations bch_address_space_operations = {
+ .read_folio = bch2_read_folio,
+ .writepages = bch2_writepages,
+ .readahead = bch2_readahead,
+ .dirty_folio = filemap_dirty_folio,
+ .write_begin = bch2_write_begin,
+ .write_end = bch2_write_end,
+ .invalidate_folio = bch2_invalidate_folio,
+ .release_folio = bch2_release_folio,
+ .direct_IO = noop_direct_IO,
+#ifdef CONFIG_MIGRATION
+ .migrate_folio = filemap_migrate_folio,
+#endif
+ .error_remove_page = generic_error_remove_page,
+};
+
+struct bcachefs_fid {
+ u64 inum;
+ u32 subvol;
+ u32 gen;
+} __packed;
+
+struct bcachefs_fid_with_parent {
+ struct bcachefs_fid fid;
+ struct bcachefs_fid dir;
+} __packed;
+
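+/* fh_len is in units of u32s: */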
+static int bcachefs_fid_valid(int fh_len, int fh_type)
+{
+ switch (fh_type) {
+ case FILEID_BCACHEFS_WITHOUT_PARENT:
+ return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32);
+ case FILEID_BCACHEFS_WITH_PARENT:
+ return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
+ default:
+ return false;
+ }
+}
+
+static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
+{
+ return (struct bcachefs_fid) {
+ .inum = inode->ei_inode.bi_inum,
+ .subvol = inode->ei_subvol,
+ .gen = inode->ei_inode.bi_generation,
+ };
+}
+
+static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
+ struct inode *vdir)
+{
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+ struct bch_inode_info *dir = to_bch_ei(vdir);
+ int min_len;
+
+ if (!S_ISDIR(inode->v.i_mode) && dir) {
+ struct bcachefs_fid_with_parent *fid = (void *) fh;
+
+ min_len = sizeof(*fid) / sizeof(u32);
+ if (*len < min_len) {
+ *len = min_len;
+ return FILEID_INVALID;
+ }
+
+ fid->fid = bch2_inode_to_fid(inode);
+ fid->dir = bch2_inode_to_fid(dir);
+
+ *len = min_len;
+ return FILEID_BCACHEFS_WITH_PARENT;
+ } else {
+ struct bcachefs_fid *fid = (void *) fh;
+
+ min_len = sizeof(*fid) / sizeof(u32);
+ if (*len < min_len) {
+ *len = min_len;
+ return FILEID_INVALID;
+ }
+ *fid = bch2_inode_to_fid(inode);
+
+ *len = min_len;
+ return FILEID_BCACHEFS_WITHOUT_PARENT;
+ }
+}
+
+static struct inode *bch2_nfs_get_inode(struct super_block *sb,
+ struct bcachefs_fid fid)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) {
+ .subvol = fid.subvol,
+ .inum = fid.inum,
+ });
+ if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) {
+ iput(vinode);
+ vinode = ERR_PTR(-ESTALE);
+ }
+ return vinode;
+}
+
+static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
+ int fh_len, int fh_type)
+{
+ struct bcachefs_fid *fid = (void *) _fid;
+
+ if (!bcachefs_fid_valid(fh_len, fh_type))
+ return NULL;
+
+ return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
+}
+
+static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
+ int fh_len, int fh_type)
+{
+ struct bcachefs_fid_with_parent *fid = (void *) _fid;
+
+ if (!bcachefs_fid_valid(fh_len, fh_type) ||
+ fh_type != FILEID_BCACHEFS_WITH_PARENT)
+ return NULL;
+
+ return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
+}
+
+static struct dentry *bch2_get_parent(struct dentry *child)
+{
+ struct bch_inode_info *inode = to_bch_ei(child->d_inode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ subvol_inum parent_inum = {
+ .subvol = inode->ei_inode.bi_parent_subvol ?:
+ inode->ei_subvol,
+ .inum = inode->ei_inode.bi_dir,
+ };
+
+ return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
+}
+
+static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
+{
+ struct bch_inode_info *inode = to_bch_ei(child->d_inode);
+ struct bch_inode_info *dir = to_bch_ei(parent->d_inode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct btree_trans *trans;
+ struct btree_iter iter1;
+ struct btree_iter iter2;
+ struct bkey_s_c k;
+ struct bkey_s_c_dirent d;
+ struct bch_inode_unpacked inode_u;
+ subvol_inum target;
+ u32 snapshot;
+ struct qstr dirent_name;
+ unsigned name_len = 0;
+ int ret;
+
+ if (!S_ISDIR(dir->v.i_mode))
+ return -EINVAL;
+
+ trans = bch2_trans_get(c);
+
+ bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents,
+ POS(dir->ei_inode.bi_inum, 0), 0);
+ bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents,
+ POS(dir->ei_inode.bi_inum, 0), 0);
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ bch2_btree_iter_set_snapshot(&iter1, snapshot);
+ bch2_btree_iter_set_snapshot(&iter2, snapshot);
+
+ ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u);
+ if (ret)
+ goto err;
+
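+	/* Fast path: the inode's backpointer refers to a dirent in this directory: */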
+ if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
+ bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
+
+ k = bch2_btree_iter_peek_slot(&iter1);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type != KEY_TYPE_dirent) {
+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
+ goto err;
+ }
+
+ d = bkey_s_c_to_dirent(k);
+ ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
+ if (ret > 0)
+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
+ if (ret)
+ goto err;
+
+ if (target.subvol == inode->ei_subvol &&
+ target.inum == inode->ei_inode.bi_inum)
+ goto found;
+ } else {
+ /*
+ * File with multiple hardlinks and our backref is to the wrong
+ * directory - linear search:
+ */
+ for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
+ if (k.k->p.inode > dir->ei_inode.bi_inum)
+ break;
+
+ if (k.k->type != KEY_TYPE_dirent)
+ continue;
+
+ d = bkey_s_c_to_dirent(k);
+ ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
+ if (ret < 0)
+ break;
+ if (ret)
+ continue;
+
+ if (target.subvol == inode->ei_subvol &&
+ target.inum == inode->ei_inode.bi_inum)
+ goto found;
+ }
+ }
+
+ ret = -ENOENT;
+ goto err;
+found:
+ dirent_name = bch2_dirent_get_name(d);
+
+ name_len = min_t(unsigned, dirent_name.len, NAME_MAX);
+ memcpy(name, dirent_name.name, name_len);
+ name[name_len] = '\0';
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_iter_exit(trans, &iter1);
+ bch2_trans_iter_exit(trans, &iter2);
+ bch2_trans_put(trans);
+
+ return ret;
+}
+
+static const struct export_operations bch_export_ops = {
+ .encode_fh = bch2_encode_fh,
+ .fh_to_dentry = bch2_fh_to_dentry,
+ .fh_to_parent = bch2_fh_to_parent,
+ .get_parent = bch2_get_parent,
+ .get_name = bch2_get_name,
+};
+
+static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ struct bch_subvolume *subvol)
+{
+ bch2_inode_update_after_write(trans, inode, bi, ~0);
+
+ if (BCH_SUBVOLUME_SNAP(subvol))
+ set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
+ else
+ clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
+
+ inode->v.i_blocks = bi->bi_sectors;
+ inode->v.i_ino = bi->bi_inum;
+ inode->v.i_rdev = bi->bi_dev;
+ inode->v.i_generation = bi->bi_generation;
+ inode->v.i_size = bi->bi_size;
+
+ inode->ei_flags = 0;
+ inode->ei_quota_reserved = 0;
+ inode->ei_qid = bch_qid(bi);
+ inode->ei_subvol = inum.subvol;
+
+ inode->v.i_mapping->a_ops = &bch_address_space_operations;
+
+ switch (inode->v.i_mode & S_IFMT) {
+ case S_IFREG:
+ inode->v.i_op = &bch_file_inode_operations;
+ inode->v.i_fop = &bch_file_operations;
+ break;
+ case S_IFDIR:
+ inode->v.i_op = &bch_dir_inode_operations;
+ inode->v.i_fop = &bch_dir_file_operations;
+ break;
+ case S_IFLNK:
+ inode_nohighmem(&inode->v);
+ inode->v.i_op = &bch_symlink_inode_operations;
+ break;
+ default:
+ init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
+ inode->v.i_op = &bch_special_inode_operations;
+ break;
+ }
+
+ mapping_set_large_folios(inode->v.i_mapping);
+}
+
+static struct inode *bch2_alloc_inode(struct super_block *sb)
+{
+ struct bch_inode_info *inode;
+
+ inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
+ if (!inode)
+ return NULL;
+
+ inode_init_once(&inode->v);
+ mutex_init(&inode->ei_update_lock);
+ two_state_lock_init(&inode->ei_pagecache_lock);
+ INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
+ mutex_init(&inode->ei_quota_lock);
+
+ return &inode->v;
+}
+
+static void bch2_i_callback(struct rcu_head *head)
+{
+ struct inode *vinode = container_of(head, struct inode, i_rcu);
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+
+ kmem_cache_free(bch2_inode_cache, inode);
+}
+
+static void bch2_destroy_inode(struct inode *vinode)
+{
+ call_rcu(&vinode->i_rcu, bch2_i_callback);
+}
+
+static int inode_update_times_fn(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ void *p)
+{
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+
+ bi->bi_atime = timespec_to_bch2_time(c, inode_get_atime(&inode->v));
+ bi->bi_mtime = timespec_to_bch2_time(c, inode_get_mtime(&inode->v));
+ bi->bi_ctime = timespec_to_bch2_time(c, inode_get_ctime(&inode->v));
+
+ return 0;
+}
+
+static int bch2_vfs_write_inode(struct inode *vinode,
+ struct writeback_control *wbc)
+{
+ struct bch_fs *c = vinode->i_sb->s_fs_info;
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+ int ret;
+
+ mutex_lock(&inode->ei_update_lock);
+ ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
+ ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
+ mutex_unlock(&inode->ei_update_lock);
+
+ return bch2_err_class(ret);
+}
+
+static void bch2_evict_inode(struct inode *vinode)
+{
+ struct bch_fs *c = vinode->i_sb->s_fs_info;
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+
+ truncate_inode_pages_final(&inode->v.i_data);
+
+ clear_inode(&inode->v);
+
+ BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
+
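+	/* Last link gone: release quota charges and delete the inode from the btree: */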
+ if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
+ KEY_TYPE_QUOTA_WARN);
+ bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
+ KEY_TYPE_QUOTA_WARN);
+ bch2_inode_rm(c, inode_inum(inode));
+ }
+
+ mutex_lock(&c->vfs_inodes_lock);
+ list_del_init(&inode->ei_vfs_inode_list);
+ mutex_unlock(&c->vfs_inodes_lock);
+}
+
+void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
+{
+ struct bch_inode_info *inode;
+ DARRAY(struct bch_inode_info *) grabbed;
+ bool clean_pass = false, this_pass_clean;
+
+ /*
+ * Initially, we scan for inodes without I_DONTCACHE, then mark them to
+ * be pruned with d_mark_dontcache().
+ *
+ * Once we've had a clean pass where we didn't find any inodes without
+ * I_DONTCACHE, we wait for them to be freed:
+ */
+
+ darray_init(&grabbed);
+ darray_make_room(&grabbed, 1024);
+again:
+ cond_resched();
+ this_pass_clean = true;
+
+ mutex_lock(&c->vfs_inodes_lock);
+ list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
+ if (!snapshot_list_has_id(s, inode->ei_subvol))
+ continue;
+
+ if (!(inode->v.i_state & I_DONTCACHE) &&
+ !(inode->v.i_state & I_FREEING) &&
+ igrab(&inode->v)) {
+ this_pass_clean = false;
+
+ if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
+ iput(&inode->v);
+ break;
+ }
+ } else if (clean_pass && this_pass_clean) {
+ wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
+ DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
+
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ mutex_unlock(&c->vfs_inodes_lock);
+
+ schedule();
+ finish_wait(wq, &wait.wq_entry);
+ goto again;
+ }
+ }
+ mutex_unlock(&c->vfs_inodes_lock);
+
+ darray_for_each(grabbed, i) {
+ inode = *i;
+ d_mark_dontcache(&inode->v);
+ d_prune_aliases(&inode->v);
+ iput(&inode->v);
+ }
+ grabbed.nr = 0;
+
+ if (!clean_pass || !this_pass_clean) {
+ clean_pass = this_pass_clean;
+ goto again;
+ }
+
+ darray_exit(&grabbed);
+}
+
+static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct bch_fs *c = sb->s_fs_info;
+ struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
+ unsigned shift = sb->s_blocksize_bits - 9;
+ /*
+ * this assumes inodes take up 64 bytes, which is a decent average
+ * number:
+ */
+ u64 avail_inodes = ((usage.capacity - usage.used) << 3);
+ u64 fsid;
+
+ buf->f_type = BCACHEFS_STATFS_MAGIC;
+ buf->f_bsize = sb->s_blocksize;
+ buf->f_blocks = usage.capacity >> shift;
+ buf->f_bfree = usage.free >> shift;
+ buf->f_bavail = avail_factor(usage.free) >> shift;
+
+ buf->f_files = usage.nr_inodes + avail_inodes;
+ buf->f_ffree = avail_inodes;
+
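+	/* Fold the 128 bit filesystem UUID down to a 64 bit f_fsid: */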
+ fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
+ le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
+ buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
+ buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+ buf->f_namelen = BCH_NAME_MAX;
+
+ return 0;
+}
+
+static int bch2_sync_fs(struct super_block *sb, int wait)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ int ret;
+
+ if (c->opts.journal_flush_disabled)
+ return 0;
+
+ if (!wait) {
+ bch2_journal_flush_async(&c->journal, NULL);
+ return 0;
+ }
+
+ ret = bch2_journal_flush(&c->journal);
+ return bch2_err_class(ret);
+}
+
+static struct bch_fs *bch2_path_to_fs(const char *path)
+{
+ struct bch_fs *c;
+ dev_t dev;
+ int ret;
+
+ ret = lookup_bdev(path, &dev);
+ if (ret)
+ return ERR_PTR(ret);
+
+ c = bch2_dev_to_fs(dev);
+ if (c)
+ closure_put(&c->cl);
+ return c ?: ERR_PTR(-ENOENT);
+}
+
+static int bch2_remount(struct super_block *sb, int *flags, char *data)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ struct bch_opts opts = bch2_opts_empty();
+ int ret;
+
+ ret = bch2_parse_mount_opts(c, &opts, data);
+ if (ret)
+ goto err;
+
+ opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
+
+ if (opts.read_only != c->opts.read_only) {
+ down_write(&c->state_lock);
+
+ if (opts.read_only) {
+ bch2_fs_read_only(c);
+
+ sb->s_flags |= SB_RDONLY;
+ } else {
+ ret = bch2_fs_read_write(c);
+ if (ret) {
+ bch_err(c, "error going rw: %i", ret);
+ up_write(&c->state_lock);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ sb->s_flags &= ~SB_RDONLY;
+ }
+
+ c->opts.read_only = opts.read_only;
+
+ up_write(&c->state_lock);
+ }
+
+ if (opt_defined(opts, errors))
+ c->opts.errors = opts.errors;
+err:
+ return bch2_err_class(ret);
+}
+
+static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
+{
+ struct bch_fs *c = root->d_sb->s_fs_info;
+ bool first = true;
+
+ for_each_online_member(c, ca) {
+ if (!first)
+ seq_putc(seq, ':');
+ first = false;
+ seq_puts(seq, ca->disk_sb.sb_name);
+ }
+
+ return 0;
+}
+
+static int bch2_show_options(struct seq_file *seq, struct dentry *root)
+{
+ struct bch_fs *c = root->d_sb->s_fs_info;
+ enum bch_opt_id i;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
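+	/* Only show mount options that differ from the defaults: */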
+ for (i = 0; i < bch2_opts_nr; i++) {
+ const struct bch_option *opt = &bch2_opt_table[i];
+ u64 v = bch2_opt_get_by_id(&c->opts, i);
+
+ if (!(opt->flags & OPT_MOUNT))
+ continue;
+
+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
+ continue;
+
+ printbuf_reset(&buf);
+ bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v,
+ OPT_SHOW_MOUNT_STYLE);
+ seq_putc(seq, ',');
+ seq_puts(seq, buf.buf);
+ }
+
+ if (buf.allocation_failure)
+ ret = -ENOMEM;
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static void bch2_put_super(struct super_block *sb)
+{
+ struct bch_fs *c = sb->s_fs_info;
+
+ __bch2_fs_stop(c);
+}
+
+/*
+ * bcachefs doesn't currently integrate intwrite freeze protection but the
+ * internal write references serve the same purpose. Therefore reuse the
+ * read-only transition code to perform the quiesce. The caveat is that we don't
+ * currently have the ability to block tasks that want a write reference while
+ * the superblock is frozen. This is fine for now, but we should either add
+ * blocking support or find a way to integrate sb_start_intwrite() and friends.
+ */
+static int bch2_freeze(struct super_block *sb)
+{
+ struct bch_fs *c = sb->s_fs_info;
+
+ down_write(&c->state_lock);
+ bch2_fs_read_only(c);
+ up_write(&c->state_lock);
+ return 0;
+}
+
+static int bch2_unfreeze(struct super_block *sb)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ int ret;
+
+ if (test_bit(BCH_FS_emergency_ro, &c->flags))
+ return 0;
+
+ down_write(&c->state_lock);
+ ret = bch2_fs_read_write(c);
+ up_write(&c->state_lock);
+ return ret;
+}
+
+static const struct super_operations bch_super_operations = {
+ .alloc_inode = bch2_alloc_inode,
+ .destroy_inode = bch2_destroy_inode,
+ .write_inode = bch2_vfs_write_inode,
+ .evict_inode = bch2_evict_inode,
+ .sync_fs = bch2_sync_fs,
+ .statfs = bch2_statfs,
+ .show_devname = bch2_show_devname,
+ .show_options = bch2_show_options,
+ .remount_fs = bch2_remount,
+ .put_super = bch2_put_super,
+ .freeze_fs = bch2_freeze,
+ .unfreeze_fs = bch2_unfreeze,
+};
+
+static int bch2_set_super(struct super_block *s, void *data)
+{
+ s->s_fs_info = data;
+ return 0;
+}
+
+static int bch2_noset_super(struct super_block *s, void *data)
+{
+ return -EBUSY;
+}
+
+typedef DARRAY(struct bch_fs *) darray_fs;
+
+static int bch2_test_super(struct super_block *s, void *data)
+{
+ struct bch_fs *c = s->s_fs_info;
+ darray_fs *d = data;
+
+ if (!c)
+ return false;
+
+ darray_for_each(*d, i)
+ if (c != *i)
+ return false;
+ return true;
+}
+
+static struct dentry *bch2_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ struct bch_fs *c;
+ struct super_block *sb;
+ struct inode *vinode;
+ struct bch_opts opts = bch2_opts_empty();
+ int ret;
+
+ opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
+
+ ret = bch2_parse_mount_opts(NULL, &opts, data);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (!dev_name || strlen(dev_name) == 0)
+ return ERR_PTR(-EINVAL);
+
+ darray_str devs;
+ ret = bch2_split_devs(dev_name, &devs);
+ if (ret)
+ return ERR_PTR(ret);
+
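+	/*
+	 * Map each device path to an already-open bch_fs, if there is one;
+	 * bch2_test_super() then only matches an existing superblock when
+	 * every path refers to the same filesystem:
+	 */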
+ darray_fs devs_to_fs = {};
+ darray_for_each(devs, i) {
+ ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i));
+ if (ret) {
+ sb = ERR_PTR(ret);
+ goto got_sb;
+ }
+ }
+
+ sb = sget(fs_type, bch2_test_super, bch2_noset_super, flags|SB_NOSEC, &devs_to_fs);
+ if (!IS_ERR(sb))
+ goto got_sb;
+
+ c = bch2_fs_open(devs.data, devs.nr, opts);
+ if (IS_ERR(c)) {
+ sb = ERR_CAST(c);
+ goto got_sb;
+ }
+
+ /* Some options can't be parsed until after the fs is started: */
+ ret = bch2_parse_mount_opts(c, &opts, data);
+ if (ret) {
+ bch2_fs_stop(c);
+ sb = ERR_PTR(ret);
+ goto got_sb;
+ }
+
+ bch2_opts_apply(&c->opts, opts);
+
+ sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
+ if (IS_ERR(sb))
+ bch2_fs_stop(c);
+got_sb:
+ darray_exit(&devs_to_fs);
+ bch2_darray_str_exit(&devs);
+
+ if (IS_ERR(sb)) {
+ ret = PTR_ERR(sb);
+ ret = bch2_err_class(ret);
+ return ERR_PTR(ret);
+ }
+
+ c = sb->s_fs_info;
+
+ if (sb->s_root) {
+ if ((flags ^ sb->s_flags) & SB_RDONLY) {
+ ret = -EBUSY;
+ goto err_put_super;
+ }
+ goto out;
+ }
+
+ sb->s_blocksize = block_bytes(c);
+ sb->s_blocksize_bits = ilog2(block_bytes(c));
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_op = &bch_super_operations;
+ sb->s_export_op = &bch_export_ops;
+#ifdef CONFIG_BCACHEFS_QUOTA
+ sb->s_qcop = &bch2_quotactl_operations;
+ sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
+#endif
+ sb->s_xattr = bch2_xattr_handlers;
+ sb->s_magic = BCACHEFS_STATFS_MAGIC;
+ sb->s_time_gran = c->sb.nsec_per_time_unit;
+ sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
+ sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
+ c->vfs_sb = sb;
+ strscpy(sb->s_id, c->name, sizeof(sb->s_id));
+
+ ret = super_setup_bdi(sb);
+ if (ret)
+ goto err_put_super;
+
+ sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
+
+ for_each_online_member(c, ca) {
+ struct block_device *bdev = ca->disk_sb.bdev;
+
+ /* XXX: create an anonymous device for multi device filesystems */
+ sb->s_bdev = bdev;
+ sb->s_dev = bdev->bd_dev;
+ percpu_ref_put(&ca->io_ref);
+ break;
+ }
+
+ c->dev = sb->s_dev;
+
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+ if (c->opts.acl)
+ sb->s_flags |= SB_POSIXACL;
+#endif
+
+ sb->s_shrink->seeks = 0;
+
+ vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
+ ret = PTR_ERR_OR_ZERO(vinode);
+ bch_err_msg(c, ret, "mounting: error getting root inode");
+ if (ret)
+ goto err_put_super;
+
+ sb->s_root = d_make_root(vinode);
+ if (!sb->s_root) {
+ bch_err(c, "error mounting: error allocating root dentry");
+ ret = -ENOMEM;
+ goto err_put_super;
+ }
+
+ sb->s_flags |= SB_ACTIVE;
+out:
+ return dget(sb->s_root);
+
+err_put_super:
+ deactivate_locked_super(sb);
+ return ERR_PTR(bch2_err_class(ret));
+}
+
+static void bch2_kill_sb(struct super_block *sb)
+{
+ struct bch_fs *c = sb->s_fs_info;
+
+ generic_shutdown_super(sb);
+ bch2_fs_free(c);
+}
+
+static struct file_system_type bcache_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "bcachefs",
+ .mount = bch2_mount,
+ .kill_sb = bch2_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV,
+};
+
+MODULE_ALIAS_FS("bcachefs");
+
+void bch2_vfs_exit(void)
+{
+ unregister_filesystem(&bcache_fs_type);
+ kmem_cache_destroy(bch2_inode_cache);
+}
+
+int __init bch2_vfs_init(void)
+{
+ int ret = -ENOMEM;
+
+ bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
+ if (!bch2_inode_cache)
+ goto err;
+
+ ret = register_filesystem(&bcache_fs_type);
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ bch2_vfs_exit();
+ return ret;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/c_src/libbcachefs/fs.h b/c_src/libbcachefs/fs.h
new file mode 100644
index 00000000..c3af7225
--- /dev/null
+++ b/c_src/libbcachefs/fs.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FS_H
+#define _BCACHEFS_FS_H
+
+#include "inode.h"
+#include "opts.h"
+#include "str_hash.h"
+#include "quota_types.h"
+#include "two_state_shared_lock.h"
+
+#include <linux/seqlock.h>
+#include <linux/stat.h>
+
+struct bch_inode_info {
+ struct inode v;
+ struct list_head ei_vfs_inode_list;
+ unsigned long ei_flags;
+
+ struct mutex ei_update_lock;
+ u64 ei_quota_reserved;
+ unsigned long ei_last_dirtied;
+ two_state_lock_t ei_pagecache_lock;
+
+ struct mutex ei_quota_lock;
+ struct bch_qid ei_qid;
+
+ u32 ei_subvol;
+
+ /*
+ * When we've been doing nocow writes we'll need to issue flushes to the
+ * underlying block devices
+ *
+ * XXX: a device may have had a flush issued by some other codepath. It
+ * would be better to keep for each device a sequence number that's
+	 * incremented when we issue a cache flush, and track here the sequence
+ * number that needs flushing.
+ */
+ struct bch_devs_mask ei_devs_need_flush;
+
+ /* copy of inode in btree: */
+ struct bch_inode_unpacked ei_inode;
+};
+
+#define bch2_pagecache_add_put(i) bch2_two_state_unlock(&i->ei_pagecache_lock, 0)
+#define bch2_pagecache_add_tryget(i) bch2_two_state_trylock(&i->ei_pagecache_lock, 0)
+#define bch2_pagecache_add_get(i) bch2_two_state_lock(&i->ei_pagecache_lock, 0)
+
+#define bch2_pagecache_block_put(i) bch2_two_state_unlock(&i->ei_pagecache_lock, 1)
+#define bch2_pagecache_block_get(i) bch2_two_state_lock(&i->ei_pagecache_lock, 1)
+
+static inline subvol_inum inode_inum(struct bch_inode_info *inode)
+{
+ return (subvol_inum) {
+ .subvol = inode->ei_subvol,
+ .inum = inode->ei_inode.bi_inum,
+ };
+}
+
+/*
+ * Set if we've gotten a btree error for this inode, and thus the vfs inode and
+ * btree inode may be inconsistent:
+ */
+#define EI_INODE_ERROR 0
+
+/*
+ * Set if the inode is in a snapshot subvolume - we don't do quota accounting in
+ * those:
+ */
+#define EI_INODE_SNAPSHOT 1
+
+#define to_bch_ei(_inode) \
+ container_of_or_null(_inode, struct bch_inode_info, v)
+
+static inline int ptrcmp(void *l, void *r)
+{
+ return cmp_int(l, r);
+}
+
+enum bch_inode_lock_op {
+ INODE_PAGECACHE_BLOCK = (1U << 0),
+ INODE_UPDATE_LOCK = (1U << 1),
+};
+
+#define bch2_lock_inodes(_locks, ...) \
+do { \
+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \
+ unsigned i; \
+ \
+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \
+ \
+ for (i = 1; i < ARRAY_SIZE(a); i++) \
+ if (a[i] != a[i - 1]) { \
+ if ((_locks) & INODE_PAGECACHE_BLOCK) \
+ bch2_pagecache_block_get(a[i]);\
+ if ((_locks) & INODE_UPDATE_LOCK) \
+ mutex_lock_nested(&a[i]->ei_update_lock, i);\
+ } \
+} while (0)
+
+#define bch2_unlock_inodes(_locks, ...) \
+do { \
+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \
+ unsigned i; \
+ \
+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \
+ \
+ for (i = 1; i < ARRAY_SIZE(a); i++) \
+ if (a[i] != a[i - 1]) { \
+ if ((_locks) & INODE_PAGECACHE_BLOCK) \
+ bch2_pagecache_block_put(a[i]);\
+ if ((_locks) & INODE_UPDATE_LOCK) \
+ mutex_unlock(&a[i]->ei_update_lock); \
+ } \
+} while (0)
+
+static inline struct bch_inode_info *file_bch_inode(struct file *file)
+{
+ return to_bch_ei(file_inode(file));
+}
+
+static inline bool inode_attr_changing(struct bch_inode_info *dir,
+ struct bch_inode_info *inode,
+ enum inode_opt_id id)
+{
+ return !(inode->ei_inode.bi_fields_set & (1 << id)) &&
+ bch2_inode_opt_get(&dir->ei_inode, id) !=
+ bch2_inode_opt_get(&inode->ei_inode, id);
+}
+
+static inline bool inode_attrs_changing(struct bch_inode_info *dir,
+ struct bch_inode_info *inode)
+{
+ unsigned id;
+
+ for (id = 0; id < Inode_opt_nr; id++)
+ if (inode_attr_changing(dir, inode, id))
+ return true;
+
+ return false;
+}
+
+struct bch_inode_unpacked;
+
+#ifndef NO_BCACHEFS_FS
+
+struct bch_inode_info *
+__bch2_create(struct mnt_idmap *, struct bch_inode_info *,
+ struct dentry *, umode_t, dev_t, subvol_inum, unsigned);
+
+int bch2_fs_quota_transfer(struct bch_fs *,
+ struct bch_inode_info *,
+ struct bch_qid,
+ unsigned,
+ enum quota_acct_mode);
+
+static inline int bch2_set_projid(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ u32 projid)
+{
+ struct bch_qid qid = inode->ei_qid;
+
+ qid.q[QTYP_PRJ] = projid;
+
+ return bch2_fs_quota_transfer(c, inode, qid,
+ 1 << QTYP_PRJ,
+ KEY_TYPE_QUOTA_PREALLOC);
+}
+
+struct inode *bch2_vfs_inode_get(struct bch_fs *, subvol_inum);
+
+/* returns 0 if we want to do the update, or error is passed up */
+typedef int (*inode_set_fn)(struct btree_trans *,
+ struct bch_inode_info *,
+ struct bch_inode_unpacked *, void *);
+
+void bch2_inode_update_after_write(struct btree_trans *,
+ struct bch_inode_info *,
+ struct bch_inode_unpacked *,
+ unsigned);
+int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
+ inode_set_fn, void *, unsigned);
+
+int bch2_setattr_nonsize(struct mnt_idmap *,
+ struct bch_inode_info *,
+ struct iattr *);
+int __bch2_unlink(struct inode *, struct dentry *, bool);
+
+void bch2_evict_subvolume_inodes(struct bch_fs *, snapshot_id_list *);
+
+void bch2_vfs_exit(void);
+int bch2_vfs_init(void);
+
+#else
+
+#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
+
+static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
+ snapshot_id_list *s) {}
+static inline void bch2_vfs_exit(void) {}
+static inline int bch2_vfs_init(void) { return 0; }
+
+#endif /* NO_BCACHEFS_FS */
+
+#endif /* _BCACHEFS_FS_H */
diff --git a/c_src/libbcachefs/fsck.c b/c_src/libbcachefs/fsck.c
new file mode 100644
index 00000000..4f0ecd60
--- /dev/null
+++ b/c_src/libbcachefs/fsck.c
@@ -0,0 +1,2394 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_buf.h"
+#include "btree_cache.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "darray.h"
+#include "dirent.h"
+#include "error.h"
+#include "fs-common.h"
+#include "fsck.h"
+#include "inode.h"
+#include "keylist.h"
+#include "recovery.h"
+#include "snapshot.h"
+#include "super.h"
+#include "xattr.h"
+
+#include <linux/bsearch.h>
+#include <linux/dcache.h> /* struct qstr */
+
+/*
+ * XXX: this is handling transaction restarts without returning
+ * -BCH_ERR_transaction_restart_nested; this is not how we do things anymore:
+ */
+static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
+ u32 snapshot)
+{
+ u64 sectors = 0;
+
+ int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
+ SPOS(inum, 0, snapshot),
+ POS(inum, U64_MAX),
+ 0, k, ({
+ if (bkey_extent_is_allocation(k.k))
+ sectors += k.k->size;
+ 0;
+ }));
+
+ return ret ?: sectors;
+}
+
+static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
+ u32 snapshot)
+{
+ u64 subdirs = 0;
+
+ int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
+ SPOS(inum, 0, snapshot),
+ POS(inum, U64_MAX),
+ 0, k, ({
+ if (k.k->type == KEY_TYPE_dirent &&
+ bkey_s_c_to_dirent(k).v->d_type == DT_DIR)
+ subdirs++;
+ 0;
+ }));
+
+ return ret ?: subdirs;
+}
+
+static int subvol_lookup(struct btree_trans *trans, u32 subvol,
+ u32 *snapshot, u64 *inum)
+{
+ struct bch_subvolume s;
+ int ret;
+
+ ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
+
+ *snapshot = le32_to_cpu(s.snapshot);
+ *inum = le64_to_cpu(s.inode);
+ return ret;
+}
+
+static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
+ struct bch_inode_unpacked *inode)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
+ POS(0, inode_nr),
+ BTREE_ITER_ALL_SNAPSHOTS);
+ k = bch2_btree_iter_peek(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
+ ret = -BCH_ERR_ENOENT_inode;
+ goto err;
+ }
+
+ ret = bch2_inode_unpack(k, inode);
+err:
+ bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
+ struct bch_inode_unpacked *inode,
+ u32 *snapshot)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, inode_nr, *snapshot), 0);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ ret = bkey_is_inode(k.k)
+ ? bch2_inode_unpack(k, inode)
+ : -BCH_ERR_ENOENT_inode;
+ if (!ret)
+ *snapshot = iter.pos.snapshot;
+err:
+ bch_err_msg(trans->c, ret, "fetching inode %llu:%u", inode_nr, *snapshot);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int __lookup_dirent(struct btree_trans *trans,
+ struct bch_hash_info hash_info,
+ subvol_inum dir, struct qstr *name,
+ u64 *target, unsigned *type)
+{
+ struct btree_iter iter;
+ struct bkey_s_c_dirent d;
+ int ret;
+
+ ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc,
+ &hash_info, dir, name, 0);
+ if (ret)
+ return ret;
+
+ d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
+ *target = le64_to_cpu(d.v->d_inum);
+ *type = d.v->d_type;
+ bch2_trans_iter_exit(trans, &iter);
+ return 0;
+}
+
+static int __write_inode(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode,
+ u32 snapshot)
+{
+ struct bkey_inode_buf *inode_p =
+ bch2_trans_kmalloc(trans, sizeof(*inode_p));
+
+ if (IS_ERR(inode_p))
+ return PTR_ERR(inode_p);
+
+ bch2_inode_pack(inode_p, inode);
+ inode_p->inode.k.p.snapshot = snapshot;
+
+ return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
+ &inode_p->inode.k_i,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+}
+
+static int fsck_write_inode(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode,
+ u32 snapshot)
+{
+ int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ __write_inode(trans, inode, snapshot));
+ bch_err_fn(trans->c, ret);
+ return ret;
+}
+
+static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bch_inode_unpacked dir_inode;
+ struct bch_hash_info dir_hash_info;
+ int ret;
+
+ ret = lookup_first_inode(trans, pos.inode, &dir_inode);
+ if (ret)
+ goto err;
+
+ dir_hash_info = bch2_hash_info_init(c, &dir_inode);
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
+
+ ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
+ &dir_hash_info, &iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+/* Get lost+found, create if it doesn't exist: */
+static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
+ struct bch_inode_unpacked *lostfound)
+{
+ struct bch_fs *c = trans->c;
+ struct qstr lostfound_str = QSTR("lost+found");
+ u64 inum = 0;
+ unsigned d_type = 0;
+ int ret;
+
+ struct bch_snapshot_tree st;
+ ret = bch2_snapshot_tree_lookup(trans,
+ bch2_snapshot_tree(c, snapshot), &st);
+ if (ret)
+ return ret;
+
+ subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
+ u32 subvol_snapshot;
+
+ ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol),
+ &subvol_snapshot, &root_inum.inum);
+ bch_err_msg(c, ret, "looking up root subvol");
+ if (ret)
+ return ret;
+
+ struct bch_inode_unpacked root_inode;
+ struct bch_hash_info root_hash_info;
+ ret = lookup_inode(trans, root_inum.inum, &root_inode, &snapshot);
+ bch_err_msg(c, ret, "looking up root inode");
+ if (ret)
+ return ret;
+
+ root_hash_info = bch2_hash_info_init(c, &root_inode);
+
+ ret = __lookup_dirent(trans, root_hash_info, root_inum,
+ &lostfound_str, &inum, &d_type);
+ if (bch2_err_matches(ret, ENOENT))
+ goto create_lostfound;
+
+ bch_err_fn(c, ret);
+ if (ret)
+ return ret;
+
+ if (d_type != DT_DIR) {
+ bch_err(c, "error looking up lost+found: not a directory");
+ return -BCH_ERR_ENOENT_not_directory;
+ }
+
+ /*
+	 * The bch2_check_dirents pass has already run; dangling dirents
+ * shouldn't exist here:
+ */
+ return lookup_inode(trans, inum, lostfound, &snapshot);
+
+create_lostfound:
+ /*
+ * XXX: we could have a nicer log message here if we had a nice way to
+ * walk backpointers to print a path
+ */
+ bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot));
+
+ u64 now = bch2_current_time(c);
+ struct btree_iter lostfound_iter = { NULL };
+ u64 cpu = raw_smp_processor_id();
+
+ bch2_inode_init_early(c, lostfound);
+ bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode);
+ lostfound->bi_dir = root_inode.bi_inum;
+
+ root_inode.bi_nlink++;
+
+ ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu);
+ if (ret)
+ goto err;
+
+ bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot);
+ ret = bch2_btree_iter_traverse(&lostfound_iter);
+ if (ret)
+ goto err;
+
+ ret = bch2_dirent_create_snapshot(trans,
+ root_inode.bi_inum, snapshot, &root_hash_info,
+ mode_to_type(lostfound->bi_mode),
+ &lostfound_str,
+ lostfound->bi_inum,
+ &lostfound->bi_dir_offset,
+ BCH_HASH_SET_MUST_CREATE) ?:
+ bch2_inode_write_flags(trans, &lostfound_iter, lostfound,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+err:
+ bch_err_msg(c, ret, "creating lost+found");
+ bch2_trans_iter_exit(trans, &lostfound_iter);
+ return ret;
+}
+
+static int reattach_inode(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode,
+ u32 inode_snapshot)
+{
+ struct bch_hash_info dir_hash;
+ struct bch_inode_unpacked lostfound;
+ char name_buf[20];
+ struct qstr name;
+ u64 dir_offset = 0;
+ int ret;
+
+ ret = lookup_lostfound(trans, inode_snapshot, &lostfound);
+ if (ret)
+ return ret;
+
+ if (S_ISDIR(inode->bi_mode)) {
+ lostfound.bi_nlink++;
+
+ ret = __write_inode(trans, &lostfound, U32_MAX);
+ if (ret)
+ return ret;
+ }
+
+ dir_hash = bch2_hash_info_init(trans->c, &lostfound);
+
+ snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
+ name = (struct qstr) QSTR(name_buf);
+
+ ret = bch2_dirent_create_snapshot(trans,
+ lostfound.bi_inum, inode_snapshot,
+ &dir_hash,
+ inode_d_type(inode),
+ &name, inode->bi_inum, &dir_offset,
+ BCH_HASH_SET_MUST_CREATE);
+ if (ret)
+ return ret;
+
+ inode->bi_dir = lostfound.bi_inum;
+ inode->bi_dir_offset = dir_offset;
+
+ return __write_inode(trans, inode, inode_snapshot);
+}
+
+static int remove_backpointer(struct btree_trans *trans,
+ struct bch_inode_unpacked *inode)
+{
+ struct btree_iter iter;
+ struct bkey_s_c_dirent d;
+ int ret;
+
+ d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
+ POS(inode->bi_dir, inode->bi_dir_offset), 0,
+ dirent);
+ ret = bkey_err(d) ?:
+ __remove_dirent(trans, d.k->p);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+struct snapshots_seen_entry {
+ u32 id;
+ u32 equiv;
+};
+
+struct snapshots_seen {
+ struct bpos pos;
+ DARRAY(struct snapshots_seen_entry) ids;
+};
+
+static inline void snapshots_seen_exit(struct snapshots_seen *s)
+{
+ darray_exit(&s->ids);
+}
+
+static inline void snapshots_seen_init(struct snapshots_seen *s)
+{
+ memset(s, 0, sizeof(*s));
+}
+
+static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
+{
+ struct snapshots_seen_entry *i, n = {
+ .id = id,
+ .equiv = bch2_snapshot_equiv(c, id),
+ };
+ int ret = 0;
+
+ __darray_for_each(s->ids, i) {
+ if (i->id == id)
+ return 0;
+ if (i->id > id)
+ break;
+ }
+
+ ret = darray_insert_item(&s->ids, i - s->ids.data, n);
+ if (ret)
+ bch_err(c, "error reallocating snapshots_seen table (size %zu)",
+ s->ids.size);
+ return ret;
+}
+
+static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
+ enum btree_id btree_id, struct bpos pos)
+{
+ struct snapshots_seen_entry n = {
+ .id = pos.snapshot,
+ .equiv = bch2_snapshot_equiv(c, pos.snapshot),
+ };
+ int ret = 0;
+
+ if (!bkey_eq(s->pos, pos))
+ s->ids.nr = 0;
+
+ s->pos = pos;
+ s->pos.snapshot = n.equiv;
+
+ darray_for_each(s->ids, i) {
+ if (i->id == n.id)
+ return 0;
+
+ /*
+		 * We don't currently rigorously track whether snapshot cleanup
+		 * needs to be run, so it shouldn't be a fsck error yet:
+ */
+ if (i->equiv == n.equiv) {
+ bch_err(c, "snapshot deletion did not finish:\n"
+ " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n",
+ bch2_btree_id_str(btree_id),
+ pos.inode, pos.offset,
+ i->id, n.id, n.equiv);
+ set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
+ return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots);
+ }
+ }
+
+ ret = darray_push(&s->ids, n);
+ if (ret)
+ bch_err(c, "error reallocating snapshots_seen table (size %zu)",
+ s->ids.size);
+ return ret;
+}
+
+/**
+ * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
+ * and @ancestor hasn't been overwritten in @seen
+ *
+ * @c: filesystem handle
+ * @seen: list of snapshot ids already seen at current position
+ * @id: descendent snapshot id
+ * @ancestor: ancestor snapshot id
+ *
+ * Returns: whether key in @ancestor snapshot is visible in @id snapshot
+ */
+static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
+ u32 id, u32 ancestor)
+{
+ ssize_t i;
+
+ EBUG_ON(id > ancestor);
+ EBUG_ON(!bch2_snapshot_is_equiv(c, id));
+ EBUG_ON(!bch2_snapshot_is_equiv(c, ancestor));
+
+ /* @ancestor should be the snapshot most recently added to @seen */
+ EBUG_ON(ancestor != seen->pos.snapshot);
+ EBUG_ON(ancestor != seen->ids.data[seen->ids.nr - 1].equiv);
+
+ if (id == ancestor)
+ return true;
+
+ if (!bch2_snapshot_is_ancestor(c, id, ancestor))
+ return false;
+
+ /*
+ * We know that @id is a descendant of @ancestor, we're checking if
+	 * we've seen a key that overwrote @ancestor - i.e. also a descendant of
+	 * @ancestor and with @id as a descendant.
+ *
+ * But we already know that we're scanning IDs between @id and @ancestor
+ * numerically, since snapshot ID lists are kept sorted, so if we find
+ * an id that's an ancestor of @id we're done:
+ */
+
+ for (i = seen->ids.nr - 2;
+ i >= 0 && seen->ids.data[i].equiv >= id;
+ --i)
+ if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv))
+ return false;
+
+ return true;
+}
+
+/**
+ * ref_visible - given a key with snapshot id @src that points to a key with
+ * snapshot id @dst, test whether there is some snapshot in which @dst is
+ * visible.
+ *
+ * @c: filesystem handle
+ * @s: list of snapshot IDs already seen at @src
+ * @src: snapshot ID of src key
+ * @dst: snapshot ID of dst key
+ * Returns: true if there is some snapshot in which @dst is visible
+ *
+ * Assumes we're visiting @src keys in natural key order
+ */
+static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s,
+ u32 src, u32 dst)
+{
+ return dst <= src
+ ? key_visible_in_snapshot(c, s, dst, src)
+ : bch2_snapshot_is_ancestor(c, src, dst);
+}
+
+static int ref_visible2(struct bch_fs *c,
+ u32 src, struct snapshots_seen *src_seen,
+ u32 dst, struct snapshots_seen *dst_seen)
+{
+ src = bch2_snapshot_equiv(c, src);
+ dst = bch2_snapshot_equiv(c, dst);
+
+ if (dst > src) {
+ swap(dst, src);
+ swap(dst_seen, src_seen);
+ }
+ return key_visible_in_snapshot(c, src_seen, dst, src);
+}
+
+#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
+ for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
+ (_i)->snapshot <= (_snapshot); _i++) \
+ if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
+
+struct inode_walker_entry {
+ struct bch_inode_unpacked inode;
+ u32 snapshot;
+ bool seen_this_pos;
+ u64 count;
+};
+
+struct inode_walker {
+ bool first_this_inode;
+ bool recalculate_sums;
+ struct bpos last_pos;
+
+ DARRAY(struct inode_walker_entry) inodes;
+};
+
+static void inode_walker_exit(struct inode_walker *w)
+{
+ darray_exit(&w->inodes);
+}
+
+static struct inode_walker inode_walker_init(void)
+{
+ return (struct inode_walker) { 0, };
+}
+
+static int add_inode(struct bch_fs *c, struct inode_walker *w,
+ struct bkey_s_c inode)
+{
+ struct bch_inode_unpacked u;
+
+ BUG_ON(bch2_inode_unpack(inode, &u));
+
+ return darray_push(&w->inodes, ((struct inode_walker_entry) {
+ .inode = u,
+ .snapshot = bch2_snapshot_equiv(c, inode.k->p.snapshot),
+ }));
+}
+
+static int get_inodes_all_snapshots(struct btree_trans *trans,
+ struct inode_walker *w, u64 inum)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ w->recalculate_sums = false;
+ w->inodes.nr = 0;
+
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ if (k.k->p.offset != inum)
+ break;
+
+ if (bkey_is_inode(k.k))
+ add_inode(c, w, k);
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret)
+ return ret;
+
+ w->first_this_inode = true;
+ return 0;
+}
+
+static struct inode_walker_entry *
+lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w,
+ u32 snapshot, bool is_whiteout)
+{
+ struct inode_walker_entry *i;
+
+ snapshot = bch2_snapshot_equiv(c, snapshot);
+
+ __darray_for_each(w->inodes, i)
+ if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
+ goto found;
+
+ return NULL;
+found:
+ BUG_ON(snapshot > i->snapshot);
+
+ if (snapshot != i->snapshot && !is_whiteout) {
+ struct inode_walker_entry new = *i;
+ size_t pos;
+ int ret;
+
+ new.snapshot = snapshot;
+ new.count = 0;
+
+ bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
+ w->last_pos.inode, snapshot, i->snapshot);
+
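+		/* Find the insert position that keeps w->inodes sorted by snapshot: */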
+ while (i > w->inodes.data && i[-1].snapshot > snapshot)
+ --i;
+
+ pos = i - w->inodes.data;
+ ret = darray_insert_item(&w->inodes, pos, new);
+ if (ret)
+ return ERR_PTR(ret);
+
+ i = w->inodes.data + pos;
+ }
+
+ return i;
+}
+
+static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
+ struct inode_walker *w, struct bpos pos,
+ bool is_whiteout)
+{
+ if (w->last_pos.inode != pos.inode) {
+ int ret = get_inodes_all_snapshots(trans, w, pos.inode);
+ if (ret)
+ return ERR_PTR(ret);
+ } else if (bkey_cmp(w->last_pos, pos)) {
+ darray_for_each(w->inodes, i)
+ i->seen_this_pos = false;
+ }
+
+ w->last_pos = pos;
+
+ return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout);
+}
+
+static int __get_visible_inodes(struct btree_trans *trans,
+ struct inode_walker *w,
+ struct snapshots_seen *s,
+ u64 inum)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ w->inodes.nr = 0;
+
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+ u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
+ if (k.k->p.offset != inum)
+ break;
+
+ if (!ref_visible(c, s, s->pos.snapshot, equiv))
+ continue;
+
+ if (bkey_is_inode(k.k))
+ add_inode(c, w, k);
+
+ if (equiv >= s->pos.snapshot)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
+static int check_key_has_snapshot(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
+ bkey_in_missing_snapshot,
+ "key in missing snapshot: %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1;
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int hash_redo_key(struct btree_trans *trans,
+ const struct bch_hash_desc desc,
+ struct bch_hash_info *hash_info,
+ struct btree_iter *k_iter, struct bkey_s_c k)
+{
+ struct bkey_i *delete;
+ struct bkey_i *tmp;
+
+ delete = bch2_trans_kmalloc(trans, sizeof(*delete));
+ if (IS_ERR(delete))
+ return PTR_ERR(delete);
+
+ tmp = bch2_bkey_make_mut_noupdate(trans, k);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+
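+	/* Delete the misplaced key, then re-add it via the normal hash insert path: */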
+ bkey_init(&delete->k);
+ delete->k.p = k_iter->pos;
+ return bch2_btree_iter_traverse(k_iter) ?:
+ bch2_trans_update(trans, k_iter, delete, 0) ?:
+ bch2_hash_set_snapshot(trans, desc, hash_info,
+ (subvol_inum) { 0, k.k->p.inode },
+ k.k->p.snapshot, tmp,
+ BCH_HASH_SET_MUST_CREATE,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+}
+
+static int hash_check_key(struct btree_trans *trans,
+ const struct bch_hash_desc desc,
+ struct bch_hash_info *hash_info,
+ struct btree_iter *k_iter, struct bkey_s_c hash_k)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter = { NULL };
+ struct printbuf buf = PRINTBUF;
+ struct bkey_s_c k;
+ u64 hash;
+ int ret = 0;
+
+ if (hash_k.k->type != desc.key_type)
+ return 0;
+
+ hash = desc.hash_bkey(hash_info, hash_k);
+
+ if (likely(hash == hash_k.k->p.offset))
+ return 0;
+
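+	/* A key only ever sorts at or after the slot it hashes to: */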
+ if (hash_k.k->p.offset < hash)
+ goto bad_hash;
+
+ for_each_btree_key_norestart(trans, iter, desc.btree_id,
+ SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
+ BTREE_ITER_SLOTS, k, ret) {
+ if (bkey_eq(k.k->p, hash_k.k->p))
+ break;
+
+ if (fsck_err_on(k.k->type == desc.key_type &&
+ !desc.cmp_bkey(k, hash_k), c,
+ hash_table_key_duplicate,
+ "duplicate hash table keys:\n%s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, hash_k),
+ buf.buf))) {
+ ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
+ break;
+ }
+
+ if (bkey_deleted(k.k)) {
+ bch2_trans_iter_exit(trans, &iter);
+ goto bad_hash;
+ }
+ }
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf);
+ return ret;
+bad_hash:
+ if (fsck_err(c, hash_table_key_wrong_offset,
+ "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
+ bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
+ ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
+ bch_err_fn(c, ret);
+ if (ret)
+ return ret;
+ ret = -BCH_ERR_transaction_restart_nested;
+ }
+fsck_err:
+ goto out;
+}
+
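+/* Returns 1 if @p is on the deleted inodes list, 0 if not, or a negative error: */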
+static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ bch2_trans_iter_exit(trans, &iter);
+ return k.k->type == KEY_TYPE_set;
+}
+
+static int check_inode(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct bch_inode_unpacked *prev,
+ struct snapshots_seen *s,
+ bool full)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked u;
+ bool do_update = false;
+ int ret;
+
+ ret = check_key_has_snapshot(trans, iter, k);
+ if (ret < 0)
+ goto err;
+ if (ret)
+ return 0;
+
+ ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
+ if (ret)
+ goto err;
+
+ if (!bkey_is_inode(k.k))
+ return 0;
+
+ BUG_ON(bch2_inode_unpack(k, &u));
+
+ if (!full &&
+ !(u.bi_flags & (BCH_INODE_i_size_dirty|
+ BCH_INODE_i_sectors_dirty|
+ BCH_INODE_unlinked)))
+ return 0;
+
+ if (prev->bi_inum != u.bi_inum)
+ *prev = u;
+
+ if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed ||
+ inode_d_type(prev) != inode_d_type(&u),
+ c, inode_snapshot_mismatch,
+ "inodes in different snapshots don't match")) {
+ bch_err(c, "repair not implemented yet");
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+
+ if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) &&
+ bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) {
+ struct bpos new_min_pos;
+
+ ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos);
+ if (ret)
+ goto err;
+
+ u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked;
+
+ ret = __write_inode(trans, &u, iter->pos.snapshot);
+ bch_err_msg(c, ret, "in fsck updating inode");
+ if (ret)
+ return ret;
+
+ if (!bpos_eq(new_min_pos, POS_MIN))
+ bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos));
+ return 0;
+ }
+
+ if (u.bi_flags & BCH_INODE_unlinked) {
+ ret = check_inode_deleted_list(trans, k.k->p);
+ if (ret < 0)
+ return ret;
+
+ fsck_err_on(ret, c, unlinked_inode_not_on_deleted_list,
+ "inode %llu:%u unlinked, but not on deleted list",
+ u.bi_inum, k.k->p.snapshot);
+ ret = 0;
+ }
+
+ if (u.bi_flags & BCH_INODE_unlinked &&
+ (!c->sb.clean ||
+ fsck_err(c, inode_unlinked_but_clean,
+ "filesystem marked clean, but inode %llu unlinked",
+ u.bi_inum))) {
+ ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
+ bch_err_msg(c, ret, "in fsck deleting inode");
+ return ret;
+ }
+
+ if (u.bi_flags & BCH_INODE_i_size_dirty &&
+ (!c->sb.clean ||
+ fsck_err(c, inode_i_size_dirty_but_clean,
+ "filesystem marked clean, but inode %llu has i_size dirty",
+ u.bi_inum))) {
+ bch_verbose(c, "truncating inode %llu", u.bi_inum);
+
+ /*
+ * XXX: need to truncate partial blocks too here - or ideally
+ * just switch units to bytes and that issue goes away
+ */
+ ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
+ SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
+ iter->pos.snapshot),
+ POS(u.bi_inum, U64_MAX),
+ 0, NULL);
+ bch_err_msg(c, ret, "in fsck truncating inode");
+ if (ret)
+ return ret;
+
+ /*
+ * We truncated without our normal sector accounting hook, just
+ * make sure we recalculate it:
+ */
+ u.bi_flags |= BCH_INODE_i_sectors_dirty;
+
+ u.bi_flags &= ~BCH_INODE_i_size_dirty;
+ do_update = true;
+ }
+
+ if (u.bi_flags & BCH_INODE_i_sectors_dirty &&
+ (!c->sb.clean ||
+ fsck_err(c, inode_i_sectors_dirty_but_clean,
+ "filesystem marked clean, but inode %llu has i_sectors dirty",
+ u.bi_inum))) {
+ s64 sectors;
+
+ bch_verbose(c, "recounting sectors for inode %llu",
+ u.bi_inum);
+
+ sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
+ if (sectors < 0) {
+ bch_err_msg(c, sectors, "in fsck recounting inode sectors");
+ return sectors;
+ }
+
+ u.bi_sectors = sectors;
+ u.bi_flags &= ~BCH_INODE_i_sectors_dirty;
+ do_update = true;
+ }
+
+ if (u.bi_flags & BCH_INODE_backptr_untrusted) {
+ u.bi_dir = 0;
+ u.bi_dir_offset = 0;
+ u.bi_flags &= ~BCH_INODE_backptr_untrusted;
+ do_update = true;
+ }
+
+ if (do_update) {
+ ret = __write_inode(trans, &u, iter->pos.snapshot);
+ bch_err_msg(c, ret, "in fsck updating inode");
+ if (ret)
+ return ret;
+ }
+err:
+fsck_err:
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+int bch2_check_inodes(struct bch_fs *c)
+{
+ bool full = c->opts.fsck;
+ struct bch_inode_unpacked prev = { 0 };
+ struct snapshots_seen s;
+
+ snapshots_seen_init(&s);
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
+ POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_inode(trans, &iter, k, &prev, &s, full)));
+
+ snapshots_seen_exit(&s);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bpos pos)
+{
+ return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
+}
+
+static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
+ struct bkey_s_c_dirent d)
+{
+ return inode->bi_dir == d.k->p.inode &&
+ inode->bi_dir_offset == d.k->p.offset;
+}
+
+static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
+ struct bch_inode_unpacked *inode)
+{
+ return d.v->d_type == DT_SUBVOL
+ ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
+ : le64_to_cpu(d.v->d_inum) == inode->bi_inum;
+}
+
+static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
+{
+ struct bch_fs *c = trans->c;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+ s64 count2;
+
+ darray_for_each(w->inodes, i) {
+ if (i->inode.bi_sectors == i->count)
+ continue;
+
+ count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot);
+
+ if (w->recalculate_sums)
+ i->count = count2;
+
+ if (i->count != count2) {
+ bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
+ w->last_pos.inode, i->snapshot, i->count, count2);
+ return -BCH_ERR_internal_fsck_err;
+ }
+
+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
+ c, inode_i_sectors_wrong,
+ "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
+ w->last_pos.inode, i->snapshot,
+ i->inode.bi_sectors, i->count)) {
+ i->inode.bi_sectors = i->count;
+ ret = fsck_write_inode(trans, &i->inode, i->snapshot);
+ if (ret)
+ break;
+ }
+ }
+fsck_err:
+ bch_err_fn(c, ret);
+ return ret ?: trans_was_restarted(trans, restart_count);
+}
+
+struct extent_end {
+ u32 snapshot;
+ u64 offset;
+ struct snapshots_seen seen;
+};
+
+struct extent_ends {
+ struct bpos last_pos;
+ DARRAY(struct extent_end) e;
+};
+
+static void extent_ends_reset(struct extent_ends *extent_ends)
+{
+ darray_for_each(extent_ends->e, i)
+ snapshots_seen_exit(&i->seen);
+ extent_ends->e.nr = 0;
+}
+
+static void extent_ends_exit(struct extent_ends *extent_ends)
+{
+ extent_ends_reset(extent_ends);
+ darray_exit(&extent_ends->e);
+}
+
+static void extent_ends_init(struct extent_ends *extent_ends)
+{
+ memset(extent_ends, 0, sizeof(*extent_ends));
+}
+
+static int extent_ends_at(struct bch_fs *c,
+ struct extent_ends *extent_ends,
+ struct snapshots_seen *seen,
+ struct bkey_s_c k)
+{
+ struct extent_end *i, n = (struct extent_end) {
+ .offset = k.k->p.offset,
+ .snapshot = k.k->p.snapshot,
+ .seen = *seen,
+ };
+
+ n.seen.ids.data = kmemdup(seen->ids.data,
+ sizeof(seen->ids.data[0]) * seen->ids.size,
+ GFP_KERNEL);
+ if (!n.seen.ids.data)
+ return -BCH_ERR_ENOMEM_fsck_extent_ends_at;
+
+ __darray_for_each(extent_ends->e, i) {
+ if (i->snapshot == k.k->p.snapshot) {
+ snapshots_seen_exit(&i->seen);
+ *i = n;
+ return 0;
+ }
+
+ if (i->snapshot >= k.k->p.snapshot)
+ break;
+ }
+
+ return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n);
+}
+
+static int overlapping_extents_found(struct btree_trans *trans,
+ enum btree_id btree,
+ struct bpos pos1, struct snapshots_seen *pos1_seen,
+ struct bkey pos2,
+ bool *fixed,
+ struct extent_end *extent_end)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ struct btree_iter iter1, iter2 = { NULL };
+ struct bkey_s_c k1, k2;
+ int ret;
+
+ BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2)));
+
+ bch2_trans_iter_init(trans, &iter1, btree, pos1,
+ BTREE_ITER_ALL_SNAPSHOTS|
+ BTREE_ITER_NOT_EXTENTS);
+ k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX));
+ ret = bkey_err(k1);
+ if (ret)
+ goto err;
+
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, k1);
+
+ if (!bpos_eq(pos1, k1.k->p)) {
+ prt_str(&buf, "\n wanted\n ");
+ bch2_bpos_to_text(&buf, pos1);
+ prt_str(&buf, "\n ");
+ bch2_bkey_to_text(&buf, &pos2);
+
+ bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
+ __func__, buf.buf);
+ ret = -BCH_ERR_internal_fsck_err;
+ goto err;
+ }
+
+ bch2_trans_copy_iter(&iter2, &iter1);
+
+ while (1) {
+ bch2_btree_iter_advance(&iter2);
+
+ k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX));
+ ret = bkey_err(k2);
+ if (ret)
+ goto err;
+
+ if (bpos_ge(k2.k->p, pos2.p))
+ break;
+ }
+
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, k2);
+
+ if (bpos_gt(k2.k->p, pos2.p) ||
+ pos2.size != k2.k->size) {
+ bch_err(c, "%s: error finding seconding overlapping extent when repairing%s",
+ __func__, buf.buf);
+ ret = -BCH_ERR_internal_fsck_err;
+ goto err;
+ }
+
+ prt_printf(&buf, "\n overwriting %s extent",
+ pos1.snapshot >= pos2.p.snapshot ? "first" : "second");
+
+ if (fsck_err(c, extent_overlapping,
+ "overlapping extents%s", buf.buf)) {
+ struct btree_iter *old_iter = &iter1;
+ struct disk_reservation res = { 0 };
+
+ if (pos1.snapshot < pos2.p.snapshot) {
+ old_iter = &iter2;
+ swap(k1, k2);
+ }
+
+ trans->extra_disk_res += bch2_bkey_sectors_compressed(k2);
+
+ ret = bch2_trans_update_extent_overwrite(trans, old_iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
+ k1, k2) ?:
+ bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc);
+ bch2_disk_reservation_put(c, &res);
+
+ if (ret)
+ goto err;
+
+ *fixed = true;
+
+ if (pos1.snapshot == pos2.p.snapshot) {
+ /*
+ * We overwrote the first extent, and did the overwrite
+ * in the same snapshot:
+ */
+ extent_end->offset = bkey_start_offset(&pos2);
+ } else if (pos1.snapshot > pos2.p.snapshot) {
+ /*
+ * We overwrote the first extent in pos2's snapshot:
+ */
+ ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot);
+ } else {
+ /*
+ * We overwrote the second extent - restart
+ * check_extent() from the top:
+ */
+ ret = -BCH_ERR_transaction_restart_nested;
+ }
+ }
+fsck_err:
+err:
+ bch2_trans_iter_exit(trans, &iter2);
+ bch2_trans_iter_exit(trans, &iter1);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int check_overlapping_extents(struct btree_trans *trans,
+ struct snapshots_seen *seen,
+ struct extent_ends *extent_ends,
+ struct bkey_s_c k,
+ u32 equiv,
+ struct btree_iter *iter,
+ bool *fixed)
+{
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
+ /* transaction restart, running again */
+ if (bpos_eq(extent_ends->last_pos, k.k->p))
+ return 0;
+
+ if (extent_ends->last_pos.inode != k.k->p.inode)
+ extent_ends_reset(extent_ends);
+
+ darray_for_each(extent_ends->e, i) {
+ if (i->offset <= bkey_start_offset(k.k))
+ continue;
+
+ if (!ref_visible2(c,
+ k.k->p.snapshot, seen,
+ i->snapshot, &i->seen))
+ continue;
+
+ ret = overlapping_extents_found(trans, iter->btree_id,
+ SPOS(iter->pos.inode,
+ i->offset,
+ i->snapshot),
+ &i->seen,
+ *k.k, fixed, i);
+ if (ret)
+ goto err;
+ }
+
+ ret = extent_ends_at(c, extent_ends, seen, k);
+ if (ret)
+ goto err;
+
+ extent_ends->last_pos = k.k->p;
+err:
+ return ret;
+}
+
+static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *i;
+ unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9;
+
+ bkey_for_each_crc(k.k, ptrs, crc, i)
+ if (crc_is_encoded(crc) &&
+ crc.uncompressed_size > encoded_extent_max_sectors) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ return 0;
+}
+
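+/*
+ * Check one extent key: verify it has a valid snapshot and points into a
+ * regular file (or symlink) inode, check for overlaps with previously seen
+ * extents, delete extents past i_size, and accumulate i_sectors for every
+ * inode version the extent is visible in.
+ */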
+static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct inode_walker *inode,
+ struct snapshots_seen *s,
+ struct extent_ends *extent_ends)
+{
+ struct bch_fs *c = trans->c;
+ struct inode_walker_entry *i;
+ struct printbuf buf = PRINTBUF;
+ struct bpos equiv = k.k->p;
+ int ret = 0;
+
+ equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
+ ret = check_key_has_snapshot(trans, iter, k);
+ if (ret) {
+ ret = ret < 0 ? ret : 0;
+ goto out;
+ }
+
+ if (inode->last_pos.inode != k.k->p.inode) {
+ ret = check_i_sectors(trans, inode);
+ if (ret)
+ goto err;
+ }
+
+ i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout);
+ ret = PTR_ERR_OR_ZERO(i);
+ if (ret)
+ goto err;
+
+ ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
+ if (ret)
+ goto err;
+
+ if (k.k->type != KEY_TYPE_whiteout) {
+ if (fsck_err_on(!i, c, extent_in_missing_inode,
+ "extent in missing inode:\n %s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ goto delete;
+
+ if (fsck_err_on(i &&
+ !S_ISREG(i->inode.bi_mode) &&
+ !S_ISLNK(i->inode.bi_mode),
+ c, extent_in_non_reg_inode,
+ "extent in non regular inode mode %o:\n %s",
+ i->inode.bi_mode,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ goto delete;
+
+ ret = check_overlapping_extents(trans, s, extent_ends, k,
+ equiv.snapshot, iter,
+ &inode->recalculate_sums);
+ if (ret)
+ goto err;
+ }
+
+ /*
+ * Check inodes in reverse order, from oldest snapshots to newest,
+ * starting from the inode that matches this extent's snapshot. If we
+ * didn't have one, iterate over all inodes:
+ */
+ if (!i)
+ i = inode->inodes.data + inode->inodes.nr - 1;
+
+ for (;
+ inode->inodes.data && i >= inode->inodes.data;
+ --i) {
+ if (i->snapshot > equiv.snapshot ||
+ !key_visible_in_snapshot(c, s, i->snapshot, equiv.snapshot))
+ continue;
+
+ if (k.k->type != KEY_TYPE_whiteout) {
+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) &&
+ k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
+ !bkey_extent_is_reservation(k),
+ c, extent_past_end_of_inode,
+ "extent type past end of inode %llu:%u, i_size %llu\n %s",
+ i->inode.bi_inum, i->snapshot, i->inode.bi_size,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ struct btree_iter iter2;
+
+ bch2_trans_copy_iter(&iter2, iter);
+ bch2_btree_iter_set_snapshot(&iter2, i->snapshot);
+ ret = bch2_btree_iter_traverse(&iter2) ?:
+ bch2_btree_delete_at(trans, &iter2,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ bch2_trans_iter_exit(trans, &iter2);
+ if (ret)
+ goto err;
+
+ iter->k.type = KEY_TYPE_whiteout;
+ }
+
+ if (bkey_extent_is_allocation(k.k))
+ i->count += k.k->size;
+ }
+
+ i->seen_this_pos = true;
+ }
+out:
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ bch_err_fn(c, ret);
+ return ret;
+delete:
+ ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ goto out;
+}
+
+/*
+ * Walk extents: verify that extents have a corresponding S_ISREG inode, and
+ * that i_size and i_sectors are consistent
+ */
+int bch2_check_extents(struct bch_fs *c)
+{
+ struct inode_walker w = inode_walker_init();
+ struct snapshots_seen s;
+ struct extent_ends extent_ends;
+ struct disk_reservation res = { 0 };
+
+ snapshots_seen_init(&s);
+ extent_ends_init(&extent_ends);
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_extents,
+ POS(BCACHEFS_ROOT_INO, 0),
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ &res, NULL,
+ BCH_TRANS_COMMIT_no_enospc, ({
+ bch2_disk_reservation_put(c, &res);
+ check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
+ check_extent_overbig(trans, &iter, k);
+ })) ?:
+ check_i_sectors(trans, &w));
+
+ bch2_disk_reservation_put(c, &res);
+ extent_ends_exit(&extent_ends);
+ inode_walker_exit(&w);
+ snapshots_seen_exit(&s);
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+int bch2_check_indirect_extents(struct bch_fs *c)
+{
+ struct disk_reservation res = { 0 };
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_reflink,
+ POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ &res, NULL,
+ BCH_TRANS_COMMIT_no_enospc, ({
+ bch2_disk_reservation_put(c, &res);
+ check_extent_overbig(trans, &iter, k);
+ })));
+
+ bch2_disk_reservation_put(c, &res);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
+{
+ struct bch_fs *c = trans->c;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+ s64 count2;
+
+ darray_for_each(w->inodes, i) {
+ if (i->inode.bi_nlink == i->count)
+ continue;
+
+ count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot);
+ if (count2 < 0)
+ return count2;
+
+ if (i->count != count2) {
+ bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
+ i->count, count2);
+ i->count = count2;
+ if (i->inode.bi_nlink == i->count)
+ continue;
+ }
+
+ if (fsck_err_on(i->inode.bi_nlink != i->count,
+ c, inode_dir_wrong_nlink,
+ "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
+ w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
+ i->inode.bi_nlink = i->count;
+ ret = fsck_write_inode(trans, &i->inode, i->snapshot);
+ if (ret)
+ break;
+ }
+ }
+fsck_err:
+ bch_err_fn(c, ret);
+ return ret ?: trans_was_restarted(trans, restart_count);
+}
+
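+/*
+ * Verify that the target inode's backpointer (bi_dir/bi_dir_offset) points
+ * back at this dirent, repairing missing or wrong backpointers, directories
+ * with multiple links, and hardlinked inodes with nlink 0; also fix dirents
+ * with the wrong d_type or d_parent_subvol.
+ */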
+static int check_dirent_target(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c_dirent d,
+ struct bch_inode_unpacked *target,
+ u32 target_snapshot)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i_dirent *n;
+ struct printbuf buf = PRINTBUF;
+ struct btree_iter bp_iter = { NULL };
+ int ret = 0;
+
+ if (!target->bi_dir &&
+ !target->bi_dir_offset) {
+ target->bi_dir = d.k->p.inode;
+ target->bi_dir_offset = d.k->p.offset;
+
+ ret = __write_inode(trans, target, target_snapshot);
+ if (ret)
+ goto err;
+ }
+
+ if (!inode_points_to_dirent(target, d)) {
+ struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
+ SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
+ ret = bkey_err(bp_dirent);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ goto err;
+
+ bool backpointer_exists = !ret;
+ ret = 0;
+
+ bch2_bkey_val_to_text(&buf, c, d.s_c);
+ prt_newline(&buf);
+ if (backpointer_exists)
+ bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
+
+ if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists,
+ c, inode_dir_multiple_links,
+ "directory %llu:%u with multiple links\n%s",
+ target->bi_inum, target_snapshot, buf.buf)) {
+ ret = __remove_dirent(trans, d.k->p);
+ goto out;
+ }
+
+ /*
+ * hardlinked file with nlink 0:
+ * We're just adjusting nlink here so that check_nlinks() will pick
+ * it up; check_nlinks() ignores inodes with nlink 0
+ */
+ if (fsck_err_on(backpointer_exists && !target->bi_nlink,
+ c, inode_multiple_links_but_nlink_0,
+ "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
+ target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
+ target->bi_nlink++;
+ target->bi_flags &= ~BCH_INODE_unlinked;
+
+ ret = __write_inode(trans, target, target_snapshot);
+ if (ret)
+ goto err;
+ }
+
+ if (fsck_err_on(!backpointer_exists,
+ c, inode_wrong_backpointer,
+ "inode %llu:%u has wrong backpointer:\n"
+ "got %llu:%llu\n"
+ "should be %llu:%llu",
+ target->bi_inum, target_snapshot,
+ target->bi_dir,
+ target->bi_dir_offset,
+ d.k->p.inode,
+ d.k->p.offset)) {
+ target->bi_dir = d.k->p.inode;
+ target->bi_dir_offset = d.k->p.offset;
+
+ ret = __write_inode(trans, target, target_snapshot);
+ if (ret)
+ goto err;
+ }
+ }
+
+ if (fsck_err_on(d.v->d_type != inode_d_type(target),
+ c, dirent_d_type_wrong,
+ "incorrect d_type: got %s, should be %s:\n%s",
+ bch2_d_type_str(d.v->d_type),
+ bch2_d_type_str(inode_d_type(target)),
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+ n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ goto err;
+
+ bkey_reassemble(&n->k_i, d.s_c);
+ n->v.d_type = inode_d_type(target);
+
+ ret = bch2_trans_update(trans, iter, &n->k_i, 0);
+ if (ret)
+ goto err;
+
+ d = dirent_i_to_s_c(n);
+ }
+
+ if (fsck_err_on(d.v->d_type == DT_SUBVOL &&
+ target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol),
+ c, dirent_d_parent_subvol_wrong,
+ "dirent has wrong d_parent_subvol field: got %u, should be %u",
+ le32_to_cpu(d.v->d_parent_subvol),
+ target->bi_parent_subvol)) {
+ n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ goto err;
+
+ bkey_reassemble(&n->k_i, d.s_c);
+ n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
+
+ ret = bch2_trans_update(trans, iter, &n->k_i, 0);
+ if (ret)
+ goto err;
+
+ d = dirent_i_to_s_c(n);
+ }
+out:
+err:
+fsck_err:
+ bch2_trans_iter_exit(trans, &bp_iter);
+ printbuf_exit(&buf);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
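+/*
+ * Check one dirent: verify the snapshot, the containing directory inode and
+ * the dirent hash, then look up the target - either a subvolume root or a
+ * regular inode in every visible snapshot - and run check_dirent_target() on
+ * each, counting subdirectory links for the parent as we go.
+ */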
+static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct bch_hash_info *hash_info,
+ struct inode_walker *dir,
+ struct inode_walker *target,
+ struct snapshots_seen *s)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c_dirent d;
+ struct inode_walker_entry *i;
+ struct printbuf buf = PRINTBUF;
+ struct bpos equiv;
+ int ret = 0;
+
+ ret = check_key_has_snapshot(trans, iter, k);
+ if (ret) {
+ ret = ret < 0 ? ret : 0;
+ goto out;
+ }
+
+ equiv = k.k->p;
+ equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
+ ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
+ if (ret)
+ goto err;
+
+ if (k.k->type == KEY_TYPE_whiteout)
+ goto out;
+
+ if (dir->last_pos.inode != k.k->p.inode) {
+ ret = check_subdir_count(trans, dir);
+ if (ret)
+ goto err;
+ }
+
+ BUG_ON(!btree_iter_path(trans, iter)->should_be_locked);
+
+ i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout);
+ ret = PTR_ERR_OR_ZERO(i);
+ if (ret < 0)
+ goto err;
+
+ if (dir->first_this_inode && dir->inodes.nr)
+ *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
+ dir->first_this_inode = false;
+
+ if (fsck_err_on(!i, c, dirent_in_missing_dir_inode,
+ "dirent in nonexisting directory:\n%s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ goto out;
+ }
+
+ if (!i)
+ goto out;
+
+ if (fsck_err_on(!S_ISDIR(i->inode.bi_mode),
+ c, dirent_in_non_dir_inode,
+ "dirent in non directory inode type %s:\n%s",
+ bch2_d_type_str(inode_d_type(&i->inode)),
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, iter, 0);
+ goto out;
+ }
+
+ ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k);
+ if (ret < 0)
+ goto err;
+ if (ret) {
+ /* dirent has been deleted */
+ ret = 0;
+ goto out;
+ }
+
+ if (k.k->type != KEY_TYPE_dirent)
+ goto out;
+
+ d = bkey_s_c_to_dirent(k);
+
+ if (d.v->d_type == DT_SUBVOL) {
+ struct bch_inode_unpacked subvol_root;
+ u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
+ u32 target_snapshot;
+ u64 target_inum;
+
+ ret = subvol_lookup(trans, target_subvol,
+ &target_snapshot, &target_inum);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ goto err;
+
+ if (fsck_err_on(ret, c, dirent_to_missing_subvol,
+ "dirent points to missing subvolume %u",
+ le32_to_cpu(d.v->d_child_subvol))) {
+ ret = __remove_dirent(trans, d.k->p);
+ goto err;
+ }
+
+ ret = lookup_inode(trans, target_inum,
+ &subvol_root, &target_snapshot);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ goto err;
+
+ if (fsck_err_on(ret, c, subvol_to_missing_root,
+ "subvolume %u points to missing subvolume root %llu",
+ target_subvol,
+ target_inum)) {
+ bch_err(c, "repair not implemented yet");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (fsck_err_on(subvol_root.bi_subvol != target_subvol,
+ c, subvol_root_wrong_bi_subvol,
+ "subvol root %llu has wrong bi_subvol field: got %u, should be %u",
+ target_inum,
+ subvol_root.bi_subvol, target_subvol)) {
+ subvol_root.bi_subvol = target_subvol;
+ ret = __write_inode(trans, &subvol_root, target_snapshot);
+ if (ret)
+ goto err;
+ }
+
+ ret = check_dirent_target(trans, iter, d, &subvol_root,
+ target_snapshot);
+ if (ret)
+ goto err;
+ } else {
+ ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
+ if (ret)
+ goto err;
+
+ if (fsck_err_on(!target->inodes.nr,
+ c, dirent_to_missing_inode,
+ "dirent points to missing inode: (equiv %u)\n%s",
+ equiv.snapshot,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k),
+ buf.buf))) {
+ ret = __remove_dirent(trans, d.k->p);
+ if (ret)
+ goto err;
+ }
+
+ darray_for_each(target->inodes, i) {
+ ret = check_dirent_target(trans, iter, d,
+ &i->inode, i->snapshot);
+ if (ret)
+ goto err;
+ }
+ }
+
+ if (d.v->d_type == DT_DIR)
+ for_each_visible_inode(c, s, dir, equiv.snapshot, i)
+ i->count++;
+
+out:
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+/*
+ * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
+ * validate d_type
+ */
+int bch2_check_dirents(struct bch_fs *c)
+{
+ struct inode_walker dir = inode_walker_init();
+ struct inode_walker target = inode_walker_init();
+ struct snapshots_seen s;
+ struct bch_hash_info hash_info;
+
+ snapshots_seen_init(&s);
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
+ POS(BCACHEFS_ROOT_INO, 0),
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
+ k,
+ NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc,
+ check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)));
+
+ snapshots_seen_exit(&s);
+ inode_walker_exit(&dir);
+ inode_walker_exit(&target);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct bch_hash_info *hash_info,
+ struct inode_walker *inode)
+{
+ struct bch_fs *c = trans->c;
+ struct inode_walker_entry *i;
+ int ret;
+
+ ret = check_key_has_snapshot(trans, iter, k);
+ if (ret)
+ return ret;
+
+ i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout);
+ ret = PTR_ERR_OR_ZERO(i);
+ if (ret)
+ return ret;
+
+ if (inode->first_this_inode && inode->inodes.nr)
+ *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode);
+ inode->first_this_inode = false;
+
+ if (fsck_err_on(!i, c, xattr_in_missing_inode,
+ "xattr for missing inode %llu",
+ k.k->p.inode))
+ return bch2_btree_delete_at(trans, iter, 0);
+
+ if (!i)
+ return 0;
+
+ ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
+fsck_err:
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+/*
+ * Walk xattrs: verify that they all have a corresponding inode
+ */
+int bch2_check_xattrs(struct bch_fs *c)
+{
+ struct inode_walker inode = inode_walker_init();
+ struct bch_hash_info hash_info;
+ int ret = 0;
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
+ POS(BCACHEFS_ROOT_INO, 0),
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
+ k,
+ NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc,
+ check_xattr(trans, &iter, k, &hash_info, &inode)));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int check_root_trans(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked root_inode;
+ u32 snapshot;
+ u64 inum;
+ int ret;
+
+ ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ return ret;
+
+ if (mustfix_fsck_err_on(ret, c, root_subvol_missing,
+ "root subvol missing")) {
+ struct bkey_i_subvolume root_subvol;
+
+ snapshot = U32_MAX;
+ inum = BCACHEFS_ROOT_INO;
+
+ bkey_subvolume_init(&root_subvol.k_i);
+ root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
+ root_subvol.v.flags = 0;
+ root_subvol.v.snapshot = cpu_to_le32(snapshot);
+ root_subvol.v.inode = cpu_to_le64(inum);
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0);
+ bch_err_msg(c, ret, "writing root subvol");
+ if (ret)
+ goto err;
+ }
+
+ ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ return ret;
+
+ if (mustfix_fsck_err_on(ret, c, root_dir_missing,
+ "root directory missing") ||
+ mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode),
+ c, root_inode_not_dir,
+ "root inode not a directory")) {
+ bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
+ 0, NULL);
+ root_inode.bi_inum = inum;
+
+ ret = __write_inode(trans, &root_inode, snapshot);
+ bch_err_msg(c, ret, "writing root inode");
+ }
+err:
+fsck_err:
+ return ret;
+}
+
+/* Get root directory, create if it doesn't exist: */
+int bch2_check_root(struct bch_fs *c)
+{
+ int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_root_trans(trans));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+struct pathbuf_entry {
+ u64 inum;
+ u32 snapshot;
+};
+
+typedef DARRAY(struct pathbuf_entry) pathbuf;
+
+static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
+{
+ darray_for_each(*p, i)
+ if (i->inum == inum &&
+ i->snapshot == snapshot)
+ return true;
+ return false;
+}
+
+static int path_down(struct bch_fs *c, pathbuf *p,
+ u64 inum, u32 snapshot)
+{
+ int ret = darray_push(p, ((struct pathbuf_entry) {
+ .inum = inum,
+ .snapshot = snapshot,
+ }));
+
+ if (ret)
+ bch_err(c, "fsck: error allocating memory for pathbuf, size %zu",
+ p->size);
+ return ret;
+}
+
+/*
+ * Check that a given inode is reachable from the root:
+ *
+ * XXX: we should also be verifying that inodes are in the right subvolumes
+ */
+static int check_path(struct btree_trans *trans,
+ pathbuf *p,
+ struct bch_inode_unpacked *inode,
+ u32 snapshot)
+{
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
+ snapshot = bch2_snapshot_equiv(c, snapshot);
+ p->nr = 0;
+
+ while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
+ inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
+ struct btree_iter dirent_iter;
+ struct bkey_s_c_dirent d;
+ u32 parent_snapshot = snapshot;
+
+ if (inode->bi_subvol) {
+ u64 inum;
+
+ ret = subvol_lookup(trans, inode->bi_parent_subvol,
+ &parent_snapshot, &inum);
+ if (ret)
+ break;
+ }
+
+ d = dirent_get_by_pos(trans, &dirent_iter,
+ SPOS(inode->bi_dir, inode->bi_dir_offset,
+ parent_snapshot));
+ ret = bkey_err(d.s_c);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ break;
+
+ if (!ret && !dirent_points_to_inode(d, inode)) {
+ bch2_trans_iter_exit(trans, &dirent_iter);
+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
+ }
+
+ if (bch2_err_matches(ret, ENOENT)) {
+ if (fsck_err(c, inode_unreachable,
+ "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
+ inode->bi_inum, snapshot,
+ bch2_d_type_str(inode_d_type(inode)),
+ inode->bi_nlink,
+ inode->bi_dir,
+ inode->bi_dir_offset))
+ ret = reattach_inode(trans, inode, snapshot);
+ break;
+ }
+
+ bch2_trans_iter_exit(trans, &dirent_iter);
+
+ if (!S_ISDIR(inode->bi_mode))
+ break;
+
+ ret = path_down(c, p, inode->bi_inum, snapshot);
+ if (ret) {
+ bch_err(c, "memory allocation failure");
+ return ret;
+ }
+
+ snapshot = parent_snapshot;
+
+ ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
+ if (ret) {
+ /* Should have been caught in dirents pass */
+ if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(c, "error looking up parent directory: %i", ret);
+ break;
+ }
+
+ if (path_is_dup(p, inode->bi_inum, snapshot)) {
+ /* XXX print path */
+ bch_err(c, "directory structure loop");
+
+ darray_for_each(*p, i)
+ pr_err("%llu:%u", i->inum, i->snapshot);
+ pr_err("%llu:%u", inode->bi_inum, snapshot);
+
+ if (!fsck_err(c, dir_loop, "directory structure loop"))
+ return 0;
+
+ ret = remove_backpointer(trans, inode);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err_msg(c, ret, "removing dirent");
+ if (ret)
+ break;
+
+ ret = reattach_inode(trans, inode, snapshot);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err_msg(c, ret, "reattaching inode %llu", inode->bi_inum);
+ break;
+ }
+ }
+fsck_err:
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+/*
+ * Check for unreachable inodes, as well as loops in the directory structure:
+ * After bch2_check_dirents(), an inode whose backpointer doesn't resolve to a
+ * valid dirent is unreachable:
+ */
+int bch2_check_directory_structure(struct bch_fs *c)
+{
+ struct bch_inode_unpacked u;
+ pathbuf path = { 0, };
+ int ret;
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ if (!bkey_is_inode(k.k))
+ continue;
+
+ BUG_ON(bch2_inode_unpack(k, &u));
+
+ if (u.bi_flags & BCH_INODE_unlinked)
+ continue;
+
+ check_path(trans, &path, &u, iter.pos.snapshot);
+ })));
+ darray_exit(&path);
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+struct nlink_table {
+ size_t nr;
+ size_t size;
+
+ struct nlink {
+ u64 inum;
+ u32 snapshot;
+ u32 count;
+ } *d;
+};
+
+static int add_nlink(struct bch_fs *c, struct nlink_table *t,
+ u64 inum, u32 snapshot)
+{
+ if (t->nr == t->size) {
+ size_t new_size = max_t(size_t, 128UL, t->size * 2);
+ void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);
+
+ if (!d) {
+ bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
+ new_size);
+ return -BCH_ERR_ENOMEM_fsck_add_nlink;
+ }
+
+ if (t->d)
+ memcpy(d, t->d, t->size * sizeof(t->d[0]));
+ kvfree(t->d);
+
+ t->d = d;
+ t->size = new_size;
+ }
+
+
+ t->d[t->nr++] = (struct nlink) {
+ .inum = inum,
+ .snapshot = snapshot,
+ };
+
+ return 0;
+}
+
+static int nlink_cmp(const void *_l, const void *_r)
+{
+ const struct nlink *l = _l;
+ const struct nlink *r = _r;
+
+ return cmp_int(l->inum, r->inum);
+}
+
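+/*
+ * Bump the link count for an inode in the (sorted) nlink table: binary search
+ * for the inum, back up to the first entry with that inum, then credit the
+ * link to the inode versions that can see the dirent's snapshot, stopping at
+ * the first entry at or past that snapshot.
+ */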
+static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
+ struct nlink_table *links,
+ u64 range_start, u64 range_end, u64 inum, u32 snapshot)
+{
+ struct nlink *link, key = {
+ .inum = inum, .snapshot = U32_MAX,
+ };
+
+ if (inum < range_start || inum >= range_end)
+ return;
+
+ link = __inline_bsearch(&key, links->d, links->nr,
+ sizeof(links->d[0]), nlink_cmp);
+ if (!link)
+ return;
+
+ while (link > links->d && link[0].inum == link[-1].inum)
+ --link;
+
+ for (; link < links->d + links->nr && link->inum == inum; link++)
+ if (ref_visible(c, s, snapshot, link->snapshot)) {
+ link->count++;
+ if (link->snapshot >= snapshot)
+ break;
+ }
+}
+
+noinline_for_stack
+static int check_nlinks_find_hardlinks(struct bch_fs *c,
+ struct nlink_table *t,
+ u64 start, u64 *end)
+{
+ int ret = bch2_trans_run(c,
+ for_each_btree_key(trans, iter, BTREE_ID_inodes,
+ POS(0, start),
+ BTREE_ITER_INTENT|
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
+ if (!bkey_is_inode(k.k))
+ continue;
+
+ /* Should never fail, checked by bch2_inode_invalid: */
+ struct bch_inode_unpacked u;
+ BUG_ON(bch2_inode_unpack(k, &u));
+
+ /*
+ * Backpointer and directory structure checks are sufficient for
+ * directories, since they can't have hardlinks:
+ */
+ if (S_ISDIR(u.bi_mode))
+ continue;
+
+ if (!u.bi_nlink)
+ continue;
+
+ ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
+ if (ret) {
+ *end = k.k->p.offset;
+ ret = 0;
+ break;
+ }
+ 0;
+ })));
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+noinline_for_stack
+static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
+ u64 range_start, u64 range_end)
+{
+ struct snapshots_seen s;
+
+ snapshots_seen_init(&s);
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
+ ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
+ if (ret)
+ break;
+
+ if (k.k->type == KEY_TYPE_dirent) {
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+
+ if (d.v->d_type != DT_DIR &&
+ d.v->d_type != DT_SUBVOL)
+ inc_link(c, &s, links, range_start, range_end,
+ le64_to_cpu(d.v->d_inum),
+ bch2_snapshot_equiv(c, d.k->p.snapshot));
+ }
+ 0;
+ })));
+
+ snapshots_seen_exit(&s);
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
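+/*
+ * Third pass over the inodes btree, walked in lockstep with the sorted nlink
+ * table: for each hardlinkable inode with a wrong link count, set i_nlink to
+ * the count we found from walking dirents.
+ */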
+static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct nlink_table *links,
+ size_t *idx, u64 range_end)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked u;
+ struct nlink *link = &links->d[*idx];
+ int ret = 0;
+
+ if (k.k->p.offset >= range_end)
+ return 1;
+
+ if (!bkey_is_inode(k.k))
+ return 0;
+
+ BUG_ON(bch2_inode_unpack(k, &u));
+
+ if (S_ISDIR(u.bi_mode))
+ return 0;
+
+ if (!u.bi_nlink)
+ return 0;
+
+ while ((cmp_int(link->inum, k.k->p.offset) ?:
+ cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
+ BUG_ON(*idx == links->nr);
+ link = &links->d[++*idx];
+ }
+
+ if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count,
+ c, inode_wrong_nlink,
+ "inode %llu type %s has wrong i_nlink (%u, should be %u)",
+ u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
+ bch2_inode_nlink_get(&u), link->count)) {
+ bch2_inode_nlink_set(&u, link->count);
+ ret = __write_inode(trans, &u, k.k->p.snapshot);
+ }
+fsck_err:
+ return ret;
+}
+
+noinline_for_stack
+static int check_nlinks_update_hardlinks(struct bch_fs *c,
+ struct nlink_table *links,
+ u64 range_start, u64 range_end)
+{
+ size_t idx = 0;
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
+ POS(0, range_start),
+ BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)));
+ if (ret < 0) {
+ bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret));
+ return ret;
+ }
+
+ return 0;
+}
+
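+/*
+ * The full nlink check runs in bounded memory: collect as many hardlinked
+ * inodes as fit in the table, count their links by walking dirents, fix up
+ * the inodes, then repeat from where the table filled up until the whole
+ * inode range has been covered.
+ */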
+int bch2_check_nlinks(struct bch_fs *c)
+{
+ struct nlink_table links = { 0 };
+ u64 this_iter_range_start, next_iter_range_start = 0;
+ int ret = 0;
+
+ do {
+ this_iter_range_start = next_iter_range_start;
+ next_iter_range_start = U64_MAX;
+
+ ret = check_nlinks_find_hardlinks(c, &links,
+ this_iter_range_start,
+ &next_iter_range_start);
+
+ ret = check_nlinks_walk_dirents(c, &links,
+ this_iter_range_start,
+ next_iter_range_start);
+ if (ret)
+ break;
+
+ ret = check_nlinks_update_hardlinks(c, &links,
+ this_iter_range_start,
+ next_iter_range_start);
+ if (ret)
+ break;
+
+ links.nr = 0;
+ } while (next_iter_range_start != U64_MAX);
+
+ kvfree(links.d);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
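+/*
+ * Clear the front_pad/back_pad fields of reflink pointer keys; this repair
+ * pass only runs on filesystems older than the reflink_p_fix metadata
+ * version (see bch2_fix_reflink_p() below).
+ */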
+static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_reflink_p p;
+ struct bkey_i_reflink_p *u;
+
+ if (k.k->type != KEY_TYPE_reflink_p)
+ return 0;
+
+ p = bkey_s_c_to_reflink_p(k);
+
+ if (!p.v->front_pad && !p.v->back_pad)
+ return 0;
+
+ u = bch2_trans_kmalloc(trans, sizeof(*u));
+ int ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ return ret;
+
+ bkey_reassemble(&u->k_i, k);
+ u->v.front_pad = 0;
+ u->v.back_pad = 0;
+
+ return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN);
+}
+
+int bch2_fix_reflink_p(struct bch_fs *c)
+{
+ if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
+ return 0;
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter,
+ BTREE_ID_extents, POS_MIN,
+ BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ fix_reflink_p_key(trans, &iter, k)));
+ bch_err_fn(c, ret);
+ return ret;
+}
diff --git a/c_src/libbcachefs/fsck.h b/c_src/libbcachefs/fsck.h
new file mode 100644
index 00000000..da991e8c
--- /dev/null
+++ b/c_src/libbcachefs/fsck.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_FSCK_H
+#define _BCACHEFS_FSCK_H
+
+int bch2_check_inodes(struct bch_fs *);
+int bch2_check_extents(struct bch_fs *);
+int bch2_check_indirect_extents(struct bch_fs *);
+int bch2_check_dirents(struct bch_fs *);
+int bch2_check_xattrs(struct bch_fs *);
+int bch2_check_root(struct bch_fs *);
+int bch2_check_directory_structure(struct bch_fs *);
+int bch2_check_nlinks(struct bch_fs *);
+int bch2_fix_reflink_p(struct bch_fs *);
+
+#endif /* _BCACHEFS_FSCK_H */
diff --git a/c_src/libbcachefs/inode.c b/c_src/libbcachefs/inode.c
new file mode 100644
index 00000000..37dce96f
--- /dev/null
+++ b/c_src/libbcachefs/inode.c
@@ -0,0 +1,1184 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_key_cache.h"
+#include "btree_write_buffer.h"
+#include "bkey_methods.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "compress.h"
+#include "dirent.h"
+#include "error.h"
+#include "extents.h"
+#include "extent_update.h"
+#include "inode.h"
+#include "str_hash.h"
+#include "snapshot.h"
+#include "subvolume.h"
+#include "varint.h"
+
+#include <linux/random.h>
+
+#include <asm/unaligned.h>
+
+#define x(name, ...) #name,
+const char * const bch2_inode_opts[] = {
+ BCH_INODE_OPTS()
+ NULL,
+};
+
+static const char * const bch2_inode_flag_strs[] = {
+ BCH_INODE_FLAGS()
+ NULL
+};
+#undef x
+
+static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
+
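+/*
+ * Decode one field of the original (pre-varint) inode encoding: the position
+ * of the highest set bit in the first byte selects the encoded length via
+ * byte_table[], and the field value (up to 128 bits) is returned in out[],
+ * high word first.
+ */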
+static int inode_decode_field(const u8 *in, const u8 *end,
+ u64 out[2], unsigned *out_bits)
+{
+ __be64 be[2] = { 0, 0 };
+ unsigned bytes, shift;
+ u8 *p;
+
+ if (in >= end)
+ return -1;
+
+ if (!*in)
+ return -1;
+
+ /*
+ * position of highest set bit indicates number of bytes:
+ * shift = number of bits to remove in high byte:
+ */
+ shift = 8 - __fls(*in); /* 1 <= shift <= 8 */
+ bytes = byte_table[shift - 1];
+
+ if (in + bytes > end)
+ return -1;
+
+ p = (u8 *) be + 16 - bytes;
+ memcpy(p, in, bytes);
+ *p ^= (1 << 8) >> shift;
+
+ out[0] = be64_to_cpu(be[0]);
+ out[1] = be64_to_cpu(be[1]);
+ *out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]);
+
+ return bytes;
+}
+
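+/*
+ * Pack an in-memory inode into an inode_v3 key: fixed fields go in the struct
+ * directly, optional fields are varint encoded in order, and trailing zero
+ * fields are dropped. With CONFIG_BCACHEFS_DEBUG the result is unpacked again
+ * and compared against the original.
+ */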
+static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
+ const struct bch_inode_unpacked *inode)
+{
+ struct bkey_i_inode_v3 *k = &packed->inode;
+ u8 *out = k->v.fields;
+ u8 *end = (void *) &packed[1];
+ u8 *last_nonzero_field = out;
+ unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
+ unsigned bytes;
+ int ret;
+
+ bkey_inode_v3_init(&packed->inode.k_i);
+ packed->inode.k.p.offset = inode->bi_inum;
+ packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq);
+ packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
+ packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags);
+ packed->inode.v.bi_sectors = cpu_to_le64(inode->bi_sectors);
+ packed->inode.v.bi_size = cpu_to_le64(inode->bi_size);
+ packed->inode.v.bi_version = cpu_to_le64(inode->bi_version);
+ SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode);
+ SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR);
+
+
+#define x(_name, _bits) \
+ nr_fields++; \
+ \
+ if (inode->_name) { \
+ ret = bch2_varint_encode_fast(out, inode->_name); \
+ out += ret; \
+ \
+ if (_bits > 64) \
+ *out++ = 0; \
+ \
+ last_nonzero_field = out; \
+ last_nonzero_fieldnr = nr_fields; \
+ } else { \
+ *out++ = 0; \
+ \
+ if (_bits > 64) \
+ *out++ = 0; \
+ }
+
+ BCH_INODE_FIELDS_v3()
+#undef x
+ BUG_ON(out > end);
+
+ out = last_nonzero_field;
+ nr_fields = last_nonzero_fieldnr;
+
+ bytes = out - (u8 *) &packed->inode.v;
+ set_bkey_val_bytes(&packed->inode.k, bytes);
+ memset_u64s_tail(&packed->inode.v, 0, bytes);
+
+ SET_INODEv3_NR_FIELDS(&k->v, nr_fields);
+
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+ struct bch_inode_unpacked unpacked;
+
+ ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), &unpacked);
+ BUG_ON(ret);
+ BUG_ON(unpacked.bi_inum != inode->bi_inum);
+ BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
+ BUG_ON(unpacked.bi_sectors != inode->bi_sectors);
+ BUG_ON(unpacked.bi_size != inode->bi_size);
+ BUG_ON(unpacked.bi_version != inode->bi_version);
+ BUG_ON(unpacked.bi_mode != inode->bi_mode);
+
+#define x(_name, _bits) if (unpacked._name != inode->_name) \
+ panic("unpacked %llu should be %llu", \
+ (u64) unpacked._name, (u64) inode->_name);
+ BCH_INODE_FIELDS_v3()
+#undef x
+ }
+}
+
+void bch2_inode_pack(struct bkey_inode_buf *packed,
+ const struct bch_inode_unpacked *inode)
+{
+ bch2_inode_pack_inlined(packed, inode);
+}
+
+static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
+ struct bch_inode_unpacked *unpacked)
+{
+ const u8 *in = inode.v->fields;
+ const u8 *end = bkey_val_end(inode);
+ u64 field[2];
+ unsigned fieldnr = 0, field_bits;
+ int ret;
+
+#define x(_name, _bits) \
+ if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \
+ unsigned offset = offsetof(struct bch_inode_unpacked, _name);\
+ memset((void *) unpacked + offset, 0, \
+ sizeof(*unpacked) - offset); \
+ return 0; \
+ } \
+ \
+ ret = inode_decode_field(in, end, field, &field_bits); \
+ if (ret < 0) \
+ return ret; \
+ \
+ if (field_bits > sizeof(unpacked->_name) * 8) \
+ return -1; \
+ \
+ unpacked->_name = field[1]; \
+ in += ret;
+
+ BCH_INODE_FIELDS_v2()
+#undef x
+
+ /* XXX: signal if there were more fields than expected? */
+ return 0;
+}
+
+static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
+ const u8 *in, const u8 *end,
+ unsigned nr_fields)
+{
+ unsigned fieldnr = 0;
+ int ret;
+ u64 v[2];
+
+#define x(_name, _bits) \
+ if (fieldnr < nr_fields) { \
+ ret = bch2_varint_decode_fast(in, end, &v[0]); \
+ if (ret < 0) \
+ return ret; \
+ in += ret; \
+ \
+ if (_bits > 64) { \
+ ret = bch2_varint_decode_fast(in, end, &v[1]); \
+ if (ret < 0) \
+ return ret; \
+ in += ret; \
+ } else { \
+ v[1] = 0; \
+ } \
+ } else { \
+ v[0] = v[1] = 0; \
+ } \
+ \
+ unpacked->_name = v[0]; \
+ if (v[1] || v[0] != unpacked->_name) \
+ return -1; \
+ fieldnr++;
+
+ BCH_INODE_FIELDS_v2()
+#undef x
+
+ /* XXX: signal if there were more fields than expected? */
+ return 0;
+}
+
+static int bch2_inode_unpack_v3(struct bkey_s_c k,
+ struct bch_inode_unpacked *unpacked)
+{
+ struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
+ const u8 *in = inode.v->fields;
+ const u8 *end = bkey_val_end(inode);
+ unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
+ unsigned fieldnr = 0;
+ int ret;
+ u64 v[2];
+
+ unpacked->bi_inum = inode.k->p.offset;
+ unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
+ unpacked->bi_hash_seed = inode.v->bi_hash_seed;
+ unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
+ unpacked->bi_sectors = le64_to_cpu(inode.v->bi_sectors);
+ unpacked->bi_size = le64_to_cpu(inode.v->bi_size);
+ unpacked->bi_version = le64_to_cpu(inode.v->bi_version);
+ unpacked->bi_mode = INODEv3_MODE(inode.v);
+
+#define x(_name, _bits) \
+ if (fieldnr < nr_fields) { \
+ ret = bch2_varint_decode_fast(in, end, &v[0]); \
+ if (ret < 0) \
+ return ret; \
+ in += ret; \
+ \
+ if (_bits > 64) { \
+ ret = bch2_varint_decode_fast(in, end, &v[1]); \
+ if (ret < 0) \
+ return ret; \
+ in += ret; \
+ } else { \
+ v[1] = 0; \
+ } \
+ } else { \
+ v[0] = v[1] = 0; \
+ } \
+ \
+ unpacked->_name = v[0]; \
+ if (v[1] || v[0] != unpacked->_name) \
+ return -1; \
+ fieldnr++;
+
+ BCH_INODE_FIELDS_v3()
+#undef x
+
+ /* XXX: signal if there were more fields than expected? */
+ return 0;
+}
+
+static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
+ struct bch_inode_unpacked *unpacked)
+{
+ memset(unpacked, 0, sizeof(*unpacked));
+
+ switch (k.k->type) {
+ case KEY_TYPE_inode: {
+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
+
+ unpacked->bi_inum = inode.k->p.offset;
+ unpacked->bi_journal_seq= 0;
+ unpacked->bi_hash_seed = inode.v->bi_hash_seed;
+ unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
+ unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
+
+ if (INODE_NEW_VARINT(inode.v)) {
+ return bch2_inode_unpack_v2(unpacked, inode.v->fields,
+ bkey_val_end(inode),
+ INODE_NR_FIELDS(inode.v));
+ } else {
+ return bch2_inode_unpack_v1(inode, unpacked);
+ }
+ break;
+ }
+ case KEY_TYPE_inode_v2: {
+ struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
+
+ unpacked->bi_inum = inode.k->p.offset;
+ unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
+ unpacked->bi_hash_seed = inode.v->bi_hash_seed;
+ unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
+ unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
+
+ return bch2_inode_unpack_v2(unpacked, inode.v->fields,
+ bkey_val_end(inode),
+ INODEv2_NR_FIELDS(inode.v));
+ }
+ default:
+ BUG();
+ }
+}
+
+int bch2_inode_unpack(struct bkey_s_c k,
+ struct bch_inode_unpacked *unpacked)
+{
+ if (likely(k.k->type == KEY_TYPE_inode_v3))
+ return bch2_inode_unpack_v3(k, unpacked);
+ return bch2_inode_unpack_slowpath(k, unpacked);
+}
+
+static int bch2_inode_peek_nowarn(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode,
+ subvol_inum inum, unsigned flags)
+{
+ struct bkey_s_c k;
+ u32 snapshot;
+ int ret;
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ return ret;
+
+ k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes,
+ SPOS(0, inum.inum, snapshot),
+ flags|BTREE_ITER_CACHED);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
+ if (ret)
+ goto err;
+
+ ret = bch2_inode_unpack(k, inode);
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ bch2_trans_iter_exit(trans, iter);
+ return ret;
+}
+
+int bch2_inode_peek(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode,
+ subvol_inum inum, unsigned flags)
+{
+ int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);
+ bch_err_msg(trans->c, ret, "looking up inum %u:%llu:", inum.subvol, inum.inum);
+ return ret;
+}
+
+int bch2_inode_write_flags(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode,
+ enum btree_update_flags flags)
+{
+ struct bkey_inode_buf *inode_p;
+
+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
+ if (IS_ERR(inode_p))
+ return PTR_ERR(inode_p);
+
+ bch2_inode_pack_inlined(inode_p, inode);
+ inode_p->inode.k.p.snapshot = iter->snapshot;
+ return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags);
+}
+
+struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k)
+{
+ struct bch_inode_unpacked u;
+ struct bkey_inode_buf *inode_p;
+ int ret;
+
+ if (!bkey_is_inode(&k->k))
+ return ERR_PTR(-ENOENT);
+
+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
+ if (IS_ERR(inode_p))
+ return ERR_CAST(inode_p);
+
+ ret = bch2_inode_unpack(bkey_i_to_s_c(k), &u);
+ if (ret)
+ return ERR_PTR(ret);
+
+ bch2_inode_pack(inode_p, &u);
+ return &inode_p->inode.k_i;
+}
+
+static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err)
+{
+ struct bch_inode_unpacked unpacked;
+ int ret = 0;
+
+ bkey_fsck_err_on(k.k->p.inode, c, err,
+ inode_pos_inode_nonzero,
+ "nonzero k.p.inode");
+
+ bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err,
+ inode_pos_blockdev_range,
+ "fs inode in blockdev range");
+
+ bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err,
+ inode_unpack_error,
+ "invalid variable length fields");
+
+ bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err,
+ inode_checksum_type_invalid,
+ "invalid data checksum type (%u >= %u",
+ unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1);
+
+ bkey_fsck_err_on(unpacked.bi_compression &&
+ !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err,
+ inode_compression_type_invalid,
+ "invalid compression opt %u", unpacked.bi_compression - 1);
+
+ bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) &&
+ unpacked.bi_nlink != 0, c, err,
+ inode_unlinked_but_nlink_nonzero,
+ "flagged as unlinked but bi_nlink != 0");
+
+ bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err,
+ inode_subvol_root_but_not_dir,
+ "subvolume root but not a directory");
+fsck_err:
+ return ret;
+}
+
+int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
+ int ret = 0;
+
+ bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err,
+ inode_str_hash_invalid,
+ "invalid str hash type (%llu >= %u)",
+ INODE_STR_HASH(inode.v), BCH_STR_HASH_NR);
+
+ ret = __bch2_inode_invalid(c, k, err);
+fsck_err:
+ return ret;
+}
+
+int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
+ int ret = 0;
+
+ bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err,
+ inode_str_hash_invalid,
+ "invalid str hash type (%llu >= %u)",
+ INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR);
+
+ ret = __bch2_inode_invalid(c, k, err);
+fsck_err:
+ return ret;
+}
+
+int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
+ int ret = 0;
+
+ bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL ||
+ INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err,
+ inode_v3_fields_start_bad,
+ "invalid fields_start (got %llu, min %u max %zu)",
+ INODEv3_FIELDS_START(inode.v),
+ INODEv3_FIELDS_START_INITIAL,
+ bkey_val_u64s(inode.k));
+
+ bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err,
+ inode_str_hash_invalid,
+ "invalid str hash type (%llu >= %u)",
+ INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);
+
+ ret = __bch2_inode_invalid(c, k, err);
+fsck_err:
+ return ret;
+}
+
+static void __bch2_inode_unpacked_to_text(struct printbuf *out,
+ struct bch_inode_unpacked *inode)
+{
+ prt_printf(out, "mode=%o ", inode->bi_mode);
+
+ prt_str(out, "flags=");
+ prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1));
+ prt_printf(out, " (%x)", inode->bi_flags);
+
+ prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu",
+ inode->bi_journal_seq,
+ inode->bi_size,
+ inode->bi_sectors,
+ inode->bi_version);
+
+#define x(_name, _bits) \
+ prt_printf(out, " "#_name "=%llu", (u64) inode->_name);
+ BCH_INODE_FIELDS_v3()
+#undef x
+}
+
+void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
+{
+ prt_printf(out, "inum: %llu ", inode->bi_inum);
+ __bch2_inode_unpacked_to_text(out, inode);
+}
+
+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bch_inode_unpacked inode;
+
+ if (bch2_inode_unpack(k, &inode)) {
+ prt_printf(out, "(unpack error)");
+ return;
+ }
+
+ __bch2_inode_unpacked_to_text(out, &inode);
+}
+
+static inline u64 bkey_inode_flags(struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_inode:
+ return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
+ case KEY_TYPE_inode_v2:
+ return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
+ case KEY_TYPE_inode_v3:
+ return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
+ default:
+ return 0;
+ }
+}
+
+static inline bool bkey_is_deleted_inode(struct bkey_s_c k)
+{
+ return bkey_inode_flags(k) & BCH_INODE_unlinked;
+}
+
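+/*
+ * Inode trigger: keeps the filesystem's nr_inodes count up to date, maintains
+ * the deleted_inodes btree when an inode's unlinked flag changes, and stamps
+ * the journal sequence number into inode_v3 keys at (non-transactional)
+ * insert time.
+ */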
+int bch2_trigger_inode(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old,
+ struct bkey_s new,
+ unsigned flags)
+{
+ s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k);
+
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ if (nr) {
+ int ret = bch2_replicas_deltas_realloc(trans, 0);
+ if (ret)
+ return ret;
+
+ trans->fs_usage_deltas->nr_inodes += nr;
+ }
+
+ bool old_deleted = bkey_is_deleted_inode(old);
+ bool new_deleted = bkey_is_deleted_inode(new.s_c);
+ if (old_deleted != new_deleted) {
+ int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
+ BUG_ON(!trans->journal_res.seq);
+
+ bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq);
+ }
+
+ if (flags & BTREE_TRIGGER_GC) {
+ struct bch_fs *c = trans->c;
+
+ percpu_down_read(&c->mark_lock);
+ this_cpu_add(c->usage_gc->nr_inodes, nr);
+ percpu_up_read(&c->mark_lock);
+ }
+
+ return 0;
+}
+
+int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(k.k->p.inode, c, err,
+ inode_pos_inode_nonzero,
+ "nonzero k.p.inode");
+fsck_err:
+ return ret;
+}
+
+void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k);
+
+ prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));
+}
+
+void bch2_inode_init_early(struct bch_fs *c,
+ struct bch_inode_unpacked *inode_u)
+{
+ enum bch_str_hash_type str_hash =
+ bch2_str_hash_opt_to_type(c, c->opts.str_hash);
+
+ memset(inode_u, 0, sizeof(*inode_u));
+
+ /* ick */
+ inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET;
+ get_random_bytes(&inode_u->bi_hash_seed,
+ sizeof(inode_u->bi_hash_seed));
+}
+
+void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now,
+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
+ struct bch_inode_unpacked *parent)
+{
+ inode_u->bi_mode = mode;
+ inode_u->bi_uid = uid;
+ inode_u->bi_gid = gid;
+ inode_u->bi_dev = rdev;
+ inode_u->bi_atime = now;
+ inode_u->bi_mtime = now;
+ inode_u->bi_ctime = now;
+ inode_u->bi_otime = now;
+
+ if (parent && parent->bi_mode & S_ISGID) {
+ inode_u->bi_gid = parent->bi_gid;
+ if (S_ISDIR(mode))
+ inode_u->bi_mode |= S_ISGID;
+ }
+
+ if (parent) {
+#define x(_name, ...) inode_u->bi_##_name = parent->bi_##_name;
+ BCH_INODE_OPTS()
+#undef x
+ }
+}
+
+void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
+ struct bch_inode_unpacked *parent)
+{
+ bch2_inode_init_early(c, inode_u);
+ bch2_inode_init_late(inode_u, bch2_current_time(c),
+ uid, gid, mode, rdev, parent);
+}
+
+static inline u32 bkey_generation(struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_inode:
+ case KEY_TYPE_inode_v2:
+ BUG();
+ case KEY_TYPE_inode_generation:
+ return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation);
+ default:
+ return 0;
+ }
+}
+
+/*
+ * This just finds an empty slot:
+ */
+int bch2_inode_create(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode_u,
+ u32 snapshot, u64 cpu)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c k;
+ u64 min, max, start, pos, *hint;
+ int ret = 0;
+ unsigned bits = (c->opts.inodes_32bit ? 31 : 63);
+
+ if (c->opts.shard_inode_numbers) {
+ bits -= c->inode_shard_bits;
+
+ min = (cpu << bits);
+ max = (cpu << bits) | ~(ULLONG_MAX << bits);
+
+ min = max_t(u64, min, BLOCKDEV_INODE_MAX);
+ hint = c->unused_inode_hints + cpu;
+ } else {
+ min = BLOCKDEV_INODE_MAX;
+ max = ~(ULLONG_MAX << bits);
+ hint = c->unused_inode_hints;
+ }
+
+ start = READ_ONCE(*hint);
+
+ if (start >= max || start < min)
+ start = min;
+
+ pos = start;
+ bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
+ BTREE_ITER_ALL_SNAPSHOTS|
+ BTREE_ITER_INTENT);
+again:
+ while ((k = bch2_btree_iter_peek(iter)).k &&
+ !(ret = bkey_err(k)) &&
+ bkey_lt(k.k->p, POS(0, max))) {
+ if (pos < iter->pos.offset)
+ goto found_slot;
+
+ /*
+ * We don't need to iterate over keys in every snapshot once
+ * we've found just one:
+ */
+ pos = iter->pos.offset + 1;
+ bch2_btree_iter_set_pos(iter, POS(0, pos));
+ }
+
+ if (!ret && pos < max)
+ goto found_slot;
+
+ if (!ret && start == min)
+ ret = -BCH_ERR_ENOSPC_inode_create;
+
+ if (ret) {
+ bch2_trans_iter_exit(trans, iter);
+ return ret;
+ }
+
+ /* Retry from start */
+ pos = start = min;
+ bch2_btree_iter_set_pos(iter, POS(0, pos));
+ goto again;
+found_slot:
+ bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret) {
+ bch2_trans_iter_exit(trans, iter);
+ return ret;
+ }
+
+ *hint = k.k->p.offset;
+ inode_u->bi_inum = k.k->p.offset;
+ inode_u->bi_generation = bkey_generation(k);
+ return 0;
+}
+
+static int bch2_inode_delete_keys(struct btree_trans *trans,
+ subvol_inum inum, enum btree_id id)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_i delete;
+ struct bpos end = POS(inum.inum, U64_MAX);
+ u32 snapshot;
+ int ret = 0;
+
+ /*
+ * We're never going to be deleting partial extents, no need to use an
+ * extent iterator:
+ */
+ bch2_trans_iter_init(trans, &iter, id, POS(inum.inum, 0),
+ BTREE_ITER_INTENT);
+
+ while (1) {
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ bch2_btree_iter_set_snapshot(&iter, snapshot);
+
+ k = bch2_btree_iter_peek_upto(&iter, end);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (!k.k)
+ break;
+
+ bkey_init(&delete.k);
+ delete.k.p = iter.pos;
+
+ if (iter.flags & BTREE_ITER_IS_EXTENTS)
+ bch2_key_resize(&delete.k,
+ bpos_min(end, k.k->p).offset -
+ iter.pos.offset);
+
+ ret = bch2_trans_update(trans, &iter, &delete, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+err:
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ break;
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter = { NULL };
+ struct bkey_i_inode_generation delete;
+ struct bch_inode_unpacked inode_u;
+ struct bkey_s_c k;
+ u32 snapshot;
+ int ret;
+
+ /*
+ * If this was a directory, there shouldn't be any real dirents left -
+ * but there could be whiteouts (from hash collisions) that we should
+ * delete:
+ *
+ * XXX: ideally the dirent code would delete whiteouts when they're no
+ * longer needed
+ */
+ ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?:
+ bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?:
+ bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
+ if (ret)
+ goto err;
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, inum.inum, snapshot),
+ BTREE_ITER_INTENT|BTREE_ITER_CACHED);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (!bkey_is_inode(k.k)) {
+ bch2_fs_inconsistent(c,
+ "inode %llu:%u not found when deleting",
+ inum.inum, snapshot);
+ ret = -EIO;
+ goto err;
+ }
+
+ bch2_inode_unpack(k, &inode_u);
+
+ bkey_inode_generation_init(&delete.k_i);
+ delete.k.p = iter.pos;
+ delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
+
+ ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_put(trans);
+ return ret;
+}
+
+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans,
+ subvol_inum inum,
+ struct bch_inode_unpacked *inode)
+{
+ struct btree_iter iter;
+ int ret;
+
+ ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0);
+ if (!ret)
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
+ subvol_inum inum,
+ struct bch_inode_unpacked *inode)
+{
+ struct btree_iter iter;
+ int ret;
+
+ ret = bch2_inode_peek(trans, &iter, inode, inum, 0);
+ if (!ret)
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
+ struct bch_inode_unpacked *inode)
+{
+ return bch2_trans_do(c, NULL, NULL, 0,
+ bch2_inode_find_by_inum_trans(trans, inum, inode));
+}
+
+int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
+{
+ if (bi->bi_flags & BCH_INODE_unlinked)
+ bi->bi_flags &= ~BCH_INODE_unlinked;
+ else {
+ if (bi->bi_nlink == U32_MAX)
+ return -EINVAL;
+
+ bi->bi_nlink++;
+ }
+
+ return 0;
+}
+
+void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi)
+{
+ if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_unlinked)) {
+ bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero",
+ bi->bi_inum);
+ return;
+ }
+
+ if (bi->bi_flags & BCH_INODE_unlinked) {
+ bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum);
+ return;
+ }
+
+ if (bi->bi_nlink)
+ bi->bi_nlink--;
+ else
+ bi->bi_flags |= BCH_INODE_unlinked;
+}
+
+struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode)
+{
+ struct bch_opts ret = { 0 };
+#define x(_name, _bits) \
+ if (inode->bi_##_name) \
+ opt_set(ret, _name, inode->bi_##_name - 1);
+ BCH_INODE_OPTS()
+#undef x
+ return ret;
+}
+
+void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
+ struct bch_inode_unpacked *inode)
+{
+#define x(_name, _bits) opts->_name = inode_opt_get(c, inode, _name);
+ BCH_INODE_OPTS()
+#undef x
+
+ if (opts->nocow)
+ opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0;
+}
+
+int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts)
+{
+ struct bch_inode_unpacked inode;
+ int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode));
+
+ if (ret)
+ return ret;
+
+ bch2_inode_opts_get(opts, trans->c, &inode);
+ return 0;
+}
+
+int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter = { NULL };
+ struct bkey_i_inode_generation delete;
+ struct bch_inode_unpacked inode_u;
+ struct bkey_s_c k;
+ int ret;
+
+ do {
+ ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
+ SPOS(inum, 0, snapshot),
+ SPOS(inum, U64_MAX, snapshot),
+ 0, NULL) ?:
+ bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
+ SPOS(inum, 0, snapshot),
+ SPOS(inum, U64_MAX, snapshot),
+ 0, NULL) ?:
+ bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
+ SPOS(inum, 0, snapshot),
+ SPOS(inum, U64_MAX, snapshot),
+ 0, NULL);
+ } while (ret == -BCH_ERR_transaction_restart_nested);
+ if (ret)
+ goto err;
+retry:
+ bch2_trans_begin(trans);
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, inum, snapshot), BTREE_ITER_INTENT);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (!bkey_is_inode(k.k)) {
+ bch2_fs_inconsistent(c,
+ "inode %llu:%u not found when deleting",
+ inum, snapshot);
+ ret = -EIO;
+ goto err;
+ }
+
+ bch2_inode_unpack(k, &inode_u);
+
+ /* Subvolume root? */
+ if (inode_u.bi_subvol)
+ bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum);
+
+ bkey_inode_generation_init(&delete.k_i);
+ delete.k.p = iter.pos;
+ delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
+
+ ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ return ret ?: -BCH_ERR_transaction_restart_nested;
+}
+
+static int may_delete_deleted_inode(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bpos pos,
+ bool *need_another_pass)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter inode_iter;
+ struct bkey_s_c k;
+ struct bch_inode_unpacked inode;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode;
+ if (fsck_err_on(!bkey_is_inode(k.k), c,
+ deleted_inode_missing,
+ "nonexistent inode %llu:%u in deleted_inodes btree",
+ pos.offset, pos.snapshot))
+ goto delete;
+
+ ret = bch2_inode_unpack(k, &inode);
+ if (ret)
+ goto out;
+
+ if (S_ISDIR(inode.bi_mode)) {
+ ret = bch2_empty_dir_snapshot(trans, pos.offset, pos.snapshot);
+ if (fsck_err_on(ret == -ENOTEMPTY, c, deleted_inode_is_dir,
+ "non empty directory %llu:%u in deleted_inodes btree",
+ pos.offset, pos.snapshot))
+ goto delete;
+ if (ret)
+ goto out;
+ }
+
+ if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c,
+ deleted_inode_not_unlinked,
+ "non-deleted inode %llu:%u in deleted_inodes btree",
+ pos.offset, pos.snapshot))
+ goto delete;
+
+ if (c->sb.clean &&
+ !fsck_err(c,
+ deleted_inode_but_clean,
+ "filesystem marked as clean but have deleted inode %llu:%u",
+ pos.offset, pos.snapshot)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (bch2_snapshot_is_internal_node(c, pos.snapshot)) {
+ struct bpos new_min_pos;
+
+ ret = bch2_propagate_key_to_snapshot_leaves(trans, inode_iter.btree_id, k, &new_min_pos);
+ if (ret)
+ goto out;
+
+ inode.bi_flags &= ~BCH_INODE_unlinked;
+
+ ret = bch2_inode_write_flags(trans, &inode_iter, &inode,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ bch_err_msg(c, ret, "clearing inode unlinked flag");
+ if (ret)
+ goto out;
+
+ /*
+ * We'll need another write buffer flush to pick up the new
+ * unlinked inodes in the snapshot leaves:
+ */
+ *need_another_pass = true;
+ goto out;
+ }
+
+ ret = 1;
+out:
+fsck_err:
+ bch2_trans_iter_exit(trans, &inode_iter);
+ return ret;
+delete:
+ ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
+ goto out;
+}
+
+int bch2_delete_dead_inodes(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ bool need_another_pass;
+ int ret;
+again:
+ need_another_pass = false;
+
+ /*
+ * Weird transaction restart handling here because on successful delete,
+ * bch2_inode_rm_snapshot() will return a nested transaction restart,
+ * but we can't retry because the btree write buffer won't have been
+ * flushed and we'd spin:
+ */
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass);
+ if (ret > 0) {
+ bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot);
+
+ ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot);
+ /*
+ * We don't want to loop here: a transaction restart
+ * error here means we handled a transaction restart and
+ * we're actually done, but if we loop we'll retry the
+ * same key because the write buffer hasn't been flushed
+ * yet
+ */
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ ret = 0;
+ continue;
+ }
+ }
+
+ ret;
+ }));
+
+ if (!ret && need_another_pass) {
+ ret = bch2_btree_write_buffer_flush_sync(trans);
+ if (ret)
+ goto err;
+ goto again;
+ }
+err:
+ bch2_trans_put(trans);
+ return ret;
+}
diff --git a/c_src/libbcachefs/inode.h b/c_src/libbcachefs/inode.h
new file mode 100644
index 00000000..b63f3125
--- /dev/null
+++ b/c_src/libbcachefs/inode.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_INODE_H
+#define _BCACHEFS_INODE_H
+
+#include "bkey.h"
+#include "bkey_methods.h"
+#include "opts.h"
+
+enum bkey_invalid_flags;
+extern const char * const bch2_inode_opts[];
+
+int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+
+#define bch2_bkey_ops_inode ((struct bkey_ops) { \
+ .key_invalid = bch2_inode_invalid, \
+ .val_to_text = bch2_inode_to_text, \
+ .trigger = bch2_trigger_inode, \
+ .min_val_size = 16, \
+})
+
+#define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \
+ .key_invalid = bch2_inode_v2_invalid, \
+ .val_to_text = bch2_inode_to_text, \
+ .trigger = bch2_trigger_inode, \
+ .min_val_size = 32, \
+})
+
+#define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \
+ .key_invalid = bch2_inode_v3_invalid, \
+ .val_to_text = bch2_inode_to_text, \
+ .trigger = bch2_trigger_inode, \
+ .min_val_size = 48, \
+})
+
+static inline bool bkey_is_inode(const struct bkey *k)
+{
+ return k->type == KEY_TYPE_inode ||
+ k->type == KEY_TYPE_inode_v2 ||
+ k->type == KEY_TYPE_inode_v3;
+}
+
+int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \
+ .key_invalid = bch2_inode_generation_invalid, \
+ .val_to_text = bch2_inode_generation_to_text, \
+ .min_val_size = 8, \
+})
+
+#if 0
+typedef struct {
+ u64 lo;
+ u32 hi;
+} __packed __aligned(4) u96;
+#endif
+typedef u64 u96;
+
+struct bch_inode_unpacked {
+ u64 bi_inum;
+ u64 bi_journal_seq;
+ __le64 bi_hash_seed;
+ u64 bi_size;
+ u64 bi_sectors;
+ u64 bi_version;
+ u32 bi_flags;
+ u16 bi_mode;
+
+#define x(_name, _bits) u##_bits _name;
+ BCH_INODE_FIELDS_v3()
+#undef x
+};
+
+struct bkey_inode_buf {
+ struct bkey_i_inode_v3 inode;
+
+#define x(_name, _bits) + 8 + _bits / 8
+ u8 _pad[0 + BCH_INODE_FIELDS_v3()];
+#undef x
+} __packed __aligned(8);
+
+void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
+int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *);
+struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *);
+
+void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
+
+int bch2_inode_peek(struct btree_trans *, struct btree_iter *,
+ struct bch_inode_unpacked *, subvol_inum, unsigned);
+
+int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *,
+ struct bch_inode_unpacked *, enum btree_update_flags);
+
+static inline int bch2_inode_write(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode)
+{
+ return bch2_inode_write_flags(trans, iter, inode, 0);
+}
+
+void bch2_inode_init_early(struct bch_fs *,
+ struct bch_inode_unpacked *);
+void bch2_inode_init_late(struct bch_inode_unpacked *, u64,
+ uid_t, gid_t, umode_t, dev_t,
+ struct bch_inode_unpacked *);
+void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
+ uid_t, gid_t, umode_t, dev_t,
+ struct bch_inode_unpacked *);
+
+int bch2_inode_create(struct btree_trans *, struct btree_iter *,
+ struct bch_inode_unpacked *, u32, u64);
+
+int bch2_inode_rm(struct bch_fs *, subvol_inum);
+
+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *,
+ subvol_inum,
+ struct bch_inode_unpacked *);
+int bch2_inode_find_by_inum_trans(struct btree_trans *, subvol_inum,
+ struct bch_inode_unpacked *);
+int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum,
+ struct bch_inode_unpacked *);
+
+#define inode_opt_get(_c, _inode, _name) \
+ ((_inode)->bi_##_name ? (_inode)->bi_##_name - 1 : (_c)->opts._name)
+
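+/*
+ * Per-inode options are stored biased by one, so that zero means "unset -
+ * fall back to the filesystem-wide default". Illustrative example:
+ *
+ *   bi_data_replicas == 0     -> use c->opts.data_replicas
+ *   bi_data_replicas == n + 1 -> use n replicas for this inode
+ */
+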
+static inline void bch2_inode_opt_set(struct bch_inode_unpacked *inode,
+ enum inode_opt_id id, u64 v)
+{
+ switch (id) {
+#define x(_name, ...) \
+ case Inode_opt_##_name: \
+ inode->bi_##_name = v; \
+ break;
+ BCH_INODE_OPTS()
+#undef x
+ default:
+ BUG();
+ }
+}
+
+static inline u64 bch2_inode_opt_get(struct bch_inode_unpacked *inode,
+ enum inode_opt_id id)
+{
+ switch (id) {
+#define x(_name, ...) \
+ case Inode_opt_##_name: \
+ return inode->bi_##_name;
+ BCH_INODE_OPTS()
+#undef x
+ default:
+ BUG();
+ }
+}
+
+static inline u8 mode_to_type(umode_t mode)
+{
+ return (mode >> 12) & 15;
+}
+
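+/*
+ * The file type lives in the high nibble of i_mode and matches the DT_*
+ * values used in dirents (e.g. S_IFDIR >> 12 == DT_DIR); subvolume roots
+ * additionally report DT_SUBVOL:
+ */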
+static inline u8 inode_d_type(struct bch_inode_unpacked *inode)
+{
+ return inode->bi_subvol ? DT_SUBVOL : mode_to_type(inode->bi_mode);
+}
+
+/* i_nlink: */
+
+static inline unsigned nlink_bias(umode_t mode)
+{
+ return S_ISDIR(mode) ? 2 : 1;
+}
+
+static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi)
+{
+ return bi->bi_flags & BCH_INODE_unlinked
+ ? 0
+ : bi->bi_nlink + nlink_bias(bi->bi_mode);
+}
+
+static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
+ unsigned nlink)
+{
+ if (nlink) {
+ bi->bi_nlink = nlink - nlink_bias(bi->bi_mode);
+ bi->bi_flags &= ~BCH_INODE_unlinked;
+ } else {
+ bi->bi_nlink = 0;
+ bi->bi_flags |= BCH_INODE_unlinked;
+ }
+}
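+
+/*
+ * Worked example of the nlink encoding above (illustrative):
+ *
+ *   regular file with nlink 1 -> bi_nlink == 0, BCH_INODE_unlinked clear
+ *   directory with nlink 2    -> bi_nlink == 0, BCH_INODE_unlinked clear
+ *   any inode with nlink 0    -> bi_nlink == 0, BCH_INODE_unlinked set
+ *
+ * i.e. bi_nlink stores the link count minus nlink_bias(), and zero links
+ * is represented by the BCH_INODE_unlinked flag rather than a count.
+ */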
+
+int bch2_inode_nlink_inc(struct bch_inode_unpacked *);
+void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *);
+
+struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *);
+void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
+ struct bch_inode_unpacked *);
+int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *);
+
+int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32);
+int bch2_delete_dead_inodes(struct bch_fs *);
+
+#endif /* _BCACHEFS_INODE_H */
diff --git a/c_src/libbcachefs/io_misc.c b/c_src/libbcachefs/io_misc.c
new file mode 100644
index 00000000..ca6d5f51
--- /dev/null
+++ b/c_src/libbcachefs/io_misc.c
@@ -0,0 +1,517 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * io_misc.c - fallocate, fpunch, truncate:
+ */
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "clock.h"
+#include "error.h"
+#include "extents.h"
+#include "extent_update.h"
+#include "inode.h"
+#include "io_misc.h"
+#include "io_write.h"
+#include "logged_ops.h"
+#include "rebalance.h"
+#include "subvolume.h"
+
+/* Overwrites whatever was present with zeroes: */
+int bch2_extent_fallocate(struct btree_trans *trans,
+ subvol_inum inum,
+ struct btree_iter *iter,
+ u64 sectors,
+ struct bch_io_opts opts,
+ s64 *i_sectors_delta,
+ struct write_point_specifier write_point)
+{
+ struct bch_fs *c = trans->c;
+ struct disk_reservation disk_res = { 0 };
+ struct closure cl;
+ struct open_buckets open_buckets = { 0 };
+ struct bkey_s_c k;
+ struct bkey_buf old, new;
+ unsigned sectors_allocated = 0, new_replicas;
+ bool unwritten = opts.nocow &&
+ c->sb.version >= bcachefs_metadata_version_unwritten_extents;
+ int ret;
+
+ bch2_bkey_buf_init(&old);
+ bch2_bkey_buf_init(&new);
+ closure_init_stack(&cl);
+
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);
+ new_replicas = max(0, (int) opts.data_replicas -
+ (int) bch2_bkey_nr_ptrs_fully_allocated(k));
+
+ /*
+ * Get a disk reservation before (in the nocow case) calling
+ * into the allocator:
+ */
+ ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
+ if (unlikely(ret))
+ goto err_noprint;
+
+ bch2_bkey_buf_reassemble(&old, c, k);
+
+ if (!unwritten) {
+ struct bkey_i_reservation *reservation;
+
+ bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64));
+ reservation = bkey_reservation_init(new.k);
+ reservation->k.p = iter->pos;
+ bch2_key_resize(&reservation->k, sectors);
+ reservation->v.nr_replicas = opts.data_replicas;
+ } else {
+ struct bkey_i_extent *e;
+ struct bch_devs_list devs_have;
+ struct write_point *wp;
+
+ devs_have.nr = 0;
+
+ bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX);
+
+ e = bkey_extent_init(new.k);
+ e->k.p = iter->pos;
+
+ ret = bch2_alloc_sectors_start_trans(trans,
+ opts.foreground_target,
+ false,
+ write_point,
+ &devs_have,
+ opts.data_replicas,
+ opts.data_replicas,
+ BCH_WATERMARK_normal, 0, &cl, &wp);
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+ ret = -BCH_ERR_transaction_restart_nested;
+ if (ret)
+ goto err;
+
+ sectors = min_t(u64, sectors, wp->sectors_free);
+ sectors_allocated = sectors;
+
+ bch2_key_resize(&e->k, sectors);
+
+ bch2_open_bucket_get(c, wp, &open_buckets);
+ bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
+ bch2_alloc_sectors_done(c, wp);
+
+ extent_for_each_ptr(extent_i_to_s(e), ptr)
+ ptr->unwritten = true;
+ }
+
+ ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
+ 0, i_sectors_delta, true);
+err:
+ if (!ret && sectors_allocated)
+ bch2_increment_clock(c, sectors_allocated, WRITE);
+ if (should_print_err(ret))
+ bch_err_inum_offset_ratelimited(c,
+ inum.inum,
+ iter->pos.offset << 9,
+ "%s(): error: %s", __func__, bch2_err_str(ret));
+err_noprint:
+ bch2_open_buckets_put(c, &open_buckets);
+ bch2_disk_reservation_put(c, &disk_res);
+ bch2_bkey_buf_exit(&new, c);
+ bch2_bkey_buf_exit(&old, c);
+
+ if (closure_nr_remaining(&cl) != 1) {
+ bch2_trans_unlock(trans);
+ closure_sync(&cl);
+ }
+
+ return ret;
+}
+
+/*
+ * Returns -BCH_ERR_transaction_restart if we had to drop locks:
+ */
+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
+ subvol_inum inum, u64 end,
+ s64 *i_sectors_delta)
+{
+ struct bch_fs *c = trans->c;
+ unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
+ struct bpos end_pos = POS(inum.inum, end);
+ struct bkey_s_c k;
+ int ret = 0, ret2 = 0;
+ u32 snapshot;
+
+ while (!ret ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ struct disk_reservation disk_res =
+ bch2_disk_reservation_init(c, 0);
+ struct bkey_i delete;
+
+ if (ret)
+ ret2 = ret;
+
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ continue;
+
+ bch2_btree_iter_set_snapshot(iter, snapshot);
+
+ /*
+ * peek_upto() doesn't have ideal semantics for extents:
+ */
+ k = bch2_btree_iter_peek_upto(iter, end_pos);
+ if (!k.k)
+ break;
+
+ ret = bkey_err(k);
+ if (ret)
+ continue;
+
+ bkey_init(&delete.k);
+ delete.k.p = iter->pos;
+
+ /* create the biggest key we can */
+ bch2_key_resize(&delete.k, max_sectors);
+ bch2_cut_back(end_pos, &delete);
+
+ ret = bch2_extent_update(trans, inum, iter, &delete,
+ &disk_res, 0, i_sectors_delta, false);
+ bch2_disk_reservation_put(c, &disk_res);
+ }
+
+ return ret ?: ret2;
+}
+
+int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
+ s64 *i_sectors_delta)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ POS(inum.inum, start),
+ BTREE_ITER_INTENT);
+
+ ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta);
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+
+ return ret;
+}
+
+/* truncate: */
+
+void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_s_c_logged_op_truncate op = bkey_s_c_to_logged_op_truncate(k);
+
+ prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol));
+ prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum));
+ prt_printf(out, " new_i_size=%llu", le64_to_cpu(op.v->new_i_size));
+}
+
+static int truncate_set_isize(struct btree_trans *trans,
+ subvol_inum inum,
+ u64 new_i_size)
+{
+ struct btree_iter iter = { NULL };
+ struct bch_inode_unpacked inode_u;
+ int ret;
+
+ ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT) ?:
+ (inode_u.bi_size = new_i_size, 0) ?:
+ bch2_inode_write(trans, &iter, &inode_u);
+
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
+ struct bkey_i *op_k,
+ u64 *i_sectors_delta)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter fpunch_iter;
+ struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k);
+ subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
+ u64 new_i_size = le64_to_cpu(op->v.new_i_size);
+ int ret;
+
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ truncate_set_isize(trans, inum, new_i_size));
+ if (ret)
+ goto err;
+
+ bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents,
+ POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9),
+ BTREE_ITER_INTENT);
+ ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta);
+ bch2_trans_iter_exit(trans, &fpunch_iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+err:
+ bch2_logged_op_finish(trans, op_k);
+ return ret;
+}
+
+int bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k)
+{
+ return __bch2_resume_logged_op_truncate(trans, op_k, NULL);
+}
+
+int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta)
+{
+ struct bkey_i_logged_op_truncate op;
+
+ bkey_logged_op_truncate_init(&op.k_i);
+ op.v.subvol = cpu_to_le32(inum.subvol);
+ op.v.inum = cpu_to_le64(inum.inum);
+ op.v.new_i_size = cpu_to_le64(new_i_size);
+
+ /*
+ * Logged ops aren't atomic w.r.t. snapshot creation: creating a
+ * snapshot while they're in progress, then crashing, will result in the
+ * resume only proceeding in one of the snapshots
+ */
+ down_read(&c->snapshot_create_lock);
+ int ret = bch2_trans_run(c,
+ bch2_logged_op_start(trans, &op.k_i) ?:
+ __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta));
+ up_read(&c->snapshot_create_lock);
+
+ return ret;
+}
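+
+/*
+ * Usage sketch (illustrative only):
+ *
+ *   s64 i_sectors_delta = 0;
+ *   int ret = bch2_truncate(c, inum, new_size_in_bytes, &i_sectors_delta);
+ *
+ * new_i_size is in bytes; extents beyond the new size are punched, and
+ * i_sectors_delta returns the resulting change in allocated sectors.
+ */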
+
+/* finsert/fcollapse: */
+
+void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_s_c_logged_op_finsert op = bkey_s_c_to_logged_op_finsert(k);
+
+ prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol));
+ prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum));
+ prt_printf(out, " dst_offset=%lli", le64_to_cpu(op.v->dst_offset));
+ prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset));
+}
+
+static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len)
+{
+ struct btree_iter iter;
+ struct bch_inode_unpacked inode_u;
+ int ret;
+
+ offset <<= 9;
+ len <<= 9;
+
+ ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
+ if (ret)
+ return ret;
+
+ if (len > 0) {
+ if (MAX_LFS_FILESIZE - inode_u.bi_size < len) {
+ ret = -EFBIG;
+ goto err;
+ }
+
+ if (offset >= inode_u.bi_size) {
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ inode_u.bi_size += len;
+ inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c);
+
+ ret = bch2_inode_write(trans, &iter, &inode_u);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
+ struct bkey_i *op_k,
+ u64 *i_sectors_delta)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k);
+ subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
+ struct bch_io_opts opts;
+ u64 dst_offset = le64_to_cpu(op->v.dst_offset);
+ u64 src_offset = le64_to_cpu(op->v.src_offset);
+ s64 shift = dst_offset - src_offset;
+ u64 len = abs(shift);
+ u64 pos = le64_to_cpu(op->v.pos);
+ bool insert = shift > 0;
+ int ret = 0;
+
+ ret = bch2_inum_opts_get(trans, inum, &opts);
+ if (ret)
+ return ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ POS(inum.inum, 0),
+ BTREE_ITER_INTENT);
+
+ switch (op->v.state) {
+ case LOGGED_OP_FINSERT_start:
+ op->v.state = LOGGED_OP_FINSERT_shift_extents;
+
+ if (insert) {
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ adjust_i_size(trans, inum, src_offset, len) ?:
+ bch2_logged_op_update(trans, &op->k_i));
+ if (ret)
+ goto err;
+ } else {
+ bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset));
+
+ ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto err;
+
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_logged_op_update(trans, &op->k_i));
+ }
+
+ fallthrough;
+ case LOGGED_OP_FINSERT_shift_extents:
+ while (1) {
+ struct disk_reservation disk_res =
+ bch2_disk_reservation_init(c, 0);
+ struct bkey_i delete, *copy;
+ struct bkey_s_c k;
+ struct bpos src_pos = POS(inum.inum, src_offset);
+ u32 snapshot;
+
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto btree_err;
+
+ bch2_btree_iter_set_snapshot(&iter, snapshot);
+ bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot));
+
+ k = insert
+ ? bch2_btree_iter_peek_prev(&iter)
+ : bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
+ if ((ret = bkey_err(k)))
+ goto btree_err;
+
+ if (!k.k ||
+ k.k->p.inode != inum.inum ||
+ bkey_le(k.k->p, POS(inum.inum, src_offset)))
+ break;
+
+ copy = bch2_bkey_make_mut_noupdate(trans, k);
+ if ((ret = PTR_ERR_OR_ZERO(copy)))
+ goto btree_err;
+
+ if (insert &&
+ bkey_lt(bkey_start_pos(k.k), src_pos)) {
+ bch2_cut_front(src_pos, copy);
+
+ /* Splitting compressed extent? */
+ bch2_disk_reservation_add(c, &disk_res,
+ copy->k.size *
+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy)),
+ BCH_DISK_RESERVATION_NOFAIL);
+ }
+
+ bkey_init(&delete.k);
+ delete.k.p = copy->k.p;
+ delete.k.p.snapshot = snapshot;
+ delete.k.size = copy->k.size;
+
+ copy->k.p.offset += shift;
+ copy->k.p.snapshot = snapshot;
+
+ op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);
+
+ ret = bch2_bkey_set_needs_rebalance(c, copy,
+ opts.background_target,
+ opts.background_compression) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
+ bch2_logged_op_update(trans, &op->k_i) ?:
+ bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc);
+btree_err:
+ bch2_disk_reservation_put(c, &disk_res);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ goto err;
+
+ pos = le64_to_cpu(op->v.pos);
+ }
+
+ op->v.state = LOGGED_OP_FINSERT_finish;
+
+ if (!insert) {
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ adjust_i_size(trans, inum, src_offset, shift) ?:
+ bch2_logged_op_update(trans, &op->k_i));
+ } else {
+ /* We need an inode update to update bi_journal_seq for fsync: */
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ adjust_i_size(trans, inum, 0, 0) ?:
+ bch2_logged_op_update(trans, &op->k_i));
+ }
+
+ break;
+ case LOGGED_OP_FINSERT_finish:
+ break;
+ }
+err:
+ bch2_logged_op_finish(trans, op_k);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k)
+{
+ return __bch2_resume_logged_op_finsert(trans, op_k, NULL);
+}
+
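+/*
+ * Implements both insert range (insert == true: extents at and after @offset
+ * are shifted right by @len sectors and i_size grows) and collapse range
+ * (insert == false: [offset, offset + len) is punched and later extents are
+ * shifted left). @offset and @len are in sectors.
+ */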
+int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
+ u64 offset, u64 len, bool insert,
+ s64 *i_sectors_delta)
+{
+ struct bkey_i_logged_op_finsert op;
+ s64 shift = insert ? len : -len;
+
+ bkey_logged_op_finsert_init(&op.k_i);
+ op.v.subvol = cpu_to_le32(inum.subvol);
+ op.v.inum = cpu_to_le64(inum.inum);
+ op.v.dst_offset = cpu_to_le64(offset + shift);
+ op.v.src_offset = cpu_to_le64(offset);
+ op.v.pos = cpu_to_le64(insert ? U64_MAX : offset);
+
+ /*
+ * Logged ops aren't atomic w.r.t. snapshot creation: creating a
+ * snapshot while they're in progress, then crashing, will result in the
+ * resume only proceeding in one of the snapshots
+ */
+ down_read(&c->snapshot_create_lock);
+ int ret = bch2_trans_run(c,
+ bch2_logged_op_start(trans, &op.k_i) ?:
+ __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta));
+ up_read(&c->snapshot_create_lock);
+
+ return ret;
+}
diff --git a/c_src/libbcachefs/io_misc.h b/c_src/libbcachefs/io_misc.h
new file mode 100644
index 00000000..9cb44a7c
--- /dev/null
+++ b/c_src/libbcachefs/io_misc.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IO_MISC_H
+#define _BCACHEFS_IO_MISC_H
+
+int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
+ u64, struct bch_io_opts, s64 *,
+ struct write_point_specifier);
+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
+ subvol_inum, u64, s64 *);
+int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
+
+void bch2_logged_op_truncate_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_logged_op_truncate ((struct bkey_ops) { \
+ .val_to_text = bch2_logged_op_truncate_to_text, \
+ .min_val_size = 24, \
+})
+
+int bch2_resume_logged_op_truncate(struct btree_trans *, struct bkey_i *);
+
+int bch2_truncate(struct bch_fs *, subvol_inum, u64, u64 *);
+
+void bch2_logged_op_finsert_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_logged_op_finsert ((struct bkey_ops) { \
+ .val_to_text = bch2_logged_op_finsert_to_text, \
+ .min_val_size = 24, \
+})
+
+int bch2_resume_logged_op_finsert(struct btree_trans *, struct bkey_i *);
+
+int bch2_fcollapse_finsert(struct bch_fs *, subvol_inum, u64, u64, bool, s64 *);
+
+#endif /* _BCACHEFS_IO_MISC_H */
diff --git a/c_src/libbcachefs/io_read.c b/c_src/libbcachefs/io_read.c
new file mode 100644
index 00000000..3c574d88
--- /dev/null
+++ b/c_src/libbcachefs/io_read.c
@@ -0,0 +1,1220 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Some low level IO code, and hacks for various block layer limitations
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "clock.h"
+#include "compress.h"
+#include "data_update.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "error.h"
+#include "io_read.h"
+#include "io_misc.h"
+#include "io_write.h"
+#include "subvolume.h"
+#include "trace.h"
+
+#include <linux/sched/mm.h>
+
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+
+static bool bch2_target_congested(struct bch_fs *c, u16 target)
+{
+ const struct bch_devs_mask *devs;
+ unsigned d, nr = 0, total = 0;
+ u64 now = local_clock(), last;
+ s64 congested;
+ struct bch_dev *ca;
+
+ if (!target)
+ return false;
+
+ rcu_read_lock();
+ devs = bch2_target_to_mask(c, target) ?:
+ &c->rw_devs[BCH_DATA_user];
+
+ for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
+ ca = rcu_dereference(c->devs[d]);
+ if (!ca)
+ continue;
+
+ congested = atomic_read(&ca->congested);
+ last = READ_ONCE(ca->congested_last);
+ if (time_after64(now, last))
+ congested -= (now - last) >> 12;
+
+ total += max(congested, 0LL);
+ nr++;
+ }
+ rcu_read_unlock();
+
+ return bch2_rand_range(nr * CONGESTED_MAX) < total;
+}
+
+#else
+
+static bool bch2_target_congested(struct bch_fs *c, u16 target)
+{
+ return false;
+}
+
+#endif
+
+/* Cache promotion on read */
+
+struct promote_op {
+ struct rcu_head rcu;
+ u64 start_time;
+
+ struct rhash_head hash;
+ struct bpos pos;
+
+ struct data_update write;
+ struct bio_vec bi_inline_vecs[]; /* must be last */
+};
+
+static const struct rhashtable_params bch_promote_params = {
+ .head_offset = offsetof(struct promote_op, hash),
+ .key_offset = offsetof(struct promote_op, pos),
+ .key_len = sizeof(struct bpos),
+};
+
+static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
+ struct bpos pos,
+ struct bch_io_opts opts,
+ unsigned flags)
+{
+ BUG_ON(!opts.promote_target);
+
+ if (!(flags & BCH_READ_MAY_PROMOTE))
+ return -BCH_ERR_nopromote_may_not;
+
+ if (bch2_bkey_has_target(c, k, opts.promote_target))
+ return -BCH_ERR_nopromote_already_promoted;
+
+ if (bkey_extent_is_unwritten(k))
+ return -BCH_ERR_nopromote_unwritten;
+
+ if (bch2_target_congested(c, opts.promote_target))
+ return -BCH_ERR_nopromote_congested;
+
+ if (rhashtable_lookup_fast(&c->promote_table, &pos,
+ bch_promote_params))
+ return -BCH_ERR_nopromote_in_flight;
+
+ return 0;
+}
+
+static void promote_free(struct bch_fs *c, struct promote_op *op)
+{
+ int ret;
+
+ bch2_data_update_exit(&op->write);
+
+ ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
+ bch_promote_params);
+ BUG_ON(ret);
+ bch2_write_ref_put(c, BCH_WRITE_REF_promote);
+ kfree_rcu(op, rcu);
+}
+
+static void promote_done(struct bch_write_op *wop)
+{
+ struct promote_op *op =
+ container_of(wop, struct promote_op, write.op);
+ struct bch_fs *c = op->write.op.c;
+
+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
+ op->start_time);
+ promote_free(c, op);
+}
+
+static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
+{
+ struct bio *bio = &op->write.op.wbio.bio;
+
+ trace_and_count(op->write.op.c, read_promote, &rbio->bio);
+
+ /* we now own pages: */
+ BUG_ON(!rbio->bounce);
+ BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
+
+ memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
+ sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
+ swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
+
+ bch2_data_update_read_done(&op->write, rbio->pick.crc);
+}
+
+static struct promote_op *__promote_alloc(struct btree_trans *trans,
+ enum btree_id btree_id,
+ struct bkey_s_c k,
+ struct bpos pos,
+ struct extent_ptr_decoded *pick,
+ struct bch_io_opts opts,
+ unsigned sectors,
+ struct bch_read_bio **rbio)
+{
+ struct bch_fs *c = trans->c;
+ struct promote_op *op = NULL;
+ struct bio *bio;
+ unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
+ int ret;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
+ return ERR_PTR(-BCH_ERR_nopromote_no_writes);
+
+ op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_KERNEL);
+ if (!op) {
+ ret = -BCH_ERR_nopromote_enomem;
+ goto err;
+ }
+
+ op->start_time = local_clock();
+ op->pos = pos;
+
+ /*
+ * We don't use the mempool here because extents that aren't
+ * checksummed or compressed can be too big for the mempool:
+ */
+ *rbio = kzalloc(sizeof(struct bch_read_bio) +
+ sizeof(struct bio_vec) * pages,
+ GFP_KERNEL);
+ if (!*rbio) {
+ ret = -BCH_ERR_nopromote_enomem;
+ goto err;
+ }
+
+ rbio_init(&(*rbio)->bio, opts);
+ bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
+
+ if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, GFP_KERNEL)) {
+ ret = -BCH_ERR_nopromote_enomem;
+ goto err;
+ }
+
+ (*rbio)->bounce = true;
+ (*rbio)->split = true;
+ (*rbio)->kmalloc = true;
+
+ if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
+ bch_promote_params)) {
+ ret = -BCH_ERR_nopromote_in_flight;
+ goto err;
+ }
+
+ bio = &op->write.op.wbio.bio;
+ bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
+
+ ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
+ writepoint_hashed((unsigned long) current),
+ opts,
+ (struct data_update_opts) {
+ .target = opts.promote_target,
+ .extra_replicas = 1,
+ .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
+ },
+ btree_id, k);
+ /*
+ * possible errors: -BCH_ERR_nocow_lock_blocked,
+ * -BCH_ERR_ENOSPC_disk_reservation:
+ */
+ if (ret) {
+ BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
+ bch_promote_params));
+ goto err;
+ }
+
+ op->write.op.end_io = promote_done;
+
+ return op;
+err:
+ if (*rbio)
+ bio_free_pages(&(*rbio)->bio);
+ kfree(*rbio);
+ *rbio = NULL;
+ kfree(op);
+ bch2_write_ref_put(c, BCH_WRITE_REF_promote);
+ return ERR_PTR(ret);
+}
+
+noinline
+static struct promote_op *promote_alloc(struct btree_trans *trans,
+ struct bvec_iter iter,
+ struct bkey_s_c k,
+ struct extent_ptr_decoded *pick,
+ struct bch_io_opts opts,
+ unsigned flags,
+ struct bch_read_bio **rbio,
+ bool *bounce,
+ bool *read_full)
+{
+ struct bch_fs *c = trans->c;
+ bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
+ /* data might have to be decompressed in the write path: */
+ unsigned sectors = promote_full
+ ? max(pick->crc.compressed_size, pick->crc.live_size)
+ : bvec_iter_sectors(iter);
+ struct bpos pos = promote_full
+ ? bkey_start_pos(k.k)
+ : POS(k.k->p.inode, iter.bi_sector);
+ struct promote_op *promote;
+ int ret;
+
+ ret = should_promote(c, k, pos, opts, flags);
+ if (ret)
+ goto nopromote;
+
+ promote = __promote_alloc(trans,
+ k.k->type == KEY_TYPE_reflink_v
+ ? BTREE_ID_reflink
+ : BTREE_ID_extents,
+ k, pos, pick, opts, sectors, rbio);
+ ret = PTR_ERR_OR_ZERO(promote);
+ if (ret)
+ goto nopromote;
+
+ *bounce = true;
+ *read_full = promote_full;
+ return promote;
+nopromote:
+ trace_read_nopromote(c, ret);
+ return NULL;
+}
+
+/* Read */
+
+#define READ_RETRY_AVOID 1
+#define READ_RETRY 2
+#define READ_ERR 3
+
+enum rbio_context {
+ RBIO_CONTEXT_NULL,
+ RBIO_CONTEXT_HIGHPRI,
+ RBIO_CONTEXT_UNBOUND,
+};
+
+static inline struct bch_read_bio *
+bch2_rbio_parent(struct bch_read_bio *rbio)
+{
+ return rbio->split ? rbio->parent : rbio;
+}
+
+__always_inline
+static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn,
+ enum rbio_context context,
+ struct workqueue_struct *wq)
+{
+ if (context <= rbio->context) {
+ fn(&rbio->work);
+ } else {
+ rbio->work.func = fn;
+ rbio->context = context;
+ queue_work(wq, &rbio->work);
+ }
+}
+
+static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
+{
+ BUG_ON(rbio->bounce && !rbio->split);
+
+ if (rbio->promote)
+ promote_free(rbio->c, rbio->promote);
+ rbio->promote = NULL;
+
+ if (rbio->bounce)
+ bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
+
+ if (rbio->split) {
+ struct bch_read_bio *parent = rbio->parent;
+
+ if (rbio->kmalloc)
+ kfree(rbio);
+ else
+ bio_put(&rbio->bio);
+
+ rbio = parent;
+ }
+
+ return rbio;
+}
+
+/*
+ * Only called on a top level bch_read_bio to complete an entire read request,
+ * not a split:
+ */
+static void bch2_rbio_done(struct bch_read_bio *rbio)
+{
+ if (rbio->start_time)
+ bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
+ rbio->start_time);
+ bio_endio(&rbio->bio);
+}
+
+static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
+ struct bvec_iter bvec_iter,
+ struct bch_io_failures *failed,
+ unsigned flags)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_buf sk;
+ struct bkey_s_c k;
+ int ret;
+
+ flags &= ~BCH_READ_LAST_FRAGMENT;
+ flags |= BCH_READ_MUST_CLONE;
+
+ bch2_bkey_buf_init(&sk);
+
+ bch2_trans_iter_init(trans, &iter, rbio->data_btree,
+ rbio->read_pos, BTREE_ITER_SLOTS);
+retry:
+ rbio->bio.bi_status = 0;
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ if (bkey_err(k))
+ goto err;
+
+ bch2_bkey_buf_reassemble(&sk, c, k);
+ k = bkey_i_to_s_c(sk.k);
+ bch2_trans_unlock(trans);
+
+ if (!bch2_bkey_matches_ptr(c, k,
+ rbio->pick.ptr,
+ rbio->data_pos.offset -
+ rbio->pick.crc.offset)) {
+ /* extent we wanted to read no longer exists: */
+ rbio->hole = true;
+ goto out;
+ }
+
+ ret = __bch2_read_extent(trans, rbio, bvec_iter,
+ rbio->read_pos,
+ rbio->data_btree,
+ k, 0, failed, flags);
+ if (ret == READ_RETRY)
+ goto retry;
+ if (ret)
+ goto err;
+out:
+ bch2_rbio_done(rbio);
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ bch2_bkey_buf_exit(&sk, c);
+ return;
+err:
+ rbio->bio.bi_status = BLK_STS_IOERR;
+ goto out;
+}
+
+static void bch2_rbio_retry(struct work_struct *work)
+{
+ struct bch_read_bio *rbio =
+ container_of(work, struct bch_read_bio, work);
+ struct bch_fs *c = rbio->c;
+ struct bvec_iter iter = rbio->bvec_iter;
+ unsigned flags = rbio->flags;
+ subvol_inum inum = {
+ .subvol = rbio->subvol,
+ .inum = rbio->read_pos.inode,
+ };
+ struct bch_io_failures failed = { .nr = 0 };
+
+ trace_and_count(c, read_retry, &rbio->bio);
+
+ if (rbio->retry == READ_RETRY_AVOID)
+ bch2_mark_io_failure(&failed, &rbio->pick);
+
+ rbio->bio.bi_status = 0;
+
+ rbio = bch2_rbio_free(rbio);
+
+ flags |= BCH_READ_IN_RETRY;
+ flags &= ~BCH_READ_MAY_PROMOTE;
+
+ if (flags & BCH_READ_NODECODE) {
+ bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
+ } else {
+ flags &= ~BCH_READ_LAST_FRAGMENT;
+ flags |= BCH_READ_MUST_CLONE;
+
+ __bch2_read(c, rbio, iter, inum, &failed, flags);
+ }
+}
+
+static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
+ blk_status_t error)
+{
+ rbio->retry = retry;
+
+ if (rbio->flags & BCH_READ_IN_RETRY)
+ return;
+
+ if (retry == READ_ERR) {
+ rbio = bch2_rbio_free(rbio);
+
+ rbio->bio.bi_status = error;
+ bch2_rbio_done(rbio);
+ } else {
+ bch2_rbio_punt(rbio, bch2_rbio_retry,
+ RBIO_CONTEXT_UNBOUND, system_unbound_wq);
+ }
+}
+
+static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
+ struct bch_read_bio *rbio)
+{
+ struct bch_fs *c = rbio->c;
+ u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
+ struct bch_extent_crc_unpacked new_crc;
+ struct btree_iter iter;
+ struct bkey_i *new;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ if (crc_is_compressed(rbio->pick.crc))
+ return 0;
+
+ k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ if ((ret = bkey_err(k)))
+ goto out;
+
+ if (bversion_cmp(k.k->version, rbio->version) ||
+ !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
+ goto out;
+
+ /* Extent was merged? */
+ if (bkey_start_offset(k.k) < data_offset ||
+ k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
+ goto out;
+
+ if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
+ rbio->pick.crc, NULL, &new_crc,
+ bkey_start_offset(k.k) - data_offset, k.k->size,
+ rbio->pick.crc.csum_type)) {
+ bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * going to be temporarily appending another checksum entry:
+ */
+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
+ sizeof(struct bch_extent_crc128));
+ if ((ret = PTR_ERR_OR_ZERO(new)))
+ goto out;
+
+ bkey_reassemble(new, k);
+
+ if (!bch2_bkey_narrow_crcs(new, new_crc))
+ goto out;
+
+ ret = bch2_trans_update(trans, &iter, new,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
+{
+ bch2_trans_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ __bch2_rbio_narrow_crcs(trans, rbio));
+}
+
+/* Inner part that may run in process context */
+static void __bch2_read_endio(struct work_struct *work)
+{
+ struct bch_read_bio *rbio =
+ container_of(work, struct bch_read_bio, work);
+ struct bch_fs *c = rbio->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
+ struct bio *src = &rbio->bio;
+ struct bio *dst = &bch2_rbio_parent(rbio)->bio;
+ struct bvec_iter dst_iter = rbio->bvec_iter;
+ struct bch_extent_crc_unpacked crc = rbio->pick.crc;
+ struct nonce nonce = extent_nonce(rbio->version, crc);
+ unsigned nofs_flags;
+ struct bch_csum csum;
+ int ret;
+
+ nofs_flags = memalloc_nofs_save();
+
+ /* Reset iterator for checksumming and copying bounced data: */
+ if (rbio->bounce) {
+ src->bi_iter.bi_size = crc.compressed_size << 9;
+ src->bi_iter.bi_idx = 0;
+ src->bi_iter.bi_bvec_done = 0;
+ } else {
+ src->bi_iter = rbio->bvec_iter;
+ }
+
+ csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
+ if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io)
+ goto csum_err;
+
+ /*
+ * XXX
+ * We need to rework the narrow_crcs path to deliver the read completion
+ * first, and then punt to a different workqueue, otherwise we're
+ * holding up reads while doing btree updates which is bad for memory
+ * reclaim.
+ */
+ if (unlikely(rbio->narrow_crcs))
+ bch2_rbio_narrow_crcs(rbio);
+
+ if (rbio->flags & BCH_READ_NODECODE)
+ goto nodecode;
+
+ /* Adjust crc to point to subset of data we want: */
+ crc.offset += rbio->offset_into_extent;
+ crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
+
+ if (crc_is_compressed(crc)) {
+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+ if (ret)
+ goto decrypt_err;
+
+ if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
+ !c->opts.no_data_io)
+ goto decompression_err;
+ } else {
+ /* don't need to decrypt the entire bio: */
+ nonce = nonce_add(nonce, crc.offset << 9);
+ bio_advance(src, crc.offset << 9);
+
+ BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
+ src->bi_iter.bi_size = dst_iter.bi_size;
+
+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+ if (ret)
+ goto decrypt_err;
+
+ if (rbio->bounce) {
+ struct bvec_iter src_iter = src->bi_iter;
+
+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+ }
+ }
+
+ if (rbio->promote) {
+ /*
+ * Re-encrypt data we decrypted, so it's consistent with
+ * rbio->crc:
+ */
+ ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+ if (ret)
+ goto decrypt_err;
+
+ promote_start(rbio->promote, rbio);
+ rbio->promote = NULL;
+ }
+nodecode:
+ if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
+ rbio = bch2_rbio_free(rbio);
+ bch2_rbio_done(rbio);
+ }
+out:
+ memalloc_nofs_restore(nofs_flags);
+ return;
+csum_err:
+ /*
+ * Checksum error: if the bio wasn't bounced, we may have been
+ * reading into buffers owned by userspace (that userspace can
+ * scribble over) - retry the read, bouncing it this time:
+ */
+ if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
+ rbio->flags |= BCH_READ_MUST_BOUNCE;
+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
+ goto out;
+ }
+
+ struct printbuf buf = PRINTBUF;
+ buf.atomic++;
+ prt_str(&buf, "data ");
+ bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
+
+ bch_err_inum_offset_ratelimited(ca,
+ rbio->read_pos.inode,
+ rbio->read_pos.offset << 9,
+ "data %s", buf.buf);
+ printbuf_exit(&buf);
+
+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ goto out;
+decompression_err:
+ bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
+ rbio->read_pos.offset << 9,
+ "decompression error");
+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+ goto out;
+decrypt_err:
+ bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
+ rbio->read_pos.offset << 9,
+ "decrypt error");
+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+ goto out;
+}
+
+static void bch2_read_endio(struct bio *bio)
+{
+ struct bch_read_bio *rbio =
+ container_of(bio, struct bch_read_bio, bio);
+ struct bch_fs *c = rbio->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
+ struct workqueue_struct *wq = NULL;
+ enum rbio_context context = RBIO_CONTEXT_NULL;
+
+ if (rbio->have_ioref) {
+ bch2_latency_acct(ca, rbio->submit_time, READ);
+ percpu_ref_put(&ca->io_ref);
+ }
+
+ if (!rbio->split)
+ rbio->bio.bi_end_io = rbio->end_io;
+
+ if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
+ rbio->read_pos.inode,
+ rbio->read_pos.offset,
+ "data read error: %s",
+ bch2_blk_status_to_str(bio->bi_status))) {
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
+ return;
+ }
+
+ if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
+ ptr_stale(ca, &rbio->pick.ptr)) {
+ trace_and_count(c, read_reuse_race, &rbio->bio);
+
+ if (rbio->flags & BCH_READ_RETRY_IF_STALE)
+ bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
+ else
+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
+ return;
+ }
+
+ if (rbio->narrow_crcs ||
+ rbio->promote ||
+ crc_is_compressed(rbio->pick.crc) ||
+ bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
+ context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
+ else if (rbio->pick.crc.csum_type)
+ context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq;
+
+ bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
+}
+
+int __bch2_read_indirect_extent(struct btree_trans *trans,
+ unsigned *offset_into_extent,
+ struct bkey_buf *orig_k)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ u64 reflink_offset;
+ int ret;
+
+ reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
+ *offset_into_extent;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink,
+ POS(0, reflink_offset), 0);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type != KEY_TYPE_reflink_v &&
+ k.k->type != KEY_TYPE_indirect_inline_data) {
+ bch_err_inum_offset_ratelimited(trans->c,
+ orig_k->k->k.p.inode,
+ orig_k->k->k.p.offset << 9,
+ "%llu len %u points to nonexistent indirect extent %llu",
+ orig_k->k->k.p.offset,
+ orig_k->k->k.size,
+ reflink_offset);
+ bch2_inconsistent_error(trans->c);
+ ret = -EIO;
+ goto err;
+ }
+
+ *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
+ bch2_bkey_buf_reassemble(orig_k, trans->c, k);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
+ struct bkey_s_c k,
+ struct bch_extent_ptr ptr)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
+ struct btree_iter iter;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+ PTR_BUCKET_POS(c, &ptr),
+ BTREE_ITER_CACHED);
+
+ prt_printf(&buf, "Attempting to read from stale dirty pointer:");
+ printbuf_indent_add(&buf, 2);
+ prt_newline(&buf);
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
+
+ ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+ if (!ret) {
+ prt_newline(&buf);
+ bch2_bkey_val_to_text(&buf, c, k);
+ }
+
+ bch2_fs_inconsistent(c, "%s", buf.buf);
+
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf);
+}
+
+int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
+ struct bvec_iter iter, struct bpos read_pos,
+ enum btree_id data_btree, struct bkey_s_c k,
+ unsigned offset_into_extent,
+ struct bch_io_failures *failed, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct extent_ptr_decoded pick;
+ struct bch_read_bio *rbio = NULL;
+ struct bch_dev *ca = NULL;
+ struct promote_op *promote = NULL;
+ bool bounce = false, read_full = false, narrow_crcs = false;
+ struct bpos data_pos = bkey_start_pos(k.k);
+ int pick_ret;
+
+ if (bkey_extent_is_inline_data(k.k)) {
+ unsigned bytes = min_t(unsigned, iter.bi_size,
+ bkey_inline_data_bytes(k.k));
+
+ swap(iter.bi_size, bytes);
+ memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k));
+ swap(iter.bi_size, bytes);
+ bio_advance_iter(&orig->bio, &iter, bytes);
+ zero_fill_bio_iter(&orig->bio, iter);
+ goto out_read_done;
+ }
+retry_pick:
+ pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
+
+ /* hole or reservation - just zero fill: */
+ if (!pick_ret)
+ goto hole;
+
+ if (pick_ret < 0) {
+ bch_err_inum_offset_ratelimited(c,
+ read_pos.inode, read_pos.offset << 9,
+ "no device to read from");
+ goto err;
+ }
+
+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
+ /*
+ * Stale dirty pointers are treated as IO errors, but @failed isn't
+ * allocated unless we're in the retry path - so if we're not in the
+ * retry path, don't check here, it'll be caught in bch2_read_endio()
+ * and we'll end up in the retry path:
+ */
+ if ((flags & BCH_READ_IN_RETRY) &&
+ !pick.ptr.cached &&
+ unlikely(ptr_stale(ca, &pick.ptr))) {
+ read_from_stale_dirty_pointer(trans, k, pick.ptr);
+ bch2_mark_io_failure(failed, &pick);
+ goto retry_pick;
+ }
+
+ /*
+ * Unlock the iterator while the btree node's lock is still in
+ * cache, before doing the IO:
+ */
+ bch2_trans_unlock(trans);
+
+ if (flags & BCH_READ_NODECODE) {
+ /*
+ * can happen if we retry, and the extent we were going to read
+ * has been merged in the meantime:
+ */
+ if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS)
+ goto hole;
+
+ iter.bi_size = pick.crc.compressed_size << 9;
+ goto get_bio;
+ }
+
+ if (!(flags & BCH_READ_LAST_FRAGMENT) ||
+ bio_flagged(&orig->bio, BIO_CHAIN))
+ flags |= BCH_READ_MUST_CLONE;
+
+ narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
+ bch2_can_narrow_extent_crcs(k, pick.crc);
+
+ if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
+ flags |= BCH_READ_MUST_BOUNCE;
+
+ EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
+
+ if (crc_is_compressed(pick.crc) ||
+ (pick.crc.csum_type != BCH_CSUM_none &&
+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+ (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
+ (flags & BCH_READ_USER_MAPPED)) ||
+ (flags & BCH_READ_MUST_BOUNCE)))) {
+ read_full = true;
+ bounce = true;
+ }
+
+ if (orig->opts.promote_target)
+ promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
+ &rbio, &bounce, &read_full);
+
+ if (!read_full) {
+ EBUG_ON(crc_is_compressed(pick.crc));
+ EBUG_ON(pick.crc.csum_type &&
+ (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+ bvec_iter_sectors(iter) != pick.crc.live_size ||
+ pick.crc.offset ||
+ offset_into_extent));
+
+ data_pos.offset += offset_into_extent;
+ pick.ptr.offset += pick.crc.offset +
+ offset_into_extent;
+ offset_into_extent = 0;
+ pick.crc.compressed_size = bvec_iter_sectors(iter);
+ pick.crc.uncompressed_size = bvec_iter_sectors(iter);
+ pick.crc.offset = 0;
+ pick.crc.live_size = bvec_iter_sectors(iter);
+ }
+get_bio:
+ if (rbio) {
+ /*
+ * promote already allocated bounce rbio:
+ * promote needs to allocate a bio big enough for uncompressing
+ * data in the write path, but we're not going to use it all
+ * here:
+ */
+ EBUG_ON(rbio->bio.bi_iter.bi_size <
+ pick.crc.compressed_size << 9);
+ rbio->bio.bi_iter.bi_size =
+ pick.crc.compressed_size << 9;
+ } else if (bounce) {
+ unsigned sectors = pick.crc.compressed_size;
+
+ rbio = rbio_init(bio_alloc_bioset(NULL,
+ DIV_ROUND_UP(sectors, PAGE_SECTORS),
+ 0,
+ GFP_NOFS,
+ &c->bio_read_split),
+ orig->opts);
+
+ bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
+ rbio->bounce = true;
+ rbio->split = true;
+ } else if (flags & BCH_READ_MUST_CLONE) {
+ /*
+ * Have to clone if there were any splits, due to error
+ * reporting issues (if a split errored, and retrying didn't
+ * work, when it reports the error to its parent (us) we don't
+ * know if the error was from our bio, and we should retry, or
+ * from the whole bio, in which case we don't want to retry and
+ * lose the error)
+ */
+ rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
+ &c->bio_read_split),
+ orig->opts);
+ rbio->bio.bi_iter = iter;
+ rbio->split = true;
+ } else {
+ rbio = orig;
+ rbio->bio.bi_iter = iter;
+ EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
+ }
+
+ EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
+
+ rbio->c = c;
+ rbio->submit_time = local_clock();
+ if (rbio->split)
+ rbio->parent = orig;
+ else
+ rbio->end_io = orig->bio.bi_end_io;
+ rbio->bvec_iter = iter;
+ rbio->offset_into_extent = offset_into_extent;
+ rbio->flags = flags;
+ rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
+ rbio->narrow_crcs = narrow_crcs;
+ rbio->hole = 0;
+ rbio->retry = 0;
+ rbio->context = 0;
+ /* XXX: only initialize this if needed */
+ rbio->devs_have = bch2_bkey_devs(k);
+ rbio->pick = pick;
+ rbio->subvol = orig->subvol;
+ rbio->read_pos = read_pos;
+ rbio->data_btree = data_btree;
+ rbio->data_pos = data_pos;
+ rbio->version = k.k->version;
+ rbio->promote = promote;
+ INIT_WORK(&rbio->work, NULL);
+
+ rbio->bio.bi_opf = orig->bio.bi_opf;
+ rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
+ rbio->bio.bi_end_io = bch2_read_endio;
+
+ if (rbio->bounce)
+ trace_and_count(c, read_bounce, &rbio->bio);
+
+ this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
+ bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
+
+ /*
+ * If it's being moved internally, we don't want to flag it as a cache
+ * hit:
+ */
+ if (pick.ptr.cached && !(flags & BCH_READ_NODECODE))
+ bch2_bucket_io_time_reset(trans, pick.ptr.dev,
+ PTR_BUCKET_NR(ca, &pick.ptr), READ);
+
+ if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
+ bio_inc_remaining(&orig->bio);
+ trace_and_count(c, read_split, &orig->bio);
+ }
+
+ if (!rbio->pick.idx) {
+ if (!rbio->have_ioref) {
+ bch_err_inum_offset_ratelimited(c,
+ read_pos.inode,
+ read_pos.offset << 9,
+ "no device to read from");
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ goto out;
+ }
+
+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user],
+ bio_sectors(&rbio->bio));
+ bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
+
+ if (unlikely(c->opts.no_data_io)) {
+ if (likely(!(flags & BCH_READ_IN_RETRY)))
+ bio_endio(&rbio->bio);
+ } else {
+ if (likely(!(flags & BCH_READ_IN_RETRY)))
+ submit_bio(&rbio->bio);
+ else
+ submit_bio_wait(&rbio->bio);
+ }
+
+ /*
+ * We just submitted IO which may block, we expect relock fail
+ * events and shouldn't count them:
+ */
+ trans->notrace_relock_fail = true;
+ } else {
+ /* Attempting reconstruct read: */
+ if (bch2_ec_read_extent(trans, rbio)) {
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ goto out;
+ }
+
+ if (likely(!(flags & BCH_READ_IN_RETRY)))
+ bio_endio(&rbio->bio);
+ }
+out:
+ if (likely(!(flags & BCH_READ_IN_RETRY))) {
+ return 0;
+ } else {
+ int ret;
+
+ rbio->context = RBIO_CONTEXT_UNBOUND;
+ bch2_read_endio(&rbio->bio);
+
+ ret = rbio->retry;
+ rbio = bch2_rbio_free(rbio);
+
+ if (ret == READ_RETRY_AVOID) {
+ bch2_mark_io_failure(failed, &pick);
+ ret = READ_RETRY;
+ }
+
+ if (!ret)
+ goto out_read_done;
+
+ return ret;
+ }
+
+err:
+ if (flags & BCH_READ_IN_RETRY)
+ return READ_ERR;
+
+ orig->bio.bi_status = BLK_STS_IOERR;
+ goto out_read_done;
+
+hole:
+ /*
+ * won't normally happen in the BCH_READ_NODECODE
+ * (bch2_move_extent()) path, but if we retry and the extent we wanted
+ * to read no longer exists we have to signal that:
+ */
+ if (flags & BCH_READ_NODECODE)
+ orig->hole = true;
+
+ zero_fill_bio_iter(&orig->bio, iter);
+out_read_done:
+ if (flags & BCH_READ_LAST_FRAGMENT)
+ bch2_rbio_done(orig);
+ return 0;
+}
+
+void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
+ struct bvec_iter bvec_iter, subvol_inum inum,
+ struct bch_io_failures *failed, unsigned flags)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_buf sk;
+ struct bkey_s_c k;
+ u32 snapshot;
+ int ret;
+
+ BUG_ON(flags & BCH_READ_NODECODE);
+
+ bch2_bkey_buf_init(&sk);
+retry:
+ bch2_trans_begin(trans);
+ iter = (struct btree_iter) { NULL };
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ SPOS(inum.inum, bvec_iter.bi_sector, snapshot),
+ BTREE_ITER_SLOTS);
+ while (1) {
+ unsigned bytes, sectors, offset_into_extent;
+ enum btree_id data_btree = BTREE_ID_extents;
+
+ /*
+ * read_extent -> io_time_reset may cause a transaction restart
+ * without returning an error, we need to check for that here:
+ */
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ break;
+
+ bch2_btree_iter_set_pos(&iter,
+ POS(inum.inum, bvec_iter.bi_sector));
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ break;
+
+ offset_into_extent = iter.pos.offset -
+ bkey_start_offset(k.k);
+ sectors = k.k->size - offset_into_extent;
+
+ bch2_bkey_buf_reassemble(&sk, c, k);
+
+ ret = bch2_read_indirect_extent(trans, &data_btree,
+ &offset_into_extent, &sk);
+ if (ret)
+ break;
+
+ k = bkey_i_to_s_c(sk.k);
+
+ /*
+ * With indirect extents, the amount of data to read is the min
+ * of the original extent and the indirect extent:
+ */
+ sectors = min(sectors, k.k->size - offset_into_extent);
+
+ bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
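+		/*
+		 * Temporarily clamp bvec_iter to just this fragment; the
+		 * second swap() below restores the full size before we
+		 * advance the iterator:
+		 */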
+ swap(bvec_iter.bi_size, bytes);
+
+ if (bvec_iter.bi_size == bytes)
+ flags |= BCH_READ_LAST_FRAGMENT;
+
+ ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
+ data_btree, k,
+ offset_into_extent, failed, flags);
+ if (ret)
+ break;
+
+ if (flags & BCH_READ_LAST_FRAGMENT)
+ break;
+
+ swap(bvec_iter.bi_size, bytes);
+ bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
+
+ ret = btree_trans_too_many_iters(trans);
+ if (ret)
+ break;
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ ret == READ_RETRY ||
+ ret == READ_RETRY_AVOID)
+ goto retry;
+
+ bch2_trans_put(trans);
+ bch2_bkey_buf_exit(&sk, c);
+
+ if (ret) {
+ bch_err_inum_offset_ratelimited(c, inum.inum,
+ bvec_iter.bi_sector << 9,
+ "read error %i from btree lookup", ret);
+ rbio->bio.bi_status = BLK_STS_IOERR;
+ bch2_rbio_done(rbio);
+ }
+}
+
+void bch2_fs_io_read_exit(struct bch_fs *c)
+{
+ if (c->promote_table.tbl)
+ rhashtable_destroy(&c->promote_table);
+ bioset_exit(&c->bio_read_split);
+ bioset_exit(&c->bio_read);
+}
+
+int bch2_fs_io_read_init(struct bch_fs *c)
+{
+ if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_bio_read_init;
+
+ if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_bio_read_split_init;
+
+ if (rhashtable_init(&c->promote_table, &bch_promote_params))
+ return -BCH_ERR_ENOMEM_promote_table_init;
+
+ return 0;
+}
diff --git a/c_src/libbcachefs/io_read.h b/c_src/libbcachefs/io_read.h
new file mode 100644
index 00000000..d9c18bb7
--- /dev/null
+++ b/c_src/libbcachefs/io_read.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IO_READ_H
+#define _BCACHEFS_IO_READ_H
+
+#include "bkey_buf.h"
+
+struct bch_read_bio {
+ struct bch_fs *c;
+ u64 start_time;
+ u64 submit_time;
+
+ /*
+ * Reads will often have to be split, and if the extent being read from
+ * was checksummed or compressed we'll also have to allocate bounce
+ * buffers and copy the data back into the original bio.
+ *
+ * If we didn't have to split, we have to save and restore the original
+ * bi_end_io - @split below indicates which:
+ */
+ union {
+ struct bch_read_bio *parent;
+ bio_end_io_t *end_io;
+ };
+
+ /*
+ * Saved copy of bio->bi_iter, from submission time - allows us to
+ * resubmit on IO error, and also to copy data back to the original bio
+ * when we're bouncing:
+ */
+ struct bvec_iter bvec_iter;
+
+ unsigned offset_into_extent;
+
+ u16 flags;
+ union {
+ struct {
+ u16 bounce:1,
+ split:1,
+ kmalloc:1,
+ have_ioref:1,
+ narrow_crcs:1,
+ hole:1,
+ retry:2,
+ context:2;
+ };
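+		/* aliases the bitfield above, so all flags can be zeroed/tested at once: */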
+ u16 _state;
+ };
+
+ struct bch_devs_list devs_have;
+
+ struct extent_ptr_decoded pick;
+
+ /*
+ * pos we read from - different from data_pos for indirect extents:
+ */
+ u32 subvol;
+ struct bpos read_pos;
+
+ /*
+ * start pos of data we read (may not be pos of data we want) - for
+ * promote, narrow extents paths:
+ */
+ enum btree_id data_btree;
+ struct bpos data_pos;
+ struct bversion version;
+
+ struct promote_op *promote;
+
+ struct bch_io_opts opts;
+
+ struct work_struct work;
+
+ struct bio bio;
+};
+
+#define to_rbio(_bio) container_of((_bio), struct bch_read_bio, bio)
+
+struct bch_devs_mask;
+struct cache_promote_op;
+struct extent_ptr_decoded;
+
+int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
+ struct bkey_buf *);
+
+static inline int bch2_read_indirect_extent(struct btree_trans *trans,
+ enum btree_id *data_btree,
+ unsigned *offset_into_extent,
+ struct bkey_buf *k)
+{
+ if (k->k->k.type != KEY_TYPE_reflink_p)
+ return 0;
+
+ *data_btree = BTREE_ID_reflink;
+ return __bch2_read_indirect_extent(trans, offset_into_extent, k);
+}
+
+enum bch_read_flags {
+ BCH_READ_RETRY_IF_STALE = 1 << 0,
+ BCH_READ_MAY_PROMOTE = 1 << 1,
+ BCH_READ_USER_MAPPED = 1 << 2,
+ BCH_READ_NODECODE = 1 << 3,
+ BCH_READ_LAST_FRAGMENT = 1 << 4,
+
+ /* internal: */
+ BCH_READ_MUST_BOUNCE = 1 << 5,
+ BCH_READ_MUST_CLONE = 1 << 6,
+ BCH_READ_IN_RETRY = 1 << 7,
+};
+
+int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
+ struct bvec_iter, struct bpos, enum btree_id,
+ struct bkey_s_c, unsigned,
+ struct bch_io_failures *, unsigned);
+
+static inline void bch2_read_extent(struct btree_trans *trans,
+ struct bch_read_bio *rbio, struct bpos read_pos,
+ enum btree_id data_btree, struct bkey_s_c k,
+ unsigned offset_into_extent, unsigned flags)
+{
+ __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
+ data_btree, k, offset_into_extent, NULL, flags);
+}
+
+void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
+ subvol_inum, struct bch_io_failures *, unsigned flags);
+
+static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
+ subvol_inum inum)
+{
+ struct bch_io_failures failed = { .nr = 0 };
+
+ BUG_ON(rbio->_state);
+
+ rbio->c = c;
+ rbio->start_time = local_clock();
+ rbio->subvol = inum.subvol;
+
+ __bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed,
+ BCH_READ_RETRY_IF_STALE|
+ BCH_READ_MAY_PROMOTE|
+ BCH_READ_USER_MAPPED);
+}
+
+static inline struct bch_read_bio *rbio_init(struct bio *bio,
+ struct bch_io_opts opts)
+{
+ struct bch_read_bio *rbio = to_rbio(bio);
+
+ rbio->_state = 0;
+ rbio->promote = NULL;
+ rbio->opts = opts;
+ return rbio;
+}
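+
+/*
+ * Sketch of a typical call sequence from a read path - bio, io_opts and inum
+ * here stand for caller-provided values, and the bio is assumed to be embedded
+ * in a struct bch_read_bio with bi_iter describing the range to read:
+ *
+ *	struct bch_read_bio *rbio = rbio_init(bio, io_opts);
+ *	bch2_read(c, rbio, inum);
+ */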
+
+void bch2_fs_io_read_exit(struct bch_fs *);
+int bch2_fs_io_read_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_IO_READ_H */
diff --git a/c_src/libbcachefs/io_write.c b/c_src/libbcachefs/io_write.c
new file mode 100644
index 00000000..33c0e783
--- /dev/null
+++ b/c_src/libbcachefs/io_write.c
@@ -0,0 +1,1662 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_buf.h"
+#include "bset.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "clock.h"
+#include "compress.h"
+#include "debug.h"
+#include "ec.h"
+#include "error.h"
+#include "extent_update.h"
+#include "inode.h"
+#include "io_write.h"
+#include "journal.h"
+#include "keylist.h"
+#include "move.h"
+#include "nocow_locking.h"
+#include "rebalance.h"
+#include "subvolume.h"
+#include "super.h"
+#include "super-io.h"
+#include "trace.h"
+
+#include <linux/blkdev.h>
+#include <linux/prefetch.h>
+#include <linux/random.h>
+#include <linux/sched/mm.h>
+
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+
+static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency,
+ u64 now, int rw)
+{
+ u64 latency_capable =
+ ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m;
+ /* ideally we'd be taking into account the device's variance here: */
+ u64 latency_threshold = latency_capable << (rw == READ ? 2 : 3);
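+	/* i.e. 4x the device's typical latency for reads, 8x for writes */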
+ s64 latency_over = io_latency - latency_threshold;
+
+ if (latency_threshold && latency_over > 0) {
+ /*
+ * bump up congested by approximately latency_over * 4 /
+ * latency_threshold - we don't need much accuracy here so don't
+ * bother with the divide:
+ */
+ if (atomic_read(&ca->congested) < CONGESTED_MAX)
+ atomic_add(latency_over >>
+ max_t(int, ilog2(latency_threshold) - 2, 0),
+ &ca->congested);
+
+ ca->congested_last = now;
+ } else if (atomic_read(&ca->congested) > 0) {
+ atomic_dec(&ca->congested);
+ }
+}
+
+void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
+{
+ atomic64_t *latency = &ca->cur_latency[rw];
+ u64 now = local_clock();
+ u64 io_latency = time_after64(now, submit_time)
+ ? now - submit_time
+ : 0;
+ u64 old, new, v = atomic64_read(latency);
+
+ do {
+ old = v;
+
+ /*
+ * If the io latency was reasonably close to the current
+ * latency, skip doing the update and atomic operation - most of
+ * the time:
+ */
+ if (abs((int) (old - io_latency)) < (old >> 1) &&
+ now & ~(~0U << 5))
+ break;
+
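+		/*
+		 * ewma_add() with weight 5 is an exponentially weighted moving
+		 * average: roughly new = old + (io_latency - old) / 32
+		 */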
+ new = ewma_add(old, io_latency, 5);
+ } while ((v = atomic64_cmpxchg(latency, old, new)) != old);
+
+ bch2_congested_acct(ca, io_latency, now, rw);
+
+ __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now);
+}
+
+#endif
+
+/* Allocate, free from mempool: */
+
+void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
+{
+ struct bvec_iter_all iter;
+ struct bio_vec *bv;
+
+ bio_for_each_segment_all(bv, bio, iter)
+ if (bv->bv_page != ZERO_PAGE(0))
+ mempool_free(bv->bv_page, &c->bio_bounce_pages);
+ bio->bi_vcnt = 0;
+}
+
+static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool)
+{
+ struct page *page;
+
+ if (likely(!*using_mempool)) {
+ page = alloc_page(GFP_NOFS);
+ if (unlikely(!page)) {
+ mutex_lock(&c->bio_bounce_pages_lock);
+ *using_mempool = true;
+ goto pool_alloc;
+
+ }
+ } else {
+pool_alloc:
+ page = mempool_alloc(&c->bio_bounce_pages, GFP_NOFS);
+ }
+
+ return page;
+}
+
+void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
+ size_t size)
+{
+ bool using_mempool = false;
+
+ while (size) {
+ struct page *page = __bio_alloc_page_pool(c, &using_mempool);
+ unsigned len = min_t(size_t, PAGE_SIZE, size);
+
+ BUG_ON(!bio_add_page(bio, page, len, 0));
+ size -= len;
+ }
+
+ if (using_mempool)
+ mutex_unlock(&c->bio_bounce_pages_lock);
+}
+
+/* Extent update path: */
+
+int bch2_sum_sector_overwrites(struct btree_trans *trans,
+ struct btree_iter *extent_iter,
+ struct bkey_i *new,
+ bool *usage_increasing,
+ s64 *i_sectors_delta,
+ s64 *disk_sectors_delta)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c old;
+ unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new));
+ bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new));
+ int ret = 0;
+
+ *usage_increasing = false;
+ *i_sectors_delta = 0;
+ *disk_sectors_delta = 0;
+
+ bch2_trans_copy_iter(&iter, extent_iter);
+
+ for_each_btree_key_upto_continue_norestart(iter,
+ new->k.p, BTREE_ITER_SLOTS, old, ret) {
+ s64 sectors = min(new->k.p.offset, old.k->p.offset) -
+ max(bkey_start_offset(&new->k),
+ bkey_start_offset(old.k));
+
+ *i_sectors_delta += sectors *
+ (bkey_extent_is_allocation(&new->k) -
+ bkey_extent_is_allocation(old.k));
+
+ *disk_sectors_delta += sectors * bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new));
+ *disk_sectors_delta -= new->k.p.snapshot == old.k->p.snapshot
+ ? sectors * bch2_bkey_nr_ptrs_fully_allocated(old)
+ : 0;
+
+ if (!*usage_increasing &&
+ (new->k.p.snapshot != old.k->p.snapshot ||
+ new_replicas > bch2_bkey_replicas(c, old) ||
+ (!new_compressed && bch2_bkey_sectors_compressed(old))))
+ *usage_increasing = true;
+
+ if (bkey_ge(old.k->p, new->k.p))
+ break;
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
+ struct btree_iter *extent_iter,
+ u64 new_i_size,
+ s64 i_sectors_delta)
+{
+ struct btree_iter iter;
+ struct bkey_i *k;
+ struct bkey_i_inode_v3 *inode;
+ /*
+ * Crazy performance optimization:
+ * Every extent update needs to also update the inode: the inode trigger
+ * will set bi->journal_seq to the journal sequence number of this
+ * transaction - for fsync.
+ *
+ * But if that's the only reason we're updating the inode (we're not
+ * updating bi_size or bi_sectors), then we don't need the inode update
+ * to be journalled - if we crash, the bi_journal_seq update will be
+ * lost, but that's fine.
+ */
+ unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL;
+ int ret;
+
+ k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes,
+ SPOS(0,
+ extent_iter->pos.inode,
+ extent_iter->snapshot),
+ BTREE_ITER_CACHED);
+ ret = PTR_ERR_OR_ZERO(k);
+ if (unlikely(ret))
+ return ret;
+
+ if (unlikely(k->k.type != KEY_TYPE_inode_v3)) {
+ k = bch2_inode_to_v3(trans, k);
+ ret = PTR_ERR_OR_ZERO(k);
+ if (unlikely(ret))
+ goto err;
+ }
+
+ inode = bkey_i_to_inode_v3(k);
+
+ if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) &&
+ new_i_size > le64_to_cpu(inode->v.bi_size)) {
+ inode->v.bi_size = cpu_to_le64(new_i_size);
+ inode_update_flags = 0;
+ }
+
+ if (i_sectors_delta) {
+ le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta);
+ inode_update_flags = 0;
+ }
+
+ if (inode->k.p.snapshot != iter.snapshot) {
+ inode->k.p.snapshot = iter.snapshot;
+ inode_update_flags = 0;
+ }
+
+ ret = bch2_trans_update(trans, &iter, &inode->k_i,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+ inode_update_flags);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_extent_update(struct btree_trans *trans,
+ subvol_inum inum,
+ struct btree_iter *iter,
+ struct bkey_i *k,
+ struct disk_reservation *disk_res,
+ u64 new_i_size,
+ s64 *i_sectors_delta_total,
+ bool check_enospc)
+{
+ struct bpos next_pos;
+ bool usage_increasing;
+ s64 i_sectors_delta = 0, disk_sectors_delta = 0;
+ int ret;
+
+ /*
+	 * This traverses the iterator without changing iter->path->pos to
+	 * search_key() (which is pos + 1 for extents): we want there to be a
+	 * path already traversed at iter->pos, because
+	 * bch2_trans_extent_update() will use it to attempt extent merging.
+ */
+ ret = __bch2_btree_iter_traverse(iter);
+ if (ret)
+ return ret;
+
+ ret = bch2_extent_trim_atomic(trans, iter, k);
+ if (ret)
+ return ret;
+
+ next_pos = k->k.p;
+
+ ret = bch2_sum_sector_overwrites(trans, iter, k,
+ &usage_increasing,
+ &i_sectors_delta,
+ &disk_sectors_delta);
+ if (ret)
+ return ret;
+
+ if (disk_res &&
+ disk_sectors_delta > (s64) disk_res->sectors) {
+ ret = bch2_disk_reservation_add(trans->c, disk_res,
+ disk_sectors_delta - disk_res->sectors,
+ !check_enospc || !usage_increasing
+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Note:
+ * We always have to do an inode update - even when i_size/i_sectors
+ * aren't changing - for fsync to work properly; fsync relies on
+ * inode->bi_journal_seq which is updated by the trigger code:
+ */
+ ret = bch2_extent_update_i_size_sectors(trans, iter,
+ min(k->k.p.offset << 9, new_i_size),
+ i_sectors_delta) ?:
+ bch2_trans_update(trans, iter, k, 0) ?:
+ bch2_trans_commit(trans, disk_res, NULL,
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc);
+ if (unlikely(ret))
+ return ret;
+
+ if (i_sectors_delta_total)
+ *i_sectors_delta_total += i_sectors_delta;
+ bch2_btree_iter_set_pos(iter, next_pos);
+ return 0;
+}
+
+static int bch2_write_index_default(struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+ struct bkey_buf sk;
+ struct keylist *keys = &op->insert_keys;
+ struct bkey_i *k = bch2_keylist_front(keys);
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ subvol_inum inum = {
+ .subvol = op->subvol,
+ .inum = k->k.p.inode,
+ };
+ int ret;
+
+ BUG_ON(!inum.subvol);
+
+ bch2_bkey_buf_init(&sk);
+
+ do {
+ bch2_trans_begin(trans);
+
+ k = bch2_keylist_front(keys);
+ bch2_bkey_buf_copy(&sk, c, k);
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol,
+ &sk.k->k.p.snapshot);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ break;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ bkey_start_pos(&sk.k->k),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+ ret = bch2_bkey_set_needs_rebalance(c, sk.k,
+ op->opts.background_target,
+ op->opts.background_compression) ?:
+ bch2_extent_update(trans, inum, &iter, sk.k,
+ &op->res,
+ op->new_i_size, &op->i_sectors_delta,
+ op->flags & BCH_WRITE_CHECK_ENOSPC);
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ break;
+
+ if (bkey_ge(iter.pos, k->k.p))
+ bch2_keylist_pop_front(&op->insert_keys);
+ else
+ bch2_cut_front(iter.pos, k);
+ } while (!bch2_keylist_empty(keys));
+
+ bch2_trans_put(trans);
+ bch2_bkey_buf_exit(&sk, c);
+
+ return ret;
+}
+
+/* Writes */
+
+void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
+ enum bch_data_type type,
+ const struct bkey_i *k,
+ bool nocow)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
+ struct bch_write_bio *n;
+
+ BUG_ON(c->opts.nochanges);
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ BUG_ON(!bch2_dev_exists2(c, ptr->dev));
+
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+ if (to_entry(ptr + 1) < ptrs.end) {
+ n = to_wbio(bio_alloc_clone(NULL, &wbio->bio,
+ GFP_NOFS, &ca->replica_set));
+
+ n->bio.bi_end_io = wbio->bio.bi_end_io;
+ n->bio.bi_private = wbio->bio.bi_private;
+ n->parent = wbio;
+ n->split = true;
+ n->bounce = false;
+ n->put_bio = true;
+ n->bio.bi_opf = wbio->bio.bi_opf;
+ bio_inc_remaining(&wbio->bio);
+ } else {
+ n = wbio;
+ n->split = false;
+ }
+
+ n->c = c;
+ n->dev = ptr->dev;
+ n->have_ioref = nocow || bch2_dev_get_ioref(ca,
+ type == BCH_DATA_btree ? READ : WRITE);
+ n->nocow = nocow;
+ n->submit_time = local_clock();
+ n->inode_offset = bkey_start_offset(&k->k);
+ n->bio.bi_iter.bi_sector = ptr->offset;
+
+ if (likely(n->have_ioref)) {
+ this_cpu_add(ca->io_done->sectors[WRITE][type],
+ bio_sectors(&n->bio));
+
+ bio_set_dev(&n->bio, ca->disk_sb.bdev);
+
+ if (type != BCH_DATA_btree && unlikely(c->opts.no_data_io)) {
+ bio_endio(&n->bio);
+ continue;
+ }
+
+ submit_bio(&n->bio);
+ } else {
+ n->bio.bi_status = BLK_STS_REMOVED;
+ bio_endio(&n->bio);
+ }
+ }
+}
+
+static void __bch2_write(struct bch_write_op *);
+
+static void bch2_write_done(struct closure *cl)
+{
+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+ struct bch_fs *c = op->c;
+
+ EBUG_ON(op->open_buckets.nr);
+
+ bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
+ bch2_disk_reservation_put(c, &op->res);
+
+ if (!(op->flags & BCH_WRITE_MOVE))
+ bch2_write_ref_put(c, BCH_WRITE_REF_write);
+ bch2_keylist_free(&op->insert_keys, op->inline_keys);
+
+ EBUG_ON(cl->parent);
+ closure_debug_destroy(cl);
+ if (op->end_io)
+ op->end_io(op);
+}
+
+static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
+{
+ struct keylist *keys = &op->insert_keys;
+ struct bch_extent_ptr *ptr;
+ struct bkey_i *src, *dst = keys->keys, *n;
+
+ for (src = keys->keys; src != keys->top; src = n) {
+ n = bkey_next(src);
+
+ if (bkey_extent_is_direct_data(&src->k)) {
+ bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr,
+ test_bit(ptr->dev, op->failed.d));
+
+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src)))
+ return -EIO;
+ }
+
+ if (dst != src)
+ memmove_u64s_down(dst, src, src->k.u64s);
+ dst = bkey_next(dst);
+ }
+
+ keys->top = dst;
+ return 0;
+}
+
+/**
+ * __bch2_write_index - after a write, update index to point to new data
+ * @op: bch_write_op to process
+ */
+static void __bch2_write_index(struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+ struct keylist *keys = &op->insert_keys;
+ unsigned dev;
+ int ret = 0;
+
+ if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
+ ret = bch2_write_drop_io_error_ptrs(op);
+ if (ret)
+ goto err;
+ }
+
+ if (!bch2_keylist_empty(keys)) {
+ u64 sectors_start = keylist_sectors(keys);
+
+ ret = !(op->flags & BCH_WRITE_MOVE)
+ ? bch2_write_index_default(op)
+ : bch2_data_update_index_update(op);
+
+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
+ BUG_ON(keylist_sectors(keys) && !ret);
+
+ op->written += sectors_start - keylist_sectors(keys);
+
+ if (ret && !bch2_err_matches(ret, EROFS)) {
+ struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
+
+ bch_err_inum_offset_ratelimited(c,
+ insert->k.p.inode, insert->k.p.offset << 9,
+ "write error while doing btree update: %s",
+ bch2_err_str(ret));
+ }
+
+ if (ret)
+ goto err;
+ }
+out:
+	/* If a bucket wasn't written, we can't erasure code it: */
+ for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX)
+ bch2_open_bucket_write_error(c, &op->open_buckets, dev);
+
+ bch2_open_buckets_put(c, &op->open_buckets);
+ return;
+err:
+ keys->top = keys->keys;
+ op->error = ret;
+ op->flags |= BCH_WRITE_DONE;
+ goto out;
+}
+
+static inline void __wp_update_state(struct write_point *wp, enum write_point_state state)
+{
+ if (state != wp->state) {
+ u64 now = ktime_get_ns();
+
+ if (wp->last_state_change &&
+ time_after64(now, wp->last_state_change))
+ wp->time[wp->state] += now - wp->last_state_change;
+ wp->state = state;
+ wp->last_state_change = now;
+ }
+}
+
+static inline void wp_update_state(struct write_point *wp, bool running)
+{
+ enum write_point_state state;
+
+ state = running ? WRITE_POINT_running :
+ !list_empty(&wp->writes) ? WRITE_POINT_waiting_io
+ : WRITE_POINT_stopped;
+
+ __wp_update_state(wp, state);
+}
+
+static CLOSURE_CALLBACK(bch2_write_index)
+{
+ closure_type(op, struct bch_write_op, cl);
+ struct write_point *wp = op->wp;
+ struct workqueue_struct *wq = index_update_wq(op);
+ unsigned long flags;
+
+ if ((op->flags & BCH_WRITE_DONE) &&
+ (op->flags & BCH_WRITE_MOVE))
+ bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
+
+ spin_lock_irqsave(&wp->writes_lock, flags);
+ if (wp->state == WRITE_POINT_waiting_io)
+ __wp_update_state(wp, WRITE_POINT_waiting_work);
+ list_add_tail(&op->wp_list, &wp->writes);
+	spin_unlock_irqrestore(&wp->writes_lock, flags);
+
+ queue_work(wq, &wp->index_update_work);
+}
+
+static inline void bch2_write_queue(struct bch_write_op *op, struct write_point *wp)
+{
+ op->wp = wp;
+
+ if (wp->state == WRITE_POINT_stopped) {
+ spin_lock_irq(&wp->writes_lock);
+ __wp_update_state(wp, WRITE_POINT_waiting_io);
+ spin_unlock_irq(&wp->writes_lock);
+ }
+}
+
+void bch2_write_point_do_index_updates(struct work_struct *work)
+{
+ struct write_point *wp =
+ container_of(work, struct write_point, index_update_work);
+ struct bch_write_op *op;
+
+ while (1) {
+ spin_lock_irq(&wp->writes_lock);
+ op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list);
+ if (op)
+ list_del(&op->wp_list);
+ wp_update_state(wp, op != NULL);
+ spin_unlock_irq(&wp->writes_lock);
+
+ if (!op)
+ break;
+
+ op->flags |= BCH_WRITE_IN_WORKER;
+
+ __bch2_write_index(op);
+
+ if (!(op->flags & BCH_WRITE_DONE))
+ __bch2_write(op);
+ else
+ bch2_write_done(&op->cl);
+ }
+}
+
+static void bch2_write_endio(struct bio *bio)
+{
+ struct closure *cl = bio->bi_private;
+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
+ struct bch_write_bio *wbio = to_wbio(bio);
+ struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
+ struct bch_fs *c = wbio->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev);
+
+ if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
+ op->pos.inode,
+ wbio->inode_offset << 9,
+ "data write error: %s",
+ bch2_blk_status_to_str(bio->bi_status))) {
+ set_bit(wbio->dev, op->failed.d);
+ op->flags |= BCH_WRITE_IO_ERROR;
+ }
+
+ if (wbio->nocow)
+ set_bit(wbio->dev, op->devs_need_flush->d);
+
+ if (wbio->have_ioref) {
+ bch2_latency_acct(ca, wbio->submit_time, WRITE);
+ percpu_ref_put(&ca->io_ref);
+ }
+
+ if (wbio->bounce)
+ bch2_bio_free_pages_pool(c, bio);
+
+ if (wbio->put_bio)
+ bio_put(bio);
+
+ if (parent)
+ bio_endio(&parent->bio);
+ else
+ closure_put(cl);
+}
+
+static void init_append_extent(struct bch_write_op *op,
+ struct write_point *wp,
+ struct bversion version,
+ struct bch_extent_crc_unpacked crc)
+{
+ struct bkey_i_extent *e;
+
+ op->pos.offset += crc.uncompressed_size;
+
+ e = bkey_extent_init(op->insert_keys.top);
+ e->k.p = op->pos;
+ e->k.size = crc.uncompressed_size;
+ e->k.version = version;
+
+ if (crc.csum_type ||
+ crc.compression_type ||
+ crc.nonce)
+ bch2_extent_crc_append(&e->k_i, crc);
+
+ bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size,
+ op->flags & BCH_WRITE_CACHED);
+
+ bch2_keylist_push(&op->insert_keys);
+}
+
+static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
+ struct write_point *wp,
+ struct bio *src,
+ bool *page_alloc_failed,
+ void *buf)
+{
+ struct bch_write_bio *wbio;
+ struct bio *bio;
+ unsigned output_available =
+ min(wp->sectors_free << 9, src->bi_iter.bi_size);
+ unsigned pages = DIV_ROUND_UP(output_available +
+ (buf
+ ? ((unsigned long) buf & (PAGE_SIZE - 1))
+ : 0), PAGE_SIZE);
+
+ pages = min(pages, BIO_MAX_VECS);
+
+ bio = bio_alloc_bioset(NULL, pages, 0,
+ GFP_NOFS, &c->bio_write);
+ wbio = wbio_init(bio);
+ wbio->put_bio = true;
+ /* copy WRITE_SYNC flag */
+ wbio->bio.bi_opf = src->bi_opf;
+
+ if (buf) {
+ bch2_bio_map(bio, buf, output_available);
+ return bio;
+ }
+
+ wbio->bounce = true;
+
+ /*
+ * We can't use mempool for more than c->sb.encoded_extent_max
+ * worth of pages, but we'd like to allocate more if we can:
+ */
+ bch2_bio_alloc_pages_pool(c, bio,
+ min_t(unsigned, output_available,
+ c->opts.encoded_extent_max));
+
+ if (bio->bi_iter.bi_size < output_available)
+ *page_alloc_failed =
+ bch2_bio_alloc_pages(bio,
+ output_available -
+ bio->bi_iter.bi_size,
+ GFP_NOFS) != 0;
+
+ return bio;
+}
+
+static int bch2_write_rechecksum(struct bch_fs *c,
+ struct bch_write_op *op,
+ unsigned new_csum_type)
+{
+ struct bio *bio = &op->wbio.bio;
+ struct bch_extent_crc_unpacked new_crc;
+ int ret;
+
+ /* bch2_rechecksum_bio() can't encrypt or decrypt data: */
+
+ if (bch2_csum_type_is_encryption(op->crc.csum_type) !=
+ bch2_csum_type_is_encryption(new_csum_type))
+ new_csum_type = op->crc.csum_type;
+
+ ret = bch2_rechecksum_bio(c, bio, op->version, op->crc,
+ NULL, &new_crc,
+ op->crc.offset, op->crc.live_size,
+ new_csum_type);
+ if (ret)
+ return ret;
+
+ bio_advance(bio, op->crc.offset << 9);
+ bio->bi_iter.bi_size = op->crc.live_size << 9;
+ op->crc = new_crc;
+ return 0;
+}
+
+static int bch2_write_decrypt(struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+ struct nonce nonce = extent_nonce(op->version, op->crc);
+ struct bch_csum csum;
+ int ret;
+
+ if (!bch2_csum_type_is_encryption(op->crc.csum_type))
+ return 0;
+
+ /*
+ * If we need to decrypt data in the write path, we'll no longer be able
+ * to verify the existing checksum (poly1305 mac, in this case) after
+ * it's decrypted - this is the last point we'll be able to reverify the
+ * checksum:
+ */
+ csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
+ if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
+ return -EIO;
+
+ ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
+ op->crc.csum_type = 0;
+ op->crc.csum = (struct bch_csum) { 0, 0 };
+ return ret;
+}
+
+static enum prep_encoded_ret {
+ PREP_ENCODED_OK,
+ PREP_ENCODED_ERR,
+ PREP_ENCODED_CHECKSUM_ERR,
+ PREP_ENCODED_DO_WRITE,
+} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp)
+{
+ struct bch_fs *c = op->c;
+ struct bio *bio = &op->wbio.bio;
+
+ if (!(op->flags & BCH_WRITE_DATA_ENCODED))
+ return PREP_ENCODED_OK;
+
+ BUG_ON(bio_sectors(bio) != op->crc.compressed_size);
+
+ /* Can we just write the entire extent as is? */
+ if (op->crc.uncompressed_size == op->crc.live_size &&
+ op->crc.uncompressed_size <= c->opts.encoded_extent_max >> 9 &&
+ op->crc.compressed_size <= wp->sectors_free &&
+ (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) ||
+ op->incompressible)) {
+ if (!crc_is_compressed(op->crc) &&
+ op->csum_type != op->crc.csum_type &&
+ bch2_write_rechecksum(c, op, op->csum_type) &&
+ !c->opts.no_data_io)
+ return PREP_ENCODED_CHECKSUM_ERR;
+
+ return PREP_ENCODED_DO_WRITE;
+ }
+
+ /*
+ * If the data is compressed and we couldn't write the entire extent as
+ * is, we have to decompress it:
+ */
+ if (crc_is_compressed(op->crc)) {
+ struct bch_csum csum;
+
+ if (bch2_write_decrypt(op))
+ return PREP_ENCODED_CHECKSUM_ERR;
+
+ /* Last point we can still verify checksum: */
+ csum = bch2_checksum_bio(c, op->crc.csum_type,
+ extent_nonce(op->version, op->crc),
+ bio);
+ if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
+ return PREP_ENCODED_CHECKSUM_ERR;
+
+ if (bch2_bio_uncompress_inplace(c, bio, &op->crc))
+ return PREP_ENCODED_ERR;
+ }
+
+ /*
+ * No longer have compressed data after this point - data might be
+ * encrypted:
+ */
+
+ /*
+ * If the data is checksummed and we're only writing a subset,
+ * rechecksum and adjust bio to point to currently live data:
+ */
+ if ((op->crc.live_size != op->crc.uncompressed_size ||
+ op->crc.csum_type != op->csum_type) &&
+ bch2_write_rechecksum(c, op, op->csum_type) &&
+ !c->opts.no_data_io)
+ return PREP_ENCODED_CHECKSUM_ERR;
+
+ /*
+ * If we want to compress the data, it has to be decrypted:
+ */
+ if ((op->compression_opt ||
+ bch2_csum_type_is_encryption(op->crc.csum_type) !=
+ bch2_csum_type_is_encryption(op->csum_type)) &&
+ bch2_write_decrypt(op))
+ return PREP_ENCODED_CHECKSUM_ERR;
+
+ return PREP_ENCODED_OK;
+}
+
+static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
+ struct bio **_dst)
+{
+ struct bch_fs *c = op->c;
+ struct bio *src = &op->wbio.bio, *dst = src;
+ struct bvec_iter saved_iter;
+ void *ec_buf;
+ unsigned total_output = 0, total_input = 0;
+ bool bounce = false;
+ bool page_alloc_failed = false;
+ int ret, more = 0;
+
+ BUG_ON(!bio_sectors(src));
+
+ ec_buf = bch2_writepoint_ec_buf(c, wp);
+
+ switch (bch2_write_prep_encoded_data(op, wp)) {
+ case PREP_ENCODED_OK:
+ break;
+ case PREP_ENCODED_ERR:
+ ret = -EIO;
+ goto err;
+ case PREP_ENCODED_CHECKSUM_ERR:
+ goto csum_err;
+ case PREP_ENCODED_DO_WRITE:
+ /* XXX look for bug here */
+ if (ec_buf) {
+ dst = bch2_write_bio_alloc(c, wp, src,
+ &page_alloc_failed,
+ ec_buf);
+ bio_copy_data(dst, src);
+ bounce = true;
+ }
+ init_append_extent(op, wp, op->version, op->crc);
+ goto do_write;
+ }
+
+ if (ec_buf ||
+ op->compression_opt ||
+ (op->csum_type &&
+ !(op->flags & BCH_WRITE_PAGES_STABLE)) ||
+ (bch2_csum_type_is_encryption(op->csum_type) &&
+ !(op->flags & BCH_WRITE_PAGES_OWNED))) {
+ dst = bch2_write_bio_alloc(c, wp, src,
+ &page_alloc_failed,
+ ec_buf);
+ bounce = true;
+ }
+
+ saved_iter = dst->bi_iter;
+
+ do {
+ struct bch_extent_crc_unpacked crc = { 0 };
+ struct bversion version = op->version;
+ size_t dst_len = 0, src_len = 0;
+
+ if (page_alloc_failed &&
+ dst->bi_iter.bi_size < (wp->sectors_free << 9) &&
+ dst->bi_iter.bi_size < c->opts.encoded_extent_max)
+ break;
+
+ BUG_ON(op->compression_opt &&
+ (op->flags & BCH_WRITE_DATA_ENCODED) &&
+ bch2_csum_type_is_encryption(op->crc.csum_type));
+ BUG_ON(op->compression_opt && !bounce);
+
+ crc.compression_type = op->incompressible
+ ? BCH_COMPRESSION_TYPE_incompressible
+ : op->compression_opt
+ ? bch2_bio_compress(c, dst, &dst_len, src, &src_len,
+ op->compression_opt)
+ : 0;
+ if (!crc_is_compressed(crc)) {
+ dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
+ dst_len = min_t(unsigned, dst_len, wp->sectors_free << 9);
+
+ if (op->csum_type)
+ dst_len = min_t(unsigned, dst_len,
+ c->opts.encoded_extent_max);
+
+ if (bounce) {
+ swap(dst->bi_iter.bi_size, dst_len);
+ bio_copy_data(dst, src);
+ swap(dst->bi_iter.bi_size, dst_len);
+ }
+
+ src_len = dst_len;
+ }
+
+ BUG_ON(!src_len || !dst_len);
+
+ if (bch2_csum_type_is_encryption(op->csum_type)) {
+ if (bversion_zero(version)) {
+ version.lo = atomic64_inc_return(&c->key_version);
+ } else {
+ crc.nonce = op->nonce;
+ op->nonce += src_len >> 9;
+ }
+ }
+
+ if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
+ !crc_is_compressed(crc) &&
+ bch2_csum_type_is_encryption(op->crc.csum_type) ==
+ bch2_csum_type_is_encryption(op->csum_type)) {
+ u8 compression_type = crc.compression_type;
+ u16 nonce = crc.nonce;
+ /*
+ * Note: when we're using rechecksum(), we need to be
+ * checksumming @src because it has all the data our
+ * existing checksum covers - if we bounced (because we
+ * were trying to compress), @dst will only have the
+ * part of the data the new checksum will cover.
+ *
+ * But normally we want to be checksumming post bounce,
+ * because part of the reason for bouncing is so the
+ * data can't be modified (by userspace) while it's in
+ * flight.
+ */
+ if (bch2_rechecksum_bio(c, src, version, op->crc,
+ &crc, &op->crc,
+ src_len >> 9,
+ bio_sectors(src) - (src_len >> 9),
+ op->csum_type))
+ goto csum_err;
+ /*
+			 * bch2_rechecksum_bio() sets compression_type on crc from op->crc;
+			 * this isn't always correct, as sometimes we're changing
+ * an extent from uncompressed to incompressible.
+ */
+ crc.compression_type = compression_type;
+ crc.nonce = nonce;
+ } else {
+ if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
+ bch2_rechecksum_bio(c, src, version, op->crc,
+ NULL, &op->crc,
+ src_len >> 9,
+ bio_sectors(src) - (src_len >> 9),
+ op->crc.csum_type))
+ goto csum_err;
+
+ crc.compressed_size = dst_len >> 9;
+ crc.uncompressed_size = src_len >> 9;
+ crc.live_size = src_len >> 9;
+
+ swap(dst->bi_iter.bi_size, dst_len);
+ ret = bch2_encrypt_bio(c, op->csum_type,
+ extent_nonce(version, crc), dst);
+ if (ret)
+ goto err;
+
+ crc.csum = bch2_checksum_bio(c, op->csum_type,
+ extent_nonce(version, crc), dst);
+ crc.csum_type = op->csum_type;
+ swap(dst->bi_iter.bi_size, dst_len);
+ }
+
+ init_append_extent(op, wp, version, crc);
+
+ if (dst != src)
+ bio_advance(dst, dst_len);
+ bio_advance(src, src_len);
+ total_output += dst_len;
+ total_input += src_len;
+ } while (dst->bi_iter.bi_size &&
+ src->bi_iter.bi_size &&
+ wp->sectors_free &&
+ !bch2_keylist_realloc(&op->insert_keys,
+ op->inline_keys,
+ ARRAY_SIZE(op->inline_keys),
+ BKEY_EXTENT_U64s_MAX));
+
+ more = src->bi_iter.bi_size != 0;
+
+ dst->bi_iter = saved_iter;
+
+ if (dst == src && more) {
+ BUG_ON(total_output != total_input);
+
+ dst = bio_split(src, total_input >> 9,
+ GFP_NOFS, &c->bio_write);
+ wbio_init(dst)->put_bio = true;
+ /* copy WRITE_SYNC flag */
+ dst->bi_opf = src->bi_opf;
+ }
+
+ dst->bi_iter.bi_size = total_output;
+do_write:
+ *_dst = dst;
+ return more;
+csum_err:
+ bch_err(c, "error verifying existing checksum while rewriting existing data (memory corruption?)");
+ ret = -EIO;
+err:
+ if (to_wbio(dst)->bounce)
+ bch2_bio_free_pages_pool(c, dst);
+ if (to_wbio(dst)->put_bio)
+ bio_put(dst);
+
+ return ret;
+}
+
+static bool bch2_extent_is_writeable(struct bch_write_op *op,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = op->c;
+ struct bkey_s_c_extent e;
+ struct extent_ptr_decoded p;
+ const union bch_extent_entry *entry;
+ unsigned replicas = 0;
+
+ if (k.k->type != KEY_TYPE_extent)
+ return false;
+
+ e = bkey_s_c_to_extent(k);
+ extent_for_each_ptr_decode(e, p, entry) {
+ if (crc_is_encoded(p.crc) || p.has_ec)
+ return false;
+
+ replicas += bch2_extent_ptr_durability(c, &p);
+ }
+
+ return replicas >= op->opts.data_replicas;
+}
+
+static inline void bch2_nocow_write_unlock(struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+
+ for_each_keylist_key(&op->insert_keys, k) {
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
+
+ bkey_for_each_ptr(ptrs, ptr)
+ bch2_bucket_nocow_unlock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, ptr),
+ BUCKET_NOCOW_LOCK_UPDATE);
+ }
+}
+
+static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i *orig,
+ struct bkey_s_c k,
+ u64 new_i_size)
+{
+ if (!bch2_extents_match(bkey_i_to_s_c(orig), k)) {
+ /* trace this */
+ return 0;
+ }
+
+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
+ int ret = PTR_ERR_OR_ZERO(new);
+ if (ret)
+ return ret;
+
+ bch2_cut_front(bkey_start_pos(&orig->k), new);
+ bch2_cut_back(orig->k.p, new);
+
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+ bkey_for_each_ptr(ptrs, ptr)
+ ptr->unwritten = 0;
+
+ /*
+	 * Note that we're not calling bch2_subvolume_get_snapshot() in this path -
+ * that was done when we kicked off the write, and here it's important
+ * that we update the extent that we wrote to - even if a snapshot has
+ * since been created. The write is still outstanding, so we're ok
+ * w.r.t. snapshot atomicity:
+ */
+ return bch2_extent_update_i_size_sectors(trans, iter,
+ min(new->k.p.offset << 9, new_i_size), 0) ?:
+ bch2_trans_update(trans, iter, new,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+}
+
+static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+ struct btree_trans *trans = bch2_trans_get(c);
+
+ for_each_keylist_key(&op->insert_keys, orig) {
+ int ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents,
+ bkey_start_pos(&orig->k), orig->k.p,
+ BTREE_ITER_INTENT, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size);
+ }));
+
+ if (ret && !bch2_err_matches(ret, EROFS)) {
+ struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
+
+ bch_err_inum_offset_ratelimited(c,
+ insert->k.p.inode, insert->k.p.offset << 9,
+ "write error while doing btree update: %s",
+ bch2_err_str(ret));
+ }
+
+ if (ret) {
+ op->error = ret;
+ break;
+ }
+ }
+
+ bch2_trans_put(trans);
+}
+
+static void __bch2_nocow_write_done(struct bch_write_op *op)
+{
+ bch2_nocow_write_unlock(op);
+
+ if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
+ op->error = -EIO;
+ } else if (unlikely(op->flags & BCH_WRITE_CONVERT_UNWRITTEN))
+ bch2_nocow_write_convert_unwritten(op);
+}
+
+static CLOSURE_CALLBACK(bch2_nocow_write_done)
+{
+ closure_type(op, struct bch_write_op, cl);
+
+ __bch2_nocow_write_done(op);
+ bch2_write_done(cl);
+}
+
+struct bucket_to_lock {
+ struct bpos b;
+ unsigned gen;
+ struct nocow_lock_bucket *l;
+};
+
+static void bch2_nocow_write(struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets;
+ u32 snapshot;
+ struct bucket_to_lock *stale_at;
+ int ret;
+
+ if (op->flags & BCH_WRITE_MOVE)
+ return;
+
+ darray_init(&buckets);
+ trans = bch2_trans_get(c);
+retry:
+ bch2_trans_begin(trans);
+
+ ret = bch2_subvolume_get_snapshot(trans, op->subvol, &snapshot);
+ if (unlikely(ret))
+ goto err;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ SPOS(op->pos.inode, op->pos.offset, snapshot),
+ BTREE_ITER_SLOTS);
+ while (1) {
+ struct bio *bio = &op->wbio.bio;
+
+ buckets.nr = 0;
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ break;
+
+ /* fall back to normal cow write path? */
+ if (unlikely(k.k->p.snapshot != snapshot ||
+ !bch2_extent_is_writeable(op, k)))
+ break;
+
+ if (bch2_keylist_realloc(&op->insert_keys,
+ op->inline_keys,
+ ARRAY_SIZE(op->inline_keys),
+ k.k->u64s))
+ break;
+
+ /* Get iorefs before dropping btree locks: */
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bpos b = PTR_BUCKET_POS(c, ptr);
+ struct nocow_lock_bucket *l =
+ bucket_nocow_lock(&c->nocow_locks, bucket_to_u64(b));
+ prefetch(l);
+
+ if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
+ goto err_get_ioref;
+
+ /* XXX allocating memory with btree locks held - rare */
+ darray_push_gfp(&buckets, ((struct bucket_to_lock) {
+ .b = b, .gen = ptr->gen, .l = l,
+ }), GFP_KERNEL|__GFP_NOFAIL);
+
+ if (ptr->unwritten)
+ op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
+ }
+
+ /* Unlock before taking nocow locks, doing IO: */
+ bkey_reassemble(op->insert_keys.top, k);
+ bch2_trans_unlock(trans);
+
+ bch2_cut_front(op->pos, op->insert_keys.top);
+ if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
+ bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);
+
+ darray_for_each(buckets, i) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, i->b.inode);
+
+ __bch2_bucket_nocow_lock(&c->nocow_locks, i->l,
+ bucket_to_u64(i->b),
+ BUCKET_NOCOW_LOCK_UPDATE);
+
+ rcu_read_lock();
+ bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen);
+ rcu_read_unlock();
+
+ if (unlikely(stale)) {
+ stale_at = i;
+ goto err_bucket_stale;
+ }
+ }
+
+ bio = &op->wbio.bio;
+ if (k.k->p.offset < op->pos.offset + bio_sectors(bio)) {
+ bio = bio_split(bio, k.k->p.offset - op->pos.offset,
+ GFP_KERNEL, &c->bio_write);
+ wbio_init(bio)->put_bio = true;
+ bio->bi_opf = op->wbio.bio.bi_opf;
+ } else {
+ op->flags |= BCH_WRITE_DONE;
+ }
+
+ op->pos.offset += bio_sectors(bio);
+ op->written += bio_sectors(bio);
+
+ bio->bi_end_io = bch2_write_endio;
+ bio->bi_private = &op->cl;
+ bio->bi_opf |= REQ_OP_WRITE;
+ closure_get(&op->cl);
+ bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
+ op->insert_keys.top, true);
+
+ bch2_keylist_push(&op->insert_keys);
+ if (op->flags & BCH_WRITE_DONE)
+ break;
+ bch2_btree_iter_advance(&iter);
+ }
+out:
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ if (ret) {
+ bch_err_inum_offset_ratelimited(c,
+ op->pos.inode, op->pos.offset << 9,
+ "%s: btree lookup error %s", __func__, bch2_err_str(ret));
+ op->error = ret;
+ op->flags |= BCH_WRITE_DONE;
+ }
+
+ bch2_trans_put(trans);
+ darray_exit(&buckets);
+
+ /* fallback to cow write path? */
+ if (!(op->flags & BCH_WRITE_DONE)) {
+ closure_sync(&op->cl);
+ __bch2_nocow_write_done(op);
+ op->insert_keys.top = op->insert_keys.keys;
+ } else if (op->flags & BCH_WRITE_SYNC) {
+ closure_sync(&op->cl);
+ bch2_nocow_write_done(&op->cl.work);
+ } else {
+ /*
+ * XXX
+ * needs to run out of process context because ei_quota_lock is
+ * a mutex
+ */
+ continue_at(&op->cl, bch2_nocow_write_done, index_update_wq(op));
+ }
+ return;
+err_get_ioref:
+ darray_for_each(buckets, i)
+ percpu_ref_put(&bch_dev_bkey_exists(c, i->b.inode)->io_ref);
+
+ /* Fall back to COW path: */
+ goto out;
+err_bucket_stale:
+ darray_for_each(buckets, i) {
+ bch2_bucket_nocow_unlock(&c->nocow_locks, i->b, BUCKET_NOCOW_LOCK_UPDATE);
+ if (i == stale_at)
+ break;
+ }
+
+ /* We can retry this: */
+ ret = -BCH_ERR_transaction_restart;
+ goto err_get_ioref;
+}
+
+static void __bch2_write(struct bch_write_op *op)
+{
+ struct bch_fs *c = op->c;
+ struct write_point *wp = NULL;
+ struct bio *bio = NULL;
+ unsigned nofs_flags;
+ int ret;
+
+ nofs_flags = memalloc_nofs_save();
+
+ if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) {
+ bch2_nocow_write(op);
+ if (op->flags & BCH_WRITE_DONE)
+ goto out_nofs_restore;
+ }
+again:
+ memset(&op->failed, 0, sizeof(op->failed));
+
+ do {
+ struct bkey_i *key_to_write;
+ unsigned key_to_write_offset = op->insert_keys.top_p -
+ op->insert_keys.keys_p;
+
+ /* +1 for possible cache device: */
+ if (op->open_buckets.nr + op->nr_replicas + 1 >
+ ARRAY_SIZE(op->open_buckets.v))
+ break;
+
+ if (bch2_keylist_realloc(&op->insert_keys,
+ op->inline_keys,
+ ARRAY_SIZE(op->inline_keys),
+ BKEY_EXTENT_U64s_MAX))
+ break;
+
+ /*
+		 * The copygc thread is now global, so it's no longer freeing up
+		 * space on specific disks; that means allocations for specific
+		 * disks may hang arbitrarily long:
+ */
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_alloc_sectors_start_trans(trans,
+ op->target,
+ op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
+ op->write_point,
+ &op->devs_have,
+ op->nr_replicas,
+ op->nr_replicas_required,
+ op->watermark,
+ op->flags,
+ (op->flags & (BCH_WRITE_ALLOC_NOWAIT|
+ BCH_WRITE_ONLY_SPECIFIED_DEVS))
+ ? NULL : &op->cl, &wp));
+ if (unlikely(ret)) {
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+ break;
+
+ goto err;
+ }
+
+ EBUG_ON(!wp);
+
+ bch2_open_bucket_get(c, wp, &op->open_buckets);
+ ret = bch2_write_extent(op, wp, &bio);
+
+ bch2_alloc_sectors_done_inlined(c, wp);
+err:
+ if (ret <= 0) {
+ op->flags |= BCH_WRITE_DONE;
+
+ if (ret < 0) {
+ bch_err_inum_offset_ratelimited(c,
+ op->pos.inode,
+ op->pos.offset << 9,
+ "%s(): error: %s", __func__, bch2_err_str(ret));
+ op->error = ret;
+ break;
+ }
+ }
+
+ bio->bi_end_io = bch2_write_endio;
+ bio->bi_private = &op->cl;
+ bio->bi_opf |= REQ_OP_WRITE;
+
+ closure_get(bio->bi_private);
+
+ key_to_write = (void *) (op->insert_keys.keys_p +
+ key_to_write_offset);
+
+ bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
+ key_to_write, false);
+ } while (ret);
+
+ /*
+ * Sync or no?
+ *
+	 * If we're running asynchronously, we may still want to block
+ * synchronously here if we weren't able to submit all of the IO at
+ * once, as that signals backpressure to the caller.
+ */
+ if ((op->flags & BCH_WRITE_SYNC) ||
+ (!(op->flags & BCH_WRITE_DONE) &&
+ !(op->flags & BCH_WRITE_IN_WORKER))) {
+ closure_sync(&op->cl);
+ __bch2_write_index(op);
+
+ if (!(op->flags & BCH_WRITE_DONE))
+ goto again;
+ bch2_write_done(&op->cl);
+ } else {
+ bch2_write_queue(op, wp);
+ continue_at(&op->cl, bch2_write_index, NULL);
+ }
+out_nofs_restore:
+ memalloc_nofs_restore(nofs_flags);
+}
+
+static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
+{
+ struct bio *bio = &op->wbio.bio;
+ struct bvec_iter iter;
+ struct bkey_i_inline_data *id;
+ unsigned sectors;
+ int ret;
+
+ op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
+ op->flags |= BCH_WRITE_DONE;
+
+ bch2_check_set_feature(op->c, BCH_FEATURE_inline_data);
+
+ ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys,
+ ARRAY_SIZE(op->inline_keys),
+ BKEY_U64s + DIV_ROUND_UP(data_len, 8));
+ if (ret) {
+ op->error = ret;
+ goto err;
+ }
+
+ sectors = bio_sectors(bio);
+ op->pos.offset += sectors;
+
+ id = bkey_inline_data_init(op->insert_keys.top);
+ id->k.p = op->pos;
+ id->k.version = op->version;
+ id->k.size = sectors;
+
+ iter = bio->bi_iter;
+ iter.bi_size = data_len;
+ memcpy_from_bio(id->v.data, bio, iter);
+
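+	/* zero-pad the inline data out to a whole number of u64s: */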
+ while (data_len & 7)
+ id->v.data[data_len++] = '\0';
+ set_bkey_val_bytes(&id->k, data_len);
+ bch2_keylist_push(&op->insert_keys);
+
+ __bch2_write_index(op);
+err:
+ bch2_write_done(&op->cl);
+}
+
+/**
+ * bch2_write() - handle a write to a cache device or flash only volume
+ * @cl: &bch_write_op->cl
+ *
+ * This is the starting point for any data to end up in a cache device; it could
+ * be from a normal write, or a writeback write, or a write to a flash only
+ * volume - it's also used by the moving garbage collector to compact data in
+ * mostly empty buckets.
+ *
+ * It first writes the data to the cache, creating a list of keys to be inserted
+ * (if the data won't fit in a single open bucket, there will be multiple keys);
+ * after the data is written it calls bch_journal, and after the keys have been
+ * added to the next journal write they're inserted into the btree.
+ *
+ * If op->discard is true, instead of inserting the data it invalidates the
+ * region of the cache represented by op->bio and op->inode.
+ */
+CLOSURE_CALLBACK(bch2_write)
+{
+ closure_type(op, struct bch_write_op, cl);
+ struct bio *bio = &op->wbio.bio;
+ struct bch_fs *c = op->c;
+ unsigned data_len;
+
+ EBUG_ON(op->cl.parent);
+ BUG_ON(!op->nr_replicas);
+ BUG_ON(!op->write_point.v);
+ BUG_ON(bkey_eq(op->pos, POS_MAX));
+
+ op->start_time = local_clock();
+ bch2_keylist_init(&op->insert_keys, op->inline_keys);
+ wbio_init(bio)->put_bio = false;
+
+ if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) {
+ bch_err_inum_offset_ratelimited(c,
+ op->pos.inode,
+ op->pos.offset << 9,
+ "misaligned write");
+ op->error = -EIO;
+ goto err;
+ }
+
+ if (c->opts.nochanges) {
+ op->error = -BCH_ERR_erofs_no_writes;
+ goto err;
+ }
+
+ if (!(op->flags & BCH_WRITE_MOVE) &&
+ !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
+ op->error = -BCH_ERR_erofs_no_writes;
+ goto err;
+ }
+
+ this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio));
+ bch2_increment_clock(c, bio_sectors(bio), WRITE);
+
+ data_len = min_t(u64, bio->bi_iter.bi_size,
+ op->new_i_size - (op->pos.offset << 9));
+
+ if (c->opts.inline_data &&
+ data_len <= min(block_bytes(c) / 2, 1024U)) {
+ bch2_write_data_inline(op, data_len);
+ return;
+ }
+
+ __bch2_write(op);
+ return;
+err:
+ bch2_disk_reservation_put(c, &op->res);
+
+ closure_debug_destroy(&op->cl);
+ if (op->end_io)
+ op->end_io(op);
+}
+
+static const char * const bch2_write_flags[] = {
+#define x(f) #f,
+ BCH_WRITE_FLAGS()
+#undef x
+ NULL
+};
+
+void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
+{
+ prt_str(out, "pos: ");
+ bch2_bpos_to_text(out, op->pos);
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+
+ prt_str(out, "started: ");
+ bch2_pr_time_units(out, local_clock() - op->start_time);
+ prt_newline(out);
+
+ prt_str(out, "flags: ");
+ prt_bitflags(out, bch2_write_flags, op->flags);
+ prt_newline(out);
+
+ prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl));
+ prt_newline(out);
+
+ printbuf_indent_sub(out, 2);
+}
+
+void bch2_fs_io_write_exit(struct bch_fs *c)
+{
+ mempool_exit(&c->bio_bounce_pages);
+ bioset_exit(&c->bio_write);
+}
+
+int bch2_fs_io_write_init(struct bch_fs *c)
+{
+ if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio),
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_bio_write_init;
+
+ if (mempool_init_page_pool(&c->bio_bounce_pages,
+ max_t(unsigned,
+ c->opts.btree_node_size,
+ c->opts.encoded_extent_max) /
+ PAGE_SIZE, 0))
+ return -BCH_ERR_ENOMEM_bio_bounce_pages_init;
+
+ return 0;
+}
diff --git a/c_src/libbcachefs/io_write.h b/c_src/libbcachefs/io_write.h
new file mode 100644
index 00000000..6c276a48
--- /dev/null
+++ b/c_src/libbcachefs/io_write.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IO_WRITE_H
+#define _BCACHEFS_IO_WRITE_H
+
+#include "checksum.h"
+#include "io_write_types.h"
+
+#define to_wbio(_bio) \
+ container_of((_bio), struct bch_write_bio, bio)
+
+void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
+void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
+
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+void bch2_latency_acct(struct bch_dev *, u64, int);
+#else
+static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {}
+#endif
+
+void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
+ enum bch_data_type, const struct bkey_i *, bool);
+
+#define BCH_WRITE_FLAGS() \
+ x(ALLOC_NOWAIT) \
+ x(CACHED) \
+ x(DATA_ENCODED) \
+ x(PAGES_STABLE) \
+ x(PAGES_OWNED) \
+ x(ONLY_SPECIFIED_DEVS) \
+ x(WROTE_DATA_INLINE) \
+ x(FROM_INTERNAL) \
+ x(CHECK_ENOSPC) \
+ x(SYNC) \
+ x(MOVE) \
+ x(IN_WORKER) \
+ x(DONE) \
+ x(IO_ERROR) \
+ x(CONVERT_UNWRITTEN)
+
+enum __bch_write_flags {
+#define x(f) __BCH_WRITE_##f,
+ BCH_WRITE_FLAGS()
+#undef x
+};
+
+enum bch_write_flags {
+#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
+ BCH_WRITE_FLAGS()
+#undef x
+};
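+
+/*
+ * The x-macro lists above expand to one bit index and one bit flag per entry;
+ * e.g. BCH_WRITE_SYNC == BIT(__BCH_WRITE_SYNC).
+ */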
+
+static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
+{
+ return op->watermark == BCH_WATERMARK_copygc
+ ? op->c->copygc_wq
+ : op->c->btree_update_wq;
+}
+
+int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
+ struct bkey_i *, bool *, s64 *, s64 *);
+int bch2_extent_update(struct btree_trans *, subvol_inum,
+ struct btree_iter *, struct bkey_i *,
+ struct disk_reservation *, u64, s64 *, bool);
+
+static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
+ struct bch_io_opts opts)
+{
+ op->c = c;
+ op->end_io = NULL;
+ op->flags = 0;
+ op->written = 0;
+ op->error = 0;
+ op->csum_type = bch2_data_checksum_type(c, opts);
+ op->compression_opt = opts.compression;
+ op->nr_replicas = 0;
+ op->nr_replicas_required = c->opts.data_replicas_required;
+ op->watermark = BCH_WATERMARK_normal;
+ op->incompressible = 0;
+ op->open_buckets.nr = 0;
+ op->devs_have.nr = 0;
+ op->target = 0;
+ op->opts = opts;
+ op->subvol = 0;
+ op->pos = POS_MAX;
+ op->version = ZERO_VERSION;
+ op->write_point = (struct write_point_specifier) { 0 };
+ op->res = (struct disk_reservation) { 0 };
+ op->new_i_size = U64_MAX;
+ op->i_sectors_delta = 0;
+ op->devs_need_flush = NULL;
+}
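+
+/*
+ * Rough sketch of how a caller might drive a write, with op.wbio.bio set up to
+ * describe the data being written. Values here are illustrative and
+ * my_write_done is a hypothetical completion callback:
+ *
+ *	bch2_write_op_init(&op, c, io_opts);
+ *	op.pos		= POS(inum, sector);
+ *	op.nr_replicas	= io_opts.data_replicas;
+ *	op.subvol	= subvol;
+ *	op.write_point	= writepoint_hashed((unsigned long) current);
+ *	op.end_io	= my_write_done;
+ *	closure_call(&op.cl, bch2_write, NULL, NULL);
+ */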
+
+CLOSURE_CALLBACK(bch2_write);
+void bch2_write_point_do_index_updates(struct work_struct *);
+
+static inline struct bch_write_bio *wbio_init(struct bio *bio)
+{
+ struct bch_write_bio *wbio = to_wbio(bio);
+
+ memset(&wbio->wbio, 0, sizeof(wbio->wbio));
+ return wbio;
+}
+
+void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *);
+
+void bch2_fs_io_write_exit(struct bch_fs *);
+int bch2_fs_io_write_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_IO_WRITE_H */
diff --git a/c_src/libbcachefs/io_write_types.h b/c_src/libbcachefs/io_write_types.h
new file mode 100644
index 00000000..c7f97c2c
--- /dev/null
+++ b/c_src/libbcachefs/io_write_types.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_IO_WRITE_TYPES_H
+#define _BCACHEFS_IO_WRITE_TYPES_H
+
+#include "alloc_types.h"
+#include "btree_types.h"
+#include "buckets_types.h"
+#include "extents_types.h"
+#include "keylist_types.h"
+#include "opts.h"
+#include "super_types.h"
+
+#include <linux/llist.h>
+#include <linux/workqueue.h>
+
+struct bch_write_bio {
+ struct_group(wbio,
+ struct bch_fs *c;
+ struct bch_write_bio *parent;
+
+ u64 submit_time;
+ u64 inode_offset;
+
+ struct bch_devs_list failed;
+ u8 dev;
+
+ unsigned split:1,
+ bounce:1,
+ put_bio:1,
+ have_ioref:1,
+ nocow:1,
+ used_mempool:1,
+ first_btree_write:1;
+ );
+
+ struct bio bio;
+};
+
+struct bch_write_op {
+ struct closure cl;
+ struct bch_fs *c;
+ void (*end_io)(struct bch_write_op *);
+ u64 start_time;
+
+ unsigned written; /* sectors */
+ u16 flags;
+ s16 error; /* dio write path expects it to hold -ERESTARTSYS... */
+
+ unsigned compression_opt:8;
+ unsigned csum_type:4;
+ unsigned nr_replicas:4;
+ unsigned nr_replicas_required:4;
+ unsigned watermark:3;
+ unsigned incompressible:1;
+ unsigned stripe_waited:1;
+
+ struct bch_devs_list devs_have;
+ u16 target;
+ u16 nonce;
+ struct bch_io_opts opts;
+
+ u32 subvol;
+ struct bpos pos;
+ struct bversion version;
+
+ /* For BCH_WRITE_DATA_ENCODED: */
+ struct bch_extent_crc_unpacked crc;
+
+ struct write_point_specifier write_point;
+
+ struct write_point *wp;
+ struct list_head wp_list;
+
+ struct disk_reservation res;
+
+ struct open_buckets open_buckets;
+
+ u64 new_i_size;
+ s64 i_sectors_delta;
+
+ struct bch_devs_mask failed;
+
+ struct keylist insert_keys;
+ u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2];
+
+ /*
+ * Bitmask of devices that have had nocow writes issued to them since
+ * last flush:
+ */
+ struct bch_devs_mask *devs_need_flush;
+
+ /* Must be last: */
+ struct bch_write_bio wbio;
+};
+
+#endif /* _BCACHEFS_IO_WRITE_TYPES_H */
diff --git a/c_src/libbcachefs/journal.c b/c_src/libbcachefs/journal.c
new file mode 100644
index 00000000..8538ef34
--- /dev/null
+++ b/c_src/libbcachefs/journal.c
@@ -0,0 +1,1473 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bcachefs journalling code, for btree insertions
+ *
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_foreground.h"
+#include "bkey_methods.h"
+#include "btree_gc.h"
+#include "btree_update.h"
+#include "btree_write_buffer.h"
+#include "buckets.h"
+#include "error.h"
+#include "journal.h"
+#include "journal_io.h"
+#include "journal_reclaim.h"
+#include "journal_sb.h"
+#include "journal_seq_blacklist.h"
+#include "trace.h"
+
+static const char * const bch2_journal_errors[] = {
+#define x(n) #n,
+ JOURNAL_ERRORS()
+#undef x
+ NULL
+};
+
+static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
+{
+ return seq > j->seq_ondisk;
+}
+
+static bool __journal_entry_is_open(union journal_res_state state)
+{
+ return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
+}
+
+static inline unsigned nr_unwritten_journal_entries(struct journal *j)
+{
+ return atomic64_read(&j->seq) - j->seq_ondisk;
+}
+
+static bool journal_entry_is_open(struct journal *j)
+{
+ return __journal_entry_is_open(j->reservations);
+}
+
+static inline struct journal_buf *
+journal_seq_to_buf(struct journal *j, u64 seq)
+{
+ struct journal_buf *buf = NULL;
+
+ EBUG_ON(seq > journal_cur_seq(j));
+
+ if (journal_seq_unwritten(j, seq)) {
+ buf = j->buf + (seq & JOURNAL_BUF_MASK);
+ EBUG_ON(le64_to_cpu(buf->data->seq) != seq);
+ }
+ return buf;
+}
+
+static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(p->list); i++)
+ INIT_LIST_HEAD(&p->list[i]);
+ INIT_LIST_HEAD(&p->flushed);
+ atomic_set(&p->count, count);
+ p->devs.nr = 0;
+}
+
+/*
+ * Detect stuck journal conditions and trigger shutdown. Technically the journal
+ * can end up stuck for a variety of reasons, such as a blocked I/O, journal
+ * reservation lockup, etc. Since this is a fatal error with potentially
+ * unpredictable characteristics, we want to be fairly conservative before we
+ * decide to shut things down.
+ *
+ * Consider the journal stuck when it appears full with no ability to commit
+ * btree transactions, to discard journal buckets, nor acquire priority
+ * (reserved watermark) reservation.
+ */
+static inline bool
+journal_error_check_stuck(struct journal *j, int error, unsigned flags)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ bool stuck = false;
+ struct printbuf buf = PRINTBUF;
+
+ if (!(error == JOURNAL_ERR_journal_full ||
+ error == JOURNAL_ERR_journal_pin_full) ||
+ nr_unwritten_journal_entries(j) ||
+ (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim)
+ return stuck;
+
+ spin_lock(&j->lock);
+
+ if (j->can_discard) {
+ spin_unlock(&j->lock);
+ return stuck;
+ }
+
+ stuck = true;
+
+ /*
+ * The journal shutdown path will set ->err_seq, but do it here first to
+ * serialize against concurrent failures and avoid duplicate error
+ * reports.
+ */
+ if (j->err_seq) {
+ spin_unlock(&j->lock);
+ return stuck;
+ }
+ j->err_seq = journal_cur_seq(j);
+ spin_unlock(&j->lock);
+
+	bch_err(c, "Journal stuck! Have a pre-reservation but journal full (error %s)",
+ bch2_journal_errors[error]);
+ bch2_journal_debug_to_text(&buf, j);
+ bch_err(c, "%s", buf.buf);
+
+ printbuf_reset(&buf);
+ bch2_journal_pins_to_text(&buf, j);
+ bch_err(c, "Journal pins:\n%s", buf.buf);
+ printbuf_exit(&buf);
+
+ bch2_fatal_error(c);
+ dump_stack();
+
+ return stuck;
+}
+
+/*
+ * Final processing when the last reference of a journal buffer has been
+ * dropped. Drop the pin list reference acquired at journal entry open and write
+ * the buffer, if requested.
+ */
+void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
+ lockdep_assert_held(&j->lock);
+
+ if (__bch2_journal_pin_put(j, seq))
+ bch2_journal_reclaim_fast(j);
+ if (write)
+ closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
+}
+
+/*
+ * Close (or mark as errored) the currently open journal entry:
+ *
+ * We don't close a journal_buf until the next journal_buf is finished writing,
+ * and can be opened again - this also initializes the next journal_buf:
+ */
+static void __journal_entry_close(struct journal *j, unsigned closed_val)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct journal_buf *buf = journal_cur_buf(j);
+ union journal_res_state old, new;
+ u64 v = atomic64_read(&j->reservations.counter);
+ unsigned sectors;
+
+ BUG_ON(closed_val != JOURNAL_ENTRY_CLOSED_VAL &&
+ closed_val != JOURNAL_ENTRY_ERROR_VAL);
+
+ lockdep_assert_held(&j->lock);
+
+ do {
+ old.v = new.v = v;
+ new.cur_entry_offset = closed_val;
+
+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL ||
+ old.cur_entry_offset == new.cur_entry_offset)
+ return;
+ } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+ old.v, new.v)) != old.v);
+
+ if (!__journal_entry_is_open(old))
+ return;
+
+ /* Close out old buffer: */
+ buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
+
+ trace_journal_entry_close(c, vstruct_bytes(buf->data));
+
+ sectors = vstruct_blocks_plus(buf->data, c->block_bits,
+ buf->u64s_reserved) << c->block_bits;
+ BUG_ON(sectors > buf->sectors);
+ buf->sectors = sectors;
+
+ /*
+ * We have to set last_seq here, _before_ opening a new journal entry:
+ *
+	 * A thread may replace an old pin with a new pin on its current
+ * journal reservation - the expectation being that the journal will
+ * contain either what the old pin protected or what the new pin
+ * protects.
+ *
+ * After the old pin is dropped journal_last_seq() won't include the old
+ * pin, so we can only write the updated last_seq on the entry that
+ * contains whatever the new pin protects.
+ *
+ * Restated, we can _not_ update last_seq for a given entry if there
+ * could be a newer entry open with reservations/pins that have been
+ * taken against it.
+ *
+	 * Hence, we want to update/set last_seq on the current journal entry right
+ * before we open a new one:
+ */
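+	/*
+	 * Illustrative scenario (not from the original sources): a btree node
+	 * dirtied at entry 5 holds a pin on 5 and is re-journalled in the
+	 * currently open entry 9, moving its pin from 5 to 9; journal_last_seq()
+	 * may now be 6.  Entry 9 contains the new copy of the keys, so it may
+	 * advertise last_seq 6 - but an older, still unwritten entry such as 8
+	 * must not, or a crash after writing 8 but before 9 would make replay
+	 * skip entry 5 while the copy in 9 never reached disk.
+	 */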
+ buf->last_seq = journal_last_seq(j);
+ buf->data->last_seq = cpu_to_le64(buf->last_seq);
+ BUG_ON(buf->last_seq > le64_to_cpu(buf->data->seq));
+
+ cancel_delayed_work(&j->write_work);
+
+ bch2_journal_space_available(j);
+
+ __bch2_journal_buf_put(j, old.idx, le64_to_cpu(buf->data->seq));
+}
+
+void bch2_journal_halt(struct journal *j)
+{
+ spin_lock(&j->lock);
+ __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL);
+ if (!j->err_seq)
+ j->err_seq = journal_cur_seq(j);
+ journal_wake(j);
+ spin_unlock(&j->lock);
+}
+
+static bool journal_entry_want_write(struct journal *j)
+{
+ bool ret = !journal_entry_is_open(j) ||
+ journal_cur_seq(j) == journal_last_unwritten_seq(j);
+
+ /* Don't close it yet if we already have a write in flight: */
+ if (ret)
+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+ else if (nr_unwritten_journal_entries(j)) {
+ struct journal_buf *buf = journal_cur_buf(j);
+
+ if (!buf->flush_time) {
+ buf->flush_time = local_clock() ?: 1;
+ buf->expires = jiffies;
+ }
+ }
+
+ return ret;
+}
+
+bool bch2_journal_entry_close(struct journal *j)
+{
+ bool ret;
+
+ spin_lock(&j->lock);
+ ret = journal_entry_want_write(j);
+ spin_unlock(&j->lock);
+
+ return ret;
+}
+
+/*
+ * should _only_ be called from journal_res_get() - when we actually want a
+ * journal reservation - journal entry is open means journal is dirty:
+ */
+static int journal_entry_open(struct journal *j)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct journal_buf *buf = j->buf +
+ ((journal_cur_seq(j) + 1) & JOURNAL_BUF_MASK);
+ union journal_res_state old, new;
+ int u64s;
+ u64 v;
+
+ lockdep_assert_held(&j->lock);
+ BUG_ON(journal_entry_is_open(j));
+ BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
+
+ if (j->blocked)
+ return JOURNAL_ERR_blocked;
+
+ if (j->cur_entry_error)
+ return j->cur_entry_error;
+
+ if (bch2_journal_error(j))
+ return JOURNAL_ERR_insufficient_devices; /* -EROFS */
+
+ if (!fifo_free(&j->pin))
+ return JOURNAL_ERR_journal_pin_full;
+
+ if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
+ return JOURNAL_ERR_max_in_flight;
+
+ BUG_ON(!j->cur_entry_sectors);
+
+ buf->expires =
+ (journal_cur_seq(j) == j->flushed_seq_ondisk
+ ? jiffies
+ : j->last_flush_write) +
+ msecs_to_jiffies(c->opts.journal_flush_delay);
+
+ buf->u64s_reserved = j->entry_u64s_reserved;
+ buf->disk_sectors = j->cur_entry_sectors;
+ buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9);
+
+ u64s = (int) (buf->sectors << 9) / sizeof(u64) -
+ journal_entry_overhead(j);
+ u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
+
+ if (u64s <= (ssize_t) j->early_journal_entries.nr)
+ return JOURNAL_ERR_journal_full;
+
+ if (fifo_empty(&j->pin) && j->reclaim_thread)
+ wake_up_process(j->reclaim_thread);
+
+ /*
+ * The fifo_push() needs to happen at the same time as j->seq is
+ * incremented for journal_last_seq() to be calculated correctly
+ */
+ atomic64_inc(&j->seq);
+ journal_pin_list_init(fifo_push_ref(&j->pin), 1);
+
+ BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
+
+ BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
+
+ bkey_extent_init(&buf->key);
+ buf->noflush = false;
+ buf->must_flush = false;
+ buf->separate_flush = false;
+ buf->flush_time = 0;
+ buf->need_flush_to_write_buffer = true;
+
+ memset(buf->data, 0, sizeof(*buf->data));
+ buf->data->seq = cpu_to_le64(journal_cur_seq(j));
+ buf->data->u64s = 0;
+
+ if (j->early_journal_entries.nr) {
+ memcpy(buf->data->_data, j->early_journal_entries.data,
+ j->early_journal_entries.nr * sizeof(u64));
+ le32_add_cpu(&buf->data->u64s, j->early_journal_entries.nr);
+ }
+
+ /*
+ * Must be set before marking the journal entry as open:
+ */
+ j->cur_entry_u64s = u64s;
+
+ v = atomic64_read(&j->reservations.counter);
+ do {
+ old.v = new.v = v;
+
+ BUG_ON(old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL);
+
+ new.idx++;
+ BUG_ON(journal_state_count(new, new.idx));
+ BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK));
+
+ journal_state_inc(&new);
+
+ /* Handle any already added entries */
+ new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
+ } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+ old.v, new.v)) != old.v);
+
+ mod_delayed_work(c->io_complete_wq,
+ &j->write_work,
+ msecs_to_jiffies(c->opts.journal_flush_delay));
+ journal_wake(j);
+
+ if (j->early_journal_entries.nr)
+ darray_exit(&j->early_journal_entries);
+ return 0;
+}
+
+static bool journal_quiesced(struct journal *j)
+{
+ bool ret = atomic64_read(&j->seq) == j->seq_ondisk;
+
+ if (!ret)
+ bch2_journal_entry_close(j);
+ return ret;
+}
+
+static void journal_quiesce(struct journal *j)
+{
+ wait_event(j->wait, journal_quiesced(j));
+}
+
+static void journal_write_work(struct work_struct *work)
+{
+ struct journal *j = container_of(work, struct journal, write_work.work);
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ long delta;
+
+ spin_lock(&j->lock);
+ if (!__journal_entry_is_open(j->reservations))
+ goto unlock;
+
+ delta = journal_cur_buf(j)->expires - jiffies;
+
+ if (delta > 0)
+ mod_delayed_work(c->io_complete_wq, &j->write_work, delta);
+ else
+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+unlock:
+ spin_unlock(&j->lock);
+}
+
+static int __journal_res_get(struct journal *j, struct journal_res *res,
+ unsigned flags)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct journal_buf *buf;
+ bool can_discard;
+ int ret;
+retry:
+ if (journal_res_get_fast(j, res, flags))
+ return 0;
+
+ if (bch2_journal_error(j))
+ return -BCH_ERR_erofs_journal_err;
+
+ spin_lock(&j->lock);
+
+ /* check once more in case somebody else shut things down... */
+ if (bch2_journal_error(j)) {
+ spin_unlock(&j->lock);
+ return -BCH_ERR_erofs_journal_err;
+ }
+
+ /*
+ * Recheck after taking the lock, so we don't race with another thread
+	 * that just did journal_entry_open(), and then call
+	 * bch2_journal_entry_close() unnecessarily
+ */
+ if (journal_res_get_fast(j, res, flags)) {
+ spin_unlock(&j->lock);
+ return 0;
+ }
+
+ if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
+ /*
+ * Don't want to close current journal entry, just need to
+ * invoke reclaim:
+ */
+ ret = JOURNAL_ERR_journal_full;
+ goto unlock;
+ }
+
+ /*
+ * If we couldn't get a reservation because the current buf filled up,
+ * and we had room for a bigger entry on disk, signal that we want to
+ * realloc the journal bufs:
+ */
+ buf = journal_cur_buf(j);
+ if (journal_entry_is_open(j) &&
+ buf->buf_size >> 9 < buf->disk_sectors &&
+ buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
+ j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
+
+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+ ret = journal_entry_open(j);
+
+ if (ret == JOURNAL_ERR_max_in_flight) {
+ track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
+ &j->max_in_flight_start, true);
+ trace_and_count(c, journal_entry_full, c);
+ }
+unlock:
+ can_discard = j->can_discard;
+ spin_unlock(&j->lock);
+
+ if (!ret)
+ goto retry;
+ if (journal_error_check_stuck(j, ret, flags))
+ ret = -BCH_ERR_journal_res_get_blocked;
+
+ /*
+ * Journal is full - can't rely on reclaim from work item due to
+ * freezing:
+ */
+ if ((ret == JOURNAL_ERR_journal_full ||
+ ret == JOURNAL_ERR_journal_pin_full) &&
+ !(flags & JOURNAL_RES_GET_NONBLOCK)) {
+ if (can_discard) {
+ bch2_journal_do_discards(j);
+ goto retry;
+ }
+
+ if (mutex_trylock(&j->reclaim_lock)) {
+ bch2_journal_reclaim(j);
+ mutex_unlock(&j->reclaim_lock);
+ }
+ }
+
+ return ret == JOURNAL_ERR_insufficient_devices
+ ? -BCH_ERR_erofs_journal_err
+ : -BCH_ERR_journal_res_get_blocked;
+}
+
+/*
+ * Essentially the entry function to the journaling code. When bcachefs is doing
+ * a btree insert, it calls this function to get the current journal write.
+ * A journal write is the structure used to set up journal writes. The calling
+ * function will then add its keys to the structure, queuing them for the next
+ * write.
+ *
+ * To ensure forward progress, the current task must not be holding any
+ * btree node write locks.
+ */
+int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
+ unsigned flags)
+{
+ int ret;
+
+ closure_wait_event(&j->async_wait,
+ (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
+ (flags & JOURNAL_RES_GET_NONBLOCK));
+ return ret;
+}
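+
+/*
+ * Illustrative sketch (not part of the original sources) of how a caller
+ * typically uses the reservation interface declared in journal.h; @k,
+ * @btree_id and @level are stand-ins for whatever the caller is inserting:
+ *
+ *	struct journal_res res = { 0 };
+ *	int ret = bch2_journal_res_get(j, &res, jset_u64s(k->k.u64s), 0);
+ *	if (ret)
+ *		return ret;
+ *
+ *	struct jset_entry *entry =
+ *		bch2_journal_add_entry(j, &res, BCH_JSET_ENTRY_btree_keys,
+ *				       btree_id, level, k->k.u64s);
+ *	memcpy_u64s_small(entry->_data, k, k->k.u64s);
+ *
+ *	bch2_journal_res_put(j, &res);
+ */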
+
+/* journal_entry_res: */
+
+void bch2_journal_entry_res_resize(struct journal *j,
+ struct journal_entry_res *res,
+ unsigned new_u64s)
+{
+ union journal_res_state state;
+ int d = new_u64s - res->u64s;
+
+ spin_lock(&j->lock);
+
+ j->entry_u64s_reserved += d;
+ if (d <= 0)
+ goto out;
+
+ j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d);
+ smp_mb();
+ state = READ_ONCE(j->reservations);
+
+ if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL &&
+ state.cur_entry_offset > j->cur_entry_u64s) {
+ j->cur_entry_u64s += d;
+ /*
+ * Not enough room in current journal entry, have to flush it:
+ */
+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+ } else {
+ journal_cur_buf(j)->u64s_reserved += d;
+ }
+out:
+ spin_unlock(&j->lock);
+ res->u64s += d;
+}
+
+/* journal flushing: */
+
+/**
+ * bch2_journal_flush_seq_async - wait for a journal entry to be written
+ * @j: journal object
+ * @seq: seq to flush
+ * @parent: closure object to wait with
+ * Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed,
+ * -EIO if @seq will never be flushed
+ *
+ * Like bch2_journal_wait_on_seq, except that it triggers a write immediately if
+ * necessary
+ */
+int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
+ struct closure *parent)
+{
+ struct journal_buf *buf;
+ int ret = 0;
+
+ if (seq <= j->flushed_seq_ondisk)
+ return 1;
+
+ spin_lock(&j->lock);
+
+ if (WARN_ONCE(seq > journal_cur_seq(j),
+ "requested to flush journal seq %llu, but currently at %llu",
+ seq, journal_cur_seq(j)))
+ goto out;
+
+ /* Recheck under lock: */
+ if (j->err_seq && seq >= j->err_seq) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (seq <= j->flushed_seq_ondisk) {
+ ret = 1;
+ goto out;
+ }
+
+ /* if seq was written, but not flushed - flush a newer one instead */
+ seq = max(seq, journal_last_unwritten_seq(j));
+
+recheck_need_open:
+ if (seq > journal_cur_seq(j)) {
+ struct journal_res res = { 0 };
+
+ if (journal_entry_is_open(j))
+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+
+ spin_unlock(&j->lock);
+
+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+ if (ret)
+ return ret;
+
+ seq = res.seq;
+ buf = j->buf + (seq & JOURNAL_BUF_MASK);
+ buf->must_flush = true;
+
+ if (!buf->flush_time) {
+ buf->flush_time = local_clock() ?: 1;
+ buf->expires = jiffies;
+ }
+
+ if (parent && !closure_wait(&buf->wait, parent))
+ BUG();
+
+ bch2_journal_res_put(j, &res);
+
+ spin_lock(&j->lock);
+ goto want_write;
+ }
+
+ /*
+ * if write was kicked off without a flush, flush the next sequence
+ * number instead
+ */
+ buf = journal_seq_to_buf(j, seq);
+ if (buf->noflush) {
+ seq++;
+ goto recheck_need_open;
+ }
+
+ buf->must_flush = true;
+
+ if (parent && !closure_wait(&buf->wait, parent))
+ BUG();
+want_write:
+ if (seq == journal_cur_seq(j))
+ journal_entry_want_write(j);
+out:
+ spin_unlock(&j->lock);
+ return ret;
+}
+
+int bch2_journal_flush_seq(struct journal *j, u64 seq)
+{
+ u64 start_time = local_clock();
+ int ret, ret2;
+
+ /*
+ * Don't update time_stats when @seq is already flushed:
+ */
+ if (seq <= j->flushed_seq_ondisk)
+ return 0;
+
+ ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)));
+
+ if (!ret)
+ bch2_time_stats_update(j->flush_seq_time, start_time);
+
+ return ret ?: ret2 < 0 ? ret2 : 0;
+}
+
+/*
+ * bch2_journal_flush_async - if there is an open journal entry, or a journal
+ * still being written, write it and wait for the write to complete
+ */
+void bch2_journal_flush_async(struct journal *j, struct closure *parent)
+{
+ bch2_journal_flush_seq_async(j, atomic64_read(&j->seq), parent);
+}
+
+int bch2_journal_flush(struct journal *j)
+{
+ return bch2_journal_flush_seq(j, atomic64_read(&j->seq));
+}
+
+/*
+ * bch2_journal_noflush_seq - tell the journal not to issue any flushes before
+ * @seq
+ */
+bool bch2_journal_noflush_seq(struct journal *j, u64 seq)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ u64 unwritten_seq;
+ bool ret = false;
+
+ if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush)))
+ return false;
+
+ if (seq <= c->journal.flushed_seq_ondisk)
+ return false;
+
+ spin_lock(&j->lock);
+ if (seq <= c->journal.flushed_seq_ondisk)
+ goto out;
+
+ for (unwritten_seq = journal_last_unwritten_seq(j);
+ unwritten_seq < seq;
+ unwritten_seq++) {
+ struct journal_buf *buf = journal_seq_to_buf(j, unwritten_seq);
+
+ /* journal write is already in flight, and was a flush write: */
+ if (unwritten_seq == journal_last_unwritten_seq(j) && !buf->noflush)
+ goto out;
+
+ buf->noflush = true;
+ }
+
+ ret = true;
+out:
+ spin_unlock(&j->lock);
+ return ret;
+}
+
+int bch2_journal_meta(struct journal *j)
+{
+ struct journal_buf *buf;
+ struct journal_res res;
+ int ret;
+
+ memset(&res, 0, sizeof(res));
+
+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+ if (ret)
+ return ret;
+
+ buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
+ buf->must_flush = true;
+
+ if (!buf->flush_time) {
+ buf->flush_time = local_clock() ?: 1;
+ buf->expires = jiffies;
+ }
+
+ bch2_journal_res_put(j, &res);
+
+ return bch2_journal_flush_seq(j, res.seq);
+}
+
+/* block/unlock the journal: */
+
+void bch2_journal_unblock(struct journal *j)
+{
+ spin_lock(&j->lock);
+ j->blocked--;
+ spin_unlock(&j->lock);
+
+ journal_wake(j);
+}
+
+void bch2_journal_block(struct journal *j)
+{
+ spin_lock(&j->lock);
+ j->blocked++;
+ spin_unlock(&j->lock);
+
+ journal_quiesce(j);
+}
+
+static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq)
+{
+ struct journal_buf *ret = NULL;
+
+ mutex_lock(&j->buf_lock);
+ spin_lock(&j->lock);
+ max_seq = min(max_seq, journal_cur_seq(j));
+
+ for (u64 seq = journal_last_unwritten_seq(j);
+ seq <= max_seq;
+ seq++) {
+ unsigned idx = seq & JOURNAL_BUF_MASK;
+ struct journal_buf *buf = j->buf + idx;
+
+ if (buf->need_flush_to_write_buffer) {
+ if (seq == journal_cur_seq(j))
+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+
+ union journal_res_state s;
+ s.v = atomic64_read_acquire(&j->reservations.counter);
+
+ ret = journal_state_count(s, idx)
+ ? ERR_PTR(-EAGAIN)
+ : buf;
+ break;
+ }
+ }
+
+ spin_unlock(&j->lock);
+ if (IS_ERR_OR_NULL(ret))
+ mutex_unlock(&j->buf_lock);
+ return ret;
+}
+
+struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq)
+{
+ struct journal_buf *ret;
+
+ wait_event(j->wait, (ret = __bch2_next_write_buffer_flush_journal_buf(j, max_seq)) != ERR_PTR(-EAGAIN));
+ return ret;
+}
+
+/* allocate journal on a device: */
+
+static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
+ bool new_fs, struct closure *cl)
+{
+ struct bch_fs *c = ca->fs;
+ struct journal_device *ja = &ca->journal;
+ u64 *new_bucket_seq = NULL, *new_buckets = NULL;
+ struct open_bucket **ob = NULL;
+ long *bu = NULL;
+ unsigned i, pos, nr_got = 0, nr_want = nr - ja->nr;
+ int ret = 0;
+
+ BUG_ON(nr <= ja->nr);
+
+ bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL);
+ ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL);
+ new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL);
+ new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL);
+ if (!bu || !ob || !new_buckets || !new_bucket_seq) {
+ ret = -BCH_ERR_ENOMEM_set_nr_journal_buckets;
+ goto err_free;
+ }
+
+ for (nr_got = 0; nr_got < nr_want; nr_got++) {
+ if (new_fs) {
+ bu[nr_got] = bch2_bucket_alloc_new_fs(ca);
+ if (bu[nr_got] < 0) {
+ ret = -BCH_ERR_ENOSPC_bucket_alloc;
+ break;
+ }
+ } else {
+ ob[nr_got] = bch2_bucket_alloc(c, ca, BCH_WATERMARK_normal, cl);
+ ret = PTR_ERR_OR_ZERO(ob[nr_got]);
+ if (ret)
+ break;
+
+ ret = bch2_trans_run(c,
+ bch2_trans_mark_metadata_bucket(trans, ca,
+ ob[nr_got]->bucket, BCH_DATA_journal,
+ ca->mi.bucket_size));
+ if (ret) {
+ bch2_open_bucket_put(c, ob[nr_got]);
+ bch_err_msg(c, ret, "marking new journal buckets");
+ break;
+ }
+
+ bu[nr_got] = ob[nr_got]->bucket;
+ }
+ }
+
+ if (!nr_got)
+ goto err_free;
+
+ /* Don't return an error if we successfully allocated some buckets: */
+ ret = 0;
+
+ if (c) {
+ bch2_journal_flush_all_pins(&c->journal);
+ bch2_journal_block(&c->journal);
+ mutex_lock(&c->sb_lock);
+ }
+
+ memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64));
+ memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64));
+
+ BUG_ON(ja->discard_idx > ja->nr);
+
+ pos = ja->discard_idx ?: ja->nr;
+
+ memmove(new_buckets + pos + nr_got,
+ new_buckets + pos,
+ sizeof(new_buckets[0]) * (ja->nr - pos));
+ memmove(new_bucket_seq + pos + nr_got,
+ new_bucket_seq + pos,
+ sizeof(new_bucket_seq[0]) * (ja->nr - pos));
+
+ for (i = 0; i < nr_got; i++) {
+ new_buckets[pos + i] = bu[i];
+ new_bucket_seq[pos + i] = 0;
+ }
+
+ nr = ja->nr + nr_got;
+
+ ret = bch2_journal_buckets_to_sb(c, ca, new_buckets, nr);
+ if (ret)
+ goto err_unblock;
+
+ if (!new_fs)
+ bch2_write_super(c);
+
+ /* Commit: */
+ if (c)
+ spin_lock(&c->journal.lock);
+
+ swap(new_buckets, ja->buckets);
+ swap(new_bucket_seq, ja->bucket_seq);
+ ja->nr = nr;
+
+ if (pos <= ja->discard_idx)
+ ja->discard_idx = (ja->discard_idx + nr_got) % ja->nr;
+ if (pos <= ja->dirty_idx_ondisk)
+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + nr_got) % ja->nr;
+ if (pos <= ja->dirty_idx)
+ ja->dirty_idx = (ja->dirty_idx + nr_got) % ja->nr;
+ if (pos <= ja->cur_idx)
+ ja->cur_idx = (ja->cur_idx + nr_got) % ja->nr;
+
+ if (c)
+ spin_unlock(&c->journal.lock);
+err_unblock:
+ if (c) {
+ bch2_journal_unblock(&c->journal);
+ mutex_unlock(&c->sb_lock);
+ }
+
+ if (ret && !new_fs)
+ for (i = 0; i < nr_got; i++)
+ bch2_trans_run(c,
+ bch2_trans_mark_metadata_bucket(trans, ca,
+ bu[i], BCH_DATA_free, 0));
+err_free:
+ if (!new_fs)
+ for (i = 0; i < nr_got; i++)
+ bch2_open_bucket_put(c, ob[i]);
+
+ kfree(new_bucket_seq);
+ kfree(new_buckets);
+ kfree(ob);
+ kfree(bu);
+ return ret;
+}
+
+/*
+ * Allocate more journal space at runtime - not currently making use of it, but
+ * the code works:
+ */
+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
+ unsigned nr)
+{
+ struct journal_device *ja = &ca->journal;
+ struct closure cl;
+ int ret = 0;
+
+ closure_init_stack(&cl);
+
+ down_write(&c->state_lock);
+
+ /* don't handle reducing nr of buckets yet: */
+ if (nr < ja->nr)
+ goto unlock;
+
+ while (ja->nr < nr) {
+ struct disk_reservation disk_res = { 0, 0, 0 };
+
+ /*
+ * note: journal buckets aren't really counted as _sectors_ used yet, so
+ * we don't need the disk reservation to avoid the BUG_ON() in buckets.c
+ * when space used goes up without a reservation - but we do need the
+ * reservation to ensure we'll actually be able to allocate:
+ *
+ * XXX: that's not right, disk reservations only ensure a
+ * filesystem-wide allocation will succeed, this is a device
+ * specific allocation - we can hang here:
+ */
+
+ ret = bch2_disk_reservation_get(c, &disk_res,
+ bucket_to_sector(ca, nr - ja->nr), 1, 0);
+ if (ret)
+ break;
+
+ ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl);
+
+ bch2_disk_reservation_put(c, &disk_res);
+
+ closure_sync(&cl);
+
+ if (ret && ret != -BCH_ERR_bucket_alloc_blocked)
+ break;
+ }
+
+ bch_err_fn(c, ret);
+unlock:
+ up_write(&c->state_lock);
+ return ret;
+}
+
+int bch2_dev_journal_alloc(struct bch_dev *ca)
+{
+ unsigned nr;
+ int ret;
+
+ if (dynamic_fault("bcachefs:add:journal_alloc")) {
+ ret = -BCH_ERR_ENOMEM_set_nr_journal_buckets;
+ goto err;
+ }
+
+ /* 1/128th of the device by default: */
+ nr = ca->mi.nbuckets >> 7;
+
+ /*
+ * clamp journal size to 8192 buckets or 8GB (in sectors), whichever
+ * is smaller:
+ */
+ nr = clamp_t(unsigned, nr,
+ BCH_JOURNAL_BUCKETS_MIN,
+ min(1 << 13,
+ (1 << 24) / ca->mi.bucket_size));
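+
+	/*
+	 * Worked example (illustrative, assuming 512-byte sectors): a device
+	 * with 2^20 buckets of 2048 sectors (1 MiB) gets nr = 2^20 >> 7 = 8192,
+	 * and the upper clamp min(1 << 13, (1 << 24) / 2048) is also 8192 -
+	 * i.e. an 8 GiB journal.
+	 */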
+
+ ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
+err:
+ bch_err_fn(ca, ret);
+ return ret;
+}
+
+int bch2_fs_journal_alloc(struct bch_fs *c)
+{
+ for_each_online_member(c, ca) {
+ if (ca->journal.nr)
+ continue;
+
+ int ret = bch2_dev_journal_alloc(ca);
+ if (ret) {
+ percpu_ref_put(&ca->io_ref);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/* startup/shutdown: */
+
+static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
+{
+ bool ret = false;
+ u64 seq;
+
+ spin_lock(&j->lock);
+ for (seq = journal_last_unwritten_seq(j);
+ seq <= journal_cur_seq(j) && !ret;
+ seq++) {
+ struct journal_buf *buf = journal_seq_to_buf(j, seq);
+
+ if (bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx))
+ ret = true;
+ }
+ spin_unlock(&j->lock);
+
+ return ret;
+}
+
+void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
+{
+ wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx));
+}
+
+void bch2_fs_journal_stop(struct journal *j)
+{
+ bch2_journal_reclaim_stop(j);
+ bch2_journal_flush_all_pins(j);
+
+ wait_event(j->wait, bch2_journal_entry_close(j));
+
+ /*
+ * Always write a new journal entry, to make sure the clock hands are up
+ * to date (and match the superblock)
+ */
+ bch2_journal_meta(j);
+
+ journal_quiesce(j);
+
+ BUG_ON(!bch2_journal_error(j) &&
+ test_bit(JOURNAL_REPLAY_DONE, &j->flags) &&
+ j->last_empty_seq != journal_cur_seq(j));
+
+ cancel_delayed_work_sync(&j->write_work);
+}
+
+int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct journal_entry_pin_list *p;
+ struct journal_replay *i, **_i;
+ struct genradix_iter iter;
+ bool had_entries = false;
+ unsigned ptr;
+ u64 last_seq = cur_seq, nr, seq;
+
+ genradix_for_each_reverse(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
+
+ last_seq = le64_to_cpu(i->j.last_seq);
+ break;
+ }
+
+ nr = cur_seq - last_seq;
+
+ if (nr + 1 > j->pin.size) {
+ free_fifo(&j->pin);
+ init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
+ if (!j->pin.data) {
+ bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
+ return -BCH_ERR_ENOMEM_journal_pin_fifo;
+ }
+ }
+
+ j->replay_journal_seq = last_seq;
+ j->replay_journal_seq_end = cur_seq;
+ j->last_seq_ondisk = last_seq;
+ j->flushed_seq_ondisk = cur_seq - 1;
+ j->seq_ondisk = cur_seq - 1;
+ j->pin.front = last_seq;
+ j->pin.back = cur_seq;
+ atomic64_set(&j->seq, cur_seq - 1);
+
+ fifo_for_each_entry_ptr(p, &j->pin, seq)
+ journal_pin_list_init(p, 1);
+
+ genradix_for_each(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
+
+ seq = le64_to_cpu(i->j.seq);
+ BUG_ON(seq >= cur_seq);
+
+ if (seq < last_seq)
+ continue;
+
+ if (journal_entry_empty(&i->j))
+ j->last_empty_seq = le64_to_cpu(i->j.seq);
+
+ p = journal_seq_pin(j, seq);
+
+ p->devs.nr = 0;
+ for (ptr = 0; ptr < i->nr_ptrs; ptr++)
+ bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev);
+
+ had_entries = true;
+ }
+
+ if (!had_entries)
+ j->last_empty_seq = cur_seq;
+
+ spin_lock(&j->lock);
+
+ set_bit(JOURNAL_STARTED, &j->flags);
+ j->last_flush_write = jiffies;
+
+ j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
+ j->reservations.unwritten_idx++;
+
+ c->last_bucket_seq_cleanup = journal_cur_seq(j);
+
+ bch2_journal_space_available(j);
+ spin_unlock(&j->lock);
+
+ return bch2_journal_reclaim_start(j);
+}
+
+/* init/exit: */
+
+void bch2_dev_journal_exit(struct bch_dev *ca)
+{
+ kfree(ca->journal.bio);
+ kfree(ca->journal.buckets);
+ kfree(ca->journal.bucket_seq);
+
+ ca->journal.bio = NULL;
+ ca->journal.buckets = NULL;
+ ca->journal.bucket_seq = NULL;
+}
+
+int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
+{
+ struct journal_device *ja = &ca->journal;
+ struct bch_sb_field_journal *journal_buckets =
+ bch2_sb_field_get(sb, journal);
+ struct bch_sb_field_journal_v2 *journal_buckets_v2 =
+ bch2_sb_field_get(sb, journal_v2);
+ unsigned i, nr_bvecs;
+
+ ja->nr = 0;
+
+ if (journal_buckets_v2) {
+ unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
+
+ for (i = 0; i < nr; i++)
+ ja->nr += le64_to_cpu(journal_buckets_v2->d[i].nr);
+ } else if (journal_buckets) {
+ ja->nr = bch2_nr_journal_buckets(journal_buckets);
+ }
+
+ ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
+ if (!ja->bucket_seq)
+ return -BCH_ERR_ENOMEM_dev_journal_init;
+
+ nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
+
+ ca->journal.bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
+ if (!ca->journal.bio)
+ return -BCH_ERR_ENOMEM_dev_journal_init;
+
+ bio_init(ca->journal.bio, NULL, ca->journal.bio->bi_inline_vecs, nr_bvecs, 0);
+
+ ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
+ if (!ja->buckets)
+ return -BCH_ERR_ENOMEM_dev_journal_init;
+
+ if (journal_buckets_v2) {
+ unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
+ unsigned j, dst = 0;
+
+ for (i = 0; i < nr; i++)
+ for (j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++)
+ ja->buckets[dst++] =
+ le64_to_cpu(journal_buckets_v2->d[i].start) + j;
+ } else if (journal_buckets) {
+ for (i = 0; i < ja->nr; i++)
+ ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]);
+ }
+
+ return 0;
+}
+
+void bch2_fs_journal_exit(struct journal *j)
+{
+ unsigned i;
+
+ darray_exit(&j->early_journal_entries);
+
+ for (i = 0; i < ARRAY_SIZE(j->buf); i++)
+ kvpfree(j->buf[i].data, j->buf[i].buf_size);
+ free_fifo(&j->pin);
+}
+
+int bch2_fs_journal_init(struct journal *j)
+{
+ static struct lock_class_key res_key;
+ unsigned i;
+
+ mutex_init(&j->buf_lock);
+ spin_lock_init(&j->lock);
+ spin_lock_init(&j->err_lock);
+ init_waitqueue_head(&j->wait);
+ INIT_DELAYED_WORK(&j->write_work, journal_write_work);
+ init_waitqueue_head(&j->reclaim_wait);
+ init_waitqueue_head(&j->pin_flush_wait);
+ mutex_init(&j->reclaim_lock);
+ mutex_init(&j->discard_lock);
+
+ lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
+
+ atomic64_set(&j->reservations.counter,
+ ((union journal_res_state)
+ { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
+
+ if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)))
+ return -BCH_ERR_ENOMEM_journal_pin_fifo;
+
+ for (i = 0; i < ARRAY_SIZE(j->buf); i++) {
+ j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN;
+ j->buf[i].data = kvpmalloc(j->buf[i].buf_size, GFP_KERNEL);
+ if (!j->buf[i].data)
+ return -BCH_ERR_ENOMEM_journal_buf;
+ }
+
+ j->pin.front = j->pin.back = 1;
+ return 0;
+}
+
+/* debug: */
+
+void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ union journal_res_state s;
+ unsigned long now = jiffies;
+ u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes;
+
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 24);
+ out->atomic++;
+
+ rcu_read_lock();
+ s = READ_ONCE(j->reservations);
+
+ prt_printf(out, "dirty journal entries:\t%llu/%llu\n", fifo_used(&j->pin), j->pin.size);
+ prt_printf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j));
+ prt_printf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk);
+ prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
+ prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
+ prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
+ prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]);
+ prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
+ prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
+ prt_printf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes);
+ prt_printf(out, "average write size:\t");
+ prt_human_readable_u64(out, nr_writes ? div64_u64(j->entry_bytes_written, nr_writes) : 0);
+ prt_newline(out);
+ prt_printf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim);
+ prt_printf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim);
+ prt_printf(out, "reclaim kicked:\t\t%u\n", j->reclaim_kicked);
+ prt_printf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now)
+ ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0);
+ prt_printf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors);
+ prt_printf(out, "current entry error:\t%s\n", bch2_journal_errors[j->cur_entry_error]);
+ prt_printf(out, "current entry:\t\t");
+
+ switch (s.cur_entry_offset) {
+ case JOURNAL_ENTRY_ERROR_VAL:
+ prt_printf(out, "error");
+ break;
+ case JOURNAL_ENTRY_CLOSED_VAL:
+ prt_printf(out, "closed");
+ break;
+ default:
+ prt_printf(out, "%u/%u", s.cur_entry_offset, j->cur_entry_u64s);
+ break;
+ }
+
+ prt_newline(out);
+
+ for (u64 seq = journal_cur_seq(j);
+ seq >= journal_last_unwritten_seq(j);
+ --seq) {
+ unsigned i = seq & JOURNAL_BUF_MASK;
+
+ prt_printf(out, "unwritten entry:");
+ prt_tab(out);
+ prt_printf(out, "%llu", seq);
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+
+ prt_printf(out, "refcount:");
+ prt_tab(out);
+ prt_printf(out, "%u", journal_state_count(s, i));
+ prt_newline(out);
+
+ prt_printf(out, "sectors:");
+ prt_tab(out);
+ prt_printf(out, "%u", j->buf[i].sectors);
+ prt_newline(out);
+
+ prt_printf(out, "expires");
+ prt_tab(out);
+ prt_printf(out, "%li jiffies", j->buf[i].expires - jiffies);
+ prt_newline(out);
+
+ printbuf_indent_sub(out, 2);
+ }
+
+ prt_printf(out,
+ "replay done:\t\t%i\n",
+ test_bit(JOURNAL_REPLAY_DONE, &j->flags));
+
+ prt_printf(out, "space:\n");
+ prt_printf(out, "\tdiscarded\t%u:%u\n",
+ j->space[journal_space_discarded].next_entry,
+ j->space[journal_space_discarded].total);
+ prt_printf(out, "\tclean ondisk\t%u:%u\n",
+ j->space[journal_space_clean_ondisk].next_entry,
+ j->space[journal_space_clean_ondisk].total);
+ prt_printf(out, "\tclean\t\t%u:%u\n",
+ j->space[journal_space_clean].next_entry,
+ j->space[journal_space_clean].total);
+ prt_printf(out, "\ttotal\t\t%u:%u\n",
+ j->space[journal_space_total].next_entry,
+ j->space[journal_space_total].total);
+
+ for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
+ struct journal_device *ja = &ca->journal;
+
+ if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d))
+ continue;
+
+ if (!ja->nr)
+ continue;
+
+ prt_printf(out, "dev %u:\n", ca->dev_idx);
+ prt_printf(out, "\tnr\t\t%u\n", ja->nr);
+ prt_printf(out, "\tbucket size\t%u\n", ca->mi.bucket_size);
+ prt_printf(out, "\tavailable\t%u:%u\n", bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free);
+ prt_printf(out, "\tdiscard_idx\t%u\n", ja->discard_idx);
+ prt_printf(out, "\tdirty_ondisk\t%u (seq %llu)\n", ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk]);
+ prt_printf(out, "\tdirty_idx\t%u (seq %llu)\n", ja->dirty_idx, ja->bucket_seq[ja->dirty_idx]);
+ prt_printf(out, "\tcur_idx\t\t%u (seq %llu)\n", ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
+ }
+
+ rcu_read_unlock();
+
+ --out->atomic;
+}
+
+void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
+{
+ spin_lock(&j->lock);
+ __bch2_journal_debug_to_text(out, j);
+ spin_unlock(&j->lock);
+}
+
+bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
+{
+ struct journal_entry_pin_list *pin_list;
+ struct journal_entry_pin *pin;
+ unsigned i;
+
+ spin_lock(&j->lock);
+ *seq = max(*seq, j->pin.front);
+
+ if (*seq >= j->pin.back) {
+ spin_unlock(&j->lock);
+ return true;
+ }
+
+ out->atomic++;
+
+ pin_list = journal_seq_pin(j, *seq);
+
+ prt_printf(out, "%llu: count %u", *seq, atomic_read(&pin_list->count));
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+
+ for (i = 0; i < ARRAY_SIZE(pin_list->list); i++)
+ list_for_each_entry(pin, &pin_list->list[i], list) {
+ prt_printf(out, "\t%px %ps", pin, pin->flush);
+ prt_newline(out);
+ }
+
+ if (!list_empty(&pin_list->flushed)) {
+ prt_printf(out, "flushed:");
+ prt_newline(out);
+ }
+
+ list_for_each_entry(pin, &pin_list->flushed, list) {
+ prt_printf(out, "\t%px %ps", pin, pin->flush);
+ prt_newline(out);
+ }
+
+ printbuf_indent_sub(out, 2);
+
+ --out->atomic;
+ spin_unlock(&j->lock);
+
+ return false;
+}
+
+void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
+{
+ u64 seq = 0;
+
+ while (!bch2_journal_seq_pins_to_text(out, j, &seq))
+ seq++;
+}
diff --git a/c_src/libbcachefs/journal.h b/c_src/libbcachefs/journal.h
new file mode 100644
index 00000000..4544ce24
--- /dev/null
+++ b/c_src/libbcachefs/journal.h
@@ -0,0 +1,448 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_H
+#define _BCACHEFS_JOURNAL_H
+
+/*
+ * THE JOURNAL:
+ *
+ * The primary purpose of the journal is to log updates (insertions) to the
+ * b-tree, to avoid having to do synchronous updates to the b-tree on disk.
+ *
+ * Without the journal, the b-tree is always internally consistent on
+ * disk - and in fact, in the earliest incarnations bcache didn't have a journal
+ * but did handle unclean shutdowns by doing all index updates synchronously
+ * (with coalescing).
+ *
+ * Updates to interior nodes still happen synchronously and without the journal
+ * (for simplicity) - this may change eventually but updates to interior nodes
+ * are rare enough it's not a huge priority.
+ *
+ * This means the journal is relatively separate from the b-tree; it consists of
+ * just a list of keys and journal replay consists of just redoing those
+ * insertions in the same order that they appear in the journal.
+ *
+ * PERSISTENCE:
+ *
+ * For synchronous updates (where we're waiting on the index update to hit
+ * disk), the journal entry will be written out immediately (or as soon as
+ * possible, if the write for the previous journal entry was still in flight).
+ *
+ * Synchronous updates are specified by passing a closure (@flush_cl) to
+ * bch2_btree_insert() or bch_btree_insert_node(), which then pass that parameter
+ * down to the journalling code. That closure will wait on the journal write to
+ * complete (via closure_wait()).
+ *
+ * If the index update wasn't synchronous, the journal entry will be
+ * written out after 10 ms have elapsed, by default (the delay_ms field
+ * in struct journal).
+ *
+ * JOURNAL ENTRIES:
+ *
+ * A journal entry is variable size (struct jset), it's got a fixed length
+ * header and then a variable number of struct jset_entry entries.
+ *
+ * Journal entries are identified by monotonically increasing 64 bit sequence
+ * numbers - jset->seq; other places in the code refer to this sequence number.
+ *
+ * A jset_entry entry contains one or more bkeys (which is what gets inserted
+ * into the b-tree). We need a container to indicate which b-tree the key is
+ * for; also, the roots of the various b-trees are stored in jset_entry entries
+ * (one for each b-tree) - this lets us add new b-tree types without changing
+ * the on disk format.
+ *
+ * We also keep some things in the journal header that are logically part of the
+ * superblock - all the things that are frequently updated. This is for future
+ * bcache on raw flash support; the superblock (which will become another
+ * journal) can't be moved or wear leveled, so it contains just enough
+ * information to find the main journal, and the superblock only has to be
+ * rewritten when we want to move/wear level the main journal.
+ *
+ * JOURNAL LAYOUT ON DISK:
+ *
+ * The journal is written to a ringbuffer of buckets (which is kept in the
+ * superblock); the individual buckets are not necessarily contiguous on disk
+ * which means that journal entries are not allowed to span buckets, but also
+ * that we can resize the journal at runtime if desired (unimplemented).
+ *
+ * The journal buckets exist in the same pool as all the other buckets that are
+ * managed by the allocator and garbage collection - garbage collection marks
+ * the journal buckets as metadata buckets.
+ *
+ * OPEN/DIRTY JOURNAL ENTRIES:
+ *
+ * Open/dirty journal entries are journal entries that contain b-tree updates
+ * that have not yet been written out to the b-tree on disk. We have to track
+ * which journal entries are dirty, and we also have to avoid wrapping around
+ * the journal and overwriting old but still dirty journal entries with new
+ * journal entries.
+ *
+ * On disk, this is represented with the "last_seq" field of struct jset;
+ * last_seq is the first sequence number that journal replay has to replay.
+ *
+ * To avoid overwriting dirty journal entries on disk, we keep a mapping (in
+ * journal_device->seq) from each journal bucket to the highest sequence number
+ * of any journal entry it contains. Then, by comparing that against last_seq we
+ * can determine whether that journal bucket contains dirty journal entries or
+ * not.
+ *
+ * To track which journal entries are dirty, we maintain a fifo of refcounts
+ * (where each entry corresponds to a specific sequence number) - when a ref
+ * goes to 0, that journal entry is no longer dirty.
+ *
+ * Journalling of index updates is done at the same time as the b-tree itself is
+ * being modified (see btree_insert_key()); when we add the key to the journal
+ * the pending b-tree write takes a ref on the journal entry the key was added
+ * to. If a pending b-tree write would need to take refs on multiple dirty
+ * journal entries, it only keeps the ref on the oldest one (since a newer
+ * journal entry will still be replayed if an older entry was dirty).
+ *
+ * JOURNAL FILLING UP:
+ *
+ * There are two ways the journal could fill up; either we could run out of
+ * space to write to, or we could have too many open journal entries and run out
+ * of room in the fifo of refcounts. Since those refcounts are decremented
+ * without any locking we can't safely resize that fifo, so we handle it the
+ * same way.
+ *
+ * If the journal fills up, we start flushing dirty btree nodes until we can
+ * allocate space for a journal write again - preferentially flushing btree
+ * nodes that are pinning the oldest journal entries first.
+ */
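+
+/*
+ * Illustrative example (not from the original sources): suppose entries 10-14
+ * exist and the oldest one whose pin refcount has not yet dropped to zero is
+ * 12; journal_last_seq() is then 12, the next journal entry written carries
+ * last_seq == 12, and replay after a crash starts at sequence 12 - the
+ * updates in entries 10 and 11 are already reflected in the btree on disk.
+ */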
+
+#include <linux/hash.h>
+
+#include "journal_types.h"
+
+struct bch_fs;
+
+static inline void journal_wake(struct journal *j)
+{
+ wake_up(&j->wait);
+ closure_wake_up(&j->async_wait);
+}
+
+static inline struct journal_buf *journal_cur_buf(struct journal *j)
+{
+ return j->buf + j->reservations.idx;
+}
+
+/* Sequence number of oldest dirty journal entry */
+
+static inline u64 journal_last_seq(struct journal *j)
+{
+ return j->pin.front;
+}
+
+static inline u64 journal_cur_seq(struct journal *j)
+{
+ return atomic64_read(&j->seq);
+}
+
+static inline u64 journal_last_unwritten_seq(struct journal *j)
+{
+ return j->seq_ondisk + 1;
+}
+
+static inline int journal_state_count(union journal_res_state s, int idx)
+{
+ switch (idx) {
+ case 0: return s.buf0_count;
+ case 1: return s.buf1_count;
+ case 2: return s.buf2_count;
+ case 3: return s.buf3_count;
+ }
+ BUG();
+}
+
+static inline void journal_state_inc(union journal_res_state *s)
+{
+ s->buf0_count += s->idx == 0;
+ s->buf1_count += s->idx == 1;
+ s->buf2_count += s->idx == 2;
+ s->buf3_count += s->idx == 3;
+}
+
+/*
+ * Amount of space that will be taken up by some keys in the journal (i.e.
+ * including the jset_entry header)
+ */
+static inline unsigned jset_u64s(unsigned u64s)
+{
+ return u64s + sizeof(struct jset_entry) / sizeof(u64);
+}
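+
+/*
+ * Worked example (illustrative, assuming the 8-byte jset_entry header): the
+ * header costs one extra u64, so reserving space for a bkey of 6 u64s costs
+ * jset_u64s(6) == 7 u64s in the journal entry.
+ */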
+
+static inline int journal_entry_overhead(struct journal *j)
+{
+ return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved;
+}
+
+static inline struct jset_entry *
+bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
+{
+ struct jset *jset = buf->data;
+ struct jset_entry *entry = vstruct_idx(jset, le32_to_cpu(jset->u64s));
+
+ memset(entry, 0, sizeof(*entry));
+ entry->u64s = cpu_to_le16(u64s);
+
+ le32_add_cpu(&jset->u64s, jset_u64s(u64s));
+
+ return entry;
+}
+
+static inline struct jset_entry *
+journal_res_entry(struct journal *j, struct journal_res *res)
+{
+ return vstruct_idx(j->buf[res->idx].data, res->offset);
+}
+
+static inline unsigned journal_entry_init(struct jset_entry *entry, unsigned type,
+ enum btree_id id, unsigned level,
+ unsigned u64s)
+{
+ entry->u64s = cpu_to_le16(u64s);
+ entry->btree_id = id;
+ entry->level = level;
+ entry->type = type;
+ entry->pad[0] = 0;
+ entry->pad[1] = 0;
+ entry->pad[2] = 0;
+ return jset_u64s(u64s);
+}
+
+static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
+ enum btree_id id, unsigned level,
+ const void *data, unsigned u64s)
+{
+ unsigned ret = journal_entry_init(entry, type, id, level, u64s);
+
+ memcpy_u64s_small(entry->_data, data, u64s);
+ return ret;
+}
+
+static inline struct jset_entry *
+bch2_journal_add_entry(struct journal *j, struct journal_res *res,
+ unsigned type, enum btree_id id,
+ unsigned level, unsigned u64s)
+{
+ struct jset_entry *entry = journal_res_entry(j, res);
+ unsigned actual = journal_entry_init(entry, type, id, level, u64s);
+
+ EBUG_ON(!res->ref);
+ EBUG_ON(actual > res->u64s);
+
+ res->offset += actual;
+ res->u64s -= actual;
+ return entry;
+}
+
+static inline bool journal_entry_empty(struct jset *j)
+{
+ if (j->seq != j->last_seq)
+ return false;
+
+ vstruct_for_each(j, i)
+ if (i->type == BCH_JSET_ENTRY_btree_keys && i->u64s)
+ return false;
+ return true;
+}
+
+/*
+ * Drop a reference on a buffer index and return the resulting reservation
+ * state; callers check whether the count for that index has hit zero.
+ */
+static inline union journal_res_state journal_state_buf_put(struct journal *j, unsigned idx)
+{
+ union journal_res_state s;
+
+ s.v = atomic64_sub_return(((union journal_res_state) {
+ .buf0_count = idx == 0,
+ .buf1_count = idx == 1,
+ .buf2_count = idx == 2,
+ .buf3_count = idx == 3,
+ }).v, &j->reservations.counter);
+ return s;
+}
+
+bool bch2_journal_entry_close(struct journal *);
+void bch2_journal_buf_put_final(struct journal *, u64, bool);
+
+static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
+{
+ union journal_res_state s;
+
+ s = journal_state_buf_put(j, idx);
+ if (!journal_state_count(s, idx))
+ bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
+}
+
+static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
+{
+ union journal_res_state s;
+
+ s = journal_state_buf_put(j, idx);
+ if (!journal_state_count(s, idx)) {
+ spin_lock(&j->lock);
+ bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
+ spin_unlock(&j->lock);
+ }
+}
+
+/*
+ * This function releases the journal write structure so other threads can
+ * then proceed to add their keys as well.
+ */
+static inline void bch2_journal_res_put(struct journal *j,
+ struct journal_res *res)
+{
+ if (!res->ref)
+ return;
+
+ lock_release(&j->res_map, _THIS_IP_);
+
+ while (res->u64s)
+ bch2_journal_add_entry(j, res,
+ BCH_JSET_ENTRY_btree_keys,
+ 0, 0, 0);
+
+ bch2_journal_buf_put(j, res->idx, res->seq);
+
+ res->ref = 0;
+}
+
+int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
+ unsigned);
+
+/* First bits for BCH_WATERMARK: */
+enum journal_res_flags {
+ __JOURNAL_RES_GET_NONBLOCK = BCH_WATERMARK_BITS,
+ __JOURNAL_RES_GET_CHECK,
+};
+
+#define JOURNAL_RES_GET_NONBLOCK (1 << __JOURNAL_RES_GET_NONBLOCK)
+#define JOURNAL_RES_GET_CHECK (1 << __JOURNAL_RES_GET_CHECK)
+
+static inline int journal_res_get_fast(struct journal *j,
+ struct journal_res *res,
+ unsigned flags)
+{
+ union journal_res_state old, new;
+ u64 v = atomic64_read(&j->reservations.counter);
+
+ do {
+ old.v = new.v = v;
+
+ /*
+ * Check if there is still room in the current journal
+ * entry:
+ */
+ if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
+ return 0;
+
+ EBUG_ON(!journal_state_count(new, new.idx));
+
+ if ((flags & BCH_WATERMARK_MASK) < j->watermark)
+ return 0;
+
+ new.cur_entry_offset += res->u64s;
+ journal_state_inc(&new);
+
+ /*
+ * If the refcount would overflow, we have to wait:
+ * XXX - tracepoint this:
+ */
+ if (!journal_state_count(new, new.idx))
+ return 0;
+
+ if (flags & JOURNAL_RES_GET_CHECK)
+ return 1;
+ } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+ old.v, new.v)) != old.v);
+
+ res->ref = true;
+ res->idx = old.idx;
+ res->offset = old.cur_entry_offset;
+ res->seq = le64_to_cpu(j->buf[old.idx].data->seq);
+ return 1;
+}
+
+static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
+ unsigned u64s, unsigned flags)
+{
+ int ret;
+
+ EBUG_ON(res->ref);
+ EBUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
+
+ res->u64s = u64s;
+
+ if (journal_res_get_fast(j, res, flags))
+ goto out;
+
+ ret = bch2_journal_res_get_slowpath(j, res, flags);
+ if (ret)
+ return ret;
+out:
+ if (!(flags & JOURNAL_RES_GET_CHECK)) {
+ lock_acquire_shared(&j->res_map, 0,
+ (flags & JOURNAL_RES_GET_NONBLOCK) != 0,
+ NULL, _THIS_IP_);
+ EBUG_ON(!res->ref);
+ }
+ return 0;
+}
+
+/* journal_entry_res: */
+
+void bch2_journal_entry_res_resize(struct journal *,
+ struct journal_entry_res *,
+ unsigned);
+
+int bch2_journal_flush_seq_async(struct journal *, u64, struct closure *);
+void bch2_journal_flush_async(struct journal *, struct closure *);
+
+int bch2_journal_flush_seq(struct journal *, u64);
+int bch2_journal_flush(struct journal *);
+bool bch2_journal_noflush_seq(struct journal *, u64);
+int bch2_journal_meta(struct journal *);
+
+void bch2_journal_halt(struct journal *);
+
+static inline int bch2_journal_error(struct journal *j)
+{
+ return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL
+ ? -EIO : 0;
+}
+
+struct bch_dev;
+
+static inline void bch2_journal_set_replay_done(struct journal *j)
+{
+ BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
+ set_bit(JOURNAL_REPLAY_DONE, &j->flags);
+}
+
+void bch2_journal_unblock(struct journal *);
+void bch2_journal_block(struct journal *);
+struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j, u64 max_seq);
+
+void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
+void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
+void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
+bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
+
+int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
+ unsigned nr);
+int bch2_dev_journal_alloc(struct bch_dev *);
+int bch2_fs_journal_alloc(struct bch_fs *);
+
+void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
+
+void bch2_fs_journal_stop(struct journal *);
+int bch2_fs_journal_start(struct journal *, u64);
+
+void bch2_dev_journal_exit(struct bch_dev *);
+int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
+void bch2_fs_journal_exit(struct journal *);
+int bch2_fs_journal_init(struct journal *);
+
+#endif /* _BCACHEFS_JOURNAL_H */
diff --git a/c_src/libbcachefs/journal_io.c b/c_src/libbcachefs/journal_io.c
new file mode 100644
index 00000000..b0f4dd49
--- /dev/null
+++ b/c_src/libbcachefs/journal_io.c
@@ -0,0 +1,2009 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "btree_io.h"
+#include "btree_update_interior.h"
+#include "btree_write_buffer.h"
+#include "buckets.h"
+#include "checksum.h"
+#include "disk_groups.h"
+#include "error.h"
+#include "journal.h"
+#include "journal_io.h"
+#include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
+#include "replicas.h"
+#include "sb-clean.h"
+#include "trace.h"
+
+static struct nonce journal_nonce(const struct jset *jset)
+{
+ return (struct nonce) {{
+ [0] = 0,
+ [1] = ((__le32 *) &jset->seq)[0],
+ [2] = ((__le32 *) &jset->seq)[1],
+ [3] = BCH_NONCE_JOURNAL,
+ }};
+}
+
+static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *csum)
+{
+ if (!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j))) {
+ *csum = (struct bch_csum) {};
+ return false;
+ }
+
+ *csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
+ return !bch2_crc_cmp(j->csum, *csum);
+}
+
+static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq)
+{
+ return (seq - c->journal_entries_base_seq) & (~0U >> 1);
+}
+
+static void __journal_replay_free(struct bch_fs *c,
+ struct journal_replay *i)
+{
+ struct journal_replay **p =
+ genradix_ptr(&c->journal_entries,
+ journal_entry_radix_idx(c, le64_to_cpu(i->j.seq)));
+
+ BUG_ON(*p != i);
+ *p = NULL;
+ kvpfree(i, offsetof(struct journal_replay, j) +
+ vstruct_bytes(&i->j));
+}
+
+static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
+{
+ i->ignore = true;
+
+ if (!c->opts.read_entire_journal)
+ __journal_replay_free(c, i);
+}
+
+struct journal_list {
+ struct closure cl;
+ u64 last_seq;
+ struct mutex lock;
+ int ret;
+};
+
+#define JOURNAL_ENTRY_ADD_OK 0
+#define JOURNAL_ENTRY_ADD_OUT_OF_RANGE 5
+
+/*
+ * Given a journal entry we just read, add it to the list of journal entries to
+ * be replayed:
+ */
+static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
+ struct journal_ptr entry_ptr,
+ struct journal_list *jlist, struct jset *j)
+{
+ struct genradix_iter iter;
+ struct journal_replay **_i, *i, *dup;
+ struct journal_ptr *ptr;
+ size_t bytes = vstruct_bytes(j);
+ u64 last_seq = !JSET_NO_FLUSH(j) ? le64_to_cpu(j->last_seq) : 0;
+ int ret = JOURNAL_ENTRY_ADD_OK;
+
+ /* Is this entry older than the range we need? */
+ if (!c->opts.read_entire_journal &&
+ le64_to_cpu(j->seq) < jlist->last_seq)
+ return JOURNAL_ENTRY_ADD_OUT_OF_RANGE;
+
+ /*
+ * genradixes are indexed by a ulong, not a u64, so we can't index them
+	 * by sequence number directly: assume instead that they will all fall
+	 * within a range of +/- 2 billion of the first one we find.
+ */
+ if (!c->journal_entries_base_seq)
+ c->journal_entries_base_seq = max_t(s64, 1, le64_to_cpu(j->seq) - S32_MAX);
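+
+	/*
+	 * Illustrative example (not from the original sources): if the first
+	 * entry found has seq 10,000,000,000, base_seq becomes
+	 * 10,000,000,000 - S32_MAX, so that entry maps to radix index S32_MAX
+	 * and nearby sequence numbers map to nearby indices, all kept below
+	 * 2^31 by the & (~0U >> 1) mask in journal_entry_radix_idx().
+	 */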
+
+ /* Drop entries we don't need anymore */
+ if (last_seq > jlist->last_seq && !c->opts.read_entire_journal) {
+ genradix_for_each_from(&c->journal_entries, iter, _i,
+ journal_entry_radix_idx(c, jlist->last_seq)) {
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
+
+ if (le64_to_cpu(i->j.seq) >= last_seq)
+ break;
+ journal_replay_free(c, i);
+ }
+ }
+
+ jlist->last_seq = max(jlist->last_seq, last_seq);
+
+ _i = genradix_ptr_alloc(&c->journal_entries,
+ journal_entry_radix_idx(c, le64_to_cpu(j->seq)),
+ GFP_KERNEL);
+ if (!_i)
+ return -BCH_ERR_ENOMEM_journal_entry_add;
+
+ /*
+ * Duplicate journal entries? If so we want the one that didn't have a
+ * checksum error:
+ */
+ dup = *_i;
+ if (dup) {
+ if (bytes == vstruct_bytes(&dup->j) &&
+ !memcmp(j, &dup->j, bytes)) {
+ i = dup;
+ goto found;
+ }
+
+ if (!entry_ptr.csum_good) {
+ i = dup;
+ goto found;
+ }
+
+ if (!dup->csum_good)
+ goto replace;
+
+ fsck_err(c, journal_entry_replicas_data_mismatch,
+ "found duplicate but non identical journal entries (seq %llu)",
+ le64_to_cpu(j->seq));
+ i = dup;
+ goto found;
+ }
+replace:
+ i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
+ if (!i)
+ return -BCH_ERR_ENOMEM_journal_entry_add;
+
+ i->nr_ptrs = 0;
+ i->csum_good = entry_ptr.csum_good;
+ i->ignore = false;
+ unsafe_memcpy(&i->j, j, bytes, "embedded variable length struct");
+ i->ptrs[i->nr_ptrs++] = entry_ptr;
+
+ if (dup) {
+ if (dup->nr_ptrs >= ARRAY_SIZE(dup->ptrs)) {
+ bch_err(c, "found too many copies of journal entry %llu",
+ le64_to_cpu(i->j.seq));
+ dup->nr_ptrs = ARRAY_SIZE(dup->ptrs) - 1;
+ }
+
+ /* The first ptr should represent the jset we kept: */
+ memcpy(i->ptrs + i->nr_ptrs,
+ dup->ptrs,
+ sizeof(dup->ptrs[0]) * dup->nr_ptrs);
+ i->nr_ptrs += dup->nr_ptrs;
+ __journal_replay_free(c, dup);
+ }
+
+ *_i = i;
+ return 0;
+found:
+ for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
+ if (ptr->dev == ca->dev_idx) {
+ bch_err(c, "duplicate journal entry %llu on same device",
+ le64_to_cpu(i->j.seq));
+ goto out;
+ }
+ }
+
+ if (i->nr_ptrs >= ARRAY_SIZE(i->ptrs)) {
+ bch_err(c, "found too many copies of journal entry %llu",
+ le64_to_cpu(i->j.seq));
+ goto out;
+ }
+
+ i->ptrs[i->nr_ptrs++] = entry_ptr;
+out:
+fsck_err:
+ return ret;
+}
+
+/* this fills in a range with empty jset_entries: */
+static void journal_entry_null_range(void *start, void *end)
+{
+ struct jset_entry *entry;
+
+ for (entry = start; entry != end; entry = vstruct_next(entry))
+ memset(entry, 0, sizeof(*entry));
+}
+
+#define JOURNAL_ENTRY_REREAD 5
+#define JOURNAL_ENTRY_NONE 6
+#define JOURNAL_ENTRY_BAD 7
+
+static void journal_entry_err_msg(struct printbuf *out,
+ u32 version,
+ struct jset *jset,
+ struct jset_entry *entry)
+{
+ prt_str(out, "invalid journal entry, version=");
+ bch2_version_to_text(out, version);
+
+ if (entry) {
+ prt_str(out, " type=");
+ prt_str(out, bch2_jset_entry_types[entry->type]);
+ }
+
+ if (!jset) {
+ prt_printf(out, " in superblock");
+ } else {
+
+ prt_printf(out, " seq=%llu", le64_to_cpu(jset->seq));
+
+ if (entry)
+ prt_printf(out, " offset=%zi/%u",
+ (u64 *) entry - jset->_data,
+ le32_to_cpu(jset->u64s));
+ }
+
+ prt_str(out, ": ");
+}
+
+#define journal_entry_err(c, version, jset, entry, _err, msg, ...) \
+({ \
+ struct printbuf _buf = PRINTBUF; \
+ \
+ journal_entry_err_msg(&_buf, version, jset, entry); \
+ prt_printf(&_buf, msg, ##__VA_ARGS__); \
+ \
+ switch (flags & BKEY_INVALID_WRITE) { \
+ case READ: \
+ mustfix_fsck_err(c, _err, "%s", _buf.buf); \
+ break; \
+ case WRITE: \
+ bch2_sb_error_count(c, BCH_FSCK_ERR_##_err); \
+ bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\
+ if (bch2_fs_inconsistent(c)) { \
+ ret = -BCH_ERR_fsck_errors_not_fixed; \
+ goto fsck_err; \
+ } \
+ break; \
+ } \
+ \
+ printbuf_exit(&_buf); \
+ true; \
+})
+
+#define journal_entry_err_on(cond, ...) \
+ ((cond) ? journal_entry_err(__VA_ARGS__) : false)
+
+#define FSCK_DELETED_KEY 5
+
+static int journal_validate_key(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned level, enum btree_id btree_id,
+ struct bkey_i *k,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ int write = flags & BKEY_INVALID_WRITE;
+ void *next = vstruct_next(entry);
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ if (journal_entry_err_on(!k->k.u64s,
+ c, version, jset, entry,
+ journal_entry_bkey_u64s_0,
+ "k->u64s 0")) {
+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
+ journal_entry_null_range(vstruct_next(entry), next);
+ return FSCK_DELETED_KEY;
+ }
+
+ if (journal_entry_err_on((void *) bkey_next(k) >
+ (void *) vstruct_next(entry),
+ c, version, jset, entry,
+ journal_entry_bkey_past_end,
+ "extends past end of journal entry")) {
+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
+ journal_entry_null_range(vstruct_next(entry), next);
+ return FSCK_DELETED_KEY;
+ }
+
+ if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT,
+ c, version, jset, entry,
+ journal_entry_bkey_bad_format,
+ "bad format %u", k->k.format)) {
+ le16_add_cpu(&entry->u64s, -((u16) k->k.u64s));
+ memmove(k, bkey_next(k), next - (void *) bkey_next(k));
+ journal_entry_null_range(vstruct_next(entry), next);
+ return FSCK_DELETED_KEY;
+ }
+
+ if (!write)
+ bch2_bkey_compat(level, btree_id, version, big_endian,
+ write, NULL, bkey_to_packed(k));
+
+ if (bch2_bkey_invalid(c, bkey_i_to_s_c(k),
+ __btree_node_type(level, btree_id), write, &buf)) {
+ printbuf_reset(&buf);
+ journal_entry_err_msg(&buf, version, jset, entry);
+ prt_newline(&buf);
+ printbuf_indent_add(&buf, 2);
+
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+ prt_newline(&buf);
+ bch2_bkey_invalid(c, bkey_i_to_s_c(k),
+ __btree_node_type(level, btree_id), write, &buf);
+
+ mustfix_fsck_err(c, journal_entry_bkey_invalid,
+ "%s", buf.buf);
+
+ le16_add_cpu(&entry->u64s, -((u16) k->k.u64s));
+ memmove(k, bkey_next(k), next - (void *) bkey_next(k));
+ journal_entry_null_range(vstruct_next(entry), next);
+
+ printbuf_exit(&buf);
+ return FSCK_DELETED_KEY;
+ }
+
+ if (write)
+ bch2_bkey_compat(level, btree_id, version, big_endian,
+ write, NULL, bkey_to_packed(k));
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int journal_entry_btree_keys_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ struct bkey_i *k = entry->start;
+
+ while (k != vstruct_last(entry)) {
+ int ret = journal_validate_key(c, jset, entry,
+ entry->level,
+ entry->btree_id,
+ k, version, big_endian,
+ flags|BKEY_INVALID_JOURNAL);
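+ /* key dropped: the entry was compacted or truncated at k, don't advance: */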
+ if (ret == FSCK_DELETED_KEY)
+ continue;
+
+ k = bkey_next(k);
+ }
+
+ return 0;
+}
+
+static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ struct bkey_i *k;
+ bool first = true;
+
+ jset_entry_for_each_key(entry, k) {
+ if (!first) {
+ prt_newline(out);
+ prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ }
+ prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level);
+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k));
+ first = false;
+ }
+}
+
+static int journal_entry_btree_root_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ struct bkey_i *k = entry->start;
+ int ret = 0;
+
+ if (journal_entry_err_on(!entry->u64s ||
+ le16_to_cpu(entry->u64s) != k->k.u64s,
+ c, version, jset, entry,
+ journal_entry_btree_root_bad_size,
+ "invalid btree root journal entry: wrong number of keys")) {
+ void *next = vstruct_next(entry);
+ /*
+ * we don't want to null out this jset_entry,
+ * just the contents, so that later we can tell
+ * we were _supposed_ to have a btree root
+ */
+ entry->u64s = 0;
+ journal_entry_null_range(vstruct_next(entry), next);
+ return 0;
+ }
+
+ ret = journal_validate_key(c, jset, entry, 1, entry->btree_id, k,
+ version, big_endian, flags);
+ if (ret == FSCK_DELETED_KEY)
+ ret = 0;
+fsck_err:
+ return ret;
+}
+
+static void journal_entry_btree_root_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ journal_entry_btree_keys_to_text(out, c, entry);
+}
+
+static int journal_entry_prio_ptrs_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ /* obsolete, don't care: */
+ return 0;
+}
+
+static void journal_entry_prio_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+}
+
+static int journal_entry_blacklist_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ int ret = 0;
+
+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1,
+ c, version, jset, entry,
+ journal_entry_blacklist_bad_size,
+ "invalid journal seq blacklist entry: bad size")) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ }
+fsck_err:
+ return ret;
+}
+
+static void journal_entry_blacklist_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ struct jset_entry_blacklist *bl =
+ container_of(entry, struct jset_entry_blacklist, entry);
+
+ prt_printf(out, "seq=%llu", le64_to_cpu(bl->seq));
+}
+
+static int journal_entry_blacklist_v2_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ struct jset_entry_blacklist_v2 *bl_entry;
+ int ret = 0;
+
+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2,
+ c, version, jset, entry,
+ journal_entry_blacklist_v2_bad_size,
+ "invalid journal seq blacklist entry: bad size")) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ goto out;
+ }
+
+ bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
+
+ if (journal_entry_err_on(le64_to_cpu(bl_entry->start) >
+ le64_to_cpu(bl_entry->end),
+ c, version, jset, entry,
+ journal_entry_blacklist_v2_start_past_end,
+ "invalid journal seq blacklist entry: start > end")) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ }
+out:
+fsck_err:
+ return ret;
+}
+
+static void journal_entry_blacklist_v2_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ struct jset_entry_blacklist_v2 *bl =
+ container_of(entry, struct jset_entry_blacklist_v2, entry);
+
+ prt_printf(out, "start=%llu end=%llu",
+ le64_to_cpu(bl->start),
+ le64_to_cpu(bl->end));
+}
+
+static int journal_entry_usage_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ struct jset_entry_usage *u =
+ container_of(entry, struct jset_entry_usage, entry);
+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+ int ret = 0;
+
+ if (journal_entry_err_on(bytes < sizeof(*u),
+ c, version, jset, entry,
+ journal_entry_usage_bad_size,
+ "invalid journal entry usage: bad size")) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ return ret;
+ }
+
+fsck_err:
+ return ret;
+}
+
+static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ struct jset_entry_usage *u =
+ container_of(entry, struct jset_entry_usage, entry);
+
+ prt_printf(out, "type=%s v=%llu",
+ bch2_fs_usage_types[u->entry.btree_id],
+ le64_to_cpu(u->v));
+}
+
+static int journal_entry_data_usage_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ struct jset_entry_data_usage *u =
+ container_of(entry, struct jset_entry_data_usage, entry);
+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+ struct printbuf err = PRINTBUF;
+ int ret = 0;
+
+ if (journal_entry_err_on(bytes < sizeof(*u) ||
+ bytes < sizeof(*u) + u->r.nr_devs,
+ c, version, jset, entry,
+ journal_entry_data_usage_bad_size,
+ "invalid journal entry usage: bad size")) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ goto out;
+ }
+
+ if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
+ c, version, jset, entry,
+ journal_entry_data_usage_bad_size,
+ "invalid journal entry usage: %s", err.buf)) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ goto out;
+ }
+out:
+fsck_err:
+ printbuf_exit(&err);
+ return ret;
+}
+
+static void journal_entry_data_usage_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ struct jset_entry_data_usage *u =
+ container_of(entry, struct jset_entry_data_usage, entry);
+
+ bch2_replicas_entry_to_text(out, &u->r);
+ prt_printf(out, "=%llu", le64_to_cpu(u->v));
+}
+
+static int journal_entry_clock_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ struct jset_entry_clock *clock =
+ container_of(entry, struct jset_entry_clock, entry);
+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+ int ret = 0;
+
+ if (journal_entry_err_on(bytes != sizeof(*clock),
+ c, version, jset, entry,
+ journal_entry_clock_bad_size,
+ "bad size")) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ return ret;
+ }
+
+ if (journal_entry_err_on(clock->rw > 1,
+ c, version, jset, entry,
+ journal_entry_clock_bad_rw,
+ "bad rw")) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ return ret;
+ }
+
+fsck_err:
+ return ret;
+}
+
+static void journal_entry_clock_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ struct jset_entry_clock *clock =
+ container_of(entry, struct jset_entry_clock, entry);
+
+ prt_printf(out, "%s=%llu", clock->rw ? "write" : "read", le64_to_cpu(clock->time));
+}
+
+static int journal_entry_dev_usage_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ struct jset_entry_dev_usage *u =
+ container_of(entry, struct jset_entry_dev_usage, entry);
+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+ unsigned expected = sizeof(*u);
+ unsigned dev;
+ int ret = 0;
+
+ if (journal_entry_err_on(bytes < expected,
+ c, version, jset, entry,
+ journal_entry_dev_usage_bad_size,
+ "bad size (%u < %u)",
+ bytes, expected)) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ return ret;
+ }
+
+ dev = le32_to_cpu(u->dev);
+
+ if (journal_entry_err_on(!bch2_dev_exists2(c, dev),
+ c, version, jset, entry,
+ journal_entry_dev_usage_bad_dev,
+ "bad dev")) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ return ret;
+ }
+
+ if (journal_entry_err_on(u->pad,
+ c, version, jset, entry,
+ journal_entry_dev_usage_bad_pad,
+ "bad pad")) {
+ journal_entry_null_range(entry, vstruct_next(entry));
+ return ret;
+ }
+
+fsck_err:
+ return ret;
+}
+
+static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ struct jset_entry_dev_usage *u =
+ container_of(entry, struct jset_entry_dev_usage, entry);
+ unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
+
+ prt_printf(out, "dev=%u", le32_to_cpu(u->dev));
+
+ for (i = 0; i < nr_types; i++) {
+ if (i < BCH_DATA_NR)
+ prt_printf(out, " %s", bch2_data_types[i]);
+ else
+ prt_printf(out, " (unknown data type %u)", i);
+ prt_printf(out, ": buckets=%llu sectors=%llu fragmented=%llu",
+ le64_to_cpu(u->d[i].buckets),
+ le64_to_cpu(u->d[i].sectors),
+ le64_to_cpu(u->d[i].fragmented));
+ }
+}
+
+static int journal_entry_log_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ return 0;
+}
+
+static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry);
+ unsigned bytes = vstruct_bytes(entry) - offsetof(struct jset_entry_log, d);
+
+ prt_printf(out, "%.*s", bytes, l->d);
+}
+
+static int journal_entry_overwrite_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ return journal_entry_btree_keys_validate(c, jset, entry,
+ version, big_endian, READ);
+}
+
+static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ journal_entry_btree_keys_to_text(out, c, entry);
+}
+
+static int journal_entry_write_buffer_keys_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ return journal_entry_btree_keys_validate(c, jset, entry,
+ version, big_endian, READ);
+}
+
+static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ journal_entry_btree_keys_to_text(out, c, entry);
+}
+
+struct jset_entry_ops {
+ int (*validate)(struct bch_fs *, struct jset *,
+ struct jset_entry *, unsigned, int,
+ enum bkey_invalid_flags);
+ void (*to_text)(struct printbuf *, struct bch_fs *, struct jset_entry *);
+};
+
+static const struct jset_entry_ops bch2_jset_entry_ops[] = {
+#define x(f, nr) \
+ [BCH_JSET_ENTRY_##f] = (struct jset_entry_ops) { \
+ .validate = journal_entry_##f##_validate, \
+ .to_text = journal_entry_##f##_to_text, \
+ },
+ BCH_JSET_ENTRY_TYPES()
+#undef x
+};
+
+int bch2_journal_entry_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ enum bkey_invalid_flags flags)
+{
+ return entry->type < BCH_JSET_ENTRY_NR
+ ? bch2_jset_entry_ops[entry->type].validate(c, jset, entry,
+ version, big_endian, flags)
+ : 0;
+}
+
+void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ if (entry->type < BCH_JSET_ENTRY_NR) {
+ prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ bch2_jset_entry_ops[entry->type].to_text(out, c, entry);
+ } else {
+ prt_printf(out, "(unknown type %u)", entry->type);
+ }
+}
+
+static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
+ enum bkey_invalid_flags flags)
+{
+ unsigned version = le32_to_cpu(jset->version);
+ int ret = 0;
+
+ vstruct_for_each(jset, entry) {
+ if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset),
+ c, version, jset, entry,
+ journal_entry_past_jset_end,
+ "journal entry extends past end of jset")) {
+ jset->u64s = cpu_to_le32((u64 *) entry - jset->_data);
+ break;
+ }
+
+ ret = bch2_journal_entry_validate(c, jset, entry,
+ version, JSET_BIG_ENDIAN(jset), flags);
+ if (ret)
+ break;
+ }
+fsck_err:
+ return ret;
+}
+
+static int jset_validate(struct bch_fs *c,
+ struct bch_dev *ca,
+ struct jset *jset, u64 sector,
+ enum bkey_invalid_flags flags)
+{
+ unsigned version;
+ int ret = 0;
+
+ if (le64_to_cpu(jset->magic) != jset_magic(c))
+ return JOURNAL_ENTRY_NONE;
+
+ version = le32_to_cpu(jset->version);
+ if (journal_entry_err_on(!bch2_version_compatible(version),
+ c, version, jset, NULL,
+ jset_unsupported_version,
+ "%s sector %llu seq %llu: incompatible journal entry version %u.%u",
+ ca ? ca->name : c->name,
+ sector, le64_to_cpu(jset->seq),
+ BCH_VERSION_MAJOR(version),
+ BCH_VERSION_MINOR(version))) {
+ /* don't try to continue: */
+ return -EINVAL;
+ }
+
+ if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)),
+ c, version, jset, NULL,
+ jset_unknown_csum,
+ "%s sector %llu seq %llu: journal entry with unknown csum type %llu",
+ ca ? ca->name : c->name,
+ sector, le64_to_cpu(jset->seq),
+ JSET_CSUM_TYPE(jset)))
+ ret = JOURNAL_ENTRY_BAD;
+
+ /* last_seq is ignored when JSET_NO_FLUSH is true */
+ if (journal_entry_err_on(!JSET_NO_FLUSH(jset) &&
+ le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq),
+ c, version, jset, NULL,
+ jset_last_seq_newer_than_seq,
+ "invalid journal entry: last_seq > seq (%llu > %llu)",
+ le64_to_cpu(jset->last_seq),
+ le64_to_cpu(jset->seq))) {
+ jset->last_seq = jset->seq;
+ return JOURNAL_ENTRY_BAD;
+ }
+
+ ret = jset_validate_entries(c, jset, flags);
+fsck_err:
+ return ret;
+}
+
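+/*
+ * Basic sanity checks done while scanning a journal bucket, before we've
+ * necessarily read or checksummed the whole entry - just enough to know how
+ * many sectors to read:
+ */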
+static int jset_validate_early(struct bch_fs *c,
+ struct bch_dev *ca,
+ struct jset *jset, u64 sector,
+ unsigned bucket_sectors_left,
+ unsigned sectors_read)
+{
+ size_t bytes = vstruct_bytes(jset);
+ unsigned version;
+ enum bkey_invalid_flags flags = BKEY_INVALID_JOURNAL;
+ int ret = 0;
+
+ if (le64_to_cpu(jset->magic) != jset_magic(c))
+ return JOURNAL_ENTRY_NONE;
+
+ version = le32_to_cpu(jset->version);
+ if (journal_entry_err_on(!bch2_version_compatible(version),
+ c, version, jset, NULL,
+ jset_unsupported_version,
+ "%s sector %llu seq %llu: unknown journal entry version %u.%u",
+ ca ? ca->name : c->name,
+ sector, le64_to_cpu(jset->seq),
+ BCH_VERSION_MAJOR(version),
+ BCH_VERSION_MINOR(version))) {
+ /* don't try to continue: */
+ return -EINVAL;
+ }
+
+ if (bytes > (sectors_read << 9) &&
+ sectors_read < bucket_sectors_left)
+ return JOURNAL_ENTRY_REREAD;
+
+ if (journal_entry_err_on(bytes > bucket_sectors_left << 9,
+ c, version, jset, NULL,
+ jset_past_bucket_end,
+ "%s sector %llu seq %llu: journal entry too big (%zu bytes)",
+ ca ? ca->name : c->name,
+ sector, le64_to_cpu(jset->seq), bytes))
+ le32_add_cpu(&jset->u64s,
+ -((bytes - (bucket_sectors_left << 9)) / 8));
+fsck_err:
+ return ret;
+}
+
+struct journal_read_buf {
+ void *data;
+ size_t size;
+};
+
+static int journal_read_buf_realloc(struct journal_read_buf *b,
+ size_t new_size)
+{
+ void *n;
+
+ /* the bios are sized for this many pages, max: */
+ if (new_size > JOURNAL_ENTRY_SIZE_MAX)
+ return -BCH_ERR_ENOMEM_journal_read_buf_realloc;
+
+ new_size = roundup_pow_of_two(new_size);
+ n = kvpmalloc(new_size, GFP_KERNEL);
+ if (!n)
+ return -BCH_ERR_ENOMEM_journal_read_buf_realloc;
+
+ kvpfree(b->data, b->size);
+ b->data = n;
+ b->size = new_size;
+ return 0;
+}
+
+static int journal_read_bucket(struct bch_dev *ca,
+ struct journal_read_buf *buf,
+ struct journal_list *jlist,
+ unsigned bucket)
+{
+ struct bch_fs *c = ca->fs;
+ struct journal_device *ja = &ca->journal;
+ struct jset *j = NULL;
+ unsigned sectors, sectors_read = 0;
+ u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
+ end = offset + ca->mi.bucket_size;
+ bool saw_bad = false, csum_good;
+ struct printbuf err = PRINTBUF;
+ int ret = 0;
+
+ pr_debug("reading %u", bucket);
+
+ while (offset < end) {
+ if (!sectors_read) {
+ struct bio *bio;
+ unsigned nr_bvecs;
+reread:
+ sectors_read = min_t(unsigned,
+ end - offset, buf->size >> 9);
+ nr_bvecs = buf_pages(buf->data, sectors_read << 9);
+
+ bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
+ bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ);
+
+ bio->bi_iter.bi_sector = offset;
+ bch2_bio_map(bio, buf->data, sectors_read << 9);
+
+ ret = submit_bio_wait(bio);
+ kfree(bio);
+
+ if (bch2_dev_io_err_on(ret, ca, BCH_MEMBER_ERROR_read,
+ "journal read error: sector %llu",
+ offset) ||
+ bch2_meta_read_fault("journal")) {
+ /*
+ * We don't error out of the recovery process
+ * here, since the relevant journal entry may be
+ * found on a different device, and missing or
+ * no journal entries will be handled later
+ */
+ goto out;
+ }
+
+ j = buf->data;
+ }
+
+ ret = jset_validate_early(c, ca, j, offset,
+ end - offset, sectors_read);
+ switch (ret) {
+ case 0:
+ sectors = vstruct_sectors(j, c->block_bits);
+ break;
+ case JOURNAL_ENTRY_REREAD:
+ if (vstruct_bytes(j) > buf->size) {
+ ret = journal_read_buf_realloc(buf,
+ vstruct_bytes(j));
+ if (ret)
+ goto err;
+ }
+ goto reread;
+ case JOURNAL_ENTRY_NONE:
+ if (!saw_bad)
+ goto out;
+ /*
+ * On checksum error we don't really trust the size
+ * field of the journal entry we read, so try reading
+ * again at next block boundary:
+ */
+ sectors = block_sectors(c);
+ goto next_block;
+ default:
+ goto err;
+ }
+
+ /*
+ * This happens sometimes if we don't have discards on -
+ * when we've partially overwritten a bucket with new
+ * journal entries. We don't need the rest of the
+ * bucket:
+ */
+ if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
+ goto out;
+
+ ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
+
+ enum bch_csum_type csum_type = JSET_CSUM_TYPE(j);
+ struct bch_csum csum;
+ csum_good = jset_csum_good(c, j, &csum);
+
+ if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum,
+ "%s",
+ (printbuf_reset(&err),
+ prt_str(&err, "journal "),
+ bch2_csum_err_msg(&err, csum_type, j->csum, csum),
+ err.buf)))
+ saw_bad = true;
+
+ ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
+ j->encrypted_start,
+ vstruct_end(j) - (void *) j->encrypted_start);
+ bch2_fs_fatal_err_on(ret, c,
+ "error decrypting journal entry: %s",
+ bch2_err_str(ret));
+
+ mutex_lock(&jlist->lock);
+ ret = journal_entry_add(c, ca, (struct journal_ptr) {
+ .csum_good = csum_good,
+ .dev = ca->dev_idx,
+ .bucket = bucket,
+ .bucket_offset = offset -
+ bucket_to_sector(ca, ja->buckets[bucket]),
+ .sector = offset,
+ }, jlist, j);
+ mutex_unlock(&jlist->lock);
+
+ switch (ret) {
+ case JOURNAL_ENTRY_ADD_OK:
+ break;
+ case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
+ break;
+ default:
+ goto err;
+ }
+next_block:
+ pr_debug("next");
+ offset += sectors;
+ sectors_read -= sectors;
+ j = ((void *) j) + (sectors << 9);
+ }
+
+out:
+ ret = 0;
+err:
+ printbuf_exit(&err);
+ return ret;
+}
+
+static CLOSURE_CALLBACK(bch2_journal_read_device)
+{
+ closure_type(ja, struct journal_device, read);
+ struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
+ struct bch_fs *c = ca->fs;
+ struct journal_list *jlist =
+ container_of(cl->parent, struct journal_list, cl);
+ struct journal_replay *r, **_r;
+ struct genradix_iter iter;
+ struct journal_read_buf buf = { NULL, 0 };
+ unsigned i;
+ int ret = 0;
+
+ if (!ja->nr)
+ goto out;
+
+ ret = journal_read_buf_realloc(&buf, PAGE_SIZE);
+ if (ret)
+ goto err;
+
+ pr_debug("%u journal buckets", ja->nr);
+
+ for (i = 0; i < ja->nr; i++) {
+ ret = journal_read_bucket(ca, &buf, jlist, i);
+ if (ret)
+ goto err;
+ }
+
+ ja->sectors_free = ca->mi.bucket_size;
+
+ mutex_lock(&jlist->lock);
+ genradix_for_each_reverse(&c->journal_entries, iter, _r) {
+ r = *_r;
+
+ if (!r)
+ continue;
+
+ for (i = 0; i < r->nr_ptrs; i++) {
+ if (r->ptrs[i].dev == ca->dev_idx) {
+ unsigned wrote = bucket_remainder(ca, r->ptrs[i].sector) +
+ vstruct_sectors(&r->j, c->block_bits);
+
+ ja->cur_idx = r->ptrs[i].bucket;
+ ja->sectors_free = ca->mi.bucket_size - wrote;
+ goto found;
+ }
+ }
+ }
+found:
+ mutex_unlock(&jlist->lock);
+
+ if (ja->bucket_seq[ja->cur_idx] &&
+ ja->sectors_free == ca->mi.bucket_size) {
+#if 0
+ /*
+ * Debug code for ZNS support, where we (probably) want to correlate
+ * where we stopped in the journal with the zone write points:
+ */
+ bch_err(c, "ja->sectors_free == ca->mi.bucket_size");
+ bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr);
+ for (i = 0; i < 3; i++) {
+ unsigned idx = (ja->cur_idx + ja->nr - 1 + i) % ja->nr;
+
+ bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]);
+ }
+#endif
+ ja->sectors_free = 0;
+ }
+
+ /*
+ * Set dirty_idx to indicate the entire journal is full and needs to be
+ * reclaimed - journal reclaim will immediately reclaim whatever isn't
+ * pinned when it first runs:
+ */
+ ja->discard_idx = ja->dirty_idx_ondisk =
+ ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
+out:
+ bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret);
+ kvpfree(buf.data, buf.size);
+ percpu_ref_put(&ca->io_ref);
+ closure_return(cl);
+ return;
+err:
+ mutex_lock(&jlist->lock);
+ jlist->ret = ret;
+ mutex_unlock(&jlist->lock);
+ goto out;
+}
+
+void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
+ struct journal_replay *j)
+{
+ unsigned i;
+
+ for (i = 0; i < j->nr_ptrs; i++) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, j->ptrs[i].dev);
+ u64 offset;
+
+ div64_u64_rem(j->ptrs[i].sector, ca->mi.bucket_size, &offset);
+
+ if (i)
+ prt_printf(out, " ");
+ prt_printf(out, "%u:%u:%u (sector %llu)",
+ j->ptrs[i].dev,
+ j->ptrs[i].bucket,
+ j->ptrs[i].bucket_offset,
+ j->ptrs[i].sector);
+ }
+}
+
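+/*
+ * Out parameters:
+ * *last_seq: oldest sequence number still needed - replay starts here
+ * *blacklist_seq: one past the newest flush entry; newer (unflushed) entries
+ * are dropped and blacklisted
+ * *start_seq: one past the newest entry found
+ */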
+int bch2_journal_read(struct bch_fs *c,
+ u64 *last_seq,
+ u64 *blacklist_seq,
+ u64 *start_seq)
+{
+ struct journal_list jlist;
+ struct journal_replay *i, **_i, *prev = NULL;
+ struct genradix_iter radix_iter;
+ struct printbuf buf = PRINTBUF;
+ bool degraded = false, last_write_torn = false;
+ u64 seq;
+ int ret = 0;
+
+ closure_init_stack(&jlist.cl);
+ mutex_init(&jlist.lock);
+ jlist.last_seq = 0;
+ jlist.ret = 0;
+
+ for_each_member_device(c, ca) {
+ if (!c->opts.fsck &&
+ !(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal)))
+ continue;
+
+ if ((ca->mi.state == BCH_MEMBER_STATE_rw ||
+ ca->mi.state == BCH_MEMBER_STATE_ro) &&
+ percpu_ref_tryget(&ca->io_ref))
+ closure_call(&ca->journal.read,
+ bch2_journal_read_device,
+ system_unbound_wq,
+ &jlist.cl);
+ else
+ degraded = true;
+ }
+
+ closure_sync(&jlist.cl);
+
+ if (jlist.ret)
+ return jlist.ret;
+
+ *last_seq = 0;
+ *start_seq = 0;
+ *blacklist_seq = 0;
+
+ /*
+ * Find most recent flush entry, and ignore newer non flush entries -
+ * those entries will be blacklisted:
+ */
+ genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) {
+ enum bkey_invalid_flags flags = BKEY_INVALID_JOURNAL;
+
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
+
+ if (!*start_seq)
+ *blacklist_seq = *start_seq = le64_to_cpu(i->j.seq) + 1;
+
+ if (JSET_NO_FLUSH(&i->j)) {
+ i->ignore = true;
+ continue;
+ }
+
+ if (!last_write_torn && !i->csum_good) {
+ last_write_torn = true;
+ i->ignore = true;
+ continue;
+ }
+
+ if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq),
+ c, le32_to_cpu(i->j.version), &i->j, NULL,
+ jset_last_seq_newer_than_seq,
+ "invalid journal entry: last_seq > seq (%llu > %llu)",
+ le64_to_cpu(i->j.last_seq),
+ le64_to_cpu(i->j.seq)))
+ i->j.last_seq = i->j.seq;
+
+ *last_seq = le64_to_cpu(i->j.last_seq);
+ *blacklist_seq = le64_to_cpu(i->j.seq) + 1;
+ break;
+ }
+
+ if (!*start_seq) {
+ bch_info(c, "journal read done, but no entries found");
+ return 0;
+ }
+
+ if (!*last_seq) {
+ fsck_err(c, dirty_but_no_journal_entries_post_drop_nonflushes,
+ "journal read done, but no entries found after dropping non-flushes");
+ return 0;
+ }
+
+ bch_info(c, "journal read done, replaying entries %llu-%llu",
+ *last_seq, *blacklist_seq - 1);
+
+ if (*start_seq != *blacklist_seq)
+ bch_info(c, "dropped unflushed entries %llu-%llu",
+ *blacklist_seq, *start_seq - 1);
+
+ /* Drop blacklisted entries and entries older than last_seq: */
+ genradix_for_each(&c->journal_entries, radix_iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
+
+ seq = le64_to_cpu(i->j.seq);
+ if (seq < *last_seq) {
+ journal_replay_free(c, i);
+ continue;
+ }
+
+ if (bch2_journal_seq_is_blacklisted(c, seq, true)) {
+ fsck_err_on(!JSET_NO_FLUSH(&i->j), c,
+ jset_seq_blacklisted,
+ "found blacklisted journal entry %llu", seq);
+ i->ignore = true;
+ }
+ }
+
+ /* Check for missing entries: */
+ seq = *last_seq;
+ genradix_for_each(&c->journal_entries, radix_iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
+
+ BUG_ON(seq > le64_to_cpu(i->j.seq));
+
+ while (seq < le64_to_cpu(i->j.seq)) {
+ u64 missing_start, missing_end;
+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+
+ while (seq < le64_to_cpu(i->j.seq) &&
+ bch2_journal_seq_is_blacklisted(c, seq, false))
+ seq++;
+
+ if (seq == le64_to_cpu(i->j.seq))
+ break;
+
+ missing_start = seq;
+
+ while (seq < le64_to_cpu(i->j.seq) &&
+ !bch2_journal_seq_is_blacklisted(c, seq, false))
+ seq++;
+
+ if (prev) {
+ bch2_journal_ptrs_to_text(&buf1, c, prev);
+ prt_printf(&buf1, " size %zu", vstruct_sectors(&prev->j, c->block_bits));
+ } else
+ prt_printf(&buf1, "(none)");
+ bch2_journal_ptrs_to_text(&buf2, c, i);
+
+ missing_end = seq - 1;
+ fsck_err(c, journal_entries_missing,
+ "journal entries %llu-%llu missing! (replaying %llu-%llu)\n"
+ " prev at %s\n"
+ " next at %s",
+ missing_start, missing_end,
+ *last_seq, *blacklist_seq - 1,
+ buf1.buf, buf2.buf);
+
+ printbuf_exit(&buf1);
+ printbuf_exit(&buf2);
+ }
+
+ prev = i;
+ seq++;
+ }
+
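+ /*
+ * Do full validation on the entries we're keeping, and make sure the
+ * superblock is marked as containing journal replicas for each of
+ * them:
+ */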
+ genradix_for_each(&c->journal_entries, radix_iter, _i) {
+ struct bch_replicas_padded replicas = {
+ .e.data_type = BCH_DATA_journal,
+ .e.nr_required = 1,
+ };
+ unsigned ptr;
+
+ i = *_i;
+ if (!i || i->ignore)
+ continue;
+
+ for (ptr = 0; ptr < i->nr_ptrs; ptr++) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev);
+
+ if (!i->ptrs[ptr].csum_good)
+ bch_err_dev_offset(ca, i->ptrs[ptr].sector,
+ "invalid journal checksum, seq %llu%s",
+ le64_to_cpu(i->j.seq),
+ i->csum_good ? " (had good copy on another device)" : "");
+ }
+
+ ret = jset_validate(c,
+ bch_dev_bkey_exists(c, i->ptrs[0].dev),
+ &i->j,
+ i->ptrs[0].sector,
+ READ);
+ if (ret)
+ goto err;
+
+ for (ptr = 0; ptr < i->nr_ptrs; ptr++)
+ replicas.e.devs[replicas.e.nr_devs++] = i->ptrs[ptr].dev;
+
+ bch2_replicas_entry_sort(&replicas.e);
+
+ printbuf_reset(&buf);
+ bch2_replicas_entry_to_text(&buf, &replicas.e);
+
+ if (!degraded &&
+ !bch2_replicas_marked(c, &replicas.e) &&
+ (le64_to_cpu(i->j.seq) == *last_seq ||
+ fsck_err(c, journal_entry_replicas_not_marked,
+ "superblock not marked as containing replicas for journal entry %llu\n %s",
+ le64_to_cpu(i->j.seq), buf.buf))) {
+ ret = bch2_mark_replicas(c, &replicas.e);
+ if (ret)
+ goto err;
+ }
+ }
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+/* journal write: */
+
+static void __journal_write_alloc(struct journal *j,
+ struct journal_buf *w,
+ struct dev_alloc_list *devs_sorted,
+ unsigned sectors,
+ unsigned *replicas,
+ unsigned replicas_want)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct journal_device *ja;
+ struct bch_dev *ca;
+ unsigned i;
+
+ if (*replicas >= replicas_want)
+ return;
+
+ for (i = 0; i < devs_sorted->nr; i++) {
+ ca = rcu_dereference(c->devs[devs_sorted->devs[i]]);
+ if (!ca)
+ continue;
+
+ ja = &ca->journal;
+
+ /*
+ * Check that we can use this device, and aren't already using
+ * it:
+ */
+ if (!ca->mi.durability ||
+ ca->mi.state != BCH_MEMBER_STATE_rw ||
+ !ja->nr ||
+ bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
+ sectors > ja->sectors_free)
+ continue;
+
+ bch2_dev_stripe_increment(ca, &j->wp.stripe);
+
+ bch2_bkey_append_ptr(&w->key,
+ (struct bch_extent_ptr) {
+ .offset = bucket_to_sector(ca,
+ ja->buckets[ja->cur_idx]) +
+ ca->mi.bucket_size -
+ ja->sectors_free,
+ .dev = ca->dev_idx,
+ });
+
+ ja->sectors_free -= sectors;
+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
+
+ *replicas += ca->mi.durability;
+
+ if (*replicas >= replicas_want)
+ break;
+ }
+}
+
+/**
+ * journal_write_alloc - decide where to write next journal entry
+ *
+ * @j: journal object
+ * @w: journal buf (entry to be written)
+ *
+ * Returns: 0 on success, or -EROFS on failure
+ */
+static int journal_write_alloc(struct journal *j, struct journal_buf *w)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct bch_devs_mask devs;
+ struct journal_device *ja;
+ struct bch_dev *ca;
+ struct dev_alloc_list devs_sorted;
+ unsigned sectors = vstruct_sectors(w->data, c->block_bits);
+ unsigned target = c->opts.metadata_target ?:
+ c->opts.foreground_target;
+ unsigned i, replicas = 0, replicas_want =
+ READ_ONCE(c->opts.metadata_replicas);
+
+ rcu_read_lock();
+retry:
+ devs = target_rw_devs(c, BCH_DATA_journal, target);
+
+ devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);
+
+ __journal_write_alloc(j, w, &devs_sorted,
+ sectors, &replicas, replicas_want);
+
+ if (replicas >= replicas_want)
+ goto done;
+
+ for (i = 0; i < devs_sorted.nr; i++) {
+ ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
+ if (!ca)
+ continue;
+
+ ja = &ca->journal;
+
+ if (sectors > ja->sectors_free &&
+ sectors <= ca->mi.bucket_size &&
+ bch2_journal_dev_buckets_available(j, ja,
+ journal_space_discarded)) {
+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
+ ja->sectors_free = ca->mi.bucket_size;
+
+ /*
+ * ja->bucket_seq[ja->cur_idx] must always have
+ * something sensible:
+ */
+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
+ }
+ }
+
+ __journal_write_alloc(j, w, &devs_sorted,
+ sectors, &replicas, replicas_want);
+
+ if (replicas < replicas_want && target) {
+ /* Retry from all devices: */
+ target = 0;
+ goto retry;
+ }
+done:
+ rcu_read_unlock();
+
+ BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
+
+ return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS;
+}
+
+static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
+ /* we aren't holding j->lock: */
+ unsigned new_size = READ_ONCE(j->buf_size_want);
+ void *new_buf;
+
+ if (buf->buf_size >= new_size)
+ return;
+
+ size_t btree_write_buffer_size = new_size / 64;
+
+ if (bch2_btree_write_buffer_resize(c, btree_write_buffer_size))
+ return;
+
+ new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN);
+ if (!new_buf)
+ return;
+
+ memcpy(new_buf, buf->data, buf->buf_size);
+
+ spin_lock(&j->lock);
+ swap(buf->data, new_buf);
+ swap(buf->buf_size, new_size);
+ spin_unlock(&j->lock);
+
+ kvpfree(new_buf, new_size);
+}
+
+static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
+{
+ return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK);
+}
+
+static CLOSURE_CALLBACK(journal_write_done)
+{
+ closure_type(j, struct journal, io);
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct journal_buf *w = journal_last_unwritten_buf(j);
+ struct bch_replicas_padded replicas;
+ union journal_res_state old, new;
+ u64 v, seq;
+ int err = 0;
+
+ bch2_time_stats_update(!JSET_NO_FLUSH(w->data)
+ ? j->flush_write_time
+ : j->noflush_write_time, j->write_start_time);
+
+ if (!w->devs_written.nr) {
+ bch_err(c, "unable to write journal to sufficient devices");
+ err = -EIO;
+ } else {
+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
+ w->devs_written);
+ if (bch2_mark_replicas(c, &replicas.e))
+ err = -EIO;
+ }
+
+ if (err)
+ bch2_fatal_error(c);
+
+ spin_lock(&j->lock);
+ seq = le64_to_cpu(w->data->seq);
+
+ if (seq >= j->pin.front)
+ journal_seq_pin(j, seq)->devs = w->devs_written;
+
+ if (!err) {
+ if (!JSET_NO_FLUSH(w->data)) {
+ j->flushed_seq_ondisk = seq;
+ j->last_seq_ondisk = w->last_seq;
+
+ bch2_do_discards(c);
+ closure_wake_up(&c->freelist_wait);
+
+ bch2_reset_alloc_cursors(c);
+ }
+ } else if (!j->err_seq || seq < j->err_seq)
+ j->err_seq = seq;
+
+ j->seq_ondisk = seq;
+
+ /*
+ * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
+ * more buckets:
+ *
+ * Must come before signaling write completion, for
+ * bch2_fs_journal_stop():
+ */
+ if (j->watermark != BCH_WATERMARK_stripe)
+ journal_reclaim_kick(&c->journal);
+
+ /* also must come before signalling write completion: */
+ closure_debug_destroy(cl);
+
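+ /* advance unwritten_idx to mark this journal buf as written: */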
+ v = atomic64_read(&j->reservations.counter);
+ do {
+ old.v = new.v = v;
+ BUG_ON(journal_state_count(new, new.unwritten_idx));
+
+ new.unwritten_idx++;
+ } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+ old.v, new.v)) != old.v);
+
+ bch2_journal_reclaim_fast(j);
+ bch2_journal_space_available(j);
+
+ track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
+ &j->max_in_flight_start, false);
+
+ closure_wake_up(&w->wait);
+ journal_wake(j);
+
+ if (!journal_state_count(new, new.unwritten_idx) &&
+ journal_last_unwritten_seq(j) <= journal_cur_seq(j)) {
+ spin_unlock(&j->lock);
+ closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
+ } else if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
+ new.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
+ struct journal_buf *buf = journal_cur_buf(j);
+ long delta = buf->expires - jiffies;
+
+ /*
+ * We don't close a journal entry to write it while there are
+ * previous entries still in flight - the current journal entry
+ * might want to be written now:
+ */
+
+ spin_unlock(&j->lock);
+ mod_delayed_work(c->io_complete_wq, &j->write_work, max(0L, delta));
+ } else {
+ spin_unlock(&j->lock);
+ }
+}
+
+static void journal_write_endio(struct bio *bio)
+{
+ struct bch_dev *ca = bio->bi_private;
+ struct journal *j = &ca->fs->journal;
+ struct journal_buf *w = journal_last_unwritten_buf(j);
+ unsigned long flags;
+
+ if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
+ "error writing journal entry %llu: %s",
+ le64_to_cpu(w->data->seq),
+ bch2_blk_status_to_str(bio->bi_status)) ||
+ bch2_meta_write_fault("journal")) {
+ spin_lock_irqsave(&j->err_lock, flags);
+ bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx);
+ spin_unlock_irqrestore(&j->err_lock, flags);
+ }
+
+ closure_put(&j->io);
+ percpu_ref_put(&ca->io_ref);
+}
+
+static CLOSURE_CALLBACK(do_journal_write)
+{
+ closure_type(j, struct journal, io);
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct bch_dev *ca;
+ struct journal_buf *w = journal_last_unwritten_buf(j);
+ struct bio *bio;
+ unsigned sectors = vstruct_sectors(w->data, c->block_bits);
+
+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
+ ca = bch_dev_bkey_exists(c, ptr->dev);
+ if (!percpu_ref_tryget(&ca->io_ref)) {
+ /* XXX: fix this */
+ bch_err(c, "missing device for journal write\n");
+ continue;
+ }
+
+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
+ sectors);
+
+ bio = ca->journal.bio;
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
+ bio->bi_iter.bi_sector = ptr->offset;
+ bio->bi_end_io = journal_write_endio;
+ bio->bi_private = ca;
+
+ BUG_ON(bio->bi_iter.bi_sector == ca->prev_journal_sector);
+ ca->prev_journal_sector = bio->bi_iter.bi_sector;
+
+ if (!JSET_NO_FLUSH(w->data))
+ bio->bi_opf |= REQ_FUA;
+ if (!JSET_NO_FLUSH(w->data) && !w->separate_flush)
+ bio->bi_opf |= REQ_PREFLUSH;
+
+ bch2_bio_map(bio, w->data, sectors << 9);
+
+ trace_and_count(c, journal_write, bio);
+ closure_bio_submit(bio, cl);
+
+ ca->journal.bucket_seq[ca->journal.cur_idx] =
+ le64_to_cpu(w->data->seq);
+ }
+
+ continue_at(cl, journal_write_done, c->io_complete_wq);
+}
+
+static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct jset_entry *start, *end;
+ struct jset *jset = w->data;
+ struct journal_keys_to_wb wb = { NULL };
+ unsigned sectors, bytes, u64s;
+ unsigned long btree_roots_have = 0;
+ bool validate_before_checksum = false;
+ u64 seq = le64_to_cpu(jset->seq);
+ int ret;
+
+ /*
+ * Simple compaction, dropping empty jset_entries (from journal
+ * reservations that weren't fully used) and merging jset_entries that
+ * can be.
+ *
+ * If we wanted to be really fancy here, we could sort all the keys in
+ * the jset and drop keys that were overwritten - probably not worth it:
+ */
+ vstruct_for_each(jset, i) {
+ unsigned u64s = le16_to_cpu(i->u64s);
+
+ /* Empty entry: */
+ if (!u64s)
+ continue;
+
+ /*
+ * New btree roots are set by journalling them; when the journal
+ * entry gets written we have to propagate them to
+ * c->btree_roots
+ *
+ * But, every journal entry we write has to contain all the
+ * btree roots (at least for now); so after we copy btree roots
+ * to c->btree_roots we have to get any missing btree roots and
+ * add them to this journal entry:
+ */
+ switch (i->type) {
+ case BCH_JSET_ENTRY_btree_root:
+ bch2_journal_entry_to_btree_root(c, i);
+ __set_bit(i->btree_id, &btree_roots_have);
+ break;
+ case BCH_JSET_ENTRY_write_buffer_keys:
+ EBUG_ON(!w->need_flush_to_write_buffer);
+
+ if (!wb.wb)
+ bch2_journal_keys_to_write_buffer_start(c, &wb, seq);
+
+ struct bkey_i *k;
+ jset_entry_for_each_key(i, k) {
+ ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k);
+ if (ret) {
+ bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer");
+ bch2_journal_keys_to_write_buffer_end(c, &wb);
+ return ret;
+ }
+ }
+ i->type = BCH_JSET_ENTRY_btree_keys;
+ break;
+ }
+ }
+
+ if (wb.wb)
+ bch2_journal_keys_to_write_buffer_end(c, &wb);
+ w->need_flush_to_write_buffer = false;
+
+ start = end = vstruct_last(jset);
+
+ end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have);
+
+ bch2_journal_super_entries_add_common(c, &end, seq);
+ u64s = (u64 *) end - (u64 *) start;
+ BUG_ON(u64s > j->entry_u64s_reserved);
+
+ le32_add_cpu(&jset->u64s, u64s);
+
+ sectors = vstruct_sectors(jset, c->block_bits);
+ bytes = vstruct_bytes(jset);
+
+ if (sectors > w->sectors) {
+ bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
+ vstruct_bytes(jset), w->sectors << 9,
+ u64s, w->u64s_reserved, j->entry_u64s_reserved);
+ return -EINVAL;
+ }
+
+ jset->magic = cpu_to_le64(jset_magic(c));
+ jset->version = cpu_to_le32(c->sb.version);
+
+ SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
+ SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
+
+ if (!JSET_NO_FLUSH(jset) && journal_entry_empty(jset))
+ j->last_empty_seq = seq;
+
+ if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
+ validate_before_checksum = true;
+
+ if (le32_to_cpu(jset->version) < bcachefs_metadata_version_current)
+ validate_before_checksum = true;
+
+ if (validate_before_checksum &&
+ (ret = jset_validate(c, NULL, jset, 0, WRITE)))
+ return ret;
+
+ ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
+ jset->encrypted_start,
+ vstruct_end(jset) - (void *) jset->encrypted_start);
+ if (bch2_fs_fatal_err_on(ret, c,
+ "error decrypting journal entry: %i", ret))
+ return ret;
+
+ jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
+ journal_nonce(jset), jset);
+
+ if (!validate_before_checksum &&
+ (ret = jset_validate(c, NULL, jset, 0, WRITE)))
+ return ret;
+
+ memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
+ return 0;
+}
+
+static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *w)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ int error = bch2_journal_error(j);
+
+ /*
+ * If the journal is in an error state - we did an emergency shutdown -
+ * we prefer to continue doing journal writes. We just mark them as
+ * noflush so they'll never be used, but they'll still be visible to the
+ * list_journal tool - this helps in debugging.
+ *
+ * There's a caveat: the first journal write after marking the
+ * superblock dirty must always be a flush write, because on startup
+ * from a clean shutdown we didn't necessarily read the journal and the
+ * new journal write might overwrite whatever was in the journal
+ * previously - we can't leave the journal without any flush writes in
+ * it.
+ *
+ * So if we're in an error state, and we're still starting up, we don't
+ * write anything at all.
+ */
+ if (error && test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags))
+ return -EIO;
+
+ if (error ||
+ w->noflush ||
+ (!w->must_flush &&
+ (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
+ test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) {
+ w->noflush = true;
+ SET_JSET_NO_FLUSH(w->data, true);
+ w->data->last_seq = 0;
+ w->last_seq = 0;
+
+ j->nr_noflush_writes++;
+ } else {
+ j->last_flush_write = jiffies;
+ j->nr_flush_writes++;
+ clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags);
+ }
+
+ return 0;
+}
+
+CLOSURE_CALLBACK(bch2_journal_write)
+{
+ closure_type(j, struct journal, io);
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct journal_buf *w = journal_last_unwritten_buf(j);
+ struct bch_replicas_padded replicas;
+ struct bio *bio;
+ struct printbuf journal_debug_buf = PRINTBUF;
+ unsigned nr_rw_members = 0;
+ int ret;
+
+ BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
+
+ j->write_start_time = local_clock();
+
+ spin_lock(&j->lock);
+ ret = bch2_journal_write_pick_flush(j, w);
+ spin_unlock(&j->lock);
+ if (ret)
+ goto err;
+
+ mutex_lock(&j->buf_lock);
+ journal_buf_realloc(j, w);
+
+ ret = bch2_journal_write_prep(j, w);
+ mutex_unlock(&j->buf_lock);
+ if (ret)
+ goto err;
+
+ j->entry_bytes_written += vstruct_bytes(w->data);
+
+ while (1) {
+ spin_lock(&j->lock);
+ ret = journal_write_alloc(j, w);
+ if (!ret || !j->can_discard)
+ break;
+
+ spin_unlock(&j->lock);
+ bch2_journal_do_discards(j);
+ }
+
+ if (ret) {
+ __bch2_journal_debug_to_text(&journal_debug_buf, j);
+ spin_unlock(&j->lock);
+ bch_err(c, "Unable to allocate journal write:\n%s",
+ journal_debug_buf.buf);
+ printbuf_exit(&journal_debug_buf);
+ goto err;
+ }
+
+ /*
+ * write is allocated, no longer need to account for it in
+ * bch2_journal_space_available():
+ */
+ w->sectors = 0;
+
+ /*
+ * journal entry has been compacted and allocated, recalculate space
+ * available:
+ */
+ bch2_journal_space_available(j);
+ spin_unlock(&j->lock);
+
+ w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
+
+ if (c->opts.nochanges)
+ goto no_io;
+
+ for_each_rw_member(c, ca)
+ nr_rw_members++;
+
+ if (nr_rw_members > 1)
+ w->separate_flush = true;
+
+ /*
+ * Mark journal replicas before we submit the write to guarantee
+ * recovery will find the journal entries after a crash.
+ */
+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
+ w->devs_written);
+ ret = bch2_mark_replicas(c, &replicas.e);
+ if (ret)
+ goto err;
+
+ if (!JSET_NO_FLUSH(w->data) && w->separate_flush) {
+ for_each_rw_member(c, ca) {
+ percpu_ref_get(&ca->io_ref);
+
+ bio = ca->journal.bio;
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_FLUSH);
+ bio->bi_end_io = journal_write_endio;
+ bio->bi_private = ca;
+ closure_bio_submit(bio, cl);
+ }
+ }
+
+ continue_at(cl, do_journal_write, c->io_complete_wq);
+ return;
+no_io:
+ continue_at(cl, journal_write_done, c->io_complete_wq);
+ return;
+err:
+ bch2_fatal_error(c);
+ continue_at(cl, journal_write_done, c->io_complete_wq);
+}
diff --git a/c_src/libbcachefs/journal_io.h b/c_src/libbcachefs/journal_io.h
new file mode 100644
index 00000000..c035e7c1
--- /dev/null
+++ b/c_src/libbcachefs/journal_io.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_IO_H
+#define _BCACHEFS_JOURNAL_IO_H
+
+/*
+ * Only used for holding the journal entries we read in btree_journal_read()
+ * during cache_registration
+ */
+struct journal_replay {
+ struct journal_ptr {
+ bool csum_good;
+ u8 dev;
+ u32 bucket;
+ u32 bucket_offset;
+ u64 sector;
+ } ptrs[BCH_REPLICAS_MAX];
+ unsigned nr_ptrs;
+
+ bool csum_good;
+ bool ignore;
+ /* must be last: */
+ struct jset j;
+};
+
+static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
+ struct jset_entry *entry, unsigned type)
+{
+ while (entry < vstruct_last(jset)) {
+ if (entry->type == type)
+ return entry;
+
+ entry = vstruct_next(entry);
+ }
+
+ return NULL;
+}
+
+#define for_each_jset_entry_type(entry, jset, type) \
+ for (entry = (jset)->start; \
+ (entry = __jset_entry_type_next(jset, entry, type)); \
+ entry = vstruct_next(entry))
+
+#define jset_entry_for_each_key(_e, _k) \
+ for (_k = (_e)->start; \
+ _k < vstruct_last(_e); \
+ _k = bkey_next(_k))
+
+#define for_each_jset_key(k, entry, jset) \
+ for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys)\
+ jset_entry_for_each_key(entry, k)
+
+int bch2_journal_entry_validate(struct bch_fs *, struct jset *,
+ struct jset_entry *, unsigned, int,
+ enum bkey_invalid_flags);
+void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
+ struct jset_entry *);
+
+void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
+ struct journal_replay *);
+
+int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
+
+CLOSURE_CALLBACK(bch2_journal_write);
+
+#endif /* _BCACHEFS_JOURNAL_IO_H */
diff --git a/c_src/libbcachefs/journal_reclaim.c b/c_src/libbcachefs/journal_reclaim.c
new file mode 100644
index 00000000..820d25e1
--- /dev/null
+++ b/c_src/libbcachefs/journal_reclaim.c
@@ -0,0 +1,905 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_key_cache.h"
+#include "btree_update.h"
+#include "btree_write_buffer.h"
+#include "buckets.h"
+#include "errcode.h"
+#include "error.h"
+#include "journal.h"
+#include "journal_io.h"
+#include "journal_reclaim.h"
+#include "replicas.h"
+#include "sb-members.h"
+#include "trace.h"
+
+#include <linux/kthread.h>
+#include <linux/sched/mm.h>
+
+/* Free space calculations: */
+
+static unsigned journal_space_from(struct journal_device *ja,
+ enum journal_space_from from)
+{
+ switch (from) {
+ case journal_space_discarded:
+ return ja->discard_idx;
+ case journal_space_clean_ondisk:
+ return ja->dirty_idx_ondisk;
+ case journal_space_clean:
+ return ja->dirty_idx;
+ default:
+ BUG();
+ }
+}
+
+unsigned bch2_journal_dev_buckets_available(struct journal *j,
+ struct journal_device *ja,
+ enum journal_space_from from)
+{
+ unsigned available = (journal_space_from(ja, from) -
+ ja->cur_idx - 1 + ja->nr) % ja->nr;
+
+ /*
+ * Don't use the last bucket unless writing the new last_seq
+ * will make another bucket available:
+ */
+ if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
+ --available;
+
+ return available;
+}
+
+void bch2_journal_set_watermark(struct journal *j)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ bool low_on_space = j->space[journal_space_clean].total * 4 <=
+ j->space[journal_space_total].total;
+ bool low_on_pin = fifo_free(&j->pin) < j->pin.size / 4;
+ bool low_on_wb = bch2_btree_write_buffer_must_wait(c);
+ unsigned watermark = low_on_space || low_on_pin || low_on_wb
+ ? BCH_WATERMARK_reclaim
+ : BCH_WATERMARK_stripe;
+
+ if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space],
+ &j->low_on_space_start, low_on_space) ||
+ track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin],
+ &j->low_on_pin_start, low_on_pin) ||
+ track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full],
+ &j->write_buffer_full_start, low_on_wb))
+ trace_and_count(c, journal_full, c);
+
+ swap(watermark, j->watermark);
+ if (watermark > j->watermark)
+ journal_wake(j);
+}
+
+static struct journal_space
+journal_dev_space_available(struct journal *j, struct bch_dev *ca,
+ enum journal_space_from from)
+{
+ struct journal_device *ja = &ca->journal;
+ unsigned sectors, buckets, unwritten;
+ u64 seq;
+
+ if (from == journal_space_total)
+ return (struct journal_space) {
+ .next_entry = ca->mi.bucket_size,
+ .total = ca->mi.bucket_size * ja->nr,
+ };
+
+ buckets = bch2_journal_dev_buckets_available(j, ja, from);
+ sectors = ja->sectors_free;
+
+ /*
+ * Note that we don't allocate the space for a journal entry
+ * until we write it out - thus, account for it here:
+ */
+ for (seq = journal_last_unwritten_seq(j);
+ seq <= journal_cur_seq(j);
+ seq++) {
+ unwritten = j->buf[seq & JOURNAL_BUF_MASK].sectors;
+
+ if (!unwritten)
+ continue;
+
+ /* entry won't fit on this device, skip: */
+ if (unwritten > ca->mi.bucket_size)
+ continue;
+
+ if (unwritten >= sectors) {
+ if (!buckets) {
+ sectors = 0;
+ break;
+ }
+
+ buckets--;
+ sectors = ca->mi.bucket_size;
+ }
+
+ sectors -= unwritten;
+ }
+
+ if (sectors < ca->mi.bucket_size && buckets) {
+ buckets--;
+ sectors = ca->mi.bucket_size;
+ }
+
+ return (struct journal_space) {
+ .next_entry = sectors,
+ .total = sectors + buckets * ca->mi.bucket_size,
+ };
+}
+
+static struct journal_space __journal_space_available(struct journal *j, unsigned nr_devs_want,
+ enum journal_space_from from)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ unsigned pos, nr_devs = 0;
+ struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX];
+
+ BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space));
+
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
+ if (!ca->journal.nr)
+ continue;
+
+ space = journal_dev_space_available(j, ca, from);
+ if (!space.next_entry)
+ continue;
+
+ for (pos = 0; pos < nr_devs; pos++)
+ if (space.total > dev_space[pos].total)
+ break;
+
+ array_insert_item(dev_space, nr_devs, pos, space);
+ }
+ rcu_read_unlock();
+
+ if (nr_devs < nr_devs_want)
+ return (struct journal_space) { 0, 0 };
+
+ /*
+ * We sorted largest to smallest, and we want the smallest out of the
+ * @nr_devs_want largest devices:
+ */
+ return dev_space[nr_devs_want - 1];
+}
+
+void bch2_journal_space_available(struct journal *j)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ unsigned clean, clean_ondisk, total;
+ unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
+ j->buf[1].buf_size >> 9);
+ unsigned nr_online = 0, nr_devs_want;
+ bool can_discard = false;
+ int ret = 0;
+
+ lockdep_assert_held(&j->lock);
+
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
+ struct journal_device *ja = &ca->journal;
+
+ if (!ja->nr)
+ continue;
+
+ while (ja->dirty_idx != ja->cur_idx &&
+ ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+
+ while (ja->dirty_idx_ondisk != ja->dirty_idx &&
+ ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+
+ if (ja->discard_idx != ja->dirty_idx_ondisk)
+ can_discard = true;
+
+ max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size);
+ nr_online++;
+ }
+ rcu_read_unlock();
+
+ j->can_discard = can_discard;
+
+ if (nr_online < c->opts.metadata_replicas_required) {
+ ret = JOURNAL_ERR_insufficient_devices;
+ goto out;
+ }
+
+ nr_devs_want = min_t(unsigned, nr_online, c->opts.metadata_replicas);
+
+ for (unsigned i = 0; i < journal_space_nr; i++)
+ j->space[i] = __journal_space_available(j, nr_devs_want, i);
+
+ clean_ondisk = j->space[journal_space_clean_ondisk].total;
+ clean = j->space[journal_space_clean].total;
+ total = j->space[journal_space_total].total;
+
+ if (!j->space[journal_space_discarded].next_entry)
+ ret = JOURNAL_ERR_journal_full;
+
+ if ((j->space[journal_space_clean_ondisk].next_entry <
+ j->space[journal_space_clean_ondisk].total) &&
+ (clean - clean_ondisk <= total / 8) &&
+ (clean_ondisk * 2 > clean))
+ set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
+ else
+ clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
+
+ bch2_journal_set_watermark(j);
+out:
+ j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
+ j->cur_entry_error = ret;
+
+ if (!ret)
+ journal_wake(j);
+}
+
+/* Discards - last part of journal reclaim: */
+
+static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
+{
+ bool ret;
+
+ spin_lock(&j->lock);
+ ret = ja->discard_idx != ja->dirty_idx_ondisk;
+ spin_unlock(&j->lock);
+
+ return ret;
+}
+
+/*
+ * Advance ja->discard_idx as long as it points to buckets that are no longer
+ * dirty, issuing discards if necessary:
+ */
+void bch2_journal_do_discards(struct journal *j)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
+ mutex_lock(&j->discard_lock);
+
+ for_each_rw_member(c, ca) {
+ struct journal_device *ja = &ca->journal;
+
+ while (should_discard_bucket(j, ja)) {
+ if (!c->opts.nochanges &&
+ ca->mi.discard &&
+ bdev_max_discard_sectors(ca->disk_sb.bdev))
+ blkdev_issue_discard(ca->disk_sb.bdev,
+ bucket_to_sector(ca,
+ ja->buckets[ja->discard_idx]),
+ ca->mi.bucket_size, GFP_NOFS);
+
+ spin_lock(&j->lock);
+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+
+ bch2_journal_space_available(j);
+ spin_unlock(&j->lock);
+ }
+ }
+
+ mutex_unlock(&j->discard_lock);
+}
+
+/*
+ * Journal entry pinning - machinery for holding a reference on a given journal
+ * entry, holding it open to ensure it gets replayed during recovery:
+ */
+
+void bch2_journal_reclaim_fast(struct journal *j)
+{
+ bool popped = false;
+
+ lockdep_assert_held(&j->lock);
+
+ /*
+ * Unpin journal entries whose reference counts reached zero, meaning
+ * all btree nodes got written out
+ */
+ while (!fifo_empty(&j->pin) &&
+ j->pin.front <= j->seq_ondisk &&
+ !atomic_read(&fifo_peek_front(&j->pin).count)) {
+ j->pin.front++;
+ popped = true;
+ }
+
+ if (popped)
+ bch2_journal_space_available(j);
+}
+
+bool __bch2_journal_pin_put(struct journal *j, u64 seq)
+{
+ struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
+
+ return atomic_dec_and_test(&pin_list->count);
+}
+
+void bch2_journal_pin_put(struct journal *j, u64 seq)
+{
+ if (__bch2_journal_pin_put(j, seq)) {
+ spin_lock(&j->lock);
+ bch2_journal_reclaim_fast(j);
+ spin_unlock(&j->lock);
+ }
+}
+
+static inline bool __journal_pin_drop(struct journal *j,
+ struct journal_entry_pin *pin)
+{
+ struct journal_entry_pin_list *pin_list;
+
+ if (!journal_pin_active(pin))
+ return false;
+
+ if (j->flush_in_progress == pin)
+ j->flush_in_progress_dropped = true;
+
+ pin_list = journal_seq_pin(j, pin->seq);
+ pin->seq = 0;
+ list_del_init(&pin->list);
+
+ /*
+ * Unpinning a journal entry may make journal_next_bucket() succeed, if
+ * writing a new last_seq will now make another bucket available:
+ */
+ return atomic_dec_and_test(&pin_list->count) &&
+ pin_list == &fifo_peek_front(&j->pin);
+}
+
+void bch2_journal_pin_drop(struct journal *j,
+ struct journal_entry_pin *pin)
+{
+ spin_lock(&j->lock);
+ if (__journal_pin_drop(j, pin))
+ bch2_journal_reclaim_fast(j);
+ spin_unlock(&j->lock);
+}
+
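+/* Pins are classified by their flush callback, and kept on per-type lists: */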
+static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
+{
+ if (fn == bch2_btree_node_flush0 ||
+ fn == bch2_btree_node_flush1)
+ return JOURNAL_PIN_btree;
+ else if (fn == bch2_btree_key_cache_journal_flush)
+ return JOURNAL_PIN_key_cache;
+ else
+ return JOURNAL_PIN_other;
+}
+
+static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq,
+ struct journal_entry_pin *pin,
+ journal_pin_flush_fn flush_fn,
+ enum journal_pin_type type)
+{
+ struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
+
+ /*
+ * flush_fn is how we identify journal pins in debugfs, so must always
+ * exist, even if it doesn't do anything:
+ */
+ BUG_ON(!flush_fn);
+
+ atomic_inc(&pin_list->count);
+ pin->seq = seq;
+ pin->flush = flush_fn;
+ list_add(&pin->list, &pin_list->list[type]);
+}
+
+void bch2_journal_pin_copy(struct journal *j,
+ struct journal_entry_pin *dst,
+ struct journal_entry_pin *src,
+ journal_pin_flush_fn flush_fn)
+{
+ bool reclaim;
+
+ spin_lock(&j->lock);
+
+ u64 seq = READ_ONCE(src->seq);
+
+ if (seq < journal_last_seq(j)) {
+ /*
+ * bch2_journal_pin_copy() raced with bch2_journal_pin_drop() on
+ * the src pin - with the pin dropped, the entry to pin might no
+	 * longer exist, but that means there's no longer anything to
+ * copy and we can bail out here:
+ */
+ spin_unlock(&j->lock);
+ return;
+ }
+
+ reclaim = __journal_pin_drop(j, dst);
+
+ bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));
+
+ if (reclaim)
+ bch2_journal_reclaim_fast(j);
+ spin_unlock(&j->lock);
+
+ /*
+ * If the journal is currently full, we might want to call flush_fn
+ * immediately:
+ */
+ journal_wake(j);
+}
+
+void bch2_journal_pin_set(struct journal *j, u64 seq,
+ struct journal_entry_pin *pin,
+ journal_pin_flush_fn flush_fn)
+{
+ bool reclaim;
+
+ spin_lock(&j->lock);
+
+ BUG_ON(seq < journal_last_seq(j));
+
+ reclaim = __journal_pin_drop(j, pin);
+
+ bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));
+
+ if (reclaim)
+ bch2_journal_reclaim_fast(j);
+ spin_unlock(&j->lock);
+
+ /*
+ * If the journal is currently full, we might want to call flush_fn
+ * immediately:
+ */
+ journal_wake(j);
+}
+
+/**
+ * bch2_journal_pin_flush: ensure journal pin callback is no longer running
+ * @j: journal object
+ * @pin: pin to flush
+ */
+void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
+{
+ BUG_ON(journal_pin_active(pin));
+
+ wait_event(j->pin_flush_wait, j->flush_in_progress != pin);
+}
+
+/*
+ * Journal reclaim: flush references to open journal entries to reclaim space in
+ * the journal
+ *
+ * May be done by the journal code in the background as needed to free up space
+ * for more journal entries, or as part of doing a clean shutdown, or to migrate
+ * data off of a specific device:
+ */
+
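+/*
+ * allowed_below_seq and allowed_above_seq are bitmasks of journal_pin_type:
+ * which pin types may be flushed for entries at or below seq_to_flush, and
+ * which may be flushed even for entries past it:
+ */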
+static struct journal_entry_pin *
+journal_get_next_pin(struct journal *j,
+ u64 seq_to_flush,
+ unsigned allowed_below_seq,
+ unsigned allowed_above_seq,
+ u64 *seq)
+{
+ struct journal_entry_pin_list *pin_list;
+ struct journal_entry_pin *ret = NULL;
+ unsigned i;
+
+ fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
+ if (*seq > seq_to_flush && !allowed_above_seq)
+ break;
+
+ for (i = 0; i < JOURNAL_PIN_NR; i++)
+ if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) ||
+ ((1U << i) & allowed_above_seq)) {
+ ret = list_first_entry_or_null(&pin_list->list[i],
+ struct journal_entry_pin, list);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return NULL;
+}
+
+/* returns true if we did work */
+static size_t journal_flush_pins(struct journal *j,
+ u64 seq_to_flush,
+ unsigned allowed_below_seq,
+ unsigned allowed_above_seq,
+ unsigned min_any,
+ unsigned min_key_cache)
+{
+ struct journal_entry_pin *pin;
+ size_t nr_flushed = 0;
+ journal_pin_flush_fn flush_fn;
+ u64 seq;
+ int err;
+
+ lockdep_assert_held(&j->reclaim_lock);
+
+ while (1) {
+ unsigned allowed_above = allowed_above_seq;
+ unsigned allowed_below = allowed_below_seq;
+
+ if (min_any) {
+ allowed_above |= ~0;
+ allowed_below |= ~0;
+ }
+
+ if (min_key_cache) {
+ allowed_above |= 1U << JOURNAL_PIN_key_cache;
+ allowed_below |= 1U << JOURNAL_PIN_key_cache;
+ }
+
+ cond_resched();
+
+ j->last_flushed = jiffies;
+
+ spin_lock(&j->lock);
+ pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq);
+ if (pin) {
+ BUG_ON(j->flush_in_progress);
+ j->flush_in_progress = pin;
+ j->flush_in_progress_dropped = false;
+ flush_fn = pin->flush;
+ }
+ spin_unlock(&j->lock);
+
+ if (!pin)
+ break;
+
+ if (min_key_cache && pin->flush == bch2_btree_key_cache_journal_flush)
+ min_key_cache--;
+
+ if (min_any)
+ min_any--;
+
+ err = flush_fn(j, pin, seq);
+
+ spin_lock(&j->lock);
+ /* Pin might have been dropped or rearmed: */
+ if (likely(!err && !j->flush_in_progress_dropped))
+ list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
+ j->flush_in_progress = NULL;
+ j->flush_in_progress_dropped = false;
+ spin_unlock(&j->lock);
+
+ wake_up(&j->pin_flush_wait);
+
+ if (err)
+ break;
+
+ nr_flushed++;
+ }
+
+ return nr_flushed;
+}
+
+static u64 journal_seq_to_flush(struct journal *j)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ u64 seq_to_flush = 0;
+
+ spin_lock(&j->lock);
+
+ for_each_rw_member(c, ca) {
+ struct journal_device *ja = &ca->journal;
+ unsigned nr_buckets, bucket_to_flush;
+
+ if (!ja->nr)
+ continue;
+
+ /* Try to keep the journal at most half full: */
+ nr_buckets = ja->nr / 2;
+
+ nr_buckets = min(nr_buckets, ja->nr);
+
+ bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
+ seq_to_flush = max(seq_to_flush,
+ ja->bucket_seq[bucket_to_flush]);
+ }
+
+ /* Also flush if the pin fifo is more than half full */
+ seq_to_flush = max_t(s64, seq_to_flush,
+ (s64) journal_cur_seq(j) -
+ (j->pin.size >> 1));
+ spin_unlock(&j->lock);
+
+ return seq_to_flush;
+}
+
+/**
+ * __bch2_journal_reclaim - free up journal buckets
+ * @j: journal object
+ * @direct: direct or background reclaim?
+ * @kicked: requested to run since we last ran?
+ * Returns: 0 on success, or -EIO if the journal has been shut down
+ *
+ * Background journal reclaim writes out btree nodes. It should be run
+ * early enough so that we never completely run out of journal buckets.
+ *
+ * High watermarks for triggering background reclaim:
+ * - FIFO has fewer than 512 entries left
+ * - fewer than 25% journal buckets free
+ *
+ * Background reclaim runs until low watermarks are reached:
+ * - FIFO has more than 1024 entries left
+ * - more than 50% journal buckets free
+ *
+ * As long as a reclaim can complete in the time it takes to fill up
+ * 512 journal entries or 25% of all journal buckets, then
+ * journal_next_bucket() should not stall.
+ */
+static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ bool kthread = (current->flags & PF_KTHREAD) != 0;
+ u64 seq_to_flush;
+ size_t min_nr, min_key_cache, nr_flushed;
+ unsigned flags;
+ int ret = 0;
+
+ /*
+ * We can't invoke memory reclaim while holding the reclaim_lock -
+ * journal reclaim is required to make progress for memory reclaim
+ * (cleaning the caches), so we can't get stuck in memory reclaim while
+ * we're holding the reclaim lock:
+ */
+ lockdep_assert_held(&j->reclaim_lock);
+ flags = memalloc_noreclaim_save();
+
+ do {
+ if (kthread && kthread_should_stop())
+ break;
+
+ if (bch2_journal_error(j)) {
+ ret = -EIO;
+ break;
+ }
+
+ bch2_journal_do_discards(j);
+
+ seq_to_flush = journal_seq_to_flush(j);
+ min_nr = 0;
+
+ /*
+ * If it's been longer than j->reclaim_delay_ms since we last flushed,
+		 * If it's been longer than c->opts.journal_reclaim_delay since we
+		 * last flushed, make sure to flush at least one journal pin:
+ if (time_after(jiffies, j->last_flushed +
+ msecs_to_jiffies(c->opts.journal_reclaim_delay)))
+ min_nr = 1;
+
+ if (j->watermark != BCH_WATERMARK_stripe)
+ min_nr = 1;
+
+ if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
+ min_nr = 1;
+
+ min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
+
+ trace_and_count(c, journal_reclaim_start, c,
+ direct, kicked,
+ min_nr, min_key_cache,
+ atomic_read(&c->btree_cache.dirty),
+ c->btree_cache.used,
+ atomic_long_read(&c->btree_key_cache.nr_dirty),
+ atomic_long_read(&c->btree_key_cache.nr_keys));
+
+ nr_flushed = journal_flush_pins(j, seq_to_flush,
+ ~0, 0,
+ min_nr, min_key_cache);
+
+ if (direct)
+ j->nr_direct_reclaim += nr_flushed;
+ else
+ j->nr_background_reclaim += nr_flushed;
+ trace_and_count(c, journal_reclaim_finish, c, nr_flushed);
+
+ if (nr_flushed)
+ wake_up(&j->reclaim_wait);
+ } while ((min_nr || min_key_cache) && nr_flushed && !direct);
+
+ memalloc_noreclaim_restore(flags);
+
+ return ret;
+}
+
+int bch2_journal_reclaim(struct journal *j)
+{
+ return __bch2_journal_reclaim(j, true, true);
+}
+
+static int bch2_journal_reclaim_thread(void *arg)
+{
+ struct journal *j = arg;
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ unsigned long delay, now;
+ bool journal_empty;
+ int ret = 0;
+
+ set_freezable();
+
+ j->last_flushed = jiffies;
+
+ while (!ret && !kthread_should_stop()) {
+ bool kicked = j->reclaim_kicked;
+
+ j->reclaim_kicked = false;
+
+ mutex_lock(&j->reclaim_lock);
+ ret = __bch2_journal_reclaim(j, false, kicked);
+ mutex_unlock(&j->reclaim_lock);
+
+ now = jiffies;
+ delay = msecs_to_jiffies(c->opts.journal_reclaim_delay);
+ j->next_reclaim = j->last_flushed + delay;
+
+ if (!time_in_range(j->next_reclaim, now, now + delay))
+ j->next_reclaim = now + delay;
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
+ if (kthread_should_stop())
+ break;
+ if (j->reclaim_kicked)
+ break;
+
+ spin_lock(&j->lock);
+ journal_empty = fifo_empty(&j->pin);
+ spin_unlock(&j->lock);
+
+ if (journal_empty)
+ schedule();
+ else if (time_after(j->next_reclaim, jiffies))
+ schedule_timeout(j->next_reclaim - jiffies);
+ else
+ break;
+ }
+ __set_current_state(TASK_RUNNING);
+ }
+
+ return 0;
+}
+
+void bch2_journal_reclaim_stop(struct journal *j)
+{
+ struct task_struct *p = j->reclaim_thread;
+
+ j->reclaim_thread = NULL;
+
+ if (p) {
+ kthread_stop(p);
+ put_task_struct(p);
+ }
+}
+
+int bch2_journal_reclaim_start(struct journal *j)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct task_struct *p;
+ int ret;
+
+ if (j->reclaim_thread)
+ return 0;
+
+ p = kthread_create(bch2_journal_reclaim_thread, j,
+ "bch-reclaim/%s", c->name);
+ ret = PTR_ERR_OR_ZERO(p);
+ bch_err_msg(c, ret, "creating journal reclaim thread");
+ if (ret)
+ return ret;
+
+ get_task_struct(p);
+ j->reclaim_thread = p;
+ wake_up_process(p);
+ return 0;
+}
+
+static int journal_flush_done(struct journal *j, u64 seq_to_flush,
+ bool *did_work)
+{
+ int ret;
+
+ ret = bch2_journal_error(j);
+ if (ret)
+ return ret;
+
+ mutex_lock(&j->reclaim_lock);
+
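+	/*
+	 * Flush key cache and other pins before btree node pins: flushing the
+	 * key cache dirties btree nodes, which the second pass then flushes:
+	 */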
+ if (journal_flush_pins(j, seq_to_flush,
+ (1U << JOURNAL_PIN_key_cache)|
+ (1U << JOURNAL_PIN_other), 0, 0, 0) ||
+ journal_flush_pins(j, seq_to_flush,
+ (1U << JOURNAL_PIN_btree), 0, 0, 0))
+ *did_work = true;
+
+ if (seq_to_flush > journal_cur_seq(j))
+ bch2_journal_entry_close(j);
+
+ spin_lock(&j->lock);
+ /*
+ * If journal replay hasn't completed, the unreplayed journal entries
+ * hold refs on their corresponding sequence numbers
+ */
+ ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) ||
+ journal_last_seq(j) > seq_to_flush ||
+ !fifo_used(&j->pin);
+
+ spin_unlock(&j->lock);
+ mutex_unlock(&j->reclaim_lock);
+
+ return ret;
+}
+
+bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
+{
+ /* time_stats this */
+ bool did_work = false;
+
+ if (!test_bit(JOURNAL_STARTED, &j->flags))
+ return false;
+
+ closure_wait_event(&j->async_wait,
+ journal_flush_done(j, seq_to_flush, &did_work));
+
+ return did_work;
+}
+
+int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct journal_entry_pin_list *p;
+ u64 iter, seq = 0;
+ int ret = 0;
+
+ spin_lock(&j->lock);
+ fifo_for_each_entry_ptr(p, &j->pin, iter)
+ if (dev_idx >= 0
+ ? bch2_dev_list_has_dev(p->devs, dev_idx)
+ : p->devs.nr < c->opts.metadata_replicas)
+ seq = iter;
+ spin_unlock(&j->lock);
+
+ bch2_journal_flush_pins(j, seq);
+
+ ret = bch2_journal_error(j);
+ if (ret)
+ return ret;
+
+ mutex_lock(&c->replicas_gc_lock);
+ bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);
+
+ /*
+ * Now that we've populated replicas_gc, write to the journal to mark
+ * active journal devices. This handles the case where the journal might
+ * be empty. Otherwise we could clear all journal replicas and
+ * temporarily put the fs into an unrecoverable state. Journal recovery
+ * expects to find devices marked for journal data on unclean mount.
+ */
+ ret = bch2_journal_meta(&c->journal);
+ if (ret)
+ goto err;
+
+ seq = 0;
+ spin_lock(&j->lock);
+ while (!ret) {
+ struct bch_replicas_padded replicas;
+
+ seq = max(seq, journal_last_seq(j));
+ if (seq >= j->pin.back)
+ break;
+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
+ journal_seq_pin(j, seq)->devs);
+ seq++;
+
+ spin_unlock(&j->lock);
+ ret = bch2_mark_replicas(c, &replicas.e);
+ spin_lock(&j->lock);
+ }
+ spin_unlock(&j->lock);
+err:
+ ret = bch2_replicas_gc_end(c, ret);
+ mutex_unlock(&c->replicas_gc_lock);
+
+ return ret;
+}
diff --git a/c_src/libbcachefs/journal_reclaim.h b/c_src/libbcachefs/journal_reclaim.h
new file mode 100644
index 00000000..ec84c334
--- /dev/null
+++ b/c_src/libbcachefs/journal_reclaim.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_RECLAIM_H
+#define _BCACHEFS_JOURNAL_RECLAIM_H
+
+#define JOURNAL_PIN (32 * 1024)
+
+static inline void journal_reclaim_kick(struct journal *j)
+{
+ struct task_struct *p = READ_ONCE(j->reclaim_thread);
+
+ j->reclaim_kicked = true;
+ if (p)
+ wake_up_process(p);
+}
+
+unsigned bch2_journal_dev_buckets_available(struct journal *,
+ struct journal_device *,
+ enum journal_space_from);
+void bch2_journal_set_watermark(struct journal *);
+void bch2_journal_space_available(struct journal *);
+
+static inline bool journal_pin_active(struct journal_entry_pin *pin)
+{
+ return pin->seq != 0;
+}
+
+static inline struct journal_entry_pin_list *
+journal_seq_pin(struct journal *j, u64 seq)
+{
+ EBUG_ON(seq < j->pin.front || seq >= j->pin.back);
+
+ return &j->pin.data[seq & j->pin.mask];
+}
+
+void bch2_journal_reclaim_fast(struct journal *);
+bool __bch2_journal_pin_put(struct journal *, u64);
+void bch2_journal_pin_put(struct journal *, u64);
+void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *);
+
+void bch2_journal_pin_set(struct journal *, u64, struct journal_entry_pin *,
+ journal_pin_flush_fn);
+
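+/* Add a pin, or move an existing pin back to an older sequence number: */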
+static inline void bch2_journal_pin_add(struct journal *j, u64 seq,
+ struct journal_entry_pin *pin,
+ journal_pin_flush_fn flush_fn)
+{
+ if (unlikely(!journal_pin_active(pin) || pin->seq > seq))
+ bch2_journal_pin_set(j, seq, pin, flush_fn);
+}
+
+void bch2_journal_pin_copy(struct journal *,
+ struct journal_entry_pin *,
+ struct journal_entry_pin *,
+ journal_pin_flush_fn);
+
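+/* Move an existing pin forward to a newer sequence number (or add it): */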
+static inline void bch2_journal_pin_update(struct journal *j, u64 seq,
+ struct journal_entry_pin *pin,
+ journal_pin_flush_fn flush_fn)
+{
+ if (unlikely(!journal_pin_active(pin) || pin->seq < seq))
+ bch2_journal_pin_set(j, seq, pin, flush_fn);
+}
+
+void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *);
+
+void bch2_journal_do_discards(struct journal *);
+int bch2_journal_reclaim(struct journal *);
+
+void bch2_journal_reclaim_stop(struct journal *);
+int bch2_journal_reclaim_start(struct journal *);
+
+bool bch2_journal_flush_pins(struct journal *, u64);
+
+static inline bool bch2_journal_flush_all_pins(struct journal *j)
+{
+ return bch2_journal_flush_pins(j, U64_MAX);
+}
+
+int bch2_journal_flush_device_pins(struct journal *, int);
+
+#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */
diff --git a/c_src/libbcachefs/journal_sb.c b/c_src/libbcachefs/journal_sb.c
new file mode 100644
index 00000000..ae4fb8c3
--- /dev/null
+++ b/c_src/libbcachefs/journal_sb.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "journal_sb.h"
+#include "darray.h"
+
+#include <linux/sort.h>
+
+/* BCH_SB_FIELD_journal: */
+
+static int u64_cmp(const void *_l, const void *_r)
+{
+ const u64 *l = _l;
+ const u64 *r = _r;
+
+ return cmp_int(*l, *r);
+}
+
+static int bch2_sb_journal_validate(struct bch_sb *sb,
+ struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_journal *journal = field_to_type(f, journal);
+ struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx);
+ int ret = -BCH_ERR_invalid_sb_journal;
+ unsigned nr;
+ unsigned i;
+ u64 *b;
+
+ nr = bch2_nr_journal_buckets(journal);
+ if (!nr)
+ return 0;
+
+ b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL);
+ if (!b)
+ return -BCH_ERR_ENOMEM_sb_journal_validate;
+
+ for (i = 0; i < nr; i++)
+ b[i] = le64_to_cpu(journal->buckets[i]);
+
+ sort(b, nr, sizeof(u64), u64_cmp, NULL);
+
+ if (!b[0]) {
+ prt_printf(err, "journal bucket at sector 0");
+ goto err;
+ }
+
+ if (b[0] < le16_to_cpu(m.first_bucket)) {
+ prt_printf(err, "journal bucket %llu before first bucket %u",
+ b[0], le16_to_cpu(m.first_bucket));
+ goto err;
+ }
+
+ if (b[nr - 1] >= le64_to_cpu(m.nbuckets)) {
+ prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)",
+ b[nr - 1], le64_to_cpu(m.nbuckets));
+ goto err;
+ }
+
+ for (i = 0; i + 1 < nr; i++)
+ if (b[i] == b[i + 1]) {
+ prt_printf(err, "duplicate journal buckets %llu", b[i]);
+ goto err;
+ }
+
+ ret = 0;
+err:
+ kfree(b);
+ return ret;
+}
+
+static void bch2_sb_journal_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_journal *journal = field_to_type(f, journal);
+ unsigned i, nr = bch2_nr_journal_buckets(journal);
+
+ prt_printf(out, "Buckets: ");
+ for (i = 0; i < nr; i++)
+ prt_printf(out, " %llu", le64_to_cpu(journal->buckets[i]));
+ prt_newline(out);
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_journal = {
+ .validate = bch2_sb_journal_validate,
+ .to_text = bch2_sb_journal_to_text,
+};
+
+struct u64_range {
+ u64 start;
+ u64 end;
+};
+
+static int u64_range_cmp(const void *_l, const void *_r)
+{
+ const struct u64_range *l = _l;
+ const struct u64_range *r = _r;
+
+ return cmp_int(l->start, r->start);
+}
+
+static int bch2_sb_journal_v2_validate(struct bch_sb *sb,
+ struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2);
+ struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx);
+ int ret = -BCH_ERR_invalid_sb_journal;
+ unsigned nr;
+ unsigned i;
+ struct u64_range *b;
+
+ nr = bch2_sb_field_journal_v2_nr_entries(journal);
+ if (!nr)
+ return 0;
+
+ b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL);
+ if (!b)
+ return -BCH_ERR_ENOMEM_sb_journal_v2_validate;
+
+ for (i = 0; i < nr; i++) {
+ b[i].start = le64_to_cpu(journal->d[i].start);
+ b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr);
+ }
+
+ sort(b, nr, sizeof(*b), u64_range_cmp, NULL);
+
+ if (!b[0].start) {
+ prt_printf(err, "journal bucket at sector 0");
+ goto err;
+ }
+
+ if (b[0].start < le16_to_cpu(m.first_bucket)) {
+ prt_printf(err, "journal bucket %llu before first bucket %u",
+ b[0].start, le16_to_cpu(m.first_bucket));
+ goto err;
+ }
+
+ if (b[nr - 1].end > le64_to_cpu(m.nbuckets)) {
+ prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)",
+ b[nr - 1].end - 1, le64_to_cpu(m.nbuckets));
+ goto err;
+ }
+
+ for (i = 0; i + 1 < nr; i++) {
+ if (b[i].end > b[i + 1].start) {
+ prt_printf(err, "duplicate journal buckets in ranges %llu-%llu, %llu-%llu",
+ b[i].start, b[i].end, b[i + 1].start, b[i + 1].end);
+ goto err;
+ }
+ }
+
+ ret = 0;
+err:
+ kfree(b);
+ return ret;
+}
+
+static void bch2_sb_journal_v2_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2);
+ unsigned i, nr = bch2_sb_field_journal_v2_nr_entries(journal);
+
+ prt_printf(out, "Buckets: ");
+ for (i = 0; i < nr; i++)
+ prt_printf(out, " %llu-%llu",
+ le64_to_cpu(journal->d[i].start),
+ le64_to_cpu(journal->d[i].start) + le64_to_cpu(journal->d[i].nr));
+ prt_newline(out);
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_journal_v2 = {
+ .validate = bch2_sb_journal_v2_validate,
+ .to_text = bch2_sb_journal_v2_to_text,
+};
+
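+/*
+ * Write a device's journal bucket list to its superblock in the v2 (run
+ * length encoded) format: runs of contiguous buckets become { start, nr }
+ * entries, e.g. buckets 7,8,9,20 are stored as { 7, 3 }, { 20, 1 }:
+ */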
+int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
+ u64 *buckets, unsigned nr)
+{
+ struct bch_sb_field_journal_v2 *j;
+ unsigned i, dst = 0, nr_compacted = 1;
+
+ if (c)
+ lockdep_assert_held(&c->sb_lock);
+
+ if (!nr) {
+ bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
+ bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal_v2);
+ return 0;
+ }
+
+ for (i = 0; i + 1 < nr; i++)
+ if (buckets[i] + 1 != buckets[i + 1])
+ nr_compacted++;
+
+ j = bch2_sb_field_resize(&ca->disk_sb, journal_v2,
+ (sizeof(*j) + sizeof(j->d[0]) * nr_compacted) / sizeof(u64));
+ if (!j)
+ return -BCH_ERR_ENOSPC_sb_journal;
+
+ bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
+
+ j->d[dst].start = cpu_to_le64(buckets[0]);
+ j->d[dst].nr = cpu_to_le64(1);
+
+ for (i = 1; i < nr; i++) {
+ if (buckets[i] == buckets[i - 1] + 1) {
+ le64_add_cpu(&j->d[dst].nr, 1);
+ } else {
+ dst++;
+ j->d[dst].start = cpu_to_le64(buckets[i]);
+ j->d[dst].nr = cpu_to_le64(1);
+ }
+ }
+
+ BUG_ON(dst + 1 != nr_compacted);
+ return 0;
+}
diff --git a/c_src/libbcachefs/journal_sb.h b/c_src/libbcachefs/journal_sb.h
new file mode 100644
index 00000000..ba40a7e8
--- /dev/null
+++ b/c_src/libbcachefs/journal_sb.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "super-io.h"
+#include "vstructs.h"
+
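+/* The number of journal buckets is implicit in the size of the sb field: */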
+static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j)
+{
+ return j
+ ? (__le64 *) vstruct_end(&j->field) - j->buckets
+ : 0;
+}
+
+static inline unsigned bch2_sb_field_journal_v2_nr_entries(struct bch_sb_field_journal_v2 *j)
+{
+ if (!j)
+ return 0;
+
+ return (struct bch_sb_field_journal_v2_entry *) vstruct_end(&j->field) - &j->d[0];
+}
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_journal;
+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2;
+
+int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *, u64 *, unsigned);
diff --git a/c_src/libbcachefs/journal_seq_blacklist.c b/c_src/libbcachefs/journal_seq_blacklist.c
new file mode 100644
index 00000000..0200e299
--- /dev/null
+++ b/c_src/libbcachefs/journal_seq_blacklist.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_iter.h"
+#include "eytzinger.h"
+#include "journal_seq_blacklist.h"
+#include "super-io.h"
+
+/*
+ * journal_seq_blacklist machinery:
+ *
+ * To guarantee order of btree updates after a crash, we need to detect when a
+ * btree node entry (bset) is newer than the newest journal entry that was
+ * successfully written, and ignore it - effectively ignoring any btree updates
+ * that didn't make it into the journal.
+ *
+ * If we didn't do this, we might have two btree nodes, a and b, both with
+ * updates that weren't written to the journal yet: if b was updated after a,
+ * but b was flushed and not a - oops; on recovery we'll find that the updates
+ * to b happened, but not the updates to a that happened before it.
+ *
+ * Ignoring bsets that are newer than the newest journal entry is always safe,
+ * because everything they contain will also have been journalled - and must
+ * still be present in the journal on disk until a journal entry has been
+ * written _after_ that bset was written.
+ *
+ * To accomplish this, bsets record the newest journal sequence number they
+ * contain updates for; then, on startup, the btree code queries the journal
+ * code to ask "Is this sequence number newer than the newest journal entry? If
+ * so, ignore it."
+ *
+ * When this happens, we must blacklist that journal sequence number: the
+ * journal must not write any entries with that sequence number, and it must
+ * record that it was blacklisted so that a) on recovery we don't think we have
+ * missing journal entries and b) so that the btree code continues to ignore
+ * that bset, until that btree node is rewritten.
+ */
+
+static unsigned sb_blacklist_u64s(unsigned nr)
+{
+ struct bch_sb_field_journal_seq_blacklist *bl;
+
+ return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
+}
+
+static struct bch_sb_field_journal_seq_blacklist *
+blacklist_entry_try_merge(struct bch_fs *c,
+ struct bch_sb_field_journal_seq_blacklist *bl,
+ unsigned i)
+{
+ unsigned nr = blacklist_nr_entries(bl);
+
+ if (le64_to_cpu(bl->start[i].end) >=
+ le64_to_cpu(bl->start[i + 1].start)) {
+ bl->start[i].end = bl->start[i + 1].end;
+ --nr;
+ memmove(&bl->start[i],
+ &bl->start[i + 1],
+ sizeof(bl->start[0]) * (nr - i));
+
+ bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
+ sb_blacklist_u64s(nr));
+ BUG_ON(!bl);
+ }
+
+ return bl;
+}
+
+static bool bl_entry_contig_or_overlaps(struct journal_seq_blacklist_entry *e,
+ u64 start, u64 end)
+{
+ return !(end < le64_to_cpu(e->start) || le64_to_cpu(e->end) < start);
+}
+
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
+{
+ struct bch_sb_field_journal_seq_blacklist *bl;
+ unsigned i, nr;
+ int ret = 0;
+
+ mutex_lock(&c->sb_lock);
+ bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
+ nr = blacklist_nr_entries(bl);
+
+ for (i = 0; i < nr; i++) {
+ struct journal_seq_blacklist_entry *e =
+ bl->start + i;
+
+ if (bl_entry_contig_or_overlaps(e, start, end)) {
+ e->start = cpu_to_le64(min(start, le64_to_cpu(e->start)));
+ e->end = cpu_to_le64(max(end, le64_to_cpu(e->end)));
+
+ if (i + 1 < nr)
+ bl = blacklist_entry_try_merge(c,
+ bl, i);
+ if (i)
+ bl = blacklist_entry_try_merge(c,
+ bl, i - 1);
+ goto out_write_sb;
+ }
+ }
+
+ bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
+ sb_blacklist_u64s(nr + 1));
+ if (!bl) {
+ ret = -BCH_ERR_ENOSPC_sb_journal_seq_blacklist;
+ goto out;
+ }
+
+ bl->start[nr].start = cpu_to_le64(start);
+ bl->start[nr].end = cpu_to_le64(end);
+out_write_sb:
+ c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);
+
+ ret = bch2_write_super(c);
+out:
+ mutex_unlock(&c->sb_lock);
+
+ return ret ?: bch2_blacklist_table_initialize(c);
+}
+
+static int journal_seq_blacklist_table_cmp(const void *_l,
+ const void *_r, size_t size)
+{
+ const struct journal_seq_blacklist_table_entry *l = _l;
+ const struct journal_seq_blacklist_table_entry *r = _r;
+
+ return cmp_int(l->start, r->start);
+}
+
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
+ bool dirty)
+{
+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
+ struct journal_seq_blacklist_table_entry search = { .start = seq };
+ int idx;
+
+ if (!t)
+ return false;
+
+ idx = eytzinger0_find_le(t->entries, t->nr,
+ sizeof(t->entries[0]),
+ journal_seq_blacklist_table_cmp,
+ &search);
+ if (idx < 0)
+ return false;
+
+ BUG_ON(t->entries[idx].start > seq);
+
+ if (seq >= t->entries[idx].end)
+ return false;
+
+ if (dirty)
+ t->entries[idx].dirty = true;
+ return true;
+}
+
+int bch2_blacklist_table_initialize(struct bch_fs *c)
+{
+ struct bch_sb_field_journal_seq_blacklist *bl =
+ bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
+ struct journal_seq_blacklist_table *t;
+ unsigned i, nr = blacklist_nr_entries(bl);
+
+ if (!bl)
+ return 0;
+
+ t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
+ GFP_KERNEL);
+ if (!t)
+ return -BCH_ERR_ENOMEM_blacklist_table_init;
+
+ t->nr = nr;
+
+ for (i = 0; i < nr; i++) {
+ t->entries[i].start = le64_to_cpu(bl->start[i].start);
+ t->entries[i].end = le64_to_cpu(bl->start[i].end);
+ }
+
+ eytzinger0_sort(t->entries,
+ t->nr,
+ sizeof(t->entries[0]),
+ journal_seq_blacklist_table_cmp,
+ NULL);
+
+ kfree(c->journal_seq_blacklist_table);
+ c->journal_seq_blacklist_table = t;
+ return 0;
+}
+
+static int bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
+ struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_journal_seq_blacklist *bl =
+ field_to_type(f, journal_seq_blacklist);
+ unsigned i, nr = blacklist_nr_entries(bl);
+
+ for (i = 0; i < nr; i++) {
+ struct journal_seq_blacklist_entry *e = bl->start + i;
+
+ if (le64_to_cpu(e->start) >=
+ le64_to_cpu(e->end)) {
+ prt_printf(err, "entry %u start >= end (%llu >= %llu)",
+ i, le64_to_cpu(e->start), le64_to_cpu(e->end));
+ return -BCH_ERR_invalid_sb_journal_seq_blacklist;
+ }
+
+ if (i + 1 < nr &&
+ le64_to_cpu(e[0].end) >
+ le64_to_cpu(e[1].start)) {
+ prt_printf(err, "entry %u out of order with next entry (%llu > %llu)",
+ i + 1, le64_to_cpu(e[0].end), le64_to_cpu(e[1].start));
+ return -BCH_ERR_invalid_sb_journal_seq_blacklist;
+ }
+ }
+
+ return 0;
+}
+
+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
+ struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_journal_seq_blacklist *bl =
+ field_to_type(f, journal_seq_blacklist);
+ struct journal_seq_blacklist_entry *i;
+ unsigned nr = blacklist_nr_entries(bl);
+
+ for (i = bl->start; i < bl->start + nr; i++) {
+ if (i != bl->start)
+ prt_printf(out, " ");
+
+ prt_printf(out, "%llu-%llu",
+ le64_to_cpu(i->start),
+ le64_to_cpu(i->end));
+ }
+ prt_newline(out);
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
+ .validate = bch2_sb_journal_seq_blacklist_validate,
+ .to_text = bch2_sb_journal_seq_blacklist_to_text
+};
+
+void bch2_blacklist_entries_gc(struct work_struct *work)
+{
+ struct bch_fs *c = container_of(work, struct bch_fs,
+ journal_seq_blacklist_gc_work);
+ struct journal_seq_blacklist_table *t;
+ struct bch_sb_field_journal_seq_blacklist *bl;
+ struct journal_seq_blacklist_entry *src, *dst;
+ struct btree_trans *trans = bch2_trans_get(c);
+ unsigned i, nr, new_nr;
+ int ret;
+
+ for (i = 0; i < BTREE_ID_NR; i++) {
+ struct btree_iter iter;
+ struct btree *b;
+
+ bch2_trans_node_iter_init(trans, &iter, i, POS_MIN,
+ 0, 0, BTREE_ITER_PREFETCH);
+retry:
+ bch2_trans_begin(trans);
+
+ b = bch2_btree_iter_peek_node(&iter);
+
+ while (!(ret = PTR_ERR_OR_ZERO(b)) &&
+ b &&
+ !test_bit(BCH_FS_stopping, &c->flags))
+ b = bch2_btree_iter_next_node(&iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_iter_exit(trans, &iter);
+ }
+
+ bch2_trans_put(trans);
+ if (ret)
+ return;
+
+ mutex_lock(&c->sb_lock);
+ bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
+ if (!bl)
+ goto out;
+
+ nr = blacklist_nr_entries(bl);
+ dst = bl->start;
+
+ t = c->journal_seq_blacklist_table;
+ BUG_ON(nr != t->nr);
+
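+	/*
+	 * The superblock blacklist is kept sorted while the in-memory table is
+	 * in eytzinger order; eytzinger0_first()/eytzinger0_next() walk the
+	 * table in the same sorted order as the superblock array:
+	 */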
+ for (src = bl->start, i = eytzinger0_first(t->nr);
+ src < bl->start + nr;
+ src++, i = eytzinger0_next(i, nr)) {
+ BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
+ BUG_ON(t->entries[i].end != le64_to_cpu(src->end));
+
+ if (t->entries[i].dirty)
+ *dst++ = *src;
+ }
+
+ new_nr = dst - bl->start;
+
+ bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
+
+ if (new_nr != nr) {
+ bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
+ new_nr ? sb_blacklist_u64s(new_nr) : 0);
+ BUG_ON(new_nr && !bl);
+
+ if (!new_nr)
+ c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3));
+
+ bch2_write_super(c);
+ }
+out:
+ mutex_unlock(&c->sb_lock);
+}
diff --git a/c_src/libbcachefs/journal_seq_blacklist.h b/c_src/libbcachefs/journal_seq_blacklist.h
new file mode 100644
index 00000000..afb886ec
--- /dev/null
+++ b/c_src/libbcachefs/journal_seq_blacklist.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
+#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
+
+static inline unsigned
+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
+{
+ return bl
+ ? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
+ sizeof(struct journal_seq_blacklist_entry))
+ : 0;
+}
+
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
+int bch2_blacklist_table_initialize(struct bch_fs *);
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
+
+void bch2_blacklist_entries_gc(struct work_struct *);
+
+#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
diff --git a/c_src/libbcachefs/journal_types.h b/c_src/libbcachefs/journal_types.h
new file mode 100644
index 00000000..38817c7a
--- /dev/null
+++ b/c_src/libbcachefs/journal_types.h
@@ -0,0 +1,329 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_TYPES_H
+#define _BCACHEFS_JOURNAL_TYPES_H
+
+#include <linux/cache.h>
+#include <linux/workqueue.h>
+
+#include "alloc_types.h"
+#include "super_types.h"
+#include "fifo.h"
+
+#define JOURNAL_BUF_BITS 2
+#define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS)
+#define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1)
+
+/*
+ * We put JOURNAL_BUF_NR of these in struct journal; we use them for writes to
+ * the journal that are being staged or in flight.
+ */
+struct journal_buf {
+ struct jset *data;
+
+ __BKEY_PADDED(key, BCH_REPLICAS_MAX);
+ struct bch_devs_list devs_written;
+
+ struct closure_waitlist wait;
+ u64 last_seq; /* copy of data->last_seq */
+ long expires;
+ u64 flush_time;
+
+ unsigned buf_size; /* size in bytes of @data */
+ unsigned sectors; /* maximum size for current entry */
+ unsigned disk_sectors; /* maximum size entry could have been, if
+ buf_size was bigger */
+ unsigned u64s_reserved;
+ bool noflush; /* write has already been kicked off, and was noflush */
+ bool must_flush; /* something wants a flush */
+ bool separate_flush;
+ bool need_flush_to_write_buffer;
+};
+
+/*
+ * Something that makes a journal entry dirty - i.e. a btree node that has to be
+ * flushed:
+ */
+
+enum journal_pin_type {
+ JOURNAL_PIN_btree,
+ JOURNAL_PIN_key_cache,
+ JOURNAL_PIN_other,
+ JOURNAL_PIN_NR,
+};
+
+struct journal_entry_pin_list {
+ struct list_head list[JOURNAL_PIN_NR];
+ struct list_head flushed;
+ atomic_t count;
+ struct bch_devs_list devs;
+};
+
+struct journal;
+struct journal_entry_pin;
+typedef int (*journal_pin_flush_fn)(struct journal *j,
+ struct journal_entry_pin *, u64);
+
+struct journal_entry_pin {
+ struct list_head list;
+ journal_pin_flush_fn flush;
+ u64 seq;
+};
+
+struct journal_res {
+ bool ref;
+ u8 idx;
+ u16 u64s;
+ u32 offset;
+ u64 seq;
+};
+
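+/*
+ * Journal reservation state, packed into a single 64 bit word so it can be
+ * read and updated atomically: the write offset within the open journal
+ * entry, the index of the open and oldest unwritten buffers, and a
+ * reference count for each buffer:
+ */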
+union journal_res_state {
+ struct {
+ atomic64_t counter;
+ };
+
+ struct {
+ u64 v;
+ };
+
+ struct {
+ u64 cur_entry_offset:20,
+ idx:2,
+ unwritten_idx:2,
+ buf0_count:10,
+ buf1_count:10,
+ buf2_count:10,
+ buf3_count:10;
+ };
+};
+
+/* bytes: */
+#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
+#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
+
+/*
+ * We stash some journal state as sentinel values in cur_entry_offset:
+ * note - cur_entry_offset is in units of u64s
+ */
+#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1)
+
+#define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1)
+#define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX)
+
+struct journal_space {
+	/* Units of 512 byte sectors: */
+ unsigned next_entry; /* How big the next journal entry can be */
+ unsigned total;
+};
+
+enum journal_space_from {
+ journal_space_discarded,
+ journal_space_clean_ondisk,
+ journal_space_clean,
+ journal_space_total,
+ journal_space_nr,
+};
+
+enum journal_flags {
+ JOURNAL_REPLAY_DONE,
+ JOURNAL_STARTED,
+ JOURNAL_MAY_SKIP_FLUSH,
+ JOURNAL_NEED_FLUSH_WRITE,
+};
+
+/* Reasons we may fail to get a journal reservation: */
+#define JOURNAL_ERRORS() \
+ x(ok) \
+ x(blocked) \
+ x(max_in_flight) \
+ x(journal_full) \
+ x(journal_pin_full) \
+ x(journal_stuck) \
+ x(insufficient_devices)
+
+enum journal_errors {
+#define x(n) JOURNAL_ERR_##n,
+ JOURNAL_ERRORS()
+#undef x
+};
+
+typedef DARRAY(u64) darray_u64;
+
+/* Embedded in struct bch_fs */
+struct journal {
+ /* Fastpath stuff up front: */
+ struct {
+
+ union journal_res_state reservations;
+ enum bch_watermark watermark;
+
+ } __aligned(SMP_CACHE_BYTES);
+
+ unsigned long flags;
+
+ /* Max size of current journal entry */
+ unsigned cur_entry_u64s;
+ unsigned cur_entry_sectors;
+
+ /* Reserved space in journal entry to be used just prior to write */
+ unsigned entry_u64s_reserved;
+
+
+ /*
+ * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
+ * insufficient devices:
+ */
+ enum journal_errors cur_entry_error;
+
+ unsigned buf_size_want;
+ /*
+ * We may queue up some things to be journalled (log messages) before
+ * the journal has actually started - stash them here:
+ */
+ darray_u64 early_journal_entries;
+
+ /*
+	 * Protects journal_buf->data when accessing without a journal
+ * reservation: for synchronization between the btree write buffer code
+ * and the journal write path:
+ */
+ struct mutex buf_lock;
+ /*
+ * Two journal entries -- one is currently open for new entries, the
+ * other is possibly being written out.
+ */
+ struct journal_buf buf[JOURNAL_BUF_NR];
+
+ spinlock_t lock;
+
+ /* if nonzero, we may not open a new journal entry: */
+ unsigned blocked;
+
+ /* Used when waiting because the journal was full */
+ wait_queue_head_t wait;
+ struct closure_waitlist async_wait;
+
+ struct closure io;
+ struct delayed_work write_work;
+
+ /* Sequence number of most recent journal entry (last entry in @pin) */
+ atomic64_t seq;
+
+ /* seq, last_seq from the most recent journal entry successfully written */
+ u64 seq_ondisk;
+ u64 flushed_seq_ondisk;
+ u64 last_seq_ondisk;
+ u64 err_seq;
+ u64 last_empty_seq;
+
+ /*
+ * FIFO of journal entries whose btree updates have not yet been
+ * written out.
+ *
+ * Each entry is a reference count. The position in the FIFO is the
+ * entry's sequence number relative to @seq.
+ *
+ * The journal entry itself holds a reference count, put when the
+ * journal entry is written out. Each btree node modified by the journal
+ * entry also holds a reference count, put when the btree node is
+ * written.
+ *
+ * When a reference count reaches zero, the journal entry is no longer
+ * needed. When all journal entries in the oldest journal bucket are no
+ * longer needed, the bucket can be discarded and reused.
+ */
+ struct {
+ u64 front, back, size, mask;
+ struct journal_entry_pin_list *data;
+ } pin;
+
+ struct journal_space space[journal_space_nr];
+
+ u64 replay_journal_seq;
+ u64 replay_journal_seq_end;
+
+ struct write_point wp;
+ spinlock_t err_lock;
+
+ struct mutex reclaim_lock;
+ /*
+ * Used for waiting until journal reclaim has freed up space in the
+ * journal:
+ */
+ wait_queue_head_t reclaim_wait;
+ struct task_struct *reclaim_thread;
+ bool reclaim_kicked;
+ unsigned long next_reclaim;
+ u64 nr_direct_reclaim;
+ u64 nr_background_reclaim;
+
+ unsigned long last_flushed;
+ struct journal_entry_pin *flush_in_progress;
+ bool flush_in_progress_dropped;
+ wait_queue_head_t pin_flush_wait;
+
+ /* protects advancing ja->discard_idx: */
+ struct mutex discard_lock;
+ bool can_discard;
+
+ unsigned long last_flush_write;
+
+ u64 write_start_time;
+
+ u64 nr_flush_writes;
+ u64 nr_noflush_writes;
+ u64 entry_bytes_written;
+
+ u64 low_on_space_start;
+ u64 low_on_pin_start;
+ u64 max_in_flight_start;
+ u64 write_buffer_full_start;
+
+ struct bch2_time_stats *flush_write_time;
+ struct bch2_time_stats *noflush_write_time;
+ struct bch2_time_stats *flush_seq_time;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map res_map;
+#endif
+} __aligned(SMP_CACHE_BYTES);
+
+/*
+ * Embedded in struct bch_dev. First three fields refer to the array of journal
+ * buckets, in bch_sb.
+ */
+struct journal_device {
+ /*
+ * For each journal bucket, contains the max sequence number of the
+ * journal writes it contains - so we know when a bucket can be reused.
+ */
+ u64 *bucket_seq;
+
+ unsigned sectors_free;
+
+ /*
+ * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
+ */
+ unsigned discard_idx; /* Next bucket to discard */
+ unsigned dirty_idx_ondisk;
+ unsigned dirty_idx;
+ unsigned cur_idx; /* Journal bucket we're currently writing to */
+ unsigned nr;
+
+ u64 *buckets;
+
+ /* Bio for journal reads/writes to this device */
+ struct bio *bio;
+
+ /* for bch_journal_read_device */
+ struct closure read;
+};
+
+/*
+ * journal_entry_res - reserve space in every journal entry:
+ */
+struct journal_entry_res {
+ unsigned u64s;
+};
+
+#endif /* _BCACHEFS_JOURNAL_TYPES_H */
diff --git a/c_src/libbcachefs/keylist.c b/c_src/libbcachefs/keylist.c
new file mode 100644
index 00000000..1b828bdd
--- /dev/null
+++ b/c_src/libbcachefs/keylist.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey.h"
+#include "keylist.h"
+
+int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,
+ size_t nr_inline_u64s, size_t new_u64s)
+{
+ size_t oldsize = bch2_keylist_u64s(l);
+ size_t newsize = oldsize + new_u64s;
+ u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p;
+ u64 *new_keys;
+
+ newsize = roundup_pow_of_two(newsize);
+
+ if (newsize <= nr_inline_u64s ||
+ (old_buf && roundup_pow_of_two(oldsize) == newsize))
+ return 0;
+
+ new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOFS);
+ if (!new_keys)
+ return -ENOMEM;
+
+ if (!old_buf)
+ memcpy_u64s(new_keys, inline_u64s, oldsize);
+
+ l->keys_p = new_keys;
+ l->top_p = new_keys + oldsize;
+
+ return 0;
+}
+
+void bch2_keylist_pop_front(struct keylist *l)
+{
+ l->top_p -= bch2_keylist_front(l)->k.u64s;
+
+ memmove_u64s_down(l->keys,
+ bkey_next(l->keys),
+ bch2_keylist_u64s(l));
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_verify_keylist_sorted(struct keylist *l)
+{
+ for_each_keylist_key(l, k)
+ BUG_ON(bkey_next(k) != l->top &&
+ bpos_ge(k->k.p, bkey_next(k)->k.p));
+}
+#endif
diff --git a/c_src/libbcachefs/keylist.h b/c_src/libbcachefs/keylist.h
new file mode 100644
index 00000000..e687e0e9
--- /dev/null
+++ b/c_src/libbcachefs/keylist.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_KEYLIST_H
+#define _BCACHEFS_KEYLIST_H
+
+#include "keylist_types.h"
+
+int bch2_keylist_realloc(struct keylist *, u64 *, size_t, size_t);
+void bch2_keylist_pop_front(struct keylist *);
+
+static inline void bch2_keylist_init(struct keylist *l, u64 *inline_keys)
+{
+ l->top_p = l->keys_p = inline_keys;
+}
+
+static inline void bch2_keylist_free(struct keylist *l, u64 *inline_keys)
+{
+ if (l->keys_p != inline_keys)
+ kfree(l->keys_p);
+}
+
+static inline void bch2_keylist_push(struct keylist *l)
+{
+ l->top = bkey_next(l->top);
+}
+
+static inline void bch2_keylist_add(struct keylist *l, const struct bkey_i *k)
+{
+ bkey_copy(l->top, k);
+ bch2_keylist_push(l);
+}
+
+static inline bool bch2_keylist_empty(struct keylist *l)
+{
+ return l->top == l->keys;
+}
+
+static inline size_t bch2_keylist_u64s(struct keylist *l)
+{
+ return l->top_p - l->keys_p;
+}
+
+static inline size_t bch2_keylist_bytes(struct keylist *l)
+{
+ return bch2_keylist_u64s(l) * sizeof(u64);
+}
+
+static inline struct bkey_i *bch2_keylist_front(struct keylist *l)
+{
+ return l->keys;
+}
+
+#define for_each_keylist_key(_keylist, _k) \
+ for (struct bkey_i *_k = (_keylist)->keys; \
+ _k != (_keylist)->top; \
+ _k = bkey_next(_k))
+
+static inline u64 keylist_sectors(struct keylist *keys)
+{
+ u64 ret = 0;
+
+ for_each_keylist_key(keys, k)
+ ret += k->k.size;
+ return ret;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_verify_keylist_sorted(struct keylist *);
+#else
+static inline void bch2_verify_keylist_sorted(struct keylist *l) {}
+#endif
+
+#endif /* _BCACHEFS_KEYLIST_H */
diff --git a/c_src/libbcachefs/keylist_types.h b/c_src/libbcachefs/keylist_types.h
new file mode 100644
index 00000000..4b3ff7d8
--- /dev/null
+++ b/c_src/libbcachefs/keylist_types.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_KEYLIST_TYPES_H
+#define _BCACHEFS_KEYLIST_TYPES_H
+
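+/*
+ * A keylist is a growable buffer of contiguous, variable size bkey_i's:
+ * keys points at the first key, top just past the last one.
+ */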
+struct keylist {
+ union {
+ struct bkey_i *keys;
+ u64 *keys_p;
+ };
+ union {
+ struct bkey_i *top;
+ u64 *top_p;
+ };
+};
+
+#endif /* _BCACHEFS_KEYLIST_TYPES_H */
diff --git a/c_src/libbcachefs/logged_ops.c b/c_src/libbcachefs/logged_ops.c
new file mode 100644
index 00000000..ad598105
--- /dev/null
+++ b/c_src/libbcachefs/logged_ops.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "error.h"
+#include "io_misc.h"
+#include "logged_ops.h"
+#include "super.h"
+
+struct bch_logged_op_fn {
+ u8 type;
+ int (*resume)(struct btree_trans *, struct bkey_i *);
+};
+
+static const struct bch_logged_op_fn logged_op_fns[] = {
+#define x(n) { \
+ .type = KEY_TYPE_logged_op_##n, \
+ .resume = bch2_resume_logged_op_##n, \
+},
+ BCH_LOGGED_OPS()
+#undef x
+};
+
+static const struct bch_logged_op_fn *logged_op_fn(enum bch_bkey_type type)
+{
+ for (unsigned i = 0; i < ARRAY_SIZE(logged_op_fns); i++)
+ if (logged_op_fns[i].type == type)
+ return logged_op_fns + i;
+ return NULL;
+}
+
+static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
+ struct bkey_buf sk;
+ u32 restart_count = trans->restart_count;
+ int ret;
+
+ if (!fn)
+ return 0;
+
+ bch2_bkey_buf_init(&sk);
+ bch2_bkey_buf_reassemble(&sk, c, k);
+
+ ret = drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?:
+ fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count);
+
+ bch2_bkey_buf_exit(&sk, c);
+ return ret;
+}
+
+int bch2_resume_logged_ops(struct bch_fs *c)
+{
+ int ret = bch2_trans_run(c,
+ for_each_btree_key(trans, iter,
+ BTREE_ID_logged_ops, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ resume_logged_op(trans, &iter, k)));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
+{
+ struct btree_iter iter;
+ int ret;
+
+ ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX);
+ if (ret)
+ return ret;
+
+ k->k.p = iter.pos;
+
+ ret = bch2_trans_update(trans, &iter, k, 0);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
+{
+ return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ __bch2_logged_op_start(trans, k));
+}
+
+void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
+{
+ int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0));
+ /*
+ * This needs to be a fatal error because we've left an unfinished
+ * operation in the logged ops btree.
+ *
+ * We should only ever see an error here if the filesystem has already
+ * been shut down, but make sure of that here:
+ */
+ if (ret) {
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+ bch2_fs_fatal_error(c, "%s: error deleting logged operation %s: %s",
+ __func__, buf.buf, bch2_err_str(ret));
+ printbuf_exit(&buf);
+ }
+}
diff --git a/c_src/libbcachefs/logged_ops.h b/c_src/libbcachefs/logged_ops.h
new file mode 100644
index 00000000..4d1e786a
--- /dev/null
+++ b/c_src/libbcachefs/logged_ops.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_LOGGED_OPS_H
+#define _BCACHEFS_LOGGED_OPS_H
+
+#include "bkey.h"
+
+#define BCH_LOGGED_OPS() \
+ x(truncate) \
+ x(finsert)
+
+static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i *op)
+{
+ return bch2_btree_insert_nonextent(trans, BTREE_ID_logged_ops, op, 0);
+}
+
+int bch2_resume_logged_ops(struct bch_fs *);
+int bch2_logged_op_start(struct btree_trans *, struct bkey_i *);
+void bch2_logged_op_finish(struct btree_trans *, struct bkey_i *);
+
+#endif /* _BCACHEFS_LOGGED_OPS_H */
diff --git a/c_src/libbcachefs/lru.c b/c_src/libbcachefs/lru.c
new file mode 100644
index 00000000..7a4ca5a2
--- /dev/null
+++ b/c_src/libbcachefs/lru.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "btree_iter.h"
+#include "btree_update.h"
+#include "btree_write_buffer.h"
+#include "error.h"
+#include "lru.h"
+#include "recovery.h"
+
+/* KEY_TYPE_lru is obsolete: */
+int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err,
+ lru_entry_at_time_0,
+ "lru entry at time=0");
+fsck_err:
+ return ret;
+}
+
+void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ const struct bch_lru *lru = bkey_s_c_to_lru(k).v;
+
+ prt_printf(out, "idx %llu", le64_to_cpu(lru->idx));
+}
+
+void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru)
+{
+ prt_printf(out, "%llu:%llu -> %llu:%llu",
+ lru_pos_id(lru),
+ lru_pos_time(lru),
+ u64_to_bucket(lru.offset).inode,
+ u64_to_bucket(lru.offset).offset);
+}
+
+static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
+ u64 dev_bucket, u64 time, bool set)
+{
+ return time
+ ? bch2_btree_bit_mod(trans, BTREE_ID_lru,
+ lru_pos(lru_id, dev_bucket, time), set)
+ : 0;
+}
+
+int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
+{
+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted);
+}
+
+int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
+{
+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
+}
+
+int bch2_lru_change(struct btree_trans *trans,
+ u16 lru_id, u64 dev_bucket,
+ u64 old_time, u64 new_time)
+{
+ if (old_time == new_time)
+ return 0;
+
+ return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?:
+ bch2_lru_set(trans, lru_id, dev_bucket, new_time);
+}
+
+static const char * const bch2_lru_types[] = {
+#define x(n) #n,
+ BCH_LRU_TYPES()
+#undef x
+ NULL
+};
+
+static int bch2_check_lru_key(struct btree_trans *trans,
+ struct btree_iter *lru_iter,
+ struct bkey_s_c lru_k,
+ struct bpos *last_flushed_pos)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a;
+ struct printbuf buf1 = PRINTBUF;
+ struct printbuf buf2 = PRINTBUF;
+ enum bch_lru_type type = lru_type(lru_k);
+ struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset);
+ u64 idx;
+ int ret;
+
+ if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c,
+ lru_entry_to_invalid_bucket,
+ "lru key points to nonexistent device:bucket %llu:%llu",
+ alloc_pos.inode, alloc_pos.offset))
+ return bch2_btree_delete_at(trans, lru_iter, 0);
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ a = bch2_alloc_to_v4(k, &a_convert);
+
+ switch (type) {
+ case BCH_LRU_read:
+ idx = alloc_lru_idx_read(*a);
+ break;
+ case BCH_LRU_fragmentation:
+ idx = a->fragmentation_lru;
+ break;
+ }
+
+ if (lru_k.k->type != KEY_TYPE_set ||
+ lru_pos_time(lru_k.k->p) != idx) {
+ if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) {
+ *last_flushed_pos = lru_k.k->p;
+ ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+ -BCH_ERR_transaction_restart_write_buffer_flush;
+ goto out;
+ }
+
+ if (c->opts.reconstruct_alloc ||
+ fsck_err(c, lru_entry_bad,
+ "incorrect lru entry: lru %s time %llu\n"
+ " %s\n"
+ " for %s",
+ bch2_lru_types[type],
+ lru_pos_time(lru_k.k->p),
+ (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
+ (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf)))
+ ret = bch2_btree_delete_at(trans, lru_iter, 0);
+ }
+out:
+err:
+fsck_err:
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf2);
+ printbuf_exit(&buf1);
+ return ret;
+}
+
+int bch2_check_lrus(struct bch_fs *c)
+{
+ struct bpos last_flushed_pos = POS_MIN;
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter,
+ BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
+ bch2_check_lru_key(trans, &iter, k, &last_flushed_pos)));
+ bch_err_fn(c, ret);
+ return ret;
+
+}
diff --git a/c_src/libbcachefs/lru.h b/c_src/libbcachefs/lru.h
new file mode 100644
index 00000000..429dca81
--- /dev/null
+++ b/c_src/libbcachefs/lru.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_LRU_H
+#define _BCACHEFS_LRU_H
+
+#define LRU_TIME_BITS 48
+#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
+
+static inline u64 lru_pos_id(struct bpos pos)
+{
+ return pos.inode >> LRU_TIME_BITS;
+}
+
+static inline u64 lru_pos_time(struct bpos pos)
+{
+ return pos.inode & ~(~0ULL << LRU_TIME_BITS);
+}
+
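+/*
+ * LRU keys are indexed by { inode = (lru_id << 48) | time, offset = dev_bucket },
+ * e.g. lru_pos(1, 42, 1000) is POS((1ULL << 48) | 1000, 42):
+ */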
+static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time)
+{
+ struct bpos pos = POS(((u64) lru_id << LRU_TIME_BITS)|time, dev_bucket);
+
+ EBUG_ON(time > LRU_TIME_MAX);
+ EBUG_ON(lru_pos_id(pos) != lru_id);
+ EBUG_ON(lru_pos_time(pos) != time);
+ EBUG_ON(pos.offset != dev_bucket);
+
+ return pos;
+}
+
+#define BCH_LRU_TYPES() \
+ x(read) \
+ x(fragmentation)
+
+enum bch_lru_type {
+#define x(n) BCH_LRU_##n,
+ BCH_LRU_TYPES()
+#undef x
+};
+
+#define BCH_LRU_FRAGMENTATION_START ((1U << 16) - 1)
+
+static inline enum bch_lru_type lru_type(struct bkey_s_c l)
+{
+ u16 lru_id = l.k->p.inode >> 48;
+
+ if (lru_id == BCH_LRU_FRAGMENTATION_START)
+ return BCH_LRU_fragmentation;
+ return BCH_LRU_read;
+}
+
+int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+void bch2_lru_pos_to_text(struct printbuf *, struct bpos);
+
+#define bch2_bkey_ops_lru ((struct bkey_ops) { \
+ .key_invalid = bch2_lru_invalid, \
+ .val_to_text = bch2_lru_to_text, \
+ .min_val_size = 8, \
+})
+
+int bch2_lru_del(struct btree_trans *, u16, u64, u64);
+int bch2_lru_set(struct btree_trans *, u16, u64, u64);
+int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
+
+int bch2_check_lrus(struct bch_fs *);
+
+#endif /* _BCACHEFS_LRU_H */
diff --git a/c_src/libbcachefs/mean_and_variance.c b/c_src/libbcachefs/mean_and_variance.c
new file mode 100644
index 00000000..bf0ef668
--- /dev/null
+++ b/c_src/libbcachefs/mean_and_variance.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Functions for incremental mean and variance.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * Copyright © 2022 Daniel B. Hill
+ *
+ * Author: Daniel B. Hill <daniel@gluo.nz>
+ *
+ * Description:
+ *
+ * This includes some incremental algorithms for mean and variance calculation.
+ *
+ * Derived from the paper: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
+ *
+ * Create a struct and, if it's the weighted variant, set the weight field (weight = 2^k).
+ *
+ * Use mean_and_variance[_weighted]_update() on the struct to update its state.
+ *
+ * Use the mean_and_variance[_weighted]_get_* functions to calculate the mean and
+ * variance; some computation is deferred to these functions for performance reasons.
+ *
+ * see lib/math/mean_and_variance_test.c for examples of usage.
+ *
+ * DO NOT access the mean and variance fields of the weighted variants directly.
+ * DO NOT change the weight after calling update.
+ */
+
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/limits.h>
+#include <linux/math.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+
+#include "mean_and_variance.h"
+
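+/*
+ * 128 by 64 bit division: schoolbook long division on 32 bit digits of the
+ * high word, so that each step fits in div64_u64_rem():
+ */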
+u128_u u128_div(u128_u n, u64 d)
+{
+ u128_u r;
+ u64 rem;
+ u64 hi = u128_hi(n);
+ u64 lo = u128_lo(n);
+ u64 h = hi & ((u64) U32_MAX << 32);
+ u64 l = (hi & (u64) U32_MAX) << 32;
+
+ r = u128_shl(u64_to_u128(div64_u64_rem(h, d, &rem)), 64);
+ r = u128_add(r, u128_shl(u64_to_u128(div64_u64_rem(l + (rem << 32), d, &rem)), 32));
+ r = u128_add(r, u64_to_u128(div64_u64_rem(lo + (rem << 32), d, &rem)));
+ return r;
+}
+EXPORT_SYMBOL_GPL(u128_div);
+
+/**
+ * mean_and_variance_get_mean() - get mean from @s
+ * @s: mean and variance number of samples and their sums
+ */
+s64 mean_and_variance_get_mean(struct mean_and_variance s)
+{
+ return s.n ? div64_u64(s.sum, s.n) : 0;
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_get_mean);
+
+/**
+ * mean_and_variance_get_variance() - get variance from @s1
+ * @s1: mean and variance number of samples and sums
+ *
+ * see linked pdf equation 12.
+ */
+u64 mean_and_variance_get_variance(struct mean_and_variance s1)
+{
+ if (s1.n) {
+ u128_u s2 = u128_div(s1.sum_squares, s1.n);
+ u64 s3 = abs(mean_and_variance_get_mean(s1));
+
+ return u128_lo(u128_sub(s2, u128_square(s3)));
+ } else {
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_get_variance);
+
+/**
+ * mean_and_variance_get_stddev() - get standard deviation from @s
+ * @s: mean and variance number of samples and their sums
+ */
+u32 mean_and_variance_get_stddev(struct mean_and_variance s)
+{
+ return int_sqrt64(mean_and_variance_get_variance(s));
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
+
+/**
+ * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update()
+ * @s: mean and variance number of samples and their sums
+ * @x: new value to include in the &mean_and_variance_weighted
+ *
+ * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
+ * values are stored bitshifted for performance and added precision.
+ */
+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 x)
+{
+ // previous weighted variance.
+ u8 w = s->weight;
+ u64 var_w0 = s->variance;
+ // new value weighted.
+ s64 x_w = x << w;
+ s64 diff_w = x_w - s->mean;
+ s64 diff = fast_divpow2(diff_w, w);
+ // new mean weighted.
+ s64 u_w1 = s->mean + diff;
+
+ if (!s->init) {
+ s->mean = x_w;
+ s->variance = 0;
+ } else {
+ s->mean = u_w1;
+ s->variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w;
+ }
+ s->init = true;
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
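+
+/*
+ * Illustrative note (assumption based on the code above): with weight = 2, a
+ * new sample moves the stored mean 1/4 of the way toward it; the mean itself
+ * is kept shifted left by 2 bits, and mean_and_variance_weighted_get_mean()
+ * undoes that shift when reading it back.
+ */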
+
+/**
+ * mean_and_variance_weighted_get_mean() - get mean from @s
+ * @s: mean and variance number of samples and their sums
+ */
+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s)
+{
+ return fast_divpow2(s.mean, s.weight);
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
+
+/**
+ * mean_and_variance_weighted_get_variance() - get variance from @s
+ * @s: mean and variance number of samples and their sums
+ */
+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s)
+{
+	// always positive; don't need fast_divpow2()
+ return s.variance >> s.weight;
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
+
+/**
+ * mean_and_variance_weighted_get_stddev() - get standard deviation from @s
+ * @s: mean and variance number of samples and their sums
+ */
+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s)
+{
+ return int_sqrt64(mean_and_variance_weighted_get_variance(s));
+}
+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev);
+
+MODULE_AUTHOR("Daniel B. Hill");
+MODULE_LICENSE("GPL");
diff --git a/c_src/libbcachefs/mean_and_variance.h b/c_src/libbcachefs/mean_and_variance.h
new file mode 100644
index 00000000..b2be565b
--- /dev/null
+++ b/c_src/libbcachefs/mean_and_variance.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef MEAN_AND_VARIANCE_H_
+#define MEAN_AND_VARIANCE_H_
+
+#include <linux/types.h>
+#include <linux/limits.h>
+#include <linux/math.h>
+#include <linux/math64.h>
+
+#define SQRT_U64_MAX 4294967295ULL
+
+/*
+ * u128_u: u128 user mode, because not all architectures support a real int128
+ * type
+ *
+ * We don't use this version in userspace, because in userspace we link with
+ * Rust and rustc has issues with u128.
+ */
+
+#if defined(__SIZEOF_INT128__) && defined(__KERNEL__)
+
+typedef struct {
+ unsigned __int128 v;
+} __aligned(16) u128_u;
+
+static inline u128_u u64_to_u128(u64 a)
+{
+ return (u128_u) { .v = a };
+}
+
+static inline u64 u128_lo(u128_u a)
+{
+ return a.v;
+}
+
+static inline u64 u128_hi(u128_u a)
+{
+ return a.v >> 64;
+}
+
+static inline u128_u u128_add(u128_u a, u128_u b)
+{
+ a.v += b.v;
+ return a;
+}
+
+static inline u128_u u128_sub(u128_u a, u128_u b)
+{
+ a.v -= b.v;
+ return a;
+}
+
+static inline u128_u u128_shl(u128_u a, s8 shift)
+{
+ a.v <<= shift;
+ return a;
+}
+
+static inline u128_u u128_square(u64 a)
+{
+ u128_u b = u64_to_u128(a);
+
+ b.v *= b.v;
+ return b;
+}
+
+#else
+
+typedef struct {
+ u64 hi, lo;
+} __aligned(16) u128_u;
+
+/* conversions */
+
+static inline u128_u u64_to_u128(u64 a)
+{
+ return (u128_u) { .lo = a };
+}
+
+static inline u64 u128_lo(u128_u a)
+{
+ return a.lo;
+}
+
+static inline u64 u128_hi(u128_u a)
+{
+ return a.hi;
+}
+
+/* arithmetic */
+
+static inline u128_u u128_add(u128_u a, u128_u b)
+{
+ u128_u c;
+
+ c.lo = a.lo + b.lo;
+ c.hi = a.hi + b.hi + (c.lo < a.lo);
+ return c;
+}
+
+static inline u128_u u128_sub(u128_u a, u128_u b)
+{
+ u128_u c;
+
+ c.lo = a.lo - b.lo;
+ c.hi = a.hi - b.hi - (c.lo > a.lo);
+ return c;
+}
+
+static inline u128_u u128_shl(u128_u i, s8 shift)
+{
+ u128_u r;
+
+ r.lo = i.lo << shift;
+ if (shift < 64)
+ r.hi = (i.hi << shift) | (i.lo >> (64 - shift));
+ else {
+ r.hi = i.lo << (shift - 64);
+ r.lo = 0;
+ }
+ return r;
+}
+
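+/* schoolbook 64x64 -> 128 bit multiply, using 32-bit limbs */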
+static inline u128_u u128_square(u64 i)
+{
+ u128_u r;
+ u64 h = i >> 32, l = i & U32_MAX;
+
+ r = u128_shl(u64_to_u128(h*h), 64);
+ r = u128_add(r, u128_shl(u64_to_u128(h*l), 32));
+ r = u128_add(r, u128_shl(u64_to_u128(l*h), 32));
+ r = u128_add(r, u64_to_u128(l*l));
+ return r;
+}
+
+#endif
+
+static inline u128_u u64s_to_u128(u64 hi, u64 lo)
+{
+ u128_u c = u64_to_u128(hi);
+
+ c = u128_shl(c, 64);
+ c = u128_add(c, u64_to_u128(lo));
+ return c;
+}
+
+u128_u u128_div(u128_u n, u64 d);
+
+struct mean_and_variance {
+ s64 n;
+ s64 sum;
+ u128_u sum_squares;
+};
+
+/* exponentially weighted variant */
+struct mean_and_variance_weighted {
+ bool init;
+	u8 weight; /* base 2 logarithm */
+ s64 mean;
+ u64 variance;
+};
+
+/**
+ * fast_divpow2() - fast approximation for n / (1 << d)
+ * @n: numerator
+ * @d: the power of 2 denominator.
+ *
+ * note: this rounds towards 0.
+ */
+static inline s64 fast_divpow2(s64 n, u8 d)
+{
+ return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d;
+}
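+
+/*
+ * For example (illustrative): fast_divpow2(-7, 1) == -3 and
+ * fast_divpow2(7, 1) == 3, whereas a plain arithmetic shift would give
+ * -7 >> 1 == -4.
+ */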
+
+/**
+ * mean_and_variance_update() - update a mean_and_variance struct @s with a new
+ * sample @v
+ * @s: the mean_and_variance to update.
+ * @v: the new sample.
+ *
+ * see linked pdf equation 12.
+ */
+static inline void
+mean_and_variance_update(struct mean_and_variance *s, s64 v)
+{
+ s->n++;
+ s->sum += v;
+ s->sum_squares = u128_add(s->sum_squares, u128_square(abs(v)));
+}
+
+s64 mean_and_variance_get_mean(struct mean_and_variance s);
+u64 mean_and_variance_get_variance(struct mean_and_variance s1);
+u32 mean_and_variance_get_stddev(struct mean_and_variance s);
+
+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 v);
+
+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s);
+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s);
+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s);
+
+#endif // MEAN_AND_VARIANCE_H_
diff --git a/c_src/libbcachefs/migrate.c b/c_src/libbcachefs/migrate.c
new file mode 100644
index 00000000..5623cee3
--- /dev/null
+++ b/c_src/libbcachefs/migrate.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for moving data off a device.
+ */
+
+#include "bcachefs.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "errcode.h"
+#include "extents.h"
+#include "io_write.h"
+#include "journal.h"
+#include "keylist.h"
+#include "migrate.h"
+#include "move.h"
+#include "replicas.h"
+#include "super-io.h"
+
+static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
+ unsigned dev_idx, int flags, bool metadata)
+{
+ unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
+ unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST;
+ unsigned degraded = metadata ? BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED;
+ unsigned nr_good;
+
+ bch2_bkey_drop_device(k, dev_idx);
+
+ nr_good = bch2_bkey_durability(c, k.s_c);
+ if ((!nr_good && !(flags & lost)) ||
+ (nr_good < replicas && !(flags & degraded)))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ unsigned dev_idx,
+ int flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i *n;
+ int ret;
+
+ if (!bch2_bkey_has_device_c(k, dev_idx))
+ return 0;
+
+ n = bch2_bkey_make_mut(trans, iter, &k, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ ret = drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, false);
+ if (ret)
+ return ret;
+
+ /*
+ * If the new extent no longer has any pointers, bch2_extent_normalize()
+ * will do the appropriate thing with it (turning it into a
+ * KEY_TYPE_error key, or just a discard if it was a cached extent)
+ */
+ bch2_extent_normalize(c, bkey_i_to_s(n));
+
+ /*
+ * Since we're not inserting through an extent iterator
+ * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+ * we aren't using the extent overwrite path to delete, we're
+ * just using the normal key deletion path:
+ */
+ if (bkey_deleted(&n->k))
+ n->k.size = 0;
+ return 0;
+}
+
+static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ enum btree_id id;
+ int ret = 0;
+
+ for (id = 0; id < BTREE_ID_NR; id++) {
+ if (!btree_type_has_ptrs(id))
+ continue;
+
+ ret = for_each_btree_key_commit(trans, iter, id, POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags));
+ if (ret)
+ break;
+ }
+
+ bch2_trans_put(trans);
+
+ return ret;
+}
+
+static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+{
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ struct closure cl;
+ struct btree *b;
+ struct bkey_buf k;
+ unsigned id;
+ int ret;
+
+ /* don't handle this yet: */
+ if (flags & BCH_FORCE_IF_METADATA_LOST)
+ return -EINVAL;
+
+ trans = bch2_trans_get(c);
+ bch2_bkey_buf_init(&k);
+ closure_init_stack(&cl);
+
+ for (id = 0; id < BTREE_ID_NR; id++) {
+ bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0,
+ BTREE_ITER_PREFETCH);
+retry:
+ ret = 0;
+ while (bch2_trans_begin(trans),
+ (b = bch2_btree_iter_peek_node(&iter)) &&
+ !(ret = PTR_ERR_OR_ZERO(b))) {
+ if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
+ goto next;
+
+ bch2_bkey_buf_copy(&k, c, &b->key);
+
+ ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
+ dev_idx, flags, true);
+ if (ret) {
+ bch_err(c, "Cannot drop device without losing data");
+ break;
+ }
+
+ ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ ret = 0;
+ continue;
+ }
+
+ bch_err_msg(c, ret, "updating btree node key");
+ if (ret)
+ break;
+next:
+ bch2_btree_iter_next_node(&iter);
+ }
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret)
+ goto err;
+ }
+
+ bch2_btree_interior_updates_flush(c);
+ ret = 0;
+err:
+ bch2_bkey_buf_exit(&k, c);
+ bch2_trans_put(trans);
+
+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
+
+ return ret;
+}
+
+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+{
+ return bch2_dev_usrdata_drop(c, dev_idx, flags) ?:
+ bch2_dev_metadata_drop(c, dev_idx, flags);
+}
diff --git a/c_src/libbcachefs/migrate.h b/c_src/libbcachefs/migrate.h
new file mode 100644
index 00000000..027efaa0
--- /dev/null
+++ b/c_src/libbcachefs/migrate.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_MIGRATE_H
+#define _BCACHEFS_MIGRATE_H
+
+int bch2_dev_data_drop(struct bch_fs *, unsigned, int);
+
+#endif /* _BCACHEFS_MIGRATE_H */
diff --git a/c_src/libbcachefs/move.c b/c_src/libbcachefs/move.c
new file mode 100644
index 00000000..7a33319d
--- /dev/null
+++ b/c_src/libbcachefs/move.c
@@ -0,0 +1,1161 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "backpointers.h"
+#include "bkey_buf.h"
+#include "btree_gc.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_write_buffer.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "errcode.h"
+#include "error.h"
+#include "inode.h"
+#include "io_read.h"
+#include "io_write.h"
+#include "journal_reclaim.h"
+#include "keylist.h"
+#include "move.h"
+#include "replicas.h"
+#include "snapshot.h"
+#include "super-io.h"
+#include "trace.h"
+
+#include <linux/ioprio.h>
+#include <linux/kthread.h>
+
+const char * const bch2_data_ops_strs[] = {
+#define x(t, n, ...) [n] = #t,
+ BCH_DATA_OPS()
+#undef x
+ NULL
+};
+
+static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k)
+{
+ if (trace_move_extent_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ trace_move_extent(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+}
+
+static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k)
+{
+ if (trace_move_extent_read_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ trace_move_extent_read(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+}
+
+struct moving_io {
+ struct list_head read_list;
+ struct list_head io_list;
+ struct move_bucket_in_flight *b;
+ struct closure cl;
+ bool read_completed;
+
+ unsigned read_sectors;
+ unsigned write_sectors;
+
+ struct bch_read_bio rbio;
+
+ struct data_update write;
+ /* Must be last since it is variable size */
+ struct bio_vec bi_inline_vecs[];
+};
+
+static void move_free(struct moving_io *io)
+{
+ struct moving_context *ctxt = io->write.ctxt;
+
+ if (io->b)
+ atomic_dec(&io->b->count);
+
+ bch2_data_update_exit(&io->write);
+
+ mutex_lock(&ctxt->lock);
+ list_del(&io->io_list);
+ wake_up(&ctxt->wait);
+ mutex_unlock(&ctxt->lock);
+
+ kfree(io);
+}
+
+static void move_write_done(struct bch_write_op *op)
+{
+ struct moving_io *io = container_of(op, struct moving_io, write.op);
+ struct moving_context *ctxt = io->write.ctxt;
+
+ if (io->write.op.error)
+ ctxt->write_error = true;
+
+ atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors);
+ atomic_dec(&io->write.ctxt->write_ios);
+ move_free(io);
+ closure_put(&ctxt->cl);
+}
+
+static void move_write(struct moving_io *io)
+{
+ if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) {
+ move_free(io);
+ return;
+ }
+
+ closure_get(&io->write.ctxt->cl);
+ atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
+ atomic_inc(&io->write.ctxt->write_ios);
+
+ bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
+}
+
+struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
+{
+ struct moving_io *io =
+ list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list);
+
+ return io && io->read_completed ? io : NULL;
+}
+
+static void move_read_endio(struct bio *bio)
+{
+ struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
+ struct moving_context *ctxt = io->write.ctxt;
+
+ atomic_sub(io->read_sectors, &ctxt->read_sectors);
+ atomic_dec(&ctxt->read_ios);
+ io->read_completed = true;
+
+ wake_up(&ctxt->wait);
+ closure_put(&ctxt->cl);
+}
+
+void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt)
+{
+ struct moving_io *io;
+
+ while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
+ bch2_trans_unlock_long(ctxt->trans);
+ list_del(&io->read_list);
+ move_write(io);
+ }
+}
+
+void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
+{
+ unsigned sectors_pending = atomic_read(&ctxt->write_sectors);
+
+ move_ctxt_wait_event(ctxt,
+ !atomic_read(&ctxt->write_sectors) ||
+ atomic_read(&ctxt->write_sectors) != sectors_pending);
+}
+
+void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
+{
+ move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+ bch2_trans_unlock_long(ctxt->trans);
+ closure_sync(&ctxt->cl);
+}
+
+void bch2_moving_ctxt_exit(struct moving_context *ctxt)
+{
+ struct bch_fs *c = ctxt->trans->c;
+
+ bch2_moving_ctxt_flush_all(ctxt);
+
+ EBUG_ON(atomic_read(&ctxt->write_sectors));
+ EBUG_ON(atomic_read(&ctxt->write_ios));
+ EBUG_ON(atomic_read(&ctxt->read_sectors));
+ EBUG_ON(atomic_read(&ctxt->read_ios));
+
+ mutex_lock(&c->moving_context_lock);
+ list_del(&ctxt->list);
+ mutex_unlock(&c->moving_context_lock);
+
+ bch2_trans_put(ctxt->trans);
+ memset(ctxt, 0, sizeof(*ctxt));
+}
+
+void bch2_moving_ctxt_init(struct moving_context *ctxt,
+ struct bch_fs *c,
+ struct bch_ratelimit *rate,
+ struct bch_move_stats *stats,
+ struct write_point_specifier wp,
+ bool wait_on_copygc)
+{
+ memset(ctxt, 0, sizeof(*ctxt));
+
+ ctxt->trans = bch2_trans_get(c);
+ ctxt->fn = (void *) _RET_IP_;
+ ctxt->rate = rate;
+ ctxt->stats = stats;
+ ctxt->wp = wp;
+ ctxt->wait_on_copygc = wait_on_copygc;
+
+ closure_init_stack(&ctxt->cl);
+
+ mutex_init(&ctxt->lock);
+ INIT_LIST_HEAD(&ctxt->reads);
+ INIT_LIST_HEAD(&ctxt->ios);
+ init_waitqueue_head(&ctxt->wait);
+
+ mutex_lock(&c->moving_context_lock);
+ list_add(&ctxt->list, &c->moving_context_list);
+ mutex_unlock(&c->moving_context_lock);
+}
+
+void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c)
+{
+ trace_move_data(c, stats);
+}
+
+void bch2_move_stats_init(struct bch_move_stats *stats, const char *name)
+{
+ memset(stats, 0, sizeof(*stats));
+ stats->data_type = BCH_DATA_user;
+ scnprintf(stats->name, sizeof(stats->name), "%s", name);
+}
+
+int bch2_move_extent(struct moving_context *ctxt,
+ struct move_bucket_in_flight *bucket_in_flight,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct bch_io_opts io_opts,
+ struct data_update_opts data_opts)
+{
+ struct btree_trans *trans = ctxt->trans;
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct moving_io *io;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned sectors = k.k->size, pages;
+ int ret = -ENOMEM;
+
+ if (ctxt->stats)
+ ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos);
+ trace_move_extent2(c, k);
+
+ bch2_data_update_opts_normalize(k, &data_opts);
+
+ if (!data_opts.rewrite_ptrs &&
+ !data_opts.extra_replicas) {
+ if (data_opts.kill_ptrs)
+ return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+ return 0;
+ }
+
+ /*
+ * Before memory allocations & taking nocow locks in
+ * bch2_data_update_init():
+ */
+ bch2_trans_unlock(trans);
+
+ /* write path might have to decompress data: */
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
+
+ pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
+ io = kzalloc(sizeof(struct moving_io) +
+ sizeof(struct bio_vec) * pages, GFP_KERNEL);
+ if (!io)
+ goto err;
+
+ INIT_LIST_HEAD(&io->io_list);
+ io->write.ctxt = ctxt;
+ io->read_sectors = k.k->size;
+ io->write_sectors = k.k->size;
+
+ bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0);
+ bio_set_prio(&io->write.op.wbio.bio,
+ IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+
+ if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
+ GFP_KERNEL))
+ goto err_free;
+
+ io->rbio.c = c;
+ io->rbio.opts = io_opts;
+ bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0);
+ io->rbio.bio.bi_vcnt = pages;
+ bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+ io->rbio.bio.bi_iter.bi_size = sectors << 9;
+
+ io->rbio.bio.bi_opf = REQ_OP_READ;
+ io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
+ io->rbio.bio.bi_end_io = move_read_endio;
+
+ ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
+ io_opts, data_opts, iter->btree_id, k);
+ if (ret)
+ goto err_free_pages;
+
+ io->write.op.end_io = move_write_done;
+
+ if (ctxt->rate)
+ bch2_ratelimit_increment(ctxt->rate, k.k->size);
+
+ if (ctxt->stats) {
+ atomic64_inc(&ctxt->stats->keys_moved);
+ atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
+ }
+
+ if (bucket_in_flight) {
+ io->b = bucket_in_flight;
+ atomic_inc(&io->b->count);
+ }
+
+ this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
+ this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size);
+ trace_move_extent_read2(c, k);
+
+ mutex_lock(&ctxt->lock);
+ atomic_add(io->read_sectors, &ctxt->read_sectors);
+ atomic_inc(&ctxt->read_ios);
+
+ list_add_tail(&io->read_list, &ctxt->reads);
+ list_add_tail(&io->io_list, &ctxt->ios);
+ mutex_unlock(&ctxt->lock);
+
+ /*
+ * dropped by move_read_endio() - guards against use after free of
+ * ctxt when doing wakeup
+ */
+ closure_get(&ctxt->cl);
+ bch2_read_extent(trans, &io->rbio,
+ bkey_start_pos(k.k),
+ iter->btree_id, k, 0,
+ BCH_READ_NODECODE|
+ BCH_READ_LAST_FRAGMENT);
+ return 0;
+err_free_pages:
+ bio_free_pages(&io->write.op.wbio.bio);
+err_free:
+ kfree(io);
+err:
+ if (ret == -BCH_ERR_data_update_done)
+ return 0;
+
+ if (bch2_err_matches(ret, EROFS) ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ret;
+
+ count_event(c, move_extent_start_fail);
+
+ if (trace_move_extent_start_fail_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, ": ");
+ prt_str(&buf, bch2_err_str(ret));
+ trace_move_extent_start_fail(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+ return ret;
+}
+
+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
+ struct per_snapshot_io_opts *io_opts,
+ struct bkey_s_c extent_k)
+{
+ struct bch_fs *c = trans->c;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+
+ if (io_opts->cur_inum != extent_k.k->p.inode) {
+ io_opts->d.nr = 0;
+
+ ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
+ if (k.k->p.offset != extent_k.k->p.inode)
+ break;
+
+ if (!bkey_is_inode(k.k))
+ continue;
+
+ struct bch_inode_unpacked inode;
+ BUG_ON(bch2_inode_unpack(k, &inode));
+
+ struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
+ bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
+
+ darray_push(&io_opts->d, e);
+ }));
+ io_opts->cur_inum = extent_k.k->p.inode;
+ }
+
+ ret = ret ?: trans_was_restarted(trans, restart_count);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (extent_k.k->p.snapshot)
+ darray_for_each(io_opts->d, i)
+ if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot))
+ return &i->io_opts;
+
+ return &io_opts->fs_io_opts;
+}
+
+int bch2_move_get_io_opts_one(struct btree_trans *trans,
+ struct bch_io_opts *io_opts,
+ struct bkey_s_c extent_k)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ /* reflink btree? */
+ if (!extent_k.k->p.inode) {
+ *io_opts = bch2_opts_to_inode_opts(trans->c->opts);
+ return 0;
+ }
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
+ BTREE_ITER_CACHED);
+ ret = bkey_err(k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ret;
+
+ if (!ret && bkey_is_inode(k.k)) {
+ struct bch_inode_unpacked inode;
+ bch2_inode_unpack(k, &inode);
+ bch2_inode_opts_get(io_opts, trans->c, &inode);
+ } else {
+ *io_opts = bch2_opts_to_inode_opts(trans->c->opts);
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ return 0;
+}
+
+int bch2_move_ratelimit(struct moving_context *ctxt)
+{
+ struct bch_fs *c = ctxt->trans->c;
+ bool is_kthread = current->flags & PF_KTHREAD;
+ u64 delay;
+
+ if (ctxt->wait_on_copygc && c->copygc_running) {
+ bch2_moving_ctxt_flush_all(ctxt);
+ wait_event_killable(c->copygc_running_wq,
+ !c->copygc_running ||
+ (is_kthread && kthread_should_stop()));
+ }
+
+ do {
+ delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
+
+ if (is_kthread && kthread_should_stop())
+ return 1;
+
+ if (delay)
+ move_ctxt_wait_event_timeout(ctxt,
+ freezing(current) ||
+ (is_kthread && kthread_should_stop()),
+ delay);
+
+ if (unlikely(freezing(current))) {
+ bch2_moving_ctxt_flush_all(ctxt);
+ try_to_freeze();
+ }
+ } while (delay);
+
+ /*
+	 * XXX: these limits really ought to be per device; SSDs and hard drives
+ * will want different limits
+ */
+ move_ctxt_wait_event(ctxt,
+ atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 &&
+ atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 &&
+ atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight &&
+ atomic_read(&ctxt->read_ios) < c->opts.move_ios_in_flight);
+
+ return 0;
+}
+
+static int bch2_move_data_btree(struct moving_context *ctxt,
+ struct bpos start,
+ struct bpos end,
+ move_pred_fn pred, void *arg,
+ enum btree_id btree_id)
+{
+ struct btree_trans *trans = ctxt->trans;
+ struct bch_fs *c = trans->c;
+ struct per_snapshot_io_opts snapshot_io_opts;
+ struct bch_io_opts *io_opts;
+ struct bkey_buf sk;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct data_update_opts data_opts;
+ int ret = 0, ret2;
+
+ per_snapshot_io_opts_init(&snapshot_io_opts, c);
+ bch2_bkey_buf_init(&sk);
+
+ if (ctxt->stats) {
+ ctxt->stats->data_type = BCH_DATA_user;
+ ctxt->stats->pos = BBPOS(btree_id, start);
+ }
+
+ bch2_trans_iter_init(trans, &iter, btree_id, start,
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS);
+
+ if (ctxt->rate)
+ bch2_ratelimit_reset(ctxt->rate);
+
+ while (!bch2_move_ratelimit(ctxt)) {
+ bch2_trans_begin(trans);
+
+ k = bch2_btree_iter_peek(&iter);
+ if (!k.k)
+ break;
+
+ ret = bkey_err(k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ break;
+
+ if (bkey_ge(bkey_start_pos(k.k), end))
+ break;
+
+ if (ctxt->stats)
+ ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
+
+ if (!bkey_extent_is_direct_data(k.k))
+ goto next_nondata;
+
+ io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k);
+ ret = PTR_ERR_OR_ZERO(io_opts);
+ if (ret)
+ continue;
+
+ memset(&data_opts, 0, sizeof(data_opts));
+ if (!pred(c, arg, k, io_opts, &data_opts))
+ goto next;
+
+ /*
+ * The iterator gets unlocked by __bch2_read_extent - need to
+ * save a copy of @k elsewhere:
+ */
+ bch2_bkey_buf_reassemble(&sk, c, k);
+ k = bkey_i_to_s_c(sk.k);
+
+ ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts);
+ if (ret2) {
+ if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
+ continue;
+
+ if (ret2 == -ENOMEM) {
+ /* memory allocation failure, wait for some IO to finish */
+ bch2_move_ctxt_wait_for_io(ctxt);
+ continue;
+ }
+
+ /* XXX signal failure */
+ goto next;
+ }
+next:
+ if (ctxt->stats)
+ atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
+next_nondata:
+ bch2_btree_iter_advance(&iter);
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_bkey_buf_exit(&sk, c);
+ per_snapshot_io_opts_exit(&snapshot_io_opts);
+
+ return ret;
+}
+
+int __bch2_move_data(struct moving_context *ctxt,
+ struct bbpos start,
+ struct bbpos end,
+ move_pred_fn pred, void *arg)
+{
+ struct bch_fs *c = ctxt->trans->c;
+ enum btree_id id;
+ int ret = 0;
+
+ for (id = start.btree;
+ id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
+ id++) {
+ ctxt->stats->pos = BBPOS(id, POS_MIN);
+
+ if (!btree_type_has_ptrs(id) ||
+ !bch2_btree_id_root(c, id)->b)
+ continue;
+
+ ret = bch2_move_data_btree(ctxt,
+ id == start.btree ? start.pos : POS_MIN,
+ id == end.btree ? end.pos : POS_MAX,
+ pred, arg, id);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int bch2_move_data(struct bch_fs *c,
+ struct bbpos start,
+ struct bbpos end,
+ struct bch_ratelimit *rate,
+ struct bch_move_stats *stats,
+ struct write_point_specifier wp,
+ bool wait_on_copygc,
+ move_pred_fn pred, void *arg)
+{
+
+ struct moving_context ctxt;
+ int ret;
+
+ bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
+ ret = __bch2_move_data(&ctxt, start, end, pred, arg);
+ bch2_moving_ctxt_exit(&ctxt);
+
+ return ret;
+}
+
+int bch2_evacuate_bucket(struct moving_context *ctxt,
+ struct move_bucket_in_flight *bucket_in_flight,
+ struct bpos bucket, int gen,
+ struct data_update_opts _data_opts)
+{
+ struct btree_trans *trans = ctxt->trans;
+ struct bch_fs *c = trans->c;
+ bool is_kthread = current->flags & PF_KTHREAD;
+ struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
+ struct btree_iter iter;
+ struct bkey_buf sk;
+ struct bch_backpointer bp;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a;
+ struct bkey_s_c k;
+ struct data_update_opts data_opts;
+ unsigned dirty_sectors, bucket_size;
+ u64 fragmentation;
+ struct bpos bp_pos = POS_MIN;
+ int ret = 0;
+
+ trace_bucket_evacuate(c, &bucket);
+
+ bch2_bkey_buf_init(&sk);
+
+ /*
+ * We're not run in a context that handles transaction restarts:
+ */
+ bch2_trans_begin(trans);
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+ bucket, BTREE_ITER_CACHED);
+ ret = lockrestart_do(trans,
+ bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+ bch2_trans_iter_exit(trans, &iter);
+
+ bch_err_msg(c, ret, "looking up alloc key");
+ if (ret)
+ goto err;
+
+ a = bch2_alloc_to_v4(k, &a_convert);
+ dirty_sectors = bch2_bucket_sectors_dirty(*a);
+ bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
+ fragmentation = a->fragmentation_lru;
+
+ ret = bch2_btree_write_buffer_tryflush(trans);
+ bch_err_msg(c, ret, "flushing btree write buffer");
+ if (ret)
+ goto err;
+
+ while (!(ret = bch2_move_ratelimit(ctxt))) {
+ if (is_kthread && kthread_should_stop())
+ break;
+
+ bch2_trans_begin(trans);
+
+ ret = bch2_get_next_backpointer(trans, bucket, gen,
+ &bp_pos, &bp,
+ BTREE_ITER_CACHED);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ goto err;
+ if (bkey_eq(bp_pos, POS_MAX))
+ break;
+
+ if (!bp.level) {
+ k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0);
+ ret = bkey_err(k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ goto err;
+ if (!k.k)
+ goto next;
+
+ bch2_bkey_buf_reassemble(&sk, c, k);
+ k = bkey_i_to_s_c(sk.k);
+
+ ret = bch2_move_get_io_opts_one(trans, &io_opts, k);
+ if (ret) {
+ bch2_trans_iter_exit(trans, &iter);
+ continue;
+ }
+
+ data_opts = _data_opts;
+ data_opts.target = io_opts.background_target;
+ data_opts.rewrite_ptrs = 0;
+
+ unsigned i = 0;
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
+ if (ptr->dev == bucket.inode) {
+ data_opts.rewrite_ptrs |= 1U << i;
+ if (ptr->cached) {
+ bch2_trans_iter_exit(trans, &iter);
+ goto next;
+ }
+ }
+ i++;
+ }
+
+ ret = bch2_move_extent(ctxt, bucket_in_flight,
+ &iter, k, io_opts, data_opts);
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret == -ENOMEM) {
+ /* memory allocation failure, wait for some IO to finish */
+ bch2_move_ctxt_wait_for_io(ctxt);
+ continue;
+ }
+ if (ret)
+ goto err;
+
+ if (ctxt->stats)
+ atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
+ } else {
+ struct btree *b;
+
+ b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+ continue;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ goto err;
+ if (!b)
+ goto next;
+
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ goto err;
+
+ if (ctxt->rate)
+ bch2_ratelimit_increment(ctxt->rate,
+ c->opts.btree_node_size >> 9);
+ if (ctxt->stats) {
+ atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen);
+ atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved);
+ }
+ }
+next:
+ bp_pos = bpos_nosnap_successor(bp_pos);
+ }
+
+ trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret);
+err:
+ bch2_bkey_buf_exit(&sk, c);
+ return ret;
+}
+
+typedef bool (*move_btree_pred)(struct bch_fs *, void *,
+ struct btree *, struct bch_io_opts *,
+ struct data_update_opts *);
+
+static int bch2_move_btree(struct bch_fs *c,
+ struct bbpos start,
+ struct bbpos end,
+ move_btree_pred pred, void *arg,
+ struct bch_move_stats *stats)
+{
+ bool kthread = (current->flags & PF_KTHREAD) != 0;
+ struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
+ struct moving_context ctxt;
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ struct btree *b;
+ enum btree_id btree;
+ struct data_update_opts data_opts;
+ int ret = 0;
+
+ bch2_moving_ctxt_init(&ctxt, c, NULL, stats,
+ writepoint_ptr(&c->btree_write_point),
+ true);
+ trans = ctxt.trans;
+
+ stats->data_type = BCH_DATA_btree;
+
+ for (btree = start.btree;
+ btree <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
+ btree ++) {
+ stats->pos = BBPOS(btree, POS_MIN);
+
+ if (!bch2_btree_id_root(c, btree)->b)
+ continue;
+
+ bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0,
+ BTREE_ITER_PREFETCH);
+retry:
+ ret = 0;
+ while (bch2_trans_begin(trans),
+ (b = bch2_btree_iter_peek_node(&iter)) &&
+ !(ret = PTR_ERR_OR_ZERO(b))) {
+ if (kthread && kthread_should_stop())
+ break;
+
+ if ((cmp_int(btree, end.btree) ?:
+ bpos_cmp(b->key.k.p, end.pos)) > 0)
+ break;
+
+ stats->pos = BBPOS(iter.btree_id, iter.pos);
+
+ if (!pred(c, arg, b, &io_opts, &data_opts))
+ goto next;
+
+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ break;
+next:
+ bch2_btree_iter_next_node(&iter);
+ }
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (kthread && kthread_should_stop())
+ break;
+ }
+
+ bch_err_fn(c, ret);
+ bch2_moving_ctxt_exit(&ctxt);
+ bch2_btree_interior_updates_flush(c);
+
+ return ret;
+}
+
+static bool rereplicate_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ unsigned nr_good = bch2_bkey_durability(c, k);
+ unsigned replicas = bkey_is_btree_ptr(k.k)
+ ? c->opts.metadata_replicas
+ : io_opts->data_replicas;
+
+ if (!nr_good || nr_good >= replicas)
+ return false;
+
+ data_opts->target = 0;
+ data_opts->extra_replicas = replicas - nr_good;
+ data_opts->btree_insert_flags = 0;
+ return true;
+}
+
+static bool migrate_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_ioctl_data *op = arg;
+ unsigned i = 0;
+
+ data_opts->rewrite_ptrs = 0;
+ data_opts->target = 0;
+ data_opts->extra_replicas = 0;
+ data_opts->btree_insert_flags = 0;
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (ptr->dev == op->migrate.dev)
+ data_opts->rewrite_ptrs |= 1U << i;
+ i++;
+ }
+
+ return data_opts->rewrite_ptrs != 0;
+}
+
+static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+
+static bool migrate_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+
+static bool bformat_needs_redo(struct bkey_format *f)
+{
+ unsigned i;
+
+ for (i = 0; i < f->nr_fields; i++) {
+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
+ u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
+ u64 field_offset = le64_to_cpu(f->field_offset[i]);
+
+ if (f->bits_per_field[i] > unpacked_bits)
+ return true;
+
+ if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
+ return true;
+
+ if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
+ unpacked_mask) <
+ field_offset)
+ return true;
+ }
+
+ return false;
+}
+
+static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ if (b->version_ondisk != c->sb.version ||
+ btree_node_need_rewrite(b) ||
+ bformat_needs_redo(&b->format)) {
+ data_opts->target = 0;
+ data_opts->extra_replicas = 0;
+ data_opts->btree_insert_flags = 0;
+ return true;
+ }
+
+ return false;
+}
+
+int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats)
+{
+ int ret;
+
+ ret = bch2_move_btree(c,
+ BBPOS_MIN,
+ BBPOS_MAX,
+ rewrite_old_nodes_pred, c, stats);
+ if (!ret) {
+ mutex_lock(&c->sb_lock);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
+ c->disk_sb.sb->version_min = c->disk_sb.sb->version;
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ unsigned durability = bch2_bkey_durability(c, k);
+ unsigned replicas = bkey_is_btree_ptr(k.k)
+ ? c->opts.metadata_replicas
+ : io_opts->data_replicas;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned i = 0;
+
+ bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
+ unsigned d = bch2_extent_ptr_durability(c, &p);
+
+ if (d && durability - d >= replicas) {
+ data_opts->kill_ptrs |= BIT(i);
+ durability -= d;
+ }
+
+ i++;
+ }
+
+ return data_opts->kill_ptrs != 0;
+}
+
+static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
+ struct btree *b,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+
+int bch2_data_job(struct bch_fs *c,
+ struct bch_move_stats *stats,
+ struct bch_ioctl_data op)
+{
+ struct bbpos start = BBPOS(op.start_btree, op.start_pos);
+ struct bbpos end = BBPOS(op.end_btree, op.end_pos);
+ int ret = 0;
+
+ if (op.op >= BCH_DATA_OP_NR)
+ return -EINVAL;
+
+ bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]);
+
+ switch (op.op) {
+ case BCH_DATA_OP_rereplicate:
+ stats->data_type = BCH_DATA_journal;
+ ret = bch2_journal_flush_device_pins(&c->journal, -1);
+ ret = bch2_move_btree(c, start, end,
+ rereplicate_btree_pred, c, stats) ?: ret;
+ ret = bch2_move_data(c, start, end,
+ NULL,
+ stats,
+ writepoint_hashed((unsigned long) current),
+ true,
+ rereplicate_pred, c) ?: ret;
+ ret = bch2_replicas_gc2(c) ?: ret;
+ break;
+ case BCH_DATA_OP_migrate:
+ if (op.migrate.dev >= c->sb.nr_devices)
+ return -EINVAL;
+
+ stats->data_type = BCH_DATA_journal;
+ ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
+ ret = bch2_move_btree(c, start, end,
+ migrate_btree_pred, &op, stats) ?: ret;
+ ret = bch2_move_data(c, start, end,
+ NULL,
+ stats,
+ writepoint_hashed((unsigned long) current),
+ true,
+ migrate_pred, &op) ?: ret;
+ ret = bch2_replicas_gc2(c) ?: ret;
+ break;
+ case BCH_DATA_OP_rewrite_old_nodes:
+ ret = bch2_scan_old_btree_nodes(c, stats);
+ break;
+ case BCH_DATA_OP_drop_extra_replicas:
+ ret = bch2_move_btree(c, start, end,
+ drop_extra_replicas_btree_pred, c, stats) ?: ret;
+ ret = bch2_move_data(c, start, end, NULL, stats,
+ writepoint_hashed((unsigned long) current),
+ true,
+ drop_extra_replicas_pred, c) ?: ret;
+ ret = bch2_replicas_gc2(c) ?: ret;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ bch2_move_stats_exit(stats, c);
+ return ret;
+}
+
+void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
+{
+ prt_printf(out, "%s: data type=%s pos=",
+ stats->name,
+ bch2_data_types[stats->data_type]);
+ bch2_bbpos_to_text(out, stats->pos);
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+
+ prt_str(out, "keys moved: ");
+ prt_u64(out, atomic64_read(&stats->keys_moved));
+ prt_newline(out);
+
+ prt_str(out, "keys raced: ");
+ prt_u64(out, atomic64_read(&stats->keys_raced));
+ prt_newline(out);
+
+ prt_str(out, "bytes seen: ");
+ prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9);
+ prt_newline(out);
+
+ prt_str(out, "bytes moved: ");
+ prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9);
+ prt_newline(out);
+
+ prt_str(out, "bytes raced: ");
+ prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9);
+ prt_newline(out);
+
+ printbuf_indent_sub(out, 2);
+}
+
+static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt)
+{
+ struct moving_io *io;
+
+ bch2_move_stats_to_text(out, ctxt->stats);
+ printbuf_indent_add(out, 2);
+
+ prt_printf(out, "reads: ios %u/%u sectors %u/%u",
+ atomic_read(&ctxt->read_ios),
+ c->opts.move_ios_in_flight,
+ atomic_read(&ctxt->read_sectors),
+ c->opts.move_bytes_in_flight >> 9);
+ prt_newline(out);
+
+ prt_printf(out, "writes: ios %u/%u sectors %u/%u",
+ atomic_read(&ctxt->write_ios),
+ c->opts.move_ios_in_flight,
+ atomic_read(&ctxt->write_sectors),
+ c->opts.move_bytes_in_flight >> 9);
+ prt_newline(out);
+
+ printbuf_indent_add(out, 2);
+
+ mutex_lock(&ctxt->lock);
+ list_for_each_entry(io, &ctxt->ios, io_list)
+ bch2_write_op_to_text(out, &io->write.op);
+ mutex_unlock(&ctxt->lock);
+
+ printbuf_indent_sub(out, 4);
+}
+
+void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct moving_context *ctxt;
+
+ mutex_lock(&c->moving_context_lock);
+ list_for_each_entry(ctxt, &c->moving_context_list, list)
+ bch2_moving_ctxt_to_text(out, c, ctxt);
+ mutex_unlock(&c->moving_context_lock);
+}
+
+void bch2_fs_move_init(struct bch_fs *c)
+{
+ INIT_LIST_HEAD(&c->moving_context_list);
+ mutex_init(&c->moving_context_lock);
+}
diff --git a/c_src/libbcachefs/move.h b/c_src/libbcachefs/move.h
new file mode 100644
index 00000000..9baf3093
--- /dev/null
+++ b/c_src/libbcachefs/move.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_MOVE_H
+#define _BCACHEFS_MOVE_H
+
+#include "bbpos.h"
+#include "bcachefs_ioctl.h"
+#include "btree_iter.h"
+#include "buckets.h"
+#include "data_update.h"
+#include "move_types.h"
+
+struct bch_read_bio;
+
+struct moving_context {
+ struct btree_trans *trans;
+ struct list_head list;
+ void *fn;
+
+ struct bch_ratelimit *rate;
+ struct bch_move_stats *stats;
+ struct write_point_specifier wp;
+ bool wait_on_copygc;
+ bool write_error;
+
+ /* For waiting on outstanding reads and writes: */
+ struct closure cl;
+
+ struct mutex lock;
+ struct list_head reads;
+ struct list_head ios;
+
+ /* in flight sectors: */
+ atomic_t read_sectors;
+ atomic_t write_sectors;
+ atomic_t read_ios;
+ atomic_t write_ios;
+
+ wait_queue_head_t wait;
+};
+
+#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout) \
+({ \
+ int _ret = 0; \
+ while (true) { \
+ bool cond_finished = false; \
+ bch2_moving_ctxt_do_pending_writes(_ctxt); \
+ \
+ if (_cond) \
+ break; \
+ bch2_trans_unlock_long((_ctxt)->trans); \
+ _ret = __wait_event_timeout((_ctxt)->wait, \
+ bch2_moving_ctxt_next_pending_write(_ctxt) || \
+ (cond_finished = (_cond)), _timeout); \
+		if (_ret || (cond_finished))					\
+ break; \
+ } \
+ _ret; \
+})
+
+#define move_ctxt_wait_event(_ctxt, _cond) \
+do { \
+ bool cond_finished = false; \
+ bch2_moving_ctxt_do_pending_writes(_ctxt); \
+ \
+ if (_cond) \
+ break; \
+ bch2_trans_unlock_long((_ctxt)->trans); \
+ __wait_event((_ctxt)->wait, \
+ bch2_moving_ctxt_next_pending_write(_ctxt) || \
+ (cond_finished = (_cond))); \
+ if (cond_finished) \
+ break; \
+} while (1)
+
+typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c,
+ struct bch_io_opts *, struct data_update_opts *);
+
+extern const char * const bch2_data_ops_strs[];
+
+void bch2_moving_ctxt_exit(struct moving_context *);
+void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *,
+ struct bch_ratelimit *, struct bch_move_stats *,
+ struct write_point_specifier, bool);
+struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *);
+void bch2_moving_ctxt_do_pending_writes(struct moving_context *);
+void bch2_moving_ctxt_flush_all(struct moving_context *);
+void bch2_move_ctxt_wait_for_io(struct moving_context *);
+int bch2_move_ratelimit(struct moving_context *);
+
+/* Inodes in different snapshots may have different IO options: */
+struct snapshot_io_opts_entry {
+ u32 snapshot;
+ struct bch_io_opts io_opts;
+};
+
+struct per_snapshot_io_opts {
+ u64 cur_inum;
+ struct bch_io_opts fs_io_opts;
+ DARRAY(struct snapshot_io_opts_entry) d;
+};
+
+static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c)
+{
+ memset(io_opts, 0, sizeof(*io_opts));
+ io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts);
+}
+
+static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
+{
+ darray_exit(&io_opts->d);
+}
+
+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *,
+ struct per_snapshot_io_opts *, struct bkey_s_c);
+int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct bkey_s_c);
+
+int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
+
+int bch2_move_extent(struct moving_context *,
+ struct move_bucket_in_flight *,
+ struct btree_iter *,
+ struct bkey_s_c,
+ struct bch_io_opts,
+ struct data_update_opts);
+
+int __bch2_move_data(struct moving_context *,
+ struct bbpos,
+ struct bbpos,
+ move_pred_fn, void *);
+int bch2_move_data(struct bch_fs *,
+ struct bbpos start,
+ struct bbpos end,
+ struct bch_ratelimit *,
+ struct bch_move_stats *,
+ struct write_point_specifier,
+ bool,
+ move_pred_fn, void *);
+
+int bch2_evacuate_bucket(struct moving_context *,
+ struct move_bucket_in_flight *,
+ struct bpos, int,
+ struct data_update_opts);
+int bch2_data_job(struct bch_fs *,
+ struct bch_move_stats *,
+ struct bch_ioctl_data);
+
+void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *);
+void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *);
+void bch2_move_stats_init(struct bch_move_stats *, const char *);
+
+void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *);
+
+void bch2_fs_move_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_MOVE_H */
diff --git a/c_src/libbcachefs/move_types.h b/c_src/libbcachefs/move_types.h
new file mode 100644
index 00000000..e22841ef
--- /dev/null
+++ b/c_src/libbcachefs/move_types.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_MOVE_TYPES_H
+#define _BCACHEFS_MOVE_TYPES_H
+
+#include "bbpos_types.h"
+
+struct bch_move_stats {
+ enum bch_data_type data_type;
+ struct bbpos pos;
+ char name[32];
+
+ atomic64_t keys_moved;
+ atomic64_t keys_raced;
+ atomic64_t sectors_seen;
+ atomic64_t sectors_moved;
+ atomic64_t sectors_raced;
+};
+
+struct move_bucket_key {
+ struct bpos bucket;
+ u8 gen;
+};
+
+struct move_bucket {
+ struct move_bucket_key k;
+ unsigned sectors;
+};
+
+struct move_bucket_in_flight {
+ struct move_bucket_in_flight *next;
+ struct rhash_head hash;
+ struct move_bucket bucket;
+ atomic_t count;
+};
+
+#endif /* _BCACHEFS_MOVE_TYPES_H */
diff --git a/c_src/libbcachefs/movinggc.c b/c_src/libbcachefs/movinggc.c
new file mode 100644
index 00000000..69e06a84
--- /dev/null
+++ b/c_src/libbcachefs/movinggc.c
@@ -0,0 +1,436 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Moving/copying garbage collector
+ *
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "btree_iter.h"
+#include "btree_update.h"
+#include "btree_write_buffer.h"
+#include "buckets.h"
+#include "clock.h"
+#include "errcode.h"
+#include "error.h"
+#include "lru.h"
+#include "move.h"
+#include "movinggc.h"
+#include "trace.h"
+
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/math64.h>
+#include <linux/sched/task.h>
+#include <linux/wait.h>
+
+struct buckets_in_flight {
+ struct rhashtable table;
+ struct move_bucket_in_flight *first;
+ struct move_bucket_in_flight *last;
+ size_t nr;
+ size_t sectors;
+};
+
+static const struct rhashtable_params bch_move_bucket_params = {
+ .head_offset = offsetof(struct move_bucket_in_flight, hash),
+ .key_offset = offsetof(struct move_bucket_in_flight, bucket.k),
+ .key_len = sizeof(struct move_bucket_key),
+};
+
+static struct move_bucket_in_flight *
+move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket b)
+{
+ struct move_bucket_in_flight *new = kzalloc(sizeof(*new), GFP_KERNEL);
+ int ret;
+
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ new->bucket = b;
+
+ ret = rhashtable_lookup_insert_fast(&list->table, &new->hash,
+ bch_move_bucket_params);
+ if (ret) {
+ kfree(new);
+ return ERR_PTR(ret);
+ }
+
+ if (!list->first)
+ list->first = new;
+ else
+ list->last->next = new;
+
+ list->last = new;
+ list->nr++;
+ list->sectors += b.sectors;
+ return new;
+}
+
+static int bch2_bucket_is_movable(struct btree_trans *trans,
+ struct move_bucket *b, u64 time)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bch_alloc_v4 _a;
+ const struct bch_alloc_v4 *a;
+ int ret;
+
+ if (bch2_bucket_is_open(trans->c,
+ b->k.bucket.inode,
+ b->k.bucket.offset))
+ return 0;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
+ b->k.bucket, BTREE_ITER_CACHED);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ a = bch2_alloc_to_v4(k, &_a);
+ b->k.gen = a->gen;
+ b->sectors = bch2_bucket_sectors_dirty(*a);
+
+ ret = data_type_movable(a->data_type) &&
+ a->fragmentation_lru &&
+ a->fragmentation_lru <= time;
+
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static void move_buckets_wait(struct moving_context *ctxt,
+ struct buckets_in_flight *list,
+ bool flush)
+{
+ struct move_bucket_in_flight *i;
+ int ret;
+
+ while ((i = list->first)) {
+ if (flush)
+ move_ctxt_wait_event(ctxt, !atomic_read(&i->count));
+
+ if (atomic_read(&i->count))
+ break;
+
+ list->first = i->next;
+ if (!list->first)
+ list->last = NULL;
+
+ list->nr--;
+ list->sectors -= i->bucket.sectors;
+
+ ret = rhashtable_remove_fast(&list->table, &i->hash,
+ bch_move_bucket_params);
+ BUG_ON(ret);
+ kfree(i);
+ }
+
+ bch2_trans_unlock_long(ctxt->trans);
+}
+
+static bool bucket_in_flight(struct buckets_in_flight *list,
+ struct move_bucket_key k)
+{
+ return rhashtable_lookup_fast(&list->table, &k, bch_move_bucket_params);
+}
+
+typedef DARRAY(struct move_bucket) move_buckets;
+
+static int bch2_copygc_get_buckets(struct moving_context *ctxt,
+ struct buckets_in_flight *buckets_in_flight,
+ move_buckets *buckets)
+{
+ struct btree_trans *trans = ctxt->trans;
+ struct bch_fs *c = trans->c;
+ size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
+ size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
+ int ret;
+
+ move_buckets_wait(ctxt, buckets_in_flight, false);
+
+ ret = bch2_btree_write_buffer_tryflush(trans);
+ if (bch2_err_matches(ret, EROFS))
+ return ret;
+
+ if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_tryflush()",
+ __func__, bch2_err_str(ret)))
+ return ret;
+
+ ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
+ lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
+ lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
+ 0, k, ({
+ struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
+ int ret2 = 0;
+
+ saw++;
+
+ ret2 = bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p));
+ if (ret2 < 0)
+ goto err;
+
+ if (!ret2)
+ not_movable++;
+ else if (bucket_in_flight(buckets_in_flight, b.k))
+ in_flight++;
+ else {
+ ret2 = darray_push(buckets, b);
+ if (ret2)
+ goto err;
+ sectors += b.sectors;
+ }
+
+ ret2 = buckets->nr >= nr_to_get;
+err:
+ ret2;
+ }));
+
+ pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i",
+ buckets_in_flight->nr, buckets_in_flight->sectors,
+ saw, in_flight, not_movable, buckets->nr, sectors, nr_to_get, ret);
+
+ return ret < 0 ? ret : 0;
+}
+
+noinline
+static int bch2_copygc(struct moving_context *ctxt,
+ struct buckets_in_flight *buckets_in_flight,
+ bool *did_work)
+{
+ struct btree_trans *trans = ctxt->trans;
+ struct bch_fs *c = trans->c;
+ struct data_update_opts data_opts = {
+ .btree_insert_flags = BCH_WATERMARK_copygc,
+ };
+ move_buckets buckets = { 0 };
+ struct move_bucket_in_flight *f;
+ u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
+ int ret = 0;
+
+ ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets);
+ if (ret)
+ goto err;
+
+ darray_for_each(buckets, i) {
+ if (kthread_should_stop() || freezing(current))
+ break;
+
+ f = move_bucket_in_flight_add(buckets_in_flight, *i);
+ ret = PTR_ERR_OR_ZERO(f);
+ if (ret == -EEXIST) { /* rare race: copygc_get_buckets returned same bucket more than once */
+ ret = 0;
+ continue;
+ }
+ if (ret == -ENOMEM) { /* flush IO, continue later */
+ ret = 0;
+ break;
+ }
+
+ ret = bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket,
+ f->bucket.k.gen, data_opts);
+ if (ret)
+ goto err;
+
+ *did_work = true;
+ }
+err:
+ darray_exit(&buckets);
+
+ /* no entries in LRU btree found, or got to end: */
+ if (bch2_err_matches(ret, ENOENT))
+ ret = 0;
+
+ if (ret < 0 && !bch2_err_matches(ret, EROFS))
+ bch_err_msg(c, ret, "from bch2_move_data()");
+
+ moved = atomic64_read(&ctxt->stats->sectors_moved) - moved;
+ trace_and_count(c, copygc, c, moved, 0, 0, 0);
+ return ret;
+}
+
+/*
+ * Copygc runs when the amount of fragmented data is above some arbitrary
+ * threshold:
+ *
+ * The threshold at the limit - when the device is full - is the amount of space
+ * we reserved in bch2_recalc_capacity; we can't have more than that amount of
+ * disk space stranded due to fragmentation while still storing everything we
+ * have promised to store.
+ *
+ * But we don't want to be running copygc unnecessarily when the device still
+ * has plenty of free space - rather, we want copygc to smoothly run every so
+ * often and continually reduce the amount of fragmented space as the device
+ * fills up. So, we increase the threshold by half the current free space.
+ */
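+/*
+ * Rough illustrative example: if half of a device's buckets are still
+ * available at the stripe watermark, the wait amount computed below only
+ * drops to zero once roughly a quarter of the device's capacity worth of
+ * sectors is stranded in fragmented buckets.
+ */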
+unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
+{
+ s64 wait = S64_MAX, fragmented_allowed, fragmented;
+
+ for_each_rw_member(c, ca) {
+ struct bch_dev_usage usage = bch2_dev_usage_read(ca);
+
+ fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) *
+ ca->mi.bucket_size) >> 1);
+ fragmented = 0;
+
+ for (unsigned i = 0; i < BCH_DATA_NR; i++)
+ if (data_type_movable(i))
+ fragmented += usage.d[i].fragmented;
+
+ wait = min(wait, max(0LL, fragmented_allowed - fragmented));
+ }
+
+ return wait;
+}
+
+void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ prt_printf(out, "Currently waiting for: ");
+ prt_human_readable_u64(out, max(0LL, c->copygc_wait -
+ atomic64_read(&c->io_clock[WRITE].now)) << 9);
+ prt_newline(out);
+
+ prt_printf(out, "Currently waiting since: ");
+ prt_human_readable_u64(out, max(0LL,
+ atomic64_read(&c->io_clock[WRITE].now) -
+ c->copygc_wait_at) << 9);
+ prt_newline(out);
+
+ prt_printf(out, "Currently calculated wait: ");
+ prt_human_readable_u64(out, bch2_copygc_wait_amount(c));
+ prt_newline(out);
+}
+
+static int bch2_copygc_thread(void *arg)
+{
+ struct bch_fs *c = arg;
+ struct moving_context ctxt;
+ struct bch_move_stats move_stats;
+ struct io_clock *clock = &c->io_clock[WRITE];
+ struct buckets_in_flight *buckets;
+ u64 last, wait;
+ int ret = 0;
+
+ buckets = kzalloc(sizeof(struct buckets_in_flight), GFP_KERNEL);
+ if (!buckets)
+ return -ENOMEM;
+ ret = rhashtable_init(&buckets->table, &bch_move_bucket_params);
+ bch_err_msg(c, ret, "allocating copygc buckets in flight");
+ if (ret) {
+ kfree(buckets);
+ return ret;
+ }
+
+ set_freezable();
+
+ bch2_move_stats_init(&move_stats, "copygc");
+ bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
+ writepoint_ptr(&c->copygc_write_point),
+ false);
+
+ while (!ret && !kthread_should_stop()) {
+ bool did_work = false;
+
+ bch2_trans_unlock_long(ctxt.trans);
+ cond_resched();
+
+ if (!c->copy_gc_enabled) {
+ move_buckets_wait(&ctxt, buckets, true);
+ kthread_wait_freezable(c->copy_gc_enabled ||
+ kthread_should_stop());
+ }
+
+ if (unlikely(freezing(current))) {
+ move_buckets_wait(&ctxt, buckets, true);
+ __refrigerator(false);
+ continue;
+ }
+
+ last = atomic64_read(&clock->now);
+ wait = bch2_copygc_wait_amount(c);
+
+ if (wait > clock->max_slop) {
+ c->copygc_wait_at = last;
+ c->copygc_wait = last + wait;
+ move_buckets_wait(&ctxt, buckets, true);
+ trace_and_count(c, copygc_wait, c, wait, last + wait);
+ bch2_kthread_io_clock_wait(clock, last + wait,
+ MAX_SCHEDULE_TIMEOUT);
+ continue;
+ }
+
+ c->copygc_wait = 0;
+
+ c->copygc_running = true;
+ ret = bch2_copygc(&ctxt, buckets, &did_work);
+ c->copygc_running = false;
+
+ wake_up(&c->copygc_running_wq);
+
+ if (!wait && !did_work) {
+ u64 min_member_capacity = bch2_min_rw_member_capacity(c);
+
+ if (min_member_capacity == U64_MAX)
+ min_member_capacity = 128 * 2048;
+
+ bch2_trans_unlock_long(ctxt.trans);
+ bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6),
+ MAX_SCHEDULE_TIMEOUT);
+ }
+ }
+
+ move_buckets_wait(&ctxt, buckets, true);
+
+ rhashtable_destroy(&buckets->table);
+ kfree(buckets);
+ bch2_moving_ctxt_exit(&ctxt);
+ bch2_move_stats_exit(&move_stats, c);
+
+ return 0;
+}
+
+void bch2_copygc_stop(struct bch_fs *c)
+{
+ if (c->copygc_thread) {
+ kthread_stop(c->copygc_thread);
+ put_task_struct(c->copygc_thread);
+ }
+ c->copygc_thread = NULL;
+}
+
+int bch2_copygc_start(struct bch_fs *c)
+{
+ struct task_struct *t;
+ int ret;
+
+ if (c->copygc_thread)
+ return 0;
+
+ if (c->opts.nochanges)
+ return 0;
+
+ if (bch2_fs_init_fault("copygc_start"))
+ return -ENOMEM;
+
+ t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
+ ret = PTR_ERR_OR_ZERO(t);
+ bch_err_msg(c, ret, "creating copygc thread");
+ if (ret)
+ return ret;
+
+ get_task_struct(t);
+
+ c->copygc_thread = t;
+ wake_up_process(c->copygc_thread);
+
+ return 0;
+}
+
+void bch2_fs_copygc_init(struct bch_fs *c)
+{
+ init_waitqueue_head(&c->copygc_running_wq);
+ c->copygc_running = false;
+}
diff --git a/c_src/libbcachefs/movinggc.h b/c_src/libbcachefs/movinggc.h
new file mode 100644
index 00000000..ea181fef
--- /dev/null
+++ b/c_src/libbcachefs/movinggc.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_MOVINGGC_H
+#define _BCACHEFS_MOVINGGC_H
+
+unsigned long bch2_copygc_wait_amount(struct bch_fs *);
+void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
+
+void bch2_copygc_stop(struct bch_fs *);
+int bch2_copygc_start(struct bch_fs *);
+void bch2_fs_copygc_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_MOVINGGC_H */
diff --git a/c_src/libbcachefs/nocow_locking.c b/c_src/libbcachefs/nocow_locking.c
new file mode 100644
index 00000000..3c21981a
--- /dev/null
+++ b/c_src/libbcachefs/nocow_locking.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_methods.h"
+#include "nocow_locking.h"
+#include "util.h"
+
+#include <linux/closure.h>
+
+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket)
+{
+ u64 dev_bucket = bucket_to_u64(bucket);
+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
+ if (l->b[i] == dev_bucket && atomic_read(&l->l[i]))
+ return true;
+ return false;
+}
+
+#define sign(v) ((v) < 0 ? -1 : (v) > 0 ? 1 : 0)
+
+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags)
+{
+ u64 dev_bucket = bucket_to_u64(bucket);
+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
+ int lock_val = flags ? 1 : -1;
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
+ if (l->b[i] == dev_bucket) {
+ int v = atomic_sub_return(lock_val, &l->l[i]);
+
+ BUG_ON(v && sign(v) != lock_val);
+ if (!v)
+ closure_wake_up(&l->wait);
+ return;
+ }
+
+ BUG();
+}
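+
+/*
+ * Lock counts are signed: acquiring with BUCKET_NOCOW_LOCK_UPDATE adds one,
+ * acquiring without it subtracts one (printed as "update" and "copy"
+ * respectively), so the two classes of holders exclude each other while any
+ * number of holders of the same class may share the lock.
+ */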
+
+bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l,
+ u64 dev_bucket, int flags)
+{
+ int v, lock_val = flags ? 1 : -1;
+ unsigned i;
+
+ spin_lock(&l->lock);
+
+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
+ if (l->b[i] == dev_bucket)
+ goto got_entry;
+
+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
+ if (!atomic_read(&l->l[i])) {
+ l->b[i] = dev_bucket;
+ goto take_lock;
+ }
+fail:
+ spin_unlock(&l->lock);
+ return false;
+got_entry:
+ v = atomic_read(&l->l[i]);
+ if (lock_val > 0 ? v < 0 : v > 0)
+ goto fail;
+take_lock:
+ v = atomic_read(&l->l[i]);
+ /* Overflow? */
+ if (v && sign(v + lock_val) != sign(v))
+ goto fail;
+
+ atomic_add(lock_val, &l->l[i]);
+ spin_unlock(&l->lock);
+ return true;
+}
+
+void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
+ struct nocow_lock_bucket *l,
+ u64 dev_bucket, int flags)
+{
+ if (!__bch2_bucket_nocow_trylock(l, dev_bucket, flags)) {
+ struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks);
+ u64 start_time = local_clock();
+
+ __closure_wait_event(&l->wait, __bch2_bucket_nocow_trylock(l, dev_bucket, flags));
+ bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
+ }
+}
+
+void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t)
+{
+ unsigned i, nr_zero = 0;
+ struct nocow_lock_bucket *l;
+
+ for (l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) {
+ unsigned v = 0;
+
+ for (i = 0; i < ARRAY_SIZE(l->l); i++)
+ v |= atomic_read(&l->l[i]);
+
+ if (!v) {
+ nr_zero++;
+ continue;
+ }
+
+ if (nr_zero)
+ prt_printf(out, "(%u empty entries)\n", nr_zero);
+ nr_zero = 0;
+
+ for (i = 0; i < ARRAY_SIZE(l->l); i++) {
+ int v = atomic_read(&l->l[i]);
+ if (v) {
+ bch2_bpos_to_text(out, u64_to_bucket(l->b[i]));
+ prt_printf(out, ": %s %u ", v < 0 ? "copy" : "update", abs(v));
+ }
+ }
+ prt_newline(out);
+ }
+
+ if (nr_zero)
+ prt_printf(out, "(%u empty entries)\n", nr_zero);
+}
+
+void bch2_fs_nocow_locking_exit(struct bch_fs *c)
+{
+ struct bucket_nocow_lock_table *t = &c->nocow_locks;
+
+ for (struct nocow_lock_bucket *l = t->l; l < t->l + ARRAY_SIZE(t->l); l++)
+ for (unsigned j = 0; j < ARRAY_SIZE(l->l); j++)
+ BUG_ON(atomic_read(&l->l[j]));
+}
+
+int bch2_fs_nocow_locking_init(struct bch_fs *c)
+{
+ struct bucket_nocow_lock_table *t = &c->nocow_locks;
+
+ for (struct nocow_lock_bucket *l = t->l; l < t->l + ARRAY_SIZE(t->l); l++)
+ spin_lock_init(&l->lock);
+
+ return 0;
+}
diff --git a/c_src/libbcachefs/nocow_locking.h b/c_src/libbcachefs/nocow_locking.h
new file mode 100644
index 00000000..f9d6a426
--- /dev/null
+++ b/c_src/libbcachefs/nocow_locking.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_NOCOW_LOCKING_H
+#define _BCACHEFS_NOCOW_LOCKING_H
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "nocow_locking_types.h"
+
+#include <linux/hash.h>
+
+static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lock_table *t,
+ u64 dev_bucket)
+{
+ unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS);
+
+ return t->l + (h & (BUCKET_NOCOW_LOCKS - 1));
+}
+
+#define BUCKET_NOCOW_LOCK_UPDATE (1 << 0)
+
+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos);
+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int);
+bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *, u64, int);
+void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *,
+ struct nocow_lock_bucket *, u64, int);
+
+static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
+ struct bpos bucket, int flags)
+{
+ u64 dev_bucket = bucket_to_u64(bucket);
+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
+
+ __bch2_bucket_nocow_lock(t, l, dev_bucket, flags);
+}
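+
+/*
+ * Example (illustrative): take and release the lock around an in-place update;
+ * the same flags must be passed to unlock so the count is adjusted in the
+ * right direction:
+ *
+ *	bch2_bucket_nocow_lock(&c->nocow_locks, bucket, BUCKET_NOCOW_LOCK_UPDATE);
+ *	...
+ *	bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, BUCKET_NOCOW_LOCK_UPDATE);
+ */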
+
+static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t,
+ struct bpos bucket, int flags)
+{
+ u64 dev_bucket = bucket_to_u64(bucket);
+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
+
+ return __bch2_bucket_nocow_trylock(l, dev_bucket, flags);
+}
+
+void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *);
+
+void bch2_fs_nocow_locking_exit(struct bch_fs *);
+int bch2_fs_nocow_locking_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_NOCOW_LOCKING_H */
diff --git a/c_src/libbcachefs/nocow_locking_types.h b/c_src/libbcachefs/nocow_locking_types.h
new file mode 100644
index 00000000..bd12bf67
--- /dev/null
+++ b/c_src/libbcachefs/nocow_locking_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_NOCOW_LOCKING_TYPES_H
+#define _BCACHEFS_NOCOW_LOCKING_TYPES_H
+
+#define BUCKET_NOCOW_LOCKS_BITS 10
+#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS)
+
+struct nocow_lock_bucket {
+ struct closure_waitlist wait;
+ spinlock_t lock;
+ u64 b[4];
+ atomic_t l[4];
+} __aligned(SMP_CACHE_BYTES);
+
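+/*
+ * Two-level structure: BUCKET_NOCOW_LOCKS (1 << 10) hash buckets, each holding
+ * up to four dev:bucket entries and their signed lock counts.
+ */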
+struct bucket_nocow_lock_table {
+ struct nocow_lock_bucket l[BUCKET_NOCOW_LOCKS];
+};
+
+#endif /* _BCACHEFS_NOCOW_LOCKING_TYPES_H */
+
diff --git a/c_src/libbcachefs/opts.c b/c_src/libbcachefs/opts.c
new file mode 100644
index 00000000..8e6f230e
--- /dev/null
+++ b/c_src/libbcachefs/opts.c
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+
+#include "bcachefs.h"
+#include "compress.h"
+#include "disk_groups.h"
+#include "error.h"
+#include "opts.h"
+#include "super-io.h"
+#include "util.h"
+
+#define x(t, n, ...) [n] = #t,
+
+const char * const bch2_error_actions[] = {
+ BCH_ERROR_ACTIONS()
+ NULL
+};
+
+const char * const bch2_fsck_fix_opts[] = {
+ BCH_FIX_ERRORS_OPTS()
+ NULL
+};
+
+const char * const bch2_version_upgrade_opts[] = {
+ BCH_VERSION_UPGRADE_OPTS()
+ NULL
+};
+
+const char * const bch2_sb_features[] = {
+ BCH_SB_FEATURES()
+ NULL
+};
+
+const char * const bch2_sb_compat[] = {
+ BCH_SB_COMPAT()
+ NULL
+};
+
+const char * const __bch2_btree_ids[] = {
+ BCH_BTREE_IDS()
+ NULL
+};
+
+const char * const bch2_csum_types[] = {
+ BCH_CSUM_TYPES()
+ NULL
+};
+
+const char * const bch2_csum_opts[] = {
+ BCH_CSUM_OPTS()
+ NULL
+};
+
+const char * const bch2_compression_types[] = {
+ BCH_COMPRESSION_TYPES()
+ NULL
+};
+
+const char * const bch2_compression_opts[] = {
+ BCH_COMPRESSION_OPTS()
+ NULL
+};
+
+const char * const bch2_str_hash_types[] = {
+ BCH_STR_HASH_TYPES()
+ NULL
+};
+
+const char * const bch2_str_hash_opts[] = {
+ BCH_STR_HASH_OPTS()
+ NULL
+};
+
+const char * const bch2_data_types[] = {
+ BCH_DATA_TYPES()
+ NULL
+};
+
+const char * const bch2_member_states[] = {
+ BCH_MEMBER_STATES()
+ NULL
+};
+
+const char * const bch2_jset_entry_types[] = {
+ BCH_JSET_ENTRY_TYPES()
+ NULL
+};
+
+const char * const bch2_fs_usage_types[] = {
+ BCH_FS_USAGE_TYPES()
+ NULL
+};
+
+#undef x
+
+static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
+ struct printbuf *err)
+{
+ if (!val) {
+ *res = FSCK_FIX_yes;
+ } else {
+ int ret = match_string(bch2_fsck_fix_opts, -1, val);
+
+ if (ret < 0 && err)
+ prt_str(err, "fix_errors: invalid selection");
+ if (ret < 0)
+ return ret;
+ *res = ret;
+ }
+
+ return 0;
+}
+
+static void bch2_opt_fix_errors_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_sb *sb,
+ u64 v)
+{
+ prt_str(out, bch2_fsck_fix_opts[v]);
+}
+
+#define bch2_opt_fix_errors (struct bch_opt_fn) { \
+ .parse = bch2_opt_fix_errors_parse, \
+ .to_text = bch2_opt_fix_errors_to_text, \
+}
+
+const char * const bch2_d_types[BCH_DT_MAX] = {
+ [DT_UNKNOWN] = "unknown",
+ [DT_FIFO] = "fifo",
+ [DT_CHR] = "chr",
+ [DT_DIR] = "dir",
+ [DT_BLK] = "blk",
+ [DT_REG] = "reg",
+ [DT_LNK] = "lnk",
+ [DT_SOCK] = "sock",
+ [DT_WHT] = "whiteout",
+ [DT_SUBVOL] = "subvol",
+};
+
+u64 BCH2_NO_SB_OPT(const struct bch_sb *sb)
+{
+ BUG();
+}
+
+void SET_BCH2_NO_SB_OPT(struct bch_sb *sb, u64 v)
+{
+ BUG();
+}
+
+void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src)
+{
+#define x(_name, ...) \
+ if (opt_defined(src, _name)) \
+ opt_set(*dst, _name, src._name);
+
+ BCH_OPTS()
+#undef x
+}
+
+bool bch2_opt_defined_by_id(const struct bch_opts *opts, enum bch_opt_id id)
+{
+ switch (id) {
+#define x(_name, ...) \
+ case Opt_##_name: \
+ return opt_defined(*opts, _name);
+ BCH_OPTS()
+#undef x
+ default:
+ BUG();
+ }
+}
+
+u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id)
+{
+ switch (id) {
+#define x(_name, ...) \
+ case Opt_##_name: \
+ return opts->_name;
+ BCH_OPTS()
+#undef x
+ default:
+ BUG();
+ }
+}
+
+void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v)
+{
+ switch (id) {
+#define x(_name, ...) \
+ case Opt_##_name: \
+ opt_set(*opts, _name, v); \
+ break;
+ BCH_OPTS()
+#undef x
+ default:
+ BUG();
+ }
+}
+
+const struct bch_option bch2_opt_table[] = {
+#define OPT_BOOL() .type = BCH_OPT_BOOL, .min = 0, .max = 2
+#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \
+ .min = _min, .max = _max
+#define OPT_STR(_choices) .type = BCH_OPT_STR, \
+ .min = 0, .max = ARRAY_SIZE(_choices), \
+ .choices = _choices
+#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
+
+#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
+ [Opt_##_name] = { \
+ .attr = { \
+ .name = #_name, \
+ .mode = (_flags) & OPT_RUNTIME ? 0644 : 0444, \
+ }, \
+ .flags = _flags, \
+ .hint = _hint, \
+ .help = _help, \
+ .get_sb = _sb_opt, \
+ .set_sb = SET_##_sb_opt, \
+ _type \
+ },
+
+ BCH_OPTS()
+#undef x
+};
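+
+/*
+ * Illustrative expansion: the "errors" entry of BCH_OPTS() becomes
+ *
+ *	[Opt_errors] = {
+ *		.attr.name	= "errors",
+ *		.flags		= OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,
+ *		.type		= BCH_OPT_STR,
+ *		.choices	= bch2_error_actions,
+ *		.get_sb		= BCH_SB_ERROR_ACTION,
+ *		.set_sb		= SET_BCH_SB_ERROR_ACTION,
+ *	},
+ */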
+
+int bch2_opt_lookup(const char *name)
+{
+ const struct bch_option *i;
+
+ for (i = bch2_opt_table;
+ i < bch2_opt_table + ARRAY_SIZE(bch2_opt_table);
+ i++)
+ if (!strcmp(name, i->attr.name))
+ return i - bch2_opt_table;
+
+ return -1;
+}
+
+struct synonym {
+ const char *s1, *s2;
+};
+
+static const struct synonym bch_opt_synonyms[] = {
+ { "quota", "usrquota" },
+};
+
+static int bch2_mount_opt_lookup(const char *name)
+{
+ const struct synonym *i;
+
+ for (i = bch_opt_synonyms;
+ i < bch_opt_synonyms + ARRAY_SIZE(bch_opt_synonyms);
+ i++)
+ if (!strcmp(name, i->s1))
+ name = i->s2;
+
+ return bch2_opt_lookup(name);
+}
+
+int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err)
+{
+ if (v < opt->min) {
+ if (err)
+ prt_printf(err, "%s: too small (min %llu)",
+ opt->attr.name, opt->min);
+ return -BCH_ERR_ERANGE_option_too_small;
+ }
+
+ if (opt->max && v >= opt->max) {
+ if (err)
+ prt_printf(err, "%s: too big (max %llu)",
+ opt->attr.name, opt->max);
+ return -BCH_ERR_ERANGE_option_too_big;
+ }
+
+ if ((opt->flags & OPT_SB_FIELD_SECTORS) && (v & 511)) {
+ if (err)
+ prt_printf(err, "%s: not a multiple of 512",
+ opt->attr.name);
+ return -BCH_ERR_opt_parse_error;
+ }
+
+ if ((opt->flags & OPT_MUST_BE_POW_2) && !is_power_of_2(v)) {
+ if (err)
+ prt_printf(err, "%s: must be a power of two",
+ opt->attr.name);
+ return -BCH_ERR_opt_parse_error;
+ }
+
+ if (opt->fn.validate)
+ return opt->fn.validate(v, err);
+
+ return 0;
+}
+
+int bch2_opt_parse(struct bch_fs *c,
+ const struct bch_option *opt,
+ const char *val, u64 *res,
+ struct printbuf *err)
+{
+ ssize_t ret;
+
+ switch (opt->type) {
+ case BCH_OPT_BOOL:
+ if (val) {
+ ret = kstrtou64(val, 10, res);
+ } else {
+ ret = 0;
+ *res = 1;
+ }
+
+ if (ret < 0 || (*res != 0 && *res != 1)) {
+ if (err)
+ prt_printf(err, "%s: must be bool", opt->attr.name);
+ return ret < 0 ? ret : -EINVAL;
+ }
+ break;
+ case BCH_OPT_UINT:
+ if (!val) {
+ prt_printf(err, "%s: required value",
+ opt->attr.name);
+ return -EINVAL;
+ }
+
+ ret = opt->flags & OPT_HUMAN_READABLE
+ ? bch2_strtou64_h(val, res)
+ : kstrtou64(val, 10, res);
+ if (ret < 0) {
+ if (err)
+ prt_printf(err, "%s: must be a number",
+ opt->attr.name);
+ return ret;
+ }
+ break;
+ case BCH_OPT_STR:
+ if (!val) {
+ prt_printf(err, "%s: required value",
+ opt->attr.name);
+ return -EINVAL;
+ }
+
+ ret = match_string(opt->choices, -1, val);
+ if (ret < 0) {
+ if (err)
+ prt_printf(err, "%s: invalid selection",
+ opt->attr.name);
+ return ret;
+ }
+
+ *res = ret;
+ break;
+ case BCH_OPT_FN:
+ ret = opt->fn.parse(c, val, res, err);
+ if (ret < 0) {
+ if (err)
+ prt_printf(err, "%s: parse error",
+ opt->attr.name);
+ return ret;
+ }
+ }
+
+ return bch2_opt_validate(opt, *res, err);
+}
+
+void bch2_opt_to_text(struct printbuf *out,
+ struct bch_fs *c, struct bch_sb *sb,
+ const struct bch_option *opt, u64 v,
+ unsigned flags)
+{
+ if (flags & OPT_SHOW_MOUNT_STYLE) {
+ if (opt->type == BCH_OPT_BOOL) {
+ prt_printf(out, "%s%s",
+ v ? "" : "no",
+ opt->attr.name);
+ return;
+ }
+
+ prt_printf(out, "%s=", opt->attr.name);
+ }
+
+ switch (opt->type) {
+ case BCH_OPT_BOOL:
+ case BCH_OPT_UINT:
+ if (opt->flags & OPT_HUMAN_READABLE)
+ prt_human_readable_u64(out, v);
+ else
+ prt_printf(out, "%lli", v);
+ break;
+ case BCH_OPT_STR:
+ if (flags & OPT_SHOW_FULL_LIST)
+ prt_string_option(out, opt->choices, v);
+ else
+ prt_str(out, opt->choices[v]);
+ break;
+ case BCH_OPT_FN:
+ opt->fn.to_text(out, c, sb, v);
+ break;
+ default:
+ BUG();
+ }
+}
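+
+/*
+ * Example (illustrative): with OPT_SHOW_MOUNT_STYLE a boolean option prints as
+ * "acl" or "noacl" depending on its value, and a numeric option prints as
+ * e.g. "metadata_replicas=2".
+ */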
+
+int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
+{
+ int ret = 0;
+
+ switch (id) {
+ case Opt_compression:
+ case Opt_background_compression:
+ ret = bch2_check_set_has_compressed_data(c, v);
+ break;
+ case Opt_erasure_code:
+ if (v)
+ bch2_check_set_feature(c, BCH_FEATURE_ec);
+ break;
+ }
+
+ return ret;
+}
+
+int bch2_opts_check_may_set(struct bch_fs *c)
+{
+ unsigned i;
+ int ret;
+
+ for (i = 0; i < bch2_opts_nr; i++) {
+ ret = bch2_opt_check_may_set(c, i,
+ bch2_opt_get_by_id(&c->opts, i));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
+ char *options)
+{
+ char *copied_opts, *copied_opts_start;
+ char *opt, *name, *val;
+ int ret, id;
+ struct printbuf err = PRINTBUF;
+ u64 v;
+
+ if (!options)
+ return 0;
+
+ /*
+ * sys_fsconfig() is now occasionally providing us with option lists
+ * starting with a comma - weird.
+ */
+ if (*options == ',')
+ options++;
+
+ copied_opts = kstrdup(options, GFP_KERNEL);
+ if (!copied_opts)
+ return -1;
+ copied_opts_start = copied_opts;
+
+ while ((opt = strsep(&copied_opts, ",")) != NULL) {
+ name = strsep(&opt, "=");
+ val = opt;
+
+ id = bch2_mount_opt_lookup(name);
+
+ /* Check for the form "noopt", negation of a boolean opt: */
+ if (id < 0 &&
+ !val &&
+ !strncmp("no", name, 2)) {
+ id = bch2_mount_opt_lookup(name + 2);
+ val = "0";
+ }
+
+ /* Unknown options are ignored: */
+ if (id < 0)
+ continue;
+
+ if (!(bch2_opt_table[id].flags & OPT_MOUNT))
+ goto bad_opt;
+
+ if (id == Opt_acl &&
+ !IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL))
+ goto bad_opt;
+
+ if ((id == Opt_usrquota ||
+ id == Opt_grpquota) &&
+ !IS_ENABLED(CONFIG_BCACHEFS_QUOTA))
+ goto bad_opt;
+
+ ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
+ if (ret < 0)
+ goto bad_val;
+
+ bch2_opt_set_by_id(opts, id, v);
+ }
+
+ ret = 0;
+ goto out;
+
+bad_opt:
+ pr_err("Bad mount option %s", name);
+ ret = -1;
+ goto out;
+bad_val:
+ pr_err("Invalid mount option %s", err.buf);
+ ret = -1;
+ goto out;
+out:
+ kfree(copied_opts_start);
+ printbuf_exit(&err);
+ return ret;
+}
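+
+/*
+ * Example (illustrative): parsing "noacl,quota,metadata_replicas=2" sets
+ * acl=0 via the "no" prefix, maps "quota" to usrquota through the synonym
+ * table, and parses metadata_replicas as the integer 2.
+ */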
+
+u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id)
+{
+ const struct bch_option *opt = bch2_opt_table + id;
+ u64 v;
+
+ v = opt->get_sb(sb);
+
+ if (opt->flags & OPT_SB_FIELD_ILOG2)
+ v = 1ULL << v;
+
+ if (opt->flags & OPT_SB_FIELD_SECTORS)
+ v <<= 9;
+
+ return v;
+}
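+
+/*
+ * Illustrative example: encoded_extent_max has both OPT_SB_FIELD_ILOG2 and
+ * OPT_SB_FIELD_SECTORS set, so a superblock value of 7 reads back as
+ * (1 << 7) << 9 = 64KiB.
+ */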
+
+/*
+ * Initial options from superblock - here we don't want any options undefined,
+ * any options the superblock doesn't specify are set to 0:
+ */
+int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb)
+{
+ unsigned id;
+
+ for (id = 0; id < bch2_opts_nr; id++) {
+ const struct bch_option *opt = bch2_opt_table + id;
+
+ if (opt->get_sb == BCH2_NO_SB_OPT)
+ continue;
+
+ bch2_opt_set_by_id(opts, id, bch2_opt_from_sb(sb, id));
+ }
+
+ return 0;
+}
+
+void __bch2_opt_set_sb(struct bch_sb *sb, const struct bch_option *opt, u64 v)
+{
+ if (opt->set_sb == SET_BCH2_NO_SB_OPT)
+ return;
+
+ if (opt->flags & OPT_SB_FIELD_SECTORS)
+ v >>= 9;
+
+ if (opt->flags & OPT_SB_FIELD_ILOG2)
+ v = ilog2(v);
+
+ opt->set_sb(sb, v);
+}
+
+void bch2_opt_set_sb(struct bch_fs *c, const struct bch_option *opt, u64 v)
+{
+ if (opt->set_sb == SET_BCH2_NO_SB_OPT)
+ return;
+
+ mutex_lock(&c->sb_lock);
+ __bch2_opt_set_sb(c->disk_sb.sb, opt, v);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
+
+/* io opts: */
+
+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src)
+{
+ return (struct bch_io_opts) {
+#define x(_name, _bits) ._name = src._name,
+ BCH_INODE_OPTS()
+#undef x
+ };
+}
+
+bool bch2_opt_is_inode_opt(enum bch_opt_id id)
+{
+ static const enum bch_opt_id inode_opt_list[] = {
+#define x(_name, _bits) Opt_##_name,
+ BCH_INODE_OPTS()
+#undef x
+ };
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(inode_opt_list); i++)
+ if (inode_opt_list[i] == id)
+ return true;
+
+ return false;
+}
diff --git a/c_src/libbcachefs/opts.h b/c_src/libbcachefs/opts.h
new file mode 100644
index 00000000..93a24fef
--- /dev/null
+++ b/c_src/libbcachefs/opts.h
@@ -0,0 +1,570 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_OPTS_H
+#define _BCACHEFS_OPTS_H
+
+#include <linux/bug.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include "bcachefs_format.h"
+
+struct bch_fs;
+
+extern const char * const bch2_error_actions[];
+extern const char * const bch2_fsck_fix_opts[];
+extern const char * const bch2_version_upgrade_opts[];
+extern const char * const bch2_sb_features[];
+extern const char * const bch2_sb_compat[];
+extern const char * const __bch2_btree_ids[];
+extern const char * const bch2_csum_types[];
+extern const char * const bch2_csum_opts[];
+extern const char * const bch2_compression_types[];
+extern const char * const bch2_compression_opts[];
+extern const char * const bch2_str_hash_types[];
+extern const char * const bch2_str_hash_opts[];
+extern const char * const bch2_data_types[];
+extern const char * const bch2_member_states[];
+extern const char * const bch2_jset_entry_types[];
+extern const char * const bch2_fs_usage_types[];
+extern const char * const bch2_d_types[];
+
+static inline const char *bch2_d_type_str(unsigned d_type)
+{
+ return (d_type < BCH_DT_MAX ? bch2_d_types[d_type] : NULL) ?: "(bad d_type)";
+}
+
+/*
+ * Mount options; we also store defaults in the superblock.
+ *
+ * Also exposed via sysfs: if an option is writeable, and it's also stored in
+ * the superblock, changing it via sysfs (currently? might change this) also
+ * updates the superblock.
+ *
+ * Each option in struct bch_opts carries a "defined" bit, so we can pass the
+ * mount options to bch2_fs_alloc() as a whole struct and then only apply the
+ * options from that struct that are defined.
+ */
+
+/* dummy option, for options that aren't stored in the superblock */
+u64 BCH2_NO_SB_OPT(const struct bch_sb *);
+void SET_BCH2_NO_SB_OPT(struct bch_sb *, u64);
+
+/* When can be set: */
+enum opt_flags {
+ OPT_FS = (1 << 0), /* Filesystem option */
+ OPT_DEVICE = (1 << 1), /* Device option */
+ OPT_INODE = (1 << 2), /* Inode option */
+ OPT_FORMAT = (1 << 3), /* May be specified at format time */
+ OPT_MOUNT = (1 << 4), /* May be specified at mount time */
+ OPT_RUNTIME = (1 << 5), /* May be specified at runtime */
+ OPT_HUMAN_READABLE = (1 << 6),
+ OPT_MUST_BE_POW_2 = (1 << 7), /* Must be power of 2 */
+ OPT_SB_FIELD_SECTORS = (1 << 8),/* Superblock field is >> 9 of actual value */
+ OPT_SB_FIELD_ILOG2 = (1 << 9), /* Superblock field is ilog2 of actual value */
+};
+
+enum opt_type {
+ BCH_OPT_BOOL,
+ BCH_OPT_UINT,
+ BCH_OPT_STR,
+ BCH_OPT_FN,
+};
+
+struct bch_opt_fn {
+ int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *);
+ void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
+ int (*validate)(u64, struct printbuf *);
+};
+
+/**
+ * x(name, bits, flags, type, sb_opt, default, hint, help)
+ *
+ * @name - name of mount option, sysfs attribute, and struct bch_opts
+ * member
+ *
+ * @flags - when the option may be set
+ *
+ * @sb_opt - name of corresponding superblock option
+ *
+ * @type - one of OPT_BOOL(), OPT_UINT(), OPT_STR(), OPT_FN()
+ */
+
+/*
+ * XXX: add fields for
+ * - default value
+ * - helptext
+ */
+
+#ifdef __KERNEL__
+#define RATELIMIT_ERRORS_DEFAULT true
+#else
+#define RATELIMIT_ERRORS_DEFAULT false
+#endif
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+#define BCACHEFS_VERBOSE_DEFAULT true
+#else
+#define BCACHEFS_VERBOSE_DEFAULT false
+#endif
+
+#define BCH_FIX_ERRORS_OPTS() \
+ x(exit, 0) \
+ x(yes, 1) \
+ x(no, 2) \
+ x(ask, 3)
+
+enum fsck_err_opts {
+#define x(t, n) FSCK_FIX_##t,
+ BCH_FIX_ERRORS_OPTS()
+#undef x
+};
+
+#define BCH_OPTS() \
+ x(block_size, u16, \
+ OPT_FS|OPT_FORMAT| \
+ OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS, \
+ OPT_UINT(512, 1U << 16), \
+ BCH_SB_BLOCK_SIZE, 8, \
+ "size", NULL) \
+ x(btree_node_size, u32, \
+ OPT_FS|OPT_FORMAT| \
+ OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS, \
+ OPT_UINT(512, 1U << 20), \
+ BCH_SB_BTREE_NODE_SIZE, 512, \
+ "size", "Btree node size, default 256k") \
+ x(errors, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_STR(bch2_error_actions), \
+ BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \
+ NULL, "Action to take on filesystem error") \
+ x(metadata_replicas, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_UINT(1, BCH_REPLICAS_MAX), \
+ BCH_SB_META_REPLICAS_WANT, 1, \
+ "#", "Number of metadata replicas") \
+ x(data_replicas, u8, \
+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_UINT(1, BCH_REPLICAS_MAX), \
+ BCH_SB_DATA_REPLICAS_WANT, 1, \
+ "#", "Number of data replicas") \
+ x(metadata_replicas_required, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \
+ OPT_UINT(1, BCH_REPLICAS_MAX), \
+ BCH_SB_META_REPLICAS_REQ, 1, \
+ "#", NULL) \
+ x(data_replicas_required, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \
+ OPT_UINT(1, BCH_REPLICAS_MAX), \
+ BCH_SB_DATA_REPLICAS_REQ, 1, \
+ "#", NULL) \
+ x(encoded_extent_max, u32, \
+ OPT_FS|OPT_FORMAT| \
+ OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS|OPT_SB_FIELD_ILOG2,\
+ OPT_UINT(4096, 2U << 20), \
+ BCH_SB_ENCODED_EXTENT_MAX_BITS, 64 << 10, \
+ "size", "Maximum size of checksummed/compressed extents")\
+ x(metadata_checksum, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_STR(bch2_csum_opts), \
+ BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \
+ NULL, NULL) \
+ x(data_checksum, u8, \
+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_STR(bch2_csum_opts), \
+ BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \
+ NULL, NULL) \
+ x(compression, u8, \
+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_FN(bch2_opt_compression), \
+ BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \
+ NULL, NULL) \
+ x(background_compression, u8, \
+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_FN(bch2_opt_compression), \
+ BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \
+ NULL, NULL) \
+ x(str_hash, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_STR(bch2_str_hash_opts), \
+ BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_OPT_siphash, \
+ NULL, "Hash function for directory entries and xattrs")\
+ x(metadata_target, u16, \
+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_FN(bch2_opt_target), \
+ BCH_SB_METADATA_TARGET, 0, \
+ "(target)", "Device or label for metadata writes") \
+ x(foreground_target, u16, \
+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_FN(bch2_opt_target), \
+ BCH_SB_FOREGROUND_TARGET, 0, \
+ "(target)", "Device or label for foreground writes") \
+ x(background_target, u16, \
+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_FN(bch2_opt_target), \
+ BCH_SB_BACKGROUND_TARGET, 0, \
+ "(target)", "Device or label to move data to in the background")\
+ x(promote_target, u16, \
+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_FN(bch2_opt_target), \
+ BCH_SB_PROMOTE_TARGET, 0, \
+ "(target)", "Device or label to promote data to on read") \
+ x(erasure_code, u16, \
+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH_SB_ERASURE_CODE, false, \
+ NULL, "Enable erasure coding (DO NOT USE YET)") \
+ x(inodes_32bit, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH_SB_INODE_32BIT, true, \
+ NULL, "Constrain inode numbers to 32 bits") \
+ x(shard_inode_numbers, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH_SB_SHARD_INUMS, true, \
+ NULL, "Shard new inode numbers by CPU id") \
+ x(inodes_use_key_cache, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH_SB_INODES_USE_KEY_CACHE, true, \
+ NULL, "Use the btree key cache for the inodes btree") \
+ x(btree_node_mem_ptr_optimization, u8, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, true, \
+ NULL, "Stash pointer to in memory btree node in btree ptr")\
+ x(gc_reserve_percent, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_UINT(5, 21), \
+ BCH_SB_GC_RESERVE, 8, \
+ "%", "Percentage of disk space to reserve for copygc")\
+ x(gc_reserve_bytes, u64, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME| \
+ OPT_HUMAN_READABLE|OPT_SB_FIELD_SECTORS, \
+ OPT_UINT(0, U64_MAX), \
+ BCH_SB_GC_RESERVE_BYTES, 0, \
+ "%", "Amount of disk space to reserve for copygc\n" \
+ "Takes precedence over gc_reserve_percent if set")\
+ x(root_reserve_percent, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \
+ OPT_UINT(0, 100), \
+ BCH_SB_ROOT_RESERVE, 0, \
+ "%", "Percentage of disk space to reserve for superuser")\
+ x(wide_macs, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH_SB_128_BIT_MACS, false, \
+ NULL, "Store full 128 bits of cryptographic MACs, instead of 80")\
+ x(inline_data, u8, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, true, \
+ NULL, "Enable inline data extents") \
+ x(acl, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH_SB_POSIX_ACL, true, \
+ NULL, "Enable POSIX acls") \
+ x(usrquota, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH_SB_USRQUOTA, false, \
+ NULL, "Enable user quotas") \
+ x(grpquota, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH_SB_GRPQUOTA, false, \
+ NULL, "Enable group quotas") \
+ x(prjquota, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH_SB_PRJQUOTA, false, \
+ NULL, "Enable project quotas") \
+ x(degraded, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Allow mounting in degraded mode") \
+ x(very_degraded, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Allow mounting in when data will be missing") \
+ x(discard, u8, \
+ OPT_FS|OPT_MOUNT|OPT_DEVICE, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, true, \
+ NULL, "Enable discard/TRIM support") \
+ x(verbose, u8, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, BCACHEFS_VERBOSE_DEFAULT, \
+ NULL, "Extra debugging information during mount/recovery")\
+ x(journal_flush_delay, u32, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_UINT(1, U32_MAX), \
+ BCH_SB_JOURNAL_FLUSH_DELAY, 1000, \
+ NULL, "Delay in milliseconds before automatic journal commits")\
+ x(journal_flush_disabled, u8, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH_SB_JOURNAL_FLUSH_DISABLED,false, \
+ NULL, "Disable journal flush on sync/fsync\n" \
+ "If enabled, writes can be lost, but only since the\n"\
+ "last journal write (default 1 second)") \
+ x(journal_reclaim_delay, u32, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_UINT(0, U32_MAX), \
+ BCH_SB_JOURNAL_RECLAIM_DELAY, 100, \
+ NULL, "Delay in milliseconds before automatic journal reclaim")\
+ x(move_bytes_in_flight, u32, \
+ OPT_HUMAN_READABLE|OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_UINT(1024, U32_MAX), \
+ BCH2_NO_SB_OPT, 1U << 20, \
+ NULL, "Maximum Amount of IO to keep in flight by the move path")\
+ x(move_ios_in_flight, u32, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_UINT(1, 1024), \
+ BCH2_NO_SB_OPT, 32, \
+ NULL, "Maximum number of IOs to keep in flight by the move path")\
+ x(fsck, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Run fsck on mount") \
+ x(fix_errors, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_FN(bch2_opt_fix_errors), \
+ BCH2_NO_SB_OPT, FSCK_FIX_exit, \
+ NULL, "Fix errors during fsck without asking") \
+ x(ratelimit_errors, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, RATELIMIT_ERRORS_DEFAULT, \
+ NULL, "Ratelimit error messages during fsck") \
+ x(nochanges, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Super read only mode - no writes at all will be issued,\n"\
+ "even if we have to replay the journal") \
+ x(norecovery, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Don't replay the journal") \
+ x(keep_journal, u8, \
+ 0, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Don't free journal entries/keys after startup")\
+ x(read_entire_journal, u8, \
+ 0, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Read all journal entries, not just dirty ones")\
+ x(read_journal_only, u8, \
+ 0, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Only read the journal, skip the rest of recovery")\
+ x(journal_transaction_names, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \
+ NULL, "Log transaction function names in journal") \
+ x(noexcl, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Don't open device in exclusive mode") \
+ x(direct_io, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, true, \
+ NULL, "Use O_DIRECT (userspace only)") \
+ x(sb, u64, \
+ OPT_MOUNT, \
+ OPT_UINT(0, S64_MAX), \
+ BCH2_NO_SB_OPT, BCH_SB_SECTOR, \
+ "offset", "Sector offset of superblock") \
+ x(read_only, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, NULL) \
+ x(nostart, u8, \
+ 0, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Don\'t start filesystem, only open devices") \
+ x(reconstruct_alloc, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Reconstruct alloc btree") \
+ x(version_upgrade, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_STR(bch2_version_upgrade_opts), \
+ BCH_SB_VERSION_UPGRADE, BCH_VERSION_UPGRADE_compatible, \
+ NULL, "Set superblock to latest version,\n" \
+ "allowing any new features to be used") \
+ x(buckets_nouse, u8, \
+ 0, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Allocate the buckets_nouse bitmap") \
+ x(stdio, u64, \
+ 0, \
+ OPT_UINT(0, S64_MAX), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Pointer to a struct stdio_redirect") \
+ x(project, u8, \
+ OPT_INODE, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, NULL) \
+ x(nocow, u8, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \
+ OPT_BOOL(), \
+ BCH_SB_NOCOW, false, \
+ NULL, "Nocow mode: Writes will be done in place when possible.\n"\
+ "Snapshots and reflink will still caused writes to be COW\n"\
+ "Implicitly disables data checksumming, compression and encryption")\
+ x(nocow_enabled, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, true, \
+ NULL, "Enable nocow mode: enables runtime locking in\n"\
+ "data move path needed if nocow will ever be in use\n")\
+ x(no_data_io, u8, \
+ OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, false, \
+ NULL, "Skip submit_bio() for data reads and writes, " \
+ "for performance testing purposes") \
+ x(fs_size, u64, \
+ OPT_DEVICE, \
+ OPT_UINT(0, S64_MAX), \
+ BCH2_NO_SB_OPT, 0, \
+ "size", "Size of filesystem on device") \
+ x(bucket, u32, \
+ OPT_DEVICE, \
+ OPT_UINT(0, S64_MAX), \
+ BCH2_NO_SB_OPT, 0, \
+ "size", "Size of filesystem on device") \
+ x(durability, u8, \
+ OPT_DEVICE, \
+ OPT_UINT(0, BCH_REPLICAS_MAX), \
+ BCH2_NO_SB_OPT, 1, \
+ "n", "Data written to this device will be considered\n"\
+ "to have already been replicated n times") \
+ x(btree_node_prefetch, u8, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, true, \
+ NULL, "BTREE_ITER_PREFETCH casuse btree nodes to be\n"\
+ " prefetched sequentially")
+
+struct bch_opts {
+#define x(_name, _bits, ...) unsigned _name##_defined:1;
+ BCH_OPTS()
+#undef x
+
+#define x(_name, _bits, ...) _bits _name;
+ BCH_OPTS()
+#undef x
+};
+
+static const __maybe_unused struct bch_opts bch2_opts_default = {
+#define x(_name, _bits, _mode, _type, _sb_opt, _default, ...) \
+ ._name##_defined = true, \
+ ._name = _default, \
+
+ BCH_OPTS()
+#undef x
+};
+
+#define opt_defined(_opts, _name) ((_opts)._name##_defined)
+
+#define opt_get(_opts, _name) \
+ (opt_defined(_opts, _name) ? (_opts)._name : bch2_opts_default._name)
+
+#define opt_set(_opts, _name, _v) \
+do { \
+ (_opts)._name##_defined = true; \
+ (_opts)._name = _v; \
+} while (0)
+
+static inline struct bch_opts bch2_opts_empty(void)
+{
+ return (struct bch_opts) { 0 };
+}
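+
+/*
+ * Example (illustrative):
+ *
+ *	struct bch_opts opts = bch2_opts_empty();
+ *
+ *	opt_set(opts, read_only, true);
+ *
+ * after which opt_defined(opts, read_only) is true and
+ * opt_get(opts, read_only) returns 1.
+ */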
+
+void bch2_opts_apply(struct bch_opts *, struct bch_opts);
+
+enum bch_opt_id {
+#define x(_name, ...) Opt_##_name,
+ BCH_OPTS()
+#undef x
+ bch2_opts_nr
+};
+
+struct bch_fs;
+struct printbuf;
+
+struct bch_option {
+ struct attribute attr;
+ u64 (*get_sb)(const struct bch_sb *);
+ void (*set_sb)(struct bch_sb *, u64);
+ enum opt_type type;
+ enum opt_flags flags;
+ u64 min, max;
+
+ const char * const *choices;
+
+ struct bch_opt_fn fn;
+
+ const char *hint;
+ const char *help;
+
+};
+
+extern const struct bch_option bch2_opt_table[];
+
+bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id);
+u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id);
+void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64);
+
+u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id);
+int bch2_opts_from_sb(struct bch_opts *, struct bch_sb *);
+void __bch2_opt_set_sb(struct bch_sb *, const struct bch_option *, u64);
+void bch2_opt_set_sb(struct bch_fs *, const struct bch_option *, u64);
+
+int bch2_opt_lookup(const char *);
+int bch2_opt_validate(const struct bch_option *, u64, struct printbuf *);
+int bch2_opt_parse(struct bch_fs *, const struct bch_option *,
+ const char *, u64 *, struct printbuf *);
+
+#define OPT_SHOW_FULL_LIST (1 << 0)
+#define OPT_SHOW_MOUNT_STYLE (1 << 1)
+
+void bch2_opt_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *,
+ const struct bch_option *, u64, unsigned);
+
+int bch2_opt_check_may_set(struct bch_fs *, int, u64);
+int bch2_opts_check_may_set(struct bch_fs *);
+int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, char *);
+
+/* inode opts: */
+
+struct bch_io_opts {
+#define x(_name, _bits) u##_bits _name;
+ BCH_INODE_OPTS()
+#undef x
+};
+
+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts);
+bool bch2_opt_is_inode_opt(enum bch_opt_id);
+
+#endif /* _BCACHEFS_OPTS_H */
diff --git a/c_src/libbcachefs/printbuf.c b/c_src/libbcachefs/printbuf.c
new file mode 100644
index 00000000..accf246c
--- /dev/null
+++ b/c_src/libbcachefs/printbuf.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: LGPL-2.1+
+/* Copyright (C) 2022 Kent Overstreet */
+
+#include <linux/bitmap.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string_helpers.h>
+
+#include "printbuf.h"
+
+static inline unsigned printbuf_linelen(struct printbuf *buf)
+{
+ return buf->pos - buf->last_newline;
+}
+
+int bch2_printbuf_make_room(struct printbuf *out, unsigned extra)
+{
+ unsigned new_size;
+ char *buf;
+
+ if (!out->heap_allocated)
+ return 0;
+
+ /* Reserved space for terminating nul: */
+ extra += 1;
+
+ if (out->pos + extra < out->size)
+ return 0;
+
+ new_size = roundup_pow_of_two(out->size + extra);
+
+ /*
+ * Note: output buffer must be freeable with kfree(), it's not required
+ * that the user use printbuf_exit().
+ */
+ buf = krealloc(out->buf, new_size, !out->atomic ? GFP_KERNEL : GFP_NOWAIT);
+
+ if (!buf) {
+ out->allocation_failure = true;
+ return -ENOMEM;
+ }
+
+ out->buf = buf;
+ out->size = new_size;
+ return 0;
+}
+
+void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list args)
+{
+ int len;
+
+ do {
+ va_list args2;
+
+ va_copy(args2, args);
+ len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args2);
+ } while (len + 1 >= printbuf_remaining(out) &&
+ !bch2_printbuf_make_room(out, len + 1));
+
+ len = min_t(size_t, len,
+ printbuf_remaining(out) ? printbuf_remaining(out) - 1 : 0);
+ out->pos += len;
+}
+
+void bch2_prt_printf(struct printbuf *out, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ do {
+ va_start(args, fmt);
+ len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args);
+ va_end(args);
+ } while (len + 1 >= printbuf_remaining(out) &&
+ !bch2_printbuf_make_room(out, len + 1));
+
+ len = min_t(size_t, len,
+ printbuf_remaining(out) ? printbuf_remaining(out) - 1 : 0);
+ out->pos += len;
+}
+
+/**
+ * bch2_printbuf_str() - returns printbuf's buf as a C string, guaranteed to be
+ * null terminated
+ * @buf: printbuf to terminate
+ * Returns: Printbuf contents, as a nul terminated C string
+ */
+const char *bch2_printbuf_str(const struct printbuf *buf)
+{
+ /*
+ * If we've written to a printbuf then it's guaranteed to be a null
+ * terminated string - but if we haven't, then we might not have
+ * allocated a buffer at all:
+ */
+ return buf->pos
+ ? buf->buf
+ : "";
+}
+
+/**
+ * bch2_printbuf_exit() - exit a printbuf, freeing memory it owns and poisoning it
+ * against accidental use.
+ * @buf: printbuf to exit
+ */
+void bch2_printbuf_exit(struct printbuf *buf)
+{
+ if (buf->heap_allocated) {
+ kfree(buf->buf);
+ buf->buf = ERR_PTR(-EINTR); /* poison value */
+ }
+}
+
+void bch2_printbuf_tabstops_reset(struct printbuf *buf)
+{
+ buf->nr_tabstops = 0;
+}
+
+void bch2_printbuf_tabstop_pop(struct printbuf *buf)
+{
+ if (buf->nr_tabstops)
+ --buf->nr_tabstops;
+}
+
+/*
+ * bch2_printbuf_tabstop_push() - add a tabstop, n spaces from the previous tabstop
+ *
+ * @buf: printbuf to control
+ * @spaces: number of spaces from previous tabstop
+ *
+ * In the future this function may allocate memory if setting more than
+ * PRINTBUF_INLINE_TABSTOPS or setting tabstops more than 255 spaces from start
+ * of line.
+ */
+int bch2_printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
+{
+ unsigned prev_tabstop = buf->nr_tabstops
+ ? buf->_tabstops[buf->nr_tabstops - 1]
+ : 0;
+
+ if (WARN_ON(buf->nr_tabstops >= ARRAY_SIZE(buf->_tabstops)))
+ return -EINVAL;
+
+ buf->_tabstops[buf->nr_tabstops++] = prev_tabstop + spaces;
+ buf->has_indent_or_tabstops = true;
+ return 0;
+}
+
+/**
+ * bch2_printbuf_indent_add() - add to the current indent level
+ *
+ * @buf: printbuf to control
+ * @spaces: number of spaces to add to the current indent level
+ *
+ * Subsequent lines, and the current line if the output position is at the start
+ * of the current line, will be indented by @spaces more spaces.
+ */
+void bch2_printbuf_indent_add(struct printbuf *buf, unsigned spaces)
+{
+ if (WARN_ON_ONCE(buf->indent + spaces < buf->indent))
+ spaces = 0;
+
+ buf->indent += spaces;
+ prt_chars(buf, ' ', spaces);
+
+ buf->has_indent_or_tabstops = true;
+}
+
+/**
+ * bch2_printbuf_indent_sub() - subtract from the current indent level
+ *
+ * @buf: printbuf to control
+ * @spaces: number of spaces to subtract from the current indent level
+ *
+ * Subsequent lines, and the current line if the output position is at the start
+ * of the current line, will be indented by @spaces less spaces.
+ */
+void bch2_printbuf_indent_sub(struct printbuf *buf, unsigned spaces)
+{
+ if (WARN_ON_ONCE(spaces > buf->indent))
+ spaces = buf->indent;
+
+ if (buf->last_newline + buf->indent == buf->pos) {
+ buf->pos -= spaces;
+ printbuf_nul_terminate(buf);
+ }
+ buf->indent -= spaces;
+
+ if (!buf->indent && !buf->nr_tabstops)
+ buf->has_indent_or_tabstops = false;
+}
+
+void bch2_prt_newline(struct printbuf *buf)
+{
+ unsigned i;
+
+ bch2_printbuf_make_room(buf, 1 + buf->indent);
+
+ __prt_char(buf, '\n');
+
+ buf->last_newline = buf->pos;
+
+ for (i = 0; i < buf->indent; i++)
+ __prt_char(buf, ' ');
+
+ printbuf_nul_terminate(buf);
+
+ buf->last_field = buf->pos;
+ buf->cur_tabstop = 0;
+}
+
+/*
+ * Returns spaces from start of line, if set, or 0 if unset:
+ */
+static inline unsigned cur_tabstop(struct printbuf *buf)
+{
+ return buf->cur_tabstop < buf->nr_tabstops
+ ? buf->_tabstops[buf->cur_tabstop]
+ : 0;
+}
+
+static void __prt_tab(struct printbuf *out)
+{
+ int spaces = max_t(int, 0, cur_tabstop(out) - printbuf_linelen(out));
+
+ prt_chars(out, ' ', spaces);
+
+ out->last_field = out->pos;
+ out->cur_tabstop++;
+}
+
+/**
+ * bch2_prt_tab() - Advance printbuf to the next tabstop
+ * @out: printbuf to control
+ *
+ * Advance output to the next tabstop by printing spaces.
+ */
+void bch2_prt_tab(struct printbuf *out)
+{
+ if (WARN_ON(!cur_tabstop(out)))
+ return;
+
+ __prt_tab(out);
+}
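+
+/*
+ * Example (illustrative): with a single tabstop 16 spaces from start of line,
+ *
+ *	prt_printf(&buf, "compression:");
+ *	prt_tab(&buf);
+ *	prt_str(&buf, "lz4");
+ *
+ * produces "compression:    lz4", with the second column starting at column 16.
+ */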
+
+static void __prt_tab_rjust(struct printbuf *buf)
+{
+ unsigned move = buf->pos - buf->last_field;
+ int pad = (int) cur_tabstop(buf) - (int) printbuf_linelen(buf);
+
+ if (pad > 0) {
+ bch2_printbuf_make_room(buf, pad);
+
+ if (buf->last_field + pad < buf->size)
+ memmove(buf->buf + buf->last_field + pad,
+ buf->buf + buf->last_field,
+ min(move, buf->size - 1 - buf->last_field - pad));
+
+ if (buf->last_field < buf->size)
+ memset(buf->buf + buf->last_field, ' ',
+ min((unsigned) pad, buf->size - buf->last_field));
+
+ buf->pos += pad;
+ printbuf_nul_terminate(buf);
+ }
+
+ buf->last_field = buf->pos;
+ buf->cur_tabstop++;
+}
+
+/**
+ * bch2_prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
+ * previous output
+ *
+ * @buf: printbuf to control
+ *
+ * Advance output to the next tabstop by inserting spaces immediately after the
+ * previous tabstop, right justifying previously outputted text.
+ */
+void bch2_prt_tab_rjust(struct printbuf *buf)
+{
+ if (WARN_ON(!cur_tabstop(buf)))
+ return;
+
+ __prt_tab_rjust(buf);
+}
+
+/**
+ * bch2_prt_bytes_indented() - Print an array of chars, handling embedded control characters
+ *
+ * @out: output printbuf
+ * @str: string to print
+ * @count: number of bytes to print
+ *
+ * The following control characters are handled as so:
+ * \n: prt_newline newline that obeys current indent level
+ * \t: prt_tab advance to next tabstop
+ * \r: prt_tab_rjust advance to next tabstop, with right justification
+ */
+void bch2_prt_bytes_indented(struct printbuf *out, const char *str, unsigned count)
+{
+ const char *unprinted_start = str;
+ const char *end = str + count;
+
+ if (!out->has_indent_or_tabstops || out->suppress_indent_tabstop_handling) {
+ prt_bytes(out, str, count);
+ return;
+ }
+
+ while (str != end) {
+ switch (*str) {
+ case '\n':
+ prt_bytes(out, unprinted_start, str - unprinted_start);
+ unprinted_start = str + 1;
+ bch2_prt_newline(out);
+ break;
+ case '\t':
+ if (likely(cur_tabstop(out))) {
+ prt_bytes(out, unprinted_start, str - unprinted_start);
+ unprinted_start = str + 1;
+ __prt_tab(out);
+ }
+ break;
+ case '\r':
+ if (likely(cur_tabstop(out))) {
+ prt_bytes(out, unprinted_start, str - unprinted_start);
+ unprinted_start = str + 1;
+ __prt_tab_rjust(out);
+ }
+ break;
+ }
+
+ str++;
+ }
+
+ prt_bytes(out, unprinted_start, str - unprinted_start);
+}
+
+/**
+ * bch2_prt_human_readable_u64() - Print out a u64 in human readable units
+ * @out: output printbuf
+ * @v: integer to print
+ *
+ * Units of 2^10 (default) or 10^3 are controlled via @out->si_units
+ */
+void bch2_prt_human_readable_u64(struct printbuf *out, u64 v)
+{
+ bch2_printbuf_make_room(out, 10);
+ out->pos += string_get_size(v, 1, !out->si_units,
+ out->buf + out->pos,
+ printbuf_remaining_size(out));
+}
+
+/**
+ * bch2_prt_human_readable_s64() - Print out a s64 in human readable units
+ * @out: output printbuf
+ * @v: integer to print
+ *
+ * Units of 2^10 (default) or 10^3 are controlled via @out->si_units
+ */
+void bch2_prt_human_readable_s64(struct printbuf *out, s64 v)
+{
+ if (v < 0)
+ prt_char(out, '-');
+ bch2_prt_human_readable_u64(out, abs(v));
+}
+
+/**
+ * bch2_prt_units_u64() - Print out a u64 according to printbuf unit options
+ * @out: output printbuf
+ * @v: integer to print
+ *
+ * Units are either raw (default), or human readable units (controlled via
+ * @buf->human_readable_units)
+ */
+void bch2_prt_units_u64(struct printbuf *out, u64 v)
+{
+ if (out->human_readable_units)
+ bch2_prt_human_readable_u64(out, v);
+ else
+ bch2_prt_printf(out, "%llu", v);
+}
+
+/**
+ * bch2_prt_units_s64() - Print out a s64 according to printbuf unit options
+ * @out: output printbuf
+ * @v: integer to print
+ *
+ * Units are either raw (default), or human readable units (controlled via
+ * @buf->human_readable_units)
+ */
+void bch2_prt_units_s64(struct printbuf *out, s64 v)
+{
+ if (v < 0)
+ prt_char(out, '-');
+ bch2_prt_units_u64(out, abs(v));
+}
+
+void bch2_prt_string_option(struct printbuf *out,
+ const char * const list[],
+ size_t selected)
+{
+ size_t i;
+
+ for (i = 0; list[i]; i++)
+ bch2_prt_printf(out, i == selected ? "[%s] " : "%s ", list[i]);
+}
+
+void bch2_prt_bitflags(struct printbuf *out,
+ const char * const list[], u64 flags)
+{
+ unsigned bit, nr = 0;
+ bool first = true;
+
+ while (list[nr])
+ nr++;
+
+ while (flags && (bit = __ffs64(flags)) < nr) {
+ if (!first)
+ bch2_prt_printf(out, ",");
+ first = false;
+ bch2_prt_printf(out, "%s", list[bit]);
+ flags ^= BIT_ULL(bit);
+ }
+}
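+
+/*
+ * Example (illustrative): with list = { "a", "b", "c", NULL } and flags = 0x5,
+ * bch2_prt_bitflags() prints "a,c".
+ */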
+
+void bch2_prt_bitflags_vector(struct printbuf *out,
+ const char * const list[],
+ unsigned long *v, unsigned nr)
+{
+ bool first = true;
+ unsigned i;
+
+ for (i = 0; i < nr; i++)
+ if (!list[i]) {
+ nr = i;
+ break;
+ }
+
+ for_each_set_bit(i, v, nr) {
+ if (!first)
+ bch2_prt_printf(out, ",");
+ first = false;
+ bch2_prt_printf(out, "%s", list[i]);
+ }
+}
diff --git a/c_src/libbcachefs/printbuf.h b/c_src/libbcachefs/printbuf.h
new file mode 100644
index 00000000..9a4a56c4
--- /dev/null
+++ b/c_src/libbcachefs/printbuf.h
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/* Copyright (C) 2022 Kent Overstreet */
+
+#ifndef _BCACHEFS_PRINTBUF_H
+#define _BCACHEFS_PRINTBUF_H
+
+/*
+ * Printbufs: Simple strings for printing to, with optional heap allocation
+ *
+ * This code has provisions for use in userspace, to aid in making other code
+ * portable between kernelspace and userspace.
+ *
+ * Basic example:
+ * struct printbuf buf = PRINTBUF;
+ *
+ * prt_printf(&buf, "foo=");
+ * foo_to_text(&buf, foo);
+ * printk("%s", buf.buf);
+ * printbuf_exit(&buf);
+ *
+ * Or
+ * struct printbuf buf = PRINTBUF_EXTERN(char_buf, char_buf_size)
+ *
+ * We can now write pretty printers instead of writing code that dumps
+ * everything to the kernel log buffer, and then those pretty-printers can be
+ * used by other code that outputs to kernel log, sysfs, debugfs, etc.
+ *
+ * Memory allocation: Outputting to a printbuf may allocate memory. This
+ * allocation is done with GFP_KERNEL, by default: use the newer
+ * memalloc_*_(save|restore) functions as needed.
+ *
+ * Since no equivalent yet exists for GFP_ATOMIC/GFP_NOWAIT, memory allocations
+ * will be done with GFP_NOWAIT if printbuf->atomic is nonzero.
+ *
+ * It's allowed to grab the output buffer and free it later with kfree() instead
+ * of using printbuf_exit(), if the user just needs a heap allocated string at
+ * the end.
+ *
+ * Memory allocation failures: We don't return errors directly, because on
+ * memory allocation failure we usually don't want to bail out and unwind - we
+ * want to print what we've got, on a best-effort basis. But code that does want
+ * to return -ENOMEM may check printbuf.allocation_failure.
+ *
+ * Indenting, tabstops:
+ *
+ * To aid in writing multi-line pretty printers spread across multiple
+ * functions, printbufs track the current indent level.
+ *
+ * printbuf_indent_add() and printbuf_indent_sub() increase and decrease the
+ * current indent level, respectively.
+ *
+ * To use tabstops, add them with printbuf_tabstop_push(); they are in units of
+ * spaces, from start of line. Once set, prt_tab() will output spaces up to the
+ * next tabstop.
+ * prt_tab_rjust() will also advance the current line of text up to the next
+ * tabstop, but it does so by shifting text since the previous tabstop up to the
+ * next tabstop - right justifying it.
+ *
+ * Make sure you use prt_newline() instead of \n in the format string for indent
+ * level and tabstops to work correctly.
+ *
+ * Output units: printbuf->si_units and printbuf->human_readable_units tell
+ * pretty-printers how to output numbers: a raw value (e.g. directly from a
+ * superblock field), as bytes, or as human readable bytes. prt_units_u64() and
+ * prt_units_s64() obey them.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+enum printbuf_si {
+ PRINTBUF_UNITS_2, /* use binary powers of 2^10 */
+ PRINTBUF_UNITS_10, /* use powers of 10^3 (standard SI) */
+};
+
+#define PRINTBUF_INLINE_TABSTOPS 6
+
+struct printbuf {
+ char *buf;
+ unsigned size;
+ unsigned pos;
+ unsigned last_newline;
+ unsigned last_field;
+ unsigned indent;
+ /*
+ * If nonzero, memory allocations will be done with GFP_NOWAIT:
+ */
+ u8 atomic;
+ bool allocation_failure:1;
+ bool heap_allocated:1;
+ enum printbuf_si si_units:1;
+ bool human_readable_units:1;
+ bool has_indent_or_tabstops:1;
+ bool suppress_indent_tabstop_handling:1;
+ u8 nr_tabstops;
+
+ /*
+ * Do not modify directly: use printbuf_tabstop_push(),
+ * printbuf_tabstop_pop()
+ */
+ u8 cur_tabstop;
+ u8 _tabstops[PRINTBUF_INLINE_TABSTOPS];
+};
+
+int bch2_printbuf_make_room(struct printbuf *, unsigned);
+__printf(2, 3) void bch2_prt_printf(struct printbuf *out, const char *fmt, ...);
+__printf(2, 0) void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list);
+const char *bch2_printbuf_str(const struct printbuf *);
+void bch2_printbuf_exit(struct printbuf *);
+
+void bch2_printbuf_tabstops_reset(struct printbuf *);
+void bch2_printbuf_tabstop_pop(struct printbuf *);
+int bch2_printbuf_tabstop_push(struct printbuf *, unsigned);
+
+void bch2_printbuf_indent_add(struct printbuf *, unsigned);
+void bch2_printbuf_indent_sub(struct printbuf *, unsigned);
+
+void bch2_prt_newline(struct printbuf *);
+void bch2_prt_tab(struct printbuf *);
+void bch2_prt_tab_rjust(struct printbuf *);
+
+void bch2_prt_bytes_indented(struct printbuf *, const char *, unsigned);
+void bch2_prt_human_readable_u64(struct printbuf *, u64);
+void bch2_prt_human_readable_s64(struct printbuf *, s64);
+void bch2_prt_units_u64(struct printbuf *, u64);
+void bch2_prt_units_s64(struct printbuf *, s64);
+void bch2_prt_string_option(struct printbuf *, const char * const[], size_t);
+void bch2_prt_bitflags(struct printbuf *, const char * const[], u64);
+void bch2_prt_bitflags_vector(struct printbuf *, const char * const[],
+ unsigned long *, unsigned);
+
+/* Initializer for a heap allocated printbuf: */
+#define PRINTBUF ((struct printbuf) { .heap_allocated = true })
+
+/* Initializer for a printbuf that points to an external buffer: */
+#define PRINTBUF_EXTERN(_buf, _size) \
+((struct printbuf) { \
+ .buf = _buf, \
+ .size = _size, \
+})
+
+/*
+ * Returns size remaining of output buffer:
+ */
+static inline unsigned printbuf_remaining_size(struct printbuf *out)
+{
+ return out->pos < out->size ? out->size - out->pos : 0;
+}
+
+/*
+ * Returns number of characters we can print to the output buffer - i.e.
+ * excluding the terminating nul:
+ */
+static inline unsigned printbuf_remaining(struct printbuf *out)
+{
+ return out->pos < out->size ? out->size - out->pos - 1 : 0;
+}
+
+static inline unsigned printbuf_written(struct printbuf *out)
+{
+ return out->size ? min(out->pos, out->size - 1) : 0;
+}
+
+/*
+ * Returns true if output was truncated:
+ */
+static inline bool printbuf_overflowed(struct printbuf *out)
+{
+ return out->pos >= out->size;
+}
+
+static inline void printbuf_nul_terminate(struct printbuf *out)
+{
+ bch2_printbuf_make_room(out, 1);
+
+ if (out->pos < out->size)
+ out->buf[out->pos] = 0;
+ else if (out->size)
+ out->buf[out->size - 1] = 0;
+}
+
+/* Doesn't call bch2_printbuf_make_room(), doesn't nul terminate: */
+static inline void __prt_char_reserved(struct printbuf *out, char c)
+{
+ if (printbuf_remaining(out))
+ out->buf[out->pos] = c;
+ out->pos++;
+}
+
+/* Doesn't nul terminate: */
+static inline void __prt_char(struct printbuf *out, char c)
+{
+ bch2_printbuf_make_room(out, 1);
+ __prt_char_reserved(out, c);
+}
+
+static inline void prt_char(struct printbuf *out, char c)
+{
+ __prt_char(out, c);
+ printbuf_nul_terminate(out);
+}
+
+static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n)
+{
+ unsigned i, can_print = min(n, printbuf_remaining(out));
+
+ for (i = 0; i < can_print; i++)
+ out->buf[out->pos++] = c;
+ out->pos += n - can_print;
+}
+
+static inline void prt_chars(struct printbuf *out, char c, unsigned n)
+{
+ bch2_printbuf_make_room(out, n);
+ __prt_chars_reserved(out, c, n);
+ printbuf_nul_terminate(out);
+}
+
+static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n)
+{
+ unsigned i, can_print;
+
+ bch2_printbuf_make_room(out, n);
+
+ can_print = min(n, printbuf_remaining(out));
+
+ for (i = 0; i < can_print; i++)
+ out->buf[out->pos++] = ((char *) b)[i];
+ out->pos += n - can_print;
+
+ printbuf_nul_terminate(out);
+}
+
+static inline void prt_str(struct printbuf *out, const char *str)
+{
+ prt_bytes(out, str, strlen(str));
+}
+
+static inline void prt_str_indented(struct printbuf *out, const char *str)
+{
+ bch2_prt_bytes_indented(out, str, strlen(str));
+}
+
+static inline void prt_hex_byte(struct printbuf *out, u8 byte)
+{
+ bch2_printbuf_make_room(out, 2);
+ __prt_char_reserved(out, hex_asc_hi(byte));
+ __prt_char_reserved(out, hex_asc_lo(byte));
+ printbuf_nul_terminate(out);
+}
+
+static inline void prt_hex_byte_upper(struct printbuf *out, u8 byte)
+{
+ bch2_printbuf_make_room(out, 2);
+ __prt_char_reserved(out, hex_asc_upper_hi(byte));
+ __prt_char_reserved(out, hex_asc_upper_lo(byte));
+ printbuf_nul_terminate(out);
+}
+
+/**
+ * printbuf_reset - re-use a printbuf without freeing and re-initializing it:
+ */
+static inline void printbuf_reset(struct printbuf *buf)
+{
+ buf->pos = 0;
+ buf->allocation_failure = 0;
+ buf->indent = 0;
+ buf->nr_tabstops = 0;
+ buf->cur_tabstop = 0;
+}
+
+/**
+ * printbuf_atomic_inc - mark as entering an atomic section
+ */
+static inline void printbuf_atomic_inc(struct printbuf *buf)
+{
+ buf->atomic++;
+}
+
+/**
+ * printbuf_atomic_dec - mark as leaving an atomic section
+ */
+static inline void printbuf_atomic_dec(struct printbuf *buf)
+{
+ buf->atomic--;
+}
+
+#endif /* _BCACHEFS_PRINTBUF_H */
diff --git a/c_src/libbcachefs/quota.c b/c_src/libbcachefs/quota.c
new file mode 100644
index 00000000..e68b34ea
--- /dev/null
+++ b/c_src/libbcachefs/quota.c
@@ -0,0 +1,969 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "btree_update.h"
+#include "errcode.h"
+#include "error.h"
+#include "inode.h"
+#include "quota.h"
+#include "snapshot.h"
+#include "super-io.h"
+
+static const char * const bch2_quota_types[] = {
+ "user",
+ "group",
+ "project",
+};
+
+static const char * const bch2_quota_counters[] = {
+ "space",
+ "inodes",
+};
+
+static int bch2_sb_quota_validate(struct bch_sb *sb, struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_quota *q = field_to_type(f, quota);
+
+ if (vstruct_bytes(&q->field) < sizeof(*q)) {
+ prt_printf(err, "wrong size (got %zu should be %zu)",
+ vstruct_bytes(&q->field), sizeof(*q));
+ return -BCH_ERR_invalid_sb_quota;
+ }
+
+ return 0;
+}
+
+static void bch2_sb_quota_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_quota *q = field_to_type(f, quota);
+ unsigned qtyp, counter;
+
+ for (qtyp = 0; qtyp < ARRAY_SIZE(q->q); qtyp++) {
+ prt_printf(out, "%s: flags %llx",
+ bch2_quota_types[qtyp],
+ le64_to_cpu(q->q[qtyp].flags));
+
+ for (counter = 0; counter < Q_COUNTERS; counter++)
+ prt_printf(out, " %s timelimit %u warnlimit %u",
+ bch2_quota_counters[counter],
+ le32_to_cpu(q->q[qtyp].c[counter].timelimit),
+ le32_to_cpu(q->q[qtyp].c[counter].warnlimit));
+
+ prt_newline(out);
+ }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_quota = {
+ .validate = bch2_sb_quota_validate,
+ .to_text = bch2_sb_quota_to_text,
+};
+
+int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err,
+ quota_type_invalid,
+ "invalid quota type (%llu >= %u)",
+ k.k->p.inode, QTYP_NR);
+fsck_err:
+ return ret;
+}
+
+void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_quota dq = bkey_s_c_to_quota(k);
+ unsigned i;
+
+ for (i = 0; i < Q_COUNTERS; i++)
+ prt_printf(out, "%s hardlimit %llu softlimit %llu",
+ bch2_quota_counters[i],
+ le64_to_cpu(dq.v->c[i].hardlimit),
+ le64_to_cpu(dq.v->c[i].softlimit));
+}
+
+#ifdef CONFIG_BCACHEFS_QUOTA
+
+#include <linux/cred.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+
+static void qc_info_to_text(struct printbuf *out, struct qc_info *i)
+{
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 20);
+
+ prt_str(out, "i_fieldmask");
+ prt_tab(out);
+ prt_printf(out, "%x", i->i_fieldmask);
+ prt_newline(out);
+
+ prt_str(out, "i_flags");
+ prt_tab(out);
+ prt_printf(out, "%u", i->i_flags);
+ prt_newline(out);
+
+ prt_str(out, "i_spc_timelimit");
+ prt_tab(out);
+ prt_printf(out, "%u", i->i_spc_timelimit);
+ prt_newline(out);
+
+ prt_str(out, "i_ino_timelimit");
+ prt_tab(out);
+ prt_printf(out, "%u", i->i_ino_timelimit);
+ prt_newline(out);
+
+ prt_str(out, "i_rt_spc_timelimit");
+ prt_tab(out);
+ prt_printf(out, "%u", i->i_rt_spc_timelimit);
+ prt_newline(out);
+
+ prt_str(out, "i_spc_warnlimit");
+ prt_tab(out);
+ prt_printf(out, "%u", i->i_spc_warnlimit);
+ prt_newline(out);
+
+ prt_str(out, "i_ino_warnlimit");
+ prt_tab(out);
+ prt_printf(out, "%u", i->i_ino_warnlimit);
+ prt_newline(out);
+
+ prt_str(out, "i_rt_spc_warnlimit");
+ prt_tab(out);
+ prt_printf(out, "%u", i->i_rt_spc_warnlimit);
+ prt_newline(out);
+}
+
+static void qc_dqblk_to_text(struct printbuf *out, struct qc_dqblk *q)
+{
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 20);
+
+ prt_str(out, "d_fieldmask");
+ prt_tab(out);
+ prt_printf(out, "%x", q->d_fieldmask);
+ prt_newline(out);
+
+ prt_str(out, "d_spc_hardlimit");
+ prt_tab(out);
+ prt_printf(out, "%llu", q->d_spc_hardlimit);
+ prt_newline(out);
+
+ prt_str(out, "d_spc_softlimit");
+ prt_tab(out);
+ prt_printf(out, "%llu", q->d_spc_softlimit);
+ prt_newline(out);
+
+ prt_str(out, "d_ino_hardlimit");
+ prt_tab(out);
+ prt_printf(out, "%llu", q->d_ino_hardlimit);
+ prt_newline(out);
+
+ prt_str(out, "d_ino_softlimit");
+ prt_tab(out);
+ prt_printf(out, "%llu", q->d_ino_softlimit);
+ prt_newline(out);
+
+ prt_str(out, "d_space");
+ prt_tab(out);
+ prt_printf(out, "%llu", q->d_space);
+ prt_newline(out);
+
+ prt_str(out, "d_ino_count");
+ prt_tab(out);
+ prt_printf(out, "%llu", q->d_ino_count);
+ prt_newline(out);
+
+ prt_str(out, "d_ino_timer");
+ prt_tab(out);
+ prt_printf(out, "%llu", q->d_ino_timer);
+ prt_newline(out);
+
+ prt_str(out, "d_spc_timer");
+ prt_tab(out);
+ prt_printf(out, "%llu", q->d_spc_timer);
+ prt_newline(out);
+
+ prt_str(out, "d_ino_warns");
+ prt_tab(out);
+ prt_printf(out, "%i", q->d_ino_warns);
+ prt_newline(out);
+
+ prt_str(out, "d_spc_warns");
+ prt_tab(out);
+ prt_printf(out, "%i", q->d_spc_warns);
+ prt_newline(out);
+}
+
+static inline unsigned __next_qtype(unsigned i, unsigned qtypes)
+{
+ qtypes >>= i;
+ return qtypes ? i + __ffs(qtypes) : QTYP_NR;
+}
+
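+/*
+ * Iterate over each quota type whose bit is set in _qtypes, with _i set to the
+ * type index and _q to &(_c)->quotas[_i]; __next_qtype() skips straight to the
+ * next set bit via __ffs().
+ */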
+#define for_each_set_qtype(_c, _i, _q, _qtypes) \
+ for (_i = 0; \
+ (_i = __next_qtype(_i, _qtypes), \
+ _q = &(_c)->quotas[_i], \
+ _i < QTYP_NR); \
+ _i++)
+
+static bool ignore_hardlimit(struct bch_memquota_type *q)
+{
+ if (capable(CAP_SYS_RESOURCE))
+ return true;
+#if 0
+ struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
+
+ return capable(CAP_SYS_RESOURCE) &&
+ (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
+ !(info->dqi_flags & DQF_ROOT_SQUASH));
+#endif
+ return false;
+}
+
+enum quota_msg {
+ SOFTWARN, /* Softlimit reached */
+ SOFTLONGWARN, /* Grace time expired */
+ HARDWARN, /* Hardlimit reached */
+
+ HARDBELOW, /* Usage got below inode hardlimit */
+ SOFTBELOW, /* Usage got below inode softlimit */
+};
+
+static int quota_nl[][Q_COUNTERS] = {
+ [HARDWARN][Q_SPC] = QUOTA_NL_BHARDWARN,
+ [SOFTLONGWARN][Q_SPC] = QUOTA_NL_BSOFTLONGWARN,
+ [SOFTWARN][Q_SPC] = QUOTA_NL_BSOFTWARN,
+ [HARDBELOW][Q_SPC] = QUOTA_NL_BHARDBELOW,
+ [SOFTBELOW][Q_SPC] = QUOTA_NL_BSOFTBELOW,
+
+ [HARDWARN][Q_INO] = QUOTA_NL_IHARDWARN,
+ [SOFTLONGWARN][Q_INO] = QUOTA_NL_ISOFTLONGWARN,
+ [SOFTWARN][Q_INO] = QUOTA_NL_ISOFTWARN,
+ [HARDBELOW][Q_INO] = QUOTA_NL_IHARDBELOW,
+ [SOFTBELOW][Q_INO] = QUOTA_NL_ISOFTBELOW,
+};
+
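+/*
+ * Quota warnings are collected here while the per-type quota locks are held,
+ * and only sent (via flush_warnings() -> quota_send_warning()) after the locks
+ * are dropped:
+ */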
+struct quota_msgs {
+ u8 nr;
+ struct {
+ u8 qtype;
+ u8 msg;
+ } m[QTYP_NR * Q_COUNTERS];
+};
+
+static void prepare_msg(unsigned qtype,
+ enum quota_counters counter,
+ struct quota_msgs *msgs,
+ enum quota_msg msg_type)
+{
+ BUG_ON(msgs->nr >= ARRAY_SIZE(msgs->m));
+
+ msgs->m[msgs->nr].qtype = qtype;
+ msgs->m[msgs->nr].msg = quota_nl[msg_type][counter];
+ msgs->nr++;
+}
+
+static void prepare_warning(struct memquota_counter *qc,
+ unsigned qtype,
+ enum quota_counters counter,
+ struct quota_msgs *msgs,
+ enum quota_msg msg_type)
+{
+ if (qc->warning_issued & (1 << msg_type))
+ return;
+
+ prepare_msg(qtype, counter, msgs, msg_type);
+}
+
+static void flush_warnings(struct bch_qid qid,
+ struct super_block *sb,
+ struct quota_msgs *msgs)
+{
+ unsigned i;
+
+ for (i = 0; i < msgs->nr; i++)
+ quota_send_warning(make_kqid(&init_user_ns, msgs->m[i].qtype, qid.q[i]),
+ sb->s_dev, msgs->m[i].msg);
+}
+
+static int bch2_quota_check_limit(struct bch_fs *c,
+ unsigned qtype,
+ struct bch_memquota *mq,
+ struct quota_msgs *msgs,
+ enum quota_counters counter,
+ s64 v,
+ enum quota_acct_mode mode)
+{
+ struct bch_memquota_type *q = &c->quotas[qtype];
+ struct memquota_counter *qc = &mq->c[counter];
+ u64 n = qc->v + v;
+
+ BUG_ON((s64) n < 0);
+
+ if (mode == KEY_TYPE_QUOTA_NOCHECK)
+ return 0;
+
+ if (v <= 0) {
+ if (n < qc->hardlimit &&
+ (qc->warning_issued & (1 << HARDWARN))) {
+ qc->warning_issued &= ~(1 << HARDWARN);
+ prepare_msg(qtype, counter, msgs, HARDBELOW);
+ }
+
+ if (n < qc->softlimit &&
+ (qc->warning_issued & (1 << SOFTWARN))) {
+ qc->warning_issued &= ~(1 << SOFTWARN);
+ prepare_msg(qtype, counter, msgs, SOFTBELOW);
+ }
+
+ qc->warning_issued = 0;
+ return 0;
+ }
+
+ if (qc->hardlimit &&
+ qc->hardlimit < n &&
+ !ignore_hardlimit(q)) {
+ prepare_warning(qc, qtype, counter, msgs, HARDWARN);
+ return -EDQUOT;
+ }
+
+ if (qc->softlimit &&
+ qc->softlimit < n) {
+ if (qc->timer == 0) {
+ qc->timer = ktime_get_real_seconds() + q->limits[counter].timelimit;
+ prepare_warning(qc, qtype, counter, msgs, SOFTWARN);
+ } else if (ktime_get_real_seconds() >= qc->timer &&
+ !ignore_hardlimit(q)) {
+ prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN);
+ return -EDQUOT;
+ }
+ }
+
+ return 0;
+}
+
+int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid,
+ enum quota_counters counter, s64 v,
+ enum quota_acct_mode mode)
+{
+ unsigned qtypes = enabled_qtypes(c);
+ struct bch_memquota_type *q;
+ struct bch_memquota *mq[QTYP_NR];
+ struct quota_msgs msgs;
+ unsigned i;
+ int ret = 0;
+
+ memset(&msgs, 0, sizeof(msgs));
+
+ for_each_set_qtype(c, i, q, qtypes) {
+ mq[i] = genradix_ptr_alloc(&q->table, qid.q[i], GFP_KERNEL);
+ if (!mq[i])
+ return -ENOMEM;
+ }
+
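+	/*
+	 * Take all enabled per-type quota locks in qtype order;
+	 * mutex_lock_nested() uses the qtype index as the lockdep subclass so
+	 * holding several of these locks at once doesn't trip lockdep:
+	 */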
+ for_each_set_qtype(c, i, q, qtypes)
+ mutex_lock_nested(&q->lock, i);
+
+ for_each_set_qtype(c, i, q, qtypes) {
+ ret = bch2_quota_check_limit(c, i, mq[i], &msgs, counter, v, mode);
+ if (ret)
+ goto err;
+ }
+
+ for_each_set_qtype(c, i, q, qtypes)
+ mq[i]->c[counter].v += v;
+err:
+ for_each_set_qtype(c, i, q, qtypes)
+ mutex_unlock(&q->lock);
+
+ flush_warnings(qid, c->vfs_sb, &msgs);
+
+ return ret;
+}
+
+static void __bch2_quota_transfer(struct bch_memquota *src_q,
+ struct bch_memquota *dst_q,
+ enum quota_counters counter, s64 v)
+{
+ BUG_ON(v > src_q->c[counter].v);
+ BUG_ON(v + dst_q->c[counter].v < v);
+
+ src_q->c[counter].v -= v;
+ dst_q->c[counter].v += v;
+}
+
+int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes,
+ struct bch_qid dst,
+ struct bch_qid src, u64 space,
+ enum quota_acct_mode mode)
+{
+ struct bch_memquota_type *q;
+ struct bch_memquota *src_q[3], *dst_q[3];
+ struct quota_msgs msgs;
+ unsigned i;
+ int ret = 0;
+
+ qtypes &= enabled_qtypes(c);
+
+ memset(&msgs, 0, sizeof(msgs));
+
+ for_each_set_qtype(c, i, q, qtypes) {
+ src_q[i] = genradix_ptr_alloc(&q->table, src.q[i], GFP_KERNEL);
+ dst_q[i] = genradix_ptr_alloc(&q->table, dst.q[i], GFP_KERNEL);
+ if (!src_q[i] || !dst_q[i])
+ return -ENOMEM;
+ }
+
+ for_each_set_qtype(c, i, q, qtypes)
+ mutex_lock_nested(&q->lock, i);
+
+ for_each_set_qtype(c, i, q, qtypes) {
+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC,
+ dst_q[i]->c[Q_SPC].v + space,
+ mode);
+ if (ret)
+ goto err;
+
+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO,
+ dst_q[i]->c[Q_INO].v + 1,
+ mode);
+ if (ret)
+ goto err;
+ }
+
+ for_each_set_qtype(c, i, q, qtypes) {
+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_SPC, space);
+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_INO, 1);
+ }
+
+err:
+ for_each_set_qtype(c, i, q, qtypes)
+ mutex_unlock(&q->lock);
+
+ flush_warnings(dst, c->vfs_sb, &msgs);
+
+ return ret;
+}
+
+static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k,
+ struct qc_dqblk *qdq)
+{
+ struct bkey_s_c_quota dq;
+ struct bch_memquota_type *q;
+ struct bch_memquota *mq;
+ unsigned i;
+
+ BUG_ON(k.k->p.inode >= QTYP_NR);
+
+ if (!((1U << k.k->p.inode) & enabled_qtypes(c)))
+ return 0;
+
+ switch (k.k->type) {
+ case KEY_TYPE_quota:
+ dq = bkey_s_c_to_quota(k);
+ q = &c->quotas[k.k->p.inode];
+
+ mutex_lock(&q->lock);
+ mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL);
+ if (!mq) {
+ mutex_unlock(&q->lock);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < Q_COUNTERS; i++) {
+ mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit);
+ mq->c[i].softlimit = le64_to_cpu(dq.v->c[i].softlimit);
+ }
+
+ if (qdq && qdq->d_fieldmask & QC_SPC_TIMER)
+ mq->c[Q_SPC].timer = qdq->d_spc_timer;
+ if (qdq && qdq->d_fieldmask & QC_SPC_WARNS)
+ mq->c[Q_SPC].warns = qdq->d_spc_warns;
+ if (qdq && qdq->d_fieldmask & QC_INO_TIMER)
+ mq->c[Q_INO].timer = qdq->d_ino_timer;
+ if (qdq && qdq->d_fieldmask & QC_INO_WARNS)
+ mq->c[Q_INO].warns = qdq->d_ino_warns;
+
+ mutex_unlock(&q->lock);
+ }
+
+ return 0;
+}
+
+void bch2_fs_quota_exit(struct bch_fs *c)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++)
+ genradix_free(&c->quotas[i].table);
+}
+
+void bch2_fs_quota_init(struct bch_fs *c)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++)
+ mutex_init(&c->quotas[i].lock);
+}
+
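+/*
+ * Get the quota section of the superblock, creating it with a default
+ * timelimit (grace period) of 7 days (7 * 24 * 60 * 60 seconds) if it doesn't
+ * exist yet:
+ */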
+static struct bch_sb_field_quota *bch2_sb_get_or_create_quota(struct bch_sb_handle *sb)
+{
+ struct bch_sb_field_quota *sb_quota = bch2_sb_field_get(sb->sb, quota);
+
+ if (sb_quota)
+ return sb_quota;
+
+ sb_quota = bch2_sb_field_resize(sb, quota, sizeof(*sb_quota) / sizeof(u64));
+ if (sb_quota) {
+ unsigned qtype, qc;
+
+ for (qtype = 0; qtype < QTYP_NR; qtype++)
+ for (qc = 0; qc < Q_COUNTERS; qc++)
+ sb_quota->q[qtype].c[qc].timelimit =
+ cpu_to_le32(7 * 24 * 60 * 60);
+ }
+
+ return sb_quota;
+}
+
+static void bch2_sb_quota_read(struct bch_fs *c)
+{
+ struct bch_sb_field_quota *sb_quota;
+ unsigned i, j;
+
+ sb_quota = bch2_sb_field_get(c->disk_sb.sb, quota);
+ if (!sb_quota)
+ return;
+
+ for (i = 0; i < QTYP_NR; i++) {
+ struct bch_memquota_type *q = &c->quotas[i];
+
+ for (j = 0; j < Q_COUNTERS; j++) {
+ q->limits[j].timelimit =
+ le32_to_cpu(sb_quota->q[i].c[j].timelimit);
+ q->limits[j].warnlimit =
+ le32_to_cpu(sb_quota->q[i].c[j].warnlimit);
+ }
+ }
+}
+
+static int bch2_fs_quota_read_inode(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked u;
+ struct bch_snapshot_tree s_t;
+ int ret;
+
+ ret = bch2_snapshot_tree_lookup(trans,
+ bch2_snapshot_tree(c, k.k->p.snapshot), &s_t);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+ "%s: snapshot tree %u not found", __func__,
+ snapshot_t(c, k.k->p.snapshot)->tree);
+ if (ret)
+ return ret;
+
+ if (!s_t.master_subvol)
+ goto advance;
+
+ ret = bch2_inode_find_by_inum_nowarn_trans(trans,
+ (subvol_inum) {
+ le32_to_cpu(s_t.master_subvol),
+ k.k->p.offset,
+ }, &u);
+ /*
+ * Inode might be deleted in this snapshot - the easiest way to handle
+ * that is to just skip it here:
+ */
+ if (bch2_err_matches(ret, ENOENT))
+ goto advance;
+
+ if (ret)
+ return ret;
+
+ bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors,
+ KEY_TYPE_QUOTA_NOCHECK);
+ bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
+ KEY_TYPE_QUOTA_NOCHECK);
+advance:
+ bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
+ return 0;
+}
+
+int bch2_fs_quota_read(struct bch_fs *c)
+{
+
+ mutex_lock(&c->sb_lock);
+ struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
+ if (!sb_quota) {
+ mutex_unlock(&c->sb_lock);
+ return -BCH_ERR_ENOSPC_sb_quota;
+ }
+
+ bch2_sb_quota_read(c);
+ mutex_unlock(&c->sb_lock);
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ __bch2_quota_set(c, k, NULL)) ?:
+ for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ bch2_fs_quota_read_inode(trans, &iter, k)));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+/* Enable/disable/delete quotas for an entire filesystem: */
+
+static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ struct bch_sb_field_quota *sb_quota;
+ int ret = 0;
+
+ if (sb->s_flags & SB_RDONLY)
+ return -EROFS;
+
+ /* Accounting must be enabled at mount time: */
+ if (uflags & (FS_QUOTA_UDQ_ACCT|FS_QUOTA_GDQ_ACCT|FS_QUOTA_PDQ_ACCT))
+ return -EINVAL;
+
+ /* Can't enable enforcement without accounting: */
+ if ((uflags & FS_QUOTA_UDQ_ENFD) && !c->opts.usrquota)
+ return -EINVAL;
+
+ if ((uflags & FS_QUOTA_GDQ_ENFD) && !c->opts.grpquota)
+ return -EINVAL;
+
+ if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota)
+ return -EINVAL;
+
+ mutex_lock(&c->sb_lock);
+ sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
+ if (!sb_quota) {
+ ret = -BCH_ERR_ENOSPC_sb_quota;
+ goto unlock;
+ }
+
+ if (uflags & FS_QUOTA_UDQ_ENFD)
+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, true);
+
+ if (uflags & FS_QUOTA_GDQ_ENFD)
+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, true);
+
+ if (uflags & FS_QUOTA_PDQ_ENFD)
+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true);
+
+ bch2_write_super(c);
+unlock:
+ mutex_unlock(&c->sb_lock);
+
+ return bch2_err_class(ret);
+}
+
+static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
+{
+ struct bch_fs *c = sb->s_fs_info;
+
+ if (sb->s_flags & SB_RDONLY)
+ return -EROFS;
+
+ mutex_lock(&c->sb_lock);
+ if (uflags & FS_QUOTA_UDQ_ENFD)
+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false);
+
+ if (uflags & FS_QUOTA_GDQ_ENFD)
+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, false);
+
+ if (uflags & FS_QUOTA_PDQ_ENFD)
+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false);
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ return 0;
+}
+
+static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ int ret;
+
+ if (sb->s_flags & SB_RDONLY)
+ return -EROFS;
+
+ if (uflags & FS_USER_QUOTA) {
+ if (c->opts.usrquota)
+ return -EINVAL;
+
+ ret = bch2_btree_delete_range(c, BTREE_ID_quotas,
+ POS(QTYP_USR, 0),
+ POS(QTYP_USR, U64_MAX),
+ 0, NULL);
+ if (ret)
+ return ret;
+ }
+
+ if (uflags & FS_GROUP_QUOTA) {
+ if (c->opts.grpquota)
+ return -EINVAL;
+
+ ret = bch2_btree_delete_range(c, BTREE_ID_quotas,
+ POS(QTYP_GRP, 0),
+ POS(QTYP_GRP, U64_MAX),
+ 0, NULL);
+ if (ret)
+ return ret;
+ }
+
+ if (uflags & FS_PROJ_QUOTA) {
+ if (c->opts.prjquota)
+ return -EINVAL;
+
+ ret = bch2_btree_delete_range(c, BTREE_ID_quotas,
+ POS(QTYP_PRJ, 0),
+ POS(QTYP_PRJ, U64_MAX),
+ 0, NULL);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Return quota status information, such as enforcements, quota file inode
+ * numbers etc.
+ */
+static int bch2_quota_get_state(struct super_block *sb, struct qc_state *state)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ unsigned qtypes = enabled_qtypes(c);
+ unsigned i;
+
+ memset(state, 0, sizeof(*state));
+
+ for (i = 0; i < QTYP_NR; i++) {
+ state->s_state[i].flags |= QCI_SYSFILE;
+
+ if (!(qtypes & (1 << i)))
+ continue;
+
+ state->s_state[i].flags |= QCI_ACCT_ENABLED;
+
+ state->s_state[i].spc_timelimit = c->quotas[i].limits[Q_SPC].timelimit;
+ state->s_state[i].spc_warnlimit = c->quotas[i].limits[Q_SPC].warnlimit;
+
+ state->s_state[i].ino_timelimit = c->quotas[i].limits[Q_INO].timelimit;
+ state->s_state[i].ino_warnlimit = c->quotas[i].limits[Q_INO].warnlimit;
+ }
+
+ return 0;
+}
+
+/*
+ * Adjust quota timers & warnings
+ */
+static int bch2_quota_set_info(struct super_block *sb, int type,
+ struct qc_info *info)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ struct bch_sb_field_quota *sb_quota;
+ int ret = 0;
+
+ if (0) {
+ struct printbuf buf = PRINTBUF;
+
+ qc_info_to_text(&buf, info);
+ pr_info("setting:\n%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ if (sb->s_flags & SB_RDONLY)
+ return -EROFS;
+
+ if (type >= QTYP_NR)
+ return -EINVAL;
+
+ if (!((1 << type) & enabled_qtypes(c)))
+ return -ESRCH;
+
+ if (info->i_fieldmask &
+ ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS))
+ return -EINVAL;
+
+ mutex_lock(&c->sb_lock);
+ sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb);
+ if (!sb_quota) {
+ ret = -BCH_ERR_ENOSPC_sb_quota;
+ goto unlock;
+ }
+
+ if (info->i_fieldmask & QC_SPC_TIMER)
+ sb_quota->q[type].c[Q_SPC].timelimit =
+ cpu_to_le32(info->i_spc_timelimit);
+
+ if (info->i_fieldmask & QC_SPC_WARNS)
+ sb_quota->q[type].c[Q_SPC].warnlimit =
+ cpu_to_le32(info->i_spc_warnlimit);
+
+ if (info->i_fieldmask & QC_INO_TIMER)
+ sb_quota->q[type].c[Q_INO].timelimit =
+ cpu_to_le32(info->i_ino_timelimit);
+
+ if (info->i_fieldmask & QC_INO_WARNS)
+ sb_quota->q[type].c[Q_INO].warnlimit =
+ cpu_to_le32(info->i_ino_warnlimit);
+
+ bch2_sb_quota_read(c);
+
+ bch2_write_super(c);
+unlock:
+ mutex_unlock(&c->sb_lock);
+
+ return bch2_err_class(ret);
+}
+
+/* Get/set individual quotas: */
+
+static void __bch2_quota_get(struct qc_dqblk *dst, struct bch_memquota *src)
+{
+ dst->d_space = src->c[Q_SPC].v << 9;
+ dst->d_spc_hardlimit = src->c[Q_SPC].hardlimit << 9;
+ dst->d_spc_softlimit = src->c[Q_SPC].softlimit << 9;
+ dst->d_spc_timer = src->c[Q_SPC].timer;
+ dst->d_spc_warns = src->c[Q_SPC].warns;
+
+ dst->d_ino_count = src->c[Q_INO].v;
+ dst->d_ino_hardlimit = src->c[Q_INO].hardlimit;
+ dst->d_ino_softlimit = src->c[Q_INO].softlimit;
+ dst->d_ino_timer = src->c[Q_INO].timer;
+ dst->d_ino_warns = src->c[Q_INO].warns;
+}
+
+static int bch2_get_quota(struct super_block *sb, struct kqid kqid,
+ struct qc_dqblk *qdq)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ struct bch_memquota_type *q = &c->quotas[kqid.type];
+ qid_t qid = from_kqid(&init_user_ns, kqid);
+ struct bch_memquota *mq;
+
+ memset(qdq, 0, sizeof(*qdq));
+
+ mutex_lock(&q->lock);
+ mq = genradix_ptr(&q->table, qid);
+ if (mq)
+ __bch2_quota_get(qdq, mq);
+ mutex_unlock(&q->lock);
+
+ return 0;
+}
+
+static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid,
+ struct qc_dqblk *qdq)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ struct bch_memquota_type *q = &c->quotas[kqid->type];
+ qid_t qid = from_kqid(&init_user_ns, *kqid);
+ struct genradix_iter iter;
+ struct bch_memquota *mq;
+ int ret = 0;
+
+ mutex_lock(&q->lock);
+
+ genradix_for_each_from(&q->table, iter, mq, qid)
+ if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) {
+ __bch2_quota_get(qdq, mq);
+ *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos);
+ goto found;
+ }
+
+ ret = -ENOENT;
+found:
+ mutex_unlock(&q->lock);
+ return bch2_err_class(ret);
+}
+
+static int bch2_set_quota_trans(struct btree_trans *trans,
+ struct bkey_i_quota *new_quota,
+ struct qc_dqblk *qdq)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_quotas, new_quota->k.p,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ ret = bkey_err(k);
+ if (unlikely(ret))
+ return ret;
+
+ if (k.k->type == KEY_TYPE_quota)
+ new_quota->v = *bkey_s_c_to_quota(k).v;
+
+ if (qdq->d_fieldmask & QC_SPC_SOFT)
+ new_quota->v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
+ if (qdq->d_fieldmask & QC_SPC_HARD)
+ new_quota->v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
+
+ if (qdq->d_fieldmask & QC_INO_SOFT)
+ new_quota->v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
+ if (qdq->d_fieldmask & QC_INO_HARD)
+ new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
+
+ ret = bch2_trans_update(trans, &iter, &new_quota->k_i, 0);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int bch2_set_quota(struct super_block *sb, struct kqid qid,
+ struct qc_dqblk *qdq)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ struct bkey_i_quota new_quota;
+ int ret;
+
+ if (0) {
+ struct printbuf buf = PRINTBUF;
+
+ qc_dqblk_to_text(&buf, qdq);
+ pr_info("setting:\n%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ if (sb->s_flags & SB_RDONLY)
+ return -EROFS;
+
+ bkey_quota_init(&new_quota.k_i);
+ new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
+
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_set_quota_trans(trans, &new_quota, qdq)) ?:
+ __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq);
+
+ return bch2_err_class(ret);
+}
+
+const struct quotactl_ops bch2_quotactl_operations = {
+ .quota_enable = bch2_quota_enable,
+ .quota_disable = bch2_quota_disable,
+ .rm_xquota = bch2_quota_remove,
+
+ .get_state = bch2_quota_get_state,
+ .set_info = bch2_quota_set_info,
+
+ .get_dqblk = bch2_get_quota,
+ .get_nextdqblk = bch2_get_next_quota,
+ .set_dqblk = bch2_set_quota,
+};
+
+#endif /* CONFIG_BCACHEFS_QUOTA */
diff --git a/c_src/libbcachefs/quota.h b/c_src/libbcachefs/quota.h
new file mode 100644
index 00000000..884f601f
--- /dev/null
+++ b/c_src/libbcachefs/quota.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_QUOTA_H
+#define _BCACHEFS_QUOTA_H
+
+#include "inode.h"
+#include "quota_types.h"
+
+enum bkey_invalid_flags;
+extern const struct bch_sb_field_ops bch_sb_field_ops_quota;
+
+int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_quota ((struct bkey_ops) { \
+ .key_invalid = bch2_quota_invalid, \
+ .val_to_text = bch2_quota_to_text, \
+ .min_val_size = 32, \
+})
+
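+/*
+ * Map an inode to the quota IDs it's accounted under; note that bi_project is
+ * apparently stored with a +1 bias, 0 meaning "no project", hence the -1:
+ */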
+static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u)
+{
+ return (struct bch_qid) {
+ .q[QTYP_USR] = u->bi_uid,
+ .q[QTYP_GRP] = u->bi_gid,
+ .q[QTYP_PRJ] = u->bi_project ? u->bi_project - 1 : 0,
+ };
+}
+
+static inline unsigned enabled_qtypes(struct bch_fs *c)
+{
+ return ((c->opts.usrquota << QTYP_USR)|
+ (c->opts.grpquota << QTYP_GRP)|
+ (c->opts.prjquota << QTYP_PRJ));
+}
+
+#ifdef CONFIG_BCACHEFS_QUOTA
+
+int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters,
+ s64, enum quota_acct_mode);
+
+int bch2_quota_transfer(struct bch_fs *, unsigned, struct bch_qid,
+ struct bch_qid, u64, enum quota_acct_mode);
+
+void bch2_fs_quota_exit(struct bch_fs *);
+void bch2_fs_quota_init(struct bch_fs *);
+int bch2_fs_quota_read(struct bch_fs *);
+
+extern const struct quotactl_ops bch2_quotactl_operations;
+
+#else
+
+static inline int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid,
+ enum quota_counters counter, s64 v,
+ enum quota_acct_mode mode)
+{
+ return 0;
+}
+
+static inline int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes,
+ struct bch_qid dst,
+ struct bch_qid src, u64 space,
+ enum quota_acct_mode mode)
+{
+ return 0;
+}
+
+static inline void bch2_fs_quota_exit(struct bch_fs *c) {}
+static inline void bch2_fs_quota_init(struct bch_fs *c) {}
+static inline int bch2_fs_quota_read(struct bch_fs *c) { return 0; }
+
+#endif
+
+#endif /* _BCACHEFS_QUOTA_H */
diff --git a/c_src/libbcachefs/quota_types.h b/c_src/libbcachefs/quota_types.h
new file mode 100644
index 00000000..6a136083
--- /dev/null
+++ b/c_src/libbcachefs/quota_types.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_QUOTA_TYPES_H
+#define _BCACHEFS_QUOTA_TYPES_H
+
+#include <linux/generic-radix-tree.h>
+
+struct bch_qid {
+ u32 q[QTYP_NR];
+};
+
+enum quota_acct_mode {
+ KEY_TYPE_QUOTA_PREALLOC,
+ KEY_TYPE_QUOTA_WARN,
+ KEY_TYPE_QUOTA_NOCHECK,
+};
+
+struct memquota_counter {
+ u64 v;
+ u64 hardlimit;
+ u64 softlimit;
+ s64 timer;
+ int warns;
+ int warning_issued;
+};
+
+struct bch_memquota {
+ struct memquota_counter c[Q_COUNTERS];
+};
+
+typedef GENRADIX(struct bch_memquota) bch_memquota_table;
+
+struct quota_limit {
+ u32 timelimit;
+ u32 warnlimit;
+};
+
+struct bch_memquota_type {
+ struct quota_limit limits[Q_COUNTERS];
+ bch_memquota_table table;
+ struct mutex lock;
+};
+
+#endif /* _BCACHEFS_QUOTA_TYPES_H */
diff --git a/c_src/libbcachefs/rebalance.c b/c_src/libbcachefs/rebalance.c
new file mode 100644
index 00000000..95f46cb3
--- /dev/null
+++ b/c_src/libbcachefs/rebalance.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "btree_iter.h"
+#include "btree_update.h"
+#include "btree_write_buffer.h"
+#include "buckets.h"
+#include "clock.h"
+#include "compress.h"
+#include "disk_groups.h"
+#include "errcode.h"
+#include "error.h"
+#include "inode.h"
+#include "move.h"
+#include "rebalance.h"
+#include "subvolume.h"
+#include "super-io.h"
+#include "trace.h"
+
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/sched/cputime.h>
+
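+/*
+ * The rebalance_work btree holds the positions of extents that need work, plus
+ * KEY_TYPE_cookie entries at this offset requesting a scan of a single inode
+ * (inode 0 meaning the whole filesystem):
+ */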
+#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1)
+
+static const char * const bch2_rebalance_state_strs[] = {
+#define x(t) #t,
+ BCH_REBALANCE_STATES()
+ NULL
+#undef x
+};
+
+static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 inum)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_i_cookie *cookie;
+ u64 v;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
+ SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
+ BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ v = k.k->type == KEY_TYPE_cookie
+ ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
+ : 0;
+
+ cookie = bch2_trans_kmalloc(trans, sizeof(*cookie));
+ ret = PTR_ERR_OR_ZERO(cookie);
+ if (ret)
+ goto err;
+
+ bkey_cookie_init(&cookie->k_i);
+ cookie->k.p = iter.pos;
+ cookie->v.cookie = cpu_to_le64(v + 1);
+
+ ret = bch2_trans_update(trans, &iter, &cookie->k_i, 0);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
+{
+ int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
+ __bch2_set_rebalance_needs_scan(trans, inum));
+ rebalance_wakeup(c);
+ return ret;
+}
+
+int bch2_set_fs_needs_rebalance(struct bch_fs *c)
+{
+ return bch2_set_rebalance_needs_scan(c, 0);
+}
+
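+/*
+ * The cookie value is bumped each time a scan is requested, and the entry is
+ * only deleted below if the cookie still matches - so a scan request that
+ * races with an in-flight scan isn't lost:
+ */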
+static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ u64 v;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
+ SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
+ BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ v = k.k->type == KEY_TYPE_cookie
+ ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)
+ : 0;
+
+ if (v == cookie)
+ ret = bch2_btree_delete_at(trans, &iter, 0);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans,
+ struct btree_iter *work_iter)
+{
+ return !kthread_should_stop()
+ ? bch2_btree_iter_peek(work_iter)
+ : bkey_s_c_null;
+}
+
+static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0);
+ int ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ extent_entry_drop(bkey_i_to_s(n),
+ (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n)));
+ return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+}
+
+static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
+ struct bpos work_pos,
+ struct btree_iter *extent_iter,
+ struct data_update_opts *data_opts)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c k;
+
+ bch2_trans_iter_exit(trans, extent_iter);
+ bch2_trans_iter_init(trans, extent_iter,
+ work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink,
+ work_pos,
+ BTREE_ITER_ALL_SNAPSHOTS);
+ k = bch2_btree_iter_peek_slot(extent_iter);
+ if (bkey_err(k))
+ return k;
+
+ const struct bch_extent_rebalance *r = k.k ? bch2_bkey_rebalance_opts(k) : NULL;
+ if (!r) {
+ /* raced due to btree write buffer, nothing to do */
+ return bkey_s_c_null;
+ }
+
+ memset(data_opts, 0, sizeof(*data_opts));
+
+ data_opts->rewrite_ptrs =
+ bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression);
+ data_opts->target = r->target;
+
+ if (!data_opts->rewrite_ptrs) {
+ /*
+		 * Is the device we would want to write to offline? Have the
+		 * devices in the target changed?
+ *
+ * We'll now need a full scan before this extent is picked up
+ * again:
+ */
+ int ret = bch2_bkey_clear_needs_rebalance(trans, extent_iter, k);
+ if (ret)
+ return bkey_s_c_err(ret);
+ return bkey_s_c_null;
+ }
+
+ if (trace_rebalance_extent_enabled()) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "target=");
+ bch2_target_to_text(&buf, c, r->target);
+ prt_str(&buf, " compression=");
+ struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
+ prt_str(&buf, bch2_compression_opts[opt.type]);
+ prt_str(&buf, " ");
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ trace_rebalance_extent(c, buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ return k;
+}
+
+noinline_for_stack
+static int do_rebalance_extent(struct moving_context *ctxt,
+ struct bpos work_pos,
+ struct btree_iter *extent_iter)
+{
+ struct btree_trans *trans = ctxt->trans;
+ struct bch_fs *c = trans->c;
+ struct bch_fs_rebalance *r = &trans->c->rebalance;
+ struct data_update_opts data_opts;
+ struct bch_io_opts io_opts;
+ struct bkey_s_c k;
+ struct bkey_buf sk;
+ int ret;
+
+ ctxt->stats = &r->work_stats;
+ r->state = BCH_REBALANCE_working;
+
+ bch2_bkey_buf_init(&sk);
+
+ ret = bkey_err(k = next_rebalance_extent(trans, work_pos,
+ extent_iter, &data_opts));
+ if (ret || !k.k)
+ goto out;
+
+ ret = bch2_move_get_io_opts_one(trans, &io_opts, k);
+ if (ret)
+ goto out;
+
+ atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
+
+ /*
+ * The iterator gets unlocked by __bch2_read_extent - need to
+ * save a copy of @k elsewhere:
+ */
+ bch2_bkey_buf_reassemble(&sk, c, k);
+ k = bkey_i_to_s_c(sk.k);
+
+ ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts);
+ if (ret) {
+ if (bch2_err_matches(ret, ENOMEM)) {
+ /* memory allocation failure, wait for some IO to finish */
+ bch2_move_ctxt_wait_for_io(ctxt);
+ ret = -BCH_ERR_transaction_restart_nested;
+ }
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto out;
+
+ /* skip it and continue, XXX signal failure */
+ ret = 0;
+ }
+out:
+ bch2_bkey_buf_exit(&sk, c);
+ return ret;
+}
+
+static bool rebalance_pred(struct bch_fs *c, void *arg,
+ struct bkey_s_c k,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ unsigned target, compression;
+
+ if (k.k->p.inode) {
+ target = io_opts->background_target;
+ compression = io_opts->background_compression ?: io_opts->compression;
+ } else {
+ const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
+
+ target = r ? r->target : io_opts->background_target;
+ compression = r ? r->compression :
+ (io_opts->background_compression ?: io_opts->compression);
+ }
+
+ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression);
+ data_opts->target = target;
+ return data_opts->rewrite_ptrs != 0;
+}
+
+static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
+{
+ struct btree_trans *trans = ctxt->trans;
+ struct bch_fs_rebalance *r = &trans->c->rebalance;
+ int ret;
+
+ bch2_move_stats_init(&r->scan_stats, "rebalance_scan");
+ ctxt->stats = &r->scan_stats;
+
+ if (!inum) {
+ r->scan_start = BBPOS_MIN;
+ r->scan_end = BBPOS_MAX;
+ } else {
+ r->scan_start = BBPOS(BTREE_ID_extents, POS(inum, 0));
+ r->scan_end = BBPOS(BTREE_ID_extents, POS(inum, U64_MAX));
+ }
+
+ r->state = BCH_REBALANCE_scanning;
+
+ ret = __bch2_move_data(ctxt, r->scan_start, r->scan_end, rebalance_pred, NULL) ?:
+ commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_clear_rebalance_needs_scan(trans, inum, cookie));
+
+ bch2_move_stats_exit(&r->scan_stats, trans->c);
+ return ret;
+}
+
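+/*
+ * Nothing left to do: wait on the write IO clock until roughly 1/64th of the
+ * smallest rw member's capacity worth of writes has gone by (with a fixed
+ * fallback when no capacity is known) before looking for more work.
+ */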
+static void rebalance_wait(struct bch_fs *c)
+{
+ struct bch_fs_rebalance *r = &c->rebalance;
+ struct io_clock *clock = &c->io_clock[WRITE];
+ u64 now = atomic64_read(&clock->now);
+ u64 min_member_capacity = bch2_min_rw_member_capacity(c);
+
+ if (min_member_capacity == U64_MAX)
+ min_member_capacity = 128 * 2048;
+
+ r->wait_iotime_end = now + (min_member_capacity >> 6);
+
+ if (r->state != BCH_REBALANCE_waiting) {
+ r->wait_iotime_start = now;
+ r->wait_wallclock_start = ktime_get_real_ns();
+ r->state = BCH_REBALANCE_waiting;
+ }
+
+ bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT);
+}
+
+static int do_rebalance(struct moving_context *ctxt)
+{
+ struct btree_trans *trans = ctxt->trans;
+ struct bch_fs *c = trans->c;
+ struct bch_fs_rebalance *r = &c->rebalance;
+ struct btree_iter rebalance_work_iter, extent_iter = { NULL };
+ struct bkey_s_c k;
+ int ret = 0;
+
+ bch2_move_stats_init(&r->work_stats, "rebalance_work");
+ bch2_move_stats_init(&r->scan_stats, "rebalance_scan");
+
+ bch2_trans_iter_init(trans, &rebalance_work_iter,
+ BTREE_ID_rebalance_work, POS_MIN,
+ BTREE_ITER_ALL_SNAPSHOTS);
+
+ while (!bch2_move_ratelimit(ctxt)) {
+ if (!r->enabled) {
+ bch2_moving_ctxt_flush_all(ctxt);
+ kthread_wait_freezable(r->enabled ||
+ kthread_should_stop());
+ }
+
+ if (kthread_should_stop())
+ break;
+
+ bch2_trans_begin(trans);
+
+ ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter));
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret || !k.k)
+ break;
+
+ ret = k.k->type == KEY_TYPE_cookie
+ ? do_rebalance_scan(ctxt, k.k->p.inode,
+ le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie))
+ : do_rebalance_extent(ctxt, k.k->p, &extent_iter);
+
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
+ break;
+
+ bch2_btree_iter_advance(&rebalance_work_iter);
+ }
+
+ bch2_trans_iter_exit(trans, &extent_iter);
+ bch2_trans_iter_exit(trans, &rebalance_work_iter);
+ bch2_move_stats_exit(&r->scan_stats, c);
+
+ if (!ret &&
+ !kthread_should_stop() &&
+ !atomic64_read(&r->work_stats.sectors_seen) &&
+ !atomic64_read(&r->scan_stats.sectors_seen)) {
+ bch2_trans_unlock_long(trans);
+ rebalance_wait(c);
+ }
+
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int bch2_rebalance_thread(void *arg)
+{
+ struct bch_fs *c = arg;
+ struct bch_fs_rebalance *r = &c->rebalance;
+ struct moving_context ctxt;
+ int ret;
+
+ set_freezable();
+
+ bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
+ writepoint_ptr(&c->rebalance_write_point),
+ true);
+
+ while (!kthread_should_stop() &&
+ !(ret = do_rebalance(&ctxt)))
+ ;
+
+ bch2_moving_ctxt_exit(&ctxt);
+
+ return 0;
+}
+
+void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct bch_fs_rebalance *r = &c->rebalance;
+
+ prt_str(out, bch2_rebalance_state_strs[r->state]);
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+
+ switch (r->state) {
+ case BCH_REBALANCE_waiting: {
+ u64 now = atomic64_read(&c->io_clock[WRITE].now);
+
+ prt_str(out, "io wait duration: ");
+ bch2_prt_human_readable_s64(out, r->wait_iotime_end - r->wait_iotime_start);
+ prt_newline(out);
+
+ prt_str(out, "io wait remaining: ");
+ bch2_prt_human_readable_s64(out, r->wait_iotime_end - now);
+ prt_newline(out);
+
+ prt_str(out, "duration waited: ");
+ bch2_pr_time_units(out, ktime_get_real_ns() - r->wait_wallclock_start);
+ prt_newline(out);
+ break;
+ }
+ case BCH_REBALANCE_working:
+ bch2_move_stats_to_text(out, &r->work_stats);
+ break;
+ case BCH_REBALANCE_scanning:
+ bch2_move_stats_to_text(out, &r->scan_stats);
+ break;
+ }
+ prt_newline(out);
+ printbuf_indent_sub(out, 2);
+}
+
+void bch2_rebalance_stop(struct bch_fs *c)
+{
+ struct task_struct *p;
+
+ c->rebalance.pd.rate.rate = UINT_MAX;
+ bch2_ratelimit_reset(&c->rebalance.pd.rate);
+
+ p = rcu_dereference_protected(c->rebalance.thread, 1);
+ c->rebalance.thread = NULL;
+
+ if (p) {
+		/* for synchronizing with rebalance_wakeup() */
+ synchronize_rcu();
+
+ kthread_stop(p);
+ put_task_struct(p);
+ }
+}
+
+int bch2_rebalance_start(struct bch_fs *c)
+{
+ struct task_struct *p;
+ int ret;
+
+ if (c->rebalance.thread)
+ return 0;
+
+ if (c->opts.nochanges)
+ return 0;
+
+ p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
+ ret = PTR_ERR_OR_ZERO(p);
+ bch_err_msg(c, ret, "creating rebalance thread");
+ if (ret)
+ return ret;
+
+ get_task_struct(p);
+ rcu_assign_pointer(c->rebalance.thread, p);
+ wake_up_process(p);
+ return 0;
+}
+
+void bch2_fs_rebalance_init(struct bch_fs *c)
+{
+ bch2_pd_controller_init(&c->rebalance.pd);
+}
diff --git a/c_src/libbcachefs/rebalance.h b/c_src/libbcachefs/rebalance.h
new file mode 100644
index 00000000..28a52638
--- /dev/null
+++ b/c_src/libbcachefs/rebalance.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REBALANCE_H
+#define _BCACHEFS_REBALANCE_H
+
+#include "rebalance_types.h"
+
+int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum);
+int bch2_set_fs_needs_rebalance(struct bch_fs *);
+
+static inline void rebalance_wakeup(struct bch_fs *c)
+{
+ struct task_struct *p;
+
+ rcu_read_lock();
+ p = rcu_dereference(c->rebalance.thread);
+ if (p)
+ wake_up_process(p);
+ rcu_read_unlock();
+}
+
+void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *);
+
+void bch2_rebalance_stop(struct bch_fs *);
+int bch2_rebalance_start(struct bch_fs *);
+void bch2_fs_rebalance_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_REBALANCE_H */
diff --git a/c_src/libbcachefs/rebalance_types.h b/c_src/libbcachefs/rebalance_types.h
new file mode 100644
index 00000000..0fffb536
--- /dev/null
+++ b/c_src/libbcachefs/rebalance_types.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REBALANCE_TYPES_H
+#define _BCACHEFS_REBALANCE_TYPES_H
+
+#include "bbpos_types.h"
+#include "move_types.h"
+
+#define BCH_REBALANCE_STATES() \
+ x(waiting) \
+ x(working) \
+ x(scanning)
+
+enum bch_rebalance_states {
+#define x(t) BCH_REBALANCE_##t,
+ BCH_REBALANCE_STATES()
+#undef x
+};
+
+struct bch_fs_rebalance {
+ struct task_struct __rcu *thread;
+ struct bch_pd_controller pd;
+
+ enum bch_rebalance_states state;
+ u64 wait_iotime_start;
+ u64 wait_iotime_end;
+ u64 wait_wallclock_start;
+
+ struct bch_move_stats work_stats;
+
+ struct bbpos scan_start;
+ struct bbpos scan_end;
+ struct bch_move_stats scan_stats;
+
+ unsigned enabled:1;
+};
+
+#endif /* _BCACHEFS_REBALANCE_TYPES_H */
diff --git a/c_src/libbcachefs/recovery.c b/c_src/libbcachefs/recovery.c
new file mode 100644
index 00000000..72521460
--- /dev/null
+++ b/c_src/libbcachefs/recovery.c
@@ -0,0 +1,1220 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "backpointers.h"
+#include "bkey_buf.h"
+#include "alloc_background.h"
+#include "btree_gc.h"
+#include "btree_journal_iter.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_io.h"
+#include "buckets.h"
+#include "dirent.h"
+#include "ec.h"
+#include "errcode.h"
+#include "error.h"
+#include "fs-common.h"
+#include "fsck.h"
+#include "journal_io.h"
+#include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
+#include "lru.h"
+#include "logged_ops.h"
+#include "move.h"
+#include "quota.h"
+#include "rebalance.h"
+#include "recovery.h"
+#include "replicas.h"
+#include "sb-clean.h"
+#include "sb-downgrade.h"
+#include "snapshot.h"
+#include "subvolume.h"
+#include "super-io.h"
+
+#include <linux/sort.h>
+#include <linux/stat.h>
+
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+static bool btree_id_is_alloc(enum btree_id id)
+{
+ switch (id) {
+ case BTREE_ID_alloc:
+ case BTREE_ID_backpointers:
+ case BTREE_ID_need_discard:
+ case BTREE_ID_freespace:
+ case BTREE_ID_bucket_gens:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* for -o reconstruct_alloc: */
+static void drop_alloc_keys(struct journal_keys *keys)
+{
+ size_t src, dst;
+
+ for (src = 0, dst = 0; src < keys->nr; src++)
+ if (!btree_id_is_alloc(keys->d[src].btree_id))
+ keys->d[dst++] = keys->d[src];
+
+ keys->nr = dst;
+}
+
+/*
+ * Btree node pointers have a field to stash a pointer to the in memory btree
+ * node; we need to zero out this field when reading in btree nodes, or when
+ * reading in keys from the journal:
+ */
+static void zero_out_btree_mem_ptr(struct journal_keys *keys)
+{
+ struct journal_key *i;
+
+ for (i = keys->d; i < keys->d + keys->nr; i++)
+ if (i->k->k.type == KEY_TYPE_btree_ptr_v2)
+ bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0;
+}
+
+/* journal replay: */
+
+static void replay_now_at(struct journal *j, u64 seq)
+{
+ BUG_ON(seq < j->replay_journal_seq);
+
+ seq = min(seq, j->replay_journal_seq_end);
+
+ while (j->replay_journal_seq < seq)
+ bch2_journal_pin_put(j, j->replay_journal_seq++);
+}
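+
+/*
+ * Dropping our pins on journal entries as they're replayed lets journal
+ * reclaim make progress while replay is still running.
+ */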
+
+static int bch2_journal_replay_key(struct btree_trans *trans,
+ struct journal_key *k)
+{
+ struct btree_iter iter;
+ unsigned iter_flags =
+ BTREE_ITER_INTENT|
+ BTREE_ITER_NOT_EXTENTS;
+ unsigned update_flags = BTREE_TRIGGER_NORUN;
+ int ret;
+
+ if (k->overwritten)
+ return 0;
+
+ trans->journal_res.seq = k->journal_seq;
+
+ /*
+ * BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to
+ * keep the key cache coherent with the underlying btree. Nothing
+ * besides the allocator is doing updates yet so we don't need key cache
+ * coherency for non-alloc btrees, and key cache fills for snapshots
+ * btrees use BTREE_ITER_FILTER_SNAPSHOTS, which isn't available until
+ * the snapshots recovery pass runs.
+ */
+ if (!k->level && k->btree_id == BTREE_ID_alloc)
+ iter_flags |= BTREE_ITER_CACHED;
+ else
+ update_flags |= BTREE_UPDATE_KEY_CACHE_RECLAIM;
+
+ bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
+ BTREE_MAX_DEPTH, k->level,
+ iter_flags);
+ ret = bch2_btree_iter_traverse(&iter);
+ if (ret)
+ goto out;
+
+ /* Must be checked with btree locked: */
+ if (k->overwritten)
+ goto out;
+
+ ret = bch2_trans_update(trans, &iter, k->k, update_flags);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int journal_sort_seq_cmp(const void *_l, const void *_r)
+{
+ const struct journal_key *l = *((const struct journal_key **)_l);
+ const struct journal_key *r = *((const struct journal_key **)_r);
+
+ return cmp_int(l->journal_seq, r->journal_seq);
+}
+
+static int bch2_journal_replay(struct bch_fs *c)
+{
+ struct journal_keys *keys = &c->journal_keys;
+ DARRAY(struct journal_key *) keys_sorted = { 0 };
+ struct journal *j = &c->journal;
+ u64 start_seq = c->journal_replay_seq_start;
+ u64 end_seq = c->journal_replay_seq_start;
+ struct btree_trans *trans = bch2_trans_get(c);
+ int ret = 0;
+
+ if (keys->nr) {
+ ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
+ keys->nr, start_seq, end_seq);
+ if (ret)
+ goto err;
+ }
+
+ BUG_ON(!atomic_read(&keys->ref));
+
+ /*
+ * First, attempt to replay keys in sorted order. This is more
+ * efficient - better locality of btree access - but some might fail if
+ * that would cause a journal deadlock.
+ */
+ for (size_t i = 0; i < keys->nr; i++) {
+ cond_resched();
+
+ struct journal_key *k = keys->d + i;
+
+ /* Skip fastpath if we're low on space in the journal */
+ ret = c->journal.watermark ? -1 :
+ commit_do(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc|
+ BCH_TRANS_COMMIT_journal_reclaim|
+ (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0),
+ bch2_journal_replay_key(trans, k));
+ BUG_ON(!ret && !k->overwritten);
+ if (ret) {
+ ret = darray_push(&keys_sorted, k);
+ if (ret)
+ goto err;
+ }
+ }
+
+ /*
+ * Now, replay any remaining keys in the order in which they appear in
+ * the journal, unpinning those journal entries as we go:
+ */
+ sort(keys_sorted.data, keys_sorted.nr,
+ sizeof(keys_sorted.data[0]),
+ journal_sort_seq_cmp, NULL);
+
+ darray_for_each(keys_sorted, kp) {
+ cond_resched();
+
+ struct journal_key *k = *kp;
+
+ replay_now_at(j, k->journal_seq);
+
+ ret = commit_do(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc|
+ (!k->allocated
+ ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim
+ : 0),
+ bch2_journal_replay_key(trans, k));
+ bch_err_msg(c, ret, "while replaying key at btree %s level %u:",
+ bch2_btree_id_str(k->btree_id), k->level);
+ if (ret)
+ goto err;
+
+ BUG_ON(!k->overwritten);
+ }
+
+ /*
+ * We need to put our btree_trans before calling flush_all_pins(), since
+ * that will use a btree_trans internally
+ */
+ bch2_trans_put(trans);
+ trans = NULL;
+
+ if (!c->opts.keep_journal)
+ bch2_journal_keys_put_initial(c);
+
+ replay_now_at(j, j->replay_journal_seq_end);
+ j->replay_journal_seq = 0;
+
+ bch2_journal_set_replay_done(j);
+
+ if (keys->nr)
+ bch2_journal_log_msg(c, "journal replay finished");
+err:
+ if (trans)
+ bch2_trans_put(trans);
+ darray_exit(&keys_sorted);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+/* journal replay early: */
+
+static int journal_replay_entry_early(struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ int ret = 0;
+
+ switch (entry->type) {
+ case BCH_JSET_ENTRY_btree_root: {
+ struct btree_root *r;
+
+ while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) {
+ ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL });
+ if (ret)
+ return ret;
+ }
+
+ r = bch2_btree_id_root(c, entry->btree_id);
+
+ if (entry->u64s) {
+ r->level = entry->level;
+ bkey_copy(&r->key, (struct bkey_i *) entry->start);
+ r->error = 0;
+ } else {
+ r->error = -EIO;
+ }
+ r->alive = true;
+ break;
+ }
+ case BCH_JSET_ENTRY_usage: {
+ struct jset_entry_usage *u =
+ container_of(entry, struct jset_entry_usage, entry);
+
+ switch (entry->btree_id) {
+ case BCH_FS_USAGE_reserved:
+ if (entry->level < BCH_REPLICAS_MAX)
+ c->usage_base->persistent_reserved[entry->level] =
+ le64_to_cpu(u->v);
+ break;
+ case BCH_FS_USAGE_inodes:
+ c->usage_base->nr_inodes = le64_to_cpu(u->v);
+ break;
+ case BCH_FS_USAGE_key_version:
+ atomic64_set(&c->key_version,
+ le64_to_cpu(u->v));
+ break;
+ }
+
+ break;
+ }
+ case BCH_JSET_ENTRY_data_usage: {
+ struct jset_entry_data_usage *u =
+ container_of(entry, struct jset_entry_data_usage, entry);
+
+ ret = bch2_replicas_set_usage(c, &u->r,
+ le64_to_cpu(u->v));
+ break;
+ }
+ case BCH_JSET_ENTRY_dev_usage: {
+ struct jset_entry_dev_usage *u =
+ container_of(entry, struct jset_entry_dev_usage, entry);
+ struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev));
+ unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
+
+ for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) {
+ ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets);
+ ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors);
+ ca->usage_base->d[i].fragmented = le64_to_cpu(u->d[i].fragmented);
+ }
+
+ break;
+ }
+ case BCH_JSET_ENTRY_blacklist: {
+ struct jset_entry_blacklist *bl_entry =
+ container_of(entry, struct jset_entry_blacklist, entry);
+
+ ret = bch2_journal_seq_blacklist_add(c,
+ le64_to_cpu(bl_entry->seq),
+ le64_to_cpu(bl_entry->seq) + 1);
+ break;
+ }
+ case BCH_JSET_ENTRY_blacklist_v2: {
+ struct jset_entry_blacklist_v2 *bl_entry =
+ container_of(entry, struct jset_entry_blacklist_v2, entry);
+
+ ret = bch2_journal_seq_blacklist_add(c,
+ le64_to_cpu(bl_entry->start),
+ le64_to_cpu(bl_entry->end) + 1);
+ break;
+ }
+ case BCH_JSET_ENTRY_clock: {
+ struct jset_entry_clock *clock =
+ container_of(entry, struct jset_entry_clock, entry);
+
+ atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time));
+ }
+ }
+
+ return ret;
+}
+
+static int journal_replay_early(struct bch_fs *c,
+ struct bch_sb_field_clean *clean)
+{
+ if (clean) {
+ for (struct jset_entry *entry = clean->start;
+ entry != vstruct_end(&clean->field);
+ entry = vstruct_next(entry)) {
+ int ret = journal_replay_entry_early(c, entry);
+ if (ret)
+ return ret;
+ }
+ } else {
+ struct genradix_iter iter;
+ struct journal_replay *i, **_i;
+
+ genradix_for_each(&c->journal_entries, iter, _i) {
+ i = *_i;
+
+ if (!i || i->ignore)
+ continue;
+
+ vstruct_for_each(&i->j, entry) {
+ int ret = journal_replay_entry_early(c, entry);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+
+ bch2_fs_usage_initialize(c);
+
+ return 0;
+}
+
+/* sb clean section: */
+
+static int read_btree_roots(struct bch_fs *c)
+{
+ unsigned i;
+ int ret = 0;
+
+ for (i = 0; i < btree_id_nr_alive(c); i++) {
+ struct btree_root *r = bch2_btree_id_root(c, i);
+
+ if (!r->alive)
+ continue;
+
+ if (btree_id_is_alloc(i) &&
+ c->opts.reconstruct_alloc) {
+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
+ continue;
+ }
+
+ if (r->error) {
+ __fsck_err(c,
+ btree_id_is_alloc(i)
+ ? FSCK_CAN_IGNORE : 0,
+ btree_root_bkey_invalid,
+ "invalid btree root %s",
+ bch2_btree_id_str(i));
+ if (i == BTREE_ID_alloc)
+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
+ }
+
+ ret = bch2_btree_root_read(c, i, &r->key, r->level);
+ if (ret) {
+ fsck_err(c,
+ btree_root_read_error,
+ "error reading btree root %s",
+ bch2_btree_id_str(i));
+ if (btree_id_is_alloc(i))
+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
+ ret = 0;
+ }
+ }
+
+ for (i = 0; i < BTREE_ID_NR; i++) {
+ struct btree_root *r = bch2_btree_id_root(c, i);
+
+ if (!r->b) {
+ r->alive = false;
+ r->level = 0;
+ bch2_btree_root_alloc(c, i);
+ }
+ }
+fsck_err:
+ return ret;
+}
+
+static int bch2_initialize_subvolumes(struct bch_fs *c)
+{
+ struct bkey_i_snapshot_tree root_tree;
+ struct bkey_i_snapshot root_snapshot;
+ struct bkey_i_subvolume root_volume;
+ int ret;
+
+ bkey_snapshot_tree_init(&root_tree.k_i);
+ root_tree.k.p.offset = 1;
+ root_tree.v.master_subvol = cpu_to_le32(1);
+ root_tree.v.root_snapshot = cpu_to_le32(U32_MAX);
+
+ bkey_snapshot_init(&root_snapshot.k_i);
+ root_snapshot.k.p.offset = U32_MAX;
+ root_snapshot.v.flags = 0;
+ root_snapshot.v.parent = 0;
+ root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
+ root_snapshot.v.tree = cpu_to_le32(1);
+ SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
+
+ bkey_subvolume_init(&root_volume.k_i);
+ root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
+ root_volume.v.flags = 0;
+ root_volume.v.snapshot = cpu_to_le32(U32_MAX);
+ root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
+
+ ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
+ bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
+ bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bch_inode_unpacked inode;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ if (!bkey_is_inode(k.k)) {
+ bch_err(trans->c, "root inode not found");
+ ret = -BCH_ERR_ENOENT_inode;
+ goto err;
+ }
+
+ ret = bch2_inode_unpack(k, &inode);
+ BUG_ON(ret);
+
+ inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
+
+ ret = bch2_inode_write(trans, &iter, &inode);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+/* set bi_subvol on root inode */
+noinline_for_stack
+static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
+{
+ int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
+ __bch2_fs_upgrade_for_subvolumes(trans));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+const char * const bch2_recovery_passes[] = {
+#define x(_fn, ...) #_fn,
+ BCH_RECOVERY_PASSES()
+#undef x
+ NULL
+};
+
+static int bch2_check_allocations(struct bch_fs *c)
+{
+ return bch2_gc(c, true, c->opts.norecovery);
+}
+
+static int bch2_set_may_go_rw(struct bch_fs *c)
+{
+ struct journal_keys *keys = &c->journal_keys;
+
+ /*
+ * After we go RW, the journal keys buffer can't be modified (except for
+	 * setting journal_key->overwritten): it will be accessed by multiple
+	 * threads.
+ */
+ move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
+ keys->gap = keys->nr;
+
+ set_bit(BCH_FS_may_go_rw, &c->flags);
+
+ if (keys->nr || c->opts.fsck || !c->sb.clean)
+ return bch2_fs_read_write_early(c);
+ return 0;
+}
+
+struct recovery_pass_fn {
+ int (*fn)(struct bch_fs *);
+ unsigned when;
+};
+
+static struct recovery_pass_fn recovery_pass_fns[] = {
+#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when },
+ BCH_RECOVERY_PASSES()
+#undef x
+};
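+
+/*
+ * The BCH_RECOVERY_PASSES() x-macro is expanded twice above: once for the
+ * human-readable name table and once for the function/when table, keeping the
+ * two in sync by construction.
+ */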
+
+u64 bch2_recovery_passes_to_stable(u64 v)
+{
+ static const u8 map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+ };
+
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(map[i]);
+ return ret;
+}
+
+u64 bch2_recovery_passes_from_stable(u64 v)
+{
+ static const u8 map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+ };
+
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(map[i]);
+ return ret;
+}
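+
+/*
+ * The "stable" pass numbering is what's stored in the superblock (see the use
+ * of ext->recovery_passes_required in check_version_upgrade()); translating to
+ * and from it means the in-memory enum ordering isn't tied to the on-disk
+ * encoding.
+ */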
+
+static bool check_version_upgrade(struct bch_fs *c)
+{
+ unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version);
+ unsigned latest_version = bcachefs_metadata_version_current;
+ unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
+ unsigned new_version = 0;
+
+ if (old_version < bcachefs_metadata_required_upgrade_below) {
+ if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
+ latest_compatible < bcachefs_metadata_required_upgrade_below)
+ new_version = latest_version;
+ else
+ new_version = latest_compatible;
+ } else {
+ switch (c->opts.version_upgrade) {
+ case BCH_VERSION_UPGRADE_compatible:
+ new_version = latest_compatible;
+ break;
+ case BCH_VERSION_UPGRADE_incompatible:
+ new_version = latest_version;
+ break;
+ case BCH_VERSION_UPGRADE_none:
+ new_version = old_version;
+ break;
+ }
+ }
+
+ if (new_version > old_version) {
+ struct printbuf buf = PRINTBUF;
+
+ if (old_version < bcachefs_metadata_required_upgrade_below)
+ prt_str(&buf, "Version upgrade required:\n");
+
+ if (old_version != c->sb.version) {
+ prt_str(&buf, "Version upgrade from ");
+ bch2_version_to_text(&buf, c->sb.version_upgrade_complete);
+ prt_str(&buf, " to ");
+ bch2_version_to_text(&buf, c->sb.version);
+ prt_str(&buf, " incomplete\n");
+ }
+
+ prt_printf(&buf, "Doing %s version upgrade from ",
+ BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version)
+ ? "incompatible" : "compatible");
+ bch2_version_to_text(&buf, old_version);
+ prt_str(&buf, " to ");
+ bch2_version_to_text(&buf, new_version);
+ prt_newline(&buf);
+
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ __le64 passes = ext->recovery_passes_required[0];
+ bch2_sb_set_upgrade(c, old_version, new_version);
+ passes = ext->recovery_passes_required[0] & ~passes;
+
+ if (passes) {
+ prt_str(&buf, " running recovery passes: ");
+ prt_bitflags(&buf, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+ }
+
+ bch_info(c, "%s", buf.buf);
+
+ bch2_sb_upgrade(c, new_version);
+
+ printbuf_exit(&buf);
+ return true;
+ }
+
+ return false;
+}
+
+u64 bch2_fsck_recovery_passes(void)
+{
+ u64 ret = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
+ if (recovery_pass_fns[i].when & PASS_FSCK)
+ ret |= BIT_ULL(i);
+ return ret;
+}
+
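+/*
+ * A pass runs if it was explicitly requested, if it's part of fsck and fsck
+ * was requested, if it handles unclean shutdown and the filesystem wasn't
+ * clean, or if it's marked PASS_ALWAYS - except that norecovery limits us to
+ * the passes up to and including snapshots_read:
+ */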
+static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+ struct recovery_pass_fn *p = recovery_pass_fns + pass;
+
+ if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
+ return false;
+ if (c->recovery_passes_explicit & BIT_ULL(pass))
+ return true;
+ if ((p->when & PASS_FSCK) && c->opts.fsck)
+ return true;
+ if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
+ return true;
+ if (p->when & PASS_ALWAYS)
+ return true;
+ return false;
+}
+
+static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+ struct recovery_pass_fn *p = recovery_pass_fns + pass;
+ int ret;
+
+ if (!(p->when & PASS_SILENT))
+ bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
+ bch2_recovery_passes[pass]);
+ ret = p->fn(c);
+ if (ret)
+ return ret;
+ if (!(p->when & PASS_SILENT))
+ bch2_print(c, KERN_CONT " done\n");
+
+ return 0;
+}
+
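+/*
+ * Run the remaining recovery passes in order; a pass may rewind
+ * c->curr_recovery_pass (see bch2_run_explicit_recovery_pass()), in which case
+ * we continue from the earlier pass:
+ */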
+static int bch2_run_recovery_passes(struct bch_fs *c)
+{
+ int ret = 0;
+
+ while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
+ if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
+ unsigned pass = c->curr_recovery_pass;
+
+ ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
+ if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
+ (ret && c->curr_recovery_pass < pass))
+ continue;
+ if (ret)
+ break;
+
+ c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
+ }
+ c->curr_recovery_pass++;
+ c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
+ }
+
+ return ret;
+}
+
+int bch2_run_online_recovery_passes(struct bch_fs *c)
+{
+ int ret = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
+ struct recovery_pass_fn *p = recovery_pass_fns + i;
+
+ if (!(p->when & PASS_ONLINE))
+ continue;
+
+ ret = bch2_run_recovery_pass(c, i);
+ if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
+ i = c->curr_recovery_pass;
+ continue;
+ }
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int bch2_fs_recovery(struct bch_fs *c)
+{
+ struct bch_sb_field_clean *clean = NULL;
+ struct jset *last_journal_entry = NULL;
+ u64 last_seq = 0, blacklist_seq, journal_seq;
+ int ret = 0;
+
+ if (c->sb.clean) {
+ clean = bch2_read_superblock_clean(c);
+ ret = PTR_ERR_OR_ZERO(clean);
+ if (ret)
+ goto err;
+
+ bch_info(c, "recovering from clean shutdown, journal seq %llu",
+ le64_to_cpu(clean->journal_seq));
+ } else {
+ bch_info(c, "recovering from unclean shutdown");
+ }
+
+ if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) {
+ bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (!c->sb.clean &&
+ !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) {
+ bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (c->opts.fsck && c->opts.norecovery) {
+ bch_err(c, "cannot select both norecovery and fsck");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (!(c->opts.nochanges && c->opts.norecovery)) {
+ mutex_lock(&c->sb_lock);
+ bool write_sb = false;
+
+ struct bch_sb_field_ext *ext =
+ bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
+ if (!ext) {
+ ret = -BCH_ERR_ENOSPC_sb;
+ mutex_unlock(&c->sb_lock);
+ goto err;
+ }
+
+ if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
+ ext->recovery_passes_required[0] |=
+ cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
+ write_sb = true;
+ }
+
+ u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+ if (sb_passes) {
+ struct printbuf buf = PRINTBUF;
+ prt_str(&buf, "superblock requires following recovery passes to be run:\n ");
+ prt_bitflags(&buf, bch2_recovery_passes, sb_passes);
+ bch_info(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ if (bch2_check_version_downgrade(c)) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "Version downgrade required:\n");
+
+ __le64 passes = ext->recovery_passes_required[0];
+ bch2_sb_set_downgrade(c,
+ BCH_VERSION_MINOR(bcachefs_metadata_version_current),
+ BCH_VERSION_MINOR(c->sb.version));
+ passes = ext->recovery_passes_required[0] & ~passes;
+ if (passes) {
+ prt_str(&buf, " running recovery passes: ");
+ prt_bitflags(&buf, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+ }
+
+ bch_info(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ write_sb = true;
+ }
+
+ if (check_version_upgrade(c))
+ write_sb = true;
+
+ if (write_sb)
+ bch2_write_super(c);
+
+ c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+ mutex_unlock(&c->sb_lock);
+ }
+
+ if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
+
+ if (c->opts.fsck)
+ set_bit(BCH_FS_fsck_running, &c->flags);
+
+ ret = bch2_blacklist_table_initialize(c);
+ if (ret) {
+ bch_err(c, "error initializing blacklist table");
+ goto err;
+ }
+
+ if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
+ struct genradix_iter iter;
+ struct journal_replay **i;
+
+ bch_verbose(c, "starting journal read");
+ ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq);
+ if (ret)
+ goto err;
+
+ /*
+ * note: cmd_list_journal needs the blacklist table fully up to date so
+ * it can asterisk ignored journal entries:
+ */
+ if (c->opts.read_journal_only)
+ goto out;
+
+ genradix_for_each_reverse(&c->journal_entries, iter, i)
+ if (*i && !(*i)->ignore) {
+ last_journal_entry = &(*i)->j;
+ break;
+ }
+
+ if (mustfix_fsck_err_on(c->sb.clean &&
+ last_journal_entry &&
+ !journal_entry_empty(last_journal_entry), c,
+ clean_but_journal_not_empty,
+ "filesystem marked clean but journal not empty")) {
+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+ c->sb.clean = false;
+ }
+
+ if (!last_journal_entry) {
+ fsck_err_on(!c->sb.clean, c,
+ dirty_but_no_journal_entries,
+ "no journal entries found");
+ if (clean)
+ goto use_clean;
+
+ genradix_for_each_reverse(&c->journal_entries, iter, i)
+ if (*i) {
+ last_journal_entry = &(*i)->j;
+ (*i)->ignore = false;
+ /*
+ * This was probably a NO_FLUSH entry,
+ * so last_seq was garbage - but we know
+ * we're only using a single journal
+ * entry, set it here:
+ */
+ (*i)->j.last_seq = (*i)->j.seq;
+ break;
+ }
+ }
+
+ ret = bch2_journal_keys_sort(c);
+ if (ret)
+ goto err;
+
+ if (c->sb.clean && last_journal_entry) {
+ ret = bch2_verify_superblock_clean(c, &clean,
+ last_journal_entry);
+ if (ret)
+ goto err;
+ }
+ } else {
+use_clean:
+ if (!clean) {
+ bch_err(c, "no superblock clean section found");
+ ret = -BCH_ERR_fsck_repair_impossible;
+ goto err;
+ }
+ blacklist_seq = journal_seq = le64_to_cpu(clean->journal_seq) + 1;
+ }
+
+ c->journal_replay_seq_start = last_seq;
+ c->journal_replay_seq_end = blacklist_seq - 1;
+
+ if (c->opts.reconstruct_alloc) {
+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
+ drop_alloc_keys(&c->journal_keys);
+ }
+
+ zero_out_btree_mem_ptr(&c->journal_keys);
+
+ ret = journal_replay_early(c, clean);
+ if (ret)
+ goto err;
+
+ /*
+ * After an unclean shutdown, skip the next few journal sequence
+ * numbers as they may have been referenced by btree writes that
+ * happened before their corresponding journal writes - those btree
+ * writes need to be ignored, by skipping and blacklisting the next few
+ * journal sequence numbers:
+ */
+ if (!c->sb.clean)
+ journal_seq += 8;
+
+ if (blacklist_seq != journal_seq) {
+ ret = bch2_journal_log_msg(c, "blacklisting entries %llu-%llu",
+ blacklist_seq, journal_seq) ?:
+ bch2_journal_seq_blacklist_add(c,
+ blacklist_seq, journal_seq);
+ if (ret) {
+ bch_err(c, "error creating new journal seq blacklist entry");
+ goto err;
+ }
+ }
+
+ ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
+ journal_seq, last_seq, blacklist_seq - 1) ?:
+ bch2_fs_journal_start(&c->journal, journal_seq);
+ if (ret)
+ goto err;
+
+ if (c->opts.reconstruct_alloc)
+ bch2_journal_log_msg(c, "dropping alloc info");
+
+ /*
+ * Skip past versions that may have been used (as nonces),
+ * but hadn't had their pointers written:
+ */
+ if (c->sb.encryption_type && !c->sb.clean)
+ atomic64_add(1 << 16, &c->key_version);
+
+ ret = read_btree_roots(c);
+ if (ret)
+ goto err;
+
+ ret = bch2_run_recovery_passes(c);
+ if (ret)
+ goto err;
+
+ clear_bit(BCH_FS_fsck_running, &c->flags);
+
+ /* If we fixed errors, verify that fs is actually clean now: */
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
+ test_bit(BCH_FS_errors_fixed, &c->flags) &&
+ !test_bit(BCH_FS_errors_not_fixed, &c->flags) &&
+ !test_bit(BCH_FS_error, &c->flags)) {
+ bch2_flush_fsck_errs(c);
+
+ bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
+ clear_bit(BCH_FS_errors_fixed, &c->flags);
+
+ c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
+
+ ret = bch2_run_recovery_passes(c);
+ if (ret)
+ goto err;
+
+ if (test_bit(BCH_FS_errors_fixed, &c->flags) ||
+ test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
+ bch_err(c, "Second fsck run was not clean");
+ set_bit(BCH_FS_errors_not_fixed, &c->flags);
+ }
+
+ set_bit(BCH_FS_errors_fixed, &c->flags);
+ }
+
+ if (enabled_qtypes(c)) {
+ bch_verbose(c, "reading quotas");
+ ret = bch2_fs_quota_read(c);
+ if (ret)
+ goto err;
+ bch_verbose(c, "quotas done");
+ }
+
+ mutex_lock(&c->sb_lock);
+ bool write_sb = false;
+
+ if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version));
+ write_sb = true;
+ }
+
+ if (!test_bit(BCH_FS_error, &c->flags) &&
+ !(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) {
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
+ write_sb = true;
+ }
+
+ if (!test_bit(BCH_FS_error, &c->flags)) {
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ if (ext &&
+ (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) ||
+ !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) {
+ memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
+ memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
+ write_sb = true;
+ }
+ }
+
+ if (c->opts.fsck &&
+ !test_bit(BCH_FS_error, &c->flags) &&
+ !test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
+ SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 0);
+ write_sb = true;
+ }
+
+ if (write_sb)
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) ||
+ c->sb.version_min < bcachefs_metadata_version_btree_ptr_sectors_written) {
+ struct bch_move_stats stats;
+
+ bch2_move_stats_init(&stats, "recovery");
+
+ struct printbuf buf = PRINTBUF;
+ bch2_version_to_text(&buf, c->sb.version_min);
+ bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf);
+ printbuf_exit(&buf);
+
+ ret = bch2_fs_read_write_early(c) ?:
+ bch2_scan_old_btree_nodes(c, &stats);
+ if (ret)
+ goto err;
+ bch_info(c, "scanning for old btree nodes done");
+ }
+
+ if (c->journal_seq_blacklist_table &&
+ c->journal_seq_blacklist_table->nr > 128)
+ queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
+
+ ret = 0;
+out:
+ bch2_flush_fsck_errs(c);
+
+ if (!c->opts.keep_journal &&
+ test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+ bch2_journal_keys_put_initial(c);
+ kfree(clean);
+
+ if (!ret &&
+ test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) &&
+ !c->opts.nochanges) {
+ bch2_fs_read_write_early(c);
+ bch2_delete_dead_snapshots_async(c);
+ }
+
+ bch_err_fn(c, ret);
+ return ret;
+err:
+fsck_err:
+ bch2_fs_emergency_read_only(c);
+ goto out;
+}
+
+int bch2_fs_initialize(struct bch_fs *c)
+{
+ struct bch_inode_unpacked root_inode, lostfound_inode;
+ struct bkey_inode_buf packed_inode;
+ struct qstr lostfound = QSTR("lost+found");
+ int ret;
+
+ bch_notice(c, "initializing new filesystem");
+
+ mutex_lock(&c->sb_lock);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
+
+ bch2_check_version_downgrade(c);
+
+ if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
+ bch2_sb_upgrade(c, bcachefs_metadata_version_current);
+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+
+ c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
+ set_bit(BCH_FS_may_go_rw, &c->flags);
+
+ for (unsigned i = 0; i < BTREE_ID_NR; i++)
+ bch2_btree_root_alloc(c, i);
+
+ for_each_member_device(c, ca)
+ bch2_dev_usage_init(ca);
+
+ ret = bch2_fs_journal_alloc(c);
+ if (ret)
+ goto err;
+
+ /*
+ * journal_res_get() will crash if called before this has
+ * set up the journal.pin FIFO and journal.cur pointer:
+ */
+ bch2_fs_journal_start(&c->journal, 1);
+ bch2_journal_set_replay_done(&c->journal);
+
+ ret = bch2_fs_read_write_early(c);
+ if (ret)
+ goto err;
+
+ /*
+ * Write out the superblock and journal buckets, now that we can do
+ * btree updates
+ */
+ bch_verbose(c, "marking superblocks");
+ ret = bch2_trans_mark_dev_sbs(c);
+ bch_err_msg(c, ret, "marking superblocks");
+ if (ret)
+ goto err;
+
+ for_each_online_member(c, ca)
+ ca->new_fs_bucket_idx = 0;
+
+ ret = bch2_fs_freespace_init(c);
+ if (ret)
+ goto err;
+
+ ret = bch2_initialize_subvolumes(c);
+ if (ret)
+ goto err;
+
+ bch_verbose(c, "reading snapshots table");
+ ret = bch2_snapshots_read(c);
+ if (ret)
+ goto err;
+ bch_verbose(c, "reading snapshots done");
+
+ bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL);
+ root_inode.bi_inum = BCACHEFS_ROOT_INO;
+ root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
+ bch2_inode_pack(&packed_inode, &root_inode);
+ packed_inode.inode.k.p.snapshot = U32_MAX;
+
+ ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0);
+ bch_err_msg(c, ret, "creating root directory");
+ if (ret)
+ goto err;
+
+ bch2_inode_init_early(c, &lostfound_inode);
+
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_create_trans(trans,
+ BCACHEFS_ROOT_SUBVOL_INUM,
+ &root_inode, &lostfound_inode,
+ &lostfound,
+ 0, 0, S_IFDIR|0700, 0,
+ NULL, NULL, (subvol_inum) { 0 }, 0));
+ bch_err_msg(c, ret, "creating lost+found");
+ if (ret)
+ goto err;
+
+ c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1;
+
+ if (enabled_qtypes(c)) {
+ ret = bch2_fs_quota_read(c);
+ if (ret)
+ goto err;
+ }
+
+ ret = bch2_journal_flush(&c->journal);
+ bch_err_msg(c, ret, "writing first journal entry");
+ if (ret)
+ goto err;
+
+ mutex_lock(&c->sb_lock);
+ SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ return 0;
+err:
+ bch_err_fn(c, ret);
+ return ret;
+}
diff --git a/c_src/libbcachefs/recovery.h b/c_src/libbcachefs/recovery.h
new file mode 100644
index 00000000..4e9d2471
--- /dev/null
+++ b/c_src/libbcachefs/recovery.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_RECOVERY_H
+#define _BCACHEFS_RECOVERY_H
+
+extern const char * const bch2_recovery_passes[];
+
+u64 bch2_recovery_passes_to_stable(u64 v);
+u64 bch2_recovery_passes_from_stable(u64 v);
+
+/*
+ * For when we need to rewind recovery passes and run a pass we skipped:
+ */
+static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ if (c->recovery_passes_explicit & BIT_ULL(pass))
+ return 0;
+
+ bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
+ bch2_recovery_passes[pass], pass,
+ bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
+
+ c->recovery_passes_explicit |= BIT_ULL(pass);
+
+ if (c->curr_recovery_pass >= pass) {
+ c->curr_recovery_pass = pass;
+ c->recovery_passes_complete &= (1ULL << pass) >> 1;
+ return -BCH_ERR_restart_recovery;
+ } else {
+ return 0;
+ }
+}
+
+int bch2_run_online_recovery_passes(struct bch_fs *);
+u64 bch2_fsck_recovery_passes(void);
+
+int bch2_fs_recovery(struct bch_fs *);
+int bch2_fs_initialize(struct bch_fs *);
+
+#endif /* _BCACHEFS_RECOVERY_H */
diff --git a/c_src/libbcachefs/recovery_types.h b/c_src/libbcachefs/recovery_types.h
new file mode 100644
index 00000000..fa0c8efd
--- /dev/null
+++ b/c_src/libbcachefs/recovery_types.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_RECOVERY_TYPES_H
+#define _BCACHEFS_RECOVERY_TYPES_H
+
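+/*
+ * PASS_SILENT: don't print the pass's start/done messages
+ * PASS_FSCK: run when fsck is requested
+ * PASS_UNCLEAN: run after an unclean shutdown
+ * PASS_ALWAYS: run on every recovery
+ * PASS_ONLINE: may also be run by bch2_run_online_recovery_passes()
+ */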
+#define PASS_SILENT BIT(0)
+#define PASS_FSCK BIT(1)
+#define PASS_UNCLEAN BIT(2)
+#define PASS_ALWAYS BIT(3)
+#define PASS_ONLINE BIT(4)
+
+/*
+ * Passes may be reordered, but the second field is a persistent identifier and
+ * must never change:
+ */
+#define BCH_RECOVERY_PASSES() \
+ x(alloc_read, 0, PASS_ALWAYS) \
+ x(stripes_read, 1, PASS_ALWAYS) \
+ x(initialize_subvolumes, 2, 0) \
+ x(snapshots_read, 3, PASS_ALWAYS) \
+ x(check_topology, 4, 0) \
+ x(check_allocations, 5, PASS_FSCK) \
+ x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \
+ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \
+ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \
+ x(journal_replay, 9, PASS_ALWAYS) \
+ x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \
+ x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \
+ x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \
+ x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK) \
+ x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \
+ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \
+ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
+ x(bucket_gens_init, 17, 0) \
+ x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
+ x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
+ x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
+ x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
+ x(fs_upgrade_for_subvolumes, 22, 0) \
+ x(resume_logged_ops, 23, PASS_ALWAYS) \
+ x(check_inodes, 24, PASS_FSCK) \
+ x(check_extents, 25, PASS_FSCK) \
+ x(check_indirect_extents, 26, PASS_FSCK) \
+ x(check_dirents, 27, PASS_FSCK) \
+ x(check_xattrs, 28, PASS_FSCK) \
+ x(check_root, 29, PASS_ONLINE|PASS_FSCK) \
+ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
+ x(check_nlinks, 31, PASS_FSCK) \
+ x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \
+ x(fix_reflink_p, 33, 0) \
+ x(set_fs_needs_rebalance, 34, 0) \
+
+/* We normally enumerate recovery passes in the order we run them: */
+enum bch_recovery_pass {
+#define x(n, id, when) BCH_RECOVERY_PASS_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+/* But we also need stable identifiers that can be used in the superblock */
+enum bch_recovery_pass_stable {
+#define x(n, id, when) BCH_RECOVERY_PASS_STABLE_##n = id,
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+#endif /* _BCACHEFS_RECOVERY_TYPES_H */
diff --git a/c_src/libbcachefs/reflink.c b/c_src/libbcachefs/reflink.c
new file mode 100644
index 00000000..b24b71bc
--- /dev/null
+++ b/c_src/libbcachefs/reflink.c
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "bkey_buf.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "error.h"
+#include "extents.h"
+#include "inode.h"
+#include "io_misc.h"
+#include "io_write.h"
+#include "rebalance.h"
+#include "reflink.h"
+#include "subvolume.h"
+#include "super-io.h"
+
+#include <linux/sched/signal.h>
+
+static inline unsigned bkey_type_to_indirect(const struct bkey *k)
+{
+ switch (k->type) {
+ case KEY_TYPE_extent:
+ return KEY_TYPE_reflink_v;
+ case KEY_TYPE_inline_data:
+ return KEY_TYPE_indirect_inline_data;
+ default:
+ return 0;
+ }
+}
+
+/* reflink pointers */
+
+int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+ int ret = 0;
+
+ bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad),
+ c, err, reflink_p_front_pad_bad,
+ "idx < front_pad (%llu < %u)",
+ le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad));
+fsck_err:
+ return ret;
+}
+
+void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+
+ prt_printf(out, "idx %llu front_pad %u back_pad %u",
+ le64_to_cpu(p.v->idx),
+ le32_to_cpu(p.v->front_pad),
+ le32_to_cpu(p.v->back_pad));
+}
+
+bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
+{
+ struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l);
+ struct bkey_s_c_reflink_p r = bkey_s_c_to_reflink_p(_r);
+
+ /*
+ * Disabled for now, the triggers code needs to be reworked for merging
+ * of reflink pointers to work:
+ */
+ return false;
+
+ if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx))
+ return false;
+
+ bch2_key_resize(l.k, l.k->size + r.k->size);
+ return true;
+}
+
+static int trans_trigger_reflink_p_segment(struct btree_trans *trans,
+ struct bkey_s_c_reflink_p p,
+ u64 *idx, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_i *k;
+ __le64 *refcount;
+ int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ k = bch2_bkey_get_mut_noupdate(trans, &iter,
+ BTREE_ID_reflink, POS(0, *idx),
+ BTREE_ITER_WITH_UPDATES);
+ ret = PTR_ERR_OR_ZERO(k);
+ if (ret)
+ goto err;
+
+ refcount = bkey_refcount(bkey_i_to_s(k));
+ if (!refcount) {
+ bch2_bkey_val_to_text(&buf, c, p.s_c);
+ bch2_trans_inconsistent(trans,
+ "nonexistent indirect extent at %llu while marking\n %s",
+ *idx, buf.buf);
+ ret = -EIO;
+ goto err;
+ }
+
+ if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
+ bch2_bkey_val_to_text(&buf, c, p.s_c);
+ bch2_trans_inconsistent(trans,
+ "indirect extent refcount underflow at %llu while marking\n %s",
+ *idx, buf.buf);
+ ret = -EIO;
+ goto err;
+ }
+
+ if (flags & BTREE_TRIGGER_INSERT) {
+ struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+ u64 pad;
+
+ pad = max_t(s64, le32_to_cpu(v->front_pad),
+ le64_to_cpu(v->idx) - bkey_start_offset(&k->k));
+ BUG_ON(pad > U32_MAX);
+ v->front_pad = cpu_to_le32(pad);
+
+ pad = max_t(s64, le32_to_cpu(v->back_pad),
+ k->k.p.offset - p.k->size - le64_to_cpu(v->idx));
+ BUG_ON(pad > U32_MAX);
+ v->back_pad = cpu_to_le32(pad);
+ }
+
+ le64_add_cpu(refcount, add);
+
+ bch2_btree_iter_set_pos_to_extent_start(&iter);
+ ret = bch2_trans_update(trans, &iter, k, 0);
+ if (ret)
+ goto err;
+
+ *idx = k->k.p.offset;
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans,
+ struct bkey_s_c_reflink_p p,
+ u64 *idx, unsigned flags, size_t r_idx)
+{
+ struct bch_fs *c = trans->c;
+ struct reflink_gc *r;
+ int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
+ u64 start = le64_to_cpu(p.v->idx);
+ u64 end = le64_to_cpu(p.v->idx) + p.k->size;
+ u64 next_idx = end + le32_to_cpu(p.v->back_pad);
+ s64 ret = 0;
+ struct printbuf buf = PRINTBUF;
+
+ if (r_idx >= c->reflink_gc_nr)
+ goto not_found;
+
+ r = genradix_ptr(&c->reflink_gc_table, r_idx);
+ next_idx = min(next_idx, r->offset - r->size);
+ if (*idx < next_idx)
+ goto not_found;
+
+ BUG_ON((s64) r->refcount + add < 0);
+
+ r->refcount += add;
+ *idx = r->offset;
+ return 0;
+not_found:
+ if (fsck_err(c, reflink_p_to_missing_reflink_v,
+ "pointer to missing indirect extent\n"
+ " %s\n"
+ " missing range %llu-%llu",
+ (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf),
+ *idx, next_idx)) {
+ struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, p.s_c);
+ ret = PTR_ERR_OR_ZERO(update);
+ if (ret)
+ goto err;
+
+ if (next_idx <= start) {
+ bkey_i_to_reflink_p(update)->v.front_pad = cpu_to_le32(start - next_idx);
+ } else if (*idx >= end) {
+ bkey_i_to_reflink_p(update)->v.back_pad = cpu_to_le32(*idx - end);
+ } else {
+ bkey_error_init(update);
+ update->k.p = p.k->p;
+ update->k.p.offset = next_idx;
+ update->k.size = next_idx - *idx;
+ set_bkey_val_u64s(&update->k, 0);
+ }
+
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, update, BTREE_TRIGGER_NORUN);
+ }
+
+ *idx = next_idx;
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int __trigger_reflink_p(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+ int ret = 0;
+
+ u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+ u64 end = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad);
+
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ while (idx < end && !ret)
+ ret = trans_trigger_reflink_p_segment(trans, p, &idx, flags);
+ }
+
+ if (flags & BTREE_TRIGGER_GC) {
+ size_t l = 0, r = c->reflink_gc_nr;
+
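+ /* binary search for the first reflink_gc entry with offset > idx: */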
+ while (l < r) {
+ size_t m = l + (r - l) / 2;
+ struct reflink_gc *ref = genradix_ptr(&c->reflink_gc_table, m);
+ if (ref->offset <= idx)
+ l = m + 1;
+ else
+ r = m;
+ }
+
+ while (idx < end && !ret)
+ ret = gc_trigger_reflink_p_segment(trans, p, &idx, flags, l++);
+ }
+
+ return ret;
+}
+
+int bch2_trigger_reflink_p(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old,
+ struct bkey_s new,
+ unsigned flags)
+{
+ if ((flags & BTREE_TRIGGER_TRANSACTIONAL) &&
+ (flags & BTREE_TRIGGER_INSERT)) {
+ struct bch_reflink_p *v = bkey_s_to_reflink_p(new).v;
+
+ v->front_pad = v->back_pad = 0;
+ }
+
+ return trigger_run_overwrite_then_insert(__trigger_reflink_p, trans, btree_id, level, old, new, flags);
+}
+
+/* indirect extents */
+
+int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ return bch2_bkey_ptrs_invalid(c, k, flags, err);
+}
+
+void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+ prt_printf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount));
+
+ bch2_bkey_ptrs_to_text(out, c, k);
+}
+
+#if 0
+Currently disabled, needs to be debugged:
+
+bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
+{
+ struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l);
+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r);
+
+ return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r);
+}
+#endif
+
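+/*
+ * If the key being inserted has a refcount of zero, turn the insert into a
+ * deletion:
+ */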
+static inline void check_indirect_extent_deleting(struct bkey_s new, unsigned *flags)
+{
+ if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) {
+ new.k->type = KEY_TYPE_deleted;
+ new.k->size = 0;
+ set_bkey_val_u64s(new.k, 0);
+ *flags &= ~BTREE_TRIGGER_INSERT;
+ }
+}
+
+int bch2_trans_mark_reflink_v(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
+{
+ if ((flags & BTREE_TRIGGER_TRANSACTIONAL) &&
+ (flags & BTREE_TRIGGER_INSERT))
+ check_indirect_extent_deleting(new, &flags);
+
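+ /* if only the refcount changed, the pointers don't need to be re-marked: */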
+ if (old.k->type == KEY_TYPE_reflink_v &&
+ new.k->type == KEY_TYPE_reflink_v &&
+ old.k->u64s == new.k->u64s &&
+ !memcmp(bkey_s_c_to_reflink_v(old).v->start,
+ bkey_s_to_reflink_v(new).v->start,
+ bkey_val_bytes(new.k) - 8))
+ return 0;
+
+ return bch2_trigger_extent(trans, btree_id, level, old, new, flags);
+}
+
+/* indirect inline data */
+
+int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ return 0;
+}
+
+void bch2_indirect_inline_data_to_text(struct printbuf *out,
+ struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k);
+ unsigned datalen = bkey_inline_data_bytes(k.k);
+
+ prt_printf(out, "refcount %llu datalen %u: %*phN",
+ le64_to_cpu(d.v->refcount), datalen,
+ min(datalen, 32U), d.v->data);
+}
+
+int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
+{
+ check_indirect_extent_deleting(new, &flags);
+
+ return 0;
+}
+
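+/*
+ * Move an extent's value into the reflink btree as a new indirect extent with
+ * refcount 0, and rewrite the original key in place as a reflink_p pointing at
+ * it:
+ */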
+static int bch2_make_extent_indirect(struct btree_trans *trans,
+ struct btree_iter *extent_iter,
+ struct bkey_i *orig)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter reflink_iter = { NULL };
+ struct bkey_s_c k;
+ struct bkey_i *r_v;
+ struct bkey_i_reflink_p *r_p;
+ __le64 *refcount;
+ int ret;
+
+ if (orig->k.type == KEY_TYPE_inline_data)
+ bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data);
+
+ bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX,
+ BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek_prev(&reflink_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k));
+ ret = PTR_ERR_OR_ZERO(r_v);
+ if (ret)
+ goto err;
+
+ bkey_init(&r_v->k);
+ r_v->k.type = bkey_type_to_indirect(&orig->k);
+ r_v->k.p = reflink_iter.pos;
+ bch2_key_resize(&r_v->k, orig->k.size);
+ r_v->k.version = orig->k.version;
+
+ set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
+
+ refcount = bkey_refcount(bkey_i_to_s(r_v));
+ *refcount = 0;
+ memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
+
+ ret = bch2_trans_update(trans, &reflink_iter, r_v, 0);
+ if (ret)
+ goto err;
+
+ /*
+ * orig is in a bkey_buf, which statically allocates five u64s for the val,
+ * so we know it will be big enough:
+ */
+ orig->k.type = KEY_TYPE_reflink_p;
+ r_p = bkey_i_to_reflink_p(orig);
+ set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
+
+ /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */
+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
+ __underlying_memset(&r_p->v, 0, sizeof(r_p->v));
+#else
+ memset(&r_p->v, 0, sizeof(r_p->v));
+#endif
+
+ r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k));
+
+ ret = bch2_trans_update(trans, extent_iter, &r_p->k_i,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+err:
+ bch2_trans_iter_exit(trans, &reflink_iter);
+
+ return ret;
+}
+
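+/* find the next extent before end that contains written data: */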
+static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
+{
+ struct bkey_s_c k;
+ int ret;
+
+ for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) {
+ if (bkey_extent_is_unwritten(k))
+ continue;
+
+ if (bkey_extent_is_data(k.k))
+ return k;
+ }
+
+ if (bkey_ge(iter->pos, end))
+ bch2_btree_iter_set_pos(iter, end);
+ return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
+}
+
+s64 bch2_remap_range(struct bch_fs *c,
+ subvol_inum dst_inum, u64 dst_offset,
+ subvol_inum src_inum, u64 src_offset,
+ u64 remap_sectors,
+ u64 new_i_size, s64 *i_sectors_delta)
+{
+ struct btree_trans *trans;
+ struct btree_iter dst_iter, src_iter;
+ struct bkey_s_c src_k;
+ struct bkey_buf new_dst, new_src;
+ struct bpos dst_start = POS(dst_inum.inum, dst_offset);
+ struct bpos src_start = POS(src_inum.inum, src_offset);
+ struct bpos dst_end = dst_start, src_end = src_start;
+ struct bch_io_opts opts;
+ struct bpos src_want;
+ u64 dst_done = 0;
+ u32 dst_snapshot, src_snapshot;
+ int ret = 0, ret2 = 0;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink))
+ return -BCH_ERR_erofs_no_writes;
+
+ bch2_check_set_feature(c, BCH_FEATURE_reflink);
+
+ dst_end.offset += remap_sectors;
+ src_end.offset += remap_sectors;
+
+ bch2_bkey_buf_init(&new_dst);
+ bch2_bkey_buf_init(&new_src);
+ trans = bch2_trans_get(c);
+
+ ret = bch2_inum_opts_get(trans, src_inum, &opts);
+ if (ret)
+ goto err;
+
+ bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start,
+ BTREE_ITER_INTENT);
+ bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start,
+ BTREE_ITER_INTENT);
+
+ while ((ret == 0 ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) &&
+ bkey_lt(dst_iter.pos, dst_end)) {
+ struct disk_reservation disk_res = { 0 };
+
+ bch2_trans_begin(trans);
+
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+
+ ret = bch2_subvolume_get_snapshot(trans, src_inum.subvol,
+ &src_snapshot);
+ if (ret)
+ continue;
+
+ bch2_btree_iter_set_snapshot(&src_iter, src_snapshot);
+
+ ret = bch2_subvolume_get_snapshot(trans, dst_inum.subvol,
+ &dst_snapshot);
+ if (ret)
+ continue;
+
+ bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot);
+
+ dst_done = dst_iter.pos.offset - dst_start.offset;
+ src_want = POS(src_start.inode, src_start.offset + dst_done);
+ bch2_btree_iter_set_pos(&src_iter, src_want);
+
+ src_k = get_next_src(&src_iter, src_end);
+ ret = bkey_err(src_k);
+ if (ret)
+ continue;
+
+ if (bkey_lt(src_want, src_iter.pos)) {
+ ret = bch2_fpunch_at(trans, &dst_iter, dst_inum,
+ min(dst_end.offset,
+ dst_iter.pos.offset +
+ src_iter.pos.offset - src_want.offset),
+ i_sectors_delta);
+ continue;
+ }
+
+ if (src_k.k->type != KEY_TYPE_reflink_p) {
+ bch2_btree_iter_set_pos_to_extent_start(&src_iter);
+
+ bch2_bkey_buf_reassemble(&new_src, c, src_k);
+ src_k = bkey_i_to_s_c(new_src.k);
+
+ ret = bch2_make_extent_indirect(trans, &src_iter,
+ new_src.k);
+ if (ret)
+ continue;
+
+ BUG_ON(src_k.k->type != KEY_TYPE_reflink_p);
+ }
+
+ if (src_k.k->type == KEY_TYPE_reflink_p) {
+ struct bkey_s_c_reflink_p src_p =
+ bkey_s_c_to_reflink_p(src_k);
+ struct bkey_i_reflink_p *dst_p =
+ bkey_reflink_p_init(new_dst.k);
+
+ u64 offset = le64_to_cpu(src_p.v->idx) +
+ (src_want.offset -
+ bkey_start_offset(src_k.k));
+
+ dst_p->v.idx = cpu_to_le64(offset);
+ } else {
+ BUG();
+ }
+
+ new_dst.k->k.p = dst_iter.pos;
+ bch2_key_resize(&new_dst.k->k,
+ min(src_k.k->p.offset - src_want.offset,
+ dst_end.offset - dst_iter.pos.offset));
+
+ ret = bch2_bkey_set_needs_rebalance(c, new_dst.k,
+ opts.background_target,
+ opts.background_compression) ?:
+ bch2_extent_update(trans, dst_inum, &dst_iter,
+ new_dst.k, &disk_res,
+ new_i_size, i_sectors_delta,
+ true);
+ bch2_disk_reservation_put(c, &disk_res);
+ }
+ bch2_trans_iter_exit(trans, &dst_iter);
+ bch2_trans_iter_exit(trans, &src_iter);
+
+ BUG_ON(!ret && !bkey_eq(dst_iter.pos, dst_end));
+ BUG_ON(bkey_gt(dst_iter.pos, dst_end));
+
+ dst_done = dst_iter.pos.offset - dst_start.offset;
+ new_i_size = min(dst_iter.pos.offset << 9, new_i_size);
+
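+ /* update the destination inode's size if the remap extended it: */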
+ do {
+ struct bch_inode_unpacked inode_u;
+ struct btree_iter inode_iter = { NULL };
+
+ bch2_trans_begin(trans);
+
+ ret2 = bch2_inode_peek(trans, &inode_iter, &inode_u,
+ dst_inum, BTREE_ITER_INTENT);
+
+ if (!ret2 &&
+ inode_u.bi_size < new_i_size) {
+ inode_u.bi_size = new_i_size;
+ ret2 = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc);
+ }
+
+ bch2_trans_iter_exit(trans, &inode_iter);
+ } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart));
+err:
+ bch2_trans_put(trans);
+ bch2_bkey_buf_exit(&new_src, c);
+ bch2_bkey_buf_exit(&new_dst, c);
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_reflink);
+
+ return dst_done ?: ret ?: ret2;
+}
diff --git a/c_src/libbcachefs/reflink.h b/c_src/libbcachefs/reflink.h
new file mode 100644
index 00000000..8ee778ec
--- /dev/null
+++ b/c_src/libbcachefs/reflink.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REFLINK_H
+#define _BCACHEFS_REFLINK_H
+
+enum bkey_invalid_flags;
+
+int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
+ struct bkey_s_c);
+bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
+int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+
+#define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \
+ .key_invalid = bch2_reflink_p_invalid, \
+ .val_to_text = bch2_reflink_p_to_text, \
+ .key_merge = bch2_reflink_p_merge, \
+ .trigger = bch2_trigger_reflink_p, \
+ .min_val_size = 16, \
+})
+
+int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
+ struct bkey_s_c);
+int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+
+#define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \
+ .key_invalid = bch2_reflink_v_invalid, \
+ .val_to_text = bch2_reflink_v_to_text, \
+ .swab = bch2_ptr_swab, \
+ .trigger = bch2_trans_mark_reflink_v, \
+ .min_val_size = 8, \
+})
+
+int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_indirect_inline_data_to_text(struct printbuf *,
+ struct bch_fs *, struct bkey_s_c);
+int bch2_trans_mark_indirect_inline_data(struct btree_trans *,
+ enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s,
+ unsigned);
+
+#define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \
+ .key_invalid = bch2_indirect_inline_data_invalid, \
+ .val_to_text = bch2_indirect_inline_data_to_text, \
+ .trigger = bch2_trans_mark_indirect_inline_data, \
+ .min_val_size = 8, \
+})
+
+static inline const __le64 *bkey_refcount_c(struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_reflink_v:
+ return &bkey_s_c_to_reflink_v(k).v->refcount;
+ case KEY_TYPE_indirect_inline_data:
+ return &bkey_s_c_to_indirect_inline_data(k).v->refcount;
+ default:
+ return NULL;
+ }
+}
+
+static inline __le64 *bkey_refcount(struct bkey_s k)
+{
+ switch (k.k->type) {
+ case KEY_TYPE_reflink_v:
+ return &bkey_s_to_reflink_v(k).v->refcount;
+ case KEY_TYPE_indirect_inline_data:
+ return &bkey_s_to_indirect_inline_data(k).v->refcount;
+ default:
+ return NULL;
+ }
+}
+
+s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64,
+ subvol_inum, u64, u64, u64, s64 *);
+
+#endif /* _BCACHEFS_REFLINK_H */
diff --git a/c_src/libbcachefs/replicas.c b/c_src/libbcachefs/replicas.c
new file mode 100644
index 00000000..92ba56ef
--- /dev/null
+++ b/c_src/libbcachefs/replicas.c
@@ -0,0 +1,1057 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "buckets.h"
+#include "journal.h"
+#include "replicas.h"
+#include "super-io.h"
+
+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
+ struct bch_replicas_cpu *);
+
+/* Replicas tracking - in memory: */
+
+static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+ unsigned i;
+
+ BUG_ON(e->data_type >= BCH_DATA_NR);
+ BUG_ON(!e->nr_devs);
+ BUG_ON(e->nr_required > 1 &&
+ e->nr_required >= e->nr_devs);
+
+ for (i = 0; i + 1 < e->nr_devs; i++)
+ BUG_ON(e->devs[i] >= e->devs[i + 1]);
+#endif
+}
+
+void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
+{
+ bubble_sort(e->devs, e->nr_devs, u8_cmp);
+}
+
+static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
+{
+ eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
+}
+
+static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
+ struct bch_replicas_entry_v0 *e)
+{
+ unsigned i;
+
+ if (e->data_type < BCH_DATA_NR)
+ prt_printf(out, "%s", bch2_data_types[e->data_type]);
+ else
+ prt_printf(out, "(invalid data type %u)", e->data_type);
+
+ prt_printf(out, ": %u [", e->nr_devs);
+ for (i = 0; i < e->nr_devs; i++)
+ prt_printf(out, i ? " %u" : "%u", e->devs[i]);
+ prt_printf(out, "]");
+}
+
+void bch2_replicas_entry_to_text(struct printbuf *out,
+ struct bch_replicas_entry_v1 *e)
+{
+ unsigned i;
+
+ if (e->data_type < BCH_DATA_NR)
+ prt_printf(out, "%s", bch2_data_types[e->data_type]);
+ else
+ prt_printf(out, "(invalid data type %u)", e->data_type);
+
+ prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs);
+ for (i = 0; i < e->nr_devs; i++)
+ prt_printf(out, i ? " %u" : "%u", e->devs[i]);
+ prt_printf(out, "]");
+}
+
+int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
+ struct bch_sb *sb,
+ struct printbuf *err)
+{
+ if (!r->nr_devs) {
+ prt_printf(err, "no devices in entry ");
+ goto bad;
+ }
+
+ if (r->nr_required > 1 &&
+ r->nr_required >= r->nr_devs) {
+ prt_printf(err, "bad nr_required in entry ");
+ goto bad;
+ }
+
+ for (unsigned i = 0; i < r->nr_devs; i++)
+ if (!bch2_dev_exists(sb, r->devs[i])) {
+ prt_printf(err, "invalid device %u in entry ", r->devs[i]);
+ goto bad;
+ }
+
+ return 0;
+bad:
+ bch2_replicas_entry_to_text(err, r);
+ return -BCH_ERR_invalid_replicas_entry;
+}
+
+void bch2_cpu_replicas_to_text(struct printbuf *out,
+ struct bch_replicas_cpu *r)
+{
+ struct bch_replicas_entry_v1 *e;
+ bool first = true;
+
+ for_each_cpu_replicas_entry(r, e) {
+ if (!first)
+ prt_printf(out, " ");
+ first = false;
+
+ bch2_replicas_entry_to_text(out, e);
+ }
+}
+
+static void extent_to_replicas(struct bkey_s_c k,
+ struct bch_replicas_entry_v1 *r)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+
+ r->nr_required = 1;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.ptr.cached)
+ continue;
+
+ if (!p.has_ec)
+ r->devs[r->nr_devs++] = p.ptr.dev;
+ else
+ r->nr_required = 0;
+ }
+}
+
+static void stripe_to_replicas(struct bkey_s_c k,
+ struct bch_replicas_entry_v1 *r)
+{
+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+ const struct bch_extent_ptr *ptr;
+
+ r->nr_required = s.v->nr_blocks - s.v->nr_redundant;
+
+ for (ptr = s.v->ptrs;
+ ptr < s.v->ptrs + s.v->nr_blocks;
+ ptr++)
+ r->devs[r->nr_devs++] = ptr->dev;
+}
+
+void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e,
+ struct bkey_s_c k)
+{
+ e->nr_devs = 0;
+
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_btree_ptr_v2:
+ e->data_type = BCH_DATA_btree;
+ extent_to_replicas(k, e);
+ break;
+ case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v:
+ e->data_type = BCH_DATA_user;
+ extent_to_replicas(k, e);
+ break;
+ case KEY_TYPE_stripe:
+ e->data_type = BCH_DATA_parity;
+ stripe_to_replicas(k, e);
+ break;
+ }
+
+ bch2_replicas_entry_sort(e);
+}
+
+void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
+ enum bch_data_type data_type,
+ struct bch_devs_list devs)
+{
+ BUG_ON(!data_type ||
+ data_type == BCH_DATA_sb ||
+ data_type >= BCH_DATA_NR);
+
+ e->data_type = data_type;
+ e->nr_devs = 0;
+ e->nr_required = 1;
+
+ darray_for_each(devs, i)
+ e->devs[e->nr_devs++] = *i;
+
+ bch2_replicas_entry_sort(e);
+}
+
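+/*
+ * Allocate a new replicas table consisting of the old entries plus new_entry,
+ * sorted; .entries will be NULL on allocation failure:
+ */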
+static struct bch_replicas_cpu
+cpu_replicas_add_entry(struct bch_fs *c,
+ struct bch_replicas_cpu *old,
+ struct bch_replicas_entry_v1 *new_entry)
+{
+ unsigned i;
+ struct bch_replicas_cpu new = {
+ .nr = old->nr + 1,
+ .entry_size = max_t(unsigned, old->entry_size,
+ replicas_entry_bytes(new_entry)),
+ };
+
+ for (i = 0; i < new_entry->nr_devs; i++)
+ BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));
+
+ BUG_ON(!new_entry->data_type);
+ verify_replicas_entry(new_entry);
+
+ new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL);
+ if (!new.entries)
+ return new;
+
+ for (i = 0; i < old->nr; i++)
+ memcpy(cpu_replicas_entry(&new, i),
+ cpu_replicas_entry(old, i),
+ old->entry_size);
+
+ memcpy(cpu_replicas_entry(&new, old->nr),
+ new_entry,
+ replicas_entry_bytes(new_entry));
+
+ bch2_cpu_replicas_sort(&new);
+ return new;
+}
+
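+/* eytzinger binary search for an exact match; returns -1 if not found: */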
+static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
+ struct bch_replicas_entry_v1 *search)
+{
+ int idx, entry_size = replicas_entry_bytes(search);
+
+ if (unlikely(entry_size > r->entry_size))
+ return -1;
+
+ verify_replicas_entry(search);
+
+#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
+ idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
+ entry_cmp, search);
+#undef entry_cmp
+
+ return idx < r->nr ? idx : -1;
+}
+
+int bch2_replicas_entry_idx(struct bch_fs *c,
+ struct bch_replicas_entry_v1 *search)
+{
+ bch2_replicas_entry_sort(search);
+
+ return __replicas_entry_idx(&c->replicas, search);
+}
+
+static bool __replicas_has_entry(struct bch_replicas_cpu *r,
+ struct bch_replicas_entry_v1 *search)
+{
+ return __replicas_entry_idx(r, search) >= 0;
+}
+
+bool bch2_replicas_marked(struct bch_fs *c,
+ struct bch_replicas_entry_v1 *search)
+{
+ bool marked;
+
+ if (!search->nr_devs)
+ return true;
+
+ verify_replicas_entry(search);
+
+ percpu_down_read(&c->mark_lock);
+ marked = __replicas_has_entry(&c->replicas, search) &&
+ (likely((!c->replicas_gc.entries)) ||
+ __replicas_has_entry(&c->replicas_gc, search));
+ percpu_up_read(&c->mark_lock);
+
+ return marked;
+}
+
+static void __replicas_table_update(struct bch_fs_usage *dst,
+ struct bch_replicas_cpu *dst_r,
+ struct bch_fs_usage *src,
+ struct bch_replicas_cpu *src_r)
+{
+ int src_idx, dst_idx;
+
+ *dst = *src;
+
+ for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
+ if (!src->replicas[src_idx])
+ continue;
+
+ dst_idx = __replicas_entry_idx(dst_r,
+ cpu_replicas_entry(src_r, src_idx));
+ BUG_ON(dst_idx < 0);
+
+ dst->replicas[dst_idx] = src->replicas[src_idx];
+ }
+}
+
+static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p,
+ struct bch_replicas_cpu *dst_r,
+ struct bch_fs_usage __percpu *src_p,
+ struct bch_replicas_cpu *src_r)
+{
+ unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
+ struct bch_fs_usage *dst, *src = (void *)
+ bch2_acc_percpu_u64s((u64 __percpu *) src_p, src_nr);
+
+ preempt_disable();
+ dst = this_cpu_ptr(dst_p);
+ preempt_enable();
+
+ __replicas_table_update(dst, dst_r, src, src_r);
+}
+
+/*
+ * Resize filesystem accounting:
+ */
+static int replicas_table_update(struct bch_fs *c,
+ struct bch_replicas_cpu *new_r)
+{
+ struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR];
+ struct bch_fs_usage_online *new_scratch = NULL;
+ struct bch_fs_usage __percpu *new_gc = NULL;
+ struct bch_fs_usage *new_base = NULL;
+ unsigned i, bytes = sizeof(struct bch_fs_usage) +
+ sizeof(u64) * new_r->nr;
+ unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) +
+ sizeof(u64) * new_r->nr;
+ int ret = 0;
+
+ memset(new_usage, 0, sizeof(new_usage));
+
+ for (i = 0; i < ARRAY_SIZE(new_usage); i++)
+ if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
+ sizeof(u64), GFP_KERNEL)))
+ goto err;
+
+ if (!(new_base = kzalloc(bytes, GFP_KERNEL)) ||
+ !(new_scratch = kmalloc(scratch_bytes, GFP_KERNEL)) ||
+ (c->usage_gc &&
+ !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_KERNEL))))
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(new_usage); i++)
+ if (c->usage[i])
+ __replicas_table_update_pcpu(new_usage[i], new_r,
+ c->usage[i], &c->replicas);
+ if (c->usage_base)
+ __replicas_table_update(new_base, new_r,
+ c->usage_base, &c->replicas);
+ if (c->usage_gc)
+ __replicas_table_update_pcpu(new_gc, new_r,
+ c->usage_gc, &c->replicas);
+
+ for (i = 0; i < ARRAY_SIZE(new_usage); i++)
+ swap(c->usage[i], new_usage[i]);
+ swap(c->usage_base, new_base);
+ swap(c->usage_scratch, new_scratch);
+ swap(c->usage_gc, new_gc);
+ swap(c->replicas, *new_r);
+out:
+ free_percpu(new_gc);
+ kfree(new_scratch);
+ for (i = 0; i < ARRAY_SIZE(new_usage); i++)
+ free_percpu(new_usage[i]);
+ kfree(new_base);
+ return ret;
+err:
+ bch_err(c, "error updating replicas table: memory allocation failure");
+ ret = -BCH_ERR_ENOMEM_replicas_table;
+ goto out;
+}
+
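+/*
+ * Calculate how many u64s of journal space to reserve for accounting entries:
+ * nr_inodes, key_version, persistent_reserved, plus one data usage entry per
+ * replicas entry:
+ */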
+static unsigned reserve_journal_replicas(struct bch_fs *c,
+ struct bch_replicas_cpu *r)
+{
+ struct bch_replicas_entry_v1 *e;
+ unsigned journal_res_u64s = 0;
+
+ /* nr_inodes: */
+ journal_res_u64s +=
+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
+
+ /* key_version: */
+ journal_res_u64s +=
+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
+
+ /* persistent_reserved: */
+ journal_res_u64s +=
+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) *
+ BCH_REPLICAS_MAX;
+
+ for_each_cpu_replicas_entry(r, e)
+ journal_res_u64s +=
+ DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) +
+ e->nr_devs, sizeof(u64));
+ return journal_res_u64s;
+}
+
+noinline
+static int bch2_mark_replicas_slowpath(struct bch_fs *c,
+ struct bch_replicas_entry_v1 *new_entry)
+{
+ struct bch_replicas_cpu new_r, new_gc;
+ int ret = 0;
+
+ verify_replicas_entry(new_entry);
+
+ memset(&new_r, 0, sizeof(new_r));
+ memset(&new_gc, 0, sizeof(new_gc));
+
+ mutex_lock(&c->sb_lock);
+
+ if (c->replicas_gc.entries &&
+ !__replicas_has_entry(&c->replicas_gc, new_entry)) {
+ new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
+ if (!new_gc.entries) {
+ ret = -BCH_ERR_ENOMEM_cpu_replicas;
+ goto err;
+ }
+ }
+
+ if (!__replicas_has_entry(&c->replicas, new_entry)) {
+ new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
+ if (!new_r.entries) {
+ ret = -BCH_ERR_ENOMEM_cpu_replicas;
+ goto err;
+ }
+
+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
+ if (ret)
+ goto err;
+
+ bch2_journal_entry_res_resize(&c->journal,
+ &c->replicas_journal_res,
+ reserve_journal_replicas(c, &new_r));
+ }
+
+ if (!new_r.entries &&
+ !new_gc.entries)
+ goto out;
+
+ /* allocations done, now commit: */
+
+ if (new_r.entries)
+ bch2_write_super(c);
+
+ /* don't update in-memory replicas until changes are persistent */
+ percpu_down_write(&c->mark_lock);
+ if (new_r.entries)
+ ret = replicas_table_update(c, &new_r);
+ if (new_gc.entries)
+ swap(new_gc, c->replicas_gc);
+ percpu_up_write(&c->mark_lock);
+out:
+ mutex_unlock(&c->sb_lock);
+
+ kfree(new_r.entries);
+ kfree(new_gc.entries);
+
+ return ret;
+err:
+ bch_err_msg(c, ret, "adding replicas entry");
+ goto out;
+}
+
+int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
+{
+ return likely(bch2_replicas_marked(c, r))
+ ? 0 : bch2_mark_replicas_slowpath(c, r);
+}
+
+/* replicas delta list: */
+
+int bch2_replicas_delta_list_mark(struct bch_fs *c,
+ struct replicas_delta_list *r)
+{
+ struct replicas_delta *d = r->d;
+ struct replicas_delta *top = (void *) r->d + r->used;
+ int ret = 0;
+
+ for (d = r->d; !ret && d != top; d = replicas_delta_next(d))
+ ret = bch2_mark_replicas(c, &d->r);
+ return ret;
+}
+
+/*
+ * Old replicas_gc mechanism: only used for journal replicas entries now, should
+ * die at some point:
+ */
+
+int bch2_replicas_gc_end(struct bch_fs *c, int ret)
+{
+ lockdep_assert_held(&c->replicas_gc_lock);
+
+ mutex_lock(&c->sb_lock);
+ percpu_down_write(&c->mark_lock);
+
+ ret = ret ?:
+ bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc) ?:
+ replicas_table_update(c, &c->replicas_gc);
+
+ kfree(c->replicas_gc.entries);
+ c->replicas_gc.entries = NULL;
+
+ percpu_up_write(&c->mark_lock);
+
+ if (!ret)
+ bch2_write_super(c);
+
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
+}
+
+int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
+{
+ struct bch_replicas_entry_v1 *e;
+ unsigned i = 0;
+
+ lockdep_assert_held(&c->replicas_gc_lock);
+
+ mutex_lock(&c->sb_lock);
+ BUG_ON(c->replicas_gc.entries);
+
+ c->replicas_gc.nr = 0;
+ c->replicas_gc.entry_size = 0;
+
+ for_each_cpu_replicas_entry(&c->replicas, e)
+ if (!((1 << e->data_type) & typemask)) {
+ c->replicas_gc.nr++;
+ c->replicas_gc.entry_size =
+ max_t(unsigned, c->replicas_gc.entry_size,
+ replicas_entry_bytes(e));
+ }
+
+ c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
+ c->replicas_gc.entry_size,
+ GFP_KERNEL);
+ if (!c->replicas_gc.entries) {
+ mutex_unlock(&c->sb_lock);
+ bch_err(c, "error allocating c->replicas_gc");
+ return -BCH_ERR_ENOMEM_replicas_gc;
+ }
+
+ for_each_cpu_replicas_entry(&c->replicas, e)
+ if (!((1 << e->data_type) & typemask))
+ memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
+ e, c->replicas_gc.entry_size);
+
+ bch2_cpu_replicas_sort(&c->replicas_gc);
+ mutex_unlock(&c->sb_lock);
+
+ return 0;
+}
+
+/*
+ * New much simpler mechanism for clearing out unneeded replicas entries - drop
+ * replicas entries that have 0 sectors used.
+ *
+ * However, we don't track sector counts for journal usage, so this doesn't drop
+ * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
+ * is retained for that.
+ */
+int bch2_replicas_gc2(struct bch_fs *c)
+{
+ struct bch_replicas_cpu new = { 0 };
+ unsigned i, nr;
+ int ret = 0;
+
+ bch2_journal_meta(&c->journal);
+retry:
+ nr = READ_ONCE(c->replicas.nr);
+ new.entry_size = READ_ONCE(c->replicas.entry_size);
+ new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
+ if (!new.entries) {
+ bch_err(c, "error allocating c->replicas_gc");
+ return -BCH_ERR_ENOMEM_replicas_gc;
+ }
+
+ mutex_lock(&c->sb_lock);
+ percpu_down_write(&c->mark_lock);
+
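+ /* the replicas table may have grown while we allocated without locks held: */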
+ if (nr != c->replicas.nr ||
+ new.entry_size != c->replicas.entry_size) {
+ percpu_up_write(&c->mark_lock);
+ mutex_unlock(&c->sb_lock);
+ kfree(new.entries);
+ goto retry;
+ }
+
+ for (i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry_v1 *e =
+ cpu_replicas_entry(&c->replicas, i);
+
+ if (e->data_type == BCH_DATA_journal ||
+ c->usage_base->replicas[i] ||
+ percpu_u64_get(&c->usage[0]->replicas[i]) ||
+ percpu_u64_get(&c->usage[1]->replicas[i]) ||
+ percpu_u64_get(&c->usage[2]->replicas[i]) ||
+ percpu_u64_get(&c->usage[3]->replicas[i]))
+ memcpy(cpu_replicas_entry(&new, new.nr++),
+ e, new.entry_size);
+ }
+
+ bch2_cpu_replicas_sort(&new);
+
+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new) ?:
+ replicas_table_update(c, &new);
+
+ kfree(new.entries);
+
+ percpu_up_write(&c->mark_lock);
+
+ if (!ret)
+ bch2_write_super(c);
+
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
+}
+
+int bch2_replicas_set_usage(struct bch_fs *c,
+ struct bch_replicas_entry_v1 *r,
+ u64 sectors)
+{
+ int ret, idx = bch2_replicas_entry_idx(c, r);
+
+ if (idx < 0) {
+ struct bch_replicas_cpu n;
+
+ n = cpu_replicas_add_entry(c, &c->replicas, r);
+ if (!n.entries)
+ return -BCH_ERR_ENOMEM_cpu_replicas;
+
+ ret = replicas_table_update(c, &n);
+ if (ret)
+ return ret;
+
+ kfree(n.entries);
+
+ idx = bch2_replicas_entry_idx(c, r);
+ BUG_ON(ret < 0);
+ }
+
+ c->usage_base->replicas[idx] = sectors;
+
+ return 0;
+}
+
+/* Replicas tracking - superblock: */
+
+static int
+__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
+ struct bch_replicas_cpu *cpu_r)
+{
+ struct bch_replicas_entry_v1 *e, *dst;
+ unsigned nr = 0, entry_size = 0, idx = 0;
+
+ for_each_replicas_entry(sb_r, e) {
+ entry_size = max_t(unsigned, entry_size,
+ replicas_entry_bytes(e));
+ nr++;
+ }
+
+ cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
+ if (!cpu_r->entries)
+ return -BCH_ERR_ENOMEM_cpu_replicas;
+
+ cpu_r->nr = nr;
+ cpu_r->entry_size = entry_size;
+
+ for_each_replicas_entry(sb_r, e) {
+ dst = cpu_replicas_entry(cpu_r, idx++);
+ memcpy(dst, e, replicas_entry_bytes(e));
+ bch2_replicas_entry_sort(dst);
+ }
+
+ return 0;
+}
+
+static int
+__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
+ struct bch_replicas_cpu *cpu_r)
+{
+ struct bch_replicas_entry_v0 *e;
+ unsigned nr = 0, entry_size = 0, idx = 0;
+
+ for_each_replicas_entry(sb_r, e) {
+ entry_size = max_t(unsigned, entry_size,
+ replicas_entry_bytes(e));
+ nr++;
+ }
+
+ entry_size += sizeof(struct bch_replicas_entry_v1) -
+ sizeof(struct bch_replicas_entry_v0);
+
+ cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
+ if (!cpu_r->entries)
+ return -BCH_ERR_ENOMEM_cpu_replicas;
+
+ cpu_r->nr = nr;
+ cpu_r->entry_size = entry_size;
+
+ for_each_replicas_entry(sb_r, e) {
+ struct bch_replicas_entry_v1 *dst =
+ cpu_replicas_entry(cpu_r, idx++);
+
+ dst->data_type = e->data_type;
+ dst->nr_devs = e->nr_devs;
+ dst->nr_required = 1;
+ memcpy(dst->devs, e->devs, e->nr_devs);
+ bch2_replicas_entry_sort(dst);
+ }
+
+ return 0;
+}
+
+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
+{
+ struct bch_sb_field_replicas *sb_v1;
+ struct bch_sb_field_replicas_v0 *sb_v0;
+ struct bch_replicas_cpu new_r = { 0, 0, NULL };
+ int ret = 0;
+
+ if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
+ ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
+ else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0)))
+ ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
+ if (ret)
+ return ret;
+
+ bch2_cpu_replicas_sort(&new_r);
+
+ percpu_down_write(&c->mark_lock);
+
+ ret = replicas_table_update(c, &new_r);
+ percpu_up_write(&c->mark_lock);
+
+ kfree(new_r.entries);
+
+ return 0;
+}
+
+static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
+ struct bch_replicas_cpu *r)
+{
+ struct bch_sb_field_replicas_v0 *sb_r;
+ struct bch_replicas_entry_v0 *dst;
+ struct bch_replicas_entry_v1 *src;
+ size_t bytes;
+
+ bytes = sizeof(struct bch_sb_field_replicas);
+
+ for_each_cpu_replicas_entry(r, src)
+ bytes += replicas_entry_bytes(src) - 1;
+
+ sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
+ DIV_ROUND_UP(bytes, sizeof(u64)));
+ if (!sb_r)
+ return -BCH_ERR_ENOSPC_sb_replicas;
+
+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
+ sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);
+
+ memset(&sb_r->entries, 0,
+ vstruct_end(&sb_r->field) -
+ (void *) &sb_r->entries);
+
+ dst = sb_r->entries;
+ for_each_cpu_replicas_entry(r, src) {
+ dst->data_type = src->data_type;
+ dst->nr_devs = src->nr_devs;
+ memcpy(dst->devs, src->devs, src->nr_devs);
+
+ dst = replicas_entry_next(dst);
+
+ BUG_ON((void *) dst > vstruct_end(&sb_r->field));
+ }
+
+ return 0;
+}
+
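+/*
+ * Write the in-memory table to the superblock; fall back to the more compact
+ * v0 format unless some entry has nr_required != 1:
+ */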
+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
+ struct bch_replicas_cpu *r)
+{
+ struct bch_sb_field_replicas *sb_r;
+ struct bch_replicas_entry_v1 *dst, *src;
+ bool need_v1 = false;
+ size_t bytes;
+
+ bytes = sizeof(struct bch_sb_field_replicas);
+
+ for_each_cpu_replicas_entry(r, src) {
+ bytes += replicas_entry_bytes(src);
+ if (src->nr_required != 1)
+ need_v1 = true;
+ }
+
+ if (!need_v1)
+ return bch2_cpu_replicas_to_sb_replicas_v0(c, r);
+
+ sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
+ DIV_ROUND_UP(bytes, sizeof(u64)));
+ if (!sb_r)
+ return -BCH_ERR_ENOSPC_sb_replicas;
+
+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
+ sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);
+
+ memset(&sb_r->entries, 0,
+ vstruct_end(&sb_r->field) -
+ (void *) &sb_r->entries);
+
+ dst = sb_r->entries;
+ for_each_cpu_replicas_entry(r, src) {
+ memcpy(dst, src, replicas_entry_bytes(src));
+
+ dst = replicas_entry_next(dst);
+
+ BUG_ON((void *) dst > vstruct_end(&sb_r->field));
+ }
+
+ return 0;
+}
+
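+/*
+ * Sort the table, then validate each entry and reject exact duplicates:
+ */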
+static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
+ struct bch_sb *sb,
+ struct printbuf *err)
+{
+ unsigned i;
+
+ sort_cmp_size(cpu_r->entries,
+ cpu_r->nr,
+ cpu_r->entry_size,
+ memcmp, NULL);
+
+ for (i = 0; i < cpu_r->nr; i++) {
+ struct bch_replicas_entry_v1 *e =
+ cpu_replicas_entry(cpu_r, i);
+
+ int ret = bch2_replicas_entry_validate(e, sb, err);
+ if (ret)
+ return ret;
+
+ if (i + 1 < cpu_r->nr) {
+ struct bch_replicas_entry_v1 *n =
+ cpu_replicas_entry(cpu_r, i + 1);
+
+ BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);
+
+ if (!memcmp(e, n, cpu_r->entry_size)) {
+ prt_printf(err, "duplicate replicas entry ");
+ bch2_replicas_entry_to_text(err, e);
+ return -BCH_ERR_invalid_sb_replicas;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
+ struct bch_replicas_cpu cpu_r;
+ int ret;
+
+ ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r);
+ if (ret)
+ return ret;
+
+ ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
+ kfree(cpu_r.entries);
+ return ret;
+}
+
+static void bch2_sb_replicas_to_text(struct printbuf *out,
+ struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_replicas *r = field_to_type(f, replicas);
+ struct bch_replicas_entry_v1 *e;
+ bool first = true;
+
+ for_each_replicas_entry(r, e) {
+ if (!first)
+ prt_printf(out, " ");
+ first = false;
+
+ bch2_replicas_entry_to_text(out, e);
+ }
+ prt_newline(out);
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
+ .validate = bch2_sb_replicas_validate,
+ .to_text = bch2_sb_replicas_to_text,
+};
+
+static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
+ struct bch_replicas_cpu cpu_r;
+ int ret;
+
+ ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r);
+ if (ret)
+ return ret;
+
+ ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
+ kfree(cpu_r.entries);
+ return ret;
+}
+
+static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
+ struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
+ struct bch_replicas_entry_v0 *e;
+ bool first = true;
+
+ for_each_replicas_entry(sb_r, e) {
+ if (!first)
+ prt_printf(out, " ");
+ first = false;
+
+ bch2_replicas_entry_v0_to_text(out, e);
+ }
+ prt_newline(out);
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
+ .validate = bch2_sb_replicas_v0_validate,
+ .to_text = bch2_sb_replicas_v0_to_text,
+};
+
+/* Query replicas: */
+
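+/*
+ * For every replicas entry, count how many of its devices are online and how
+ * many have failed; degraded or lost data is only tolerated if the
+ * corresponding BCH_FORCE_IF_* flag was passed:
+ */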
+bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
+ unsigned flags, bool print)
+{
+ struct bch_replicas_entry_v1 *e;
+ bool ret = true;
+
+ percpu_down_read(&c->mark_lock);
+ for_each_cpu_replicas_entry(&c->replicas, e) {
+ unsigned i, nr_online = 0, nr_failed = 0, dflags = 0;
+ bool metadata = e->data_type < BCH_DATA_user;
+
+ if (e->data_type == BCH_DATA_cached)
+ continue;
+
+ for (i = 0; i < e->nr_devs; i++) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, e->devs[i]);
+
+ nr_online += test_bit(e->devs[i], devs.d);
+ nr_failed += ca->mi.state == BCH_MEMBER_STATE_failed;
+ }
+
+ if (nr_failed == e->nr_devs)
+ continue;
+
+ if (nr_online < e->nr_required)
+ dflags |= metadata
+ ? BCH_FORCE_IF_METADATA_LOST
+ : BCH_FORCE_IF_DATA_LOST;
+
+ if (nr_online < e->nr_devs)
+ dflags |= metadata
+ ? BCH_FORCE_IF_METADATA_DEGRADED
+ : BCH_FORCE_IF_DATA_DEGRADED;
+
+ if (dflags & ~flags) {
+ if (print) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_replicas_entry_to_text(&buf, e);
+ bch_err(c, "insufficient devices online (%u) for replicas entry %s",
+ nr_online, buf.buf);
+ printbuf_exit(&buf);
+ }
+ ret = false;
+ break;
+ }
+	}
+ percpu_up_read(&c->mark_lock);
+
+ return ret;
+}
+
+unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
+{
+ struct bch_sb_field_replicas *replicas;
+ struct bch_sb_field_replicas_v0 *replicas_v0;
+ unsigned i, data_has = 0;
+
+ replicas = bch2_sb_field_get(sb, replicas);
+ replicas_v0 = bch2_sb_field_get(sb, replicas_v0);
+
+ if (replicas) {
+ struct bch_replicas_entry_v1 *r;
+
+ for_each_replicas_entry(replicas, r)
+ for (i = 0; i < r->nr_devs; i++)
+ if (r->devs[i] == dev)
+ data_has |= 1 << r->data_type;
+ } else if (replicas_v0) {
+ struct bch_replicas_entry_v0 *r;
+
+ for_each_replicas_entry_v0(replicas_v0, r)
+ for (i = 0; i < r->nr_devs; i++)
+ if (r->devs[i] == dev)
+ data_has |= 1 << r->data_type;
+ }
+
+ return data_has;
+}
+
+unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
+{
+ unsigned ret;
+
+ mutex_lock(&c->sb_lock);
+ ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
+}
+
+void bch2_fs_replicas_exit(struct bch_fs *c)
+{
+ unsigned i;
+
+ kfree(c->usage_scratch);
+ for (i = 0; i < ARRAY_SIZE(c->usage); i++)
+ free_percpu(c->usage[i]);
+ kfree(c->usage_base);
+ kfree(c->replicas.entries);
+ kfree(c->replicas_gc.entries);
+
+ mempool_exit(&c->replicas_delta_pool);
+}
+
+int bch2_fs_replicas_init(struct bch_fs *c)
+{
+ bch2_journal_entry_res_resize(&c->journal,
+ &c->replicas_journal_res,
+ reserve_journal_replicas(c, &c->replicas));
+
+ return mempool_init_kmalloc_pool(&c->replicas_delta_pool, 1,
+ REPLICAS_DELTA_LIST_MAX) ?:
+ replicas_table_update(c, &c->replicas);
+}
diff --git a/c_src/libbcachefs/replicas.h b/c_src/libbcachefs/replicas.h
new file mode 100644
index 00000000..654a4b26
--- /dev/null
+++ b/c_src/libbcachefs/replicas.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REPLICAS_H
+#define _BCACHEFS_REPLICAS_H
+
+#include "bkey.h"
+#include "eytzinger.h"
+#include "replicas_types.h"
+
+void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
+void bch2_replicas_entry_to_text(struct printbuf *,
+ struct bch_replicas_entry_v1 *);
+int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *,
+ struct bch_sb *, struct printbuf *);
+void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
+
+static inline struct bch_replicas_entry_v1 *
+cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
+{
+ return (void *) r->entries + r->entry_size * i;
+}
+
+int bch2_replicas_entry_idx(struct bch_fs *,
+ struct bch_replicas_entry_v1 *);
+
+void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *,
+ enum bch_data_type,
+ struct bch_devs_list);
+bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry_v1 *);
+int bch2_mark_replicas(struct bch_fs *,
+ struct bch_replicas_entry_v1 *);
+
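+/* step over the s64 delta (8 bytes) plus the variable length replicas entry: */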
+static inline struct replicas_delta *
+replicas_delta_next(struct replicas_delta *d)
+{
+ return (void *) d + replicas_entry_bytes(&d->r) + 8;
+}
+
+int bch2_replicas_delta_list_mark(struct bch_fs *, struct replicas_delta_list *);
+
+void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *, struct bkey_s_c);
+
+static inline void bch2_replicas_entry_cached(struct bch_replicas_entry_v1 *e,
+ unsigned dev)
+{
+ e->data_type = BCH_DATA_cached;
+ e->nr_devs = 1;
+ e->nr_required = 1;
+ e->devs[0] = dev;
+}
+
+bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
+ unsigned, bool);
+
+unsigned bch2_sb_dev_has_data(struct bch_sb *, unsigned);
+unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
+
+int bch2_replicas_gc_end(struct bch_fs *, int);
+int bch2_replicas_gc_start(struct bch_fs *, unsigned);
+int bch2_replicas_gc2(struct bch_fs *);
+
+int bch2_replicas_set_usage(struct bch_fs *,
+ struct bch_replicas_entry_v1 *,
+ u64);
+
+#define for_each_cpu_replicas_entry(_r, _i) \
+ for (_i = (_r)->entries; \
+ (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
+ _i = (void *) (_i) + (_r)->entry_size)
+
+/* iterate over superblock replicas - used by userspace tools: */
+
+#define replicas_entry_next(_i) \
+ ((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i)))
+
+#define for_each_replicas_entry(_r, _i) \
+ for (_i = (_r)->entries; \
+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
+ (_i) = replicas_entry_next(_i))
+
+#define for_each_replicas_entry_v0(_r, _i) \
+ for (_i = (_r)->entries; \
+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
+ (_i) = replicas_entry_next(_i))
+
+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
+
+void bch2_fs_replicas_exit(struct bch_fs *);
+int bch2_fs_replicas_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_REPLICAS_H */
diff --git a/c_src/libbcachefs/replicas_types.h b/c_src/libbcachefs/replicas_types.h
new file mode 100644
index 00000000..ac90d142
--- /dev/null
+++ b/c_src/libbcachefs/replicas_types.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REPLICAS_TYPES_H
+#define _BCACHEFS_REPLICAS_TYPES_H
+
+struct bch_replicas_cpu {
+ unsigned nr;
+ unsigned entry_size;
+ struct bch_replicas_entry_v1 *entries;
+};
+
+struct replicas_delta {
+ s64 delta;
+ struct bch_replicas_entry_v1 r;
+} __packed;
+
+struct replicas_delta_list {
+ unsigned size;
+ unsigned used;
+
+ struct {} memset_start;
+ u64 nr_inodes;
+ u64 persistent_reserved[BCH_REPLICAS_MAX];
+ struct {} memset_end;
+ struct replicas_delta d[];
+};
+
+#endif /* _BCACHEFS_REPLICAS_TYPES_H */
diff --git a/c_src/libbcachefs/sb-clean.c b/c_src/libbcachefs/sb-clean.c
new file mode 100644
index 00000000..9632f36f
--- /dev/null
+++ b/c_src/libbcachefs/sb-clean.c
@@ -0,0 +1,392 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "error.h"
+#include "journal_io.h"
+#include "replicas.h"
+#include "sb-clean.h"
+#include "super-io.h"
+
+/*
+ * BCH_SB_FIELD_clean:
+ *
+ * Btree roots, and a few other things, are recovered from the journal after an
+ * unclean shutdown - but after a clean shutdown, to avoid having to read the
+ * journal, we can store them in the superblock.
+ *
+ * bch_sb_field_clean simply contains a list of journal entries, stored exactly
+ * as they would be in the journal:
+ */
+
+int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *clean,
+ int write)
+{
+ struct jset_entry *entry;
+ int ret;
+
+ for (entry = clean->start;
+ entry < (struct jset_entry *) vstruct_end(&clean->field);
+ entry = vstruct_next(entry)) {
+ ret = bch2_journal_entry_validate(c, NULL, entry,
+ le16_to_cpu(c->disk_sb.sb->version),
+ BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
+ write);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
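+/*
+ * Find the btree root for @id in either the clean section or the journal,
+ * returning its key and level (or NULL if no root entry was found):
+ */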
+static struct bkey_i *btree_root_find(struct bch_fs *c,
+ struct bch_sb_field_clean *clean,
+ struct jset *j,
+ enum btree_id id, unsigned *level)
+{
+ struct bkey_i *k;
+ struct jset_entry *entry, *start, *end;
+
+ if (clean) {
+ start = clean->start;
+ end = vstruct_end(&clean->field);
+ } else {
+ start = j->start;
+ end = vstruct_last(j);
+ }
+
+ for (entry = start; entry < end; entry = vstruct_next(entry))
+ if (entry->type == BCH_JSET_ENTRY_btree_root &&
+ entry->btree_id == id)
+ goto found;
+
+ return NULL;
+found:
+ if (!entry->u64s)
+ return ERR_PTR(-EINVAL);
+
+ k = entry->start;
+ *level = entry->level;
+ return k;
+}
+
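+/*
+ * Cross-check the clean section against the last journal entry: the journal
+ * sequence number and every btree root must match, otherwise flag a fsck
+ * error:
+ */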
+int bch2_verify_superblock_clean(struct bch_fs *c,
+ struct bch_sb_field_clean **cleanp,
+ struct jset *j)
+{
+ unsigned i;
+ struct bch_sb_field_clean *clean = *cleanp;
+ struct printbuf buf1 = PRINTBUF;
+ struct printbuf buf2 = PRINTBUF;
+ int ret = 0;
+
+ if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
+ sb_clean_journal_seq_mismatch,
+ "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
+ le64_to_cpu(clean->journal_seq),
+ le64_to_cpu(j->seq))) {
+ kfree(clean);
+ *cleanp = NULL;
+ return 0;
+ }
+
+ for (i = 0; i < BTREE_ID_NR; i++) {
+ struct bkey_i *k1, *k2;
+ unsigned l1 = 0, l2 = 0;
+
+ k1 = btree_root_find(c, clean, NULL, i, &l1);
+ k2 = btree_root_find(c, NULL, j, i, &l2);
+
+ if (!k1 && !k2)
+ continue;
+
+ printbuf_reset(&buf1);
+ printbuf_reset(&buf2);
+
+ if (k1)
+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(k1));
+ else
+ prt_printf(&buf1, "(none)");
+
+ if (k2)
+ bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(k2));
+ else
+ prt_printf(&buf2, "(none)");
+
+ mustfix_fsck_err_on(!k1 || !k2 ||
+ IS_ERR(k1) ||
+ IS_ERR(k2) ||
+ k1->k.u64s != k2->k.u64s ||
+ memcmp(k1, k2, bkey_bytes(&k1->k)) ||
+ l1 != l2, c,
+ sb_clean_btree_root_mismatch,
+ "superblock btree root %u doesn't match journal after clean shutdown\n"
+ "sb: l=%u %s\n"
+ "journal: l=%u %s\n", i,
+ l1, buf1.buf,
+ l2, buf2.buf);
+ }
+fsck_err:
+ printbuf_exit(&buf2);
+ printbuf_exit(&buf1);
+ return ret;
+}
+
+struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c)
+{
+ struct bch_sb_field_clean *clean, *sb_clean;
+ int ret;
+
+ mutex_lock(&c->sb_lock);
+ sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean);
+
+ if (fsck_err_on(!sb_clean, c,
+ sb_clean_missing,
+ "superblock marked clean but clean section not present")) {
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+ c->sb.clean = false;
+ mutex_unlock(&c->sb_lock);
+ return NULL;
+ }
+
+ clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
+ GFP_KERNEL);
+ if (!clean) {
+ mutex_unlock(&c->sb_lock);
+ return ERR_PTR(-BCH_ERR_ENOMEM_read_superblock_clean);
+ }
+
+ ret = bch2_sb_clean_validate_late(c, clean, READ);
+ if (ret) {
+ mutex_unlock(&c->sb_lock);
+ return ERR_PTR(ret);
+ }
+
+ mutex_unlock(&c->sb_lock);
+
+ return clean;
+fsck_err:
+ mutex_unlock(&c->sb_lock);
+ return ERR_PTR(ret);
+}
+
+static struct jset_entry *jset_entry_init(struct jset_entry **end, size_t size)
+{
+ struct jset_entry *entry = *end;
+ unsigned u64s = DIV_ROUND_UP(size, sizeof(u64));
+
+ memset(entry, 0, u64s * sizeof(u64));
+ /*
+ * The u64s field counts from the start of data, ignoring the shared
+ * fields.
+ */
+ entry->u64s = cpu_to_le16(u64s - 1);
+
+ *end = vstruct_next(*end);
+ return entry;
+}
+
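+/*
+ * Append accounting entries (nr_inodes, key version, persistent reserved,
+ * per-replicas and per-device usage, io clocks) at *end, in the same format
+ * they would have in the journal:
+ */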
+void bch2_journal_super_entries_add_common(struct bch_fs *c,
+ struct jset_entry **end,
+ u64 journal_seq)
+{
+ percpu_down_read(&c->mark_lock);
+
+ if (!journal_seq) {
+ for (unsigned i = 0; i < ARRAY_SIZE(c->usage); i++)
+ bch2_fs_usage_acc_to_base(c, i);
+ } else {
+ bch2_fs_usage_acc_to_base(c, journal_seq & JOURNAL_BUF_MASK);
+ }
+
+ {
+ struct jset_entry_usage *u =
+ container_of(jset_entry_init(end, sizeof(*u)),
+ struct jset_entry_usage, entry);
+
+ u->entry.type = BCH_JSET_ENTRY_usage;
+ u->entry.btree_id = BCH_FS_USAGE_inodes;
+ u->v = cpu_to_le64(c->usage_base->nr_inodes);
+ }
+
+ {
+ struct jset_entry_usage *u =
+ container_of(jset_entry_init(end, sizeof(*u)),
+ struct jset_entry_usage, entry);
+
+ u->entry.type = BCH_JSET_ENTRY_usage;
+ u->entry.btree_id = BCH_FS_USAGE_key_version;
+ u->v = cpu_to_le64(atomic64_read(&c->key_version));
+ }
+
+ for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) {
+ struct jset_entry_usage *u =
+ container_of(jset_entry_init(end, sizeof(*u)),
+ struct jset_entry_usage, entry);
+
+ u->entry.type = BCH_JSET_ENTRY_usage;
+ u->entry.btree_id = BCH_FS_USAGE_reserved;
+ u->entry.level = i;
+ u->v = cpu_to_le64(c->usage_base->persistent_reserved[i]);
+ }
+
+ for (unsigned i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry_v1 *e =
+ cpu_replicas_entry(&c->replicas, i);
+ struct jset_entry_data_usage *u =
+ container_of(jset_entry_init(end, sizeof(*u) + e->nr_devs),
+ struct jset_entry_data_usage, entry);
+
+ u->entry.type = BCH_JSET_ENTRY_data_usage;
+ u->v = cpu_to_le64(c->usage_base->replicas[i]);
+ unsafe_memcpy(&u->r, e, replicas_entry_bytes(e),
+ "embedded variable length struct");
+ }
+
+ for_each_member_device(c, ca) {
+ unsigned b = sizeof(struct jset_entry_dev_usage) +
+ sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR;
+ struct jset_entry_dev_usage *u =
+ container_of(jset_entry_init(end, b),
+ struct jset_entry_dev_usage, entry);
+
+ u->entry.type = BCH_JSET_ENTRY_dev_usage;
+ u->dev = cpu_to_le32(ca->dev_idx);
+
+ for (unsigned i = 0; i < BCH_DATA_NR; i++) {
+ u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets);
+ u->d[i].sectors = cpu_to_le64(ca->usage_base->d[i].sectors);
+ u->d[i].fragmented = cpu_to_le64(ca->usage_base->d[i].fragmented);
+ }
+ }
+
+ percpu_up_read(&c->mark_lock);
+
+ for (unsigned i = 0; i < 2; i++) {
+ struct jset_entry_clock *clock =
+ container_of(jset_entry_init(end, sizeof(*clock)),
+ struct jset_entry_clock, entry);
+
+ clock->entry.type = BCH_JSET_ENTRY_clock;
+ clock->rw = i;
+ clock->time = cpu_to_le64(atomic64_read(&c->io_clock[i].now));
+ }
+}
+
+static int bch2_sb_clean_validate(struct bch_sb *sb,
+ struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_clean *clean = field_to_type(f, clean);
+
+ if (vstruct_bytes(&clean->field) < sizeof(*clean)) {
+ prt_printf(err, "wrong size (got %zu should be %zu)",
+ vstruct_bytes(&clean->field), sizeof(*clean));
+ return -BCH_ERR_invalid_sb_clean;
+ }
+
+ return 0;
+}
+
+static void bch2_sb_clean_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_clean *clean = field_to_type(f, clean);
+ struct jset_entry *entry;
+
+ prt_printf(out, "flags: %x", le32_to_cpu(clean->flags));
+ prt_newline(out);
+ prt_printf(out, "journal_seq: %llu", le64_to_cpu(clean->journal_seq));
+ prt_newline(out);
+
+ for (entry = clean->start;
+ entry != vstruct_end(&clean->field);
+ entry = vstruct_next(entry)) {
+ if (entry->type == BCH_JSET_ENTRY_btree_keys &&
+ !entry->u64s)
+ continue;
+
+ bch2_journal_entry_to_text(out, NULL, entry);
+ prt_newline(out);
+ }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_clean = {
+ .validate = bch2_sb_clean_validate,
+ .to_text = bch2_sb_clean_to_text,
+};
+
+int bch2_fs_mark_dirty(struct bch_fs *c)
+{
+ int ret;
+
+ /*
+ * Unconditionally write superblock, to verify it hasn't changed before
+ * we go rw:
+ */
+
+ mutex_lock(&c->sb_lock);
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
+
+ ret = bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
+}
+
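+/*
+ * Write out the clean section - btree roots plus the usage entries added
+ * above - so the next mount can recover without reading the journal:
+ */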
+void bch2_fs_mark_clean(struct bch_fs *c)
+{
+ struct bch_sb_field_clean *sb_clean;
+ struct jset_entry *entry;
+ unsigned u64s;
+ int ret;
+
+ mutex_lock(&c->sb_lock);
+ if (BCH_SB_CLEAN(c->disk_sb.sb))
+ goto out;
+
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
+
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_metadata);
+ c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_extents_above_btree_updates));
+ c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_btree_updates_journalled));
+
+ u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;
+
+ sb_clean = bch2_sb_field_resize(&c->disk_sb, clean, u64s);
+ if (!sb_clean) {
+ bch_err(c, "error resizing superblock while setting filesystem clean");
+ goto out;
+ }
+
+ sb_clean->flags = 0;
+ sb_clean->journal_seq = cpu_to_le64(atomic64_read(&c->journal.seq));
+
+ /* Trying to catch outstanding bug: */
+ BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX);
+
+ entry = sb_clean->start;
+ bch2_journal_super_entries_add_common(c, &entry, 0);
+ entry = bch2_btree_roots_to_journal_entries(c, entry, 0);
+ BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
+
+ memset(entry, 0,
+ vstruct_end(&sb_clean->field) - (void *) entry);
+
+ /*
+ * this should be in the write path, and we should be validating every
+ * superblock section:
+ */
+ ret = bch2_sb_clean_validate_late(c, sb_clean, WRITE);
+ if (ret) {
+		bch_err(c, "error marking filesystem clean: validate error");
+ goto out;
+ }
+
+ bch2_write_super(c);
+out:
+ mutex_unlock(&c->sb_lock);
+}
diff --git a/c_src/libbcachefs/sb-clean.h b/c_src/libbcachefs/sb-clean.h
new file mode 100644
index 00000000..71caef28
--- /dev/null
+++ b/c_src/libbcachefs/sb-clean.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_CLEAN_H
+#define _BCACHEFS_SB_CLEAN_H
+
+int bch2_sb_clean_validate_late(struct bch_fs *, struct bch_sb_field_clean *, int);
+int bch2_verify_superblock_clean(struct bch_fs *, struct bch_sb_field_clean **,
+ struct jset *);
+struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *);
+void bch2_journal_super_entries_add_common(struct bch_fs *, struct jset_entry **, u64);
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_clean;
+
+int bch2_fs_mark_dirty(struct bch_fs *);
+void bch2_fs_mark_clean(struct bch_fs *);
+
+#endif /* _BCACHEFS_SB_CLEAN_H */
diff --git a/c_src/libbcachefs/sb-downgrade.c b/c_src/libbcachefs/sb-downgrade.c
new file mode 100644
index 00000000..441dcb1b
--- /dev/null
+++ b/c_src/libbcachefs/sb-downgrade.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Superblock section that contains a list of recovery passes to run when
+ * downgrading past a given version
+ */
+
+#include "bcachefs.h"
+#include "darray.h"
+#include "recovery.h"
+#include "sb-downgrade.h"
+#include "sb-errors.h"
+#include "super-io.h"
+
+#define RECOVERY_PASS_ALL_FSCK BIT_ULL(63)
+
+/*
+ * Upgrade, downgrade tables - run certain recovery passes, fix certain errors
+ *
+ * x(version, recovery_passes, errors...)
+ */
+#define UPGRADE_TABLE() \
+ x(backpointers, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(inode_v3, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(unwritten_extents, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(bucket_gens, \
+ BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(lru_v2, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(fragmentation_lru, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(no_bps_in_alloc_keys, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(snapshot_trees, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(snapshot_skiplists, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_snapshots), \
+ BCH_FSCK_ERR_snapshot_bad_depth, \
+ BCH_FSCK_ERR_snapshot_bad_skiplist) \
+ x(deleted_inodes, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
+ BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \
+ x(rebalance_work, \
+ BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
+
+#define DOWNGRADE_TABLE()
+
+struct upgrade_downgrade_entry {
+ u64 recovery_passes;
+ u16 version;
+ u16 nr_errors;
+ const u16 *errors;
+};
+
+#define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ };
+UPGRADE_TABLE()
+#undef x
+
+static const struct upgrade_downgrade_entry upgrade_table[] = {
+#define x(ver, passes, ...) { \
+ .recovery_passes = passes, \
+ .version = bcachefs_metadata_version_##ver,\
+ .nr_errors = ARRAY_SIZE(upgrade_##ver##_errors), \
+ .errors = upgrade_##ver##_errors, \
+},
+UPGRADE_TABLE()
+#undef x
+};
+
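+/*
+ * When upgrading past a version, schedule that version's recovery passes and
+ * mark its listed fsck errors as silent:
+ */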
+void bch2_sb_set_upgrade(struct bch_fs *c,
+ unsigned old_version,
+ unsigned new_version)
+{
+ lockdep_assert_held(&c->sb_lock);
+
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+ for (const struct upgrade_downgrade_entry *i = upgrade_table;
+ i < upgrade_table + ARRAY_SIZE(upgrade_table);
+ i++)
+ if (i->version > old_version && i->version <= new_version) {
+ u64 passes = i->recovery_passes;
+
+ if (passes & RECOVERY_PASS_ALL_FSCK)
+ passes |= bch2_fsck_recovery_passes();
+ passes &= ~RECOVERY_PASS_ALL_FSCK;
+
+ ext->recovery_passes_required[0] |=
+ cpu_to_le64(bch2_recovery_passes_to_stable(passes));
+
+ for (const u16 *e = i->errors;
+ e < i->errors + i->nr_errors;
+ e++) {
+ __set_bit(*e, c->sb.errors_silent);
+ ext->errors_silent[*e / 64] |= cpu_to_le64(BIT_ULL(*e % 64));
+ }
+ }
+}
+
+#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
+DOWNGRADE_TABLE()
+#undef x
+
+static const struct upgrade_downgrade_entry downgrade_table[] = {
+#define x(ver, passes, ...) { \
+ .recovery_passes = passes, \
+ .version = bcachefs_metadata_version_##ver,\
+ .nr_errors = ARRAY_SIZE(downgrade_##ver##_errors), \
+ .errors = downgrade_##ver##_errors, \
+},
+DOWNGRADE_TABLE()
+#undef x
+};
+
+static inline const struct bch_sb_field_downgrade_entry *
+downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
+{
+ return (void *) &e->errors[le16_to_cpu(e->nr_errors)];
+}
+
+#define for_each_downgrade_entry(_d, _i) \
+ for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries; \
+ (void *) _i < vstruct_end(&(_d)->field) && \
+ (void *) &_i->errors[0] < vstruct_end(&(_d)->field); \
+ _i = downgrade_entry_next_c(_i))
+
+static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
+
+ for_each_downgrade_entry(e, i) {
+ if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) !=
+ BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) {
+ prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
+ BCH_VERSION_MAJOR(le16_to_cpu(i->version)),
+ BCH_VERSION_MAJOR(le16_to_cpu(sb->version)));
+ return -BCH_ERR_invalid_sb_downgrade;
+ }
+ }
+
+ return 0;
+}
+
+static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
+
+ if (out->nr_tabstops <= 1)
+ printbuf_tabstop_push(out, 16);
+
+ for_each_downgrade_entry(e, i) {
+ prt_str(out, "version:");
+ prt_tab(out);
+ bch2_version_to_text(out, le16_to_cpu(i->version));
+ prt_newline(out);
+
+ prt_str(out, "recovery passes:");
+ prt_tab(out);
+ prt_bitflags(out, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0])));
+ prt_newline(out);
+
+ prt_str(out, "errors:");
+ prt_tab(out);
+ bool first = true;
+ for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
+ if (!first)
+ prt_char(out, ',');
+ first = false;
+ unsigned e = le16_to_cpu(i->errors[j]);
+ prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)");
+ }
+ prt_newline(out);
+ }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
+ .validate = bch2_sb_downgrade_validate,
+ .to_text = bch2_sb_downgrade_to_text,
+};
+
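+/*
+ * Rebuild the on-disk downgrade section from downgrade_table, keeping only
+ * entries for the current major version:
+ */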
+int bch2_sb_downgrade_update(struct bch_fs *c)
+{
+ darray_char table = {};
+ int ret = 0;
+
+ for (const struct upgrade_downgrade_entry *src = downgrade_table;
+ src < downgrade_table + ARRAY_SIZE(downgrade_table);
+ src++) {
+ if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+ continue;
+
+ struct bch_sb_field_downgrade_entry *dst;
+ unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;
+
+ ret = darray_make_room(&table, bytes);
+ if (ret)
+ goto out;
+
+ dst = (void *) &darray_top(table);
+ dst->version = cpu_to_le16(src->version);
+ dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes);
+ dst->recovery_passes[1] = 0;
+ dst->nr_errors = cpu_to_le16(src->nr_errors);
+ for (unsigned i = 0; i < src->nr_errors; i++)
+ dst->errors[i] = cpu_to_le16(src->errors[i]);
+
+ table.nr += bytes;
+ }
+
+ struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
+
+ unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));
+
+ if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
+ goto out;
+
+ d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
+ if (!d) {
+ ret = -BCH_ERR_ENOSPC_sb_downgrade;
+ goto out;
+ }
+
+ memcpy(d->entries, table.data, table.nr);
+ memset_u64s_tail(d->entries, 0, table.nr);
+out:
+ darray_exit(&table);
+ return ret;
+}
+
+void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
+{
+ struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
+ if (!d)
+ return;
+
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+ for_each_downgrade_entry(d, i) {
+ unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version));
+ if (new_minor < minor && minor <= old_minor) {
+ ext->recovery_passes_required[0] |= i->recovery_passes[0];
+ ext->recovery_passes_required[1] |= i->recovery_passes[1];
+
+ for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
+ unsigned e = le16_to_cpu(i->errors[j]);
+ if (e < BCH_SB_ERR_MAX)
+ __set_bit(e, c->sb.errors_silent);
+ if (e < sizeof(ext->errors_silent) * 8)
+ ext->errors_silent[e / 64] |= cpu_to_le64(BIT_ULL(e % 64));
+ }
+ }
+ }
+}
diff --git a/c_src/libbcachefs/sb-downgrade.h b/c_src/libbcachefs/sb-downgrade.h
new file mode 100644
index 00000000..57e6c916
--- /dev/null
+++ b/c_src/libbcachefs/sb-downgrade.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_DOWNGRADE_H
+#define _BCACHEFS_SB_DOWNGRADE_H
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade;
+
+int bch2_sb_downgrade_update(struct bch_fs *);
+void bch2_sb_set_upgrade(struct bch_fs *, unsigned, unsigned);
+void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
+
+#endif /* _BCACHEFS_SB_DOWNGRADE_H */
diff --git a/c_src/libbcachefs/sb-errors.c b/c_src/libbcachefs/sb-errors.c
new file mode 100644
index 00000000..5f5bcae3
--- /dev/null
+++ b/c_src/libbcachefs/sb-errors.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "sb-errors.h"
+#include "super-io.h"
+
+const char * const bch2_sb_error_strs[] = {
+#define x(t, n, ...) [n] = #t,
+ BCH_SB_ERRS()
+ NULL
+};
+
+static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id)
+{
+ if (id < BCH_SB_ERR_MAX)
+ prt_str(out, bch2_sb_error_strs[id]);
+ else
+ prt_printf(out, "(unknown error %u)", id);
+}
+
+static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e)
+{
+ return bch2_sb_field_nr_entries(e);
+}
+
+static inline unsigned bch2_sb_field_errors_u64s(unsigned nr)
+{
+ return (sizeof(struct bch_sb_field_errors) +
+ sizeof(struct bch_sb_field_error_entry) * nr) / sizeof(u64);
+}
+
+static int bch2_sb_errors_validate(struct bch_sb *sb, struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_errors *e = field_to_type(f, errors);
+ unsigned i, nr = bch2_sb_field_errors_nr_entries(e);
+
+ for (i = 0; i < nr; i++) {
+ if (!BCH_SB_ERROR_ENTRY_NR(&e->entries[i])) {
+ prt_printf(err, "entry with count 0 (id ");
+ bch2_sb_error_id_to_text(err, BCH_SB_ERROR_ENTRY_ID(&e->entries[i]));
+ prt_printf(err, ")");
+ return -BCH_ERR_invalid_sb_errors;
+ }
+
+ if (i + 1 < nr &&
+ BCH_SB_ERROR_ENTRY_ID(&e->entries[i]) >=
+ BCH_SB_ERROR_ENTRY_ID(&e->entries[i + 1])) {
+ prt_printf(err, "entries out of order");
+ return -BCH_ERR_invalid_sb_errors;
+ }
+ }
+
+ return 0;
+}
+
+static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_errors *e = field_to_type(f, errors);
+ unsigned i, nr = bch2_sb_field_errors_nr_entries(e);
+
+ if (out->nr_tabstops <= 1)
+ printbuf_tabstop_push(out, 16);
+
+ for (i = 0; i < nr; i++) {
+ bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(&e->entries[i]));
+ prt_tab(out);
+ prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i]));
+ prt_tab(out);
+ bch2_prt_datetime(out, le64_to_cpu(e->entries[i].last_error_time));
+ prt_newline(out);
+ }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_errors = {
+ .validate = bch2_sb_errors_validate,
+ .to_text = bch2_sb_errors_to_text,
+};
+
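+/* Bump the in-memory count for this error, keeping the list sorted by id: */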
+void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err)
+{
+ bch_sb_errors_cpu *e = &c->fsck_error_counts;
+ struct bch_sb_error_entry_cpu n = {
+ .id = err,
+ .nr = 1,
+ .last_error_time = ktime_get_real_seconds()
+ };
+ unsigned i;
+
+ mutex_lock(&c->fsck_error_counts_lock);
+ for (i = 0; i < e->nr; i++) {
+ if (err == e->data[i].id) {
+ e->data[i].nr++;
+ e->data[i].last_error_time = n.last_error_time;
+ goto out;
+ }
+ if (err < e->data[i].id)
+ break;
+ }
+
+ if (darray_make_room(e, 1))
+ goto out;
+
+ darray_insert_item(e, i, n);
+out:
+ mutex_unlock(&c->fsck_error_counts_lock);
+}
+
+void bch2_sb_errors_from_cpu(struct bch_fs *c)
+{
+ bch_sb_errors_cpu *src = &c->fsck_error_counts;
+ struct bch_sb_field_errors *dst =
+ bch2_sb_field_resize(&c->disk_sb, errors,
+ bch2_sb_field_errors_u64s(src->nr));
+ unsigned i;
+
+ if (!dst)
+ return;
+
+ for (i = 0; i < src->nr; i++) {
+ SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id);
+ SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr);
+ dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time);
+ }
+}
+
+static int bch2_sb_errors_to_cpu(struct bch_fs *c)
+{
+ struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors);
+ bch_sb_errors_cpu *dst = &c->fsck_error_counts;
+ unsigned i, nr = bch2_sb_field_errors_nr_entries(src);
+ int ret;
+
+ if (!nr)
+ return 0;
+
+ mutex_lock(&c->fsck_error_counts_lock);
+ ret = darray_make_room(dst, nr);
+ if (ret)
+ goto err;
+
+ dst->nr = nr;
+
+ for (i = 0; i < nr; i++) {
+ dst->data[i].id = BCH_SB_ERROR_ENTRY_ID(&src->entries[i]);
+ dst->data[i].nr = BCH_SB_ERROR_ENTRY_NR(&src->entries[i]);
+ dst->data[i].last_error_time = le64_to_cpu(src->entries[i].last_error_time);
+ }
+err:
+ mutex_unlock(&c->fsck_error_counts_lock);
+
+ return ret;
+}
+
+void bch2_fs_sb_errors_exit(struct bch_fs *c)
+{
+ darray_exit(&c->fsck_error_counts);
+}
+
+void bch2_fs_sb_errors_init_early(struct bch_fs *c)
+{
+ mutex_init(&c->fsck_error_counts_lock);
+ darray_init(&c->fsck_error_counts);
+}
+
+int bch2_fs_sb_errors_init(struct bch_fs *c)
+{
+ return bch2_sb_errors_to_cpu(c);
+}
diff --git a/c_src/libbcachefs/sb-errors.h b/c_src/libbcachefs/sb-errors.h
new file mode 100644
index 00000000..8889001e
--- /dev/null
+++ b/c_src/libbcachefs/sb-errors.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_ERRORS_H
+#define _BCACHEFS_SB_ERRORS_H
+
+#include "sb-errors_types.h"
+
+extern const char * const bch2_sb_error_strs[];
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_errors;
+
+void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id);
+
+void bch2_sb_errors_from_cpu(struct bch_fs *);
+
+void bch2_fs_sb_errors_exit(struct bch_fs *);
+void bch2_fs_sb_errors_init_early(struct bch_fs *);
+int bch2_fs_sb_errors_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_SB_ERRORS_H */
diff --git a/c_src/libbcachefs/sb-errors_types.h b/c_src/libbcachefs/sb-errors_types.h
new file mode 100644
index 00000000..c08aacdf
--- /dev/null
+++ b/c_src/libbcachefs/sb-errors_types.h
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_ERRORS_TYPES_H
+#define _BCACHEFS_SB_ERRORS_TYPES_H
+
+#include "darray.h"
+
+#define BCH_SB_ERRS() \
+ x(clean_but_journal_not_empty, 0) \
+ x(dirty_but_no_journal_entries, 1) \
+ x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
+ x(sb_clean_journal_seq_mismatch, 3) \
+ x(sb_clean_btree_root_mismatch, 4) \
+ x(sb_clean_missing, 5) \
+ x(jset_unsupported_version, 6) \
+ x(jset_unknown_csum, 7) \
+ x(jset_last_seq_newer_than_seq, 8) \
+ x(jset_past_bucket_end, 9) \
+ x(jset_seq_blacklisted, 10) \
+ x(journal_entries_missing, 11) \
+ x(journal_entry_replicas_not_marked, 12) \
+ x(journal_entry_past_jset_end, 13) \
+ x(journal_entry_replicas_data_mismatch, 14) \
+ x(journal_entry_bkey_u64s_0, 15) \
+ x(journal_entry_bkey_past_end, 16) \
+ x(journal_entry_bkey_bad_format, 17) \
+ x(journal_entry_bkey_invalid, 18) \
+ x(journal_entry_btree_root_bad_size, 19) \
+ x(journal_entry_blacklist_bad_size, 20) \
+ x(journal_entry_blacklist_v2_bad_size, 21) \
+ x(journal_entry_blacklist_v2_start_past_end, 22) \
+ x(journal_entry_usage_bad_size, 23) \
+ x(journal_entry_data_usage_bad_size, 24) \
+ x(journal_entry_clock_bad_size, 25) \
+ x(journal_entry_clock_bad_rw, 26) \
+ x(journal_entry_dev_usage_bad_size, 27) \
+ x(journal_entry_dev_usage_bad_dev, 28) \
+ x(journal_entry_dev_usage_bad_pad, 29) \
+ x(btree_node_unreadable, 30) \
+ x(btree_node_fault_injected, 31) \
+ x(btree_node_bad_magic, 32) \
+ x(btree_node_bad_seq, 33) \
+ x(btree_node_unsupported_version, 34) \
+ x(btree_node_bset_older_than_sb_min, 35) \
+ x(btree_node_bset_newer_than_sb, 36) \
+ x(btree_node_data_missing, 37) \
+ x(btree_node_bset_after_end, 38) \
+ x(btree_node_replicas_sectors_written_mismatch, 39) \
+ x(btree_node_replicas_data_mismatch, 40) \
+ x(bset_unknown_csum, 41) \
+ x(bset_bad_csum, 42) \
+ x(bset_past_end_of_btree_node, 43) \
+ x(bset_wrong_sector_offset, 44) \
+ x(bset_empty, 45) \
+ x(bset_bad_seq, 46) \
+ x(bset_blacklisted_journal_seq, 47) \
+ x(first_bset_blacklisted_journal_seq, 48) \
+ x(btree_node_bad_btree, 49) \
+ x(btree_node_bad_level, 50) \
+ x(btree_node_bad_min_key, 51) \
+ x(btree_node_bad_max_key, 52) \
+ x(btree_node_bad_format, 53) \
+ x(btree_node_bkey_past_bset_end, 54) \
+ x(btree_node_bkey_bad_format, 55) \
+ x(btree_node_bad_bkey, 56) \
+ x(btree_node_bkey_out_of_order, 57) \
+ x(btree_root_bkey_invalid, 58) \
+ x(btree_root_read_error, 59) \
+ x(btree_root_bad_min_key, 60) \
+ x(btree_root_bad_max_key, 61) \
+ x(btree_node_read_error, 62) \
+ x(btree_node_topology_bad_min_key, 63) \
+ x(btree_node_topology_bad_max_key, 64) \
+ x(btree_node_topology_overwritten_by_prev_node, 65) \
+ x(btree_node_topology_overwritten_by_next_node, 66) \
+ x(btree_node_topology_interior_node_empty, 67) \
+ x(fs_usage_hidden_wrong, 68) \
+ x(fs_usage_btree_wrong, 69) \
+ x(fs_usage_data_wrong, 70) \
+ x(fs_usage_cached_wrong, 71) \
+ x(fs_usage_reserved_wrong, 72) \
+ x(fs_usage_persistent_reserved_wrong, 73) \
+ x(fs_usage_nr_inodes_wrong, 74) \
+ x(fs_usage_replicas_wrong, 75) \
+ x(dev_usage_buckets_wrong, 76) \
+ x(dev_usage_sectors_wrong, 77) \
+ x(dev_usage_fragmented_wrong, 78) \
+ x(dev_usage_buckets_ec_wrong, 79) \
+ x(bkey_version_in_future, 80) \
+ x(bkey_u64s_too_small, 81) \
+ x(bkey_invalid_type_for_btree, 82) \
+ x(bkey_extent_size_zero, 83) \
+ x(bkey_extent_size_greater_than_offset, 84) \
+ x(bkey_size_nonzero, 85) \
+ x(bkey_snapshot_nonzero, 86) \
+ x(bkey_snapshot_zero, 87) \
+ x(bkey_at_pos_max, 88) \
+ x(bkey_before_start_of_btree_node, 89) \
+ x(bkey_after_end_of_btree_node, 90) \
+ x(bkey_val_size_nonzero, 91) \
+ x(bkey_val_size_too_small, 92) \
+ x(alloc_v1_val_size_bad, 93) \
+ x(alloc_v2_unpack_error, 94) \
+ x(alloc_v3_unpack_error, 95) \
+ x(alloc_v4_val_size_bad, 96) \
+ x(alloc_v4_backpointers_start_bad, 97) \
+ x(alloc_key_data_type_bad, 98) \
+ x(alloc_key_empty_but_have_data, 99) \
+ x(alloc_key_dirty_sectors_0, 100) \
+ x(alloc_key_data_type_inconsistency, 101) \
+ x(alloc_key_to_missing_dev_bucket, 102) \
+ x(alloc_key_cached_inconsistency, 103) \
+ x(alloc_key_cached_but_read_time_zero, 104) \
+ x(alloc_key_to_missing_lru_entry, 105) \
+ x(alloc_key_data_type_wrong, 106) \
+ x(alloc_key_gen_wrong, 107) \
+ x(alloc_key_dirty_sectors_wrong, 108) \
+ x(alloc_key_cached_sectors_wrong, 109) \
+ x(alloc_key_stripe_wrong, 110) \
+ x(alloc_key_stripe_redundancy_wrong, 111) \
+ x(bucket_sector_count_overflow, 112) \
+ x(bucket_metadata_type_mismatch, 113) \
+ x(need_discard_key_wrong, 114) \
+ x(freespace_key_wrong, 115) \
+ x(freespace_hole_missing, 116) \
+ x(bucket_gens_val_size_bad, 117) \
+ x(bucket_gens_key_wrong, 118) \
+ x(bucket_gens_hole_wrong, 119) \
+ x(bucket_gens_to_invalid_dev, 120) \
+ x(bucket_gens_to_invalid_buckets, 121) \
+ x(bucket_gens_nonzero_for_invalid_buckets, 122) \
+ x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
+ x(need_discard_freespace_key_bad, 124) \
+ x(backpointer_pos_wrong, 125) \
+ x(backpointer_to_missing_device, 126) \
+ x(backpointer_to_missing_alloc, 127) \
+ x(backpointer_to_missing_ptr, 128) \
+ x(lru_entry_at_time_0, 129) \
+ x(lru_entry_to_invalid_bucket, 130) \
+ x(lru_entry_bad, 131) \
+ x(btree_ptr_val_too_big, 132) \
+ x(btree_ptr_v2_val_too_big, 133) \
+ x(btree_ptr_has_non_ptr, 134) \
+ x(extent_ptrs_invalid_entry, 135) \
+ x(extent_ptrs_no_ptrs, 136) \
+ x(extent_ptrs_too_many_ptrs, 137) \
+ x(extent_ptrs_redundant_crc, 138) \
+ x(extent_ptrs_redundant_stripe, 139) \
+ x(extent_ptrs_unwritten, 140) \
+ x(extent_ptrs_written_and_unwritten, 141) \
+ x(ptr_to_invalid_device, 142) \
+ x(ptr_to_duplicate_device, 143) \
+ x(ptr_after_last_bucket, 144) \
+ x(ptr_before_first_bucket, 145) \
+ x(ptr_spans_multiple_buckets, 146) \
+ x(ptr_to_missing_backpointer, 147) \
+ x(ptr_to_missing_alloc_key, 148) \
+ x(ptr_to_missing_replicas_entry, 149) \
+ x(ptr_to_missing_stripe, 150) \
+ x(ptr_to_incorrect_stripe, 151) \
+ x(ptr_gen_newer_than_bucket_gen, 152) \
+ x(ptr_too_stale, 153) \
+ x(stale_dirty_ptr, 154) \
+ x(ptr_bucket_data_type_mismatch, 155) \
+ x(ptr_cached_and_erasure_coded, 156) \
+ x(ptr_crc_uncompressed_size_too_small, 157) \
+ x(ptr_crc_csum_type_unknown, 158) \
+ x(ptr_crc_compression_type_unknown, 159) \
+ x(ptr_crc_redundant, 160) \
+ x(ptr_crc_uncompressed_size_too_big, 161) \
+ x(ptr_crc_nonce_mismatch, 162) \
+ x(ptr_stripe_redundant, 163) \
+ x(reservation_key_nr_replicas_invalid, 164) \
+ x(reflink_v_refcount_wrong, 165) \
+ x(reflink_p_to_missing_reflink_v, 166) \
+ x(stripe_pos_bad, 167) \
+ x(stripe_val_size_bad, 168) \
+ x(stripe_sector_count_wrong, 169) \
+ x(snapshot_tree_pos_bad, 170) \
+ x(snapshot_tree_to_missing_snapshot, 171) \
+ x(snapshot_tree_to_missing_subvol, 172) \
+ x(snapshot_tree_to_wrong_subvol, 173) \
+ x(snapshot_tree_to_snapshot_subvol, 174) \
+ x(snapshot_pos_bad, 175) \
+ x(snapshot_parent_bad, 176) \
+ x(snapshot_children_not_normalized, 177) \
+ x(snapshot_child_duplicate, 178) \
+ x(snapshot_child_bad, 179) \
+ x(snapshot_skiplist_not_normalized, 180) \
+ x(snapshot_skiplist_bad, 181) \
+ x(snapshot_should_not_have_subvol, 182) \
+ x(snapshot_to_bad_snapshot_tree, 183) \
+ x(snapshot_bad_depth, 184) \
+ x(snapshot_bad_skiplist, 185) \
+ x(subvol_pos_bad, 186) \
+ x(subvol_not_master_and_not_snapshot, 187) \
+ x(subvol_to_missing_root, 188) \
+ x(subvol_root_wrong_bi_subvol, 189) \
+ x(bkey_in_missing_snapshot, 190) \
+ x(inode_pos_inode_nonzero, 191) \
+ x(inode_pos_blockdev_range, 192) \
+ x(inode_unpack_error, 193) \
+ x(inode_str_hash_invalid, 194) \
+ x(inode_v3_fields_start_bad, 195) \
+ x(inode_snapshot_mismatch, 196) \
+ x(inode_unlinked_but_clean, 197) \
+ x(inode_unlinked_but_nlink_nonzero, 198) \
+ x(inode_checksum_type_invalid, 199) \
+ x(inode_compression_type_invalid, 200) \
+ x(inode_subvol_root_but_not_dir, 201) \
+ x(inode_i_size_dirty_but_clean, 202) \
+ x(inode_i_sectors_dirty_but_clean, 203) \
+ x(inode_i_sectors_wrong, 204) \
+ x(inode_dir_wrong_nlink, 205) \
+ x(inode_dir_multiple_links, 206) \
+ x(inode_multiple_links_but_nlink_0, 207) \
+ x(inode_wrong_backpointer, 208) \
+ x(inode_wrong_nlink, 209) \
+ x(inode_unreachable, 210) \
+ x(deleted_inode_but_clean, 211) \
+ x(deleted_inode_missing, 212) \
+ x(deleted_inode_is_dir, 213) \
+ x(deleted_inode_not_unlinked, 214) \
+ x(extent_overlapping, 215) \
+ x(extent_in_missing_inode, 216) \
+ x(extent_in_non_reg_inode, 217) \
+ x(extent_past_end_of_inode, 218) \
+ x(dirent_empty_name, 219) \
+ x(dirent_val_too_big, 220) \
+ x(dirent_name_too_long, 221) \
+ x(dirent_name_embedded_nul, 222) \
+ x(dirent_name_dot_or_dotdot, 223) \
+ x(dirent_name_has_slash, 224) \
+ x(dirent_d_type_wrong, 225) \
+ x(dirent_d_parent_subvol_wrong, 226) \
+ x(dirent_in_missing_dir_inode, 227) \
+ x(dirent_in_non_dir_inode, 228) \
+ x(dirent_to_missing_inode, 229) \
+ x(dirent_to_missing_subvol, 230) \
+ x(dirent_to_itself, 231) \
+ x(quota_type_invalid, 232) \
+ x(xattr_val_size_too_small, 233) \
+ x(xattr_val_size_too_big, 234) \
+ x(xattr_invalid_type, 235) \
+ x(xattr_name_invalid_chars, 236) \
+ x(xattr_in_missing_inode, 237) \
+ x(root_subvol_missing, 238) \
+ x(root_dir_missing, 239) \
+ x(root_inode_not_dir, 240) \
+ x(dir_loop, 241) \
+ x(hash_table_key_duplicate, 242) \
+ x(hash_table_key_wrong_offset, 243) \
+ x(unlinked_inode_not_on_deleted_list, 244) \
+ x(reflink_p_front_pad_bad, 245)
+
+enum bch_sb_error_id {
+#define x(t, n) BCH_FSCK_ERR_##t = n,
+ BCH_SB_ERRS()
+#undef x
+ BCH_SB_ERR_MAX
+};
+
+struct bch_sb_error_entry_cpu {
+ u64 id:16,
+ nr:48;
+ u64 last_error_time;
+};
+
+typedef DARRAY(struct bch_sb_error_entry_cpu) bch_sb_errors_cpu;
+
+#endif /* _BCACHEFS_SB_ERRORS_TYPES_H */
+
diff --git a/c_src/libbcachefs/sb-members.c b/c_src/libbcachefs/sb-members.c
new file mode 100644
index 00000000..a44a238b
--- /dev/null
+++ b/c_src/libbcachefs/sb-members.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "disk_groups.h"
+#include "opts.h"
+#include "replicas.h"
+#include "sb-members.h"
+#include "super-io.h"
+
+#define x(t, n, ...) [n] = #t,
+static const char * const bch2_iops_measurements[] = {
+ BCH_IOPS_MEASUREMENTS()
+ NULL
+};
+
+char * const bch2_member_error_strs[] = {
+ BCH_MEMBER_ERROR_TYPES()
+ NULL
+};
+#undef x
+
+/* Code for bch_sb_field_members_v1 and members_v2: */
+
+struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
+{
+ return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
+}
+
+static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
+{
+ struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
+ memset(&ret, 0, sizeof(ret));
+ memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
+ return ret;
+}
+
+static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i)
+{
+ return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES);
+}
+
+static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i)
+{
+ struct bch_member ret, *p = members_v1_get_mut(mi, i);
+ memset(&ret, 0, sizeof(ret));
+ memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret)));
+ return ret;
+}
+
+struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i)
+{
+ struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2);
+ if (mi2)
+ return members_v2_get(mi2, i);
+ struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1);
+ return members_v1_get(mi1, i);
+}
+
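+/*
+ * Widen each member entry to the current sizeof(struct bch_member), working
+ * from the last entry backwards so earlier entries aren't clobbered:
+ */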
+static int sb_members_v2_resize_entries(struct bch_fs *c)
+{
+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+
+ if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) {
+ unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) *
+ c->disk_sb.sb->nr_devices), 8);
+
+ mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
+ if (!mi)
+ return -BCH_ERR_ENOSPC_sb_members_v2;
+
+ for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
+ void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
+ memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
+ memset(dst + le16_to_cpu(mi->member_bytes),
+ 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
+ }
+ mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
+ }
+ return 0;
+}
+
+int bch2_sb_members_v2_init(struct bch_fs *c)
+{
+ struct bch_sb_field_members_v1 *mi1;
+ struct bch_sb_field_members_v2 *mi2;
+
+ if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) {
+ mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2,
+ DIV_ROUND_UP(sizeof(*mi2) +
+ sizeof(struct bch_member) * c->sb.nr_devices,
+ sizeof(u64)));
+ mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1);
+ memcpy(&mi2->_members[0], &mi1->_members[0],
+ BCH_MEMBER_V1_BYTES * c->sb.nr_devices);
+ memset(&mi2->pad[0], 0, sizeof(mi2->pad));
+ mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES);
+ }
+
+ return sb_members_v2_resize_entries(c);
+}
+
+int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
+{
+ struct bch_sb_field_members_v1 *mi1;
+ struct bch_sb_field_members_v2 *mi2;
+
+ mi1 = bch2_sb_field_resize(disk_sb, members_v1,
+ DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
+ disk_sb->sb->nr_devices, sizeof(u64)));
+ if (!mi1)
+ return -BCH_ERR_ENOSPC_sb_members;
+
+ mi2 = bch2_sb_field_get(disk_sb->sb, members_v2);
+
+ for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++)
+ memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);
+
+ return 0;
+}
+
+static int validate_member(struct printbuf *err,
+ struct bch_member m,
+ struct bch_sb *sb,
+ int i)
+{
+ if (le64_to_cpu(m.nbuckets) > LONG_MAX) {
+ prt_printf(err, "device %u: too many buckets (got %llu, max %lu)",
+ i, le64_to_cpu(m.nbuckets), LONG_MAX);
+ return -BCH_ERR_invalid_sb_members;
+ }
+
+ if (le64_to_cpu(m.nbuckets) -
+ le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) {
+		prt_printf(err, "device %u: not enough buckets (got %llu, min %u)",
+ i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS);
+ return -BCH_ERR_invalid_sb_members;
+ }
+
+ if (le16_to_cpu(m.bucket_size) <
+ le16_to_cpu(sb->block_size)) {
+ prt_printf(err, "device %u: bucket size %u smaller than block size %u",
+ i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size));
+ return -BCH_ERR_invalid_sb_members;
+ }
+
+ if (le16_to_cpu(m.bucket_size) <
+ BCH_SB_BTREE_NODE_SIZE(sb)) {
+ prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu",
+ i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb));
+ return -BCH_ERR_invalid_sb_members;
+ }
+
+ return 0;
+}
+
+static void member_to_text(struct printbuf *out,
+ struct bch_member m,
+ struct bch_sb_field_disk_groups *gi,
+ struct bch_sb *sb,
+ int i)
+{
+ unsigned data_have = bch2_sb_dev_has_data(sb, i);
+ u64 bucket_size = le16_to_cpu(m.bucket_size);
+ u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;
+
+ if (!bch2_member_exists(&m))
+ return;
+
+ prt_printf(out, "Device:");
+ prt_tab(out);
+ prt_printf(out, "%u", i);
+ prt_newline(out);
+
+ printbuf_indent_add(out, 2);
+
+ prt_printf(out, "Label:");
+ prt_tab(out);
+ if (BCH_MEMBER_GROUP(&m)) {
+ unsigned idx = BCH_MEMBER_GROUP(&m) - 1;
+
+ if (idx < disk_groups_nr(gi))
+ prt_printf(out, "%s (%u)",
+ gi->entries[idx].label, idx);
+ else
+ prt_printf(out, "(bad disk labels section)");
+ } else {
+ prt_printf(out, "(none)");
+ }
+ prt_newline(out);
+
+ prt_printf(out, "UUID:");
+ prt_tab(out);
+ pr_uuid(out, m.uuid.b);
+ prt_newline(out);
+
+ prt_printf(out, "Size:");
+ prt_tab(out);
+ prt_units_u64(out, device_size << 9);
+ prt_newline(out);
+
+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
+ prt_printf(out, "%s errors:", bch2_member_error_strs[i]);
+ prt_tab(out);
+ prt_u64(out, le64_to_cpu(m.errors[i]));
+ prt_newline(out);
+ }
+
+ for (unsigned i = 0; i < BCH_IOPS_NR; i++) {
+ prt_printf(out, "%s iops:", bch2_iops_measurements[i]);
+ prt_tab(out);
+ prt_printf(out, "%u", le32_to_cpu(m.iops[i]));
+ prt_newline(out);
+ }
+
+ prt_printf(out, "Bucket size:");
+ prt_tab(out);
+ prt_units_u64(out, bucket_size << 9);
+ prt_newline(out);
+
+ prt_printf(out, "First bucket:");
+ prt_tab(out);
+ prt_printf(out, "%u", le16_to_cpu(m.first_bucket));
+ prt_newline(out);
+
+ prt_printf(out, "Buckets:");
+ prt_tab(out);
+ prt_printf(out, "%llu", le64_to_cpu(m.nbuckets));
+ prt_newline(out);
+
+ prt_printf(out, "Last mount:");
+ prt_tab(out);
+ if (m.last_mount)
+ bch2_prt_datetime(out, le64_to_cpu(m.last_mount));
+ else
+ prt_printf(out, "(never)");
+ prt_newline(out);
+
+ prt_printf(out, "Last superblock write:");
+ prt_tab(out);
+ prt_u64(out, le64_to_cpu(m.seq));
+ prt_newline(out);
+
+ prt_printf(out, "State:");
+ prt_tab(out);
+ prt_printf(out, "%s",
+ BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR
+ ? bch2_member_states[BCH_MEMBER_STATE(&m)]
+ : "unknown");
+ prt_newline(out);
+
+ prt_printf(out, "Data allowed:");
+ prt_tab(out);
+ if (BCH_MEMBER_DATA_ALLOWED(&m))
+ prt_bitflags(out, bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
+ else
+ prt_printf(out, "(none)");
+ prt_newline(out);
+
+ prt_printf(out, "Has data:");
+ prt_tab(out);
+ if (data_have)
+ prt_bitflags(out, bch2_data_types, data_have);
+ else
+ prt_printf(out, "(none)");
+ prt_newline(out);
+
+ prt_str(out, "Durability:");
+ prt_tab(out);
+ prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1);
+ prt_newline(out);
+
+ prt_printf(out, "Discard:");
+ prt_tab(out);
+ prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m));
+ prt_newline(out);
+
+ prt_printf(out, "Freespace initialized:");
+ prt_tab(out);
+ prt_printf(out, "%llu", BCH_MEMBER_FREESPACE_INITIALIZED(&m));
+ prt_newline(out);
+
+ printbuf_indent_sub(out, 2);
+}
+
+static int bch2_sb_members_v1_validate(struct bch_sb *sb,
+ struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
+ unsigned i;
+
+ if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) {
+ prt_printf(err, "too many devices for section size");
+ return -BCH_ERR_invalid_sb_members;
+ }
+
+ for (i = 0; i < sb->nr_devices; i++) {
+ struct bch_member m = members_v1_get(mi, i);
+
+ int ret = validate_member(err, m, sb, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
+ struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
+ unsigned i;
+
+ for (i = 0; i < sb->nr_devices; i++)
+ member_to_text(out, members_v1_get(mi, i), gi, sb, i);
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = {
+ .validate = bch2_sb_members_v1_validate,
+ .to_text = bch2_sb_members_v1_to_text,
+};
+
+static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
+ struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
+ unsigned i;
+
+ for (i = 0; i < sb->nr_devices; i++)
+ member_to_text(out, members_v2_get(mi, i), gi, sb, i);
+}
+
+static int bch2_sb_members_v2_validate(struct bch_sb *sb,
+ struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
+ size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) -
+ (void *) mi;
+
+ if (mi_bytes > vstruct_bytes(&mi->field)) {
+ prt_printf(err, "section too small (%zu > %zu)",
+ mi_bytes, vstruct_bytes(&mi->field));
+ return -BCH_ERR_invalid_sb_members;
+ }
+
+ for (unsigned i = 0; i < sb->nr_devices; i++) {
+ int ret = validate_member(err, members_v2_get(mi, i), sb, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = {
+ .validate = bch2_sb_members_v2_validate,
+ .to_text = bch2_sb_members_v2_to_text,
+};
+
+void bch2_sb_members_from_cpu(struct bch_fs *c)
+{
+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, NULL) {
+ struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);
+
+ for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++)
+ m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
+ }
+ rcu_read_unlock();
+}
+
+void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
+{
+ struct bch_fs *c = ca->fs;
+ struct bch_member m;
+
+ mutex_lock(&ca->fs->sb_lock);
+ m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
+ mutex_unlock(&ca->fs->sb_lock);
+
+ printbuf_tabstop_push(out, 12);
+
+ prt_str(out, "IO errors since filesystem creation");
+ prt_newline(out);
+
+ printbuf_indent_add(out, 2);
+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
+ prt_printf(out, "%s:", bch2_member_error_strs[i]);
+ prt_tab(out);
+ prt_u64(out, atomic64_read(&ca->errors[i]));
+ prt_newline(out);
+ }
+ printbuf_indent_sub(out, 2);
+
+ prt_str(out, "IO errors since ");
+ bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
+ prt_str(out, " ago");
+ prt_newline(out);
+
+ printbuf_indent_add(out, 2);
+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
+ prt_printf(out, "%s:", bch2_member_error_strs[i]);
+ prt_tab(out);
+ prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
+ prt_newline(out);
+ }
+ printbuf_indent_sub(out, 2);
+}
+
+void bch2_dev_errors_reset(struct bch_dev *ca)
+{
+ struct bch_fs *c = ca->fs;
+ struct bch_member *m;
+
+ mutex_lock(&c->sb_lock);
+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
+ m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
+	m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
diff --git a/c_src/libbcachefs/sb-members.h b/c_src/libbcachefs/sb-members.h
new file mode 100644
index 00000000..be0a9418
--- /dev/null
+++ b/c_src/libbcachefs/sb-members.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_MEMBERS_H
+#define _BCACHEFS_SB_MEMBERS_H
+
+#include "darray.h"
+
+extern char * const bch2_member_error_strs[];
+
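+/* members_v2 entries are variable-size; index by the per-member byte stride: */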
+static inline struct bch_member *
+__bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i)
+{
+ return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes));
+}
+
+int bch2_sb_members_v2_init(struct bch_fs *c);
+int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb);
+struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i);
+struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i);
+
+static inline bool bch2_dev_is_online(struct bch_dev *ca)
+{
+ return !percpu_ref_is_zero(&ca->io_ref);
+}
+
+static inline bool bch2_dev_is_readable(struct bch_dev *ca)
+{
+ return bch2_dev_is_online(ca) &&
+ ca->mi.state != BCH_MEMBER_STATE_failed;
+}
+
+static inline bool bch2_dev_get_ioref(struct bch_dev *ca, int rw)
+{
+ if (!percpu_ref_tryget(&ca->io_ref))
+ return false;
+
+ if (ca->mi.state == BCH_MEMBER_STATE_rw ||
+ (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ))
+ return true;
+
+ percpu_ref_put(&ca->io_ref);
+ return false;
+}
+
+static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs)
+{
+ return bitmap_weight(devs->d, BCH_SB_MEMBERS_MAX);
+}
+
+static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs,
+ unsigned dev)
+{
+ darray_for_each(devs, i)
+ if (*i == dev)
+ return true;
+ return false;
+}
+
+static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs,
+ unsigned dev)
+{
+ darray_for_each(*devs, i)
+ if (*i == dev) {
+ darray_remove_item(devs, i);
+ return;
+ }
+}
+
+static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs,
+ unsigned dev)
+{
+ if (!bch2_dev_list_has_dev(*devs, dev)) {
+ BUG_ON(devs->nr >= ARRAY_SIZE(devs->data));
+ devs->data[devs->nr++] = dev;
+ }
+}
+
+static inline struct bch_devs_list bch2_dev_list_single(unsigned dev)
+{
+ return (struct bch_devs_list) { .nr = 1, .data[0] = dev };
+}
+
+static inline struct bch_dev *__bch2_next_dev_idx(struct bch_fs *c, unsigned idx,
+ const struct bch_devs_mask *mask)
+{
+ struct bch_dev *ca = NULL;
+
+ while ((idx = mask
+ ? find_next_bit(mask->d, c->sb.nr_devices, idx)
+ : idx) < c->sb.nr_devices &&
+ !(ca = rcu_dereference_check(c->devs[idx],
+ lockdep_is_held(&c->state_lock))))
+ idx++;
+
+ return ca;
+}
+
+static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev *ca,
+ const struct bch_devs_mask *mask)
+{
+ return __bch2_next_dev_idx(c, ca ? ca->dev_idx + 1 : 0, mask);
+}
+
+#define for_each_member_device_rcu(_c, _ca, _mask) \
+ for (struct bch_dev *_ca = NULL; \
+ (_ca = __bch2_next_dev((_c), _ca, (_mask)));)
+
+static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev *ca)
+{
+ if (ca)
+ percpu_ref_put(&ca->ref);
+
+ rcu_read_lock();
+ if ((ca = __bch2_next_dev(c, ca, NULL)))
+ percpu_ref_get(&ca->ref);
+ rcu_read_unlock();
+
+ return ca;
+}
+
+/*
+ * If you break early, you must drop your ref on the current device
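+ *
+ * e.g. (want() is a hypothetical predicate):
+ *
+ *	for_each_member_device(c, ca)
+ *		if (want(ca)) {
+ *			percpu_ref_put(&ca->ref);
+ *			break;
+ *		}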
+ */
+#define __for_each_member_device(_c, _ca) \
+ for (; (_ca = bch2_get_next_dev(_c, _ca));)
+
+#define for_each_member_device(_c, _ca) \
+ for (struct bch_dev *_ca = NULL; \
+ (_ca = bch2_get_next_dev(_c, _ca));)
+
+static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
+ struct bch_dev *ca,
+ unsigned state_mask)
+{
+ if (ca)
+ percpu_ref_put(&ca->io_ref);
+
+ rcu_read_lock();
+ while ((ca = __bch2_next_dev(c, ca, NULL)) &&
+ (!((1 << ca->mi.state) & state_mask) ||
+ !percpu_ref_tryget(&ca->io_ref)))
+ ;
+ rcu_read_unlock();
+
+ return ca;
+}
+
+#define __for_each_online_member(_c, _ca, state_mask) \
+ for (struct bch_dev *_ca = NULL; \
+ (_ca = bch2_get_next_online_dev(_c, _ca, state_mask));)
+
+#define for_each_online_member(c, ca) \
+ __for_each_online_member(c, ca, ~0)
+
+#define for_each_rw_member(c, ca) \
+ __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw))
+
+#define for_each_readable_member(c, ca) \
+ __for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro))
+
+/*
+ * If a key exists that references a device, the device won't be going away and
+ * we can omit rcu_read_lock():
+ */
+static inline struct bch_dev *bch_dev_bkey_exists(const struct bch_fs *c, unsigned idx)
+{
+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]);
+
+ return rcu_dereference_check(c->devs[idx], 1);
+}
+
+static inline struct bch_dev *bch_dev_locked(struct bch_fs *c, unsigned idx)
+{
+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]);
+
+ return rcu_dereference_protected(c->devs[idx],
+ lockdep_is_held(&c->sb_lock) ||
+ lockdep_is_held(&c->state_lock));
+}
+
+/* XXX kill, move to struct bch_fs */
+static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
+{
+ struct bch_devs_mask devs;
+
+ memset(&devs, 0, sizeof(devs));
+ for_each_online_member(c, ca)
+ __set_bit(ca->dev_idx, devs.d);
+ return devs;
+}
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1;
+extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;
+
+static inline bool bch2_member_exists(struct bch_member *m)
+{
+ return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
+}
+
+static inline bool bch2_dev_exists(struct bch_sb *sb, unsigned dev)
+{
+ if (dev < sb->nr_devices) {
+ struct bch_member m = bch2_sb_member_get(sb, dev);
+ return bch2_member_exists(&m);
+ }
+ return false;
+}
+
+static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
+{
+ return (struct bch_member_cpu) {
+ .nbuckets = le64_to_cpu(mi->nbuckets),
+ .first_bucket = le16_to_cpu(mi->first_bucket),
+ .bucket_size = le16_to_cpu(mi->bucket_size),
+ .group = BCH_MEMBER_GROUP(mi),
+ .state = BCH_MEMBER_STATE(mi),
+ .discard = BCH_MEMBER_DISCARD(mi),
+ .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi),
+ .durability = BCH_MEMBER_DURABILITY(mi)
+ ? BCH_MEMBER_DURABILITY(mi) - 1
+ : 1,
+ .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
+ .valid = bch2_member_exists(mi),
+ };
+}
+
+void bch2_sb_members_from_cpu(struct bch_fs *);
+
+void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
+void bch2_dev_errors_reset(struct bch_dev *);
+
+#endif /* _BCACHEFS_SB_MEMBERS_H */
diff --git a/c_src/libbcachefs/seqmutex.h b/c_src/libbcachefs/seqmutex.h
new file mode 100644
index 00000000..c1860d81
--- /dev/null
+++ b/c_src/libbcachefs/seqmutex.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SEQMUTEX_H
+#define _BCACHEFS_SEQMUTEX_H
+
+#include <linux/mutex.h>
+
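+/*
+ * A seqmutex is a mutex paired with a sequence number that is incremented on
+ * every unlock: seqmutex_relock() re-takes the mutex only if the sequence
+ * number still matches, i.e. the mutex has not been unlocked since the given
+ * sequence number was observed.
+ */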
+struct seqmutex {
+ struct mutex lock;
+ u32 seq;
+};
+
+#define seqmutex_init(_lock) mutex_init(&(_lock)->lock)
+
+static inline bool seqmutex_trylock(struct seqmutex *lock)
+{
+ return mutex_trylock(&lock->lock);
+}
+
+static inline void seqmutex_lock(struct seqmutex *lock)
+{
+ mutex_lock(&lock->lock);
+}
+
+static inline void seqmutex_unlock(struct seqmutex *lock)
+{
+ lock->seq++;
+ mutex_unlock(&lock->lock);
+}
+
+static inline u32 seqmutex_seq(struct seqmutex *lock)
+{
+ return lock->seq;
+}
+
+static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq)
+{
+ if (lock->seq != seq || !mutex_trylock(&lock->lock))
+ return false;
+
+ if (lock->seq != seq) {
+ mutex_unlock(&lock->lock);
+ return false;
+ }
+
+ return true;
+}
+
+#endif /* _BCACHEFS_SEQMUTEX_H */
diff --git a/c_src/libbcachefs/siphash.c b/c_src/libbcachefs/siphash.c
new file mode 100644
index 00000000..dc1a27cc
--- /dev/null
+++ b/c_src/libbcachefs/siphash.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* $OpenBSD: siphash.c,v 1.3 2015/02/20 11:51:03 tedu Exp $ */
+
+/*-
+ * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d
+ * are the number of compression rounds and the number of finalization rounds.
+ * A compression round is identical to a finalization round and this round
+ * function is called SipRound. Given a 128-bit key k and a (possibly empty)
+ * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m).
+ *
+ * Implemented from the paper "SipHash: a fast short-input PRF", 2012.09.18,
+ * by Jean-Philippe Aumasson and Daniel J. Bernstein,
+ * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa
+ * https://131002.net/siphash/siphash.pdf
+ * https://131002.net/siphash/
+ */
+
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+#include <linux/bitops.h>
+#include <linux/string.h>
+
+#include "siphash.h"
+
+static void SipHash_Rounds(SIPHASH_CTX *ctx, int rounds)
+{
+ while (rounds--) {
+ ctx->v[0] += ctx->v[1];
+ ctx->v[2] += ctx->v[3];
+ ctx->v[1] = rol64(ctx->v[1], 13);
+ ctx->v[3] = rol64(ctx->v[3], 16);
+
+ ctx->v[1] ^= ctx->v[0];
+ ctx->v[3] ^= ctx->v[2];
+ ctx->v[0] = rol64(ctx->v[0], 32);
+
+ ctx->v[2] += ctx->v[1];
+ ctx->v[0] += ctx->v[3];
+ ctx->v[1] = rol64(ctx->v[1], 17);
+ ctx->v[3] = rol64(ctx->v[3], 21);
+
+ ctx->v[1] ^= ctx->v[2];
+ ctx->v[3] ^= ctx->v[0];
+ ctx->v[2] = rol64(ctx->v[2], 32);
+ }
+}
+
+static void SipHash_CRounds(SIPHASH_CTX *ctx, const void *ptr, int rounds)
+{
+ u64 m = get_unaligned_le64(ptr);
+
+ ctx->v[3] ^= m;
+ SipHash_Rounds(ctx, rounds);
+ ctx->v[0] ^= m;
+}
+
+void SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key)
+{
+ u64 k0, k1;
+
+ k0 = le64_to_cpu(key->k0);
+ k1 = le64_to_cpu(key->k1);
+
+ ctx->v[0] = 0x736f6d6570736575ULL ^ k0;
+ ctx->v[1] = 0x646f72616e646f6dULL ^ k1;
+ ctx->v[2] = 0x6c7967656e657261ULL ^ k0;
+ ctx->v[3] = 0x7465646279746573ULL ^ k1;
+
+ memset(ctx->buf, 0, sizeof(ctx->buf));
+ ctx->bytes = 0;
+}
+
+void SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf,
+ const void *src, size_t len)
+{
+ const u8 *ptr = src;
+ size_t left, used;
+
+ if (len == 0)
+ return;
+
+ used = ctx->bytes % sizeof(ctx->buf);
+ ctx->bytes += len;
+
+ if (used > 0) {
+ left = sizeof(ctx->buf) - used;
+
+ if (len >= left) {
+ memcpy(&ctx->buf[used], ptr, left);
+ SipHash_CRounds(ctx, ctx->buf, rc);
+ len -= left;
+			ptr += left;
+			used = 0;
+ } else {
+ memcpy(&ctx->buf[used], ptr, len);
+ return;
+ }
+ }
+
+ while (len >= sizeof(ctx->buf)) {
+ SipHash_CRounds(ctx, ptr, rc);
+ len -= sizeof(ctx->buf);
+ ptr += sizeof(ctx->buf);
+ }
+
+ if (len > 0)
+ memcpy(&ctx->buf[used], ptr, len);
+}
+
+void SipHash_Final(void *dst, SIPHASH_CTX *ctx, int rc, int rf)
+{
+ u64 r;
+
+ r = SipHash_End(ctx, rc, rf);
+
+ *((__le64 *) dst) = cpu_to_le64(r);
+}
+
+u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
+{
+ u64 r;
+ size_t left, used;
+
+ used = ctx->bytes % sizeof(ctx->buf);
+ left = sizeof(ctx->buf) - used;
+ memset(&ctx->buf[used], 0, left - 1);
+ ctx->buf[7] = ctx->bytes;
+
+ SipHash_CRounds(ctx, ctx->buf, rc);
+ ctx->v[2] ^= 0xff;
+ SipHash_Rounds(ctx, rf);
+
+ r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]);
+ memset(ctx, 0, sizeof(*ctx));
+ return r;
+}
+
+u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len)
+{
+ SIPHASH_CTX ctx;
+
+ SipHash_Init(&ctx, key);
+ SipHash_Update(&ctx, rc, rf, src, len);
+ return SipHash_End(&ctx, rc, rf);
+}
diff --git a/c_src/libbcachefs/siphash.h b/c_src/libbcachefs/siphash.h
new file mode 100644
index 00000000..3dfaf34a
--- /dev/null
+++ b/c_src/libbcachefs/siphash.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* $OpenBSD: siphash.h,v 1.5 2015/02/20 11:51:03 tedu Exp $ */
+/*-
+ * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * SipHash is a family of pseudorandom functions (a.k.a. keyed hash functions)
+ * optimized for speed on short messages, returning a 64-bit hash/digest value.
+ *
+ * The number of rounds is selected by which macro family is used:
+ *  SipHash24_*() for the fast and reasonably strong version
+ *  SipHash48_*() for the stronger version (half as fast)
+ *
+ * SIPHASH_CTX ctx;
+ * SIPHASH_KEY key;	/* 16 byte key */
+ *
+ * SipHash24_Init(&ctx, &key);
+ * SipHash24_Update(&ctx, pointer_to_string, length_of_string);
+ * SipHash24_Final(output, &ctx);
+ */
+
+#ifndef _SIPHASH_H_
+#define _SIPHASH_H_
+
+#include <linux/types.h>
+
+#define SIPHASH_BLOCK_LENGTH 8
+#define SIPHASH_KEY_LENGTH 16
+#define SIPHASH_DIGEST_LENGTH 8
+
+typedef struct _SIPHASH_CTX {
+ u64 v[4];
+ u8 buf[SIPHASH_BLOCK_LENGTH];
+ u32 bytes;
+} SIPHASH_CTX;
+
+typedef struct {
+ __le64 k0;
+ __le64 k1;
+} SIPHASH_KEY;
+
+void SipHash_Init(SIPHASH_CTX *, const SIPHASH_KEY *);
+void SipHash_Update(SIPHASH_CTX *, int, int, const void *, size_t);
+u64 SipHash_End(SIPHASH_CTX *, int, int);
+void SipHash_Final(void *, SIPHASH_CTX *, int, int);
+u64 SipHash(const SIPHASH_KEY *, int, int, const void *, size_t);
+
+#define SipHash24_Init(_c, _k) SipHash_Init((_c), (_k))
+#define SipHash24_Update(_c, _p, _l) SipHash_Update((_c), 2, 4, (_p), (_l))
+#define SipHash24_End(_d) SipHash_End((_d), 2, 4)
+#define SipHash24_Final(_d, _c) SipHash_Final((_d), (_c), 2, 4)
+#define SipHash24(_k, _p, _l) SipHash((_k), 2, 4, (_p), (_l))
+
+#define SipHash48_Init(_c, _k) SipHash_Init((_c), (_k))
+#define SipHash48_Update(_c, _p, _l) SipHash_Update((_c), 4, 8, (_p), (_l))
+#define SipHash48_End(_d) SipHash_End((_d), 4, 8)
+#define SipHash48_Final(_d, _c) SipHash_Final((_d), (_c), 4, 8)
+#define SipHash48(_k, _p, _l) SipHash((_k), 4, 8, (_p), (_l))
+
+#endif /* _SIPHASH_H_ */
diff --git a/c_src/libbcachefs/six.c b/c_src/libbcachefs/six.c
new file mode 100644
index 00000000..3a494c5d
--- /dev/null
+++ b/c_src/libbcachefs/six.c
@@ -0,0 +1,867 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <linux/log2.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/rt.h>
+#include <linux/sched/task.h>
+#include <linux/slab.h>
+
+#include <trace/events/lock.h>
+
+#include "six.h"
+
+#ifdef DEBUG
+#define EBUG_ON(cond) BUG_ON(cond)
+#else
+#define EBUG_ON(cond) do {} while (0)
+#endif
+
+#define six_acquire(l, t, r, ip) lock_acquire(l, 0, t, r, 1, NULL, ip)
+#define six_release(l, ip) lock_release(l, ip)
+
+static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
+
+#define SIX_LOCK_HELD_read_OFFSET 0
+#define SIX_LOCK_HELD_read ~(~0U << 26)
+#define SIX_LOCK_HELD_intent (1U << 26)
+#define SIX_LOCK_HELD_write (1U << 27)
+#define SIX_LOCK_WAITING_read (1U << (28 + SIX_LOCK_read))
+#define SIX_LOCK_WAITING_write (1U << (28 + SIX_LOCK_write))
+#define SIX_LOCK_NOSPIN (1U << 31)
+
+struct six_lock_vals {
+ /* Value we add to the lock in order to take the lock: */
+ u32 lock_val;
+
+ /* If the lock has this value (used as a mask), taking the lock fails: */
+ u32 lock_fail;
+
+ /* Mask that indicates lock is held for this type: */
+ u32 held_mask;
+
+ /* Waitlist we wakeup when releasing the lock: */
+ enum six_lock_type unlock_wakeup;
+};
+
+static const struct six_lock_vals l[] = {
+ [SIX_LOCK_read] = {
+ .lock_val = 1U << SIX_LOCK_HELD_read_OFFSET,
+ .lock_fail = SIX_LOCK_HELD_write,
+ .held_mask = SIX_LOCK_HELD_read,
+ .unlock_wakeup = SIX_LOCK_write,
+ },
+ [SIX_LOCK_intent] = {
+ .lock_val = SIX_LOCK_HELD_intent,
+ .lock_fail = SIX_LOCK_HELD_intent,
+ .held_mask = SIX_LOCK_HELD_intent,
+ .unlock_wakeup = SIX_LOCK_intent,
+ },
+ [SIX_LOCK_write] = {
+ .lock_val = SIX_LOCK_HELD_write,
+ .lock_fail = SIX_LOCK_HELD_read,
+ .held_mask = SIX_LOCK_HELD_write,
+ .unlock_wakeup = SIX_LOCK_read,
+ },
+};
+
+static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
+{
+ if ((atomic_read(&lock->state) & mask) != mask)
+ atomic_or(mask, &lock->state);
+}
+
+static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
+{
+ if (atomic_read(&lock->state) & mask)
+ atomic_and(~mask, &lock->state);
+}
+
+static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
+ u32 old, struct task_struct *owner)
+{
+ if (type != SIX_LOCK_intent)
+ return;
+
+ if (!(old & SIX_LOCK_HELD_intent)) {
+ EBUG_ON(lock->owner);
+ lock->owner = owner;
+ } else {
+ EBUG_ON(lock->owner != current);
+ }
+}
+
+static inline unsigned pcpu_read_count(struct six_lock *lock)
+{
+ unsigned read_count = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ read_count += *per_cpu_ptr(lock->readers, cpu);
+ return read_count;
+}
+
+/*
+ * __do_six_trylock() - main trylock routine
+ *
+ * Returns 1 on success, 0 on failure
+ *
+ * In percpu reader mode, a failed trylock may cause a spurious trylock failure
+ * for another thread taking the competing lock type, and we may have to do a
+ * wakeup: when a wakeup is required, we return -1 - wakeup_type.
+ */
+static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
+ struct task_struct *task, bool try)
+{
+ int ret;
+ u32 old;
+
+ EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
+ EBUG_ON(type == SIX_LOCK_write &&
+ (try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
+
+ /*
+ * Percpu reader mode:
+ *
+ * The basic idea behind this algorithm is that you can implement a lock
+ * between two threads without any atomics, just memory barriers:
+ *
+ * For two threads you'll need two variables, one variable for "thread a
+ * has the lock" and another for "thread b has the lock".
+ *
+ * To take the lock, a thread sets its variable indicating that it holds
+ * the lock, then issues a full memory barrier, then reads from the
+ * other thread's variable to check if the other thread thinks it has
+ * the lock. If we raced, we backoff and retry/sleep.
+ *
+ * Failure to take the lock may cause a spurious trylock failure in
+ * another thread, because we temporarily set the lock to indicate that
+	 * we held it. This would be a problem for a thread in six_lock() that
+	 * calls trylock after adding itself to the waitlist and prior to
+	 * sleeping.
+ *
+ * Therefore, if we fail to get the lock, and there were waiters of the
+ * type we conflict with, we will have to issue a wakeup.
+ *
+ * Since we may be called under wait_lock (and by the wakeup code
+ * itself), we return that the wakeup has to be done instead of doing it
+ * here.
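+	 *
+	 * A minimal sketch of the two-variable idea (illustrative only; the
+	 * code below uses a percpu counter per reader and the lock state word
+	 * for the writer):
+	 *
+	 *	bool held_a, held_b;
+	 *
+	 *	// thread a takes the lock:
+	 *	held_a = true;
+	 *	smp_mb();
+	 *	if (held_b) { held_a = false; }	// raced: back off and retry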
+ */
+ if (type == SIX_LOCK_read && lock->readers) {
+ preempt_disable();
+ this_cpu_inc(*lock->readers); /* signal that we own lock */
+
+ smp_mb();
+
+ old = atomic_read(&lock->state);
+ ret = !(old & l[type].lock_fail);
+
+ this_cpu_sub(*lock->readers, !ret);
+ preempt_enable();
+
+ if (!ret) {
+ smp_mb();
+ if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
+ ret = -1 - SIX_LOCK_write;
+ }
+ } else if (type == SIX_LOCK_write && lock->readers) {
+ if (try) {
+ atomic_add(SIX_LOCK_HELD_write, &lock->state);
+ smp_mb__after_atomic();
+ }
+
+ ret = !pcpu_read_count(lock);
+
+ if (try && !ret) {
+ old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
+ if (old & SIX_LOCK_WAITING_read)
+ ret = -1 - SIX_LOCK_read;
+ }
+ } else {
+ old = atomic_read(&lock->state);
+ do {
+ ret = !(old & l[type].lock_fail);
+ if (!ret || (type == SIX_LOCK_write && !try)) {
+ smp_mb();
+ break;
+ }
+ } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
+
+ EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
+ }
+
+ if (ret > 0)
+ six_set_owner(lock, type, old, task);
+
+ EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
+ (atomic_read(&lock->state) & SIX_LOCK_HELD_write));
+
+ return ret;
+}
+
+static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
+{
+ struct six_lock_waiter *w, *next;
+ struct task_struct *task;
+ bool saw_one;
+ int ret;
+again:
+ ret = 0;
+ saw_one = false;
+ raw_spin_lock(&lock->wait_lock);
+
+ list_for_each_entry_safe(w, next, &lock->wait_list, list) {
+ if (w->lock_want != lock_type)
+ continue;
+
+ if (saw_one && lock_type != SIX_LOCK_read)
+ goto unlock;
+ saw_one = true;
+
+ ret = __do_six_trylock(lock, lock_type, w->task, false);
+ if (ret <= 0)
+ goto unlock;
+
+ /*
+ * Similar to percpu_rwsem_wake_function(), we need to guard
+ * against the wakee noticing w->lock_acquired, returning, and
+ * then exiting before we do the wakeup:
+ */
+ task = get_task_struct(w->task);
+ __list_del(w->list.prev, w->list.next);
+ /*
+ * The release barrier here ensures the ordering of the
+ * __list_del before setting w->lock_acquired; @w is on the
+ * stack of the thread doing the waiting and will be reused
+ * after it sees w->lock_acquired with no other locking:
+ * pairs with smp_load_acquire() in six_lock_slowpath()
+ */
+ smp_store_release(&w->lock_acquired, true);
+ wake_up_process(task);
+ put_task_struct(task);
+ }
+
+ six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
+unlock:
+ raw_spin_unlock(&lock->wait_lock);
+
+ if (ret < 0) {
+ lock_type = -ret - 1;
+ goto again;
+ }
+}
+
+__always_inline
+static void six_lock_wakeup(struct six_lock *lock, u32 state,
+ enum six_lock_type lock_type)
+{
+ if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
+ return;
+
+ if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
+ return;
+
+ __six_lock_wakeup(lock, lock_type);
+}
+
+__always_inline
+static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
+{
+ int ret;
+
+ ret = __do_six_trylock(lock, type, current, try);
+ if (ret < 0)
+ __six_lock_wakeup(lock, -ret - 1);
+
+ return ret > 0;
+}
+
+/**
+ * six_trylock_ip - attempt to take a six lock without blocking
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
+ *
+ * Return: true on success, false on failure.
+ */
+bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
+{
+ if (!do_six_trylock(lock, type, true))
+ return false;
+
+ if (type != SIX_LOCK_write)
+ six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
+ return true;
+}
+EXPORT_SYMBOL_GPL(six_trylock_ip);
+
+/**
+ * six_relock_ip - attempt to re-take a lock that was held previously
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @seq: lock sequence number obtained from six_lock_seq() while lock was
+ * held previously
+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
+ *
+ * Return: true on success, false on failure.
+ */
+bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
+ unsigned seq, unsigned long ip)
+{
+ if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip))
+ return false;
+
+ if (six_lock_seq(lock) != seq) {
+ six_unlock_ip(lock, type, ip);
+ return false;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(six_relock_ip);
+
+#ifdef CONFIG_BCACHEFS_SIX_OPTIMISTIC_SPIN
+
+static inline bool six_owner_running(struct six_lock *lock)
+{
+ /*
+ * When there's no owner, we might have preempted between the owner
+ * acquiring the lock and setting the owner field. If we're an RT task
+ * that will live-lock because we won't let the owner complete.
+ */
+ rcu_read_lock();
+ struct task_struct *owner = READ_ONCE(lock->owner);
+ bool ret = owner ? owner_on_cpu(owner) : !rt_task(current);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static inline bool six_optimistic_spin(struct six_lock *lock,
+ struct six_lock_waiter *wait,
+ enum six_lock_type type)
+{
+ unsigned loop = 0;
+ u64 end_time;
+
+ if (type == SIX_LOCK_write)
+ return false;
+
+ if (lock->wait_list.next != &wait->list)
+ return false;
+
+ if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN)
+ return false;
+
+ preempt_disable();
+ end_time = sched_clock() + 10 * NSEC_PER_USEC;
+
+ while (!need_resched() && six_owner_running(lock)) {
+ /*
+ * Ensures that writes to the waitlist entry happen after we see
+ * wait->lock_acquired: pairs with the smp_store_release in
+ * __six_lock_wakeup
+ */
+ if (smp_load_acquire(&wait->lock_acquired)) {
+ preempt_enable();
+ return true;
+ }
+
+ if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
+ six_set_bitmask(lock, SIX_LOCK_NOSPIN);
+ break;
+ }
+
+ /*
+ * The cpu_relax() call is a compiler barrier which forces
+ * everything in this loop to be re-loaded. We don't need
+ * memory barriers as we'll eventually observe the right
+ * values at the cost of a few extra spins.
+ */
+ cpu_relax();
+ }
+
+ preempt_enable();
+ return false;
+}
+
+#else /* CONFIG_BCACHEFS_SIX_OPTIMISTIC_SPIN */
+
+static inline bool six_optimistic_spin(struct six_lock *lock,
+ struct six_lock_waiter *wait,
+ enum six_lock_type type)
+{
+ return false;
+}
+
+#endif
+
+noinline
+static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
+ struct six_lock_waiter *wait,
+ six_lock_should_sleep_fn should_sleep_fn, void *p,
+ unsigned long ip)
+{
+ int ret = 0;
+
+ if (type == SIX_LOCK_write) {
+ EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
+ atomic_add(SIX_LOCK_HELD_write, &lock->state);
+ smp_mb__after_atomic();
+ }
+
+ trace_contention_begin(lock, 0);
+ lock_contended(&lock->dep_map, ip);
+
+ wait->task = current;
+ wait->lock_want = type;
+ wait->lock_acquired = false;
+
+ raw_spin_lock(&lock->wait_lock);
+ six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
+ /*
+ * Retry taking the lock after taking waitlist lock, in case we raced
+ * with an unlock:
+ */
+ ret = __do_six_trylock(lock, type, current, false);
+ if (ret <= 0) {
+ wait->start_time = local_clock();
+
+ if (!list_empty(&lock->wait_list)) {
+ struct six_lock_waiter *last =
+ list_last_entry(&lock->wait_list,
+ struct six_lock_waiter, list);
+
+ if (time_before_eq64(wait->start_time, last->start_time))
+ wait->start_time = last->start_time + 1;
+ }
+
+ list_add_tail(&wait->list, &lock->wait_list);
+ }
+ raw_spin_unlock(&lock->wait_lock);
+
+ if (unlikely(ret > 0)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (unlikely(ret < 0)) {
+ __six_lock_wakeup(lock, -ret - 1);
+ ret = 0;
+ }
+
+ if (six_optimistic_spin(lock, wait, type))
+ goto out;
+
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ /*
+ * Ensures that writes to the waitlist entry happen after we see
+ * wait->lock_acquired: pairs with the smp_store_release in
+ * __six_lock_wakeup
+ */
+ if (smp_load_acquire(&wait->lock_acquired))
+ break;
+
+ ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
+ if (unlikely(ret)) {
+ bool acquired;
+
+ /*
+ * If should_sleep_fn() returns an error, we are
+ * required to return that error even if we already
+ * acquired the lock - should_sleep_fn() might have
+ * modified external state (e.g. when the deadlock cycle
+ * detector in bcachefs issued a transaction restart)
+ */
+ raw_spin_lock(&lock->wait_lock);
+ acquired = wait->lock_acquired;
+ if (!acquired)
+ list_del(&wait->list);
+ raw_spin_unlock(&lock->wait_lock);
+
+ if (unlikely(acquired))
+ do_six_unlock_type(lock, type);
+ break;
+ }
+
+ schedule();
+ }
+
+ __set_current_state(TASK_RUNNING);
+out:
+ if (ret && type == SIX_LOCK_write) {
+ six_clear_bitmask(lock, SIX_LOCK_HELD_write);
+ six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
+ }
+ trace_contention_end(lock, 0);
+
+ return ret;
+}
+
+/**
+ * six_lock_ip_waiter - take a lock, with full waitlist interface
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @wait: pointer to wait object, which will be added to lock's waitlist
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ * to scheduling
+ * @p: passed through to @should_sleep_fn
+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
+ *
+ * This is the most general six_lock() variant, with parameters to support full
+ * cycle detection for deadlock avoidance.
+ *
+ * The code calling this function must implement tracking of held locks, and the
+ * @wait object should be embedded into the struct that tracks held locks -
+ * which must also be accessible in a thread-safe way.
+ *
+ * @should_sleep_fn should invoke the cycle detector; it should walk each
+ * lock's waiters, and for each waiter recursively walk their held locks.
+ *
+ * When this function must block, @wait will be added to @lock's waitlist before
+ * calling trylock, and before calling @should_sleep_fn, and @wait will not be
+ * removed from the lock waitlist until the lock has been successfully acquired,
+ * or we abort.
+ *
+ * @wait.start_time will be monotonically increasing for any given waitlist, and
+ * thus may be used as a loop cursor.
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
+ struct six_lock_waiter *wait,
+ six_lock_should_sleep_fn should_sleep_fn, void *p,
+ unsigned long ip)
+{
+ int ret;
+
+ wait->start_time = 0;
+
+ if (type != SIX_LOCK_write)
+ six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
+
+ ret = do_six_trylock(lock, type, true) ? 0
+ : six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
+
+ if (ret && type != SIX_LOCK_write)
+ six_release(&lock->dep_map, ip);
+ if (!ret)
+ lock_acquired(&lock->dep_map, ip);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
+
+__always_inline
+static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
+{
+ u32 state;
+
+ if (type == SIX_LOCK_intent)
+ lock->owner = NULL;
+
+ if (type == SIX_LOCK_read &&
+ lock->readers) {
+ smp_mb(); /* unlock barrier */
+ this_cpu_dec(*lock->readers);
+ smp_mb(); /* between unlocking and checking for waiters */
+ state = atomic_read(&lock->state);
+ } else {
+ u32 v = l[type].lock_val;
+
+ if (type != SIX_LOCK_read)
+ v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
+
+ EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
+ state = atomic_sub_return_release(v, &lock->state);
+ }
+
+ six_lock_wakeup(lock, state, l[type].unlock_wakeup);
+}
+
+/**
+ * six_unlock_ip - drop a six lock
+ * @lock: lock to unlock
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
+ *
+ * When a lock is held multiple times (because six_lock_increment() was used),
+ * this decrements the 'lock held' counter by one.
+ *
+ * For example:
+ * six_lock_read(&foo->lock); read count 1
+ * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2
+ * six_unlock_type(&foo->lock, SIX_LOCK_read);		read count 1
+ * six_unlock_type(&foo->lock, SIX_LOCK_read);		read count 0
+ */
+void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
+{
+ EBUG_ON(type == SIX_LOCK_write &&
+ !(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
+ EBUG_ON((type == SIX_LOCK_write ||
+ type == SIX_LOCK_intent) &&
+ lock->owner != current);
+
+ if (type != SIX_LOCK_write)
+ six_release(&lock->dep_map, ip);
+ else
+ lock->seq++;
+
+ if (type == SIX_LOCK_intent &&
+ lock->intent_lock_recurse) {
+ --lock->intent_lock_recurse;
+ return;
+ }
+
+ do_six_unlock_type(lock, type);
+}
+EXPORT_SYMBOL_GPL(six_unlock_ip);
+
+/**
+ * six_lock_downgrade - convert an intent lock to a read lock
+ * @lock: lock to downgrade
+ *
+ * @lock will have read count incremented and intent count decremented
+ */
+void six_lock_downgrade(struct six_lock *lock)
+{
+ six_lock_increment(lock, SIX_LOCK_read);
+ six_unlock_intent(lock);
+}
+EXPORT_SYMBOL_GPL(six_lock_downgrade);
+
+/**
+ * six_lock_tryupgrade - attempt to convert read lock to an intent lock
+ * @lock: lock to upgrade
+ *
+ * On success, @lock will have intent count incremented and read count
+ * decremented
+ *
+ * Return: true on success, false on failure
+ */
+bool six_lock_tryupgrade(struct six_lock *lock)
+{
+ u32 old = atomic_read(&lock->state), new;
+
+ do {
+ new = old;
+
+ if (new & SIX_LOCK_HELD_intent)
+ return false;
+
+ if (!lock->readers) {
+ EBUG_ON(!(new & SIX_LOCK_HELD_read));
+ new -= l[SIX_LOCK_read].lock_val;
+ }
+
+ new |= SIX_LOCK_HELD_intent;
+ } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
+
+ if (lock->readers)
+ this_cpu_dec(*lock->readers);
+
+ six_set_owner(lock, SIX_LOCK_intent, old, current);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
+
+/**
+ * six_trylock_convert - attempt to convert a held lock from one type to another
+ * @lock: lock to upgrade
+ * @from: SIX_LOCK_read or SIX_LOCK_intent
+ * @to: SIX_LOCK_read or SIX_LOCK_intent
+ *
+ * On success, @lock will have intent count incremented and read count
+ * decremented
+ *
+ * Return: true on success, false on failure
+ */
+bool six_trylock_convert(struct six_lock *lock,
+ enum six_lock_type from,
+ enum six_lock_type to)
+{
+ EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
+
+ if (to == from)
+ return true;
+
+ if (to == SIX_LOCK_read) {
+ six_lock_downgrade(lock);
+ return true;
+ } else {
+ return six_lock_tryupgrade(lock);
+ }
+}
+EXPORT_SYMBOL_GPL(six_trylock_convert);
+
+/**
+ * six_lock_increment - increase held lock count on a lock that is already held
+ * @lock: lock to increment
+ * @type: SIX_LOCK_read or SIX_LOCK_intent
+ *
+ * @lock must already be held, with a lock type that is greater than or equal to
+ * @type
+ *
+ * A corresponding six_unlock_type() call will be required for @lock to be fully
+ * unlocked.
+ */
+void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
+{
+ six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
+
+ /* XXX: assert already locked, and that we don't overflow: */
+
+ switch (type) {
+ case SIX_LOCK_read:
+ if (lock->readers) {
+ this_cpu_inc(*lock->readers);
+ } else {
+ EBUG_ON(!(atomic_read(&lock->state) &
+ (SIX_LOCK_HELD_read|
+ SIX_LOCK_HELD_intent)));
+ atomic_add(l[type].lock_val, &lock->state);
+ }
+ break;
+ case SIX_LOCK_intent:
+ EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
+ lock->intent_lock_recurse++;
+ break;
+ case SIX_LOCK_write:
+ BUG();
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(six_lock_increment);
+
+/**
+ * six_lock_wakeup_all - wake up all waiters on @lock
+ * @lock: lock to wake up waiters for
+ *
+ * Waking up waiters will cause them to re-run should_sleep_fn, which may then
+ * abort the lock operation.
+ *
+ * This function is never needed in a bug-free program; it's only useful in
+ * debug code, e.g. to determine if a cycle detector is at fault.
+ */
+void six_lock_wakeup_all(struct six_lock *lock)
+{
+ u32 state = atomic_read(&lock->state);
+ struct six_lock_waiter *w;
+
+ six_lock_wakeup(lock, state, SIX_LOCK_read);
+ six_lock_wakeup(lock, state, SIX_LOCK_intent);
+ six_lock_wakeup(lock, state, SIX_LOCK_write);
+
+ raw_spin_lock(&lock->wait_lock);
+ list_for_each_entry(w, &lock->wait_list, list)
+ wake_up_process(w->task);
+ raw_spin_unlock(&lock->wait_lock);
+}
+EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
+
+/**
+ * six_lock_counts - return held lock counts, for each lock type
+ * @lock: lock to return counters for
+ *
+ * Return: the number of times a lock is held for read, intent and write.
+ */
+struct six_lock_count six_lock_counts(struct six_lock *lock)
+{
+ struct six_lock_count ret;
+
+ ret.n[SIX_LOCK_read] = !lock->readers
+ ? atomic_read(&lock->state) & SIX_LOCK_HELD_read
+ : pcpu_read_count(lock);
+ ret.n[SIX_LOCK_intent] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
+ lock->intent_lock_recurse;
+ ret.n[SIX_LOCK_write] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(six_lock_counts);
+
+/**
+ * six_lock_readers_add - directly manipulate reader count of a lock
+ * @lock: lock to add/subtract readers for
+ * @nr: reader count to add/subtract
+ *
+ * When an upper layer is implementing lock reentrancy, we may have both read
+ * and intent locks on the same lock.
+ *
+ * When we need to take a write lock, the read locks will cause self-deadlock,
+ * because six locks themselves do not track which read locks are held by the
+ * current thread and which are held by a different thread - they do no
+ * per-thread tracking of held locks.
+ *
+ * The upper layer that is tracking held locks may, however, if trylock() has
+ * failed, count up its own read locks, subtract them, take the write lock, and
+ * then re-add them.
+ *
+ * As in any other situation when taking a write lock, @lock must be held for
+ * intent one (or more) times, so @lock will never be left unlocked.
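+ *
+ * A hypothetical sketch (nr_my_read_locks is tracked by the caller; the names
+ * are illustrative):
+ *
+ *	six_lock_readers_add(&foo->lock, -nr_my_read_locks);
+ *	six_lock_write(&foo->lock, NULL, NULL);
+ *	// ... update ...
+ *	six_unlock_write(&foo->lock);
+ *	six_lock_readers_add(&foo->lock, nr_my_read_locks);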
+ */
+void six_lock_readers_add(struct six_lock *lock, int nr)
+{
+ if (lock->readers) {
+ this_cpu_add(*lock->readers, nr);
+ } else {
+ EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
+ /* reader count starts at bit 0 */
+ atomic_add(nr, &lock->state);
+ }
+}
+EXPORT_SYMBOL_GPL(six_lock_readers_add);
+
+/**
+ * six_lock_exit - release resources held by a lock prior to freeing
+ * @lock: lock to exit
+ *
+ * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
+ * required to free the percpu read counts.
+ */
+void six_lock_exit(struct six_lock *lock)
+{
+ WARN_ON(lock->readers && pcpu_read_count(lock));
+ WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
+
+ free_percpu(lock->readers);
+ lock->readers = NULL;
+}
+EXPORT_SYMBOL_GPL(six_lock_exit);
+
+void __six_lock_init(struct six_lock *lock, const char *name,
+ struct lock_class_key *key, enum six_lock_init_flags flags)
+{
+ atomic_set(&lock->state, 0);
+ raw_spin_lock_init(&lock->wait_lock);
+ INIT_LIST_HEAD(&lock->wait_list);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ debug_check_no_locks_freed((void *) lock, sizeof(*lock));
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
+
+ /*
+ * Don't assume that we have real percpu variables available in
+ * userspace:
+ */
+#ifdef __KERNEL__
+ if (flags & SIX_LOCK_INIT_PCPU) {
+ /*
+ * We don't return an error here on memory allocation failure
+ * since percpu is an optimization, and locks will work with the
+ * same semantics in non-percpu mode: callers can check for
+ * failure if they wish by checking lock->readers, but generally
+ * will not want to treat it as an error.
+ */
+ lock->readers = alloc_percpu(unsigned);
+ }
+#endif
+}
+EXPORT_SYMBOL_GPL(__six_lock_init);
diff --git a/c_src/libbcachefs/six.h b/c_src/libbcachefs/six.h
new file mode 100644
index 00000000..68d46fd7
--- /dev/null
+++ b/c_src/libbcachefs/six.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_SIX_H
+#define _LINUX_SIX_H
+
+/**
+ * DOC: SIX locks overview
+ *
+ * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
+ * but with an additional state: read/shared, intent, exclusive/write
+ *
+ * The purpose of the intent state is to allow for greater concurrency on tree
+ * structures without deadlocking. In general, a read can't be upgraded to a
+ * write lock without deadlocking, so an operation that updates multiple nodes
+ * will have to take write locks for the full duration of the operation.
+ *
+ * But by adding an intent state, which is exclusive with other intent locks but
+ * not with readers, we can take intent locks at the start of the operation,
+ * and then take write locks only for the actual update to each individual
+ * node, without deadlocking.
+ *
+ * Example usage:
+ * six_lock_read(&foo->lock);
+ * six_unlock_read(&foo->lock);
+ *
+ * An intent lock must be held before taking a write lock:
+ * six_lock_intent(&foo->lock);
+ * six_lock_write(&foo->lock);
+ * six_unlock_write(&foo->lock);
+ * six_unlock_intent(&foo->lock);
+ *
+ * Other operations:
+ * six_trylock_read()
+ * six_trylock_intent()
+ * six_trylock_write()
+ *
+ * six_lock_downgrade() convert from intent to read
+ * six_lock_tryupgrade() attempt to convert from read to intent, may fail
+ *
+ * There are also interfaces that take the lock type as an enum:
+ *
+ * six_lock_type(&foo->lock, SIX_LOCK_read);
+ * six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)
+ * six_lock_type(&foo->lock, SIX_LOCK_write);
+ * six_unlock_type(&foo->lock, SIX_LOCK_write);
+ * six_unlock_type(&foo->lock, SIX_LOCK_intent);
+ *
+ * Lock sequence numbers - unlock(), relock():
+ *
+ * Locks embed sequence numbers, which are incremented on write lock/unlock.
+ * This allows locks to be dropped and then retaken iff the state they protect
+ * hasn't changed; this makes it much easier to avoid holding locks while e.g.
+ * doing IO or allocating memory.
+ *
+ * Example usage:
+ * six_lock_read(&foo->lock);
+ * u32 seq = six_lock_seq(&foo->lock);
+ * six_unlock_read(&foo->lock);
+ *
+ * some_operation_that_may_block();
+ *
+ * if (six_relock_read(&foo->lock, seq)) { ... }
+ *
+ * If the relock operation succeeds, it is as if the lock was never unlocked.
+ *
+ * Reentrancy:
+ *
+ * Six locks are not by themselves reentrant, but have counters for both the
+ * read and intent states that can be used to provide reentrancy by an upper
+ * layer that tracks held locks. If a lock is known to already be held in the
+ * read or intent state, six_lock_increment() can be used to bump the "lock
+ * held in this state" counter, increasing the number of unlock calls that
+ * will be required to fully unlock it.
+ *
+ * Example usage:
+ * six_lock_read(&foo->lock);
+ * six_lock_increment(&foo->lock, SIX_LOCK_read);
+ * six_unlock_read(&foo->lock);
+ * six_unlock_read(&foo->lock);
+ * foo->lock is now fully unlocked.
+ *
+ * Since the intent state supersedes read, it's legal to increment the read
+ * counter when holding an intent lock, but not the reverse.
+ *
+ * A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
+ * is not legal.
+ *
+ * should_sleep_fn:
+ *
+ * There is a six_lock() variant that takes a function pointer that is called
+ * immediately prior to schedule() when blocking, and may return an error to
+ * abort.
+ *
+ * One possible use for this feature is when objects being locked are part of
+ * a cache and may be reused, and lock ordering is based on a property of the
+ * object that will change when the object is reused - i.e. logical key order.
+ *
+ * If looking up an object in the cache may race with object reuse, and lock
+ * ordering is required to prevent deadlock, object reuse may change the
+ * correct lock order for that object and cause a deadlock. should_sleep_fn
+ * can be used to check if the object is still the object we want and avoid
+ * this deadlock.
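+ *
+ * A hypothetical sketch (foo_still_valid() is illustrative, not part of this
+ * API):
+ *
+ *   static int foo_should_sleep(struct six_lock *lock, void *p)
+ *   {
+ *	struct foo *foo = p;
+ *
+ *	return foo_still_valid(foo) ? 0 : -EAGAIN;
+ *   }
+ *
+ *   six_lock_ip(&foo->lock, SIX_LOCK_read, foo_should_sleep, foo, _THIS_IP_);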
+ *
+ * Wait list entry interface:
+ *
+ * There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
+ * wait list entry. By embedding six_lock_waiter into another object, and by
+ * traversing lock waitlists, it is then possible for an upper layer to
+ * implement full cycle detection for deadlock avoidance.
+ *
+ * should_sleep_fn should be used for invoking the cycle detector, walking the
+ * graph of held locks to check for a deadlock. The upper layer must track
+ * held locks for each thread, and each thread's held locks must be reachable
+ * from its six_lock_waiter object.
+ *
+ * six_lock_waiter() will add the wait object to the waitlist before retrying
+ * the lock, and before calling should_sleep_fn, and the wait object will not
+ * be removed from the waitlist until either the lock has been successfully
+ * acquired, or we aborted because should_sleep_fn returned an error.
+ *
+ * Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
+ * have timestamps in strictly ascending order - this is so the timestamp can
+ * be used as a cursor for lock graph traversal.
+ */
+
+#include <linux/lockdep.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+enum six_lock_type {
+ SIX_LOCK_read,
+ SIX_LOCK_intent,
+ SIX_LOCK_write,
+};
+
+struct six_lock {
+ atomic_t state;
+ u32 seq;
+ unsigned intent_lock_recurse;
+ struct task_struct *owner;
+ unsigned __percpu *readers;
+ raw_spinlock_t wait_lock;
+ struct list_head wait_list;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+struct six_lock_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ enum six_lock_type lock_want;
+ bool lock_acquired;
+ u64 start_time;
+};
+
+typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);
+
+void six_lock_exit(struct six_lock *lock);
+
+enum six_lock_init_flags {
+ SIX_LOCK_INIT_PCPU = 1U << 0,
+};
+
+void __six_lock_init(struct six_lock *lock, const char *name,
+ struct lock_class_key *key, enum six_lock_init_flags flags);
+
+/**
+ * six_lock_init - initialize a six lock
+ * @lock: lock to initialize
+ * @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU
+ */
+#define six_lock_init(lock, flags) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ __six_lock_init((lock), #lock, &__key, flags); \
+} while (0)
+
+/**
+ * six_lock_seq - obtain current lock sequence number
+ * @lock: six_lock to obtain sequence number for
+ *
+ * @lock should be held for read or intent, and not write
+ *
+ * By saving the lock sequence number, we can unlock @lock and then (typically
+ * after some blocking operation) attempt to relock it: the relock will succeed
+ * if the sequence number hasn't changed, meaning no write locks have been taken
+ * and state corresponding to what @lock protects is still valid.
+ */
+static inline u32 six_lock_seq(const struct six_lock *lock)
+{
+ return lock->seq;
+}
+
+bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
+
+/**
+ * six_trylock_type - attempt to take a six lock without blocking
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ *
+ * Return: true on success, false on failure.
+ */
+static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
+{
+ return six_trylock_ip(lock, type, _THIS_IP_);
+}
+
+int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
+ struct six_lock_waiter *wait,
+ six_lock_should_sleep_fn should_sleep_fn, void *p,
+ unsigned long ip);
+
+/**
+ * six_lock_waiter - take a lock, with full waitlist interface
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @wait: pointer to wait object, which will be added to lock's waitlist
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ * to scheduling
+ * @p: passed through to @should_sleep_fn
+ *
+ * This is a convenience wrapper around six_lock_ip_waiter(), see that function
+ * for full documentation.
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
+ struct six_lock_waiter *wait,
+ six_lock_should_sleep_fn should_sleep_fn, void *p)
+{
+ return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
+}
+
+/**
+ * six_lock_ip - take a six lock
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ * to scheduling
+ * @p: passed through to @should_sleep_fn
+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
+ six_lock_should_sleep_fn should_sleep_fn, void *p,
+ unsigned long ip)
+{
+ struct six_lock_waiter wait;
+
+ return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
+}
+
+/**
+ * six_lock_type - take a six lock
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ * to scheduling
+ * @p: passed through to @should_sleep_fn
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
+ six_lock_should_sleep_fn should_sleep_fn, void *p)
+{
+ struct six_lock_waiter wait;
+
+ return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
+}
+
+bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
+ unsigned seq, unsigned long ip);
+
+/**
+ * six_relock_type - attempt to re-take a lock that was held previously
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @seq: lock sequence number obtained from six_lock_seq() while lock was
+ * held previously
+ *
+ * Return: true on success, false on failure.
+ */
+static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
+ unsigned seq)
+{
+ return six_relock_ip(lock, type, seq, _THIS_IP_);
+}
+
+void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
+
+/**
+ * six_unlock_type - drop a six lock
+ * @lock: lock to unlock
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ *
+ * When a lock is held multiple times (because six_lock_increment() was used),
+ * this decrements the 'lock held' counter by one.
+ *
+ * For example:
+ * six_lock_read(&foo->lock); read count 1
+ * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2
+ * six_unlock_type(&foo->lock, SIX_LOCK_read);		read count 1
+ * six_unlock_type(&foo->lock, SIX_LOCK_read);		read count 0
+ */
+static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
+{
+ six_unlock_ip(lock, type, _THIS_IP_);
+}
+
+#define __SIX_LOCK(type) \
+static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\
+{ \
+ return six_trylock_ip(lock, SIX_LOCK_##type, ip); \
+} \
+ \
+static inline bool six_trylock_##type(struct six_lock *lock) \
+{ \
+ return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \
+} \
+ \
+static inline int six_lock_ip_waiter_##type(struct six_lock *lock, \
+ struct six_lock_waiter *wait, \
+ six_lock_should_sleep_fn should_sleep_fn, void *p,\
+ unsigned long ip) \
+{ \
+ return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
+} \
+ \
+static inline int six_lock_ip_##type(struct six_lock *lock, \
+ six_lock_should_sleep_fn should_sleep_fn, void *p, \
+ unsigned long ip) \
+{ \
+ return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
+} \
+ \
+static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
+{ \
+ return six_relock_ip(lock, SIX_LOCK_##type, seq, ip); \
+} \
+ \
+static inline bool six_relock_##type(struct six_lock *lock, u32 seq) \
+{ \
+ return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_); \
+} \
+ \
+static inline int six_lock_##type(struct six_lock *lock, \
+ six_lock_should_sleep_fn fn, void *p)\
+{ \
+ return six_lock_ip_##type(lock, fn, p, _THIS_IP_); \
+} \
+ \
+static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip) \
+{ \
+ six_unlock_ip(lock, SIX_LOCK_##type, ip); \
+} \
+ \
+static inline void six_unlock_##type(struct six_lock *lock) \
+{ \
+ six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \
+}
+
+__SIX_LOCK(read)
+__SIX_LOCK(intent)
+__SIX_LOCK(write)
+#undef __SIX_LOCK
+
+void six_lock_downgrade(struct six_lock *);
+bool six_lock_tryupgrade(struct six_lock *);
+bool six_trylock_convert(struct six_lock *, enum six_lock_type,
+ enum six_lock_type);
+
+void six_lock_increment(struct six_lock *, enum six_lock_type);
+
+void six_lock_wakeup_all(struct six_lock *);
+
+struct six_lock_count {
+ unsigned n[3];
+};
+
+struct six_lock_count six_lock_counts(struct six_lock *);
+void six_lock_readers_add(struct six_lock *, int);
+
+#endif /* _LINUX_SIX_H */
diff --git a/c_src/libbcachefs/snapshot.c b/c_src/libbcachefs/snapshot.c
new file mode 100644
index 00000000..56af9375
--- /dev/null
+++ b/c_src/libbcachefs/snapshot.c
@@ -0,0 +1,1685 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "bkey_buf.h"
+#include "btree_key_cache.h"
+#include "btree_update.h"
+#include "buckets.h"
+#include "errcode.h"
+#include "error.h"
+#include "fs.h"
+#include "snapshot.h"
+
+#include <linux/random.h>
+
+/*
+ * Snapshot trees:
+ *
+ * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they
+ * exist to provide a stable identifier for the whole lifetime of a snapshot
+ * tree.
+ */
+
+void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k);
+
+ prt_printf(out, "subvol %u root snapshot %u",
+ le32_to_cpu(t.v->master_subvol),
+ le32_to_cpu(t.v->root_snapshot));
+}
+
+int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
+ bkey_lt(k.k->p, POS(0, 1)), c, err,
+ snapshot_tree_pos_bad,
+ "bad pos");
+fsck_err:
+ return ret;
+}
+
+int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id,
+ struct bch_snapshot_tree *s)
+{
+ int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id),
+ BTREE_ITER_WITH_UPDATES, snapshot_tree, s);
+
+ if (bch2_err_matches(ret, ENOENT))
+ ret = -BCH_ERR_ENOENT_snapshot_tree;
+ return ret;
+}
+
+struct bkey_i_snapshot_tree *
+__bch2_snapshot_tree_create(struct btree_trans *trans)
+{
+ struct btree_iter iter;
+ int ret = bch2_bkey_get_empty_slot(trans, &iter,
+ BTREE_ID_snapshot_trees, POS(0, U32_MAX));
+ struct bkey_i_snapshot_tree *s_t;
+
+ if (ret == -BCH_ERR_ENOSPC_btree_slot)
+ ret = -BCH_ERR_ENOSPC_snapshot_tree;
+ if (ret)
+ return ERR_PTR(ret);
+
+ s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree);
+ ret = PTR_ERR_OR_ZERO(s_t);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret ? ERR_PTR(ret) : s_t;
+}
+
+static int bch2_snapshot_tree_create(struct btree_trans *trans,
+ u32 root_id, u32 subvol_id, u32 *tree_id)
+{
+ struct bkey_i_snapshot_tree *n_tree =
+ __bch2_snapshot_tree_create(trans);
+
+ if (IS_ERR(n_tree))
+ return PTR_ERR(n_tree);
+
+ n_tree->v.master_subvol = cpu_to_le32(subvol_id);
+ n_tree->v.root_snapshot = cpu_to_le32(root_id);
+ *tree_id = n_tree->k.p.offset;
+ return 0;
+}
+
+/* Snapshot nodes: */
+
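+/*
+ * Slow version that walks parent pointers directly: used before the
+ * check_snapshots recovery pass has completed, when the skip lists and
+ * is_ancestor bitmaps can't yet be trusted.
+ */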
+static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
+{
+ struct snapshot_table *t;
+
+ rcu_read_lock();
+ t = rcu_dereference(c->snapshots);
+
+ while (id && id < ancestor)
+ id = __snapshot_t(t, id)->parent;
+ rcu_read_unlock();
+
+ return id == ancestor;
+}
+
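+/*
+ * Skip list helper: return the most distant ancestor of @id reachable via its
+ * skip list entries without going past @ancestor, falling back to the direct
+ * parent.
+ */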
+static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
+{
+ const struct snapshot_t *s = __snapshot_t(t, id);
+
+ if (s->skip[2] <= ancestor)
+ return s->skip[2];
+ if (s->skip[1] <= ancestor)
+ return s->skip[1];
+ if (s->skip[0] <= ancestor)
+ return s->skip[0];
+ return s->parent;
+}
+
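+/*
+ * Returns true if @ancestor is an ancestor of @id: climbs the tree via the
+ * skip lists until @id is within IS_ANCESTOR_BITMAP ids of @ancestor, then
+ * consults the precomputed is_ancestor bitmap.
+ */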
+bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
+{
+ struct snapshot_table *t;
+ bool ret;
+
+ EBUG_ON(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots);
+
+ rcu_read_lock();
+ t = rcu_dereference(c->snapshots);
+
+ while (id && id < ancestor - IS_ANCESTOR_BITMAP)
+ id = get_ancestor_below(t, id, ancestor);
+
+ if (id && id < ancestor) {
+ ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
+
+ EBUG_ON(ret != bch2_snapshot_is_ancestor_early(c, id, ancestor));
+ } else {
+ ret = id == ancestor;
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
+
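+/*
+ * Grow the RCU-protected snapshot table so that it covers @id: allocate a
+ * larger table, copy over the old contents, publish it with
+ * rcu_assign_pointer(), and free the old table after a grace period.
+ */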
+static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
+{
+ size_t idx = U32_MAX - id;
+ size_t new_size;
+ struct snapshot_table *new, *old;
+
+ new_size = max(16UL, roundup_pow_of_two(idx + 1));
+
+ new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ old = rcu_dereference_protected(c->snapshots, true);
+ if (old)
+ memcpy(new->s,
+ rcu_dereference_protected(c->snapshots, true)->s,
+ sizeof(new->s[0]) * c->snapshot_table_size);
+
+ rcu_assign_pointer(c->snapshots, new);
+ c->snapshot_table_size = new_size;
+ kvfree_rcu_mightsleep(old);
+
+ return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+}
+
+static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
+{
+ size_t idx = U32_MAX - id;
+
+ lockdep_assert_held(&c->snapshot_table_lock);
+
+ if (likely(idx < c->snapshot_table_size))
+ return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+
+ return __snapshot_t_mut(c, id);
+}
+
+void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
+
+ prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u",
+ BCH_SNAPSHOT_SUBVOL(s.v),
+ BCH_SNAPSHOT_DELETED(s.v),
+ le32_to_cpu(s.v->parent),
+ le32_to_cpu(s.v->children[0]),
+ le32_to_cpu(s.v->children[1]),
+ le32_to_cpu(s.v->subvol),
+ le32_to_cpu(s.v->tree));
+
+ if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth))
+ prt_printf(out, " depth %u skiplist %u %u %u",
+ le32_to_cpu(s.v->depth),
+ le32_to_cpu(s.v->skip[0]),
+ le32_to_cpu(s.v->skip[1]),
+ le32_to_cpu(s.v->skip[2]));
+}
+
+int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_s_c_snapshot s;
+ u32 i, id;
+ int ret = 0;
+
+ bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) ||
+ bkey_lt(k.k->p, POS(0, 1)), c, err,
+ snapshot_pos_bad,
+ "bad pos");
+
+ s = bkey_s_c_to_snapshot(k);
+
+ id = le32_to_cpu(s.v->parent);
+ bkey_fsck_err_on(id && id <= k.k->p.offset, c, err,
+ snapshot_parent_bad,
+ "bad parent node (%u <= %llu)",
+ id, k.k->p.offset);
+
+ bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err,
+ snapshot_children_not_normalized,
+ "children not normalized");
+
+ bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err,
+ snapshot_child_duplicate,
+ "duplicate child nodes");
+
+ for (i = 0; i < 2; i++) {
+ id = le32_to_cpu(s.v->children[i]);
+
+ bkey_fsck_err_on(id >= k.k->p.offset, c, err,
+ snapshot_child_bad,
+ "bad child node (%u >= %llu)",
+ id, k.k->p.offset);
+ }
+
+ if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) {
+ bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) ||
+ le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err,
+ snapshot_skiplist_not_normalized,
+ "skiplist not normalized");
+
+ for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) {
+ id = le32_to_cpu(s.v->skip[i]);
+
+ bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err,
+ snapshot_skiplist_bad,
+ "bad skiplist node %u", id);
+ }
+ }
+fsck_err:
+ return ret;
+}
+
+static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
+{
+ struct snapshot_t *t = snapshot_t_mut(c, id);
+ u32 parent = id;
+
+ while ((parent = bch2_snapshot_parent_early(c, parent)) &&
+ parent - id - 1 < IS_ANCESTOR_BITMAP)
+ __set_bit(parent - id - 1, t->is_ancestor);
+}
+
+static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
+{
+ mutex_lock(&c->snapshot_table_lock);
+ __set_is_ancestor_bitmap(c, id);
+ mutex_unlock(&c->snapshot_table_lock);
+}
+
+static int __bch2_mark_snapshot(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_s_c old, struct bkey_s_c new,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_t *t;
+ u32 id = new.k->p.offset;
+ int ret = 0;
+
+ mutex_lock(&c->snapshot_table_lock);
+
+ t = snapshot_t_mut(c, id);
+ if (!t) {
+ ret = -BCH_ERR_ENOMEM_mark_snapshot;
+ goto err;
+ }
+
+ if (new.k->type == KEY_TYPE_snapshot) {
+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new);
+
+ t->parent = le32_to_cpu(s.v->parent);
+ t->children[0] = le32_to_cpu(s.v->children[0]);
+ t->children[1] = le32_to_cpu(s.v->children[1]);
+ t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0;
+ t->tree = le32_to_cpu(s.v->tree);
+
+ if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) {
+ t->depth = le32_to_cpu(s.v->depth);
+ t->skip[0] = le32_to_cpu(s.v->skip[0]);
+ t->skip[1] = le32_to_cpu(s.v->skip[1]);
+ t->skip[2] = le32_to_cpu(s.v->skip[2]);
+ } else {
+ t->depth = 0;
+ t->skip[0] = 0;
+ t->skip[1] = 0;
+ t->skip[2] = 0;
+ }
+
+ __set_is_ancestor_bitmap(c, id);
+
+ if (BCH_SNAPSHOT_DELETED(s.v)) {
+ set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots)
+ bch2_delete_dead_snapshots_async(c);
+ }
+ } else {
+ memset(t, 0, sizeof(*t));
+ }
+err:
+ mutex_unlock(&c->snapshot_table_lock);
+ return ret;
+}
+
+int bch2_mark_snapshot(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
+{
+ return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags);
+}
+
+int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
+ struct bch_snapshot *s)
+{
+ return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id),
+ BTREE_ITER_WITH_UPDATES, snapshot, s);
+}
+
+static int bch2_snapshot_live(struct btree_trans *trans, u32 id)
+{
+ struct bch_snapshot v;
+ int ret;
+
+ if (!id)
+ return 0;
+
+ ret = bch2_snapshot_lookup(trans, id, &v);
+ if (bch2_err_matches(ret, ENOENT))
+ bch_err(trans->c, "snapshot node %u not found", id);
+ if (ret)
+ return ret;
+
+ return !BCH_SNAPSHOT_DELETED(&v);
+}
+
+/*
+ * If @k is a snapshot with just one live child, it's part of a linear chain,
+ * which we consider to be an equivalence class: after snapshot
+ * deletion cleanup, there should only be a single key at a given position in
+ * this equivalence class.
+ *
+ * This sets the equivalence class of @k to be the child's equivalence class, if
+ * it's part of such a linear chain: this correctly sets equivalence classes on
+ * startup if we run leaf to root (i.e. in natural key order).
+ */
+static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ unsigned i, nr_live = 0, live_idx = 0;
+ struct bkey_s_c_snapshot snap;
+ u32 id = k.k->p.offset, child[2];
+
+ if (k.k->type != KEY_TYPE_snapshot)
+ return 0;
+
+ snap = bkey_s_c_to_snapshot(k);
+
+ child[0] = le32_to_cpu(snap.v->children[0]);
+ child[1] = le32_to_cpu(snap.v->children[1]);
+
+ for (i = 0; i < 2; i++) {
+ int ret = bch2_snapshot_live(trans, child[i]);
+
+ if (ret < 0)
+ return ret;
+
+ if (ret)
+ live_idx = i;
+ nr_live += ret;
+ }
+
+ mutex_lock(&c->snapshot_table_lock);
+
+ snapshot_t_mut(c, id)->equiv = nr_live == 1
+ ? snapshot_t_mut(c, child[live_idx])->equiv
+ : id;
+
+ mutex_unlock(&c->snapshot_table_lock);
+
+ return 0;
+}
+
+/* fsck: */
+
+static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child)
+{
+ return snapshot_t(c, id)->children[child];
+}
+
+static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id)
+{
+ return bch2_snapshot_child(c, id, 0);
+}
+
+static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id)
+{
+ return bch2_snapshot_child(c, id, 1);
+}
+
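+/*
+ * Depth-first, pre-order walk of a snapshot tree: returns the node to visit
+ * after @id, or 0 when the traversal is complete.
+ */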
+static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id)
+{
+ u32 n, parent;
+
+ n = bch2_snapshot_left_child(c, id);
+ if (n)
+ return n;
+
+ while ((parent = bch2_snapshot_parent(c, id))) {
+ n = bch2_snapshot_right_child(c, parent);
+ if (n && n != id)
+ return n;
+ id = parent;
+ }
+
+ return 0;
+}
+
+static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
+{
+ u32 id = snapshot_root;
+ u32 subvol = 0, s;
+
+ while (id) {
+ s = snapshot_t(c, id)->subvol;
+
+ if (s && (!subvol || s < subvol))
+ subvol = s;
+
+ id = bch2_snapshot_tree_next(c, id);
+ }
+
+ return subvol;
+}
+
+static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans,
+ u32 snapshot_root, u32 *subvol_id)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ bool found = false;
+ int ret;
+
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN,
+ 0, k, ret) {
+ if (k.k->type != KEY_TYPE_subvolume)
+ continue;
+
+ struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
+ if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root))
+ continue;
+ if (!BCH_SUBVOLUME_SNAP(s.v)) {
+ *subvol_id = s.k->p.offset;
+ found = true;
+ break;
+ }
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (!ret && !found) {
+ struct bkey_i_subvolume *u;
+
+ *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root);
+
+ u = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_subvolumes, POS(0, *subvol_id),
+ 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ return ret;
+
+ SET_BCH_SUBVOLUME_SNAP(&u->v, false);
+ }
+
+ return ret;
+}
+
+static int check_snapshot_tree(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c_snapshot_tree st;
+ struct bch_snapshot s;
+ struct bch_subvolume subvol;
+ struct printbuf buf = PRINTBUF;
+ u32 root_id;
+ int ret;
+
+ if (k.k->type != KEY_TYPE_snapshot_tree)
+ return 0;
+
+ st = bkey_s_c_to_snapshot_tree(k);
+ root_id = le32_to_cpu(st.v->root_snapshot);
+
+ ret = bch2_snapshot_lookup(trans, root_id, &s);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ goto err;
+
+ if (fsck_err_on(ret ||
+ root_id != bch2_snapshot_root(c, root_id) ||
+ st.k->p.offset != le32_to_cpu(s.tree),
+ c, snapshot_tree_to_missing_snapshot,
+ "snapshot tree points to missing/incorrect snapshot:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
+ ret = bch2_btree_delete_at(trans, iter, 0);
+ goto err;
+ }
+
+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol),
+ false, 0, &subvol);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ goto err;
+
+ if (fsck_err_on(ret,
+ c, snapshot_tree_to_missing_subvol,
+ "snapshot tree points to missing subvolume:\n %s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
+ fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
+ le32_to_cpu(subvol.snapshot),
+ root_id),
+ c, snapshot_tree_to_wrong_subvol,
+ "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
+ fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol),
+ c, snapshot_tree_to_snapshot_subvol,
+ "snapshot tree points to snapshot subvolume:\n %s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
+ struct bkey_i_snapshot_tree *u;
+ u32 subvol_id;
+
+ ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
+ if (ret)
+ goto err;
+
+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ goto err;
+
+ u->v.master_subvol = cpu_to_le32(subvol_id);
+ st = snapshot_tree_i_to_s_c(u);
+ }
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+/*
+ * For each snapshot_tree, make sure it points to the root of a snapshot tree,
+ * and that the root snapshot points back to it, or delete it.
+ *
+ * Also, make sure it points to a subvolume within that snapshot tree, or
+ * correct it to point to the oldest subvolume within that snapshot tree.
+ */
+int bch2_check_snapshot_trees(struct bch_fs *c)
+{
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter,
+ BTREE_ID_snapshot_trees, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_snapshot_tree(trans, &iter, k)));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+/*
+ * Look up snapshot tree for @tree_id and find root,
+ * make sure @snap_id is a descendant:
+ */
+static int snapshot_tree_ptr_good(struct btree_trans *trans,
+ u32 snap_id, u32 tree_id)
+{
+ struct bch_snapshot_tree s_t;
+ int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
+
+ if (bch2_err_matches(ret, ENOENT))
+ return 0;
+ if (ret)
+ return ret;
+
+ return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot));
+}
+
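+/*
+ * Pick a random ancestor of @id (possibly @id itself), used to populate a new
+ * child's skip list: randomized skip pointers keep ancestor walks short, as in
+ * a skip list.
+ */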
+u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id)
+{
+ const struct snapshot_t *s;
+
+ if (!id)
+ return 0;
+
+ rcu_read_lock();
+ s = snapshot_t(c, id);
+ if (s->parent)
+ id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth));
+ rcu_read_unlock();
+
+ return id;
+}
+
+static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s)
+{
+ unsigned i;
+
+ for (i = 0; i < 3; i++)
+ if (!s.parent) {
+ if (s.skip[i])
+ return false;
+ } else {
+ if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i])))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * The snapshot_tree pointer was incorrect: look up the root snapshot node,
+ * make sure its snapshot_tree pointer is correct (allocating a new one if
+ * necessary), then update this node's pointer to match the root node's:
+ */
+static int snapshot_tree_ptr_repair(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct bch_snapshot *s)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter root_iter;
+ struct bch_snapshot_tree s_t;
+ struct bkey_s_c_snapshot root;
+ struct bkey_i_snapshot *u;
+ u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id;
+ int ret;
+
+ root = bch2_bkey_get_iter_typed(trans, &root_iter,
+ BTREE_ID_snapshots, POS(0, root_id),
+ BTREE_ITER_WITH_UPDATES, snapshot);
+ ret = bkey_err(root);
+ if (ret)
+ goto err;
+
+ tree_id = le32_to_cpu(root.v->tree);
+
+ ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
+ if (ret && !bch2_err_matches(ret, ENOENT))
+ return ret;
+
+ if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
+ u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(u) ?:
+ bch2_snapshot_tree_create(trans, root_id,
+ bch2_snapshot_tree_oldest_subvol(c, root_id),
+ &tree_id);
+ if (ret)
+ goto err;
+
+ u->v.tree = cpu_to_le32(tree_id);
+ if (k.k->p.offset == root_id)
+ *s = u->v;
+ }
+
+ if (k.k->p.offset != root_id) {
+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ goto err;
+
+ u->v.tree = cpu_to_le32(tree_id);
+ *s = u->v;
+ }
+err:
+ bch2_trans_iter_exit(trans, &root_iter);
+ return ret;
+}
+
+static int check_snapshot(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_snapshot s;
+ struct bch_subvolume subvol;
+ struct bch_snapshot v;
+ struct bkey_i_snapshot *u;
+ u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
+ u32 real_depth;
+ struct printbuf buf = PRINTBUF;
+ bool should_have_subvol;
+ u32 i, id;
+ int ret = 0;
+
+ if (k.k->type != KEY_TYPE_snapshot)
+ return 0;
+
+ memset(&s, 0, sizeof(s));
+ memcpy(&s, k.v, bkey_val_bytes(k.k));
+
+ id = le32_to_cpu(s.parent);
+ if (id) {
+ ret = bch2_snapshot_lookup(trans, id, &v);
+ if (bch2_err_matches(ret, ENOENT))
+ bch_err(c, "snapshot with nonexistent parent:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ if (ret)
+ goto err;
+
+ if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
+ le32_to_cpu(v.children[1]) != k.k->p.offset) {
+ bch_err(c, "snapshot parent %u missing pointer to child %llu",
+ id, k.k->p.offset);
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ for (i = 0; i < 2 && s.children[i]; i++) {
+ id = le32_to_cpu(s.children[i]);
+
+ ret = bch2_snapshot_lookup(trans, id, &v);
+ if (bch2_err_matches(ret, ENOENT))
+ bch_err(c, "snapshot node %llu has nonexistent child %u",
+ k.k->p.offset, id);
+ if (ret)
+ goto err;
+
+ if (le32_to_cpu(v.parent) != k.k->p.offset) {
+ bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
+ id, le32_to_cpu(v.parent), k.k->p.offset);
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
+ !BCH_SNAPSHOT_DELETED(&s);
+
+ if (should_have_subvol) {
+ id = le32_to_cpu(s.subvol);
+ ret = bch2_subvolume_get(trans, id, 0, false, &subvol);
+ if (bch2_err_matches(ret, ENOENT))
+ bch_err(c, "snapshot points to nonexistent subvolume:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ if (ret)
+ goto err;
+
+ if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
+ bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
+ k.k->p.offset);
+ ret = -EINVAL;
+ goto err;
+ }
+ } else {
+ if (fsck_err_on(s.subvol,
+ c, snapshot_should_not_have_subvol,
+ "snapshot should not point to subvol:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ goto err;
+
+ u->v.subvol = 0;
+ s = u->v;
+ }
+ }
+
+ ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree));
+ if (ret < 0)
+ goto err;
+
+ if (fsck_err_on(!ret, c, snapshot_to_bad_snapshot_tree,
+ "snapshot points to missing/incorrect tree:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
+ if (ret)
+ goto err;
+ }
+ ret = 0;
+
+ real_depth = bch2_snapshot_depth(c, parent_id);
+
+ if (fsck_err_on(le32_to_cpu(s.depth) != real_depth,
+ c, snapshot_bad_depth,
+ "snapshot with incorrect depth field, should be %u:\n %s",
+ real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ goto err;
+
+ u->v.depth = cpu_to_le32(real_depth);
+ s = u->v;
+ }
+
+ ret = snapshot_skiplist_good(trans, k.k->p.offset, s);
+ if (ret < 0)
+ goto err;
+
+ if (fsck_err_on(!ret, c, snapshot_bad_skiplist,
+ "snapshot with bad skiplist field:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(u);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
+ u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id));
+
+ bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32);
+ s = u->v;
+ }
+ ret = 0;
+err:
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_check_snapshots(struct bch_fs *c)
+{
+ /*
+ * We iterate backwards as checking/fixing the depth field requires that
+ * the parent's depth already be correct:
+ */
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_reverse_commit(trans, iter,
+ BTREE_ID_snapshots, POS_MAX,
+ BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_snapshot(trans, &iter, k)));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+/*
+ * Mark a snapshot as deleted, for future cleanup:
+ */
+int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id)
+{
+ struct btree_iter iter;
+ struct bkey_i_snapshot *s;
+ int ret = 0;
+
+ s = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_snapshots, POS(0, id),
+ 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(s);
+ if (unlikely(ret)) {
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
+ trans->c, "missing snapshot %u", id);
+ return ret;
+ }
+
+ /* already deleted? */
+ if (BCH_SNAPSHOT_DELETED(&s->v))
+ goto err;
+
+ SET_BCH_SNAPSHOT_DELETED(&s->v, true);
+ SET_BCH_SNAPSHOT_SUBVOL(&s->v, false);
+ s->v.subvol = 0;
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
+{
+ if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1]))
+ swap(s->children[0], s->children[1]);
+}
+
+static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
+ struct btree_iter c_iter = (struct btree_iter) { NULL };
+ struct btree_iter tree_iter = (struct btree_iter) { NULL };
+ struct bkey_s_c_snapshot s;
+ u32 parent_id, child_id;
+ unsigned i;
+ int ret = 0;
+
+ s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id),
+ BTREE_ITER_INTENT, snapshot);
+ ret = bkey_err(s);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+ "missing snapshot %u", id);
+
+ if (ret)
+ goto err;
+
+ BUG_ON(s.v->children[1]);
+
+ parent_id = le32_to_cpu(s.v->parent);
+ child_id = le32_to_cpu(s.v->children[0]);
+
+ if (parent_id) {
+ struct bkey_i_snapshot *parent;
+
+ parent = bch2_bkey_get_mut_typed(trans, &p_iter,
+ BTREE_ID_snapshots, POS(0, parent_id),
+ 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(parent);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+ "missing snapshot %u", parent_id);
+ if (unlikely(ret))
+ goto err;
+
+ /* find entry in parent->children for node being deleted */
+ for (i = 0; i < 2; i++)
+ if (le32_to_cpu(parent->v.children[i]) == id)
+ break;
+
+ if (bch2_fs_inconsistent_on(i == 2, c,
+ "snapshot %u missing child pointer to %u",
+ parent_id, id))
+ goto err;
+
+ parent->v.children[i] = cpu_to_le32(child_id);
+
+ normalize_snapshot_child_pointers(&parent->v);
+ }
+
+ if (child_id) {
+ struct bkey_i_snapshot *child;
+
+ child = bch2_bkey_get_mut_typed(trans, &c_iter,
+ BTREE_ID_snapshots, POS(0, child_id),
+ 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(child);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+ "missing snapshot %u", child_id);
+ if (unlikely(ret))
+ goto err;
+
+ child->v.parent = cpu_to_le32(parent_id);
+
+ if (!child->v.parent) {
+ child->v.skip[0] = 0;
+ child->v.skip[1] = 0;
+ child->v.skip[2] = 0;
+ }
+ }
+
+ if (!parent_id) {
+ /*
+ * We're deleting the root of a snapshot tree: update the
+ * snapshot_tree entry to point to the new root, or delete it if
+ * this is the last snapshot ID in this tree:
+ */
+ struct bkey_i_snapshot_tree *s_t;
+
+ BUG_ON(s.v->children[1]);
+
+ s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
+ BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)),
+ 0, snapshot_tree);
+ ret = PTR_ERR_OR_ZERO(s_t);
+ if (ret)
+ goto err;
+
+ if (s.v->children[0]) {
+ s_t->v.root_snapshot = s.v->children[0];
+ } else {
+ s_t->k.type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(&s_t->k, 0);
+ }
+ }
+
+ ret = bch2_btree_delete_at(trans, &iter, 0);
+err:
+ bch2_trans_iter_exit(trans, &tree_iter);
+ bch2_trans_iter_exit(trans, &p_iter);
+ bch2_trans_iter_exit(trans, &c_iter);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
+ u32 *new_snapids,
+ u32 *snapshot_subvols,
+ unsigned nr_snapids)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_i_snapshot *n;
+ struct bkey_s_c k;
+ unsigned i, j;
+ u32 depth = bch2_snapshot_depth(c, parent);
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
+ POS_MIN, BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < nr_snapids; i++) {
+ k = bch2_btree_iter_prev_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (!k.k || !k.k->p.offset) {
+ ret = -BCH_ERR_ENOSPC_snapshot_create;
+ goto err;
+ }
+
+ n = bch2_bkey_alloc(trans, &iter, 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ goto err;
+
+ n->v.flags = 0;
+ n->v.parent = cpu_to_le32(parent);
+ n->v.subvol = cpu_to_le32(snapshot_subvols[i]);
+ n->v.tree = cpu_to_le32(tree);
+ n->v.depth = cpu_to_le32(depth);
+
+ for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
+ n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent));
+
+ bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32);
+ SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
+
+ ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
+ bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
+ if (ret)
+ goto err;
+
+ new_snapids[i] = iter.pos.offset;
+
+ mutex_lock(&c->snapshot_table_lock);
+ snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i];
+ mutex_unlock(&c->snapshot_table_lock);
+ }
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+/*
+ * Create new snapshot IDs as children of an existing snapshot ID:
+ */
+static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
+ u32 *new_snapids,
+ u32 *snapshot_subvols,
+ unsigned nr_snapids)
+{
+ struct btree_iter iter;
+ struct bkey_i_snapshot *n_parent;
+ int ret = 0;
+
+ n_parent = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_snapshots, POS(0, parent),
+ 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(n_parent);
+ if (unlikely(ret)) {
+ if (bch2_err_matches(ret, ENOENT))
+ bch_err(trans->c, "snapshot %u not found", parent);
+ return ret;
+ }
+
+ if (n_parent->v.children[0] || n_parent->v.children[1]) {
+ bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
+ new_snapids, snapshot_subvols, nr_snapids);
+ if (ret)
+ goto err;
+
+ n_parent->v.children[0] = cpu_to_le32(new_snapids[0]);
+ n_parent->v.children[1] = cpu_to_le32(new_snapids[1]);
+ n_parent->v.subvol = 0;
+ SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+/*
+ * Create a snapshot node that is the root of a new tree:
+ */
+static int bch2_snapshot_node_create_tree(struct btree_trans *trans,
+ u32 *new_snapids,
+ u32 *snapshot_subvols,
+ unsigned nr_snapids)
+{
+ struct bkey_i_snapshot_tree *n_tree;
+ int ret;
+
+ n_tree = __bch2_snapshot_tree_create(trans);
+ ret = PTR_ERR_OR_ZERO(n_tree) ?:
+ create_snapids(trans, 0, n_tree->k.p.offset,
+ new_snapids, snapshot_subvols, nr_snapids);
+ if (ret)
+ return ret;
+
+ n_tree->v.master_subvol = cpu_to_le32(snapshot_subvols[0]);
+ n_tree->v.root_snapshot = cpu_to_le32(new_snapids[0]);
+ return 0;
+}
+
+int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
+ u32 *new_snapids,
+ u32 *snapshot_subvols,
+ unsigned nr_snapids)
+{
+ BUG_ON((parent == 0) != (nr_snapids == 1));
+ BUG_ON((parent != 0) != (nr_snapids == 2));
+
+ return parent
+ ? bch2_snapshot_node_create_children(trans, parent,
+ new_snapids, snapshot_subvols, nr_snapids)
+ : bch2_snapshot_node_create_tree(trans,
+ new_snapids, snapshot_subvols, nr_snapids);
+
+}
+
+/*
+ * If we have an unlinked inode in an internal snapshot node, and the inode
+ * really has been deleted in all child snapshots, how does this get cleaned up?
+ *
+ * First, there is the problem of how keys that have been overwritten in all
+ * child snapshots get deleted (unimplemented?), but inodes may perhaps be
+ * special?
+ *
+ * Also: an unlinked inode in an internal snapshot node appears not to get
+ * deleted correctly if the inode doesn't exist in leaf snapshots.
+ *
+ * Solution:
+ *
+ * For a key in an interior snapshot node that needs work requiring it to be
+ * mutated: iterate over all descendant leaf nodes and copy that key to the
+ * leaf snapshots, where we can mutate it.
+ */
+
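+/*
+ * Delete @k if its snapshot is being deleted, or if we've already seen a key
+ * at this position from the same equivalence class (i.e. it has been
+ * overwritten):
+ */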
+static int snapshot_delete_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ snapshot_id_list *deleted,
+ snapshot_id_list *equiv_seen,
+ struct bpos *last_pos)
+{
+ struct bch_fs *c = trans->c;
+ u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
+ if (!bkey_eq(k.k->p, *last_pos))
+ equiv_seen->nr = 0;
+ *last_pos = k.k->p;
+
+ if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
+ snapshot_list_has_id(equiv_seen, equiv)) {
+ return bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ } else {
+ return snapshot_list_add(c, equiv_seen, equiv);
+ }
+}
+
+static int move_key_to_correct_snapshot(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
+ /*
+ * When we have a linear chain of snapshot nodes, we consider
+ * those to form an equivalence class: we're going to collapse
+ * them all down to a single node, and keep the leaf-most node -
+ * which has the same id as the equivalence class id.
+ *
+ * If there are multiple keys in different snapshots at the same
+ * position, we're only going to keep the one in the newest
+ * snapshot - the rest have been overwritten and are redundant,
+ * and for the key we're going to keep we need to move it to the
+ * equivalence class ID if it's not there already.
+ */
+ if (equiv != k.k->p.snapshot) {
+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
+ struct btree_iter new_iter;
+ int ret;
+
+ ret = PTR_ERR_OR_ZERO(new);
+ if (ret)
+ return ret;
+
+ new->k.p.snapshot = equiv;
+
+ bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
+ BTREE_ITER_ALL_SNAPSHOTS|
+ BTREE_ITER_CACHED|
+ BTREE_ITER_INTENT);
+
+ ret = bch2_btree_iter_traverse(&new_iter) ?:
+ bch2_trans_update(trans, &new_iter, new,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ bch2_trans_iter_exit(trans, &new_iter);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k)
+{
+ struct bkey_s_c_snapshot snap;
+ u32 children[2];
+ int ret;
+
+ if (k.k->type != KEY_TYPE_snapshot)
+ return 0;
+
+ snap = bkey_s_c_to_snapshot(k);
+ if (BCH_SNAPSHOT_DELETED(snap.v) ||
+ BCH_SNAPSHOT_SUBVOL(snap.v))
+ return 0;
+
+ children[0] = le32_to_cpu(snap.v->children[0]);
+ children[1] = le32_to_cpu(snap.v->children[1]);
+
+ ret = bch2_snapshot_live(trans, children[0]) ?:
+ bch2_snapshot_live(trans, children[1]);
+ if (ret < 0)
+ return ret;
+ return !ret;
+}
+
+/*
+ * For a given snapshot, if it doesn't have a subvolume that points to it and
+ * it doesn't have child snapshot nodes, it's now redundant and we can mark it
+ * as deleted.
+ */
+static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k)
+{
+ int ret = bch2_snapshot_needs_delete(trans, k);
+
+ return ret <= 0
+ ? ret
+ : bch2_snapshot_node_set_deleted(trans, k.k->p.offset);
+}
+
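+/*
+ * Like bch2_snapshot_nth_parent(), but only counts ancestors that aren't in
+ * @skip; used when repointing skip list entries away from interior nodes that
+ * are about to be deleted:
+ */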
+static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
+ snapshot_id_list *skip)
+{
+ rcu_read_lock();
+ while (snapshot_list_has_id(skip, id))
+ id = __bch2_snapshot_parent(c, id);
+
+ while (n--) {
+ do {
+ id = __bch2_snapshot_parent(c, id);
+ } while (snapshot_list_has_id(skip, id));
+ }
+ rcu_read_unlock();
+
+ return id;
+}
+
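+/*
+ * A surviving snapshot node may have a depth and skip list entries that refer
+ * to interior nodes being deleted: recompute its depth and repoint those skip
+ * list entries at ancestors that will survive.
+ */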
+static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
+ struct btree_iter *iter, struct bkey_s_c k,
+ snapshot_id_list *deleted)
+{
+ struct bch_fs *c = trans->c;
+ u32 nr_deleted_ancestors = 0;
+ struct bkey_i_snapshot *s;
+ int ret;
+
+ if (k.k->type != KEY_TYPE_snapshot)
+ return 0;
+
+ if (snapshot_list_has_id(deleted, k.k->p.offset))
+ return 0;
+
+ s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot);
+ ret = PTR_ERR_OR_ZERO(s);
+ if (ret)
+ return ret;
+
+ darray_for_each(*deleted, i)
+ nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i);
+
+ if (!nr_deleted_ancestors)
+ return 0;
+
+ le32_add_cpu(&s->v.depth, -nr_deleted_ancestors);
+
+ if (!s->v.depth) {
+ s->v.skip[0] = 0;
+ s->v.skip[1] = 0;
+ s->v.skip[2] = 0;
+ } else {
+ u32 depth = le32_to_cpu(s->v.depth);
+ u32 parent = bch2_snapshot_parent(c, s->k.p.offset);
+
+ for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) {
+ u32 id = le32_to_cpu(s->v.skip[j]);
+
+ if (snapshot_list_has_id(deleted, id)) {
+ id = bch2_snapshot_nth_parent_skip(c,
+ parent,
+ depth > 1
+ ? get_random_u32_below(depth - 1)
+ : 0,
+ deleted);
+ s->v.skip[j] = cpu_to_le32(id);
+ }
+ }
+
+ bubble_sort(s->v.skip, ARRAY_SIZE(s->v.skip), cmp_le32);
+ }
+
+ return bch2_trans_update(trans, iter, &s->k_i, 0);
+}
+
+int bch2_delete_dead_snapshots(struct bch_fs *c)
+{
+ struct btree_trans *trans;
+ snapshot_id_list deleted = { 0 };
+ snapshot_id_list deleted_interior = { 0 };
+ u32 id;
+ int ret = 0;
+
+ if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
+ return 0;
+
+ if (!test_bit(BCH_FS_started, &c->flags)) {
+ ret = bch2_fs_read_write_early(c);
+ bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
+ if (ret)
+ return ret;
+ }
+
+ trans = bch2_trans_get(c);
+
+ /*
+ * For every snapshot node: if it has no live children and it's not
+ * pointed to by a subvolume, delete it:
+ */
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots,
+ POS_MIN, 0, k,
+ NULL, NULL, 0,
+ bch2_delete_redundant_snapshot(trans, k));
+ bch_err_msg(c, ret, "deleting redundant snapshots");
+ if (ret)
+ goto err;
+
+ ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
+ POS_MIN, 0, k,
+ bch2_snapshot_set_equiv(trans, k));
+ bch_err_msg(c, ret, "in bch2_snapshot_set_equiv");
+ if (ret)
+ goto err;
+
+ ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
+ POS_MIN, 0, k, ({
+ if (k.k->type != KEY_TYPE_snapshot)
+ continue;
+
+ BCH_SNAPSHOT_DELETED(bkey_s_c_to_snapshot(k).v)
+ ? snapshot_list_add(c, &deleted, k.k->p.offset)
+ : 0;
+ }));
+ bch_err_msg(c, ret, "walking snapshots");
+ if (ret)
+ goto err;
+
+ for (id = 0; id < BTREE_ID_NR; id++) {
+ struct bpos last_pos = POS_MIN;
+ snapshot_id_list equiv_seen = { 0 };
+ struct disk_reservation res = { 0 };
+
+ if (!btree_type_has_snapshots(id))
+ continue;
+
+ /*
+ * The deleted inodes btree is maintained by a trigger on the inodes
+ * btree: no work for us to do here, and it's not safe to scan
+ * it because we'll see out-of-date keys due to the btree write
+ * buffer:
+ */
+ if (id == BTREE_ID_deleted_inodes)
+ continue;
+
+ ret = for_each_btree_key_commit(trans, iter,
+ id, POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc,
+ snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
+ for_each_btree_key_commit(trans, iter,
+ id, POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc,
+ move_key_to_correct_snapshot(trans, &iter, k));
+
+ bch2_disk_reservation_put(c, &res);
+ darray_exit(&equiv_seen);
+
+ bch_err_msg(c, ret, "deleting keys from dying snapshots");
+ if (ret)
+ goto err;
+ }
+
+ bch2_trans_unlock(trans);
+ down_write(&c->snapshot_create_lock);
+
+ ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots,
+ POS_MIN, 0, k, ({
+ u32 snapshot = k.k->p.offset;
+ u32 equiv = bch2_snapshot_equiv(c, snapshot);
+
+ equiv != snapshot
+ ? snapshot_list_add(c, &deleted_interior, snapshot)
+ : 0;
+ }));
+
+ bch_err_msg(c, ret, "walking snapshots");
+ if (ret)
+ goto err_create_lock;
+
+ /*
+ * Fixing children of deleted snapshots can't be done completely
+ * atomically; if we crash between here and when we delete the interior
+ * nodes, some depth fields will be off:
+ */
+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
+ BTREE_ITER_INTENT, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
+ if (ret)
+ goto err_create_lock;
+
+ darray_for_each(deleted, i) {
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_snapshot_node_delete(trans, *i));
+ bch_err_msg(c, ret, "deleting snapshot %u", *i);
+ if (ret)
+ goto err_create_lock;
+ }
+
+ darray_for_each(deleted_interior, i) {
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_snapshot_node_delete(trans, *i));
+ bch_err_msg(c, ret, "deleting snapshot %u", *i);
+ if (ret)
+ goto err_create_lock;
+ }
+err_create_lock:
+ up_write(&c->snapshot_create_lock);
+err:
+ darray_exit(&deleted_interior);
+ darray_exit(&deleted);
+ bch2_trans_put(trans);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+void bch2_delete_dead_snapshots_work(struct work_struct *work)
+{
+ struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
+
+ bch2_delete_dead_snapshots(c);
+ bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
+}
+
+void bch2_delete_dead_snapshots_async(struct bch_fs *c)
+{
+ if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) &&
+ !queue_work(c->write_ref_wq, &c->snapshot_delete_work))
+ bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
+}
+
+int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
+ enum btree_id id,
+ struct bpos pos)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, id, pos,
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_ALL_SNAPSHOTS);
+ while (1) {
+ k = bch2_btree_iter_prev(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ break;
+
+ if (!k.k)
+ break;
+
+ if (!bkey_eq(pos, k.k->p))
+ break;
+
+ if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
+ ret = 1;
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
+static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id)
+{
+ const struct snapshot_t *s = snapshot_t(c, id);
+
+ return s->children[1] ?: s->children[0];
+}
+
+static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id)
+{
+ u32 child;
+
+ while ((child = bch2_snapshot_smallest_child(c, id)))
+ id = child;
+ return id;
+}
+
+static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
+ enum btree_id btree,
+ struct bkey_s_c interior_k,
+ u32 leaf_id, struct bpos *new_min_pos)
+{
+ struct btree_iter iter;
+ struct bpos pos = interior_k.k->p;
+ struct bkey_s_c k;
+ struct bkey_i *new;
+ int ret;
+
+ pos.snapshot = leaf_id;
+
+ bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto out;
+
+ /* key already overwritten in this snapshot? */
+ if (k.k->p.snapshot != interior_k.k->p.snapshot)
+ goto out;
+
+ if (bpos_eq(*new_min_pos, POS_MIN)) {
+ *new_min_pos = k.k->p;
+ new_min_pos->snapshot = leaf_id;
+ }
+
+ new = bch2_bkey_make_mut_noupdate(trans, interior_k);
+ ret = PTR_ERR_OR_ZERO(new);
+ if (ret)
+ goto out;
+
+ new->k.p.snapshot = leaf_id;
+ ret = bch2_trans_update(trans, &iter, new, 0);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
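+/*
+ * Copy @k to every leaf snapshot that is a descendant of @k's snapshot, unless
+ * the key has already been overwritten there; @new_min_pos is set to the
+ * position of the first copy written.
+ */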
+int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans,
+ enum btree_id btree,
+ struct bkey_s_c k,
+ struct bpos *new_min_pos)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_buf sk;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
+
+ bch2_bkey_buf_init(&sk);
+ bch2_bkey_buf_reassemble(&sk, c, k);
+ k = bkey_i_to_s_c(sk.k);
+
+ *new_min_pos = POS_MIN;
+
+ for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot);
+ id < k.k->p.snapshot;
+ id++) {
+ if (!bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) ||
+ !bch2_snapshot_is_leaf(c, id))
+ continue;
+again:
+ ret = btree_trans_too_many_iters(trans) ?:
+ bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
+ if (ret && bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ bch2_trans_begin(trans);
+ goto again;
+ }
+
+ if (ret)
+ break;
+ }
+
+ bch2_bkey_buf_exit(&sk, c);
+
+ return ret ?: trans_was_restarted(trans, restart_count);
+}
+
+static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c_snapshot snap;
+ int ret = 0;
+
+ if (k.k->type != KEY_TYPE_snapshot)
+ return 0;
+
+ snap = bkey_s_c_to_snapshot(k);
+ if (BCH_SNAPSHOT_DELETED(snap.v) ||
+ bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset ||
+ (ret = bch2_snapshot_needs_delete(trans, k)) > 0) {
+ set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
+ return 0;
+ }
+
+ return ret;
+}
+
+int bch2_snapshots_read(struct bch_fs *c)
+{
+ int ret = bch2_trans_run(c,
+ for_each_btree_key(trans, iter, BTREE_ID_snapshots,
+ POS_MIN, 0, k,
+ __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
+ bch2_snapshot_set_equiv(trans, k) ?:
+ bch2_check_snapshot_needs_deletion(trans, k)) ?:
+ for_each_btree_key(trans, iter, BTREE_ID_snapshots,
+ POS_MIN, 0, k,
+ (set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+void bch2_fs_snapshots_exit(struct bch_fs *c)
+{
+ kfree(rcu_dereference_protected(c->snapshots, true));
+}
diff --git a/c_src/libbcachefs/snapshot.h b/c_src/libbcachefs/snapshot.h
new file mode 100644
index 00000000..7c66ffc0
--- /dev/null
+++ b/c_src/libbcachefs/snapshot.h
@@ -0,0 +1,264 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SNAPSHOT_H
+#define _BCACHEFS_SNAPSHOT_H
+
+enum bkey_invalid_flags;
+
+void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+
+#define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \
+ .key_invalid = bch2_snapshot_tree_invalid, \
+ .val_to_text = bch2_snapshot_tree_to_text, \
+ .min_val_size = 8, \
+})
+
+struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *);
+
+int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *);
+
+void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+
+#define bch2_bkey_ops_snapshot ((struct bkey_ops) { \
+ .key_invalid = bch2_snapshot_invalid, \
+ .val_to_text = bch2_snapshot_to_text, \
+ .trigger = bch2_mark_snapshot, \
+ .min_val_size = 24, \
+})
+
+static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
+{
+ return &t->s[U32_MAX - id];
+}
+
+static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
+{
+ return __snapshot_t(rcu_dereference(c->snapshots), id);
+}
+
+static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
+{
+ rcu_read_lock();
+ id = snapshot_t(c, id)->tree;
+ rcu_read_unlock();
+
+ return id;
+}
+
+static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
+{
+ return snapshot_t(c, id)->parent;
+}
+
+static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
+{
+ rcu_read_lock();
+ id = __bch2_snapshot_parent_early(c, id);
+ rcu_read_unlock();
+
+ return id;
+}
+
+static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+ u32 parent = snapshot_t(c, id)->parent;
+
+ if (parent &&
+ snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1)
+ panic("id %u depth=%u parent %u depth=%u\n",
+ id, snapshot_t(c, id)->depth,
+ parent, snapshot_t(c, parent)->depth);
+
+ return parent;
+#else
+ return snapshot_t(c, id)->parent;
+#endif
+}
+
+static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
+{
+ rcu_read_lock();
+ id = __bch2_snapshot_parent(c, id);
+ rcu_read_unlock();
+
+ return id;
+}
+
+static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n)
+{
+ rcu_read_lock();
+ while (n--)
+ id = __bch2_snapshot_parent(c, id);
+ rcu_read_unlock();
+
+ return id;
+}
+
+u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32);
+
+static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
+{
+ u32 parent;
+
+ rcu_read_lock();
+ while ((parent = __bch2_snapshot_parent(c, id)))
+ id = parent;
+ rcu_read_unlock();
+
+ return id;
+}
+
+static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
+{
+ return snapshot_t(c, id)->equiv;
+}
+
+static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
+{
+ rcu_read_lock();
+ id = __bch2_snapshot_equiv(c, id);
+ rcu_read_unlock();
+
+ return id;
+}
+
+static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
+{
+ return id == bch2_snapshot_equiv(c, id);
+}
+
+static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
+{
+ const struct snapshot_t *s;
+ bool ret;
+
+ rcu_read_lock();
+ s = snapshot_t(c, id);
+ ret = s->children[0];
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
+{
+ return !bch2_snapshot_is_internal_node(c, id);
+}
+
+static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
+{
+ const struct snapshot_t *s;
+ u32 parent = __bch2_snapshot_parent(c, id);
+
+ if (!parent)
+ return 0;
+
+ s = snapshot_t(c, __bch2_snapshot_parent(c, id));
+ if (id == s->children[0])
+ return s->children[1];
+ if (id == s->children[1])
+ return s->children[0];
+ return 0;
+}
+
+static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
+{
+ u32 depth;
+
+ rcu_read_lock();
+ depth = parent ? snapshot_t(c, parent)->depth + 1 : 0;
+ rcu_read_unlock();
+
+ return depth;
+}
+
+bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
+
+static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
+{
+ return id == ancestor
+ ? true
+ : __bch2_snapshot_is_ancestor(c, id, ancestor);
+}
+
+static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
+{
+ const struct snapshot_t *t;
+ bool ret;
+
+ rcu_read_lock();
+ t = snapshot_t(c, id);
+ ret = (t->children[0]|t->children[1]) != 0;
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id)
+{
+ darray_for_each(*s, i)
+ if (*i == id)
+ return true;
+ return false;
+}
+
+static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list *s, u32 id)
+{
+ darray_for_each(*s, i)
+ if (bch2_snapshot_is_ancestor(c, id, *i))
+ return true;
+ return false;
+}
+
+static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
+{
+ int ret;
+
+ BUG_ON(snapshot_list_has_id(s, id));
+ ret = darray_push(s, id);
+ if (ret)
+ bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
+ return ret;
+}
+
+int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
+ struct bch_snapshot *s);
+int bch2_snapshot_get_subvol(struct btree_trans *, u32,
+ struct bch_subvolume *);
+
+/* only exported for tests: */
+int bch2_snapshot_node_create(struct btree_trans *, u32,
+ u32 *, u32 *, unsigned);
+
+int bch2_check_snapshot_trees(struct bch_fs *);
+int bch2_check_snapshots(struct bch_fs *);
+
+int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
+void bch2_delete_dead_snapshots_work(struct work_struct *);
+
+int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos);
+
+static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
+ enum btree_id id,
+ struct bpos pos)
+{
+ if (!btree_type_has_snapshots(id) ||
+ bch2_snapshot_is_leaf(trans->c, pos.snapshot))
+ return 0;
+
+ return __bch2_key_has_snapshot_overwrites(trans, id, pos);
+}
+
+int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *, enum btree_id,
+ struct bkey_s_c, struct bpos *);
+
+int bch2_snapshots_read(struct bch_fs *);
+void bch2_fs_snapshots_exit(struct bch_fs *);
+
+#endif /* _BCACHEFS_SNAPSHOT_H */
diff --git a/c_src/libbcachefs/str_hash.h b/c_src/libbcachefs/str_hash.h
new file mode 100644
index 00000000..89fdb7c2
--- /dev/null
+++ b/c_src/libbcachefs/str_hash.h
@@ -0,0 +1,381 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_STR_HASH_H
+#define _BCACHEFS_STR_HASH_H
+
+#include "btree_iter.h"
+#include "btree_update.h"
+#include "checksum.h"
+#include "error.h"
+#include "inode.h"
+#include "siphash.h"
+#include "subvolume.h"
+#include "super.h"
+
+#include <linux/crc32c.h>
+#include <crypto/hash.h>
+#include <crypto/sha2.h>
+
+typedef unsigned __bitwise bch_str_hash_flags_t;
+
+enum bch_str_hash_flags {
+ __BCH_HASH_SET_MUST_CREATE,
+ __BCH_HASH_SET_MUST_REPLACE,
+};
+
+#define BCH_HASH_SET_MUST_CREATE (__force bch_str_hash_flags_t) BIT(__BCH_HASH_SET_MUST_CREATE)
+#define BCH_HASH_SET_MUST_REPLACE (__force bch_str_hash_flags_t) BIT(__BCH_HASH_SET_MUST_REPLACE)
+
+static inline enum bch_str_hash_type
+bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
+{
+ switch (opt) {
+ case BCH_STR_HASH_OPT_crc32c:
+ return BCH_STR_HASH_crc32c;
+ case BCH_STR_HASH_OPT_crc64:
+ return BCH_STR_HASH_crc64;
+ case BCH_STR_HASH_OPT_siphash:
+ return c->sb.features & (1ULL << BCH_FEATURE_new_siphash)
+ ? BCH_STR_HASH_siphash
+ : BCH_STR_HASH_siphash_old;
+ default:
+ BUG();
+ }
+}
+
+struct bch_hash_info {
+ u8 type;
+ /*
+ * For crc32 or crc64 string hashes the first key value of
+ * the siphash_key (k0) is used as the key.
+ */
+ SIPHASH_KEY siphash_key;
+};
+
+static inline struct bch_hash_info
+bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
+{
+ /* XXX ick */
+ struct bch_hash_info info = {
+ .type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) &
+ ~(~0U << INODE_STR_HASH_BITS),
+ .siphash_key = { .k0 = bi->bi_hash_seed }
+ };
+
+ if (unlikely(info.type == BCH_STR_HASH_siphash_old)) {
+ SHASH_DESC_ON_STACK(desc, c->sha256);
+ u8 digest[SHA256_DIGEST_SIZE];
+
+ desc->tfm = c->sha256;
+
+ crypto_shash_digest(desc, (void *) &bi->bi_hash_seed,
+ sizeof(bi->bi_hash_seed), digest);
+ memcpy(&info.siphash_key, digest, sizeof(info.siphash_key));
+ }
+
+ return info;
+}
+
+struct bch_str_hash_ctx {
+ union {
+ u32 crc32c;
+ u64 crc64;
+ SIPHASH_CTX siphash;
+ };
+};
+
+static inline void bch2_str_hash_init(struct bch_str_hash_ctx *ctx,
+ const struct bch_hash_info *info)
+{
+ switch (info->type) {
+ case BCH_STR_HASH_crc32c:
+ ctx->crc32c = crc32c(~0, &info->siphash_key.k0,
+ sizeof(info->siphash_key.k0));
+ break;
+ case BCH_STR_HASH_crc64:
+ ctx->crc64 = crc64_be(~0, &info->siphash_key.k0,
+ sizeof(info->siphash_key.k0));
+ break;
+ case BCH_STR_HASH_siphash_old:
+ case BCH_STR_HASH_siphash:
+ SipHash24_Init(&ctx->siphash, &info->siphash_key);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx,
+ const struct bch_hash_info *info,
+ const void *data, size_t len)
+{
+ switch (info->type) {
+ case BCH_STR_HASH_crc32c:
+ ctx->crc32c = crc32c(ctx->crc32c, data, len);
+ break;
+ case BCH_STR_HASH_crc64:
+ ctx->crc64 = crc64_be(ctx->crc64, data, len);
+ break;
+ case BCH_STR_HASH_siphash_old:
+ case BCH_STR_HASH_siphash:
+ SipHash24_Update(&ctx->siphash, data, len);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
+ const struct bch_hash_info *info)
+{
+ switch (info->type) {
+ case BCH_STR_HASH_crc32c:
+ return ctx->crc32c;
+ case BCH_STR_HASH_crc64:
+ return ctx->crc64 >> 1;
+ case BCH_STR_HASH_siphash_old:
+ case BCH_STR_HASH_siphash:
+ return SipHash24_End(&ctx->siphash) >> 1;
+ default:
+ BUG();
+ }
+}
+
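+/*
+ * Describes one use of a btree as a hash table: how to hash a lookup key and
+ * an existing bkey, how to compare them, and optionally whether a key is
+ * visible from a given subvolume.
+ */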
+struct bch_hash_desc {
+ enum btree_id btree_id;
+ u8 key_type;
+
+ u64 (*hash_key)(const struct bch_hash_info *, const void *);
+ u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c);
+ bool (*cmp_key)(struct bkey_s_c, const void *);
+ bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c);
+ bool (*is_visible)(subvol_inum inum, struct bkey_s_c);
+};
+
+static inline bool is_visible_key(struct bch_hash_desc desc, subvol_inum inum, struct bkey_s_c k)
+{
+ return k.k->type == desc.key_type &&
+ (!desc.is_visible ||
+ !inum.inum ||
+ desc.is_visible(inum, k));
+}
+
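+/*
+ * Hash table lookup via linear probing: scan slots starting at the key's hash,
+ * skipping collisions and whiteouts, until we find a matching key or hit an
+ * empty slot.
+ */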
+static __always_inline int
+bch2_hash_lookup(struct btree_trans *trans,
+ struct btree_iter *iter,
+ const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ subvol_inum inum, const void *key,
+ unsigned flags)
+{
+ struct bkey_s_c k;
+ u32 snapshot;
+ int ret;
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ return ret;
+
+ for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
+ SPOS(inum.inum, desc.hash_key(info, key), snapshot),
+ POS(inum.inum, U64_MAX),
+ BTREE_ITER_SLOTS|flags, k, ret) {
+ if (is_visible_key(desc, inum, k)) {
+ if (!desc.cmp_key(k, key))
+ return 0;
+ } else if (k.k->type == KEY_TYPE_hash_whiteout) {
+ ;
+ } else {
+ /* hole, not found */
+ break;
+ }
+ }
+ bch2_trans_iter_exit(trans, iter);
+
+ return ret ?: -BCH_ERR_ENOENT_str_hash_lookup;
+}
+
+static __always_inline int
+bch2_hash_hole(struct btree_trans *trans,
+ struct btree_iter *iter,
+ const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ subvol_inum inum, const void *key)
+{
+ struct bkey_s_c k;
+ u32 snapshot;
+ int ret;
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ return ret;
+
+ for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
+ SPOS(inum.inum, desc.hash_key(info, key), snapshot),
+ POS(inum.inum, U64_MAX),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret)
+ if (!is_visible_key(desc, inum, k))
+ return 0;
+ bch2_trans_iter_exit(trans, iter);
+
+ return ret ?: -BCH_ERR_ENOSPC_str_hash_create;
+}
+
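+/*
+ * When deleting, check whether a whiteout must be left behind: if a later key
+ * in the probe chain hashes to a slot at or before the one being deleted,
+ * lookups must not terminate early at an empty slot.
+ */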
+static __always_inline
+int bch2_hash_needs_whiteout(struct btree_trans *trans,
+ const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ struct btree_iter *start)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ bch2_trans_copy_iter(&iter, start);
+
+ bch2_btree_iter_advance(&iter);
+
+ for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, k, ret) {
+ if (k.k->type != desc.key_type &&
+ k.k->type != KEY_TYPE_hash_whiteout)
+ break;
+
+ if (k.k->type == desc.key_type &&
+ desc.hash_bkey(info, k) <= start->pos.offset) {
+ ret = 1;
+ break;
+ }
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
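+/*
+ * Linear probing insert: walk the probe chain from the key's hash; an existing
+ * matching key is replaced (unless MUST_CREATE), otherwise the new key goes
+ * into the first whiteout or empty slot (unless MUST_REPLACE).
+ */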
+static __always_inline
+int bch2_hash_set_snapshot(struct btree_trans *trans,
+ const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ subvol_inum inum, u32 snapshot,
+ struct bkey_i *insert,
+ bch_str_hash_flags_t str_hash_flags,
+ int update_flags)
+{
+ struct btree_iter iter, slot = { NULL };
+ struct bkey_s_c k;
+ bool found = false;
+ int ret;
+
+ for_each_btree_key_upto_norestart(trans, iter, desc.btree_id,
+ SPOS(insert->k.p.inode,
+ desc.hash_bkey(info, bkey_i_to_s_c(insert)),
+ snapshot),
+ POS(insert->k.p.inode, U64_MAX),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
+ if (is_visible_key(desc, inum, k)) {
+ if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert)))
+ goto found;
+
+ /* hash collision: */
+ continue;
+ }
+
+ if (!slot.path &&
+ !(str_hash_flags & BCH_HASH_SET_MUST_REPLACE))
+ bch2_trans_copy_iter(&slot, &iter);
+
+ if (k.k->type != KEY_TYPE_hash_whiteout)
+ goto not_found;
+ }
+
+ if (!ret)
+ ret = -BCH_ERR_ENOSPC_str_hash_create;
+out:
+ bch2_trans_iter_exit(trans, &slot);
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+found:
+ found = true;
+not_found:
+
+ if (!found && (str_hash_flags & BCH_HASH_SET_MUST_REPLACE)) {
+ ret = -BCH_ERR_ENOENT_str_hash_set_must_replace;
+ } else if (found && (str_hash_flags & BCH_HASH_SET_MUST_CREATE)) {
+ ret = -EEXIST;
+ } else {
+ if (!found && slot.path)
+ swap(iter, slot);
+
+ insert->k.p = iter.pos;
+ ret = bch2_trans_update(trans, &iter, insert, update_flags);
+ }
+
+ goto out;
+}
+
+static __always_inline
+int bch2_hash_set(struct btree_trans *trans,
+ const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ subvol_inum inum,
+ struct bkey_i *insert,
+ bch_str_hash_flags_t str_hash_flags)
+{
+ u32 snapshot;
+ int ret;
+
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ return ret;
+
+ insert->k.p.inode = inum.inum;
+
+ return bch2_hash_set_snapshot(trans, desc, info, inum,
+ snapshot, insert, str_hash_flags, 0);
+}
+
+static __always_inline
+int bch2_hash_delete_at(struct btree_trans *trans,
+ const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ struct btree_iter *iter,
+ unsigned update_flags)
+{
+ struct bkey_i *delete;
+ int ret;
+
+ delete = bch2_trans_kmalloc(trans, sizeof(*delete));
+ ret = PTR_ERR_OR_ZERO(delete);
+ if (ret)
+ return ret;
+
+ ret = bch2_hash_needs_whiteout(trans, desc, info, iter);
+ if (ret < 0)
+ return ret;
+
+ bkey_init(&delete->k);
+ delete->k.p = iter->pos;
+ delete->k.type = ret ? KEY_TYPE_hash_whiteout : KEY_TYPE_deleted;
+
+ return bch2_trans_update(trans, iter, delete, update_flags);
+}
+
+static __always_inline
+int bch2_hash_delete(struct btree_trans *trans,
+ const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ subvol_inum inum, const void *key)
+{
+ struct btree_iter iter;
+ int ret;
+
+ ret = bch2_hash_lookup(trans, &iter, desc, info, inum, key,
+ BTREE_ITER_INTENT);
+ if (ret)
+ return ret;
+
+ ret = bch2_hash_delete_at(trans, desc, info, &iter, 0);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+#endif /* _BCACHEFS_STR_HASH_H */
diff --git a/c_src/libbcachefs/subvolume.c b/c_src/libbcachefs/subvolume.c
new file mode 100644
index 00000000..7c67c28d
--- /dev/null
+++ b/c_src/libbcachefs/subvolume.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_key_cache.h"
+#include "btree_update.h"
+#include "errcode.h"
+#include "error.h"
+#include "fs.h"
+#include "snapshot.h"
+#include "subvolume.h"
+
+#include <linux/random.h>
+
+static int bch2_subvolume_delete(struct btree_trans *, u32);
+
+static int check_subvol(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_s_c_subvolume subvol;
+ struct bch_snapshot snapshot;
+ unsigned snapid;
+ int ret = 0;
+
+ if (k.k->type != KEY_TYPE_subvolume)
+ return 0;
+
+ subvol = bkey_s_c_to_subvolume(k);
+ snapid = le32_to_cpu(subvol.v->snapshot);
+ ret = bch2_snapshot_lookup(trans, snapid, &snapshot);
+
+ if (bch2_err_matches(ret, ENOENT))
+ bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
+ k.k->p.offset, snapid);
+ if (ret)
+ return ret;
+
+ if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
+ ret = bch2_subvolume_delete(trans, iter->pos.offset);
+ bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
+ return ret ?: -BCH_ERR_transaction_restart_nested;
+ }
+
+ if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
+ u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
+ u32 snapshot_tree;
+ struct bch_snapshot_tree st;
+
+ rcu_read_lock();
+ snapshot_tree = snapshot_t(c, snapshot_root)->tree;
+ rcu_read_unlock();
+
+ ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
+
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+ "%s: snapshot tree %u not found", __func__, snapshot_tree);
+
+ if (ret)
+ return ret;
+
+ if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset,
+ c, subvol_not_master_and_not_snapshot,
+ "subvolume %llu is not set as snapshot but is not master subvolume",
+ k.k->p.offset)) {
+ struct bkey_i_subvolume *s =
+ bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(s);
+ if (ret)
+ return ret;
+
+ SET_BCH_SUBVOLUME_SNAP(&s->v, true);
+ }
+ }
+
+fsck_err:
+ return ret;
+}
+
+int bch2_check_subvols(struct bch_fs *c)
+{
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter,
+ BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_subvol(trans, &iter, k)));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+/* Subvolumes: */
+
+int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags, struct printbuf *err)
+{
+ int ret = 0;
+
+ bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
+ bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err,
+ subvol_pos_bad,
+ "invalid pos");
+fsck_err:
+ return ret;
+}
+
+void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
+
+ prt_printf(out, "root %llu snapshot id %u",
+ le64_to_cpu(s.v->inode),
+ le32_to_cpu(s.v->snapshot));
+
+ if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent))
+ prt_printf(out, " parent %u", le32_to_cpu(s.v->parent));
+}
+
+static __always_inline int
+bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
+ bool inconsistent_if_not_found,
+ int iter_flags,
+ struct bch_subvolume *s)
+{
+ int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
+ iter_flags, subvolume, s);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
+ inconsistent_if_not_found,
+ trans->c, "missing subvolume %u", subvol);
+ return ret;
+}
+
+int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
+ bool inconsistent_if_not_found,
+ int iter_flags,
+ struct bch_subvolume *s)
+{
+ return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
+}
+
+int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol)
+{
+ struct bch_subvolume s;
+ int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s);
+ if (ret)
+ return ret;
+
+ if (BCH_SUBVOLUME_RO(&s))
+ return -EROFS;
+ return 0;
+}
+
+int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol)
+{
+ return bch2_trans_do(c, NULL, NULL, 0,
+ bch2_subvol_is_ro_trans(trans, subvol));
+}
+
+int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
+ struct bch_subvolume *subvol)
+{
+ struct bch_snapshot snap;
+
+ return bch2_snapshot_lookup(trans, snapshot, &snap) ?:
+ bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
+}
+
+int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
+ u32 *snapid)
+{
+ struct btree_iter iter;
+ struct bkey_s_c_subvolume subvol;
+ int ret;
+
+ subvol = bch2_bkey_get_iter_typed(trans, &iter,
+ BTREE_ID_subvolumes, POS(0, subvolid),
+ BTREE_ITER_CACHED|BTREE_ITER_WITH_UPDATES,
+ subvolume);
+ ret = bkey_err(subvol);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
+ "missing subvolume %u", subvolid);
+
+ if (likely(!ret))
+ *snapid = le32_to_cpu(subvol.v->snapshot);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int bch2_subvolume_reparent(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ u32 old_parent, u32 new_parent)
+{
+ struct bkey_i_subvolume *s;
+ int ret;
+
+ if (k.k->type != KEY_TYPE_subvolume)
+ return 0;
+
+ if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
+ le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
+ return 0;
+
+ s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(s);
+ if (ret)
+ return ret;
+
+ s->v.parent = cpu_to_le32(new_parent);
+ return 0;
+}
+
+/*
+ * Separate from the snapshot tree in the snapshots btree, we record the tree
+ * structure of how snapshot subvolumes were created - the parent subvolume of
+ * each snapshot subvolume.
+ *
+ * When a subvolume is deleted, we scan for child subvolumes and reparent them,
+ * to avoid dangling references:
+ */
+static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete)
+{
+ struct bch_subvolume s;
+
+ return lockrestart_do(trans,
+ bch2_subvolume_get(trans, subvolid_to_delete, true,
+ BTREE_ITER_CACHED, &s)) ?:
+ for_each_btree_key_commit(trans, iter,
+ BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_subvolume_reparent(trans, &iter, k,
+ subvolid_to_delete, le32_to_cpu(s.parent)));
+}
+
+/*
+ * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
+ * deletion/cleanup:
+ */
+static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
+{
+ struct btree_iter iter;
+ struct bkey_s_c_subvolume subvol;
+ u32 snapid;
+ int ret = 0;
+
+ subvol = bch2_bkey_get_iter_typed(trans, &iter,
+ BTREE_ID_subvolumes, POS(0, subvolid),
+ BTREE_ITER_CACHED|BTREE_ITER_INTENT,
+ subvolume);
+ ret = bkey_err(subvol);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
+ "missing subvolume %u", subvolid);
+ if (ret)
+ return ret;
+
+ snapid = le32_to_cpu(subvol.v->snapshot);
+
+ ret = bch2_btree_delete_at(trans, &iter, 0) ?:
+ bch2_snapshot_node_set_deleted(trans, snapid);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
+{
+ return bch2_subvolumes_reparent(trans, subvolid) ?:
+ commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ __bch2_subvolume_delete(trans, subvolid));
+}
+
+static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
+{
+ struct bch_fs *c = container_of(work, struct bch_fs,
+ snapshot_wait_for_pagecache_and_delete_work);
+ snapshot_id_list s;
+ u32 *id;
+ int ret = 0;
+
+ while (!ret) {
+ mutex_lock(&c->snapshots_unlinked_lock);
+ s = c->snapshots_unlinked;
+ darray_init(&c->snapshots_unlinked);
+ mutex_unlock(&c->snapshots_unlinked_lock);
+
+ if (!s.nr)
+ break;
+
+ bch2_evict_subvolume_inodes(c, &s);
+
+ for (id = s.data; id < s.data + s.nr; id++) {
+ ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id));
+ bch_err_msg(c, ret, "deleting subvolume %u", *id);
+ if (ret)
+ break;
+ }
+
+ darray_exit(&s);
+ }
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
+}
+
+struct subvolume_unlink_hook {
+ struct btree_trans_commit_hook h;
+ u32 subvol;
+};
+
+static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
+ struct btree_trans_commit_hook *_h)
+{
+ struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
+ struct bch_fs *c = trans->c;
+ int ret = 0;
+
+ mutex_lock(&c->snapshots_unlinked_lock);
+ if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
+ ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
+ mutex_unlock(&c->snapshots_unlinked_lock);
+
+ if (ret)
+ return ret;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
+ return -EROFS;
+
+ if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
+ bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
+ return 0;
+}
+
+int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
+{
+ struct btree_iter iter;
+ struct bkey_i_subvolume *n;
+ struct subvolume_unlink_hook *h;
+ int ret = 0;
+
+ h = bch2_trans_kmalloc(trans, sizeof(*h));
+ ret = PTR_ERR_OR_ZERO(h);
+ if (ret)
+ return ret;
+
+ h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook;
+ h->subvol = subvolid;
+ bch2_trans_commit_hook(trans, &h->h);
+
+ n = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_subvolumes, POS(0, subvolid),
+ BTREE_ITER_CACHED, subvolume);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (unlikely(ret)) {
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
+ "missing subvolume %u", subvolid);
+ return ret;
+ }
+
+ SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
+ u32 src_subvolid,
+ u32 *new_subvolid,
+ u32 *new_snapshotid,
+ bool ro)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
+ struct bkey_i_subvolume *new_subvol = NULL;
+ struct bkey_i_subvolume *src_subvol = NULL;
+ u32 parent = 0, new_nodes[2], snapshot_subvols[2];
+ int ret = 0;
+
+ ret = bch2_bkey_get_empty_slot(trans, &dst_iter,
+ BTREE_ID_subvolumes, POS(0, U32_MAX));
+ if (ret == -BCH_ERR_ENOSPC_btree_slot)
+ ret = -BCH_ERR_ENOSPC_subvolume_create;
+ if (ret)
+ return ret;
+
+ snapshot_subvols[0] = dst_iter.pos.offset;
+ snapshot_subvols[1] = src_subvolid;
+
+ if (src_subvolid) {
+ /* Creating a snapshot: */
+
+ src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter,
+ BTREE_ID_subvolumes, POS(0, src_subvolid),
+ BTREE_ITER_CACHED, subvolume);
+ ret = PTR_ERR_OR_ZERO(src_subvol);
+ if (unlikely(ret)) {
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
+ "subvolume %u not found", src_subvolid);
+ goto err;
+ }
+
+ parent = le32_to_cpu(src_subvol->v.snapshot);
+ }
+
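+	/*
+	 * When snapshotting, two new leaf snapshot nodes are created under
+	 * @parent: new_nodes[0] for the new snapshot subvolume and new_nodes[1]
+	 * for the source subvolume, which is repointed below:
+	 */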
+ ret = bch2_snapshot_node_create(trans, parent, new_nodes,
+ snapshot_subvols,
+ src_subvolid ? 2 : 1);
+ if (ret)
+ goto err;
+
+ if (src_subvolid) {
+ src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
+ ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
+ if (ret)
+ goto err;
+ }
+
+ new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(new_subvol);
+ if (ret)
+ goto err;
+
+ new_subvol->v.flags = 0;
+ new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]);
+ new_subvol->v.inode = cpu_to_le64(inode);
+ new_subvol->v.parent = cpu_to_le32(src_subvolid);
+ new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c));
+ new_subvol->v.otime.hi = 0;
+
+ SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
+ SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
+
+ *new_subvolid = new_subvol->k.p.offset;
+ *new_snapshotid = new_nodes[0];
+err:
+ bch2_trans_iter_exit(trans, &src_iter);
+ bch2_trans_iter_exit(trans, &dst_iter);
+ return ret;
+}
+
+int bch2_fs_subvolumes_init(struct bch_fs *c)
+{
+ INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
+ INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
+ bch2_subvolume_wait_for_pagecache_and_delete);
+ mutex_init(&c->snapshots_unlinked_lock);
+ return 0;
+}
diff --git a/c_src/libbcachefs/subvolume.h b/c_src/libbcachefs/subvolume.h
new file mode 100644
index 00000000..a6f56f66
--- /dev/null
+++ b/c_src/libbcachefs/subvolume.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SUBVOLUME_H
+#define _BCACHEFS_SUBVOLUME_H
+
+#include "darray.h"
+#include "subvolume_types.h"
+
+enum bkey_invalid_flags;
+
+int bch2_check_subvols(struct bch_fs *);
+
+int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_subvolume ((struct bkey_ops) { \
+ .key_invalid = bch2_subvolume_invalid, \
+ .val_to_text = bch2_subvolume_to_text, \
+ .min_val_size = 16, \
+})
+
+int bch2_subvolume_get(struct btree_trans *, unsigned,
+ bool, int, struct bch_subvolume *);
+int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
+
+int bch2_subvol_is_ro_trans(struct btree_trans *, u32);
+int bch2_subvol_is_ro(struct bch_fs *, u32);
+
+int bch2_delete_dead_snapshots(struct bch_fs *);
+void bch2_delete_dead_snapshots_async(struct bch_fs *);
+
+int bch2_subvolume_unlink(struct btree_trans *, u32);
+int bch2_subvolume_create(struct btree_trans *, u64, u32,
+ u32 *, u32 *, bool);
+
+int bch2_fs_subvolumes_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_SUBVOLUME_H */
diff --git a/c_src/libbcachefs/subvolume_types.h b/c_src/libbcachefs/subvolume_types.h
new file mode 100644
index 00000000..ae644adf
--- /dev/null
+++ b/c_src/libbcachefs/subvolume_types.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SUBVOLUME_TYPES_H
+#define _BCACHEFS_SUBVOLUME_TYPES_H
+
+#include "darray.h"
+
+typedef DARRAY(u32) snapshot_id_list;
+
+#define IS_ANCESTOR_BITMAP 128
+
+struct snapshot_t {
+ u32 parent;
+ u32 skip[3];
+ u32 depth;
+ u32 children[2];
+ u32 subvol; /* Nonzero only if a subvolume points to this node: */
+ u32 tree;
+ u32 equiv;
+ unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)];
+};
+
+struct snapshot_table {
+#ifndef RUST_BINDGEN
+ DECLARE_FLEX_ARRAY(struct snapshot_t, s);
+#else
+ struct snapshot_t s[0];
+#endif
+};
+
+typedef struct {
+ u32 subvol;
+ u64 inum;
+} subvol_inum;
+
+#endif /* _BCACHEFS_SUBVOLUME_TYPES_H */
diff --git a/c_src/libbcachefs/super-io.c b/c_src/libbcachefs/super-io.c
new file mode 100644
index 00000000..55926b81
--- /dev/null
+++ b/c_src/libbcachefs/super-io.c
@@ -0,0 +1,1391 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "checksum.h"
+#include "counters.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "error.h"
+#include "journal.h"
+#include "journal_sb.h"
+#include "journal_seq_blacklist.h"
+#include "recovery.h"
+#include "replicas.h"
+#include "quota.h"
+#include "sb-clean.h"
+#include "sb-downgrade.h"
+#include "sb-errors.h"
+#include "sb-members.h"
+#include "super-io.h"
+#include "super.h"
+#include "trace.h"
+#include "vstructs.h"
+
+#include <linux/backing-dev.h>
+#include <linux/sort.h>
+
+static const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
+};
+
+struct bch2_metadata_version {
+ u16 version;
+ const char *name;
+};
+
+static const struct bch2_metadata_version bch2_metadata_versions[] = {
+#define x(n, v) { \
+ .version = v, \
+ .name = #n, \
+},
+ BCH_METADATA_VERSIONS()
+#undef x
+};
+
+void bch2_version_to_text(struct printbuf *out, unsigned v)
+{
+ const char *str = "(unknown version)";
+
+ for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++)
+ if (bch2_metadata_versions[i].version == v) {
+ str = bch2_metadata_versions[i].name;
+ break;
+ }
+
+ prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str);
+}
+
+unsigned bch2_latest_compatible_version(unsigned v)
+{
+ if (!BCH_VERSION_MAJOR(v))
+ return v;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++)
+ if (bch2_metadata_versions[i].version > v &&
+ BCH_VERSION_MAJOR(bch2_metadata_versions[i].version) ==
+ BCH_VERSION_MAJOR(v))
+ v = bch2_metadata_versions[i].version;
+
+ return v;
+}
+
+const char * const bch2_sb_fields[] = {
+#define x(name, nr) #name,
+ BCH_SB_FIELDS()
+#undef x
+ NULL
+};
+
+static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *,
+ struct printbuf *);
+
+struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *sb,
+ enum bch_sb_field_type type)
+{
+ /* XXX: need locking around superblock to access optional fields */
+
+ vstruct_for_each(sb, f)
+ if (le32_to_cpu(f->type) == type)
+ return f;
+ return NULL;
+}
+
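+/*
+ * Resize field @f to @u64s u64s in place, moving any following fields and
+ * updating the superblock's total size; @u64s == 0 deletes the field, and a
+ * NULL @f appends a new one.
+ */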
+static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
+ struct bch_sb_field *f,
+ unsigned u64s)
+{
+ unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0;
+ unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s;
+
+ BUG_ON(__vstruct_bytes(struct bch_sb, sb_u64s) > sb->buffer_size);
+
+ if (!f && !u64s) {
+ /* nothing to do: */
+ } else if (!f) {
+ f = vstruct_last(sb->sb);
+ memset(f, 0, sizeof(u64) * u64s);
+ f->u64s = cpu_to_le32(u64s);
+ f->type = 0;
+ } else {
+ void *src, *dst;
+
+ src = vstruct_end(f);
+
+ if (u64s) {
+ f->u64s = cpu_to_le32(u64s);
+ dst = vstruct_end(f);
+ } else {
+ dst = f;
+ }
+
+ memmove(dst, src, vstruct_end(sb->sb) - src);
+
+ if (dst > src)
+ memset(src, 0, dst - src);
+ }
+
+ sb->sb->u64s = cpu_to_le32(sb_u64s);
+
+ return u64s ? f : NULL;
+}
+
+void bch2_sb_field_delete(struct bch_sb_handle *sb,
+ enum bch_sb_field_type type)
+{
+ struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
+
+ if (f)
+ __bch2_sb_field_resize(sb, f, 0);
+}
+
+/* Superblock realloc/free: */
+
+void bch2_free_super(struct bch_sb_handle *sb)
+{
+ kfree(sb->bio);
+ if (!IS_ERR_OR_NULL(sb->bdev))
+ blkdev_put(sb->bdev, sb->holder);
+ kfree(sb->holder);
+ kfree(sb->sb_name);
+
+ kfree(sb->sb);
+ memset(sb, 0, sizeof(*sb));
+}
+
+int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
+{
+ size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s);
+ size_t new_buffer_size;
+ struct bch_sb *new_sb;
+ struct bio *bio;
+
+ if (sb->bdev)
+ new_bytes = max_t(size_t, new_bytes, bdev_logical_block_size(sb->bdev));
+
+ new_buffer_size = roundup_pow_of_two(new_bytes);
+
+ if (sb->sb && sb->buffer_size >= new_buffer_size)
+ return 0;
+
+ if (sb->sb && sb->have_layout) {
+ u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
+
+ if (new_bytes > max_bytes) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_bdevname(&buf, sb->bdev);
+ prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes);
+ pr_err("%s", buf.buf);
+ printbuf_exit(&buf);
+ return -BCH_ERR_ENOSPC_sb;
+ }
+ }
+
+ if (sb->buffer_size >= new_buffer_size && sb->sb)
+ return 0;
+
+ if (dynamic_fault("bcachefs:add:super_realloc"))
+ return -BCH_ERR_ENOMEM_sb_realloc_injected;
+
+ new_sb = krealloc(sb->sb, new_buffer_size, GFP_NOFS|__GFP_ZERO);
+ if (!new_sb)
+ return -BCH_ERR_ENOMEM_sb_buf_realloc;
+
+ sb->sb = new_sb;
+
+ if (sb->have_bio) {
+ unsigned nr_bvecs = buf_pages(sb->sb, new_buffer_size);
+
+ bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
+ if (!bio)
+ return -BCH_ERR_ENOMEM_sb_bio_realloc;
+
+ bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);
+
+ kfree(sb->bio);
+ sb->bio = bio;
+ }
+
+ sb->buffer_size = new_buffer_size;
+
+ return 0;
+}
+
+struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
+ enum bch_sb_field_type type,
+ unsigned u64s)
+{
+ struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
+ ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
+ ssize_t d = -old_u64s + u64s;
+
+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
+ return NULL;
+
+ if (sb->fs_sb) {
+ struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb);
+
+ lockdep_assert_held(&c->sb_lock);
+
+		/* XXX: we're not checking that offline devices have enough space */
+
+ for_each_online_member(c, ca) {
+ struct bch_sb_handle *dev_sb = &ca->disk_sb;
+
+ if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
+ percpu_ref_put(&ca->ref);
+ return NULL;
+ }
+ }
+ }
+
+ f = bch2_sb_field_get_id(sb->sb, type);
+ f = __bch2_sb_field_resize(sb, f, u64s);
+ if (f)
+ f->type = cpu_to_le32(type);
+ return f;
+}
+
+struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb,
+ enum bch_sb_field_type type,
+ unsigned u64s)
+{
+ struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
+
+ if (!f || le32_to_cpu(f->u64s) < u64s)
+ f = bch2_sb_field_resize_id(sb, type, u64s);
+ return f;
+}
+
+/* Superblock validate: */
+
+static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out)
+{
+ u64 offset, prev_offset, max_sectors;
+ unsigned i;
+
+ BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512);
+
+ if (!uuid_equal(&layout->magic, &BCACHE_MAGIC) &&
+ !uuid_equal(&layout->magic, &BCHFS_MAGIC)) {
+ prt_printf(out, "Not a bcachefs superblock layout");
+ return -BCH_ERR_invalid_sb_layout;
+ }
+
+ if (layout->layout_type != 0) {
+ prt_printf(out, "Invalid superblock layout type %u",
+ layout->layout_type);
+ return -BCH_ERR_invalid_sb_layout_type;
+ }
+
+ if (!layout->nr_superblocks) {
+ prt_printf(out, "Invalid superblock layout: no superblocks");
+ return -BCH_ERR_invalid_sb_layout_nr_superblocks;
+ }
+
+ if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) {
+ prt_printf(out, "Invalid superblock layout: too many superblocks");
+ return -BCH_ERR_invalid_sb_layout_nr_superblocks;
+ }
+
+ max_sectors = 1 << layout->sb_max_size_bits;
+
+ prev_offset = le64_to_cpu(layout->sb_offset[0]);
+
+ for (i = 1; i < layout->nr_superblocks; i++) {
+ offset = le64_to_cpu(layout->sb_offset[i]);
+
+ if (offset < prev_offset + max_sectors) {
+ prt_printf(out, "Invalid superblock layout: superblocks overlap\n"
+ " (sb %u ends at %llu next starts at %llu",
+ i - 1, prev_offset + max_sectors, offset);
+ return -BCH_ERR_invalid_sb_layout_superblocks_overlap;
+ }
+ prev_offset = offset;
+ }
+
+ return 0;
+}
+
+static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out)
+{
+ u16 version = le16_to_cpu(sb->version);
+ u16 version_min = le16_to_cpu(sb->version_min);
+
+ if (!bch2_version_compatible(version)) {
+ prt_str(out, "Unsupported superblock version ");
+ bch2_version_to_text(out, version);
+ prt_str(out, " (min ");
+ bch2_version_to_text(out, bcachefs_metadata_version_min);
+ prt_str(out, ", max ");
+ bch2_version_to_text(out, bcachefs_metadata_version_current);
+ prt_str(out, ")");
+ return -BCH_ERR_invalid_sb_version;
+ }
+
+ if (!bch2_version_compatible(version_min)) {
+ prt_str(out, "Unsupported superblock version_min ");
+ bch2_version_to_text(out, version_min);
+ prt_str(out, " (min ");
+ bch2_version_to_text(out, bcachefs_metadata_version_min);
+ prt_str(out, ", max ");
+ bch2_version_to_text(out, bcachefs_metadata_version_current);
+ prt_str(out, ")");
+ return -BCH_ERR_invalid_sb_version;
+ }
+
+ if (version_min > version) {
+ prt_str(out, "Bad minimum version ");
+ bch2_version_to_text(out, version_min);
+ prt_str(out, ", greater than version field ");
+ bch2_version_to_text(out, version);
+ return -BCH_ERR_invalid_sb_version;
+ }
+
+ return 0;
+}
+
+static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out,
+ int rw)
+{
+ struct bch_sb *sb = disk_sb->sb;
+ struct bch_sb_field_members_v1 *mi;
+ enum bch_opt_id opt_id;
+ u16 block_size;
+ int ret;
+
+ ret = bch2_sb_compatible(sb, out);
+ if (ret)
+ return ret;
+
+ if (sb->features[1] ||
+ (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) {
+ prt_printf(out, "Filesystem has incompatible features");
+ return -BCH_ERR_invalid_sb_features;
+ }
+
+ block_size = le16_to_cpu(sb->block_size);
+
+ if (block_size > PAGE_SECTORS) {
+ prt_printf(out, "Block size too big (got %u, max %u)",
+ block_size, PAGE_SECTORS);
+ return -BCH_ERR_invalid_sb_block_size;
+ }
+
+ if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) {
+ prt_printf(out, "Bad user UUID (got zeroes)");
+ return -BCH_ERR_invalid_sb_uuid;
+ }
+
+ if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) {
+ prt_printf(out, "Bad internal UUID (got zeroes)");
+ return -BCH_ERR_invalid_sb_uuid;
+ }
+
+ if (!sb->nr_devices ||
+ sb->nr_devices > BCH_SB_MEMBERS_MAX) {
+ prt_printf(out, "Bad number of member devices %u (max %u)",
+ sb->nr_devices, BCH_SB_MEMBERS_MAX);
+ return -BCH_ERR_invalid_sb_too_many_members;
+ }
+
+ if (sb->dev_idx >= sb->nr_devices) {
+ prt_printf(out, "Bad dev_idx (got %u, nr_devices %u)",
+ sb->dev_idx, sb->nr_devices);
+ return -BCH_ERR_invalid_sb_dev_idx;
+ }
+
+ if (!sb->time_precision ||
+ le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) {
+ prt_printf(out, "Invalid time precision: %u (min 1, max %lu)",
+ le32_to_cpu(sb->time_precision), NSEC_PER_SEC);
+ return -BCH_ERR_invalid_sb_time_precision;
+ }
+
+ if (rw == READ) {
+ /*
+		 * We've been seeing a bug where these are getting inexplicably
+		 * zeroed, so we now validate them, but we have to be careful
+		 * not to prevent people's filesystems from mounting:
+ */
+ if (!BCH_SB_JOURNAL_FLUSH_DELAY(sb))
+ SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000);
+ if (!BCH_SB_JOURNAL_RECLAIM_DELAY(sb))
+ SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 1000);
+
+ if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));
+ }
+
+ for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
+ const struct bch_option *opt = bch2_opt_table + opt_id;
+
+ if (opt->get_sb != BCH2_NO_SB_OPT) {
+ u64 v = bch2_opt_from_sb(sb, opt_id);
+
+ prt_printf(out, "Invalid option ");
+ ret = bch2_opt_validate(opt, v, out);
+ if (ret)
+ return ret;
+
+ printbuf_reset(out);
+ }
+ }
+
+ /* validate layout */
+ ret = validate_sb_layout(&sb->layout, out);
+ if (ret)
+ return ret;
+
+ vstruct_for_each(sb, f) {
+ if (!f->u64s) {
+ prt_printf(out, "Invalid superblock: optional field with size 0 (type %u)",
+ le32_to_cpu(f->type));
+ return -BCH_ERR_invalid_sb_field_size;
+ }
+
+ if (vstruct_next(f) > vstruct_last(sb)) {
+ prt_printf(out, "Invalid superblock: optional field extends past end of superblock (type %u)",
+ le32_to_cpu(f->type));
+ return -BCH_ERR_invalid_sb_field_size;
+ }
+ }
+
+ /* members must be validated first: */
+ mi = bch2_sb_field_get(sb, members_v1);
+ if (!mi) {
+ prt_printf(out, "Invalid superblock: member info area missing");
+ return -BCH_ERR_invalid_sb_members_missing;
+ }
+
+ ret = bch2_sb_field_validate(sb, &mi->field, out);
+ if (ret)
+ return ret;
+
+ vstruct_for_each(sb, f) {
+ if (le32_to_cpu(f->type) == BCH_SB_FIELD_members_v1)
+ continue;
+
+ ret = bch2_sb_field_validate(sb, f, out);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* device open: */
+
+static unsigned long le_ulong_to_cpu(unsigned long v)
+{
+ return sizeof(unsigned long) == 8
+ ? le64_to_cpu(v)
+ : le32_to_cpu(v);
+}
+
+static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr)
+{
+ BUG_ON(nr & (BITS_PER_TYPE(long) - 1));
+
+ for (unsigned i = 0; i < BITS_TO_LONGS(nr); i++)
+ dst[i] = le_ulong_to_cpu(src[i]);
+}
+
+static void bch2_sb_update(struct bch_fs *c)
+{
+ struct bch_sb *src = c->disk_sb.sb;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ c->sb.uuid = src->uuid;
+ c->sb.user_uuid = src->user_uuid;
+ c->sb.version = le16_to_cpu(src->version);
+ c->sb.version_min = le16_to_cpu(src->version_min);
+ c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src);
+ c->sb.nr_devices = src->nr_devices;
+ c->sb.clean = BCH_SB_CLEAN(src);
+ c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src);
+
+ c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision);
+ c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit;
+
+ /* XXX this is wrong, we need a 96 or 128 bit integer type */
+ c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo),
+ c->sb.nsec_per_time_unit);
+ c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);
+
+ c->sb.features = le64_to_cpu(src->features[0]);
+ c->sb.compat = le64_to_cpu(src->compat[0]);
+
+ memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
+
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
+ if (ext)
+ le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
+ sizeof(c->sb.errors_silent) * 8);
+
+ for_each_member_device(c, ca) {
+ struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
+ ca->mi = bch2_mi_to_cpu(&m);
+ }
+}
+
+static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
+{
+ struct bch_sb_field *src_f, *dst_f;
+ struct bch_sb *dst = dst_handle->sb;
+ unsigned i;
+
+ dst->version = src->version;
+ dst->version_min = src->version_min;
+ dst->seq = src->seq;
+ dst->uuid = src->uuid;
+ dst->user_uuid = src->user_uuid;
+ memcpy(dst->label, src->label, sizeof(dst->label));
+
+ dst->block_size = src->block_size;
+ dst->nr_devices = src->nr_devices;
+
+ dst->time_base_lo = src->time_base_lo;
+ dst->time_base_hi = src->time_base_hi;
+ dst->time_precision = src->time_precision;
+ dst->write_time = src->write_time;
+
+ memcpy(dst->flags, src->flags, sizeof(dst->flags));
+ memcpy(dst->features, src->features, sizeof(dst->features));
+ memcpy(dst->compat, src->compat, sizeof(dst->compat));
+
+ for (i = 0; i < BCH_SB_FIELD_NR; i++) {
+ int d;
+
+ if ((1U << i) & BCH_SINGLE_DEVICE_SB_FIELDS)
+ continue;
+
+ src_f = bch2_sb_field_get_id(src, i);
+ dst_f = bch2_sb_field_get_id(dst, i);
+
+ d = (src_f ? le32_to_cpu(src_f->u64s) : 0) -
+ (dst_f ? le32_to_cpu(dst_f->u64s) : 0);
+ if (d > 0) {
+ int ret = bch2_sb_realloc(dst_handle,
+ le32_to_cpu(dst_handle->sb->u64s) + d);
+
+ if (ret)
+ return ret;
+
+ dst = dst_handle->sb;
+ dst_f = bch2_sb_field_get_id(dst, i);
+ }
+
+ dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
+ src_f ? le32_to_cpu(src_f->u64s) : 0);
+
+ if (src_f)
+ memcpy(dst_f, src_f, vstruct_bytes(src_f));
+ }
+
+ return 0;
+}
+
+int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src)
+{
+ int ret;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ ret = bch2_sb_realloc(&c->disk_sb, 0) ?:
+ __copy_super(&c->disk_sb, src) ?:
+ bch2_sb_replicas_to_cpu_replicas(c) ?:
+ bch2_sb_disk_groups_to_cpu(c);
+ if (ret)
+ return ret;
+
+ bch2_sb_update(c);
+ return 0;
+}
+
+int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
+{
+ return __copy_super(&ca->disk_sb, c->disk_sb.sb);
+}
+
+/* read superblock: */
+
+static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
+{
+ size_t bytes;
+ int ret;
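+	/*
+	 * Read into the current buffer first; if the superblock turns out to
+	 * be bigger than the buffer, grow it and reread:
+	 */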
+reread:
+ bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
+ sb->bio->bi_iter.bi_sector = offset;
+ bch2_bio_map(sb->bio, sb->sb, sb->buffer_size);
+
+ ret = submit_bio_wait(sb->bio);
+ if (ret) {
+ prt_printf(err, "IO error: %i", ret);
+ return ret;
+ }
+
+ if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) &&
+ !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) {
+ prt_str(err, "Not a bcachefs superblock (got magic ");
+ pr_uuid(err, sb->sb->magic.b);
+ prt_str(err, ")");
+ return -BCH_ERR_invalid_sb_magic;
+ }
+
+ ret = bch2_sb_compatible(sb->sb, err);
+ if (ret)
+ return ret;
+
+ bytes = vstruct_bytes(sb->sb);
+
+ if (bytes > 512 << sb->sb->layout.sb_max_size_bits) {
+ prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)",
+ bytes, 512UL << sb->sb->layout.sb_max_size_bits);
+ return -BCH_ERR_invalid_sb_too_big;
+ }
+
+ if (bytes > sb->buffer_size) {
+ ret = bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s));
+ if (ret)
+ return ret;
+ goto reread;
+ }
+
+ enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb);
+ if (csum_type >= BCH_CSUM_NR) {
+ prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
+ return -BCH_ERR_invalid_sb_csum_type;
+ }
+
+ /* XXX: verify MACs */
+ struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb);
+ if (bch2_crc_cmp(csum, sb->sb->csum)) {
+ bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum);
+ return -BCH_ERR_invalid_sb_csum;
+ }
+
+ sb->seq = le64_to_cpu(sb->sb->seq);
+
+ return 0;
+}
+
+static int __bch2_read_super(const char *path, struct bch_opts *opts,
+ struct bch_sb_handle *sb, bool ignore_notbchfs_msg)
+{
+ u64 offset = opt_get(*opts, sb);
+ struct bch_sb_layout layout;
+ struct printbuf err = PRINTBUF;
+ struct printbuf err2 = PRINTBUF;
+ __le64 *i;
+ int ret;
+#ifndef __KERNEL__
+retry:
+#endif
+ memset(sb, 0, sizeof(*sb));
+ sb->mode = BLK_OPEN_READ;
+ sb->have_bio = true;
+ sb->holder = kmalloc(1, GFP_KERNEL);
+ if (!sb->holder)
+ return -ENOMEM;
+
+ sb->sb_name = kstrdup(path, GFP_KERNEL);
+ if (!sb->sb_name)
+ return -ENOMEM;
+
+#ifndef __KERNEL__
+ if (opt_get(*opts, direct_io) == false)
+ sb->mode |= BLK_OPEN_BUFFERED;
+#endif
+
+ if (!opt_get(*opts, noexcl))
+ sb->mode |= BLK_OPEN_EXCL;
+
+ if (!opt_get(*opts, nochanges))
+ sb->mode |= BLK_OPEN_WRITE;
+
+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
+ if (IS_ERR(sb->bdev) &&
+ PTR_ERR(sb->bdev) == -EACCES &&
+ opt_get(*opts, read_only)) {
+ sb->mode &= ~BLK_OPEN_WRITE;
+
+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
+ if (!IS_ERR(sb->bdev))
+ opt_set(*opts, nochanges, true);
+ }
+
+ if (IS_ERR(sb->bdev)) {
+ ret = PTR_ERR(sb->bdev);
+ goto out;
+ }
+
+ ret = bch2_sb_realloc(sb, 0);
+ if (ret) {
+ prt_printf(&err, "error allocating memory for superblock");
+ goto err;
+ }
+
+ if (bch2_fs_init_fault("read_super")) {
+ prt_printf(&err, "dynamic fault");
+ ret = -EFAULT;
+ goto err;
+ }
+
+ ret = read_one_super(sb, offset, &err);
+ if (!ret)
+ goto got_super;
+
+ if (opt_defined(*opts, sb))
+ goto err;
+
+ prt_printf(&err2, "bcachefs (%s): error reading default superblock: %s\n",
+ path, err.buf);
+ if (ret == -BCH_ERR_invalid_sb_magic && ignore_notbchfs_msg)
+ printk(KERN_INFO "%s", err2.buf);
+ else
+ printk(KERN_ERR "%s", err2.buf);
+
+ printbuf_exit(&err2);
+ printbuf_reset(&err);
+
+ /*
+ * Error reading primary superblock - read location of backup
+ * superblocks:
+ */
+ bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
+ sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
+ /*
+ * use sb buffer to read layout, since sb buffer is page aligned but
+ * layout won't be:
+ */
+ bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout));
+
+ ret = submit_bio_wait(sb->bio);
+ if (ret) {
+ prt_printf(&err, "IO error: %i", ret);
+ goto err;
+ }
+
+ memcpy(&layout, sb->sb, sizeof(layout));
+ ret = validate_sb_layout(&layout, &err);
+ if (ret)
+ goto err;
+
+ for (i = layout.sb_offset;
+ i < layout.sb_offset + layout.nr_superblocks; i++) {
+ offset = le64_to_cpu(*i);
+
+ if (offset == opt_get(*opts, sb))
+ continue;
+
+ ret = read_one_super(sb, offset, &err);
+ if (!ret)
+ goto got_super;
+ }
+
+ goto err;
+
+got_super:
+ if (le16_to_cpu(sb->sb->block_size) << 9 <
+ bdev_logical_block_size(sb->bdev) &&
+ opt_get(*opts, direct_io)) {
+#ifndef __KERNEL__
+ opt_set(*opts, direct_io, false);
+ bch2_free_super(sb);
+ goto retry;
+#endif
+ prt_printf(&err, "block size (%u) smaller than device block size (%u)",
+ le16_to_cpu(sb->sb->block_size) << 9,
+ bdev_logical_block_size(sb->bdev));
+ ret = -BCH_ERR_block_size_too_small;
+ goto err;
+ }
+
+ ret = 0;
+ sb->have_layout = true;
+
+ ret = bch2_sb_validate(sb, &err, READ);
+ if (ret) {
+ printk(KERN_ERR "bcachefs (%s): error validating superblock: %s\n",
+ path, err.buf);
+ goto err_no_print;
+ }
+out:
+ printbuf_exit(&err);
+ return ret;
+err:
+ printk(KERN_ERR "bcachefs (%s): error reading superblock: %s\n",
+ path, err.buf);
+err_no_print:
+ bch2_free_super(sb);
+ goto out;
+}
+
+int bch2_read_super(const char *path, struct bch_opts *opts,
+ struct bch_sb_handle *sb)
+{
+ return __bch2_read_super(path, opts, sb, false);
+}
+
+/* provide a silenced version for mount.bcachefs */
+
+int bch2_read_super_silent(const char *path, struct bch_opts *opts,
+ struct bch_sb_handle *sb)
+{
+ return __bch2_read_super(path, opts, sb, true);
+}
+
+/* write superblock: */
+
+static void write_super_endio(struct bio *bio)
+{
+ struct bch_dev *ca = bio->bi_private;
+
+ /* XXX: return errors directly */
+
+ if (bch2_dev_io_err_on(bio->bi_status, ca,
+ bio_data_dir(bio)
+ ? BCH_MEMBER_ERROR_write
+ : BCH_MEMBER_ERROR_read,
+ "superblock %s error: %s",
+ bio_data_dir(bio) ? "write" : "read",
+ bch2_blk_status_to_str(bio->bi_status)))
+ ca->sb_write_error = 1;
+
+ closure_put(&ca->fs->sb_write);
+ percpu_ref_put(&ca->io_ref);
+}
+
+static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
+{
+ struct bch_sb *sb = ca->disk_sb.sb;
+ struct bio *bio = ca->disk_sb.bio;
+
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
+ bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]);
+ bio->bi_end_io = write_super_endio;
+ bio->bi_private = ca;
+ bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE);
+
+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb],
+ bio_sectors(bio));
+
+ percpu_ref_get(&ca->io_ref);
+ closure_bio_submit(bio, &c->sb_write);
+}
+
+static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
+{
+ struct bch_sb *sb = ca->disk_sb.sb;
+ struct bio *bio = ca->disk_sb.bio;
+
+ sb->offset = sb->layout.sb_offset[idx];
+
+ SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, false));
+ sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
+ null_nonce(), sb);
+
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
+ bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
+ bio->bi_end_io = write_super_endio;
+ bio->bi_private = ca;
+ bch2_bio_map(bio, sb,
+ roundup((size_t) vstruct_bytes(sb),
+ bdev_logical_block_size(ca->disk_sb.bdev)));
+
+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
+ bio_sectors(bio));
+
+ percpu_ref_get(&ca->io_ref);
+ closure_bio_submit(bio, &c->sb_write);
+}
+
+int bch2_write_super(struct bch_fs *c)
+{
+ struct closure *cl = &c->sb_write;
+ struct printbuf err = PRINTBUF;
+ unsigned sb = 0, nr_wrote;
+ struct bch_devs_mask sb_written;
+ bool wrote, can_mount_without_written, can_mount_with_written;
+ unsigned degraded_flags = BCH_FORCE_IF_DEGRADED;
+ int ret = 0;
+
+ trace_and_count(c, write_super, c, _RET_IP_);
+
+ if (c->opts.very_degraded)
+ degraded_flags |= BCH_FORCE_IF_LOST;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ closure_init_stack(cl);
+ memset(&sb_written, 0, sizeof(sb_written));
+
+ /* Make sure we're using the new magic numbers: */
+ c->disk_sb.sb->magic = BCHFS_MAGIC;
+ c->disk_sb.sb->layout.magic = BCHFS_MAGIC;
+
+ le64_add_cpu(&c->disk_sb.sb->seq, 1);
+
+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+ for_each_online_member(c, ca)
+ __bch2_members_v2_get_mut(mi, ca->dev_idx)->seq = c->disk_sb.sb->seq;
+ c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds());
+
+ if (test_bit(BCH_FS_error, &c->flags))
+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
+ if (test_bit(BCH_FS_topology_error, &c->flags))
+ SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1);
+
+ SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);
+
+ bch2_sb_counters_from_cpu(c);
+ bch2_sb_members_from_cpu(c);
+ bch2_sb_members_cpy_v2_v1(&c->disk_sb);
+ bch2_sb_errors_from_cpu(c);
+ bch2_sb_downgrade_update(c);
+
+ for_each_online_member(c, ca)
+ bch2_sb_from_fs(c, ca);
+
+ for_each_online_member(c, ca) {
+ printbuf_reset(&err);
+
+ ret = bch2_sb_validate(&ca->disk_sb, &err, WRITE);
+ if (ret) {
+ bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf);
+ percpu_ref_put(&ca->io_ref);
+ goto out;
+ }
+ }
+
+ if (c->opts.nochanges)
+ goto out;
+
+ /*
+ * Defer writing the superblock until filesystem initialization is
+ * complete - don't write out a partly initialized superblock:
+ */
+ if (!BCH_SB_INITIALIZED(c->disk_sb.sb))
+ goto out;
+
+ if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) {
+ struct printbuf buf = PRINTBUF;
+ prt_printf(&buf, "attempting to write superblock that wasn't version downgraded (");
+ bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version));
+ prt_str(&buf, " > ");
+ bch2_version_to_text(&buf, bcachefs_metadata_version_current);
+ prt_str(&buf, ")");
+ bch2_fs_fatal_error(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return -BCH_ERR_sb_not_downgraded;
+ }
+
+ for_each_online_member(c, ca) {
+ __set_bit(ca->dev_idx, sb_written.d);
+ ca->sb_write_error = 0;
+ }
+
+ for_each_online_member(c, ca)
+ read_back_super(c, ca);
+ closure_sync(cl);
+
+ for_each_online_member(c, ca) {
+ if (ca->sb_write_error)
+ continue;
+
+ if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
+ bch2_fs_fatal_error(c,
+ "Superblock write was silently dropped! (seq %llu expected %llu)",
+ le64_to_cpu(ca->sb_read_scratch->seq),
+ ca->disk_sb.seq);
+ percpu_ref_put(&ca->io_ref);
+ ret = -BCH_ERR_erofs_sb_err;
+ goto out;
+ }
+
+ if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
+ bch2_fs_fatal_error(c,
+ "Superblock modified by another process (seq %llu expected %llu)",
+ le64_to_cpu(ca->sb_read_scratch->seq),
+ ca->disk_sb.seq);
+ percpu_ref_put(&ca->io_ref);
+ ret = -BCH_ERR_erofs_sb_err;
+ goto out;
+ }
+ }
+
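+	/*
+	 * Write superblock copies one index at a time across all devices,
+	 * syncing between rounds, so each device's first copy is written
+	 * before any later copies are attempted:
+	 */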
+ do {
+ wrote = false;
+ for_each_online_member(c, ca)
+ if (!ca->sb_write_error &&
+ sb < ca->disk_sb.sb->layout.nr_superblocks) {
+ write_one_super(c, ca, sb);
+ wrote = true;
+ }
+ closure_sync(cl);
+ sb++;
+ } while (wrote);
+
+ for_each_online_member(c, ca) {
+ if (ca->sb_write_error)
+ __clear_bit(ca->dev_idx, sb_written.d);
+ else
+ ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq);
+ }
+
+ nr_wrote = dev_mask_nr(&sb_written);
+
+ can_mount_with_written =
+ bch2_have_enough_devs(c, sb_written, degraded_flags, false);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++)
+ sb_written.d[i] = ~sb_written.d[i];
+
+ can_mount_without_written =
+ bch2_have_enough_devs(c, sb_written, degraded_flags, false);
+
+ /*
+ * If we would be able to mount _without_ the devices we successfully
+ * wrote superblocks to, we weren't able to write to enough devices:
+ *
+	 * Exception: if we could mount without those devices only because we
+	 * haven't written anything yet (new filesystem), we continue as long
+	 * as we'd be able to mount with the devices we did successfully write
+	 * to:
+ */
+ if (bch2_fs_fatal_err_on(!nr_wrote ||
+ !can_mount_with_written ||
+ (can_mount_without_written &&
+ !can_mount_with_written), c,
+ "Unable to write superblock to sufficient devices (from %ps)",
+ (void *) _RET_IP_))
+ ret = -1;
+out:
+ /* Make new options visible after they're persistent: */
+ bch2_sb_update(c);
+ printbuf_exit(&err);
+ return ret;
+}
+
+void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
+{
+ mutex_lock(&c->sb_lock);
+ if (!(c->sb.features & (1ULL << feat))) {
+ c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat);
+
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+}
+
+/* Downgrade if superblock is at a higher version than currently supported: */
+bool bch2_check_version_downgrade(struct bch_fs *c)
+{
+ bool ret = bcachefs_metadata_version_current < c->sb.version;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ /*
+ * Downgrade, if superblock is at a higher version than currently
+ * supported:
+ *
+ * c->sb will be checked before we write the superblock, so update it as
+ * well:
+ */
+ if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) {
+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
+ c->sb.version_upgrade_complete = bcachefs_metadata_version_current;
+ }
+ if (c->sb.version > bcachefs_metadata_version_current) {
+ c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
+ c->sb.version = bcachefs_metadata_version_current;
+ }
+ if (c->sb.version_min > bcachefs_metadata_version_current) {
+ c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
+ c->sb.version_min = bcachefs_metadata_version_current;
+ }
+ c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
+ return ret;
+}
+
+void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
+{
+ lockdep_assert_held(&c->sb_lock);
+
+ if (BCH_VERSION_MAJOR(new_version) >
+ BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+ bch2_sb_field_resize(&c->disk_sb, downgrade, 0);
+
+ c->disk_sb.sb->version = cpu_to_le16(new_version);
+ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
+}
+
+static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ if (vstruct_bytes(f) < 88) {
+ prt_printf(err, "field too small (%zu < %u)", vstruct_bytes(f), 88);
+ return -BCH_ERR_invalid_sb_ext;
+ }
+
+ return 0;
+}
+
+static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ struct bch_sb_field_ext *e = field_to_type(f, ext);
+
+ prt_printf(out, "Recovery passes required:");
+ prt_tab(out);
+ prt_bitflags(out, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0])));
+ prt_newline(out);
+
+ unsigned long *errors_silent = kmalloc(sizeof(e->errors_silent), GFP_KERNEL);
+ if (errors_silent) {
+ le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8);
+
+ prt_printf(out, "Errors to silently fix:");
+ prt_tab(out);
+ prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8);
+ prt_newline(out);
+
+ kfree(errors_silent);
+ }
+}
+
+static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
+ .validate = bch2_sb_ext_validate,
+ .to_text = bch2_sb_ext_to_text,
+};
+
+static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
+#define x(f, nr) \
+ [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,
+ BCH_SB_FIELDS()
+#undef x
+};
+
+static const struct bch_sb_field_ops bch2_sb_field_null_ops;
+
+static const struct bch_sb_field_ops *bch2_sb_field_type_ops(unsigned type)
+{
+ return likely(type < ARRAY_SIZE(bch2_sb_field_ops))
+ ? bch2_sb_field_ops[type]
+ : &bch2_sb_field_null_ops;
+}
+
+static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
+ struct printbuf *err)
+{
+ unsigned type = le32_to_cpu(f->type);
+ struct printbuf field_err = PRINTBUF;
+ const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
+ int ret;
+
+ ret = ops->validate ? ops->validate(sb, f, &field_err) : 0;
+ if (ret) {
+ prt_printf(err, "Invalid superblock section %s: %s",
+ bch2_sb_fields[type], field_err.buf);
+ prt_newline(err);
+ bch2_sb_field_to_text(err, sb, f);
+ }
+
+ printbuf_exit(&field_err);
+ return ret;
+}
+
+void __bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ unsigned type = le32_to_cpu(f->type);
+ const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
+
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 32);
+
+ if (ops->to_text)
+ ops->to_text(out, sb, f);
+}
+
+void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ unsigned type = le32_to_cpu(f->type);
+
+ if (type < BCH_SB_FIELD_NR)
+ prt_printf(out, "%s", bch2_sb_fields[type]);
+ else
+ prt_printf(out, "(unknown field %u)", type);
+
+ prt_printf(out, " (size %zu):", vstruct_bytes(f));
+ prt_newline(out);
+
+ __bch2_sb_field_to_text(out, sb, f);
+}
+
+void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l)
+{
+ unsigned i;
+
+ prt_printf(out, "Type: %u", l->layout_type);
+ prt_newline(out);
+
+ prt_str(out, "Superblock max size: ");
+ prt_units_u64(out, 512 << l->sb_max_size_bits);
+ prt_newline(out);
+
+ prt_printf(out, "Nr superblocks: %u", l->nr_superblocks);
+ prt_newline(out);
+
+ prt_str(out, "Offsets: ");
+ for (i = 0; i < l->nr_superblocks; i++) {
+ if (i)
+ prt_str(out, ", ");
+ prt_printf(out, "%llu", le64_to_cpu(l->sb_offset[i]));
+ }
+ prt_newline(out);
+}
+
+void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
+ bool print_layout, unsigned fields)
+{
+ u64 fields_have = 0;
+ unsigned nr_devices = 0;
+
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 44);
+
+ for (int i = 0; i < sb->nr_devices; i++)
+ nr_devices += bch2_dev_exists(sb, i);
+
+ prt_printf(out, "External UUID:");
+ prt_tab(out);
+ pr_uuid(out, sb->user_uuid.b);
+ prt_newline(out);
+
+ prt_printf(out, "Internal UUID:");
+ prt_tab(out);
+ pr_uuid(out, sb->uuid.b);
+ prt_newline(out);
+
+ prt_printf(out, "Magic number:");
+ prt_tab(out);
+ pr_uuid(out, sb->magic.b);
+ prt_newline(out);
+
+ prt_str(out, "Device index:");
+ prt_tab(out);
+ prt_printf(out, "%u", sb->dev_idx);
+ prt_newline(out);
+
+ prt_str(out, "Label:");
+ prt_tab(out);
+ prt_printf(out, "%.*s", (int) sizeof(sb->label), sb->label);
+ prt_newline(out);
+
+ prt_str(out, "Version:");
+ prt_tab(out);
+ bch2_version_to_text(out, le16_to_cpu(sb->version));
+ prt_newline(out);
+
+ prt_str(out, "Version upgrade complete:");
+ prt_tab(out);
+ bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb));
+ prt_newline(out);
+
+ prt_printf(out, "Oldest version on disk:");
+ prt_tab(out);
+ bch2_version_to_text(out, le16_to_cpu(sb->version_min));
+ prt_newline(out);
+
+ prt_printf(out, "Created:");
+ prt_tab(out);
+ if (sb->time_base_lo)
+ bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
+ else
+ prt_printf(out, "(not set)");
+ prt_newline(out);
+
+ prt_printf(out, "Sequence number:");
+ prt_tab(out);
+ prt_printf(out, "%llu", le64_to_cpu(sb->seq));
+ prt_newline(out);
+
+ prt_printf(out, "Time of last write:");
+ prt_tab(out);
+ bch2_prt_datetime(out, le64_to_cpu(sb->write_time));
+ prt_newline(out);
+
+ prt_printf(out, "Superblock size:");
+ prt_tab(out);
+ prt_printf(out, "%zu", vstruct_bytes(sb));
+ prt_newline(out);
+
+ prt_printf(out, "Clean:");
+ prt_tab(out);
+ prt_printf(out, "%llu", BCH_SB_CLEAN(sb));
+ prt_newline(out);
+
+ prt_printf(out, "Devices:");
+ prt_tab(out);
+ prt_printf(out, "%u", nr_devices);
+ prt_newline(out);
+
+ prt_printf(out, "Sections:");
+ vstruct_for_each(sb, f)
+ fields_have |= 1 << le32_to_cpu(f->type);
+ prt_tab(out);
+ prt_bitflags(out, bch2_sb_fields, fields_have);
+ prt_newline(out);
+
+ prt_printf(out, "Features:");
+ prt_tab(out);
+ prt_bitflags(out, bch2_sb_features, le64_to_cpu(sb->features[0]));
+ prt_newline(out);
+
+ prt_printf(out, "Compat features:");
+ prt_tab(out);
+ prt_bitflags(out, bch2_sb_compat, le64_to_cpu(sb->compat[0]));
+ prt_newline(out);
+
+ prt_newline(out);
+ prt_printf(out, "Options:");
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+ {
+ enum bch_opt_id id;
+
+ for (id = 0; id < bch2_opts_nr; id++) {
+ const struct bch_option *opt = bch2_opt_table + id;
+
+ if (opt->get_sb != BCH2_NO_SB_OPT) {
+ u64 v = bch2_opt_from_sb(sb, id);
+
+ prt_printf(out, "%s:", opt->attr.name);
+ prt_tab(out);
+ bch2_opt_to_text(out, NULL, sb, opt, v,
+ OPT_HUMAN_READABLE|OPT_SHOW_FULL_LIST);
+ prt_newline(out);
+ }
+ }
+ }
+
+ printbuf_indent_sub(out, 2);
+
+ if (print_layout) {
+ prt_newline(out);
+ prt_printf(out, "layout:");
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+ bch2_sb_layout_to_text(out, &sb->layout);
+ printbuf_indent_sub(out, 2);
+ }
+
+ vstruct_for_each(sb, f)
+ if (fields & (1 << le32_to_cpu(f->type))) {
+ prt_newline(out);
+ bch2_sb_field_to_text(out, sb, f);
+ }
+}
diff --git a/c_src/libbcachefs/super-io.h b/c_src/libbcachefs/super-io.h
new file mode 100644
index 00000000..95e80e06
--- /dev/null
+++ b/c_src/libbcachefs/super-io.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SUPER_IO_H
+#define _BCACHEFS_SUPER_IO_H
+
+#include "extents.h"
+#include "eytzinger.h"
+#include "super_types.h"
+#include "super.h"
+#include "sb-members.h"
+
+#include <asm/byteorder.h>
+
+static inline bool bch2_version_compatible(u16 version)
+{
+ return BCH_VERSION_MAJOR(version) <= BCH_VERSION_MAJOR(bcachefs_metadata_version_current) &&
+ version >= bcachefs_metadata_version_min;
+}
+
+void bch2_version_to_text(struct printbuf *, unsigned);
+unsigned bch2_latest_compatible_version(unsigned);
+
+static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f)
+{
+ return le32_to_cpu(f->u64s) * sizeof(u64);
+}
+
+#define field_to_type(_f, _name) \
+ container_of_or_null(_f, struct bch_sb_field_##_name, field)
+
+struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *, enum bch_sb_field_type);
+#define bch2_sb_field_get(_sb, _name) \
+ field_to_type(bch2_sb_field_get_id(_sb, BCH_SB_FIELD_##_name), _name)
+
+struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *,
+ enum bch_sb_field_type, unsigned);
+#define bch2_sb_field_resize(_sb, _name, _u64s) \
+ field_to_type(bch2_sb_field_resize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name)
+
+struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *,
+ enum bch_sb_field_type, unsigned);
+#define bch2_sb_field_get_minsize(_sb, _name, _u64s) \
+ field_to_type(bch2_sb_field_get_minsize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name)
+
+#define bch2_sb_field_nr_entries(_f) \
+ (_f ? ((bch2_sb_field_bytes(&_f->field) - sizeof(*_f)) / \
+ sizeof(_f->entries[0])) \
+ : 0)
+
+void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
+
+extern const char * const bch2_sb_fields[];
+
+struct bch_sb_field_ops {
+ int (*validate)(struct bch_sb *, struct bch_sb_field *, struct printbuf *);
+ void (*to_text)(struct printbuf *, struct bch_sb *, struct bch_sb_field *);
+};
+
+static inline __le64 bch2_sb_magic(struct bch_fs *c)
+{
+ __le64 ret;
+
+ memcpy(&ret, &c->sb.uuid, sizeof(ret));
+ return ret;
+}
+
+static inline __u64 jset_magic(struct bch_fs *c)
+{
+ return __le64_to_cpu(bch2_sb_magic(c) ^ JSET_MAGIC);
+}
+
+static inline __u64 bset_magic(struct bch_fs *c)
+{
+ return __le64_to_cpu(bch2_sb_magic(c) ^ BSET_MAGIC);
+}
+
+int bch2_sb_to_fs(struct bch_fs *, struct bch_sb *);
+int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *);
+
+void bch2_free_super(struct bch_sb_handle *);
+int bch2_sb_realloc(struct bch_sb_handle *, unsigned);
+
+int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *);
+int bch2_read_super_silent(const char *, struct bch_opts *, struct bch_sb_handle *);
+int bch2_write_super(struct bch_fs *);
+void __bch2_check_set_feature(struct bch_fs *, unsigned);
+
+static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
+{
+ if (!(c->sb.features & (1ULL << feat)))
+ __bch2_check_set_feature(c, feat);
+}
+
+bool bch2_check_version_downgrade(struct bch_fs *);
+void bch2_sb_upgrade(struct bch_fs *, unsigned);
+
+void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
+ struct bch_sb_field *);
+void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
+ struct bch_sb_field *);
+void bch2_sb_layout_to_text(struct printbuf *, struct bch_sb_layout *);
+void bch2_sb_to_text(struct printbuf *, struct bch_sb *, bool, unsigned);
+
+#endif /* _BCACHEFS_SUPER_IO_H */
diff --git a/c_src/libbcachefs/super.c b/c_src/libbcachefs/super.c
new file mode 100644
index 00000000..9dbc3594
--- /dev/null
+++ b/c_src/libbcachefs/super.c
@@ -0,0 +1,2124 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bcachefs setup/teardown code, and some metadata io - read a superblock and
+ * figure out what to do with it.
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "bkey_sort.h"
+#include "btree_cache.h"
+#include "btree_gc.h"
+#include "btree_journal_iter.h"
+#include "btree_key_cache.h"
+#include "btree_update_interior.h"
+#include "btree_io.h"
+#include "btree_write_buffer.h"
+#include "buckets_waiting_for_journal.h"
+#include "chardev.h"
+#include "checksum.h"
+#include "clock.h"
+#include "compress.h"
+#include "counters.h"
+#include "debug.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "errcode.h"
+#include "error.h"
+#include "fs.h"
+#include "fs-io.h"
+#include "fs-io-buffered.h"
+#include "fs-io-direct.h"
+#include "fsck.h"
+#include "inode.h"
+#include "io_read.h"
+#include "io_write.h"
+#include "journal.h"
+#include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
+#include "move.h"
+#include "migrate.h"
+#include "movinggc.h"
+#include "nocow_locking.h"
+#include "quota.h"
+#include "rebalance.h"
+#include "recovery.h"
+#include "replicas.h"
+#include "sb-clean.h"
+#include "sb-errors.h"
+#include "sb-members.h"
+#include "snapshot.h"
+#include "subvolume.h"
+#include "super.h"
+#include "super-io.h"
+#include "sysfs.h"
+#include "trace.h"
+
+#include <linux/backing-dev.h>
+#include <linux/blkdev.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/random.h>
+#include <linux/sysfs.h>
+#include <crypto/hash.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
+MODULE_DESCRIPTION("bcachefs filesystem");
+MODULE_SOFTDEP("pre: crc32c");
+MODULE_SOFTDEP("pre: crc64");
+MODULE_SOFTDEP("pre: sha256");
+MODULE_SOFTDEP("pre: chacha20");
+MODULE_SOFTDEP("pre: poly1305");
+MODULE_SOFTDEP("pre: xxhash");
+
+const char * const bch2_fs_flag_strs[] = {
+#define x(n) #n,
+ BCH_FS_FLAGS()
+#undef x
+ NULL
+};
+
+void __bch2_print(struct bch_fs *c, const char *fmt, ...)
+{
+ struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c);
+
+ va_list args;
+ va_start(args, fmt);
+ if (likely(!stdio)) {
+ vprintk(fmt, args);
+ } else {
+ unsigned long flags;
+
+ if (fmt[0] == KERN_SOH[0])
+ fmt += 2;
+
+ spin_lock_irqsave(&stdio->output_lock, flags);
+ prt_vprintf(&stdio->output_buf, fmt, args);
+ spin_unlock_irqrestore(&stdio->output_lock, flags);
+
+ wake_up(&stdio->output_wait);
+ }
+ va_end(args);
+}
+
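+/*
+ * KTYPE(type) expands to the attribute group, group list and kobj_type
+ * boilerplate needed for each sysfs object type defined below:
+ */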
+#define KTYPE(type) \
+static const struct attribute_group type ## _group = { \
+ .attrs = type ## _files \
+}; \
+ \
+static const struct attribute_group *type ## _groups[] = { \
+ &type ## _group, \
+ NULL \
+}; \
+ \
+static const struct kobj_type type ## _ktype = { \
+ .release = type ## _release, \
+ .sysfs_ops = &type ## _sysfs_ops, \
+ .default_groups = type ## _groups \
+}
+
+static void bch2_fs_release(struct kobject *);
+static void bch2_dev_release(struct kobject *);
+static void bch2_fs_counters_release(struct kobject *k)
+{
+}
+
+static void bch2_fs_internal_release(struct kobject *k)
+{
+}
+
+static void bch2_fs_opts_dir_release(struct kobject *k)
+{
+}
+
+static void bch2_fs_time_stats_release(struct kobject *k)
+{
+}
+
+KTYPE(bch2_fs);
+KTYPE(bch2_fs_counters);
+KTYPE(bch2_fs_internal);
+KTYPE(bch2_fs_opts_dir);
+KTYPE(bch2_fs_time_stats);
+KTYPE(bch2_dev);
+
+static struct kset *bcachefs_kset;
+static LIST_HEAD(bch_fs_list);
+static DEFINE_MUTEX(bch_fs_list_lock);
+
+DECLARE_WAIT_QUEUE_HEAD(bch2_read_only_wait);
+
+static void bch2_dev_free(struct bch_dev *);
+static int bch2_dev_alloc(struct bch_fs *, unsigned);
+static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
+static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
+
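+/*
+ * Look up the filesystem that a block device belongs to; returns with a
+ * reference on c->cl held:
+ */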
+struct bch_fs *bch2_dev_to_fs(dev_t dev)
+{
+ struct bch_fs *c;
+
+ mutex_lock(&bch_fs_list_lock);
+ rcu_read_lock();
+
+ list_for_each_entry(c, &bch_fs_list, list)
+ for_each_member_device_rcu(c, ca, NULL)
+ if (ca->disk_sb.bdev && ca->disk_sb.bdev->bd_dev == dev) {
+ closure_get(&c->cl);
+ goto found;
+ }
+ c = NULL;
+found:
+ rcu_read_unlock();
+ mutex_unlock(&bch_fs_list_lock);
+
+ return c;
+}
+
+static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid)
+{
+ struct bch_fs *c;
+
+ lockdep_assert_held(&bch_fs_list_lock);
+
+ list_for_each_entry(c, &bch_fs_list, list)
+ if (!memcmp(&c->disk_sb.sb->uuid, &uuid, sizeof(uuid)))
+ return c;
+
+ return NULL;
+}
+
+struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
+{
+ struct bch_fs *c;
+
+ mutex_lock(&bch_fs_list_lock);
+ c = __bch2_uuid_to_fs(uuid);
+ if (c)
+ closure_get(&c->cl);
+ mutex_unlock(&bch_fs_list_lock);
+
+ return c;
+}
+
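+/* Size the journal reservation for one dev_usage entry per member device: */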
+static void bch2_dev_usage_journal_reserve(struct bch_fs *c)
+{
+ unsigned nr = 0, u64s =
+ ((sizeof(struct jset_entry_dev_usage) +
+ sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR)) /
+ sizeof(u64);
+
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, NULL)
+ nr++;
+ rcu_read_unlock();
+
+ bch2_journal_entry_res_resize(&c->journal,
+ &c->dev_usage_journal_res, u64s * nr);
+}
+
+/* Filesystem RO/RW: */
+
+/*
+ * For startup/shutdown of RW stuff, the dependencies are:
+ *
+ * - foreground writes depend on copygc and rebalance (to free up space)
+ *
+ * - copygc and rebalance depend on mark and sweep gc (they actually probably
+ * don't because they either reserve ahead of time or don't block if
+ * allocations fail, but allocations can require mark and sweep gc to run
+ * because of generation number wraparound)
+ *
+ * - all of the above depends on the allocator threads
+ *
+ * - allocator depends on the journal (when it rewrites prios and gens)
+ */
+
+static void __bch2_fs_read_only(struct bch_fs *c)
+{
+ unsigned clean_passes = 0;
+ u64 seq = 0;
+
+ bch2_fs_ec_stop(c);
+ bch2_open_buckets_stop(c, NULL, true);
+ bch2_rebalance_stop(c);
+ bch2_copygc_stop(c);
+ bch2_gc_thread_stop(c);
+ bch2_fs_ec_flush(c);
+
+ bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu",
+ journal_cur_seq(&c->journal));
+
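+ /*
+ * Keep flushing until two consecutive passes find nothing left to flush
+ * and the journal sequence number has stopped advancing:
+ */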
+ do {
+ clean_passes++;
+
+ if (bch2_btree_interior_updates_flush(c) ||
+ bch2_journal_flush_all_pins(&c->journal) ||
+ bch2_btree_flush_all_writes(c) ||
+ seq != atomic64_read(&c->journal.seq)) {
+ seq = atomic64_read(&c->journal.seq);
+ clean_passes = 0;
+ }
+ } while (clean_passes < 2);
+
+ bch_verbose(c, "flushing journal and stopping allocators complete, journal seq %llu",
+ journal_cur_seq(&c->journal));
+
+ if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
+ !test_bit(BCH_FS_emergency_ro, &c->flags))
+ set_bit(BCH_FS_clean_shutdown, &c->flags);
+ bch2_fs_journal_stop(&c->journal);
+
+ /*
+ * After stopping journal:
+ */
+ for_each_member_device(c, ca)
+ bch2_dev_allocator_remove(c, ca);
+}
+
+#ifndef BCH_WRITE_REF_DEBUG
+static void bch2_writes_disabled(struct percpu_ref *writes)
+{
+ struct bch_fs *c = container_of(writes, struct bch_fs, writes);
+
+ set_bit(BCH_FS_write_disable_complete, &c->flags);
+ wake_up(&bch2_read_only_wait);
+}
+#endif
+
+void bch2_fs_read_only(struct bch_fs *c)
+{
+ if (!test_bit(BCH_FS_rw, &c->flags)) {
+ bch2_journal_reclaim_stop(&c->journal);
+ return;
+ }
+
+ BUG_ON(test_bit(BCH_FS_write_disable_complete, &c->flags));
+
+ bch_verbose(c, "going read-only");
+
+ /*
+ * Block new foreground-end write operations from starting - any new
+ * writes will return -EROFS:
+ */
+ set_bit(BCH_FS_going_ro, &c->flags);
+#ifndef BCH_WRITE_REF_DEBUG
+ percpu_ref_kill(&c->writes);
+#else
+ for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
+ bch2_write_ref_put(c, i);
+#endif
+
+ /*
+ * If we're not doing an emergency shutdown, we want to wait on
+ * outstanding writes to complete so they don't see spurious errors due
+ * to shutting down the allocator:
+ *
+ * If we are doing an emergency shutdown, outstanding writes may
+ * hang until we shut down the allocator, so we don't want to wait
+ * on outstanding writes before shutting everything down - but
+ * we do need to wait on them before returning and signalling
+ * that going RO is complete:
+ */
+ wait_event(bch2_read_only_wait,
+ test_bit(BCH_FS_write_disable_complete, &c->flags) ||
+ test_bit(BCH_FS_emergency_ro, &c->flags));
+
+ bool writes_disabled = test_bit(BCH_FS_write_disable_complete, &c->flags);
+ if (writes_disabled)
+ bch_verbose(c, "finished waiting for writes to stop");
+
+ __bch2_fs_read_only(c);
+
+ wait_event(bch2_read_only_wait,
+ test_bit(BCH_FS_write_disable_complete, &c->flags));
+
+ if (!writes_disabled)
+ bch_verbose(c, "finished waiting for writes to stop");
+
+ clear_bit(BCH_FS_write_disable_complete, &c->flags);
+ clear_bit(BCH_FS_going_ro, &c->flags);
+ clear_bit(BCH_FS_rw, &c->flags);
+
+ if (!bch2_journal_error(&c->journal) &&
+ !test_bit(BCH_FS_error, &c->flags) &&
+ !test_bit(BCH_FS_emergency_ro, &c->flags) &&
+ test_bit(BCH_FS_started, &c->flags) &&
+ test_bit(BCH_FS_clean_shutdown, &c->flags) &&
+ !c->opts.norecovery) {
+ BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
+ BUG_ON(atomic_read(&c->btree_cache.dirty));
+ BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
+ BUG_ON(c->btree_write_buffer.inc.keys.nr);
+ BUG_ON(c->btree_write_buffer.flushing.keys.nr);
+
+ bch_verbose(c, "marking filesystem clean");
+ bch2_fs_mark_clean(c);
+ } else {
+ bch_verbose(c, "done going read-only, filesystem not clean");
+ }
+}
+
+static void bch2_fs_read_only_work(struct work_struct *work)
+{
+ struct bch_fs *c =
+ container_of(work, struct bch_fs, read_only_work);
+
+ down_write(&c->state_lock);
+ bch2_fs_read_only(c);
+ up_write(&c->state_lock);
+}
+
+static void bch2_fs_read_only_async(struct bch_fs *c)
+{
+ queue_work(system_long_wq, &c->read_only_work);
+}
+
+bool bch2_fs_emergency_read_only(struct bch_fs *c)
+{
+ bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags);
+
+ bch2_journal_halt(&c->journal);
+ bch2_fs_read_only_async(c);
+
+ wake_up(&bch2_read_only_wait);
+ return ret;
+}
+
+static int bch2_fs_read_write_late(struct bch_fs *c)
+{
+ int ret;
+
+ /*
+ * Data move operations can't run until after check_snapshots has
+ * completed, and bch2_snapshot_is_ancestor() is available.
+ *
+ * Ideally we'd start copygc/rebalance earlier instead of waiting for
+ * all of recovery/fsck to complete:
+ */
+ ret = bch2_copygc_start(c);
+ if (ret) {
+ bch_err(c, "error starting copygc thread");
+ return ret;
+ }
+
+ ret = bch2_rebalance_start(c);
+ if (ret) {
+ bch_err(c, "error starting rebalance thread");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int __bch2_fs_read_write(struct bch_fs *c, bool early)
+{
+ int ret;
+
+ if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) {
+ bch_err(c, "cannot go rw, unfixed btree errors");
+ return -BCH_ERR_erofs_unfixed_errors;
+ }
+
+ if (test_bit(BCH_FS_rw, &c->flags))
+ return 0;
+
+ bch_info(c, "going read-write");
+
+ ret = bch2_sb_members_v2_init(c);
+ if (ret)
+ goto err;
+
+ ret = bch2_fs_mark_dirty(c);
+ if (ret)
+ goto err;
+
+ clear_bit(BCH_FS_clean_shutdown, &c->flags);
+
+ /*
+ * First journal write must be a flush write: after a clean shutdown we
+ * don't read the journal, so the first journal write may end up
+ * overwriting whatever was there previously, and there must always be
+ * at least one non-flush write in the journal or recovery will fail:
+ */
+ set_bit(JOURNAL_NEED_FLUSH_WRITE, &c->journal.flags);
+
+ for_each_rw_member(c, ca)
+ bch2_dev_allocator_add(c, ca);
+ bch2_recalc_capacity(c);
+
+ set_bit(BCH_FS_rw, &c->flags);
+ set_bit(BCH_FS_was_rw, &c->flags);
+
+#ifndef BCH_WRITE_REF_DEBUG
+ percpu_ref_reinit(&c->writes);
+#else
+ for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) {
+ BUG_ON(atomic_long_read(&c->writes[i]));
+ atomic_long_inc(&c->writes[i]);
+ }
+#endif
+
+ ret = bch2_gc_thread_start(c);
+ if (ret) {
+ bch_err(c, "error starting gc thread");
+ return ret;
+ }
+
+ ret = bch2_journal_reclaim_start(&c->journal);
+ if (ret)
+ goto err;
+
+ if (!early) {
+ ret = bch2_fs_read_write_late(c);
+ if (ret)
+ goto err;
+ }
+
+ bch2_do_discards(c);
+ bch2_do_invalidates(c);
+ bch2_do_stripe_deletes(c);
+ bch2_do_pending_node_rewrites(c);
+ return 0;
+err:
+ if (test_bit(BCH_FS_rw, &c->flags))
+ bch2_fs_read_only(c);
+ else
+ __bch2_fs_read_only(c);
+ return ret;
+}
+
+int bch2_fs_read_write(struct bch_fs *c)
+{
+ if (c->opts.norecovery)
+ return -BCH_ERR_erofs_norecovery;
+
+ if (c->opts.nochanges)
+ return -BCH_ERR_erofs_nochanges;
+
+ return __bch2_fs_read_write(c, false);
+}
+
+int bch2_fs_read_write_early(struct bch_fs *c)
+{
+ lockdep_assert_held(&c->state_lock);
+
+ return __bch2_fs_read_write(c, true);
+}
+
+/* Filesystem startup/shutdown: */
+
+static void __bch2_fs_free(struct bch_fs *c)
+{
+ unsigned i;
+
+ for (i = 0; i < BCH_TIME_STAT_NR; i++)
+ bch2_time_stats_exit(&c->times[i]);
+
+ bch2_free_pending_node_rewrites(c);
+ bch2_fs_sb_errors_exit(c);
+ bch2_fs_counters_exit(c);
+ bch2_fs_snapshots_exit(c);
+ bch2_fs_quota_exit(c);
+ bch2_fs_fs_io_direct_exit(c);
+ bch2_fs_fs_io_buffered_exit(c);
+ bch2_fs_fsio_exit(c);
+ bch2_fs_ec_exit(c);
+ bch2_fs_encryption_exit(c);
+ bch2_fs_nocow_locking_exit(c);
+ bch2_fs_io_write_exit(c);
+ bch2_fs_io_read_exit(c);
+ bch2_fs_buckets_waiting_for_journal_exit(c);
+ bch2_fs_btree_interior_update_exit(c);
+ bch2_fs_btree_iter_exit(c);
+ bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
+ bch2_fs_btree_cache_exit(c);
+ bch2_fs_replicas_exit(c);
+ bch2_fs_journal_exit(&c->journal);
+ bch2_io_clock_exit(&c->io_clock[WRITE]);
+ bch2_io_clock_exit(&c->io_clock[READ]);
+ bch2_fs_compress_exit(c);
+ bch2_journal_keys_put_initial(c);
+ BUG_ON(atomic_read(&c->journal_keys.ref));
+ bch2_fs_btree_write_buffer_exit(c);
+ percpu_free_rwsem(&c->mark_lock);
+ free_percpu(c->online_reserved);
+
+ darray_exit(&c->btree_roots_extra);
+ free_percpu(c->pcpu);
+ mempool_exit(&c->large_bkey_pool);
+ mempool_exit(&c->btree_bounce_pool);
+ bioset_exit(&c->btree_bio);
+ mempool_exit(&c->fill_iter);
+#ifndef BCH_WRITE_REF_DEBUG
+ percpu_ref_exit(&c->writes);
+#endif
+ kfree(rcu_dereference_protected(c->disk_groups, 1));
+ kfree(c->journal_seq_blacklist_table);
+ kfree(c->unused_inode_hints);
+
+ if (c->write_ref_wq)
+ destroy_workqueue(c->write_ref_wq);
+ if (c->io_complete_wq)
+ destroy_workqueue(c->io_complete_wq);
+ if (c->copygc_wq)
+ destroy_workqueue(c->copygc_wq);
+ if (c->btree_io_complete_wq)
+ destroy_workqueue(c->btree_io_complete_wq);
+ if (c->btree_update_wq)
+ destroy_workqueue(c->btree_update_wq);
+
+ bch2_free_super(&c->disk_sb);
+ kvpfree(c, sizeof(*c));
+ module_put(THIS_MODULE);
+}
+
+static void bch2_fs_release(struct kobject *kobj)
+{
+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
+
+ __bch2_fs_free(c);
+}
+
+void __bch2_fs_stop(struct bch_fs *c)
+{
+ bch_verbose(c, "shutting down");
+
+ set_bit(BCH_FS_stopping, &c->flags);
+
+ cancel_work_sync(&c->journal_seq_blacklist_gc_work);
+
+ down_write(&c->state_lock);
+ bch2_fs_read_only(c);
+ up_write(&c->state_lock);
+
+ for_each_member_device(c, ca)
+ if (ca->kobj.state_in_sysfs &&
+ ca->disk_sb.bdev)
+ sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
+
+ if (c->kobj.state_in_sysfs)
+ kobject_del(&c->kobj);
+
+ bch2_fs_debug_exit(c);
+ bch2_fs_chardev_exit(c);
+
+ bch2_ro_ref_put(c);
+ wait_event(c->ro_ref_wait, !refcount_read(&c->ro_ref));
+
+ kobject_put(&c->counters_kobj);
+ kobject_put(&c->time_stats);
+ kobject_put(&c->opts_dir);
+ kobject_put(&c->internal);
+
+ /* btree prefetch might have kicked off reads in the background: */
+ bch2_btree_flush_all_reads(c);
+
+ for_each_member_device(c, ca)
+ cancel_work_sync(&ca->io_error_work);
+
+ cancel_work_sync(&c->read_only_work);
+}
+
+void bch2_fs_free(struct bch_fs *c)
+{
+ unsigned i;
+
+ mutex_lock(&bch_fs_list_lock);
+ list_del(&c->list);
+ mutex_unlock(&bch_fs_list_lock);
+
+ closure_sync(&c->cl);
+ closure_debug_destroy(&c->cl);
+
+ for (i = 0; i < c->sb.nr_devices; i++) {
+ struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true);
+
+ if (ca) {
+ bch2_free_super(&ca->disk_sb);
+ bch2_dev_free(ca);
+ }
+ }
+
+ bch_verbose(c, "shutdown complete");
+
+ kobject_put(&c->kobj);
+}
+
+void bch2_fs_stop(struct bch_fs *c)
+{
+ __bch2_fs_stop(c);
+ bch2_fs_free(c);
+}
+
+static int bch2_fs_online(struct bch_fs *c)
+{
+ int ret = 0;
+
+ lockdep_assert_held(&bch_fs_list_lock);
+
+ if (__bch2_uuid_to_fs(c->sb.uuid)) {
+ bch_err(c, "filesystem UUID already open");
+ return -EINVAL;
+ }
+
+ ret = bch2_fs_chardev_init(c);
+ if (ret) {
+ bch_err(c, "error creating character device");
+ return ret;
+ }
+
+ bch2_fs_debug_init(c);
+
+ ret = kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ?:
+ kobject_add(&c->internal, &c->kobj, "internal") ?:
+ kobject_add(&c->opts_dir, &c->kobj, "options") ?:
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+ kobject_add(&c->time_stats, &c->kobj, "time_stats") ?:
+#endif
+ kobject_add(&c->counters_kobj, &c->kobj, "counters") ?:
+ bch2_opts_create_sysfs_files(&c->opts_dir);
+ if (ret) {
+ bch_err(c, "error creating sysfs objects");
+ return ret;
+ }
+
+ down_write(&c->state_lock);
+
+ for_each_member_device(c, ca) {
+ ret = bch2_dev_sysfs_online(c, ca);
+ if (ret) {
+ bch_err(c, "error creating sysfs objects");
+ percpu_ref_put(&ca->ref);
+ goto err;
+ }
+ }
+
+ BUG_ON(!list_empty(&c->list));
+ list_add(&c->list, &bch_fs_list);
+err:
+ up_write(&c->state_lock);
+ return ret;
+}
+
+static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
+{
+ struct bch_fs *c;
+ struct printbuf name = PRINTBUF;
+ unsigned i, iter_size;
+ int ret = 0;
+
+ c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
+ if (!c) {
+ c = ERR_PTR(-BCH_ERR_ENOMEM_fs_alloc);
+ goto out;
+ }
+
+ c->stdio = (void *)(unsigned long) opts.stdio;
+
+ __module_get(THIS_MODULE);
+
+ closure_init(&c->cl, NULL);
+
+ c->kobj.kset = bcachefs_kset;
+ kobject_init(&c->kobj, &bch2_fs_ktype);
+ kobject_init(&c->internal, &bch2_fs_internal_ktype);
+ kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
+ kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
+ kobject_init(&c->counters_kobj, &bch2_fs_counters_ktype);
+
+ c->minor = -1;
+ c->disk_sb.fs_sb = true;
+
+ init_rwsem(&c->state_lock);
+ mutex_init(&c->sb_lock);
+ mutex_init(&c->replicas_gc_lock);
+ mutex_init(&c->btree_root_lock);
+ INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);
+
+ refcount_set(&c->ro_ref, 1);
+ init_waitqueue_head(&c->ro_ref_wait);
+ sema_init(&c->online_fsck_mutex, 1);
+
+ init_rwsem(&c->gc_lock);
+ mutex_init(&c->gc_gens_lock);
+ atomic_set(&c->journal_keys.ref, 1);
+ c->journal_keys.initial_ref_held = true;
+
+ for (i = 0; i < BCH_TIME_STAT_NR; i++)
+ bch2_time_stats_init(&c->times[i]);
+
+ bch2_fs_copygc_init(c);
+ bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
+ bch2_fs_btree_iter_init_early(c);
+ bch2_fs_btree_interior_update_init_early(c);
+ bch2_fs_allocator_background_init(c);
+ bch2_fs_allocator_foreground_init(c);
+ bch2_fs_rebalance_init(c);
+ bch2_fs_quota_init(c);
+ bch2_fs_ec_init_early(c);
+ bch2_fs_move_init(c);
+ bch2_fs_sb_errors_init_early(c);
+
+ INIT_LIST_HEAD(&c->list);
+
+ mutex_init(&c->usage_scratch_lock);
+
+ mutex_init(&c->bio_bounce_pages_lock);
+ mutex_init(&c->snapshot_table_lock);
+ init_rwsem(&c->snapshot_create_lock);
+
+ spin_lock_init(&c->btree_write_error_lock);
+
+ INIT_WORK(&c->journal_seq_blacklist_gc_work,
+ bch2_blacklist_entries_gc);
+
+ INIT_LIST_HEAD(&c->journal_iters);
+
+ INIT_LIST_HEAD(&c->fsck_error_msgs);
+ mutex_init(&c->fsck_error_msgs_lock);
+
+ seqcount_init(&c->gc_pos_lock);
+
+ seqcount_init(&c->usage_lock);
+
+ sema_init(&c->io_in_flight, 128);
+
+ INIT_LIST_HEAD(&c->vfs_inodes_list);
+ mutex_init(&c->vfs_inodes_lock);
+
+ c->copy_gc_enabled = 1;
+ c->rebalance.enabled = 1;
+ c->promote_whole_extents = true;
+
+ c->journal.flush_write_time = &c->times[BCH_TIME_journal_flush_write];
+ c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write];
+ c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq];
+
+ bch2_fs_btree_cache_init_early(&c->btree_cache);
+
+ mutex_init(&c->sectors_available_lock);
+
+ ret = percpu_init_rwsem(&c->mark_lock);
+ if (ret)
+ goto err;
+
+ mutex_lock(&c->sb_lock);
+ ret = bch2_sb_to_fs(c, sb);
+ mutex_unlock(&c->sb_lock);
+
+ if (ret)
+ goto err;
+
+ pr_uuid(&name, c->sb.user_uuid.b);
+ strscpy(c->name, name.buf, sizeof(c->name));
+ printbuf_exit(&name);
+
+ ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0;
+ if (ret)
+ goto err;
+
+ /* Compat: */
+ if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
+ !BCH_SB_JOURNAL_FLUSH_DELAY(sb))
+ SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000);
+
+ if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
+ !BCH_SB_JOURNAL_RECLAIM_DELAY(sb))
+ SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 100);
+
+ c->opts = bch2_opts_default;
+ ret = bch2_opts_from_sb(&c->opts, sb);
+ if (ret)
+ goto err;
+
+ bch2_opts_apply(&c->opts, opts);
+
+ c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc;
+ if (c->opts.inodes_use_key_cache)
+ c->btree_key_cache_btrees |= 1U << BTREE_ID_inodes;
+ c->btree_key_cache_btrees |= 1U << BTREE_ID_logged_ops;
+
+ c->block_bits = ilog2(block_sectors(c));
+ c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c);
+
+ if (bch2_fs_init_fault("fs_alloc")) {
+ bch_err(c, "fs_alloc fault injected");
+ ret = -EFAULT;
+ goto err;
+ }
+
+ iter_size = sizeof(struct sort_iter) +
+ (btree_blocks(c) + 1) * 2 *
+ sizeof(struct sort_iter_set);
+
+ c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus()));
+
+ if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
+ WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512)) ||
+ !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
+ WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
+ !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
+ WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
+ !(c->io_complete_wq = alloc_workqueue("bcachefs_io",
+ WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 512)) ||
+ !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
+ WQ_FREEZABLE, 0)) ||
+#ifndef BCH_WRITE_REF_DEBUG
+ percpu_ref_init(&c->writes, bch2_writes_disabled,
+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
+#endif
+ mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
+ bioset_init(&c->btree_bio, 1,
+ max(offsetof(struct btree_read_bio, bio),
+ offsetof(struct btree_write_bio, wbio.bio)),
+ BIOSET_NEED_BVECS) ||
+ !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
+ !(c->online_reserved = alloc_percpu(u64)) ||
+ mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
+ btree_bytes(c)) ||
+ mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
+ !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
+ sizeof(u64), GFP_KERNEL))) {
+ ret = -BCH_ERR_ENOMEM_fs_other_alloc;
+ goto err;
+ }
+
+ ret = bch2_fs_counters_init(c) ?:
+ bch2_fs_sb_errors_init(c) ?:
+ bch2_io_clock_init(&c->io_clock[READ]) ?:
+ bch2_io_clock_init(&c->io_clock[WRITE]) ?:
+ bch2_fs_journal_init(&c->journal) ?:
+ bch2_fs_replicas_init(c) ?:
+ bch2_fs_btree_cache_init(c) ?:
+ bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
+ bch2_fs_btree_iter_init(c) ?:
+ bch2_fs_btree_interior_update_init(c) ?:
+ bch2_fs_buckets_waiting_for_journal_init(c) ?:
+ bch2_fs_btree_write_buffer_init(c) ?:
+ bch2_fs_subvolumes_init(c) ?:
+ bch2_fs_io_read_init(c) ?:
+ bch2_fs_io_write_init(c) ?:
+ bch2_fs_nocow_locking_init(c) ?:
+ bch2_fs_encryption_init(c) ?:
+ bch2_fs_compress_init(c) ?:
+ bch2_fs_ec_init(c) ?:
+ bch2_fs_fsio_init(c) ?:
+ bch2_fs_fs_io_buffered_init(c) ?:
+ bch2_fs_fs_io_direct_init(c);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < c->sb.nr_devices; i++)
+ if (bch2_dev_exists(c->disk_sb.sb, i) &&
+ bch2_dev_alloc(c, i)) {
+ ret = -EEXIST;
+ goto err;
+ }
+
+ bch2_journal_entry_res_resize(&c->journal,
+ &c->btree_root_journal_res,
+ BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_BTREE_PTR_U64s_MAX));
+ bch2_dev_usage_journal_reserve(c);
+ bch2_journal_entry_res_resize(&c->journal,
+ &c->clock_journal_res,
+ (sizeof(struct jset_entry_clock) / sizeof(u64)) * 2);
+
+ mutex_lock(&bch_fs_list_lock);
+ ret = bch2_fs_online(c);
+ mutex_unlock(&bch_fs_list_lock);
+
+ if (ret)
+ goto err;
+out:
+ return c;
+err:
+ bch2_fs_free(c);
+ c = ERR_PTR(ret);
+ goto out;
+}
+
+noinline_for_stack
+static void print_mount_opts(struct bch_fs *c)
+{
+ enum bch_opt_id i;
+ struct printbuf p = PRINTBUF;
+ bool first = true;
+
+ prt_str(&p, "mounting version ");
+ bch2_version_to_text(&p, c->sb.version);
+
+ if (c->opts.read_only) {
+ prt_str(&p, " opts=");
+ first = false;
+ prt_printf(&p, "ro");
+ }
+
+ for (i = 0; i < bch2_opts_nr; i++) {
+ const struct bch_option *opt = &bch2_opt_table[i];
+ u64 v = bch2_opt_get_by_id(&c->opts, i);
+
+ if (!(opt->flags & OPT_MOUNT))
+ continue;
+
+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
+ continue;
+
+ prt_str(&p, first ? " opts=" : ",");
+ first = false;
+ bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE);
+ }
+
+ bch_info(c, "%s", p.buf);
+ printbuf_exit(&p);
+}
+
+int bch2_fs_start(struct bch_fs *c)
+{
+ time64_t now = ktime_get_real_seconds();
+ int ret;
+
+ print_mount_opts(c);
+
+ down_write(&c->state_lock);
+
+ BUG_ON(test_bit(BCH_FS_started, &c->flags));
+
+ mutex_lock(&c->sb_lock);
+
+ ret = bch2_sb_members_v2_init(c);
+ if (ret) {
+ mutex_unlock(&c->sb_lock);
+ goto err;
+ }
+
+ for_each_online_member(c, ca)
+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
+
+ mutex_unlock(&c->sb_lock);
+
+ for_each_rw_member(c, ca)
+ bch2_dev_allocator_add(c, ca);
+ bch2_recalc_capacity(c);
+
+ ret = BCH_SB_INITIALIZED(c->disk_sb.sb)
+ ? bch2_fs_recovery(c)
+ : bch2_fs_initialize(c);
+ if (ret)
+ goto err;
+
+ ret = bch2_opts_check_may_set(c);
+ if (ret)
+ goto err;
+
+ if (bch2_fs_init_fault("fs_start")) {
+ bch_err(c, "fs_start fault injected");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ set_bit(BCH_FS_started, &c->flags);
+
+ if (c->opts.read_only) {
+ bch2_fs_read_only(c);
+ } else {
+ ret = !test_bit(BCH_FS_rw, &c->flags)
+ ? bch2_fs_read_write(c)
+ : bch2_fs_read_write_late(c);
+ if (ret)
+ goto err;
+ }
+
+ ret = 0;
+err:
+ if (ret)
+ bch_err_msg(c, ret, "starting filesystem");
+ else
+ bch_verbose(c, "done starting filesystem");
+ up_write(&c->state_lock);
+ return ret;
+}
+
+static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
+{
+ struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx);
+
+ if (le16_to_cpu(sb->block_size) != block_sectors(c))
+ return -BCH_ERR_mismatched_block_size;
+
+ if (le16_to_cpu(m.bucket_size) <
+ BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb))
+ return -BCH_ERR_bucket_size_too_small;
+
+ return 0;
+}
+
+static int bch2_dev_in_fs(struct bch_sb_handle *fs,
+ struct bch_sb_handle *sb)
+{
+ if (fs == sb)
+ return 0;
+
+ if (!uuid_equal(&fs->sb->uuid, &sb->sb->uuid))
+ return -BCH_ERR_device_not_a_member_of_filesystem;
+
+ if (!bch2_dev_exists(fs->sb, sb->sb->dev_idx))
+ return -BCH_ERR_device_has_been_removed;
+
+ if (fs->sb->block_size != sb->sb->block_size)
+ return -BCH_ERR_mismatched_block_size;
+
+ if (le16_to_cpu(fs->sb->version) < bcachefs_metadata_version_member_seq ||
+ le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_member_seq)
+ return 0;
+
+ if (fs->sb->seq == sb->sb->seq &&
+ fs->sb->write_time != sb->sb->write_time) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "Split brain detected between ");
+ prt_bdevname(&buf, sb->bdev);
+ prt_str(&buf, " and ");
+ prt_bdevname(&buf, fs->bdev);
+ prt_char(&buf, ':');
+ prt_newline(&buf);
+ prt_printf(&buf, "seq=%llu but write_time different, got", le64_to_cpu(sb->sb->seq));
+ prt_newline(&buf);
+
+ prt_bdevname(&buf, fs->bdev);
+ prt_char(&buf, ' ');
+ bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));
+ prt_newline(&buf);
+
+ prt_bdevname(&buf, sb->bdev);
+ prt_char(&buf, ' ');
+ bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));
+ prt_newline(&buf);
+
+ prt_printf(&buf, "Not using older sb");
+
+ pr_err("%s", buf.buf);
+ printbuf_exit(&buf);
+ return -BCH_ERR_device_splitbrain;
+ }
+
+ struct bch_member m = bch2_sb_member_get(fs->sb, sb->sb->dev_idx);
+ u64 seq_from_fs = le64_to_cpu(m.seq);
+ u64 seq_from_member = le64_to_cpu(sb->sb->seq);
+
+ if (seq_from_fs && seq_from_fs < seq_from_member) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "Split brain detected between ");
+ prt_bdevname(&buf, sb->bdev);
+ prt_str(&buf, " and ");
+ prt_bdevname(&buf, fs->bdev);
+ prt_char(&buf, ':');
+ prt_newline(&buf);
+
+ prt_bdevname(&buf, fs->bdev);
+ prt_str(&buf, "believes seq of ");
+ prt_bdevname(&buf, sb->bdev);
+ prt_printf(&buf, " to be %llu, but ", seq_from_fs);
+ prt_bdevname(&buf, sb->bdev);
+ prt_printf(&buf, " has %llu\n", seq_from_member);
+ prt_str(&buf, "Not using ");
+ prt_bdevname(&buf, sb->bdev);
+
+ pr_err("%s", buf.buf);
+ printbuf_exit(&buf);
+ return -BCH_ERR_device_splitbrain;
+ }
+
+ return 0;
+}
+
+/* Device startup/shutdown: */
+
+static void bch2_dev_release(struct kobject *kobj)
+{
+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
+
+ kfree(ca);
+}
+
+static void bch2_dev_free(struct bch_dev *ca)
+{
+ cancel_work_sync(&ca->io_error_work);
+
+ if (ca->kobj.state_in_sysfs &&
+ ca->disk_sb.bdev)
+ sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
+
+ if (ca->kobj.state_in_sysfs)
+ kobject_del(&ca->kobj);
+
+ bch2_free_super(&ca->disk_sb);
+ bch2_dev_journal_exit(ca);
+
+ free_percpu(ca->io_done);
+ bioset_exit(&ca->replica_set);
+ bch2_dev_buckets_free(ca);
+ free_page((unsigned long) ca->sb_read_scratch);
+
+ bch2_time_stats_exit(&ca->io_latency[WRITE]);
+ bch2_time_stats_exit(&ca->io_latency[READ]);
+
+ percpu_ref_exit(&ca->io_ref);
+ percpu_ref_exit(&ca->ref);
+ kobject_put(&ca->kobj);
+}
+
+static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
+{
+ lockdep_assert_held(&c->state_lock);
+
+ if (percpu_ref_is_zero(&ca->io_ref))
+ return;
+
+ __bch2_dev_read_only(c, ca);
+
+ reinit_completion(&ca->io_ref_completion);
+ percpu_ref_kill(&ca->io_ref);
+ wait_for_completion(&ca->io_ref_completion);
+
+ if (ca->kobj.state_in_sysfs) {
+ sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
+ sysfs_remove_link(&ca->kobj, "block");
+ }
+
+ bch2_free_super(&ca->disk_sb);
+ bch2_dev_journal_exit(ca);
+}
+
+static void bch2_dev_ref_complete(struct percpu_ref *ref)
+{
+ struct bch_dev *ca = container_of(ref, struct bch_dev, ref);
+
+ complete(&ca->ref_completion);
+}
+
+static void bch2_dev_io_ref_complete(struct percpu_ref *ref)
+{
+ struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref);
+
+ complete(&ca->io_ref_completion);
+}
+
+static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
+{
+ int ret;
+
+ if (!c->kobj.state_in_sysfs)
+ return 0;
+
+ if (!ca->kobj.state_in_sysfs) {
+ ret = kobject_add(&ca->kobj, &c->kobj,
+ "dev-%u", ca->dev_idx);
+ if (ret)
+ return ret;
+ }
+
+ if (ca->disk_sb.bdev) {
+ struct kobject *block = bdev_kobj(ca->disk_sb.bdev);
+
+ ret = sysfs_create_link(block, &ca->kobj, "bcachefs");
+ if (ret)
+ return ret;
+
+ ret = sysfs_create_link(&ca->kobj, block, "block");
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
+ struct bch_member *member)
+{
+ struct bch_dev *ca;
+ unsigned i;
+
+ ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ if (!ca)
+ return NULL;
+
+ kobject_init(&ca->kobj, &bch2_dev_ktype);
+ init_completion(&ca->ref_completion);
+ init_completion(&ca->io_ref_completion);
+
+ init_rwsem(&ca->bucket_lock);
+
+ INIT_WORK(&ca->io_error_work, bch2_io_error_work);
+
+ bch2_time_stats_init(&ca->io_latency[READ]);
+ bch2_time_stats_init(&ca->io_latency[WRITE]);
+
+ ca->mi = bch2_mi_to_cpu(member);
+
+ for (i = 0; i < ARRAY_SIZE(member->errors); i++)
+ atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i]));
+
+ ca->uuid = member->uuid;
+
+ ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
+ ca->mi.bucket_size / btree_sectors(c));
+
+ if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
+ 0, GFP_KERNEL) ||
+ percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
+ !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
+ bch2_dev_buckets_alloc(c, ca) ||
+ bioset_init(&ca->replica_set, 4,
+ offsetof(struct bch_write_bio, bio), 0) ||
+ !(ca->io_done = alloc_percpu(*ca->io_done)))
+ goto err;
+
+ return ca;
+err:
+ bch2_dev_free(ca);
+ return NULL;
+}
+
+static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca,
+ unsigned dev_idx)
+{
+ ca->dev_idx = dev_idx;
+ __set_bit(ca->dev_idx, ca->self.d);
+ scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
+
+ ca->fs = c;
+ rcu_assign_pointer(c->devs[ca->dev_idx], ca);
+
+ if (bch2_dev_sysfs_online(c, ca))
+ pr_warn("error creating sysfs objects");
+}
+
+static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
+{
+ struct bch_member member = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
+ struct bch_dev *ca = NULL;
+ int ret = 0;
+
+ if (bch2_fs_init_fault("dev_alloc"))
+ goto err;
+
+ ca = __bch2_dev_alloc(c, &member);
+ if (!ca)
+ goto err;
+
+ ca->fs = c;
+
+ bch2_dev_attach(c, ca, dev_idx);
+ return ret;
+err:
+ if (ca)
+ bch2_dev_free(ca);
+ return -BCH_ERR_ENOMEM_dev_alloc;
+}
+
+static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
+{
+ unsigned ret;
+
+ if (bch2_dev_is_online(ca)) {
+ bch_err(ca, "already have device online in slot %u",
+ sb->sb->dev_idx);
+ return -BCH_ERR_device_already_online;
+ }
+
+ if (get_capacity(sb->bdev->bd_disk) <
+ ca->mi.bucket_size * ca->mi.nbuckets) {
+ bch_err(ca, "cannot online: device too small");
+ return -BCH_ERR_device_size_too_small;
+ }
+
+ BUG_ON(!percpu_ref_is_zero(&ca->io_ref));
+
+ ret = bch2_dev_journal_init(ca, sb->sb);
+ if (ret)
+ return ret;
+
+ /* Commit: */
+ ca->disk_sb = *sb;
+ memset(sb, 0, sizeof(*sb));
+
+ ca->dev = ca->disk_sb.bdev->bd_dev;
+
+ percpu_ref_reinit(&ca->io_ref);
+
+ return 0;
+}
+
+static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
+{
+ struct bch_dev *ca;
+ int ret;
+
+ lockdep_assert_held(&c->state_lock);
+
+ if (le64_to_cpu(sb->sb->seq) >
+ le64_to_cpu(c->disk_sb.sb->seq))
+ bch2_sb_to_fs(c, sb->sb);
+
+ BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
+ !c->devs[sb->sb->dev_idx]);
+
+ ca = bch_dev_locked(c, sb->sb->dev_idx);
+
+ ret = __bch2_dev_attach_bdev(ca, sb);
+ if (ret)
+ return ret;
+
+ bch2_dev_sysfs_online(c, ca);
+
+ struct printbuf name = PRINTBUF;
+ prt_bdevname(&name, ca->disk_sb.bdev);
+
+ if (c->sb.nr_devices == 1)
+ strlcpy(c->name, name.buf, sizeof(c->name));
+ strlcpy(ca->name, name.buf, sizeof(ca->name));
+
+ printbuf_exit(&name);
+
+ rebalance_wakeup(c);
+ return 0;
+}
+
+/* Device management: */
+
+/*
+ * Note: this function is also used by the error paths - when a particular
+ * device sees an error, we call it to determine whether we can just set the
+ * device RO, or - if this function returns false - we'll set the whole
+ * filesystem RO:
+ *
+ * XXX: maybe we should be more explicit about whether we're changing state
+ * because we got an error or what have you?
+ */
+bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
+ enum bch_member_state new_state, int flags)
+{
+ struct bch_devs_mask new_online_devs;
+ int nr_rw = 0, required;
+
+ lockdep_assert_held(&c->state_lock);
+
+ switch (new_state) {
+ case BCH_MEMBER_STATE_rw:
+ return true;
+ case BCH_MEMBER_STATE_ro:
+ if (ca->mi.state != BCH_MEMBER_STATE_rw)
+ return true;
+
+ /* do we have enough devices to write to? */
+ for_each_member_device(c, ca2)
+ if (ca2 != ca)
+ nr_rw += ca2->mi.state == BCH_MEMBER_STATE_rw;
+
+ required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED)
+ ? c->opts.metadata_replicas
+ : c->opts.metadata_replicas_required,
+ !(flags & BCH_FORCE_IF_DATA_DEGRADED)
+ ? c->opts.data_replicas
+ : c->opts.data_replicas_required);
+
+ return nr_rw >= required;
+ case BCH_MEMBER_STATE_failed:
+ case BCH_MEMBER_STATE_spare:
+ if (ca->mi.state != BCH_MEMBER_STATE_rw &&
+ ca->mi.state != BCH_MEMBER_STATE_ro)
+ return true;
+
+ /* do we have enough devices to read from? */
+ new_online_devs = bch2_online_devs(c);
+ __clear_bit(ca->dev_idx, new_online_devs.d);
+
+ return bch2_have_enough_devs(c, new_online_devs, flags, false);
+ default:
+ BUG();
+ }
+}
+
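+/*
+ * Unless mounting degraded, every rw/ro member must be online; then check
+ * that enough devices are present for the configured replication levels:
+ */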
+static bool bch2_fs_may_start(struct bch_fs *c)
+{
+ struct bch_dev *ca;
+ unsigned i, flags = 0;
+
+ if (c->opts.very_degraded)
+ flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
+
+ if (c->opts.degraded)
+ flags |= BCH_FORCE_IF_DEGRADED;
+
+ if (!c->opts.degraded &&
+ !c->opts.very_degraded) {
+ mutex_lock(&c->sb_lock);
+
+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
+ if (!bch2_dev_exists(c->disk_sb.sb, i))
+ continue;
+
+ ca = bch_dev_locked(c, i);
+
+ if (!bch2_dev_is_online(ca) &&
+ (ca->mi.state == BCH_MEMBER_STATE_rw ||
+ ca->mi.state == BCH_MEMBER_STATE_ro)) {
+ mutex_unlock(&c->sb_lock);
+ return false;
+ }
+ }
+ mutex_unlock(&c->sb_lock);
+ }
+
+ return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
+}
+
+static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
+{
+ /*
+ * The allocator thread itself allocates btree nodes, so stop it first:
+ */
+ bch2_dev_allocator_remove(c, ca);
+ bch2_dev_journal_stop(&c->journal, ca);
+}
+
+static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
+{
+ lockdep_assert_held(&c->state_lock);
+
+ BUG_ON(ca->mi.state != BCH_MEMBER_STATE_rw);
+
+ bch2_dev_allocator_add(c, ca);
+ bch2_recalc_capacity(c);
+}
+
+int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
+ enum bch_member_state new_state, int flags)
+{
+ struct bch_member *m;
+ int ret = 0;
+
+ if (ca->mi.state == new_state)
+ return 0;
+
+ if (!bch2_dev_state_allowed(c, ca, new_state, flags))
+ return -BCH_ERR_device_state_not_allowed;
+
+ if (new_state != BCH_MEMBER_STATE_rw)
+ __bch2_dev_read_only(c, ca);
+
+ bch_notice(ca, "%s", bch2_member_states[new_state]);
+
+ mutex_lock(&c->sb_lock);
+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ SET_BCH_MEMBER_STATE(m, new_state);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ if (new_state == BCH_MEMBER_STATE_rw)
+ __bch2_dev_read_write(c, ca);
+
+ rebalance_wakeup(c);
+
+ return ret;
+}
+
+int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
+ enum bch_member_state new_state, int flags)
+{
+ int ret;
+
+ down_write(&c->state_lock);
+ ret = __bch2_dev_set_state(c, ca, new_state, flags);
+ up_write(&c->state_lock);
+
+ return ret;
+}
+
+/* Device add/removal: */
+
+static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
+{
+ struct bpos start = POS(ca->dev_idx, 0);
+ struct bpos end = POS(ca->dev_idx, U64_MAX);
+ int ret;
+
+ /*
+ * We clear the LRU and need_discard btrees first so that we don't race
+ * with bch2_do_invalidates() and bch2_do_discards()
+ */
+ ret = bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
+ BTREE_TRIGGER_NORUN, NULL) ?:
+ bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
+ BTREE_TRIGGER_NORUN, NULL) ?:
+ bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
+ BTREE_TRIGGER_NORUN, NULL) ?:
+ bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
+ BTREE_TRIGGER_NORUN, NULL) ?:
+ bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
+ BTREE_TRIGGER_NORUN, NULL) ?:
+ bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end,
+ BTREE_TRIGGER_NORUN, NULL);
+ bch_err_msg(c, ret, "removing dev alloc info");
+ return ret;
+}
+
+int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
+{
+ struct bch_member *m;
+ unsigned dev_idx = ca->dev_idx, data;
+ int ret;
+
+ down_write(&c->state_lock);
+
+ /*
+ * We consume a reference to ca->ref, regardless of whether we succeed
+ * or fail:
+ */
+ percpu_ref_put(&ca->ref);
+
+ if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) {
+ bch_err(ca, "Cannot remove without losing data");
+ ret = -BCH_ERR_device_state_not_allowed;
+ goto err;
+ }
+
+ __bch2_dev_read_only(c, ca);
+
+ ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
+ bch_err_msg(ca, ret, "dropping data");
+ if (ret)
+ goto err;
+
+ ret = bch2_dev_remove_alloc(c, ca);
+ bch_err_msg(ca, ret, "deleting alloc info");
+ if (ret)
+ goto err;
+
+ ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
+ bch_err_msg(ca, ret, "flushing journal");
+ if (ret)
+ goto err;
+
+ ret = bch2_journal_flush(&c->journal);
+ bch_err(ca, "journal error");
+ if (ret)
+ goto err;
+
+ ret = bch2_replicas_gc2(c);
+ bch_err_msg(ca, ret, "in replicas_gc2()");
+ if (ret)
+ goto err;
+
+ data = bch2_dev_has_data(c, ca);
+ if (data) {
+ struct printbuf data_has = PRINTBUF;
+
+ prt_bitflags(&data_has, bch2_data_types, data);
+ bch_err(ca, "Remove failed, still has data (%s)", data_has.buf);
+ printbuf_exit(&data_has);
+ ret = -EBUSY;
+ goto err;
+ }
+
+ __bch2_dev_offline(c, ca);
+
+ mutex_lock(&c->sb_lock);
+ rcu_assign_pointer(c->devs[ca->dev_idx], NULL);
+ mutex_unlock(&c->sb_lock);
+
+ percpu_ref_kill(&ca->ref);
+ wait_for_completion(&ca->ref_completion);
+
+ bch2_dev_free(ca);
+
+ /*
+ * At this point the device object has been removed in-core, but the
+ * on-disk journal might still refer to the device index via sb device
+ * usage entries. Recovery fails if it sees usage information for an
+ * invalid device. Flush journal pins to push the back of the journal
+ * past now invalid device index references before we update the
+ * superblock, but after the device object has been removed so any
+ * further journal writes elide usage info for the device.
+ */
+ bch2_journal_flush_all_pins(&c->journal);
+
+ /*
+ * Free this device's slot in the bch_member array - all pointers to
+ * this device must be gone:
+ */
+ mutex_lock(&c->sb_lock);
+ m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
+ memset(&m->uuid, 0, sizeof(m->uuid));
+
+ bch2_write_super(c);
+
+ mutex_unlock(&c->sb_lock);
+ up_write(&c->state_lock);
+
+ bch2_dev_usage_journal_reserve(c);
+ return 0;
+err:
+ if (ca->mi.state == BCH_MEMBER_STATE_rw &&
+ !percpu_ref_is_zero(&ca->io_ref))
+ __bch2_dev_read_write(c, ca);
+ up_write(&c->state_lock);
+ return ret;
+}
+
+/* Add new device to running filesystem: */
+int bch2_dev_add(struct bch_fs *c, const char *path)
+{
+ struct bch_opts opts = bch2_opts_empty();
+ struct bch_sb_handle sb;
+ struct bch_dev *ca = NULL;
+ struct bch_sb_field_members_v2 *mi;
+ struct bch_member dev_mi;
+ unsigned dev_idx, nr_devices, u64s;
+ struct printbuf errbuf = PRINTBUF;
+ struct printbuf label = PRINTBUF;
+ int ret;
+
+ ret = bch2_read_super(path, &opts, &sb);
+ bch_err_msg(c, ret, "reading super");
+ if (ret)
+ goto err;
+
+ dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx);
+
+ if (BCH_MEMBER_GROUP(&dev_mi)) {
+ bch2_disk_path_to_text_sb(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1);
+ if (label.allocation_failure) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+
+ ret = bch2_dev_may_add(sb.sb, c);
+ if (ret)
+ goto err;
+
+ ca = __bch2_dev_alloc(c, &dev_mi);
+ if (!ca) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ bch2_dev_usage_init(ca);
+
+ ret = __bch2_dev_attach_bdev(ca, &sb);
+ if (ret)
+ goto err;
+
+ ret = bch2_dev_journal_alloc(ca);
+ bch_err_msg(c, ret, "allocating journal");
+ if (ret)
+ goto err;
+
+ down_write(&c->state_lock);
+ mutex_lock(&c->sb_lock);
+
+ ret = bch2_sb_from_fs(c, ca);
+ bch_err_msg(c, ret, "setting up new superblock");
+ if (ret)
+ goto err_unlock;
+
+ if (dynamic_fault("bcachefs:add:no_slot"))
+ goto no_slot;
+
+ for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++)
+ if (!bch2_dev_exists(c->disk_sb.sb, dev_idx))
+ goto have_slot;
+no_slot:
+ ret = -BCH_ERR_ENOSPC_sb_members;
+ bch_err_msg(c, ret, "setting up new superblock");
+ goto err_unlock;
+
+have_slot:
+ nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
+
+ mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+ u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) +
+ le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64));
+
+ mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
+ if (!mi) {
+ ret = -BCH_ERR_ENOSPC_sb_members;
+ bch_err_msg(c, ret, "setting up new superblock");
+ goto err_unlock;
+ }
+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
+
+ /* success: */
+
+ *m = dev_mi;
+ m->last_mount = cpu_to_le64(ktime_get_real_seconds());
+ c->disk_sb.sb->nr_devices = nr_devices;
+
+ ca->disk_sb.sb->dev_idx = dev_idx;
+ bch2_dev_attach(c, ca, dev_idx);
+
+ if (BCH_MEMBER_GROUP(&dev_mi)) {
+ ret = __bch2_dev_group_set(c, ca, label.buf);
+ bch_err_msg(c, ret, "creating new label");
+ if (ret)
+ goto err_unlock;
+ }
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ bch2_dev_usage_journal_reserve(c);
+
+ ret = bch2_trans_mark_dev_sb(c, ca);
+ bch_err_msg(ca, ret, "marking new superblock");
+ if (ret)
+ goto err_late;
+
+ ret = bch2_fs_freespace_init(c);
+ bch_err_msg(ca, ret, "initializing free space");
+ if (ret)
+ goto err_late;
+
+ ca->new_fs_bucket_idx = 0;
+
+ if (ca->mi.state == BCH_MEMBER_STATE_rw)
+ __bch2_dev_read_write(c, ca);
+
+ up_write(&c->state_lock);
+ return 0;
+
+err_unlock:
+ mutex_unlock(&c->sb_lock);
+ up_write(&c->state_lock);
+err:
+ if (ca)
+ bch2_dev_free(ca);
+ bch2_free_super(&sb);
+ printbuf_exit(&label);
+ printbuf_exit(&errbuf);
+ bch_err_fn(c, ret);
+ return ret;
+err_late:
+ up_write(&c->state_lock);
+ ca = NULL;
+ goto err;
+}
+
+/* Hot add existing device to running filesystem: */
+int bch2_dev_online(struct bch_fs *c, const char *path)
+{
+ struct bch_opts opts = bch2_opts_empty();
+ struct bch_sb_handle sb = { NULL };
+ struct bch_dev *ca;
+ unsigned dev_idx;
+ int ret;
+
+ down_write(&c->state_lock);
+
+ ret = bch2_read_super(path, &opts, &sb);
+ if (ret) {
+ up_write(&c->state_lock);
+ return ret;
+ }
+
+ dev_idx = sb.sb->dev_idx;
+
+ ret = bch2_dev_in_fs(&c->disk_sb, &sb);
+ bch_err_msg(c, ret, "bringing %s online", path);
+ if (ret)
+ goto err;
+
+ ret = bch2_dev_attach_bdev(c, &sb);
+ if (ret)
+ goto err;
+
+ ca = bch_dev_locked(c, dev_idx);
+
+ ret = bch2_trans_mark_dev_sb(c, ca);
+ bch_err_msg(c, ret, "bringing %s online: error from bch2_trans_mark_dev_sb", path);
+ if (ret)
+ goto err;
+
+ if (ca->mi.state == BCH_MEMBER_STATE_rw)
+ __bch2_dev_read_write(c, ca);
+
+ if (!ca->mi.freespace_initialized) {
+ ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
+ bch_err_msg(ca, ret, "initializing free space");
+ if (ret)
+ goto err;
+ }
+
+ if (!ca->journal.nr) {
+ ret = bch2_dev_journal_alloc(ca);
+ bch_err_msg(ca, ret, "allocating journal");
+ if (ret)
+ goto err;
+ }
+
+ mutex_lock(&c->sb_lock);
+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
+ cpu_to_le64(ktime_get_real_seconds());
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ up_write(&c->state_lock);
+ return 0;
+err:
+ up_write(&c->state_lock);
+ bch2_free_super(&sb);
+ return ret;
+}
+
+int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
+{
+ down_write(&c->state_lock);
+
+ if (!bch2_dev_is_online(ca)) {
+ bch_err(ca, "Already offline");
+ up_write(&c->state_lock);
+ return 0;
+ }
+
+ if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) {
+ bch_err(ca, "Cannot offline required disk");
+ up_write(&c->state_lock);
+ return -BCH_ERR_device_state_not_allowed;
+ }
+
+ __bch2_dev_offline(c, ca);
+
+ up_write(&c->state_lock);
+ return 0;
+}
+
+int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
+{
+ struct bch_member *m;
+ u64 old_nbuckets;
+ int ret = 0;
+
+ down_write(&c->state_lock);
+ old_nbuckets = ca->mi.nbuckets;
+
+ if (nbuckets < ca->mi.nbuckets) {
+ bch_err(ca, "Cannot shrink yet");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (bch2_dev_is_online(ca) &&
+ get_capacity(ca->disk_sb.bdev->bd_disk) <
+ ca->mi.bucket_size * nbuckets) {
+ bch_err(ca, "New size larger than device");
+ ret = -BCH_ERR_device_size_too_small;
+ goto err;
+ }
+
+ ret = bch2_dev_buckets_resize(c, ca, nbuckets);
+ bch_err_msg(ca, ret, "resizing buckets");
+ if (ret)
+ goto err;
+
+ ret = bch2_trans_mark_dev_sb(c, ca);
+ if (ret)
+ goto err;
+
+ mutex_lock(&c->sb_lock);
+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ m->nbuckets = cpu_to_le64(nbuckets);
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ if (ca->mi.freespace_initialized) {
+ ret = bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets);
+ if (ret)
+ goto err;
+
+ /*
+ * XXX: this is all wrong transactionally - we'll be able to do
+ * this correctly after the disk space accounting rewrite
+ */
+ ca->usage_base->d[BCH_DATA_free].buckets += nbuckets - old_nbuckets;
+ }
+
+ bch2_recalc_capacity(c);
+err:
+ up_write(&c->state_lock);
+ return ret;
+}
+
+/* return with ref on ca->ref: */
+struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name)
+{
+ rcu_read_lock();
+ for_each_member_device_rcu(c, ca, NULL)
+ if (!strcmp(name, ca->name)) {
+ rcu_read_unlock();
+ return ca;
+ }
+ rcu_read_unlock();
+ return ERR_PTR(-BCH_ERR_ENOENT_dev_not_found);
+}
+
+/* Filesystem open: */
+
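+/* Newest superblock wins: compare by seq, then by write_time: */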
+static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r)
+{
+ return cmp_int(le64_to_cpu(l->seq), le64_to_cpu(r->seq)) ?:
+ cmp_int(le64_to_cpu(l->write_time), le64_to_cpu(r->write_time));
+}
+
+struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
+ struct bch_opts opts)
+{
+ DARRAY(struct bch_sb_handle) sbs = { 0 };
+ struct bch_fs *c = NULL;
+ struct bch_sb_handle *best = NULL;
+ struct printbuf errbuf = PRINTBUF;
+ int ret = 0;
+
+ if (!try_module_get(THIS_MODULE))
+ return ERR_PTR(-ENODEV);
+
+ if (!nr_devices) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = darray_make_room(&sbs, nr_devices);
+ if (ret)
+ goto err;
+
+ for (unsigned i = 0; i < nr_devices; i++) {
+ struct bch_sb_handle sb = { NULL };
+
+ ret = bch2_read_super(devices[i], &opts, &sb);
+ if (ret)
+ goto err;
+
+ BUG_ON(darray_push(&sbs, sb));
+ }
+
+ if (opts.nochanges && !opts.read_only) {
+ ret = -BCH_ERR_erofs_nochanges;
+ goto err_print;
+ }
+
+ darray_for_each(sbs, sb)
+ if (!best || sb_cmp(sb->sb, best->sb) > 0)
+ best = sb;
+
+ darray_for_each_reverse(sbs, sb) {
+ ret = bch2_dev_in_fs(best, sb);
+
+ if (ret == -BCH_ERR_device_has_been_removed ||
+ ret == -BCH_ERR_device_splitbrain) {
+ bch2_free_super(sb);
+ darray_remove_item(&sbs, sb);
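+ /* removing an entry shifts the array; adjust best if it pointed past sb: */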
+ best -= best > sb;
+ ret = 0;
+ continue;
+ }
+
+ if (ret)
+ goto err_print;
+ }
+
+ c = bch2_fs_alloc(best->sb, opts);
+ ret = PTR_ERR_OR_ZERO(c);
+ if (ret)
+ goto err;
+
+ down_write(&c->state_lock);
+ darray_for_each(sbs, sb) {
+ ret = bch2_dev_attach_bdev(c, sb);
+ if (ret) {
+ up_write(&c->state_lock);
+ goto err;
+ }
+ }
+ up_write(&c->state_lock);
+
+ if (!bch2_fs_may_start(c)) {
+ ret = -BCH_ERR_insufficient_devices_to_start;
+ goto err_print;
+ }
+
+ if (!c->opts.nostart) {
+ ret = bch2_fs_start(c);
+ if (ret)
+ goto err;
+ }
+out:
+ darray_for_each(sbs, sb)
+ bch2_free_super(sb);
+ darray_exit(&sbs);
+ printbuf_exit(&errbuf);
+ module_put(THIS_MODULE);
+ return c;
+err_print:
+ pr_err("bch_fs_open err opening %s: %s",
+ devices[0], bch2_err_str(ret));
+err:
+ if (!IS_ERR_OR_NULL(c))
+ bch2_fs_stop(c);
+ c = ERR_PTR(ret);
+ goto out;
+}
+
+/* Global interfaces/init */
+
+static void bcachefs_exit(void)
+{
+ bch2_debug_exit();
+ bch2_vfs_exit();
+ bch2_chardev_exit();
+ bch2_btree_key_cache_exit();
+ if (bcachefs_kset)
+ kset_unregister(bcachefs_kset);
+}
+
+static int __init bcachefs_init(void)
+{
+ bch2_bkey_pack_test();
+
+ if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) ||
+ bch2_btree_key_cache_init() ||
+ bch2_chardev_init() ||
+ bch2_vfs_init() ||
+ bch2_debug_init())
+ goto err;
+
+ return 0;
+err:
+ bcachefs_exit();
+ return -ENOMEM;
+}
+
+#define BCH_DEBUG_PARAM(name, description) \
+ bool bch2_##name; \
+ module_param_named(name, bch2_##name, bool, 0644); \
+ MODULE_PARM_DESC(name, description);
+BCH_DEBUG_PARAMS()
+#undef BCH_DEBUG_PARAM
+
+__maybe_unused
+static unsigned bch2_metadata_version = bcachefs_metadata_version_current;
+module_param_named(version, bch2_metadata_version, uint, 0400);
+
+module_exit(bcachefs_exit);
+module_init(bcachefs_init);
diff --git a/c_src/libbcachefs/super.h b/c_src/libbcachefs/super.h
new file mode 100644
index 00000000..dada0933
--- /dev/null
+++ b/c_src/libbcachefs/super.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SUPER_H
+#define _BCACHEFS_SUPER_H
+
+#include "extents.h"
+
+#include "bcachefs_ioctl.h"
+
+#include <linux/math64.h>
+
+extern const char * const bch2_fs_flag_strs[];
+
+struct bch_fs *bch2_dev_to_fs(dev_t);
+struct bch_fs *bch2_uuid_to_fs(__uuid_t);
+
+bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
+ enum bch_member_state, int);
+int __bch2_dev_set_state(struct bch_fs *, struct bch_dev *,
+ enum bch_member_state, int);
+int bch2_dev_set_state(struct bch_fs *, struct bch_dev *,
+ enum bch_member_state, int);
+
+int bch2_dev_fail(struct bch_dev *, int);
+int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int);
+int bch2_dev_add(struct bch_fs *, const char *);
+int bch2_dev_online(struct bch_fs *, const char *);
+int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int);
+int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
+struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
+
+bool bch2_fs_emergency_read_only(struct bch_fs *);
+void bch2_fs_read_only(struct bch_fs *);
+
+int bch2_fs_read_write(struct bch_fs *);
+int bch2_fs_read_write_early(struct bch_fs *);
+
+/*
+ * Only for use in the recovery/fsck path:
+ */
+static inline void bch2_fs_lazy_rw(struct bch_fs *c)
+{
+ if (!test_bit(BCH_FS_rw, &c->flags) &&
+ !test_bit(BCH_FS_was_rw, &c->flags))
+ bch2_fs_read_write_early(c);
+}
+
+void __bch2_fs_stop(struct bch_fs *);
+void bch2_fs_free(struct bch_fs *);
+void bch2_fs_stop(struct bch_fs *);
+
+int bch2_fs_start(struct bch_fs *);
+struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts);
+
+#endif /* _BCACHEFS_SUPER_H */
diff --git a/c_src/libbcachefs/super_types.h b/c_src/libbcachefs/super_types.h
new file mode 100644
index 00000000..87d159b9
--- /dev/null
+++ b/c_src/libbcachefs/super_types.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SUPER_TYPES_H
+#define _BCACHEFS_SUPER_TYPES_H
+
+struct bch_sb_handle {
+ struct bch_sb *sb;
+ struct block_device *bdev;
+ char *sb_name;
+ struct bio *bio;
+ void *holder;
+ size_t buffer_size;
+ blk_mode_t mode;
+ unsigned have_layout:1;
+ unsigned have_bio:1;
+ unsigned fs_sb:1;
+ u64 seq;
+};
+
+struct bch_devs_mask {
+ unsigned long d[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
+};
+
+struct bch_devs_list {
+ u8 nr;
+ u8 data[BCH_BKEY_PTRS_MAX];
+};
+
+struct bch_member_cpu {
+ u64 nbuckets; /* device size */
+ u16 first_bucket; /* index of first bucket used */
+ u16 bucket_size; /* sectors */
+ u16 group;
+ u8 state;
+ u8 discard;
+ u8 data_allowed;
+ u8 durability;
+ u8 freespace_initialized;
+ u8 valid;
+};
+
+#endif /* _BCACHEFS_SUPER_TYPES_H */
diff --git a/c_src/libbcachefs/sysfs.c b/c_src/libbcachefs/sysfs.c
new file mode 100644
index 00000000..8ed52319
--- /dev/null
+++ b/c_src/libbcachefs/sysfs.c
@@ -0,0 +1,1026 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bcache sysfs interfaces
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#ifndef NO_BCACHEFS_SYSFS
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "sysfs.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_iter.h"
+#include "btree_key_cache.h"
+#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "btree_gc.h"
+#include "buckets.h"
+#include "clock.h"
+#include "disk_groups.h"
+#include "ec.h"
+#include "inode.h"
+#include "journal.h"
+#include "keylist.h"
+#include "move.h"
+#include "movinggc.h"
+#include "nocow_locking.h"
+#include "opts.h"
+#include "rebalance.h"
+#include "replicas.h"
+#include "super-io.h"
+#include "tests.h"
+
+#include <linux/blkdev.h>
+#include <linux/sort.h>
+#include <linux/sched/clock.h>
+
+#include "util.h"
+
+#define SYSFS_OPS(type) \
+const struct sysfs_ops type ## _sysfs_ops = { \
+ .show = type ## _show, \
+ .store = type ## _store \
+}
+
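+/*
+ * SHOW() and STORE() wrap printbuf-based helpers into sysfs show/store
+ * methods, handling newline termination, allocation failure and error class
+ * conversion:
+ */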
+#define SHOW(fn) \
+static ssize_t fn ## _to_text(struct printbuf *, \
+ struct kobject *, struct attribute *); \
+ \
+static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
+ char *buf) \
+{ \
+ struct printbuf out = PRINTBUF; \
+ ssize_t ret = fn ## _to_text(&out, kobj, attr); \
+ \
+ if (out.pos && out.buf[out.pos - 1] != '\n') \
+ prt_newline(&out); \
+ \
+ if (!ret && out.allocation_failure) \
+ ret = -ENOMEM; \
+ \
+ if (!ret) { \
+ ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \
+ memcpy(buf, out.buf, ret); \
+ } \
+ printbuf_exit(&out); \
+ return bch2_err_class(ret); \
+} \
+ \
+static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\
+ struct attribute *attr)
+
+#define STORE(fn) \
+static ssize_t fn ## _store_inner(struct kobject *, struct attribute *,\
+ const char *, size_t); \
+ \
+static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
+ const char *buf, size_t size) \
+{ \
+ return bch2_err_class(fn##_store_inner(kobj, attr, buf, size)); \
+} \
+ \
+static ssize_t fn ## _store_inner(struct kobject *kobj, struct attribute *attr,\
+ const char *buf, size_t size)
+
+#define __sysfs_attribute(_name, _mode) \
+ static struct attribute sysfs_##_name = \
+ { .name = #_name, .mode = _mode }
+
+#define write_attribute(n) __sysfs_attribute(n, 0200)
+#define read_attribute(n) __sysfs_attribute(n, 0444)
+#define rw_attribute(n) __sysfs_attribute(n, 0644)
+
+#define sysfs_printf(file, fmt, ...) \
+do { \
+ if (attr == &sysfs_ ## file) \
+ prt_printf(out, fmt "\n", __VA_ARGS__); \
+} while (0)
+
+#define sysfs_print(file, var) \
+do { \
+ if (attr == &sysfs_ ## file) \
+ snprint(out, var); \
+} while (0)
+
+#define sysfs_hprint(file, val) \
+do { \
+ if (attr == &sysfs_ ## file) \
+ prt_human_readable_s64(out, val); \
+} while (0)
+
+#define sysfs_strtoul(file, var) \
+do { \
+ if (attr == &sysfs_ ## file) \
+ return strtoul_safe(buf, var) ?: (ssize_t) size; \
+} while (0)
+
+#define sysfs_strtoul_clamp(file, var, min, max) \
+do { \
+ if (attr == &sysfs_ ## file) \
+ return strtoul_safe_clamp(buf, var, min, max) \
+ ?: (ssize_t) size; \
+} while (0)
+
+#define strtoul_or_return(cp) \
+({ \
+ unsigned long _v; \
+ int _r = kstrtoul(cp, 10, &_v); \
+ if (_r) \
+ return _r; \
+ _v; \
+})
+
+write_attribute(trigger_gc);
+write_attribute(trigger_discards);
+write_attribute(trigger_invalidates);
+write_attribute(prune_cache);
+write_attribute(btree_wakeup);
+rw_attribute(btree_gc_periodic);
+rw_attribute(gc_gens_pos);
+
+read_attribute(uuid);
+read_attribute(minor);
+read_attribute(flags);
+read_attribute(bucket_size);
+read_attribute(first_bucket);
+read_attribute(nbuckets);
+rw_attribute(durability);
+read_attribute(io_done);
+read_attribute(io_errors);
+write_attribute(io_errors_reset);
+
+read_attribute(io_latency_read);
+read_attribute(io_latency_write);
+read_attribute(io_latency_stats_read);
+read_attribute(io_latency_stats_write);
+read_attribute(congested);
+
+read_attribute(btree_write_stats);
+
+read_attribute(btree_cache_size);
+read_attribute(compression_stats);
+read_attribute(journal_debug);
+read_attribute(btree_updates);
+read_attribute(btree_cache);
+read_attribute(btree_key_cache);
+read_attribute(stripes_heap);
+read_attribute(open_buckets);
+read_attribute(open_buckets_partial);
+read_attribute(write_points);
+read_attribute(nocow_lock_table);
+
+#ifdef BCH_WRITE_REF_DEBUG
+read_attribute(write_refs);
+
+static const char * const bch2_write_refs[] = {
+#define x(n) #n,
+ BCH_WRITE_REFS()
+#undef x
+ NULL
+};
+
+static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ bch2_printbuf_tabstop_push(out, 24);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(c->writes); i++) {
+ prt_str(out, bch2_write_refs[i]);
+ prt_tab(out);
+ prt_printf(out, "%li", atomic_long_read(&c->writes[i]));
+ prt_newline(out);
+ }
+}
+#endif
+
+read_attribute(internal_uuid);
+read_attribute(disk_groups);
+
+read_attribute(has_data);
+read_attribute(alloc_debug);
+
+#define x(t, n, ...) read_attribute(t);
+BCH_PERSISTENT_COUNTERS()
+#undef x
+
+rw_attribute(discard);
+rw_attribute(label);
+
+rw_attribute(copy_gc_enabled);
+read_attribute(copy_gc_wait);
+
+rw_attribute(rebalance_enabled);
+sysfs_pd_controller_attribute(rebalance);
+read_attribute(rebalance_status);
+rw_attribute(promote_whole_extents);
+
+read_attribute(new_stripes);
+
+read_attribute(io_timers_read);
+read_attribute(io_timers_write);
+
+read_attribute(moving_ctxts);
+
+#ifdef CONFIG_BCACHEFS_TESTS
+write_attribute(perf_test);
+#endif /* CONFIG_BCACHEFS_TESTS */
+
+#define x(_name) \
+ static struct attribute sysfs_time_stat_##_name = \
+ { .name = #_name, .mode = 0444 };
+ BCH_TIME_STATS()
+#undef x
+
+static struct attribute sysfs_state_rw = {
+ .name = "state",
+ .mode = 0444,
+};
+
+static size_t bch2_btree_cache_size(struct bch_fs *c)
+{
+ size_t ret = 0;
+ struct btree *b;
+
+ mutex_lock(&c->btree_cache.lock);
+ list_for_each_entry(b, &c->btree_cache.live, list)
+ ret += btree_bytes(c);
+
+ mutex_unlock(&c->btree_cache.lock);
+ return ret;
+}
+
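+/*
+ * Walk every btree that can contain extent pointers and tally, per
+ * compression type, the number of extents and their compressed vs
+ * uncompressed sectors; backs the compression_stats sysfs file.
+ */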
+static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ struct btree_trans *trans;
+ enum btree_id id;
+ struct compression_type_stats {
+ u64 nr_extents;
+ u64 sectors_compressed;
+ u64 sectors_uncompressed;
+ } s[BCH_COMPRESSION_TYPE_NR];
+ u64 compressed_incompressible = 0;
+ int ret = 0;
+
+ memset(s, 0, sizeof(s));
+
+ if (!test_bit(BCH_FS_started, &c->flags))
+ return -EPERM;
+
+ trans = bch2_trans_get(c);
+
+ for (id = 0; id < BTREE_ID_NR; id++) {
+ if (!btree_type_has_ptrs(id))
+ continue;
+
+ ret = for_each_btree_key(trans, iter, id, POS_MIN,
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ bool compressed = false, incompressible = false;
+
+ bkey_for_each_crc(k.k, ptrs, crc, entry) {
+ incompressible |= crc.compression_type == BCH_COMPRESSION_TYPE_incompressible;
+ compressed |= crc_is_compressed(crc);
+
+ if (crc_is_compressed(crc)) {
+ s[crc.compression_type].nr_extents++;
+ s[crc.compression_type].sectors_compressed += crc.compressed_size;
+ s[crc.compression_type].sectors_uncompressed += crc.uncompressed_size;
+ }
+ }
+
+ compressed_incompressible += compressed && incompressible;
+
+ if (!compressed) {
+ unsigned t = incompressible ? BCH_COMPRESSION_TYPE_incompressible : 0;
+
+ s[t].nr_extents++;
+ s[t].sectors_compressed += k.k->size;
+ s[t].sectors_uncompressed += k.k->size;
+ }
+ 0;
+ }));
+ }
+
+ bch2_trans_put(trans);
+
+ if (ret)
+ return ret;
+
+ prt_str(out, "type");
+ printbuf_tabstop_push(out, 12);
+ prt_tab(out);
+
+ prt_str(out, "compressed");
+ printbuf_tabstop_push(out, 16);
+ prt_tab_rjust(out);
+
+ prt_str(out, "uncompressed");
+ printbuf_tabstop_push(out, 16);
+ prt_tab_rjust(out);
+
+ prt_str(out, "average extent size");
+ printbuf_tabstop_push(out, 24);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(s); i++) {
+ prt_str(out, bch2_compression_types[i]);
+ prt_tab(out);
+
+ prt_human_readable_u64(out, s[i].sectors_compressed << 9);
+ prt_tab_rjust(out);
+
+ prt_human_readable_u64(out, s[i].sectors_uncompressed << 9);
+ prt_tab_rjust(out);
+
+ prt_human_readable_u64(out, s[i].nr_extents
+ ? div_u64(s[i].sectors_uncompressed << 9, s[i].nr_extents)
+ : 0);
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+
+ if (compressed_incompressible) {
+ prt_printf(out, "%llu compressed & incompressible extents", compressed_incompressible);
+ prt_newline(out);
+ }
+
+ return 0;
+}
+
+static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ prt_printf(out, "%s: ", bch2_btree_id_str(c->gc_gens_btree));
+ bch2_bpos_to_text(out, c->gc_gens_pos);
+ prt_printf(out, "\n");
+}
+
+static void bch2_btree_wakeup_all(struct bch_fs *c)
+{
+ struct btree_trans *trans;
+
+ seqmutex_lock(&c->btree_trans_lock);
+ list_for_each_entry(trans, &c->btree_trans_list, list) {
+ struct btree_bkey_cached_common *b = READ_ONCE(trans->locking);
+
+ if (b)
+ six_lock_wakeup_all(&b->lock);
+
+ }
+ seqmutex_unlock(&c->btree_trans_lock);
+}
+
+SHOW(bch2_fs)
+{
+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
+
+ sysfs_print(minor, c->minor);
+ sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
+
+ if (attr == &sysfs_flags)
+ prt_bitflags(out, bch2_fs_flag_strs, c->flags);
+
+ sysfs_hprint(btree_cache_size, bch2_btree_cache_size(c));
+
+ if (attr == &sysfs_btree_write_stats)
+ bch2_btree_write_stats_to_text(out, c);
+
+ sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic);
+
+ if (attr == &sysfs_gc_gens_pos)
+ bch2_gc_gens_pos_to_text(out, c);
+
+ sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
+
+ sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled);
+ sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */
+
+ if (attr == &sysfs_copy_gc_wait)
+ bch2_copygc_wait_to_text(out, c);
+
+ if (attr == &sysfs_rebalance_status)
+ bch2_rebalance_status_to_text(out, c);
+
+ sysfs_print(promote_whole_extents, c->promote_whole_extents);
+
+ /* Debugging: */
+
+ if (attr == &sysfs_journal_debug)
+ bch2_journal_debug_to_text(out, &c->journal);
+
+ if (attr == &sysfs_btree_updates)
+ bch2_btree_updates_to_text(out, c);
+
+ if (attr == &sysfs_btree_cache)
+ bch2_btree_cache_to_text(out, c);
+
+ if (attr == &sysfs_btree_key_cache)
+ bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
+
+ if (attr == &sysfs_stripes_heap)
+ bch2_stripes_heap_to_text(out, c);
+
+ if (attr == &sysfs_open_buckets)
+ bch2_open_buckets_to_text(out, c);
+
+ if (attr == &sysfs_open_buckets_partial)
+ bch2_open_buckets_partial_to_text(out, c);
+
+ if (attr == &sysfs_write_points)
+ bch2_write_points_to_text(out, c);
+
+ if (attr == &sysfs_compression_stats)
+ bch2_compression_stats_to_text(out, c);
+
+ if (attr == &sysfs_new_stripes)
+ bch2_new_stripes_to_text(out, c);
+
+ if (attr == &sysfs_io_timers_read)
+ bch2_io_timers_to_text(out, &c->io_clock[READ]);
+
+ if (attr == &sysfs_io_timers_write)
+ bch2_io_timers_to_text(out, &c->io_clock[WRITE]);
+
+ if (attr == &sysfs_moving_ctxts)
+ bch2_fs_moving_ctxts_to_text(out, c);
+
+#ifdef BCH_WRITE_REF_DEBUG
+ if (attr == &sysfs_write_refs)
+ bch2_write_refs_to_text(out, c);
+#endif
+
+ if (attr == &sysfs_nocow_lock_table)
+ bch2_nocow_locks_to_text(out, &c->nocow_locks);
+
+ if (attr == &sysfs_disk_groups)
+ bch2_disk_groups_to_text(out, c);
+
+ return 0;
+}
+
+STORE(bch2_fs)
+{
+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
+
+ if (attr == &sysfs_btree_gc_periodic) {
+ ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
+ ?: (ssize_t) size;
+
+ wake_up_process(c->gc_thread);
+ return ret;
+ }
+
+ if (attr == &sysfs_copy_gc_enabled) {
+ ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
+ ?: (ssize_t) size;
+
+ if (c->copygc_thread)
+ wake_up_process(c->copygc_thread);
+ return ret;
+ }
+
+ if (attr == &sysfs_rebalance_enabled) {
+ ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
+ ?: (ssize_t) size;
+
+ rebalance_wakeup(c);
+ return ret;
+ }
+
+ sysfs_pd_controller_store(rebalance, &c->rebalance.pd);
+
+ sysfs_strtoul(promote_whole_extents, c->promote_whole_extents);
+
+ /* Debugging: */
+
+ if (!test_bit(BCH_FS_started, &c->flags))
+ return -EPERM;
+
+ if (!test_bit(BCH_FS_rw, &c->flags))
+ return -EROFS;
+
+ if (attr == &sysfs_prune_cache) {
+ struct shrink_control sc;
+
+ sc.gfp_mask = GFP_KERNEL;
+ sc.nr_to_scan = strtoul_or_return(buf);
+ c->btree_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
+ }
+
+ if (attr == &sysfs_btree_wakeup)
+ bch2_btree_wakeup_all(c);
+
+ if (attr == &sysfs_trigger_gc) {
+ /*
+ * Full gc is currently incompatible with btree key cache:
+ */
+#if 0
+ down_read(&c->state_lock);
+ bch2_gc(c, false, false);
+ up_read(&c->state_lock);
+#else
+ bch2_gc_gens(c);
+#endif
+ }
+
+ if (attr == &sysfs_trigger_discards)
+ bch2_do_discards(c);
+
+ if (attr == &sysfs_trigger_invalidates)
+ bch2_do_invalidates(c);
+
+#ifdef CONFIG_BCACHEFS_TESTS
+ if (attr == &sysfs_perf_test) {
+ char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
+ char *test = strsep(&p, " \t\n");
+ char *nr_str = strsep(&p, " \t\n");
+ char *threads_str = strsep(&p, " \t\n");
+ unsigned threads;
+ u64 nr;
+ int ret = -EINVAL;
+
+ if (threads_str &&
+ !(ret = kstrtouint(threads_str, 10, &threads)) &&
+ !(ret = bch2_strtoull_h(nr_str, &nr)))
+ ret = bch2_btree_perf_test(c, test, nr, threads);
+ kfree(tmp);
+
+ if (ret)
+ size = ret;
+ }
+#endif
+ return size;
+}
+SYSFS_OPS(bch2_fs);
+
+struct attribute *bch2_fs_files[] = {
+ &sysfs_minor,
+ &sysfs_btree_cache_size,
+ &sysfs_btree_write_stats,
+
+ &sysfs_promote_whole_extents,
+
+ &sysfs_compression_stats,
+
+#ifdef CONFIG_BCACHEFS_TESTS
+ &sysfs_perf_test,
+#endif
+ NULL
+};
+
+/* counters dir */
+
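+/* Each counter file reports the value both since mount and since filesystem creation: */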
+SHOW(bch2_fs_counters)
+{
+ struct bch_fs *c = container_of(kobj, struct bch_fs, counters_kobj);
+ u64 counter = 0;
+ u64 counter_since_mount = 0;
+
+ printbuf_tabstop_push(out, 32);
+
+ #define x(t, ...) \
+ if (attr == &sysfs_##t) { \
+ counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\
+ counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\
+ prt_printf(out, "since mount:"); \
+ prt_tab(out); \
+ prt_human_readable_u64(out, counter_since_mount); \
+ prt_newline(out); \
+ \
+ prt_printf(out, "since filesystem creation:"); \
+ prt_tab(out); \
+ prt_human_readable_u64(out, counter); \
+ prt_newline(out); \
+ }
+ BCH_PERSISTENT_COUNTERS()
+ #undef x
+ return 0;
+}
+
+STORE(bch2_fs_counters) {
+ return 0;
+}
+
+SYSFS_OPS(bch2_fs_counters);
+
+struct attribute *bch2_fs_counters_files[] = {
+#define x(t, ...) \
+ &sysfs_##t,
+ BCH_PERSISTENT_COUNTERS()
+#undef x
+ NULL
+};
+
+/* internal dir - just a wrapper */
+
+SHOW(bch2_fs_internal)
+{
+ struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
+
+ return bch2_fs_to_text(out, &c->kobj, attr);
+}
+
+STORE(bch2_fs_internal)
+{
+ struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
+
+ return bch2_fs_store(&c->kobj, attr, buf, size);
+}
+SYSFS_OPS(bch2_fs_internal);
+
+struct attribute *bch2_fs_internal_files[] = {
+ &sysfs_flags,
+ &sysfs_journal_debug,
+ &sysfs_btree_updates,
+ &sysfs_btree_cache,
+ &sysfs_btree_key_cache,
+ &sysfs_new_stripes,
+ &sysfs_stripes_heap,
+ &sysfs_open_buckets,
+ &sysfs_open_buckets_partial,
+ &sysfs_write_points,
+#ifdef BCH_WRITE_REF_DEBUG
+ &sysfs_write_refs,
+#endif
+ &sysfs_nocow_lock_table,
+ &sysfs_io_timers_read,
+ &sysfs_io_timers_write,
+
+ &sysfs_trigger_gc,
+ &sysfs_trigger_discards,
+ &sysfs_trigger_invalidates,
+ &sysfs_prune_cache,
+ &sysfs_btree_wakeup,
+
+ &sysfs_gc_gens_pos,
+
+ &sysfs_copy_gc_enabled,
+ &sysfs_copy_gc_wait,
+
+ &sysfs_rebalance_enabled,
+ &sysfs_rebalance_status,
+ sysfs_pd_controller_files(rebalance),
+
+ &sysfs_moving_ctxts,
+
+ &sysfs_internal_uuid,
+
+ &sysfs_disk_groups,
+ NULL
+};
+
+/* options */
+
+SHOW(bch2_fs_opts_dir)
+{
+ struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
+ const struct bch_option *opt = container_of(attr, struct bch_option, attr);
+ int id = opt - bch2_opt_table;
+ u64 v = bch2_opt_get_by_id(&c->opts, id);
+
+ bch2_opt_to_text(out, c, c->disk_sb.sb, opt, v, OPT_SHOW_FULL_LIST);
+ prt_char(out, '\n');
+
+ return 0;
+}
+
+STORE(bch2_fs_opts_dir)
+{
+ struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
+ const struct bch_option *opt = container_of(attr, struct bch_option, attr);
+ int ret, id = opt - bch2_opt_table;
+ char *tmp;
+ u64 v;
+
+ /*
+ * We don't need to take c->writes for correctness, but it eliminates an
+ * unsightly error message in the dmesg log when we're RO:
+ */
+ if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
+ return -EROFS;
+
+ tmp = kstrdup(buf, GFP_KERNEL);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL);
+ kfree(tmp);
+
+ if (ret < 0)
+ goto err;
+
+ ret = bch2_opt_check_may_set(c, id, v);
+ if (ret < 0)
+ goto err;
+
+ bch2_opt_set_sb(c, opt, v);
+ bch2_opt_set_by_id(&c->opts, id, v);
+
+ if ((id == Opt_background_target ||
+ id == Opt_background_compression) && v)
+ bch2_set_rebalance_needs_scan(c, 0);
+
+ ret = size;
+err:
+ bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
+ return ret;
+}
+SYSFS_OPS(bch2_fs_opts_dir);
+
+struct attribute *bch2_fs_opts_dir_files[] = { NULL };
+
+int bch2_opts_create_sysfs_files(struct kobject *kobj)
+{
+ const struct bch_option *i;
+ int ret;
+
+ for (i = bch2_opt_table;
+ i < bch2_opt_table + bch2_opts_nr;
+ i++) {
+ if (!(i->flags & OPT_FS))
+ continue;
+
+ ret = sysfs_create_file(kobj, &i->attr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* time stats */
+
+SHOW(bch2_fs_time_stats)
+{
+ struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
+
+#define x(name) \
+ if (attr == &sysfs_time_stat_##name) \
+ bch2_time_stats_to_text(out, &c->times[BCH_TIME_##name]);
+ BCH_TIME_STATS()
+#undef x
+
+ return 0;
+}
+
+STORE(bch2_fs_time_stats)
+{
+ return size;
+}
+SYSFS_OPS(bch2_fs_time_stats);
+
+struct attribute *bch2_fs_time_stats_files[] = {
+#define x(name) \
+ &sysfs_time_stat_##name,
+ BCH_TIME_STATS()
+#undef x
+ NULL
+};
+
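+/* Per-device allocator state dump, exposed via the alloc_debug sysfs file: */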
+static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
+{
+ struct bch_fs *c = ca->fs;
+ struct bch_dev_usage stats = bch2_dev_usage_read(ca);
+ unsigned i, nr[BCH_DATA_NR];
+
+ memset(nr, 0, sizeof(nr));
+
+ for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
+ nr[c->open_buckets[i].data_type]++;
+
+ printbuf_tabstop_push(out, 8);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+
+ bch2_dev_usage_to_text(out, &stats);
+
+ prt_newline(out);
+
+ prt_printf(out, "reserves:");
+ prt_newline(out);
+ for (i = 0; i < BCH_WATERMARK_NR; i++) {
+ prt_str(out, bch2_watermarks[i]);
+ prt_tab(out);
+ prt_u64(out, bch2_dev_buckets_reserved(ca, i));
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 24);
+
+ prt_str(out, "freelist_wait");
+ prt_tab(out);
+ prt_str(out, c->freelist_wait.list.first ? "waiting" : "empty");
+ prt_newline(out);
+
+ prt_str(out, "open buckets allocated");
+ prt_tab(out);
+ prt_u64(out, OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
+ prt_newline(out);
+
+ prt_str(out, "open buckets this dev");
+ prt_tab(out);
+ prt_u64(out, ca->nr_open_buckets);
+ prt_newline(out);
+
+ prt_str(out, "open buckets total");
+ prt_tab(out);
+ prt_u64(out, OPEN_BUCKETS_COUNT);
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_wait");
+ prt_tab(out);
+ prt_str(out, c->open_buckets_wait.list.first ? "waiting" : "empty");
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_btree");
+ prt_tab(out);
+ prt_u64(out, nr[BCH_DATA_btree]);
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_user");
+ prt_tab(out);
+ prt_u64(out, nr[BCH_DATA_user]);
+ prt_newline(out);
+
+ prt_str(out, "buckets_to_invalidate");
+ prt_tab(out);
+ prt_u64(out, should_invalidate_buckets(ca, stats));
+ prt_newline(out);
+
+ prt_str(out, "btree reserve cache");
+ prt_tab(out);
+ prt_u64(out, c->btree_reserve_cache_nr);
+ prt_newline(out);
+}
+
+static const char * const bch2_rw[] = {
+ "read",
+ "write",
+ NULL
+};
+
+static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca)
+{
+ int rw, i;
+
+ for (rw = 0; rw < 2; rw++) {
+ prt_printf(out, "%s:\n", bch2_rw[rw]);
+
+ for (i = 1; i < BCH_DATA_NR; i++)
+ prt_printf(out, "%-12s:%12llu\n",
+ bch2_data_types[i],
+ percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
+ }
+}
+
+SHOW(bch2_dev)
+{
+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
+ struct bch_fs *c = ca->fs;
+
+ sysfs_printf(uuid, "%pU\n", ca->uuid.b);
+
+ sysfs_print(bucket_size, bucket_bytes(ca));
+ sysfs_print(first_bucket, ca->mi.first_bucket);
+ sysfs_print(nbuckets, ca->mi.nbuckets);
+ sysfs_print(durability, ca->mi.durability);
+ sysfs_print(discard, ca->mi.discard);
+
+ if (attr == &sysfs_label) {
+ if (ca->mi.group)
+ bch2_disk_path_to_text(out, c, ca->mi.group - 1);
+ prt_char(out, '\n');
+ }
+
+ if (attr == &sysfs_has_data) {
+ prt_bitflags(out, bch2_data_types, bch2_dev_has_data(c, ca));
+ prt_char(out, '\n');
+ }
+
+ if (attr == &sysfs_state_rw) {
+ prt_string_option(out, bch2_member_states, ca->mi.state);
+ prt_char(out, '\n');
+ }
+
+ if (attr == &sysfs_io_done)
+ dev_io_done_to_text(out, ca);
+
+ if (attr == &sysfs_io_errors)
+ bch2_dev_io_errors_to_text(out, ca);
+
+ sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ]));
+ sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE]));
+
+ if (attr == &sysfs_io_latency_stats_read)
+ bch2_time_stats_to_text(out, &ca->io_latency[READ]);
+
+ if (attr == &sysfs_io_latency_stats_write)
+ bch2_time_stats_to_text(out, &ca->io_latency[WRITE]);
+
+ sysfs_printf(congested, "%u%%",
+ clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
+ * 100 / CONGESTED_MAX);
+
+ if (attr == &sysfs_alloc_debug)
+ dev_alloc_debug_to_text(out, ca);
+
+ return 0;
+}
+
+STORE(bch2_dev)
+{
+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
+ struct bch_fs *c = ca->fs;
+ struct bch_member *mi;
+
+ if (attr == &sysfs_discard) {
+ bool v = strtoul_or_return(buf);
+
+ mutex_lock(&c->sb_lock);
+ mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+
+ if (v != BCH_MEMBER_DISCARD(mi)) {
+ SET_BCH_MEMBER_DISCARD(mi, v);
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+ }
+
+ if (attr == &sysfs_durability) {
+ u64 v = strtoul_or_return(buf);
+
+ mutex_lock(&c->sb_lock);
+ mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+
+ if (v + 1 != BCH_MEMBER_DURABILITY(mi)) {
+ SET_BCH_MEMBER_DURABILITY(mi, v + 1);
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+ }
+
+ if (attr == &sysfs_label) {
+ char *tmp;
+ int ret;
+
+ tmp = kstrdup(buf, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ ret = bch2_dev_group_set(c, ca, strim(tmp));
+ kfree(tmp);
+ if (ret)
+ return ret;
+ }
+
+ if (attr == &sysfs_io_errors_reset)
+ bch2_dev_errors_reset(ca);
+
+ return size;
+}
+SYSFS_OPS(bch2_dev);
+
+struct attribute *bch2_dev_files[] = {
+ &sysfs_uuid,
+ &sysfs_bucket_size,
+ &sysfs_first_bucket,
+ &sysfs_nbuckets,
+ &sysfs_durability,
+
+ /* settings: */
+ &sysfs_discard,
+ &sysfs_state_rw,
+ &sysfs_label,
+
+ &sysfs_has_data,
+ &sysfs_io_done,
+ &sysfs_io_errors,
+ &sysfs_io_errors_reset,
+
+ &sysfs_io_latency_read,
+ &sysfs_io_latency_write,
+ &sysfs_io_latency_stats_read,
+ &sysfs_io_latency_stats_write,
+ &sysfs_congested,
+
+ /* debug: */
+ &sysfs_alloc_debug,
+ NULL
+};
+
+#endif /* NO_BCACHEFS_SYSFS */
diff --git a/c_src/libbcachefs/sysfs.h b/c_src/libbcachefs/sysfs.h
new file mode 100644
index 00000000..222cd506
--- /dev/null
+++ b/c_src/libbcachefs/sysfs.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SYSFS_H_
+#define _BCACHEFS_SYSFS_H_
+
+#include <linux/sysfs.h>
+
+#ifndef NO_BCACHEFS_SYSFS
+
+struct attribute;
+struct sysfs_ops;
+
+extern struct attribute *bch2_fs_files[];
+extern struct attribute *bch2_fs_counters_files[];
+extern struct attribute *bch2_fs_internal_files[];
+extern struct attribute *bch2_fs_opts_dir_files[];
+extern struct attribute *bch2_fs_time_stats_files[];
+extern struct attribute *bch2_dev_files[];
+
+extern const struct sysfs_ops bch2_fs_sysfs_ops;
+extern const struct sysfs_ops bch2_fs_counters_sysfs_ops;
+extern const struct sysfs_ops bch2_fs_internal_sysfs_ops;
+extern const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
+extern const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
+extern const struct sysfs_ops bch2_dev_sysfs_ops;
+
+int bch2_opts_create_sysfs_files(struct kobject *);
+
+#else
+
+static struct attribute *bch2_fs_files[] = {};
+static struct attribute *bch2_fs_counters_files[] = {};
+static struct attribute *bch2_fs_internal_files[] = {};
+static struct attribute *bch2_fs_opts_dir_files[] = {};
+static struct attribute *bch2_fs_time_stats_files[] = {};
+static struct attribute *bch2_dev_files[] = {};
+
+static const struct sysfs_ops bch2_fs_sysfs_ops;
+static const struct sysfs_ops bch2_fs_counters_sysfs_ops;
+static const struct sysfs_ops bch2_fs_internal_sysfs_ops;
+static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
+static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
+static const struct sysfs_ops bch2_dev_sysfs_ops;
+
+static inline int bch2_opts_create_sysfs_files(struct kobject *kobj) { return 0; }
+
+#endif /* NO_BCACHEFS_SYSFS */
+
+#endif /* _BCACHEFS_SYSFS_H_ */
diff --git a/c_src/libbcachefs/tests.c b/c_src/libbcachefs/tests.c
new file mode 100644
index 00000000..b3fe9fc5
--- /dev/null
+++ b/c_src/libbcachefs/tests.c
@@ -0,0 +1,882 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifdef CONFIG_BCACHEFS_TESTS
+
+#include "bcachefs.h"
+#include "btree_update.h"
+#include "journal_reclaim.h"
+#include "snapshot.h"
+#include "tests.h"
+
+#include <linux/kthread.h>
+#include <linux/random.h>
+
+static void delete_test_keys(struct bch_fs *c)
+{
+ int ret;
+
+ ret = bch2_btree_delete_range(c, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX),
+ POS(0, U64_MAX),
+ 0, NULL);
+ BUG_ON(ret);
+
+ ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX),
+ POS(0, U64_MAX),
+ 0, NULL);
+ BUG_ON(ret);
+}
+
+/* unit tests */
+
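+/* Insert a single cookie key, then delete it twice through the same iterator: */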
+static int test_delete(struct bch_fs *c, u64 nr)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_i_cookie k;
+ int ret;
+
+ bkey_cookie_init(&k.k_i);
+ k.k.p.snapshot = U32_MAX;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p,
+ BTREE_ITER_INTENT);
+
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, &k.k_i, 0));
+ bch_err_msg(c, ret, "update error");
+ if (ret)
+ goto err;
+
+ pr_info("deleting once");
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_btree_iter_traverse(&iter) ?:
+ bch2_btree_delete_at(trans, &iter, 0));
+ bch_err_msg(c, ret, "delete error (first)");
+ if (ret)
+ goto err;
+
+ pr_info("deleting twice");
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_btree_iter_traverse(&iter) ?:
+ bch2_btree_delete_at(trans, &iter, 0));
+ bch_err_msg(c, ret, "delete error (second)");
+ if (ret)
+ goto err;
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ return ret;
+}
+
+static int test_delete_written(struct bch_fs *c, u64 nr)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_i_cookie k;
+ int ret;
+
+ bkey_cookie_init(&k.k_i);
+ k.k.p.snapshot = U32_MAX;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p,
+ BTREE_ITER_INTENT);
+
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, &k.k_i, 0));
+ bch_err_msg(c, ret, "update error");
+ if (ret)
+ goto err;
+
+ bch2_trans_unlock(trans);
+ bch2_journal_flush_all_pins(&c->journal);
+
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_btree_iter_traverse(&iter) ?:
+ bch2_btree_delete_at(trans, &iter, 0));
+ bch_err_msg(c, ret, "delete error");
+ if (ret)
+ goto err;
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ return ret;
+}
+
+static int test_iterate(struct bch_fs *c, u64 nr)
+{
+ u64 i;
+ int ret = 0;
+
+ delete_test_keys(c);
+
+ pr_info("inserting test keys");
+
+ for (i = 0; i < nr; i++) {
+ struct bkey_i_cookie ck;
+
+ bkey_cookie_init(&ck.k_i);
+ ck.k.p.offset = i;
+ ck.k.p.snapshot = U32_MAX;
+
+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0);
+ bch_err_msg(c, ret, "insert error");
+ if (ret)
+ return ret;
+ }
+
+ pr_info("iterating forwards");
+ i = 0;
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ 0, k, ({
+ BUG_ON(k.k->p.offset != i++);
+ 0;
+ })));
+ bch_err_msg(c, ret, "error iterating forwards");
+ if (ret)
+ return ret;
+
+ BUG_ON(i != nr);
+
+ pr_info("iterating backwards");
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs,
+ SPOS(0, U64_MAX, U32_MAX), 0, k, ({
+ BUG_ON(k.k->p.offset != --i);
+ 0;
+ })));
+ bch_err_msg(c, ret, "error iterating backwards");
+ if (ret)
+ return ret;
+
+ BUG_ON(i);
+ return 0;
+}
+
+static int test_iterate_extents(struct bch_fs *c, u64 nr)
+{
+ u64 i;
+ int ret = 0;
+
+ delete_test_keys(c);
+
+ pr_info("inserting test extents");
+
+ for (i = 0; i < nr; i += 8) {
+ struct bkey_i_cookie ck;
+
+ bkey_cookie_init(&ck.k_i);
+ ck.k.p.offset = i + 8;
+ ck.k.p.snapshot = U32_MAX;
+ ck.k.size = 8;
+
+ ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0);
+ bch_err_msg(c, ret, "insert error");
+ if (ret)
+ return ret;
+ }
+
+ pr_info("iterating forwards");
+ i = 0;
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ 0, k, ({
+ BUG_ON(bkey_start_offset(k.k) != i);
+ i = k.k->p.offset;
+ 0;
+ })));
+ bch_err_msg(c, ret, "error iterating forwards");
+ if (ret)
+ return ret;
+
+ BUG_ON(i != nr);
+
+ pr_info("iterating backwards");
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_reverse(trans, iter, BTREE_ID_extents,
+ SPOS(0, U64_MAX, U32_MAX), 0, k, ({
+ BUG_ON(k.k->p.offset != i);
+ i = bkey_start_offset(k.k);
+ 0;
+ })));
+ bch_err_msg(c, ret, "error iterating backwards");
+ if (ret)
+ return ret;
+
+ BUG_ON(i);
+ return 0;
+}
+
+static int test_iterate_slots(struct bch_fs *c, u64 nr)
+{
+ u64 i;
+ int ret = 0;
+
+ delete_test_keys(c);
+
+ pr_info("inserting test keys");
+
+ for (i = 0; i < nr; i++) {
+ struct bkey_i_cookie ck;
+
+ bkey_cookie_init(&ck.k_i);
+ ck.k.p.offset = i * 2;
+ ck.k.p.snapshot = U32_MAX;
+
+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0);
+ bch_err_msg(c, ret, "insert error");
+ if (ret)
+ return ret;
+ }
+
+ pr_info("iterating forwards");
+ i = 0;
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ 0, k, ({
+ BUG_ON(k.k->p.offset != i);
+ i += 2;
+ 0;
+ })));
+ bch_err_msg(c, ret, "error iterating forwards");
+ if (ret)
+ return ret;
+
+ BUG_ON(i != nr * 2);
+
+ pr_info("iterating forwards by slots");
+ i = 0;
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ BTREE_ITER_SLOTS, k, ({
+ if (i >= nr * 2)
+ break;
+
+ BUG_ON(k.k->p.offset != i);
+ BUG_ON(bkey_deleted(k.k) != (i & 1));
+
+ i++;
+ 0;
+ })));
+ bch_err_msg(c, ret, "error iterating forwards by slots");
+ return ret;
+}
+
+static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
+{
+ u64 i;
+ int ret = 0;
+
+ delete_test_keys(c);
+
+ pr_info("inserting test keys");
+
+ for (i = 0; i < nr; i += 16) {
+ struct bkey_i_cookie ck;
+
+ bkey_cookie_init(&ck.k_i);
+ ck.k.p.offset = i + 16;
+ ck.k.p.snapshot = U32_MAX;
+ ck.k.size = 8;
+
+ ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0);
+ bch_err_msg(c, ret, "insert error");
+ if (ret)
+ return ret;
+ }
+
+ pr_info("iterating forwards");
+ i = 0;
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ 0, k, ({
+ BUG_ON(bkey_start_offset(k.k) != i + 8);
+ BUG_ON(k.k->size != 8);
+ i += 16;
+ 0;
+ })));
+ bch_err_msg(c, ret, "error iterating forwards");
+ if (ret)
+ return ret;
+
+ BUG_ON(i != nr);
+
+ pr_info("iterating forwards by slots");
+ i = 0;
+
+ ret = bch2_trans_run(c,
+ for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ BTREE_ITER_SLOTS, k, ({
+ if (i == nr)
+ break;
+ BUG_ON(bkey_deleted(k.k) != !(i % 16));
+
+ BUG_ON(bkey_start_offset(k.k) != i);
+ BUG_ON(k.k->size != 8);
+ i = k.k->p.offset;
+ 0;
+ })));
+ bch_err_msg(c, ret, "error iterating forwards by slots");
+ return ret;
+}
+
+/*
+ * XXX: we really want to make sure we've got a btree with depth > 0 for these
+ * tests
+ */
+static int test_peek_end(struct bch_fs *c, u64 nr)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), 0);
+
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
+ BUG_ON(k.k);
+
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
+ BUG_ON(k.k);
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ return 0;
+}
+
+static int test_peek_end_extents(struct bch_fs *c, u64 nr)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX), 0);
+
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
+ BUG_ON(k.k);
+
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
+ BUG_ON(k.k);
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ return 0;
+}
+
+/* extent unit tests */
+
+static u64 test_version;
+
+static int insert_test_extent(struct bch_fs *c,
+ u64 start, u64 end)
+{
+ struct bkey_i_cookie k;
+ int ret;
+
+ bkey_cookie_init(&k.k_i);
+ k.k_i.k.p.offset = end;
+ k.k_i.k.p.snapshot = U32_MAX;
+ k.k_i.k.size = end - start;
+ k.k_i.k.version.lo = test_version++;
+
+ ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int __test_extent_overwrite(struct bch_fs *c,
+ u64 e1_start, u64 e1_end,
+ u64 e2_start, u64 e2_end)
+{
+ int ret;
+
+ ret = insert_test_extent(c, e1_start, e1_end) ?:
+ insert_test_extent(c, e2_start, e2_end);
+
+ delete_test_keys(c);
+ return ret;
+}
+
+static int test_extent_overwrite_front(struct bch_fs *c, u64 nr)
+{
+ return __test_extent_overwrite(c, 0, 64, 0, 32) ?:
+ __test_extent_overwrite(c, 8, 64, 0, 32);
+}
+
+static int test_extent_overwrite_back(struct bch_fs *c, u64 nr)
+{
+ return __test_extent_overwrite(c, 0, 64, 32, 64) ?:
+ __test_extent_overwrite(c, 0, 64, 32, 72);
+}
+
+static int test_extent_overwrite_middle(struct bch_fs *c, u64 nr)
+{
+ return __test_extent_overwrite(c, 0, 64, 32, 40);
+}
+
+static int test_extent_overwrite_all(struct bch_fs *c, u64 nr)
+{
+ return __test_extent_overwrite(c, 32, 64, 0, 64) ?:
+ __test_extent_overwrite(c, 32, 64, 0, 128) ?:
+ __test_extent_overwrite(c, 32, 64, 32, 64) ?:
+ __test_extent_overwrite(c, 32, 64, 32, 128);
+}
+
+static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid)
+{
+ struct bkey_i_cookie k;
+ int ret;
+
+ bkey_cookie_init(&k.k_i);
+ k.k_i.k.p.inode = inum;
+ k.k_i.k.p.offset = start + len;
+ k.k_i.k.p.snapshot = snapid;
+ k.k_i.k.size = len;
+
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int test_extent_create_overlapping(struct bch_fs *c, u64 inum)
+{
+ return insert_test_overlapping_extent(c, inum, 0, 16, U32_MAX - 2) ?: /* overwrite entire */
+ insert_test_overlapping_extent(c, inum, 2, 8, U32_MAX - 2) ?:
+ insert_test_overlapping_extent(c, inum, 4, 4, U32_MAX) ?:
+ insert_test_overlapping_extent(c, inum, 32, 8, U32_MAX - 2) ?: /* overwrite front/back */
+ insert_test_overlapping_extent(c, inum, 36, 8, U32_MAX) ?:
+ insert_test_overlapping_extent(c, inum, 60, 8, U32_MAX - 2) ?:
+ insert_test_overlapping_extent(c, inum, 64, 8, U32_MAX);
+}
+
+/* snapshot unit tests */
+
+/* Test skipping over keys in unrelated snapshots: */
+static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
+{
+ struct btree_trans *trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_i_cookie cookie;
+ int ret;
+
+ bkey_cookie_init(&cookie.k_i);
+ cookie.k.p.snapshot = snapid_hi;
+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0);
+ if (ret)
+ return ret;
+
+ trans = bch2_trans_get(c);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
+ SPOS(0, 0, snapid_lo), 0);
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
+
+ BUG_ON(k.k->p.snapshot != U32_MAX);
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ return ret;
+}
+
+static int test_snapshots(struct bch_fs *c, u64 nr)
+{
+ struct bkey_i_cookie cookie;
+ u32 snapids[2];
+ u32 snapid_subvols[2] = { 1, 1 };
+ int ret;
+
+ bkey_cookie_init(&cookie.k_i);
+ cookie.k.p.snapshot = U32_MAX;
+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0);
+ if (ret)
+ return ret;
+
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_snapshot_node_create(trans, U32_MAX,
+ snapids,
+ snapid_subvols,
+ 2));
+ if (ret)
+ return ret;
+
+ if (snapids[0] > snapids[1])
+ swap(snapids[0], snapids[1]);
+
+ ret = test_snapshot_filter(c, snapids[0], snapids[1]);
+ bch_err_msg(c, ret, "from test_snapshot_filter");
+ return ret;
+}
+
+/* perf tests */
+
+static u64 test_rand(void)
+{
+ u64 v;
+
+ get_random_bytes(&v, sizeof(v));
+ return v;
+}
+
+static int rand_insert(struct bch_fs *c, u64 nr)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct bkey_i_cookie k;
+ int ret = 0;
+ u64 i;
+
+ for (i = 0; i < nr; i++) {
+ bkey_cookie_init(&k.k_i);
+ k.k.p.offset = test_rand();
+ k.k.p.snapshot = U32_MAX;
+
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0));
+ if (ret)
+ break;
+ }
+
+ bch2_trans_put(trans);
+ return ret;
+}
+
+static int rand_insert_multi(struct bch_fs *c, u64 nr)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct bkey_i_cookie k[8];
+ int ret = 0;
+ unsigned j;
+ u64 i;
+
+ for (i = 0; i < nr; i += ARRAY_SIZE(k)) {
+ for (j = 0; j < ARRAY_SIZE(k); j++) {
+ bkey_cookie_init(&k[j].k_i);
+ k[j].k.p.offset = test_rand();
+ k[j].k.p.snapshot = U32_MAX;
+ }
+
+ ret = commit_do(trans, NULL, NULL, 0,
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[3].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[4].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[5].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?:
+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0));
+ if (ret)
+ break;
+ }
+
+ bch2_trans_put(trans);
+ return ret;
+}
+
+static int rand_lookup(struct bch_fs *c, u64 nr)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+ u64 i;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), 0);
+
+ for (i = 0; i < nr; i++) {
+ bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX));
+
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
+ ret = bkey_err(k);
+ if (ret)
+ break;
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ return ret;
+}
+
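+/*
+ * One iteration of the rand_mixed test: look up a random key, and on every
+ * fourth iteration overwrite whatever was found within the same transaction
+ * commit.
+ */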
+static int rand_mixed_trans(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i_cookie *cookie,
+ u64 i, u64 pos)
+{
+ struct bkey_s_c k;
+ int ret;
+
+ bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX));
+
+ k = bch2_btree_iter_peek(iter);
+ ret = bkey_err(k);
+ bch_err_msg(trans->c, ret, "lookup error");
+ if (ret)
+ return ret;
+
+ if (!(i & 3) && k.k) {
+ bkey_cookie_init(&cookie->k_i);
+ cookie->k.p = iter->pos;
+ ret = bch2_trans_update(trans, iter, &cookie->k_i, 0);
+ }
+
+ return ret;
+}
+
+static int rand_mixed(struct bch_fs *c, u64 nr)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_i_cookie cookie;
+ int ret = 0;
+ u64 i, rand;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), 0);
+
+ for (i = 0; i < nr; i++) {
+ rand = test_rand();
+ ret = commit_do(trans, NULL, NULL, 0,
+ rand_mixed_trans(trans, &iter, &cookie, i, rand));
+ if (ret)
+ break;
+ }
+
+ bch2_trans_iter_exit(trans, &iter);
+ bch2_trans_put(trans);
+ return ret;
+}
+
+static int __do_delete(struct btree_trans *trans, struct bpos pos)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
+ BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (!k.k)
+ goto err;
+
+ ret = bch2_btree_delete_at(trans, &iter, 0);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int rand_delete(struct bch_fs *c, u64 nr)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ int ret = 0;
+ u64 i;
+
+ for (i = 0; i < nr; i++) {
+ struct bpos pos = SPOS(0, test_rand(), U32_MAX);
+
+ ret = commit_do(trans, NULL, NULL, 0,
+ __do_delete(trans, pos));
+ if (ret)
+ break;
+ }
+
+ bch2_trans_put(trans);
+ return ret;
+}
+
+static int seq_insert(struct bch_fs *c, u64 nr)
+{
+ struct bkey_i_cookie insert;
+
+ bkey_cookie_init(&insert.k_i);
+
+ return bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k,
+ NULL, NULL, 0, ({
+ if (iter.pos.offset >= nr)
+ break;
+ insert.k.p = iter.pos;
+ bch2_trans_update(trans, &iter, &insert.k_i, 0);
+ })));
+}
+
+static int seq_lookup(struct bch_fs *c, u64 nr)
+{
+ return bch2_trans_run(c,
+ for_each_btree_key_upto(trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX),
+ 0, k,
+ 0));
+}
+
+static int seq_overwrite(struct bch_fs *c, u64 nr)
+{
+ return bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX),
+ BTREE_ITER_INTENT, k,
+ NULL, NULL, 0, ({
+ struct bkey_i_cookie u;
+
+ bkey_reassemble(&u.k_i, k);
+ bch2_trans_update(trans, &iter, &u.k_i, 0);
+ })));
+}
+
+static int seq_delete(struct bch_fs *c, u64 nr)
+{
+ return bch2_btree_delete_range(c, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX),
+ POS(0, U64_MAX),
+ 0, NULL);
+}
+
+typedef int (*perf_test_fn)(struct bch_fs *, u64);
+
+struct test_job {
+ struct bch_fs *c;
+ u64 nr;
+ unsigned nr_threads;
+ perf_test_fn fn;
+
+ atomic_t ready;
+ wait_queue_head_t ready_wait;
+
+ atomic_t done;
+ struct completion done_completion;
+
+ u64 start;
+ u64 finish;
+ int ret;
+};
+
+static int btree_perf_test_thread(void *data)
+{
+ struct test_job *j = data;
+ int ret;
+
+ if (atomic_dec_and_test(&j->ready)) {
+ wake_up(&j->ready_wait);
+ j->start = sched_clock();
+ } else {
+ wait_event(j->ready_wait, !atomic_read(&j->ready));
+ }
+
+ ret = j->fn(j->c, div64_u64(j->nr, j->nr_threads));
+ if (ret) {
+ bch_err(j->c, "%ps: error %s", j->fn, bch2_err_str(ret));
+ j->ret = ret;
+ }
+
+ if (atomic_dec_and_test(&j->done)) {
+ j->finish = sched_clock();
+ complete(&j->done_completion);
+ }
+
+ return 0;
+}
+
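+/*
+ * Entry point for the perf_test sysfs file: looks up @testname, runs it with
+ * @nr iterations split across @nr_threads threads, and logs elapsed time and
+ * per-iteration cost; written as "<test> <nr> <threads>", e.g.
+ * "rand_insert 1M 1".
+ */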
+int bch2_btree_perf_test(struct bch_fs *c, const char *testname,
+ u64 nr, unsigned nr_threads)
+{
+ struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads };
+ char name_buf[20];
+ struct printbuf nr_buf = PRINTBUF;
+ struct printbuf per_sec_buf = PRINTBUF;
+ unsigned i;
+ u64 time;
+
+ atomic_set(&j.ready, nr_threads);
+ init_waitqueue_head(&j.ready_wait);
+
+ atomic_set(&j.done, nr_threads);
+ init_completion(&j.done_completion);
+
+#define perf_test(_test) \
+ if (!strcmp(testname, #_test)) j.fn = _test
+
+ perf_test(rand_insert);
+ perf_test(rand_insert_multi);
+ perf_test(rand_lookup);
+ perf_test(rand_mixed);
+ perf_test(rand_delete);
+
+ perf_test(seq_insert);
+ perf_test(seq_lookup);
+ perf_test(seq_overwrite);
+ perf_test(seq_delete);
+
+	/* unit tests, not perf tests: */
+ perf_test(test_delete);
+ perf_test(test_delete_written);
+ perf_test(test_iterate);
+ perf_test(test_iterate_extents);
+ perf_test(test_iterate_slots);
+ perf_test(test_iterate_slots_extents);
+ perf_test(test_peek_end);
+ perf_test(test_peek_end_extents);
+
+ perf_test(test_extent_overwrite_front);
+ perf_test(test_extent_overwrite_back);
+ perf_test(test_extent_overwrite_middle);
+ perf_test(test_extent_overwrite_all);
+ perf_test(test_extent_create_overlapping);
+
+ perf_test(test_snapshots);
+
+ if (!j.fn) {
+ pr_err("unknown test %s", testname);
+ return -EINVAL;
+ }
+
+ //pr_info("running test %s:", testname);
+
+ if (nr_threads == 1)
+ btree_perf_test_thread(&j);
+ else
+ for (i = 0; i < nr_threads; i++)
+ kthread_run(btree_perf_test_thread, &j,
+ "bcachefs perf test[%u]", i);
+
+ while (wait_for_completion_interruptible(&j.done_completion))
+ ;
+
+ time = j.finish - j.start;
+
+ scnprintf(name_buf, sizeof(name_buf), "%s:", testname);
+ prt_human_readable_u64(&nr_buf, nr);
+ prt_human_readable_u64(&per_sec_buf, div64_u64(nr * NSEC_PER_SEC, time));
+ printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n",
+ name_buf, nr_buf.buf, nr_threads,
+ div_u64(time, NSEC_PER_SEC),
+ div_u64(time * nr_threads, nr),
+ per_sec_buf.buf);
+ printbuf_exit(&per_sec_buf);
+ printbuf_exit(&nr_buf);
+ return j.ret;
+}
+
+#endif /* CONFIG_BCACHEFS_TESTS */
diff --git a/c_src/libbcachefs/tests.h b/c_src/libbcachefs/tests.h
new file mode 100644
index 00000000..c73b18ae
--- /dev/null
+++ b/c_src/libbcachefs/tests.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_TEST_H
+#define _BCACHEFS_TEST_H
+
+struct bch_fs;
+
+#ifdef CONFIG_BCACHEFS_TESTS
+
+int bch2_btree_perf_test(struct bch_fs *, const char *, u64, unsigned);
+
+#else
+
+#endif /* CONFIG_BCACHEFS_TESTS */
+
+#endif /* _BCACHEFS_TEST_H */
diff --git a/c_src/libbcachefs/thread_with_file.c b/c_src/libbcachefs/thread_with_file.c
new file mode 100644
index 00000000..b1c867aa
--- /dev/null
+++ b/c_src/libbcachefs/thread_with_file.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "printbuf.h"
+#include "thread_with_file.h"
+
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/kthread.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+
+void bch2_thread_with_file_exit(struct thread_with_file *thr)
+{
+ if (thr->task) {
+ kthread_stop(thr->task);
+ put_task_struct(thr->task);
+ }
+}
+
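+/*
+ * Spawn a kernel thread running @fn and return an anonymous-inode file
+ * descriptor for talking to it; the fd's open mode is derived from which of
+ * the caller-supplied @fops are implemented.
+ */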
+int bch2_run_thread_with_file(struct thread_with_file *thr,
+ const struct file_operations *fops,
+ int (*fn)(void *))
+{
+ struct file *file = NULL;
+ int ret, fd = -1;
+ unsigned fd_flags = O_CLOEXEC;
+
+ if (fops->read && fops->write)
+ fd_flags |= O_RDWR;
+ else if (fops->read)
+ fd_flags |= O_RDONLY;
+ else if (fops->write)
+ fd_flags |= O_WRONLY;
+
+ char name[TASK_COMM_LEN];
+ get_task_comm(name, current);
+
+ thr->ret = 0;
+ thr->task = kthread_create(fn, thr, "%s", name);
+ ret = PTR_ERR_OR_ZERO(thr->task);
+ if (ret)
+ return ret;
+
+ ret = get_unused_fd_flags(fd_flags);
+ if (ret < 0)
+ goto err;
+ fd = ret;
+
+ file = anon_inode_getfile(name, fops, thr, fd_flags);
+ ret = PTR_ERR_OR_ZERO(file);
+ if (ret)
+ goto err;
+
+ fd_install(fd, file);
+ get_task_struct(thr->task);
+ wake_up_process(thr->task);
+ return fd;
+err:
+ if (fd >= 0)
+ put_unused_fd(fd);
+ if (thr->task)
+ kthread_stop(thr->task);
+ return ret;
+}
+
+static inline bool thread_with_stdio_has_output(struct thread_with_stdio *thr)
+{
+ return thr->stdio.output_buf.pos ||
+ thr->output2.nr ||
+ thr->thr.done;
+}
+
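+/*
+ * Userspace read side of the stdio redirect: drain the thread's output
+ * printbuf into the output2 darray under output_lock, then copy as much as
+ * possible to the user buffer.
+ */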
+static ssize_t thread_with_stdio_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct thread_with_stdio *thr =
+ container_of(file->private_data, struct thread_with_stdio, thr);
+ size_t copied = 0, b;
+ int ret = 0;
+
+ if ((file->f_flags & O_NONBLOCK) &&
+ !thread_with_stdio_has_output(thr))
+ return -EAGAIN;
+
+ ret = wait_event_interruptible(thr->stdio.output_wait,
+ thread_with_stdio_has_output(thr));
+ if (ret)
+ return ret;
+
+ if (thr->thr.done)
+ return 0;
+
+ while (len) {
+ ret = darray_make_room(&thr->output2, thr->stdio.output_buf.pos);
+ if (ret)
+ break;
+
+ spin_lock_irq(&thr->stdio.output_lock);
+ b = min_t(size_t, darray_room(thr->output2), thr->stdio.output_buf.pos);
+
+ memcpy(&darray_top(thr->output2), thr->stdio.output_buf.buf, b);
+ memmove(thr->stdio.output_buf.buf,
+ thr->stdio.output_buf.buf + b,
+ thr->stdio.output_buf.pos - b);
+
+ thr->output2.nr += b;
+ thr->stdio.output_buf.pos -= b;
+ spin_unlock_irq(&thr->stdio.output_lock);
+
+ b = min(len, thr->output2.nr);
+ if (!b)
+ break;
+
+ b -= copy_to_user(buf, thr->output2.data, b);
+ if (!b) {
+ ret = -EFAULT;
+ break;
+ }
+
+ copied += b;
+ buf += b;
+ len -= b;
+
+ memmove(thr->output2.data,
+ thr->output2.data + b,
+ thr->output2.nr - b);
+ thr->output2.nr -= b;
+ }
+
+ return copied ?: ret;
+}
+
+static int thread_with_stdio_release(struct inode *inode, struct file *file)
+{
+ struct thread_with_stdio *thr =
+ container_of(file->private_data, struct thread_with_stdio, thr);
+
+ bch2_thread_with_file_exit(&thr->thr);
+ printbuf_exit(&thr->stdio.input_buf);
+ printbuf_exit(&thr->stdio.output_buf);
+ darray_exit(&thr->output2);
+ thr->exit(thr);
+ return 0;
+}
+
+#define WRITE_BUFFER 4096
+
+static inline bool thread_with_stdio_has_input_space(struct thread_with_stdio *thr)
+{
+ return thr->stdio.input_buf.pos < WRITE_BUFFER || thr->thr.done;
+}
+
+static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubuf,
+ size_t len, loff_t *ppos)
+{
+ struct thread_with_stdio *thr =
+ container_of(file->private_data, struct thread_with_stdio, thr);
+ struct printbuf *buf = &thr->stdio.input_buf;
+ size_t copied = 0;
+ ssize_t ret = 0;
+
+ while (len) {
+ if (thr->thr.done) {
+ ret = -EPIPE;
+ break;
+ }
+
+ size_t b = len - fault_in_readable(ubuf, len);
+ if (!b) {
+ ret = -EFAULT;
+ break;
+ }
+
+ spin_lock(&thr->stdio.input_lock);
+ if (buf->pos < WRITE_BUFFER)
+ bch2_printbuf_make_room(buf, min(b, WRITE_BUFFER - buf->pos));
+ b = min(len, printbuf_remaining_size(buf));
+
+ if (b && !copy_from_user_nofault(&buf->buf[buf->pos], ubuf, b)) {
+ ubuf += b;
+ len -= b;
+ copied += b;
+ buf->pos += b;
+ }
+ spin_unlock(&thr->stdio.input_lock);
+
+ if (b) {
+ wake_up(&thr->stdio.input_wait);
+ } else {
+ if ((file->f_flags & O_NONBLOCK)) {
+ ret = -EAGAIN;
+ break;
+ }
+
+ ret = wait_event_interruptible(thr->stdio.input_wait,
+ thread_with_stdio_has_input_space(thr));
+ if (ret)
+ break;
+ }
+ }
+
+ return copied ?: ret;
+}
+
+static __poll_t thread_with_stdio_poll(struct file *file, struct poll_table_struct *wait)
+{
+ struct thread_with_stdio *thr =
+ container_of(file->private_data, struct thread_with_stdio, thr);
+
+ poll_wait(file, &thr->stdio.output_wait, wait);
+ poll_wait(file, &thr->stdio.input_wait, wait);
+
+ __poll_t mask = 0;
+
+ if (thread_with_stdio_has_output(thr))
+ mask |= EPOLLIN;
+ if (thread_with_stdio_has_input_space(thr))
+ mask |= EPOLLOUT;
+ if (thr->thr.done)
+ mask |= EPOLLHUP|EPOLLERR;
+ return mask;
+}
+
+static const struct file_operations thread_with_stdio_fops = {
+ .release = thread_with_stdio_release,
+ .read = thread_with_stdio_read,
+ .write = thread_with_stdio_write,
+ .poll = thread_with_stdio_poll,
+ .llseek = no_llseek,
+};
+
+int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
+ void (*exit)(struct thread_with_stdio *),
+ int (*fn)(void *))
+{
+ thr->stdio.input_buf = PRINTBUF;
+ thr->stdio.input_buf.atomic++;
+ spin_lock_init(&thr->stdio.input_lock);
+ init_waitqueue_head(&thr->stdio.input_wait);
+
+ thr->stdio.output_buf = PRINTBUF;
+ thr->stdio.output_buf.atomic++;
+ spin_lock_init(&thr->stdio.output_lock);
+ init_waitqueue_head(&thr->stdio.output_wait);
+
+ darray_init(&thr->output2);
+ thr->exit = exit;
+
+ return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, fn);
+}
+
+int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *buf, size_t len)
+{
+ wait_event(stdio->input_wait,
+ stdio->input_buf.pos || stdio->done);
+
+ if (stdio->done)
+ return -1;
+
+ spin_lock(&stdio->input_lock);
+ int ret = min(len, stdio->input_buf.pos);
+ stdio->input_buf.pos -= ret;
+ memcpy(buf, stdio->input_buf.buf, ret);
+ memmove(stdio->input_buf.buf,
+ stdio->input_buf.buf + ret,
+ stdio->input_buf.pos);
+ spin_unlock(&stdio->input_lock);
+
+ wake_up(&stdio->input_wait);
+ return ret;
+}
+
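+/* As bch2_stdio_redirect_read(), but consume at most one line, up to and including '\n': */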
+int bch2_stdio_redirect_readline(struct stdio_redirect *stdio, char *buf, size_t len)
+{
+ wait_event(stdio->input_wait,
+ stdio->input_buf.pos || stdio->done);
+
+ if (stdio->done)
+ return -1;
+
+ spin_lock(&stdio->input_lock);
+ int ret = min(len, stdio->input_buf.pos);
+ char *n = memchr(stdio->input_buf.buf, '\n', ret);
+ if (n)
+ ret = min(ret, n + 1 - stdio->input_buf.buf);
+ stdio->input_buf.pos -= ret;
+ memcpy(buf, stdio->input_buf.buf, ret);
+ memmove(stdio->input_buf.buf,
+ stdio->input_buf.buf + ret,
+ stdio->input_buf.pos);
+ spin_unlock(&stdio->input_lock);
+
+ wake_up(&stdio->input_wait);
+ return ret;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/c_src/libbcachefs/thread_with_file.h b/c_src/libbcachefs/thread_with_file.h
new file mode 100644
index 00000000..05879c50
--- /dev/null
+++ b/c_src/libbcachefs/thread_with_file.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_THREAD_WITH_FILE_H
+#define _BCACHEFS_THREAD_WITH_FILE_H
+
+#include "thread_with_file_types.h"
+
+struct task_struct;
+
+struct thread_with_file {
+ struct task_struct *task;
+ int ret;
+ bool done;
+};
+
+void bch2_thread_with_file_exit(struct thread_with_file *);
+int bch2_run_thread_with_file(struct thread_with_file *,
+ const struct file_operations *,
+ int (*fn)(void *));
+
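+/*
+ * A thread_with_file whose file descriptor carries the thread's redirected
+ * stdio: userspace writes land in stdio.input_buf, reads drain
+ * stdio.output_buf.
+ */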
+struct thread_with_stdio {
+ struct thread_with_file thr;
+ struct stdio_redirect stdio;
+ DARRAY(char) output2;
+ void (*exit)(struct thread_with_stdio *);
+};
+
+static inline void thread_with_stdio_done(struct thread_with_stdio *thr)
+{
+ thr->thr.done = true;
+ thr->stdio.done = true;
+ wake_up(&thr->stdio.input_wait);
+ wake_up(&thr->stdio.output_wait);
+}
+
+int bch2_run_thread_with_stdio(struct thread_with_stdio *,
+ void (*exit)(struct thread_with_stdio *),
+ int (*fn)(void *));
+int bch2_stdio_redirect_read(struct stdio_redirect *, char *, size_t);
+int bch2_stdio_redirect_readline(struct stdio_redirect *, char *, size_t);
+
+#endif /* _BCACHEFS_THREAD_WITH_FILE_H */
diff --git a/c_src/libbcachefs/thread_with_file_types.h b/c_src/libbcachefs/thread_with_file_types.h
new file mode 100644
index 00000000..90b5e645
--- /dev/null
+++ b/c_src/libbcachefs/thread_with_file_types.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_THREAD_WITH_FILE_TYPES_H
+#define _BCACHEFS_THREAD_WITH_FILE_TYPES_H
+
+struct stdio_redirect {
+ spinlock_t output_lock;
+ wait_queue_head_t output_wait;
+ struct printbuf output_buf;
+
+ spinlock_t input_lock;
+ wait_queue_head_t input_wait;
+ struct printbuf input_buf;
+ bool done;
+};
+
+#endif /* _BCACHEFS_THREAD_WITH_FILE_TYPES_H */
diff --git a/c_src/libbcachefs/trace.c b/c_src/libbcachefs/trace.c
new file mode 100644
index 00000000..dc48b52b
--- /dev/null
+++ b/c_src/libbcachefs/trace.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "alloc_types.h"
+#include "buckets.h"
+#include "btree_cache.h"
+#include "btree_iter.h"
+#include "btree_locking.h"
+#include "btree_update_interior.h"
+#include "keylist.h"
+#include "move_types.h"
+#include "opts.h"
+#include "six.h"
+
+#include <linux/blktrace_api.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/c_src/libbcachefs/trace.h b/c_src/libbcachefs/trace.h
new file mode 100644
index 00000000..c94876b3
--- /dev/null
+++ b/c_src/libbcachefs/trace.h
@@ -0,0 +1,1473 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bcachefs
+
+#if !defined(_TRACE_BCACHEFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BCACHEFS_H
+
+#include <linux/tracepoint.h>
+
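+/* Helpers for recording a struct bpos (inode:offset:snapshot) in tracepoint entries: */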
+#define TRACE_BPOS_entries(name) \
+ __field(u64, name##_inode ) \
+ __field(u64, name##_offset ) \
+ __field(u32, name##_snapshot )
+
+#define TRACE_BPOS_assign(dst, src) \
+ __entry->dst##_inode = (src).inode; \
+ __entry->dst##_offset = (src).offset; \
+ __entry->dst##_snapshot = (src).snapshot
+
+DECLARE_EVENT_CLASS(bpos,
+ TP_PROTO(const struct bpos *p),
+ TP_ARGS(p),
+
+ TP_STRUCT__entry(
+ TRACE_BPOS_entries(p)
+ ),
+
+ TP_fast_assign(
+ TRACE_BPOS_assign(p, *p);
+ ),
+
+ TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot)
+);
+
+DECLARE_EVENT_CLASS(fs_str,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __string(str, str )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __assign_str(str, str);
+ ),
+
+ TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str))
+);
+
+DECLARE_EVENT_CLASS(trans_str,
+ TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *str),
+ TP_ARGS(trans, caller_ip, str),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __string(str, str )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = trans->c->dev;
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __assign_str(str, str);
+ ),
+
+ TP_printk("%d,%d %s %pS %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->trans_fn, (void *) __entry->caller_ip, __get_str(str))
+);
+
+DECLARE_EVENT_CLASS(trans_str_nocaller,
+ TP_PROTO(struct btree_trans *trans, const char *str),
+ TP_ARGS(trans, str),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __array(char, trans_fn, 32 )
+ __string(str, str )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = trans->c->dev;
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __assign_str(str, str);
+ ),
+
+ TP_printk("%d,%d %s %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->trans_fn, __get_str(str))
+);
+
+DECLARE_EVENT_CLASS(btree_node_nofs,
+ TP_PROTO(struct bch_fs *c, struct btree *b),
+ TP_ARGS(c, b),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u8, level )
+ __field(u8, btree_id )
+ TRACE_BPOS_entries(pos)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->level = b->c.level;
+ __entry->btree_id = b->c.btree_id;
+ TRACE_BPOS_assign(pos, b->key.k.p);
+ ),
+
+ TP_printk("%d,%d %u %s %llu:%llu:%u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->level,
+ bch2_btree_id_str(__entry->btree_id),
+ __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
+);
+
+DECLARE_EVENT_CLASS(btree_node,
+ TP_PROTO(struct btree_trans *trans, struct btree *b),
+ TP_ARGS(trans, b),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __array(char, trans_fn, 32 )
+ __field(u8, level )
+ __field(u8, btree_id )
+ TRACE_BPOS_entries(pos)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = trans->c->dev;
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->level = b->c.level;
+ __entry->btree_id = b->c.btree_id;
+ TRACE_BPOS_assign(pos, b->key.k.p);
+ ),
+
+ TP_printk("%d,%d %s %u %s %llu:%llu:%u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn,
+ __entry->level,
+ bch2_btree_id_str(__entry->btree_id),
+ __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
+);
+
+DECLARE_EVENT_CLASS(bch_fs,
+ TP_PROTO(struct bch_fs *c),
+ TP_ARGS(c),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ ),
+
+ TP_printk("%d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
+);
+
+DECLARE_EVENT_CLASS(btree_trans,
+ TP_PROTO(struct btree_trans *trans),
+ TP_ARGS(trans),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __array(char, trans_fn, 32 )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = trans->c->dev;
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ ),
+
+ TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn)
+);
+
+DECLARE_EVENT_CLASS(bio,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(sector_t, sector )
+ __field(unsigned int, nr_sector )
+ __array(char, rwbs, 6 )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bio->bi_bdev ? bio_dev(bio) : 0;
+ __entry->sector = bio->bi_iter.bi_sector;
+ __entry->nr_sector = bio->bi_iter.bi_size >> 9;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
+ ),
+
+ TP_printk("%d,%d %s %llu + %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ (unsigned long long)__entry->sector, __entry->nr_sector)
+);
+
+/* super-io.c: */
+TRACE_EVENT(write_super,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(unsigned long, ip )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->ip = ip;
+ ),
+
+ TP_printk("%d,%d for %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (void *) __entry->ip)
+);
+
+/* io.c: */
+
+DEFINE_EVENT(bio, read_promote,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
+TRACE_EVENT(read_nopromote,
+ TP_PROTO(struct bch_fs *c, int ret),
+ TP_ARGS(c, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __array(char, ret, 32 )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret));
+ ),
+
+ TP_printk("%d,%d ret %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ret)
+);
+
+DEFINE_EVENT(bio, read_bounce,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bio, read_split,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bio, read_retry,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bio, read_reuse_race,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
+/* Journal */
+
+DEFINE_EVENT(bch_fs, journal_full,
+ TP_PROTO(struct bch_fs *c),
+ TP_ARGS(c)
+);
+
+DEFINE_EVENT(bch_fs, journal_entry_full,
+ TP_PROTO(struct bch_fs *c),
+ TP_ARGS(c)
+);
+
+TRACE_EVENT(journal_entry_close,
+ TP_PROTO(struct bch_fs *c, unsigned bytes),
+ TP_ARGS(c, bytes),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u32, bytes )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->bytes = bytes;
+ ),
+
+ TP_printk("%d,%d entry bytes %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->bytes)
+);
+
+DEFINE_EVENT(bio, journal_write,
+ TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+
+TRACE_EVENT(journal_reclaim_start,
+ TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
+ u64 min_nr, u64 min_key_cache,
+ u64 btree_cache_dirty, u64 btree_cache_total,
+ u64 btree_key_cache_dirty, u64 btree_key_cache_total),
+ TP_ARGS(c, direct, kicked, min_nr, min_key_cache,
+ btree_cache_dirty, btree_cache_total,
+ btree_key_cache_dirty, btree_key_cache_total),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(bool, direct )
+ __field(bool, kicked )
+ __field(u64, min_nr )
+ __field(u64, min_key_cache )
+ __field(u64, btree_cache_dirty )
+ __field(u64, btree_cache_total )
+ __field(u64, btree_key_cache_dirty )
+ __field(u64, btree_key_cache_total )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->direct = direct;
+ __entry->kicked = kicked;
+ __entry->min_nr = min_nr;
+ __entry->min_key_cache = min_key_cache;
+ __entry->btree_cache_dirty = btree_cache_dirty;
+ __entry->btree_cache_total = btree_cache_total;
+ __entry->btree_key_cache_dirty = btree_key_cache_dirty;
+ __entry->btree_key_cache_total = btree_key_cache_total;
+ ),
+
+ TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->direct,
+ __entry->kicked,
+ __entry->min_nr,
+ __entry->min_key_cache,
+ __entry->btree_cache_dirty,
+ __entry->btree_cache_total,
+ __entry->btree_key_cache_dirty,
+ __entry->btree_key_cache_total)
+);
+
+TRACE_EVENT(journal_reclaim_finish,
+ TP_PROTO(struct bch_fs *c, u64 nr_flushed),
+ TP_ARGS(c, nr_flushed),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u64, nr_flushed )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->nr_flushed = nr_flushed;
+ ),
+
+ TP_printk("%d,%d flushed %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->nr_flushed)
+);
+
+/* bset.c: */
+
+DEFINE_EVENT(bpos, bkey_pack_pos_fail,
+ TP_PROTO(const struct bpos *p),
+ TP_ARGS(p)
+);
+
+/* Btree cache: */
+
+TRACE_EVENT(btree_cache_scan,
+ TP_PROTO(long nr_to_scan, long can_free, long ret),
+ TP_ARGS(nr_to_scan, can_free, ret),
+
+ TP_STRUCT__entry(
+ __field(long, nr_to_scan )
+ __field(long, can_free )
+ __field(long, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->nr_to_scan = nr_to_scan;
+ __entry->can_free = can_free;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("scanned for %li nodes, can free %li, ret %li",
+ __entry->nr_to_scan, __entry->can_free, __entry->ret)
+);
+
+DEFINE_EVENT(btree_node_nofs, btree_cache_reap,
+ TP_PROTO(struct bch_fs *c, struct btree *b),
+ TP_ARGS(c, b)
+);
+
+DEFINE_EVENT(btree_trans, btree_cache_cannibalize_lock_fail,
+ TP_PROTO(struct btree_trans *trans),
+ TP_ARGS(trans)
+);
+
+DEFINE_EVENT(btree_trans, btree_cache_cannibalize_lock,
+ TP_PROTO(struct btree_trans *trans),
+ TP_ARGS(trans)
+);
+
+DEFINE_EVENT(btree_trans, btree_cache_cannibalize,
+ TP_PROTO(struct btree_trans *trans),
+ TP_ARGS(trans)
+);
+
+DEFINE_EVENT(btree_trans, btree_cache_cannibalize_unlock,
+ TP_PROTO(struct btree_trans *trans),
+ TP_ARGS(trans)
+);
+
+/* Btree */
+
+DEFINE_EVENT(btree_node, btree_node_read,
+ TP_PROTO(struct btree_trans *trans, struct btree *b),
+ TP_ARGS(trans, b)
+);
+
+TRACE_EVENT(btree_node_write,
+ TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors),
+ TP_ARGS(b, bytes, sectors),
+
+ TP_STRUCT__entry(
+ __field(enum btree_node_type, type)
+ __field(unsigned, bytes )
+ __field(unsigned, sectors )
+ ),
+
+ TP_fast_assign(
+ __entry->type = btree_node_type(b);
+ __entry->bytes = bytes;
+ __entry->sectors = sectors;
+ ),
+
+ TP_printk("bkey type %u bytes %u sectors %u",
+ __entry->type, __entry->bytes, __entry->sectors)
+);
+
+DEFINE_EVENT(btree_node, btree_node_alloc,
+ TP_PROTO(struct btree_trans *trans, struct btree *b),
+ TP_ARGS(trans, b)
+);
+
+DEFINE_EVENT(btree_node, btree_node_free,
+ TP_PROTO(struct btree_trans *trans, struct btree *b),
+ TP_ARGS(trans, b)
+);
+
+TRACE_EVENT(btree_reserve_get_fail,
+ TP_PROTO(const char *trans_fn,
+ unsigned long caller_ip,
+ size_t required,
+ int ret),
+ TP_ARGS(trans_fn, caller_ip, required, ret),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __field(size_t, required )
+ __array(char, ret, 32 )
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __entry->required = required;
+ strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret));
+ ),
+
+ TP_printk("%s %pS required %zu ret %s",
+ __entry->trans_fn,
+ (void *) __entry->caller_ip,
+ __entry->required,
+ __entry->ret)
+);
+
+DEFINE_EVENT(btree_node, btree_node_compact,
+ TP_PROTO(struct btree_trans *trans, struct btree *b),
+ TP_ARGS(trans, b)
+);
+
+DEFINE_EVENT(btree_node, btree_node_merge,
+ TP_PROTO(struct btree_trans *trans, struct btree *b),
+ TP_ARGS(trans, b)
+);
+
+DEFINE_EVENT(btree_node, btree_node_split,
+ TP_PROTO(struct btree_trans *trans, struct btree *b),
+ TP_ARGS(trans, b)
+);
+
+DEFINE_EVENT(btree_node, btree_node_rewrite,
+ TP_PROTO(struct btree_trans *trans, struct btree *b),
+ TP_ARGS(trans, b)
+);
+
+DEFINE_EVENT(btree_node, btree_node_set_root,
+ TP_PROTO(struct btree_trans *trans, struct btree *b),
+ TP_ARGS(trans, b)
+);
+
+TRACE_EVENT(btree_path_relock_fail,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path,
+ unsigned level),
+ TP_ARGS(trans, caller_ip, path, level),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __field(u8, btree_id )
+ __field(u8, level )
+ TRACE_BPOS_entries(pos)
+ __array(char, node, 24 )
+ __field(u8, self_read_count )
+ __field(u8, self_intent_count)
+ __field(u8, read_count )
+ __field(u8, intent_count )
+ __field(u32, iter_lock_seq )
+ __field(u32, node_lock_seq )
+ ),
+
+ TP_fast_assign(
+ struct btree *b = btree_path_node(path, level);
+ struct six_lock_count c;
+
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __entry->btree_id = path->btree_id;
+ __entry->level = path->level;
+ TRACE_BPOS_assign(pos, path->pos);
+
+ c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level);
+ __entry->self_read_count = c.n[SIX_LOCK_read];
+ __entry->self_intent_count = c.n[SIX_LOCK_intent];
+
+ if (IS_ERR(b)) {
+ strscpy(__entry->node, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node));
+ } else {
+ c = six_lock_counts(&path->l[level].b->c.lock);
+ __entry->read_count = c.n[SIX_LOCK_read];
+ __entry->intent_count = c.n[SIX_LOCK_intent];
+ scnprintf(__entry->node, sizeof(__entry->node), "%px", b);
+ }
+ __entry->iter_lock_seq = path->l[level].lock_seq;
+ __entry->node_lock_seq = is_btree_node(path, level)
+ ? six_lock_seq(&path->l[level].b->c.lock)
+ : 0;
+ ),
+
+ TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u node %s held %u:%u lock count %u:%u iter seq %u lock seq %u",
+ __entry->trans_fn,
+ (void *) __entry->caller_ip,
+ bch2_btree_id_str(__entry->btree_id),
+ __entry->pos_inode,
+ __entry->pos_offset,
+ __entry->pos_snapshot,
+ __entry->level,
+ __entry->node,
+ __entry->self_read_count,
+ __entry->self_intent_count,
+ __entry->read_count,
+ __entry->intent_count,
+ __entry->iter_lock_seq,
+ __entry->node_lock_seq)
+);
+
+TRACE_EVENT(btree_path_upgrade_fail,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path,
+ unsigned level),
+ TP_ARGS(trans, caller_ip, path, level),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __field(u8, btree_id )
+ __field(u8, level )
+ TRACE_BPOS_entries(pos)
+ __field(u8, locked )
+ __field(u8, self_read_count )
+ __field(u8, self_intent_count)
+ __field(u8, read_count )
+ __field(u8, intent_count )
+ __field(u32, iter_lock_seq )
+ __field(u32, node_lock_seq )
+ ),
+
+ TP_fast_assign(
+ struct six_lock_count c;
+
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __entry->btree_id = path->btree_id;
+ __entry->level = level;
+ TRACE_BPOS_assign(pos, path->pos);
+ __entry->locked = btree_node_locked(path, level);
+
+ c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level);
+ __entry->self_read_count = c.n[SIX_LOCK_read];
+ __entry->self_intent_count = c.n[SIX_LOCK_intent];
+ c = six_lock_counts(&path->l[level].b->c.lock);
+ __entry->read_count = c.n[SIX_LOCK_read];
+ __entry->intent_count = c.n[SIX_LOCK_intent];
+ __entry->iter_lock_seq = path->l[level].lock_seq;
+ __entry->node_lock_seq = is_btree_node(path, level)
+ ? six_lock_seq(&path->l[level].b->c.lock)
+ : 0;
+ ),
+
+ TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u locked %u held %u:%u lock count %u:%u iter seq %u lock seq %u",
+ __entry->trans_fn,
+ (void *) __entry->caller_ip,
+ bch2_btree_id_str(__entry->btree_id),
+ __entry->pos_inode,
+ __entry->pos_offset,
+ __entry->pos_snapshot,
+ __entry->level,
+ __entry->locked,
+ __entry->self_read_count,
+ __entry->self_intent_count,
+ __entry->read_count,
+ __entry->intent_count,
+ __entry->iter_lock_seq,
+ __entry->node_lock_seq)
+);
+
+/* Garbage collection */
+
+DEFINE_EVENT(bch_fs, gc_gens_start,
+ TP_PROTO(struct bch_fs *c),
+ TP_ARGS(c)
+);
+
+DEFINE_EVENT(bch_fs, gc_gens_end,
+ TP_PROTO(struct bch_fs *c),
+ TP_ARGS(c)
+);
+
+/* Allocator */
+
+DECLARE_EVENT_CLASS(bucket_alloc,
+ TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
+ u64 bucket,
+ u64 free,
+ u64 avail,
+ u64 copygc_wait_amount,
+ s64 copygc_waiting_for,
+ struct bucket_alloc_state *s,
+ bool nonblocking,
+ const char *err),
+ TP_ARGS(ca, alloc_reserve, bucket, free, avail,
+ copygc_wait_amount, copygc_waiting_for,
+ s, nonblocking, err),
+
+ TP_STRUCT__entry(
+ __field(u8, dev )
+ __array(char, reserve, 16 )
+ __field(u64, bucket )
+ __field(u64, free )
+ __field(u64, avail )
+ __field(u64, copygc_wait_amount )
+ __field(s64, copygc_waiting_for )
+ __field(u64, seen )
+ __field(u64, open )
+ __field(u64, need_journal_commit )
+ __field(u64, nouse )
+ __field(bool, nonblocking )
+ __field(u64, nocow )
+ __array(char, err, 32 )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = ca->dev_idx;
+ strscpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve));
+ __entry->bucket = bucket;
+ __entry->free = free;
+ __entry->avail = avail;
+ __entry->copygc_wait_amount = copygc_wait_amount;
+ __entry->copygc_waiting_for = copygc_waiting_for;
+ __entry->seen = s->buckets_seen;
+ __entry->open = s->skipped_open;
+ __entry->need_journal_commit = s->skipped_need_journal_commit;
+ __entry->nouse = s->skipped_nouse;
+ __entry->nonblocking = nonblocking;
+ __entry->nocow = s->skipped_nocow;
+ strscpy(__entry->err, err, sizeof(__entry->err));
+ ),
+
+ TP_printk("reserve %s bucket %u:%llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s",
+ __entry->reserve,
+ __entry->dev,
+ __entry->bucket,
+ __entry->free,
+ __entry->avail,
+ __entry->copygc_wait_amount,
+ __entry->copygc_waiting_for,
+ __entry->seen,
+ __entry->open,
+ __entry->need_journal_commit,
+ __entry->nouse,
+ __entry->nocow,
+ __entry->nonblocking,
+ __entry->err)
+);
+
+DEFINE_EVENT(bucket_alloc, bucket_alloc,
+ TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
+ u64 bucket,
+ u64 free,
+ u64 avail,
+ u64 copygc_wait_amount,
+ s64 copygc_waiting_for,
+ struct bucket_alloc_state *s,
+ bool nonblocking,
+ const char *err),
+ TP_ARGS(ca, alloc_reserve, bucket, free, avail,
+ copygc_wait_amount, copygc_waiting_for,
+ s, nonblocking, err)
+);
+
+DEFINE_EVENT(bucket_alloc, bucket_alloc_fail,
+ TP_PROTO(struct bch_dev *ca, const char *alloc_reserve,
+ u64 bucket,
+ u64 free,
+ u64 avail,
+ u64 copygc_wait_amount,
+ s64 copygc_waiting_for,
+ struct bucket_alloc_state *s,
+ bool nonblocking,
+ const char *err),
+ TP_ARGS(ca, alloc_reserve, bucket, free, avail,
+ copygc_wait_amount, copygc_waiting_for,
+ s, nonblocking, err)
+);
+
+TRACE_EVENT(discard_buckets,
+ TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
+ u64 need_journal_commit, u64 discarded, const char *err),
+ TP_ARGS(c, seen, open, need_journal_commit, discarded, err),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u64, seen )
+ __field(u64, open )
+ __field(u64, need_journal_commit )
+ __field(u64, discarded )
+ __array(char, err, 16 )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->seen = seen;
+ __entry->open = open;
+ __entry->need_journal_commit = need_journal_commit;
+ __entry->discarded = discarded;
+ strscpy(__entry->err, err, sizeof(__entry->err));
+ ),
+
+ TP_printk("%d,%d seen %llu open %llu need_journal_commit %llu discarded %llu err %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->seen,
+ __entry->open,
+ __entry->need_journal_commit,
+ __entry->discarded,
+ __entry->err)
+);
+
+TRACE_EVENT(bucket_invalidate,
+ TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors),
+ TP_ARGS(c, dev, bucket, sectors),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u32, dev_idx )
+ __field(u32, sectors )
+ __field(u64, bucket )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->dev_idx = dev;
+ __entry->sectors = sectors;
+ __entry->bucket = bucket;
+ ),
+
+ TP_printk("%d:%d invalidated %u:%llu cached sectors %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dev_idx, __entry->bucket,
+ __entry->sectors)
+);
+
+/* Moving IO */
+
+TRACE_EVENT(bucket_evacuate,
+ TP_PROTO(struct bch_fs *c, struct bpos *bucket),
+ TP_ARGS(c, bucket),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u32, dev_idx )
+ __field(u64, bucket )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->dev_idx = bucket->inode;
+ __entry->bucket = bucket->offset;
+ ),
+
+ TP_printk("%d:%d %u:%llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dev_idx, __entry->bucket)
+);
+
+DEFINE_EVENT(fs_str, move_extent,
+ TP_PROTO(struct bch_fs *c, const char *k),
+ TP_ARGS(c, k)
+);
+
+DEFINE_EVENT(fs_str, move_extent_read,
+ TP_PROTO(struct bch_fs *c, const char *k),
+ TP_ARGS(c, k)
+);
+
+DEFINE_EVENT(fs_str, move_extent_write,
+ TP_PROTO(struct bch_fs *c, const char *k),
+ TP_ARGS(c, k)
+);
+
+DEFINE_EVENT(fs_str, move_extent_finish,
+ TP_PROTO(struct bch_fs *c, const char *k),
+ TP_ARGS(c, k)
+);
+
+TRACE_EVENT(move_extent_fail,
+ TP_PROTO(struct bch_fs *c, const char *msg),
+ TP_ARGS(c, msg),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __string(msg, msg )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __assign_str(msg, msg);
+ ),
+
+ TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
+);
+
+DEFINE_EVENT(fs_str, move_extent_start_fail,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+TRACE_EVENT(move_data,
+ TP_PROTO(struct bch_fs *c,
+ struct bch_move_stats *stats),
+ TP_ARGS(c, stats),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u64, keys_moved )
+ __field(u64, keys_raced )
+ __field(u64, sectors_seen )
+ __field(u64, sectors_moved )
+ __field(u64, sectors_raced )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->keys_moved = atomic64_read(&stats->keys_moved);
+ __entry->keys_raced = atomic64_read(&stats->keys_raced);
+ __entry->sectors_seen = atomic64_read(&stats->sectors_seen);
+ __entry->sectors_moved = atomic64_read(&stats->sectors_moved);
+ __entry->sectors_raced = atomic64_read(&stats->sectors_raced);
+ ),
+
+ TP_printk("%d,%d keys moved %llu raced %llu "
+ "sectors seen %llu moved %llu raced %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->keys_moved,
+ __entry->keys_raced,
+ __entry->sectors_seen,
+ __entry->sectors_moved,
+ __entry->sectors_raced)
+);
+
+TRACE_EVENT(evacuate_bucket,
+ TP_PROTO(struct bch_fs *c, struct bpos *bucket,
+ unsigned sectors, unsigned bucket_size,
+ u64 fragmentation, int ret),
+ TP_ARGS(c, bucket, sectors, bucket_size, fragmentation, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u64, member )
+ __field(u64, bucket )
+ __field(u32, sectors )
+ __field(u32, bucket_size )
+ __field(u64, fragmentation )
+ __field(int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->member = bucket->inode;
+ __entry->bucket = bucket->offset;
+ __entry->sectors = sectors;
+ __entry->bucket_size = bucket_size;
+ __entry->fragmentation = fragmentation;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("%d,%d %llu:%llu sectors %u/%u fragmentation %llu ret %i",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->member, __entry->bucket,
+ __entry->sectors, __entry->bucket_size,
+ __entry->fragmentation, __entry->ret)
+);
+
+TRACE_EVENT(copygc,
+ TP_PROTO(struct bch_fs *c,
+ u64 sectors_moved, u64 sectors_not_moved,
+ u64 buckets_moved, u64 buckets_not_moved),
+ TP_ARGS(c,
+ sectors_moved, sectors_not_moved,
+ buckets_moved, buckets_not_moved),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u64, sectors_moved )
+ __field(u64, sectors_not_moved )
+ __field(u64, buckets_moved )
+ __field(u64, buckets_not_moved )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->sectors_moved = sectors_moved;
+ __entry->sectors_not_moved = sectors_not_moved;
+ __entry->buckets_moved = buckets_moved;
+ __entry->buckets_not_moved = buckets_not_moved;
+ ),
+
+ TP_printk("%d,%d sectors moved %llu remain %llu buckets moved %llu remain %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->sectors_moved, __entry->sectors_not_moved,
+ __entry->buckets_moved, __entry->buckets_not_moved)
+);
+
+TRACE_EVENT(copygc_wait,
+ TP_PROTO(struct bch_fs *c,
+ u64 wait_amount, u64 until),
+ TP_ARGS(c, wait_amount, until),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u64, wait_amount )
+ __field(u64, until )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->wait_amount = wait_amount;
+ __entry->until = until;
+ ),
+
+ TP_printk("%d,%d waiting for %llu sectors until %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->wait_amount, __entry->until)
+);
+
+/* btree transactions: */
+
+DECLARE_EVENT_CLASS(transaction_event,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ ),
+
+ TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip)
+);
+
+DEFINE_EVENT(transaction_event, transaction_commit,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip)
+);
+
+DEFINE_EVENT(transaction_event, trans_restart_injected,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip)
+);
+
+TRACE_EVENT(trans_restart_split_race,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree *b),
+ TP_ARGS(trans, caller_ip, b),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __field(u8, level )
+ __field(u16, written )
+ __field(u16, blocks )
+ __field(u16, u64s_remaining )
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __entry->level = b->c.level;
+ __entry->written = b->written;
+ __entry->blocks = btree_blocks(trans->c);
+ __entry->u64s_remaining = bch_btree_keys_u64s_remaining(trans->c, b);
+ ),
+
+ TP_printk("%s %pS l=%u written %u/%u u64s remaining %u",
+ __entry->trans_fn, (void *) __entry->caller_ip,
+ __entry->level,
+ __entry->written, __entry->blocks,
+ __entry->u64s_remaining)
+);
+
+DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip)
+);
+
+TRACE_EVENT(trans_restart_journal_preres_get,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ unsigned flags),
+ TP_ARGS(trans, caller_ip, flags),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __field(unsigned, flags )
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("%s %pS %x", __entry->trans_fn,
+ (void *) __entry->caller_ip,
+ __entry->flags)
+);
+
+DEFINE_EVENT(transaction_event, trans_restart_fault_inject,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip)
+);
+
+DEFINE_EVENT(transaction_event, trans_traverse_all,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip)
+);
+
+DEFINE_EVENT(transaction_event, trans_restart_key_cache_raced,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip)
+);
+
+DEFINE_EVENT(trans_str, trans_restart_too_many_iters,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ const char *paths),
+ TP_ARGS(trans, caller_ip, paths)
+);
+
+DECLARE_EVENT_CLASS(transaction_restart_iter,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __field(u8, btree_id )
+ TRACE_BPOS_entries(pos)
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __entry->btree_id = path->btree_id;
+ TRACE_BPOS_assign(pos, path->pos)
+ ),
+
+ TP_printk("%s %pS btree %s pos %llu:%llu:%u",
+ __entry->trans_fn,
+ (void *) __entry->caller_ip,
+ bch2_btree_id_str(__entry->btree_id),
+ __entry->pos_inode,
+ __entry->pos_offset,
+ __entry->pos_snapshot)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+struct get_locks_fail;
+
+TRACE_EVENT(trans_restart_upgrade,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path,
+ unsigned old_locks_want,
+ unsigned new_locks_want,
+ struct get_locks_fail *f),
+ TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want, f),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __field(u8, btree_id )
+ __field(u8, old_locks_want )
+ __field(u8, new_locks_want )
+ __field(u8, level )
+ __field(u32, path_seq )
+ __field(u32, node_seq )
+ TRACE_BPOS_entries(pos)
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __entry->btree_id = path->btree_id;
+ __entry->old_locks_want = old_locks_want;
+ __entry->new_locks_want = new_locks_want;
+ __entry->level = f->l;
+ __entry->path_seq = path->l[f->l].lock_seq;
+ __entry->node_seq = IS_ERR_OR_NULL(f->b) ? 0 : f->b->c.lock.seq;
+ TRACE_BPOS_assign(pos, path->pos)
+ ),
+
+ TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u",
+ __entry->trans_fn,
+ (void *) __entry->caller_ip,
+ bch2_btree_id_str(__entry->btree_id),
+ __entry->pos_inode,
+ __entry->pos_offset,
+ __entry->pos_snapshot,
+ __entry->old_locks_want,
+ __entry->new_locks_want,
+ __entry->level,
+ __entry->path_seq,
+ __entry->node_seq)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_next_node,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(transaction_event, trans_restart_key_cache_upgrade,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path)
+);
+
+DEFINE_EVENT(trans_str_nocaller, trans_restart_would_deadlock,
+ TP_PROTO(struct btree_trans *trans,
+ const char *cycle),
+ TP_ARGS(trans, cycle)
+);
+
+DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip)
+);
+
+TRACE_EVENT(trans_restart_would_deadlock_write,
+ TP_PROTO(struct btree_trans *trans),
+ TP_ARGS(trans),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ ),
+
+ TP_printk("%s", __entry->trans_fn)
+);
+
+TRACE_EVENT(trans_restart_mem_realloced,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ unsigned long bytes),
+ TP_ARGS(trans, caller_ip, bytes),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __field(unsigned long, bytes )
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __entry->bytes = bytes;
+ ),
+
+ TP_printk("%s %pS bytes %lu",
+ __entry->trans_fn,
+ (void *) __entry->caller_ip,
+ __entry->bytes)
+);
+
+TRACE_EVENT(trans_restart_key_cache_key_realloced,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path,
+ unsigned old_u64s,
+ unsigned new_u64s),
+ TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __field(enum btree_id, btree_id )
+ TRACE_BPOS_entries(pos)
+ __field(u32, old_u64s )
+ __field(u32, new_u64s )
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+
+ __entry->btree_id = path->btree_id;
+ TRACE_BPOS_assign(pos, path->pos);
+ __entry->old_u64s = old_u64s;
+ __entry->new_u64s = new_u64s;
+ ),
+
+ TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u",
+ __entry->trans_fn,
+ (void *) __entry->caller_ip,
+ bch2_btree_id_str(__entry->btree_id),
+ __entry->pos_inode,
+ __entry->pos_offset,
+ __entry->pos_snapshot,
+ __entry->old_u64s,
+ __entry->new_u64s)
+);
+
+TRACE_EVENT(path_downgrade,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path,
+ unsigned old_locks_want),
+ TP_ARGS(trans, caller_ip, path, old_locks_want),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ __field(unsigned, old_locks_want )
+ __field(unsigned, new_locks_want )
+ __field(unsigned, btree )
+ TRACE_BPOS_entries(pos)
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __entry->old_locks_want = old_locks_want;
+ __entry->new_locks_want = path->locks_want;
+ __entry->btree = path->btree_id;
+ TRACE_BPOS_assign(pos, path->pos);
+ ),
+
+ TP_printk("%s %pS locks_want %u -> %u %s %llu:%llu:%u",
+ __entry->trans_fn,
+ (void *) __entry->caller_ip,
+ __entry->old_locks_want,
+ __entry->new_locks_want,
+ bch2_btree_id_str(__entry->btree),
+ __entry->pos_inode,
+ __entry->pos_offset,
+ __entry->pos_snapshot)
+);
+
+DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip)
+);
+
+TRACE_EVENT(write_buffer_flush,
+ TP_PROTO(struct btree_trans *trans, size_t nr, size_t skipped, size_t fast, size_t size),
+ TP_ARGS(trans, nr, skipped, fast, size),
+
+ TP_STRUCT__entry(
+ __field(size_t, nr )
+ __field(size_t, skipped )
+ __field(size_t, fast )
+ __field(size_t, size )
+ ),
+
+ TP_fast_assign(
+ __entry->nr = nr;
+ __entry->skipped = skipped;
+ __entry->fast = fast;
+ __entry->size = size;
+ ),
+
+ TP_printk("%zu/%zu skipped %zu fast %zu",
+ __entry->nr, __entry->size, __entry->skipped, __entry->fast)
+);
+
+TRACE_EVENT(write_buffer_flush_sync,
+ TP_PROTO(struct btree_trans *trans, unsigned long caller_ip),
+ TP_ARGS(trans, caller_ip),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ ),
+
+ TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip)
+);
+
+TRACE_EVENT(write_buffer_flush_slowpath,
+ TP_PROTO(struct btree_trans *trans, size_t slowpath, size_t total),
+ TP_ARGS(trans, slowpath, total),
+
+ TP_STRUCT__entry(
+ __field(size_t, slowpath )
+ __field(size_t, total )
+ ),
+
+ TP_fast_assign(
+ __entry->slowpath = slowpath;
+ __entry->total = total;
+ ),
+
+ TP_printk("%zu/%zu", __entry->slowpath, __entry->total)
+);
+
+DEFINE_EVENT(fs_str, rebalance_extent,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(fs_str, data_update,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
+#endif /* _TRACE_BCACHEFS_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../fs/bcachefs
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
diff --git a/c_src/libbcachefs/two_state_shared_lock.c b/c_src/libbcachefs/two_state_shared_lock.c
new file mode 100644
index 00000000..9764c2e6
--- /dev/null
+++ b/c_src/libbcachefs/two_state_shared_lock.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "two_state_shared_lock.h"
+
+void __bch2_two_state_lock(two_state_lock_t *lock, int s)
+{
+ __wait_event(lock->wait, bch2_two_state_trylock(lock, s));
+}
diff --git a/c_src/libbcachefs/two_state_shared_lock.h b/c_src/libbcachefs/two_state_shared_lock.h
new file mode 100644
index 00000000..90580177
--- /dev/null
+++ b/c_src/libbcachefs/two_state_shared_lock.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_TWO_STATE_LOCK_H
+#define _BCACHEFS_TWO_STATE_LOCK_H
+
+#include <linux/atomic.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#include "util.h"
+
+/*
+ * Two-state lock - can be taken for add or block - both states are shared,
+ * like the read side of an rwsem, but each state conflicts with the other:
+ */
+typedef struct {
+ atomic_long_t v;
+ wait_queue_head_t wait;
+} two_state_lock_t;
+
+static inline void two_state_lock_init(two_state_lock_t *lock)
+{
+ atomic_long_set(&lock->v, 0);
+ init_waitqueue_head(&lock->wait);
+}
+
+static inline void bch2_two_state_unlock(two_state_lock_t *lock, int s)
+{
+ long i = s ? 1 : -1;
+
+ EBUG_ON(atomic_long_read(&lock->v) == 0);
+
+ if (atomic_long_sub_return_release(i, &lock->v) == 0)
+ wake_up_all(&lock->wait);
+}
+
+static inline bool bch2_two_state_trylock(two_state_lock_t *lock, int s)
+{
+ long i = s ? 1 : -1;
+ long v = atomic_long_read(&lock->v), old;
+
+ do {
+ old = v;
+
+ if (i > 0 ? v < 0 : v > 0)
+ return false;
+ } while ((v = atomic_long_cmpxchg_acquire(&lock->v,
+ old, old + i)) != old);
+ return true;
+}
+
+void __bch2_two_state_lock(two_state_lock_t *, int);
+
+static inline void bch2_two_state_lock(two_state_lock_t *lock, int s)
+{
+ if (!bch2_two_state_trylock(lock, s))
+ __bch2_two_state_lock(lock, s);
+}
+
+#endif /* _BCACHEFS_TWO_STATE_LOCK_H */
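A minimal usage sketch of the two-state lock API declared above (illustrative only, not part of this commit; the function and variable names are hypothetical): holders of the same state run concurrently, while holders of the opposite state exclude each other.

/* Sketch only: exercises the helpers from two_state_shared_lock.h. */
static two_state_lock_t example_lock;

static void example_setup(void)
{
	two_state_lock_init(&example_lock);
}

static void example_state_one(void)
{
	/* s != 0: shared with other state-1 holders, excludes state 0 */
	bch2_two_state_lock(&example_lock, 1);
	/* ... work that must not overlap with state-0 holders ... */
	bch2_two_state_unlock(&example_lock, 1);
}

static void example_state_zero(void)
{
	/* s == 0: non-blocking attempt; fails while any state-1 holder exists */
	if (bch2_two_state_trylock(&example_lock, 0)) {
		/* ... */
		bch2_two_state_unlock(&example_lock, 0);
	}
}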
diff --git a/c_src/libbcachefs/util.c b/c_src/libbcachefs/util.c
new file mode 100644
index 00000000..c2ef7cdd
--- /dev/null
+++ b/c_src/libbcachefs/util.c
@@ -0,0 +1,1210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * random utility code, for bcache but in theory not specific to bcache
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/console.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/log2.h>
+#include <linux/math64.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/sched/clock.h>
+
+#include "eytzinger.h"
+#include "mean_and_variance.h"
+#include "util.h"
+
+static const char si_units[] = "?kMGTPEZY";
+
+/* string_get_size units: */
+static const char *const units_2[] = {
+ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
+};
+static const char *const units_10[] = {
+ "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
+};
+
+static int parse_u64(const char *cp, u64 *res)
+{
+ const char *start = cp;
+ u64 v = 0;
+
+ if (!isdigit(*cp))
+ return -EINVAL;
+
+ do {
+ if (v > U64_MAX / 10)
+ return -ERANGE;
+ v *= 10;
+ if (v > U64_MAX - (*cp - '0'))
+ return -ERANGE;
+ v += *cp - '0';
+ cp++;
+ } while (isdigit(*cp));
+
+ *res = v;
+ return cp - start;
+}
+
+static int bch2_pow(u64 n, u64 p, u64 *res)
+{
+ *res = 1;
+
+ while (p--) {
+ if (*res > div_u64(U64_MAX, n))
+ return -ERANGE;
+ *res *= n;
+ }
+ return 0;
+}
+
+static int parse_unit_suffix(const char *cp, u64 *res)
+{
+ const char *start = cp;
+ u64 base = 1024;
+ unsigned u;
+ int ret;
+
+ if (*cp == ' ')
+ cp++;
+
+ for (u = 1; u < strlen(si_units); u++)
+ if (*cp == si_units[u]) {
+ cp++;
+ goto got_unit;
+ }
+
+ for (u = 0; u < ARRAY_SIZE(units_2); u++)
+ if (!strncmp(cp, units_2[u], strlen(units_2[u]))) {
+ cp += strlen(units_2[u]);
+ goto got_unit;
+ }
+
+ for (u = 0; u < ARRAY_SIZE(units_10); u++)
+ if (!strncmp(cp, units_10[u], strlen(units_10[u]))) {
+ cp += strlen(units_10[u]);
+ base = 1000;
+ goto got_unit;
+ }
+
+ *res = 1;
+ return 0;
+got_unit:
+ ret = bch2_pow(base, u, res);
+ if (ret)
+ return ret;
+
+ return cp - start;
+}
+
+#define parse_or_ret(cp, _f) \
+do { \
+ int _ret = _f; \
+ if (_ret < 0) \
+ return _ret; \
+ cp += _ret; \
+} while (0)
+
+static int __bch2_strtou64_h(const char *cp, u64 *res)
+{
+ const char *start = cp;
+ u64 v = 0, b, f_n = 0, f_d = 1;
+ int ret;
+
+ parse_or_ret(cp, parse_u64(cp, &v));
+
+ if (*cp == '.') {
+ cp++;
+ ret = parse_u64(cp, &f_n);
+ if (ret < 0)
+ return ret;
+ cp += ret;
+
+ ret = bch2_pow(10, ret, &f_d);
+ if (ret)
+ return ret;
+ }
+
+ parse_or_ret(cp, parse_unit_suffix(cp, &b));
+
+ if (v > div_u64(U64_MAX, b))
+ return -ERANGE;
+ v *= b;
+
+ if (f_n > div_u64(U64_MAX, b))
+ return -ERANGE;
+
+ f_n = div_u64(f_n * b, f_d);
+ if (v + f_n < v)
+ return -ERANGE;
+ v += f_n;
+
+ *res = v;
+ return cp - start;
+}
+
+static int __bch2_strtoh(const char *cp, u64 *res,
+ u64 t_max, bool t_signed)
+{
+ bool positive = *cp != '-';
+ u64 v = 0;
+
+ if (*cp == '+' || *cp == '-')
+ cp++;
+
+ parse_or_ret(cp, __bch2_strtou64_h(cp, &v));
+
+ if (*cp == '\n')
+ cp++;
+ if (*cp)
+ return -EINVAL;
+
+ if (positive) {
+ if (v > t_max)
+ return -ERANGE;
+ } else {
+ if (v && !t_signed)
+ return -ERANGE;
+
+ if (v > t_max + 1)
+ return -ERANGE;
+ v = -v;
+ }
+
+ *res = v;
+ return 0;
+}
+
+#define STRTO_H(name, type) \
+int bch2_ ## name ## _h(const char *cp, type *res) \
+{ \
+ u64 v = 0; \
+ int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type), \
+ ANYSINT_MAX(type) != ((type) ~0ULL)); \
+ *res = v; \
+ return ret; \
+}
+
+STRTO_H(strtoint, int)
+STRTO_H(strtouint, unsigned int)
+STRTO_H(strtoll, long long)
+STRTO_H(strtoull, unsigned long long)
+STRTO_H(strtou64, u64)
+
+u64 bch2_read_flag_list(char *opt, const char * const list[])
+{
+ u64 ret = 0;
+ char *p, *s, *d = kstrdup(opt, GFP_KERNEL);
+
+ if (!d)
+ return -ENOMEM;
+
+ s = strim(d);
+
+ while ((p = strsep(&s, ","))) {
+ int flag = match_string(list, -1, p);
+
+ if (flag < 0) {
+ ret = -1;
+ break;
+ }
+
+ ret |= 1 << flag;
+ }
+
+ kfree(d);
+
+ return ret;
+}
+
+bool bch2_is_zero(const void *_p, size_t n)
+{
+ const char *p = _p;
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ if (p[i])
+ return false;
+ return true;
+}
+
+void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits)
+{
+ while (nr_bits)
+ prt_char(out, '0' + ((v >> --nr_bits) & 1));
+}
+
+void bch2_print_string_as_lines(const char *prefix, const char *lines)
+{
+ const char *p;
+
+ if (!lines) {
+ printk("%s (null)\n", prefix);
+ return;
+ }
+
+ console_lock();
+ while (1) {
+ p = strchrnul(lines, '\n');
+ printk("%s%.*s\n", prefix, (int) (p - lines), lines);
+ if (!*p)
+ break;
+ lines = p + 1;
+ }
+ console_unlock();
+}
+
+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr)
+{
+#ifdef CONFIG_STACKTRACE
+ unsigned nr_entries = 0;
+ int ret = 0;
+
+ stack->nr = 0;
+ ret = darray_make_room(stack, 32);
+ if (ret)
+ return ret;
+
+ if (!down_read_trylock(&task->signal->exec_update_lock))
+ return -1;
+
+ do {
+ nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1);
+ } while (nr_entries == stack->size &&
+ !(ret = darray_make_room(stack, stack->size * 2)));
+
+ stack->nr = nr_entries;
+ up_read(&task->signal->exec_update_lock);
+
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack)
+{
+ darray_for_each(*stack, i) {
+ prt_printf(out, "[<0>] %pB", (void *) *i);
+ prt_newline(out);
+ }
+}
+
+int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr)
+{
+ bch_stacktrace stack = { 0 };
+ int ret = bch2_save_backtrace(&stack, task, skipnr + 1);
+
+ bch2_prt_backtrace(out, &stack);
+ darray_exit(&stack);
+ return ret;
+}
+
+#ifndef __KERNEL__
+#include <time.h>
+void bch2_prt_datetime(struct printbuf *out, time64_t sec)
+{
+ time_t t = sec;
+ char buf[64];
+ ctime_r(&t, buf);
+ strim(buf);
+ prt_str(out, buf);
+}
+#else
+void bch2_prt_datetime(struct printbuf *out, time64_t sec)
+{
+ char buf[64];
+ snprintf(buf, sizeof(buf), "%ptT", &sec);
+ prt_str(out, buf);
+}
+#endif
+
+static const struct time_unit {
+ const char *name;
+ u64 nsecs;
+} time_units[] = {
+ { "ns", 1 },
+ { "us", NSEC_PER_USEC },
+ { "ms", NSEC_PER_MSEC },
+ { "s", NSEC_PER_SEC },
+ { "m", (u64) NSEC_PER_SEC * 60},
+ { "h", (u64) NSEC_PER_SEC * 3600},
+ { "eon", U64_MAX },
+};
+
+static const struct time_unit *pick_time_units(u64 ns)
+{
+ const struct time_unit *u;
+
+ for (u = time_units;
+ u + 1 < time_units + ARRAY_SIZE(time_units) &&
+ ns >= u[1].nsecs << 1;
+ u++)
+ ;
+
+ return u;
+}
+
+void bch2_pr_time_units(struct printbuf *out, u64 ns)
+{
+ const struct time_unit *u = pick_time_units(ns);
+
+ prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
+}
+
+/* time stats: */
+
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v)
+{
+ unsigned i = 0;
+
+ while (i < ARRAY_SIZE(q->entries)) {
+ struct bch2_quantile_entry *e = q->entries + i;
+
+ if (unlikely(!e->step)) {
+ e->m = v;
+ e->step = max_t(unsigned, v / 2, 1024);
+ } else if (e->m > v) {
+ e->m = e->m >= e->step
+ ? e->m - e->step
+ : 0;
+ } else if (e->m < v) {
+ e->m = e->m + e->step > e->m
+ ? e->m + e->step
+ : U32_MAX;
+ }
+
+ if ((e->m > v ? e->m - v : v - e->m) < e->step)
+ e->step = max_t(unsigned, e->step / 2, 1);
+
+ if (v >= e->m)
+ break;
+
+ i = eytzinger0_child(i, v > e->m);
+ }
+}
+
+static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
+ u64 start, u64 end)
+{
+ u64 duration, freq;
+
+ if (time_after64(end, start)) {
+ duration = end - start;
+ mean_and_variance_update(&stats->duration_stats, duration);
+ mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration);
+ stats->max_duration = max(stats->max_duration, duration);
+ stats->min_duration = min(stats->min_duration, duration);
+ stats->total_duration += duration;
+ bch2_quantiles_update(&stats->quantiles, duration);
+ }
+
+ if (time_after64(end, stats->last_event)) {
+ freq = end - stats->last_event;
+ mean_and_variance_update(&stats->freq_stats, freq);
+ mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
+ stats->max_freq = max(stats->max_freq, freq);
+ stats->min_freq = min(stats->min_freq, freq);
+ stats->last_event = end;
+ }
+}
+
+static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
+ struct bch2_time_stat_buffer *b)
+{
+ for (struct bch2_time_stat_buffer_entry *i = b->entries;
+ i < b->entries + ARRAY_SIZE(b->entries);
+ i++)
+ bch2_time_stats_update_one(stats, i->start, i->end);
+ b->nr = 0;
+}
+
+static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
+ struct bch2_time_stat_buffer *b)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&stats->lock, flags);
+ __bch2_time_stats_clear_buffer(stats, b);
+ spin_unlock_irqrestore(&stats->lock, flags);
+}
+
+void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
+{
+ unsigned long flags;
+
+ WARN_ONCE(!stats->duration_stats_weighted.weight ||
+ !stats->freq_stats_weighted.weight,
+ "uninitialized time_stats");
+
+ if (!stats->buffer) {
+ spin_lock_irqsave(&stats->lock, flags);
+ bch2_time_stats_update_one(stats, start, end);
+
+ if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 &&
+ stats->duration_stats.n > 1024)
+ stats->buffer =
+ alloc_percpu_gfp(struct bch2_time_stat_buffer,
+ GFP_ATOMIC);
+ spin_unlock_irqrestore(&stats->lock, flags);
+ } else {
+ struct bch2_time_stat_buffer *b;
+
+ preempt_disable();
+ b = this_cpu_ptr(stats->buffer);
+
+ BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
+ b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) {
+ .start = start,
+ .end = end
+ };
+
+ if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
+ bch2_time_stats_clear_buffer(stats, b);
+ preempt_enable();
+ }
+}
+
+static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
+{
+ const struct time_unit *u = pick_time_units(ns);
+
+ prt_printf(out, "%llu ", div64_u64(ns, u->nsecs));
+ prt_tab_rjust(out);
+ prt_printf(out, "%s", u->name);
+}
+
+static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
+{
+ prt_str(out, name);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, ns);
+ prt_newline(out);
+}
+
+#define TABSTOP_SIZE 12
+
+void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats)
+{
+ const struct time_unit *u;
+ s64 f_mean = 0, d_mean = 0;
+ u64 q, last_q = 0, f_stddev = 0, d_stddev = 0;
+ int i;
+
+ if (stats->buffer) {
+ int cpu;
+
+ spin_lock_irq(&stats->lock);
+ for_each_possible_cpu(cpu)
+ __bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
+ spin_unlock_irq(&stats->lock);
+ }
+
+ /*
+ * avoid divide by zero
+ */
+ if (stats->freq_stats.n) {
+ f_mean = mean_and_variance_get_mean(stats->freq_stats);
+ f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
+ d_mean = mean_and_variance_get_mean(stats->duration_stats);
+ d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
+ }
+
+ printbuf_tabstop_push(out, out->indent + TABSTOP_SIZE);
+ prt_printf(out, "count:");
+ prt_tab(out);
+ prt_printf(out, "%llu ",
+ stats->duration_stats.n);
+ printbuf_tabstop_pop(out);
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+
+ printbuf_tabstop_push(out, out->indent + 20);
+ printbuf_tabstop_push(out, TABSTOP_SIZE + 2);
+ printbuf_tabstop_push(out, 0);
+ printbuf_tabstop_push(out, TABSTOP_SIZE + 2);
+
+ prt_tab(out);
+ prt_printf(out, "since mount");
+ prt_tab_rjust(out);
+ prt_tab(out);
+ prt_printf(out, "recent");
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, out->indent + 20);
+ printbuf_tabstop_push(out, TABSTOP_SIZE);
+ printbuf_tabstop_push(out, 2);
+ printbuf_tabstop_push(out, TABSTOP_SIZE);
+
+ prt_printf(out, "duration of events");
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+
+ pr_name_and_units(out, "min:", stats->min_duration);
+ pr_name_and_units(out, "max:", stats->max_duration);
+ pr_name_and_units(out, "total:", stats->total_duration);
+
+ prt_printf(out, "mean:");
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, d_mean);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted));
+ prt_newline(out);
+
+ prt_printf(out, "stddev:");
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, d_stddev);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted));
+
+ printbuf_indent_sub(out, 2);
+ prt_newline(out);
+
+ prt_printf(out, "time between events");
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+
+ pr_name_and_units(out, "min:", stats->min_freq);
+ pr_name_and_units(out, "max:", stats->max_freq);
+
+ prt_printf(out, "mean:");
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, f_mean);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted));
+ prt_newline(out);
+
+ prt_printf(out, "stddev:");
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, f_stddev);
+ prt_tab(out);
+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted));
+
+ printbuf_indent_sub(out, 2);
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+
+ i = eytzinger0_first(NR_QUANTILES);
+ u = pick_time_units(stats->quantiles.entries[i].m);
+
+ prt_printf(out, "quantiles (%s):\t", u->name);
+ eytzinger0_for_each(i, NR_QUANTILES) {
+ bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
+
+ q = max(stats->quantiles.entries[i].m, last_q);
+ prt_printf(out, "%llu ",
+ div_u64(q, u->nsecs));
+ if (is_last)
+ prt_newline(out);
+ last_q = q;
+ }
+}
+#else
+void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) {}
+#endif
+
+void bch2_time_stats_exit(struct bch2_time_stats *stats)
+{
+ free_percpu(stats->buffer);
+}
+
+void bch2_time_stats_init(struct bch2_time_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ stats->duration_stats_weighted.weight = 8;
+ stats->freq_stats_weighted.weight = 8;
+ stats->min_duration = U64_MAX;
+ stats->min_freq = U64_MAX;
+ spin_lock_init(&stats->lock);
+}
+
+/* ratelimit: */
+
+/**
+ * bch2_ratelimit_delay() - return how long to delay until the next time to do
+ * some work
+ * @d: the struct bch_ratelimit to update
+ * Returns: the amount of time to delay by, in jiffies
+ */
+u64 bch2_ratelimit_delay(struct bch_ratelimit *d)
+{
+ u64 now = local_clock();
+
+ return time_after64(d->next, now)
+ ? nsecs_to_jiffies(d->next - now)
+ : 0;
+}
+
+/**
+ * bch2_ratelimit_increment() - increment @d by the amount of work done
+ * @d: the struct bch_ratelimit to update
+ * @done: the amount of work done, in arbitrary units
+ */
+void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done)
+{
+ u64 now = local_clock();
+
+ d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+
+ if (time_before64(now + NSEC_PER_SEC, d->next))
+ d->next = now + NSEC_PER_SEC;
+
+ if (time_after64(now - NSEC_PER_SEC * 2, d->next))
+ d->next = now - NSEC_PER_SEC * 2;
+}
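As a usage note (an illustrative sketch, not code from this commit): the two helpers above are meant to be paired in a work loop, consulting bch2_ratelimit_delay() before each unit of work and calling bch2_ratelimit_increment() afterwards. The have_work() and do_one_unit() names below are hypothetical; the struct bch_ratelimit fields (rate, next) live in util.h.

/* Sketch only: throttle a worker loop to roughly d->rate units per second. */
static void example_rate_limited_loop(struct bch_ratelimit *d)
{
	while (have_work()) {
		u64 delay = bch2_ratelimit_delay(d);	/* jiffies to wait, 0 if none */

		if (delay)
			schedule_timeout_interruptible(delay);

		u64 done = do_one_unit();		/* work done, in arbitrary units */
		bch2_ratelimit_increment(d, done);
	}
}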
+
+/* pd controller: */
+
+/*
+ * Updates pd_controller. Attempts to scale input values to units per second.
+ * @target: desired value
+ * @actual: current value
+ *
+ * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing
+ * it makes actual go down.
+ */
+void bch2_pd_controller_update(struct bch_pd_controller *pd,
+ s64 target, s64 actual, int sign)
+{
+ s64 proportional, derivative, change;
+
+ unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ;
+
+ if (seconds_since_update == 0)
+ return;
+
+ pd->last_update = jiffies;
+
+ proportional = actual - target;
+ proportional *= seconds_since_update;
+ proportional = div_s64(proportional, pd->p_term_inverse);
+
+ derivative = actual - pd->last_actual;
+ derivative = div_s64(derivative, seconds_since_update);
+ derivative = ewma_add(pd->smoothed_derivative, derivative,
+ (pd->d_term / seconds_since_update) ?: 1);
+ derivative = derivative * pd->d_term;
+ derivative = div_s64(derivative, pd->p_term_inverse);
+
+ change = proportional + derivative;
+
+ /* Don't increase rate if not keeping up */
+ if (change > 0 &&
+ pd->backpressure &&
+ time_after64(local_clock(),
+ pd->rate.next + NSEC_PER_MSEC))
+ change = 0;
+
+ change *= (sign * -1);
+
+ pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change,
+ 1, UINT_MAX);
+
+ pd->last_actual = actual;
+ pd->last_derivative = derivative;
+ pd->last_proportional = proportional;
+ pd->last_change = change;
+ pd->last_target = target;
+}
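A worked example of the update above (the numbers and the call below are illustrative only): suppose the controller runs once per second with the default p_term_inverse of 6000, sign is -1 (raising the rate drives actual down), and actual sits 6,000,000 units above target. Then proportional = 6,000,000 * 1 / 6000 = 1000, change is 1000 plus a derivative term, and after change *= (sign * -1) the rate rises by roughly 1000 units per second (assuming the backpressure check does not zero it), pushing actual back toward target.

/* Illustrative call only; pd, target and the 6,000,000 offset are made up. */
bch2_pd_controller_update(&pd, target, target + 6000000, -1);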
+
+void bch2_pd_controller_init(struct bch_pd_controller *pd)
+{
+ pd->rate.rate = 1024;
+ pd->last_update = jiffies;
+ pd->p_term_inverse = 6000;
+ pd->d_term = 30;
+ pd->d_smooth = pd->d_term;
+ pd->backpressure = 1;
+}
+
+void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd)
+{
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 20);
+
+ prt_printf(out, "rate:");
+ prt_tab(out);
+ prt_human_readable_s64(out, pd->rate.rate);
+ prt_newline(out);
+
+ prt_printf(out, "target:");
+ prt_tab(out);
+ prt_human_readable_u64(out, pd->last_target);
+ prt_newline(out);
+
+ prt_printf(out, "actual:");
+ prt_tab(out);
+ prt_human_readable_u64(out, pd->last_actual);
+ prt_newline(out);
+
+ prt_printf(out, "proportional:");
+ prt_tab(out);
+ prt_human_readable_s64(out, pd->last_proportional);
+ prt_newline(out);
+
+ prt_printf(out, "derivative:");
+ prt_tab(out);
+ prt_human_readable_s64(out, pd->last_derivative);
+ prt_newline(out);
+
+ prt_printf(out, "change:");
+ prt_tab(out);
+ prt_human_readable_s64(out, pd->last_change);
+ prt_newline(out);
+
+ prt_printf(out, "next io:");
+ prt_tab(out);
+ prt_printf(out, "%llims", div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC));
+ prt_newline(out);
+}
+
+/* misc: */
+
+void bch2_bio_map(struct bio *bio, void *base, size_t size)
+{
+ while (size) {
+ struct page *page = is_vmalloc_addr(base)
+ ? vmalloc_to_page(base)
+ : virt_to_page(base);
+ unsigned offset = offset_in_page(base);
+ unsigned len = min_t(size_t, PAGE_SIZE - offset, size);
+
+ BUG_ON(!bio_add_page(bio, page, len, offset));
+ size -= len;
+ base += len;
+ }
+}
+
+int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
+{
+ while (size) {
+ struct page *page = alloc_pages(gfp_mask, 0);
+ unsigned len = min_t(size_t, PAGE_SIZE, size);
+
+ if (!page)
+ return -ENOMEM;
+
+ if (unlikely(!bio_add_page(bio, page, len, 0))) {
+ __free_page(page);
+ break;
+ }
+
+ size -= len;
+ }
+
+ return 0;
+}
+
+size_t bch2_rand_range(size_t max)
+{
+ size_t rand;
+
+ if (!max)
+ return 0;
+
+ do {
+ rand = get_random_long();
+ rand &= roundup_pow_of_two(max) - 1;
+ } while (rand >= max);
+
+ return rand;
+}
+
+void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+
+ __bio_for_each_segment(bv, dst, iter, dst_iter) {
+ void *dstp = kmap_local_page(bv.bv_page);
+
+ memcpy(dstp + bv.bv_offset, src, bv.bv_len);
+ kunmap_local(dstp);
+
+ src += bv.bv_len;
+ }
+}
+
+void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+
+ __bio_for_each_segment(bv, src, iter, src_iter) {
+ void *srcp = kmap_local_page(bv.bv_page);
+
+ memcpy(dst, srcp + bv.bv_offset, bv.bv_len);
+ kunmap_local(srcp);
+
+ dst += bv.bv_len;
+ }
+}
+
+static int alignment_ok(const void *base, size_t align)
+{
+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+ ((unsigned long)base & (align - 1)) == 0;
+}
+
+static void u32_swap(void *a, void *b, size_t size)
+{
+ u32 t = *(u32 *)a;
+ *(u32 *)a = *(u32 *)b;
+ *(u32 *)b = t;
+}
+
+static void u64_swap(void *a, void *b, size_t size)
+{
+ u64 t = *(u64 *)a;
+ *(u64 *)a = *(u64 *)b;
+ *(u64 *)b = t;
+}
+
+static void generic_swap(void *a, void *b, size_t size)
+{
+ char t;
+
+ do {
+ t = *(char *)a;
+ *(char *)a++ = *(char *)b;
+ *(char *)b++ = t;
+ } while (--size > 0);
+}
+
+static inline int do_cmp(void *base, size_t n, size_t size,
+ int (*cmp_func)(const void *, const void *, size_t),
+ size_t l, size_t r)
+{
+ return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ size);
+}
+
+static inline void do_swap(void *base, size_t n, size_t size,
+ void (*swap_func)(void *, void *, size_t),
+ size_t l, size_t r)
+{
+ swap_func(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ size);
+}
+
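+/*
+ * Heapsort over an array stored in eytzinger (BFS) order: this is a standard
+ * binary heapsort, except every logical index is translated through
+ * inorder_to_eytzinger0(), so when it finishes the array is sorted with
+ * respect to in-order traversal of the implicit tree rather than plain array
+ * order.
+ */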
+void eytzinger0_sort(void *base, size_t n, size_t size,
+ int (*cmp_func)(const void *, const void *, size_t),
+ void (*swap_func)(void *, void *, size_t))
+{
+ int i, c, r;
+
+ if (!swap_func) {
+ if (size == 4 && alignment_ok(base, 4))
+ swap_func = u32_swap;
+ else if (size == 8 && alignment_ok(base, 8))
+ swap_func = u64_swap;
+ else
+ swap_func = generic_swap;
+ }
+
+ /* heapify */
+ for (i = n / 2 - 1; i >= 0; --i) {
+ for (r = i; r * 2 + 1 < n; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < n &&
+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
+ c++;
+
+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
+ break;
+
+ do_swap(base, n, size, swap_func, r, c);
+ }
+ }
+
+ /* sort */
+ for (i = n - 1; i > 0; --i) {
+ do_swap(base, n, size, swap_func, 0, i);
+
+ for (r = 0; r * 2 + 1 < i; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < i &&
+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
+ c++;
+
+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
+ break;
+
+ do_swap(base, n, size, swap_func, r, c);
+ }
+ }
+}
+
+void sort_cmp_size(void *base, size_t num, size_t size,
+ int (*cmp_func)(const void *, const void *, size_t),
+ void (*swap_func)(void *, void *, size_t size))
+{
+ /* pre-scale counters for performance */
+ int i = (num/2 - 1) * size, n = num * size, c, r;
+
+ if (!swap_func) {
+ if (size == 4 && alignment_ok(base, 4))
+ swap_func = u32_swap;
+ else if (size == 8 && alignment_ok(base, 8))
+ swap_func = u64_swap;
+ else
+ swap_func = generic_swap;
+ }
+
+ /* heapify */
+ for ( ; i >= 0; i -= size) {
+ for (r = i; r * 2 + size < n; r = c) {
+ c = r * 2 + size;
+ if (c < n - size &&
+ cmp_func(base + c, base + c + size, size) < 0)
+ c += size;
+ if (cmp_func(base + r, base + c, size) >= 0)
+ break;
+ swap_func(base + r, base + c, size);
+ }
+ }
+
+ /* sort */
+ for (i = n - size; i > 0; i -= size) {
+ swap_func(base, base + i, size);
+ for (r = 0; r * 2 + size < i; r = c) {
+ c = r * 2 + size;
+ if (c < i - size &&
+ cmp_func(base + c, base + c + size, size) < 0)
+ c += size;
+ if (cmp_func(base + r, base + c, size) >= 0)
+ break;
+ swap_func(base + r, base + c, size);
+ }
+ }
+}
+
+static void mempool_free_vp(void *element, void *pool_data)
+{
+ size_t size = (size_t) pool_data;
+
+ vpfree(element, size);
+}
+
+static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data)
+{
+ size_t size = (size_t) pool_data;
+
+ return vpmalloc(size, gfp_mask);
+}
+
+int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+ return size < PAGE_SIZE
+ ? mempool_init_kmalloc_pool(pool, min_nr, size)
+ : mempool_init(pool, min_nr, mempool_alloc_vp,
+ mempool_free_vp, (void *) size);
+}
+
+#if 0
+void eytzinger1_test(void)
+{
+ unsigned inorder, eytz, size;
+
+ pr_info("1 based eytzinger test:");
+
+ for (size = 2;
+ size < 65536;
+ size++) {
+ unsigned extra = eytzinger1_extra(size);
+
+ if (!(size % 4096))
+ pr_info("tree size %u", size);
+
+ BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size));
+ BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size));
+
+ BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0);
+ BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0);
+
+ inorder = 1;
+ eytzinger1_for_each(eytz, size) {
+ BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz);
+ BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder);
+ BUG_ON(eytz != eytzinger1_last(size) &&
+ eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz);
+
+ inorder++;
+ }
+ }
+}
+
+void eytzinger0_test(void)
+{
+
+ unsigned inorder, eytz, size;
+
+ pr_info("0 based eytzinger test:");
+
+ for (size = 1;
+ size < 65536;
+ size++) {
+ unsigned extra = eytzinger0_extra(size);
+
+ if (!(size % 4096))
+ pr_info("tree size %u", size);
+
+ BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size));
+ BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size));
+
+ BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1);
+ BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1);
+
+ inorder = 0;
+ eytzinger0_for_each(eytz, size) {
+ BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz);
+ BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder);
+ BUG_ON(eytz != eytzinger0_last(size) &&
+ eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz);
+
+ inorder++;
+ }
+ }
+}
+
+static inline int cmp_u16(const void *_l, const void *_r, size_t size)
+{
+ const u16 *l = _l, *r = _r;
+
+ return (*l > *r) - (*r > *l);
+}
+
+static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search)
+{
+ int i, c1 = -1, c2 = -1;
+ ssize_t r;
+
+ r = eytzinger0_find_le(test_array, nr,
+ sizeof(test_array[0]),
+ cmp_u16, &search);
+ if (r >= 0)
+ c1 = test_array[r];
+
+ for (i = 0; i < nr; i++)
+ if (test_array[i] <= search && test_array[i] > c2)
+ c2 = test_array[i];
+
+ if (c1 != c2) {
+ eytzinger0_for_each(i, nr)
+ pr_info("[%3u] = %12u", i, test_array[i]);
+ pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i",
+ i, r, c1, c2);
+ }
+}
+
+void eytzinger0_find_test(void)
+{
+ unsigned i, nr, allocated = 1 << 12;
+ u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL);
+
+ for (nr = 1; nr < allocated; nr++) {
+ pr_info("testing %u elems", nr);
+
+ get_random_bytes(test_array, nr * sizeof(test_array[0]));
+ eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL);
+
+ /* verify array is sorted correctly: */
+ eytzinger0_for_each(i, nr)
+ BUG_ON(i != eytzinger0_last(nr) &&
+ test_array[i] > test_array[eytzinger0_next(i, nr)]);
+
+ for (i = 0; i < U16_MAX; i += 1 << 12)
+ eytzinger0_find_test_val(test_array, nr, i);
+
+ for (i = 0; i < nr; i++) {
+ eytzinger0_find_test_val(test_array, nr, test_array[i] - 1);
+ eytzinger0_find_test_val(test_array, nr, test_array[i]);
+ eytzinger0_find_test_val(test_array, nr, test_array[i] + 1);
+ }
+ }
+
+ kfree(test_array);
+}
+#endif
+
+/*
+ * Accumulate percpu counters onto one cpu's copy - only valid when concurrent
+ * access to the percpu counters is otherwise guarded against
+ */
+u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr)
+{
+ u64 *ret;
+ int cpu;
+
+ /* access to pcpu vars has to be blocked by other locking */
+ preempt_disable();
+ ret = this_cpu_ptr(p);
+ preempt_enable();
+
+ for_each_possible_cpu(cpu) {
+ u64 *i = per_cpu_ptr(p, cpu);
+
+ if (i != ret) {
+ acc_u64s(ret, i, nr);
+ memset(i, 0, nr * sizeof(u64));
+ }
+ }
+
+ return ret;
+}
+
+void bch2_darray_str_exit(darray_str *d)
+{
+ darray_for_each(*d, i)
+ kfree(*i);
+ darray_exit(d);
+}
+
+int bch2_split_devs(const char *_dev_name, darray_str *ret)
+{
+ darray_init(ret);
+
+ char *dev_name, *s, *orig;
+
+ dev_name = orig = kstrdup(_dev_name, GFP_KERNEL);
+ if (!dev_name)
+ return -ENOMEM;
+
+ while ((s = strsep(&dev_name, ":"))) {
+ char *p = kstrdup(s, GFP_KERNEL);
+ if (!p)
+ goto err;
+
+ if (darray_push(ret, p)) {
+ kfree(p);
+ goto err;
+ }
+ }
+
+ /* strsep() advanced dev_name to NULL; free via the saved pointer: */
+ kfree(orig);
+ return 0;
+err:
+ bch2_darray_str_exit(ret);
+ kfree(orig);
+ return -ENOMEM;
+}
diff --git a/c_src/libbcachefs/util.h b/c_src/libbcachefs/util.h
new file mode 100644
index 00000000..c75fc319
--- /dev/null
+++ b/c_src/libbcachefs/util.h
@@ -0,0 +1,878 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_UTIL_H
+#define _BCACHEFS_UTIL_H
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/closure.h>
+#include <linux/errno.h>
+#include <linux/freezer.h>
+#include <linux/kernel.h>
+#include <linux/sched/clock.h>
+#include <linux/llist.h>
+#include <linux/log2.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/ratelimit.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
+
+#include "mean_and_variance.h"
+
+#include "darray.h"
+
+struct closure;
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+#define EBUG_ON(cond) BUG_ON(cond)
+#else
+#define EBUG_ON(cond)
+#endif
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define CPU_BIG_ENDIAN 0
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define CPU_BIG_ENDIAN 1
+#endif
+
+/* type hackery */
+
+#define type_is_exact(_val, _type) \
+ __builtin_types_compatible_p(typeof(_val), _type)
+
+#define type_is(_val, _type) \
+ (__builtin_types_compatible_p(typeof(_val), _type) || \
+ __builtin_types_compatible_p(typeof(_val), const _type))
+
+/* Userspace doesn't align allocations as nicely as the kernel allocators: */
+static inline size_t buf_pages(void *p, size_t len)
+{
+ return DIV_ROUND_UP(len +
+ ((unsigned long) p & (PAGE_SIZE - 1)),
+ PAGE_SIZE);
+}
+
+static inline void vpfree(void *p, size_t size)
+{
+ if (is_vmalloc_addr(p))
+ vfree(p);
+ else
+ free_pages((unsigned long) p, get_order(size));
+}
+
+static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
+{
+ return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
+ get_order(size)) ?:
+ __vmalloc(size, gfp_mask);
+}
+
+static inline void kvpfree(void *p, size_t size)
+{
+ if (size < PAGE_SIZE)
+ kfree(p);
+ else
+ vpfree(p, size);
+}
+
+static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
+{
+ return size < PAGE_SIZE
+ ? kmalloc(size, gfp_mask)
+ : vpmalloc(size, gfp_mask);
+}
+
+int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t);
+
+#define HEAP(type) \
+struct { \
+ size_t size, used; \
+ type *data; \
+}
+
+#define DECLARE_HEAP(type, name) HEAP(type) name
+
+#define init_heap(heap, _size, gfp) \
+({ \
+ (heap)->used = 0; \
+ (heap)->size = (_size); \
+ (heap)->data = kvpmalloc((heap)->size * sizeof((heap)->data[0]),\
+ (gfp)); \
+})
+
+#define free_heap(heap) \
+do { \
+ kvpfree((heap)->data, (heap)->size * sizeof((heap)->data[0])); \
+ (heap)->data = NULL; \
+} while (0)
+
+#define heap_set_backpointer(h, i, _fn) \
+do { \
+ void (*fn)(typeof(h), size_t) = _fn; \
+ if (fn) \
+ fn(h, i); \
+} while (0)
+
+#define heap_swap(h, i, j, set_backpointer) \
+do { \
+ swap((h)->data[i], (h)->data[j]); \
+ heap_set_backpointer(h, i, set_backpointer); \
+ heap_set_backpointer(h, j, set_backpointer); \
+} while (0)
+
+#define heap_peek(h) \
+({ \
+ EBUG_ON(!(h)->used); \
+ (h)->data[0]; \
+})
+
+#define heap_full(h) ((h)->used == (h)->size)
+
+#define heap_sift_down(h, i, cmp, set_backpointer) \
+do { \
+ size_t _c, _j = i; \
+ \
+ for (; _j * 2 + 1 < (h)->used; _j = _c) { \
+ _c = _j * 2 + 1; \
+ if (_c + 1 < (h)->used && \
+ cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \
+ _c++; \
+ \
+ if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \
+ break; \
+ heap_swap(h, _c, _j, set_backpointer); \
+ } \
+} while (0)
+
+#define heap_sift_up(h, i, cmp, set_backpointer) \
+do { \
+ while (i) { \
+ size_t p = (i - 1) / 2; \
+ if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \
+ break; \
+ heap_swap(h, i, p, set_backpointer); \
+ i = p; \
+ } \
+} while (0)
+
+#define __heap_add(h, d, cmp, set_backpointer) \
+({ \
+ size_t _i = (h)->used++; \
+ (h)->data[_i] = d; \
+ heap_set_backpointer(h, _i, set_backpointer); \
+ \
+ heap_sift_up(h, _i, cmp, set_backpointer); \
+ _i; \
+})
+
+#define heap_add(h, d, cmp, set_backpointer) \
+({ \
+ bool _r = !heap_full(h); \
+ if (_r) \
+ __heap_add(h, d, cmp, set_backpointer); \
+ _r; \
+})
+
+#define heap_add_or_replace(h, new, cmp, set_backpointer) \
+do { \
+ if (!heap_add(h, new, cmp, set_backpointer) && \
+ cmp(h, new, heap_peek(h)) >= 0) { \
+ (h)->data[0] = new; \
+ heap_set_backpointer(h, 0, set_backpointer); \
+ heap_sift_down(h, 0, cmp, set_backpointer); \
+ } \
+} while (0)
+
+#define heap_del(h, i, cmp, set_backpointer) \
+do { \
+ size_t _i = (i); \
+ \
+ BUG_ON(_i >= (h)->used); \
+ (h)->used--; \
+ if ((_i) < (h)->used) { \
+ heap_swap(h, _i, (h)->used, set_backpointer); \
+ heap_sift_up(h, _i, cmp, set_backpointer); \
+ heap_sift_down(h, _i, cmp, set_backpointer); \
+ } \
+} while (0)
+
+#define heap_pop(h, d, cmp, set_backpointer) \
+({ \
+ bool _r = (h)->used; \
+ if (_r) { \
+ (d) = (h)->data[0]; \
+ heap_del(h, 0, cmp, set_backpointer); \
+ } \
+ _r; \
+})
+
+#define heap_resort(heap, cmp, set_backpointer) \
+do { \
+ ssize_t _i; \
+ for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \
+ heap_sift_down(heap, _i, cmp, set_backpointer); \
+} while (0)
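+
+/*
+ * Illustrative (hypothetical) use of the heap macros above; h_cmp and the
+ * element values are made up for the example, cmp_int() is defined further
+ * down in this header, and error handling is omitted:
+ *
+ *	HEAP(u64) h;
+ *	#define h_cmp(_h, _l, _r)	cmp_int(_l, _r)
+ *
+ *	init_heap(&h, 128, GFP_KERNEL);
+ *	heap_add(&h, 42, h_cmp, NULL);		// NULL: no backpointers
+ *
+ *	u64 v;
+ *	while (heap_pop(&h, v, h_cmp, NULL))	// pops smallest w.r.t. h_cmp
+ *		pr_info("%llu", v);
+ *	free_heap(&h);
+ */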
+
+#define ANYSINT_MAX(t) \
+ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)
+
+#include "printbuf.h"
+
+#define prt_vprintf(_out, ...) bch2_prt_vprintf(_out, __VA_ARGS__)
+#define prt_printf(_out, ...) bch2_prt_printf(_out, __VA_ARGS__)
+#define printbuf_str(_buf) bch2_printbuf_str(_buf)
+#define printbuf_exit(_buf) bch2_printbuf_exit(_buf)
+
+#define printbuf_tabstops_reset(_buf) bch2_printbuf_tabstops_reset(_buf)
+#define printbuf_tabstop_pop(_buf) bch2_printbuf_tabstop_pop(_buf)
+#define printbuf_tabstop_push(_buf, _n) bch2_printbuf_tabstop_push(_buf, _n)
+
+#define printbuf_indent_add(_out, _n) bch2_printbuf_indent_add(_out, _n)
+#define printbuf_indent_sub(_out, _n) bch2_printbuf_indent_sub(_out, _n)
+
+#define prt_newline(_out) bch2_prt_newline(_out)
+#define prt_tab(_out) bch2_prt_tab(_out)
+#define prt_tab_rjust(_out) bch2_prt_tab_rjust(_out)
+
+#define prt_bytes_indented(...) bch2_prt_bytes_indented(__VA_ARGS__)
+#define prt_u64(_out, _v) prt_printf(_out, "%llu", (u64) (_v))
+#define prt_human_readable_u64(...) bch2_prt_human_readable_u64(__VA_ARGS__)
+#define prt_human_readable_s64(...) bch2_prt_human_readable_s64(__VA_ARGS__)
+#define prt_units_u64(...) bch2_prt_units_u64(__VA_ARGS__)
+#define prt_units_s64(...) bch2_prt_units_s64(__VA_ARGS__)
+#define prt_string_option(...) bch2_prt_string_option(__VA_ARGS__)
+#define prt_bitflags(...) bch2_prt_bitflags(__VA_ARGS__)
+#define prt_bitflags_vector(...) bch2_prt_bitflags_vector(__VA_ARGS__)
+
+void bch2_pr_time_units(struct printbuf *, u64);
+void bch2_prt_datetime(struct printbuf *, time64_t);
+
+#ifdef __KERNEL__
+static inline void uuid_unparse_lower(u8 *uuid, char *out)
+{
+ sprintf(out, "%pUb", uuid);
+}
+#else
+#include <uuid/uuid.h>
+#endif
+
+static inline void pr_uuid(struct printbuf *out, u8 *uuid)
+{
+ char uuid_str[40];
+
+ uuid_unparse_lower(uuid, uuid_str);
+ prt_printf(out, "%s", uuid_str);
+}
+
+int bch2_strtoint_h(const char *, int *);
+int bch2_strtouint_h(const char *, unsigned int *);
+int bch2_strtoll_h(const char *, long long *);
+int bch2_strtoull_h(const char *, unsigned long long *);
+int bch2_strtou64_h(const char *, u64 *);
+
+static inline int bch2_strtol_h(const char *cp, long *res)
+{
+#if BITS_PER_LONG == 32
+ return bch2_strtoint_h(cp, (int *) res);
+#else
+ return bch2_strtoll_h(cp, (long long *) res);
+#endif
+}
+
+static inline int bch2_strtoul_h(const char *cp, long *res)
+{
+#if BITS_PER_LONG == 32
+ return bch2_strtouint_h(cp, (unsigned int *) res);
+#else
+ return bch2_strtoull_h(cp, (unsigned long long *) res);
+#endif
+}
+
+#define strtoi_h(cp, res) \
+ ( type_is(*res, int) ? bch2_strtoint_h(cp, (void *) res)\
+ : type_is(*res, long) ? bch2_strtol_h(cp, (void *) res)\
+ : type_is(*res, long long) ? bch2_strtoll_h(cp, (void *) res)\
+ : type_is(*res, unsigned) ? bch2_strtouint_h(cp, (void *) res)\
+ : type_is(*res, unsigned long) ? bch2_strtoul_h(cp, (void *) res)\
+ : type_is(*res, unsigned long long) ? bch2_strtoull_h(cp, (void *) res)\
+ : -EINVAL)
+
+#define strtoul_safe(cp, var) \
+({ \
+ unsigned long _v; \
+ int _r = kstrtoul(cp, 10, &_v); \
+ if (!_r) \
+ var = _v; \
+ _r; \
+})
+
+#define strtoul_safe_clamp(cp, var, min, max) \
+({ \
+ unsigned long _v; \
+ int _r = kstrtoul(cp, 10, &_v); \
+ if (!_r) \
+ var = clamp_t(typeof(var), _v, min, max); \
+ _r; \
+})
+
+#define strtoul_safe_restrict(cp, var, min, max) \
+({ \
+ unsigned long _v; \
+ int _r = kstrtoul(cp, 10, &_v); \
+ if (!_r && _v >= min && _v <= max) \
+ var = _v; \
+ else \
+ _r = -EINVAL; \
+ _r; \
+})
+
+#define snprint(out, var) \
+ prt_printf(out, \
+ type_is(var, int) ? "%i\n" \
+ : type_is(var, unsigned) ? "%u\n" \
+ : type_is(var, long) ? "%li\n" \
+ : type_is(var, unsigned long) ? "%lu\n" \
+ : type_is(var, s64) ? "%lli\n" \
+ : type_is(var, u64) ? "%llu\n" \
+ : type_is(var, char *) ? "%s\n" \
+ : "%i\n", var)
+
+bool bch2_is_zero(const void *, size_t);
+
+u64 bch2_read_flag_list(char *, const char * const[]);
+
+void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
+
+void bch2_print_string_as_lines(const char *prefix, const char *lines);
+
+typedef DARRAY(unsigned long) bch_stacktrace;
+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned);
+void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *);
+int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned);
+
+static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev)
+{
+#ifdef __KERNEL__
+ prt_printf(out, "%pg", bdev);
+#else
+ prt_str(out, bdev->name);
+#endif
+}
+
+#define NR_QUANTILES 15
+#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)
+#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
+#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES)
+
+struct bch2_quantiles {
+ struct bch2_quantile_entry {
+ u64 m;
+ u64 step;
+ } entries[NR_QUANTILES];
+};
+
+struct bch2_time_stat_buffer {
+ unsigned nr;
+ struct bch2_time_stat_buffer_entry {
+ u64 start;
+ u64 end;
+ } entries[32];
+};
+
+struct bch2_time_stats {
+ spinlock_t lock;
+ /* all fields are in nanoseconds */
+ u64 min_duration;
+ u64 max_duration;
+ u64 total_duration;
+ u64 max_freq;
+ u64 min_freq;
+ u64 last_event;
+ struct bch2_quantiles quantiles;
+
+ struct mean_and_variance duration_stats;
+ struct mean_and_variance_weighted duration_stats_weighted;
+ struct mean_and_variance freq_stats;
+ struct mean_and_variance_weighted freq_stats_weighted;
+ struct bch2_time_stat_buffer __percpu *buffer;
+};
+
+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64);
+
+static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start)
+{
+ __bch2_time_stats_update(stats, start, local_clock());
+}
+
+static inline bool track_event_change(struct bch2_time_stats *stats,
+ u64 *start, bool v)
+{
+ if (v != !!*start) {
+ if (!v) {
+ bch2_time_stats_update(stats, *start);
+ *start = 0;
+ } else {
+ *start = local_clock() ?: 1;
+ return true;
+ }
+ }
+
+ return false;
+}
+#else
+static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {}
+static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start) {}
+static inline bool track_event_change(struct bch2_time_stats *stats,
+ u64 *start, bool v)
+{
+ bool ret = v && !*start;
+ *start = v;
+ return ret;
+}
+#endif
+
+void bch2_time_stats_to_text(struct printbuf *, struct bch2_time_stats *);
+
+void bch2_time_stats_exit(struct bch2_time_stats *);
+void bch2_time_stats_init(struct bch2_time_stats *);
+
+#define ewma_add(ewma, val, weight) \
+({ \
+ typeof(ewma) _ewma = (ewma); \
+ typeof(weight) _weight = (weight); \
+ \
+ (((_ewma << _weight) - _ewma) + (val)) >> _weight; \
+})
+
+struct bch_ratelimit {
+ /* Next time we want to do some work, in nanoseconds */
+ u64 next;
+
+ /*
+ * Rate at which we want to do work, in units per nanosecond
+ * The units here correspond to the units passed to
+ * bch2_ratelimit_increment()
+ */
+ unsigned rate;
+};
+
+static inline void bch2_ratelimit_reset(struct bch_ratelimit *d)
+{
+ d->next = local_clock();
+}
+
+u64 bch2_ratelimit_delay(struct bch_ratelimit *);
+void bch2_ratelimit_increment(struct bch_ratelimit *, u64);
+
+struct bch_pd_controller {
+ struct bch_ratelimit rate;
+ unsigned long last_update;
+
+ s64 last_actual;
+ s64 smoothed_derivative;
+
+ unsigned p_term_inverse;
+ unsigned d_smooth;
+ unsigned d_term;
+
+ /* for exporting to sysfs (no effect on behavior) */
+ s64 last_derivative;
+ s64 last_proportional;
+ s64 last_change;
+ s64 last_target;
+
+ /*
+ * If true, the rate will not increase if bch2_ratelimit_delay()
+ * is not being called often enough.
+ */
+ bool backpressure;
+};
+
+void bch2_pd_controller_update(struct bch_pd_controller *, s64, s64, int);
+void bch2_pd_controller_init(struct bch_pd_controller *);
+void bch2_pd_controller_debug_to_text(struct printbuf *, struct bch_pd_controller *);
+
+#define sysfs_pd_controller_attribute(name) \
+ rw_attribute(name##_rate); \
+ rw_attribute(name##_rate_bytes); \
+ rw_attribute(name##_rate_d_term); \
+ rw_attribute(name##_rate_p_term_inverse); \
+ read_attribute(name##_rate_debug)
+
+#define sysfs_pd_controller_files(name) \
+ &sysfs_##name##_rate, \
+ &sysfs_##name##_rate_bytes, \
+ &sysfs_##name##_rate_d_term, \
+ &sysfs_##name##_rate_p_term_inverse, \
+ &sysfs_##name##_rate_debug
+
+#define sysfs_pd_controller_show(name, var) \
+do { \
+ sysfs_hprint(name##_rate, (var)->rate.rate); \
+ sysfs_print(name##_rate_bytes, (var)->rate.rate); \
+ sysfs_print(name##_rate_d_term, (var)->d_term); \
+ sysfs_print(name##_rate_p_term_inverse, (var)->p_term_inverse); \
+ \
+ if (attr == &sysfs_##name##_rate_debug) \
+ bch2_pd_controller_debug_to_text(out, var); \
+} while (0)
+
+#define sysfs_pd_controller_store(name, var) \
+do { \
+ sysfs_strtoul_clamp(name##_rate, \
+ (var)->rate.rate, 1, UINT_MAX); \
+ sysfs_strtoul_clamp(name##_rate_bytes, \
+ (var)->rate.rate, 1, UINT_MAX); \
+ sysfs_strtoul(name##_rate_d_term, (var)->d_term); \
+ sysfs_strtoul_clamp(name##_rate_p_term_inverse, \
+ (var)->p_term_inverse, 1, INT_MAX); \
+} while (0)
+
+#define container_of_or_null(ptr, type, member) \
+({ \
+ typeof(ptr) _ptr = ptr; \
+ _ptr ? container_of(_ptr, type, member) : NULL; \
+})
+
+/* Does linear interpolation between powers of two */
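+/*
+ * e.g. with fract_bits = 3, x = 0b10100 means 2 + 4/8, and the result is
+ * (1 << 2) + (((1 << 2) * 4) >> 3) = 4 + 2 = 6, a linear approximation of
+ * 2^2.5 ~= 5.66.
+ */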
+static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
+{
+ unsigned fract = x & ~(~0 << fract_bits);
+
+ x >>= fract_bits;
+ x = 1 << x;
+ x += (x * fract) >> fract_bits;
+
+ return x;
+}
+
+void bch2_bio_map(struct bio *bio, void *base, size_t);
+int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
+
+static inline sector_t bdev_sectors(struct block_device *bdev)
+{
+ return bdev->bd_inode->i_size >> 9;
+}
+
+#define closure_bio_submit(bio, cl) \
+do { \
+ closure_get(cl); \
+ submit_bio(bio); \
+} while (0)
+
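+/*
+ * Wait for @cond to become true from a kthread: evaluates to 0 once @cond is
+ * true, or to -1 if the thread was asked to stop first.  The _freezable
+ * variant below additionally calls try_to_freeze() after each wakeup.
+ */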
+#define kthread_wait(cond) \
+({ \
+ int _ret = 0; \
+ \
+ while (1) { \
+ set_current_state(TASK_INTERRUPTIBLE); \
+ if (kthread_should_stop()) { \
+ _ret = -1; \
+ break; \
+ } \
+ \
+ if (cond) \
+ break; \
+ \
+ schedule(); \
+ } \
+ set_current_state(TASK_RUNNING); \
+ _ret; \
+})
+
+#define kthread_wait_freezable(cond) \
+({ \
+ int _ret = 0; \
+ while (1) { \
+ set_current_state(TASK_INTERRUPTIBLE); \
+ if (kthread_should_stop()) { \
+ _ret = -1; \
+ break; \
+ } \
+ \
+ if (cond) \
+ break; \
+ \
+ schedule(); \
+ try_to_freeze(); \
+ } \
+ set_current_state(TASK_RUNNING); \
+ _ret; \
+})
+
+size_t bch2_rand_range(size_t);
+
+void memcpy_to_bio(struct bio *, struct bvec_iter, const void *);
+void memcpy_from_bio(void *, struct bio *, struct bvec_iter);
+
+static inline void memcpy_u64s_small(void *dst, const void *src,
+ unsigned u64s)
+{
+ u64 *d = dst;
+ const u64 *s = src;
+
+ while (u64s--)
+ *d++ = *s++;
+}
+
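+/*
+ * Copy/move helpers for buffers that are a whole number of u64s: on x86-64
+ * these use a single rep movsq, elsewhere a simple word-at-a-time loop.
+ */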
+static inline void __memcpy_u64s(void *dst, const void *src,
+ unsigned u64s)
+{
+#ifdef CONFIG_X86_64
+ long d0, d1, d2;
+
+ asm volatile("rep ; movsq"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (u64s), "1" (dst), "2" (src)
+ : "memory");
+#else
+ u64 *d = dst;
+ const u64 *s = src;
+
+ while (u64s--)
+ *d++ = *s++;
+#endif
+}
+
+static inline void memcpy_u64s(void *dst, const void *src,
+ unsigned u64s)
+{
+ EBUG_ON(!(dst >= src + u64s * sizeof(u64) ||
+ dst + u64s * sizeof(u64) <= src));
+
+ __memcpy_u64s(dst, src, u64s);
+}
+
+static inline void __memmove_u64s_down(void *dst, const void *src,
+ unsigned u64s)
+{
+ __memcpy_u64s(dst, src, u64s);
+}
+
+static inline void memmove_u64s_down(void *dst, const void *src,
+ unsigned u64s)
+{
+ EBUG_ON(dst > src);
+
+ __memmove_u64s_down(dst, src, u64s);
+}
+
+static inline void __memmove_u64s_down_small(void *dst, const void *src,
+ unsigned u64s)
+{
+ memcpy_u64s_small(dst, src, u64s);
+}
+
+static inline void memmove_u64s_down_small(void *dst, const void *src,
+ unsigned u64s)
+{
+ EBUG_ON(dst > src);
+
+ __memmove_u64s_down_small(dst, src, u64s);
+}
+
+static inline void __memmove_u64s_up_small(void *_dst, const void *_src,
+ unsigned u64s)
+{
+ u64 *dst = (u64 *) _dst + u64s;
+ u64 *src = (u64 *) _src + u64s;
+
+ while (u64s--)
+ *--dst = *--src;
+}
+
+static inline void memmove_u64s_up_small(void *dst, const void *src,
+ unsigned u64s)
+{
+ EBUG_ON(dst < src);
+
+ __memmove_u64s_up_small(dst, src, u64s);
+}
+
+static inline void __memmove_u64s_up(void *_dst, const void *_src,
+ unsigned u64s)
+{
+ u64 *dst = (u64 *) _dst + u64s - 1;
+ u64 *src = (u64 *) _src + u64s - 1;
+
+#ifdef CONFIG_X86_64
+ long d0, d1, d2;
+
+ asm volatile("std ;\n"
+ "rep ; movsq\n"
+ "cld ;\n"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (u64s), "1" (dst), "2" (src)
+ : "memory");
+#else
+ while (u64s--)
+ *dst-- = *src--;
+#endif
+}
+
+static inline void memmove_u64s_up(void *dst, const void *src,
+ unsigned u64s)
+{
+ EBUG_ON(dst < src);
+
+ __memmove_u64s_up(dst, src, u64s);
+}
+
+static inline void memmove_u64s(void *dst, const void *src,
+ unsigned u64s)
+{
+ if (dst < src)
+ __memmove_u64s_down(dst, src, u64s);
+ else
+ __memmove_u64s_up(dst, src, u64s);
+}
+
+/* Set the last few bytes up to a u64 boundary given an offset into a buffer. */
+static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
+{
+ unsigned rem = round_up(bytes, sizeof(u64)) - bytes;
+
+ memset(s + bytes, c, rem);
+}
+
+void sort_cmp_size(void *base, size_t num, size_t size,
+ int (*cmp_func)(const void *, const void *, size_t),
+ void (*swap_func)(void *, void *, size_t));
+
+/* just the memmove, doesn't update @_nr */
+#define __array_insert_item(_array, _nr, _pos) \
+ memmove(&(_array)[(_pos) + 1], \
+ &(_array)[(_pos)], \
+ sizeof((_array)[0]) * ((_nr) - (_pos)))
+
+#define array_insert_item(_array, _nr, _pos, _new_item) \
+do { \
+ __array_insert_item(_array, _nr, _pos); \
+ (_nr)++; \
+ (_array)[(_pos)] = (_new_item); \
+} while (0)
+
+#define array_remove_items(_array, _nr, _pos, _nr_to_remove) \
+do { \
+ (_nr) -= (_nr_to_remove); \
+ memmove(&(_array)[(_pos)], \
+ &(_array)[(_pos) + (_nr_to_remove)], \
+ sizeof((_array)[0]) * ((_nr) - (_pos))); \
+} while (0)
+
+#define array_remove_item(_array, _nr, _pos) \
+ array_remove_items(_array, _nr, _pos, 1)
+
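+/*
+ * Gap buffer helper: @nr elements are stored in an array of @size slots with
+ * a run of (@size - @nr) unused slots (the gap) starting at index @old_gap;
+ * this shifts the minimum number of elements so the gap starts at @new_gap
+ * instead.
+ */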
+static inline void __move_gap(void *array, size_t element_size,
+ size_t nr, size_t size,
+ size_t old_gap, size_t new_gap)
+{
+ size_t gap_end = old_gap + size - nr;
+
+ if (new_gap < old_gap) {
+ size_t move = old_gap - new_gap;
+
+ memmove(array + element_size * (gap_end - move),
+ array + element_size * (old_gap - move),
+ element_size * move);
+ } else if (new_gap > old_gap) {
+ size_t move = new_gap - old_gap;
+
+ memmove(array + element_size * old_gap,
+ array + element_size * gap_end,
+ element_size * move);
+ }
+}
+
+/* Move the gap in a gap buffer: */
+#define move_gap(_array, _nr, _size, _old_gap, _new_gap) \
+ __move_gap(_array, sizeof(_array[0]), _nr, _size, _old_gap, _new_gap)
+
+#define bubble_sort(_base, _nr, _cmp) \
+do { \
+ ssize_t _i, _last; \
+ bool _swapped = true; \
+ \
+ for (_last = (ssize_t) (_nr) - 1; _last > 0 && _swapped; --_last) { \
+ _swapped = false; \
+ for (_i = 0; _i < _last; _i++) \
+ if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) { \
+ swap((_base)[_i], (_base)[_i + 1]); \
+ _swapped = true; \
+ } \
+ } \
+} while (0)
+
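+/*
+ * Simple split percpu counters: the logical value is the sum over all
+ * possible CPUs (percpu_u64_get()); percpu_u64_set() zeroes every CPU's slot
+ * and then writes the whole value to the current CPU's slot, so the sum
+ * comes out to @src.
+ */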
+static inline u64 percpu_u64_get(u64 __percpu *src)
+{
+ u64 ret = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ ret += *per_cpu_ptr(src, cpu);
+ return ret;
+}
+
+static inline void percpu_u64_set(u64 __percpu *dst, u64 src)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ *per_cpu_ptr(dst, cpu) = 0;
+ this_cpu_write(*dst, src);
+}
+
+static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; i++)
+ acc[i] += src[i];
+}
+
+static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src,
+ unsigned nr)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ acc_u64s(acc, per_cpu_ptr(src, cpu), nr);
+}
+
+static inline void percpu_memset(void __percpu *p, int c, size_t bytes)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ memset(per_cpu_ptr(p, cpu), c, bytes);
+}
+
+u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned);
+
+#define cmp_int(l, r) ((l > r) - (l < r))
+
+static inline int u8_cmp(u8 l, u8 r)
+{
+ return cmp_int(l, r);
+}
+
+static inline int cmp_le32(__le32 l, __le32 r)
+{
+ return cmp_int(le32_to_cpu(l), le32_to_cpu(r));
+}
+
+#include <linux/uuid.h>
+
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+static inline bool qstr_eq(const struct qstr l, const struct qstr r)
+{
+ return l.len == r.len && !memcmp(l.name, r.name, l.len);
+}
+
+void bch2_darray_str_exit(darray_str *);
+int bch2_split_devs(const char *, darray_str *);
+
+#endif /* _BCACHEFS_UTIL_H */
diff --git a/c_src/libbcachefs/varint.c b/c_src/libbcachefs/varint.c
new file mode 100644
index 00000000..cb4f33ed
--- /dev/null
+++ b/c_src/libbcachefs/varint.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bitops.h>
+#include <linux/math.h>
+#include <linux/string.h>
+#include <asm/unaligned.h>
+
+#ifdef CONFIG_VALGRIND
+#include <valgrind/memcheck.h>
+#endif
+
+#include "varint.h"
+
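+/*
+ * Wire format, as implemented below: a value needing n bytes (n < 9) is
+ * stored little endian as (v << n) | ((1 << (n - 1)) - 1), i.e. the low bits
+ * of the first byte encode n in unary - (n - 1) one bits followed by a zero
+ * bit.  Values with more than 56 significant bits get a 0xff prefix byte
+ * followed by the full 8 byte little endian value.
+ *
+ * e.g. v = 1000 encodes in 2 bytes as (1000 << 2) | 1 = 0x0fa1, stored as
+ * a1 0f; the decoder then reads ffz(0xa1) + 1 = 2 bytes.
+ */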
+/**
+ * bch2_varint_encode - encode a variable length integer
+ * @out: destination to encode to
+ * @v: unsigned integer to encode
+ * Returns: size in bytes of the encoded integer - at most 9 bytes
+ */
+int bch2_varint_encode(u8 *out, u64 v)
+{
+ unsigned bits = fls64(v|1);
+ unsigned bytes = DIV_ROUND_UP(bits, 7);
+ __le64 v_le;
+
+ if (likely(bytes < 9)) {
+ v <<= bytes;
+ v |= ~(~0 << (bytes - 1));
+ v_le = cpu_to_le64(v);
+ memcpy(out, &v_le, bytes);
+ } else {
+ *out++ = 255;
+ bytes = 9;
+ put_unaligned_le64(v, out);
+ }
+
+ return bytes;
+}
+
+/**
+ * bch2_varint_decode - decode a variable length integer
+ * @in: varint to decode
+ * @end: end of buffer to decode from
+ * @out: on success, decoded integer
+ * Returns: size in bytes of the decoded integer - or -1 on failure (would
+ * have read past the end of the buffer)
+ */
+int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
+{
+ unsigned bytes = likely(in < end)
+ ? ffz(*in & 255) + 1
+ : 1;
+ u64 v;
+
+ if (unlikely(in + bytes > end))
+ return -1;
+
+ if (likely(bytes < 9)) {
+ __le64 v_le = 0;
+
+ memcpy(&v_le, in, bytes);
+ v = le64_to_cpu(v_le);
+ v >>= bytes;
+ } else {
+ v = get_unaligned_le64(++in);
+ }
+
+ *out = v;
+ return bytes;
+}
+
+/**
+ * bch2_varint_encode_fast - fast version of bch2_varint_encode
+ * @out: destination to encode to
+ * @v: unsigned integer to encode
+ * Returns: size in bytes of the encoded integer - at most 9 bytes
+ *
+ * This version assumes it's always safe to write 8 bytes to @out, even if the
+ * encoded integer would be smaller.
+ */
+int bch2_varint_encode_fast(u8 *out, u64 v)
+{
+ unsigned bits = fls64(v|1);
+ unsigned bytes = DIV_ROUND_UP(bits, 7);
+
+ if (likely(bytes < 9)) {
+ v <<= bytes;
+ v |= ~(~0 << (bytes - 1));
+ } else {
+ *out++ = 255;
+ bytes = 9;
+ }
+
+ put_unaligned_le64(v, out);
+ return bytes;
+}
+
+/**
+ * bch2_varint_decode_fast - fast version of bch2_varint_decode
+ * @in: varint to decode
+ * @end: end of buffer to decode from
+ * @out: on success, decoded integer
+ * Returns: size in bytes of the decoded integer - or -1 on failure (would
+ * have read past the end of the buffer)
+ *
+ * This version assumes that it is safe to read up to 8 bytes past @end (we
+ * still return an error if the varint extends past @end).
+ */
+int bch2_varint_decode_fast(const u8 *in, const u8 *end, u64 *out)
+{
+#ifdef CONFIG_VALGRIND
+ VALGRIND_MAKE_MEM_DEFINED(in, 8);
+#endif
+ u64 v = get_unaligned_le64(in);
+ unsigned bytes = ffz(*in) + 1;
+
+ if (unlikely(in + bytes > end))
+ return -1;
+
+ if (likely(bytes < 9)) {
+ v >>= bytes;
+ v &= ~(~0ULL << (7 * bytes));
+ } else {
+ v = get_unaligned_le64(++in);
+ }
+
+ *out = v;
+ return bytes;
+}
diff --git a/c_src/libbcachefs/varint.h b/c_src/libbcachefs/varint.h
new file mode 100644
index 00000000..92a182fb
--- /dev/null
+++ b/c_src/libbcachefs/varint.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_VARINT_H
+#define _BCACHEFS_VARINT_H
+
+int bch2_varint_encode(u8 *, u64);
+int bch2_varint_decode(const u8 *, const u8 *, u64 *);
+
+int bch2_varint_encode_fast(u8 *, u64);
+int bch2_varint_decode_fast(const u8 *, const u8 *, u64 *);
+
+#endif /* _BCACHEFS_VARINT_H */
diff --git a/c_src/libbcachefs/vstructs.h b/c_src/libbcachefs/vstructs.h
new file mode 100644
index 00000000..2ad338e2
--- /dev/null
+++ b/c_src/libbcachefs/vstructs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _VSTRUCTS_H
+#define _VSTRUCTS_H
+
+#include "util.h"
+
+/*
+ * NOTE: we can't differentiate between __le64 and u64 with type_is - this
+ * assumes u64 is little endian:
+ */
+#define __vstruct_u64s(_s) \
+({ \
+ ( type_is((_s)->u64s, u64) ? le64_to_cpu((__force __le64) (_s)->u64s) \
+ : type_is((_s)->u64s, u32) ? le32_to_cpu((__force __le32) (_s)->u64s) \
+ : type_is((_s)->u64s, u16) ? le16_to_cpu((__force __le16) (_s)->u64s) \
+ : ((__force u8) ((_s)->u64s))); \
+})
+
+#define __vstruct_bytes(_type, _u64s) \
+({ \
+ BUILD_BUG_ON(offsetof(_type, _data) % sizeof(u64)); \
+ \
+ (size_t) (offsetof(_type, _data) + (_u64s) * sizeof(u64)); \
+})
+
+#define vstruct_bytes(_s) \
+ __vstruct_bytes(typeof(*(_s)), __vstruct_u64s(_s))
+
+#define __vstruct_blocks(_type, _sector_block_bits, _u64s) \
+ (round_up(__vstruct_bytes(_type, _u64s), \
+ 512 << (_sector_block_bits)) >> (9 + (_sector_block_bits)))
+
+#define vstruct_blocks(_s, _sector_block_bits) \
+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, __vstruct_u64s(_s))
+
+#define vstruct_blocks_plus(_s, _sector_block_bits, _u64s) \
+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, \
+ __vstruct_u64s(_s) + (_u64s))
+
+#define vstruct_sectors(_s, _sector_block_bits) \
+ (round_up(vstruct_bytes(_s), 512 << (_sector_block_bits)) >> 9)
+
+#define vstruct_next(_s) \
+ ((typeof(_s)) ((u64 *) (_s)->_data + __vstruct_u64s(_s)))
+#define vstruct_last(_s) \
+ ((typeof(&(_s)->start[0])) ((u64 *) (_s)->_data + __vstruct_u64s(_s)))
+#define vstruct_end(_s) \
+ ((void *) ((u64 *) (_s)->_data + __vstruct_u64s(_s)))
+
+#define vstruct_for_each(_s, _i) \
+ for (typeof(&(_s)->start[0]) _i = (_s)->start; \
+ _i < vstruct_last(_s); \
+ _i = vstruct_next(_i))
+
+#define vstruct_for_each_safe(_s, _i) \
+ for (typeof(&(_s)->start[0]) _next, _i = (_s)->start; \
+ _i < vstruct_last(_s) && (_next = vstruct_next(_i), true); \
+ _i = _next)
+
+#define vstruct_idx(_s, _idx) \
+ ((typeof(&(_s)->start[0])) ((_s)->_data + (_idx)))
+
+#endif /* _VSTRUCTS_H */
diff --git a/c_src/libbcachefs/xattr.c b/c_src/libbcachefs/xattr.c
new file mode 100644
index 00000000..5a1858fb
--- /dev/null
+++ b/c_src/libbcachefs/xattr.c
@@ -0,0 +1,653 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "acl.h"
+#include "bkey_methods.h"
+#include "btree_update.h"
+#include "extents.h"
+#include "fs.h"
+#include "rebalance.h"
+#include "str_hash.h"
+#include "xattr.h"
+
+#include <linux/dcache.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned);
+
+static u64 bch2_xattr_hash(const struct bch_hash_info *info,
+ const struct xattr_search_key *key)
+{
+ struct bch_str_hash_ctx ctx;
+
+ bch2_str_hash_init(&ctx, info);
+ bch2_str_hash_update(&ctx, info, &key->type, sizeof(key->type));
+ bch2_str_hash_update(&ctx, info, key->name.name, key->name.len);
+
+ return bch2_str_hash_end(&ctx, info);
+}
+
+static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key)
+{
+ return bch2_xattr_hash(info, key);
+}
+
+static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
+{
+ struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k);
+
+ return bch2_xattr_hash(info,
+ &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len));
+}
+
+static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
+{
+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l);
+ const struct xattr_search_key *r = _r;
+
+ return l.v->x_type != r->type ||
+ l.v->x_name_len != r->name.len ||
+ memcmp(l.v->x_name, r->name.name, r->name.len);
+}
+
+static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
+{
+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l);
+ struct bkey_s_c_xattr r = bkey_s_c_to_xattr(_r);
+
+ return l.v->x_type != r.v->x_type ||
+ l.v->x_name_len != r.v->x_name_len ||
+ memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len);
+}
+
+const struct bch_hash_desc bch2_xattr_hash_desc = {
+ .btree_id = BTREE_ID_xattrs,
+ .key_type = KEY_TYPE_xattr,
+ .hash_key = xattr_hash_key,
+ .hash_bkey = xattr_hash_bkey,
+ .cmp_key = xattr_cmp_key,
+ .cmp_bkey = xattr_cmp_bkey,
+};
+
+int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
+{
+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
+ unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len,
+ le16_to_cpu(xattr.v->x_val_len));
+ int ret = 0;
+
+ bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err,
+ xattr_val_size_too_small,
+ "value too small (%zu < %u)",
+ bkey_val_u64s(k.k), val_u64s);
+
+ /* XXX why +4 ? */
+ val_u64s = xattr_val_u64s(xattr.v->x_name_len,
+ le16_to_cpu(xattr.v->x_val_len) + 4);
+
+ bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err,
+ xattr_val_size_too_big,
+ "value too big (%zu > %u)",
+ bkey_val_u64s(k.k), val_u64s);
+
+ bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err,
+ xattr_invalid_type,
+ "invalid type (%u)", xattr.v->x_type);
+
+ bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err,
+ xattr_name_invalid_chars,
+ "xattr name has invalid characters");
+fsck_err:
+ return ret;
+}
+
+void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ const struct xattr_handler *handler;
+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
+
+ handler = bch2_xattr_type_to_handler(xattr.v->x_type);
+ if (handler && handler->prefix)
+ prt_printf(out, "%s", handler->prefix);
+ else if (handler)
+ prt_printf(out, "(type %u)", xattr.v->x_type);
+ else
+ prt_printf(out, "(unknown type %u)", xattr.v->x_type);
+
+ prt_printf(out, "%.*s:%.*s",
+ xattr.v->x_name_len,
+ xattr.v->x_name,
+ le16_to_cpu(xattr.v->x_val_len),
+ (char *) xattr_val(xattr.v));
+
+ if (xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS ||
+ xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT) {
+ prt_char(out, ' ');
+ bch2_acl_to_text(out, xattr_val(xattr.v),
+ le16_to_cpu(xattr.v->x_val_len));
+ }
+}
+
+static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info *inode,
+ const char *name, void *buffer, size_t size, int type)
+{
+ struct bch_hash_info hash = bch2_hash_info_init(trans->c, &inode->ei_inode);
+ struct xattr_search_key search = X_SEARCH(type, name, strlen(name));
+ struct btree_iter iter;
+ struct bkey_s_c_xattr xattr;
+ struct bkey_s_c k;
+ int ret;
+
+ ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash,
+ inode_inum(inode), &search, 0);
+ if (ret)
+ goto err1;
+
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err2;
+
+ xattr = bkey_s_c_to_xattr(k);
+ ret = le16_to_cpu(xattr.v->x_val_len);
+ if (buffer) {
+ if (ret > size)
+ ret = -ERANGE;
+ else
+ memcpy(buffer, xattr_val(xattr.v), ret);
+ }
+err2:
+ bch2_trans_iter_exit(trans, &iter);
+err1:
+ return ret < 0 && bch2_err_matches(ret, ENOENT) ? -ENODATA : ret;
+}
+
+int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
+ struct bch_inode_unpacked *inode_u,
+ const struct bch_hash_info *hash_info,
+ const char *name, const void *value, size_t size,
+ int type, int flags)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter inode_iter = { NULL };
+ int ret;
+
+ ret = bch2_subvol_is_ro_trans(trans, inum.subvol) ?:
+ bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
+ if (ret)
+ return ret;
+
+ inode_u->bi_ctime = bch2_current_time(c);
+
+ ret = bch2_inode_write(trans, &inode_iter, inode_u);
+ bch2_trans_iter_exit(trans, &inode_iter);
+
+ if (ret)
+ return ret;
+
+ if (value) {
+ struct bkey_i_xattr *xattr;
+ unsigned namelen = strlen(name);
+ unsigned u64s = BKEY_U64s +
+ xattr_val_u64s(namelen, size);
+
+ if (u64s > U8_MAX)
+ return -ERANGE;
+
+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
+ if (IS_ERR(xattr))
+ return PTR_ERR(xattr);
+
+ bkey_xattr_init(&xattr->k_i);
+ xattr->k.u64s = u64s;
+ xattr->v.x_type = type;
+ xattr->v.x_name_len = namelen;
+ xattr->v.x_val_len = cpu_to_le16(size);
+ memcpy(xattr->v.x_name, name, namelen);
+ memcpy(xattr_val(&xattr->v), value, size);
+
+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
+ inum, &xattr->k_i,
+ (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
+ (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));
+ } else {
+ struct xattr_search_key search =
+ X_SEARCH(type, name, strlen(name));
+
+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc,
+ hash_info, inum, &search);
+ }
+
+ if (bch2_err_matches(ret, ENOENT))
+ ret = flags & XATTR_REPLACE ? -ENODATA : 0;
+
+ return ret;
+}
+
+struct xattr_buf {
+ char *buf;
+ size_t len;
+ size_t used;
+};
+
+static int __bch2_xattr_emit(const char *prefix,
+ const char *name, size_t name_len,
+ struct xattr_buf *buf)
+{
+ const size_t prefix_len = strlen(prefix);
+ const size_t total_len = prefix_len + name_len + 1;
+
+ if (buf->buf) {
+ if (buf->used + total_len > buf->len)
+ return -ERANGE;
+
+ memcpy(buf->buf + buf->used, prefix, prefix_len);
+ memcpy(buf->buf + buf->used + prefix_len,
+ name, name_len);
+ buf->buf[buf->used + prefix_len + name_len] = '\0';
+ }
+
+ buf->used += total_len;
+ return 0;
+}
+
+static int bch2_xattr_emit(struct dentry *dentry,
+ const struct bch_xattr *xattr,
+ struct xattr_buf *buf)
+{
+ const struct xattr_handler *handler =
+ bch2_xattr_type_to_handler(xattr->x_type);
+
+ return handler && (!handler->list || handler->list(dentry))
+ ? __bch2_xattr_emit(handler->prefix ?: handler->name,
+ xattr->x_name, xattr->x_name_len, buf)
+ : 0;
+}
+
+static int bch2_xattr_list_bcachefs(struct bch_fs *c,
+ struct bch_inode_unpacked *inode,
+ struct xattr_buf *buf,
+ bool all)
+{
+ const char *prefix = all ? "bcachefs_effective." : "bcachefs.";
+ unsigned id;
+ int ret = 0;
+ u64 v;
+
+ for (id = 0; id < Inode_opt_nr; id++) {
+ v = bch2_inode_opt_get(inode, id);
+ if (!v)
+ continue;
+
+ if (!all &&
+ !(inode->bi_fields_set & (1 << id)))
+ continue;
+
+ ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id],
+ strlen(bch2_inode_opts[id]), buf);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
+{
+ struct bch_fs *c = dentry->d_sb->s_fs_info;
+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct xattr_buf buf = { .buf = buffer, .len = buffer_size };
+ u64 offset = 0, inum = inode->ei_inode.bi_inum;
+ u32 snapshot;
+ int ret;
+retry:
+ bch2_trans_begin(trans);
+ iter = (struct btree_iter) { NULL };
+
+ ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_xattrs,
+ SPOS(inum, offset, snapshot),
+ POS(inum, U64_MAX), 0, k, ret) {
+ if (k.k->type != KEY_TYPE_xattr)
+ continue;
+
+ ret = bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf);
+ if (ret)
+ break;
+ }
+
+ offset = iter.pos.offset;
+ bch2_trans_iter_exit(trans, &iter);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
+
+ bch2_trans_put(trans);
+
+ if (ret)
+ goto out;
+
+ ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false);
+ if (ret)
+ goto out;
+
+ ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true);
+ if (ret)
+ goto out;
+
+ return buf.used;
+out:
+ return bch2_err_class(ret);
+}
+
+static int bch2_xattr_get_handler(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *vinode,
+ const char *name, void *buffer, size_t size)
+{
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ int ret = bch2_trans_do(c, NULL, NULL, 0,
+ bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags));
+
+ return bch2_err_class(ret);
+}
+
+static int bch2_xattr_set_handler(const struct xattr_handler *handler,
+ struct mnt_idmap *idmap,
+ struct dentry *dentry, struct inode *vinode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
+ struct bch_inode_unpacked inode_u;
+ int ret;
+
+ ret = bch2_trans_run(c,
+ commit_do(trans, NULL, NULL, 0,
+ bch2_xattr_set(trans, inode_inum(inode), &inode_u,
+ &hash, name, value, size,
+ handler->flags, flags)) ?:
+ (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0));
+
+ return bch2_err_class(ret);
+}
+
+static const struct xattr_handler bch_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = bch2_xattr_get_handler,
+ .set = bch2_xattr_set_handler,
+ .flags = KEY_TYPE_XATTR_INDEX_USER,
+};
+
+static bool bch2_xattr_trusted_list(struct dentry *dentry)
+{
+ return capable(CAP_SYS_ADMIN);
+}
+
+static const struct xattr_handler bch_xattr_trusted_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .list = bch2_xattr_trusted_list,
+ .get = bch2_xattr_get_handler,
+ .set = bch2_xattr_set_handler,
+ .flags = KEY_TYPE_XATTR_INDEX_TRUSTED,
+};
+
+static const struct xattr_handler bch_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .get = bch2_xattr_get_handler,
+ .set = bch2_xattr_set_handler,
+ .flags = KEY_TYPE_XATTR_INDEX_SECURITY,
+};
+
+#ifndef NO_BCACHEFS_FS
+
+static int opt_to_inode_opt(int id)
+{
+ switch (id) {
+#define x(name, ...) \
+ case Opt_##name: return Inode_opt_##name;
+ BCH_INODE_OPTS()
+#undef x
+ default:
+ return -1;
+ }
+}
+
+static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *vinode,
+ const char *name, void *buffer, size_t size,
+ bool all)
+{
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ struct bch_opts opts =
+ bch2_inode_opts_to_opts(&inode->ei_inode);
+ const struct bch_option *opt;
+ int id, inode_opt_id;
+ struct printbuf out = PRINTBUF;
+ int ret;
+ u64 v;
+
+ id = bch2_opt_lookup(name);
+ if (id < 0 || !bch2_opt_is_inode_opt(id))
+ return -EINVAL;
+
+ inode_opt_id = opt_to_inode_opt(id);
+ if (inode_opt_id < 0)
+ return -EINVAL;
+
+ opt = bch2_opt_table + id;
+
+ if (!bch2_opt_defined_by_id(&opts, id))
+ return -ENODATA;
+
+ if (!all &&
+ !(inode->ei_inode.bi_fields_set & (1 << inode_opt_id)))
+ return -ENODATA;
+
+ v = bch2_opt_get_by_id(&opts, id);
+ bch2_opt_to_text(&out, c, c->disk_sb.sb, opt, v, 0);
+
+ ret = out.pos;
+
+ if (out.allocation_failure) {
+ ret = -ENOMEM;
+ } else if (buffer) {
+ if (out.pos > size)
+ ret = -ERANGE;
+ else
+ memcpy(buffer, out.buf, out.pos);
+ }
+
+ printbuf_exit(&out);
+ return ret;
+}
+
+static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *vinode,
+ const char *name, void *buffer, size_t size)
+{
+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode,
+ name, buffer, size, false);
+}
+
+struct inode_opt_set {
+ int id;
+ u64 v;
+ bool defined;
+};
+
+static int inode_opt_set_fn(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct bch_inode_unpacked *bi,
+ void *p)
+{
+ struct inode_opt_set *s = p;
+
+ if (s->defined)
+ bi->bi_fields_set |= 1U << s->id;
+ else
+ bi->bi_fields_set &= ~(1U << s->id);
+
+ bch2_inode_opt_set(bi, s->id, s->v);
+
+ return 0;
+}
+
+static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
+ struct mnt_idmap *idmap,
+ struct dentry *dentry, struct inode *vinode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct bch_inode_info *inode = to_bch_ei(vinode);
+ struct bch_fs *c = inode->v.i_sb->s_fs_info;
+ const struct bch_option *opt;
+ char *buf;
+ struct inode_opt_set s;
+ int opt_id, inode_opt_id, ret;
+
+ opt_id = bch2_opt_lookup(name);
+ if (opt_id < 0)
+ return -EINVAL;
+
+ opt = bch2_opt_table + opt_id;
+
+ inode_opt_id = opt_to_inode_opt(opt_id);
+ if (inode_opt_id < 0)
+ return -EINVAL;
+
+ s.id = inode_opt_id;
+
+ if (value) {
+ u64 v = 0;
+
+ buf = kmalloc(size + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ memcpy(buf, value, size);
+ buf[size] = '\0';
+
+ ret = bch2_opt_parse(c, opt, buf, &v, NULL);
+ kfree(buf);
+
+ if (ret < 0)
+ return ret;
+
+ ret = bch2_opt_check_may_set(c, opt_id, v);
+ if (ret < 0)
+ return ret;
+
+ s.v = v + 1;
+ s.defined = true;
+ } else {
+ /*
+ * Check if this option was set on the parent - if so, switch
+ * back to inheriting from the parent:
+ *
+ * rename() also has to deal with keeping inherited options up
+ * to date - see bch2_reinherit_attrs()
+ */
+ spin_lock(&dentry->d_lock);
+ if (!IS_ROOT(dentry)) {
+ struct bch_inode_info *dir =
+ to_bch_ei(d_inode(dentry->d_parent));
+
+ s.v = bch2_inode_opt_get(&dir->ei_inode, inode_opt_id);
+ } else {
+ s.v = 0;
+ }
+ spin_unlock(&dentry->d_lock);
+
+ s.defined = false;
+ }
+
+ mutex_lock(&inode->ei_update_lock);
+ if (inode_opt_id == Inode_opt_project) {
+ /*
+ * inode fields accessible via the xattr interface are stored
+ * with a +1 bias, so that 0 means unset:
+ */
+ ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0);
+ if (ret)
+ goto err;
+ }
+
+ ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0);
+err:
+ mutex_unlock(&inode->ei_update_lock);
+
+ if (value &&
+ (opt_id == Opt_background_compression ||
+ opt_id == Opt_background_target))
+ bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum);
+
+ return bch2_err_class(ret);
+}
+
+static const struct xattr_handler bch_xattr_bcachefs_handler = {
+ .prefix = "bcachefs.",
+ .get = bch2_xattr_bcachefs_get,
+ .set = bch2_xattr_bcachefs_set,
+};
+
+static int bch2_xattr_bcachefs_get_effective(
+ const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *vinode,
+ const char *name, void *buffer, size_t size)
+{
+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode,
+ name, buffer, size, true);
+}
+
+static const struct xattr_handler bch_xattr_bcachefs_effective_handler = {
+ .prefix = "bcachefs_effective.",
+ .get = bch2_xattr_bcachefs_get_effective,
+ .set = bch2_xattr_bcachefs_set,
+};
+
+#endif /* NO_BCACHEFS_FS */
+
+const struct xattr_handler *bch2_xattr_handlers[] = {
+ &bch_xattr_user_handler,
+#ifdef CONFIG_BCACHEFS_POSIX_ACL
+ &nop_posix_acl_access,
+ &nop_posix_acl_default,
+#endif
+ &bch_xattr_trusted_handler,
+ &bch_xattr_security_handler,
+#ifndef NO_BCACHEFS_FS
+ &bch_xattr_bcachefs_handler,
+ &bch_xattr_bcachefs_effective_handler,
+#endif
+ NULL
+};
+
+static const struct xattr_handler *bch_xattr_handler_map[] = {
+ [KEY_TYPE_XATTR_INDEX_USER] = &bch_xattr_user_handler,
+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] =
+ &nop_posix_acl_access,
+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT] =
+ &nop_posix_acl_default,
+ [KEY_TYPE_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler,
+ [KEY_TYPE_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler,
+};
+
+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type)
+{
+ return type < ARRAY_SIZE(bch_xattr_handler_map)
+ ? bch_xattr_handler_map[type]
+ : NULL;
+}
diff --git a/c_src/libbcachefs/xattr.h b/c_src/libbcachefs/xattr.h
new file mode 100644
index 00000000..1337f31a
--- /dev/null
+++ b/c_src/libbcachefs/xattr.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_XATTR_H
+#define _BCACHEFS_XATTR_H
+
+#include "str_hash.h"
+
+extern const struct bch_hash_desc bch2_xattr_hash_desc;
+
+int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c,
+ enum bkey_invalid_flags, struct printbuf *);
+void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_xattr ((struct bkey_ops) { \
+ .key_invalid = bch2_xattr_invalid, \
+ .val_to_text = bch2_xattr_to_text, \
+ .min_val_size = 8, \
+})
+
+static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
+{
+ return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) +
+ name_len + val_len, sizeof(u64));
+}
+
+#define xattr_val(_xattr) \
+ ((void *) (_xattr)->x_name + (_xattr)->x_name_len)
+
+struct xattr_search_key {
+ u8 type;
+ struct qstr name;
+};
+
+#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key) \
+ { .type = _type, .name = QSTR_INIT(_name, _len) })
+
+struct dentry;
+struct xattr_handler;
+struct bch_hash_info;
+struct bch_inode_info;
+
+/* Exported for cmd_migrate.c in tools: */
+int bch2_xattr_set(struct btree_trans *, subvol_inum,
+ struct bch_inode_unpacked *, const struct bch_hash_info *,
+ const char *, const void *, size_t, int, int);
+
+ssize_t bch2_xattr_list(struct dentry *, char *, size_t);
+
+extern const struct xattr_handler *bch2_xattr_handlers[];
+
+#endif /* _BCACHEFS_XATTR_H */
diff --git a/c_src/linux/atomic64.c b/c_src/linux/atomic64.c
new file mode 100644
index 00000000..4654d092
--- /dev/null
+++ b/c_src/linux/atomic64.c
@@ -0,0 +1,188 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+
+#ifdef ATOMIC64_SPINLOCK
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable. Since this is expected to be used on
+ * systems with small numbers of CPUs (<= 4 or so), we use a
+ * relatively small array of 16 spinlocks to avoid wasting too much
+ * memory on the spinlock array.
+ */
+#define NR_LOCKS 16
+
+/*
+ * Ensure each lock is in a separate cacheline.
+ */
+static union {
+ raw_spinlock_t lock;
+ char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] ____cacheline_aligned_in_smp = {
+ [0 ... (NR_LOCKS - 1)] = {
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock),
+ },
+};
+
+static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
+{
+ unsigned long addr = (unsigned long) v;
+
+ addr >>= L1_CACHE_SHIFT;
+ addr ^= (addr >> 8) ^ (addr >> 16);
+ return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+
+ raw_spin_lock_irqsave(lock, flags);
+ v->counter = i;
+ raw_spin_unlock_irqrestore(lock, flags);
+}
+
+#define ATOMIC64_OP(op, c_op) \
+void atomic64_##op(long long a, atomic64_t *v) \
+{ \
+ unsigned long flags; \
+ raw_spinlock_t *lock = lock_addr(v); \
+ \
+ raw_spin_lock_irqsave(lock, flags); \
+ v->counter c_op a; \
+ raw_spin_unlock_irqrestore(lock, flags); \
+}
+
+#define ATOMIC64_OP_RETURN(op, c_op) \
+long long atomic64_##op##_return(long long a, atomic64_t *v) \
+{ \
+ unsigned long flags; \
+ raw_spinlock_t *lock = lock_addr(v); \
+ long long val; \
+ \
+ raw_spin_lock_irqsave(lock, flags); \
+ val = (v->counter c_op a); \
+ raw_spin_unlock_irqrestore(lock, flags); \
+ return val; \
+}
+
+#define ATOMIC64_FETCH_OP(op, c_op) \
+long long atomic64_fetch_##op(long long a, atomic64_t *v) \
+{ \
+ unsigned long flags; \
+ raw_spinlock_t *lock = lock_addr(v); \
+ long long val; \
+ \
+ raw_spin_lock_irqsave(lock, flags); \
+ val = v->counter; \
+ v->counter c_op a; \
+ raw_spin_unlock_irqrestore(lock, flags); \
+ return val; \
+}
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_OP_RETURN(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
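+/*
+ * e.g. ATOMIC64_OPS(add, +=) expands to atomic64_add(), atomic64_add_return()
+ * and atomic64_fetch_add(), each serialized on the hashed spinlock for @v.
+ */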
+ATOMIC64_OPS(add, +=)
+ATOMIC64_OPS(sub, -=)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_OP_RETURN(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+long long atomic64_dec_if_positive(atomic64_t *v)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter - 1;
+ if (val >= 0)
+ v->counter = val;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ if (val == o)
+ v->counter = n;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ v->counter = new;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ int ret = 0;
+
+ raw_spin_lock_irqsave(lock, flags);
+ if (v->counter != u) {
+ v->counter += a;
+ ret = 1;
+ }
+ raw_spin_unlock_irqrestore(lock, flags);
+ return ret;
+}
+
+#endif
diff --git a/c_src/linux/bio.c b/c_src/linux/bio.c
new file mode 100644
index 00000000..93a791c4
--- /dev/null
+++ b/c_src/linux/bio.c
@@ -0,0 +1,395 @@
+/*
+ * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ *
+ */
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+static const struct {
+ int err;
+ const char *name;
+} blk_errors[] = {
+ [BLK_STS_OK] = { 0, "" },
+ [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" },
+ [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" },
+ [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" },
+ [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" },
+ [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" },
+ [BLK_STS_NEXUS] = { -EBADE, "critical nexus" },
+ [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" },
+ [BLK_STS_PROTECTION] = { -EILSEQ, "protection" },
+ [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
+ [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
+
+ /* device mapper special case, should not leak out: */
+ [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
+
+ /* everything else not covered above: */
+ [BLK_STS_IOERR] = { -EIO, "I/O" },
+};
+
+int blk_status_to_errno(blk_status_t status)
+{
+ int idx = (__force int)status;
+
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+ return -EIO;
+ return blk_errors[idx].err;
+}
+
+const char *blk_status_to_str(blk_status_t status)
+{
+ int idx = (__force int)status;
+
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+ return "(invalid error)";
+ return blk_errors[idx].name;
+}
+
+void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+ struct bio *src, struct bvec_iter *src_iter)
+{
+ struct bio_vec src_bv, dst_bv;
+ void *src_p, *dst_p;
+ unsigned bytes;
+
+ while (src_iter->bi_size && dst_iter->bi_size) {
+ src_bv = bio_iter_iovec(src, *src_iter);
+ dst_bv = bio_iter_iovec(dst, *dst_iter);
+
+ bytes = min(src_bv.bv_len, dst_bv.bv_len);
+
+ src_p = kmap_atomic(src_bv.bv_page);
+ dst_p = kmap_atomic(dst_bv.bv_page);
+
+ memcpy(dst_p + dst_bv.bv_offset,
+ src_p + src_bv.bv_offset,
+ bytes);
+
+ kunmap_atomic(dst_p);
+ kunmap_atomic(src_p);
+
+ flush_dcache_page(dst_bv.bv_page);
+
+ bio_advance_iter(src, src_iter, bytes);
+ bio_advance_iter(dst, dst_iter, bytes);
+ }
+}
+
+/**
+ * bio_copy_data - copy contents of data buffers from one bio to another
+ * @src: source bio
+ * @dst: destination bio
+ *
+ * Stops when it reaches the end of either @src or @dst - that is, copies
+ * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
+ */
+void bio_copy_data(struct bio *dst, struct bio *src)
+{
+ struct bvec_iter src_iter = src->bi_iter;
+ struct bvec_iter dst_iter = dst->bi_iter;
+
+ bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+}
+
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
+{
+ unsigned long flags;
+ struct bio_vec bv;
+ struct bvec_iter iter;
+
+ __bio_for_each_segment(bv, bio, iter, start) {
+ char *data = bvec_kmap_irq(&bv, &flags);
+ memset(data, 0, bv.bv_len);
+ bvec_kunmap_irq(data, &flags);
+ }
+}
+
+static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
+{
+ bio_set_flag(bio, BIO_CLONED);
+ bio->bi_ioprio = bio_src->bi_ioprio;
+ bio->bi_iter = bio_src->bi_iter;
+ return 0;
+}
+
+struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
+ gfp_t gfp, struct bio_set *bs)
+{
+ struct bio *bio;
+
+ bio = bio_alloc_bioset(bdev, 0, bio_src->bi_opf, gfp, bs);
+ if (!bio)
+ return NULL;
+
+ if (__bio_clone(bio, bio_src, gfp) < 0) {
+ bio_put(bio);
+ return NULL;
+ }
+ bio->bi_io_vec = bio_src->bi_io_vec;
+
+ return bio;
+}
+
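+/*
+ * Clone @bio, trim the clone to the first @sectors, and advance @bio past the
+ * split point so the caller can submit the two halves separately.
+ */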
+struct bio *bio_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs)
+{
+ struct bio *split = NULL;
+
+ BUG_ON(sectors <= 0);
+ BUG_ON(sectors >= bio_sectors(bio));
+
+ split = bio_alloc_clone(bio->bi_bdev, bio, gfp, bs);
+ if (!split)
+ return NULL;
+
+ split->bi_iter.bi_size = sectors << 9;
+
+ bio_advance(bio, split->bi_iter.bi_size);
+
+ return split;
+}
+
+void bio_free_pages(struct bio *bio)
+{
+ struct bvec_iter_all iter;
+ struct bio_vec *bvec;
+
+ bio_for_each_segment_all(bvec, bio, iter)
+ __free_page(bvec->bv_page);
+}
+
+void bio_advance(struct bio *bio, unsigned bytes)
+{
+ bio_advance_iter(bio, &bio->bi_iter, bytes);
+}
+
+static void bio_free(struct bio *bio)
+{
+ struct bio_set *bs = bio->bi_pool;
+
+ if (bs) {
+ if (bio->bi_max_vecs > BIO_INLINE_VECS)
+ mempool_free(bio->bi_io_vec, &bs->bvec_pool);
+
+ mempool_free((void *) bio - bs->front_pad, &bs->bio_pool);
+ } else {
+ kfree(bio);
+ }
+}
+
+void bio_put(struct bio *bio)
+{
+ if (!bio_flagged(bio, BIO_REFFED))
+ bio_free(bio);
+ else {
+ BUG_ON(!atomic_read(&bio->__bi_cnt));
+
+ /*
+ * last put frees it
+ */
+ if (atomic_dec_and_test(&bio->__bi_cnt))
+ bio_free(bio);
+ }
+}
+
+int bio_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off)
+{
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
+
+ WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+ WARN_ON_ONCE(bio->bi_vcnt >= bio->bi_max_vecs);
+
+ bv->bv_page = page;
+ bv->bv_offset = off;
+ bv->bv_len = len;
+
+ bio->bi_iter.bi_size += len;
+ bio->bi_vcnt++;
+ return len;
+}
+
+static inline bool bio_remaining_done(struct bio *bio)
+{
+ /*
+ * If we're not chaining, then ->__bi_remaining is always 1 and
+ * we always end io on the first invocation.
+ */
+ if (!bio_flagged(bio, BIO_CHAIN))
+ return true;
+
+ BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
+
+ if (atomic_dec_and_test(&bio->__bi_remaining)) {
+ bio_clear_flag(bio, BIO_CHAIN);
+ return true;
+ }
+
+ return false;
+}
+
+static struct bio *__bio_chain_endio(struct bio *bio)
+{
+ struct bio *parent = bio->bi_private;
+
+ if (!parent->bi_status)
+ parent->bi_status = bio->bi_status;
+ bio_put(bio);
+ return parent;
+}
+
+static void bio_chain_endio(struct bio *bio)
+{
+ bio_endio(__bio_chain_endio(bio));
+}
+
+void bio_endio(struct bio *bio)
+{
+again:
+ if (!bio_remaining_done(bio))
+ return;
+
+ /*
+ * Need to have a real endio function for chained bios, otherwise
+ * various corner cases will break (like stacking block devices that
+ * save/restore bi_end_io) - however, we want to avoid unbounded
+ * recursion and blowing the stack. Tail call optimization would
+ * handle this, but compiling with frame pointers also disables
+ * gcc's sibling call optimization.
+ */
+ if (bio->bi_end_io == bio_chain_endio) {
+ bio = __bio_chain_endio(bio);
+ goto again;
+ }
+
+ if (bio->bi_end_io)
+ bio->bi_end_io(bio);
+}
+
+void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
+{
+ unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
+
+ memset(bio, 0, BIO_RESET_BYTES);
+ bio->bi_bdev = bdev;
+ bio->bi_opf = opf;
+ bio->bi_flags = flags;
+ atomic_set(&bio->__bi_remaining, 1);
+}
+
+struct bio *bio_kmalloc(unsigned int nr_iovecs, gfp_t gfp_mask)
+{
+ struct bio *bio;
+
+ bio = kmalloc(sizeof(struct bio) +
+ sizeof(struct bio_vec) * nr_iovecs, gfp_mask);
+ if (unlikely(!bio))
+ return NULL;
+ bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs, 0);
+ bio->bi_pool = NULL;
+ return bio;
+}
+
+static struct bio_vec *bvec_alloc(mempool_t *pool, int *nr_vecs,
+ gfp_t gfp_mask)
+{
+ *nr_vecs = roundup_pow_of_two(*nr_vecs);
+ /*
+ * Try a slab allocation first for all smaller allocations. If that
+ * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
+ * The mempool is sized to handle up to BIO_MAX_VECS entries.
+ */
+ if (*nr_vecs < BIO_MAX_VECS) {
+ struct bio_vec *bvl;
+
+ bvl = kmalloc(sizeof(*bvl) * *nr_vecs, gfp_mask);
+ if (likely(bvl))
+ return bvl;
+ *nr_vecs = BIO_MAX_VECS;
+ }
+
+ return mempool_alloc(pool, gfp_mask);
+}
+
+struct bio *bio_alloc_bioset(struct block_device *bdev,
+ unsigned nr_iovecs,
+ unsigned opf,
+ gfp_t gfp_mask,
+ struct bio_set *bs)
+{
+ struct bio *bio;
+ void *p;
+
+ if (nr_iovecs > BIO_MAX_VECS)
+ return NULL;
+
+ p = mempool_alloc(&bs->bio_pool, gfp_mask);
+ if (unlikely(!p))
+ return NULL;
+
+ bio = p + bs->front_pad;
+ if (nr_iovecs > BIO_INLINE_VECS) {
+ struct bio_vec *bvl = NULL;
+
+ bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
+ if (unlikely(!bvl))
+ goto err_free;
+
+ bio_init(bio, bdev, bvl, nr_iovecs, opf);
+ } else if (nr_iovecs) {
+ bio_init(bio, bdev, bio->bi_inline_vecs, BIO_INLINE_VECS, opf);
+ } else {
+ bio_init(bio, bdev, NULL, 0, opf);
+ }
+
+ bio->bi_pool = bs;
+ return bio;
+
+err_free:
+ mempool_free(p, &bs->bio_pool);
+ return NULL;
+}
+
+void bioset_exit(struct bio_set *bs)
+{
+ mempool_exit(&bs->bio_pool);
+ mempool_exit(&bs->bvec_pool);
+}
+
+int bioset_init(struct bio_set *bs,
+ unsigned int pool_size,
+ unsigned int front_pad,
+ int flags)
+{
+ int ret;
+
+ bs->front_pad = front_pad;
+ if (flags & BIOSET_NEED_BVECS)
+ bs->back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
+ else
+ bs->back_pad = 0;
+
+ ret = mempool_init_kmalloc_pool(&bs->bio_pool, pool_size, bs->front_pad +
+ sizeof(struct bio) + bs->back_pad) ?:
+ mempool_init_kmalloc_pool(&bs->bvec_pool, pool_size,
+ sizeof(struct bio_vec) * BIO_MAX_VECS);
+ if (ret)
+ bioset_exit(bs);
+ return ret;
+}
diff --git a/c_src/linux/blkdev.c b/c_src/linux/blkdev.c
new file mode 100644
index 00000000..61f23362
--- /dev/null
+++ b/c_src/linux/blkdev.c
@@ -0,0 +1,428 @@
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include <libaio.h>
+
+#ifdef CONFIG_VALGRIND
+#include <valgrind/memcheck.h>
+#endif
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/completion.h>
+#include <linux/fs.h>
+#include <linux/kthread.h>
+
+#include "tools-util.h"
+
+struct fops {
+ void (*init)(void);
+ void (*cleanup)(void);
+ void (*read)(struct bio *bio, struct iovec * iov, unsigned i);
+ void (*write)(struct bio *bio, struct iovec * iov, unsigned i);
+};
+
+static struct fops *fops;
+static io_context_t aio_ctx;
+static atomic_t running_requests;
+
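+/*
+ * Userspace stand-in for the kernel submit path: gather the bio's segments
+ * into an iovec array on the stack and hand it to the active I/O backend.
+ */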
+void generic_make_request(struct bio *bio)
+{
+ struct iovec *iov;
+ struct bvec_iter iter;
+ struct bio_vec bv;
+ ssize_t ret;
+ unsigned i;
+
+ if (bio->bi_opf & REQ_PREFLUSH) {
+ ret = fdatasync(bio->bi_bdev->bd_fd);
+ if (ret) {
+ fprintf(stderr, "fsync error: %m\n");
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return;
+ }
+ }
+
+ i = 0;
+ bio_for_each_segment(bv, bio, iter)
+ i++;
+
+ iov = alloca(sizeof(*iov) * i);
+
+ i = 0;
+ bio_for_each_segment(bv, bio, iter) {
+ void *start = page_address(bv.bv_page) + bv.bv_offset;
+ size_t len = bv.bv_len;
+
+ iov[i++] = (struct iovec) {
+ .iov_base = start,
+ .iov_len = len,
+ };
+
+#ifdef CONFIG_VALGRIND
+ /* To be pedantic it should only be on IO completion. */
+ if (bio_op(bio) == REQ_OP_READ)
+ VALGRIND_MAKE_MEM_DEFINED(start, len);
+#endif
+ }
+
+ switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ fops->read(bio, iov, i);
+ break;
+ case REQ_OP_WRITE:
+ fops->write(bio, iov, i);
+ break;
+ case REQ_OP_FLUSH:
+ ret = fsync(bio->bi_bdev->bd_fd);
+ if (ret)
+ die("fsync error: %m");
+ bio_endio(bio);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static void submit_bio_wait_endio(struct bio *bio)
+{
+ complete(bio->bi_private);
+}
+
+int submit_bio_wait(struct bio *bio)
+{
+ struct completion done;
+
+ init_completion(&done);
+ bio->bi_private = &done;
+ bio->bi_end_io = submit_bio_wait_endio;
+ bio->bi_opf |= REQ_SYNC;
+ submit_bio(bio);
+ wait_for_completion(&done);
+
+ return blk_status_to_errno(bio->bi_status);
+}
+
+int blkdev_issue_discard(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects,
+ gfp_t gfp_mask)
+{
+ return 0;
+}
+
+int blkdev_issue_zeroout(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects,
+ gfp_t gfp_mask, unsigned flags)
+{
+ /* Not yet implemented: */
+ BUG();
+}
+
+unsigned bdev_logical_block_size(struct block_device *bdev)
+{
+ struct stat statbuf;
+ unsigned blksize;
+ int ret;
+
+ ret = fstat(bdev->bd_fd, &statbuf);
+ BUG_ON(ret);
+
+ if (!S_ISBLK(statbuf.st_mode))
+ return statbuf.st_blksize;
+
+ xioctl(bdev->bd_fd, BLKPBSZGET, &blksize);
+ return blksize;
+}
+
+sector_t get_capacity(struct gendisk *disk)
+{
+ struct block_device *bdev =
+ container_of(disk, struct block_device, __bd_disk);
+ struct stat statbuf;
+ u64 bytes;
+ int ret;
+
+ ret = fstat(bdev->bd_fd, &statbuf);
+ BUG_ON(ret);
+
+ if (!S_ISBLK(statbuf.st_mode))
+ return statbuf.st_size >> 9;
+
+ ret = ioctl(bdev->bd_fd, BLKGETSIZE64, &bytes);
+ BUG_ON(ret);
+
+ return bytes >> 9;
+}
+
+void blkdev_put(struct block_device *bdev, void *holder)
+{
+ fdatasync(bdev->bd_fd);
+ close(bdev->bd_fd);
+ free(bdev);
+}
+
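+/*
+ * Open @path with open(2), translating BLK_OPEN_* flags to O_* flags, and
+ * wrap the file descriptor in a heap-allocated struct block_device.
+ */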
+struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
+ void *holder, const struct blk_holder_ops *hop)
+{
+ struct block_device *bdev;
+ int fd, flags = 0;
+
+ if ((mode & (BLK_OPEN_READ|BLK_OPEN_WRITE)) == (BLK_OPEN_READ|BLK_OPEN_WRITE))
+ flags = O_RDWR;
+ else if (mode & BLK_OPEN_READ)
+ flags = O_RDONLY;
+ else if (mode & BLK_OPEN_WRITE)
+ flags = O_WRONLY;
+
+ if (!(mode & BLK_OPEN_BUFFERED))
+ flags |= O_DIRECT;
+
+ if (mode & BLK_OPEN_EXCL)
+ flags |= O_EXCL;
+
+ fd = open(path, flags);
+ if (fd < 0)
+ return ERR_PTR(-errno);
+
+ bdev = malloc(sizeof(*bdev));
+ memset(bdev, 0, sizeof(*bdev));
+
+ strncpy(bdev->name, path, sizeof(bdev->name));
+ bdev->name[sizeof(bdev->name) - 1] = '\0';
+
+ bdev->bd_dev = xfstat(fd).st_rdev;
+ bdev->bd_fd = fd;
+ bdev->bd_holder = holder;
+ bdev->bd_disk = &bdev->__bd_disk;
+ bdev->bd_disk->bdi = &bdev->bd_disk->__bdi;
+ bdev->queue.backing_dev_info = bdev->bd_disk->bdi;
+
+ return bdev;
+}
+
+void bdput(struct block_device *bdev)
+{
+ BUG();
+}
+
+int lookup_bdev(const char *path, dev_t *dev)
+{
+ return -EINVAL;
+}
+
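+/*
+ * Advance to the next backend in fops_list when the current one cannot
+ * initialize (e.g. io_setup() reporting -ENOSYS).
+ */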
+static void io_fallback(void)
+{
+ fops++;
+ if (fops->init == NULL)
+ die("no fallback possible, something is very wrong");
+ fops->init();
+}
+
+static void sync_check(struct bio *bio, int ret)
+{
+ if (ret != bio->bi_iter.bi_size) {
+ die("IO error: %s\n", strerror(-ret));
+ }
+
+ if (bio->bi_opf & REQ_FUA) {
+ ret = fdatasync(bio->bi_bdev->bd_fd);
+ if (ret)
+ die("fsync error: %s\n", strerror(-ret));
+ }
+ bio_endio(bio);
+}
+
+static void sync_init(void) {}
+
+static void sync_cleanup(void)
+{
+ /* not necessary? */
+ sync();
+}
+
+static void sync_read(struct bio *bio, struct iovec * iov, unsigned i)
+{
+ ssize_t ret = preadv(bio->bi_bdev->bd_fd, iov, i,
+ bio->bi_iter.bi_sector << 9);
+ sync_check(bio, ret);
+}
+
+static void sync_write(struct bio *bio, struct iovec * iov, unsigned i)
+{
+ ssize_t ret = pwritev2(bio->bi_bdev->bd_fd, iov, i,
+ bio->bi_iter.bi_sector << 9,
+ bio->bi_opf & REQ_FUA ? RWF_SYNC : 0);
+ sync_check(bio, ret);
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(aio_events_completed);
+
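+/*
+ * Reaper thread: collect completed libaio events and call bio_endio() on
+ * each; a NULL ev->data entry is the shutdown marker queued by aio_cleanup().
+ */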
+static int aio_completion_thread(void *arg)
+{
+ struct io_event events[8], *ev;
+ int ret;
+ bool stop = false;
+
+ while (!stop) {
+ ret = io_getevents(aio_ctx, 1, ARRAY_SIZE(events),
+ events, NULL);
+
+ if (ret < 0 && ret == -EINTR)
+ continue;
+ if (ret < 0)
+ die("io_getevents() error: %s", strerror(-ret));
+ if (ret)
+ wake_up(&aio_events_completed);
+
+ for (ev = events; ev < events + ret; ev++) {
+ struct bio *bio = (struct bio *) ev->data;
+
+ /* This should only happen during blkdev_cleanup() */
+ if (!bio) {
+ BUG_ON(atomic_read(&running_requests) != 0);
+ stop = true;
+ continue;
+ }
+
+ if (ev->res != bio->bi_iter.bi_size)
+ bio->bi_status = BLK_STS_IOERR;
+
+ bio_endio(bio);
+ atomic_dec(&running_requests);
+ }
+ }
+
+ return 0;
+}
+
+static struct task_struct *aio_task = NULL;
+
+static void aio_init(void)
+{
+ struct task_struct *p;
+ long err = io_setup(256, &aio_ctx);
+ if (!err) {
+ p = kthread_run(aio_completion_thread, NULL, "aio_completion");
+ BUG_ON(IS_ERR(p));
+
+ aio_task = p;
+
+ } else if (err == -ENOSYS) {
+ io_fallback();
+ } else {
+ die("io_setup() error: %s", strerror(err));
+ }
+}
+
+static void aio_cleanup(void)
+{
+ struct task_struct *p = NULL;
+ swap(aio_task, p);
+ get_task_struct(p);
+
+ /* I mean, really?! IO_CMD_NOOP is even defined, but not implemented. */
+ int fds[2];
+ int ret = pipe(fds);
+ if (ret != 0)
+ die("pipe err: %s", strerror(ret));
+
+ /* Wake up the completion thread with spurious work. */
+ int junk = 0;
+ struct iocb iocb = {
+ .aio_lio_opcode = IO_CMD_PWRITE,
+ .data = NULL, /* Signal to stop */
+ .aio_fildes = fds[1],
+ .u.c.buf = &junk,
+ .u.c.nbytes = 1,
+ }, *iocbp = &iocb;
+ ret = io_submit(aio_ctx, 1, &iocbp);
+ if (ret != 1)
+ die("io_submit cleanup err: %s", strerror(-ret));
+
+ ret = kthread_stop(p);
+ BUG_ON(ret);
+
+ put_task_struct(p);
+
+ close(fds[0]);
+ close(fds[1]);
+}
+
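+/*
+ * Queue a single preadv/pwritev iocb for @bio, retrying io_submit() whenever
+ * the ring is full (-EAGAIN) once the completion thread signals progress.
+ */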
+static void aio_op(struct bio *bio, struct iovec *iov, unsigned i, int opcode)
+{
+ ssize_t ret;
+ struct iocb iocb = {
+ .data = bio,
+ .aio_fildes = bio->bi_bdev->bd_fd,
+ .aio_rw_flags = bio->bi_opf & REQ_FUA ? RWF_SYNC : 0,
+ .aio_lio_opcode = opcode,
+ .u.c.buf = iov,
+ .u.c.nbytes = i,
+ .u.c.offset = bio->bi_iter.bi_sector << 9,
+
+ }, *iocbp = &iocb;
+
+ atomic_inc(&running_requests);
+
+ wait_event(aio_events_completed,
+ (ret = io_submit(aio_ctx, 1, &iocbp)) != -EAGAIN);
+
+ if (ret != 1)
+ die("io_submit err: %s", strerror(-ret));
+}
+
+static void aio_read(struct bio *bio, struct iovec *iov, unsigned i)
+{
+ aio_op(bio, iov, i, IO_CMD_PREADV);
+}
+
+static void aio_write(struct bio *bio, struct iovec * iov, unsigned i)
+{
+ aio_op(bio, iov, i, IO_CMD_PWRITEV);
+}
+
+/* not implemented */
+static void uring_init(void)
+{
+ io_fallback();
+}
+
+struct fops fops_list[] = {
+ {
+ .init = uring_init,
+ }, {
+ .init = aio_init,
+ .cleanup = aio_cleanup,
+ .read = aio_read,
+ .write = aio_write,
+ }, {
+ .init = sync_init,
+ .cleanup = sync_cleanup,
+ .read = sync_read,
+ .write = sync_write,
+ }, {
+ /* NULL */
+ }
+};
+
+__attribute__((constructor(102)))
+static void blkdev_init(void)
+{
+ fops = fops_list;
+ fops->init();
+}
+
+__attribute__((destructor(102)))
+static void blkdev_cleanup(void)
+{
+ fops->cleanup();
+}
diff --git a/c_src/linux/closure.c b/c_src/linux/closure.c
new file mode 100644
index 00000000..c1654055
--- /dev/null
+++ b/c_src/linux/closure.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Asynchronous refcounty things
+ *
+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
+ * Copyright 2012 Google, Inc.
+ */
+
+#include <linux/closure.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/sched/debug.h>
+
+static inline void closure_put_after_sub(struct closure *cl, int flags)
+{
+ int r = flags & CLOSURE_REMAINING_MASK;
+
+ BUG_ON(flags & CLOSURE_GUARD_MASK);
+ BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
+
+ if (!r) {
+ smp_acquire__after_ctrl_dep();
+
+ cl->closure_get_happened = false;
+
+ if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
+ atomic_set(&cl->remaining,
+ CLOSURE_REMAINING_INITIALIZER);
+ closure_queue(cl);
+ } else {
+ struct closure *parent = cl->parent;
+ closure_fn *destructor = cl->fn;
+
+ closure_debug_destroy(cl);
+
+ if (destructor)
+ destructor(&cl->work);
+
+ if (parent)
+ closure_put(parent);
+ }
+ }
+}
+
+/* For clearing flags with the same atomic op as a put */
+void closure_sub(struct closure *cl, int v)
+{
+ closure_put_after_sub(cl, atomic_sub_return_release(v, &cl->remaining));
+}
+EXPORT_SYMBOL(closure_sub);
+
+/*
+ * closure_put - decrement a closure's refcount
+ */
+void closure_put(struct closure *cl)
+{
+ closure_put_after_sub(cl, atomic_dec_return_release(&cl->remaining));
+}
+EXPORT_SYMBOL(closure_put);
+
+/*
+ * closure_wake_up - wake up all closures on a wait list, without memory barrier
+ */
+void __closure_wake_up(struct closure_waitlist *wait_list)
+{
+ struct llist_node *list;
+ struct closure *cl, *t;
+ struct llist_node *reverse = NULL;
+
+ list = llist_del_all(&wait_list->list);
+
+ /* We first reverse the list to preserve FIFO ordering and fairness */
+ reverse = llist_reverse_order(list);
+
+ /* Then do the wakeups */
+ llist_for_each_entry_safe(cl, t, reverse, list) {
+ closure_set_waiting(cl, 0);
+ closure_sub(cl, CLOSURE_WAITING + 1);
+ }
+}
+EXPORT_SYMBOL(__closure_wake_up);
+
+/**
+ * closure_wait - add a closure to a waitlist
+ * @waitlist: will own a ref on @cl, which will be released when
+ * closure_wake_up() is called on @waitlist.
+ * @cl: closure pointer.
+ *
+ */
+bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
+{
+ if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
+ return false;
+
+ cl->closure_get_happened = true;
+ closure_set_waiting(cl, _RET_IP_);
+ atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
+ llist_add(&cl->list, &waitlist->list);
+
+ return true;
+}
+EXPORT_SYMBOL(closure_wait);
+
+struct closure_syncer {
+ struct task_struct *task;
+ int done;
+};
+
+static CLOSURE_CALLBACK(closure_sync_fn)
+{
+ struct closure *cl = container_of(ws, struct closure, work);
+ struct closure_syncer *s = cl->s;
+ struct task_struct *p;
+
+ rcu_read_lock();
+ p = READ_ONCE(s->task);
+ s->done = 1;
+ wake_up_process(p);
+ rcu_read_unlock();
+}
+
+void __sched __closure_sync(struct closure *cl)
+{
+ struct closure_syncer s = { .task = current };
+
+ cl->s = &s;
+ continue_at(cl, closure_sync_fn, NULL);
+
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (s.done)
+ break;
+ schedule();
+ }
+
+ __set_current_state(TASK_RUNNING);
+}
+EXPORT_SYMBOL(__closure_sync);
+
+#ifdef CONFIG_DEBUG_CLOSURES
+
+static LIST_HEAD(closure_list);
+static DEFINE_SPINLOCK(closure_list_lock);
+
+void closure_debug_create(struct closure *cl)
+{
+ unsigned long flags;
+
+ BUG_ON(cl->magic == CLOSURE_MAGIC_ALIVE);
+ cl->magic = CLOSURE_MAGIC_ALIVE;
+
+ spin_lock_irqsave(&closure_list_lock, flags);
+ list_add(&cl->all, &closure_list);
+ spin_unlock_irqrestore(&closure_list_lock, flags);
+}
+EXPORT_SYMBOL(closure_debug_create);
+
+void closure_debug_destroy(struct closure *cl)
+{
+ unsigned long flags;
+
+ BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE);
+ cl->magic = CLOSURE_MAGIC_DEAD;
+
+ spin_lock_irqsave(&closure_list_lock, flags);
+ list_del(&cl->all);
+ spin_unlock_irqrestore(&closure_list_lock, flags);
+}
+EXPORT_SYMBOL(closure_debug_destroy);
+
+static int debug_show(struct seq_file *f, void *data)
+{
+ struct closure *cl;
+
+ spin_lock_irq(&closure_list_lock);
+
+ list_for_each_entry(cl, &closure_list, all) {
+ int r = atomic_read(&cl->remaining);
+
+ seq_printf(f, "%p: %pS -> %pS p %p r %i ",
+ cl, (void *) cl->ip, cl->fn, cl->parent,
+ r & CLOSURE_REMAINING_MASK);
+
+ seq_printf(f, "%s%s\n",
+ test_bit(WORK_STRUCT_PENDING_BIT,
+ work_data_bits(&cl->work)) ? "Q" : "",
+ r & CLOSURE_RUNNING ? "R" : "");
+
+ if (r & CLOSURE_WAITING)
+ seq_printf(f, " W %pS\n",
+ (void *) cl->waiting_on);
+
+ seq_puts(f, "\n");
+ }
+
+ spin_unlock_irq(&closure_list_lock);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(debug);
+
+static int __init closure_debug_init(void)
+{
+ debugfs_create_file("closures", 0400, NULL, NULL, &debug_fops);
+ return 0;
+}
+late_initcall(closure_debug_init)
+
+#endif
diff --git a/c_src/linux/crc64.c b/c_src/linux/crc64.c
new file mode 100644
index 00000000..0ef8ae6a
--- /dev/null
+++ b/c_src/linux/crc64.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Normal 64-bit CRC calculation.
+ *
+ * This is a basic crc64 implementation following the ECMA-182 specification,
+ * which can be found at:
+ * http://www.ecma-international.org/publications/standards/Ecma-182.htm
+ *
+ * Dr. Ross N. Williams has a great document introducing the ideas behind CRC
+ * algorithms; the CRC64 code here is also inspired by the table-driven
+ * algorithm and detailed example in that paper, which can be found at:
+ * http://www.ross.net/crc/download/crc_v3.txt
+ *
+ * crc64table[256] is the lookup table for the table-driven 64-bit CRC
+ * calculation, generated by gen_crc64table.c at kernel build time. The
+ * polynomial used for the crc64 arithmetic is also taken from the ECMA-182
+ * specification, and is defined as,
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * Copyright 2018 SUSE Linux.
+ * Author: Coly Li <colyli@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include "crc64table.h"
+
+MODULE_DESCRIPTION("CRC64 calculations");
+MODULE_LICENSE("GPL v2");
+
+/**
+ * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
+ * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
+ *       or the previous crc64 value if computing incrementally.
+ * @p: pointer to buffer over which CRC64 is run
+ * @len: length of buffer @p
+ */
+u64 __pure crc64_be(u64 crc, const void *p, size_t len)
+{
+ size_t i, t;
+
+ const unsigned char *_p = p;
+
+ for (i = 0; i < len; i++) {
+ t = ((crc >> 56) ^ (*_p++)) & 0xFF;
+ crc = crc64table[t] ^ (crc << 8);
+ }
+
+ return crc;
+}
+EXPORT_SYMBOL_GPL(crc64_be);
diff --git a/c_src/linux/crc64table.h b/c_src/linux/crc64table.h
new file mode 100644
index 00000000..9964164d
--- /dev/null
+++ b/c_src/linux/crc64table.h
@@ -0,0 +1,135 @@
+/* this file is generated - do not edit */
+
+#include <linux/types.h>
+#include <linux/cache.h>
+
+static const u64 ____cacheline_aligned crc64table[256] = {
+ 0x0000000000000000ULL, 0x42f0e1eba9ea3693ULL,
+ 0x85e1c3d753d46d26ULL, 0xc711223cfa3e5bb5ULL,
+ 0x493366450e42ecdfULL, 0x0bc387aea7a8da4cULL,
+ 0xccd2a5925d9681f9ULL, 0x8e224479f47cb76aULL,
+ 0x9266cc8a1c85d9beULL, 0xd0962d61b56fef2dULL,
+ 0x17870f5d4f51b498ULL, 0x5577eeb6e6bb820bULL,
+ 0xdb55aacf12c73561ULL, 0x99a54b24bb2d03f2ULL,
+ 0x5eb4691841135847ULL, 0x1c4488f3e8f96ed4ULL,
+ 0x663d78ff90e185efULL, 0x24cd9914390bb37cULL,
+ 0xe3dcbb28c335e8c9ULL, 0xa12c5ac36adfde5aULL,
+ 0x2f0e1eba9ea36930ULL, 0x6dfeff5137495fa3ULL,
+ 0xaaefdd6dcd770416ULL, 0xe81f3c86649d3285ULL,
+ 0xf45bb4758c645c51ULL, 0xb6ab559e258e6ac2ULL,
+ 0x71ba77a2dfb03177ULL, 0x334a9649765a07e4ULL,
+ 0xbd68d2308226b08eULL, 0xff9833db2bcc861dULL,
+ 0x388911e7d1f2dda8ULL, 0x7a79f00c7818eb3bULL,
+ 0xcc7af1ff21c30bdeULL, 0x8e8a101488293d4dULL,
+ 0x499b3228721766f8ULL, 0x0b6bd3c3dbfd506bULL,
+ 0x854997ba2f81e701ULL, 0xc7b97651866bd192ULL,
+ 0x00a8546d7c558a27ULL, 0x4258b586d5bfbcb4ULL,
+ 0x5e1c3d753d46d260ULL, 0x1cecdc9e94ace4f3ULL,
+ 0xdbfdfea26e92bf46ULL, 0x990d1f49c77889d5ULL,
+ 0x172f5b3033043ebfULL, 0x55dfbadb9aee082cULL,
+ 0x92ce98e760d05399ULL, 0xd03e790cc93a650aULL,
+ 0xaa478900b1228e31ULL, 0xe8b768eb18c8b8a2ULL,
+ 0x2fa64ad7e2f6e317ULL, 0x6d56ab3c4b1cd584ULL,
+ 0xe374ef45bf6062eeULL, 0xa1840eae168a547dULL,
+ 0x66952c92ecb40fc8ULL, 0x2465cd79455e395bULL,
+ 0x3821458aada7578fULL, 0x7ad1a461044d611cULL,
+ 0xbdc0865dfe733aa9ULL, 0xff3067b657990c3aULL,
+ 0x711223cfa3e5bb50ULL, 0x33e2c2240a0f8dc3ULL,
+ 0xf4f3e018f031d676ULL, 0xb60301f359dbe0e5ULL,
+ 0xda050215ea6c212fULL, 0x98f5e3fe438617bcULL,
+ 0x5fe4c1c2b9b84c09ULL, 0x1d14202910527a9aULL,
+ 0x93366450e42ecdf0ULL, 0xd1c685bb4dc4fb63ULL,
+ 0x16d7a787b7faa0d6ULL, 0x5427466c1e109645ULL,
+ 0x4863ce9ff6e9f891ULL, 0x0a932f745f03ce02ULL,
+ 0xcd820d48a53d95b7ULL, 0x8f72eca30cd7a324ULL,
+ 0x0150a8daf8ab144eULL, 0x43a04931514122ddULL,
+ 0x84b16b0dab7f7968ULL, 0xc6418ae602954ffbULL,
+ 0xbc387aea7a8da4c0ULL, 0xfec89b01d3679253ULL,
+ 0x39d9b93d2959c9e6ULL, 0x7b2958d680b3ff75ULL,
+ 0xf50b1caf74cf481fULL, 0xb7fbfd44dd257e8cULL,
+ 0x70eadf78271b2539ULL, 0x321a3e938ef113aaULL,
+ 0x2e5eb66066087d7eULL, 0x6cae578bcfe24bedULL,
+ 0xabbf75b735dc1058ULL, 0xe94f945c9c3626cbULL,
+ 0x676dd025684a91a1ULL, 0x259d31cec1a0a732ULL,
+ 0xe28c13f23b9efc87ULL, 0xa07cf2199274ca14ULL,
+ 0x167ff3eacbaf2af1ULL, 0x548f120162451c62ULL,
+ 0x939e303d987b47d7ULL, 0xd16ed1d631917144ULL,
+ 0x5f4c95afc5edc62eULL, 0x1dbc74446c07f0bdULL,
+ 0xdaad56789639ab08ULL, 0x985db7933fd39d9bULL,
+ 0x84193f60d72af34fULL, 0xc6e9de8b7ec0c5dcULL,
+ 0x01f8fcb784fe9e69ULL, 0x43081d5c2d14a8faULL,
+ 0xcd2a5925d9681f90ULL, 0x8fdab8ce70822903ULL,
+ 0x48cb9af28abc72b6ULL, 0x0a3b7b1923564425ULL,
+ 0x70428b155b4eaf1eULL, 0x32b26afef2a4998dULL,
+ 0xf5a348c2089ac238ULL, 0xb753a929a170f4abULL,
+ 0x3971ed50550c43c1ULL, 0x7b810cbbfce67552ULL,
+ 0xbc902e8706d82ee7ULL, 0xfe60cf6caf321874ULL,
+ 0xe224479f47cb76a0ULL, 0xa0d4a674ee214033ULL,
+ 0x67c58448141f1b86ULL, 0x253565a3bdf52d15ULL,
+ 0xab1721da49899a7fULL, 0xe9e7c031e063acecULL,
+ 0x2ef6e20d1a5df759ULL, 0x6c0603e6b3b7c1caULL,
+ 0xf6fae5c07d3274cdULL, 0xb40a042bd4d8425eULL,
+ 0x731b26172ee619ebULL, 0x31ebc7fc870c2f78ULL,
+ 0xbfc9838573709812ULL, 0xfd39626eda9aae81ULL,
+ 0x3a28405220a4f534ULL, 0x78d8a1b9894ec3a7ULL,
+ 0x649c294a61b7ad73ULL, 0x266cc8a1c85d9be0ULL,
+ 0xe17dea9d3263c055ULL, 0xa38d0b769b89f6c6ULL,
+ 0x2daf4f0f6ff541acULL, 0x6f5faee4c61f773fULL,
+ 0xa84e8cd83c212c8aULL, 0xeabe6d3395cb1a19ULL,
+ 0x90c79d3fedd3f122ULL, 0xd2377cd44439c7b1ULL,
+ 0x15265ee8be079c04ULL, 0x57d6bf0317edaa97ULL,
+ 0xd9f4fb7ae3911dfdULL, 0x9b041a914a7b2b6eULL,
+ 0x5c1538adb04570dbULL, 0x1ee5d94619af4648ULL,
+ 0x02a151b5f156289cULL, 0x4051b05e58bc1e0fULL,
+ 0x87409262a28245baULL, 0xc5b073890b687329ULL,
+ 0x4b9237f0ff14c443ULL, 0x0962d61b56fef2d0ULL,
+ 0xce73f427acc0a965ULL, 0x8c8315cc052a9ff6ULL,
+ 0x3a80143f5cf17f13ULL, 0x7870f5d4f51b4980ULL,
+ 0xbf61d7e80f251235ULL, 0xfd913603a6cf24a6ULL,
+ 0x73b3727a52b393ccULL, 0x31439391fb59a55fULL,
+ 0xf652b1ad0167feeaULL, 0xb4a25046a88dc879ULL,
+ 0xa8e6d8b54074a6adULL, 0xea16395ee99e903eULL,
+ 0x2d071b6213a0cb8bULL, 0x6ff7fa89ba4afd18ULL,
+ 0xe1d5bef04e364a72ULL, 0xa3255f1be7dc7ce1ULL,
+ 0x64347d271de22754ULL, 0x26c49cccb40811c7ULL,
+ 0x5cbd6cc0cc10fafcULL, 0x1e4d8d2b65facc6fULL,
+ 0xd95caf179fc497daULL, 0x9bac4efc362ea149ULL,
+ 0x158e0a85c2521623ULL, 0x577eeb6e6bb820b0ULL,
+ 0x906fc95291867b05ULL, 0xd29f28b9386c4d96ULL,
+ 0xcedba04ad0952342ULL, 0x8c2b41a1797f15d1ULL,
+ 0x4b3a639d83414e64ULL, 0x09ca82762aab78f7ULL,
+ 0x87e8c60fded7cf9dULL, 0xc51827e4773df90eULL,
+ 0x020905d88d03a2bbULL, 0x40f9e43324e99428ULL,
+ 0x2cffe7d5975e55e2ULL, 0x6e0f063e3eb46371ULL,
+ 0xa91e2402c48a38c4ULL, 0xebeec5e96d600e57ULL,
+ 0x65cc8190991cb93dULL, 0x273c607b30f68faeULL,
+ 0xe02d4247cac8d41bULL, 0xa2dda3ac6322e288ULL,
+ 0xbe992b5f8bdb8c5cULL, 0xfc69cab42231bacfULL,
+ 0x3b78e888d80fe17aULL, 0x7988096371e5d7e9ULL,
+ 0xf7aa4d1a85996083ULL, 0xb55aacf12c735610ULL,
+ 0x724b8ecdd64d0da5ULL, 0x30bb6f267fa73b36ULL,
+ 0x4ac29f2a07bfd00dULL, 0x08327ec1ae55e69eULL,
+ 0xcf235cfd546bbd2bULL, 0x8dd3bd16fd818bb8ULL,
+ 0x03f1f96f09fd3cd2ULL, 0x41011884a0170a41ULL,
+ 0x86103ab85a2951f4ULL, 0xc4e0db53f3c36767ULL,
+ 0xd8a453a01b3a09b3ULL, 0x9a54b24bb2d03f20ULL,
+ 0x5d45907748ee6495ULL, 0x1fb5719ce1045206ULL,
+ 0x919735e51578e56cULL, 0xd367d40ebc92d3ffULL,
+ 0x1476f63246ac884aULL, 0x568617d9ef46bed9ULL,
+ 0xe085162ab69d5e3cULL, 0xa275f7c11f7768afULL,
+ 0x6564d5fde549331aULL, 0x279434164ca30589ULL,
+ 0xa9b6706fb8dfb2e3ULL, 0xeb46918411358470ULL,
+ 0x2c57b3b8eb0bdfc5ULL, 0x6ea7525342e1e956ULL,
+ 0x72e3daa0aa188782ULL, 0x30133b4b03f2b111ULL,
+ 0xf7021977f9cceaa4ULL, 0xb5f2f89c5026dc37ULL,
+ 0x3bd0bce5a45a6b5dULL, 0x79205d0e0db05dceULL,
+ 0xbe317f32f78e067bULL, 0xfcc19ed95e6430e8ULL,
+ 0x86b86ed5267cdbd3ULL, 0xc4488f3e8f96ed40ULL,
+ 0x0359ad0275a8b6f5ULL, 0x41a94ce9dc428066ULL,
+ 0xcf8b0890283e370cULL, 0x8d7be97b81d4019fULL,
+ 0x4a6acb477bea5a2aULL, 0x089a2aacd2006cb9ULL,
+ 0x14dea25f3af9026dULL, 0x562e43b4931334feULL,
+ 0x913f6188692d6f4bULL, 0xd3cf8063c0c759d8ULL,
+ 0x5dedc41a34bbeeb2ULL, 0x1f1d25f19d51d821ULL,
+ 0xd80c07cd676f8394ULL, 0x9afce626ce85b507ULL,
+};
diff --git a/c_src/linux/crypto/api.c b/c_src/linux/crypto/api.c
new file mode 100644
index 00000000..2f3ab2b5
--- /dev/null
+++ b/c_src/linux/crypto/api.c
@@ -0,0 +1,114 @@
+/*
+ * Cryptographic API for algorithms (i.e., low-level API).
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <crypto/algapi.h>
+
+static LIST_HEAD(crypto_alg_list);
+static DECLARE_RWSEM(crypto_alg_sem);
+
+struct crypto_type {
+};
+
+int crypto_register_alg(struct crypto_alg *alg)
+{
+ down_write(&crypto_alg_sem);
+ list_add(&alg->cra_list, &crypto_alg_list);
+ up_write(&crypto_alg_sem);
+
+ return 0;
+}
+
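+/*
+ * Look up a registered algorithm by name and type, then construct a transform
+ * via its alloc_tfm() hook.
+ */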
+static void *crypto_alloc_tfm(const char *name,
+ const struct crypto_type *type)
+{
+ struct crypto_alg *alg;
+
+ down_read(&crypto_alg_sem);
+ list_for_each_entry(alg, &crypto_alg_list, cra_list)
+ if (alg->cra_type == type && !strcmp(alg->cra_name, name))
+ goto found;
+
+ alg = ERR_PTR(-ENOENT);
+found:
+ up_read(&crypto_alg_sem);
+
+ if (IS_ERR(alg))
+ return ERR_CAST(alg);
+
+ return alg->alloc_tfm() ?: ERR_PTR(-ENOMEM);
+}
+
+/* skcipher: */
+
+static const struct crypto_type crypto_skcipher_type2 = {
+};
+
+struct crypto_skcipher *crypto_alloc_skcipher(const char *name,
+ u32 type, u32 mask)
+{
+ return crypto_alloc_tfm(name, &crypto_skcipher_type2);
+}
+
+int crypto_register_skcipher(struct skcipher_alg *alg)
+{
+ alg->base.cra_type = &crypto_skcipher_type2;
+
+ return crypto_register_alg(&alg->base);
+}
+
+/* shash: */
+
+#include <crypto/hash.h>
+
+static int shash_finup(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out)
+{
+ return crypto_shash_update(desc, data, len) ?:
+ crypto_shash_final(desc, out);
+}
+
+static int shash_digest(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out)
+{
+ return crypto_shash_init(desc) ?:
+ crypto_shash_finup(desc, data, len, out);
+}
+
+static const struct crypto_type crypto_shash_type = {
+};
+
+struct crypto_shash *crypto_alloc_shash(const char *name,
+ u32 type, u32 mask)
+{
+ return crypto_alloc_tfm(name, &crypto_shash_type);
+}
+
+int crypto_register_shash(struct shash_alg *alg)
+{
+ alg->base.cra_type = &crypto_shash_type;
+
+ if (!alg->finup)
+ alg->finup = shash_finup;
+ if (!alg->digest)
+ alg->digest = shash_digest;
+
+ return crypto_register_alg(&alg->base);
+}
diff --git a/c_src/linux/crypto/chacha20_generic.c b/c_src/linux/crypto/chacha20_generic.c
new file mode 100644
index 00000000..914189e7
--- /dev/null
+++ b/c_src/linux/crypto/chacha20_generic.c
@@ -0,0 +1,111 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/byteorder.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/skcipher.h>
+
+#include <sodium/crypto_stream_chacha20.h>
+
+static struct skcipher_alg alg;
+
+struct chacha20_tfm {
+ struct crypto_skcipher tfm;
+ u32 key[8];
+};
+
+static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize)
+{
+ struct chacha20_tfm *ctx =
+ container_of(tfm, struct chacha20_tfm, tfm);
+ int i;
+
+ if (keysize != CHACHA_KEY_SIZE)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+ ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+ return 0;
+}
+
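+/*
+ * In-place ChaCha20 via libsodium: walk the scatterlist with sg_virt(),
+ * advancing the low word of the block counter (iv[0]) between entries.
+ */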
+static int crypto_chacha20_crypt(struct skcipher_request *req)
+{
+ struct chacha20_tfm *ctx =
+ container_of(req->tfm, struct chacha20_tfm, tfm.base);
+ struct scatterlist *sg = req->src;
+ unsigned nbytes = req->cryptlen;
+ u32 iv[4];
+ int ret;
+
+ BUG_ON(req->src != req->dst);
+
+ memcpy(iv, req->iv, sizeof(iv));
+
+ while (1) {
+ ret = crypto_stream_chacha20_xor_ic(sg_virt(sg),
+ sg_virt(sg),
+ sg->length,
+ (void *) &iv[2],
+ iv[0] | ((u64) iv[1] << 32),
+ (void *) ctx->key);
+ BUG_ON(ret);
+
+ nbytes -= sg->length;
+
+ if (sg_is_last(sg))
+ break;
+
+ BUG_ON(sg->length % CHACHA_BLOCK_SIZE);
+ iv[0] += sg->length / CHACHA_BLOCK_SIZE;
+ sg = sg_next(sg);
+ }
+
+ BUG_ON(nbytes);
+
+ return 0;
+}
+
+static void *crypto_chacha20_alloc_tfm(void)
+{
+ struct chacha20_tfm *tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+
+ if (!tfm)
+ return NULL;
+
+ tfm->tfm.base.alg = &alg.base;
+ tfm->tfm.setkey = crypto_chacha20_setkey;
+ tfm->tfm.encrypt = crypto_chacha20_crypt;
+ tfm->tfm.decrypt = crypto_chacha20_crypt;
+ tfm->tfm.ivsize = CHACHA_IV_SIZE;
+ tfm->tfm.keysize = CHACHA_KEY_SIZE;
+
+ return tfm;
+}
+
+static struct skcipher_alg alg = {
+ .base.cra_name = "chacha20",
+ .base.alloc_tfm = crypto_chacha20_alloc_tfm,
+};
+
+__attribute__((constructor(110)))
+static int chacha20_generic_mod_init(void)
+{
+ return crypto_register_skcipher(&alg);
+}
diff --git a/c_src/linux/crypto/poly1305_generic.c b/c_src/linux/crypto/poly1305_generic.c
new file mode 100644
index 00000000..acb554c0
--- /dev/null
+++ b/c_src/linux/crypto/poly1305_generic.c
@@ -0,0 +1,88 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/byteorder.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/poly1305.h>
+
+static struct shash_alg poly1305_alg;
+
+struct poly1305_desc_ctx {
+ bool key_done;
+ crypto_onetimeauth_poly1305_state s;
+};
+
+static int poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *state = (void *) desc->ctx;
+
+ state->key_done = false;
+ return 0;
+}
+
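+/*
+ * The first update carries the 32-byte one-time key and initializes the
+ * libsodium state; subsequent updates feed message data.
+ */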
+static int poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned len)
+{
+ struct poly1305_desc_ctx *state = (void *) desc->ctx;
+
+ if (!state->key_done) {
+ BUG_ON(len != crypto_onetimeauth_poly1305_KEYBYTES);
+
+ state->key_done = true;
+ return crypto_onetimeauth_poly1305_init(&state->s, src);
+ }
+
+ return crypto_onetimeauth_poly1305_update(&state->s, src, len);
+}
+
+static int poly1305_final(struct shash_desc *desc, u8 *out)
+{
+ struct poly1305_desc_ctx *state = (void *) desc->ctx;
+
+ return crypto_onetimeauth_poly1305_final(&state->s, out);
+}
+
+static void *poly1305_alloc_tfm(void)
+{
+ struct crypto_shash *tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+
+ if (!tfm)
+ return NULL;
+
+ tfm->base.alg = &poly1305_alg.base;
+ tfm->descsize = sizeof(struct poly1305_desc_ctx);
+ return tfm;
+}
+
+static struct shash_alg poly1305_alg = {
+ .digestsize = crypto_onetimeauth_poly1305_BYTES,
+ .init = poly1305_init,
+ .update = poly1305_update,
+ .final = poly1305_final,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.alloc_tfm = poly1305_alloc_tfm,
+};
+
+__attribute__((constructor(110)))
+static int poly1305_mod_init(void)
+{
+ return crypto_register_shash(&poly1305_alg);
+}
diff --git a/c_src/linux/crypto/sha256_generic.c b/c_src/linux/crypto/sha256_generic.c
new file mode 100644
index 00000000..9326bfe7
--- /dev/null
+++ b/c_src/linux/crypto/sha256_generic.c
@@ -0,0 +1,81 @@
+/*
+ * Cryptographic API.
+ *
+ * SHA-256, as specified in
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ *
+ * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/byteorder.h>
+#include <linux/types.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/hash.h>
+
+#include <sodium/crypto_hash_sha256.h>
+
+static struct shash_alg sha256_alg;
+
+static int sha256_init(struct shash_desc *desc)
+{
+ crypto_hash_sha256_state *state = (void *) desc->ctx;
+
+ return crypto_hash_sha256_init(state);
+}
+
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ crypto_hash_sha256_state *state = (void *) desc->ctx;
+
+ return crypto_hash_sha256_update(state, data, len);
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ crypto_hash_sha256_state *state = (void *) desc->ctx;
+
+ return crypto_hash_sha256_final(state, out);
+}
+
+static void *sha256_alloc_tfm(void)
+{
+ struct crypto_shash *tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
+
+ if (!tfm)
+ return NULL;
+
+ tfm->base.alg = &sha256_alg.base;
+ tfm->descsize = sizeof(crypto_hash_sha256_state);
+ return tfm;
+}
+
+static struct shash_alg sha256_alg = {
+ .digestsize = crypto_hash_sha256_BYTES,
+ .init = sha256_init,
+ .update = sha256_update,
+ .final = sha256_final,
+ .descsize = sizeof(crypto_hash_sha256_state),
+ .base.cra_name = "sha256",
+ .base.alloc_tfm = sha256_alloc_tfm,
+};
+
+__attribute__((constructor(110)))
+static int __init sha256_generic_mod_init(void)
+{
+ return crypto_register_shash(&sha256_alg);
+}
diff --git a/c_src/linux/fs.c b/c_src/linux/fs.c
new file mode 100644
index 00000000..623ca266
--- /dev/null
+++ b/c_src/linux/fs.c
@@ -0,0 +1,14 @@
+#include <linux/fs.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+const struct xattr_handler nop_posix_acl_access = {
+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
+};
+
+const struct xattr_handler nop_posix_acl_default = {
+ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
+};
diff --git a/c_src/linux/generic-radix-tree.c b/c_src/linux/generic-radix-tree.c
new file mode 100644
index 00000000..41f1bcdc
--- /dev/null
+++ b/c_src/linux/generic-radix-tree.c
@@ -0,0 +1,307 @@
+
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/gfp.h>
+#include <linux/kmemleak.h>
+
+#define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *))
+#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY)
+
+struct genradix_node {
+ union {
+ /* Interior node: */
+ struct genradix_node *children[GENRADIX_ARY];
+
+ /* Leaf: */
+ u8 data[PAGE_SIZE];
+ };
+};
+
+static inline int genradix_depth_shift(unsigned depth)
+{
+ return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth;
+}
+
+/*
+ * Returns size (of data, in bytes) that a tree of a given depth holds:
+ */
+static inline size_t genradix_depth_size(unsigned depth)
+{
+ return 1UL << genradix_depth_shift(depth);
+}
+
+/* depth that's needed for a genradix that can address up to ULONG_MAX: */
+#define GENRADIX_MAX_DEPTH \
+ DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT)
+
+#define GENRADIX_DEPTH_MASK \
+ ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
+
+static inline unsigned genradix_root_to_depth(struct genradix_root *r)
+{
+ return (unsigned long) r & GENRADIX_DEPTH_MASK;
+}
+
+static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+{
+ return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
+}
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, or NULL if not
+ * allocated
+ */
+void *__genradix_ptr(struct __genradix *radix, size_t offset)
+{
+ struct genradix_root *r = READ_ONCE(radix->root);
+ struct genradix_node *n = genradix_root_to_node(r);
+ unsigned level = genradix_root_to_depth(r);
+
+ if (ilog2(offset) >= genradix_depth_shift(level))
+ return NULL;
+
+ while (1) {
+ if (!n)
+ return NULL;
+ if (!level)
+ break;
+
+ level--;
+
+ n = n->children[offset >> genradix_depth_shift(level)];
+ offset &= genradix_depth_size(level) - 1;
+ }
+
+ return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr);
+
+static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
+{
+ struct genradix_node *node;
+
+ node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO);
+
+ /*
+ * We're using pages (not slab allocations) directly for kernel data
+ * structures, so we need to explicitly inform kmemleak of them in order
+ * to avoid false positive memory leak reports.
+ */
+ kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
+ return node;
+}
+
+static inline void genradix_free_node(struct genradix_node *node)
+{
+ kmemleak_free(node);
+ free_page((unsigned long)node);
+}
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, allocating it if
+ * necessary - newly allocated slots are always zeroed out:
+ */
+void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
+ gfp_t gfp_mask)
+{
+ struct genradix_root *v = READ_ONCE(radix->root);
+ struct genradix_node *n, *new_node = NULL;
+ unsigned level;
+
+ /* Increase tree depth if necessary: */
+ while (1) {
+ struct genradix_root *r = v, *new_root;
+
+ n = genradix_root_to_node(r);
+ level = genradix_root_to_depth(r);
+
+ if (n && ilog2(offset) < genradix_depth_shift(level))
+ break;
+
+ if (!new_node) {
+ new_node = genradix_alloc_node(gfp_mask);
+ if (!new_node)
+ return NULL;
+ }
+
+ new_node->children[0] = n;
+ new_root = ((struct genradix_root *)
+ ((unsigned long) new_node | (n ? level + 1 : 0)));
+
+ if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) {
+ v = new_root;
+ new_node = NULL;
+ }
+ }
+
+ while (level--) {
+ struct genradix_node **p =
+ &n->children[offset >> genradix_depth_shift(level)];
+ offset &= genradix_depth_size(level) - 1;
+
+ n = READ_ONCE(*p);
+ if (!n) {
+ if (!new_node) {
+ new_node = genradix_alloc_node(gfp_mask);
+ if (!new_node)
+ return NULL;
+ }
+
+ if (!(n = cmpxchg_release(p, NULL, new_node)))
+ swap(n, new_node);
+ }
+ }
+
+ if (new_node)
+ genradix_free_node(new_node);
+
+ return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr_alloc);
+
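+/*
+ * Return a pointer to the object at or after iter->offset, advancing the
+ * iterator past unallocated subtrees; returns NULL once the radix tree is
+ * exhausted.
+ */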
+void *__genradix_iter_peek(struct genradix_iter *iter,
+ struct __genradix *radix,
+ size_t objs_per_page)
+{
+ struct genradix_root *r;
+ struct genradix_node *n;
+ unsigned level, i;
+
+ if (iter->offset == SIZE_MAX)
+ return NULL;
+
+restart:
+ r = READ_ONCE(radix->root);
+ if (!r)
+ return NULL;
+
+ n = genradix_root_to_node(r);
+ level = genradix_root_to_depth(r);
+
+ if (ilog2(iter->offset) >= genradix_depth_shift(level))
+ return NULL;
+
+ while (level) {
+ level--;
+
+ i = (iter->offset >> genradix_depth_shift(level)) &
+ (GENRADIX_ARY - 1);
+
+ while (!n->children[i]) {
+ size_t objs_per_ptr = genradix_depth_size(level);
+
+ if (iter->offset + objs_per_ptr < iter->offset) {
+ iter->offset = SIZE_MAX;
+ iter->pos = SIZE_MAX;
+ return NULL;
+ }
+
+ i++;
+ iter->offset = round_down(iter->offset + objs_per_ptr,
+ objs_per_ptr);
+ iter->pos = (iter->offset >> PAGE_SHIFT) *
+ objs_per_page;
+ if (i == GENRADIX_ARY)
+ goto restart;
+ }
+
+ n = n->children[i];
+ }
+
+ return &n->data[iter->offset & (PAGE_SIZE - 1)];
+}
+EXPORT_SYMBOL(__genradix_iter_peek);
+
+void *__genradix_iter_peek_prev(struct genradix_iter *iter,
+ struct __genradix *radix,
+ size_t objs_per_page,
+ size_t obj_size_plus_page_remainder)
+{
+ struct genradix_root *r;
+ struct genradix_node *n;
+ unsigned level, i;
+
+ if (iter->offset == SIZE_MAX)
+ return NULL;
+
+restart:
+ r = READ_ONCE(radix->root);
+ if (!r)
+ return NULL;
+
+ n = genradix_root_to_node(r);
+ level = genradix_root_to_depth(r);
+
+ if (ilog2(iter->offset) >= genradix_depth_shift(level)) {
+ iter->offset = genradix_depth_size(level);
+ iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page;
+
+ iter->offset -= obj_size_plus_page_remainder;
+ iter->pos--;
+ }
+
+ while (level) {
+ level--;
+
+ i = (iter->offset >> genradix_depth_shift(level)) &
+ (GENRADIX_ARY - 1);
+
+ while (!n->children[i]) {
+ size_t objs_per_ptr = genradix_depth_size(level);
+
+ iter->offset = round_down(iter->offset, objs_per_ptr);
+ iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page;
+
+ if (!iter->offset)
+ return NULL;
+
+ iter->offset -= obj_size_plus_page_remainder;
+ iter->pos--;
+
+ if (!i)
+ goto restart;
+ --i;
+ }
+
+ n = n->children[i];
+ }
+
+ return &n->data[iter->offset & (PAGE_SIZE - 1)];
+}
+EXPORT_SYMBOL(__genradix_iter_peek_prev);
+
+static void genradix_free_recurse(struct genradix_node *n, unsigned level)
+{
+ if (level) {
+ unsigned i;
+
+ for (i = 0; i < GENRADIX_ARY; i++)
+ if (n->children[i])
+ genradix_free_recurse(n->children[i], level - 1);
+ }
+
+ genradix_free_node(n);
+}
+
+int __genradix_prealloc(struct __genradix *radix, size_t size,
+ gfp_t gfp_mask)
+{
+ size_t offset;
+
+ for (offset = 0; offset < size; offset += PAGE_SIZE)
+ if (!__genradix_ptr_alloc(radix, offset, gfp_mask))
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL(__genradix_prealloc);
+
+void __genradix_free(struct __genradix *radix)
+{
+ struct genradix_root *r = xchg(&radix->root, NULL);
+
+ genradix_free_recurse(genradix_root_to_node(r),
+ genradix_root_to_depth(r));
+}
+EXPORT_SYMBOL(__genradix_free);
diff --git a/c_src/linux/int_sqrt.c b/c_src/linux/int_sqrt.c
new file mode 100644
index 00000000..a8170bb9
--- /dev/null
+++ b/c_src/linux/int_sqrt.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr.bueso@hp.com>
+ *
+ * Based on the shift-and-subtract algorithm for computing integer
+ * square root from Guy L. Steele.
+ */
+
+#include <linux/export.h>
+#include <linux/bitops.h>
+#include <linux/limits.h>
+#include <linux/math.h>
+
+/**
+ * int_sqrt - computes the integer square root
+ * @x: integer of which to calculate the sqrt
+ *
+ * Computes: floor(sqrt(x))
+ */
+unsigned long int_sqrt(unsigned long x)
+{
+ unsigned long b, m, y = 0;
+
+ if (x <= 1)
+ return x;
+
+ m = 1UL << (__fls(x) & ~1UL);
+ while (m != 0) {
+ b = y + m;
+ y >>= 1;
+
+ if (x >= b) {
+ x -= b;
+ y += m;
+ }
+ m >>= 2;
+ }
+
+ return y;
+}
+EXPORT_SYMBOL(int_sqrt);
+
+#if BITS_PER_LONG < 64
+/**
+ * int_sqrt64 - strongly typed int_sqrt function when minimum 64 bit input
+ * is expected.
+ * @x: 64bit integer of which to calculate the sqrt
+ */
+u32 int_sqrt64(u64 x)
+{
+ u64 b, m, y = 0;
+
+ if (x <= ULONG_MAX)
+ return int_sqrt((unsigned long) x);
+
+ m = 1ULL << ((fls64(x) - 1) & ~1ULL);
+ while (m != 0) {
+ b = y + m;
+ y >>= 1;
+
+ if (x >= b) {
+ x -= b;
+ y += m;
+ }
+ m >>= 2;
+ }
+
+ return y;
+}
+EXPORT_SYMBOL(int_sqrt64);
+#endif
diff --git a/c_src/linux/kstrtox.c b/c_src/linux/kstrtox.c
new file mode 100644
index 00000000..bde55808
--- /dev/null
+++ b/c_src/linux/kstrtox.c
@@ -0,0 +1,357 @@
+/*
+ * Convert integer string representation to an integer.
+ * If an integer doesn't fit into specified type, -E is returned.
+ *
+ * Integer starts with optional sign.
+ * kstrtou*() functions do not accept sign "-".
+ *
+ * Radix 0 means autodetection: leading "0x" implies radix 16,
+ * leading "0" implies radix 8, otherwise radix is 10.
+ * Autodetection hints work after optional sign, but not before.
+ *
+ * If -E is returned, result is not touched.
+ */
+#include <errno.h>
+#include <ctype.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include "kstrtox.h"
+
+#define KSTRTOX_OVERFLOW (1U << 31)
+
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
+{
+ if (*base == 0) {
+ if (s[0] == '0') {
+ if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
+ *base = 16;
+ else
+ *base = 8;
+ } else
+ *base = 10;
+ }
+ if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
+ s += 2;
+ return s;
+}
+
+/*
+ * Convert non-negative integer string representation in explicitly given radix
+ * to an integer.
+ * Return number of characters consumed maybe or-ed with overflow bit.
+ * If overflow occurs, result integer (incorrect) is still returned.
+ *
+ * Don't you dare use this function.
+ */
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+{
+ unsigned long long res;
+ unsigned int rv;
+ int overflow;
+
+ res = 0;
+ rv = 0;
+ overflow = 0;
+ while (*s) {
+ unsigned int val;
+
+ if ('0' <= *s && *s <= '9')
+ val = *s - '0';
+ else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f')
+ val = _tolower(*s) - 'a' + 10;
+ else
+ break;
+
+ if (val >= base)
+ break;
+ /*
+ * Check for overflow only if we are within range of
+ * it in the max base we support (16)
+ */
+ if (unlikely(res & (~0ull << 60))) {
+ if (res > (ULLONG_MAX - val) / base)
+ overflow = 1;
+ }
+ res = res * base + val;
+ rv++;
+ s++;
+ }
+ *p = res;
+ if (overflow)
+ rv |= KSTRTOX_OVERFLOW;
+ return rv;
+}
+
+static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ unsigned long long _res;
+ unsigned int rv;
+
+ s = _parse_integer_fixup_radix(s, &base);
+ rv = _parse_integer(s, base, &_res);
+ if (rv & KSTRTOX_OVERFLOW)
+ return -ERANGE;
+ if (rv == 0)
+ return -EINVAL;
+ s += rv;
+ if (*s == '\n')
+ s++;
+ if (*s)
+ return -EINVAL;
+ *res = _res;
+ return 0;
+}
+
+/**
+ * kstrtoull - convert a string to an unsigned long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ if (s[0] == '+')
+ s++;
+ return _kstrtoull(s, base, res);
+}
+
+/**
+ * kstrtoll - convert a string to a long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoll(const char *s, unsigned int base, long long *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ if (s[0] == '-') {
+ rv = _kstrtoull(s + 1, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if ((long long)-tmp > 0)
+ return -ERANGE;
+ *res = -tmp;
+ } else {
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if ((long long)tmp < 0)
+ return -ERANGE;
+ *res = tmp;
+ }
+ return 0;
+}
+
+/* Internal, do not use. */
+int _kstrtoul(const char *s, unsigned int base, unsigned long *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long long)(unsigned long)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/* Internal, do not use. */
+int _kstrtol(const char *s, unsigned int base, long *res)
+{
+ long long tmp;
+ int rv;
+
+ rv = kstrtoll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (long long)(long)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/**
+ * kstrtouint - convert a string to an unsigned int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtouint(const char *s, unsigned int base, unsigned int *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long long)(unsigned int)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/**
+ * kstrtoint - convert a string to an int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoint(const char *s, unsigned int base, int *res)
+{
+ long long tmp;
+ int rv;
+
+ rv = kstrtoll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (long long)(int)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+int kstrtou16(const char *s, unsigned int base, u16 *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long long)(u16)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+int kstrtos16(const char *s, unsigned int base, s16 *res)
+{
+ long long tmp;
+ int rv;
+
+ rv = kstrtoll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (long long)(s16)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+int kstrtou8(const char *s, unsigned int base, u8 *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long long)(u8)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+int kstrtos8(const char *s, unsigned int base, s8 *res)
+{
+ long long tmp;
+ int rv;
+
+ rv = kstrtoll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (long long)(s8)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+/**
+ * kstrtobool - convert common user inputs into boolean values
+ * @s: input string
+ * @res: result
+ *
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0', or
+ * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value
+ * pointed to by res is updated upon finding a match.
+ */
+int kstrtobool(const char *s, bool *res)
+{
+ if (!s)
+ return -EINVAL;
+
+ switch (s[0]) {
+ case 'y':
+ case 'Y':
+ case '1':
+ *res = true;
+ return 0;
+ case 'n':
+ case 'N':
+ case '0':
+ *res = false;
+ return 0;
+ case 'o':
+ case 'O':
+ switch (s[1]) {
+ case 'n':
+ case 'N':
+ *res = true;
+ return 0;
+ case 'f':
+ case 'F':
+ *res = false;
+ return 0;
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
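+
+/*
+ * Illustrative example (editor's sketch, not part of the upstream sources):
+ * kstrtobool() accepts the usual sysfs-style spellings:
+ *
+ *	bool on;
+ *	kstrtobool("y", &on);	// returns 0, on == true
+ *	kstrtobool("off", &on);	// returns 0, on == false
+ *	kstrtobool("2", &on);	// returns -EINVAL, on is left untouched
+ */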
diff --git a/c_src/linux/kstrtox.h b/c_src/linux/kstrtox.h
new file mode 100644
index 00000000..910b6de8
--- /dev/null
+++ b/c_src/linux/kstrtox.h
@@ -0,0 +1,7 @@
+#ifndef _LIB_KSTRTOX_H
+#define _LIB_KSTRTOX_H
+
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);
+
+#endif
diff --git a/c_src/linux/kthread.c b/c_src/linux/kthread.c
new file mode 100644
index 00000000..17830e5f
--- /dev/null
+++ b/c_src/linux/kthread.c
@@ -0,0 +1,139 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/bitops.h>
+#include <linux/kthread.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "tools-util.h"
+
+enum KTHREAD_BITS {
+ KTHREAD_IS_PER_CPU = 0,
+ KTHREAD_SHOULD_STOP,
+ KTHREAD_SHOULD_PARK,
+ KTHREAD_IS_PARKED,
+};
+
+static void *kthread_start_fn(void *data)
+{
+ rcu_register_thread();
+
+ current = data;
+ schedule();
+ current->thread_fn(current->thread_data);
+
+ complete(&current->exited);
+ put_task_struct(current);
+ rcu_unregister_thread();
+ return NULL;
+}
+
+/**
+ * kthread_create - create a kthread.
+ * @thread_fn: the function to run until signal_pending(current).
+ * @thread_data: data ptr for @thread_fn.
+ * @namefmt: printf-style name for the thread.
+ *
+ * Description: This helper function creates and names a kernel
+ * thread. The thread will be stopped: use wake_up_process() to start
+ * it. See also kthread_run().
+ *
+ * When woken, the thread will run @thread_fn() with @thread_data as its
+ * argument. @thread_fn() can either call do_exit() directly if it is a
+ * standalone thread for which no one will call kthread_stop(), or
+ * return when 'kthread_should_stop()' is true (which means
+ * kthread_stop() has been called). The return value should be zero
+ * or a negative error number; it will be passed to kthread_stop().
+ *
+ * Returns a task_struct or an ERR_PTR() on failure.
+ */
+struct task_struct *kthread_create(int (*thread_fn)(void *data),
+ void *thread_data,
+ const char namefmt[], ...)
+{
+ va_list args;
+ struct task_struct *p = malloc(sizeof(*p));
+ int ret;
+
+ memset(p, 0, sizeof(*p));
+
+ va_start(args, namefmt);
+ vsnprintf(p->comm, sizeof(p->comm), namefmt, args);
+ va_end(args);
+
+ p->flags |= PF_KTHREAD;
+ p->thread_fn = thread_fn;
+ p->thread_data = thread_data;
+ p->state = TASK_UNINTERRUPTIBLE;
+ p->signal = &p->_signal;
+ atomic_set(&p->usage, 1);
+ init_completion(&p->exited);
+ init_rwsem(&p->_signal.exec_update_lock);
+
+ pthread_attr_t attr;
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 32 << 10);
+
+ for (unsigned i = 0; i < 10; i++) {
+ ret = pthread_create(&p->thread, &attr, kthread_start_fn, p);
+ if (!ret)
+ break;
+
+ run_shrinkers(GFP_KERNEL, true);
+ }
+ if (ret)
+ return ERR_PTR(-ret);
+ pthread_setname_np(p->thread, p->comm);
+ return p;
+}
+
+/**
+ * kthread_should_stop - should this kthread return now?
+ *
+ * When someone calls kthread_stop() on your kthread, it will be woken
+ * and this will return true. You should then return, and your return
+ * value will be passed through to kthread_stop().
+ */
+bool kthread_should_stop(void)
+{
+ return test_bit(KTHREAD_SHOULD_STOP, &current->kthread_flags);
+}
+
+bool kthread_freezable_should_stop(bool *was_frozen)
+{
+ return test_bit(KTHREAD_SHOULD_STOP, &current->kthread_flags);
+}
+
+/**
+ * kthread_stop - stop a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ *
+ * Sets kthread_should_stop() for @k to return true, wakes it, and
+ * waits for it to exit. This can also be called after kthread_create()
+ * instead of calling wake_up_process(): the thread will exit without
+ * calling threadfn().
+ *
+ * If threadfn() may call do_exit() itself, the caller must ensure
+ * task_struct can't go away.
+ *
+ * Returns the result of threadfn(), or %-EINTR if wake_up_process()
+ * was never called.
+ */
+int kthread_stop(struct task_struct *p)
+{
+ get_task_struct(p);
+
+ set_bit(KTHREAD_SHOULD_STOP, &p->kthread_flags);
+ wake_up_process(p);
+ wait_for_completion(&p->exited);
+
+ put_task_struct(p);
+
+ return 0;
+}
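+
+/*
+ * Illustrative lifecycle sketch (editor's example, not part of the original
+ * patch): the thread function polls kthread_should_stop(), the creator wakes
+ * the thread with wake_up_process() and later tears it down with
+ * kthread_stop(). do_work() stands in for the caller's real work.
+ *
+ *	static int worker_fn(void *arg)
+ *	{
+ *		while (!kthread_should_stop())
+ *			do_work(arg);
+ *		return 0;
+ *	}
+ *
+ *	struct task_struct *t = kthread_create(worker_fn, arg, "my_worker");
+ *	if (!IS_ERR(t))
+ *		wake_up_process(t);
+ *	...
+ *	kthread_stop(t);
+ */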
diff --git a/c_src/linux/llist.c b/c_src/linux/llist.c
new file mode 100644
index 00000000..611ce488
--- /dev/null
+++ b/c_src/linux/llist.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Lock-less NULL terminated single linked list
+ *
+ * The basic atomic operation of this list is cmpxchg on long. On
+ * architectures that don't have NMI-safe cmpxchg implementation, the
+ * list can NOT be used in NMI handlers. So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/llist.h>
+
+
+/**
+ * llist_add_batch - add several linked entries in batch
+ * @new_first: first entry in batch to be added
+ * @new_last: last entry in batch to be added
+ * @head: the head for your lock-less list
+ *
+ * Return whether the list was empty before the batch was added.
+ */
+bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+ struct llist_head *head)
+{
+ struct llist_node *first;
+
+ do {
+ new_last->next = first = READ_ONCE(head->first);
+ } while (cmpxchg(&head->first, first, new_first) != first);
+
+ return !first;
+}
+EXPORT_SYMBOL_GPL(llist_add_batch);
+
+/**
+ * llist_del_first - delete the first entry of lock-less list
+ * @head: the head for your lock-less list
+ *
+ * If the list is empty, return NULL; otherwise return the first entry
+ * deleted, which is the most recently added one.
+ *
+ * Only one llist_del_first user can be used simultaneously with
+ * multiple llist_add users without lock. Because otherwise
+ * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add,
+ * llist_add) sequence in another user may change @head->first->next,
+ * but keep @head->first. If multiple consumers are needed, please
+ * use llist_del_all or use lock between consumers.
+ */
+struct llist_node *llist_del_first(struct llist_head *head)
+{
+ struct llist_node *entry, *old_entry, *next;
+
+ entry = smp_load_acquire(&head->first);
+ for (;;) {
+ if (entry == NULL)
+ return NULL;
+ old_entry = entry;
+ next = READ_ONCE(entry->next);
+ entry = cmpxchg(&head->first, old_entry, next);
+ if (entry == old_entry)
+ break;
+ }
+
+ return entry;
+}
+EXPORT_SYMBOL_GPL(llist_del_first);
+
+/**
+ * llist_reverse_order - reverse order of a llist chain
+ * @head: first item of the list to be reversed
+ *
+ * Reverse the order of a chain of llist entries and return the
+ * new first entry.
+ */
+struct llist_node *llist_reverse_order(struct llist_node *head)
+{
+ struct llist_node *new_head = NULL;
+
+ while (head) {
+ struct llist_node *tmp = head;
+ head = head->next;
+ tmp->next = new_head;
+ new_head = tmp;
+ }
+
+ return new_head;
+}
+EXPORT_SYMBOL_GPL(llist_reverse_order);
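+
+/*
+ * Illustrative example (editor's sketch, not part of the upstream sources),
+ * assuming the usual <linux/llist.h> helpers (LLIST_HEAD, llist_add(),
+ * llist_del_all(), llist_entry()): producers push entries lock-free, a
+ * single consumer drains the whole list and restores FIFO order.
+ *
+ *	struct item { struct llist_node node; int v; };
+ *	static LLIST_HEAD(pending);
+ *
+ *	// producer, any context:
+ *	llist_add(&it->node, &pending);
+ *
+ *	// consumer:
+ *	struct llist_node *n = llist_reverse_order(llist_del_all(&pending));
+ *	for (; n; n = n->next)
+ *		process(llist_entry(n, struct item, node));
+ */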
diff --git a/c_src/linux/mempool.c b/c_src/linux/mempool.c
new file mode 100644
index 00000000..74d4fbb3
--- /dev/null
+++ b/c_src/linux/mempool.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/mm/mempool.c
+ *
+ * memory buffer pool support. Such pools are mostly used
+ * for guaranteed, deadlock-free memory allocations during
+ * extreme VM load.
+ *
+ * started by Ingo Molnar, Copyright (C) 2001
+ * debugging by David Rientjes, Copyright (C) 2015
+ */
+
+#include <linux/slab.h>
+//#include <linux/kasan.h>
+//#include <linux/kmemleak.h>
+#include <linux/export.h>
+#include <linux/jiffies.h>
+#include <linux/mempool.h>
+#include <linux/sched.h>
+
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
+static void poison_error(mempool_t *pool, void *element, size_t size,
+ size_t byte)
+{
+ const int nr = pool->curr_nr;
+ const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0);
+ const int end = min_t(int, byte + (BITS_PER_LONG / 8), size);
+ int i;
+
+ pr_err("BUG: mempool element poison mismatch\n");
+ pr_err("Mempool %p size %zu\n", pool, size);
+ pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : "");
+ for (i = start; i < end; i++)
+ pr_cont("%x ", *(u8 *)(element + i));
+ pr_cont("%s\n", end < size ? "..." : "");
+ dump_stack();
+}
+
+static void __check_element(mempool_t *pool, void *element, size_t size)
+{
+ u8 *obj = element;
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;
+
+ if (obj[i] != exp) {
+ poison_error(pool, element, size, i);
+ return;
+ }
+ }
+ memset(obj, POISON_INUSE, size);
+}
+
+static void check_element(mempool_t *pool, void *element)
+{
+ /* Mempools backed by slab allocator */
+ if (pool->free == mempool_free_slab || pool->free == mempool_kfree) {
+ __check_element(pool, element, ksize(element));
+ } else if (pool->free == mempool_free_pages) {
+ /* Mempools backed by page allocator */
+ int order = (int)(long)pool->pool_data;
+ void *addr = kmap_atomic((struct page *)element);
+
+ __check_element(pool, addr, 1UL << (PAGE_SHIFT + order));
+ kunmap_atomic(addr);
+ }
+}
+
+static void __poison_element(void *element, size_t size)
+{
+ u8 *obj = element;
+
+ memset(obj, POISON_FREE, size - 1);
+ obj[size - 1] = POISON_END;
+}
+
+static void poison_element(mempool_t *pool, void *element)
+{
+ /* Mempools backed by slab allocator */
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) {
+ __poison_element(element, ksize(element));
+ } else if (pool->alloc == mempool_alloc_pages) {
+ /* Mempools backed by page allocator */
+ int order = (int)(long)pool->pool_data;
+ void *addr = kmap_atomic((struct page *)element);
+
+ __poison_element(addr, 1UL << (PAGE_SHIFT + order));
+ kunmap_atomic(addr);
+ }
+}
+#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
+static inline void check_element(mempool_t *pool, void *element)
+{
+}
+static inline void poison_element(mempool_t *pool, void *element)
+{
+}
+#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
+
+static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
+{
+#if 0
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+ kasan_poison_kfree(element, _RET_IP_);
+ else if (pool->alloc == mempool_alloc_pages)
+ kasan_free_pages(element, (unsigned long)pool->pool_data);
+#endif
+}
+
+static void kasan_unpoison_element(mempool_t *pool, void *element)
+{
+#if 0
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+ kasan_unpoison_slab(element);
+ else if (pool->alloc == mempool_alloc_pages)
+ kasan_alloc_pages(element, (unsigned long)pool->pool_data);
+#endif
+}
+
+static __always_inline void add_element(mempool_t *pool, void *element)
+{
+ BUG_ON(pool->curr_nr >= pool->min_nr);
+ poison_element(pool, element);
+ kasan_poison_element(pool, element);
+ pool->elements[pool->curr_nr++] = element;
+}
+
+static void *remove_element(mempool_t *pool)
+{
+ void *element = pool->elements[--pool->curr_nr];
+
+ BUG_ON(pool->curr_nr < 0);
+ kasan_unpoison_element(pool, element);
+ check_element(pool, element);
+ return element;
+}
+
+/**
+ * mempool_exit - exit a mempool initialized with mempool_init()
+ * @pool: pointer to the memory pool which was initialized with
+ * mempool_init().
+ *
+ * Free all reserved elements in @pool and @pool itself. This function
+ * only sleeps if the free_fn() function sleeps.
+ *
+ * May be called on a zeroed but uninitialized mempool (i.e. allocated with
+ * kzalloc()).
+ */
+void mempool_exit(mempool_t *pool)
+{
+ while (pool->curr_nr) {
+ void *element = remove_element(pool);
+ pool->free(element, pool->pool_data);
+ }
+ kfree(pool->elements);
+ pool->elements = NULL;
+}
+EXPORT_SYMBOL(mempool_exit);
+
+/**
+ * mempool_destroy - deallocate a memory pool
+ * @pool: pointer to the memory pool which was allocated via
+ * mempool_create().
+ *
+ * Free all reserved elements in @pool and @pool itself. This function
+ * only sleeps if the free_fn() function sleeps.
+ */
+void mempool_destroy(mempool_t *pool)
+{
+ if (unlikely(!pool))
+ return;
+
+ mempool_exit(pool);
+ kfree(pool);
+}
+EXPORT_SYMBOL(mempool_destroy);
+
+int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data,
+ gfp_t gfp_mask, int node_id)
+{
+ spin_lock_init(&pool->lock);
+ pool->min_nr = min_nr;
+ pool->pool_data = pool_data;
+ pool->alloc = alloc_fn;
+ pool->free = free_fn;
+ init_waitqueue_head(&pool->wait);
+
+ pool->elements = kmalloc_array(min_nr, sizeof(void *), gfp_mask);
+ if (!pool->elements)
+ return -ENOMEM;
+
+ /*
+ * First pre-allocate the guaranteed number of buffers.
+ */
+ while (pool->curr_nr < pool->min_nr) {
+ void *element;
+
+ element = pool->alloc(gfp_mask, pool->pool_data);
+ if (unlikely(!element)) {
+ mempool_exit(pool);
+ return -ENOMEM;
+ }
+ add_element(pool, element);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(mempool_init_node);
+
+/**
+ * mempool_init - initialize a memory pool
+ * @pool: pointer to the memory pool that should be initialized
+ * @min_nr: the minimum number of elements guaranteed to be
+ * allocated for this pool.
+ * @alloc_fn: user-defined element-allocation function.
+ * @free_fn: user-defined element-freeing function.
+ * @pool_data: optional private data available to the user-defined functions.
+ *
+ * Like mempool_create(), but initializes a pool that is embedded in another
+ * structure rather than allocating a new one.
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data)
+{
+ return mempool_init_node(pool, min_nr, alloc_fn, free_fn,
+ pool_data, GFP_KERNEL, 0);
+}
+EXPORT_SYMBOL(mempool_init);
+
+/**
+ * mempool_create - create a memory pool
+ * @min_nr: the minimum number of elements guaranteed to be
+ * allocated for this pool.
+ * @alloc_fn: user-defined element-allocation function.
+ * @free_fn: user-defined element-freeing function.
+ * @pool_data: optional private data available to the user-defined functions.
+ *
+ * this function creates and allocates a guaranteed size, preallocated
+ * memory pool. The pool can be used from the mempool_alloc() and mempool_free()
+ * functions. This function might sleep. Both the alloc_fn() and the free_fn()
+ * functions might sleep - as long as the mempool_alloc() function is not called
+ * from IRQ contexts.
+ *
+ * Return: pointer to the created memory pool object or %NULL on error.
+ */
+mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data)
+{
+	return mempool_create_node(min_nr, alloc_fn, free_fn, pool_data,
+ GFP_KERNEL, 0);
+}
+EXPORT_SYMBOL(mempool_create);
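+
+/*
+ * Illustrative example (editor's sketch, not part of the upstream sources):
+ * a pool that guarantees eight 256-byte buffers, backed by the
+ * mempool_kmalloc()/mempool_kfree() helpers defined later in this file.
+ *
+ *	mempool_t *pool = mempool_create(8, mempool_kmalloc, mempool_kfree,
+ *					 (void *)(size_t)256);
+ *	void *buf = mempool_alloc(pool, GFP_KERNEL);
+ *	...
+ *	mempool_free(buf, pool);
+ *	mempool_destroy(pool);
+ */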
+
+mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data,
+ gfp_t gfp_mask, int node_id)
+{
+ mempool_t *pool;
+
+ pool = kzalloc(sizeof(*pool), gfp_mask);
+ if (!pool)
+ return NULL;
+
+ if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data,
+ gfp_mask, node_id)) {
+ kfree(pool);
+ return NULL;
+ }
+
+ return pool;
+}
+EXPORT_SYMBOL(mempool_create_node);
+
+/**
+ * mempool_resize - resize an existing memory pool
+ * @pool: pointer to the memory pool which was allocated via
+ * mempool_create().
+ * @new_min_nr: the new minimum number of elements guaranteed to be
+ * allocated for this pool.
+ *
+ * This function shrinks/grows the pool. In the case of growing,
+ * it cannot be guaranteed that the pool will be grown to the new
+ * size immediately, but new mempool_free() calls will refill it.
+ * This function may sleep.
+ *
+ * Note, the caller must guarantee that no mempool_destroy is called
+ * while this function is running. mempool_alloc() & mempool_free()
+ * might be called (eg. from IRQ contexts) while this function executes.
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int mempool_resize(mempool_t *pool, int new_min_nr)
+{
+ void *element;
+ void **new_elements;
+ unsigned long flags;
+
+ BUG_ON(new_min_nr <= 0);
+ might_sleep();
+
+ spin_lock_irqsave(&pool->lock, flags);
+ if (new_min_nr <= pool->min_nr) {
+ while (new_min_nr < pool->curr_nr) {
+ element = remove_element(pool);
+ spin_unlock_irqrestore(&pool->lock, flags);
+ pool->free(element, pool->pool_data);
+ spin_lock_irqsave(&pool->lock, flags);
+ }
+ pool->min_nr = new_min_nr;
+ goto out_unlock;
+ }
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ /* Grow the pool */
+ new_elements = kmalloc_array(new_min_nr, sizeof(*new_elements),
+ GFP_KERNEL);
+ if (!new_elements)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ if (unlikely(new_min_nr <= pool->min_nr)) {
+ /* Raced, other resize will do our work */
+ spin_unlock_irqrestore(&pool->lock, flags);
+ kfree(new_elements);
+ goto out;
+ }
+ memcpy(new_elements, pool->elements,
+ pool->curr_nr * sizeof(*new_elements));
+ kfree(pool->elements);
+ pool->elements = new_elements;
+ pool->min_nr = new_min_nr;
+
+ while (pool->curr_nr < pool->min_nr) {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ element = pool->alloc(GFP_KERNEL, pool->pool_data);
+ if (!element)
+ goto out;
+ spin_lock_irqsave(&pool->lock, flags);
+ if (pool->curr_nr < pool->min_nr) {
+ add_element(pool, element);
+ } else {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ pool->free(element, pool->pool_data); /* Raced */
+ goto out;
+ }
+ }
+out_unlock:
+ spin_unlock_irqrestore(&pool->lock, flags);
+out:
+ return 0;
+}
+EXPORT_SYMBOL(mempool_resize);
+
+/**
+ * mempool_alloc - allocate an element from a specific memory pool
+ * @pool: pointer to the memory pool which was allocated via
+ * mempool_create().
+ * @gfp_mask: the usual allocation bitmask.
+ *
+ * this function only sleeps if the alloc_fn() function sleeps or
+ * returns NULL. Note that due to preallocation, this function
+ * *never* fails when called from process contexts. (it might
+ * fail if called from an IRQ context.)
+ * Note: using __GFP_ZERO is not supported.
+ *
+ * Return: pointer to the allocated element or %NULL on error.
+ */
+void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+{
+ void *element;
+ unsigned long flags;
+ DEFINE_WAIT(wait);
+ gfp_t gfp_temp;
+
+ WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
+
+ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
+ gfp_mask |= __GFP_NOWARN; /* failures are OK */
+
+ gfp_temp = gfp_mask & ~(__GFP_IO);
+
+repeat_alloc:
+
+ element = pool->alloc(gfp_temp, pool->pool_data);
+ if (likely(element != NULL))
+ return element;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ if (likely(pool->curr_nr)) {
+ element = remove_element(pool);
+ spin_unlock_irqrestore(&pool->lock, flags);
+ /* paired with rmb in mempool_free(), read comment there */
+ smp_wmb();
+ return element;
+ }
+
+ /*
+ * We use gfp mask w/o direct reclaim or IO for the first round. If
+ * alloc failed with that and @pool was empty, retry immediately.
+ */
+ if (gfp_temp != gfp_mask) {
+ spin_unlock_irqrestore(&pool->lock, flags);
+ gfp_temp = gfp_mask;
+ goto repeat_alloc;
+ }
+
+ /* Let's wait for someone else to return an element to @pool */
+ prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
+
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ /*
+ * FIXME: this should be io_schedule(). The timeout is there as a
+ * workaround for some DM problems in 2.6.18.
+ */
+ io_schedule_timeout(5*HZ);
+
+ finish_wait(&pool->wait, &wait);
+ goto repeat_alloc;
+}
+EXPORT_SYMBOL(mempool_alloc);
+
+/**
+ * mempool_free - return an element to the pool.
+ * @element: pool element pointer.
+ * @pool: pointer to the memory pool which was allocated via
+ * mempool_create().
+ *
+ * this function only sleeps if the free_fn() function sleeps.
+ */
+void mempool_free(void *element, mempool_t *pool)
+{
+ unsigned long flags;
+
+ if (unlikely(element == NULL))
+ return;
+
+ /*
+ * Paired with the wmb in mempool_alloc(). The preceding read is
+ * for @element and the following @pool->curr_nr. This ensures
+ * that the visible value of @pool->curr_nr is from after the
+ * allocation of @element. This is necessary for fringe cases
+ * where @element was passed to this task without going through
+ * barriers.
+ *
+ * For example, assume @p is %NULL at the beginning and one task
+ * performs "p = mempool_alloc(...);" while another task is doing
+ * "while (!p) cpu_relax(); mempool_free(p, ...);". This function
+ * may end up using curr_nr value which is from before allocation
+ * of @p without the following rmb.
+ */
+ smp_rmb();
+
+ /*
+ * For correctness, we need a test which is guaranteed to trigger
+ * if curr_nr + #allocated == min_nr. Testing curr_nr < min_nr
+ * without locking achieves that and refilling as soon as possible
+ * is desirable.
+ *
+ * Because curr_nr visible here is always a value after the
+ * allocation of @element, any task which decremented curr_nr below
+ * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
+ * incremented to min_nr afterwards. If curr_nr gets incremented
+ * to min_nr after the allocation of @element, the elements
+ * allocated after that are subject to the same guarantee.
+ *
+ * Waiters happen iff curr_nr is 0 and the above guarantee also
+ * ensures that there will be frees which return elements to the
+ * pool waking up the waiters.
+ */
+ if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) {
+ spin_lock_irqsave(&pool->lock, flags);
+ if (likely(pool->curr_nr < pool->min_nr)) {
+ add_element(pool, element);
+ spin_unlock_irqrestore(&pool->lock, flags);
+ wake_up(&pool->wait);
+ return;
+ }
+ spin_unlock_irqrestore(&pool->lock, flags);
+ }
+ pool->free(element, pool->pool_data);
+}
+EXPORT_SYMBOL(mempool_free);
+
+/*
+ * A commonly used alloc and free fn.
+ */
+void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
+{
+ struct kmem_cache *mem = pool_data;
+ return kmem_cache_alloc(mem, gfp_mask);
+}
+EXPORT_SYMBOL(mempool_alloc_slab);
+
+void mempool_free_slab(void *element, void *pool_data)
+{
+ struct kmem_cache *mem = pool_data;
+ kmem_cache_free(mem, element);
+}
+EXPORT_SYMBOL(mempool_free_slab);
+
+/*
+ * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory
+ * specified by pool_data
+ */
+void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
+{
+ size_t size = (size_t)pool_data;
+ return kmalloc(size, gfp_mask);
+}
+EXPORT_SYMBOL(mempool_kmalloc);
+
+void mempool_kfree(void *element, void *pool_data)
+{
+ kfree(element);
+}
+EXPORT_SYMBOL(mempool_kfree);
+
+/*
+ * A simple mempool-backed page allocator that allocates pages
+ * of the order specified by pool_data.
+ */
+void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
+{
+ int order = (int)(long)pool_data;
+ return alloc_pages(gfp_mask, order);
+}
+EXPORT_SYMBOL(mempool_alloc_pages);
+
+void mempool_free_pages(void *element, void *pool_data)
+{
+ int order = (int)(long)pool_data;
+ __free_pages(element, order);
+}
+EXPORT_SYMBOL(mempool_free_pages);
diff --git a/c_src/linux/preempt.c b/c_src/linux/preempt.c
new file mode 100644
index 00000000..72eceed3
--- /dev/null
+++ b/c_src/linux/preempt.c
@@ -0,0 +1,37 @@
+#include <pthread.h>
+
+#include "linux/preempt.h"
+
+/*
+ * In userspace, pthreads are preemptible and can migrate CPUs at any time.
+ *
+ * In the kernel, preempt_disable() logic essentially guarantees that a marked
+ * critical section owns its CPU for the relevant block. This is necessary for
+ * various code paths, critically including the percpu system as it allows for
+ * non-atomic reads and writes to CPU-local data structures.
+ *
+ * The high performance userspace equivalent would be to use thread local
+ * storage to replace percpu data, but that would be complicated. It should be
+ * correct to instead guarantee mutual exclusion for the critical sections.
+ */
+
+static pthread_mutex_t preempt_lock;
+
+__attribute__((constructor))
+static void preempt_init(void) {
+ pthread_mutexattr_t attr;
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+ pthread_mutex_init(&preempt_lock, &attr);
+ pthread_mutexattr_destroy(&attr);
+}
+
+void preempt_disable(void)
+{
+ pthread_mutex_lock(&preempt_lock);
+}
+
+void preempt_enable(void)
+{
+ pthread_mutex_unlock(&preempt_lock);
+}
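+
+/*
+ * Illustrative example (editor's sketch, not part of the original patch):
+ * callers bracket their "CPU-local" accesses exactly as in the kernel; here
+ * the recursive mutex simply guarantees that only one such critical section
+ * runs at a time.
+ *
+ *	preempt_disable();
+ *	counter++;	// per-CPU style data; safe to touch non-atomically here
+ *	preempt_enable();
+ */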
diff --git a/c_src/linux/ratelimit.c b/c_src/linux/ratelimit.c
new file mode 100644
index 00000000..21a6d6c8
--- /dev/null
+++ b/c_src/linux/ratelimit.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ratelimit.c - Do something with rate limit.
+ *
+ * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
+ *
+ * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
+ * parameter. Now every user can use their own standalone ratelimit_state.
+ */
+
+#include <linux/ratelimit.h>
+#include <linux/jiffies.h>
+#include <linux/export.h>
+
+/*
+ * __ratelimit - rate limiting
+ * @rs: ratelimit_state data
+ * @func: name of calling function
+ *
+ * This enforces a rate limit: not more than @rs->burst callbacks
+ * in every @rs->interval
+ *
+ * RETURNS:
+ * 0 means callbacks will be suppressed.
+ * 1 means go ahead and do it.
+ */
+int ___ratelimit(struct ratelimit_state *rs, const char *func)
+{
+ int ret;
+
+ if (!rs->interval)
+ return 1;
+
+ /*
+ * If we contend on this state's lock then almost
+ * by definition we are too busy to print a message,
+ * in addition to the one that will be printed by
+ * the entity that is holding the lock already:
+ */
+ if (!raw_spin_trylock(&rs->lock))
+ return 0;
+
+ if (!rs->begin)
+ rs->begin = jiffies;
+
+ if (time_is_before_jiffies(rs->begin + rs->interval)) {
+ if (rs->missed) {
+ if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+ printk(KERN_WARNING
+ "%s: %d callbacks suppressed\n",
+ func, rs->missed);
+ rs->missed = 0;
+ }
+ }
+ rs->begin = jiffies;
+ rs->printed = 0;
+ }
+ if (rs->burst && rs->burst > rs->printed) {
+ rs->printed++;
+ ret = 1;
+ } else {
+ rs->missed++;
+ ret = 0;
+ }
+ raw_spin_unlock(&rs->lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(___ratelimit);
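+
+/*
+ * Illustrative example (editor's sketch, not part of the upstream sources),
+ * assuming the usual <linux/ratelimit.h> helpers: allow at most 10 messages
+ * per 5 * HZ interval, dropping (and counting) the rest.
+ *
+ *	static DEFINE_RATELIMIT_STATE(rs, 5 * HZ, 10);
+ *
+ *	if (__ratelimit(&rs))
+ *		printk(KERN_WARNING "something noisy happened\n");
+ */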
diff --git a/c_src/linux/rhashtable.c b/c_src/linux/rhashtable.c
new file mode 100644
index 00000000..ba2196fc
--- /dev/null
+++ b/c_src/linux/rhashtable.c
@@ -0,0 +1,1237 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * Code partially derived from nft_hash
+ * Rewritten with rehash code from br_multicast plus single list
+ * pointer as suggested by Josh Triplett
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/jhash.h>
+#include <linux/overflow.h>
+#include <linux/random.h>
+#include <linux/rhashtable.h>
+#include <linux/err.h>
+#include <linux/export.h>
+
+#define HASH_DEFAULT_SIZE 64UL
+#define HASH_MIN_SIZE 4U
+
+union nested_table {
+ union nested_table __rcu *table;
+ struct rhash_lock_head __rcu *bucket;
+};
+
+static u32 head_hashfn(struct rhashtable *ht,
+ const struct bucket_table *tbl,
+ const struct rhash_head *he)
+{
+ return rht_head_hashfn(ht, tbl, he, ht->p);
+}
+
+#ifdef CONFIG_PROVE_LOCKING
+#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
+
+int lockdep_rht_mutex_is_held(struct rhashtable *ht)
+{
+ return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
+
+int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
+{
+ if (!debug_locks)
+ return 1;
+ if (unlikely(tbl->nest))
+ return 1;
+ return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
+#else
+#define ASSERT_RHT_MUTEX(HT)
+#endif
+
+static inline union nested_table *nested_table_top(
+ const struct bucket_table *tbl)
+{
+ /* The top-level bucket entry does not need RCU protection
+ * because it's set at the same time as tbl->nest.
+ */
+ return (void *)rcu_dereference_protected(tbl->buckets[0], 1);
+}
+
+static void nested_table_free(union nested_table *ntbl, unsigned int size)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ const unsigned int len = 1 << shift;
+ unsigned int i;
+
+ ntbl = rcu_dereference_protected(ntbl->table, 1);
+ if (!ntbl)
+ return;
+
+ if (size > len) {
+ size >>= shift;
+ for (i = 0; i < len; i++)
+ nested_table_free(ntbl + i, size);
+ }
+
+ kfree(ntbl);
+}
+
+static void nested_bucket_table_free(const struct bucket_table *tbl)
+{
+ unsigned int size = tbl->size >> tbl->nest;
+ unsigned int len = 1 << tbl->nest;
+ union nested_table *ntbl;
+ unsigned int i;
+
+ ntbl = nested_table_top(tbl);
+
+ for (i = 0; i < len; i++)
+ nested_table_free(ntbl + i, size);
+
+ kfree(ntbl);
+}
+
+static void bucket_table_free(struct bucket_table *tbl)
+{
+ if (tbl->nest)
+ nested_bucket_table_free(tbl);
+
+ kvfree(tbl);
+}
+
+static void bucket_table_free_rcu(struct rcu_head *head)
+{
+ bucket_table_free(container_of(head, struct bucket_table, rcu));
+}
+
+static union nested_table *nested_table_alloc(struct rhashtable *ht,
+ union nested_table __rcu **prev,
+ bool leaf)
+{
+ union nested_table *ntbl;
+ int i;
+
+ ntbl = rcu_dereference(*prev);
+ if (ntbl)
+ return ntbl;
+
+ ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
+
+ if (ntbl && leaf) {
+ for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
+ INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
+ }
+
+ if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
+ return ntbl;
+ /* Raced with another thread. */
+ kfree(ntbl);
+ return rcu_dereference(*prev);
+}
+
+static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
+ size_t nbuckets,
+ gfp_t gfp)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ struct bucket_table *tbl;
+ size_t size;
+
+ if (nbuckets < (1 << (shift + 1)))
+ return NULL;
+
+ size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
+
+ tbl = kzalloc(size, gfp);
+ if (!tbl)
+ return NULL;
+
+ if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
+ false)) {
+ kfree(tbl);
+ return NULL;
+ }
+
+ tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
+
+ return tbl;
+}
+
+static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
+ size_t nbuckets,
+ gfp_t gfp)
+{
+ struct bucket_table *tbl = NULL;
+ size_t size;
+ int i;
+
+ tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
+
+ size = nbuckets;
+
+ if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
+ tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
+ nbuckets = 0;
+ }
+
+ if (tbl == NULL)
+ return NULL;
+
+ tbl->size = size;
+
+ rcu_head_init(&tbl->rcu);
+ INIT_LIST_HEAD(&tbl->walkers);
+
+ tbl->hash_rnd = get_random_u32();
+
+ for (i = 0; i < nbuckets; i++)
+ INIT_RHT_NULLS_HEAD(tbl->buckets[i]);
+
+ return tbl;
+}
+
+static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
+ struct bucket_table *tbl)
+{
+ struct bucket_table *new_tbl;
+
+ do {
+ new_tbl = tbl;
+ tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ } while (tbl);
+
+ return new_tbl;
+}
+
+static int rhashtable_rehash_one(struct rhashtable *ht,
+ struct rhash_lock_head __rcu **bkt,
+ unsigned int old_hash)
+{
+ struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+ struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
+ int err = -EAGAIN;
+ struct rhash_head *head, *next, *entry;
+ struct rhash_head __rcu **pprev = NULL;
+ unsigned int new_hash;
+
+ if (new_tbl->nest)
+ goto out;
+
+ err = -ENOENT;
+
+ rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
+ old_tbl, old_hash) {
+ err = 0;
+ next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
+
+ if (rht_is_a_nulls(next))
+ break;
+
+ pprev = &entry->next;
+ }
+
+ if (err)
+ goto out;
+
+ new_hash = head_hashfn(ht, new_tbl, entry);
+
+ rht_lock(new_tbl, &new_tbl->buckets[new_hash]);
+
+ head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
+
+ RCU_INIT_POINTER(entry->next, head);
+
+ rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);
+
+ if (pprev)
+ rcu_assign_pointer(*pprev, next);
+ else
+		/* Need to preserve the bit lock. */
+ rht_assign_locked(bkt, next);
+
+out:
+ return err;
+}
+
+static int rhashtable_rehash_chain(struct rhashtable *ht,
+ unsigned int old_hash)
+{
+ struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+ struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
+ int err;
+
+ if (!bkt)
+ return 0;
+ rht_lock(old_tbl, bkt);
+
+ while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
+ ;
+
+ if (err == -ENOENT)
+ err = 0;
+ rht_unlock(old_tbl, bkt);
+
+ return err;
+}
+
+static int rhashtable_rehash_attach(struct rhashtable *ht,
+ struct bucket_table *old_tbl,
+ struct bucket_table *new_tbl)
+{
+ /* Make insertions go into the new, empty table right away. Deletions
+ * and lookups will be attempted in both tables until we synchronize.
+ * As cmpxchg() provides strong barriers, we do not need
+ * rcu_assign_pointer().
+ */
+
+ if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
+ new_tbl) != NULL)
+ return -EEXIST;
+
+ return 0;
+}
+
+static int rhashtable_rehash_table(struct rhashtable *ht)
+{
+ struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+ struct bucket_table *new_tbl;
+ struct rhashtable_walker *walker;
+ unsigned int old_hash;
+ int err;
+
+ new_tbl = rht_dereference(old_tbl->future_tbl, ht);
+ if (!new_tbl)
+ return 0;
+
+ for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
+ err = rhashtable_rehash_chain(ht, old_hash);
+ if (err)
+ return err;
+ cond_resched();
+ }
+
+ /* Publish the new table pointer. */
+ rcu_assign_pointer(ht->tbl, new_tbl);
+
+ spin_lock(&ht->lock);
+ list_for_each_entry(walker, &old_tbl->walkers, list)
+ walker->tbl = NULL;
+
+ /* Wait for readers. All new readers will see the new
+ * table, and thus no references to the old table will
+ * remain.
+ * We do this inside the locked region so that
+ * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
+ * to check if it should not re-link the table.
+ */
+ call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+ spin_unlock(&ht->lock);
+
+ return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
+}
+
+static int rhashtable_rehash_alloc(struct rhashtable *ht,
+ struct bucket_table *old_tbl,
+ unsigned int size)
+{
+ struct bucket_table *new_tbl;
+ int err;
+
+ ASSERT_RHT_MUTEX(ht);
+
+ new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+ if (new_tbl == NULL)
+ return -ENOMEM;
+
+ err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+ if (err)
+ bucket_table_free(new_tbl);
+
+ return err;
+}
+
+/**
+ * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
+ * @ht: the hash table to shrink
+ *
+ * This function shrinks the hash table to fit, i.e., the smallest
+ * size would not cause it to expand right away automatically.
+ *
+ * The caller must ensure that no concurrent resizing occurs by holding
+ * ht->mutex.
+ *
+ * The caller must ensure that no concurrent table mutations take place.
+ * It is however valid to have concurrent lookups if they are RCU protected.
+ *
+ * It is valid to have concurrent insertions and deletions protected by per
+ * bucket locks or concurrent RCU protected lookups and traversals.
+ */
+static int rhashtable_shrink(struct rhashtable *ht)
+{
+ struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+ unsigned int nelems = atomic_read(&ht->nelems);
+ unsigned int size = 0;
+
+ if (nelems)
+ size = roundup_pow_of_two(nelems * 3 / 2);
+ if (size < ht->p.min_size)
+ size = ht->p.min_size;
+
+ if (old_tbl->size <= size)
+ return 0;
+
+ if (rht_dereference(old_tbl->future_tbl, ht))
+ return -EEXIST;
+
+ return rhashtable_rehash_alloc(ht, old_tbl, size);
+}
+
+static void rht_deferred_worker(struct work_struct *work)
+{
+ struct rhashtable *ht;
+ struct bucket_table *tbl;
+ int err = 0;
+
+ ht = container_of(work, struct rhashtable, run_work);
+ mutex_lock(&ht->mutex);
+
+ tbl = rht_dereference(ht->tbl, ht);
+ tbl = rhashtable_last_table(ht, tbl);
+
+ if (rht_grow_above_75(ht, tbl))
+ err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
+ else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
+ err = rhashtable_shrink(ht);
+ else if (tbl->nest)
+ err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
+
+ if (!err || err == -EEXIST) {
+ int nerr;
+
+ nerr = rhashtable_rehash_table(ht);
+ err = err ?: nerr;
+ }
+
+ mutex_unlock(&ht->mutex);
+
+ if (err)
+ schedule_work(&ht->run_work);
+}
+
+static int rhashtable_insert_rehash(struct rhashtable *ht,
+ struct bucket_table *tbl)
+{
+ struct bucket_table *old_tbl;
+ struct bucket_table *new_tbl;
+ unsigned int size;
+ int err;
+
+ old_tbl = rht_dereference_rcu(ht->tbl, ht);
+
+ size = tbl->size;
+
+ err = -EBUSY;
+
+ if (rht_grow_above_75(ht, tbl))
+ size *= 2;
+ /* Do not schedule more than one rehash */
+ else if (old_tbl != tbl)
+ goto fail;
+
+ err = -ENOMEM;
+
+ new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
+ if (new_tbl == NULL)
+ goto fail;
+
+ err = rhashtable_rehash_attach(ht, tbl, new_tbl);
+ if (err) {
+ bucket_table_free(new_tbl);
+ if (err == -EEXIST)
+ err = 0;
+ } else
+ schedule_work(&ht->run_work);
+
+ return err;
+
+fail:
+ /* Do not fail the insert if someone else did a rehash. */
+ if (likely(rcu_access_pointer(tbl->future_tbl)))
+ return 0;
+
+ /* Schedule async rehash to retry allocation in process context. */
+ if (err == -ENOMEM)
+ schedule_work(&ht->run_work);
+
+ return err;
+}
+
+static void *rhashtable_lookup_one(struct rhashtable *ht,
+ struct rhash_lock_head __rcu **bkt,
+ struct bucket_table *tbl, unsigned int hash,
+ const void *key, struct rhash_head *obj)
+{
+ struct rhashtable_compare_arg arg = {
+ .ht = ht,
+ .key = key,
+ };
+ struct rhash_head __rcu **pprev = NULL;
+ struct rhash_head *head;
+ int elasticity;
+
+ elasticity = RHT_ELASTICITY;
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
+ struct rhlist_head *list;
+ struct rhlist_head *plist;
+
+ elasticity--;
+ if (!key ||
+ (ht->p.obj_cmpfn ?
+ ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
+ rhashtable_compare(&arg, rht_obj(ht, head)))) {
+ pprev = &head->next;
+ continue;
+ }
+
+ if (!ht->rhlist)
+ return rht_obj(ht, head);
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ plist = container_of(head, struct rhlist_head, rhead);
+
+ RCU_INIT_POINTER(list->next, plist);
+ head = rht_dereference_bucket(head->next, tbl, hash);
+ RCU_INIT_POINTER(list->rhead.next, head);
+ if (pprev)
+ rcu_assign_pointer(*pprev, obj);
+ else
+ /* Need to preserve the bit lock */
+ rht_assign_locked(bkt, obj);
+
+ return NULL;
+ }
+
+ if (elasticity <= 0)
+ return ERR_PTR(-EAGAIN);
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct bucket_table *rhashtable_insert_one(
+ struct rhashtable *ht, struct rhash_lock_head __rcu **bkt,
+ struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj,
+ void *data)
+{
+ struct bucket_table *new_tbl;
+ struct rhash_head *head;
+
+ if (!IS_ERR_OR_NULL(data))
+ return ERR_PTR(-EEXIST);
+
+ if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
+ return ERR_CAST(data);
+
+ new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (new_tbl)
+ return new_tbl;
+
+ if (PTR_ERR(data) != -ENOENT)
+ return ERR_CAST(data);
+
+ if (unlikely(rht_grow_above_max(ht, tbl)))
+ return ERR_PTR(-E2BIG);
+
+ if (unlikely(rht_grow_above_100(ht, tbl)))
+ return ERR_PTR(-EAGAIN);
+
+ head = rht_ptr(bkt, tbl, hash);
+
+ RCU_INIT_POINTER(obj->next, head);
+ if (ht->rhlist) {
+ struct rhlist_head *list;
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ RCU_INIT_POINTER(list->next, NULL);
+ }
+
+ /* bkt is always the head of the list, so it holds
+ * the lock, which we need to preserve
+ */
+ rht_assign_locked(bkt, obj);
+
+ atomic_inc(&ht->nelems);
+ if (rht_grow_above_75(ht, tbl))
+ schedule_work(&ht->run_work);
+
+ return NULL;
+}
+
+static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
+ struct rhash_head *obj)
+{
+ struct bucket_table *new_tbl;
+ struct bucket_table *tbl;
+ struct rhash_lock_head __rcu **bkt;
+ unsigned int hash;
+ void *data;
+
+ new_tbl = rcu_dereference(ht->tbl);
+
+ do {
+ tbl = new_tbl;
+ hash = rht_head_hashfn(ht, tbl, obj, ht->p);
+ if (rcu_access_pointer(tbl->future_tbl))
+ /* Failure is OK */
+ bkt = rht_bucket_var(tbl, hash);
+ else
+ bkt = rht_bucket_insert(ht, tbl, hash);
+ if (bkt == NULL) {
+ new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ data = ERR_PTR(-EAGAIN);
+ } else {
+ rht_lock(tbl, bkt);
+ data = rhashtable_lookup_one(ht, bkt, tbl,
+ hash, key, obj);
+ new_tbl = rhashtable_insert_one(ht, bkt, tbl,
+ hash, obj, data);
+ if (PTR_ERR(new_tbl) != -EEXIST)
+ data = ERR_CAST(new_tbl);
+
+ rht_unlock(tbl, bkt);
+ }
+ } while (!IS_ERR_OR_NULL(new_tbl));
+
+ if (PTR_ERR(data) == -EAGAIN)
+ data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
+ -EAGAIN);
+
+ return data;
+}
+
+void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+ struct rhash_head *obj)
+{
+ void *data;
+
+ do {
+ rcu_read_lock();
+ data = rhashtable_try_insert(ht, key, obj);
+ rcu_read_unlock();
+ } while (PTR_ERR(data) == -EAGAIN);
+
+ return data;
+}
+EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
+
+/**
+ * rhashtable_walk_enter - Initialise an iterator
+ * @ht: Table to walk over
+ * @iter: Hash table Iterator
+ *
+ * This function prepares a hash table walk.
+ *
+ * Note that if you restart a walk after rhashtable_walk_stop you
+ * may see the same object twice. Also, you may miss objects if
+ * there are removals in between rhashtable_walk_stop and the next
+ * call to rhashtable_walk_start.
+ *
+ * For a completely stable walk you should construct your own data
+ * structure outside the hash table.
+ *
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
+ *
+ * You must call rhashtable_walk_exit after this function returns.
+ */
+void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
+{
+ iter->ht = ht;
+ iter->p = NULL;
+ iter->slot = 0;
+ iter->skip = 0;
+ iter->end_of_table = 0;
+
+ spin_lock(&ht->lock);
+ iter->walker.tbl =
+ rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
+ list_add(&iter->walker.list, &iter->walker.tbl->walkers);
+ spin_unlock(&ht->lock);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
+
+/**
+ * rhashtable_walk_exit - Free an iterator
+ * @iter: Hash table Iterator
+ *
+ * This function frees resources allocated by rhashtable_walk_enter.
+ */
+void rhashtable_walk_exit(struct rhashtable_iter *iter)
+{
+ spin_lock(&iter->ht->lock);
+ if (iter->walker.tbl)
+ list_del(&iter->walker.list);
+ spin_unlock(&iter->ht->lock);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
+
+/**
+ * rhashtable_walk_start_check - Start a hash table walk
+ * @iter: Hash table iterator
+ *
+ * Start a hash table walk at the current iterator position. Note that we take
+ * the RCU lock in all cases including when we return an error. So you must
+ * always call rhashtable_walk_stop to clean up.
+ *
+ * Returns zero if successful.
+ *
+ * Returns -EAGAIN if a resize event occurred. Note that the iterator
+ * will rewind back to the beginning and you may use it immediately
+ * by calling rhashtable_walk_next.
+ *
+ * rhashtable_walk_start is defined as an inline variant that returns
+ * void. This is preferred in cases where the caller would ignore
+ * resize events and always continue.
+ */
+int rhashtable_walk_start_check(struct rhashtable_iter *iter)
+ __acquires(RCU)
+{
+ struct rhashtable *ht = iter->ht;
+ bool rhlist = ht->rhlist;
+
+ rcu_read_lock();
+
+ spin_lock(&ht->lock);
+ if (iter->walker.tbl)
+ list_del(&iter->walker.list);
+ spin_unlock(&ht->lock);
+
+ if (iter->end_of_table)
+ return 0;
+ if (!iter->walker.tbl) {
+ iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
+ iter->slot = 0;
+ iter->skip = 0;
+ return -EAGAIN;
+ }
+
+ if (iter->p && !rhlist) {
+ /*
+ * We need to validate that 'p' is still in the table, and
+ * if so, update 'skip'
+ */
+ struct rhash_head *p;
+ int skip = 0;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+ skip++;
+ if (p == iter->p) {
+ iter->skip = skip;
+ goto found;
+ }
+ }
+ iter->p = NULL;
+ } else if (iter->p && rhlist) {
+ /* Need to validate that 'list' is still in the table, and
+ * if so, update 'skip' and 'p'.
+ */
+ struct rhash_head *p;
+ struct rhlist_head *list;
+ int skip = 0;
+ rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+ for (list = container_of(p, struct rhlist_head, rhead);
+ list;
+ list = rcu_dereference(list->next)) {
+ skip++;
+ if (list == iter->list) {
+ iter->p = p;
+ iter->skip = skip;
+ goto found;
+ }
+ }
+ }
+ iter->p = NULL;
+ }
+found:
+ return 0;
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
+
+/**
+ * __rhashtable_walk_find_next - Find the next element in a table (or the first
+ * one in case of a new walk).
+ *
+ * @iter: Hash table iterator
+ *
+ * Returns the found object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if a resize event occurred.
+ */
+static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
+{
+ struct bucket_table *tbl = iter->walker.tbl;
+ struct rhlist_head *list = iter->list;
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+ bool rhlist = ht->rhlist;
+
+ if (!tbl)
+ return NULL;
+
+ for (; iter->slot < tbl->size; iter->slot++) {
+ int skip = iter->skip;
+
+ rht_for_each_rcu(p, tbl, iter->slot) {
+ if (rhlist) {
+ list = container_of(p, struct rhlist_head,
+ rhead);
+ do {
+ if (!skip)
+ goto next;
+ skip--;
+ list = rcu_dereference(list->next);
+ } while (list);
+
+ continue;
+ }
+ if (!skip)
+ break;
+ skip--;
+ }
+
+next:
+ if (!rht_is_a_nulls(p)) {
+ iter->skip++;
+ iter->p = p;
+ iter->list = list;
+ return rht_obj(ht, rhlist ? &list->rhead : p);
+ }
+
+ iter->skip = 0;
+ }
+
+ iter->p = NULL;
+
+ /* Ensure we see any new tables. */
+ smp_rmb();
+
+ iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (iter->walker.tbl) {
+ iter->slot = 0;
+ iter->skip = 0;
+ return ERR_PTR(-EAGAIN);
+ } else {
+ iter->end_of_table = true;
+ }
+
+ return NULL;
+}
+
+/**
+ * rhashtable_walk_next - Return the next object and advance the iterator
+ * @iter: Hash table iterator
+ *
+ * Note that you must call rhashtable_walk_stop when you are finished
+ * with the walk.
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if a resize event occurred. Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_next(struct rhashtable_iter *iter)
+{
+ struct rhlist_head *list = iter->list;
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+ bool rhlist = ht->rhlist;
+
+ if (p) {
+ if (!rhlist || !(list = rcu_dereference(list->next))) {
+ p = rcu_dereference(p->next);
+ list = container_of(p, struct rhlist_head, rhead);
+ }
+ if (!rht_is_a_nulls(p)) {
+ iter->skip++;
+ iter->p = p;
+ iter->list = list;
+ return rht_obj(ht, rhlist ? &list->rhead : p);
+ }
+
+ /* At the end of this slot, switch to next one and then find
+ * next entry from that point.
+ */
+ iter->skip = 0;
+ iter->slot++;
+ }
+
+ return __rhashtable_walk_find_next(iter);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_next);
+
+/**
+ * rhashtable_walk_peek - Return the next object but don't advance the iterator
+ * @iter: Hash table iterator
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if a resize event occurred. Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_peek(struct rhashtable_iter *iter)
+{
+ struct rhlist_head *list = iter->list;
+ struct rhashtable *ht = iter->ht;
+ struct rhash_head *p = iter->p;
+
+ if (p)
+ return rht_obj(ht, ht->rhlist ? &list->rhead : p);
+
+ /* No object found in current iter, find next one in the table. */
+
+ if (iter->skip) {
+ /* A nonzero skip value points to the next entry in the table
+		 * beyond the last one that was found. Decrement skip so
+ * we find the current value. __rhashtable_walk_find_next
+ * will restore the original value of skip assuming that
+ * the table hasn't changed.
+ */
+ iter->skip--;
+ }
+
+ return __rhashtable_walk_find_next(iter);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
+
+/**
+ * rhashtable_walk_stop - Finish a hash table walk
+ * @iter: Hash table iterator
+ *
+ * Finish a hash table walk. Does not reset the iterator to the start of the
+ * hash table.
+ */
+void rhashtable_walk_stop(struct rhashtable_iter *iter)
+ __releases(RCU)
+{
+ struct rhashtable *ht;
+ struct bucket_table *tbl = iter->walker.tbl;
+
+ if (!tbl)
+ goto out;
+
+ ht = iter->ht;
+
+ spin_lock(&ht->lock);
+ if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
+ /* This bucket table is being freed, don't re-link it. */
+ iter->walker.tbl = NULL;
+ else
+ list_add(&iter->walker.list, &tbl->walkers);
+ spin_unlock(&ht->lock);
+
+out:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
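+
+/*
+ * Illustrative walk sketch (editor's example, not part of the upstream
+ * sources): visit every object while tolerating -EAGAIN from concurrent
+ * resizes. visit() and the object type are placeholders.
+ *
+ *	struct rhashtable_iter iter;
+ *	struct test_obj *obj;
+ *
+ *	rhashtable_walk_enter(&ht, &iter);
+ *	rhashtable_walk_start(&iter);
+ *	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
+ *		if (IS_ERR(obj))
+ *			continue;	// -EAGAIN: iterator rewound, keep going
+ *		visit(obj);
+ *	}
+ *	rhashtable_walk_stop(&iter);
+ *	rhashtable_walk_exit(&iter);
+ */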
+
+static size_t rounded_hashtable_size(const struct rhashtable_params *params)
+{
+ size_t retsize;
+
+ if (params->nelem_hint)
+ retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+ (unsigned long)params->min_size);
+ else
+ retsize = max(HASH_DEFAULT_SIZE,
+ (unsigned long)params->min_size);
+
+ return retsize;
+}
+
+static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
+{
+ return jhash2(key, length, seed);
+}
+
+/**
+ * rhashtable_init - initialize a new hash table
+ * @ht: hash table to be initialized
+ * @params: configuration parameters
+ *
+ * Initializes a new hash table based on the provided configuration
+ * parameters. A table can be configured either with a variable or
+ * fixed length key:
+ *
+ * Configuration Example 1: Fixed length keys
+ * struct test_obj {
+ * int key;
+ * void * my_member;
+ * struct rhash_head node;
+ * };
+ *
+ * struct rhashtable_params params = {
+ * .head_offset = offsetof(struct test_obj, node),
+ * .key_offset = offsetof(struct test_obj, key),
+ * .key_len = sizeof(int),
+ * .hashfn = jhash,
+ * };
+ *
+ * Configuration Example 2: Variable length keys
+ * struct test_obj {
+ * [...]
+ * struct rhash_head node;
+ * };
+ *
+ * u32 my_hash_fn(const void *data, u32 len, u32 seed)
+ * {
+ * struct test_obj *obj = data;
+ *
+ * return [... hash ...];
+ * }
+ *
+ * struct rhashtable_params params = {
+ * .head_offset = offsetof(struct test_obj, node),
+ * .hashfn = jhash,
+ * .obj_hashfn = my_hash_fn,
+ * };
+ */
+int rhashtable_init(struct rhashtable *ht,
+ const struct rhashtable_params *params)
+{
+ struct bucket_table *tbl;
+ size_t size;
+
+ if ((!params->key_len && !params->obj_hashfn) ||
+ (params->obj_hashfn && !params->obj_cmpfn))
+ return -EINVAL;
+
+ memset(ht, 0, sizeof(*ht));
+ mutex_init(&ht->mutex);
+ spin_lock_init(&ht->lock);
+ memcpy(&ht->p, params, sizeof(*params));
+
+ if (params->min_size)
+ ht->p.min_size = roundup_pow_of_two(params->min_size);
+
+ /* Cap total entries at 2^31 to avoid nelems overflow. */
+ ht->max_elems = 1u << 31;
+
+ if (params->max_size) {
+ ht->p.max_size = rounddown_pow_of_two(params->max_size);
+ if (ht->p.max_size < ht->max_elems / 2)
+ ht->max_elems = ht->p.max_size * 2;
+ }
+
+ ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
+
+ size = rounded_hashtable_size(&ht->p);
+
+ ht->key_len = ht->p.key_len;
+ if (!params->hashfn) {
+ ht->p.hashfn = jhash;
+
+ if (!(ht->key_len & (sizeof(u32) - 1))) {
+ ht->key_len /= sizeof(u32);
+ ht->p.hashfn = rhashtable_jhash2;
+ }
+ }
+
+ /*
+ * This is API initialization and thus we need to guarantee the
+ * initial rhashtable allocation. Upon failure, retry with the
+ * smallest possible size with __GFP_NOFAIL semantics.
+ */
+ tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+ if (unlikely(tbl == NULL)) {
+ size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
+ tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
+ }
+
+ atomic_set(&ht->nelems, 0);
+
+ RCU_INIT_POINTER(ht->tbl, tbl);
+
+ INIT_WORK(&ht->run_work, rht_deferred_worker);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(rhashtable_init);
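
For orientation, a minimal, hypothetical usage sketch of a table configured as in
Configuration Example 1 above. It assumes the rhashtable_insert_fast() and
rhashtable_lookup_fast() helpers from the companion rhashtable.h behave as in the
upstream API; add_and_find() is an illustrative name only.

static const struct rhashtable_params test_params = {
	.head_offset	= offsetof(struct test_obj, node),
	.key_offset	= offsetof(struct test_obj, key),
	.key_len	= sizeof(int),
	.hashfn		= jhash,
};

static int add_and_find(struct rhashtable *ht, struct test_obj *obj)
{
	int err = rhashtable_insert_fast(ht, &obj->node, test_params);

	if (err)
		return err;

	/* Lookups take a pointer to the key, not to the object: */
	return rhashtable_lookup_fast(ht, &obj->key, test_params) ? 0 : -ENOENT;
}
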
+
+/**
+ * rhltable_init - initialize a new hash list table
+ * @hlt: hash list table to be initialized
+ * @params: configuration parameters
+ *
+ * Initializes a new hash list table.
+ *
+ * See documentation for rhashtable_init.
+ */
+int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
+{
+ int err;
+
+ err = rhashtable_init(&hlt->ht, params);
+ hlt->ht.rhlist = true;
+ return err;
+}
+EXPORT_SYMBOL_GPL(rhltable_init);
+
+static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
+ void (*free_fn)(void *ptr, void *arg),
+ void *arg)
+{
+ struct rhlist_head *list;
+
+ if (!ht->rhlist) {
+ free_fn(rht_obj(ht, obj), arg);
+ return;
+ }
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ do {
+ obj = &list->rhead;
+ list = rht_dereference(list->next, ht);
+ free_fn(rht_obj(ht, obj), arg);
+ } while (list);
+}
+
+/**
+ * rhashtable_free_and_destroy - free elements and destroy hash table
+ * @ht: the hash table to destroy
+ * @free_fn: callback to release resources of element
+ * @arg: pointer passed to free_fn
+ *
+ * Stops any pending async resize. If defined, invokes free_fn for each
+ * element to release its resources. Please note that RCU-protected
+ * readers may still be accessing the elements. Releasing of resources
+ * must occur in a compatible manner. Then frees the bucket array.
+ *
+ * This function will eventually sleep to wait for an async resize
+ * to complete. The caller is responsible for ensuring that no further
+ * write operations occur in parallel.
+ */
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+ void (*free_fn)(void *ptr, void *arg),
+ void *arg)
+{
+ struct bucket_table *tbl, *next_tbl;
+ unsigned int i;
+
+ cancel_work_sync(&ht->run_work);
+
+ mutex_lock(&ht->mutex);
+ tbl = rht_dereference(ht->tbl, ht);
+restart:
+ if (free_fn) {
+ for (i = 0; i < tbl->size; i++) {
+ struct rhash_head *pos, *next;
+
+ cond_resched();
+ for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
+ next = !rht_is_a_nulls(pos) ?
+ rht_dereference(pos->next, ht) : NULL;
+ !rht_is_a_nulls(pos);
+ pos = next,
+ next = !rht_is_a_nulls(pos) ?
+ rht_dereference(pos->next, ht) : NULL)
+ rhashtable_free_one(ht, pos, free_fn, arg);
+ }
+ }
+
+ next_tbl = rht_dereference(tbl->future_tbl, ht);
+ bucket_table_free(tbl);
+ if (next_tbl) {
+ tbl = next_tbl;
+ goto restart;
+ }
+ mutex_unlock(&ht->mutex);
+}
+EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
+
+void rhashtable_destroy(struct rhashtable *ht)
+{
+ return rhashtable_free_and_destroy(ht, NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(rhashtable_destroy);
+
+struct rhash_lock_head __rcu **__rht_bucket_nested(
+ const struct bucket_table *tbl, unsigned int hash)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ unsigned int index = hash & ((1 << tbl->nest) - 1);
+ unsigned int size = tbl->size >> tbl->nest;
+ unsigned int subhash = hash;
+ union nested_table *ntbl;
+
+ ntbl = nested_table_top(tbl);
+ ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
+ subhash >>= tbl->nest;
+
+ while (ntbl && size > (1 << shift)) {
+ index = subhash & ((1 << shift) - 1);
+ ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
+ tbl, hash);
+ size >>= shift;
+ subhash >>= shift;
+ }
+
+ if (!ntbl)
+ return NULL;
+
+ return &ntbl[subhash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(__rht_bucket_nested);
+
+struct rhash_lock_head __rcu **rht_bucket_nested(
+ const struct bucket_table *tbl, unsigned int hash)
+{
+ static struct rhash_lock_head __rcu *rhnull;
+
+ if (!rhnull)
+ INIT_RHT_NULLS_HEAD(rhnull);
+ return __rht_bucket_nested(tbl, hash) ?: &rhnull;
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested);
+
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(
+ struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
+{
+ const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+ unsigned int index = hash & ((1 << tbl->nest) - 1);
+ unsigned int size = tbl->size >> tbl->nest;
+ union nested_table *ntbl;
+
+ ntbl = nested_table_top(tbl);
+ hash >>= tbl->nest;
+ ntbl = nested_table_alloc(ht, &ntbl[index].table,
+ size <= (1 << shift));
+
+ while (ntbl && size > (1 << shift)) {
+ index = hash & ((1 << shift) - 1);
+ size >>= shift;
+ hash >>= shift;
+ ntbl = nested_table_alloc(ht, &ntbl[index].table,
+ size <= (1 << shift));
+ }
+
+ if (!ntbl)
+ return NULL;
+
+ return &ntbl[hash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
diff --git a/c_src/linux/sched.c b/c_src/linux/sched.c
new file mode 100644
index 00000000..1c7198d2
--- /dev/null
+++ b/c_src/linux/sched.c
@@ -0,0 +1,133 @@
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <linux/futex.h>
+
+/* hack for mips: */
+#define CONFIG_RCU_HAVE_FUTEX 1
+#include <urcu/futex.h>
+
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+
+__thread struct task_struct *current;
+
+void __put_task_struct(struct task_struct *t)
+{
+ pthread_join(t->thread, NULL);
+ free(t);
+}
+
+/* returns true if process was woken up, false if it was already running */
+int wake_up_process(struct task_struct *p)
+{
+ int ret = p->state != TASK_RUNNING;
+
+ p->state = TASK_RUNNING;
+ futex(&p->state, FUTEX_WAKE|FUTEX_PRIVATE_FLAG,
+ INT_MAX, NULL, NULL, 0);
+ return ret;
+}
+
+void schedule(void)
+{
+ int v;
+
+ rcu_quiescent_state();
+
+ while ((v = READ_ONCE(current->state)) != TASK_RUNNING)
+ futex(&current->state, FUTEX_WAIT|FUTEX_PRIVATE_FLAG,
+ v, NULL, NULL, 0);
+}
+
+struct process_timer {
+ struct timer_list timer;
+ struct task_struct *task;
+};
+
+static void process_timeout(struct timer_list *t)
+{
+ struct process_timer *timeout =
+ container_of(t, struct process_timer, timer);
+
+ wake_up_process(timeout->task);
+}
+
+long schedule_timeout(long timeout)
+{
+ struct process_timer timer;
+ unsigned long expire;
+
+ switch (timeout)
+ {
+ case MAX_SCHEDULE_TIMEOUT:
+ /*
+ * These two special cases are useful to the caller. Nothing
+ * more. We could take MAX_SCHEDULE_TIMEOUT from one of the
+ * negative values, but I'd like to return a valid offset
+ * (>= 0) to allow the caller to do whatever it wants with
+ * the retval.
+ */
+ schedule();
+ goto out;
+ default:
+ /*
+ * Another bit of paranoia. Note that the retval will be
+ * 0, since no caller is supposed to check for a negative
+ * retval of schedule_timeout() (it should never happen
+ * anyway). You just have the fprintf() below that will
+ * tell you if something has gone wrong, and where.
+ */
+ if (timeout < 0) {
+ fprintf(stderr, "schedule_timeout: wrong timeout "
+ "value %lx\n", timeout);
+ current->state = TASK_RUNNING;
+ goto out;
+ }
+ }
+
+ expire = timeout + jiffies;
+
+ timer.task = current;
+ timer_setup_on_stack(&timer.timer, process_timeout, 0);
+ mod_timer(&timer.timer, expire);
+ schedule();
+ del_timer_sync(&timer.timer);
+
+ timeout = expire - jiffies;
+out:
+ return timeout < 0 ? 0 : timeout;
+}
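
The schedule()/wake_up_process() pair above implements the usual kernel sleep/wake
protocol in userspace: mark the task not-runnable, re-check the condition, then
sleep on the state futex. A hypothetical sketch of that pattern; cond_ready,
waiter_task and the function names are illustrative only, and READ_ONCE()/WRITE_ONCE()
are assumed to come from the compiler.h shim.

static int cond_ready;
static struct task_struct *waiter_task;

static void wait_for_ready(void)
{
	while (!READ_ONCE(cond_ready)) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		/* Re-check after marking ourselves asleep, to avoid a lost wakeup: */
		if (READ_ONCE(cond_ready)) {
			__set_current_state(TASK_RUNNING);
			break;
		}
		schedule();	/* blocks on the current->state futex */
	}
}

static void mark_ready(void)
{
	WRITE_ONCE(cond_ready, 1);
	wake_up_process(waiter_task);	/* sets TASK_RUNNING and wakes the futex */
}
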
+
+__attribute__((constructor(101)))
+static void sched_init(void)
+{
+ struct task_struct *p = malloc(sizeof(*p));
+
+ memset(p, 0, sizeof(*p));
+
+ p->state = TASK_RUNNING;
+ atomic_set(&p->usage, 1);
+ init_completion(&p->exited);
+
+ current = p;
+
+ rcu_init();
+ rcu_register_thread();
+}
+
+#ifndef SYS_getrandom
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+int urandom_fd;
+
+__attribute__((constructor(101)))
+static void rand_init(void)
+{
+ urandom_fd = open("/dev/urandom", O_RDONLY);
+ BUG_ON(urandom_fd < 0);
+}
+#endif
diff --git a/c_src/linux/semaphore.c b/c_src/linux/semaphore.c
new file mode 100644
index 00000000..b7d4b517
--- /dev/null
+++ b/c_src/linux/semaphore.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * This file implements counting semaphores.
+ * A counting semaphore may be acquired 'n' times before sleeping.
+ * See mutex.c for single-acquisition sleeping locks which enforce
+ * rules which allow code to be debugged more easily.
+ */
+
+/*
+ * Some notes on the implementation:
+ *
+ * The spinlock controls access to the other members of the semaphore.
+ * down_trylock() and up() can be called from interrupt context, so we
+ * have to disable interrupts when taking the lock. It turns out various
+ * parts of the kernel expect to be able to use down() on a semaphore in
+ * interrupt context when they know it will succeed, so we have to use
+ * irqsave variants for down(), down_interruptible() and down_killable()
+ * too.
+ *
+ * The ->count variable represents how many more tasks can acquire this
+ * semaphore. If it's zero, there may be tasks waiting on the wait_list.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+
+static noinline void __down(struct semaphore *sem);
+static noinline int __down_timeout(struct semaphore *sem, long timeout);
+static noinline void __up(struct semaphore *sem);
+
+/**
+ * down - acquire the semaphore
+ * @sem: the semaphore to be acquired
+ *
+ * Acquires the semaphore. If no more tasks are allowed to acquire the
+ * semaphore, calling this function will put the task to sleep until the
+ * semaphore is released.
+ *
+ * Use of this function is deprecated, please use down_interruptible() or
+ * down_killable() instead.
+ */
+void down(struct semaphore *sem)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&sem->lock, flags);
+ if (likely(sem->count > 0))
+ sem->count--;
+ else
+ __down(sem);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(down);
+
+/**
+ * down_trylock - try to acquire the semaphore, without waiting
+ * @sem: the semaphore to be acquired
+ *
+ * Try to acquire the semaphore atomically. Returns 0 if the semaphore has
+ * been acquired successfully or 1 if it cannot be acquired.
+ *
+ * NOTE: This return value is inverted from both spin_trylock and
+ * mutex_trylock! Be careful about this when converting code.
+ *
+ * Unlike mutex_trylock, this function can be used from interrupt context,
+ * and the semaphore can be released by any task or interrupt.
+ */
+int down_trylock(struct semaphore *sem)
+{
+ unsigned long flags;
+ int count;
+
+ raw_spin_lock_irqsave(&sem->lock, flags);
+ count = sem->count - 1;
+ if (likely(count >= 0))
+ sem->count = count;
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
+
+ return (count < 0);
+}
+EXPORT_SYMBOL(down_trylock);
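
Because the return convention is inverted relative to mutex_trylock(), a short
hypothetical usage sketch may help; try_fast_path() is an illustrative caller only.

static void try_fast_path(struct semaphore *sem)
{
	if (down_trylock(sem)) {
		/* contended: we did NOT get the semaphore; do not call up() */
		return;
	}
	/* 0 means success here, the opposite of mutex_trylock() */
	up(sem);
}
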
+
+/**
+ * down_timeout - acquire the semaphore within a specified time
+ * @sem: the semaphore to be acquired
+ * @timeout: how long to wait before failing
+ *
+ * Attempts to acquire the semaphore. If no more tasks are allowed to
+ * acquire the semaphore, calling this function will put the task to sleep.
+ * If the semaphore is not released within the specified number of jiffies,
+ * this function returns -ETIME. It returns 0 if the semaphore was acquired.
+ */
+int down_timeout(struct semaphore *sem, long timeout)
+{
+ unsigned long flags;
+ int result = 0;
+
+ raw_spin_lock_irqsave(&sem->lock, flags);
+ if (likely(sem->count > 0))
+ sem->count--;
+ else
+ result = __down_timeout(sem, timeout);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
+
+ return result;
+}
+EXPORT_SYMBOL(down_timeout);
+
+/**
+ * up - release the semaphore
+ * @sem: the semaphore to release
+ *
+ * Release the semaphore. Unlike mutexes, up() may be called from any
+ * context and even by tasks which have never called down().
+ */
+void up(struct semaphore *sem)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&sem->lock, flags);
+ if (likely(list_empty(&sem->wait_list)))
+ sem->count++;
+ else
+ __up(sem);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(up);
+
+/* Functions for the contended case */
+
+struct semaphore_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ bool up;
+};
+
+/*
+ * Because this function is inlined, the 'state' parameter will be
+ * constant, and thus optimised away by the compiler. Likewise the
+ * 'timeout' parameter for the cases without timeouts.
+ */
+static inline int __sched __down_common(struct semaphore *sem, long state,
+ long timeout)
+{
+ struct semaphore_waiter waiter;
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+ waiter.task = current;
+ waiter.up = false;
+
+ for (;;) {
+ if (unlikely(timeout <= 0))
+ goto timed_out;
+ __set_current_state(state);
+ raw_spin_unlock_irq(&sem->lock);
+ timeout = schedule_timeout(timeout);
+ raw_spin_lock_irq(&sem->lock);
+ if (waiter.up)
+ return 0;
+ }
+
+ timed_out:
+ list_del(&waiter.list);
+ return -ETIME;
+}
+
+static noinline void __sched __down(struct semaphore *sem)
+{
+ __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+}
+
+static noinline int __sched __down_timeout(struct semaphore *sem, long timeout)
+{
+ return __down_common(sem, TASK_UNINTERRUPTIBLE, timeout);
+}
+
+static noinline void __sched __up(struct semaphore *sem)
+{
+ struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
+ struct semaphore_waiter, list);
+ list_del(&waiter->list);
+ waiter->up = true;
+ wake_up_process(waiter->task);
+}
diff --git a/c_src/linux/seq_buf.c b/c_src/linux/seq_buf.c
new file mode 100644
index 00000000..cf8709ad
--- /dev/null
+++ b/c_src/linux/seq_buf.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * seq_buf.c
+ *
+ * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * The seq_buf is a handy tool that allows you to pass a descriptor around
+ * to a buffer that other functions can write to. It is similar to the
+ * seq_file functionality but has some differences.
+ *
+ * To use it, the seq_buf must be initialized with seq_buf_init().
+ * This will set up the counters within the descriptor. You can call
+ * seq_buf_init() more than once to reset the seq_buf to start
+ * from scratch.
+ */
+#include <linux/seq_buf.h>
+#include <stdio.h>
+
+/**
+ * seq_buf_can_fit - can the new data fit in the current buffer?
+ * @s: the seq_buf descriptor
+ * @len: The length to see if it can fit in the current buffer
+ *
+ * Returns true if there's enough unused space in the seq_buf buffer
+ * to fit the amount of new data according to @len.
+ */
+static bool seq_buf_can_fit(struct seq_buf *s, size_t len)
+{
+ return s->len + len <= s->size;
+}
+
+/**
+ * seq_buf_vprintf - sequence printing of information.
+ * @s: seq_buf descriptor
+ * @fmt: printf format string
+ * @args: va_list of arguments from a printf() type function
+ *
+ * Writes a vsnprintf() format into the sequence buffer.
+ *
+ * Returns zero on success, -1 on overflow.
+ */
+int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args)
+{
+ int len;
+
+ WARN_ON(s->size == 0);
+
+ if (s->len < s->size) {
+ len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args);
+ if (s->len + len < s->size) {
+ s->len += len;
+ return 0;
+ }
+ }
+ seq_buf_set_overflow(s);
+ return -1;
+}
+
+/**
+ * seq_buf_printf - sequence printing of information
+ * @s: seq_buf descriptor
+ * @fmt: printf format string
+ *
+ * Writes a printf() format into the sequence buffer.
+ *
+ * Returns zero on success, -1 on overflow.
+ */
+int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = seq_buf_vprintf(s, fmt, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+/**
+ * seq_buf_puts - sequence printing of simple string
+ * @s: seq_buf descriptor
+ * @str: simple string to record
+ *
+ * Copy a simple string into the sequence buffer.
+ *
+ * Returns zero on success, -1 on overflow
+ */
+int seq_buf_puts(struct seq_buf *s, const char *str)
+{
+ size_t len = strlen(str);
+
+ WARN_ON(s->size == 0);
+
+ /* Add 1 to len for the trailing null byte which must be there */
+ len += 1;
+
+ if (seq_buf_can_fit(s, len)) {
+ memcpy(s->buffer + s->len, str, len);
+ /* Don't count the trailing null byte against the capacity */
+ s->len += len - 1;
+ return 0;
+ }
+ seq_buf_set_overflow(s);
+ return -1;
+}
+
+/**
+ * seq_buf_putc - sequence printing of simple character
+ * @s: seq_buf descriptor
+ * @c: simple character to record
+ *
+ * Copy a single character into the sequence buffer.
+ *
+ * Returns zero on success, -1 on overflow
+ */
+int seq_buf_putc(struct seq_buf *s, unsigned char c)
+{
+ WARN_ON(s->size == 0);
+
+ if (seq_buf_can_fit(s, 1)) {
+ s->buffer[s->len++] = c;
+ return 0;
+ }
+ seq_buf_set_overflow(s);
+ return -1;
+}
+
+/**
+ * seq_buf_putmem - write raw data into the sequence buffer
+ * @s: seq_buf descriptor
+ * @mem: The raw memory to copy into the buffer
+ * @len: The length of the raw memory to copy (in bytes)
+ *
+ * There may be cases where raw memory needs to be written into the
+ * buffer and a strcpy() would not work. Using this function allows
+ * for such cases.
+ *
+ * Returns zero on success, -1 on overflow
+ */
+int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len)
+{
+ WARN_ON(s->size == 0);
+
+ if (seq_buf_can_fit(s, len)) {
+ memcpy(s->buffer + s->len, mem, len);
+ s->len += len;
+ return 0;
+ }
+ seq_buf_set_overflow(s);
+ return -1;
+}
diff --git a/c_src/linux/shrinker.c b/c_src/linux/shrinker.c
new file mode 100644
index 00000000..ca34ebc7
--- /dev/null
+++ b/c_src/linux/shrinker.c
@@ -0,0 +1,140 @@
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/shrinker.h>
+
+#include "tools-util.h"
+
+static LIST_HEAD(shrinker_list);
+static DEFINE_MUTEX(shrinker_lock);
+
+void shrinker_free(struct shrinker *s)
+{
+ if (s->list.next) {
+ mutex_lock(&shrinker_lock);
+ list_del(&s->list);
+ mutex_unlock(&shrinker_lock);
+ }
+ free(s);
+}
+
+struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
+{
+ return calloc(1, sizeof(struct shrinker));
+}
+
+int shrinker_register(struct shrinker *shrinker)
+{
+ mutex_lock(&shrinker_lock);
+ list_add_tail(&shrinker->list, &shrinker_list);
+ mutex_unlock(&shrinker_lock);
+ return 0;
+}
+
+static void run_shrinkers_allocation_failed(gfp_t gfp_mask)
+{
+ struct shrinker *shrinker;
+
+ mutex_lock(&shrinker_lock);
+ list_for_each_entry(shrinker, &shrinker_list, list) {
+ struct shrink_control sc = { .gfp_mask = gfp_mask, };
+
+ unsigned long have = shrinker->count_objects(shrinker, &sc);
+
+ sc.nr_to_scan = have / 8;
+
+ shrinker->scan_objects(shrinker, &sc);
+ }
+ mutex_unlock(&shrinker_lock);
+}
+
+void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
+{
+ struct shrinker *shrinker;
+ struct sysinfo info;
+ s64 want_shrink;
+
+ if (!(gfp_mask & GFP_KERNEL))
+ return;
+
+ /* Fast out if there are no shrinkers to run. */
+ if (list_empty(&shrinker_list))
+ return;
+
+ if (allocation_failed) {
+ run_shrinkers_allocation_failed(gfp_mask);
+ return;
+ }
+
+ si_meminfo(&info);
+
+ /* Aim for 6% of physical RAM free without anything in swap */
+ want_shrink = (info.totalram >> 4) - info.freeram
+ + info.totalswap - info.freeswap;
+ if (want_shrink <= 0)
+ return;
+
+ mutex_lock(&shrinker_lock);
+ list_for_each_entry(shrinker, &shrinker_list, list) {
+ struct shrink_control sc = {
+ .gfp_mask = gfp_mask,
+ .nr_to_scan = want_shrink >> PAGE_SHIFT
+ };
+
+ shrinker->scan_objects(shrinker, &sc);
+ }
+ mutex_unlock(&shrinker_lock);
+}
+
+static int shrinker_thread(void *arg)
+{
+ while (!kthread_should_stop()) {
+ struct timespec to;
+ int v;
+
+ clock_gettime(CLOCK_MONOTONIC, &to);
+ to.tv_sec += 1;
+ __set_current_state(TASK_INTERRUPTIBLE);
+ errno = 0;
+ while ((v = READ_ONCE(current->state)) != TASK_RUNNING &&
+ errno != ETIMEDOUT)
+ futex(&current->state, FUTEX_WAIT_BITSET|FUTEX_PRIVATE_FLAG,
+ v, &to, NULL, (uint32_t)~0);
+ if (kthread_should_stop())
+ break;
+ if (v != TASK_RUNNING)
+ __set_current_state(TASK_RUNNING);
+ run_shrinkers(GFP_KERNEL, false);
+ }
+
+ return 0;
+}
+
+struct task_struct *shrinker_task;
+
+__attribute__((constructor(103)))
+static void shrinker_thread_init(void)
+{
+ shrinker_task = kthread_run(shrinker_thread, NULL, "shrinkers");
+ BUG_ON(IS_ERR(shrinker_task));
+}
+
+#if 0
+/*
+ * We seem to be hitting a rare segfault when shutting down the shrinker thread.
+ * Disabling this is going to cause some harmless warnings about memory leaks:
+ */
+__attribute__((destructor(103)))
+static void shrinker_thread_exit(void)
+{
+ int ret = kthread_stop(shrinker_task);
+ BUG_ON(ret);
+
+ shrinker_task = NULL;
+}
+#endif
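
As a rough sketch of how a cache would hook into this shim: the my_cache_* names
below are purely hypothetical helpers, and the count_objects/scan_objects callback
signatures are assumed to match their use in run_shrinkers() above.

static unsigned long my_cache_count(struct shrinker *s, struct shrink_control *sc)
{
	return my_cache_nr_cached;		/* hypothetical counter */
}

static unsigned long my_cache_scan(struct shrinker *s, struct shrink_control *sc)
{
	return my_cache_evict(sc->nr_to_scan);	/* hypothetical eviction helper */
}

static int my_cache_shrinker_init(void)
{
	struct shrinker *s = shrinker_alloc(0, "my-cache");

	if (!s)
		return -ENOMEM;

	s->count_objects = my_cache_count;
	s->scan_objects	 = my_cache_scan;
	return shrinker_register(s);
}
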
diff --git a/c_src/linux/siphash.c b/c_src/linux/siphash.c
new file mode 100644
index 00000000..f8dbecea
--- /dev/null
+++ b/c_src/linux/siphash.c
@@ -0,0 +1,552 @@
+/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
+ */
+
+#include <linux/siphash.h>
+#include <linux/bitops.h>
+#include <asm/unaligned.h>
+
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+#include <linux/dcache.h>
+#include <asm/word-at-a-time.h>
+#endif
+
+#define SIPROUND \
+ do { \
+ v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
+ v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
+ v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
+ v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
+ } while (0)
+
+#define PREAMBLE(len) \
+ u64 v0 = 0x736f6d6570736575ULL; \
+ u64 v1 = 0x646f72616e646f6dULL; \
+ u64 v2 = 0x6c7967656e657261ULL; \
+ u64 v3 = 0x7465646279746573ULL; \
+ u64 b = ((u64)(len)) << 56; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+ v1 ^= key->key[1]; \
+ v0 ^= key->key[0];
+
+#define POSTAMBLE \
+ v3 ^= b; \
+ SIPROUND; \
+ SIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
+u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ PREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = le64_to_cpup(data);
+ v3 ^= m;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48; fallthrough;
+ case 6: b |= ((u64)end[5]) << 40; fallthrough;
+ case 5: b |= ((u64)end[4]) << 32; fallthrough;
+ case 4: b |= le32_to_cpup(data); break;
+ case 3: b |= ((u64)end[2]) << 16; fallthrough;
+ case 2: b |= le16_to_cpup(data); break;
+ case 1: b |= end[0];
+ }
+#endif
+ POSTAMBLE
+}
+EXPORT_SYMBOL(__siphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ PREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = get_unaligned_le64(data);
+ v3 ^= m;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48; fallthrough;
+ case 6: b |= ((u64)end[5]) << 40; fallthrough;
+ case 5: b |= ((u64)end[4]) << 32; fallthrough;
+ case 4: b |= get_unaligned_le32(end); break;
+ case 3: b |= ((u64)end[2]) << 16; fallthrough;
+ case 2: b |= get_unaligned_le16(end); break;
+ case 1: b |= end[0];
+ }
+#endif
+ POSTAMBLE
+}
+EXPORT_SYMBOL(__siphash_unaligned);
+#endif
+
+/**
+ * siphash_1u64 - compute 64-bit siphash PRF value of a u64
+ * @first: first u64
+ * @key: the siphash key
+ */
+u64 siphash_1u64(const u64 first, const siphash_key_t *key)
+{
+ PREAMBLE(8)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_1u64);
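
A hedged usage sketch: my_table_key and hash_inode_nr() are hypothetical names, and
filling the key with random bytes at startup is left out of the sketch.

static siphash_key_t my_table_key;	/* one fixed key per table, set once at init */

static u64 hash_inode_nr(u64 inum)
{
	/* SipHash-2-4 of a single u64 under my_table_key */
	return siphash_1u64(inum, &my_table_key);
}
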
+
+/**
+ * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64
+ * @first: first u64
+ * @second: second u64
+ * @key: the siphash key
+ */
+u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
+{
+ PREAMBLE(16)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_2u64);
+
+/**
+ * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64
+ * @first: first u64
+ * @second: second u64
+ * @third: third u64
+ * @key: the siphash key
+ */
+u64 siphash_3u64(const u64 first, const u64 second, const u64 third,
+ const siphash_key_t *key)
+{
+ PREAMBLE(24)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= third;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_3u64);
+
+/**
+ * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64
+ * @first: first u64
+ * @second: second u64
+ * @third: third u64
+ * @forth: forth u64
+ * @key: the siphash key
+ */
+u64 siphash_4u64(const u64 first, const u64 second, const u64 third,
+ const u64 forth, const siphash_key_t *key)
+{
+ PREAMBLE(32)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= third;
+ v3 ^= forth;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= forth;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_4u64);
+
+u64 siphash_1u32(const u32 first, const siphash_key_t *key)
+{
+ PREAMBLE(4)
+ b |= first;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_1u32);
+
+u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
+ const siphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ PREAMBLE(12)
+ v3 ^= combined;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= combined;
+ b |= third;
+ POSTAMBLE
+}
+EXPORT_SYMBOL(siphash_3u32);
+
+#if BITS_PER_LONG == 64
+/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for
+ * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3.
+ */
+
+#define HSIPROUND SIPROUND
+#define HPREAMBLE(len) PREAMBLE(len)
+#define HPOSTAMBLE \
+ v3 ^= b; \
+ HSIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ HSIPROUND; \
+ HSIPROUND; \
+ HSIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = le64_to_cpup(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48; fallthrough;
+ case 6: b |= ((u64)end[5]) << 40; fallthrough;
+ case 5: b |= ((u64)end[4]) << 32; fallthrough;
+ case 4: b |= le32_to_cpup(data); break;
+ case 3: b |= ((u64)end[2]) << 16; fallthrough;
+ case 2: b |= le16_to_cpup(data); break;
+ case 1: b |= end[0];
+ }
+#endif
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u64));
+ const u8 left = len & (sizeof(u64) - 1);
+ u64 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u64)) {
+ m = get_unaligned_le64(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+ if (left)
+ b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+ bytemask_from_count(left)));
+#else
+ switch (left) {
+ case 7: b |= ((u64)end[6]) << 48; fallthrough;
+ case 6: b |= ((u64)end[5]) << 40; fallthrough;
+ case 5: b |= ((u64)end[4]) << 32; fallthrough;
+ case 4: b |= get_unaligned_le32(end); break;
+ case 3: b |= ((u64)end[2]) << 16; fallthrough;
+ case 2: b |= get_unaligned_le16(end); break;
+ case 1: b |= end[0];
+ }
+#endif
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_unaligned);
+#endif
+
+/**
+ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
+ * @first: first u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
+{
+ HPREAMBLE(4)
+ b |= first;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_1u32);
+
+/**
+ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
+ * @first: first u32
+ * @second: second u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ HPREAMBLE(8)
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_2u32);
+
+/**
+ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
+ const hsiphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ HPREAMBLE(12)
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ b |= third;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_3u32);
+
+/**
+ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @forth: forth u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+ const u32 forth, const hsiphash_key_t *key)
+{
+ u64 combined = (u64)second << 32 | first;
+ HPREAMBLE(16)
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ combined = (u64)forth << 32 | third;
+ v3 ^= combined;
+ HSIPROUND;
+ v0 ^= combined;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_4u32);
+#else
+#define HSIPROUND \
+ do { \
+ v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
+ v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
+ v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
+ v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
+ } while (0)
+
+#define HPREAMBLE(len) \
+ u32 v0 = 0; \
+ u32 v1 = 0; \
+ u32 v2 = 0x6c796765U; \
+ u32 v3 = 0x74656462U; \
+ u32 b = ((u32)(len)) << 24; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+ v1 ^= key->key[1]; \
+ v0 ^= key->key[0];
+
+#define HPOSTAMBLE \
+ v3 ^= b; \
+ HSIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ HSIPROUND; \
+ HSIPROUND; \
+ HSIPROUND; \
+ return v1 ^ v3;
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u32));
+ const u8 left = len & (sizeof(u32) - 1);
+ u32 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u32)) {
+ m = le32_to_cpup(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+ switch (left) {
+ case 3: b |= ((u32)end[2]) << 16; fallthrough;
+ case 2: b |= le16_to_cpup(data); break;
+ case 1: b |= end[0];
+ }
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+ const hsiphash_key_t *key)
+{
+ const u8 *end = data + len - (len % sizeof(u32));
+ const u8 left = len & (sizeof(u32) - 1);
+ u32 m;
+ HPREAMBLE(len)
+ for (; data != end; data += sizeof(u32)) {
+ m = get_unaligned_le32(data);
+ v3 ^= m;
+ HSIPROUND;
+ v0 ^= m;
+ }
+ switch (left) {
+ case 3: b |= ((u32)end[2]) << 16; fallthrough;
+ case 2: b |= get_unaligned_le16(end); break;
+ case 1: b |= end[0];
+ }
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_unaligned);
+#endif
+
+/**
+ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
+ * @first: first u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
+{
+ HPREAMBLE(4)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_1u32);
+
+/**
+ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
+ * @first: first u32
+ * @second: second u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
+{
+ HPREAMBLE(8)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ HSIPROUND;
+ v0 ^= second;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_2u32);
+
+/**
+ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
+ const hsiphash_key_t *key)
+{
+ HPREAMBLE(12)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ HSIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ HSIPROUND;
+ v0 ^= third;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_3u32);
+
+/**
+ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @forth: forth u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+ const u32 forth, const hsiphash_key_t *key)
+{
+ HPREAMBLE(16)
+ v3 ^= first;
+ HSIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ HSIPROUND;
+ v0 ^= second;
+ v3 ^= third;
+ HSIPROUND;
+ v0 ^= third;
+ v3 ^= forth;
+ HSIPROUND;
+ v0 ^= forth;
+ HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_4u32);
+#endif
diff --git a/c_src/linux/string.c b/c_src/linux/string.c
new file mode 100644
index 00000000..a32a8995
--- /dev/null
+++ b/c_src/linux/string.c
@@ -0,0 +1,112 @@
+/*
+ * linux/lib/string.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * stupid library routines.. The optimized versions should generally be found
+ * as inline code in <asm-xx/string.h>
+ *
+ * These are buggy as well..
+ *
+ * * Fri Jun 25 1999, Ingo Oeser <ioe@informatik.tu-chemnitz.de>
+ * - Added strsep() which will replace strtok() soon (because strsep() is
+ * reentrant and should be faster). Use only strsep() in new code, please.
+ *
+ * * Sat Feb 09 2002, Jason Thomas <jason@topic.com.au>,
+ * Matthew Hawkins <matt@mh.dropbear.id.au>
+ * - Kissed strtok() goodbye
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+
+static char *skip_spaces(const char *str)
+{
+ while (isspace(*str))
+ ++str;
+ return (char *)str;
+}
+
+char *strim(char *s)
+{
+ size_t size;
+ char *end;
+
+ size = strlen(s);
+ if (!size)
+ return s;
+
+ end = s + size - 1;
+ while (end >= s && isspace(*end))
+ end--;
+ *(end + 1) = '\0';
+
+ return skip_spaces(s);
+}
+
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
+
+ssize_t strscpy(char *dest, const char *src, size_t count)
+{
+ long res = 0;
+
+ if (count == 0 || WARN_ON_ONCE(count > INT_MAX))
+ return -E2BIG;
+
+ while (count) {
+ char c;
+
+ c = src[res];
+ dest[res] = c;
+ if (!c)
+ return res;
+ res++;
+ count--;
+ }
+
+ /* Hit buffer length without finding a NUL; force NUL-termination. */
+ if (res)
+ dest[res-1] = '\0';
+
+ return -E2BIG;
+}
+
+void memzero_explicit(void *s, size_t count)
+{
+ memset(s, 0, count);
+ barrier_data(s);
+}
+
+int match_string(const char * const *array, size_t n, const char *string)
+{
+ int index;
+ const char *item;
+
+ for (index = 0; index < n; index++) {
+ item = array[index];
+ if (!item)
+ break;
+ if (!strcmp(item, string))
+ return index;
+ }
+
+ return -EINVAL;
+}
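
A small, hypothetical sketch of the strscpy() return convention implemented above;
copy_name() is an illustrative caller only.

static void copy_name(void)
{
	char name[8];
	ssize_t n = strscpy(name, "bcachefs", sizeof(name));

	/*
	 * "bcachefs" is 8 characters, so it cannot fit in 8 bytes together
	 * with the terminating NUL: n == -E2BIG here, but name is still
	 * NUL-terminated as "bcachef".
	 */
	(void) n;
}
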
diff --git a/c_src/linux/string_helpers.c b/c_src/linux/string_helpers.c
new file mode 100644
index 00000000..0810ca13
--- /dev/null
+++ b/c_src/linux/string_helpers.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helpers for formatting and printing strings
+ *
+ * Copyright 31 August 2008 James Bottomley
+ * Copyright (C) 2013, Intel Corporation
+ */
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/export.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+
+/**
+ * string_get_size - get the size in the specified units
+ * @size: The size to be converted in blocks
+ * @blk_size: Size of the block (use 1 for size in bytes)
+ * @units: units to use (powers of 1000 or 1024)
+ * @buf: buffer to format to
+ * @len: length of buffer
+ *
+ * This function returns a string formatted to 3 significant figures
+ * giving the size in the required units. @buf should have room for
+ * at least 9 bytes and will always be zero terminated.
+ *
+ */
+int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
+ char *buf, int len)
+{
+ static const char *const units_10[] = {
+ "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
+ };
+ static const char *const units_2[] = {
+ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
+ };
+ static const char *const *const units_str[] = {
+ [STRING_UNITS_10] = units_10,
+ [STRING_UNITS_2] = units_2,
+ };
+ static const unsigned int divisor[] = {
+ [STRING_UNITS_10] = 1000,
+ [STRING_UNITS_2] = 1024,
+ };
+ static const unsigned int rounding[] = { 500, 50, 5 };
+ int i = 0, j;
+ u32 remainder = 0, sf_cap;
+ char tmp[12];
+ const char *unit;
+
+ tmp[0] = '\0';
+
+ if (blk_size == 0)
+ size = 0;
+ if (size == 0)
+ goto out;
+
+ /* This is Napier's algorithm. Reduce the original block size to
+ *
+ * coefficient * divisor[units]^i
+ *
+ * we do the reduction so both coefficients are just under 32 bits so
+ * that multiplying them together won't overflow 64 bits and we keep
+ * as much precision as possible in the numbers.
+ *
+ * Note: it's safe to throw away the remainders here because all the
+ * precision is in the coefficients.
+ */
+ while (blk_size >> 32) {
+ do_div(blk_size, divisor[units]);
+ i++;
+ }
+
+ while (size >> 32) {
+ do_div(size, divisor[units]);
+ i++;
+ }
+
+ /* now perform the actual multiplication keeping i as the sum of the
+ * two logarithms */
+ size *= blk_size;
+
+ /* and logarithmically reduce it until it's just under the divisor */
+ while (size >= divisor[units]) {
+ remainder = do_div(size, divisor[units]);
+ i++;
+ }
+
+ /* work out in j how many digits of precision we need from the
+ * remainder */
+ sf_cap = size;
+ for (j = 0; sf_cap*10 < 1000; j++)
+ sf_cap *= 10;
+
+ if (units == STRING_UNITS_2) {
+ /* express the remainder as a decimal. It's currently the
+ * numerator of a fraction whose denominator is
+ * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
+ remainder *= 1000;
+ remainder >>= 10;
+ }
+
+ /* add a 5 to the digit below what will be printed to ensure
+ * an arithmetical round up and carry it through to size */
+ remainder += rounding[j];
+ if (remainder >= 1000) {
+ remainder -= 1000;
+ size += 1;
+ }
+
+ if (j) {
+ snprintf(tmp, sizeof(tmp), ".%03u", remainder);
+ tmp[j+1] = '\0';
+ }
+
+ out:
+ if (i >= ARRAY_SIZE(units_2))
+ unit = "UNK";
+ else
+ unit = units_str[units][i];
+
+ return snprintf(buf, len, "%u%s %s", (u32)size, tmp, unit);
+}
+EXPORT_SYMBOL(string_get_size);
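
A short, hypothetical sketch of the formatting behaviour; show_two_gib() is an
illustrative name only.

static void show_two_gib(void)
{
	char buf[16];

	/* 4194304 blocks of 512 bytes = 2 GiB exactly, in binary units */
	string_get_size(4194304, 512, STRING_UNITS_2, buf, sizeof(buf));
	/* buf now holds "2.00 GiB" */
}
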
diff --git a/c_src/linux/timer.c b/c_src/linux/timer.c
new file mode 100644
index 00000000..7d519a4d
--- /dev/null
+++ b/c_src/linux/timer.c
@@ -0,0 +1,329 @@
+
+#include <pthread.h>
+#include <signal.h>
+#include <time.h>
+
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/timer.h>
+
+/**
+ * timespec_add_ns - Adds nanoseconds to a timespec
+ * @a: pointer to timespec to be incremented
+ * @ns: unsigned nanoseconds value to be added
+ *
+ * In the kernel this must always be inlined because it's used from the
+ * x86-64 vDSO, which cannot call other kernel functions.
+ */
+static struct timespec timespec_add_ns(struct timespec a, u64 ns)
+{
+ a.tv_nsec += ns;
+ a.tv_sec += a.tv_nsec / NSEC_PER_SEC;
+ a.tv_nsec %= NSEC_PER_SEC;
+ return a;
+}
+
+#define DECLARE_HEAP(type) \
+struct { \
+ size_t size, used; \
+ type *data; \
+}
+
+#define heap_init(heap, _size) \
+({ \
+ size_t _bytes; \
+ (heap)->used = 0; \
+ (heap)->size = (_size); \
+ _bytes = (heap)->size * sizeof(*(heap)->data); \
+ (heap)->data = malloc(_bytes); \
+ (heap)->data; \
+})
+
+#define heap_free(heap) \
+do { \
+ kvfree((heap)->data); \
+ (heap)->data = NULL; \
+} while (0)
+
+#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j])
+
+#define heap_sift(h, i, cmp) \
+do { \
+ size_t _r, _j = i; \
+ \
+ for (; _j * 2 + 1 < (h)->used; _j = _r) { \
+ _r = _j * 2 + 1; \
+ if (_r + 1 < (h)->used && \
+ cmp((h)->data[_r], (h)->data[_r + 1])) \
+ _r++; \
+ \
+ if (cmp((h)->data[_r], (h)->data[_j])) \
+ break; \
+ heap_swap(h, _r, _j); \
+ } \
+} while (0)
+
+#define heap_sift_down(h, i, cmp) \
+do { \
+ while (i) { \
+ size_t p = (i - 1) / 2; \
+ if (cmp((h)->data[i], (h)->data[p])) \
+ break; \
+ heap_swap(h, i, p); \
+ i = p; \
+ } \
+} while (0)
+
+#define heap_add(h, d, cmp) \
+({ \
+ bool _r = !heap_full(h); \
+ if (_r) { \
+ size_t _i = (h)->used++; \
+ (h)->data[_i] = d; \
+ \
+ heap_sift_down(h, _i, cmp); \
+ heap_sift(h, _i, cmp); \
+ } \
+ _r; \
+})
+
+#define heap_del(h, i, cmp) \
+do { \
+ size_t _i = (i); \
+ \
+ BUG_ON(_i >= (h)->used); \
+ (h)->used--; \
+ if ((_i) < (h)->used) { \
+ heap_swap(h, _i, (h)->used); \
+ heap_sift_down(h, _i, cmp); \
+ heap_sift(h, _i, cmp); \
+ } \
+} while (0)
+
+#define heap_pop(h, d, cmp) \
+({ \
+ bool _r = (h)->used; \
+ if (_r) { \
+ (d) = (h)->data[0]; \
+ heap_del(h, 0, cmp); \
+ } \
+ _r; \
+})
+
+#define heap_peek(h) ((h)->used ? &(h)->data[0] : NULL)
+#define heap_full(h) ((h)->used == (h)->size)
+#define heap_empty(h) ((h)->used == 0)
+
+#define heap_resort(heap, cmp) \
+do { \
+ ssize_t _i; \
+ for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \
+ heap_sift(heap, _i, cmp); \
+} while (0)
+
+struct pending_timer {
+ struct timer_list *timer;
+ unsigned long expires;
+};
+
+static inline bool pending_timer_cmp(struct pending_timer a,
+ struct pending_timer b)
+{
+ return a.expires < b.expires;
+}
+
+static DECLARE_HEAP(struct pending_timer) pending_timers;
+
+static pthread_mutex_t timer_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t timer_cond = PTHREAD_COND_INITIALIZER;
+static pthread_cond_t timer_running_cond = PTHREAD_COND_INITIALIZER;
+static unsigned long timer_seq;
+
+static inline bool timer_running(void)
+{
+ return timer_seq & 1;
+}
+
+static ssize_t timer_idx(struct timer_list *timer)
+{
+ size_t i;
+
+ for (i = 0; i < pending_timers.used; i++)
+ if (pending_timers.data[i].timer == timer)
+ return i;
+
+ return -1;
+}
+
+int del_timer(struct timer_list *timer)
+{
+ ssize_t idx;
+
+ pthread_mutex_lock(&timer_lock);
+ idx = timer_idx(timer);
+ if (idx >= 0)
+ heap_del(&pending_timers, idx, pending_timer_cmp);
+
+ timer->pending = false;
+ pthread_mutex_unlock(&timer_lock);
+
+ return idx >= 0;
+}
+
+void flush_timers(void)
+{
+ unsigned long seq;
+
+ pthread_mutex_lock(&timer_lock);
+ seq = timer_seq;
+ while (timer_running() && seq == timer_seq)
+ pthread_cond_wait(&timer_running_cond, &timer_lock);
+
+ pthread_mutex_unlock(&timer_lock);
+}
+
+int del_timer_sync(struct timer_list *timer)
+{
+ unsigned long seq;
+ ssize_t idx;
+
+ pthread_mutex_lock(&timer_lock);
+ idx = timer_idx(timer);
+ if (idx >= 0)
+ heap_del(&pending_timers, idx, pending_timer_cmp);
+
+ timer->pending = false;
+
+ seq = timer_seq;
+ while (timer_running() && seq == timer_seq)
+ pthread_cond_wait(&timer_running_cond, &timer_lock);
+ pthread_mutex_unlock(&timer_lock);
+
+ return idx >= 0;
+}
+
+int mod_timer(struct timer_list *timer, unsigned long expires)
+{
+ ssize_t idx;
+
+ pthread_mutex_lock(&timer_lock);
+ timer->expires = expires;
+ timer->pending = true;
+ idx = timer_idx(timer);
+
+ if (idx >= 0 &&
+ pending_timers.data[idx].expires == expires)
+ goto out;
+
+ if (idx >= 0) {
+ pending_timers.data[idx].expires = expires;
+
+ heap_sift_down(&pending_timers, idx, pending_timer_cmp);
+ heap_sift(&pending_timers, idx, pending_timer_cmp);
+ } else {
+ if (heap_full(&pending_timers)) {
+ pending_timers.size *= 2;
+ pending_timers.data =
+ realloc(pending_timers.data,
+ pending_timers.size *
+ sizeof(struct pending_timer));
+
+ BUG_ON(!pending_timers.data);
+ }
+
+ heap_add(&pending_timers,
+ ((struct pending_timer) {
+ .timer = timer,
+ .expires = expires,
+ }),
+ pending_timer_cmp);
+ }
+
+ pthread_cond_signal(&timer_cond);
+out:
+ pthread_mutex_unlock(&timer_lock);
+
+ return idx >= 0;
+}
+
+static bool timer_thread_stop = false;
+
+static int timer_thread(void *arg)
+{
+ struct pending_timer *p;
+ struct timespec ts;
+ unsigned long now;
+ int ret;
+
+ pthread_mutex_lock(&timer_lock);
+
+ while (!timer_thread_stop) {
+ now = jiffies;
+ p = heap_peek(&pending_timers);
+
+ if (!p) {
+ pthread_cond_wait(&timer_cond, &timer_lock);
+ continue;
+ }
+
+ if (time_after_eq(now, p->expires)) {
+ struct timer_list *timer = p->timer;
+
+ heap_del(&pending_timers, 0, pending_timer_cmp);
+ BUG_ON(!timer_pending(timer));
+ timer->pending = false;
+
+ timer_seq++;
+ BUG_ON(!timer_running());
+
+ pthread_mutex_unlock(&timer_lock);
+ timer->function(timer);
+ pthread_mutex_lock(&timer_lock);
+
+ timer_seq++;
+ pthread_cond_broadcast(&timer_running_cond);
+ continue;
+ }
+
+
+ ret = clock_gettime(CLOCK_REALTIME, &ts);
+ BUG_ON(ret);
+
+ ts = timespec_add_ns(ts, jiffies_to_nsecs(p->expires - now));
+
+ pthread_cond_timedwait(&timer_cond, &timer_lock, &ts);
+ }
+
+ pthread_mutex_unlock(&timer_lock);
+
+ return 0;
+}
+
+struct task_struct *timer_task;
+
+__attribute__((constructor(103)))
+static void timers_init(void)
+{
+ heap_init(&pending_timers, 64);
+ BUG_ON(!pending_timers.data);
+
+ timer_task = kthread_run(timer_thread, NULL, "timers");
+ BUG_ON(IS_ERR(timer_task));
+}
+
+__attribute__((destructor(103)))
+static void timers_cleanup(void)
+{
+ get_task_struct(timer_task);
+
+ pthread_mutex_lock(&timer_lock);
+ timer_thread_stop = true;
+ pthread_cond_signal(&timer_cond);
+ pthread_mutex_unlock(&timer_lock);
+
+ int ret = kthread_stop(timer_task);
+ BUG_ON(ret);
+
+ put_task_struct(timer_task);
+ timer_task = NULL;
+}
diff --git a/c_src/linux/wait.c b/c_src/linux/wait.c
new file mode 100644
index 00000000..b1f002b9
--- /dev/null
+++ b/c_src/linux/wait.c
@@ -0,0 +1,250 @@
+/*
+ * Generic waiting primitives.
+ *
+ * (C) 2004 Nadia Yvette Chambers, Oracle
+ */
+
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+static inline int waitqueue_active(wait_queue_head_t *q)
+{
+ return !list_empty(&q->task_list);
+}
+
+static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
+{
+ list_add(&new->task_list, &head->task_list);
+}
+
+static inline void __add_wait_queue_tail(wait_queue_head_t *head,
+ wait_queue_t *new)
+{
+ list_add_tail(&new->task_list, &head->task_list);
+}
+
+static inline void
+__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+{
+ wait->flags |= WQ_FLAG_EXCLUSIVE;
+ __add_wait_queue_tail(q, wait);
+}
+
+static inline void
+__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
+{
+ list_del(&old->task_list);
+}
+
+static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+ int nr_exclusive, int wake_flags, void *key)
+{
+ wait_queue_t *curr, *next;
+
+ list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
+ unsigned flags = curr->flags;
+
+ if (curr->func(curr, mode, wake_flags, key) &&
+ (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+ break;
+ }
+}
+
+static void __wake_up(wait_queue_head_t *q, unsigned int mode,
+ int nr_exclusive, void *key)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->lock, flags);
+ __wake_up_common(q, mode, nr_exclusive, 0, key);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+
+void wake_up(wait_queue_head_t *q)
+{
+ __wake_up(q, TASK_NORMAL, 1, NULL);
+}
+
+void wake_up_all(wait_queue_head_t *q)
+{
+ __wake_up(q, TASK_NORMAL, 0, NULL);
+}
+
+static void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
+{
+ __wake_up_common(q, mode, nr, 0, NULL);
+}
+
+void
+prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+ unsigned long flags;
+
+ wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&q->lock, flags);
+ if (list_empty(&wait->task_list))
+ __add_wait_queue(q, wait);
+ set_current_state(state);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+
+static void
+prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+ unsigned long flags;
+
+ wait->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&q->lock, flags);
+ if (list_empty(&wait->task_list))
+ __add_wait_queue_tail(q, wait);
+ set_current_state(state);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+
+void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+{
+ unsigned long flags;
+
+ __set_current_state(TASK_RUNNING);
+ /*
+ * We can check for list emptiness outside the lock
+ * IFF:
+ * - we use the "careful" check that verifies both
+ * the next and prev pointers, so that there cannot
+ * be any half-pending updates in progress on other
+ * CPUs that we haven't seen yet (and that might
+ * still change the stack area),
+ * and
+ * - all other users take the lock (i.e. we can only
+ * have _one_ other CPU that looks at or modifies
+ * the list).
+ */
+ if (!list_empty_careful(&wait->task_list)) {
+ spin_lock_irqsave(&q->lock, flags);
+ list_del_init(&wait->task_list);
+ spin_unlock_irqrestore(&q->lock, flags);
+ }
+}
+
+int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
+ void *key)
+{
+ return wake_up_process(curr->private);
+}
+
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+ int ret = default_wake_function(wait, mode, sync, key);
+
+ if (ret)
+ list_del_init(&wait->task_list);
+ return ret;
+}
+
+struct wait_bit_key {
+ void *flags;
+ int bit_nr;
+ unsigned long timeout;
+};
+
+struct wait_bit_queue {
+ struct wait_bit_key key;
+ wait_queue_t wait;
+};
+
+static int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+{
+ struct wait_bit_key *key = arg;
+ struct wait_bit_queue *wait_bit =
+ container_of(wait, struct wait_bit_queue, wait);
+
+ return (wait_bit->key.flags == key->flags &&
+ wait_bit->key.bit_nr == key->bit_nr &&
+ !test_bit(key->bit_nr, key->flags))
+ ? autoremove_wake_function(wait, mode, sync, key) : 0;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(bit_wq);
+
+#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
+ { .flags = word, .bit_nr = bit, }
+
+#define DEFINE_WAIT_BIT(name, word, bit) \
+ struct wait_bit_queue name = { \
+ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \
+ .wait = { \
+ .private = current, \
+ .func = wake_bit_function, \
+ .task_list = \
+ LIST_HEAD_INIT((name).wait.task_list), \
+ }, \
+ }
+
+void wake_up_bit(void *word, int bit)
+{
+ struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+
+ if (waitqueue_active(&bit_wq))
+ __wake_up(&bit_wq, TASK_NORMAL, 1, &key);
+}
+
+void __wait_on_bit(void *word, int bit, unsigned mode)
+{
+ DEFINE_WAIT_BIT(wait, word, bit);
+
+ do {
+ prepare_to_wait(&bit_wq, &wait.wait, mode);
+ if (test_bit(wait.key.bit_nr, wait.key.flags))
+ schedule();
+ } while (test_bit(wait.key.bit_nr, wait.key.flags));
+
+ finish_wait(&bit_wq, &wait.wait);
+}
+
+void __wait_on_bit_lock(void *word, int bit, unsigned mode)
+{
+ DEFINE_WAIT_BIT(wait, word, bit);
+
+ do {
+ prepare_to_wait_exclusive(&bit_wq, &wait.wait, mode);
+ if (!test_bit(wait.key.bit_nr, wait.key.flags))
+ continue;
+ schedule();
+ } while (test_and_set_bit(wait.key.bit_nr, wait.key.flags));
+ finish_wait(&bit_wq, &wait.wait);
+}
+
+void complete(struct completion *x)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&x->wait.lock, flags);
+ x->done++;
+ __wake_up_locked(&x->wait, TASK_NORMAL, 1);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+
+void wait_for_completion(struct completion *x)
+{
+ spin_lock_irq(&x->wait.lock);
+
+ if (!x->done) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ __add_wait_queue_tail_exclusive(&x->wait, &wait);
+ do {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&x->wait.lock);
+
+ schedule();
+ spin_lock_irq(&x->wait.lock);
+ } while (!x->done);
+ __remove_wait_queue(&x->wait, &wait);
+ if (!x->done)
+ goto out;
+ }
+ x->done--;
+out:
+ spin_unlock_irq(&x->wait.lock);
+}
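
A hypothetical sketch of the completion handoff these helpers implement, assuming
init_completion() from the completion.h shim; worker_fn() and work_done are
illustrative names, and the task_struct returned by kthread_run() is dropped for
brevity.

static struct completion work_done;

static int worker_fn(void *arg)
{
	/* ... produce the result ... */
	complete(&work_done);		/* wakes one exclusive waiter */
	return 0;
}

static void wait_for_worker(void)
{
	init_completion(&work_done);
	kthread_run(worker_fn, NULL, "worker");
	wait_for_completion(&work_done);
}
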
diff --git a/c_src/linux/workqueue.c b/c_src/linux/workqueue.c
new file mode 100644
index 00000000..0d5af3fb
--- /dev/null
+++ b/c_src/linux/workqueue.c
@@ -0,0 +1,346 @@
+#include <pthread.h>
+
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t work_finished = PTHREAD_COND_INITIALIZER;
+static LIST_HEAD(wq_list);
+
+struct workqueue_struct {
+ struct list_head list;
+
+ struct work_struct *current_work;
+ struct list_head pending_work;
+
+ struct task_struct *worker;
+ char name[24];
+};
+
+enum {
+ WORK_PENDING_BIT,
+};
+
+static bool work_pending(struct work_struct *work)
+{
+ return test_bit(WORK_PENDING_BIT, work_data_bits(work));
+}
+
+static void clear_work_pending(struct work_struct *work)
+{
+ clear_bit(WORK_PENDING_BIT, work_data_bits(work));
+}
+
+static bool set_work_pending(struct work_struct *work)
+{
+ return !test_and_set_bit(WORK_PENDING_BIT, work_data_bits(work));
+}
+
+static void __queue_work(struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ BUG_ON(!work_pending(work));
+ BUG_ON(!list_empty(&work->entry));
+
+ list_add_tail(&work->entry, &wq->pending_work);
+ wake_up_process(wq->worker);
+}
+
+bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
+{
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ if ((ret = set_work_pending(work)))
+ __queue_work(wq, work);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+void delayed_work_timer_fn(struct timer_list *timer)
+{
+ struct delayed_work *dwork =
+ container_of(timer, struct delayed_work, timer);
+
+ pthread_mutex_lock(&wq_lock);
+ __queue_work(dwork->wq, &dwork->work);
+ pthread_mutex_unlock(&wq_lock);
+}
+
+static void __queue_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct timer_list *timer = &dwork->timer;
+ struct work_struct *work = &dwork->work;
+
+ BUG_ON(timer->function != delayed_work_timer_fn);
+ BUG_ON(timer_pending(timer));
+ BUG_ON(!list_empty(&work->entry));
+
+ if (!delay) {
+ __queue_work(wq, &dwork->work);
+ } else {
+ dwork->wq = wq;
+ timer->expires = jiffies + delay;
+ add_timer(timer);
+ }
+}
+
+bool queue_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct work_struct *work = &dwork->work;
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ if ((ret = set_work_pending(work)))
+ __queue_delayed_work(wq, dwork, delay);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+static bool grab_pending(struct work_struct *work, bool is_dwork)
+{
+retry:
+ if (set_work_pending(work)) {
+ BUG_ON(!list_empty(&work->entry));
+ return false;
+ }
+
+ if (is_dwork) {
+ struct delayed_work *dwork = to_delayed_work(work);
+
+ if (likely(del_timer(&dwork->timer))) {
+ BUG_ON(!list_empty(&work->entry));
+ return true;
+ }
+ }
+
+ if (!list_empty(&work->entry)) {
+ list_del_init(&work->entry);
+ return true;
+ }
+
+ BUG_ON(!is_dwork);
+
+ pthread_mutex_unlock(&wq_lock);
+ flush_timers();
+ pthread_mutex_lock(&wq_lock);
+ goto retry;
+}
+
+static bool work_running(struct work_struct *work)
+{
+ struct workqueue_struct *wq;
+
+ list_for_each_entry(wq, &wq_list, list)
+ if (wq->current_work == work)
+ return true;
+
+ return false;
+}
+
+bool flush_work(struct work_struct *work)
+{
+ bool ret = false;
+
+ pthread_mutex_lock(&wq_lock);
+ while (work_pending(work) || work_running(work)) {
+ pthread_cond_wait(&work_finished, &wq_lock);
+ ret = true;
+ }
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+static bool __flush_work(struct work_struct *work)
+{
+ bool ret = false;
+
+ while (work_running(work)) {
+ pthread_cond_wait(&work_finished, &wq_lock);
+ ret = true;
+ }
+
+ return ret;
+}
+
+bool cancel_work_sync(struct work_struct *work)
+{
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ ret = grab_pending(work, false);
+
+ __flush_work(work);
+ clear_work_pending(work);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+bool mod_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct work_struct *work = &dwork->work;
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ ret = grab_pending(work, true);
+
+ __queue_delayed_work(wq, dwork, delay);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+bool cancel_delayed_work(struct delayed_work *dwork)
+{
+ struct work_struct *work = &dwork->work;
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ ret = grab_pending(work, true);
+
+ clear_work_pending(&dwork->work);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+bool cancel_delayed_work_sync(struct delayed_work *dwork)
+{
+ struct work_struct *work = &dwork->work;
+ bool ret;
+
+ pthread_mutex_lock(&wq_lock);
+ ret = grab_pending(work, true);
+
+ __flush_work(work);
+ clear_work_pending(work);
+ pthread_mutex_unlock(&wq_lock);
+
+ return ret;
+}
+
+static int worker_thread(void *arg)
+{
+ struct workqueue_struct *wq = arg;
+ struct work_struct *work;
+
+ pthread_mutex_lock(&wq_lock);
+ while (1) {
+ __set_current_state(TASK_INTERRUPTIBLE);
+ work = list_first_entry_or_null(&wq->pending_work,
+ struct work_struct, entry);
+ wq->current_work = work;
+
+ if (kthread_should_stop()) {
+ BUG_ON(wq->current_work);
+ break;
+ }
+
+ if (!work) {
+ pthread_mutex_unlock(&wq_lock);
+ schedule();
+ pthread_mutex_lock(&wq_lock);
+ continue;
+ }
+
+ BUG_ON(!work_pending(work));
+ list_del_init(&work->entry);
+ clear_work_pending(work);
+
+ pthread_mutex_unlock(&wq_lock);
+ work->func(work);
+ pthread_mutex_lock(&wq_lock);
+
+ pthread_cond_broadcast(&work_finished);
+ }
+ pthread_mutex_unlock(&wq_lock);
+
+ return 0;
+}
+
+void destroy_workqueue(struct workqueue_struct *wq)
+{
+ kthread_stop(wq->worker);
+
+ pthread_mutex_lock(&wq_lock);
+ list_del(&wq->list);
+ pthread_mutex_unlock(&wq_lock);
+
+ kfree(wq);
+}
+
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+ unsigned flags,
+ int max_active,
+ ...)
+{
+ va_list args;
+ struct workqueue_struct *wq;
+
+ wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+ if (!wq)
+ return NULL;
+
+ INIT_LIST_HEAD(&wq->list);
+ INIT_LIST_HEAD(&wq->pending_work);
+
+ va_start(args, max_active);
+ vsnprintf(wq->name, sizeof(wq->name), fmt, args);
+ va_end(args);
+
+ wq->worker = kthread_run(worker_thread, wq, "%s", wq->name);
+ if (IS_ERR(wq->worker)) {
+ kfree(wq);
+ return NULL;
+ }
+
+ pthread_mutex_lock(&wq_lock);
+ list_add(&wq->list, &wq_list);
+ pthread_mutex_unlock(&wq_lock);
+
+ return wq;
+}
+
+struct workqueue_struct *system_wq;
+struct workqueue_struct *system_highpri_wq;
+struct workqueue_struct *system_long_wq;
+struct workqueue_struct *system_unbound_wq;
+struct workqueue_struct *system_freezable_wq;
+
+__attribute__((constructor(102)))
+static void wq_init(void)
+{
+ system_wq = alloc_workqueue("events", 0, 0);
+ system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
+ system_long_wq = alloc_workqueue("events_long", 0, 0);
+ system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
+ WQ_UNBOUND_MAX_ACTIVE);
+ system_freezable_wq = alloc_workqueue("events_freezable",
+ WQ_FREEZABLE, 0);
+ BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
+ !system_unbound_wq || !system_freezable_wq);
+}
+
+__attribute__((destructor(102)))
+static void wq_cleanup(void)
+{
+ destroy_workqueue(system_freezable_wq);
+ destroy_workqueue(system_unbound_wq);
+ destroy_workqueue(system_long_wq);
+ destroy_workqueue(system_highpri_wq);
+ destroy_workqueue(system_wq);
+
+ system_wq = system_highpri_wq = system_long_wq = system_unbound_wq =
+ system_freezable_wq = NULL;
+}
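This userspace workqueue shim runs each workqueue on a single kthread worker and serializes all bookkeeping under one global wq_lock. A minimal caller-side sketch, assuming INIT_WORK() and container_of() come from the shim headers as in the kernel:

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct my_job {
	struct work_struct	work;
	int			arg;
};

static void my_job_fn(struct work_struct *work)
{
	struct my_job *job = container_of(work, struct my_job, work);

	/* runs on the workqueue's worker thread, outside wq_lock */
	(void) job->arg;
}

static void workqueue_example(void)
{
	struct my_job job = { .arg = 42 };

	INIT_WORK(&job.work, my_job_fn);
	queue_work(system_wq, &job.work);	/* sets WORK_PENDING_BIT, wakes the worker */
	flush_work(&job.work);			/* returns once the job is neither pending nor running */
}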
diff --git a/c_src/linux/xxhash.c b/c_src/linux/xxhash.c
new file mode 100644
index 00000000..d5bb9ff1
--- /dev/null
+++ b/c_src/linux/xxhash.c
@@ -0,0 +1,500 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2016, Yann Collet.
+ *
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ *
+ * You can contact the author at:
+ * - xxHash homepage: https://cyan4973.github.io/xxHash/
+ * - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+#include <asm/unaligned.h>
+#include <linux/errno.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/xxhash.h>
+
+/*-*************************************
+ * Macros
+ **************************************/
+#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+#define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r)))
+
+#ifdef __LITTLE_ENDIAN
+# define XXH_CPU_LITTLE_ENDIAN 1
+#else
+# define XXH_CPU_LITTLE_ENDIAN 0
+#endif
+
+/*-*************************************
+ * Constants
+ **************************************/
+static const uint32_t PRIME32_1 = 2654435761U;
+static const uint32_t PRIME32_2 = 2246822519U;
+static const uint32_t PRIME32_3 = 3266489917U;
+static const uint32_t PRIME32_4 = 668265263U;
+static const uint32_t PRIME32_5 = 374761393U;
+
+static const uint64_t PRIME64_1 = 11400714785074694791ULL;
+static const uint64_t PRIME64_2 = 14029467366897019727ULL;
+static const uint64_t PRIME64_3 = 1609587929392839161ULL;
+static const uint64_t PRIME64_4 = 9650029242287828579ULL;
+static const uint64_t PRIME64_5 = 2870177450012600261ULL;
+
+/*-**************************
+ * Utils
+ ***************************/
+void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+}
+EXPORT_SYMBOL(xxh32_copy_state);
+
+void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+}
+EXPORT_SYMBOL(xxh64_copy_state);
+
+/*-***************************
+ * Simple Hash Functions
+ ****************************/
+static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
+{
+ seed += input * PRIME32_2;
+ seed = xxh_rotl32(seed, 13);
+ seed *= PRIME32_1;
+ return seed;
+}
+
+uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *b_end = p + len;
+ uint32_t h32;
+
+ if (len >= 16) {
+ const uint8_t *const limit = b_end - 16;
+ uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+ uint32_t v2 = seed + PRIME32_2;
+ uint32_t v3 = seed + 0;
+ uint32_t v4 = seed - PRIME32_1;
+
+ do {
+ v1 = xxh32_round(v1, get_unaligned_le32(p));
+ p += 4;
+ v2 = xxh32_round(v2, get_unaligned_le32(p));
+ p += 4;
+ v3 = xxh32_round(v3, get_unaligned_le32(p));
+ p += 4;
+ v4 = xxh32_round(v4, get_unaligned_le32(p));
+ p += 4;
+ } while (p <= limit);
+
+ h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
+ xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
+ } else {
+ h32 = seed + PRIME32_5;
+ }
+
+ h32 += (uint32_t)len;
+
+ while (p + 4 <= b_end) {
+ h32 += get_unaligned_le32(p) * PRIME32_3;
+ h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h32 += (*p) * PRIME32_5;
+ h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
+EXPORT_SYMBOL(xxh32);
+
+static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
+{
+ acc += input * PRIME64_2;
+ acc = xxh_rotl64(acc, 31);
+ acc *= PRIME64_1;
+ return acc;
+}
+
+static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
+{
+ val = xxh64_round(0, val);
+ acc ^= val;
+ acc = acc * PRIME64_1 + PRIME64_4;
+ return acc;
+}
+
+uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *const b_end = p + len;
+ uint64_t h64;
+
+ if (len >= 32) {
+ const uint8_t *const limit = b_end - 32;
+ uint64_t v1 = seed + PRIME64_1 + PRIME64_2;
+ uint64_t v2 = seed + PRIME64_2;
+ uint64_t v3 = seed + 0;
+ uint64_t v4 = seed - PRIME64_1;
+
+ do {
+ v1 = xxh64_round(v1, get_unaligned_le64(p));
+ p += 8;
+ v2 = xxh64_round(v2, get_unaligned_le64(p));
+ p += 8;
+ v3 = xxh64_round(v3, get_unaligned_le64(p));
+ p += 8;
+ v4 = xxh64_round(v4, get_unaligned_le64(p));
+ p += 8;
+ } while (p <= limit);
+
+ h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
+ xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
+ h64 = xxh64_merge_round(h64, v1);
+ h64 = xxh64_merge_round(h64, v2);
+ h64 = xxh64_merge_round(h64, v3);
+ h64 = xxh64_merge_round(h64, v4);
+
+ } else {
+ h64 = seed + PRIME64_5;
+ }
+
+ h64 += (uint64_t)len;
+
+ while (p + 8 <= b_end) {
+ const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
+
+ h64 ^= k1;
+ h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
+ p += 8;
+ }
+
+ if (p + 4 <= b_end) {
+ h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
+ h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = xxh_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
+EXPORT_SYMBOL(xxh64);
+
+/*-**************************************************
+ * Advanced Hash Functions
+ ***************************************************/
+void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
+{
+ /* use a local state for memcpy() to avoid strict-aliasing warnings */
+ struct xxh32_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.v1 = seed + PRIME32_1 + PRIME32_2;
+ state.v2 = seed + PRIME32_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME32_1;
+ memcpy(statePtr, &state, sizeof(state));
+}
+EXPORT_SYMBOL(xxh32_reset);
+
+void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
+{
+ /* use a local state for memcpy() to avoid strict-aliasing warnings */
+ struct xxh64_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.v1 = seed + PRIME64_1 + PRIME64_2;
+ state.v2 = seed + PRIME64_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME64_1;
+ memcpy(statePtr, &state, sizeof(state));
+}
+EXPORT_SYMBOL(xxh64_reset);
+
+int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *const b_end = p + len;
+
+ if (input == NULL)
+ return -EINVAL;
+
+ state->total_len_32 += (uint32_t)len;
+ state->large_len |= (len >= 16) | (state->total_len_32 >= 16);
+
+ if (state->memsize + len < 16) { /* fill in tmp buffer */
+ memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);
+ state->memsize += (uint32_t)len;
+ return 0;
+ }
+
+ if (state->memsize) { /* some data left from previous update */
+ const uint32_t *p32 = state->mem32;
+
+ memcpy((uint8_t *)(state->mem32) + state->memsize, input,
+ 16 - state->memsize);
+
+ state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));
+ p32++;
+ state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));
+ p32++;
+ state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));
+ p32++;
+ state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));
+ p32++;
+
+ p += 16-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p <= b_end - 16) {
+ const uint8_t *const limit = b_end - 16;
+ uint32_t v1 = state->v1;
+ uint32_t v2 = state->v2;
+ uint32_t v3 = state->v3;
+ uint32_t v4 = state->v4;
+
+ do {
+ v1 = xxh32_round(v1, get_unaligned_le32(p));
+ p += 4;
+ v2 = xxh32_round(v2, get_unaligned_le32(p));
+ p += 4;
+ v3 = xxh32_round(v3, get_unaligned_le32(p));
+ p += 4;
+ v4 = xxh32_round(v4, get_unaligned_le32(p));
+ p += 4;
+ } while (p <= limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < b_end) {
+ memcpy(state->mem32, p, (size_t)(b_end-p));
+ state->memsize = (uint32_t)(b_end-p);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(xxh32_update);
+
+uint32_t xxh32_digest(const struct xxh32_state *state)
+{
+ const uint8_t *p = (const uint8_t *)state->mem32;
+ const uint8_t *const b_end = (const uint8_t *)(state->mem32) +
+ state->memsize;
+ uint32_t h32;
+
+ if (state->large_len) {
+ h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +
+ xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);
+ } else {
+ h32 = state->v3 /* == seed */ + PRIME32_5;
+ }
+
+ h32 += state->total_len_32;
+
+ while (p + 4 <= b_end) {
+ h32 += get_unaligned_le32(p) * PRIME32_3;
+ h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h32 += (*p) * PRIME32_5;
+ h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
+EXPORT_SYMBOL(xxh32_digest);
+
+int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *const b_end = p + len;
+
+ if (input == NULL)
+ return -EINVAL;
+
+ state->total_len += len;
+
+ if (state->memsize + len < 32) { /* fill in tmp buffer */
+ memcpy(((uint8_t *)state->mem64) + state->memsize, input, len);
+ state->memsize += (uint32_t)len;
+ return 0;
+ }
+
+ if (state->memsize) { /* tmp buffer is full */
+ uint64_t *p64 = state->mem64;
+
+ memcpy(((uint8_t *)p64) + state->memsize, input,
+ 32 - state->memsize);
+
+ state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64));
+ p64++;
+ state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64));
+ p64++;
+ state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64));
+ p64++;
+ state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64));
+
+ p += 32 - state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p + 32 <= b_end) {
+ const uint8_t *const limit = b_end - 32;
+ uint64_t v1 = state->v1;
+ uint64_t v2 = state->v2;
+ uint64_t v3 = state->v3;
+ uint64_t v4 = state->v4;
+
+ do {
+ v1 = xxh64_round(v1, get_unaligned_le64(p));
+ p += 8;
+ v2 = xxh64_round(v2, get_unaligned_le64(p));
+ p += 8;
+ v3 = xxh64_round(v3, get_unaligned_le64(p));
+ p += 8;
+ v4 = xxh64_round(v4, get_unaligned_le64(p));
+ p += 8;
+ } while (p <= limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < b_end) {
+ memcpy(state->mem64, p, (size_t)(b_end-p));
+ state->memsize = (uint32_t)(b_end - p);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(xxh64_update);
+
+uint64_t xxh64_digest(const struct xxh64_state *state)
+{
+ const uint8_t *p = (const uint8_t *)state->mem64;
+ const uint8_t *const b_end = (const uint8_t *)state->mem64 +
+ state->memsize;
+ uint64_t h64;
+
+ if (state->total_len >= 32) {
+ const uint64_t v1 = state->v1;
+ const uint64_t v2 = state->v2;
+ const uint64_t v3 = state->v3;
+ const uint64_t v4 = state->v4;
+
+ h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
+ xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
+ h64 = xxh64_merge_round(h64, v1);
+ h64 = xxh64_merge_round(h64, v2);
+ h64 = xxh64_merge_round(h64, v3);
+ h64 = xxh64_merge_round(h64, v4);
+ } else {
+ h64 = state->v3 + PRIME64_5;
+ }
+
+ h64 += (uint64_t)state->total_len;
+
+ while (p + 8 <= b_end) {
+ const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
+
+ h64 ^= k1;
+ h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
+ p += 8;
+ }
+
+ if (p + 4 <= b_end) {
+ h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
+ h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = xxh_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
+EXPORT_SYMBOL(xxh64_digest);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("xxHash");
diff --git a/c_src/linux/zstd_compress_module.c b/c_src/linux/zstd_compress_module.c
new file mode 100644
index 00000000..35cc5cba
--- /dev/null
+++ b/c_src/linux/zstd_compress_module.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#define ZSTD_FORWARD_IF_ERR(ret) \
+ do { \
+ size_t const __ret = (ret); \
+ if (ZSTD_isError(__ret)) \
+ return __ret; \
+ } while (0)
+
+static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters,
+ unsigned long long pledged_src_size)
+{
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset(
+ cctx, ZSTD_reset_session_and_parameters));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize(
+ cctx, pledged_src_size));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_windowLog, parameters->cParams.windowLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_hashLog, parameters->cParams.hashLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_chainLog, parameters->cParams.chainLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_searchLog, parameters->cParams.searchLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_minMatch, parameters->cParams.minMatch));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_targetLength, parameters->cParams.targetLength));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_strategy, parameters->cParams.strategy));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag));
+ return 0;
+}
+
+int zstd_min_clevel(void)
+{
+ return ZSTD_minCLevel();
+}
+EXPORT_SYMBOL(zstd_min_clevel);
+
+int zstd_max_clevel(void)
+{
+ return ZSTD_maxCLevel();
+}
+EXPORT_SYMBOL(zstd_max_clevel);
+
+size_t zstd_compress_bound(size_t src_size)
+{
+ return ZSTD_compressBound(src_size);
+}
+EXPORT_SYMBOL(zstd_compress_bound);
+
+zstd_parameters zstd_get_params(int level,
+ unsigned long long estimated_src_size)
+{
+ return ZSTD_getParams(level, estimated_src_size, 0);
+}
+EXPORT_SYMBOL(zstd_get_params);
+
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
+{
+ return ZSTD_estimateCCtxSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cctx_workspace_bound);
+
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ return ZSTD_initStaticCCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_cctx);
+
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size, const zstd_parameters *parameters)
+{
+ ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size));
+ return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_compress_cctx);
+
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
+{
+ return ZSTD_estimateCStreamSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cstream_workspace_bound);
+
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+ unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
+{
+ zstd_cstream *cstream;
+
+ if (workspace == NULL)
+ return NULL;
+
+ cstream = ZSTD_initStaticCStream(workspace, workspace_size);
+ if (cstream == NULL)
+ return NULL;
+
+ /* 0 means unknown in the Linux zstd API but means 0 in the new zstd API */

+ if (pledged_src_size == 0)
+ pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
+
+ if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size)))
+ return NULL;
+
+ return cstream;
+}
+EXPORT_SYMBOL(zstd_init_cstream);
+
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+ unsigned long long pledged_src_size)
+{
+ return ZSTD_resetCStream(cstream, pledged_src_size);
+}
+EXPORT_SYMBOL(zstd_reset_cstream);
+
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+ zstd_in_buffer *input)
+{
+ return ZSTD_compressStream(cstream, output, input);
+}
+EXPORT_SYMBOL(zstd_compress_stream);
+
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+ return ZSTD_flushStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_flush_stream);
+
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+ return ZSTD_endStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_end_stream);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Compressor");
diff --git a/c_src/linux/zstd_decompress_module.c b/c_src/linux/zstd_decompress_module.c
new file mode 100644
index 00000000..7e8cd446
--- /dev/null
+++ b/c_src/linux/zstd_decompress_module.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+/* Common symbols. zstd_compress must depend on zstd_decompress. */
+
+unsigned int zstd_is_error(size_t code)
+{
+ return ZSTD_isError(code);
+}
+EXPORT_SYMBOL(zstd_is_error);
+
+zstd_error_code zstd_get_error_code(size_t code)
+{
+ return ZSTD_getErrorCode(code);
+}
+EXPORT_SYMBOL(zstd_get_error_code);
+
+const char *zstd_get_error_name(size_t code)
+{
+ return ZSTD_getErrorName(code);
+}
+EXPORT_SYMBOL(zstd_get_error_name);
+
+/* Decompression symbols. */
+
+size_t zstd_dctx_workspace_bound(void)
+{
+ return ZSTD_estimateDCtxSize();
+}
+EXPORT_SYMBOL(zstd_dctx_workspace_bound);
+
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ return ZSTD_initStaticDCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dctx);
+
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size)
+{
+ return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_decompress_dctx);
+
+size_t zstd_dstream_workspace_bound(size_t max_window_size)
+{
+ return ZSTD_estimateDStreamSize(max_window_size);
+}
+EXPORT_SYMBOL(zstd_dstream_workspace_bound);
+
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+ size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ (void)max_window_size;
+ return ZSTD_initStaticDStream(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dstream);
+
+size_t zstd_reset_dstream(zstd_dstream *dstream)
+{
+ return ZSTD_resetDStream(dstream);
+}
+EXPORT_SYMBOL(zstd_reset_dstream);
+
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+ zstd_in_buffer *input)
+{
+ return ZSTD_decompressStream(dstream, output, input);
+}
+EXPORT_SYMBOL(zstd_decompress_stream);
+
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
+{
+ return ZSTD_findFrameCompressedSize(src, src_size);
+}
+EXPORT_SYMBOL(zstd_find_frame_compressed_size);
+
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
+ size_t src_size)
+{
+ return ZSTD_getFrameHeader(header, src, src_size);
+}
+EXPORT_SYMBOL(zstd_get_frame_header);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Decompressor");
diff --git a/c_src/qcow2.c b/c_src/qcow2.c
new file mode 100644
index 00000000..30a6e056
--- /dev/null
+++ b/c_src/qcow2.c
@@ -0,0 +1,134 @@
+
+#include <errno.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "qcow2.h"
+#include "tools-util.h"
+
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define QCOW_VERSION 2
+#define QCOW_OFLAG_COPIED (1LL << 63)
+
+struct qcow2_hdr {
+ u32 magic;
+ u32 version;
+
+ u64 backing_file_offset;
+ u32 backing_file_size;
+
+ u32 block_bits;
+ u64 size;
+ u32 crypt_method;
+
+ u32 l1_size;
+ u64 l1_table_offset;
+
+ u64 refcount_table_offset;
+ u32 refcount_table_blocks;
+
+ u32 nb_snapshots;
+ u64 snapshots_offset;
+};
+
+struct qcow2_image {
+ int fd;
+ u32 block_size;
+ u64 *l1_table;
+ u64 l1_offset;
+ u32 l1_index;
+ u64 *l2_table;
+ u64 offset;
+};
+
+static void flush_l2(struct qcow2_image *img)
+{
+ if (img->l1_index != -1) {
+ img->l1_table[img->l1_index] =
+ cpu_to_be64(img->offset|QCOW_OFLAG_COPIED);
+ xpwrite(img->fd, img->l2_table, img->block_size, img->offset,
+ "qcow2 l2 table");
+ img->offset += img->block_size;
+
+ memset(img->l2_table, 0, img->block_size);
+ img->l1_index = -1;
+ }
+}
+
+static void add_l2(struct qcow2_image *img, u64 src_blk, u64 dst_offset)
+{
+ unsigned l2_size = img->block_size / sizeof(u64);
+ u64 l1_index = src_blk / l2_size;
+ u64 l2_index = src_blk & (l2_size - 1);
+
+ if (img->l1_index != l1_index) {
+ flush_l2(img);
+ img->l1_index = l1_index;
+ }
+
+ img->l2_table[l2_index] = cpu_to_be64(dst_offset|QCOW_OFLAG_COPIED);
+}
+
+void qcow2_write_image(int infd, int outfd, ranges *data,
+ unsigned block_size)
+{
+ u64 image_size = get_size(infd);
+ unsigned l2_size = block_size / sizeof(u64);
+ unsigned l1_size = DIV_ROUND_UP(image_size, (u64) block_size * l2_size);
+ struct qcow2_hdr hdr = { 0 };
+ struct qcow2_image img = {
+ .fd = outfd,
+ .block_size = block_size,
+ .l2_table = xcalloc(l2_size, sizeof(u64)),
+ .l1_table = xcalloc(l1_size, sizeof(u64)),
+ .l1_index = -1,
+ .offset = round_up(sizeof(hdr), block_size),
+ };
+ char *buf = xmalloc(block_size);
+ u64 src_offset, dst_offset;
+
+ assert(is_power_of_2(block_size));
+
+ ranges_roundup(data, block_size);
+ ranges_sort_merge(data);
+
+ /* Write data: */
+ darray_for_each(*data, r)
+ for (src_offset = r->start;
+ src_offset < r->end;
+ src_offset += block_size) {
+ dst_offset = img.offset;
+ img.offset += img.block_size;
+
+ xpread(infd, buf, block_size, src_offset);
+ xpwrite(outfd, buf, block_size, dst_offset,
+ "qcow2 data");
+
+ add_l2(&img, src_offset / block_size, dst_offset);
+ }
+
+ flush_l2(&img);
+
+ /* Write L1 table: */
+ dst_offset = img.offset;
+ img.offset += round_up(l1_size * sizeof(u64), block_size);
+ xpwrite(img.fd, img.l1_table, l1_size * sizeof(u64), dst_offset,
+ "qcow2 l1 table");
+
+ /* Write header: */
+ hdr.magic = cpu_to_be32(QCOW_MAGIC);
+ hdr.version = cpu_to_be32(QCOW_VERSION);
+ hdr.block_bits = cpu_to_be32(ilog2(block_size));
+ hdr.size = cpu_to_be64(image_size);
+ hdr.l1_size = cpu_to_be32(l1_size);
+ hdr.l1_table_offset = cpu_to_be64(dst_offset);
+
+ memset(buf, 0, block_size);
+ memcpy(buf, &hdr, sizeof(hdr));
+ xpwrite(img.fd, buf, block_size, 0,
+ "qcow2 header");
+
+ free(img.l2_table);
+ free(img.l1_table);
+ free(buf);
+}
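qcow2_write_image() copies the listed extents of the input fd into a sparse qcow2 image on the output fd. A hedged usage sketch: range_add(), appending an (offset, size) extent to the ranges darray, is assumed from tools-util.h, the block size must be a power of two, and error handling is omitted for brevity:

#include <fcntl.h>
#include <unistd.h>

#include "qcow2.h"
#include "tools-util.h"

static void qcow2_example(const char *src_path, const char *dst_path)
{
	int infd  = open(src_path, O_RDONLY);
	int outfd = open(dst_path, O_WRONLY|O_CREAT|O_TRUNC, 0600);
	ranges data = {};

	/* mark the byte ranges of the source that actually hold data
	 * (range_add() is assumed from tools-util.h) */
	range_add(&data, 0, 1U << 20);

	qcow2_write_image(infd, outfd, &data, 1U << 20);	/* 1 MiB qcow2 blocks */

	close(outfd);
	close(infd);
}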
diff --git a/c_src/qcow2.h b/c_src/qcow2.h
new file mode 100644
index 00000000..0943d55c
--- /dev/null
+++ b/c_src/qcow2.h
@@ -0,0 +1,9 @@
+#ifndef _QCOW2_H
+#define _QCOW2_H
+
+#include <linux/types.h>
+#include "tools-util.h"
+
+void qcow2_write_image(int, int, ranges *, unsigned);
+
+#endif /* _QCOW2_H */
diff --git a/c_src/raid/COPYING b/c_src/raid/COPYING
new file mode 100644
index 00000000..a43ea212
--- /dev/null
+++ b/c_src/raid/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/c_src/raid/check.c b/c_src/raid/check.c
new file mode 100644
index 00000000..9bed9337
--- /dev/null
+++ b/c_src/raid/check.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2015 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "combo.h"
+#include "gf.h"
+
+/**
+ * Validate the provided failed blocks.
+ *
+ * This function checks if the specified failed blocks satisfy the redundancy
+ * information using the data from the known valid parity blocks.
+ *
+ * It is similar to raid_check(), just with a different format for the
+ * arguments.
+ *
+ * The number of failed blocks @nr must be strictly less than the number of
+ * parities @nv, because one extra parity is needed to validate the recovery.
+ *
+ * No data or parity blocks are modified.
+ *
+ * @nr Number of failed data blocks.
+ * @id[] Vector of @nr indexes of the failed data blocks.
+ * The indexes start from 0. They must be in order.
+ * @nv Number of valid parity blocks.
+ * @ip[] Vector of @nv indexes of the valid parity blocks.
+ * The indexes start from 0. They must be in order.
+ * @nd Number of data blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ * It has (@nd + @ip[@nv - 1] + 1) elements. The starting elements are the
+ * blocks for data, following with the parity blocks.
+ * Each block has @size bytes.
+ * @return 0 if the check is satisfied. -1 otherwise.
+ */
+static int raid_validate(int nr, int *id, int nv, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ const uint8_t *T[RAID_PARITY_MAX][RAID_PARITY_MAX];
+ uint8_t G[RAID_PARITY_MAX * RAID_PARITY_MAX];
+ uint8_t V[RAID_PARITY_MAX * RAID_PARITY_MAX];
+ size_t i;
+ int j, k, l;
+
+ BUG_ON(nr >= nv);
+
+ /* setup the coefficients matrix */
+ for (j = 0; j < nr; ++j)
+ for (k = 0; k < nr; ++k)
+ G[j * nr + k] = A(ip[j], id[k]);
+
+ /* invert it to solve the system of linear equations */
+ raid_invert(G, V, nr);
+
+ /* get multiplication tables */
+ for (j = 0; j < nr; ++j)
+ for (k = 0; k < nr; ++k)
+ T[j][k] = table(V[j * nr + k]);
+
+ /* check all positions */
+ for (i = 0; i < size; ++i) {
+ uint8_t p[RAID_PARITY_MAX];
+
+ /* get parity */
+ for (j = 0; j < nv; ++j)
+ p[j] = v[nd + ip[j]][i];
+
+ /* compute delta parity, skipping broken disks */
+ for (j = 0, k = 0; j < nd; ++j) {
+ uint8_t b;
+
+ /* skip broken disks */
+ if (k < nr && id[k] == j) {
+ ++k;
+ continue;
+ }
+
+ b = v[j][i];
+ for (l = 0; l < nv; ++l)
+ p[l] ^= gfmul[b][gfgen[ip[l]][j]];
+ }
+
+ /* reconstruct data */
+ for (j = 0; j < nr; ++j) {
+ uint8_t b = 0;
+ int idj = id[j];
+
+ /* recompute the data */
+ for (k = 0; k < nr; ++k)
+ b ^= T[j][k][p[k]];
+
+ /* add the parity contribution of the reconstructed data */
+ for (l = nr; l < nv; ++l)
+ p[l] ^= gfmul[b][gfgen[ip[l]][idj]];
+ }
+
+ /* check that the final parity is 0 */
+ for (l = nr; l < nv; ++l)
+ if (p[l] != 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+int raid_check(int nr, int *ir, int nd, int np, size_t size, void **v)
+{
+ /* valid parity index */
+ int ip[RAID_PARITY_MAX];
+ int vp;
+ int rd;
+ int i, j;
+
+ /* enforce limit on size */
+ BUG_ON(size % 64 != 0);
+
+ /* enforce limit on number of failures */
+ BUG_ON(nr >= np); /* >= because we check with extra parity */
+ BUG_ON(np > RAID_PARITY_MAX);
+
+ /* enforce order in index vector */
+ BUG_ON(nr >= 2 && ir[0] >= ir[1]);
+ BUG_ON(nr >= 3 && ir[1] >= ir[2]);
+ BUG_ON(nr >= 4 && ir[2] >= ir[3]);
+ BUG_ON(nr >= 5 && ir[3] >= ir[4]);
+ BUG_ON(nr >= 6 && ir[4] >= ir[5]);
+
+ /* enforce limit on index vector */
+ BUG_ON(nr > 0 && ir[nr-1] >= nd + np);
+
+ /* count failed data disk */
+ rd = 0;
+ while (rd < nr && ir[rd] < nd)
+ ++rd;
+
+ /* put valid parities into ip[] */
+ vp = 0;
+ for (i = rd, j = 0; j < np; ++j) {
+ /* if parity is failed */
+ if (i < nr && ir[i] == nd + j) {
+ /* skip broken parity */
+ ++i;
+ } else {
+ /* store valid parity */
+ ip[vp] = j;
+ ++vp;
+ }
+ }
+
+ return raid_validate(rd, ir, vp, ip, nd, size, v);
+}
+
+int raid_scan(int *ir, int nd, int np, size_t size, void **v)
+{
+ int r;
+
+ /* check the special case of no failure */
+ if (np != 0 && raid_check(0, 0, nd, np, size, v) == 0)
+ return 0;
+
+ /* for each number of possible failures */
+ for (r = 1; r < np; ++r) {
+ /* try all combinations of r failures on n disks */
+ combination_first(r, nd + np, ir);
+ do {
+ /* verify if the combination is a valid one */
+ if (raid_check(r, ir, nd, np, size, v) == 0)
+ return r;
+ } while (combination_next(r, nd + np, ir));
+ }
+
+ /* no solution found */
+ return -1;
+}
+
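raid_check() verifies a given set of failed blocks against the surviving parities, and raid_scan() searches for the smallest such set. A hedged sketch: raid_init() and raid_gen() are assumed from the embedded raid library's raid.h, and every block must be a multiple of 64 bytes and suitably aligned:

#include <stdlib.h>

#include "raid/raid.h"

static int raid_scan_example(void)
{
	enum { ND = 4, NP = 2, SIZE = 4096 };
	void *v[ND + NP];
	int ir[NP];
	int i, r;

	raid_init();				/* assumed: selects the runtime implementation */

	for (i = 0; i < ND + NP; i++)
		v[i] = aligned_alloc(64, SIZE);	/* fill v[0..ND-1] with data */

	raid_gen(ND, NP, SIZE, v);		/* assumed: computes parity into v[ND..] */

	/* 0: consistent; >0: number of failed blocks, indexes in ir[]; -1: no solution */
	r = raid_scan(ir, ND, NP, SIZE, v);

	for (i = 0; i < ND + NP; i++)
		free(v[i]);
	return r;
}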
diff --git a/c_src/raid/combo.h b/c_src/raid/combo.h
new file mode 100644
index 00000000..8efc31ad
--- /dev/null
+++ b/c_src/raid/combo.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_COMBO_H
+#define __RAID_COMBO_H
+
+#include <assert.h>
+
+/**
+ * Get the first permutation with repetition of r of n elements.
+ *
+ * Typical use is with permutation_next() in the form:
+ *
+ * int i[R];
+ * permutation_first(R, N, i);
+ * do {
+ * code using i[0], i[1], ..., i[R-1]
+ * } while (permutation_next(R, N, i));
+ *
+ * It is equivalent to the code:
+ *
+ * for(i[0]=0;i[0]<N;++i[0])
+ * for(i[1]=0;i[1]<N;++i[1])
+ * ...
+ * for(i[R-2]=0;i[R-2]<N;++i[R-2])
+ * for(i[R-1]=0;i[R-1]<N;++i[R-1])
+ * code using i[0], i[1], ..., i[R-1]
+ */
+static __always_inline void permutation_first(int r, int n, int *c)
+{
+ int i;
+
+ (void)n; /* unused, but kept for clarity */
+ assert(0 < r && r <= n);
+
+ for (i = 0; i < r; ++i)
+ c[i] = 0;
+}
+
+/**
+ * Get the next permutation with repetition of r of n elements.
+ * Return ==0 when finished.
+ */
+static __always_inline int permutation_next(int r, int n, int *c)
+{
+ int i = r - 1; /* present position */
+
+recurse:
+ /* next element at position i */
+ ++c[i];
+
+ /* if the position has reached the max */
+ if (c[i] >= n) {
+
+ /* if we are at the first level, we have finished */
+ if (i == 0)
+ return 0;
+
+ /* increase the previous position */
+ --i;
+ goto recurse;
+ }
+
+ ++i;
+
+ /* initialize all the next positions, if any */
+ while (i < r) {
+ c[i] = 0;
+ ++i;
+ }
+
+ return 1;
+}
+
+/**
+ * Get the first combination without repetition of r of n elements.
+ *
+ * Typical use is with combination_next() in the form:
+ *
+ * int i[R];
+ * combination_first(R, N, i);
+ * do {
+ * code using i[0], i[1], ..., i[R-1]
+ * } while (combination_next(R, N, i));
+ *
+ * It is equivalent to the code:
+ *
+ * for(i[0]=0;i[0]<N-(R-1);++i[0])
+ * for(i[1]=i[0]+1;i[1]<N-(R-2);++i[1])
+ * ...
+ * for(i[R-2]=i[R-3]+1;i[R-2]<N-1;++i[R-2])
+ * for(i[R-1]=i[R-2]+1;i[R-1]<N;++i[R-1])
+ * code using i[0], i[1], ..., i[R-1]
+ */
+static __always_inline void combination_first(int r, int n, int *c)
+{
+ int i;
+
+ (void)n; /* unused, but kept for clarity */
+ assert(0 < r && r <= n);
+
+ for (i = 0; i < r; ++i)
+ c[i] = i;
+}
+
+/**
+ * Get the next combination without repetition of r of n elements.
+ * Return ==0 when finished.
+ */
+static __always_inline int combination_next(int r, int n, int *c)
+{
+ int i = r - 1; /* present position */
+ int h = n; /* high limit for this position */
+
+recurse:
+ /* next element at position i */
+ ++c[i];
+
+ /* if the position has reached the max */
+ if (c[i] >= h) {
+
+ /* if we are at the first level, we have finished */
+ if (i == 0)
+ return 0;
+
+ /* increase the previous position */
+ --i;
+ --h;
+ goto recurse;
+ }
+
+ ++i;
+
+ /* initialize all the next positions, if any */
+ while (i < r) {
+ /* each position starts at the next value of the previous one */
+ c[i] = c[i - 1] + 1;
+ ++i;
+ }
+
+ return 1;
+}
+#endif
+
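These enumeration helpers are what raid_scan() uses to walk candidate failure sets. A small standalone sketch (assuming __always_inline is visible, e.g. via linux/compiler.h) that prints all C(4,2) = 6 combinations:

#include <stdio.h>
#include <linux/compiler.h>

#include "raid/combo.h"

static void combo_example(void)
{
	int c[2];

	combination_first(2, 4, c);
	do {
		printf("%d %d\n", c[0], c[1]);	/* 0 1, 0 2, 0 3, 1 2, 1 3, 2 3 */
	} while (combination_next(2, 4, c));
}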
diff --git a/c_src/raid/cpu.h b/c_src/raid/cpu.h
new file mode 100644
index 00000000..ed909bb7
--- /dev/null
+++ b/c_src/raid/cpu.h
@@ -0,0 +1,331 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_CPU_H
+#define __RAID_CPU_H
+
+#ifdef CONFIG_X86
+
+static inline void raid_cpuid(uint32_t func_eax, uint32_t sub_ecx, uint32_t *reg)
+{
+ asm volatile (
+#if defined(__i386__) && defined(__PIC__)
+ /* allow compilation in PIC mode by preserving ebx */
+ "xchgl %%ebx, %1\n"
+ "cpuid\n"
+ "xchgl %%ebx, %1\n"
+ : "=a" (reg[0]), "=r" (reg[1]), "=c" (reg[2]), "=d" (reg[3])
+ : "0" (func_eax), "2" (sub_ecx)
+#else
+ "cpuid\n"
+ : "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3])
+ : "0" (func_eax), "2" (sub_ecx)
+#endif
+ );
+}
+
+static inline void raid_xgetbv(uint32_t* reg)
+{
+ /* get the value of the Extended Control Register ecx=0 */
+ asm volatile (
+ /* use a direct encoding of the XGETBV instruction because only */
+ /* recent assemblers support it. */
+ /* the next line is equivalent to: "xgetbv\n" */
+ ".byte 0x0f, 0x01, 0xd0\n"
+ : "=a" (reg[0]), "=d" (reg[3])
+ : "c" (0)
+ );
+}
+
+#define CPU_VENDOR_MAX 13
+
+static inline void raid_cpu_info(char *vendor, unsigned *family, unsigned *model)
+{
+ uint32_t reg[4];
+ unsigned f, ef, m, em;
+
+ raid_cpuid(0, 0, reg);
+
+ ((uint32_t*)vendor)[0] = reg[1];
+ ((uint32_t*)vendor)[1] = reg[3];
+ ((uint32_t*)vendor)[2] = reg[2];
+ vendor[12] = 0;
+
+ raid_cpuid(1, 0, reg);
+
+ f = (reg[0] >> 8) & 0xF;
+ ef = (reg[0] >> 20) & 0xFF;
+ m = (reg[0] >> 4) & 0xF;
+ em = (reg[0] >> 16) & 0xF;
+
+ if (strcmp(vendor, "AuthenticAMD") == 0) {
+ if (f < 15) {
+ *family = f;
+ *model = m;
+ } else {
+ *family = f + ef;
+ *model = m + (em << 4);
+ }
+ } else {
+ *family = f + ef;
+ *model = m + (em << 4);
+ }
+}
+
+static inline int raid_cpu_match_sse(uint32_t cpuid_1_ecx, uint32_t cpuid_1_edx)
+{
+ uint32_t reg[4];
+
+ raid_cpuid(1, 0, reg);
+ if ((reg[2] & cpuid_1_ecx) != cpuid_1_ecx)
+ return 0;
+ if ((reg[3] & cpuid_1_edx) != cpuid_1_edx)
+ return 0;
+
+ return 1;
+}
+
+static inline int raid_cpu_match_avx(uint32_t cpuid_1_ecx, uint32_t cpuid_7_ebx, uint32_t xcr0)
+{
+ uint32_t reg[4];
+
+ raid_cpuid(1, 0, reg);
+ if ((reg[2] & cpuid_1_ecx) != cpuid_1_ecx)
+ return 0;
+
+ raid_xgetbv(reg);
+ if ((reg[0] & xcr0) != xcr0)
+ return 0;
+
+ raid_cpuid(7, 0, reg);
+ if ((reg[1] & cpuid_7_ebx) != cpuid_7_ebx)
+ return 0;
+
+ return 1;
+}
+
+static inline int raid_cpu_has_sse2(void)
+{
+ /*
+ * Intel® 64 and IA-32 Architectures Software Developer's Manual
+ * 325462-048US September 2013
+ *
+ * 11.6.2 Checking for SSE/SSE2 Support
+ * Before an application attempts to use the SSE and/or SSE2 extensions, it should check
+ * that they are present on the processor:
+ * 1. Check that the processor supports the CPUID instruction. Bit 21 of the EFLAGS
+ * register can be used to check the processor's support for the CPUID instruction.
+ * 2. Check that the processor supports the SSE and/or SSE2 extensions (true if
+ * CPUID.01H:EDX.SSE[bit 25] = 1 and/or CPUID.01H:EDX.SSE2[bit 26] = 1).
+ */
+ return raid_cpu_match_sse(
+ 0,
+ 1 << 26); /* SSE2 */
+}
+
+static inline int raid_cpu_has_ssse3(void)
+{
+ /*
+ * Intel® 64 and IA-32 Architectures Software Developer's Manual
+ * 325462-048US September 2013
+ *
+ * 12.7.2 Checking for SSSE3 Support
+ * Before an application attempts to use the SSSE3 extensions, the application should
+ * follow the steps illustrated in Section 11.6.2, "Checking for SSE/SSE2 Support."
+ * Next, use the additional step provided below:
+ * Check that the processor supports SSSE3 (if CPUID.01H:ECX.SSSE3[bit 9] = 1).
+ */
+ return raid_cpu_match_sse(
+ 1 << 9, /* SSSE3 */
+ 1 << 26); /* SSE2 */
+}
+
+static inline int raid_cpu_has_crc32(void)
+{
+ /*
+ * Intel® 64 and IA-32 Architectures Software Developer's Manual
+ * 325462-048US September 2013
+ *
+ * 12.12.3 Checking for SSE4.2 Support
+ * ...
+ * Before an application attempts to use the CRC32 instruction, it must check
+ * that the processor supports SSE4.2 (if CPUID.01H:ECX.SSE4_2[bit 20] = 1).
+ */
+ return raid_cpu_match_sse(
+ 1 << 20, /* CRC32 */
+ 0);
+}
+
+static inline int raid_cpu_has_avx2(void)
+{
+ /*
+ * Intel Architecture Instruction Set Extensions Programming Reference
+ * 319433-022 October 2014
+ *
+ * 14.3 Detection of AVX instructions
+ * 1) Detect CPUID.1:ECX.OSXSAVE[bit 27] = 1 (XGETBV enabled for application use)
+ * 2) Issue XGETBV and verify that XCR0[2:1] = `11b' (XMM state and YMM state are enabled by OS).
+ * 3) detect CPUID.1:ECX.AVX[bit 28] = 1 (AVX instructions supported).
+ * (Step 3 can be done in any order relative to 1 and 2)
+ *
+ * 14.7.1 Detection of AVX2
+ * Hardware support for AVX2 is indicated by CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]=1.
+ * Application Software must identify that hardware supports AVX, after that it must
+ * also detect support for AVX2 by checking CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5].
+ */
+ return raid_cpu_match_avx(
+ (1 << 27) | (1 << 28), /* OSXSAVE and AVX */
+ 1 << 5, /* AVX2 */
+ 3 << 1); /* OS saves XMM and YMM registers */
+}
+
+static inline int raid_cpu_has_avx512bw(void)
+{
+ /*
+ * Intel Architecture Instruction Set Extensions Programming Reference
+ * 319433-022 October 2014
+ *
+ * 2.2 Detection of 512-bit Instruction Groups of Intel AVX-512 Family
+ * 1) Detect CPUID.1:ECX.OSXSAVE[bit 27] = 1 (XGETBV enabled for application use)
+ * 2) Execute XGETBV and verify that XCR0[7:5] = `111b' (OPMASK state, upper 256-bit of
+ * ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled by OS) and that XCR0[2:1] = `11b'
+ * (XMM state and YMM state are enabled by OS).
+ * 3) Verify both CPUID.0x7.0:EBX.AVX512F[bit 16] = 1, CPUID.0x7.0:EBX.AVX512BW[bit 30] = 1.
+ */
+
+ /* note that we intentionally don't check for AVX and AVX2 */
+ /* because the documentation doesn't require it */
+ return raid_cpu_match_avx(
+ 1 << 27, /* XSAVE/XGETBV */
+ (1 << 16) | (1 << 30), /* AVX512F and AVX512BW */
+ (3 << 1) | (7 << 5)); /* OS saves XMM, YMM and ZMM registers */
+}
+
+/**
+ * Check if it's an Intel Atom CPU.
+ */
+static inline int raid_cpu_is_atom(unsigned family, unsigned model)
+{
+ if (family != 6)
+ return 0;
+
+ /*
+ * x86 Architecture CPUID
+ * http://www.sandpile.org/x86/cpuid.htm
+ *
+ * Intel Atom
+ * 1C (28) Atom (45 nm) with 512 KB on-die L2
+ * 26 (38) Atom (45 nm) with 512 KB on-die L2
+ * 36 (54) Atom (32 nm) with 512 KB on-die L2
+ * 27 (39) Atom (32 nm) with 512 KB on-die L2
+ * 35 (53) Atom (?? nm) with ??? KB on-die L2
+ * 4A (74) Atom 2C (22 nm) 1 MB L2 + PowerVR (TGR)
+ * 5A (90) Atom 4C (22 nm) 2 MB L2 + PowerVR (ANN)
+ * 37 (55) Atom 4C (22 nm) 2 MB L2 + Intel Gen7 (BYT)
+ * 4C (76) Atom 4C (14 nm) 2 MB L2 + Intel Gen8 (BSW)
+ * 5D (93) Atom 4C (28 nm TSMC) 1 MB L2 + Mali (SoFIA)
+ * 4D (77) Atom 8C (22 nm) 4 MB L2 (AVN)
+ * ?? Atom ?C (14 nm) ? MB L2 (DVN)
+ */
+ return model == 28 || model == 38 || model == 54
+ || model == 39 || model == 53 || model == 74
+ || model == 90 || model == 55 || model == 76
+ || model == 93 || model == 77;
+}
+
+/**
+ * Check if the processor has a slow MULT implementation.
+ * If yes, it's better to use a hash not based on multiplication.
+ */
+static inline int raid_cpu_has_slowmult(void)
+{
+ char vendor[CPU_VENDOR_MAX];
+ unsigned family;
+ unsigned model;
+
+ /*
+ * In some cases Murmur3, based on the MUL instruction,
+ * is a LOT slower than Spooky2, based on SHIFTs.
+ */
+ raid_cpu_info(vendor, &family, &model);
+
+ if (strcmp(vendor, "GenuineIntel") == 0) {
+ /*
+ * Intel Atom (Model 28)
+ * murmur3:378 MB/s, spooky2:3413 MB/s (x86)
+ *
+ * Intel Atom (Model 77)
+ * murmur3:1311 MB/s, spooky2:4056 MB/s (x64)
+ */
+ if (raid_cpu_is_atom(family, model))
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * Check if the processor has a slow extended set of SSE registers.
+ * If yes, it's better to limit the unroll to the first 8 registers.
+ */
+static inline int raid_cpu_has_slowextendedreg(void)
+{
+ char vendor[CPU_VENDOR_MAX];
+ unsigned family;
+ unsigned model;
+
+ /*
+ * In some cases the PAR2 implementation using 16 SSE registers
+ * is a LITTLE slower than the one using only the first 8 registers.
+ * This doesn't happen for PARZ.
+ */
+ raid_cpu_info(vendor, &family, &model);
+
+ if (strcmp(vendor, "AuthenticAMD") == 0) {
+ /*
+ * AMD Bulldozer
+ * par2_sse2:4922 MB/s, par2_sse2e:4465 MB/s
+ */
+ if (family == 21)
+ return 1;
+ }
+
+ if (strcmp(vendor, "GenuineIntel") == 0) {
+ /*
+ * Intel Atom (Model 77)
+ * par2_sse2:5686 MB/s, par2_sse2e:5250 MB/s
+ * parz_sse2:3100 MB/s, parz_sse2e:3400 MB/s
+ * par3_sse3:1921 MB/s, par3_sse3e:1813 MB/s
+ * par4_sse3:1175 MB/s, par4_sse3e:1113 MB/s
+ * par5_sse3:876 MB/s, par5_sse3e:675 MB/s
+ * par6_sse3:705 MB/s, par6_sse3e:529 MB/s
+ *
+ * Intel Atom (Model 77) "Avoton C2750"
+ * par2_sse2:5661 MB/s, par2_sse2e:5382 MB/s
+ * parz_sse2:3110 MB/s, parz_sse2e:3450 MB/s
+ * par3_sse3:1769 MB/s, par3_sse3e:1856 MB/s
+ * par4_sse3:1221 MB/s, par4_sse3e:1141 MB/s
+ * par5_sse3:910 MB/s, par5_sse3e:675 MB/s
+ * par6_sse3:720 MB/s, par6_sse3e:534 MB/s
+ */
+ if (raid_cpu_is_atom(family, model))
+ return 1;
+ }
+
+ return 0;
+}
+#endif
+
+#endif
+
diff --git a/c_src/raid/gf.h b/c_src/raid/gf.h
new file mode 100644
index 00000000..1702c287
--- /dev/null
+++ b/c_src/raid/gf.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_GF_H
+#define __RAID_GF_H
+
+/*
+ * Galois field operations.
+ *
+ * Basic range checks are implemented using BUG_ON().
+ */
+
+/*
+ * GF a*b.
+ */
+static __always_inline uint8_t mul(uint8_t a, uint8_t b)
+{
+ return gfmul[a][b];
+}
+
+/*
+ * GF 1/a.
+ * Not defined for a == 0.
+ */
+static __always_inline uint8_t inv(uint8_t v)
+{
+ BUG_ON(v == 0); /* division by zero */
+
+ return gfinv[v];
+}
+
+/*
+ * GF 2^a.
+ */
+static __always_inline uint8_t pow2(int v)
+{
+ BUG_ON(v < 0 || v > 254); /* invalid exponent */
+
+ return gfexp[v];
+}
+
+/*
+ * Gets the multiplication table for a specified value.
+ */
+static __always_inline const uint8_t *table(uint8_t v)
+{
+ return gfmul[v];
+}
+
+/*
+ * Gets the generator matrix coefficient for parity 'p' and disk 'd'.
+ */
+static __always_inline uint8_t A(int p, int d)
+{
+ return gfgen[p][d];
+}
+
+/*
+ * Dereference as uint8_t
+ */
+#define v_8(p) (*(uint8_t *)&(p))
+
+/*
+ * Dereference as uint32_t
+ */
+#define v_32(p) (*(uint32_t *)&(p))
+
+/*
+ * Dereference as uint64_t
+ */
+#define v_64(p) (*(uint64_t *)&(p))
+
+/*
+ * Multiply each byte of a uint32 by 2 in the GF(2^8).
+ */
+static __always_inline uint32_t x2_32(uint32_t v)
+{
+ uint32_t mask = v & 0x80808080U;
+
+ mask = (mask << 1) - (mask >> 7);
+ v = (v << 1) & 0xfefefefeU;
+ v ^= mask & 0x1d1d1d1dU;
+ return v;
+}
+
+/*
+ * Multiply each byte of a uint64 by 2 in the GF(2^8).
+ */
+static __always_inline uint64_t x2_64(uint64_t v)
+{
+ uint64_t mask = v & 0x8080808080808080ULL;
+
+ mask = (mask << 1) - (mask >> 7);
+ v = (v << 1) & 0xfefefefefefefefeULL;
+ v ^= mask & 0x1d1d1d1d1d1d1d1dULL;
+ return v;
+}
+
+/*
+ * Divide each byte of a uint32 by 2 in the GF(2^8).
+ */
+static __always_inline uint32_t d2_32(uint32_t v)
+{
+ uint32_t mask = v & 0x01010101U;
+
+ mask = (mask << 8) - mask;
+ v = (v >> 1) & 0x7f7f7f7fU;
+ v ^= mask & 0x8e8e8e8eU;
+ return v;
+}
+
+/*
+ * Divide each byte of a uint64 by 2 in the GF(2^8).
+ */
+static __always_inline uint64_t d2_64(uint64_t v)
+{
+ uint64_t mask = v & 0x0101010101010101ULL;
+
+ mask = (mask << 8) - mask;
+ v = (v >> 1) & 0x7f7f7f7f7f7f7f7fULL;
+ v ^= mask & 0x8e8e8e8e8e8e8e8eULL;
+ return v;
+}
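+
+/*
+ * For reference, the x2_*() and d2_*() helpers above apply to every byte
+ * lane at once the usual scalar GF(2^8) doubling and halving for the
+ * 0x11d polynomial. A byte-at-a-time sketch of the same operations:
+ *
+ * uint8_t x2_8(uint8_t v)
+ * {
+ * return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
+ * }
+ *
+ * uint8_t d2_8(uint8_t v)
+ * {
+ * return (v >> 1) ^ ((v & 0x01) ? 0x8e : 0);
+ * }
+ */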
+
+#endif
+
diff --git a/c_src/raid/helper.c b/c_src/raid/helper.c
new file mode 100644
index 00000000..f66093fa
--- /dev/null
+++ b/c_src/raid/helper.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+
+#define RAID_SWAP(a, b) \
+ do { \
+ if (v[a] > v[b]) { \
+ int t = v[a]; \
+ v[a] = v[b]; \
+ v[b] = t; \
+ } \
+ } while (0)
+
+void raid_sort(int n, int *v)
+{
+ /* sorting networks generated with Batcher's Merge-Exchange */
+ switch (n) {
+ case 2:
+ RAID_SWAP(0, 1);
+ break;
+ case 3:
+ RAID_SWAP(0, 2);
+ RAID_SWAP(0, 1);
+ RAID_SWAP(1, 2);
+ break;
+ case 4:
+ RAID_SWAP(0, 2);
+ RAID_SWAP(1, 3);
+ RAID_SWAP(0, 1);
+ RAID_SWAP(2, 3);
+ RAID_SWAP(1, 2);
+ break;
+ case 5:
+ RAID_SWAP(0, 4);
+ RAID_SWAP(0, 2);
+ RAID_SWAP(1, 3);
+ RAID_SWAP(2, 4);
+ RAID_SWAP(0, 1);
+ RAID_SWAP(2, 3);
+ RAID_SWAP(1, 4);
+ RAID_SWAP(1, 2);
+ RAID_SWAP(3, 4);
+ break;
+ case 6:
+ RAID_SWAP(0, 4);
+ RAID_SWAP(1, 5);
+ RAID_SWAP(0, 2);
+ RAID_SWAP(1, 3);
+ RAID_SWAP(2, 4);
+ RAID_SWAP(3, 5);
+ RAID_SWAP(0, 1);
+ RAID_SWAP(2, 3);
+ RAID_SWAP(4, 5);
+ RAID_SWAP(1, 4);
+ RAID_SWAP(1, 2);
+ RAID_SWAP(3, 4);
+ break;
+ }
+}
+
+void raid_insert(int n, int *v, int i)
+{
+ /* we don't use binary search because this is intended */
+ /* for very small vectors and we want to optimize the case */
+ /* of elements inserted already in order */
+
+ /* insert at the end */
+ v[n] = i;
+
+ /* swap until in the correct position */
+ while (n > 0 && v[n - 1] > v[n]) {
+ /* swap */
+ int t = v[n - 1];
+
+ v[n - 1] = v[n];
+ v[n] = t;
+
+ /* previous position */
+ --n;
+ }
+}
+
diff --git a/c_src/raid/helper.h b/c_src/raid/helper.h
new file mode 100644
index 00000000..bf682882
--- /dev/null
+++ b/c_src/raid/helper.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_HELPER_H
+#define __RAID_HELPER_H
+
+/**
+ * Inserts an integer in a sorted vector.
+ *
+ * This function can be used to insert indexes in order, ready to be used for
+ * calling raid_rec().
+ *
+ * @n Number of integers currently in the vector.
+ * @v Vector of integers already sorted.
+ * It must have extra space for the new element at the end.
+ * @i Value to insert.
+ */
+void raid_insert(int n, int *v, int i);
+
+/**
+ * Sorts a small vector of integers.
+ *
+ * If you have indexes not in order, you can use this function to sort them
+ * before calling raid_rec().
+ *
+ * @n Number of integers. No more than RAID_PARITY_MAX.
+ * @v Vector of integers.
+ */
+void raid_sort(int n, int *v);
+
+#endif
+
diff --git a/c_src/raid/int.c b/c_src/raid/int.c
new file mode 100644
index 00000000..e16332a5
--- /dev/null
+++ b/c_src/raid/int.c
@@ -0,0 +1,556 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "gf.h"
+
+/*
+ * GEN1 (RAID5 with xor) 32bit C implementation
+ */
+void raid_gen1_int32(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ int d, l;
+ size_t i;
+
+ uint32_t p0;
+ uint32_t p1;
+
+ l = nd - 1;
+ p = v[nd];
+
+ for (i = 0; i < size; i += 8) {
+ p0 = v_32(v[l][i]);
+ p1 = v_32(v[l][i + 4]);
+ for (d = l - 1; d >= 0; --d) {
+ p0 ^= v_32(v[d][i]);
+ p1 ^= v_32(v[d][i + 4]);
+ }
+ v_32(p[i]) = p0;
+ v_32(p[i + 4]) = p1;
+ }
+}
+
+/*
+ * GEN1 (RAID5 with xor) 64bit C implementation
+ */
+void raid_gen1_int64(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ int d, l;
+ size_t i;
+
+ uint64_t p0;
+ uint64_t p1;
+
+ l = nd - 1;
+ p = v[nd];
+
+ for (i = 0; i < size; i += 16) {
+ p0 = v_64(v[l][i]);
+ p1 = v_64(v[l][i + 8]);
+ for (d = l - 1; d >= 0; --d) {
+ p0 ^= v_64(v[d][i]);
+ p1 ^= v_64(v[d][i + 8]);
+ }
+ v_64(p[i]) = p0;
+ v_64(p[i + 8]) = p1;
+ }
+}
+
+/*
+ * GEN2 (RAID6 with powers of 2) 32bit C implementation
+ */
+void raid_gen2_int32(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ int d, l;
+ size_t i;
+
+ uint32_t d0, q0, p0;
+ uint32_t d1, q1, p1;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+
+ for (i = 0; i < size; i += 8) {
+ q0 = p0 = v_32(v[l][i]);
+ q1 = p1 = v_32(v[l][i + 4]);
+ for (d = l - 1; d >= 0; --d) {
+ d0 = v_32(v[d][i]);
+ d1 = v_32(v[d][i + 4]);
+
+ p0 ^= d0;
+ p1 ^= d1;
+
+ q0 = x2_32(q0);
+ q1 = x2_32(q1);
+
+ q0 ^= d0;
+ q1 ^= d1;
+ }
+ v_32(p[i]) = p0;
+ v_32(p[i + 4]) = p1;
+ v_32(q[i]) = q0;
+ v_32(q[i + 4]) = q1;
+ }
+}
+
+/*
+ * GEN2 (RAID6 with powers of 2) 64bit C implementation
+ */
+void raid_gen2_int64(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ int d, l;
+ size_t i;
+
+ uint64_t d0, q0, p0;
+ uint64_t d1, q1, p1;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+
+ for (i = 0; i < size; i += 16) {
+ q0 = p0 = v_64(v[l][i]);
+ q1 = p1 = v_64(v[l][i + 8]);
+ for (d = l - 1; d >= 0; --d) {
+ d0 = v_64(v[d][i]);
+ d1 = v_64(v[d][i + 8]);
+
+ p0 ^= d0;
+ p1 ^= d1;
+
+ q0 = x2_64(q0);
+ q1 = x2_64(q1);
+
+ q0 ^= d0;
+ q1 ^= d1;
+ }
+ v_64(p[i]) = p0;
+ v_64(p[i + 8]) = p1;
+ v_64(q[i]) = q0;
+ v_64(q[i + 8]) = q1;
+ }
+}
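+
+/*
+ * Note on the loop structure used above (explanatory sketch): iterating
+ * from the last data disk down to disk 0 and computing
+ *
+ * q = 2 * q ^ D[d]
+ *
+ * is Horner's rule for Q = sum(2^d * D[d]), so the second parity needs
+ * only the parallel multiplication by 2 and no per-disk power table.
+ */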
+
+/*
+ * GEN3 (triple parity with Cauchy matrix) 8bit C implementation
+ *
+ * Note that instead of a generic multiplication table, likely resulting
+ * in multiple cache misses, a precomputed table could be used.
+ * But this is only a kind of reference function, and we are not really
+ * interested in speed.
+ */
+void raid_gen3_int8(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ int d, l;
+ size_t i;
+
+ uint8_t d0, r0, q0, p0;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+
+ for (i = 0; i < size; i += 1) {
+ p0 = q0 = r0 = 0;
+ for (d = l; d > 0; --d) {
+ d0 = v_8(v[d][i]);
+
+ p0 ^= d0;
+ q0 ^= gfmul[d0][gfgen[1][d]];
+ r0 ^= gfmul[d0][gfgen[2][d]];
+ }
+
+ /* first disk with all coefficients at 1 */
+ d0 = v_8(v[0][i]);
+
+ p0 ^= d0;
+ q0 ^= d0;
+ r0 ^= d0;
+
+ v_8(p[i]) = p0;
+ v_8(q[i]) = q0;
+ v_8(r[i]) = r0;
+ }
+}
+
+/*
+ * GEN4 (quad parity with Cauchy matrix) 8bit C implementation
+ *
+ * Note that instead of a generic multiplication table, likely resulting
+ * in multiple cache misses, a precomputed table could be used.
+ * But this is only a kind of reference function, and we are not really
+ * interested in speed.
+ */
+void raid_gen4_int8(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ int d, l;
+ size_t i;
+
+ uint8_t d0, s0, r0, q0, p0;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+
+ for (i = 0; i < size; i += 1) {
+ p0 = q0 = r0 = s0 = 0;
+ for (d = l; d > 0; --d) {
+ d0 = v_8(v[d][i]);
+
+ p0 ^= d0;
+ q0 ^= gfmul[d0][gfgen[1][d]];
+ r0 ^= gfmul[d0][gfgen[2][d]];
+ s0 ^= gfmul[d0][gfgen[3][d]];
+ }
+
+ /* first disk with all coefficients at 1 */
+ d0 = v_8(v[0][i]);
+
+ p0 ^= d0;
+ q0 ^= d0;
+ r0 ^= d0;
+ s0 ^= d0;
+
+ v_8(p[i]) = p0;
+ v_8(q[i]) = q0;
+ v_8(r[i]) = r0;
+ v_8(s[i]) = s0;
+ }
+}
+
+/*
+ * GEN5 (penta parity with Cauchy matrix) 8bit C implementation
+ *
+ * Note that instead of a generic multiplication table, likely resulting
+ * in multiple cache misses, a precomputed table could be used.
+ * But this is only a kind of reference function, and we are not really
+ * interested in speed.
+ */
+void raid_gen5_int8(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ uint8_t *t;
+ int d, l;
+ size_t i;
+
+ uint8_t d0, t0, s0, r0, q0, p0;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+ t = v[nd + 4];
+
+ for (i = 0; i < size; i += 1) {
+ p0 = q0 = r0 = s0 = t0 = 0;
+ for (d = l; d > 0; --d) {
+ d0 = v_8(v[d][i]);
+
+ p0 ^= d0;
+ q0 ^= gfmul[d0][gfgen[1][d]];
+ r0 ^= gfmul[d0][gfgen[2][d]];
+ s0 ^= gfmul[d0][gfgen[3][d]];
+ t0 ^= gfmul[d0][gfgen[4][d]];
+ }
+
+ /* first disk with all coefficients at 1 */
+ d0 = v_8(v[0][i]);
+
+ p0 ^= d0;
+ q0 ^= d0;
+ r0 ^= d0;
+ s0 ^= d0;
+ t0 ^= d0;
+
+ v_8(p[i]) = p0;
+ v_8(q[i]) = q0;
+ v_8(r[i]) = r0;
+ v_8(s[i]) = s0;
+ v_8(t[i]) = t0;
+ }
+}
+
+/*
+ * GEN6 (hexa parity with Cauchy matrix) 8bit C implementation
+ *
+ * Note that instead of a generic multiplication table, likely resulting
+ * in multiple cache misses, a precomputed table could be used.
+ * But this is only a kind of reference function, and we are not really
+ * interested in speed.
+ */
+void raid_gen6_int8(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ uint8_t *t;
+ uint8_t *u;
+ int d, l;
+ size_t i;
+
+ uint8_t d0, u0, t0, s0, r0, q0, p0;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+ t = v[nd + 4];
+ u = v[nd + 5];
+
+ for (i = 0; i < size; i += 1) {
+ p0 = q0 = r0 = s0 = t0 = u0 = 0;
+ for (d = l; d > 0; --d) {
+ d0 = v_8(v[d][i]);
+
+ p0 ^= d0;
+ q0 ^= gfmul[d0][gfgen[1][d]];
+ r0 ^= gfmul[d0][gfgen[2][d]];
+ s0 ^= gfmul[d0][gfgen[3][d]];
+ t0 ^= gfmul[d0][gfgen[4][d]];
+ u0 ^= gfmul[d0][gfgen[5][d]];
+ }
+
+ /* first disk with all coefficients at 1 */
+ d0 = v_8(v[0][i]);
+
+ p0 ^= d0;
+ q0 ^= d0;
+ r0 ^= d0;
+ s0 ^= d0;
+ t0 ^= d0;
+ u0 ^= d0;
+
+ v_8(p[i]) = p0;
+ v_8(q[i]) = q0;
+ v_8(r[i]) = r0;
+ v_8(s[i]) = s0;
+ v_8(t[i]) = t0;
+ v_8(u[i]) = u0;
+ }
+}
+
+/*
+ * Recover failure of one data block at index id[0] using parity at index
+ * ip[0] for any RAID level.
+ *
+ * Starting from the equation:
+ *
+ * Pd = A[ip[0],id[0]] * Dx
+ *
+ * and solving we get:
+ *
+ * Dx = A[ip[0],id[0]]^-1 * Pd
+ */
+void raid_rec1_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *pa;
+ const uint8_t *T;
+ uint8_t G;
+ uint8_t V;
+ size_t i;
+
+ (void)nr; /* unused, it's always 1 */
+
+ /* if it's RAID5, use the faster function */
+ if (ip[0] == 0) {
+ raid_rec1of1(id, nd, size, vv);
+ return;
+ }
+
+ /* setup the coefficients matrix */
+ G = A(ip[0], id[0]);
+
+ /* invert it to solve the system of linear equations */
+ V = inv(G);
+
+ /* get multiplication tables */
+ T = table(V);
+
+ /* compute delta parity */
+ raid_delta_gen(1, id, ip, nd, size, vv);
+
+ p = v[nd + ip[0]];
+ pa = v[id[0]];
+
+ for (i = 0; i < size; ++i) {
+ /* delta */
+ uint8_t Pd = p[i] ^ pa[i];
+
+ /* reconstruct */
+ pa[i] = T[Pd];
+ }
+}
+
+/*
+ * Recover failure of two data blocks at indexes id[0],id[1] using parity at
+ * indexes ip[0],ip[1] for any RAID level.
+ *
+ * Starting from the equations:
+ *
+ * Pd = A[ip[0],id[0]] * Dx + A[ip[0],id[1]] * Dy
+ * Qd = A[ip[1],id[0]] * Dx + A[ip[1],id[1]] * Dy
+ *
+ * we solve inverting the coefficients matrix.
+ */
+void raid_rec2_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *pa;
+ uint8_t *q;
+ uint8_t *qa;
+ const int N = 2;
+ const uint8_t *T[N][N];
+ uint8_t G[N * N];
+ uint8_t V[N * N];
+ size_t i;
+ int j, k;
+
+ (void)nr; /* unused, it's always 2 */
+
+ /* if it's RAID6 recovering with P and Q, use the faster function */
+ if (ip[0] == 0 && ip[1] == 1) {
+ raid_rec2of2_int8(id, ip, nd, size, vv);
+ return;
+ }
+
+ /* setup the coefficients matrix */
+ for (j = 0; j < N; ++j)
+ for (k = 0; k < N; ++k)
+ G[j * N + k] = A(ip[j], id[k]);
+
+ /* invert it to solve the system of linear equations */
+ raid_invert(G, V, N);
+
+ /* get multiplication tables */
+ for (j = 0; j < N; ++j)
+ for (k = 0; k < N; ++k)
+ T[j][k] = table(V[j * N + k]);
+
+ /* compute delta parity */
+ raid_delta_gen(2, id, ip, nd, size, vv);
+
+ p = v[nd + ip[0]];
+ q = v[nd + ip[1]];
+ pa = v[id[0]];
+ qa = v[id[1]];
+
+ for (i = 0; i < size; ++i) {
+ /* delta */
+ uint8_t Pd = p[i] ^ pa[i];
+ uint8_t Qd = q[i] ^ qa[i];
+
+ /* reconstruct */
+ pa[i] = T[0][0][Pd] ^ T[0][1][Qd];
+ qa[i] = T[1][0][Pd] ^ T[1][1][Qd];
+ }
+}
+
+/*
+ * Recover failure of N data blocks at indexes id[N] using parity at indexes
+ * ip[N] for any RAID level.
+ *
+ * Starting from the N equations, with 0<=i<N:
+ *
+ * PD[i] = sum(A[ip[i],id[j]] * D[j]) 0<=j<N
+ *
+ * we solve inverting the coefficients matrix.
+ *
+ * Note that, referring to the previous equations, you have:
+ * PD[0] = Pd, PD[1] = Qd, PD[2] = Rd, ...
+ * D[0] = Dx, D[1] = Dy, D[2] = Dz, ...
+ */
+void raid_recX_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p[RAID_PARITY_MAX];
+ uint8_t *pa[RAID_PARITY_MAX];
+ const uint8_t *T[RAID_PARITY_MAX][RAID_PARITY_MAX];
+ uint8_t G[RAID_PARITY_MAX * RAID_PARITY_MAX];
+ uint8_t V[RAID_PARITY_MAX * RAID_PARITY_MAX];
+ size_t i;
+ int j, k;
+
+ /* setup the coefficients matrix */
+ for (j = 0; j < nr; ++j)
+ for (k = 0; k < nr; ++k)
+ G[j * nr + k] = A(ip[j], id[k]);
+
+ /* invert it to solve the system of linear equations */
+ raid_invert(G, V, nr);
+
+ /* get multiplication tables */
+ for (j = 0; j < nr; ++j)
+ for (k = 0; k < nr; ++k)
+ T[j][k] = table(V[j * nr + k]);
+
+ /* compute delta parity */
+ raid_delta_gen(nr, id, ip, nd, size, vv);
+
+ for (j = 0; j < nr; ++j) {
+ p[j] = v[nd + ip[j]];
+ pa[j] = v[id[j]];
+ }
+
+ for (i = 0; i < size; ++i) {
+ uint8_t PD[RAID_PARITY_MAX];
+
+ /* delta */
+ for (j = 0; j < nr; ++j)
+ PD[j] = p[j][i] ^ pa[j][i];
+
+ /* reconstruct */
+ for (j = 0; j < nr; ++j) {
+ uint8_t b = 0;
+
+ for (k = 0; k < nr; ++k)
+ b ^= T[j][k][PD[k]];
+ pa[j][i] = b;
+ }
+ }
+}
+
diff --git a/c_src/raid/internal.h b/c_src/raid/internal.h
new file mode 100644
index 00000000..4465cb97
--- /dev/null
+++ b/c_src/raid/internal.h
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_INTERNAL_H
+#define __RAID_INTERNAL_H
+
+/*
+ * Supported instruction sets.
+ *
+ * It may happen that the assembler is too old to support
+ * all instructions, even if the architecture supports them.
+ * These defines allow excluding the unsupported ones from the build.
+ *
+ * If in your project you use a predefined assembler, you can define them
+ * using fixed values, instead of using the HAVE_* defines.
+ */
+#if HAVE_CONFIG_H
+
+/* Includes the project configuration for HAVE_* defines */
+#include "config.h"
+
+/* If the compiler supports assembly */
+#if HAVE_ASSEMBLY
+/* Autodetect from the compiler */
+#if defined(__i386__)
+#define CONFIG_X86 1
+#define CONFIG_X86_32 1
+#endif
+#if defined(__x86_64__)
+#define CONFIG_X86 1
+#define CONFIG_X86_64 1
+#endif
+#endif
+
+/* Enables SSE2, SSSE3, AVX2 only if the assembler supports it */
+#if HAVE_SSE2
+#define CONFIG_SSE2 1
+#endif
+#if HAVE_SSSE3
+#define CONFIG_SSSE3 1
+#endif
+#if HAVE_AVX2
+#define CONFIG_AVX2 1
+#endif
+
+#else /* if HAVE_CONFIG_H is not defined */
+
+/* Assume that assembly is always supported */
+#if defined(__i386__)
+#define CONFIG_X86 1
+#define CONFIG_X86_32 1
+#endif
+
+#if defined(__x86_64__)
+#define CONFIG_X86 1
+#define CONFIG_X86_64 1
+#endif
+
+/* Assumes that the assembler supports everything */
+#ifdef CONFIG_X86
+#define CONFIG_SSE2 1
+#define CONFIG_SSSE3 1
+#define CONFIG_AVX2 1
+#endif
+#endif
+
+/*
+ * Includes anything required for compatibility.
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Inverse assert.
+ */
+#define BUG_ON(a) assert(!(a))
+
+/*
+ * Forced inline.
+ */
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
+/*
+ * Forced alignment.
+ */
+#ifndef __aligned
+#define __aligned(a) __attribute__((aligned(a)))
+#endif
+
+/*
+ * Align a pointer at the specified size.
+ */
+static __always_inline void *__align_ptr(void *ptr, uintptr_t size)
+{
+ uintptr_t offset = (uintptr_t)ptr;
+
+ offset = (offset + size - 1U) & ~(size - 1U);
+
+ return (void *)offset;
+}
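+
+/*
+ * For example, __align_ptr(p, 64) rounds the pointer p up to the next
+ * 64-byte boundary; the size must be a power of 2.
+ */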
+
+/*
+ * Includes the main interface headers.
+ */
+#include "raid.h"
+#include "helper.h"
+
+/*
+ * Internal functions.
+ *
+ * These are intended to provide access for testing.
+ */
+int raid_selftest(void);
+void raid_gen_ref(int nd, int np, size_t size, void **vv);
+void raid_invert(uint8_t *M, uint8_t *V, int n);
+void raid_delta_gen(int nr, int *id, int *ip, int nd, size_t size, void **v);
+void raid_rec1of1(int *id, int nd, size_t size, void **v);
+void raid_rec2of2_int8(int *id, int *ip, int nd, size_t size, void **vv);
+void raid_gen1_int32(int nd, size_t size, void **vv);
+void raid_gen1_int64(int nd, size_t size, void **vv);
+void raid_gen1_sse2(int nd, size_t size, void **vv);
+void raid_gen1_avx2(int nd, size_t size, void **vv);
+void raid_gen2_int32(int nd, size_t size, void **vv);
+void raid_gen2_int64(int nd, size_t size, void **vv);
+void raid_gen2_sse2(int nd, size_t size, void **vv);
+void raid_gen2_avx2(int nd, size_t size, void **vv);
+void raid_gen2_sse2ext(int nd, size_t size, void **vv);
+void raid_genz_int32(int nd, size_t size, void **vv);
+void raid_genz_int64(int nd, size_t size, void **vv);
+void raid_genz_sse2(int nd, size_t size, void **vv);
+void raid_genz_sse2ext(int nd, size_t size, void **vv);
+void raid_genz_avx2ext(int nd, size_t size, void **vv);
+void raid_gen3_int8(int nd, size_t size, void **vv);
+void raid_gen3_ssse3(int nd, size_t size, void **vv);
+void raid_gen3_ssse3ext(int nd, size_t size, void **vv);
+void raid_gen3_avx2ext(int nd, size_t size, void **vv);
+void raid_gen4_int8(int nd, size_t size, void **vv);
+void raid_gen4_ssse3(int nd, size_t size, void **vv);
+void raid_gen4_ssse3ext(int nd, size_t size, void **vv);
+void raid_gen4_avx2ext(int nd, size_t size, void **vv);
+void raid_gen5_int8(int nd, size_t size, void **vv);
+void raid_gen5_ssse3(int nd, size_t size, void **vv);
+void raid_gen5_ssse3ext(int nd, size_t size, void **vv);
+void raid_gen5_avx2ext(int nd, size_t size, void **vv);
+void raid_gen6_int8(int nd, size_t size, void **vv);
+void raid_gen6_ssse3(int nd, size_t size, void **vv);
+void raid_gen6_ssse3ext(int nd, size_t size, void **vv);
+void raid_gen6_avx2ext(int nd, size_t size, void **vv);
+void raid_rec1_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_rec2_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_recX_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_rec1_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_rec2_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_recX_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_rec1_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_rec2_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+void raid_recX_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
+
+/*
+ * Internal naming.
+ *
+ * These are intended to provide access for testing.
+ */
+const char *raid_gen1_tag(void);
+const char *raid_gen2_tag(void);
+const char *raid_genz_tag(void);
+const char *raid_gen3_tag(void);
+const char *raid_gen4_tag(void);
+const char *raid_gen5_tag(void);
+const char *raid_gen6_tag(void);
+const char *raid_rec1_tag(void);
+const char *raid_rec2_tag(void);
+const char *raid_recX_tag(void);
+
+/*
+ * Internal forwarders.
+ */
+extern void (*raid_gen3_ptr)(int nd, size_t size, void **vv);
+extern void (*raid_genz_ptr)(int nd, size_t size, void **vv);
+extern void (*raid_gen_ptr[RAID_PARITY_MAX])(
+ int nd, size_t size, void **vv);
+extern void (*raid_rec_ptr[RAID_PARITY_MAX])(
+ int nr, int *id, int *ip, int nd, size_t size, void **vv);
+
+/*
+ * Tables.
+ */
+extern const uint8_t raid_gfmul[256][256] __aligned(256);
+extern const uint8_t raid_gfexp[256] __aligned(256);
+extern const uint8_t raid_gfinv[256] __aligned(256);
+extern const uint8_t raid_gfvandermonde[3][256] __aligned(256);
+extern const uint8_t raid_gfcauchy[6][256] __aligned(256);
+extern const uint8_t raid_gfcauchypshufb[251][4][2][16] __aligned(256);
+extern const uint8_t raid_gfmulpshufb[256][2][16] __aligned(256);
+extern const uint8_t (*raid_gfgen)[256];
+#define gfmul raid_gfmul
+#define gfexp raid_gfexp
+#define gfinv raid_gfinv
+#define gfvandermonde raid_gfvandermonde
+#define gfcauchy raid_gfcauchy
+#define gfgenpshufb raid_gfcauchypshufb
+#define gfmulpshufb raid_gfmulpshufb
+#define gfgen raid_gfgen
+
+/*
+ * Assembler blocks.
+ */
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSE2
+static __always_inline void raid_sse_begin(void)
+{
+}
+
+static __always_inline void raid_sse_end(void)
+{
+ /* SSE and AVX code uses non-temporal writes, like MOVNTDQ, */
+ /* that use a weak memory model. To ensure that other processors */
+ /* correctly see the written data, we use a store-store memory */
+ /* barrier at the end of the asm code */
+ asm volatile ("sfence" : : : "memory");
+
+ /* clobber the registers used in the asm code */
+ /* this is required because in the Windows ABI */
+ /* registers xmm6-xmm15 must be preserved by the callee. */
+ /* this clobber list forces the compiler to save any */
+ /* register that needs to be saved */
+ /* we check for __SSE2__ because we require that the */
+ /* compiler supports SSE2 registers in the clobber list */
+#ifdef __SSE2__
+ asm volatile ("" : : : "%xmm0", "%xmm1", "%xmm2", "%xmm3");
+ asm volatile ("" : : : "%xmm4", "%xmm5", "%xmm6", "%xmm7");
+#ifdef CONFIG_X86_64
+ asm volatile ("" : : : "%xmm8", "%xmm9", "%xmm10", "%xmm11");
+ asm volatile ("" : : : "%xmm12", "%xmm13", "%xmm14", "%xmm15");
+#endif
+#endif
+}
+#endif
+
+#ifdef CONFIG_AVX2
+static __always_inline void raid_avx_begin(void)
+{
+ raid_sse_begin();
+}
+
+static __always_inline void raid_avx_end(void)
+{
+ raid_sse_end();
+
+ /* reset the upper part of the ymm registers */
+ /* to avoid the ~70 clock penalty on the next */
+ /* xmm register use */
+ asm volatile ("vzeroupper" : : : "memory");
+}
+#endif
+#endif /* CONFIG_X86 */
+
+#endif
+
diff --git a/c_src/raid/intz.c b/c_src/raid/intz.c
new file mode 100644
index 00000000..80c20142
--- /dev/null
+++ b/c_src/raid/intz.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "gf.h"
+
+/*
+ * GENz (triple parity with powers of 2^-1) 32bit C implementation
+ */
+void raid_genz_int32(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t**)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ int d, l;
+ size_t i;
+
+ uint32_t d0, r0, q0, p0;
+ uint32_t d1, r1, q1, p1;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+
+ for (i = 0; i < size; i += 8) {
+ r0 = q0 = p0 = v_32(v[l][i]);
+ r1 = q1 = p1 = v_32(v[l][i + 4]);
+ for (d = l - 1; d >= 0; --d) {
+ d0 = v_32(v[d][i]);
+ d1 = v_32(v[d][i + 4]);
+
+ p0 ^= d0;
+ p1 ^= d1;
+
+ q0 = x2_32(q0);
+ q1 = x2_32(q1);
+
+ q0 ^= d0;
+ q1 ^= d1;
+
+ r0 = d2_32(r0);
+ r1 = d2_32(r1);
+
+ r0 ^= d0;
+ r1 ^= d1;
+ }
+ v_32(p[i]) = p0;
+ v_32(p[i + 4]) = p1;
+ v_32(q[i]) = q0;
+ v_32(q[i + 4]) = q1;
+ v_32(r[i]) = r0;
+ v_32(r[i + 4]) = r1;
+ }
+}
+
+/*
+ * GENz (triple parity with powers of 2^-1) 64bit C implementation
+ */
+void raid_genz_int64(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t**)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ int d, l;
+ size_t i;
+
+ uint64_t d0, r0, q0, p0;
+ uint64_t d1, r1, q1, p1;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+
+ for (i = 0; i < size; i += 16) {
+ r0 = q0 = p0 = v_64(v[l][i]);
+ r1 = q1 = p1 = v_64(v[l][i + 8]);
+ for (d = l - 1; d >= 0; --d) {
+ d0 = v_64(v[d][i]);
+ d1 = v_64(v[d][i + 8]);
+
+ p0 ^= d0;
+ p1 ^= d1;
+
+ q0 = x2_64(q0);
+ q1 = x2_64(q1);
+
+ q0 ^= d0;
+ q1 ^= d1;
+
+ r0 = d2_64(r0);
+ r1 = d2_64(r1);
+
+ r0 ^= d0;
+ r1 ^= d1;
+ }
+ v_64(p[i]) = p0;
+ v_64(p[i + 8]) = p1;
+ v_64(q[i]) = q0;
+ v_64(q[i + 8]) = q1;
+ v_64(r[i]) = r0;
+ v_64(r[i + 8]) = r1;
+ }
+}
+
diff --git a/c_src/raid/memory.c b/c_src/raid/memory.c
new file mode 100644
index 00000000..02a5a927
--- /dev/null
+++ b/c_src/raid/memory.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "memory.h"
+
+void *raid_malloc_align(size_t size, size_t align_size, void **freeptr)
+{
+ unsigned char *ptr;
+ uintptr_t offset;
+
+ ptr = malloc(size + align_size);
+ if (!ptr) {
+ /* LCOV_EXCL_START */
+ return 0;
+ /* LCOV_EXCL_STOP */
+ }
+
+ *freeptr = ptr;
+
+ offset = ((uintptr_t)ptr) % align_size;
+
+ if (offset != 0)
+ ptr += align_size - offset;
+
+ return ptr;
+}
+
+void *raid_malloc(size_t size, void **freeptr)
+{
+ return raid_malloc_align(size, RAID_MALLOC_ALIGN, freeptr);
+}
+
+void **raid_malloc_vector_align(int nd, int n, size_t size, size_t align_size, size_t displacement_size, void **freeptr)
+{
+ void **v;
+ unsigned char *va;
+ int i;
+
+ BUG_ON(n <= 0 || nd < 0);
+
+ v = malloc(n * sizeof(void *));
+ if (!v) {
+ /* LCOV_EXCL_START */
+ return 0;
+ /* LCOV_EXCL_STOP */
+ }
+
+ va = raid_malloc_align(n * (size + displacement_size), align_size, freeptr);
+ if (!va) {
+ /* LCOV_EXCL_START */
+ free(v);
+ return 0;
+ /* LCOV_EXCL_STOP */
+ }
+
+ for (i = 0; i < n; ++i) {
+ v[i] = va;
+ va += size + displacement_size;
+ }
+
+ /* reverse order of the data blocks */
+ /* because they are usually accessed from the last one */
+ for (i = 0; i < nd / 2; ++i) {
+ void *ptr = v[i];
+
+ v[i] = v[nd - 1 - i];
+ v[nd - 1 - i] = ptr;
+ }
+
+ return v;
+}
+
+void **raid_malloc_vector(int nd, int n, size_t size, void **freeptr)
+{
+ return raid_malloc_vector_align(nd, n, size, RAID_MALLOC_ALIGN, RAID_MALLOC_DISPLACEMENT, freeptr);
+}
+
+void raid_mrand_vector(unsigned seed, int n, size_t size, void **vv)
+{
+ unsigned char **v = (unsigned char **)vv;
+ int i;
+ size_t j;
+
+ for (i = 0; i < n; ++i)
+ for (j = 0; j < size; ++j) {
+ /* basic C99/C11 linear congruential generator */
+ seed = seed * 1103515245U + 12345U;
+
+ v[i][j] = seed >> 16;
+ }
+}
+
+int raid_mtest_vector(int n, size_t size, void **vv)
+{
+ unsigned char **v = (unsigned char **)vv;
+ int i;
+ size_t j;
+ unsigned k;
+ unsigned char d;
+ unsigned char p;
+
+ /* fill with 0 */
+ d = 0;
+ for (i = 0; i < n; ++i)
+ for (j = 0; j < size; ++j)
+ v[i][j] = d;
+
+ /* test with all the byte patterns */
+ for (k = 1; k < 256; ++k) {
+ p = d;
+ d = k;
+
+ /* forward fill */
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < size; ++j) {
+ if (v[i][j] != p) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+ v[i][j] = d;
+ }
+ }
+
+ p = d;
+ d = ~p;
+ /* backward fill with complement */
+ for (i = 0; i < n; ++i) {
+ for (j = size; j > 0; --j) {
+ if (v[i][j - 1] != p) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+ v[i][j - 1] = d;
+ }
+ }
+ }
+
+ return 0;
+}
+
diff --git a/c_src/raid/memory.h b/c_src/raid/memory.h
new file mode 100644
index 00000000..de00614f
--- /dev/null
+++ b/c_src/raid/memory.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_MEMORY_H
+#define __RAID_MEMORY_H
+
+/**
+ * Memory alignment provided by raid_malloc().
+ *
+ * It should guarantee good cache performance everywhere.
+ */
+#define RAID_MALLOC_ALIGN 256
+
+/**
+ * Memory displacement to avoid cache address sharing on contiguous blocks,
+ * used by raid_malloc_vector().
+ *
+ * When allocating a sequence of blocks with a size of power of 2,
+ * there is the risk that the addresses of each block are mapped into the
+ * same cache line and prefetching predictor, resulting in a lot of cache
+ * sharing if you access all the blocks in parallel, from the start to the
+ * end.
+ *
+ * To avoid this effect, it's better if all the blocks are allocated
+ * with a fixed displacement that tries to reduce cache address sharing.
+ *
+ * The selected displacement was chosen empirically with some speed tests
+ * with 8/12/16/20/24 data buffers of 256 KB.
+ *
+ * These are the results in MB/s with no displacement:
+ *
+ * sse2
+ * gen1 15368 [MB/s]
+ * gen2 6814 [MB/s]
+ * genz 3033 [MB/s]
+ *
+ * These are the results with the displacement, showing improvements
+ * on the order of 20% or more:
+ *
+ * sse2
+ * gen1 21936 [MB/s]
+ * gen2 11902 [MB/s]
+ * genz 5838 [MB/s]
+ *
+ */
+#define RAID_MALLOC_DISPLACEMENT (7*256)
+
+/**
+ * Aligned malloc.
+ * Use an alignment suitable for the raid functions.
+ */
+void *raid_malloc(size_t size, void **freeptr);
+
+/**
+ * Arbitrary aligned malloc.
+ */
+void *raid_malloc_align(size_t size, size_t align_size, void **freeptr);
+
+/**
+ * Aligned vector allocation.
+ * Use an alignment suitable for the raid functions.
+ * Returns a vector of @n pointers, each one pointing to a block of
+ * the specified @size.
+ * The first @nd elements are reversed in order.
+ */
+void **raid_malloc_vector(int nd, int n, size_t size, void **freeptr);
+
+/**
+ * Arbitrary aligned vector allocation.
+ */
+void **raid_malloc_vector_align(int nd, int n, size_t size, size_t align_size, size_t displacement_size, void **freeptr);
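+
+/*
+ * Usage sketch, mirroring the allocate/use/free pattern of raid_selftest()
+ * in module.c (nd, np and size are placeholders):
+ *
+ * void *v_alloc;
+ * void **v = raid_malloc_vector(nd, nd + np, size, &v_alloc);
+ *
+ * if (v) {
+ * ... use v[0] .. v[nd + np - 1] ...
+ * free(v);
+ * free(v_alloc);
+ * }
+ */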
+
+/**
+ * Fills the memory vector with pseudo-random data based on the specified seed.
+ */
+void raid_mrand_vector(unsigned seed, int n, size_t size, void **vv);
+
+/**
+ * Tests the memory vector for RAM problems.
+ * If a problem is found, it crashes.
+ */
+int raid_mtest_vector(int n, size_t size, void **vv);
+
+#endif
+
diff --git a/c_src/raid/module.c b/c_src/raid/module.c
new file mode 100644
index 00000000..b688d22c
--- /dev/null
+++ b/c_src/raid/module.c
@@ -0,0 +1,473 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "memory.h"
+#include "cpu.h"
+
+/*
+ * Initializes and selects the best algorithm.
+ */
+void raid_init(void)
+{
+ raid_gen3_ptr = raid_gen3_int8;
+ raid_gen_ptr[3] = raid_gen4_int8;
+ raid_gen_ptr[4] = raid_gen5_int8;
+ raid_gen_ptr[5] = raid_gen6_int8;
+
+ if (sizeof(void *) == 4) {
+ raid_gen_ptr[0] = raid_gen1_int32;
+ raid_gen_ptr[1] = raid_gen2_int32;
+ raid_genz_ptr = raid_genz_int32;
+ } else {
+ raid_gen_ptr[0] = raid_gen1_int64;
+ raid_gen_ptr[1] = raid_gen2_int64;
+ raid_genz_ptr = raid_genz_int64;
+ }
+
+ raid_rec_ptr[0] = raid_rec1_int8;
+ raid_rec_ptr[1] = raid_rec2_int8;
+ raid_rec_ptr[2] = raid_recX_int8;
+ raid_rec_ptr[3] = raid_recX_int8;
+ raid_rec_ptr[4] = raid_recX_int8;
+ raid_rec_ptr[5] = raid_recX_int8;
+
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSE2
+ if (raid_cpu_has_sse2()) {
+ raid_gen_ptr[0] = raid_gen1_sse2;
+#ifdef CONFIG_X86_64
+ if (raid_cpu_has_slowextendedreg()) {
+ raid_gen_ptr[1] = raid_gen2_sse2;
+ } else {
+ raid_gen_ptr[1] = raid_gen2_sse2ext;
+ }
+ /* note that raid_cpu_has_slowextendedreg() doesn't affect parz */
+ raid_genz_ptr = raid_genz_sse2ext;
+#else
+ raid_gen_ptr[1] = raid_gen2_sse2;
+ raid_genz_ptr = raid_genz_sse2;
+#endif
+ }
+#endif
+
+#ifdef CONFIG_SSSE3
+ if (raid_cpu_has_ssse3()) {
+#ifdef CONFIG_X86_64
+ if (raid_cpu_has_slowextendedreg()) {
+ raid_gen3_ptr = raid_gen3_ssse3;
+ raid_gen_ptr[3] = raid_gen4_ssse3;
+ raid_gen_ptr[4] = raid_gen5_ssse3;
+ raid_gen_ptr[5] = raid_gen6_ssse3;
+ } else {
+ raid_gen3_ptr = raid_gen3_ssse3ext;
+ raid_gen_ptr[3] = raid_gen4_ssse3ext;
+ raid_gen_ptr[4] = raid_gen5_ssse3ext;
+ raid_gen_ptr[5] = raid_gen6_ssse3ext;
+ }
+#else
+ raid_gen3_ptr = raid_gen3_ssse3;
+ raid_gen_ptr[3] = raid_gen4_ssse3;
+ raid_gen_ptr[4] = raid_gen5_ssse3;
+ raid_gen_ptr[5] = raid_gen6_ssse3;
+#endif
+ raid_rec_ptr[0] = raid_rec1_ssse3;
+ raid_rec_ptr[1] = raid_rec2_ssse3;
+ raid_rec_ptr[2] = raid_recX_ssse3;
+ raid_rec_ptr[3] = raid_recX_ssse3;
+ raid_rec_ptr[4] = raid_recX_ssse3;
+ raid_rec_ptr[5] = raid_recX_ssse3;
+ }
+#endif
+
+#ifdef CONFIG_AVX2
+ if (raid_cpu_has_avx2()) {
+ raid_gen_ptr[0] = raid_gen1_avx2;
+ raid_gen_ptr[1] = raid_gen2_avx2;
+#ifdef CONFIG_X86_64
+ raid_gen3_ptr = raid_gen3_avx2ext;
+ raid_genz_ptr = raid_genz_avx2ext;
+ raid_gen_ptr[3] = raid_gen4_avx2ext;
+ raid_gen_ptr[4] = raid_gen5_avx2ext;
+ raid_gen_ptr[5] = raid_gen6_avx2ext;
+#endif
+ raid_rec_ptr[0] = raid_rec1_avx2;
+ raid_rec_ptr[1] = raid_rec2_avx2;
+ raid_rec_ptr[2] = raid_recX_avx2;
+ raid_rec_ptr[3] = raid_recX_avx2;
+ raid_rec_ptr[4] = raid_recX_avx2;
+ raid_rec_ptr[5] = raid_recX_avx2;
+ }
+#endif
+#endif /* CONFIG_X86 */
+
+ /* set the default mode */
+ raid_mode(RAID_MODE_CAUCHY);
+}
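+
+/*
+ * Typical initialization sequence (illustrative sketch; nd, np, size and v
+ * are placeholders for the caller's data layout):
+ *
+ * raid_init();
+ * if (raid_selftest() != 0)
+ * ... report a broken build or unsupported CPU ...
+ * raid_gen(nd, np, size, v);
+ *
+ * raid_init() selects the generation and recovery implementations at
+ * runtime, so it should be called before the other entry points.
+ */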
+
+/*
+ * Reference parity computation.
+ */
+void raid_gen_ref(int nd, int np, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ size_t i;
+
+ for (i = 0; i < size; ++i) {
+ uint8_t p[RAID_PARITY_MAX];
+ int j, d;
+
+ for (j = 0; j < np; ++j)
+ p[j] = 0;
+
+ for (d = 0; d < nd; ++d) {
+ uint8_t b = v[d][i];
+
+ for (j = 0; j < np; ++j)
+ p[j] ^= gfmul[b][gfgen[j][d]];
+ }
+
+ for (j = 0; j < np; ++j)
+ v[nd + j][i] = p[j];
+ }
+}
+
+/*
+ * Size of the blocks to test.
+ */
+#define TEST_SIZE 4096
+
+/*
+ * Number of data blocks to test.
+ */
+#define TEST_COUNT (65536 / TEST_SIZE)
+
+/*
+ * Parity generation test.
+ */
+static int raid_test_par(int nd, int np, size_t size, void **v, void **ref)
+{
+ int i;
+ void *t[TEST_COUNT + RAID_PARITY_MAX];
+
+ /* setup data */
+ for (i = 0; i < nd; ++i)
+ t[i] = ref[i];
+
+ /* setup parity */
+ for (i = 0; i < np; ++i)
+ t[nd + i] = v[nd + i];
+
+ raid_gen(nd, np, size, t);
+
+ /* compare parity */
+ for (i = 0; i < np; ++i) {
+ if (memcmp(t[nd + i], ref[nd + i], size) != 0) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Recovering test.
+ */
+static int raid_test_rec(int nr, int *ir, int nd, int np, size_t size, void **v, void **ref)
+{
+ int i, j;
+ void *t[TEST_COUNT + RAID_PARITY_MAX];
+
+ /* setup data and parity vector */
+ for (i = 0, j = 0; i < nd + np; ++i) {
+ if (j < nr && ir[j] == i) {
+ /* this block has to be recovered */
+ t[i] = v[i];
+ ++j;
+ } else {
+ /* this block is used for recovering */
+ t[i] = ref[i];
+ }
+ }
+
+ raid_rec(nr, ir, nd, np, size, t);
+
+ /* compare all data and parity */
+ for (i = 0; i < nd + np; ++i) {
+ if (t[i] != ref[i]
+ && memcmp(t[i], ref[i], size) != 0) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Recovering test for data.
+ */
+static int raid_test_data(int nr, int *id, int *ip, int nd, int np, size_t size, void **v, void **ref)
+{
+ int i, j;
+ void *t[TEST_COUNT + RAID_PARITY_MAX];
+
+ /* setup data vector */
+ for (i = 0, j = 0; i < nd; ++i) {
+ if (j < nr && id[j] == i) {
+ /* this block has to be recovered */
+ t[i] = v[i];
+ ++j;
+ } else {
+ /* this block is left unchanged */
+ t[i] = ref[i];
+ }
+ }
+
+ /* setup parity vector */
+ for (i = 0, j = 0; i < np; ++i) {
+ if (j < nr && ip[j] == i) {
+ /* this block is used for recovering */
+ t[nd + i] = ref[nd + i];
+ ++j;
+ } else {
+ /* this block should not be read or written */
+ t[nd + i] = 0;
+ }
+ }
+
+ raid_data(nr, id, ip, nd, size, t);
+
+ /* compare all data and parity */
+ for (i = 0; i < nd; ++i) {
+ if (t[i] != ref[i]
+ && t[i] != 0
+ && memcmp(t[i], ref[i], size) != 0) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Scan test.
+ */
+static int raid_test_scan(int nr, int *ir, int nd, int np, size_t size, void **v, void **ref)
+{
+ int i, j, ret;
+ void *t[TEST_COUNT + RAID_PARITY_MAX];
+ int is[RAID_PARITY_MAX];
+
+ /* setup data and parity vector */
+ for (i = 0, j = 0; i < nd + np; ++i) {
+ if (j < nr && ir[j] == i) {
+ /* this block is bad */
+ t[i] = v[i];
+ ++j;
+ } else {
+ /* this block is used for recovering */
+ t[i] = ref[i];
+ }
+ }
+
+ ret = raid_scan(is, nd, np, size, t);
+
+ /* compare identified bad blocks */
+ if (ret != nr)
+ return -1;
+ for (i = 0; i < nr; ++i) {
+ if (ir[i] != is[i]) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Basic functionality self test.
+ */
+int raid_selftest(void)
+{
+ const int nd = TEST_COUNT;
+ const size_t size = TEST_SIZE;
+ const int nv = nd + RAID_PARITY_MAX * 2 + 1;
+ void *v_alloc;
+ void **v;
+ void *ref[nd + RAID_PARITY_MAX];
+ int ir[RAID_PARITY_MAX];
+ int ip[RAID_PARITY_MAX];
+ int i, np;
+ int ret = 0;
+
+ /* ensure we have enough space for the data */
+ BUG_ON(nd * size > 65536);
+
+ v = raid_malloc_vector(nd, nv, size, &v_alloc);
+ if (!v) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+
+ memset(v[nv - 1], 0, size);
+ raid_zero(v[nv - 1]);
+
+ /* use the multiplication table as data */
+ for (i = 0; i < nd; ++i)
+ ref[i] = ((uint8_t *)gfmul) + size * i;
+
+ /* setup reference parity */
+ for (i = 0; i < RAID_PARITY_MAX; ++i)
+ ref[nd + i] = v[nd + RAID_PARITY_MAX + i];
+
+ /* compute reference parity */
+ raid_gen_ref(nd, RAID_PARITY_MAX, size, ref);
+
+ /* test for each parity level */
+ for (np = 1; np <= RAID_PARITY_MAX; ++np) {
+ /* test parity generation */
+ ret = raid_test_par(nd, np, size, v, ref);
+ if (ret != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+
+ /* test recovering with broken ending data disks */
+ for (i = 0; i < np; ++i) {
+ /* bad data */
+ ir[i] = nd - np + i;
+
+ /* good parity */
+ ip[i] = i;
+ }
+
+ ret = raid_test_rec(np, ir, nd, np, size, v, ref);
+ if (ret != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+
+ ret = raid_test_data(np, ir, ip, nd, np, size, v, ref);
+ if (ret != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+
+ /* test recovering with broken leading data and broken leading parity */
+ for (i = 0; i < np / 2; ++i) {
+ /* bad data */
+ ir[i] = i;
+
+ /* good parity */
+ ip[i] = (np + 1) / 2 + i;
+ }
+
+ /* bad parity */
+ for (i = 0; i < (np + 1) / 2; ++i)
+ ir[np / 2 + i] = nd + i;
+
+ ret = raid_test_rec(np, ir, nd, np, size, v, ref);
+ if (ret != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+
+ ret = raid_test_data(np / 2, ir, ip, nd, np, size, v, ref);
+ if (ret != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+
+ /* test recovering with broken leading data and broken ending parity */
+ for (i = 0; i < np / 2; ++i) {
+ /* bad data */
+ ir[i] = i;
+
+ /* good parity */
+ ip[i] = i;
+ }
+
+ /* bad parity */
+ for (i = 0; i < (np + 1) / 2; ++i)
+ ir[np / 2 + i] = nd + np - (np + 1) / 2 + i;
+
+ ret = raid_test_rec(np, ir, nd, np, size, v, ref);
+ if (ret != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+
+ ret = raid_test_data(np / 2, ir, ip, nd, np, size, v, ref);
+ if (ret != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+
+ /* scan test with broken data and parity */
+ for (i = 0; i < np / 2; ++i) {
+ /* bad data */
+ ir[i] = i;
+ }
+ for (i = 0; i < (np - 1) / 2; ++i) {
+ /* bad parity */
+ ir[np / 2 + i] = nd + i;
+ }
+ for (i = 0; i < np - 1; ++i) {
+ /* make blocks bad */
+ /* we cannot fill them with 0, because the original */
+ /* data may be already filled with 0 */
+ memset(v[ir[i]], 0x55, size);
+ }
+
+ ret = raid_test_scan(np - 1, ir, nd, np, size, v, ref);
+ if (ret != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+
+ /* scan test with no parity */
+ ret = raid_test_scan(0, 0, nd, 0, size, v, ref);
+ if (ret != -1) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+
+ ret = 0;
+
+bail:
+ free(v);
+ free(v_alloc);
+
+ return ret;
+}
+
diff --git a/c_src/raid/raid.c b/c_src/raid/raid.c
new file mode 100644
index 00000000..3052675f
--- /dev/null
+++ b/c_src/raid/raid.c
@@ -0,0 +1,586 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "gf.h"
+
+/*
+ * This is a RAID implementation working in the Galois Field GF(2^8) with
+ * the primitive polynomial x^8 + x^4 + x^3 + x^2 + 1 (285 decimal), and
+ * supporting up to six parity levels.
+ *
+ * For RAID5 and RAID6 it works as described in H. Peter Anvin's
+ * paper "The mathematics of RAID-6" [1]. Please refer to this paper for a
+ * complete explanation.
+ *
+ * To support triple parity, an extension of the same approach was first
+ * evaluated and then dropped: additional parity coefficients set as
+ * powers of 2^-1, with equations:
+ *
+ * P = sum(Di)
+ * Q = sum(2^i * Di)
+ * R = sum(2^-i * Di) with 0<=i<N
+ *
+ * This approach works well for triple parity and it's very efficient,
+ * because we can implement very fast parallel multiplications and
+ * divisions by 2 in GF(2^8).
+ *
+ * It's also similar to the approach used by ZFS RAIDZ3, with the
+ * difference that ZFS uses powers of 4 instead of 2^-1.
+ *
+ * Unfortunately it doesn't work beyond triple parity, because whatever
+ * value we choose to generate the power coefficients to compute other
+ * parities, the resulting equations are not solvable for some
+ * combinations of missing disks.
+ *
+ * This is expected, because the Vandermonde matrix used to compute the
+ * parity is not guaranteed to have all submatrices non-singular
+ * [2, Chap 11, Problem 7], and this is a requirement for
+ * an MDS (Maximum Distance Separable) code [2, Chap 11, Theorem 8].
+ *
+ * To overcome this limitation, we use a Cauchy matrix [3][4] to compute
+ * the parity. A Cauchy matrix has the property that all its square
+ * submatrices are non-singular, resulting in equations that are always
+ * solvable, for any combination of missing disks.
+ *
+ * The problem with this approach is that it requires generic
+ * multiplications, not only by 2 or 2^-1, potentially hurting
+ * performance badly.
+ *
+ * Fortunately, there is a method to implement parallel multiplications
+ * using SSSE3 or AVX2 instructions [1][5] that is competitive with the
+ * computation of triple parity using power coefficients.
+ *
+ * Another important property of the Cauchy matrix is that we can set up
+ * the first two rows with coefficients equal to the RAID5 and RAID6
+ * approach described above, resulting in a compatible extension that
+ * requires SSSE3 or AVX2 instructions only if triple parity or beyond is used.
+ *
+ * The matrix is also adjusted, multiplying each row by a constant factor
+ * to make the first column all 1s, to optimize the computation for
+ * the first disk.
+ *
+ * This results in the matrix A[row,col] defined as:
+ *
+ * 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01...
+ * 01 02 04 08 10 20 40 80 1d 3a 74 e8 cd 87 13 26 4c 98 2d 5a b4 75...
+ * 01 f5 d2 c4 9a 71 f1 7f fc 87 c1 c6 19 2f 40 55 3d ba 53 04 9c 61...
+ * 01 bb a6 d7 c7 07 ce 82 4a 2f a5 9b b6 60 f1 ad e7 f4 06 d2 df 2e...
+ * 01 97 7f 9c 7c 18 bd a2 58 1a da 74 70 a3 e5 47 29 07 f5 80 23 e9...
+ * 01 2b 3f cf 73 2c d6 ed cb 74 15 78 8a c1 17 c9 89 68 21 ab 76 3b...
+ *
+ * This matrix supports 6 levels of parity, one for each row, for up to 251
+ * data disks, one for each column, with all the 377,342,351,231 square
+ * submatrices non-singular, also verified with brute force.
+ *
+ * This matrix can be extended to support any number of parities, by
+ * adding additional rows and removing one column for each new row
+ * (see mktables.c for more details on how the matrix is generated).
+ *
+ * In detail, parity is computed as:
+ *
+ * P = sum(Di)
+ * Q = sum(2^i * Di)
+ * R = sum(A[2,i] * Di)
+ * S = sum(A[3,i] * Di)
+ * T = sum(A[4,i] * Di)
+ * U = sum(A[5,i] * Di) with 0<=i<N
+ *
+ * To recover from a failure of six disks at indexes x,y,z,h,v,w,
+ * with 0<=x<y<z<h<v<w<N, we compute the parity of the available N-6
+ * disks as:
+ *
+ * Pa = sum(Di)
+ * Qa = sum(2^i * Di)
+ * Ra = sum(A[2,i] * Di)
+ * Sa = sum(A[3,i] * Di)
+ * Ta = sum(A[4,i] * Di)
+ * Ua = sum(A[5,i] * Di) with 0<=i<N,i!=x,i!=y,i!=z,i!=h,i!=v,i!=w.
+ *
+ * And if we define:
+ *
+ * Pd = Pa + P
+ * Qd = Qa + Q
+ * Rd = Ra + R
+ * Sd = Sa + S
+ * Td = Ta + T
+ * Ud = Ua + U
+ *
+ * we can sum these two sets of equations, obtaining:
+ *
+ * Pd = Dx + Dy + Dz + Dh + Dv + Dw
+ * Qd = 2^x * Dx + 2^y * Dy + 2^z * Dz + 2^h * Dh + 2^v * Dv + 2^w * Dw
+ * Rd = A[2,x] * Dx + A[2,y] * Dy + A[2,z] * Dz + A[2,h] * Dh + A[2,v] * Dv + A[2,w] * Dw
+ * Sd = A[3,x] * Dx + A[3,y] * Dy + A[3,z] * Dz + A[3,h] * Dh + A[3,v] * Dv + A[3,w] * Dw
+ * Td = A[4,x] * Dx + A[4,y] * Dy + A[4,z] * Dz + A[4,h] * Dh + A[4,v] * Dv + A[4,w] * Dw
+ * Ud = A[5,x] * Dx + A[5,y] * Dy + A[5,z] * Dz + A[5,h] * Dh + A[5,v] * Dv + A[5,w] * Dw
+ *
+ * This linear system is always solvable because the coefficient matrix is
+ * never singular, due to the properties of the matrix A[].
+ *
+ * The resulting speed on x64, with 8 data disks and a stripe of 256 KiB,
+ * for a Core i5-4670K Haswell Quad-Core 3.4GHz, is:
+ *
+ * int8 int32 int64 sse2 ssse3 avx2
+ * gen1 13339 25438 45438 50588
+ * gen2 4115 6514 21840 32201
+ * gen3 814 10154 18613
+ * gen4 620 7569 14229
+ * gen5 496 5149 10051
+ * gen6 413 4239 8190
+ *
+ * Values are in MiB/s of data processed by a single thread, not counting
+ * generated parity.
+ *
+ * You can replicate these results on your machine using the
+ * "raid/test/speedtest.c" program.
+ *
+ * For comparison, the triple parity computation using the power
+ * coefficients "1,2,2^-1" is only a little faster than the one based on
+ * the Cauchy matrix if SSSE3 or AVX2 is present.
+ *
+ * int8 int32 int64 sse2 ssse3 avx2
+ * genz 2337 2874 10920 18944
+ *
+ * In conclusion, the use of power coefficients, and specifically powers
+ * of 1,2,2^-1, is the best option to implement triple parity on CPUs
+ * without SSSE3 and AVX2.
+ * But if a modern CPU with SSSE3 or AVX2 is available, the Cauchy
+ * matrix is the best option because it provides a fast and general
+ * approach working for any number of parities.
+ *
+ * References:
+ * [1] Anvin, "The mathematics of RAID-6", 2004
+ * [2] MacWilliams, Sloane, "The Theory of Error-Correcting Codes", 1977
+ * [3] Blomer, "An XOR-Based Erasure-Resilient Coding Scheme", 1995
+ * [4] Roth, "Introduction to Coding Theory", 2006
+ * [5] Plank, "Screaming Fast Galois Field Arithmetic Using Intel SIMD Instructions", 2013
+ */
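+
+/*
+ * Editor's sketch (not part of the original library): the P and Q equations
+ * above worked out byte by byte in GF(2^8) with the polynomial 0x11d (285).
+ * The helper name gf2x_mul2() and the sample values are hypothetical and
+ * only illustrate Q = sum(2^i * Di) evaluated with Horner's rule:
+ *
+ *	#include <stdint.h>
+ *	#include <stdio.h>
+ *
+ *	static uint8_t gf2x_mul2(uint8_t a)
+ *	{
+ *		// multiply by 2 modulo x^8 + x^4 + x^3 + x^2 + 1
+ *		return (a << 1) ^ ((a & 0x80) ? 0x1d : 0x00);
+ *	}
+ *
+ *	int main(void)
+ *	{
+ *		uint8_t d[3] = { 0x0a, 0x33, 0xc1 };	// D0, D1, D2
+ *		uint8_t p = 0, q = 0;
+ *
+ *		// P = D0 + D1 + D2, Q = ((D2 * 2) + D1) * 2 + D0
+ *		for (int i = 2; i >= 0; --i) {
+ *			p ^= d[i];
+ *			q = gf2x_mul2(q) ^ d[i];
+ *		}
+ *		printf("P = %02x, Q = %02x\n", p, q);
+ *		return 0;
+ *	}
+ */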
+
+/**
+ * Generator matrix currently used.
+ */
+const uint8_t (*raid_gfgen)[256];
+
+void raid_mode(int mode)
+{
+ if (mode == RAID_MODE_VANDERMONDE) {
+ raid_gen_ptr[2] = raid_genz_ptr;
+ raid_gfgen = gfvandermonde;
+ } else {
+ raid_gen_ptr[2] = raid_gen3_ptr;
+ raid_gfgen = gfcauchy;
+ }
+}
+
+/**
+ * Buffer filled with 0 used in recovering.
+ */
+static void *raid_zero_block;
+
+void raid_zero(void *zero)
+{
+ raid_zero_block = zero;
+}
+
+/*
+ * Forwarders for parity computation.
+ *
+ * These functions compute the parity blocks from the provided data.
+ *
+ * The number of parities to compute is implicit in the position in the
+ * forwarder vector. The entry at index #i computes (#i+1) parities.
+ *
+ * All these functions guarantee that parities are written
+ * in order: first parity P, then parity Q, and so on.
+ * This allows specifying the same memory buffer for multiple parities,
+ * knowing that you'll get the latest one written.
+ * This characteristic is used by the raid_delta_gen() function to
+ * avoid damaging unused parities while recovering.
+ *
+ * @nd Number of data blocks
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ * It has (@nd + #parities) elements. The starting elements are the blocks
+ * for data, followed by the parity blocks.
+ * Each block has @size bytes.
+ */
+void (*raid_gen_ptr[RAID_PARITY_MAX])(int nd, size_t size, void **vv);
+void (*raid_gen3_ptr)(int nd, size_t size, void **vv);
+void (*raid_genz_ptr)(int nd, size_t size, void **vv);
+
+void raid_gen(int nd, int np, size_t size, void **v)
+{
+ /* enforce limit on size */
+ BUG_ON(size % 64 != 0);
+
+ /* enforce limit on number of failures */
+ BUG_ON(np < 1);
+ BUG_ON(np > RAID_PARITY_MAX);
+
+ raid_gen_ptr[np - 1](nd, size, v);
+}
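+
+/*
+ * Editor's sketch: a minimal raid_gen() call computing two parities over
+ * four data blocks. The buffer names are hypothetical; raid_init() must
+ * have been called first and @size must be a multiple of 64:
+ *
+ *	void *v[4 + 2] = { d0, d1, d2, d3, parity_p, parity_q };
+ *
+ *	raid_gen(4, 2, size, v);	// writes P into v[4] and Q into v[5]
+ */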
+
+/**
+ * Inverts the square matrix M of size nxn into V.
+ *
+ * This is not a general matrix inversion because we assume the matrix M
+ * to have all its square submatrices non-singular.
+ * We use Gaussian elimination to invert.
+ *
+ * @M Matrix to invert with @n rows and @n columns.
+ * @V Destination matrix where the result is put.
+ * @n Number of rows and columns of the matrix.
+ */
+void raid_invert(uint8_t *M, uint8_t *V, int n)
+{
+ int i, j, k;
+
+ /* set the identity matrix in V */
+ for (i = 0; i < n; ++i)
+ for (j = 0; j < n; ++j)
+ V[i * n + j] = i == j;
+
+ /* for each element in the diagonal */
+ for (k = 0; k < n; ++k) {
+ uint8_t f;
+
+ /* the diagonal element cannot be 0 because */
+ /* we are inverting matrices with all the square */
+ /* submatrices not singular */
+ BUG_ON(M[k * n + k] == 0);
+
+ /* make the diagonal element to be 1 */
+ f = inv(M[k * n + k]);
+ for (j = 0; j < n; ++j) {
+ M[k * n + j] = mul(f, M[k * n + j]);
+ V[k * n + j] = mul(f, V[k * n + j]);
+ }
+
+		/* make all the elements above and below the diagonal */
+		/* equal to zero */
+ for (i = 0; i < n; ++i) {
+ if (i == k)
+ continue;
+ f = M[i * n + k];
+ for (j = 0; j < n; ++j) {
+ M[i * n + j] ^= mul(f, M[k * n + j]);
+ V[i * n + j] ^= mul(f, V[k * n + j]);
+ }
+ }
+ }
+}
+
+/**
+ * Computes the parity without the missing data blocks
+ * and stores it in the buffers of those data blocks.
+ *
+ * This is the parity expressed as Pa,Qa,Ra,Sa,Ta,Ua in the equations.
+ */
+void raid_delta_gen(int nr, int *id, int *ip, int nd, size_t size, void **v)
+{
+ void *p[RAID_PARITY_MAX];
+ void *pa[RAID_PARITY_MAX];
+ int i, j;
+ int np;
+ void *latest;
+
+ /* total number of parities we are going to process */
+ /* they are both the used and the unused ones */
+ np = ip[nr - 1] + 1;
+
+ /* latest missing data block */
+ latest = v[id[nr - 1]];
+
+ /* setup pointers for delta computation */
+ for (i = 0, j = 0; i < np; ++i) {
+ /* keep a copy of the original parity vector */
+ p[i] = v[nd + i];
+
+ if (ip[j] == i) {
+ /*
+ * Set used parities to point to the missing
+ * data blocks.
+ *
+ * The related data blocks are instead set
+ * to point to the "zero" buffer.
+ */
+
+			/* the last parity to use ends the for loop, so */
+			/* we can never process more than nr of them */
+ BUG_ON(j >= nr);
+
+ /* buffer for missing data blocks */
+ pa[j] = v[id[j]];
+
+ /* set at zero the missing data blocks */
+ v[id[j]] = raid_zero_block;
+
+ /* compute the parity over the missing data blocks */
+ v[nd + i] = pa[j];
+
+ /* check for the next used entry */
+ ++j;
+ } else {
+ /*
+			 * Unused parities are going to be rewritten with
+			 * meaningless data, because we don't have
+			 * functions able to compute only a subset of
+			 * parities.
+			 *
+			 * To avoid this, we reuse parity buffers,
+			 * assuming that all the parity functions write
+			 * parities in order.
+			 *
+			 * We assign the unused parity block to the same
+			 * buffer as the latest used parity, which we know
+			 * will be written.
+ *
+ * This means that this block will be written
+ * multiple times and only the latest write will
+ * contain the correct data.
+ */
+ v[nd + i] = latest;
+ }
+ }
+
+ /* all the parities have to be processed */
+ BUG_ON(j != nr);
+
+ /* recompute the parity, note that np may be smaller than the */
+ /* total number of parities available */
+ raid_gen(nd, np, size, v);
+
+ /* restore data buffers as before */
+ for (j = 0; j < nr; ++j)
+ v[id[j]] = pa[j];
+
+ /* restore parity buffers as before */
+ for (i = 0; i < np; ++i)
+ v[nd + i] = p[i];
+}
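+
+/*
+ * Editor's note, a small worked instance of the buffer shuffling above:
+ * with nd = 4, one missing data block id[0] = 2 recovered via parity
+ * ip[0] = 0 (P), raid_delta_gen() temporarily points v[2] at the zero
+ * buffer and v[4] (P) at the old contents of v[2], so raid_gen() writes
+ * Pa = D0 + D1 + D3 into the missing block's buffer, and then v[] is
+ * restored as before.
+ */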
+
+/**
+ * Recover failure of one data block for PAR1.
+ *
+ * Starting from the equation:
+ *
+ * Pd = Dx
+ *
+ * and solving we get:
+ *
+ * Dx = Pd
+ */
+void raid_rec1of1(int *id, int nd, size_t size, void **v)
+{
+ void *p;
+ void *pa;
+
+ /* for PAR1 we can directly compute the missing block */
+ /* and we don't need to use the zero buffer */
+ p = v[nd];
+ pa = v[id[0]];
+
+ /* use the parity as missing data block */
+ v[id[0]] = p;
+
+ /* compute the parity over the missing data block */
+ v[nd] = pa;
+
+ /* compute */
+ raid_gen(nd, 1, size, v);
+
+ /* restore as before */
+ v[id[0]] = pa;
+ v[nd] = p;
+}
+
+/**
+ * Recover failure of two data blocks for PAR2.
+ *
+ * Starting from the equations:
+ *
+ * Pd = Dx + Dy
+ * Qd = 2^id[0] * Dx + 2^id[1] * Dy
+ *
+ * and solving we get:
+ *
+ * 1 2^(-id[0])
+ * Dy = ------------------- * Pd + ------------------- * Qd
+ * 2^(id[1]-id[0]) + 1 2^(id[1]-id[0]) + 1
+ *
+ * Dx = Dy + Pd
+ *
+ * with conditions:
+ *
+ * 2^id[0] != 0
+ * 2^(id[1]-id[0]) + 1 != 0
+ *
+ * That are always satisfied for any 0<=id[0]<id[1]<255.
+ */
+void raid_rec2of2_int8(int *id, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ size_t i;
+ uint8_t *p;
+ uint8_t *pa;
+ uint8_t *q;
+ uint8_t *qa;
+ const uint8_t *T[2];
+
+ /* get multiplication tables */
+ T[0] = table(inv(pow2(id[1] - id[0]) ^ 1));
+ T[1] = table(inv(pow2(id[0]) ^ pow2(id[1])));
+
+ /* compute delta parity */
+ raid_delta_gen(2, id, ip, nd, size, vv);
+
+ p = v[nd];
+ q = v[nd + 1];
+ pa = v[id[0]];
+ qa = v[id[1]];
+
+ for (i = 0; i < size; ++i) {
+ /* delta */
+ uint8_t Pd = p[i] ^ pa[i];
+ uint8_t Qd = q[i] ^ qa[i];
+
+ /* reconstruct */
+ uint8_t Dy = T[0][Pd] ^ T[1][Qd];
+ uint8_t Dx = Pd ^ Dy;
+
+ /* set */
+ pa[i] = Dx;
+ qa[i] = Dy;
+ }
+}
+
+/*
+ * Forwarders for data recovery.
+ *
+ * These functions recover data blocks using the specified parity
+ * to recompute the missing data.
+ *
+ * Note that the format of vectors @id/@ip differs from that of raid_rec().
+ * For example, in the vector @ip the first parity is represented with the
+ * value 0 and not @nd.
+ *
+ * @nr Number of failed data blocks to recover.
+ * @id[] Vector of @nr indexes of the data blocks to recover.
+ * The indexes start from 0. They must be in order.
+ * @ip[] Vector of @nr indexes of the parity blocks to use in the recovering.
+ * The indexes start from 0. They must be in order.
+ * @nd Number of data blocks.
+ * @np Number of parity blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ * It has (@nd + @np) elements. The starting elements are the blocks
+ * for data, followed by the parity blocks.
+ * Each block has @size bytes.
+ */
+void (*raid_rec_ptr[RAID_PARITY_MAX])(
+ int nr, int *id, int *ip, int nd, size_t size, void **vv);
+
+void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v)
+{
+ int nrd; /* number of data blocks to recover */
+ int nrp; /* number of parity blocks to recover */
+
+ /* enforce limit on size */
+ BUG_ON(size % 64 != 0);
+
+ /* enforce limit on number of failures */
+ BUG_ON(nr > np);
+ BUG_ON(np > RAID_PARITY_MAX);
+
+ /* enforce order in index vector */
+ BUG_ON(nr >= 2 && ir[0] >= ir[1]);
+ BUG_ON(nr >= 3 && ir[1] >= ir[2]);
+ BUG_ON(nr >= 4 && ir[2] >= ir[3]);
+ BUG_ON(nr >= 5 && ir[3] >= ir[4]);
+ BUG_ON(nr >= 6 && ir[4] >= ir[5]);
+
+ /* enforce limit on index vector */
+ BUG_ON(nr > 0 && ir[nr-1] >= nd + np);
+
+ /* count the number of data blocks to recover */
+ nrd = 0;
+ while (nrd < nr && ir[nrd] < nd)
+ ++nrd;
+
+ /* all the remaining are parity */
+ nrp = nr - nrd;
+
+ /* enforce limit on number of failures */
+ BUG_ON(nrd > nd);
+ BUG_ON(nrp > np);
+
+ /* if failed data is present */
+ if (nrd != 0) {
+ int ip[RAID_PARITY_MAX];
+ int i, j, k;
+
+ /* setup the vector of parities to use */
+ for (i = 0, j = 0, k = 0; i < np; ++i) {
+ if (j < nrp && ir[nrd + j] == nd + i) {
+ /* this parity has to be recovered */
+ ++j;
+ } else {
+ /* this parity is used for recovering */
+ ip[k] = i;
+ ++k;
+ }
+ }
+
+ /* recover the nrd data blocks specified in ir[], */
+ /* using the first nrd parity in ip[] for recovering */
+ raid_rec_ptr[nrd - 1](nrd, ir, ip, nd, size, v);
+ }
+
+ /* recompute all the parities up to the last bad one */
+ if (nrp != 0)
+ raid_gen(nd, ir[nr - 1] - nd + 1, size, v);
+}
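+
+/*
+ * Editor's sketch: recovering one lost data block and one lost parity block
+ * with raid_rec(). With nd = 4 data blocks and np = 2 parities, losing data
+ * block 1 and parity Q (position nd + 1 = 5) gives, assuming a zero buffer
+ * zero_buf of @size bytes was prepared beforehand:
+ *
+ *	int ir[2] = { 1, 5 };		// in order, parity indexed from nd
+ *
+ *	raid_zero(zero_buf);
+ *	raid_rec(2, ir, 4, 2, size, v);	// rebuilds v[1], then regenerates Q
+ */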
+
+void raid_data(int nr, int *id, int *ip, int nd, size_t size, void **v)
+{
+ /* enforce limit on size */
+ BUG_ON(size % 64 != 0);
+
+ /* enforce limit on number of failures */
+ BUG_ON(nr > nd);
+ BUG_ON(nr > RAID_PARITY_MAX);
+
+ /* enforce order in index vector for data */
+ BUG_ON(nr >= 2 && id[0] >= id[1]);
+ BUG_ON(nr >= 3 && id[1] >= id[2]);
+ BUG_ON(nr >= 4 && id[2] >= id[3]);
+ BUG_ON(nr >= 5 && id[3] >= id[4]);
+ BUG_ON(nr >= 6 && id[4] >= id[5]);
+
+ /* enforce limit on index vector for data */
+ BUG_ON(nr > 0 && id[nr-1] >= nd);
+
+ /* enforce order in index vector for parity */
+ BUG_ON(nr >= 2 && ip[0] >= ip[1]);
+ BUG_ON(nr >= 3 && ip[1] >= ip[2]);
+ BUG_ON(nr >= 4 && ip[2] >= ip[3]);
+ BUG_ON(nr >= 5 && ip[3] >= ip[4]);
+ BUG_ON(nr >= 6 && ip[4] >= ip[5]);
+
+ /* if failed data is present */
+ if (nr != 0)
+ raid_rec_ptr[nr - 1](nr, id, ip, nd, size, v);
+}
+
diff --git a/c_src/raid/raid.h b/c_src/raid/raid.h
new file mode 100644
index 00000000..aeeb39f3
--- /dev/null
+++ b/c_src/raid/raid.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_H
+#define __RAID_H
+
+/**
+ * RAID mode supporting up to 6 parities.
+ *
+ * It requires SSSE3 to get good performance with triple or more parities.
+ *
+ * This is the default mode set after calling raid_init().
+ */
+#define RAID_MODE_CAUCHY 0
+
+/**
+ * RAID mode supporting up to 3 parities.
+ *
+ * It has a fast triple parity implementation without SSSE3, but it cannot
+ * go beyond triple parity.
+ *
+ * This is mostly intended for low-end CPUs like ARM and AMD Athlon.
+ */
+#define RAID_MODE_VANDERMONDE 1
+
+/**
+ * Maximum number of parity disks supported.
+ */
+#define RAID_PARITY_MAX 6
+
+/**
+ * Maximum number of data disks supported.
+ */
+#define RAID_DATA_MAX 251
+
+/**
+ * Initializes the RAID system.
+ *
+ * You must call this function before any other.
+ *
+ * The RAID system is initialized in the RAID_MODE_CAUCHY mode.
+ */
+void raid_init(void);
+
+/**
+ * Runs a basic functionality self test.
+ *
+ * The test is immediate, and it's intended to be run at application
+ * startup to check the integrity of the RAID system.
+ *
+ * It returns 0 on success.
+ */
+int raid_selftest(void);
+
+/**
+ * Sets the mode to use. One of RAID_MODE_*.
+ *
+ * You can change mode at any time; it will affect subsequent calls to raid_gen(),
+ * raid_rec() and raid_data().
+ *
+ * The two modes are compatible for the first two levels of parity.
+ * The third one is different.
+ */
+void raid_mode(int mode);
+
+/**
+ * Sets the zero buffer to use in recovering.
+ *
+ * Before calling raid_rec() and raid_data() you must provide a memory
+ * buffer filled with zeros, with the same size as the blocks to recover.
+ *
+ * This buffer is only read and never written.
+ */
+void raid_zero(void *zero);
+
+/**
+ * Computes parity blocks.
+ *
+ * This function computes the specified number of parity blocks for the
+ * provided set of data blocks.
+ *
+ * Each parity block allows recovering one data block.
+ *
+ * @nd Number of data blocks.
+ * @np Number of parity blocks to compute.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ * It has (@nd + @np) elements. The starting elements are the blocks for
+ * data, followed by the parity blocks.
+ * Data blocks are only read and not modified. Parity blocks are written.
+ * Each block has @size bytes.
+ */
+void raid_gen(int nd, int np, size_t size, void **v);
+
+/**
+ * Recovers failures in data and parity blocks.
+ *
+ * This function recovers all the data and parity blocks marked as bad
+ * in the @ir vector.
+ *
+ * Ensure that @nr <= @np, otherwise recovering is not possible.
+ *
+ * The parity blocks used for recovering are automatically selected from
+ * the ones NOT present in the @ir vector.
+ *
+ * In case there are more parity blocks than needed, the parities at lower
+ * indexes are used for recovering, and the others are ignored.
+ *
+ * Note that no internal integrity check is done when recovering. If the
+ * provided parities are correct, the resulting data will be correct.
+ * If parities are wrong, the resulting recovered data will be wrong.
+ * This happens even when you have more parity blocks than needed,
+ * and some form of integrity verification would be possible.
+ *
+ * @nr Number of failed data and parity blocks to recover.
+ * @ir[] Vector of @nr indexes of the failed data and parity blocks.
+ * The indexes start from 0. They must be in order.
+ * The first parity is represented with value @nd, the second with value
+ * @nd + 1, just like positions in the @v vector.
+ * @nd Number of data blocks.
+ * @np Number of parity blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ * It has (@nd + @np) elements. The starting elements are the blocks
+ * for data, followed by the parity blocks.
+ * Each block has @size bytes.
+ */
+void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v);
+
+/**
+ * Recovers failures in data blocks only.
+ *
+ * This function recovers all the data blocks marked as bad in the @id vector.
+ * The parity blocks are not modified.
+ *
+ * @nr Number of failed data blocks to recover.
+ * @id[] Vector of @nr indexes of the data blocks to recover.
+ * The indexes start from 0. They must be in order.
+ * @ip[] Vector of @nr indexes of the parity blocks to use for recovering.
+ * The indexes start from 0. They must be in order.
+ * @nd Number of data blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ * It has (@nd + @ip[@nr - 1] + 1) elements. The starting elements are the
+ * blocks for data, followed by the parity blocks.
+ * Each block has @size bytes.
+ */
+void raid_data(int nr, int *id, int *ip, int nd, size_t size, void **v);
+
+/**
+ * Checks the provided failed block combination.
+ *
+ * This function checks if the specified failed blocks combination satisfies
+ * the redundancy information. A combination is assumed to match if the
+ * remaining valid parity matches the expected value after recovering.
+ *
+ * The number of failed blocks @nr must be strictly less than the number of
+ * parities @np, because you need one more parity to validate the recovering.
+ *
+ * No data or parity blocks are modified.
+ *
+ * @nr Number of failed data and parity blocks.
+ * @ir[] Vector of @nr indexes of the failed data and parity blocks.
+ * The indexes start from 0. They must be in order.
+ * The first parity is represented with value @nd, the second with value
+ * @nd + 1, just like positions in the @v vector.
+ * @nd Number of data blocks.
+ * @np Number of parity blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ * It has (@nd + @np) elements. The starting elements are the blocks
+ * for data, followed by the parity blocks.
+ * Each block has @size bytes.
+ * @return 0 if the check is satisfied. -1 otherwise.
+ */
+int raid_check(int nr, int *ir, int nd, int np, size_t size, void **v);
+
+/**
+ * Scan for failed blocks.
+ *
+ * This function identifies the failed data and parity blocks using the
+ * available redundancy.
+ *
+ * It uses a brute force method, so the call can be expensive.
+ * The expected execution time is proportional to the binomial coefficient
+ * (@np + @nd) choose (@np - 1), usually written as:
+ *
+ * ( @np + @nd )
+ * ( )
+ * ( @np - 1 )
+ *
+ * No data or parity blocks are modified.
+ *
+ * The failed block indexes are returned in the @ir vector.
+ * It must have space for at least @np - 1 values.
+ *
+ * The returned @ir vector can then be used in a raid_rec() call to recover
+ * the failed data and parity blocks.
+ *
+ * @ir[] Vector filled with the indexes of the failed data and parity blocks.
+ * The indexes start from 0 and they are in order.
+ * The first parity is represented with value @nd, the second with value
+ * @nd + 1, just like positions in the @v vector.
+ * @nd Number of data blocks.
+ * @np Number of parity blocks.
+ * @size Size of the blocks pointed to by @v. It must be a multiple of 64.
+ * @v Vector of pointers to the blocks of data and parity.
+ * It has (@nd + @np) elements. The starting elements are the blocks
+ * for data, followed by the parity blocks.
+ * Each block has @size bytes.
+ * @return Number of block indexes returned in the @ir vector.
+ * 0 if no error is detected.
+ * -1 if it's not possible to identify the failed disks.
+ */
+int raid_scan(int *ir, int nd, int np, size_t size, void **v);
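+
+/*
+ * Editor's sketch: a typical scan-then-recover sequence with this API.
+ * The buffer names are hypothetical; every block must be a multiple of
+ * 64 bytes and the zero buffer must stay valid while recovering:
+ *
+ *	int ir[RAID_PARITY_MAX];
+ *	int n;
+ *
+ *	raid_init();
+ *	raid_zero(zero_buf);
+ *	n = raid_scan(ir, nd, np, size, v);
+ *	if (n > 0)
+ *		raid_rec(n, ir, nd, np, size, v);
+ */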
+
+#endif
+
diff --git a/c_src/raid/tables.c b/c_src/raid/tables.c
new file mode 100644
index 00000000..49035022
--- /dev/null
+++ b/c_src/raid/tables.c
@@ -0,0 +1,14696 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+
+const uint8_t __aligned(256) raid_gfmul[256][256] =
+{
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ },
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+ },
+ {
+ 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
+ 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
+ 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
+ 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
+ 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e,
+ 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+ 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e,
+ 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
+ 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e,
+ 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
+ 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae,
+ 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+ 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce,
+ 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
+ 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee,
+ 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+ 0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13,
+ 0x0d, 0x0f, 0x09, 0x0b, 0x05, 0x07, 0x01, 0x03,
+ 0x3d, 0x3f, 0x39, 0x3b, 0x35, 0x37, 0x31, 0x33,
+ 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, 0x21, 0x23,
+ 0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53,
+ 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43,
+ 0x7d, 0x7f, 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73,
+ 0x6d, 0x6f, 0x69, 0x6b, 0x65, 0x67, 0x61, 0x63,
+ 0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, 0x91, 0x93,
+ 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83,
+ 0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3,
+ 0xad, 0xaf, 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3,
+ 0xdd, 0xdf, 0xd9, 0xdb, 0xd5, 0xd7, 0xd1, 0xd3,
+ 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, 0xc1, 0xc3,
+ 0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3,
+ 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3,
+ },
+ {
+ 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09,
+ 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
+ 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39,
+ 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
+ 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69,
+ 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
+ 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59,
+ 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
+ 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9,
+ 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
+ 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9,
+ 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
+ 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9,
+ 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
+ 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99,
+ 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
+ 0x9d, 0x9e, 0x9b, 0x98, 0x91, 0x92, 0x97, 0x94,
+ 0x85, 0x86, 0x83, 0x80, 0x89, 0x8a, 0x8f, 0x8c,
+ 0xad, 0xae, 0xab, 0xa8, 0xa1, 0xa2, 0xa7, 0xa4,
+ 0xb5, 0xb6, 0xb3, 0xb0, 0xb9, 0xba, 0xbf, 0xbc,
+ 0xfd, 0xfe, 0xfb, 0xf8, 0xf1, 0xf2, 0xf7, 0xf4,
+ 0xe5, 0xe6, 0xe3, 0xe0, 0xe9, 0xea, 0xef, 0xec,
+ 0xcd, 0xce, 0xcb, 0xc8, 0xc1, 0xc2, 0xc7, 0xc4,
+ 0xd5, 0xd6, 0xd3, 0xd0, 0xd9, 0xda, 0xdf, 0xdc,
+ 0x5d, 0x5e, 0x5b, 0x58, 0x51, 0x52, 0x57, 0x54,
+ 0x45, 0x46, 0x43, 0x40, 0x49, 0x4a, 0x4f, 0x4c,
+ 0x6d, 0x6e, 0x6b, 0x68, 0x61, 0x62, 0x67, 0x64,
+ 0x75, 0x76, 0x73, 0x70, 0x79, 0x7a, 0x7f, 0x7c,
+ 0x3d, 0x3e, 0x3b, 0x38, 0x31, 0x32, 0x37, 0x34,
+ 0x25, 0x26, 0x23, 0x20, 0x29, 0x2a, 0x2f, 0x2c,
+ 0x0d, 0x0e, 0x0b, 0x08, 0x01, 0x02, 0x07, 0x04,
+ 0x15, 0x16, 0x13, 0x10, 0x19, 0x1a, 0x1f, 0x1c,
+ },
+ {
+ 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
+ 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c,
+ 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54, 0x58, 0x5c,
+ 0x60, 0x64, 0x68, 0x6c, 0x70, 0x74, 0x78, 0x7c,
+ 0x80, 0x84, 0x88, 0x8c, 0x90, 0x94, 0x98, 0x9c,
+ 0xa0, 0xa4, 0xa8, 0xac, 0xb0, 0xb4, 0xb8, 0xbc,
+ 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc,
+ 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc,
+ 0x1d, 0x19, 0x15, 0x11, 0x0d, 0x09, 0x05, 0x01,
+ 0x3d, 0x39, 0x35, 0x31, 0x2d, 0x29, 0x25, 0x21,
+ 0x5d, 0x59, 0x55, 0x51, 0x4d, 0x49, 0x45, 0x41,
+ 0x7d, 0x79, 0x75, 0x71, 0x6d, 0x69, 0x65, 0x61,
+ 0x9d, 0x99, 0x95, 0x91, 0x8d, 0x89, 0x85, 0x81,
+ 0xbd, 0xb9, 0xb5, 0xb1, 0xad, 0xa9, 0xa5, 0xa1,
+ 0xdd, 0xd9, 0xd5, 0xd1, 0xcd, 0xc9, 0xc5, 0xc1,
+ 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, 0xe5, 0xe1,
+ 0x3a, 0x3e, 0x32, 0x36, 0x2a, 0x2e, 0x22, 0x26,
+ 0x1a, 0x1e, 0x12, 0x16, 0x0a, 0x0e, 0x02, 0x06,
+ 0x7a, 0x7e, 0x72, 0x76, 0x6a, 0x6e, 0x62, 0x66,
+ 0x5a, 0x5e, 0x52, 0x56, 0x4a, 0x4e, 0x42, 0x46,
+ 0xba, 0xbe, 0xb2, 0xb6, 0xaa, 0xae, 0xa2, 0xa6,
+ 0x9a, 0x9e, 0x92, 0x96, 0x8a, 0x8e, 0x82, 0x86,
+ 0xfa, 0xfe, 0xf2, 0xf6, 0xea, 0xee, 0xe2, 0xe6,
+ 0xda, 0xde, 0xd2, 0xd6, 0xca, 0xce, 0xc2, 0xc6,
+ 0x27, 0x23, 0x2f, 0x2b, 0x37, 0x33, 0x3f, 0x3b,
+ 0x07, 0x03, 0x0f, 0x0b, 0x17, 0x13, 0x1f, 0x1b,
+ 0x67, 0x63, 0x6f, 0x6b, 0x77, 0x73, 0x7f, 0x7b,
+ 0x47, 0x43, 0x4f, 0x4b, 0x57, 0x53, 0x5f, 0x5b,
+ 0xa7, 0xa3, 0xaf, 0xab, 0xb7, 0xb3, 0xbf, 0xbb,
+ 0x87, 0x83, 0x8f, 0x8b, 0x97, 0x93, 0x9f, 0x9b,
+ 0xe7, 0xe3, 0xef, 0xeb, 0xf7, 0xf3, 0xff, 0xfb,
+ 0xc7, 0xc3, 0xcf, 0xcb, 0xd7, 0xd3, 0xdf, 0xdb,
+ },
+ {
+ 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b,
+ 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33,
+ 0x50, 0x55, 0x5a, 0x5f, 0x44, 0x41, 0x4e, 0x4b,
+ 0x78, 0x7d, 0x72, 0x77, 0x6c, 0x69, 0x66, 0x63,
+ 0xa0, 0xa5, 0xaa, 0xaf, 0xb4, 0xb1, 0xbe, 0xbb,
+ 0x88, 0x8d, 0x82, 0x87, 0x9c, 0x99, 0x96, 0x93,
+ 0xf0, 0xf5, 0xfa, 0xff, 0xe4, 0xe1, 0xee, 0xeb,
+ 0xd8, 0xdd, 0xd2, 0xd7, 0xcc, 0xc9, 0xc6, 0xc3,
+ 0x5d, 0x58, 0x57, 0x52, 0x49, 0x4c, 0x43, 0x46,
+ 0x75, 0x70, 0x7f, 0x7a, 0x61, 0x64, 0x6b, 0x6e,
+ 0x0d, 0x08, 0x07, 0x02, 0x19, 0x1c, 0x13, 0x16,
+ 0x25, 0x20, 0x2f, 0x2a, 0x31, 0x34, 0x3b, 0x3e,
+ 0xfd, 0xf8, 0xf7, 0xf2, 0xe9, 0xec, 0xe3, 0xe6,
+ 0xd5, 0xd0, 0xdf, 0xda, 0xc1, 0xc4, 0xcb, 0xce,
+ 0xad, 0xa8, 0xa7, 0xa2, 0xb9, 0xbc, 0xb3, 0xb6,
+ 0x85, 0x80, 0x8f, 0x8a, 0x91, 0x94, 0x9b, 0x9e,
+ 0xba, 0xbf, 0xb0, 0xb5, 0xae, 0xab, 0xa4, 0xa1,
+ 0x92, 0x97, 0x98, 0x9d, 0x86, 0x83, 0x8c, 0x89,
+ 0xea, 0xef, 0xe0, 0xe5, 0xfe, 0xfb, 0xf4, 0xf1,
+ 0xc2, 0xc7, 0xc8, 0xcd, 0xd6, 0xd3, 0xdc, 0xd9,
+ 0x1a, 0x1f, 0x10, 0x15, 0x0e, 0x0b, 0x04, 0x01,
+ 0x32, 0x37, 0x38, 0x3d, 0x26, 0x23, 0x2c, 0x29,
+ 0x4a, 0x4f, 0x40, 0x45, 0x5e, 0x5b, 0x54, 0x51,
+ 0x62, 0x67, 0x68, 0x6d, 0x76, 0x73, 0x7c, 0x79,
+ 0xe7, 0xe2, 0xed, 0xe8, 0xf3, 0xf6, 0xf9, 0xfc,
+ 0xcf, 0xca, 0xc5, 0xc0, 0xdb, 0xde, 0xd1, 0xd4,
+ 0xb7, 0xb2, 0xbd, 0xb8, 0xa3, 0xa6, 0xa9, 0xac,
+ 0x9f, 0x9a, 0x95, 0x90, 0x8b, 0x8e, 0x81, 0x84,
+ 0x47, 0x42, 0x4d, 0x48, 0x53, 0x56, 0x59, 0x5c,
+ 0x6f, 0x6a, 0x65, 0x60, 0x7b, 0x7e, 0x71, 0x74,
+ 0x17, 0x12, 0x1d, 0x18, 0x03, 0x06, 0x09, 0x0c,
+ 0x3f, 0x3a, 0x35, 0x30, 0x2b, 0x2e, 0x21, 0x24,
+ },
+ {
+ 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12,
+ 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22,
+ 0x60, 0x66, 0x6c, 0x6a, 0x78, 0x7e, 0x74, 0x72,
+ 0x50, 0x56, 0x5c, 0x5a, 0x48, 0x4e, 0x44, 0x42,
+ 0xc0, 0xc6, 0xcc, 0xca, 0xd8, 0xde, 0xd4, 0xd2,
+ 0xf0, 0xf6, 0xfc, 0xfa, 0xe8, 0xee, 0xe4, 0xe2,
+ 0xa0, 0xa6, 0xac, 0xaa, 0xb8, 0xbe, 0xb4, 0xb2,
+ 0x90, 0x96, 0x9c, 0x9a, 0x88, 0x8e, 0x84, 0x82,
+ 0x9d, 0x9b, 0x91, 0x97, 0x85, 0x83, 0x89, 0x8f,
+ 0xad, 0xab, 0xa1, 0xa7, 0xb5, 0xb3, 0xb9, 0xbf,
+ 0xfd, 0xfb, 0xf1, 0xf7, 0xe5, 0xe3, 0xe9, 0xef,
+ 0xcd, 0xcb, 0xc1, 0xc7, 0xd5, 0xd3, 0xd9, 0xdf,
+ 0x5d, 0x5b, 0x51, 0x57, 0x45, 0x43, 0x49, 0x4f,
+ 0x6d, 0x6b, 0x61, 0x67, 0x75, 0x73, 0x79, 0x7f,
+ 0x3d, 0x3b, 0x31, 0x37, 0x25, 0x23, 0x29, 0x2f,
+ 0x0d, 0x0b, 0x01, 0x07, 0x15, 0x13, 0x19, 0x1f,
+ 0x27, 0x21, 0x2b, 0x2d, 0x3f, 0x39, 0x33, 0x35,
+ 0x17, 0x11, 0x1b, 0x1d, 0x0f, 0x09, 0x03, 0x05,
+ 0x47, 0x41, 0x4b, 0x4d, 0x5f, 0x59, 0x53, 0x55,
+ 0x77, 0x71, 0x7b, 0x7d, 0x6f, 0x69, 0x63, 0x65,
+ 0xe7, 0xe1, 0xeb, 0xed, 0xff, 0xf9, 0xf3, 0xf5,
+ 0xd7, 0xd1, 0xdb, 0xdd, 0xcf, 0xc9, 0xc3, 0xc5,
+ 0x87, 0x81, 0x8b, 0x8d, 0x9f, 0x99, 0x93, 0x95,
+ 0xb7, 0xb1, 0xbb, 0xbd, 0xaf, 0xa9, 0xa3, 0xa5,
+ 0xba, 0xbc, 0xb6, 0xb0, 0xa2, 0xa4, 0xae, 0xa8,
+ 0x8a, 0x8c, 0x86, 0x80, 0x92, 0x94, 0x9e, 0x98,
+ 0xda, 0xdc, 0xd6, 0xd0, 0xc2, 0xc4, 0xce, 0xc8,
+ 0xea, 0xec, 0xe6, 0xe0, 0xf2, 0xf4, 0xfe, 0xf8,
+ 0x7a, 0x7c, 0x76, 0x70, 0x62, 0x64, 0x6e, 0x68,
+ 0x4a, 0x4c, 0x46, 0x40, 0x52, 0x54, 0x5e, 0x58,
+ 0x1a, 0x1c, 0x16, 0x10, 0x02, 0x04, 0x0e, 0x08,
+ 0x2a, 0x2c, 0x26, 0x20, 0x32, 0x34, 0x3e, 0x38,
+ },
+ {
+ 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15,
+ 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d,
+ 0x70, 0x77, 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65,
+ 0x48, 0x4f, 0x46, 0x41, 0x54, 0x53, 0x5a, 0x5d,
+ 0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb, 0xf2, 0xf5,
+ 0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd,
+ 0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85,
+ 0xa8, 0xaf, 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd,
+ 0xdd, 0xda, 0xd3, 0xd4, 0xc1, 0xc6, 0xcf, 0xc8,
+ 0xe5, 0xe2, 0xeb, 0xec, 0xf9, 0xfe, 0xf7, 0xf0,
+ 0xad, 0xaa, 0xa3, 0xa4, 0xb1, 0xb6, 0xbf, 0xb8,
+ 0x95, 0x92, 0x9b, 0x9c, 0x89, 0x8e, 0x87, 0x80,
+ 0x3d, 0x3a, 0x33, 0x34, 0x21, 0x26, 0x2f, 0x28,
+ 0x05, 0x02, 0x0b, 0x0c, 0x19, 0x1e, 0x17, 0x10,
+ 0x4d, 0x4a, 0x43, 0x44, 0x51, 0x56, 0x5f, 0x58,
+ 0x75, 0x72, 0x7b, 0x7c, 0x69, 0x6e, 0x67, 0x60,
+ 0xa7, 0xa0, 0xa9, 0xae, 0xbb, 0xbc, 0xb5, 0xb2,
+ 0x9f, 0x98, 0x91, 0x96, 0x83, 0x84, 0x8d, 0x8a,
+ 0xd7, 0xd0, 0xd9, 0xde, 0xcb, 0xcc, 0xc5, 0xc2,
+ 0xef, 0xe8, 0xe1, 0xe6, 0xf3, 0xf4, 0xfd, 0xfa,
+ 0x47, 0x40, 0x49, 0x4e, 0x5b, 0x5c, 0x55, 0x52,
+ 0x7f, 0x78, 0x71, 0x76, 0x63, 0x64, 0x6d, 0x6a,
+ 0x37, 0x30, 0x39, 0x3e, 0x2b, 0x2c, 0x25, 0x22,
+ 0x0f, 0x08, 0x01, 0x06, 0x13, 0x14, 0x1d, 0x1a,
+ 0x7a, 0x7d, 0x74, 0x73, 0x66, 0x61, 0x68, 0x6f,
+ 0x42, 0x45, 0x4c, 0x4b, 0x5e, 0x59, 0x50, 0x57,
+ 0x0a, 0x0d, 0x04, 0x03, 0x16, 0x11, 0x18, 0x1f,
+ 0x32, 0x35, 0x3c, 0x3b, 0x2e, 0x29, 0x20, 0x27,
+ 0x9a, 0x9d, 0x94, 0x93, 0x86, 0x81, 0x88, 0x8f,
+ 0xa2, 0xa5, 0xac, 0xab, 0xbe, 0xb9, 0xb0, 0xb7,
+ 0xea, 0xed, 0xe4, 0xe3, 0xf6, 0xf1, 0xf8, 0xff,
+ 0xd2, 0xd5, 0xdc, 0xdb, 0xce, 0xc9, 0xc0, 0xc7,
+ },
+ {
+ 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38,
+ 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78,
+ 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8,
+ 0xc0, 0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8,
+ 0x1d, 0x15, 0x0d, 0x05, 0x3d, 0x35, 0x2d, 0x25,
+ 0x5d, 0x55, 0x4d, 0x45, 0x7d, 0x75, 0x6d, 0x65,
+ 0x9d, 0x95, 0x8d, 0x85, 0xbd, 0xb5, 0xad, 0xa5,
+ 0xdd, 0xd5, 0xcd, 0xc5, 0xfd, 0xf5, 0xed, 0xe5,
+ 0x3a, 0x32, 0x2a, 0x22, 0x1a, 0x12, 0x0a, 0x02,
+ 0x7a, 0x72, 0x6a, 0x62, 0x5a, 0x52, 0x4a, 0x42,
+ 0xba, 0xb2, 0xaa, 0xa2, 0x9a, 0x92, 0x8a, 0x82,
+ 0xfa, 0xf2, 0xea, 0xe2, 0xda, 0xd2, 0xca, 0xc2,
+ 0x27, 0x2f, 0x37, 0x3f, 0x07, 0x0f, 0x17, 0x1f,
+ 0x67, 0x6f, 0x77, 0x7f, 0x47, 0x4f, 0x57, 0x5f,
+ 0xa7, 0xaf, 0xb7, 0xbf, 0x87, 0x8f, 0x97, 0x9f,
+ 0xe7, 0xef, 0xf7, 0xff, 0xc7, 0xcf, 0xd7, 0xdf,
+ 0x74, 0x7c, 0x64, 0x6c, 0x54, 0x5c, 0x44, 0x4c,
+ 0x34, 0x3c, 0x24, 0x2c, 0x14, 0x1c, 0x04, 0x0c,
+ 0xf4, 0xfc, 0xe4, 0xec, 0xd4, 0xdc, 0xc4, 0xcc,
+ 0xb4, 0xbc, 0xa4, 0xac, 0x94, 0x9c, 0x84, 0x8c,
+ 0x69, 0x61, 0x79, 0x71, 0x49, 0x41, 0x59, 0x51,
+ 0x29, 0x21, 0x39, 0x31, 0x09, 0x01, 0x19, 0x11,
+ 0xe9, 0xe1, 0xf9, 0xf1, 0xc9, 0xc1, 0xd9, 0xd1,
+ 0xa9, 0xa1, 0xb9, 0xb1, 0x89, 0x81, 0x99, 0x91,
+ 0x4e, 0x46, 0x5e, 0x56, 0x6e, 0x66, 0x7e, 0x76,
+ 0x0e, 0x06, 0x1e, 0x16, 0x2e, 0x26, 0x3e, 0x36,
+ 0xce, 0xc6, 0xde, 0xd6, 0xee, 0xe6, 0xfe, 0xf6,
+ 0x8e, 0x86, 0x9e, 0x96, 0xae, 0xa6, 0xbe, 0xb6,
+ 0x53, 0x5b, 0x43, 0x4b, 0x73, 0x7b, 0x63, 0x6b,
+ 0x13, 0x1b, 0x03, 0x0b, 0x33, 0x3b, 0x23, 0x2b,
+ 0xd3, 0xdb, 0xc3, 0xcb, 0xf3, 0xfb, 0xe3, 0xeb,
+ 0x93, 0x9b, 0x83, 0x8b, 0xb3, 0xbb, 0xa3, 0xab,
+ },
+ {
+ 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f,
+ 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+ 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf,
+ 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
+ 0x3d, 0x34, 0x2f, 0x26, 0x19, 0x10, 0x0b, 0x02,
+ 0x75, 0x7c, 0x67, 0x6e, 0x51, 0x58, 0x43, 0x4a,
+ 0xad, 0xa4, 0xbf, 0xb6, 0x89, 0x80, 0x9b, 0x92,
+ 0xe5, 0xec, 0xf7, 0xfe, 0xc1, 0xc8, 0xd3, 0xda,
+ 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45,
+ 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d,
+ 0xea, 0xe3, 0xf8, 0xf1, 0xce, 0xc7, 0xdc, 0xd5,
+ 0xa2, 0xab, 0xb0, 0xb9, 0x86, 0x8f, 0x94, 0x9d,
+ 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78,
+ 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
+ 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8,
+ 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
+ 0xf4, 0xfd, 0xe6, 0xef, 0xd0, 0xd9, 0xc2, 0xcb,
+ 0xbc, 0xb5, 0xae, 0xa7, 0x98, 0x91, 0x8a, 0x83,
+ 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b,
+ 0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13,
+ 0xc9, 0xc0, 0xdb, 0xd2, 0xed, 0xe4, 0xff, 0xf6,
+ 0x81, 0x88, 0x93, 0x9a, 0xa5, 0xac, 0xb7, 0xbe,
+ 0x59, 0x50, 0x4b, 0x42, 0x7d, 0x74, 0x6f, 0x66,
+ 0x11, 0x18, 0x03, 0x0a, 0x35, 0x3c, 0x27, 0x2e,
+ 0x8e, 0x87, 0x9c, 0x95, 0xaa, 0xa3, 0xb8, 0xb1,
+ 0xc6, 0xcf, 0xd4, 0xdd, 0xe2, 0xeb, 0xf0, 0xf9,
+ 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21,
+ 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69,
+ 0xb3, 0xba, 0xa1, 0xa8, 0x97, 0x9e, 0x85, 0x8c,
+ 0xfb, 0xf2, 0xe9, 0xe0, 0xdf, 0xd6, 0xcd, 0xc4,
+ 0x23, 0x2a, 0x31, 0x38, 0x07, 0x0e, 0x15, 0x1c,
+ 0x6b, 0x62, 0x79, 0x70, 0x4f, 0x46, 0x5d, 0x54,
+ },
+ {
+ 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36,
+ 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66,
+ 0xa0, 0xaa, 0xb4, 0xbe, 0x88, 0x82, 0x9c, 0x96,
+ 0xf0, 0xfa, 0xe4, 0xee, 0xd8, 0xd2, 0xcc, 0xc6,
+ 0x5d, 0x57, 0x49, 0x43, 0x75, 0x7f, 0x61, 0x6b,
+ 0x0d, 0x07, 0x19, 0x13, 0x25, 0x2f, 0x31, 0x3b,
+ 0xfd, 0xf7, 0xe9, 0xe3, 0xd5, 0xdf, 0xc1, 0xcb,
+ 0xad, 0xa7, 0xb9, 0xb3, 0x85, 0x8f, 0x91, 0x9b,
+ 0xba, 0xb0, 0xae, 0xa4, 0x92, 0x98, 0x86, 0x8c,
+ 0xea, 0xe0, 0xfe, 0xf4, 0xc2, 0xc8, 0xd6, 0xdc,
+ 0x1a, 0x10, 0x0e, 0x04, 0x32, 0x38, 0x26, 0x2c,
+ 0x4a, 0x40, 0x5e, 0x54, 0x62, 0x68, 0x76, 0x7c,
+ 0xe7, 0xed, 0xf3, 0xf9, 0xcf, 0xc5, 0xdb, 0xd1,
+ 0xb7, 0xbd, 0xa3, 0xa9, 0x9f, 0x95, 0x8b, 0x81,
+ 0x47, 0x4d, 0x53, 0x59, 0x6f, 0x65, 0x7b, 0x71,
+ 0x17, 0x1d, 0x03, 0x09, 0x3f, 0x35, 0x2b, 0x21,
+ 0x69, 0x63, 0x7d, 0x77, 0x41, 0x4b, 0x55, 0x5f,
+ 0x39, 0x33, 0x2d, 0x27, 0x11, 0x1b, 0x05, 0x0f,
+ 0xc9, 0xc3, 0xdd, 0xd7, 0xe1, 0xeb, 0xf5, 0xff,
+ 0x99, 0x93, 0x8d, 0x87, 0xb1, 0xbb, 0xa5, 0xaf,
+ 0x34, 0x3e, 0x20, 0x2a, 0x1c, 0x16, 0x08, 0x02,
+ 0x64, 0x6e, 0x70, 0x7a, 0x4c, 0x46, 0x58, 0x52,
+ 0x94, 0x9e, 0x80, 0x8a, 0xbc, 0xb6, 0xa8, 0xa2,
+ 0xc4, 0xce, 0xd0, 0xda, 0xec, 0xe6, 0xf8, 0xf2,
+ 0xd3, 0xd9, 0xc7, 0xcd, 0xfb, 0xf1, 0xef, 0xe5,
+ 0x83, 0x89, 0x97, 0x9d, 0xab, 0xa1, 0xbf, 0xb5,
+ 0x73, 0x79, 0x67, 0x6d, 0x5b, 0x51, 0x4f, 0x45,
+ 0x23, 0x29, 0x37, 0x3d, 0x0b, 0x01, 0x1f, 0x15,
+ 0x8e, 0x84, 0x9a, 0x90, 0xa6, 0xac, 0xb2, 0xb8,
+ 0xde, 0xd4, 0xca, 0xc0, 0xf6, 0xfc, 0xe2, 0xe8,
+ 0x2e, 0x24, 0x3a, 0x30, 0x06, 0x0c, 0x12, 0x18,
+ 0x7e, 0x74, 0x6a, 0x60, 0x56, 0x5c, 0x42, 0x48,
+ },
+ {
+ 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31,
+ 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
+ 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81,
+ 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
+ 0x7d, 0x76, 0x6b, 0x60, 0x51, 0x5a, 0x47, 0x4c,
+ 0x25, 0x2e, 0x33, 0x38, 0x09, 0x02, 0x1f, 0x14,
+ 0xcd, 0xc6, 0xdb, 0xd0, 0xe1, 0xea, 0xf7, 0xfc,
+ 0x95, 0x9e, 0x83, 0x88, 0xb9, 0xb2, 0xaf, 0xa4,
+ 0xfa, 0xf1, 0xec, 0xe7, 0xd6, 0xdd, 0xc0, 0xcb,
+ 0xa2, 0xa9, 0xb4, 0xbf, 0x8e, 0x85, 0x98, 0x93,
+ 0x4a, 0x41, 0x5c, 0x57, 0x66, 0x6d, 0x70, 0x7b,
+ 0x12, 0x19, 0x04, 0x0f, 0x3e, 0x35, 0x28, 0x23,
+ 0x87, 0x8c, 0x91, 0x9a, 0xab, 0xa0, 0xbd, 0xb6,
+ 0xdf, 0xd4, 0xc9, 0xc2, 0xf3, 0xf8, 0xe5, 0xee,
+ 0x37, 0x3c, 0x21, 0x2a, 0x1b, 0x10, 0x0d, 0x06,
+ 0x6f, 0x64, 0x79, 0x72, 0x43, 0x48, 0x55, 0x5e,
+ 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
+ 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80,
+ 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
+ 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30,
+ 0x94, 0x9f, 0x82, 0x89, 0xb8, 0xb3, 0xae, 0xa5,
+ 0xcc, 0xc7, 0xda, 0xd1, 0xe0, 0xeb, 0xf6, 0xfd,
+ 0x24, 0x2f, 0x32, 0x39, 0x08, 0x03, 0x1e, 0x15,
+ 0x7c, 0x77, 0x6a, 0x61, 0x50, 0x5b, 0x46, 0x4d,
+ 0x13, 0x18, 0x05, 0x0e, 0x3f, 0x34, 0x29, 0x22,
+ 0x4b, 0x40, 0x5d, 0x56, 0x67, 0x6c, 0x71, 0x7a,
+ 0xa3, 0xa8, 0xb5, 0xbe, 0x8f, 0x84, 0x99, 0x92,
+ 0xfb, 0xf0, 0xed, 0xe6, 0xd7, 0xdc, 0xc1, 0xca,
+ 0x6e, 0x65, 0x78, 0x73, 0x42, 0x49, 0x54, 0x5f,
+ 0x36, 0x3d, 0x20, 0x2b, 0x1a, 0x11, 0x0c, 0x07,
+ 0xde, 0xd5, 0xc8, 0xc3, 0xf2, 0xf9, 0xe4, 0xef,
+ 0x86, 0x8d, 0x90, 0x9b, 0xaa, 0xa1, 0xbc, 0xb7,
+ },
+ {
+ 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24,
+ 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44,
+ 0xc0, 0xcc, 0xd8, 0xd4, 0xf0, 0xfc, 0xe8, 0xe4,
+ 0xa0, 0xac, 0xb8, 0xb4, 0x90, 0x9c, 0x88, 0x84,
+ 0x9d, 0x91, 0x85, 0x89, 0xad, 0xa1, 0xb5, 0xb9,
+ 0xfd, 0xf1, 0xe5, 0xe9, 0xcd, 0xc1, 0xd5, 0xd9,
+ 0x5d, 0x51, 0x45, 0x49, 0x6d, 0x61, 0x75, 0x79,
+ 0x3d, 0x31, 0x25, 0x29, 0x0d, 0x01, 0x15, 0x19,
+ 0x27, 0x2b, 0x3f, 0x33, 0x17, 0x1b, 0x0f, 0x03,
+ 0x47, 0x4b, 0x5f, 0x53, 0x77, 0x7b, 0x6f, 0x63,
+ 0xe7, 0xeb, 0xff, 0xf3, 0xd7, 0xdb, 0xcf, 0xc3,
+ 0x87, 0x8b, 0x9f, 0x93, 0xb7, 0xbb, 0xaf, 0xa3,
+ 0xba, 0xb6, 0xa2, 0xae, 0x8a, 0x86, 0x92, 0x9e,
+ 0xda, 0xd6, 0xc2, 0xce, 0xea, 0xe6, 0xf2, 0xfe,
+ 0x7a, 0x76, 0x62, 0x6e, 0x4a, 0x46, 0x52, 0x5e,
+ 0x1a, 0x16, 0x02, 0x0e, 0x2a, 0x26, 0x32, 0x3e,
+ 0x4e, 0x42, 0x56, 0x5a, 0x7e, 0x72, 0x66, 0x6a,
+ 0x2e, 0x22, 0x36, 0x3a, 0x1e, 0x12, 0x06, 0x0a,
+ 0x8e, 0x82, 0x96, 0x9a, 0xbe, 0xb2, 0xa6, 0xaa,
+ 0xee, 0xe2, 0xf6, 0xfa, 0xde, 0xd2, 0xc6, 0xca,
+ 0xd3, 0xdf, 0xcb, 0xc7, 0xe3, 0xef, 0xfb, 0xf7,
+ 0xb3, 0xbf, 0xab, 0xa7, 0x83, 0x8f, 0x9b, 0x97,
+ 0x13, 0x1f, 0x0b, 0x07, 0x23, 0x2f, 0x3b, 0x37,
+ 0x73, 0x7f, 0x6b, 0x67, 0x43, 0x4f, 0x5b, 0x57,
+ 0x69, 0x65, 0x71, 0x7d, 0x59, 0x55, 0x41, 0x4d,
+ 0x09, 0x05, 0x11, 0x1d, 0x39, 0x35, 0x21, 0x2d,
+ 0xa9, 0xa5, 0xb1, 0xbd, 0x99, 0x95, 0x81, 0x8d,
+ 0xc9, 0xc5, 0xd1, 0xdd, 0xf9, 0xf5, 0xe1, 0xed,
+ 0xf4, 0xf8, 0xec, 0xe0, 0xc4, 0xc8, 0xdc, 0xd0,
+ 0x94, 0x98, 0x8c, 0x80, 0xa4, 0xa8, 0xbc, 0xb0,
+ 0x34, 0x38, 0x2c, 0x20, 0x04, 0x08, 0x1c, 0x10,
+ 0x54, 0x58, 0x4c, 0x40, 0x64, 0x68, 0x7c, 0x70,
+ },
+ {
+ 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23,
+ 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
+ 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3,
+ 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
+ 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e,
+ 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
+ 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e,
+ 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
+ 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44,
+ 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
+ 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94,
+ 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
+ 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9,
+ 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+ 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29,
+ 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
+ 0xce, 0xc3, 0xd4, 0xd9, 0xfa, 0xf7, 0xe0, 0xed,
+ 0xa6, 0xab, 0xbc, 0xb1, 0x92, 0x9f, 0x88, 0x85,
+ 0x1e, 0x13, 0x04, 0x09, 0x2a, 0x27, 0x30, 0x3d,
+ 0x76, 0x7b, 0x6c, 0x61, 0x42, 0x4f, 0x58, 0x55,
+ 0x73, 0x7e, 0x69, 0x64, 0x47, 0x4a, 0x5d, 0x50,
+ 0x1b, 0x16, 0x01, 0x0c, 0x2f, 0x22, 0x35, 0x38,
+ 0xa3, 0xae, 0xb9, 0xb4, 0x97, 0x9a, 0x8d, 0x80,
+ 0xcb, 0xc6, 0xd1, 0xdc, 0xff, 0xf2, 0xe5, 0xe8,
+ 0xa9, 0xa4, 0xb3, 0xbe, 0x9d, 0x90, 0x87, 0x8a,
+ 0xc1, 0xcc, 0xdb, 0xd6, 0xf5, 0xf8, 0xef, 0xe2,
+ 0x79, 0x74, 0x63, 0x6e, 0x4d, 0x40, 0x57, 0x5a,
+ 0x11, 0x1c, 0x0b, 0x06, 0x25, 0x28, 0x3f, 0x32,
+ 0x14, 0x19, 0x0e, 0x03, 0x20, 0x2d, 0x3a, 0x37,
+ 0x7c, 0x71, 0x66, 0x6b, 0x48, 0x45, 0x52, 0x5f,
+ 0xc4, 0xc9, 0xde, 0xd3, 0xf0, 0xfd, 0xea, 0xe7,
+ 0xac, 0xa1, 0xb6, 0xbb, 0x98, 0x95, 0x82, 0x8f,
+ },
+ {
+ 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a,
+ 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
+ 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca,
+ 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
+ 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
+ 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87,
+ 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+ 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67,
+ 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d,
+ 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd,
+ 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
+ 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d,
+ 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50,
+ 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
+ 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0,
+ 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
+ 0x53, 0x5d, 0x4f, 0x41, 0x6b, 0x65, 0x77, 0x79,
+ 0x23, 0x2d, 0x3f, 0x31, 0x1b, 0x15, 0x07, 0x09,
+ 0xb3, 0xbd, 0xaf, 0xa1, 0x8b, 0x85, 0x97, 0x99,
+ 0xc3, 0xcd, 0xdf, 0xd1, 0xfb, 0xf5, 0xe7, 0xe9,
+ 0x8e, 0x80, 0x92, 0x9c, 0xb6, 0xb8, 0xaa, 0xa4,
+ 0xfe, 0xf0, 0xe2, 0xec, 0xc6, 0xc8, 0xda, 0xd4,
+ 0x6e, 0x60, 0x72, 0x7c, 0x56, 0x58, 0x4a, 0x44,
+ 0x1e, 0x10, 0x02, 0x0c, 0x26, 0x28, 0x3a, 0x34,
+ 0xf4, 0xfa, 0xe8, 0xe6, 0xcc, 0xc2, 0xd0, 0xde,
+ 0x84, 0x8a, 0x98, 0x96, 0xbc, 0xb2, 0xa0, 0xae,
+ 0x14, 0x1a, 0x08, 0x06, 0x2c, 0x22, 0x30, 0x3e,
+ 0x64, 0x6a, 0x78, 0x76, 0x5c, 0x52, 0x40, 0x4e,
+ 0x29, 0x27, 0x35, 0x3b, 0x11, 0x1f, 0x0d, 0x03,
+ 0x59, 0x57, 0x45, 0x4b, 0x61, 0x6f, 0x7d, 0x73,
+ 0xc9, 0xc7, 0xd5, 0xdb, 0xf1, 0xff, 0xed, 0xe3,
+ 0xb9, 0xb7, 0xa5, 0xab, 0x81, 0x8f, 0x9d, 0x93,
+ },
+ {
+ 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d,
+ 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55,
+ 0xf0, 0xff, 0xee, 0xe1, 0xcc, 0xc3, 0xd2, 0xdd,
+ 0x88, 0x87, 0x96, 0x99, 0xb4, 0xbb, 0xaa, 0xa5,
+ 0xfd, 0xf2, 0xe3, 0xec, 0xc1, 0xce, 0xdf, 0xd0,
+ 0x85, 0x8a, 0x9b, 0x94, 0xb9, 0xb6, 0xa7, 0xa8,
+ 0x0d, 0x02, 0x13, 0x1c, 0x31, 0x3e, 0x2f, 0x20,
+ 0x75, 0x7a, 0x6b, 0x64, 0x49, 0x46, 0x57, 0x58,
+ 0xe7, 0xe8, 0xf9, 0xf6, 0xdb, 0xd4, 0xc5, 0xca,
+ 0x9f, 0x90, 0x81, 0x8e, 0xa3, 0xac, 0xbd, 0xb2,
+ 0x17, 0x18, 0x09, 0x06, 0x2b, 0x24, 0x35, 0x3a,
+ 0x6f, 0x60, 0x71, 0x7e, 0x53, 0x5c, 0x4d, 0x42,
+ 0x1a, 0x15, 0x04, 0x0b, 0x26, 0x29, 0x38, 0x37,
+ 0x62, 0x6d, 0x7c, 0x73, 0x5e, 0x51, 0x40, 0x4f,
+ 0xea, 0xe5, 0xf4, 0xfb, 0xd6, 0xd9, 0xc8, 0xc7,
+ 0x92, 0x9d, 0x8c, 0x83, 0xae, 0xa1, 0xb0, 0xbf,
+ 0xd3, 0xdc, 0xcd, 0xc2, 0xef, 0xe0, 0xf1, 0xfe,
+ 0xab, 0xa4, 0xb5, 0xba, 0x97, 0x98, 0x89, 0x86,
+ 0x23, 0x2c, 0x3d, 0x32, 0x1f, 0x10, 0x01, 0x0e,
+ 0x5b, 0x54, 0x45, 0x4a, 0x67, 0x68, 0x79, 0x76,
+ 0x2e, 0x21, 0x30, 0x3f, 0x12, 0x1d, 0x0c, 0x03,
+ 0x56, 0x59, 0x48, 0x47, 0x6a, 0x65, 0x74, 0x7b,
+ 0xde, 0xd1, 0xc0, 0xcf, 0xe2, 0xed, 0xfc, 0xf3,
+ 0xa6, 0xa9, 0xb8, 0xb7, 0x9a, 0x95, 0x84, 0x8b,
+ 0x34, 0x3b, 0x2a, 0x25, 0x08, 0x07, 0x16, 0x19,
+ 0x4c, 0x43, 0x52, 0x5d, 0x70, 0x7f, 0x6e, 0x61,
+ 0xc4, 0xcb, 0xda, 0xd5, 0xf8, 0xf7, 0xe6, 0xe9,
+ 0xbc, 0xb3, 0xa2, 0xad, 0x80, 0x8f, 0x9e, 0x91,
+ 0xc9, 0xc6, 0xd7, 0xd8, 0xf5, 0xfa, 0xeb, 0xe4,
+ 0xb1, 0xbe, 0xaf, 0xa0, 0x8d, 0x82, 0x93, 0x9c,
+ 0x39, 0x36, 0x27, 0x28, 0x05, 0x0a, 0x1b, 0x14,
+ 0x41, 0x4e, 0x5f, 0x50, 0x7d, 0x72, 0x63, 0x6c,
+ },
+ {
+ 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0,
+ 0x1d, 0x0d, 0x3d, 0x2d, 0x5d, 0x4d, 0x7d, 0x6d,
+ 0x9d, 0x8d, 0xbd, 0xad, 0xdd, 0xcd, 0xfd, 0xed,
+ 0x3a, 0x2a, 0x1a, 0x0a, 0x7a, 0x6a, 0x5a, 0x4a,
+ 0xba, 0xaa, 0x9a, 0x8a, 0xfa, 0xea, 0xda, 0xca,
+ 0x27, 0x37, 0x07, 0x17, 0x67, 0x77, 0x47, 0x57,
+ 0xa7, 0xb7, 0x87, 0x97, 0xe7, 0xf7, 0xc7, 0xd7,
+ 0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x04,
+ 0xf4, 0xe4, 0xd4, 0xc4, 0xb4, 0xa4, 0x94, 0x84,
+ 0x69, 0x79, 0x49, 0x59, 0x29, 0x39, 0x09, 0x19,
+ 0xe9, 0xf9, 0xc9, 0xd9, 0xa9, 0xb9, 0x89, 0x99,
+ 0x4e, 0x5e, 0x6e, 0x7e, 0x0e, 0x1e, 0x2e, 0x3e,
+ 0xce, 0xde, 0xee, 0xfe, 0x8e, 0x9e, 0xae, 0xbe,
+ 0x53, 0x43, 0x73, 0x63, 0x13, 0x03, 0x33, 0x23,
+ 0xd3, 0xc3, 0xf3, 0xe3, 0x93, 0x83, 0xb3, 0xa3,
+ 0xe8, 0xf8, 0xc8, 0xd8, 0xa8, 0xb8, 0x88, 0x98,
+ 0x68, 0x78, 0x48, 0x58, 0x28, 0x38, 0x08, 0x18,
+ 0xf5, 0xe5, 0xd5, 0xc5, 0xb5, 0xa5, 0x95, 0x85,
+ 0x75, 0x65, 0x55, 0x45, 0x35, 0x25, 0x15, 0x05,
+ 0xd2, 0xc2, 0xf2, 0xe2, 0x92, 0x82, 0xb2, 0xa2,
+ 0x52, 0x42, 0x72, 0x62, 0x12, 0x02, 0x32, 0x22,
+ 0xcf, 0xdf, 0xef, 0xff, 0x8f, 0x9f, 0xaf, 0xbf,
+ 0x4f, 0x5f, 0x6f, 0x7f, 0x0f, 0x1f, 0x2f, 0x3f,
+ 0x9c, 0x8c, 0xbc, 0xac, 0xdc, 0xcc, 0xfc, 0xec,
+ 0x1c, 0x0c, 0x3c, 0x2c, 0x5c, 0x4c, 0x7c, 0x6c,
+ 0x81, 0x91, 0xa1, 0xb1, 0xc1, 0xd1, 0xe1, 0xf1,
+ 0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71,
+ 0xa6, 0xb6, 0x86, 0x96, 0xe6, 0xf6, 0xc6, 0xd6,
+ 0x26, 0x36, 0x06, 0x16, 0x66, 0x76, 0x46, 0x56,
+ 0xbb, 0xab, 0x9b, 0x8b, 0xfb, 0xeb, 0xdb, 0xcb,
+ 0x3b, 0x2b, 0x1b, 0x0b, 0x7b, 0x6b, 0x5b, 0x4b,
+ },
+ {
+ 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
+ 0x0d, 0x1c, 0x2f, 0x3e, 0x49, 0x58, 0x6b, 0x7a,
+ 0x85, 0x94, 0xa7, 0xb6, 0xc1, 0xd0, 0xe3, 0xf2,
+ 0x1a, 0x0b, 0x38, 0x29, 0x5e, 0x4f, 0x7c, 0x6d,
+ 0x92, 0x83, 0xb0, 0xa1, 0xd6, 0xc7, 0xf4, 0xe5,
+ 0x17, 0x06, 0x35, 0x24, 0x53, 0x42, 0x71, 0x60,
+ 0x9f, 0x8e, 0xbd, 0xac, 0xdb, 0xca, 0xf9, 0xe8,
+ 0x34, 0x25, 0x16, 0x07, 0x70, 0x61, 0x52, 0x43,
+ 0xbc, 0xad, 0x9e, 0x8f, 0xf8, 0xe9, 0xda, 0xcb,
+ 0x39, 0x28, 0x1b, 0x0a, 0x7d, 0x6c, 0x5f, 0x4e,
+ 0xb1, 0xa0, 0x93, 0x82, 0xf5, 0xe4, 0xd7, 0xc6,
+ 0x2e, 0x3f, 0x0c, 0x1d, 0x6a, 0x7b, 0x48, 0x59,
+ 0xa6, 0xb7, 0x84, 0x95, 0xe2, 0xf3, 0xc0, 0xd1,
+ 0x23, 0x32, 0x01, 0x10, 0x67, 0x76, 0x45, 0x54,
+ 0xab, 0xba, 0x89, 0x98, 0xef, 0xfe, 0xcd, 0xdc,
+ 0x68, 0x79, 0x4a, 0x5b, 0x2c, 0x3d, 0x0e, 0x1f,
+ 0xe0, 0xf1, 0xc2, 0xd3, 0xa4, 0xb5, 0x86, 0x97,
+ 0x65, 0x74, 0x47, 0x56, 0x21, 0x30, 0x03, 0x12,
+ 0xed, 0xfc, 0xcf, 0xde, 0xa9, 0xb8, 0x8b, 0x9a,
+ 0x72, 0x63, 0x50, 0x41, 0x36, 0x27, 0x14, 0x05,
+ 0xfa, 0xeb, 0xd8, 0xc9, 0xbe, 0xaf, 0x9c, 0x8d,
+ 0x7f, 0x6e, 0x5d, 0x4c, 0x3b, 0x2a, 0x19, 0x08,
+ 0xf7, 0xe6, 0xd5, 0xc4, 0xb3, 0xa2, 0x91, 0x80,
+ 0x5c, 0x4d, 0x7e, 0x6f, 0x18, 0x09, 0x3a, 0x2b,
+ 0xd4, 0xc5, 0xf6, 0xe7, 0x90, 0x81, 0xb2, 0xa3,
+ 0x51, 0x40, 0x73, 0x62, 0x15, 0x04, 0x37, 0x26,
+ 0xd9, 0xc8, 0xfb, 0xea, 0x9d, 0x8c, 0xbf, 0xae,
+ 0x46, 0x57, 0x64, 0x75, 0x02, 0x13, 0x20, 0x31,
+ 0xce, 0xdf, 0xec, 0xfd, 0x8a, 0x9b, 0xa8, 0xb9,
+ 0x4b, 0x5a, 0x69, 0x78, 0x0f, 0x1e, 0x2d, 0x3c,
+ 0xc3, 0xd2, 0xe1, 0xf0, 0x87, 0x96, 0xa5, 0xb4,
+ },
+ {
+ 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
+ 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
+ 0x3d, 0x2f, 0x19, 0x0b, 0x75, 0x67, 0x51, 0x43,
+ 0xad, 0xbf, 0x89, 0x9b, 0xe5, 0xf7, 0xc1, 0xd3,
+ 0x7a, 0x68, 0x5e, 0x4c, 0x32, 0x20, 0x16, 0x04,
+ 0xea, 0xf8, 0xce, 0xdc, 0xa2, 0xb0, 0x86, 0x94,
+ 0x47, 0x55, 0x63, 0x71, 0x0f, 0x1d, 0x2b, 0x39,
+ 0xd7, 0xc5, 0xf3, 0xe1, 0x9f, 0x8d, 0xbb, 0xa9,
+ 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+ 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,
+ 0xc9, 0xdb, 0xed, 0xff, 0x81, 0x93, 0xa5, 0xb7,
+ 0x59, 0x4b, 0x7d, 0x6f, 0x11, 0x03, 0x35, 0x27,
+ 0x8e, 0x9c, 0xaa, 0xb8, 0xc6, 0xd4, 0xe2, 0xf0,
+ 0x1e, 0x0c, 0x3a, 0x28, 0x56, 0x44, 0x72, 0x60,
+ 0xb3, 0xa1, 0x97, 0x85, 0xfb, 0xe9, 0xdf, 0xcd,
+ 0x23, 0x31, 0x07, 0x15, 0x6b, 0x79, 0x4f, 0x5d,
+ 0xf5, 0xe7, 0xd1, 0xc3, 0xbd, 0xaf, 0x99, 0x8b,
+ 0x65, 0x77, 0x41, 0x53, 0x2d, 0x3f, 0x09, 0x1b,
+ 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,
+ 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
+ 0x8f, 0x9d, 0xab, 0xb9, 0xc7, 0xd5, 0xe3, 0xf1,
+ 0x1f, 0x0d, 0x3b, 0x29, 0x57, 0x45, 0x73, 0x61,
+ 0xb2, 0xa0, 0x96, 0x84, 0xfa, 0xe8, 0xde, 0xcc,
+ 0x22, 0x30, 0x06, 0x14, 0x6a, 0x78, 0x4e, 0x5c,
+ 0x01, 0x13, 0x25, 0x37, 0x49, 0x5b, 0x6d, 0x7f,
+ 0x91, 0x83, 0xb5, 0xa7, 0xd9, 0xcb, 0xfd, 0xef,
+ 0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
+ 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,
+ 0x7b, 0x69, 0x5f, 0x4d, 0x33, 0x21, 0x17, 0x05,
+ 0xeb, 0xf9, 0xcf, 0xdd, 0xa3, 0xb1, 0x87, 0x95,
+ 0x46, 0x54, 0x62, 0x70, 0x0e, 0x1c, 0x2a, 0x38,
+ 0xd6, 0xc4, 0xf2, 0xe0, 0x9e, 0x8c, 0xba, 0xa8,
+ },
+ {
+ 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79,
+ 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1,
+ 0x2d, 0x3e, 0x0b, 0x18, 0x61, 0x72, 0x47, 0x54,
+ 0xb5, 0xa6, 0x93, 0x80, 0xf9, 0xea, 0xdf, 0xcc,
+ 0x5a, 0x49, 0x7c, 0x6f, 0x16, 0x05, 0x30, 0x23,
+ 0xc2, 0xd1, 0xe4, 0xf7, 0x8e, 0x9d, 0xa8, 0xbb,
+ 0x77, 0x64, 0x51, 0x42, 0x3b, 0x28, 0x1d, 0x0e,
+ 0xef, 0xfc, 0xc9, 0xda, 0xa3, 0xb0, 0x85, 0x96,
+ 0xb4, 0xa7, 0x92, 0x81, 0xf8, 0xeb, 0xde, 0xcd,
+ 0x2c, 0x3f, 0x0a, 0x19, 0x60, 0x73, 0x46, 0x55,
+ 0x99, 0x8a, 0xbf, 0xac, 0xd5, 0xc6, 0xf3, 0xe0,
+ 0x01, 0x12, 0x27, 0x34, 0x4d, 0x5e, 0x6b, 0x78,
+ 0xee, 0xfd, 0xc8, 0xdb, 0xa2, 0xb1, 0x84, 0x97,
+ 0x76, 0x65, 0x50, 0x43, 0x3a, 0x29, 0x1c, 0x0f,
+ 0xc3, 0xd0, 0xe5, 0xf6, 0x8f, 0x9c, 0xa9, 0xba,
+ 0x5b, 0x48, 0x7d, 0x6e, 0x17, 0x04, 0x31, 0x22,
+ 0x75, 0x66, 0x53, 0x40, 0x39, 0x2a, 0x1f, 0x0c,
+ 0xed, 0xfe, 0xcb, 0xd8, 0xa1, 0xb2, 0x87, 0x94,
+ 0x58, 0x4b, 0x7e, 0x6d, 0x14, 0x07, 0x32, 0x21,
+ 0xc0, 0xd3, 0xe6, 0xf5, 0x8c, 0x9f, 0xaa, 0xb9,
+ 0x2f, 0x3c, 0x09, 0x1a, 0x63, 0x70, 0x45, 0x56,
+ 0xb7, 0xa4, 0x91, 0x82, 0xfb, 0xe8, 0xdd, 0xce,
+ 0x02, 0x11, 0x24, 0x37, 0x4e, 0x5d, 0x68, 0x7b,
+ 0x9a, 0x89, 0xbc, 0xaf, 0xd6, 0xc5, 0xf0, 0xe3,
+ 0xc1, 0xd2, 0xe7, 0xf4, 0x8d, 0x9e, 0xab, 0xb8,
+ 0x59, 0x4a, 0x7f, 0x6c, 0x15, 0x06, 0x33, 0x20,
+ 0xec, 0xff, 0xca, 0xd9, 0xa0, 0xb3, 0x86, 0x95,
+ 0x74, 0x67, 0x52, 0x41, 0x38, 0x2b, 0x1e, 0x0d,
+ 0x9b, 0x88, 0xbd, 0xae, 0xd7, 0xc4, 0xf1, 0xe2,
+ 0x03, 0x10, 0x25, 0x36, 0x4f, 0x5c, 0x69, 0x7a,
+ 0xb6, 0xa5, 0x90, 0x83, 0xfa, 0xe9, 0xdc, 0xcf,
+ 0x2e, 0x3d, 0x08, 0x1b, 0x62, 0x71, 0x44, 0x57,
+ },
+ {
+ 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c,
+ 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc,
+ 0x5d, 0x49, 0x75, 0x61, 0x0d, 0x19, 0x25, 0x31,
+ 0xfd, 0xe9, 0xd5, 0xc1, 0xad, 0xb9, 0x85, 0x91,
+ 0xba, 0xae, 0x92, 0x86, 0xea, 0xfe, 0xc2, 0xd6,
+ 0x1a, 0x0e, 0x32, 0x26, 0x4a, 0x5e, 0x62, 0x76,
+ 0xe7, 0xf3, 0xcf, 0xdb, 0xb7, 0xa3, 0x9f, 0x8b,
+ 0x47, 0x53, 0x6f, 0x7b, 0x17, 0x03, 0x3f, 0x2b,
+ 0x69, 0x7d, 0x41, 0x55, 0x39, 0x2d, 0x11, 0x05,
+ 0xc9, 0xdd, 0xe1, 0xf5, 0x99, 0x8d, 0xb1, 0xa5,
+ 0x34, 0x20, 0x1c, 0x08, 0x64, 0x70, 0x4c, 0x58,
+ 0x94, 0x80, 0xbc, 0xa8, 0xc4, 0xd0, 0xec, 0xf8,
+ 0xd3, 0xc7, 0xfb, 0xef, 0x83, 0x97, 0xab, 0xbf,
+ 0x73, 0x67, 0x5b, 0x4f, 0x23, 0x37, 0x0b, 0x1f,
+ 0x8e, 0x9a, 0xa6, 0xb2, 0xde, 0xca, 0xf6, 0xe2,
+ 0x2e, 0x3a, 0x06, 0x12, 0x7e, 0x6a, 0x56, 0x42,
+ 0xd2, 0xc6, 0xfa, 0xee, 0x82, 0x96, 0xaa, 0xbe,
+ 0x72, 0x66, 0x5a, 0x4e, 0x22, 0x36, 0x0a, 0x1e,
+ 0x8f, 0x9b, 0xa7, 0xb3, 0xdf, 0xcb, 0xf7, 0xe3,
+ 0x2f, 0x3b, 0x07, 0x13, 0x7f, 0x6b, 0x57, 0x43,
+ 0x68, 0x7c, 0x40, 0x54, 0x38, 0x2c, 0x10, 0x04,
+ 0xc8, 0xdc, 0xe0, 0xf4, 0x98, 0x8c, 0xb0, 0xa4,
+ 0x35, 0x21, 0x1d, 0x09, 0x65, 0x71, 0x4d, 0x59,
+ 0x95, 0x81, 0xbd, 0xa9, 0xc5, 0xd1, 0xed, 0xf9,
+ 0xbb, 0xaf, 0x93, 0x87, 0xeb, 0xff, 0xc3, 0xd7,
+ 0x1b, 0x0f, 0x33, 0x27, 0x4b, 0x5f, 0x63, 0x77,
+ 0xe6, 0xf2, 0xce, 0xda, 0xb6, 0xa2, 0x9e, 0x8a,
+ 0x46, 0x52, 0x6e, 0x7a, 0x16, 0x02, 0x3e, 0x2a,
+ 0x01, 0x15, 0x29, 0x3d, 0x51, 0x45, 0x79, 0x6d,
+ 0xa1, 0xb5, 0x89, 0x9d, 0xf1, 0xe5, 0xd9, 0xcd,
+ 0x5c, 0x48, 0x74, 0x60, 0x0c, 0x18, 0x24, 0x30,
+ 0xfc, 0xe8, 0xd4, 0xc0, 0xac, 0xb8, 0x84, 0x90,
+ },
+ {
+ 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b,
+ 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3,
+ 0x4d, 0x58, 0x67, 0x72, 0x19, 0x0c, 0x33, 0x26,
+ 0xe5, 0xf0, 0xcf, 0xda, 0xb1, 0xa4, 0x9b, 0x8e,
+ 0x9a, 0x8f, 0xb0, 0xa5, 0xce, 0xdb, 0xe4, 0xf1,
+ 0x32, 0x27, 0x18, 0x0d, 0x66, 0x73, 0x4c, 0x59,
+ 0xd7, 0xc2, 0xfd, 0xe8, 0x83, 0x96, 0xa9, 0xbc,
+ 0x7f, 0x6a, 0x55, 0x40, 0x2b, 0x3e, 0x01, 0x14,
+ 0x29, 0x3c, 0x03, 0x16, 0x7d, 0x68, 0x57, 0x42,
+ 0x81, 0x94, 0xab, 0xbe, 0xd5, 0xc0, 0xff, 0xea,
+ 0x64, 0x71, 0x4e, 0x5b, 0x30, 0x25, 0x1a, 0x0f,
+ 0xcc, 0xd9, 0xe6, 0xf3, 0x98, 0x8d, 0xb2, 0xa7,
+ 0xb3, 0xa6, 0x99, 0x8c, 0xe7, 0xf2, 0xcd, 0xd8,
+ 0x1b, 0x0e, 0x31, 0x24, 0x4f, 0x5a, 0x65, 0x70,
+ 0xfe, 0xeb, 0xd4, 0xc1, 0xaa, 0xbf, 0x80, 0x95,
+ 0x56, 0x43, 0x7c, 0x69, 0x02, 0x17, 0x28, 0x3d,
+ 0x52, 0x47, 0x78, 0x6d, 0x06, 0x13, 0x2c, 0x39,
+ 0xfa, 0xef, 0xd0, 0xc5, 0xae, 0xbb, 0x84, 0x91,
+ 0x1f, 0x0a, 0x35, 0x20, 0x4b, 0x5e, 0x61, 0x74,
+ 0xb7, 0xa2, 0x9d, 0x88, 0xe3, 0xf6, 0xc9, 0xdc,
+ 0xc8, 0xdd, 0xe2, 0xf7, 0x9c, 0x89, 0xb6, 0xa3,
+ 0x60, 0x75, 0x4a, 0x5f, 0x34, 0x21, 0x1e, 0x0b,
+ 0x85, 0x90, 0xaf, 0xba, 0xd1, 0xc4, 0xfb, 0xee,
+ 0x2d, 0x38, 0x07, 0x12, 0x79, 0x6c, 0x53, 0x46,
+ 0x7b, 0x6e, 0x51, 0x44, 0x2f, 0x3a, 0x05, 0x10,
+ 0xd3, 0xc6, 0xf9, 0xec, 0x87, 0x92, 0xad, 0xb8,
+ 0x36, 0x23, 0x1c, 0x09, 0x62, 0x77, 0x48, 0x5d,
+ 0x9e, 0x8b, 0xb4, 0xa1, 0xca, 0xdf, 0xe0, 0xf5,
+ 0xe1, 0xf4, 0xcb, 0xde, 0xb5, 0xa0, 0x9f, 0x8a,
+ 0x49, 0x5c, 0x63, 0x76, 0x1d, 0x08, 0x37, 0x22,
+ 0xac, 0xb9, 0x86, 0x93, 0xf8, 0xed, 0xd2, 0xc7,
+ 0x04, 0x11, 0x2e, 0x3b, 0x50, 0x45, 0x7a, 0x6f,
+ },
+ {
+ 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62,
+ 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2,
+ 0x7d, 0x6b, 0x51, 0x47, 0x25, 0x33, 0x09, 0x1f,
+ 0xcd, 0xdb, 0xe1, 0xf7, 0x95, 0x83, 0xb9, 0xaf,
+ 0xfa, 0xec, 0xd6, 0xc0, 0xa2, 0xb4, 0x8e, 0x98,
+ 0x4a, 0x5c, 0x66, 0x70, 0x12, 0x04, 0x3e, 0x28,
+ 0x87, 0x91, 0xab, 0xbd, 0xdf, 0xc9, 0xf3, 0xe5,
+ 0x37, 0x21, 0x1b, 0x0d, 0x6f, 0x79, 0x43, 0x55,
+ 0xe9, 0xff, 0xc5, 0xd3, 0xb1, 0xa7, 0x9d, 0x8b,
+ 0x59, 0x4f, 0x75, 0x63, 0x01, 0x17, 0x2d, 0x3b,
+ 0x94, 0x82, 0xb8, 0xae, 0xcc, 0xda, 0xe0, 0xf6,
+ 0x24, 0x32, 0x08, 0x1e, 0x7c, 0x6a, 0x50, 0x46,
+ 0x13, 0x05, 0x3f, 0x29, 0x4b, 0x5d, 0x67, 0x71,
+ 0xa3, 0xb5, 0x8f, 0x99, 0xfb, 0xed, 0xd7, 0xc1,
+ 0x6e, 0x78, 0x42, 0x54, 0x36, 0x20, 0x1a, 0x0c,
+ 0xde, 0xc8, 0xf2, 0xe4, 0x86, 0x90, 0xaa, 0xbc,
+ 0xcf, 0xd9, 0xe3, 0xf5, 0x97, 0x81, 0xbb, 0xad,
+ 0x7f, 0x69, 0x53, 0x45, 0x27, 0x31, 0x0b, 0x1d,
+ 0xb2, 0xa4, 0x9e, 0x88, 0xea, 0xfc, 0xc6, 0xd0,
+ 0x02, 0x14, 0x2e, 0x38, 0x5a, 0x4c, 0x76, 0x60,
+ 0x35, 0x23, 0x19, 0x0f, 0x6d, 0x7b, 0x41, 0x57,
+ 0x85, 0x93, 0xa9, 0xbf, 0xdd, 0xcb, 0xf1, 0xe7,
+ 0x48, 0x5e, 0x64, 0x72, 0x10, 0x06, 0x3c, 0x2a,
+ 0xf8, 0xee, 0xd4, 0xc2, 0xa0, 0xb6, 0x8c, 0x9a,
+ 0x26, 0x30, 0x0a, 0x1c, 0x7e, 0x68, 0x52, 0x44,
+ 0x96, 0x80, 0xba, 0xac, 0xce, 0xd8, 0xe2, 0xf4,
+ 0x5b, 0x4d, 0x77, 0x61, 0x03, 0x15, 0x2f, 0x39,
+ 0xeb, 0xfd, 0xc7, 0xd1, 0xb3, 0xa5, 0x9f, 0x89,
+ 0xdc, 0xca, 0xf0, 0xe6, 0x84, 0x92, 0xa8, 0xbe,
+ 0x6c, 0x7a, 0x40, 0x56, 0x34, 0x22, 0x18, 0x0e,
+ 0xa1, 0xb7, 0x8d, 0x9b, 0xf9, 0xef, 0xd5, 0xc3,
+ 0x11, 0x07, 0x3d, 0x2b, 0x49, 0x5f, 0x65, 0x73,
+ },
+ {
+ 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65,
+ 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd,
+ 0x6d, 0x7a, 0x43, 0x54, 0x31, 0x26, 0x1f, 0x08,
+ 0xd5, 0xc2, 0xfb, 0xec, 0x89, 0x9e, 0xa7, 0xb0,
+ 0xda, 0xcd, 0xf4, 0xe3, 0x86, 0x91, 0xa8, 0xbf,
+ 0x62, 0x75, 0x4c, 0x5b, 0x3e, 0x29, 0x10, 0x07,
+ 0xb7, 0xa0, 0x99, 0x8e, 0xeb, 0xfc, 0xc5, 0xd2,
+ 0x0f, 0x18, 0x21, 0x36, 0x53, 0x44, 0x7d, 0x6a,
+ 0xa9, 0xbe, 0x87, 0x90, 0xf5, 0xe2, 0xdb, 0xcc,
+ 0x11, 0x06, 0x3f, 0x28, 0x4d, 0x5a, 0x63, 0x74,
+ 0xc4, 0xd3, 0xea, 0xfd, 0x98, 0x8f, 0xb6, 0xa1,
+ 0x7c, 0x6b, 0x52, 0x45, 0x20, 0x37, 0x0e, 0x19,
+ 0x73, 0x64, 0x5d, 0x4a, 0x2f, 0x38, 0x01, 0x16,
+ 0xcb, 0xdc, 0xe5, 0xf2, 0x97, 0x80, 0xb9, 0xae,
+ 0x1e, 0x09, 0x30, 0x27, 0x42, 0x55, 0x6c, 0x7b,
+ 0xa6, 0xb1, 0x88, 0x9f, 0xfa, 0xed, 0xd4, 0xc3,
+ 0x4f, 0x58, 0x61, 0x76, 0x13, 0x04, 0x3d, 0x2a,
+ 0xf7, 0xe0, 0xd9, 0xce, 0xab, 0xbc, 0x85, 0x92,
+ 0x22, 0x35, 0x0c, 0x1b, 0x7e, 0x69, 0x50, 0x47,
+ 0x9a, 0x8d, 0xb4, 0xa3, 0xc6, 0xd1, 0xe8, 0xff,
+ 0x95, 0x82, 0xbb, 0xac, 0xc9, 0xde, 0xe7, 0xf0,
+ 0x2d, 0x3a, 0x03, 0x14, 0x71, 0x66, 0x5f, 0x48,
+ 0xf8, 0xef, 0xd6, 0xc1, 0xa4, 0xb3, 0x8a, 0x9d,
+ 0x40, 0x57, 0x6e, 0x79, 0x1c, 0x0b, 0x32, 0x25,
+ 0xe6, 0xf1, 0xc8, 0xdf, 0xba, 0xad, 0x94, 0x83,
+ 0x5e, 0x49, 0x70, 0x67, 0x02, 0x15, 0x2c, 0x3b,
+ 0x8b, 0x9c, 0xa5, 0xb2, 0xd7, 0xc0, 0xf9, 0xee,
+ 0x33, 0x24, 0x1d, 0x0a, 0x6f, 0x78, 0x41, 0x56,
+ 0x3c, 0x2b, 0x12, 0x05, 0x60, 0x77, 0x4e, 0x59,
+ 0x84, 0x93, 0xaa, 0xbd, 0xd8, 0xcf, 0xf6, 0xe1,
+ 0x51, 0x46, 0x7f, 0x68, 0x0d, 0x1a, 0x23, 0x34,
+ 0xe9, 0xfe, 0xc7, 0xd0, 0xb5, 0xa2, 0x9b, 0x8c,
+ },
+ {
+ 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48,
+ 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88,
+ 0x9d, 0x85, 0xad, 0xb5, 0xfd, 0xe5, 0xcd, 0xd5,
+ 0x5d, 0x45, 0x6d, 0x75, 0x3d, 0x25, 0x0d, 0x15,
+ 0x27, 0x3f, 0x17, 0x0f, 0x47, 0x5f, 0x77, 0x6f,
+ 0xe7, 0xff, 0xd7, 0xcf, 0x87, 0x9f, 0xb7, 0xaf,
+ 0xba, 0xa2, 0x8a, 0x92, 0xda, 0xc2, 0xea, 0xf2,
+ 0x7a, 0x62, 0x4a, 0x52, 0x1a, 0x02, 0x2a, 0x32,
+ 0x4e, 0x56, 0x7e, 0x66, 0x2e, 0x36, 0x1e, 0x06,
+ 0x8e, 0x96, 0xbe, 0xa6, 0xee, 0xf6, 0xde, 0xc6,
+ 0xd3, 0xcb, 0xe3, 0xfb, 0xb3, 0xab, 0x83, 0x9b,
+ 0x13, 0x0b, 0x23, 0x3b, 0x73, 0x6b, 0x43, 0x5b,
+ 0x69, 0x71, 0x59, 0x41, 0x09, 0x11, 0x39, 0x21,
+ 0xa9, 0xb1, 0x99, 0x81, 0xc9, 0xd1, 0xf9, 0xe1,
+ 0xf4, 0xec, 0xc4, 0xdc, 0x94, 0x8c, 0xa4, 0xbc,
+ 0x34, 0x2c, 0x04, 0x1c, 0x54, 0x4c, 0x64, 0x7c,
+ 0x9c, 0x84, 0xac, 0xb4, 0xfc, 0xe4, 0xcc, 0xd4,
+ 0x5c, 0x44, 0x6c, 0x74, 0x3c, 0x24, 0x0c, 0x14,
+ 0x01, 0x19, 0x31, 0x29, 0x61, 0x79, 0x51, 0x49,
+ 0xc1, 0xd9, 0xf1, 0xe9, 0xa1, 0xb9, 0x91, 0x89,
+ 0xbb, 0xa3, 0x8b, 0x93, 0xdb, 0xc3, 0xeb, 0xf3,
+ 0x7b, 0x63, 0x4b, 0x53, 0x1b, 0x03, 0x2b, 0x33,
+ 0x26, 0x3e, 0x16, 0x0e, 0x46, 0x5e, 0x76, 0x6e,
+ 0xe6, 0xfe, 0xd6, 0xce, 0x86, 0x9e, 0xb6, 0xae,
+ 0xd2, 0xca, 0xe2, 0xfa, 0xb2, 0xaa, 0x82, 0x9a,
+ 0x12, 0x0a, 0x22, 0x3a, 0x72, 0x6a, 0x42, 0x5a,
+ 0x4f, 0x57, 0x7f, 0x67, 0x2f, 0x37, 0x1f, 0x07,
+ 0x8f, 0x97, 0xbf, 0xa7, 0xef, 0xf7, 0xdf, 0xc7,
+ 0xf5, 0xed, 0xc5, 0xdd, 0x95, 0x8d, 0xa5, 0xbd,
+ 0x35, 0x2d, 0x05, 0x1d, 0x55, 0x4d, 0x65, 0x7d,
+ 0x68, 0x70, 0x58, 0x40, 0x08, 0x10, 0x38, 0x20,
+ 0xa8, 0xb0, 0x98, 0x80, 0xc8, 0xd0, 0xf8, 0xe0,
+ },
+ {
+ 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f,
+ 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87,
+ 0x8d, 0x94, 0xbf, 0xa6, 0xe9, 0xf0, 0xdb, 0xc2,
+ 0x45, 0x5c, 0x77, 0x6e, 0x21, 0x38, 0x13, 0x0a,
+ 0x07, 0x1e, 0x35, 0x2c, 0x63, 0x7a, 0x51, 0x48,
+ 0xcf, 0xd6, 0xfd, 0xe4, 0xab, 0xb2, 0x99, 0x80,
+ 0x8a, 0x93, 0xb8, 0xa1, 0xee, 0xf7, 0xdc, 0xc5,
+ 0x42, 0x5b, 0x70, 0x69, 0x26, 0x3f, 0x14, 0x0d,
+ 0x0e, 0x17, 0x3c, 0x25, 0x6a, 0x73, 0x58, 0x41,
+ 0xc6, 0xdf, 0xf4, 0xed, 0xa2, 0xbb, 0x90, 0x89,
+ 0x83, 0x9a, 0xb1, 0xa8, 0xe7, 0xfe, 0xd5, 0xcc,
+ 0x4b, 0x52, 0x79, 0x60, 0x2f, 0x36, 0x1d, 0x04,
+ 0x09, 0x10, 0x3b, 0x22, 0x6d, 0x74, 0x5f, 0x46,
+ 0xc1, 0xd8, 0xf3, 0xea, 0xa5, 0xbc, 0x97, 0x8e,
+ 0x84, 0x9d, 0xb6, 0xaf, 0xe0, 0xf9, 0xd2, 0xcb,
+ 0x4c, 0x55, 0x7e, 0x67, 0x28, 0x31, 0x1a, 0x03,
+ 0x1c, 0x05, 0x2e, 0x37, 0x78, 0x61, 0x4a, 0x53,
+ 0xd4, 0xcd, 0xe6, 0xff, 0xb0, 0xa9, 0x82, 0x9b,
+ 0x91, 0x88, 0xa3, 0xba, 0xf5, 0xec, 0xc7, 0xde,
+ 0x59, 0x40, 0x6b, 0x72, 0x3d, 0x24, 0x0f, 0x16,
+ 0x1b, 0x02, 0x29, 0x30, 0x7f, 0x66, 0x4d, 0x54,
+ 0xd3, 0xca, 0xe1, 0xf8, 0xb7, 0xae, 0x85, 0x9c,
+ 0x96, 0x8f, 0xa4, 0xbd, 0xf2, 0xeb, 0xc0, 0xd9,
+ 0x5e, 0x47, 0x6c, 0x75, 0x3a, 0x23, 0x08, 0x11,
+ 0x12, 0x0b, 0x20, 0x39, 0x76, 0x6f, 0x44, 0x5d,
+ 0xda, 0xc3, 0xe8, 0xf1, 0xbe, 0xa7, 0x8c, 0x95,
+ 0x9f, 0x86, 0xad, 0xb4, 0xfb, 0xe2, 0xc9, 0xd0,
+ 0x57, 0x4e, 0x65, 0x7c, 0x33, 0x2a, 0x01, 0x18,
+ 0x15, 0x0c, 0x27, 0x3e, 0x71, 0x68, 0x43, 0x5a,
+ 0xdd, 0xc4, 0xef, 0xf6, 0xb9, 0xa0, 0x8b, 0x92,
+ 0x98, 0x81, 0xaa, 0xb3, 0xfc, 0xe5, 0xce, 0xd7,
+ 0x50, 0x49, 0x62, 0x7b, 0x34, 0x2d, 0x06, 0x1f,
+ },
+ {
+ 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46,
+ 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96,
+ 0xbd, 0xa7, 0x89, 0x93, 0xd5, 0xcf, 0xe1, 0xfb,
+ 0x6d, 0x77, 0x59, 0x43, 0x05, 0x1f, 0x31, 0x2b,
+ 0x67, 0x7d, 0x53, 0x49, 0x0f, 0x15, 0x3b, 0x21,
+ 0xb7, 0xad, 0x83, 0x99, 0xdf, 0xc5, 0xeb, 0xf1,
+ 0xda, 0xc0, 0xee, 0xf4, 0xb2, 0xa8, 0x86, 0x9c,
+ 0x0a, 0x10, 0x3e, 0x24, 0x62, 0x78, 0x56, 0x4c,
+ 0xce, 0xd4, 0xfa, 0xe0, 0xa6, 0xbc, 0x92, 0x88,
+ 0x1e, 0x04, 0x2a, 0x30, 0x76, 0x6c, 0x42, 0x58,
+ 0x73, 0x69, 0x47, 0x5d, 0x1b, 0x01, 0x2f, 0x35,
+ 0xa3, 0xb9, 0x97, 0x8d, 0xcb, 0xd1, 0xff, 0xe5,
+ 0xa9, 0xb3, 0x9d, 0x87, 0xc1, 0xdb, 0xf5, 0xef,
+ 0x79, 0x63, 0x4d, 0x57, 0x11, 0x0b, 0x25, 0x3f,
+ 0x14, 0x0e, 0x20, 0x3a, 0x7c, 0x66, 0x48, 0x52,
+ 0xc4, 0xde, 0xf0, 0xea, 0xac, 0xb6, 0x98, 0x82,
+ 0x81, 0x9b, 0xb5, 0xaf, 0xe9, 0xf3, 0xdd, 0xc7,
+ 0x51, 0x4b, 0x65, 0x7f, 0x39, 0x23, 0x0d, 0x17,
+ 0x3c, 0x26, 0x08, 0x12, 0x54, 0x4e, 0x60, 0x7a,
+ 0xec, 0xf6, 0xd8, 0xc2, 0x84, 0x9e, 0xb0, 0xaa,
+ 0xe6, 0xfc, 0xd2, 0xc8, 0x8e, 0x94, 0xba, 0xa0,
+ 0x36, 0x2c, 0x02, 0x18, 0x5e, 0x44, 0x6a, 0x70,
+ 0x5b, 0x41, 0x6f, 0x75, 0x33, 0x29, 0x07, 0x1d,
+ 0x8b, 0x91, 0xbf, 0xa5, 0xe3, 0xf9, 0xd7, 0xcd,
+ 0x4f, 0x55, 0x7b, 0x61, 0x27, 0x3d, 0x13, 0x09,
+ 0x9f, 0x85, 0xab, 0xb1, 0xf7, 0xed, 0xc3, 0xd9,
+ 0xf2, 0xe8, 0xc6, 0xdc, 0x9a, 0x80, 0xae, 0xb4,
+ 0x22, 0x38, 0x16, 0x0c, 0x4a, 0x50, 0x7e, 0x64,
+ 0x28, 0x32, 0x1c, 0x06, 0x40, 0x5a, 0x74, 0x6e,
+ 0xf8, 0xe2, 0xcc, 0xd6, 0x90, 0x8a, 0xa4, 0xbe,
+ 0x95, 0x8f, 0xa1, 0xbb, 0xfd, 0xe7, 0xc9, 0xd3,
+ 0x45, 0x5f, 0x71, 0x6b, 0x2d, 0x37, 0x19, 0x03,
+ },
+ {
+ 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41,
+ 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99,
+ 0xad, 0xb6, 0x9b, 0x80, 0xc1, 0xda, 0xf7, 0xec,
+ 0x75, 0x6e, 0x43, 0x58, 0x19, 0x02, 0x2f, 0x34,
+ 0x47, 0x5c, 0x71, 0x6a, 0x2b, 0x30, 0x1d, 0x06,
+ 0x9f, 0x84, 0xa9, 0xb2, 0xf3, 0xe8, 0xc5, 0xde,
+ 0xea, 0xf1, 0xdc, 0xc7, 0x86, 0x9d, 0xb0, 0xab,
+ 0x32, 0x29, 0x04, 0x1f, 0x5e, 0x45, 0x68, 0x73,
+ 0x8e, 0x95, 0xb8, 0xa3, 0xe2, 0xf9, 0xd4, 0xcf,
+ 0x56, 0x4d, 0x60, 0x7b, 0x3a, 0x21, 0x0c, 0x17,
+ 0x23, 0x38, 0x15, 0x0e, 0x4f, 0x54, 0x79, 0x62,
+ 0xfb, 0xe0, 0xcd, 0xd6, 0x97, 0x8c, 0xa1, 0xba,
+ 0xc9, 0xd2, 0xff, 0xe4, 0xa5, 0xbe, 0x93, 0x88,
+ 0x11, 0x0a, 0x27, 0x3c, 0x7d, 0x66, 0x4b, 0x50,
+ 0x64, 0x7f, 0x52, 0x49, 0x08, 0x13, 0x3e, 0x25,
+ 0xbc, 0xa7, 0x8a, 0x91, 0xd0, 0xcb, 0xe6, 0xfd,
+ 0x01, 0x1a, 0x37, 0x2c, 0x6d, 0x76, 0x5b, 0x40,
+ 0xd9, 0xc2, 0xef, 0xf4, 0xb5, 0xae, 0x83, 0x98,
+ 0xac, 0xb7, 0x9a, 0x81, 0xc0, 0xdb, 0xf6, 0xed,
+ 0x74, 0x6f, 0x42, 0x59, 0x18, 0x03, 0x2e, 0x35,
+ 0x46, 0x5d, 0x70, 0x6b, 0x2a, 0x31, 0x1c, 0x07,
+ 0x9e, 0x85, 0xa8, 0xb3, 0xf2, 0xe9, 0xc4, 0xdf,
+ 0xeb, 0xf0, 0xdd, 0xc6, 0x87, 0x9c, 0xb1, 0xaa,
+ 0x33, 0x28, 0x05, 0x1e, 0x5f, 0x44, 0x69, 0x72,
+ 0x8f, 0x94, 0xb9, 0xa2, 0xe3, 0xf8, 0xd5, 0xce,
+ 0x57, 0x4c, 0x61, 0x7a, 0x3b, 0x20, 0x0d, 0x16,
+ 0x22, 0x39, 0x14, 0x0f, 0x4e, 0x55, 0x78, 0x63,
+ 0xfa, 0xe1, 0xcc, 0xd7, 0x96, 0x8d, 0xa0, 0xbb,
+ 0xc8, 0xd3, 0xfe, 0xe5, 0xa4, 0xbf, 0x92, 0x89,
+ 0x10, 0x0b, 0x26, 0x3d, 0x7c, 0x67, 0x4a, 0x51,
+ 0x65, 0x7e, 0x53, 0x48, 0x09, 0x12, 0x3f, 0x24,
+ 0xbd, 0xa6, 0x8b, 0x90, 0xd1, 0xca, 0xe7, 0xfc,
+ },
+ {
+ 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54,
+ 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4,
+ 0xdd, 0xc1, 0xe5, 0xf9, 0xad, 0xb1, 0x95, 0x89,
+ 0x3d, 0x21, 0x05, 0x19, 0x4d, 0x51, 0x75, 0x69,
+ 0xa7, 0xbb, 0x9f, 0x83, 0xd7, 0xcb, 0xef, 0xf3,
+ 0x47, 0x5b, 0x7f, 0x63, 0x37, 0x2b, 0x0f, 0x13,
+ 0x7a, 0x66, 0x42, 0x5e, 0x0a, 0x16, 0x32, 0x2e,
+ 0x9a, 0x86, 0xa2, 0xbe, 0xea, 0xf6, 0xd2, 0xce,
+ 0x53, 0x4f, 0x6b, 0x77, 0x23, 0x3f, 0x1b, 0x07,
+ 0xb3, 0xaf, 0x8b, 0x97, 0xc3, 0xdf, 0xfb, 0xe7,
+ 0x8e, 0x92, 0xb6, 0xaa, 0xfe, 0xe2, 0xc6, 0xda,
+ 0x6e, 0x72, 0x56, 0x4a, 0x1e, 0x02, 0x26, 0x3a,
+ 0xf4, 0xe8, 0xcc, 0xd0, 0x84, 0x98, 0xbc, 0xa0,
+ 0x14, 0x08, 0x2c, 0x30, 0x64, 0x78, 0x5c, 0x40,
+ 0x29, 0x35, 0x11, 0x0d, 0x59, 0x45, 0x61, 0x7d,
+ 0xc9, 0xd5, 0xf1, 0xed, 0xb9, 0xa5, 0x81, 0x9d,
+ 0xa6, 0xba, 0x9e, 0x82, 0xd6, 0xca, 0xee, 0xf2,
+ 0x46, 0x5a, 0x7e, 0x62, 0x36, 0x2a, 0x0e, 0x12,
+ 0x7b, 0x67, 0x43, 0x5f, 0x0b, 0x17, 0x33, 0x2f,
+ 0x9b, 0x87, 0xa3, 0xbf, 0xeb, 0xf7, 0xd3, 0xcf,
+ 0x01, 0x1d, 0x39, 0x25, 0x71, 0x6d, 0x49, 0x55,
+ 0xe1, 0xfd, 0xd9, 0xc5, 0x91, 0x8d, 0xa9, 0xb5,
+ 0xdc, 0xc0, 0xe4, 0xf8, 0xac, 0xb0, 0x94, 0x88,
+ 0x3c, 0x20, 0x04, 0x18, 0x4c, 0x50, 0x74, 0x68,
+ 0xf5, 0xe9, 0xcd, 0xd1, 0x85, 0x99, 0xbd, 0xa1,
+ 0x15, 0x09, 0x2d, 0x31, 0x65, 0x79, 0x5d, 0x41,
+ 0x28, 0x34, 0x10, 0x0c, 0x58, 0x44, 0x60, 0x7c,
+ 0xc8, 0xd4, 0xf0, 0xec, 0xb8, 0xa4, 0x80, 0x9c,
+ 0x52, 0x4e, 0x6a, 0x76, 0x22, 0x3e, 0x1a, 0x06,
+ 0xb2, 0xae, 0x8a, 0x96, 0xc2, 0xde, 0xfa, 0xe6,
+ 0x8f, 0x93, 0xb7, 0xab, 0xff, 0xe3, 0xc7, 0xdb,
+ 0x6f, 0x73, 0x57, 0x4b, 0x1f, 0x03, 0x27, 0x3b,
+ },
+ {
+ 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb,
+ 0xcd, 0xd0, 0xf7, 0xea, 0xb9, 0xa4, 0x83, 0x9e,
+ 0x25, 0x38, 0x1f, 0x02, 0x51, 0x4c, 0x6b, 0x76,
+ 0x87, 0x9a, 0xbd, 0xa0, 0xf3, 0xee, 0xc9, 0xd4,
+ 0x6f, 0x72, 0x55, 0x48, 0x1b, 0x06, 0x21, 0x3c,
+ 0x4a, 0x57, 0x70, 0x6d, 0x3e, 0x23, 0x04, 0x19,
+ 0xa2, 0xbf, 0x98, 0x85, 0xd6, 0xcb, 0xec, 0xf1,
+ 0x13, 0x0e, 0x29, 0x34, 0x67, 0x7a, 0x5d, 0x40,
+ 0xfb, 0xe6, 0xc1, 0xdc, 0x8f, 0x92, 0xb5, 0xa8,
+ 0xde, 0xc3, 0xe4, 0xf9, 0xaa, 0xb7, 0x90, 0x8d,
+ 0x36, 0x2b, 0x0c, 0x11, 0x42, 0x5f, 0x78, 0x65,
+ 0x94, 0x89, 0xae, 0xb3, 0xe0, 0xfd, 0xda, 0xc7,
+ 0x7c, 0x61, 0x46, 0x5b, 0x08, 0x15, 0x32, 0x2f,
+ 0x59, 0x44, 0x63, 0x7e, 0x2d, 0x30, 0x17, 0x0a,
+ 0xb1, 0xac, 0x8b, 0x96, 0xc5, 0xd8, 0xff, 0xe2,
+ 0x26, 0x3b, 0x1c, 0x01, 0x52, 0x4f, 0x68, 0x75,
+ 0xce, 0xd3, 0xf4, 0xe9, 0xba, 0xa7, 0x80, 0x9d,
+ 0xeb, 0xf6, 0xd1, 0xcc, 0x9f, 0x82, 0xa5, 0xb8,
+ 0x03, 0x1e, 0x39, 0x24, 0x77, 0x6a, 0x4d, 0x50,
+ 0xa1, 0xbc, 0x9b, 0x86, 0xd5, 0xc8, 0xef, 0xf2,
+ 0x49, 0x54, 0x73, 0x6e, 0x3d, 0x20, 0x07, 0x1a,
+ 0x6c, 0x71, 0x56, 0x4b, 0x18, 0x05, 0x22, 0x3f,
+ 0x84, 0x99, 0xbe, 0xa3, 0xf0, 0xed, 0xca, 0xd7,
+ 0x35, 0x28, 0x0f, 0x12, 0x41, 0x5c, 0x7b, 0x66,
+ 0xdd, 0xc0, 0xe7, 0xfa, 0xa9, 0xb4, 0x93, 0x8e,
+ 0xf8, 0xe5, 0xc2, 0xdf, 0x8c, 0x91, 0xb6, 0xab,
+ 0x10, 0x0d, 0x2a, 0x37, 0x64, 0x79, 0x5e, 0x43,
+ 0xb2, 0xaf, 0x88, 0x95, 0xc6, 0xdb, 0xfc, 0xe1,
+ 0x5a, 0x47, 0x60, 0x7d, 0x2e, 0x33, 0x14, 0x09,
+ 0x7f, 0x62, 0x45, 0x58, 0x0b, 0x16, 0x31, 0x2c,
+ 0x97, 0x8a, 0xad, 0xb0, 0xe3, 0xfe, 0xd9, 0xc4,
+ },
+ {
+ 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a,
+ 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa,
+ 0xfd, 0xe3, 0xc1, 0xdf, 0x85, 0x9b, 0xb9, 0xa7,
+ 0x0d, 0x13, 0x31, 0x2f, 0x75, 0x6b, 0x49, 0x57,
+ 0xe7, 0xf9, 0xdb, 0xc5, 0x9f, 0x81, 0xa3, 0xbd,
+ 0x17, 0x09, 0x2b, 0x35, 0x6f, 0x71, 0x53, 0x4d,
+ 0x1a, 0x04, 0x26, 0x38, 0x62, 0x7c, 0x5e, 0x40,
+ 0xea, 0xf4, 0xd6, 0xc8, 0x92, 0x8c, 0xae, 0xb0,
+ 0xd3, 0xcd, 0xef, 0xf1, 0xab, 0xb5, 0x97, 0x89,
+ 0x23, 0x3d, 0x1f, 0x01, 0x5b, 0x45, 0x67, 0x79,
+ 0x2e, 0x30, 0x12, 0x0c, 0x56, 0x48, 0x6a, 0x74,
+ 0xde, 0xc0, 0xe2, 0xfc, 0xa6, 0xb8, 0x9a, 0x84,
+ 0x34, 0x2a, 0x08, 0x16, 0x4c, 0x52, 0x70, 0x6e,
+ 0xc4, 0xda, 0xf8, 0xe6, 0xbc, 0xa2, 0x80, 0x9e,
+ 0xc9, 0xd7, 0xf5, 0xeb, 0xb1, 0xaf, 0x8d, 0x93,
+ 0x39, 0x27, 0x05, 0x1b, 0x41, 0x5f, 0x7d, 0x63,
+ 0xbb, 0xa5, 0x87, 0x99, 0xc3, 0xdd, 0xff, 0xe1,
+ 0x4b, 0x55, 0x77, 0x69, 0x33, 0x2d, 0x0f, 0x11,
+ 0x46, 0x58, 0x7a, 0x64, 0x3e, 0x20, 0x02, 0x1c,
+ 0xb6, 0xa8, 0x8a, 0x94, 0xce, 0xd0, 0xf2, 0xec,
+ 0x5c, 0x42, 0x60, 0x7e, 0x24, 0x3a, 0x18, 0x06,
+ 0xac, 0xb2, 0x90, 0x8e, 0xd4, 0xca, 0xe8, 0xf6,
+ 0xa1, 0xbf, 0x9d, 0x83, 0xd9, 0xc7, 0xe5, 0xfb,
+ 0x51, 0x4f, 0x6d, 0x73, 0x29, 0x37, 0x15, 0x0b,
+ 0x68, 0x76, 0x54, 0x4a, 0x10, 0x0e, 0x2c, 0x32,
+ 0x98, 0x86, 0xa4, 0xba, 0xe0, 0xfe, 0xdc, 0xc2,
+ 0x95, 0x8b, 0xa9, 0xb7, 0xed, 0xf3, 0xd1, 0xcf,
+ 0x65, 0x7b, 0x59, 0x47, 0x1d, 0x03, 0x21, 0x3f,
+ 0x8f, 0x91, 0xb3, 0xad, 0xf7, 0xe9, 0xcb, 0xd5,
+ 0x7f, 0x61, 0x43, 0x5d, 0x07, 0x19, 0x3b, 0x25,
+ 0x72, 0x6c, 0x4e, 0x50, 0x0a, 0x14, 0x36, 0x28,
+ 0x82, 0x9c, 0xbe, 0xa0, 0xfa, 0xe4, 0xc6, 0xd8,
+ },
+ {
+ 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d,
+ 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5,
+ 0xed, 0xf2, 0xd3, 0xcc, 0x91, 0x8e, 0xaf, 0xb0,
+ 0x15, 0x0a, 0x2b, 0x34, 0x69, 0x76, 0x57, 0x48,
+ 0xc7, 0xd8, 0xf9, 0xe6, 0xbb, 0xa4, 0x85, 0x9a,
+ 0x3f, 0x20, 0x01, 0x1e, 0x43, 0x5c, 0x7d, 0x62,
+ 0x2a, 0x35, 0x14, 0x0b, 0x56, 0x49, 0x68, 0x77,
+ 0xd2, 0xcd, 0xec, 0xf3, 0xae, 0xb1, 0x90, 0x8f,
+ 0x93, 0x8c, 0xad, 0xb2, 0xef, 0xf0, 0xd1, 0xce,
+ 0x6b, 0x74, 0x55, 0x4a, 0x17, 0x08, 0x29, 0x36,
+ 0x7e, 0x61, 0x40, 0x5f, 0x02, 0x1d, 0x3c, 0x23,
+ 0x86, 0x99, 0xb8, 0xa7, 0xfa, 0xe5, 0xc4, 0xdb,
+ 0x54, 0x4b, 0x6a, 0x75, 0x28, 0x37, 0x16, 0x09,
+ 0xac, 0xb3, 0x92, 0x8d, 0xd0, 0xcf, 0xee, 0xf1,
+ 0xb9, 0xa6, 0x87, 0x98, 0xc5, 0xda, 0xfb, 0xe4,
+ 0x41, 0x5e, 0x7f, 0x60, 0x3d, 0x22, 0x03, 0x1c,
+ 0x3b, 0x24, 0x05, 0x1a, 0x47, 0x58, 0x79, 0x66,
+ 0xc3, 0xdc, 0xfd, 0xe2, 0xbf, 0xa0, 0x81, 0x9e,
+ 0xd6, 0xc9, 0xe8, 0xf7, 0xaa, 0xb5, 0x94, 0x8b,
+ 0x2e, 0x31, 0x10, 0x0f, 0x52, 0x4d, 0x6c, 0x73,
+ 0xfc, 0xe3, 0xc2, 0xdd, 0x80, 0x9f, 0xbe, 0xa1,
+ 0x04, 0x1b, 0x3a, 0x25, 0x78, 0x67, 0x46, 0x59,
+ 0x11, 0x0e, 0x2f, 0x30, 0x6d, 0x72, 0x53, 0x4c,
+ 0xe9, 0xf6, 0xd7, 0xc8, 0x95, 0x8a, 0xab, 0xb4,
+ 0xa8, 0xb7, 0x96, 0x89, 0xd4, 0xcb, 0xea, 0xf5,
+ 0x50, 0x4f, 0x6e, 0x71, 0x2c, 0x33, 0x12, 0x0d,
+ 0x45, 0x5a, 0x7b, 0x64, 0x39, 0x26, 0x07, 0x18,
+ 0xbd, 0xa2, 0x83, 0x9c, 0xc1, 0xde, 0xff, 0xe0,
+ 0x6f, 0x70, 0x51, 0x4e, 0x13, 0x0c, 0x2d, 0x32,
+ 0x97, 0x88, 0xa9, 0xb6, 0xeb, 0xf4, 0xd5, 0xca,
+ 0x82, 0x9d, 0xbc, 0xa3, 0xfe, 0xe1, 0xc0, 0xdf,
+ 0x7a, 0x65, 0x44, 0x5b, 0x06, 0x19, 0x38, 0x27,
+ },
+ {
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd,
+ 0x3a, 0x1a, 0x7a, 0x5a, 0xba, 0x9a, 0xfa, 0xda,
+ 0x27, 0x07, 0x67, 0x47, 0xa7, 0x87, 0xe7, 0xc7,
+ 0x74, 0x54, 0x34, 0x14, 0xf4, 0xd4, 0xb4, 0x94,
+ 0x69, 0x49, 0x29, 0x09, 0xe9, 0xc9, 0xa9, 0x89,
+ 0x4e, 0x6e, 0x0e, 0x2e, 0xce, 0xee, 0x8e, 0xae,
+ 0x53, 0x73, 0x13, 0x33, 0xd3, 0xf3, 0x93, 0xb3,
+ 0xe8, 0xc8, 0xa8, 0x88, 0x68, 0x48, 0x28, 0x08,
+ 0xf5, 0xd5, 0xb5, 0x95, 0x75, 0x55, 0x35, 0x15,
+ 0xd2, 0xf2, 0x92, 0xb2, 0x52, 0x72, 0x12, 0x32,
+ 0xcf, 0xef, 0x8f, 0xaf, 0x4f, 0x6f, 0x0f, 0x2f,
+ 0x9c, 0xbc, 0xdc, 0xfc, 0x1c, 0x3c, 0x5c, 0x7c,
+ 0x81, 0xa1, 0xc1, 0xe1, 0x01, 0x21, 0x41, 0x61,
+ 0xa6, 0x86, 0xe6, 0xc6, 0x26, 0x06, 0x66, 0x46,
+ 0xbb, 0x9b, 0xfb, 0xdb, 0x3b, 0x1b, 0x7b, 0x5b,
+ 0xcd, 0xed, 0x8d, 0xad, 0x4d, 0x6d, 0x0d, 0x2d,
+ 0xd0, 0xf0, 0x90, 0xb0, 0x50, 0x70, 0x10, 0x30,
+ 0xf7, 0xd7, 0xb7, 0x97, 0x77, 0x57, 0x37, 0x17,
+ 0xea, 0xca, 0xaa, 0x8a, 0x6a, 0x4a, 0x2a, 0x0a,
+ 0xb9, 0x99, 0xf9, 0xd9, 0x39, 0x19, 0x79, 0x59,
+ 0xa4, 0x84, 0xe4, 0xc4, 0x24, 0x04, 0x64, 0x44,
+ 0x83, 0xa3, 0xc3, 0xe3, 0x03, 0x23, 0x43, 0x63,
+ 0x9e, 0xbe, 0xde, 0xfe, 0x1e, 0x3e, 0x5e, 0x7e,
+ 0x25, 0x05, 0x65, 0x45, 0xa5, 0x85, 0xe5, 0xc5,
+ 0x38, 0x18, 0x78, 0x58, 0xb8, 0x98, 0xf8, 0xd8,
+ 0x1f, 0x3f, 0x5f, 0x7f, 0x9f, 0xbf, 0xdf, 0xff,
+ 0x02, 0x22, 0x42, 0x62, 0x82, 0xa2, 0xc2, 0xe2,
+ 0x51, 0x71, 0x11, 0x31, 0xd1, 0xf1, 0x91, 0xb1,
+ 0x4c, 0x6c, 0x0c, 0x2c, 0xcc, 0xec, 0x8c, 0xac,
+ 0x6b, 0x4b, 0x2b, 0x0b, 0xeb, 0xcb, 0xab, 0x8b,
+ 0x76, 0x56, 0x36, 0x16, 0xf6, 0xd6, 0xb6, 0x96,
+ },
+ {
+ 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7,
+ 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2,
+ 0x2a, 0x0b, 0x68, 0x49, 0xae, 0x8f, 0xec, 0xcd,
+ 0x3f, 0x1e, 0x7d, 0x5c, 0xbb, 0x9a, 0xf9, 0xd8,
+ 0x54, 0x75, 0x16, 0x37, 0xd0, 0xf1, 0x92, 0xb3,
+ 0x41, 0x60, 0x03, 0x22, 0xc5, 0xe4, 0x87, 0xa6,
+ 0x7e, 0x5f, 0x3c, 0x1d, 0xfa, 0xdb, 0xb8, 0x99,
+ 0x6b, 0x4a, 0x29, 0x08, 0xef, 0xce, 0xad, 0x8c,
+ 0xa8, 0x89, 0xea, 0xcb, 0x2c, 0x0d, 0x6e, 0x4f,
+ 0xbd, 0x9c, 0xff, 0xde, 0x39, 0x18, 0x7b, 0x5a,
+ 0x82, 0xa3, 0xc0, 0xe1, 0x06, 0x27, 0x44, 0x65,
+ 0x97, 0xb6, 0xd5, 0xf4, 0x13, 0x32, 0x51, 0x70,
+ 0xfc, 0xdd, 0xbe, 0x9f, 0x78, 0x59, 0x3a, 0x1b,
+ 0xe9, 0xc8, 0xab, 0x8a, 0x6d, 0x4c, 0x2f, 0x0e,
+ 0xd6, 0xf7, 0x94, 0xb5, 0x52, 0x73, 0x10, 0x31,
+ 0xc3, 0xe2, 0x81, 0xa0, 0x47, 0x66, 0x05, 0x24,
+ 0x4d, 0x6c, 0x0f, 0x2e, 0xc9, 0xe8, 0x8b, 0xaa,
+ 0x58, 0x79, 0x1a, 0x3b, 0xdc, 0xfd, 0x9e, 0xbf,
+ 0x67, 0x46, 0x25, 0x04, 0xe3, 0xc2, 0xa1, 0x80,
+ 0x72, 0x53, 0x30, 0x11, 0xf6, 0xd7, 0xb4, 0x95,
+ 0x19, 0x38, 0x5b, 0x7a, 0x9d, 0xbc, 0xdf, 0xfe,
+ 0x0c, 0x2d, 0x4e, 0x6f, 0x88, 0xa9, 0xca, 0xeb,
+ 0x33, 0x12, 0x71, 0x50, 0xb7, 0x96, 0xf5, 0xd4,
+ 0x26, 0x07, 0x64, 0x45, 0xa2, 0x83, 0xe0, 0xc1,
+ 0xe5, 0xc4, 0xa7, 0x86, 0x61, 0x40, 0x23, 0x02,
+ 0xf0, 0xd1, 0xb2, 0x93, 0x74, 0x55, 0x36, 0x17,
+ 0xcf, 0xee, 0x8d, 0xac, 0x4b, 0x6a, 0x09, 0x28,
+ 0xda, 0xfb, 0x98, 0xb9, 0x5e, 0x7f, 0x1c, 0x3d,
+ 0xb1, 0x90, 0xf3, 0xd2, 0x35, 0x14, 0x77, 0x56,
+ 0xa4, 0x85, 0xe6, 0xc7, 0x20, 0x01, 0x62, 0x43,
+ 0x9b, 0xba, 0xd9, 0xf8, 0x1f, 0x3e, 0x5d, 0x7c,
+ 0x8e, 0xaf, 0xcc, 0xed, 0x0a, 0x2b, 0x48, 0x69,
+ },
+ {
+ 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee,
+ 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3,
+ 0x1a, 0x38, 0x5e, 0x7c, 0x92, 0xb0, 0xd6, 0xf4,
+ 0x17, 0x35, 0x53, 0x71, 0x9f, 0xbd, 0xdb, 0xf9,
+ 0x34, 0x16, 0x70, 0x52, 0xbc, 0x9e, 0xf8, 0xda,
+ 0x39, 0x1b, 0x7d, 0x5f, 0xb1, 0x93, 0xf5, 0xd7,
+ 0x2e, 0x0c, 0x6a, 0x48, 0xa6, 0x84, 0xe2, 0xc0,
+ 0x23, 0x01, 0x67, 0x45, 0xab, 0x89, 0xef, 0xcd,
+ 0x68, 0x4a, 0x2c, 0x0e, 0xe0, 0xc2, 0xa4, 0x86,
+ 0x65, 0x47, 0x21, 0x03, 0xed, 0xcf, 0xa9, 0x8b,
+ 0x72, 0x50, 0x36, 0x14, 0xfa, 0xd8, 0xbe, 0x9c,
+ 0x7f, 0x5d, 0x3b, 0x19, 0xf7, 0xd5, 0xb3, 0x91,
+ 0x5c, 0x7e, 0x18, 0x3a, 0xd4, 0xf6, 0x90, 0xb2,
+ 0x51, 0x73, 0x15, 0x37, 0xd9, 0xfb, 0x9d, 0xbf,
+ 0x46, 0x64, 0x02, 0x20, 0xce, 0xec, 0x8a, 0xa8,
+ 0x4b, 0x69, 0x0f, 0x2d, 0xc3, 0xe1, 0x87, 0xa5,
+ 0xd0, 0xf2, 0x94, 0xb6, 0x58, 0x7a, 0x1c, 0x3e,
+ 0xdd, 0xff, 0x99, 0xbb, 0x55, 0x77, 0x11, 0x33,
+ 0xca, 0xe8, 0x8e, 0xac, 0x42, 0x60, 0x06, 0x24,
+ 0xc7, 0xe5, 0x83, 0xa1, 0x4f, 0x6d, 0x0b, 0x29,
+ 0xe4, 0xc6, 0xa0, 0x82, 0x6c, 0x4e, 0x28, 0x0a,
+ 0xe9, 0xcb, 0xad, 0x8f, 0x61, 0x43, 0x25, 0x07,
+ 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
+ 0xf3, 0xd1, 0xb7, 0x95, 0x7b, 0x59, 0x3f, 0x1d,
+ 0xb8, 0x9a, 0xfc, 0xde, 0x30, 0x12, 0x74, 0x56,
+ 0xb5, 0x97, 0xf1, 0xd3, 0x3d, 0x1f, 0x79, 0x5b,
+ 0xa2, 0x80, 0xe6, 0xc4, 0x2a, 0x08, 0x6e, 0x4c,
+ 0xaf, 0x8d, 0xeb, 0xc9, 0x27, 0x05, 0x63, 0x41,
+ 0x8c, 0xae, 0xc8, 0xea, 0x04, 0x26, 0x40, 0x62,
+ 0x81, 0xa3, 0xc5, 0xe7, 0x09, 0x2b, 0x4d, 0x6f,
+ 0x96, 0xb4, 0xd2, 0xf0, 0x1e, 0x3c, 0x5a, 0x78,
+ 0x9b, 0xb9, 0xdf, 0xfd, 0x13, 0x31, 0x57, 0x75,
+ },
+ {
+ 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9,
+ 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec,
+ 0x0a, 0x29, 0x4c, 0x6f, 0x86, 0xa5, 0xc0, 0xe3,
+ 0x0f, 0x2c, 0x49, 0x6a, 0x83, 0xa0, 0xc5, 0xe6,
+ 0x14, 0x37, 0x52, 0x71, 0x98, 0xbb, 0xde, 0xfd,
+ 0x11, 0x32, 0x57, 0x74, 0x9d, 0xbe, 0xdb, 0xf8,
+ 0x1e, 0x3d, 0x58, 0x7b, 0x92, 0xb1, 0xd4, 0xf7,
+ 0x1b, 0x38, 0x5d, 0x7e, 0x97, 0xb4, 0xd1, 0xf2,
+ 0x28, 0x0b, 0x6e, 0x4d, 0xa4, 0x87, 0xe2, 0xc1,
+ 0x2d, 0x0e, 0x6b, 0x48, 0xa1, 0x82, 0xe7, 0xc4,
+ 0x22, 0x01, 0x64, 0x47, 0xae, 0x8d, 0xe8, 0xcb,
+ 0x27, 0x04, 0x61, 0x42, 0xab, 0x88, 0xed, 0xce,
+ 0x3c, 0x1f, 0x7a, 0x59, 0xb0, 0x93, 0xf6, 0xd5,
+ 0x39, 0x1a, 0x7f, 0x5c, 0xb5, 0x96, 0xf3, 0xd0,
+ 0x36, 0x15, 0x70, 0x53, 0xba, 0x99, 0xfc, 0xdf,
+ 0x33, 0x10, 0x75, 0x56, 0xbf, 0x9c, 0xf9, 0xda,
+ 0x50, 0x73, 0x16, 0x35, 0xdc, 0xff, 0x9a, 0xb9,
+ 0x55, 0x76, 0x13, 0x30, 0xd9, 0xfa, 0x9f, 0xbc,
+ 0x5a, 0x79, 0x1c, 0x3f, 0xd6, 0xf5, 0x90, 0xb3,
+ 0x5f, 0x7c, 0x19, 0x3a, 0xd3, 0xf0, 0x95, 0xb6,
+ 0x44, 0x67, 0x02, 0x21, 0xc8, 0xeb, 0x8e, 0xad,
+ 0x41, 0x62, 0x07, 0x24, 0xcd, 0xee, 0x8b, 0xa8,
+ 0x4e, 0x6d, 0x08, 0x2b, 0xc2, 0xe1, 0x84, 0xa7,
+ 0x4b, 0x68, 0x0d, 0x2e, 0xc7, 0xe4, 0x81, 0xa2,
+ 0x78, 0x5b, 0x3e, 0x1d, 0xf4, 0xd7, 0xb2, 0x91,
+ 0x7d, 0x5e, 0x3b, 0x18, 0xf1, 0xd2, 0xb7, 0x94,
+ 0x72, 0x51, 0x34, 0x17, 0xfe, 0xdd, 0xb8, 0x9b,
+ 0x77, 0x54, 0x31, 0x12, 0xfb, 0xd8, 0xbd, 0x9e,
+ 0x6c, 0x4f, 0x2a, 0x09, 0xe0, 0xc3, 0xa6, 0x85,
+ 0x69, 0x4a, 0x2f, 0x0c, 0xe5, 0xc6, 0xa3, 0x80,
+ 0x66, 0x45, 0x20, 0x03, 0xea, 0xc9, 0xac, 0x8f,
+ 0x63, 0x40, 0x25, 0x06, 0xef, 0xcc, 0xa9, 0x8a,
+ },
+ {
+ 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc,
+ 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1,
+ 0x7a, 0x5e, 0x32, 0x16, 0xea, 0xce, 0xa2, 0x86,
+ 0x47, 0x63, 0x0f, 0x2b, 0xd7, 0xf3, 0x9f, 0xbb,
+ 0xf4, 0xd0, 0xbc, 0x98, 0x64, 0x40, 0x2c, 0x08,
+ 0xc9, 0xed, 0x81, 0xa5, 0x59, 0x7d, 0x11, 0x35,
+ 0x8e, 0xaa, 0xc6, 0xe2, 0x1e, 0x3a, 0x56, 0x72,
+ 0xb3, 0x97, 0xfb, 0xdf, 0x23, 0x07, 0x6b, 0x4f,
+ 0xf5, 0xd1, 0xbd, 0x99, 0x65, 0x41, 0x2d, 0x09,
+ 0xc8, 0xec, 0x80, 0xa4, 0x58, 0x7c, 0x10, 0x34,
+ 0x8f, 0xab, 0xc7, 0xe3, 0x1f, 0x3b, 0x57, 0x73,
+ 0xb2, 0x96, 0xfa, 0xde, 0x22, 0x06, 0x6a, 0x4e,
+ 0x01, 0x25, 0x49, 0x6d, 0x91, 0xb5, 0xd9, 0xfd,
+ 0x3c, 0x18, 0x74, 0x50, 0xac, 0x88, 0xe4, 0xc0,
+ 0x7b, 0x5f, 0x33, 0x17, 0xeb, 0xcf, 0xa3, 0x87,
+ 0x46, 0x62, 0x0e, 0x2a, 0xd6, 0xf2, 0x9e, 0xba,
+ 0xf7, 0xd3, 0xbf, 0x9b, 0x67, 0x43, 0x2f, 0x0b,
+ 0xca, 0xee, 0x82, 0xa6, 0x5a, 0x7e, 0x12, 0x36,
+ 0x8d, 0xa9, 0xc5, 0xe1, 0x1d, 0x39, 0x55, 0x71,
+ 0xb0, 0x94, 0xf8, 0xdc, 0x20, 0x04, 0x68, 0x4c,
+ 0x03, 0x27, 0x4b, 0x6f, 0x93, 0xb7, 0xdb, 0xff,
+ 0x3e, 0x1a, 0x76, 0x52, 0xae, 0x8a, 0xe6, 0xc2,
+ 0x79, 0x5d, 0x31, 0x15, 0xe9, 0xcd, 0xa1, 0x85,
+ 0x44, 0x60, 0x0c, 0x28, 0xd4, 0xf0, 0x9c, 0xb8,
+ 0x02, 0x26, 0x4a, 0x6e, 0x92, 0xb6, 0xda, 0xfe,
+ 0x3f, 0x1b, 0x77, 0x53, 0xaf, 0x8b, 0xe7, 0xc3,
+ 0x78, 0x5c, 0x30, 0x14, 0xe8, 0xcc, 0xa0, 0x84,
+ 0x45, 0x61, 0x0d, 0x29, 0xd5, 0xf1, 0x9d, 0xb9,
+ 0xf6, 0xd2, 0xbe, 0x9a, 0x66, 0x42, 0x2e, 0x0a,
+ 0xcb, 0xef, 0x83, 0xa7, 0x5b, 0x7f, 0x13, 0x37,
+ 0x8c, 0xa8, 0xc4, 0xe0, 0x1c, 0x38, 0x54, 0x70,
+ 0xb1, 0x95, 0xf9, 0xdd, 0x21, 0x05, 0x69, 0x4d,
+ },
+ {
+ 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb,
+ 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce,
+ 0x6a, 0x4f, 0x20, 0x05, 0xfe, 0xdb, 0xb4, 0x91,
+ 0x5f, 0x7a, 0x15, 0x30, 0xcb, 0xee, 0x81, 0xa4,
+ 0xd4, 0xf1, 0x9e, 0xbb, 0x40, 0x65, 0x0a, 0x2f,
+ 0xe1, 0xc4, 0xab, 0x8e, 0x75, 0x50, 0x3f, 0x1a,
+ 0xbe, 0x9b, 0xf4, 0xd1, 0x2a, 0x0f, 0x60, 0x45,
+ 0x8b, 0xae, 0xc1, 0xe4, 0x1f, 0x3a, 0x55, 0x70,
+ 0xb5, 0x90, 0xff, 0xda, 0x21, 0x04, 0x6b, 0x4e,
+ 0x80, 0xa5, 0xca, 0xef, 0x14, 0x31, 0x5e, 0x7b,
+ 0xdf, 0xfa, 0x95, 0xb0, 0x4b, 0x6e, 0x01, 0x24,
+ 0xea, 0xcf, 0xa0, 0x85, 0x7e, 0x5b, 0x34, 0x11,
+ 0x61, 0x44, 0x2b, 0x0e, 0xf5, 0xd0, 0xbf, 0x9a,
+ 0x54, 0x71, 0x1e, 0x3b, 0xc0, 0xe5, 0x8a, 0xaf,
+ 0x0b, 0x2e, 0x41, 0x64, 0x9f, 0xba, 0xd5, 0xf0,
+ 0x3e, 0x1b, 0x74, 0x51, 0xaa, 0x8f, 0xe0, 0xc5,
+ 0x77, 0x52, 0x3d, 0x18, 0xe3, 0xc6, 0xa9, 0x8c,
+ 0x42, 0x67, 0x08, 0x2d, 0xd6, 0xf3, 0x9c, 0xb9,
+ 0x1d, 0x38, 0x57, 0x72, 0x89, 0xac, 0xc3, 0xe6,
+ 0x28, 0x0d, 0x62, 0x47, 0xbc, 0x99, 0xf6, 0xd3,
+ 0xa3, 0x86, 0xe9, 0xcc, 0x37, 0x12, 0x7d, 0x58,
+ 0x96, 0xb3, 0xdc, 0xf9, 0x02, 0x27, 0x48, 0x6d,
+ 0xc9, 0xec, 0x83, 0xa6, 0x5d, 0x78, 0x17, 0x32,
+ 0xfc, 0xd9, 0xb6, 0x93, 0x68, 0x4d, 0x22, 0x07,
+ 0xc2, 0xe7, 0x88, 0xad, 0x56, 0x73, 0x1c, 0x39,
+ 0xf7, 0xd2, 0xbd, 0x98, 0x63, 0x46, 0x29, 0x0c,
+ 0xa8, 0x8d, 0xe2, 0xc7, 0x3c, 0x19, 0x76, 0x53,
+ 0x9d, 0xb8, 0xd7, 0xf2, 0x09, 0x2c, 0x43, 0x66,
+ 0x16, 0x33, 0x5c, 0x79, 0x82, 0xa7, 0xc8, 0xed,
+ 0x23, 0x06, 0x69, 0x4c, 0xb7, 0x92, 0xfd, 0xd8,
+ 0x7c, 0x59, 0x36, 0x13, 0xe8, 0xcd, 0xa2, 0x87,
+ 0x49, 0x6c, 0x03, 0x26, 0xdd, 0xf8, 0x97, 0xb2,
+ },
+ {
+ 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2,
+ 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf,
+ 0x5a, 0x7c, 0x16, 0x30, 0xc2, 0xe4, 0x8e, 0xa8,
+ 0x77, 0x51, 0x3b, 0x1d, 0xef, 0xc9, 0xa3, 0x85,
+ 0xb4, 0x92, 0xf8, 0xde, 0x2c, 0x0a, 0x60, 0x46,
+ 0x99, 0xbf, 0xd5, 0xf3, 0x01, 0x27, 0x4d, 0x6b,
+ 0xee, 0xc8, 0xa2, 0x84, 0x76, 0x50, 0x3a, 0x1c,
+ 0xc3, 0xe5, 0x8f, 0xa9, 0x5b, 0x7d, 0x17, 0x31,
+ 0x75, 0x53, 0x39, 0x1f, 0xed, 0xcb, 0xa1, 0x87,
+ 0x58, 0x7e, 0x14, 0x32, 0xc0, 0xe6, 0x8c, 0xaa,
+ 0x2f, 0x09, 0x63, 0x45, 0xb7, 0x91, 0xfb, 0xdd,
+ 0x02, 0x24, 0x4e, 0x68, 0x9a, 0xbc, 0xd6, 0xf0,
+ 0xc1, 0xe7, 0x8d, 0xab, 0x59, 0x7f, 0x15, 0x33,
+ 0xec, 0xca, 0xa0, 0x86, 0x74, 0x52, 0x38, 0x1e,
+ 0x9b, 0xbd, 0xd7, 0xf1, 0x03, 0x25, 0x4f, 0x69,
+ 0xb6, 0x90, 0xfa, 0xdc, 0x2e, 0x08, 0x62, 0x44,
+ 0xea, 0xcc, 0xa6, 0x80, 0x72, 0x54, 0x3e, 0x18,
+ 0xc7, 0xe1, 0x8b, 0xad, 0x5f, 0x79, 0x13, 0x35,
+ 0xb0, 0x96, 0xfc, 0xda, 0x28, 0x0e, 0x64, 0x42,
+ 0x9d, 0xbb, 0xd1, 0xf7, 0x05, 0x23, 0x49, 0x6f,
+ 0x5e, 0x78, 0x12, 0x34, 0xc6, 0xe0, 0x8a, 0xac,
+ 0x73, 0x55, 0x3f, 0x19, 0xeb, 0xcd, 0xa7, 0x81,
+ 0x04, 0x22, 0x48, 0x6e, 0x9c, 0xba, 0xd0, 0xf6,
+ 0x29, 0x0f, 0x65, 0x43, 0xb1, 0x97, 0xfd, 0xdb,
+ 0x9f, 0xb9, 0xd3, 0xf5, 0x07, 0x21, 0x4b, 0x6d,
+ 0xb2, 0x94, 0xfe, 0xd8, 0x2a, 0x0c, 0x66, 0x40,
+ 0xc5, 0xe3, 0x89, 0xaf, 0x5d, 0x7b, 0x11, 0x37,
+ 0xe8, 0xce, 0xa4, 0x82, 0x70, 0x56, 0x3c, 0x1a,
+ 0x2b, 0x0d, 0x67, 0x41, 0xb3, 0x95, 0xff, 0xd9,
+ 0x06, 0x20, 0x4a, 0x6c, 0x9e, 0xb8, 0xd2, 0xf4,
+ 0x71, 0x57, 0x3d, 0x1b, 0xe9, 0xcf, 0xa5, 0x83,
+ 0x5c, 0x7a, 0x10, 0x36, 0xc4, 0xe2, 0x88, 0xae,
+ },
+ {
+ 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0,
+ 0x4a, 0x6d, 0x04, 0x23, 0xd6, 0xf1, 0x98, 0xbf,
+ 0x6f, 0x48, 0x21, 0x06, 0xf3, 0xd4, 0xbd, 0x9a,
+ 0x94, 0xb3, 0xda, 0xfd, 0x08, 0x2f, 0x46, 0x61,
+ 0xb1, 0x96, 0xff, 0xd8, 0x2d, 0x0a, 0x63, 0x44,
+ 0xde, 0xf9, 0x90, 0xb7, 0x42, 0x65, 0x0c, 0x2b,
+ 0xfb, 0xdc, 0xb5, 0x92, 0x67, 0x40, 0x29, 0x0e,
+ 0x35, 0x12, 0x7b, 0x5c, 0xa9, 0x8e, 0xe7, 0xc0,
+ 0x10, 0x37, 0x5e, 0x79, 0x8c, 0xab, 0xc2, 0xe5,
+ 0x7f, 0x58, 0x31, 0x16, 0xe3, 0xc4, 0xad, 0x8a,
+ 0x5a, 0x7d, 0x14, 0x33, 0xc6, 0xe1, 0x88, 0xaf,
+ 0xa1, 0x86, 0xef, 0xc8, 0x3d, 0x1a, 0x73, 0x54,
+ 0x84, 0xa3, 0xca, 0xed, 0x18, 0x3f, 0x56, 0x71,
+ 0xeb, 0xcc, 0xa5, 0x82, 0x77, 0x50, 0x39, 0x1e,
+ 0xce, 0xe9, 0x80, 0xa7, 0x52, 0x75, 0x1c, 0x3b,
+ 0x6a, 0x4d, 0x24, 0x03, 0xf6, 0xd1, 0xb8, 0x9f,
+ 0x4f, 0x68, 0x01, 0x26, 0xd3, 0xf4, 0x9d, 0xba,
+ 0x20, 0x07, 0x6e, 0x49, 0xbc, 0x9b, 0xf2, 0xd5,
+ 0x05, 0x22, 0x4b, 0x6c, 0x99, 0xbe, 0xd7, 0xf0,
+ 0xfe, 0xd9, 0xb0, 0x97, 0x62, 0x45, 0x2c, 0x0b,
+ 0xdb, 0xfc, 0x95, 0xb2, 0x47, 0x60, 0x09, 0x2e,
+ 0xb4, 0x93, 0xfa, 0xdd, 0x28, 0x0f, 0x66, 0x41,
+ 0x91, 0xb6, 0xdf, 0xf8, 0x0d, 0x2a, 0x43, 0x64,
+ 0x5f, 0x78, 0x11, 0x36, 0xc3, 0xe4, 0x8d, 0xaa,
+ 0x7a, 0x5d, 0x34, 0x13, 0xe6, 0xc1, 0xa8, 0x8f,
+ 0x15, 0x32, 0x5b, 0x7c, 0x89, 0xae, 0xc7, 0xe0,
+ 0x30, 0x17, 0x7e, 0x59, 0xac, 0x8b, 0xe2, 0xc5,
+ 0xcb, 0xec, 0x85, 0xa2, 0x57, 0x70, 0x19, 0x3e,
+ 0xee, 0xc9, 0xa0, 0x87, 0x72, 0x55, 0x3c, 0x1b,
+ 0x81, 0xa6, 0xcf, 0xe8, 0x1d, 0x3a, 0x53, 0x74,
+ 0xa4, 0x83, 0xea, 0xcd, 0x38, 0x1f, 0x76, 0x51,
+ },
+ {
+ 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8,
+ 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85,
+ 0xba, 0x92, 0xea, 0xc2, 0x1a, 0x32, 0x4a, 0x62,
+ 0xe7, 0xcf, 0xb7, 0x9f, 0x47, 0x6f, 0x17, 0x3f,
+ 0x69, 0x41, 0x39, 0x11, 0xc9, 0xe1, 0x99, 0xb1,
+ 0x34, 0x1c, 0x64, 0x4c, 0x94, 0xbc, 0xc4, 0xec,
+ 0xd3, 0xfb, 0x83, 0xab, 0x73, 0x5b, 0x23, 0x0b,
+ 0x8e, 0xa6, 0xde, 0xf6, 0x2e, 0x06, 0x7e, 0x56,
+ 0xd2, 0xfa, 0x82, 0xaa, 0x72, 0x5a, 0x22, 0x0a,
+ 0x8f, 0xa7, 0xdf, 0xf7, 0x2f, 0x07, 0x7f, 0x57,
+ 0x68, 0x40, 0x38, 0x10, 0xc8, 0xe0, 0x98, 0xb0,
+ 0x35, 0x1d, 0x65, 0x4d, 0x95, 0xbd, 0xc5, 0xed,
+ 0xbb, 0x93, 0xeb, 0xc3, 0x1b, 0x33, 0x4b, 0x63,
+ 0xe6, 0xce, 0xb6, 0x9e, 0x46, 0x6e, 0x16, 0x3e,
+ 0x01, 0x29, 0x51, 0x79, 0xa1, 0x89, 0xf1, 0xd9,
+ 0x5c, 0x74, 0x0c, 0x24, 0xfc, 0xd4, 0xac, 0x84,
+ 0xb9, 0x91, 0xe9, 0xc1, 0x19, 0x31, 0x49, 0x61,
+ 0xe4, 0xcc, 0xb4, 0x9c, 0x44, 0x6c, 0x14, 0x3c,
+ 0x03, 0x2b, 0x53, 0x7b, 0xa3, 0x8b, 0xf3, 0xdb,
+ 0x5e, 0x76, 0x0e, 0x26, 0xfe, 0xd6, 0xae, 0x86,
+ 0xd0, 0xf8, 0x80, 0xa8, 0x70, 0x58, 0x20, 0x08,
+ 0x8d, 0xa5, 0xdd, 0xf5, 0x2d, 0x05, 0x7d, 0x55,
+ 0x6a, 0x42, 0x3a, 0x12, 0xca, 0xe2, 0x9a, 0xb2,
+ 0x37, 0x1f, 0x67, 0x4f, 0x97, 0xbf, 0xc7, 0xef,
+ 0x6b, 0x43, 0x3b, 0x13, 0xcb, 0xe3, 0x9b, 0xb3,
+ 0x36, 0x1e, 0x66, 0x4e, 0x96, 0xbe, 0xc6, 0xee,
+ 0xd1, 0xf9, 0x81, 0xa9, 0x71, 0x59, 0x21, 0x09,
+ 0x8c, 0xa4, 0xdc, 0xf4, 0x2c, 0x04, 0x7c, 0x54,
+ 0x02, 0x2a, 0x52, 0x7a, 0xa2, 0x8a, 0xf2, 0xda,
+ 0x5f, 0x77, 0x0f, 0x27, 0xff, 0xd7, 0xaf, 0x87,
+ 0xb8, 0x90, 0xe8, 0xc0, 0x18, 0x30, 0x48, 0x60,
+ 0xe5, 0xcd, 0xb5, 0x9d, 0x45, 0x6d, 0x15, 0x3d,
+ },
+ {
+ 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf,
+ 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a,
+ 0xaa, 0x83, 0xf8, 0xd1, 0x0e, 0x27, 0x5c, 0x75,
+ 0xff, 0xd6, 0xad, 0x84, 0x5b, 0x72, 0x09, 0x20,
+ 0x49, 0x60, 0x1b, 0x32, 0xed, 0xc4, 0xbf, 0x96,
+ 0x1c, 0x35, 0x4e, 0x67, 0xb8, 0x91, 0xea, 0xc3,
+ 0xe3, 0xca, 0xb1, 0x98, 0x47, 0x6e, 0x15, 0x3c,
+ 0xb6, 0x9f, 0xe4, 0xcd, 0x12, 0x3b, 0x40, 0x69,
+ 0x92, 0xbb, 0xc0, 0xe9, 0x36, 0x1f, 0x64, 0x4d,
+ 0xc7, 0xee, 0x95, 0xbc, 0x63, 0x4a, 0x31, 0x18,
+ 0x38, 0x11, 0x6a, 0x43, 0x9c, 0xb5, 0xce, 0xe7,
+ 0x6d, 0x44, 0x3f, 0x16, 0xc9, 0xe0, 0x9b, 0xb2,
+ 0xdb, 0xf2, 0x89, 0xa0, 0x7f, 0x56, 0x2d, 0x04,
+ 0x8e, 0xa7, 0xdc, 0xf5, 0x2a, 0x03, 0x78, 0x51,
+ 0x71, 0x58, 0x23, 0x0a, 0xd5, 0xfc, 0x87, 0xae,
+ 0x24, 0x0d, 0x76, 0x5f, 0x80, 0xa9, 0xd2, 0xfb,
+ 0x39, 0x10, 0x6b, 0x42, 0x9d, 0xb4, 0xcf, 0xe6,
+ 0x6c, 0x45, 0x3e, 0x17, 0xc8, 0xe1, 0x9a, 0xb3,
+ 0x93, 0xba, 0xc1, 0xe8, 0x37, 0x1e, 0x65, 0x4c,
+ 0xc6, 0xef, 0x94, 0xbd, 0x62, 0x4b, 0x30, 0x19,
+ 0x70, 0x59, 0x22, 0x0b, 0xd4, 0xfd, 0x86, 0xaf,
+ 0x25, 0x0c, 0x77, 0x5e, 0x81, 0xa8, 0xd3, 0xfa,
+ 0xda, 0xf3, 0x88, 0xa1, 0x7e, 0x57, 0x2c, 0x05,
+ 0x8f, 0xa6, 0xdd, 0xf4, 0x2b, 0x02, 0x79, 0x50,
+ 0xab, 0x82, 0xf9, 0xd0, 0x0f, 0x26, 0x5d, 0x74,
+ 0xfe, 0xd7, 0xac, 0x85, 0x5a, 0x73, 0x08, 0x21,
+ 0x01, 0x28, 0x53, 0x7a, 0xa5, 0x8c, 0xf7, 0xde,
+ 0x54, 0x7d, 0x06, 0x2f, 0xf0, 0xd9, 0xa2, 0x8b,
+ 0xe2, 0xcb, 0xb0, 0x99, 0x46, 0x6f, 0x14, 0x3d,
+ 0xb7, 0x9e, 0xe5, 0xcc, 0x13, 0x3a, 0x41, 0x68,
+ 0x48, 0x61, 0x1a, 0x33, 0xec, 0xc5, 0xbe, 0x97,
+ 0x1d, 0x34, 0x4f, 0x66, 0xb9, 0x90, 0xeb, 0xc2,
+ },
+ {
+ 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6,
+ 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b,
+ 0x9a, 0xb0, 0xce, 0xe4, 0x32, 0x18, 0x66, 0x4c,
+ 0xd7, 0xfd, 0x83, 0xa9, 0x7f, 0x55, 0x2b, 0x01,
+ 0x29, 0x03, 0x7d, 0x57, 0x81, 0xab, 0xd5, 0xff,
+ 0x64, 0x4e, 0x30, 0x1a, 0xcc, 0xe6, 0x98, 0xb2,
+ 0xb3, 0x99, 0xe7, 0xcd, 0x1b, 0x31, 0x4f, 0x65,
+ 0xfe, 0xd4, 0xaa, 0x80, 0x56, 0x7c, 0x02, 0x28,
+ 0x52, 0x78, 0x06, 0x2c, 0xfa, 0xd0, 0xae, 0x84,
+ 0x1f, 0x35, 0x4b, 0x61, 0xb7, 0x9d, 0xe3, 0xc9,
+ 0xc8, 0xe2, 0x9c, 0xb6, 0x60, 0x4a, 0x34, 0x1e,
+ 0x85, 0xaf, 0xd1, 0xfb, 0x2d, 0x07, 0x79, 0x53,
+ 0x7b, 0x51, 0x2f, 0x05, 0xd3, 0xf9, 0x87, 0xad,
+ 0x36, 0x1c, 0x62, 0x48, 0x9e, 0xb4, 0xca, 0xe0,
+ 0xe1, 0xcb, 0xb5, 0x9f, 0x49, 0x63, 0x1d, 0x37,
+ 0xac, 0x86, 0xf8, 0xd2, 0x04, 0x2e, 0x50, 0x7a,
+ 0xa4, 0x8e, 0xf0, 0xda, 0x0c, 0x26, 0x58, 0x72,
+ 0xe9, 0xc3, 0xbd, 0x97, 0x41, 0x6b, 0x15, 0x3f,
+ 0x3e, 0x14, 0x6a, 0x40, 0x96, 0xbc, 0xc2, 0xe8,
+ 0x73, 0x59, 0x27, 0x0d, 0xdb, 0xf1, 0x8f, 0xa5,
+ 0x8d, 0xa7, 0xd9, 0xf3, 0x25, 0x0f, 0x71, 0x5b,
+ 0xc0, 0xea, 0x94, 0xbe, 0x68, 0x42, 0x3c, 0x16,
+ 0x17, 0x3d, 0x43, 0x69, 0xbf, 0x95, 0xeb, 0xc1,
+ 0x5a, 0x70, 0x0e, 0x24, 0xf2, 0xd8, 0xa6, 0x8c,
+ 0xf6, 0xdc, 0xa2, 0x88, 0x5e, 0x74, 0x0a, 0x20,
+ 0xbb, 0x91, 0xef, 0xc5, 0x13, 0x39, 0x47, 0x6d,
+ 0x6c, 0x46, 0x38, 0x12, 0xc4, 0xee, 0x90, 0xba,
+ 0x21, 0x0b, 0x75, 0x5f, 0x89, 0xa3, 0xdd, 0xf7,
+ 0xdf, 0xf5, 0x8b, 0xa1, 0x77, 0x5d, 0x23, 0x09,
+ 0x92, 0xb8, 0xc6, 0xec, 0x3a, 0x10, 0x6e, 0x44,
+ 0x45, 0x6f, 0x11, 0x3b, 0xed, 0xc7, 0xb9, 0x93,
+ 0x08, 0x22, 0x5c, 0x76, 0xa0, 0x8a, 0xf4, 0xde,
+ },
+ {
+ 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1,
+ 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94,
+ 0x8a, 0xa1, 0xdc, 0xf7, 0x26, 0x0d, 0x70, 0x5b,
+ 0xcf, 0xe4, 0x99, 0xb2, 0x63, 0x48, 0x35, 0x1e,
+ 0x09, 0x22, 0x5f, 0x74, 0xa5, 0x8e, 0xf3, 0xd8,
+ 0x4c, 0x67, 0x1a, 0x31, 0xe0, 0xcb, 0xb6, 0x9d,
+ 0x83, 0xa8, 0xd5, 0xfe, 0x2f, 0x04, 0x79, 0x52,
+ 0xc6, 0xed, 0x90, 0xbb, 0x6a, 0x41, 0x3c, 0x17,
+ 0x12, 0x39, 0x44, 0x6f, 0xbe, 0x95, 0xe8, 0xc3,
+ 0x57, 0x7c, 0x01, 0x2a, 0xfb, 0xd0, 0xad, 0x86,
+ 0x98, 0xb3, 0xce, 0xe5, 0x34, 0x1f, 0x62, 0x49,
+ 0xdd, 0xf6, 0x8b, 0xa0, 0x71, 0x5a, 0x27, 0x0c,
+ 0x1b, 0x30, 0x4d, 0x66, 0xb7, 0x9c, 0xe1, 0xca,
+ 0x5e, 0x75, 0x08, 0x23, 0xf2, 0xd9, 0xa4, 0x8f,
+ 0x91, 0xba, 0xc7, 0xec, 0x3d, 0x16, 0x6b, 0x40,
+ 0xd4, 0xff, 0x82, 0xa9, 0x78, 0x53, 0x2e, 0x05,
+ 0x24, 0x0f, 0x72, 0x59, 0x88, 0xa3, 0xde, 0xf5,
+ 0x61, 0x4a, 0x37, 0x1c, 0xcd, 0xe6, 0x9b, 0xb0,
+ 0xae, 0x85, 0xf8, 0xd3, 0x02, 0x29, 0x54, 0x7f,
+ 0xeb, 0xc0, 0xbd, 0x96, 0x47, 0x6c, 0x11, 0x3a,
+ 0x2d, 0x06, 0x7b, 0x50, 0x81, 0xaa, 0xd7, 0xfc,
+ 0x68, 0x43, 0x3e, 0x15, 0xc4, 0xef, 0x92, 0xb9,
+ 0xa7, 0x8c, 0xf1, 0xda, 0x0b, 0x20, 0x5d, 0x76,
+ 0xe2, 0xc9, 0xb4, 0x9f, 0x4e, 0x65, 0x18, 0x33,
+ 0x36, 0x1d, 0x60, 0x4b, 0x9a, 0xb1, 0xcc, 0xe7,
+ 0x73, 0x58, 0x25, 0x0e, 0xdf, 0xf4, 0x89, 0xa2,
+ 0xbc, 0x97, 0xea, 0xc1, 0x10, 0x3b, 0x46, 0x6d,
+ 0xf9, 0xd2, 0xaf, 0x84, 0x55, 0x7e, 0x03, 0x28,
+ 0x3f, 0x14, 0x69, 0x42, 0x93, 0xb8, 0xc5, 0xee,
+ 0x7a, 0x51, 0x2c, 0x07, 0xd6, 0xfd, 0x80, 0xab,
+ 0xb5, 0x9e, 0xe3, 0xc8, 0x19, 0x32, 0x4f, 0x64,
+ 0xf0, 0xdb, 0xa6, 0x8d, 0x5c, 0x77, 0x0a, 0x21,
+ },
+ {
+ 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4,
+ 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9,
+ 0xfa, 0xd6, 0xa2, 0x8e, 0x4a, 0x66, 0x12, 0x3e,
+ 0x87, 0xab, 0xdf, 0xf3, 0x37, 0x1b, 0x6f, 0x43,
+ 0xe9, 0xc5, 0xb1, 0x9d, 0x59, 0x75, 0x01, 0x2d,
+ 0x94, 0xb8, 0xcc, 0xe0, 0x24, 0x08, 0x7c, 0x50,
+ 0x13, 0x3f, 0x4b, 0x67, 0xa3, 0x8f, 0xfb, 0xd7,
+ 0x6e, 0x42, 0x36, 0x1a, 0xde, 0xf2, 0x86, 0xaa,
+ 0xcf, 0xe3, 0x97, 0xbb, 0x7f, 0x53, 0x27, 0x0b,
+ 0xb2, 0x9e, 0xea, 0xc6, 0x02, 0x2e, 0x5a, 0x76,
+ 0x35, 0x19, 0x6d, 0x41, 0x85, 0xa9, 0xdd, 0xf1,
+ 0x48, 0x64, 0x10, 0x3c, 0xf8, 0xd4, 0xa0, 0x8c,
+ 0x26, 0x0a, 0x7e, 0x52, 0x96, 0xba, 0xce, 0xe2,
+ 0x5b, 0x77, 0x03, 0x2f, 0xeb, 0xc7, 0xb3, 0x9f,
+ 0xdc, 0xf0, 0x84, 0xa8, 0x6c, 0x40, 0x34, 0x18,
+ 0xa1, 0x8d, 0xf9, 0xd5, 0x11, 0x3d, 0x49, 0x65,
+ 0x83, 0xaf, 0xdb, 0xf7, 0x33, 0x1f, 0x6b, 0x47,
+ 0xfe, 0xd2, 0xa6, 0x8a, 0x4e, 0x62, 0x16, 0x3a,
+ 0x79, 0x55, 0x21, 0x0d, 0xc9, 0xe5, 0x91, 0xbd,
+ 0x04, 0x28, 0x5c, 0x70, 0xb4, 0x98, 0xec, 0xc0,
+ 0x6a, 0x46, 0x32, 0x1e, 0xda, 0xf6, 0x82, 0xae,
+ 0x17, 0x3b, 0x4f, 0x63, 0xa7, 0x8b, 0xff, 0xd3,
+ 0x90, 0xbc, 0xc8, 0xe4, 0x20, 0x0c, 0x78, 0x54,
+ 0xed, 0xc1, 0xb5, 0x99, 0x5d, 0x71, 0x05, 0x29,
+ 0x4c, 0x60, 0x14, 0x38, 0xfc, 0xd0, 0xa4, 0x88,
+ 0x31, 0x1d, 0x69, 0x45, 0x81, 0xad, 0xd9, 0xf5,
+ 0xb6, 0x9a, 0xee, 0xc2, 0x06, 0x2a, 0x5e, 0x72,
+ 0xcb, 0xe7, 0x93, 0xbf, 0x7b, 0x57, 0x23, 0x0f,
+ 0xa5, 0x89, 0xfd, 0xd1, 0x15, 0x39, 0x4d, 0x61,
+ 0xd8, 0xf4, 0x80, 0xac, 0x68, 0x44, 0x30, 0x1c,
+ 0x5f, 0x73, 0x07, 0x2b, 0xef, 0xc3, 0xb7, 0x9b,
+ 0x22, 0x0e, 0x7a, 0x56, 0x92, 0xbe, 0xca, 0xe6,
+ },
+ {
+ 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3,
+ 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6,
+ 0xea, 0xc7, 0xb0, 0x9d, 0x5e, 0x73, 0x04, 0x29,
+ 0x9f, 0xb2, 0xc5, 0xe8, 0x2b, 0x06, 0x71, 0x5c,
+ 0xc9, 0xe4, 0x93, 0xbe, 0x7d, 0x50, 0x27, 0x0a,
+ 0xbc, 0x91, 0xe6, 0xcb, 0x08, 0x25, 0x52, 0x7f,
+ 0x23, 0x0e, 0x79, 0x54, 0x97, 0xba, 0xcd, 0xe0,
+ 0x56, 0x7b, 0x0c, 0x21, 0xe2, 0xcf, 0xb8, 0x95,
+ 0x8f, 0xa2, 0xd5, 0xf8, 0x3b, 0x16, 0x61, 0x4c,
+ 0xfa, 0xd7, 0xa0, 0x8d, 0x4e, 0x63, 0x14, 0x39,
+ 0x65, 0x48, 0x3f, 0x12, 0xd1, 0xfc, 0x8b, 0xa6,
+ 0x10, 0x3d, 0x4a, 0x67, 0xa4, 0x89, 0xfe, 0xd3,
+ 0x46, 0x6b, 0x1c, 0x31, 0xf2, 0xdf, 0xa8, 0x85,
+ 0x33, 0x1e, 0x69, 0x44, 0x87, 0xaa, 0xdd, 0xf0,
+ 0xac, 0x81, 0xf6, 0xdb, 0x18, 0x35, 0x42, 0x6f,
+ 0xd9, 0xf4, 0x83, 0xae, 0x6d, 0x40, 0x37, 0x1a,
+ 0x03, 0x2e, 0x59, 0x74, 0xb7, 0x9a, 0xed, 0xc0,
+ 0x76, 0x5b, 0x2c, 0x01, 0xc2, 0xef, 0x98, 0xb5,
+ 0xe9, 0xc4, 0xb3, 0x9e, 0x5d, 0x70, 0x07, 0x2a,
+ 0x9c, 0xb1, 0xc6, 0xeb, 0x28, 0x05, 0x72, 0x5f,
+ 0xca, 0xe7, 0x90, 0xbd, 0x7e, 0x53, 0x24, 0x09,
+ 0xbf, 0x92, 0xe5, 0xc8, 0x0b, 0x26, 0x51, 0x7c,
+ 0x20, 0x0d, 0x7a, 0x57, 0x94, 0xb9, 0xce, 0xe3,
+ 0x55, 0x78, 0x0f, 0x22, 0xe1, 0xcc, 0xbb, 0x96,
+ 0x8c, 0xa1, 0xd6, 0xfb, 0x38, 0x15, 0x62, 0x4f,
+ 0xf9, 0xd4, 0xa3, 0x8e, 0x4d, 0x60, 0x17, 0x3a,
+ 0x66, 0x4b, 0x3c, 0x11, 0xd2, 0xff, 0x88, 0xa5,
+ 0x13, 0x3e, 0x49, 0x64, 0xa7, 0x8a, 0xfd, 0xd0,
+ 0x45, 0x68, 0x1f, 0x32, 0xf1, 0xdc, 0xab, 0x86,
+ 0x30, 0x1d, 0x6a, 0x47, 0x84, 0xa9, 0xde, 0xf3,
+ 0xaf, 0x82, 0xf5, 0xd8, 0x1b, 0x36, 0x41, 0x6c,
+ 0xda, 0xf7, 0x80, 0xad, 0x6e, 0x43, 0x34, 0x19,
+ },
+ {
+ 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca,
+ 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7,
+ 0xda, 0xf4, 0x86, 0xa8, 0x62, 0x4c, 0x3e, 0x10,
+ 0xb7, 0x99, 0xeb, 0xc5, 0x0f, 0x21, 0x53, 0x7d,
+ 0xa9, 0x87, 0xf5, 0xdb, 0x11, 0x3f, 0x4d, 0x63,
+ 0xc4, 0xea, 0x98, 0xb6, 0x7c, 0x52, 0x20, 0x0e,
+ 0x73, 0x5d, 0x2f, 0x01, 0xcb, 0xe5, 0x97, 0xb9,
+ 0x1e, 0x30, 0x42, 0x6c, 0xa6, 0x88, 0xfa, 0xd4,
+ 0x4f, 0x61, 0x13, 0x3d, 0xf7, 0xd9, 0xab, 0x85,
+ 0x22, 0x0c, 0x7e, 0x50, 0x9a, 0xb4, 0xc6, 0xe8,
+ 0x95, 0xbb, 0xc9, 0xe7, 0x2d, 0x03, 0x71, 0x5f,
+ 0xf8, 0xd6, 0xa4, 0x8a, 0x40, 0x6e, 0x1c, 0x32,
+ 0xe6, 0xc8, 0xba, 0x94, 0x5e, 0x70, 0x02, 0x2c,
+ 0x8b, 0xa5, 0xd7, 0xf9, 0x33, 0x1d, 0x6f, 0x41,
+ 0x3c, 0x12, 0x60, 0x4e, 0x84, 0xaa, 0xd8, 0xf6,
+ 0x51, 0x7f, 0x0d, 0x23, 0xe9, 0xc7, 0xb5, 0x9b,
+ 0x9e, 0xb0, 0xc2, 0xec, 0x26, 0x08, 0x7a, 0x54,
+ 0xf3, 0xdd, 0xaf, 0x81, 0x4b, 0x65, 0x17, 0x39,
+ 0x44, 0x6a, 0x18, 0x36, 0xfc, 0xd2, 0xa0, 0x8e,
+ 0x29, 0x07, 0x75, 0x5b, 0x91, 0xbf, 0xcd, 0xe3,
+ 0x37, 0x19, 0x6b, 0x45, 0x8f, 0xa1, 0xd3, 0xfd,
+ 0x5a, 0x74, 0x06, 0x28, 0xe2, 0xcc, 0xbe, 0x90,
+ 0xed, 0xc3, 0xb1, 0x9f, 0x55, 0x7b, 0x09, 0x27,
+ 0x80, 0xae, 0xdc, 0xf2, 0x38, 0x16, 0x64, 0x4a,
+ 0xd1, 0xff, 0x8d, 0xa3, 0x69, 0x47, 0x35, 0x1b,
+ 0xbc, 0x92, 0xe0, 0xce, 0x04, 0x2a, 0x58, 0x76,
+ 0x0b, 0x25, 0x57, 0x79, 0xb3, 0x9d, 0xef, 0xc1,
+ 0x66, 0x48, 0x3a, 0x14, 0xde, 0xf0, 0x82, 0xac,
+ 0x78, 0x56, 0x24, 0x0a, 0xc0, 0xee, 0x9c, 0xb2,
+ 0x15, 0x3b, 0x49, 0x67, 0xad, 0x83, 0xf1, 0xdf,
+ 0xa2, 0x8c, 0xfe, 0xd0, 0x1a, 0x34, 0x46, 0x68,
+ 0xcf, 0xe1, 0x93, 0xbd, 0x77, 0x59, 0x2b, 0x05,
+ },
+ {
+ 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd,
+ 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8,
+ 0xca, 0xe5, 0x94, 0xbb, 0x76, 0x59, 0x28, 0x07,
+ 0xaf, 0x80, 0xf1, 0xde, 0x13, 0x3c, 0x4d, 0x62,
+ 0x89, 0xa6, 0xd7, 0xf8, 0x35, 0x1a, 0x6b, 0x44,
+ 0xec, 0xc3, 0xb2, 0x9d, 0x50, 0x7f, 0x0e, 0x21,
+ 0x43, 0x6c, 0x1d, 0x32, 0xff, 0xd0, 0xa1, 0x8e,
+ 0x26, 0x09, 0x78, 0x57, 0x9a, 0xb5, 0xc4, 0xeb,
+ 0x0f, 0x20, 0x51, 0x7e, 0xb3, 0x9c, 0xed, 0xc2,
+ 0x6a, 0x45, 0x34, 0x1b, 0xd6, 0xf9, 0x88, 0xa7,
+ 0xc5, 0xea, 0x9b, 0xb4, 0x79, 0x56, 0x27, 0x08,
+ 0xa0, 0x8f, 0xfe, 0xd1, 0x1c, 0x33, 0x42, 0x6d,
+ 0x86, 0xa9, 0xd8, 0xf7, 0x3a, 0x15, 0x64, 0x4b,
+ 0xe3, 0xcc, 0xbd, 0x92, 0x5f, 0x70, 0x01, 0x2e,
+ 0x4c, 0x63, 0x12, 0x3d, 0xf0, 0xdf, 0xae, 0x81,
+ 0x29, 0x06, 0x77, 0x58, 0x95, 0xba, 0xcb, 0xe4,
+ 0x1e, 0x31, 0x40, 0x6f, 0xa2, 0x8d, 0xfc, 0xd3,
+ 0x7b, 0x54, 0x25, 0x0a, 0xc7, 0xe8, 0x99, 0xb6,
+ 0xd4, 0xfb, 0x8a, 0xa5, 0x68, 0x47, 0x36, 0x19,
+ 0xb1, 0x9e, 0xef, 0xc0, 0x0d, 0x22, 0x53, 0x7c,
+ 0x97, 0xb8, 0xc9, 0xe6, 0x2b, 0x04, 0x75, 0x5a,
+ 0xf2, 0xdd, 0xac, 0x83, 0x4e, 0x61, 0x10, 0x3f,
+ 0x5d, 0x72, 0x03, 0x2c, 0xe1, 0xce, 0xbf, 0x90,
+ 0x38, 0x17, 0x66, 0x49, 0x84, 0xab, 0xda, 0xf5,
+ 0x11, 0x3e, 0x4f, 0x60, 0xad, 0x82, 0xf3, 0xdc,
+ 0x74, 0x5b, 0x2a, 0x05, 0xc8, 0xe7, 0x96, 0xb9,
+ 0xdb, 0xf4, 0x85, 0xaa, 0x67, 0x48, 0x39, 0x16,
+ 0xbe, 0x91, 0xe0, 0xcf, 0x02, 0x2d, 0x5c, 0x73,
+ 0x98, 0xb7, 0xc6, 0xe9, 0x24, 0x0b, 0x7a, 0x55,
+ 0xfd, 0xd2, 0xa3, 0x8c, 0x41, 0x6e, 0x1f, 0x30,
+ 0x52, 0x7d, 0x0c, 0x23, 0xee, 0xc1, 0xb0, 0x9f,
+ 0x37, 0x18, 0x69, 0x46, 0x8b, 0xa4, 0xd5, 0xfa,
+ },
+ {
+ 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d,
+ 0x27, 0x17, 0x47, 0x77, 0xe7, 0xd7, 0x87, 0xb7,
+ 0xba, 0x8a, 0xda, 0xea, 0x7a, 0x4a, 0x1a, 0x2a,
+ 0x4e, 0x7e, 0x2e, 0x1e, 0x8e, 0xbe, 0xee, 0xde,
+ 0xd3, 0xe3, 0xb3, 0x83, 0x13, 0x23, 0x73, 0x43,
+ 0x69, 0x59, 0x09, 0x39, 0xa9, 0x99, 0xc9, 0xf9,
+ 0xf4, 0xc4, 0x94, 0xa4, 0x34, 0x04, 0x54, 0x64,
+ 0x9c, 0xac, 0xfc, 0xcc, 0x5c, 0x6c, 0x3c, 0x0c,
+ 0x01, 0x31, 0x61, 0x51, 0xc1, 0xf1, 0xa1, 0x91,
+ 0xbb, 0x8b, 0xdb, 0xeb, 0x7b, 0x4b, 0x1b, 0x2b,
+ 0x26, 0x16, 0x46, 0x76, 0xe6, 0xd6, 0x86, 0xb6,
+ 0xd2, 0xe2, 0xb2, 0x82, 0x12, 0x22, 0x72, 0x42,
+ 0x4f, 0x7f, 0x2f, 0x1f, 0x8f, 0xbf, 0xef, 0xdf,
+ 0xf5, 0xc5, 0x95, 0xa5, 0x35, 0x05, 0x55, 0x65,
+ 0x68, 0x58, 0x08, 0x38, 0xa8, 0x98, 0xc8, 0xf8,
+ 0x25, 0x15, 0x45, 0x75, 0xe5, 0xd5, 0x85, 0xb5,
+ 0xb8, 0x88, 0xd8, 0xe8, 0x78, 0x48, 0x18, 0x28,
+ 0x02, 0x32, 0x62, 0x52, 0xc2, 0xf2, 0xa2, 0x92,
+ 0x9f, 0xaf, 0xff, 0xcf, 0x5f, 0x6f, 0x3f, 0x0f,
+ 0x6b, 0x5b, 0x0b, 0x3b, 0xab, 0x9b, 0xcb, 0xfb,
+ 0xf6, 0xc6, 0x96, 0xa6, 0x36, 0x06, 0x56, 0x66,
+ 0x4c, 0x7c, 0x2c, 0x1c, 0x8c, 0xbc, 0xec, 0xdc,
+ 0xd1, 0xe1, 0xb1, 0x81, 0x11, 0x21, 0x71, 0x41,
+ 0xb9, 0x89, 0xd9, 0xe9, 0x79, 0x49, 0x19, 0x29,
+ 0x24, 0x14, 0x44, 0x74, 0xe4, 0xd4, 0x84, 0xb4,
+ 0x9e, 0xae, 0xfe, 0xce, 0x5e, 0x6e, 0x3e, 0x0e,
+ 0x03, 0x33, 0x63, 0x53, 0xc3, 0xf3, 0xa3, 0x93,
+ 0xf7, 0xc7, 0x97, 0xa7, 0x37, 0x07, 0x57, 0x67,
+ 0x6a, 0x5a, 0x0a, 0x3a, 0xaa, 0x9a, 0xca, 0xfa,
+ 0xd0, 0xe0, 0xb0, 0x80, 0x10, 0x20, 0x70, 0x40,
+ 0x4d, 0x7d, 0x2d, 0x1d, 0x8d, 0xbd, 0xed, 0xdd,
+ },
+ {
+ 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97,
+ 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02,
+ 0x37, 0x06, 0x55, 0x64, 0xf3, 0xc2, 0x91, 0xa0,
+ 0xa2, 0x93, 0xc0, 0xf1, 0x66, 0x57, 0x04, 0x35,
+ 0x6e, 0x5f, 0x0c, 0x3d, 0xaa, 0x9b, 0xc8, 0xf9,
+ 0xfb, 0xca, 0x99, 0xa8, 0x3f, 0x0e, 0x5d, 0x6c,
+ 0x59, 0x68, 0x3b, 0x0a, 0x9d, 0xac, 0xff, 0xce,
+ 0xcc, 0xfd, 0xae, 0x9f, 0x08, 0x39, 0x6a, 0x5b,
+ 0xdc, 0xed, 0xbe, 0x8f, 0x18, 0x29, 0x7a, 0x4b,
+ 0x49, 0x78, 0x2b, 0x1a, 0x8d, 0xbc, 0xef, 0xde,
+ 0xeb, 0xda, 0x89, 0xb8, 0x2f, 0x1e, 0x4d, 0x7c,
+ 0x7e, 0x4f, 0x1c, 0x2d, 0xba, 0x8b, 0xd8, 0xe9,
+ 0xb2, 0x83, 0xd0, 0xe1, 0x76, 0x47, 0x14, 0x25,
+ 0x27, 0x16, 0x45, 0x74, 0xe3, 0xd2, 0x81, 0xb0,
+ 0x85, 0xb4, 0xe7, 0xd6, 0x41, 0x70, 0x23, 0x12,
+ 0x10, 0x21, 0x72, 0x43, 0xd4, 0xe5, 0xb6, 0x87,
+ 0xa5, 0x94, 0xc7, 0xf6, 0x61, 0x50, 0x03, 0x32,
+ 0x30, 0x01, 0x52, 0x63, 0xf4, 0xc5, 0x96, 0xa7,
+ 0x92, 0xa3, 0xf0, 0xc1, 0x56, 0x67, 0x34, 0x05,
+ 0x07, 0x36, 0x65, 0x54, 0xc3, 0xf2, 0xa1, 0x90,
+ 0xcb, 0xfa, 0xa9, 0x98, 0x0f, 0x3e, 0x6d, 0x5c,
+ 0x5e, 0x6f, 0x3c, 0x0d, 0x9a, 0xab, 0xf8, 0xc9,
+ 0xfc, 0xcd, 0x9e, 0xaf, 0x38, 0x09, 0x5a, 0x6b,
+ 0x69, 0x58, 0x0b, 0x3a, 0xad, 0x9c, 0xcf, 0xfe,
+ 0x79, 0x48, 0x1b, 0x2a, 0xbd, 0x8c, 0xdf, 0xee,
+ 0xec, 0xdd, 0x8e, 0xbf, 0x28, 0x19, 0x4a, 0x7b,
+ 0x4e, 0x7f, 0x2c, 0x1d, 0x8a, 0xbb, 0xe8, 0xd9,
+ 0xdb, 0xea, 0xb9, 0x88, 0x1f, 0x2e, 0x7d, 0x4c,
+ 0x17, 0x26, 0x75, 0x44, 0xd3, 0xe2, 0xb1, 0x80,
+ 0x82, 0xb3, 0xe0, 0xd1, 0x46, 0x77, 0x24, 0x15,
+ 0x20, 0x11, 0x42, 0x73, 0xe4, 0xd5, 0x86, 0xb7,
+ 0xb5, 0x84, 0xd7, 0xe6, 0x71, 0x40, 0x13, 0x22,
+ },
+ {
+ 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e,
+ 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13,
+ 0x07, 0x35, 0x63, 0x51, 0xcf, 0xfd, 0xab, 0x99,
+ 0x8a, 0xb8, 0xee, 0xdc, 0x42, 0x70, 0x26, 0x14,
+ 0x0e, 0x3c, 0x6a, 0x58, 0xc6, 0xf4, 0xa2, 0x90,
+ 0x83, 0xb1, 0xe7, 0xd5, 0x4b, 0x79, 0x2f, 0x1d,
+ 0x09, 0x3b, 0x6d, 0x5f, 0xc1, 0xf3, 0xa5, 0x97,
+ 0x84, 0xb6, 0xe0, 0xd2, 0x4c, 0x7e, 0x28, 0x1a,
+ 0x1c, 0x2e, 0x78, 0x4a, 0xd4, 0xe6, 0xb0, 0x82,
+ 0x91, 0xa3, 0xf5, 0xc7, 0x59, 0x6b, 0x3d, 0x0f,
+ 0x1b, 0x29, 0x7f, 0x4d, 0xd3, 0xe1, 0xb7, 0x85,
+ 0x96, 0xa4, 0xf2, 0xc0, 0x5e, 0x6c, 0x3a, 0x08,
+ 0x12, 0x20, 0x76, 0x44, 0xda, 0xe8, 0xbe, 0x8c,
+ 0x9f, 0xad, 0xfb, 0xc9, 0x57, 0x65, 0x33, 0x01,
+ 0x15, 0x27, 0x71, 0x43, 0xdd, 0xef, 0xb9, 0x8b,
+ 0x98, 0xaa, 0xfc, 0xce, 0x50, 0x62, 0x34, 0x06,
+ 0x38, 0x0a, 0x5c, 0x6e, 0xf0, 0xc2, 0x94, 0xa6,
+ 0xb5, 0x87, 0xd1, 0xe3, 0x7d, 0x4f, 0x19, 0x2b,
+ 0x3f, 0x0d, 0x5b, 0x69, 0xf7, 0xc5, 0x93, 0xa1,
+ 0xb2, 0x80, 0xd6, 0xe4, 0x7a, 0x48, 0x1e, 0x2c,
+ 0x36, 0x04, 0x52, 0x60, 0xfe, 0xcc, 0x9a, 0xa8,
+ 0xbb, 0x89, 0xdf, 0xed, 0x73, 0x41, 0x17, 0x25,
+ 0x31, 0x03, 0x55, 0x67, 0xf9, 0xcb, 0x9d, 0xaf,
+ 0xbc, 0x8e, 0xd8, 0xea, 0x74, 0x46, 0x10, 0x22,
+ 0x24, 0x16, 0x40, 0x72, 0xec, 0xde, 0x88, 0xba,
+ 0xa9, 0x9b, 0xcd, 0xff, 0x61, 0x53, 0x05, 0x37,
+ 0x23, 0x11, 0x47, 0x75, 0xeb, 0xd9, 0x8f, 0xbd,
+ 0xae, 0x9c, 0xca, 0xf8, 0x66, 0x54, 0x02, 0x30,
+ 0x2a, 0x18, 0x4e, 0x7c, 0xe2, 0xd0, 0x86, 0xb4,
+ 0xa7, 0x95, 0xc3, 0xf1, 0x6f, 0x5d, 0x0b, 0x39,
+ 0x2d, 0x1f, 0x49, 0x7b, 0xe5, 0xd7, 0x81, 0xb3,
+ 0xa0, 0x92, 0xc4, 0xf6, 0x68, 0x5a, 0x0c, 0x3e,
+ },
+ {
+ 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99,
+ 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c,
+ 0x17, 0x24, 0x71, 0x42, 0xdb, 0xe8, 0xbd, 0x8e,
+ 0x92, 0xa1, 0xf4, 0xc7, 0x5e, 0x6d, 0x38, 0x0b,
+ 0x2e, 0x1d, 0x48, 0x7b, 0xe2, 0xd1, 0x84, 0xb7,
+ 0xab, 0x98, 0xcd, 0xfe, 0x67, 0x54, 0x01, 0x32,
+ 0x39, 0x0a, 0x5f, 0x6c, 0xf5, 0xc6, 0x93, 0xa0,
+ 0xbc, 0x8f, 0xda, 0xe9, 0x70, 0x43, 0x16, 0x25,
+ 0x5c, 0x6f, 0x3a, 0x09, 0x90, 0xa3, 0xf6, 0xc5,
+ 0xd9, 0xea, 0xbf, 0x8c, 0x15, 0x26, 0x73, 0x40,
+ 0x4b, 0x78, 0x2d, 0x1e, 0x87, 0xb4, 0xe1, 0xd2,
+ 0xce, 0xfd, 0xa8, 0x9b, 0x02, 0x31, 0x64, 0x57,
+ 0x72, 0x41, 0x14, 0x27, 0xbe, 0x8d, 0xd8, 0xeb,
+ 0xf7, 0xc4, 0x91, 0xa2, 0x3b, 0x08, 0x5d, 0x6e,
+ 0x65, 0x56, 0x03, 0x30, 0xa9, 0x9a, 0xcf, 0xfc,
+ 0xe0, 0xd3, 0x86, 0xb5, 0x2c, 0x1f, 0x4a, 0x79,
+ 0xb8, 0x8b, 0xde, 0xed, 0x74, 0x47, 0x12, 0x21,
+ 0x3d, 0x0e, 0x5b, 0x68, 0xf1, 0xc2, 0x97, 0xa4,
+ 0xaf, 0x9c, 0xc9, 0xfa, 0x63, 0x50, 0x05, 0x36,
+ 0x2a, 0x19, 0x4c, 0x7f, 0xe6, 0xd5, 0x80, 0xb3,
+ 0x96, 0xa5, 0xf0, 0xc3, 0x5a, 0x69, 0x3c, 0x0f,
+ 0x13, 0x20, 0x75, 0x46, 0xdf, 0xec, 0xb9, 0x8a,
+ 0x81, 0xb2, 0xe7, 0xd4, 0x4d, 0x7e, 0x2b, 0x18,
+ 0x04, 0x37, 0x62, 0x51, 0xc8, 0xfb, 0xae, 0x9d,
+ 0xe4, 0xd7, 0x82, 0xb1, 0x28, 0x1b, 0x4e, 0x7d,
+ 0x61, 0x52, 0x07, 0x34, 0xad, 0x9e, 0xcb, 0xf8,
+ 0xf3, 0xc0, 0x95, 0xa6, 0x3f, 0x0c, 0x59, 0x6a,
+ 0x76, 0x45, 0x10, 0x23, 0xba, 0x89, 0xdc, 0xef,
+ 0xca, 0xf9, 0xac, 0x9f, 0x06, 0x35, 0x60, 0x53,
+ 0x4f, 0x7c, 0x29, 0x1a, 0x83, 0xb0, 0xe5, 0xd6,
+ 0xdd, 0xee, 0xbb, 0x88, 0x11, 0x22, 0x77, 0x44,
+ 0x58, 0x6b, 0x3e, 0x0d, 0x94, 0xa7, 0xf2, 0xc1,
+ },
+ {
+ 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c,
+ 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31,
+ 0x67, 0x53, 0x0f, 0x3b, 0xb7, 0x83, 0xdf, 0xeb,
+ 0xda, 0xee, 0xb2, 0x86, 0x0a, 0x3e, 0x62, 0x56,
+ 0xce, 0xfa, 0xa6, 0x92, 0x1e, 0x2a, 0x76, 0x42,
+ 0x73, 0x47, 0x1b, 0x2f, 0xa3, 0x97, 0xcb, 0xff,
+ 0xa9, 0x9d, 0xc1, 0xf5, 0x79, 0x4d, 0x11, 0x25,
+ 0x14, 0x20, 0x7c, 0x48, 0xc4, 0xf0, 0xac, 0x98,
+ 0x81, 0xb5, 0xe9, 0xdd, 0x51, 0x65, 0x39, 0x0d,
+ 0x3c, 0x08, 0x54, 0x60, 0xec, 0xd8, 0x84, 0xb0,
+ 0xe6, 0xd2, 0x8e, 0xba, 0x36, 0x02, 0x5e, 0x6a,
+ 0x5b, 0x6f, 0x33, 0x07, 0x8b, 0xbf, 0xe3, 0xd7,
+ 0x4f, 0x7b, 0x27, 0x13, 0x9f, 0xab, 0xf7, 0xc3,
+ 0xf2, 0xc6, 0x9a, 0xae, 0x22, 0x16, 0x4a, 0x7e,
+ 0x28, 0x1c, 0x40, 0x74, 0xf8, 0xcc, 0x90, 0xa4,
+ 0x95, 0xa1, 0xfd, 0xc9, 0x45, 0x71, 0x2d, 0x19,
+ 0x1f, 0x2b, 0x77, 0x43, 0xcf, 0xfb, 0xa7, 0x93,
+ 0xa2, 0x96, 0xca, 0xfe, 0x72, 0x46, 0x1a, 0x2e,
+ 0x78, 0x4c, 0x10, 0x24, 0xa8, 0x9c, 0xc0, 0xf4,
+ 0xc5, 0xf1, 0xad, 0x99, 0x15, 0x21, 0x7d, 0x49,
+ 0xd1, 0xe5, 0xb9, 0x8d, 0x01, 0x35, 0x69, 0x5d,
+ 0x6c, 0x58, 0x04, 0x30, 0xbc, 0x88, 0xd4, 0xe0,
+ 0xb6, 0x82, 0xde, 0xea, 0x66, 0x52, 0x0e, 0x3a,
+ 0x0b, 0x3f, 0x63, 0x57, 0xdb, 0xef, 0xb3, 0x87,
+ 0x9e, 0xaa, 0xf6, 0xc2, 0x4e, 0x7a, 0x26, 0x12,
+ 0x23, 0x17, 0x4b, 0x7f, 0xf3, 0xc7, 0x9b, 0xaf,
+ 0xf9, 0xcd, 0x91, 0xa5, 0x29, 0x1d, 0x41, 0x75,
+ 0x44, 0x70, 0x2c, 0x18, 0x94, 0xa0, 0xfc, 0xc8,
+ 0x50, 0x64, 0x38, 0x0c, 0x80, 0xb4, 0xe8, 0xdc,
+ 0xed, 0xd9, 0x85, 0xb1, 0x3d, 0x09, 0x55, 0x61,
+ 0x37, 0x03, 0x5f, 0x6b, 0xe7, 0xd3, 0x8f, 0xbb,
+ 0x8a, 0xbe, 0xe2, 0xd6, 0x5a, 0x6e, 0x32, 0x06,
+ },
+ {
+ 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b,
+ 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e,
+ 0x77, 0x42, 0x1d, 0x28, 0xa3, 0x96, 0xc9, 0xfc,
+ 0xc2, 0xf7, 0xa8, 0x9d, 0x16, 0x23, 0x7c, 0x49,
+ 0xee, 0xdb, 0x84, 0xb1, 0x3a, 0x0f, 0x50, 0x65,
+ 0x5b, 0x6e, 0x31, 0x04, 0x8f, 0xba, 0xe5, 0xd0,
+ 0x99, 0xac, 0xf3, 0xc6, 0x4d, 0x78, 0x27, 0x12,
+ 0x2c, 0x19, 0x46, 0x73, 0xf8, 0xcd, 0x92, 0xa7,
+ 0xc1, 0xf4, 0xab, 0x9e, 0x15, 0x20, 0x7f, 0x4a,
+ 0x74, 0x41, 0x1e, 0x2b, 0xa0, 0x95, 0xca, 0xff,
+ 0xb6, 0x83, 0xdc, 0xe9, 0x62, 0x57, 0x08, 0x3d,
+ 0x03, 0x36, 0x69, 0x5c, 0xd7, 0xe2, 0xbd, 0x88,
+ 0x2f, 0x1a, 0x45, 0x70, 0xfb, 0xce, 0x91, 0xa4,
+ 0x9a, 0xaf, 0xf0, 0xc5, 0x4e, 0x7b, 0x24, 0x11,
+ 0x58, 0x6d, 0x32, 0x07, 0x8c, 0xb9, 0xe6, 0xd3,
+ 0xed, 0xd8, 0x87, 0xb2, 0x39, 0x0c, 0x53, 0x66,
+ 0x9f, 0xaa, 0xf5, 0xc0, 0x4b, 0x7e, 0x21, 0x14,
+ 0x2a, 0x1f, 0x40, 0x75, 0xfe, 0xcb, 0x94, 0xa1,
+ 0xe8, 0xdd, 0x82, 0xb7, 0x3c, 0x09, 0x56, 0x63,
+ 0x5d, 0x68, 0x37, 0x02, 0x89, 0xbc, 0xe3, 0xd6,
+ 0x71, 0x44, 0x1b, 0x2e, 0xa5, 0x90, 0xcf, 0xfa,
+ 0xc4, 0xf1, 0xae, 0x9b, 0x10, 0x25, 0x7a, 0x4f,
+ 0x06, 0x33, 0x6c, 0x59, 0xd2, 0xe7, 0xb8, 0x8d,
+ 0xb3, 0x86, 0xd9, 0xec, 0x67, 0x52, 0x0d, 0x38,
+ 0x5e, 0x6b, 0x34, 0x01, 0x8a, 0xbf, 0xe0, 0xd5,
+ 0xeb, 0xde, 0x81, 0xb4, 0x3f, 0x0a, 0x55, 0x60,
+ 0x29, 0x1c, 0x43, 0x76, 0xfd, 0xc8, 0x97, 0xa2,
+ 0x9c, 0xa9, 0xf6, 0xc3, 0x48, 0x7d, 0x22, 0x17,
+ 0xb0, 0x85, 0xda, 0xef, 0x64, 0x51, 0x0e, 0x3b,
+ 0x05, 0x30, 0x6f, 0x5a, 0xd1, 0xe4, 0xbb, 0x8e,
+ 0xc7, 0xf2, 0xad, 0x98, 0x13, 0x26, 0x79, 0x4c,
+ 0x72, 0x47, 0x18, 0x2d, 0xa6, 0x93, 0xcc, 0xf9,
+ },
+ {
+ 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82,
+ 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f,
+ 0x47, 0x71, 0x2b, 0x1d, 0x9f, 0xa9, 0xf3, 0xc5,
+ 0xea, 0xdc, 0x86, 0xb0, 0x32, 0x04, 0x5e, 0x68,
+ 0x8e, 0xb8, 0xe2, 0xd4, 0x56, 0x60, 0x3a, 0x0c,
+ 0x23, 0x15, 0x4f, 0x79, 0xfb, 0xcd, 0x97, 0xa1,
+ 0xc9, 0xff, 0xa5, 0x93, 0x11, 0x27, 0x7d, 0x4b,
+ 0x64, 0x52, 0x08, 0x3e, 0xbc, 0x8a, 0xd0, 0xe6,
+ 0x01, 0x37, 0x6d, 0x5b, 0xd9, 0xef, 0xb5, 0x83,
+ 0xac, 0x9a, 0xc0, 0xf6, 0x74, 0x42, 0x18, 0x2e,
+ 0x46, 0x70, 0x2a, 0x1c, 0x9e, 0xa8, 0xf2, 0xc4,
+ 0xeb, 0xdd, 0x87, 0xb1, 0x33, 0x05, 0x5f, 0x69,
+ 0x8f, 0xb9, 0xe3, 0xd5, 0x57, 0x61, 0x3b, 0x0d,
+ 0x22, 0x14, 0x4e, 0x78, 0xfa, 0xcc, 0x96, 0xa0,
+ 0xc8, 0xfe, 0xa4, 0x92, 0x10, 0x26, 0x7c, 0x4a,
+ 0x65, 0x53, 0x09, 0x3f, 0xbd, 0x8b, 0xd1, 0xe7,
+ 0x02, 0x34, 0x6e, 0x58, 0xda, 0xec, 0xb6, 0x80,
+ 0xaf, 0x99, 0xc3, 0xf5, 0x77, 0x41, 0x1b, 0x2d,
+ 0x45, 0x73, 0x29, 0x1f, 0x9d, 0xab, 0xf1, 0xc7,
+ 0xe8, 0xde, 0x84, 0xb2, 0x30, 0x06, 0x5c, 0x6a,
+ 0x8c, 0xba, 0xe0, 0xd6, 0x54, 0x62, 0x38, 0x0e,
+ 0x21, 0x17, 0x4d, 0x7b, 0xf9, 0xcf, 0x95, 0xa3,
+ 0xcb, 0xfd, 0xa7, 0x91, 0x13, 0x25, 0x7f, 0x49,
+ 0x66, 0x50, 0x0a, 0x3c, 0xbe, 0x88, 0xd2, 0xe4,
+ 0x03, 0x35, 0x6f, 0x59, 0xdb, 0xed, 0xb7, 0x81,
+ 0xae, 0x98, 0xc2, 0xf4, 0x76, 0x40, 0x1a, 0x2c,
+ 0x44, 0x72, 0x28, 0x1e, 0x9c, 0xaa, 0xf0, 0xc6,
+ 0xe9, 0xdf, 0x85, 0xb3, 0x31, 0x07, 0x5d, 0x6b,
+ 0x8d, 0xbb, 0xe1, 0xd7, 0x55, 0x63, 0x39, 0x0f,
+ 0x20, 0x16, 0x4c, 0x7a, 0xf8, 0xce, 0x94, 0xa2,
+ 0xca, 0xfc, 0xa6, 0x90, 0x12, 0x24, 0x7e, 0x48,
+ 0x67, 0x51, 0x0b, 0x3d, 0xbf, 0x89, 0xd3, 0xe5,
+ },
+ {
+ 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85,
+ 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20,
+ 0x57, 0x60, 0x39, 0x0e, 0x8b, 0xbc, 0xe5, 0xd2,
+ 0xf2, 0xc5, 0x9c, 0xab, 0x2e, 0x19, 0x40, 0x77,
+ 0xae, 0x99, 0xc0, 0xf7, 0x72, 0x45, 0x1c, 0x2b,
+ 0x0b, 0x3c, 0x65, 0x52, 0xd7, 0xe0, 0xb9, 0x8e,
+ 0xf9, 0xce, 0x97, 0xa0, 0x25, 0x12, 0x4b, 0x7c,
+ 0x5c, 0x6b, 0x32, 0x05, 0x80, 0xb7, 0xee, 0xd9,
+ 0x41, 0x76, 0x2f, 0x18, 0x9d, 0xaa, 0xf3, 0xc4,
+ 0xe4, 0xd3, 0x8a, 0xbd, 0x38, 0x0f, 0x56, 0x61,
+ 0x16, 0x21, 0x78, 0x4f, 0xca, 0xfd, 0xa4, 0x93,
+ 0xb3, 0x84, 0xdd, 0xea, 0x6f, 0x58, 0x01, 0x36,
+ 0xef, 0xd8, 0x81, 0xb6, 0x33, 0x04, 0x5d, 0x6a,
+ 0x4a, 0x7d, 0x24, 0x13, 0x96, 0xa1, 0xf8, 0xcf,
+ 0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0x0a, 0x3d,
+ 0x1d, 0x2a, 0x73, 0x44, 0xc1, 0xf6, 0xaf, 0x98,
+ 0x82, 0xb5, 0xec, 0xdb, 0x5e, 0x69, 0x30, 0x07,
+ 0x27, 0x10, 0x49, 0x7e, 0xfb, 0xcc, 0x95, 0xa2,
+ 0xd5, 0xe2, 0xbb, 0x8c, 0x09, 0x3e, 0x67, 0x50,
+ 0x70, 0x47, 0x1e, 0x29, 0xac, 0x9b, 0xc2, 0xf5,
+ 0x2c, 0x1b, 0x42, 0x75, 0xf0, 0xc7, 0x9e, 0xa9,
+ 0x89, 0xbe, 0xe7, 0xd0, 0x55, 0x62, 0x3b, 0x0c,
+ 0x7b, 0x4c, 0x15, 0x22, 0xa7, 0x90, 0xc9, 0xfe,
+ 0xde, 0xe9, 0xb0, 0x87, 0x02, 0x35, 0x6c, 0x5b,
+ 0xc3, 0xf4, 0xad, 0x9a, 0x1f, 0x28, 0x71, 0x46,
+ 0x66, 0x51, 0x08, 0x3f, 0xba, 0x8d, 0xd4, 0xe3,
+ 0x94, 0xa3, 0xfa, 0xcd, 0x48, 0x7f, 0x26, 0x11,
+ 0x31, 0x06, 0x5f, 0x68, 0xed, 0xda, 0x83, 0xb4,
+ 0x6d, 0x5a, 0x03, 0x34, 0xb1, 0x86, 0xdf, 0xe8,
+ 0xc8, 0xff, 0xa6, 0x91, 0x14, 0x23, 0x7a, 0x4d,
+ 0x3a, 0x0d, 0x54, 0x63, 0xe6, 0xd1, 0x88, 0xbf,
+ 0x9f, 0xa8, 0xf1, 0xc6, 0x43, 0x74, 0x2d, 0x1a,
+ },
+ {
+ 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8,
+ 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75,
+ 0xa7, 0x9f, 0xd7, 0xef, 0x47, 0x7f, 0x37, 0x0f,
+ 0x7a, 0x42, 0x0a, 0x32, 0x9a, 0xa2, 0xea, 0xd2,
+ 0x53, 0x6b, 0x23, 0x1b, 0xb3, 0x8b, 0xc3, 0xfb,
+ 0x8e, 0xb6, 0xfe, 0xc6, 0x6e, 0x56, 0x1e, 0x26,
+ 0xf4, 0xcc, 0x84, 0xbc, 0x14, 0x2c, 0x64, 0x5c,
+ 0x29, 0x11, 0x59, 0x61, 0xc9, 0xf1, 0xb9, 0x81,
+ 0xa6, 0x9e, 0xd6, 0xee, 0x46, 0x7e, 0x36, 0x0e,
+ 0x7b, 0x43, 0x0b, 0x33, 0x9b, 0xa3, 0xeb, 0xd3,
+ 0x01, 0x39, 0x71, 0x49, 0xe1, 0xd9, 0x91, 0xa9,
+ 0xdc, 0xe4, 0xac, 0x94, 0x3c, 0x04, 0x4c, 0x74,
+ 0xf5, 0xcd, 0x85, 0xbd, 0x15, 0x2d, 0x65, 0x5d,
+ 0x28, 0x10, 0x58, 0x60, 0xc8, 0xf0, 0xb8, 0x80,
+ 0x52, 0x6a, 0x22, 0x1a, 0xb2, 0x8a, 0xc2, 0xfa,
+ 0x8f, 0xb7, 0xff, 0xc7, 0x6f, 0x57, 0x1f, 0x27,
+ 0x51, 0x69, 0x21, 0x19, 0xb1, 0x89, 0xc1, 0xf9,
+ 0x8c, 0xb4, 0xfc, 0xc4, 0x6c, 0x54, 0x1c, 0x24,
+ 0xf6, 0xce, 0x86, 0xbe, 0x16, 0x2e, 0x66, 0x5e,
+ 0x2b, 0x13, 0x5b, 0x63, 0xcb, 0xf3, 0xbb, 0x83,
+ 0x02, 0x3a, 0x72, 0x4a, 0xe2, 0xda, 0x92, 0xaa,
+ 0xdf, 0xe7, 0xaf, 0x97, 0x3f, 0x07, 0x4f, 0x77,
+ 0xa5, 0x9d, 0xd5, 0xed, 0x45, 0x7d, 0x35, 0x0d,
+ 0x78, 0x40, 0x08, 0x30, 0x98, 0xa0, 0xe8, 0xd0,
+ 0xf7, 0xcf, 0x87, 0xbf, 0x17, 0x2f, 0x67, 0x5f,
+ 0x2a, 0x12, 0x5a, 0x62, 0xca, 0xf2, 0xba, 0x82,
+ 0x50, 0x68, 0x20, 0x18, 0xb0, 0x88, 0xc0, 0xf8,
+ 0x8d, 0xb5, 0xfd, 0xc5, 0x6d, 0x55, 0x1d, 0x25,
+ 0xa4, 0x9c, 0xd4, 0xec, 0x44, 0x7c, 0x34, 0x0c,
+ 0x79, 0x41, 0x09, 0x31, 0x99, 0xa1, 0xe9, 0xd1,
+ 0x03, 0x3b, 0x73, 0x4b, 0xe3, 0xdb, 0x93, 0xab,
+ 0xde, 0xe6, 0xae, 0x96, 0x3e, 0x06, 0x4e, 0x76,
+ },
+ {
+ 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf,
+ 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a,
+ 0xb7, 0x8e, 0xc5, 0xfc, 0x53, 0x6a, 0x21, 0x18,
+ 0x62, 0x5b, 0x10, 0x29, 0x86, 0xbf, 0xf4, 0xcd,
+ 0x73, 0x4a, 0x01, 0x38, 0x97, 0xae, 0xe5, 0xdc,
+ 0xa6, 0x9f, 0xd4, 0xed, 0x42, 0x7b, 0x30, 0x09,
+ 0xc4, 0xfd, 0xb6, 0x8f, 0x20, 0x19, 0x52, 0x6b,
+ 0x11, 0x28, 0x63, 0x5a, 0xf5, 0xcc, 0x87, 0xbe,
+ 0xe6, 0xdf, 0x94, 0xad, 0x02, 0x3b, 0x70, 0x49,
+ 0x33, 0x0a, 0x41, 0x78, 0xd7, 0xee, 0xa5, 0x9c,
+ 0x51, 0x68, 0x23, 0x1a, 0xb5, 0x8c, 0xc7, 0xfe,
+ 0x84, 0xbd, 0xf6, 0xcf, 0x60, 0x59, 0x12, 0x2b,
+ 0x95, 0xac, 0xe7, 0xde, 0x71, 0x48, 0x03, 0x3a,
+ 0x40, 0x79, 0x32, 0x0b, 0xa4, 0x9d, 0xd6, 0xef,
+ 0x22, 0x1b, 0x50, 0x69, 0xc6, 0xff, 0xb4, 0x8d,
+ 0xf7, 0xce, 0x85, 0xbc, 0x13, 0x2a, 0x61, 0x58,
+ 0xd1, 0xe8, 0xa3, 0x9a, 0x35, 0x0c, 0x47, 0x7e,
+ 0x04, 0x3d, 0x76, 0x4f, 0xe0, 0xd9, 0x92, 0xab,
+ 0x66, 0x5f, 0x14, 0x2d, 0x82, 0xbb, 0xf0, 0xc9,
+ 0xb3, 0x8a, 0xc1, 0xf8, 0x57, 0x6e, 0x25, 0x1c,
+ 0xa2, 0x9b, 0xd0, 0xe9, 0x46, 0x7f, 0x34, 0x0d,
+ 0x77, 0x4e, 0x05, 0x3c, 0x93, 0xaa, 0xe1, 0xd8,
+ 0x15, 0x2c, 0x67, 0x5e, 0xf1, 0xc8, 0x83, 0xba,
+ 0xc0, 0xf9, 0xb2, 0x8b, 0x24, 0x1d, 0x56, 0x6f,
+ 0x37, 0x0e, 0x45, 0x7c, 0xd3, 0xea, 0xa1, 0x98,
+ 0xe2, 0xdb, 0x90, 0xa9, 0x06, 0x3f, 0x74, 0x4d,
+ 0x80, 0xb9, 0xf2, 0xcb, 0x64, 0x5d, 0x16, 0x2f,
+ 0x55, 0x6c, 0x27, 0x1e, 0xb1, 0x88, 0xc3, 0xfa,
+ 0x44, 0x7d, 0x36, 0x0f, 0xa0, 0x99, 0xd2, 0xeb,
+ 0x91, 0xa8, 0xe3, 0xda, 0x75, 0x4c, 0x07, 0x3e,
+ 0xf3, 0xca, 0x81, 0xb8, 0x17, 0x2e, 0x65, 0x5c,
+ 0x26, 0x1f, 0x54, 0x6d, 0xc2, 0xfb, 0xb0, 0x89,
+ },
+ {
+ 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b,
+ 0x87, 0xbd, 0xf3, 0xc9, 0x6f, 0x55, 0x1b, 0x21,
+ 0x4a, 0x70, 0x3e, 0x04, 0xa2, 0x98, 0xd6, 0xec,
+ 0x13, 0x29, 0x67, 0x5d, 0xfb, 0xc1, 0x8f, 0xb5,
+ 0xde, 0xe4, 0xaa, 0x90, 0x36, 0x0c, 0x42, 0x78,
+ 0x94, 0xae, 0xe0, 0xda, 0x7c, 0x46, 0x08, 0x32,
+ 0x59, 0x63, 0x2d, 0x17, 0xb1, 0x8b, 0xc5, 0xff,
+ 0x26, 0x1c, 0x52, 0x68, 0xce, 0xf4, 0xba, 0x80,
+ 0xeb, 0xd1, 0x9f, 0xa5, 0x03, 0x39, 0x77, 0x4d,
+ 0xa1, 0x9b, 0xd5, 0xef, 0x49, 0x73, 0x3d, 0x07,
+ 0x6c, 0x56, 0x18, 0x22, 0x84, 0xbe, 0xf0, 0xca,
+ 0x35, 0x0f, 0x41, 0x7b, 0xdd, 0xe7, 0xa9, 0x93,
+ 0xf8, 0xc2, 0x8c, 0xb6, 0x10, 0x2a, 0x64, 0x5e,
+ 0xb2, 0x88, 0xc6, 0xfc, 0x5a, 0x60, 0x2e, 0x14,
+ 0x7f, 0x45, 0x0b, 0x31, 0x97, 0xad, 0xe3, 0xd9,
+ 0x4c, 0x76, 0x38, 0x02, 0xa4, 0x9e, 0xd0, 0xea,
+ 0x81, 0xbb, 0xf5, 0xcf, 0x69, 0x53, 0x1d, 0x27,
+ 0xcb, 0xf1, 0xbf, 0x85, 0x23, 0x19, 0x57, 0x6d,
+ 0x06, 0x3c, 0x72, 0x48, 0xee, 0xd4, 0x9a, 0xa0,
+ 0x5f, 0x65, 0x2b, 0x11, 0xb7, 0x8d, 0xc3, 0xf9,
+ 0x92, 0xa8, 0xe6, 0xdc, 0x7a, 0x40, 0x0e, 0x34,
+ 0xd8, 0xe2, 0xac, 0x96, 0x30, 0x0a, 0x44, 0x7e,
+ 0x15, 0x2f, 0x61, 0x5b, 0xfd, 0xc7, 0x89, 0xb3,
+ 0x6a, 0x50, 0x1e, 0x24, 0x82, 0xb8, 0xf6, 0xcc,
+ 0xa7, 0x9d, 0xd3, 0xe9, 0x4f, 0x75, 0x3b, 0x01,
+ 0xed, 0xd7, 0x99, 0xa3, 0x05, 0x3f, 0x71, 0x4b,
+ 0x20, 0x1a, 0x54, 0x6e, 0xc8, 0xf2, 0xbc, 0x86,
+ 0x79, 0x43, 0x0d, 0x37, 0x91, 0xab, 0xe5, 0xdf,
+ 0xb4, 0x8e, 0xc0, 0xfa, 0x5c, 0x66, 0x28, 0x12,
+ 0xfe, 0xc4, 0x8a, 0xb0, 0x16, 0x2c, 0x62, 0x58,
+ 0x33, 0x09, 0x47, 0x7d, 0xdb, 0xe1, 0xaf, 0x95,
+ },
+ {
+ 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1,
+ 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64,
+ 0x97, 0xac, 0xe1, 0xda, 0x7b, 0x40, 0x0d, 0x36,
+ 0x52, 0x69, 0x24, 0x1f, 0xbe, 0x85, 0xc8, 0xf3,
+ 0x33, 0x08, 0x45, 0x7e, 0xdf, 0xe4, 0xa9, 0x92,
+ 0xf6, 0xcd, 0x80, 0xbb, 0x1a, 0x21, 0x6c, 0x57,
+ 0xa4, 0x9f, 0xd2, 0xe9, 0x48, 0x73, 0x3e, 0x05,
+ 0x61, 0x5a, 0x17, 0x2c, 0x8d, 0xb6, 0xfb, 0xc0,
+ 0x66, 0x5d, 0x10, 0x2b, 0x8a, 0xb1, 0xfc, 0xc7,
+ 0xa3, 0x98, 0xd5, 0xee, 0x4f, 0x74, 0x39, 0x02,
+ 0xf1, 0xca, 0x87, 0xbc, 0x1d, 0x26, 0x6b, 0x50,
+ 0x34, 0x0f, 0x42, 0x79, 0xd8, 0xe3, 0xae, 0x95,
+ 0x55, 0x6e, 0x23, 0x18, 0xb9, 0x82, 0xcf, 0xf4,
+ 0x90, 0xab, 0xe6, 0xdd, 0x7c, 0x47, 0x0a, 0x31,
+ 0xc2, 0xf9, 0xb4, 0x8f, 0x2e, 0x15, 0x58, 0x63,
+ 0x07, 0x3c, 0x71, 0x4a, 0xeb, 0xd0, 0x9d, 0xa6,
+ 0xcc, 0xf7, 0xba, 0x81, 0x20, 0x1b, 0x56, 0x6d,
+ 0x09, 0x32, 0x7f, 0x44, 0xe5, 0xde, 0x93, 0xa8,
+ 0x5b, 0x60, 0x2d, 0x16, 0xb7, 0x8c, 0xc1, 0xfa,
+ 0x9e, 0xa5, 0xe8, 0xd3, 0x72, 0x49, 0x04, 0x3f,
+ 0xff, 0xc4, 0x89, 0xb2, 0x13, 0x28, 0x65, 0x5e,
+ 0x3a, 0x01, 0x4c, 0x77, 0xd6, 0xed, 0xa0, 0x9b,
+ 0x68, 0x53, 0x1e, 0x25, 0x84, 0xbf, 0xf2, 0xc9,
+ 0xad, 0x96, 0xdb, 0xe0, 0x41, 0x7a, 0x37, 0x0c,
+ 0xaa, 0x91, 0xdc, 0xe7, 0x46, 0x7d, 0x30, 0x0b,
+ 0x6f, 0x54, 0x19, 0x22, 0x83, 0xb8, 0xf5, 0xce,
+ 0x3d, 0x06, 0x4b, 0x70, 0xd1, 0xea, 0xa7, 0x9c,
+ 0xf8, 0xc3, 0x8e, 0xb5, 0x14, 0x2f, 0x62, 0x59,
+ 0x99, 0xa2, 0xef, 0xd4, 0x75, 0x4e, 0x03, 0x38,
+ 0x5c, 0x67, 0x2a, 0x11, 0xb0, 0x8b, 0xc6, 0xfd,
+ 0x0e, 0x35, 0x78, 0x43, 0xe2, 0xd9, 0x94, 0xaf,
+ 0xcb, 0xf0, 0xbd, 0x86, 0x27, 0x1c, 0x51, 0x6a,
+ },
+ {
+ 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4,
+ 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49,
+ 0xe7, 0xdb, 0x9f, 0xa3, 0x17, 0x2b, 0x6f, 0x53,
+ 0x1a, 0x26, 0x62, 0x5e, 0xea, 0xd6, 0x92, 0xae,
+ 0xd3, 0xef, 0xab, 0x97, 0x23, 0x1f, 0x5b, 0x67,
+ 0x2e, 0x12, 0x56, 0x6a, 0xde, 0xe2, 0xa6, 0x9a,
+ 0x34, 0x08, 0x4c, 0x70, 0xc4, 0xf8, 0xbc, 0x80,
+ 0xc9, 0xf5, 0xb1, 0x8d, 0x39, 0x05, 0x41, 0x7d,
+ 0xbb, 0x87, 0xc3, 0xff, 0x4b, 0x77, 0x33, 0x0f,
+ 0x46, 0x7a, 0x3e, 0x02, 0xb6, 0x8a, 0xce, 0xf2,
+ 0x5c, 0x60, 0x24, 0x18, 0xac, 0x90, 0xd4, 0xe8,
+ 0xa1, 0x9d, 0xd9, 0xe5, 0x51, 0x6d, 0x29, 0x15,
+ 0x68, 0x54, 0x10, 0x2c, 0x98, 0xa4, 0xe0, 0xdc,
+ 0x95, 0xa9, 0xed, 0xd1, 0x65, 0x59, 0x1d, 0x21,
+ 0x8f, 0xb3, 0xf7, 0xcb, 0x7f, 0x43, 0x07, 0x3b,
+ 0x72, 0x4e, 0x0a, 0x36, 0x82, 0xbe, 0xfa, 0xc6,
+ 0x6b, 0x57, 0x13, 0x2f, 0x9b, 0xa7, 0xe3, 0xdf,
+ 0x96, 0xaa, 0xee, 0xd2, 0x66, 0x5a, 0x1e, 0x22,
+ 0x8c, 0xb0, 0xf4, 0xc8, 0x7c, 0x40, 0x04, 0x38,
+ 0x71, 0x4d, 0x09, 0x35, 0x81, 0xbd, 0xf9, 0xc5,
+ 0xb8, 0x84, 0xc0, 0xfc, 0x48, 0x74, 0x30, 0x0c,
+ 0x45, 0x79, 0x3d, 0x01, 0xb5, 0x89, 0xcd, 0xf1,
+ 0x5f, 0x63, 0x27, 0x1b, 0xaf, 0x93, 0xd7, 0xeb,
+ 0xa2, 0x9e, 0xda, 0xe6, 0x52, 0x6e, 0x2a, 0x16,
+ 0xd0, 0xec, 0xa8, 0x94, 0x20, 0x1c, 0x58, 0x64,
+ 0x2d, 0x11, 0x55, 0x69, 0xdd, 0xe1, 0xa5, 0x99,
+ 0x37, 0x0b, 0x4f, 0x73, 0xc7, 0xfb, 0xbf, 0x83,
+ 0xca, 0xf6, 0xb2, 0x8e, 0x3a, 0x06, 0x42, 0x7e,
+ 0x03, 0x3f, 0x7b, 0x47, 0xf3, 0xcf, 0x8b, 0xb7,
+ 0xfe, 0xc2, 0x86, 0xba, 0x0e, 0x32, 0x76, 0x4a,
+ 0xe4, 0xd8, 0x9c, 0xa0, 0x14, 0x28, 0x6c, 0x50,
+ 0x19, 0x25, 0x61, 0x5d, 0xe9, 0xd5, 0x91, 0xad,
+ },
+ {
+ 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3,
+ 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46,
+ 0xf7, 0xca, 0x8d, 0xb0, 0x03, 0x3e, 0x79, 0x44,
+ 0x02, 0x3f, 0x78, 0x45, 0xf6, 0xcb, 0x8c, 0xb1,
+ 0xf3, 0xce, 0x89, 0xb4, 0x07, 0x3a, 0x7d, 0x40,
+ 0x06, 0x3b, 0x7c, 0x41, 0xf2, 0xcf, 0x88, 0xb5,
+ 0x04, 0x39, 0x7e, 0x43, 0xf0, 0xcd, 0x8a, 0xb7,
+ 0xf1, 0xcc, 0x8b, 0xb6, 0x05, 0x38, 0x7f, 0x42,
+ 0xfb, 0xc6, 0x81, 0xbc, 0x0f, 0x32, 0x75, 0x48,
+ 0x0e, 0x33, 0x74, 0x49, 0xfa, 0xc7, 0x80, 0xbd,
+ 0x0c, 0x31, 0x76, 0x4b, 0xf8, 0xc5, 0x82, 0xbf,
+ 0xf9, 0xc4, 0x83, 0xbe, 0x0d, 0x30, 0x77, 0x4a,
+ 0x08, 0x35, 0x72, 0x4f, 0xfc, 0xc1, 0x86, 0xbb,
+ 0xfd, 0xc0, 0x87, 0xba, 0x09, 0x34, 0x73, 0x4e,
+ 0xff, 0xc2, 0x85, 0xb8, 0x0b, 0x36, 0x71, 0x4c,
+ 0x0a, 0x37, 0x70, 0x4d, 0xfe, 0xc3, 0x84, 0xb9,
+ 0xeb, 0xd6, 0x91, 0xac, 0x1f, 0x22, 0x65, 0x58,
+ 0x1e, 0x23, 0x64, 0x59, 0xea, 0xd7, 0x90, 0xad,
+ 0x1c, 0x21, 0x66, 0x5b, 0xe8, 0xd5, 0x92, 0xaf,
+ 0xe9, 0xd4, 0x93, 0xae, 0x1d, 0x20, 0x67, 0x5a,
+ 0x18, 0x25, 0x62, 0x5f, 0xec, 0xd1, 0x96, 0xab,
+ 0xed, 0xd0, 0x97, 0xaa, 0x19, 0x24, 0x63, 0x5e,
+ 0xef, 0xd2, 0x95, 0xa8, 0x1b, 0x26, 0x61, 0x5c,
+ 0x1a, 0x27, 0x60, 0x5d, 0xee, 0xd3, 0x94, 0xa9,
+ 0x10, 0x2d, 0x6a, 0x57, 0xe4, 0xd9, 0x9e, 0xa3,
+ 0xe5, 0xd8, 0x9f, 0xa2, 0x11, 0x2c, 0x6b, 0x56,
+ 0xe7, 0xda, 0x9d, 0xa0, 0x13, 0x2e, 0x69, 0x54,
+ 0x12, 0x2f, 0x68, 0x55, 0xe6, 0xdb, 0x9c, 0xa1,
+ 0xe3, 0xde, 0x99, 0xa4, 0x17, 0x2a, 0x6d, 0x50,
+ 0x16, 0x2b, 0x6c, 0x51, 0xe2, 0xdf, 0x98, 0xa5,
+ 0x14, 0x29, 0x6e, 0x53, 0xe0, 0xdd, 0x9a, 0xa7,
+ 0xe1, 0xdc, 0x9b, 0xa6, 0x15, 0x28, 0x6f, 0x52,
+ },
+ {
+ 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba,
+ 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57,
+ 0xc7, 0xf9, 0xbb, 0x85, 0x3f, 0x01, 0x43, 0x7d,
+ 0x2a, 0x14, 0x56, 0x68, 0xd2, 0xec, 0xae, 0x90,
+ 0x93, 0xad, 0xef, 0xd1, 0x6b, 0x55, 0x17, 0x29,
+ 0x7e, 0x40, 0x02, 0x3c, 0x86, 0xb8, 0xfa, 0xc4,
+ 0x54, 0x6a, 0x28, 0x16, 0xac, 0x92, 0xd0, 0xee,
+ 0xb9, 0x87, 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x03,
+ 0x3b, 0x05, 0x47, 0x79, 0xc3, 0xfd, 0xbf, 0x81,
+ 0xd6, 0xe8, 0xaa, 0x94, 0x2e, 0x10, 0x52, 0x6c,
+ 0xfc, 0xc2, 0x80, 0xbe, 0x04, 0x3a, 0x78, 0x46,
+ 0x11, 0x2f, 0x6d, 0x53, 0xe9, 0xd7, 0x95, 0xab,
+ 0xa8, 0x96, 0xd4, 0xea, 0x50, 0x6e, 0x2c, 0x12,
+ 0x45, 0x7b, 0x39, 0x07, 0xbd, 0x83, 0xc1, 0xff,
+ 0x6f, 0x51, 0x13, 0x2d, 0x97, 0xa9, 0xeb, 0xd5,
+ 0x82, 0xbc, 0xfe, 0xc0, 0x7a, 0x44, 0x06, 0x38,
+ 0x76, 0x48, 0x0a, 0x34, 0x8e, 0xb0, 0xf2, 0xcc,
+ 0x9b, 0xa5, 0xe7, 0xd9, 0x63, 0x5d, 0x1f, 0x21,
+ 0xb1, 0x8f, 0xcd, 0xf3, 0x49, 0x77, 0x35, 0x0b,
+ 0x5c, 0x62, 0x20, 0x1e, 0xa4, 0x9a, 0xd8, 0xe6,
+ 0xe5, 0xdb, 0x99, 0xa7, 0x1d, 0x23, 0x61, 0x5f,
+ 0x08, 0x36, 0x74, 0x4a, 0xf0, 0xce, 0x8c, 0xb2,
+ 0x22, 0x1c, 0x5e, 0x60, 0xda, 0xe4, 0xa6, 0x98,
+ 0xcf, 0xf1, 0xb3, 0x8d, 0x37, 0x09, 0x4b, 0x75,
+ 0x4d, 0x73, 0x31, 0x0f, 0xb5, 0x8b, 0xc9, 0xf7,
+ 0xa0, 0x9e, 0xdc, 0xe2, 0x58, 0x66, 0x24, 0x1a,
+ 0x8a, 0xb4, 0xf6, 0xc8, 0x72, 0x4c, 0x0e, 0x30,
+ 0x67, 0x59, 0x1b, 0x25, 0x9f, 0xa1, 0xe3, 0xdd,
+ 0xde, 0xe0, 0xa2, 0x9c, 0x26, 0x18, 0x5a, 0x64,
+ 0x33, 0x0d, 0x4f, 0x71, 0xcb, 0xf5, 0xb7, 0x89,
+ 0x19, 0x27, 0x65, 0x5b, 0xe1, 0xdf, 0x9d, 0xa3,
+ 0xf4, 0xca, 0x88, 0xb6, 0x0c, 0x32, 0x70, 0x4e,
+ },
+ {
+ 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd,
+ 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58,
+ 0xd7, 0xe8, 0xa9, 0x96, 0x2b, 0x14, 0x55, 0x6a,
+ 0x32, 0x0d, 0x4c, 0x73, 0xce, 0xf1, 0xb0, 0x8f,
+ 0xb3, 0x8c, 0xcd, 0xf2, 0x4f, 0x70, 0x31, 0x0e,
+ 0x56, 0x69, 0x28, 0x17, 0xaa, 0x95, 0xd4, 0xeb,
+ 0x64, 0x5b, 0x1a, 0x25, 0x98, 0xa7, 0xe6, 0xd9,
+ 0x81, 0xbe, 0xff, 0xc0, 0x7d, 0x42, 0x03, 0x3c,
+ 0x7b, 0x44, 0x05, 0x3a, 0x87, 0xb8, 0xf9, 0xc6,
+ 0x9e, 0xa1, 0xe0, 0xdf, 0x62, 0x5d, 0x1c, 0x23,
+ 0xac, 0x93, 0xd2, 0xed, 0x50, 0x6f, 0x2e, 0x11,
+ 0x49, 0x76, 0x37, 0x08, 0xb5, 0x8a, 0xcb, 0xf4,
+ 0xc8, 0xf7, 0xb6, 0x89, 0x34, 0x0b, 0x4a, 0x75,
+ 0x2d, 0x12, 0x53, 0x6c, 0xd1, 0xee, 0xaf, 0x90,
+ 0x1f, 0x20, 0x61, 0x5e, 0xe3, 0xdc, 0x9d, 0xa2,
+ 0xfa, 0xc5, 0x84, 0xbb, 0x06, 0x39, 0x78, 0x47,
+ 0xf6, 0xc9, 0x88, 0xb7, 0x0a, 0x35, 0x74, 0x4b,
+ 0x13, 0x2c, 0x6d, 0x52, 0xef, 0xd0, 0x91, 0xae,
+ 0x21, 0x1e, 0x5f, 0x60, 0xdd, 0xe2, 0xa3, 0x9c,
+ 0xc4, 0xfb, 0xba, 0x85, 0x38, 0x07, 0x46, 0x79,
+ 0x45, 0x7a, 0x3b, 0x04, 0xb9, 0x86, 0xc7, 0xf8,
+ 0xa0, 0x9f, 0xde, 0xe1, 0x5c, 0x63, 0x22, 0x1d,
+ 0x92, 0xad, 0xec, 0xd3, 0x6e, 0x51, 0x10, 0x2f,
+ 0x77, 0x48, 0x09, 0x36, 0x8b, 0xb4, 0xf5, 0xca,
+ 0x8d, 0xb2, 0xf3, 0xcc, 0x71, 0x4e, 0x0f, 0x30,
+ 0x68, 0x57, 0x16, 0x29, 0x94, 0xab, 0xea, 0xd5,
+ 0x5a, 0x65, 0x24, 0x1b, 0xa6, 0x99, 0xd8, 0xe7,
+ 0xbf, 0x80, 0xc1, 0xfe, 0x43, 0x7c, 0x3d, 0x02,
+ 0x3e, 0x01, 0x40, 0x7f, 0xc2, 0xfd, 0xbc, 0x83,
+ 0xdb, 0xe4, 0xa5, 0x9a, 0x27, 0x18, 0x59, 0x66,
+ 0xe9, 0xd6, 0x97, 0xa8, 0x15, 0x2a, 0x6b, 0x54,
+ 0x0c, 0x33, 0x72, 0x4d, 0xf0, 0xcf, 0x8e, 0xb1,
+ },
+ {
+ 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd,
+ 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7,
+ 0x74, 0x34, 0xf4, 0xb4, 0x69, 0x29, 0xe9, 0xa9,
+ 0x4e, 0x0e, 0xce, 0x8e, 0x53, 0x13, 0xd3, 0x93,
+ 0xe8, 0xa8, 0x68, 0x28, 0xf5, 0xb5, 0x75, 0x35,
+ 0xd2, 0x92, 0x52, 0x12, 0xcf, 0x8f, 0x4f, 0x0f,
+ 0x9c, 0xdc, 0x1c, 0x5c, 0x81, 0xc1, 0x01, 0x41,
+ 0xa6, 0xe6, 0x26, 0x66, 0xbb, 0xfb, 0x3b, 0x7b,
+ 0xcd, 0x8d, 0x4d, 0x0d, 0xd0, 0x90, 0x50, 0x10,
+ 0xf7, 0xb7, 0x77, 0x37, 0xea, 0xaa, 0x6a, 0x2a,
+ 0xb9, 0xf9, 0x39, 0x79, 0xa4, 0xe4, 0x24, 0x64,
+ 0x83, 0xc3, 0x03, 0x43, 0x9e, 0xde, 0x1e, 0x5e,
+ 0x25, 0x65, 0xa5, 0xe5, 0x38, 0x78, 0xb8, 0xf8,
+ 0x1f, 0x5f, 0x9f, 0xdf, 0x02, 0x42, 0x82, 0xc2,
+ 0x51, 0x11, 0xd1, 0x91, 0x4c, 0x0c, 0xcc, 0x8c,
+ 0x6b, 0x2b, 0xeb, 0xab, 0x76, 0x36, 0xf6, 0xb6,
+ 0x87, 0xc7, 0x07, 0x47, 0x9a, 0xda, 0x1a, 0x5a,
+ 0xbd, 0xfd, 0x3d, 0x7d, 0xa0, 0xe0, 0x20, 0x60,
+ 0xf3, 0xb3, 0x73, 0x33, 0xee, 0xae, 0x6e, 0x2e,
+ 0xc9, 0x89, 0x49, 0x09, 0xd4, 0x94, 0x54, 0x14,
+ 0x6f, 0x2f, 0xef, 0xaf, 0x72, 0x32, 0xf2, 0xb2,
+ 0x55, 0x15, 0xd5, 0x95, 0x48, 0x08, 0xc8, 0x88,
+ 0x1b, 0x5b, 0x9b, 0xdb, 0x06, 0x46, 0x86, 0xc6,
+ 0x21, 0x61, 0xa1, 0xe1, 0x3c, 0x7c, 0xbc, 0xfc,
+ 0x4a, 0x0a, 0xca, 0x8a, 0x57, 0x17, 0xd7, 0x97,
+ 0x70, 0x30, 0xf0, 0xb0, 0x6d, 0x2d, 0xed, 0xad,
+ 0x3e, 0x7e, 0xbe, 0xfe, 0x23, 0x63, 0xa3, 0xe3,
+ 0x04, 0x44, 0x84, 0xc4, 0x19, 0x59, 0x99, 0xd9,
+ 0xa2, 0xe2, 0x22, 0x62, 0xbf, 0xff, 0x3f, 0x7f,
+ 0x98, 0xd8, 0x18, 0x58, 0x85, 0xc5, 0x05, 0x45,
+ 0xd6, 0x96, 0x56, 0x16, 0xcb, 0x8b, 0x4b, 0x0b,
+ 0xec, 0xac, 0x6c, 0x2c, 0xf1, 0xb1, 0x71, 0x31,
+ },
+ {
+ 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda,
+ 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8,
+ 0x64, 0x25, 0xe6, 0xa7, 0x7d, 0x3c, 0xff, 0xbe,
+ 0x56, 0x17, 0xd4, 0x95, 0x4f, 0x0e, 0xcd, 0x8c,
+ 0xc8, 0x89, 0x4a, 0x0b, 0xd1, 0x90, 0x53, 0x12,
+ 0xfa, 0xbb, 0x78, 0x39, 0xe3, 0xa2, 0x61, 0x20,
+ 0xac, 0xed, 0x2e, 0x6f, 0xb5, 0xf4, 0x37, 0x76,
+ 0x9e, 0xdf, 0x1c, 0x5d, 0x87, 0xc6, 0x05, 0x44,
+ 0x8d, 0xcc, 0x0f, 0x4e, 0x94, 0xd5, 0x16, 0x57,
+ 0xbf, 0xfe, 0x3d, 0x7c, 0xa6, 0xe7, 0x24, 0x65,
+ 0xe9, 0xa8, 0x6b, 0x2a, 0xf0, 0xb1, 0x72, 0x33,
+ 0xdb, 0x9a, 0x59, 0x18, 0xc2, 0x83, 0x40, 0x01,
+ 0x45, 0x04, 0xc7, 0x86, 0x5c, 0x1d, 0xde, 0x9f,
+ 0x77, 0x36, 0xf5, 0xb4, 0x6e, 0x2f, 0xec, 0xad,
+ 0x21, 0x60, 0xa3, 0xe2, 0x38, 0x79, 0xba, 0xfb,
+ 0x13, 0x52, 0x91, 0xd0, 0x0a, 0x4b, 0x88, 0xc9,
+ 0x07, 0x46, 0x85, 0xc4, 0x1e, 0x5f, 0x9c, 0xdd,
+ 0x35, 0x74, 0xb7, 0xf6, 0x2c, 0x6d, 0xae, 0xef,
+ 0x63, 0x22, 0xe1, 0xa0, 0x7a, 0x3b, 0xf8, 0xb9,
+ 0x51, 0x10, 0xd3, 0x92, 0x48, 0x09, 0xca, 0x8b,
+ 0xcf, 0x8e, 0x4d, 0x0c, 0xd6, 0x97, 0x54, 0x15,
+ 0xfd, 0xbc, 0x7f, 0x3e, 0xe4, 0xa5, 0x66, 0x27,
+ 0xab, 0xea, 0x29, 0x68, 0xb2, 0xf3, 0x30, 0x71,
+ 0x99, 0xd8, 0x1b, 0x5a, 0x80, 0xc1, 0x02, 0x43,
+ 0x8a, 0xcb, 0x08, 0x49, 0x93, 0xd2, 0x11, 0x50,
+ 0xb8, 0xf9, 0x3a, 0x7b, 0xa1, 0xe0, 0x23, 0x62,
+ 0xee, 0xaf, 0x6c, 0x2d, 0xf7, 0xb6, 0x75, 0x34,
+ 0xdc, 0x9d, 0x5e, 0x1f, 0xc5, 0x84, 0x47, 0x06,
+ 0x42, 0x03, 0xc0, 0x81, 0x5b, 0x1a, 0xd9, 0x98,
+ 0x70, 0x31, 0xf2, 0xb3, 0x69, 0x28, 0xeb, 0xaa,
+ 0x26, 0x67, 0xa4, 0xe5, 0x3f, 0x7e, 0xbd, 0xfc,
+ 0x14, 0x55, 0x96, 0xd7, 0x0d, 0x4c, 0x8f, 0xce,
+ },
+ {
+ 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3,
+ 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9,
+ 0x54, 0x16, 0xd0, 0x92, 0x41, 0x03, 0xc5, 0x87,
+ 0x7e, 0x3c, 0xfa, 0xb8, 0x6b, 0x29, 0xef, 0xad,
+ 0xa8, 0xea, 0x2c, 0x6e, 0xbd, 0xff, 0x39, 0x7b,
+ 0x82, 0xc0, 0x06, 0x44, 0x97, 0xd5, 0x13, 0x51,
+ 0xfc, 0xbe, 0x78, 0x3a, 0xe9, 0xab, 0x6d, 0x2f,
+ 0xd6, 0x94, 0x52, 0x10, 0xc3, 0x81, 0x47, 0x05,
+ 0x4d, 0x0f, 0xc9, 0x8b, 0x58, 0x1a, 0xdc, 0x9e,
+ 0x67, 0x25, 0xe3, 0xa1, 0x72, 0x30, 0xf6, 0xb4,
+ 0x19, 0x5b, 0x9d, 0xdf, 0x0c, 0x4e, 0x88, 0xca,
+ 0x33, 0x71, 0xb7, 0xf5, 0x26, 0x64, 0xa2, 0xe0,
+ 0xe5, 0xa7, 0x61, 0x23, 0xf0, 0xb2, 0x74, 0x36,
+ 0xcf, 0x8d, 0x4b, 0x09, 0xda, 0x98, 0x5e, 0x1c,
+ 0xb1, 0xf3, 0x35, 0x77, 0xa4, 0xe6, 0x20, 0x62,
+ 0x9b, 0xd9, 0x1f, 0x5d, 0x8e, 0xcc, 0x0a, 0x48,
+ 0x9a, 0xd8, 0x1e, 0x5c, 0x8f, 0xcd, 0x0b, 0x49,
+ 0xb0, 0xf2, 0x34, 0x76, 0xa5, 0xe7, 0x21, 0x63,
+ 0xce, 0x8c, 0x4a, 0x08, 0xdb, 0x99, 0x5f, 0x1d,
+ 0xe4, 0xa6, 0x60, 0x22, 0xf1, 0xb3, 0x75, 0x37,
+ 0x32, 0x70, 0xb6, 0xf4, 0x27, 0x65, 0xa3, 0xe1,
+ 0x18, 0x5a, 0x9c, 0xde, 0x0d, 0x4f, 0x89, 0xcb,
+ 0x66, 0x24, 0xe2, 0xa0, 0x73, 0x31, 0xf7, 0xb5,
+ 0x4c, 0x0e, 0xc8, 0x8a, 0x59, 0x1b, 0xdd, 0x9f,
+ 0xd7, 0x95, 0x53, 0x11, 0xc2, 0x80, 0x46, 0x04,
+ 0xfd, 0xbf, 0x79, 0x3b, 0xe8, 0xaa, 0x6c, 0x2e,
+ 0x83, 0xc1, 0x07, 0x45, 0x96, 0xd4, 0x12, 0x50,
+ 0xa9, 0xeb, 0x2d, 0x6f, 0xbc, 0xfe, 0x38, 0x7a,
+ 0x7f, 0x3d, 0xfb, 0xb9, 0x6a, 0x28, 0xee, 0xac,
+ 0x55, 0x17, 0xd1, 0x93, 0x40, 0x02, 0xc4, 0x86,
+ 0x2b, 0x69, 0xaf, 0xed, 0x3e, 0x7c, 0xba, 0xf8,
+ 0x01, 0x43, 0x85, 0xc7, 0x14, 0x56, 0x90, 0xd2,
+ },
+ {
+ 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4,
+ 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6,
+ 0x44, 0x07, 0xc2, 0x81, 0x55, 0x16, 0xd3, 0x90,
+ 0x66, 0x25, 0xe0, 0xa3, 0x77, 0x34, 0xf1, 0xb2,
+ 0x88, 0xcb, 0x0e, 0x4d, 0x99, 0xda, 0x1f, 0x5c,
+ 0xaa, 0xe9, 0x2c, 0x6f, 0xbb, 0xf8, 0x3d, 0x7e,
+ 0xcc, 0x8f, 0x4a, 0x09, 0xdd, 0x9e, 0x5b, 0x18,
+ 0xee, 0xad, 0x68, 0x2b, 0xff, 0xbc, 0x79, 0x3a,
+ 0x0d, 0x4e, 0x8b, 0xc8, 0x1c, 0x5f, 0x9a, 0xd9,
+ 0x2f, 0x6c, 0xa9, 0xea, 0x3e, 0x7d, 0xb8, 0xfb,
+ 0x49, 0x0a, 0xcf, 0x8c, 0x58, 0x1b, 0xde, 0x9d,
+ 0x6b, 0x28, 0xed, 0xae, 0x7a, 0x39, 0xfc, 0xbf,
+ 0x85, 0xc6, 0x03, 0x40, 0x94, 0xd7, 0x12, 0x51,
+ 0xa7, 0xe4, 0x21, 0x62, 0xb6, 0xf5, 0x30, 0x73,
+ 0xc1, 0x82, 0x47, 0x04, 0xd0, 0x93, 0x56, 0x15,
+ 0xe3, 0xa0, 0x65, 0x26, 0xf2, 0xb1, 0x74, 0x37,
+ 0x1a, 0x59, 0x9c, 0xdf, 0x0b, 0x48, 0x8d, 0xce,
+ 0x38, 0x7b, 0xbe, 0xfd, 0x29, 0x6a, 0xaf, 0xec,
+ 0x5e, 0x1d, 0xd8, 0x9b, 0x4f, 0x0c, 0xc9, 0x8a,
+ 0x7c, 0x3f, 0xfa, 0xb9, 0x6d, 0x2e, 0xeb, 0xa8,
+ 0x92, 0xd1, 0x14, 0x57, 0x83, 0xc0, 0x05, 0x46,
+ 0xb0, 0xf3, 0x36, 0x75, 0xa1, 0xe2, 0x27, 0x64,
+ 0xd6, 0x95, 0x50, 0x13, 0xc7, 0x84, 0x41, 0x02,
+ 0xf4, 0xb7, 0x72, 0x31, 0xe5, 0xa6, 0x63, 0x20,
+ 0x17, 0x54, 0x91, 0xd2, 0x06, 0x45, 0x80, 0xc3,
+ 0x35, 0x76, 0xb3, 0xf0, 0x24, 0x67, 0xa2, 0xe1,
+ 0x53, 0x10, 0xd5, 0x96, 0x42, 0x01, 0xc4, 0x87,
+ 0x71, 0x32, 0xf7, 0xb4, 0x60, 0x23, 0xe6, 0xa5,
+ 0x9f, 0xdc, 0x19, 0x5a, 0x8e, 0xcd, 0x08, 0x4b,
+ 0xbd, 0xfe, 0x3b, 0x78, 0xac, 0xef, 0x2a, 0x69,
+ 0xdb, 0x98, 0x5d, 0x1e, 0xca, 0x89, 0x4c, 0x0f,
+ 0xf9, 0xba, 0x7f, 0x3c, 0xe8, 0xab, 0x6e, 0x2d,
+ },
+ {
+ 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1,
+ 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb,
+ 0x34, 0x70, 0xbc, 0xf8, 0x39, 0x7d, 0xb1, 0xf5,
+ 0x2e, 0x6a, 0xa6, 0xe2, 0x23, 0x67, 0xab, 0xef,
+ 0x68, 0x2c, 0xe0, 0xa4, 0x65, 0x21, 0xed, 0xa9,
+ 0x72, 0x36, 0xfa, 0xbe, 0x7f, 0x3b, 0xf7, 0xb3,
+ 0x5c, 0x18, 0xd4, 0x90, 0x51, 0x15, 0xd9, 0x9d,
+ 0x46, 0x02, 0xce, 0x8a, 0x4b, 0x0f, 0xc3, 0x87,
+ 0xd0, 0x94, 0x58, 0x1c, 0xdd, 0x99, 0x55, 0x11,
+ 0xca, 0x8e, 0x42, 0x06, 0xc7, 0x83, 0x4f, 0x0b,
+ 0xe4, 0xa0, 0x6c, 0x28, 0xe9, 0xad, 0x61, 0x25,
+ 0xfe, 0xba, 0x76, 0x32, 0xf3, 0xb7, 0x7b, 0x3f,
+ 0xb8, 0xfc, 0x30, 0x74, 0xb5, 0xf1, 0x3d, 0x79,
+ 0xa2, 0xe6, 0x2a, 0x6e, 0xaf, 0xeb, 0x27, 0x63,
+ 0x8c, 0xc8, 0x04, 0x40, 0x81, 0xc5, 0x09, 0x4d,
+ 0x96, 0xd2, 0x1e, 0x5a, 0x9b, 0xdf, 0x13, 0x57,
+ 0xbd, 0xf9, 0x35, 0x71, 0xb0, 0xf4, 0x38, 0x7c,
+ 0xa7, 0xe3, 0x2f, 0x6b, 0xaa, 0xee, 0x22, 0x66,
+ 0x89, 0xcd, 0x01, 0x45, 0x84, 0xc0, 0x0c, 0x48,
+ 0x93, 0xd7, 0x1b, 0x5f, 0x9e, 0xda, 0x16, 0x52,
+ 0xd5, 0x91, 0x5d, 0x19, 0xd8, 0x9c, 0x50, 0x14,
+ 0xcf, 0x8b, 0x47, 0x03, 0xc2, 0x86, 0x4a, 0x0e,
+ 0xe1, 0xa5, 0x69, 0x2d, 0xec, 0xa8, 0x64, 0x20,
+ 0xfb, 0xbf, 0x73, 0x37, 0xf6, 0xb2, 0x7e, 0x3a,
+ 0x6d, 0x29, 0xe5, 0xa1, 0x60, 0x24, 0xe8, 0xac,
+ 0x77, 0x33, 0xff, 0xbb, 0x7a, 0x3e, 0xf2, 0xb6,
+ 0x59, 0x1d, 0xd1, 0x95, 0x54, 0x10, 0xdc, 0x98,
+ 0x43, 0x07, 0xcb, 0x8f, 0x4e, 0x0a, 0xc6, 0x82,
+ 0x05, 0x41, 0x8d, 0xc9, 0x08, 0x4c, 0x80, 0xc4,
+ 0x1f, 0x5b, 0x97, 0xd3, 0x12, 0x56, 0x9a, 0xde,
+ 0x31, 0x75, 0xb9, 0xfd, 0x3c, 0x78, 0xb4, 0xf0,
+ 0x2b, 0x6f, 0xa3, 0xe7, 0x26, 0x62, 0xae, 0xea,
+ },
+ {
+ 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6,
+ 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4,
+ 0x24, 0x61, 0xae, 0xeb, 0x2d, 0x68, 0xa7, 0xe2,
+ 0x36, 0x73, 0xbc, 0xf9, 0x3f, 0x7a, 0xb5, 0xf0,
+ 0x48, 0x0d, 0xc2, 0x87, 0x41, 0x04, 0xcb, 0x8e,
+ 0x5a, 0x1f, 0xd0, 0x95, 0x53, 0x16, 0xd9, 0x9c,
+ 0x6c, 0x29, 0xe6, 0xa3, 0x65, 0x20, 0xef, 0xaa,
+ 0x7e, 0x3b, 0xf4, 0xb1, 0x77, 0x32, 0xfd, 0xb8,
+ 0x90, 0xd5, 0x1a, 0x5f, 0x99, 0xdc, 0x13, 0x56,
+ 0x82, 0xc7, 0x08, 0x4d, 0x8b, 0xce, 0x01, 0x44,
+ 0xb4, 0xf1, 0x3e, 0x7b, 0xbd, 0xf8, 0x37, 0x72,
+ 0xa6, 0xe3, 0x2c, 0x69, 0xaf, 0xea, 0x25, 0x60,
+ 0xd8, 0x9d, 0x52, 0x17, 0xd1, 0x94, 0x5b, 0x1e,
+ 0xca, 0x8f, 0x40, 0x05, 0xc3, 0x86, 0x49, 0x0c,
+ 0xfc, 0xb9, 0x76, 0x33, 0xf5, 0xb0, 0x7f, 0x3a,
+ 0xee, 0xab, 0x64, 0x21, 0xe7, 0xa2, 0x6d, 0x28,
+ 0x3d, 0x78, 0xb7, 0xf2, 0x34, 0x71, 0xbe, 0xfb,
+ 0x2f, 0x6a, 0xa5, 0xe0, 0x26, 0x63, 0xac, 0xe9,
+ 0x19, 0x5c, 0x93, 0xd6, 0x10, 0x55, 0x9a, 0xdf,
+ 0x0b, 0x4e, 0x81, 0xc4, 0x02, 0x47, 0x88, 0xcd,
+ 0x75, 0x30, 0xff, 0xba, 0x7c, 0x39, 0xf6, 0xb3,
+ 0x67, 0x22, 0xed, 0xa8, 0x6e, 0x2b, 0xe4, 0xa1,
+ 0x51, 0x14, 0xdb, 0x9e, 0x58, 0x1d, 0xd2, 0x97,
+ 0x43, 0x06, 0xc9, 0x8c, 0x4a, 0x0f, 0xc0, 0x85,
+ 0xad, 0xe8, 0x27, 0x62, 0xa4, 0xe1, 0x2e, 0x6b,
+ 0xbf, 0xfa, 0x35, 0x70, 0xb6, 0xf3, 0x3c, 0x79,
+ 0x89, 0xcc, 0x03, 0x46, 0x80, 0xc5, 0x0a, 0x4f,
+ 0x9b, 0xde, 0x11, 0x54, 0x92, 0xd7, 0x18, 0x5d,
+ 0xe5, 0xa0, 0x6f, 0x2a, 0xec, 0xa9, 0x66, 0x23,
+ 0xf7, 0xb2, 0x7d, 0x38, 0xfe, 0xbb, 0x74, 0x31,
+ 0xc1, 0x84, 0x4b, 0x0e, 0xc8, 0x8d, 0x42, 0x07,
+ 0xd3, 0x96, 0x59, 0x1c, 0xda, 0x9f, 0x50, 0x15,
+ },
+ {
+ 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf,
+ 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5,
+ 0x14, 0x52, 0x98, 0xde, 0x11, 0x57, 0x9d, 0xdb,
+ 0x1e, 0x58, 0x92, 0xd4, 0x1b, 0x5d, 0x97, 0xd1,
+ 0x28, 0x6e, 0xa4, 0xe2, 0x2d, 0x6b, 0xa1, 0xe7,
+ 0x22, 0x64, 0xae, 0xe8, 0x27, 0x61, 0xab, 0xed,
+ 0x3c, 0x7a, 0xb0, 0xf6, 0x39, 0x7f, 0xb5, 0xf3,
+ 0x36, 0x70, 0xba, 0xfc, 0x33, 0x75, 0xbf, 0xf9,
+ 0x50, 0x16, 0xdc, 0x9a, 0x55, 0x13, 0xd9, 0x9f,
+ 0x5a, 0x1c, 0xd6, 0x90, 0x5f, 0x19, 0xd3, 0x95,
+ 0x44, 0x02, 0xc8, 0x8e, 0x41, 0x07, 0xcd, 0x8b,
+ 0x4e, 0x08, 0xc2, 0x84, 0x4b, 0x0d, 0xc7, 0x81,
+ 0x78, 0x3e, 0xf4, 0xb2, 0x7d, 0x3b, 0xf1, 0xb7,
+ 0x72, 0x34, 0xfe, 0xb8, 0x77, 0x31, 0xfb, 0xbd,
+ 0x6c, 0x2a, 0xe0, 0xa6, 0x69, 0x2f, 0xe5, 0xa3,
+ 0x66, 0x20, 0xea, 0xac, 0x63, 0x25, 0xef, 0xa9,
+ 0xa0, 0xe6, 0x2c, 0x6a, 0xa5, 0xe3, 0x29, 0x6f,
+ 0xaa, 0xec, 0x26, 0x60, 0xaf, 0xe9, 0x23, 0x65,
+ 0xb4, 0xf2, 0x38, 0x7e, 0xb1, 0xf7, 0x3d, 0x7b,
+ 0xbe, 0xf8, 0x32, 0x74, 0xbb, 0xfd, 0x37, 0x71,
+ 0x88, 0xce, 0x04, 0x42, 0x8d, 0xcb, 0x01, 0x47,
+ 0x82, 0xc4, 0x0e, 0x48, 0x87, 0xc1, 0x0b, 0x4d,
+ 0x9c, 0xda, 0x10, 0x56, 0x99, 0xdf, 0x15, 0x53,
+ 0x96, 0xd0, 0x1a, 0x5c, 0x93, 0xd5, 0x1f, 0x59,
+ 0xf0, 0xb6, 0x7c, 0x3a, 0xf5, 0xb3, 0x79, 0x3f,
+ 0xfa, 0xbc, 0x76, 0x30, 0xff, 0xb9, 0x73, 0x35,
+ 0xe4, 0xa2, 0x68, 0x2e, 0xe1, 0xa7, 0x6d, 0x2b,
+ 0xee, 0xa8, 0x62, 0x24, 0xeb, 0xad, 0x67, 0x21,
+ 0xd8, 0x9e, 0x54, 0x12, 0xdd, 0x9b, 0x51, 0x17,
+ 0xd2, 0x94, 0x5e, 0x18, 0xd7, 0x91, 0x5b, 0x1d,
+ 0xcc, 0x8a, 0x40, 0x06, 0xc9, 0x8f, 0x45, 0x03,
+ 0xc6, 0x80, 0x4a, 0x0c, 0xc3, 0x85, 0x4f, 0x09,
+ },
+ {
+ 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8,
+ 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca,
+ 0x04, 0x43, 0x8a, 0xcd, 0x05, 0x42, 0x8b, 0xcc,
+ 0x06, 0x41, 0x88, 0xcf, 0x07, 0x40, 0x89, 0xce,
+ 0x08, 0x4f, 0x86, 0xc1, 0x09, 0x4e, 0x87, 0xc0,
+ 0x0a, 0x4d, 0x84, 0xc3, 0x0b, 0x4c, 0x85, 0xc2,
+ 0x0c, 0x4b, 0x82, 0xc5, 0x0d, 0x4a, 0x83, 0xc4,
+ 0x0e, 0x49, 0x80, 0xc7, 0x0f, 0x48, 0x81, 0xc6,
+ 0x10, 0x57, 0x9e, 0xd9, 0x11, 0x56, 0x9f, 0xd8,
+ 0x12, 0x55, 0x9c, 0xdb, 0x13, 0x54, 0x9d, 0xda,
+ 0x14, 0x53, 0x9a, 0xdd, 0x15, 0x52, 0x9b, 0xdc,
+ 0x16, 0x51, 0x98, 0xdf, 0x17, 0x50, 0x99, 0xde,
+ 0x18, 0x5f, 0x96, 0xd1, 0x19, 0x5e, 0x97, 0xd0,
+ 0x1a, 0x5d, 0x94, 0xd3, 0x1b, 0x5c, 0x95, 0xd2,
+ 0x1c, 0x5b, 0x92, 0xd5, 0x1d, 0x5a, 0x93, 0xd4,
+ 0x1e, 0x59, 0x90, 0xd7, 0x1f, 0x58, 0x91, 0xd6,
+ 0x20, 0x67, 0xae, 0xe9, 0x21, 0x66, 0xaf, 0xe8,
+ 0x22, 0x65, 0xac, 0xeb, 0x23, 0x64, 0xad, 0xea,
+ 0x24, 0x63, 0xaa, 0xed, 0x25, 0x62, 0xab, 0xec,
+ 0x26, 0x61, 0xa8, 0xef, 0x27, 0x60, 0xa9, 0xee,
+ 0x28, 0x6f, 0xa6, 0xe1, 0x29, 0x6e, 0xa7, 0xe0,
+ 0x2a, 0x6d, 0xa4, 0xe3, 0x2b, 0x6c, 0xa5, 0xe2,
+ 0x2c, 0x6b, 0xa2, 0xe5, 0x2d, 0x6a, 0xa3, 0xe4,
+ 0x2e, 0x69, 0xa0, 0xe7, 0x2f, 0x68, 0xa1, 0xe6,
+ 0x30, 0x77, 0xbe, 0xf9, 0x31, 0x76, 0xbf, 0xf8,
+ 0x32, 0x75, 0xbc, 0xfb, 0x33, 0x74, 0xbd, 0xfa,
+ 0x34, 0x73, 0xba, 0xfd, 0x35, 0x72, 0xbb, 0xfc,
+ 0x36, 0x71, 0xb8, 0xff, 0x37, 0x70, 0xb9, 0xfe,
+ 0x38, 0x7f, 0xb6, 0xf1, 0x39, 0x7e, 0xb7, 0xf0,
+ 0x3a, 0x7d, 0xb4, 0xf3, 0x3b, 0x7c, 0xb5, 0xf2,
+ 0x3c, 0x7b, 0xb2, 0xf5, 0x3d, 0x7a, 0xb3, 0xf4,
+ 0x3e, 0x79, 0xb0, 0xf7, 0x3f, 0x78, 0xb1, 0xf6,
+ },
+ {
+ 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5,
+ 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f,
+ 0xf4, 0xbc, 0x64, 0x2c, 0xc9, 0x81, 0x59, 0x11,
+ 0x8e, 0xc6, 0x1e, 0x56, 0xb3, 0xfb, 0x23, 0x6b,
+ 0xf5, 0xbd, 0x65, 0x2d, 0xc8, 0x80, 0x58, 0x10,
+ 0x8f, 0xc7, 0x1f, 0x57, 0xb2, 0xfa, 0x22, 0x6a,
+ 0x01, 0x49, 0x91, 0xd9, 0x3c, 0x74, 0xac, 0xe4,
+ 0x7b, 0x33, 0xeb, 0xa3, 0x46, 0x0e, 0xd6, 0x9e,
+ 0xf7, 0xbf, 0x67, 0x2f, 0xca, 0x82, 0x5a, 0x12,
+ 0x8d, 0xc5, 0x1d, 0x55, 0xb0, 0xf8, 0x20, 0x68,
+ 0x03, 0x4b, 0x93, 0xdb, 0x3e, 0x76, 0xae, 0xe6,
+ 0x79, 0x31, 0xe9, 0xa1, 0x44, 0x0c, 0xd4, 0x9c,
+ 0x02, 0x4a, 0x92, 0xda, 0x3f, 0x77, 0xaf, 0xe7,
+ 0x78, 0x30, 0xe8, 0xa0, 0x45, 0x0d, 0xd5, 0x9d,
+ 0xf6, 0xbe, 0x66, 0x2e, 0xcb, 0x83, 0x5b, 0x13,
+ 0x8c, 0xc4, 0x1c, 0x54, 0xb1, 0xf9, 0x21, 0x69,
+ 0xf3, 0xbb, 0x63, 0x2b, 0xce, 0x86, 0x5e, 0x16,
+ 0x89, 0xc1, 0x19, 0x51, 0xb4, 0xfc, 0x24, 0x6c,
+ 0x07, 0x4f, 0x97, 0xdf, 0x3a, 0x72, 0xaa, 0xe2,
+ 0x7d, 0x35, 0xed, 0xa5, 0x40, 0x08, 0xd0, 0x98,
+ 0x06, 0x4e, 0x96, 0xde, 0x3b, 0x73, 0xab, 0xe3,
+ 0x7c, 0x34, 0xec, 0xa4, 0x41, 0x09, 0xd1, 0x99,
+ 0xf2, 0xba, 0x62, 0x2a, 0xcf, 0x87, 0x5f, 0x17,
+ 0x88, 0xc0, 0x18, 0x50, 0xb5, 0xfd, 0x25, 0x6d,
+ 0x04, 0x4c, 0x94, 0xdc, 0x39, 0x71, 0xa9, 0xe1,
+ 0x7e, 0x36, 0xee, 0xa6, 0x43, 0x0b, 0xd3, 0x9b,
+ 0xf0, 0xb8, 0x60, 0x28, 0xcd, 0x85, 0x5d, 0x15,
+ 0x8a, 0xc2, 0x1a, 0x52, 0xb7, 0xff, 0x27, 0x6f,
+ 0xf1, 0xb9, 0x61, 0x29, 0xcc, 0x84, 0x5c, 0x14,
+ 0x8b, 0xc3, 0x1b, 0x53, 0xb6, 0xfe, 0x26, 0x6e,
+ 0x05, 0x4d, 0x95, 0xdd, 0x38, 0x70, 0xa8, 0xe0,
+ 0x7f, 0x37, 0xef, 0xa7, 0x42, 0x0a, 0xd2, 0x9a,
+ },
+ {
+ 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2,
+ 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90,
+ 0xe4, 0xad, 0x76, 0x3f, 0xdd, 0x94, 0x4f, 0x06,
+ 0x96, 0xdf, 0x04, 0x4d, 0xaf, 0xe6, 0x3d, 0x74,
+ 0xd5, 0x9c, 0x47, 0x0e, 0xec, 0xa5, 0x7e, 0x37,
+ 0xa7, 0xee, 0x35, 0x7c, 0x9e, 0xd7, 0x0c, 0x45,
+ 0x31, 0x78, 0xa3, 0xea, 0x08, 0x41, 0x9a, 0xd3,
+ 0x43, 0x0a, 0xd1, 0x98, 0x7a, 0x33, 0xe8, 0xa1,
+ 0xb7, 0xfe, 0x25, 0x6c, 0x8e, 0xc7, 0x1c, 0x55,
+ 0xc5, 0x8c, 0x57, 0x1e, 0xfc, 0xb5, 0x6e, 0x27,
+ 0x53, 0x1a, 0xc1, 0x88, 0x6a, 0x23, 0xf8, 0xb1,
+ 0x21, 0x68, 0xb3, 0xfa, 0x18, 0x51, 0x8a, 0xc3,
+ 0x62, 0x2b, 0xf0, 0xb9, 0x5b, 0x12, 0xc9, 0x80,
+ 0x10, 0x59, 0x82, 0xcb, 0x29, 0x60, 0xbb, 0xf2,
+ 0x86, 0xcf, 0x14, 0x5d, 0xbf, 0xf6, 0x2d, 0x64,
+ 0xf4, 0xbd, 0x66, 0x2f, 0xcd, 0x84, 0x5f, 0x16,
+ 0x73, 0x3a, 0xe1, 0xa8, 0x4a, 0x03, 0xd8, 0x91,
+ 0x01, 0x48, 0x93, 0xda, 0x38, 0x71, 0xaa, 0xe3,
+ 0x97, 0xde, 0x05, 0x4c, 0xae, 0xe7, 0x3c, 0x75,
+ 0xe5, 0xac, 0x77, 0x3e, 0xdc, 0x95, 0x4e, 0x07,
+ 0xa6, 0xef, 0x34, 0x7d, 0x9f, 0xd6, 0x0d, 0x44,
+ 0xd4, 0x9d, 0x46, 0x0f, 0xed, 0xa4, 0x7f, 0x36,
+ 0x42, 0x0b, 0xd0, 0x99, 0x7b, 0x32, 0xe9, 0xa0,
+ 0x30, 0x79, 0xa2, 0xeb, 0x09, 0x40, 0x9b, 0xd2,
+ 0xc4, 0x8d, 0x56, 0x1f, 0xfd, 0xb4, 0x6f, 0x26,
+ 0xb6, 0xff, 0x24, 0x6d, 0x8f, 0xc6, 0x1d, 0x54,
+ 0x20, 0x69, 0xb2, 0xfb, 0x19, 0x50, 0x8b, 0xc2,
+ 0x52, 0x1b, 0xc0, 0x89, 0x6b, 0x22, 0xf9, 0xb0,
+ 0x11, 0x58, 0x83, 0xca, 0x28, 0x61, 0xba, 0xf3,
+ 0x63, 0x2a, 0xf1, 0xb8, 0x5a, 0x13, 0xc8, 0x81,
+ 0xf5, 0xbc, 0x67, 0x2e, 0xcc, 0x85, 0x5e, 0x17,
+ 0x87, 0xce, 0x15, 0x5c, 0xbe, 0xf7, 0x2c, 0x65,
+ },
+ {
+ 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb,
+ 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81,
+ 0xd4, 0x9e, 0x40, 0x0a, 0xe1, 0xab, 0x75, 0x3f,
+ 0xbe, 0xf4, 0x2a, 0x60, 0x8b, 0xc1, 0x1f, 0x55,
+ 0xb5, 0xff, 0x21, 0x6b, 0x80, 0xca, 0x14, 0x5e,
+ 0xdf, 0x95, 0x4b, 0x01, 0xea, 0xa0, 0x7e, 0x34,
+ 0x61, 0x2b, 0xf5, 0xbf, 0x54, 0x1e, 0xc0, 0x8a,
+ 0x0b, 0x41, 0x9f, 0xd5, 0x3e, 0x74, 0xaa, 0xe0,
+ 0x77, 0x3d, 0xe3, 0xa9, 0x42, 0x08, 0xd6, 0x9c,
+ 0x1d, 0x57, 0x89, 0xc3, 0x28, 0x62, 0xbc, 0xf6,
+ 0xa3, 0xe9, 0x37, 0x7d, 0x96, 0xdc, 0x02, 0x48,
+ 0xc9, 0x83, 0x5d, 0x17, 0xfc, 0xb6, 0x68, 0x22,
+ 0xc2, 0x88, 0x56, 0x1c, 0xf7, 0xbd, 0x63, 0x29,
+ 0xa8, 0xe2, 0x3c, 0x76, 0x9d, 0xd7, 0x09, 0x43,
+ 0x16, 0x5c, 0x82, 0xc8, 0x23, 0x69, 0xb7, 0xfd,
+ 0x7c, 0x36, 0xe8, 0xa2, 0x49, 0x03, 0xdd, 0x97,
+ 0xee, 0xa4, 0x7a, 0x30, 0xdb, 0x91, 0x4f, 0x05,
+ 0x84, 0xce, 0x10, 0x5a, 0xb1, 0xfb, 0x25, 0x6f,
+ 0x3a, 0x70, 0xae, 0xe4, 0x0f, 0x45, 0x9b, 0xd1,
+ 0x50, 0x1a, 0xc4, 0x8e, 0x65, 0x2f, 0xf1, 0xbb,
+ 0x5b, 0x11, 0xcf, 0x85, 0x6e, 0x24, 0xfa, 0xb0,
+ 0x31, 0x7b, 0xa5, 0xef, 0x04, 0x4e, 0x90, 0xda,
+ 0x8f, 0xc5, 0x1b, 0x51, 0xba, 0xf0, 0x2e, 0x64,
+ 0xe5, 0xaf, 0x71, 0x3b, 0xd0, 0x9a, 0x44, 0x0e,
+ 0x99, 0xd3, 0x0d, 0x47, 0xac, 0xe6, 0x38, 0x72,
+ 0xf3, 0xb9, 0x67, 0x2d, 0xc6, 0x8c, 0x52, 0x18,
+ 0x4d, 0x07, 0xd9, 0x93, 0x78, 0x32, 0xec, 0xa6,
+ 0x27, 0x6d, 0xb3, 0xf9, 0x12, 0x58, 0x86, 0xcc,
+ 0x2c, 0x66, 0xb8, 0xf2, 0x19, 0x53, 0x8d, 0xc7,
+ 0x46, 0x0c, 0xd2, 0x98, 0x73, 0x39, 0xe7, 0xad,
+ 0xf8, 0xb2, 0x6c, 0x26, 0xcd, 0x87, 0x59, 0x13,
+ 0x92, 0xd8, 0x06, 0x4c, 0xa7, 0xed, 0x33, 0x79,
+ },
+ {
+ 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec,
+ 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e,
+ 0xc4, 0x8f, 0x52, 0x19, 0xf5, 0xbe, 0x63, 0x28,
+ 0xa6, 0xed, 0x30, 0x7b, 0x97, 0xdc, 0x01, 0x4a,
+ 0x95, 0xde, 0x03, 0x48, 0xa4, 0xef, 0x32, 0x79,
+ 0xf7, 0xbc, 0x61, 0x2a, 0xc6, 0x8d, 0x50, 0x1b,
+ 0x51, 0x1a, 0xc7, 0x8c, 0x60, 0x2b, 0xf6, 0xbd,
+ 0x33, 0x78, 0xa5, 0xee, 0x02, 0x49, 0x94, 0xdf,
+ 0x37, 0x7c, 0xa1, 0xea, 0x06, 0x4d, 0x90, 0xdb,
+ 0x55, 0x1e, 0xc3, 0x88, 0x64, 0x2f, 0xf2, 0xb9,
+ 0xf3, 0xb8, 0x65, 0x2e, 0xc2, 0x89, 0x54, 0x1f,
+ 0x91, 0xda, 0x07, 0x4c, 0xa0, 0xeb, 0x36, 0x7d,
+ 0xa2, 0xe9, 0x34, 0x7f, 0x93, 0xd8, 0x05, 0x4e,
+ 0xc0, 0x8b, 0x56, 0x1d, 0xf1, 0xba, 0x67, 0x2c,
+ 0x66, 0x2d, 0xf0, 0xbb, 0x57, 0x1c, 0xc1, 0x8a,
+ 0x04, 0x4f, 0x92, 0xd9, 0x35, 0x7e, 0xa3, 0xe8,
+ 0x6e, 0x25, 0xf8, 0xb3, 0x5f, 0x14, 0xc9, 0x82,
+ 0x0c, 0x47, 0x9a, 0xd1, 0x3d, 0x76, 0xab, 0xe0,
+ 0xaa, 0xe1, 0x3c, 0x77, 0x9b, 0xd0, 0x0d, 0x46,
+ 0xc8, 0x83, 0x5e, 0x15, 0xf9, 0xb2, 0x6f, 0x24,
+ 0xfb, 0xb0, 0x6d, 0x26, 0xca, 0x81, 0x5c, 0x17,
+ 0x99, 0xd2, 0x0f, 0x44, 0xa8, 0xe3, 0x3e, 0x75,
+ 0x3f, 0x74, 0xa9, 0xe2, 0x0e, 0x45, 0x98, 0xd3,
+ 0x5d, 0x16, 0xcb, 0x80, 0x6c, 0x27, 0xfa, 0xb1,
+ 0x59, 0x12, 0xcf, 0x84, 0x68, 0x23, 0xfe, 0xb5,
+ 0x3b, 0x70, 0xad, 0xe6, 0x0a, 0x41, 0x9c, 0xd7,
+ 0x9d, 0xd6, 0x0b, 0x40, 0xac, 0xe7, 0x3a, 0x71,
+ 0xff, 0xb4, 0x69, 0x22, 0xce, 0x85, 0x58, 0x13,
+ 0xcc, 0x87, 0x5a, 0x11, 0xfd, 0xb6, 0x6b, 0x20,
+ 0xae, 0xe5, 0x38, 0x73, 0x9f, 0xd4, 0x09, 0x42,
+ 0x08, 0x43, 0x9e, 0xd5, 0x39, 0x72, 0xaf, 0xe4,
+ 0x6a, 0x21, 0xfc, 0xb7, 0x5b, 0x10, 0xcd, 0x86,
+ },
+ {
+ 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9,
+ 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3,
+ 0xb4, 0xf8, 0x2c, 0x60, 0x99, 0xd5, 0x01, 0x4d,
+ 0xee, 0xa2, 0x76, 0x3a, 0xc3, 0x8f, 0x5b, 0x17,
+ 0x75, 0x39, 0xed, 0xa1, 0x58, 0x14, 0xc0, 0x8c,
+ 0x2f, 0x63, 0xb7, 0xfb, 0x02, 0x4e, 0x9a, 0xd6,
+ 0xc1, 0x8d, 0x59, 0x15, 0xec, 0xa0, 0x74, 0x38,
+ 0x9b, 0xd7, 0x03, 0x4f, 0xb6, 0xfa, 0x2e, 0x62,
+ 0xea, 0xa6, 0x72, 0x3e, 0xc7, 0x8b, 0x5f, 0x13,
+ 0xb0, 0xfc, 0x28, 0x64, 0x9d, 0xd1, 0x05, 0x49,
+ 0x5e, 0x12, 0xc6, 0x8a, 0x73, 0x3f, 0xeb, 0xa7,
+ 0x04, 0x48, 0x9c, 0xd0, 0x29, 0x65, 0xb1, 0xfd,
+ 0x9f, 0xd3, 0x07, 0x4b, 0xb2, 0xfe, 0x2a, 0x66,
+ 0xc5, 0x89, 0x5d, 0x11, 0xe8, 0xa4, 0x70, 0x3c,
+ 0x2b, 0x67, 0xb3, 0xff, 0x06, 0x4a, 0x9e, 0xd2,
+ 0x71, 0x3d, 0xe9, 0xa5, 0x5c, 0x10, 0xc4, 0x88,
+ 0xc9, 0x85, 0x51, 0x1d, 0xe4, 0xa8, 0x7c, 0x30,
+ 0x93, 0xdf, 0x0b, 0x47, 0xbe, 0xf2, 0x26, 0x6a,
+ 0x7d, 0x31, 0xe5, 0xa9, 0x50, 0x1c, 0xc8, 0x84,
+ 0x27, 0x6b, 0xbf, 0xf3, 0x0a, 0x46, 0x92, 0xde,
+ 0xbc, 0xf0, 0x24, 0x68, 0x91, 0xdd, 0x09, 0x45,
+ 0xe6, 0xaa, 0x7e, 0x32, 0xcb, 0x87, 0x53, 0x1f,
+ 0x08, 0x44, 0x90, 0xdc, 0x25, 0x69, 0xbd, 0xf1,
+ 0x52, 0x1e, 0xca, 0x86, 0x7f, 0x33, 0xe7, 0xab,
+ 0x23, 0x6f, 0xbb, 0xf7, 0x0e, 0x42, 0x96, 0xda,
+ 0x79, 0x35, 0xe1, 0xad, 0x54, 0x18, 0xcc, 0x80,
+ 0x97, 0xdb, 0x0f, 0x43, 0xba, 0xf6, 0x22, 0x6e,
+ 0xcd, 0x81, 0x55, 0x19, 0xe0, 0xac, 0x78, 0x34,
+ 0x56, 0x1a, 0xce, 0x82, 0x7b, 0x37, 0xe3, 0xaf,
+ 0x0c, 0x40, 0x94, 0xd8, 0x21, 0x6d, 0xb9, 0xf5,
+ 0xe2, 0xae, 0x7a, 0x36, 0xcf, 0x83, 0x57, 0x1b,
+ 0xb8, 0xf4, 0x20, 0x6c, 0x95, 0xd9, 0x0d, 0x41,
+ },
+ {
+ 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe,
+ 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac,
+ 0xa4, 0xe9, 0x3e, 0x73, 0x8d, 0xc0, 0x17, 0x5a,
+ 0xf6, 0xbb, 0x6c, 0x21, 0xdf, 0x92, 0x45, 0x08,
+ 0x55, 0x18, 0xcf, 0x82, 0x7c, 0x31, 0xe6, 0xab,
+ 0x07, 0x4a, 0x9d, 0xd0, 0x2e, 0x63, 0xb4, 0xf9,
+ 0xf1, 0xbc, 0x6b, 0x26, 0xd8, 0x95, 0x42, 0x0f,
+ 0xa3, 0xee, 0x39, 0x74, 0x8a, 0xc7, 0x10, 0x5d,
+ 0xaa, 0xe7, 0x30, 0x7d, 0x83, 0xce, 0x19, 0x54,
+ 0xf8, 0xb5, 0x62, 0x2f, 0xd1, 0x9c, 0x4b, 0x06,
+ 0x0e, 0x43, 0x94, 0xd9, 0x27, 0x6a, 0xbd, 0xf0,
+ 0x5c, 0x11, 0xc6, 0x8b, 0x75, 0x38, 0xef, 0xa2,
+ 0xff, 0xb2, 0x65, 0x28, 0xd6, 0x9b, 0x4c, 0x01,
+ 0xad, 0xe0, 0x37, 0x7a, 0x84, 0xc9, 0x1e, 0x53,
+ 0x5b, 0x16, 0xc1, 0x8c, 0x72, 0x3f, 0xe8, 0xa5,
+ 0x09, 0x44, 0x93, 0xde, 0x20, 0x6d, 0xba, 0xf7,
+ 0x49, 0x04, 0xd3, 0x9e, 0x60, 0x2d, 0xfa, 0xb7,
+ 0x1b, 0x56, 0x81, 0xcc, 0x32, 0x7f, 0xa8, 0xe5,
+ 0xed, 0xa0, 0x77, 0x3a, 0xc4, 0x89, 0x5e, 0x13,
+ 0xbf, 0xf2, 0x25, 0x68, 0x96, 0xdb, 0x0c, 0x41,
+ 0x1c, 0x51, 0x86, 0xcb, 0x35, 0x78, 0xaf, 0xe2,
+ 0x4e, 0x03, 0xd4, 0x99, 0x67, 0x2a, 0xfd, 0xb0,
+ 0xb8, 0xf5, 0x22, 0x6f, 0x91, 0xdc, 0x0b, 0x46,
+ 0xea, 0xa7, 0x70, 0x3d, 0xc3, 0x8e, 0x59, 0x14,
+ 0xe3, 0xae, 0x79, 0x34, 0xca, 0x87, 0x50, 0x1d,
+ 0xb1, 0xfc, 0x2b, 0x66, 0x98, 0xd5, 0x02, 0x4f,
+ 0x47, 0x0a, 0xdd, 0x90, 0x6e, 0x23, 0xf4, 0xb9,
+ 0x15, 0x58, 0x8f, 0xc2, 0x3c, 0x71, 0xa6, 0xeb,
+ 0xb6, 0xfb, 0x2c, 0x61, 0x9f, 0xd2, 0x05, 0x48,
+ 0xe4, 0xa9, 0x7e, 0x33, 0xcd, 0x80, 0x57, 0x1a,
+ 0x12, 0x5f, 0x88, 0xc5, 0x3b, 0x76, 0xa1, 0xec,
+ 0x40, 0x0d, 0xda, 0x97, 0x69, 0x24, 0xf3, 0xbe,
+ },
+ {
+ 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+ 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd,
+ 0x94, 0xda, 0x08, 0x46, 0xb1, 0xff, 0x2d, 0x63,
+ 0xde, 0x90, 0x42, 0x0c, 0xfb, 0xb5, 0x67, 0x29,
+ 0x35, 0x7b, 0xa9, 0xe7, 0x10, 0x5e, 0x8c, 0xc2,
+ 0x7f, 0x31, 0xe3, 0xad, 0x5a, 0x14, 0xc6, 0x88,
+ 0xa1, 0xef, 0x3d, 0x73, 0x84, 0xca, 0x18, 0x56,
+ 0xeb, 0xa5, 0x77, 0x39, 0xce, 0x80, 0x52, 0x1c,
+ 0x6a, 0x24, 0xf6, 0xb8, 0x4f, 0x01, 0xd3, 0x9d,
+ 0x20, 0x6e, 0xbc, 0xf2, 0x05, 0x4b, 0x99, 0xd7,
+ 0xfe, 0xb0, 0x62, 0x2c, 0xdb, 0x95, 0x47, 0x09,
+ 0xb4, 0xfa, 0x28, 0x66, 0x91, 0xdf, 0x0d, 0x43,
+ 0x5f, 0x11, 0xc3, 0x8d, 0x7a, 0x34, 0xe6, 0xa8,
+ 0x15, 0x5b, 0x89, 0xc7, 0x30, 0x7e, 0xac, 0xe2,
+ 0xcb, 0x85, 0x57, 0x19, 0xee, 0xa0, 0x72, 0x3c,
+ 0x81, 0xcf, 0x1d, 0x53, 0xa4, 0xea, 0x38, 0x76,
+ 0xd4, 0x9a, 0x48, 0x06, 0xf1, 0xbf, 0x6d, 0x23,
+ 0x9e, 0xd0, 0x02, 0x4c, 0xbb, 0xf5, 0x27, 0x69,
+ 0x40, 0x0e, 0xdc, 0x92, 0x65, 0x2b, 0xf9, 0xb7,
+ 0x0a, 0x44, 0x96, 0xd8, 0x2f, 0x61, 0xb3, 0xfd,
+ 0xe1, 0xaf, 0x7d, 0x33, 0xc4, 0x8a, 0x58, 0x16,
+ 0xab, 0xe5, 0x37, 0x79, 0x8e, 0xc0, 0x12, 0x5c,
+ 0x75, 0x3b, 0xe9, 0xa7, 0x50, 0x1e, 0xcc, 0x82,
+ 0x3f, 0x71, 0xa3, 0xed, 0x1a, 0x54, 0x86, 0xc8,
+ 0xbe, 0xf0, 0x22, 0x6c, 0x9b, 0xd5, 0x07, 0x49,
+ 0xf4, 0xba, 0x68, 0x26, 0xd1, 0x9f, 0x4d, 0x03,
+ 0x2a, 0x64, 0xb6, 0xf8, 0x0f, 0x41, 0x93, 0xdd,
+ 0x60, 0x2e, 0xfc, 0xb2, 0x45, 0x0b, 0xd9, 0x97,
+ 0x8b, 0xc5, 0x17, 0x59, 0xae, 0xe0, 0x32, 0x7c,
+ 0xc1, 0x8f, 0x5d, 0x13, 0xe4, 0xaa, 0x78, 0x36,
+ 0x1f, 0x51, 0x83, 0xcd, 0x3a, 0x74, 0xa6, 0xe8,
+ 0x55, 0x1b, 0xc9, 0x87, 0x70, 0x3e, 0xec, 0xa2,
+ },
+ {
+ 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0,
+ 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2,
+ 0x84, 0xcb, 0x1a, 0x55, 0xa5, 0xea, 0x3b, 0x74,
+ 0xc6, 0x89, 0x58, 0x17, 0xe7, 0xa8, 0x79, 0x36,
+ 0x15, 0x5a, 0x8b, 0xc4, 0x34, 0x7b, 0xaa, 0xe5,
+ 0x57, 0x18, 0xc9, 0x86, 0x76, 0x39, 0xe8, 0xa7,
+ 0x91, 0xde, 0x0f, 0x40, 0xb0, 0xff, 0x2e, 0x61,
+ 0xd3, 0x9c, 0x4d, 0x02, 0xf2, 0xbd, 0x6c, 0x23,
+ 0x2a, 0x65, 0xb4, 0xfb, 0x0b, 0x44, 0x95, 0xda,
+ 0x68, 0x27, 0xf6, 0xb9, 0x49, 0x06, 0xd7, 0x98,
+ 0xae, 0xe1, 0x30, 0x7f, 0x8f, 0xc0, 0x11, 0x5e,
+ 0xec, 0xa3, 0x72, 0x3d, 0xcd, 0x82, 0x53, 0x1c,
+ 0x3f, 0x70, 0xa1, 0xee, 0x1e, 0x51, 0x80, 0xcf,
+ 0x7d, 0x32, 0xe3, 0xac, 0x5c, 0x13, 0xc2, 0x8d,
+ 0xbb, 0xf4, 0x25, 0x6a, 0x9a, 0xd5, 0x04, 0x4b,
+ 0xf9, 0xb6, 0x67, 0x28, 0xd8, 0x97, 0x46, 0x09,
+ 0x54, 0x1b, 0xca, 0x85, 0x75, 0x3a, 0xeb, 0xa4,
+ 0x16, 0x59, 0x88, 0xc7, 0x37, 0x78, 0xa9, 0xe6,
+ 0xd0, 0x9f, 0x4e, 0x01, 0xf1, 0xbe, 0x6f, 0x20,
+ 0x92, 0xdd, 0x0c, 0x43, 0xb3, 0xfc, 0x2d, 0x62,
+ 0x41, 0x0e, 0xdf, 0x90, 0x60, 0x2f, 0xfe, 0xb1,
+ 0x03, 0x4c, 0x9d, 0xd2, 0x22, 0x6d, 0xbc, 0xf3,
+ 0xc5, 0x8a, 0x5b, 0x14, 0xe4, 0xab, 0x7a, 0x35,
+ 0x87, 0xc8, 0x19, 0x56, 0xa6, 0xe9, 0x38, 0x77,
+ 0x7e, 0x31, 0xe0, 0xaf, 0x5f, 0x10, 0xc1, 0x8e,
+ 0x3c, 0x73, 0xa2, 0xed, 0x1d, 0x52, 0x83, 0xcc,
+ 0xfa, 0xb5, 0x64, 0x2b, 0xdb, 0x94, 0x45, 0x0a,
+ 0xb8, 0xf7, 0x26, 0x69, 0x99, 0xd6, 0x07, 0x48,
+ 0x6b, 0x24, 0xf5, 0xba, 0x4a, 0x05, 0xd4, 0x9b,
+ 0x29, 0x66, 0xb7, 0xf8, 0x08, 0x47, 0x96, 0xd9,
+ 0xef, 0xa0, 0x71, 0x3e, 0xce, 0x81, 0x50, 0x1f,
+ 0xad, 0xe2, 0x33, 0x7c, 0x8c, 0xc3, 0x12, 0x5d,
+ },
+ {
+ 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad,
+ 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17,
+ 0x69, 0x39, 0xc9, 0x99, 0x34, 0x64, 0x94, 0xc4,
+ 0xd3, 0x83, 0x73, 0x23, 0x8e, 0xde, 0x2e, 0x7e,
+ 0xd2, 0x82, 0x72, 0x22, 0x8f, 0xdf, 0x2f, 0x7f,
+ 0x68, 0x38, 0xc8, 0x98, 0x35, 0x65, 0x95, 0xc5,
+ 0xbb, 0xeb, 0x1b, 0x4b, 0xe6, 0xb6, 0x46, 0x16,
+ 0x01, 0x51, 0xa1, 0xf1, 0x5c, 0x0c, 0xfc, 0xac,
+ 0xb9, 0xe9, 0x19, 0x49, 0xe4, 0xb4, 0x44, 0x14,
+ 0x03, 0x53, 0xa3, 0xf3, 0x5e, 0x0e, 0xfe, 0xae,
+ 0xd0, 0x80, 0x70, 0x20, 0x8d, 0xdd, 0x2d, 0x7d,
+ 0x6a, 0x3a, 0xca, 0x9a, 0x37, 0x67, 0x97, 0xc7,
+ 0x6b, 0x3b, 0xcb, 0x9b, 0x36, 0x66, 0x96, 0xc6,
+ 0xd1, 0x81, 0x71, 0x21, 0x8c, 0xdc, 0x2c, 0x7c,
+ 0x02, 0x52, 0xa2, 0xf2, 0x5f, 0x0f, 0xff, 0xaf,
+ 0xb8, 0xe8, 0x18, 0x48, 0xe5, 0xb5, 0x45, 0x15,
+ 0x6f, 0x3f, 0xcf, 0x9f, 0x32, 0x62, 0x92, 0xc2,
+ 0xd5, 0x85, 0x75, 0x25, 0x88, 0xd8, 0x28, 0x78,
+ 0x06, 0x56, 0xa6, 0xf6, 0x5b, 0x0b, 0xfb, 0xab,
+ 0xbc, 0xec, 0x1c, 0x4c, 0xe1, 0xb1, 0x41, 0x11,
+ 0xbd, 0xed, 0x1d, 0x4d, 0xe0, 0xb0, 0x40, 0x10,
+ 0x07, 0x57, 0xa7, 0xf7, 0x5a, 0x0a, 0xfa, 0xaa,
+ 0xd4, 0x84, 0x74, 0x24, 0x89, 0xd9, 0x29, 0x79,
+ 0x6e, 0x3e, 0xce, 0x9e, 0x33, 0x63, 0x93, 0xc3,
+ 0xd6, 0x86, 0x76, 0x26, 0x8b, 0xdb, 0x2b, 0x7b,
+ 0x6c, 0x3c, 0xcc, 0x9c, 0x31, 0x61, 0x91, 0xc1,
+ 0xbf, 0xef, 0x1f, 0x4f, 0xe2, 0xb2, 0x42, 0x12,
+ 0x05, 0x55, 0xa5, 0xf5, 0x58, 0x08, 0xf8, 0xa8,
+ 0x04, 0x54, 0xa4, 0xf4, 0x59, 0x09, 0xf9, 0xa9,
+ 0xbe, 0xee, 0x1e, 0x4e, 0xe3, 0xb3, 0x43, 0x13,
+ 0x6d, 0x3d, 0xcd, 0x9d, 0x30, 0x60, 0x90, 0xc0,
+ 0xd7, 0x87, 0x77, 0x27, 0x8a, 0xda, 0x2a, 0x7a,
+ },
+ {
+ 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa,
+ 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18,
+ 0x79, 0x28, 0xdb, 0x8a, 0x20, 0x71, 0x82, 0xd3,
+ 0xcb, 0x9a, 0x69, 0x38, 0x92, 0xc3, 0x30, 0x61,
+ 0xf2, 0xa3, 0x50, 0x01, 0xab, 0xfa, 0x09, 0x58,
+ 0x40, 0x11, 0xe2, 0xb3, 0x19, 0x48, 0xbb, 0xea,
+ 0x8b, 0xda, 0x29, 0x78, 0xd2, 0x83, 0x70, 0x21,
+ 0x39, 0x68, 0x9b, 0xca, 0x60, 0x31, 0xc2, 0x93,
+ 0xf9, 0xa8, 0x5b, 0x0a, 0xa0, 0xf1, 0x02, 0x53,
+ 0x4b, 0x1a, 0xe9, 0xb8, 0x12, 0x43, 0xb0, 0xe1,
+ 0x80, 0xd1, 0x22, 0x73, 0xd9, 0x88, 0x7b, 0x2a,
+ 0x32, 0x63, 0x90, 0xc1, 0x6b, 0x3a, 0xc9, 0x98,
+ 0x0b, 0x5a, 0xa9, 0xf8, 0x52, 0x03, 0xf0, 0xa1,
+ 0xb9, 0xe8, 0x1b, 0x4a, 0xe0, 0xb1, 0x42, 0x13,
+ 0x72, 0x23, 0xd0, 0x81, 0x2b, 0x7a, 0x89, 0xd8,
+ 0xc0, 0x91, 0x62, 0x33, 0x99, 0xc8, 0x3b, 0x6a,
+ 0xef, 0xbe, 0x4d, 0x1c, 0xb6, 0xe7, 0x14, 0x45,
+ 0x5d, 0x0c, 0xff, 0xae, 0x04, 0x55, 0xa6, 0xf7,
+ 0x96, 0xc7, 0x34, 0x65, 0xcf, 0x9e, 0x6d, 0x3c,
+ 0x24, 0x75, 0x86, 0xd7, 0x7d, 0x2c, 0xdf, 0x8e,
+ 0x1d, 0x4c, 0xbf, 0xee, 0x44, 0x15, 0xe6, 0xb7,
+ 0xaf, 0xfe, 0x0d, 0x5c, 0xf6, 0xa7, 0x54, 0x05,
+ 0x64, 0x35, 0xc6, 0x97, 0x3d, 0x6c, 0x9f, 0xce,
+ 0xd6, 0x87, 0x74, 0x25, 0x8f, 0xde, 0x2d, 0x7c,
+ 0x16, 0x47, 0xb4, 0xe5, 0x4f, 0x1e, 0xed, 0xbc,
+ 0xa4, 0xf5, 0x06, 0x57, 0xfd, 0xac, 0x5f, 0x0e,
+ 0x6f, 0x3e, 0xcd, 0x9c, 0x36, 0x67, 0x94, 0xc5,
+ 0xdd, 0x8c, 0x7f, 0x2e, 0x84, 0xd5, 0x26, 0x77,
+ 0xe4, 0xb5, 0x46, 0x17, 0xbd, 0xec, 0x1f, 0x4e,
+ 0x56, 0x07, 0xf4, 0xa5, 0x0f, 0x5e, 0xad, 0xfc,
+ 0x9d, 0xcc, 0x3f, 0x6e, 0xc4, 0x95, 0x66, 0x37,
+ 0x2f, 0x7e, 0x8d, 0xdc, 0x76, 0x27, 0xd4, 0x85,
+ },
+ {
+ 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3,
+ 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09,
+ 0x49, 0x1b, 0xed, 0xbf, 0x1c, 0x4e, 0xb8, 0xea,
+ 0xe3, 0xb1, 0x47, 0x15, 0xb6, 0xe4, 0x12, 0x40,
+ 0x92, 0xc0, 0x36, 0x64, 0xc7, 0x95, 0x63, 0x31,
+ 0x38, 0x6a, 0x9c, 0xce, 0x6d, 0x3f, 0xc9, 0x9b,
+ 0xdb, 0x89, 0x7f, 0x2d, 0x8e, 0xdc, 0x2a, 0x78,
+ 0x71, 0x23, 0xd5, 0x87, 0x24, 0x76, 0x80, 0xd2,
+ 0x39, 0x6b, 0x9d, 0xcf, 0x6c, 0x3e, 0xc8, 0x9a,
+ 0x93, 0xc1, 0x37, 0x65, 0xc6, 0x94, 0x62, 0x30,
+ 0x70, 0x22, 0xd4, 0x86, 0x25, 0x77, 0x81, 0xd3,
+ 0xda, 0x88, 0x7e, 0x2c, 0x8f, 0xdd, 0x2b, 0x79,
+ 0xab, 0xf9, 0x0f, 0x5d, 0xfe, 0xac, 0x5a, 0x08,
+ 0x01, 0x53, 0xa5, 0xf7, 0x54, 0x06, 0xf0, 0xa2,
+ 0xe2, 0xb0, 0x46, 0x14, 0xb7, 0xe5, 0x13, 0x41,
+ 0x48, 0x1a, 0xec, 0xbe, 0x1d, 0x4f, 0xb9, 0xeb,
+ 0x72, 0x20, 0xd6, 0x84, 0x27, 0x75, 0x83, 0xd1,
+ 0xd8, 0x8a, 0x7c, 0x2e, 0x8d, 0xdf, 0x29, 0x7b,
+ 0x3b, 0x69, 0x9f, 0xcd, 0x6e, 0x3c, 0xca, 0x98,
+ 0x91, 0xc3, 0x35, 0x67, 0xc4, 0x96, 0x60, 0x32,
+ 0xe0, 0xb2, 0x44, 0x16, 0xb5, 0xe7, 0x11, 0x43,
+ 0x4a, 0x18, 0xee, 0xbc, 0x1f, 0x4d, 0xbb, 0xe9,
+ 0xa9, 0xfb, 0x0d, 0x5f, 0xfc, 0xae, 0x58, 0x0a,
+ 0x03, 0x51, 0xa7, 0xf5, 0x56, 0x04, 0xf2, 0xa0,
+ 0x4b, 0x19, 0xef, 0xbd, 0x1e, 0x4c, 0xba, 0xe8,
+ 0xe1, 0xb3, 0x45, 0x17, 0xb4, 0xe6, 0x10, 0x42,
+ 0x02, 0x50, 0xa6, 0xf4, 0x57, 0x05, 0xf3, 0xa1,
+ 0xa8, 0xfa, 0x0c, 0x5e, 0xfd, 0xaf, 0x59, 0x0b,
+ 0xd9, 0x8b, 0x7d, 0x2f, 0x8c, 0xde, 0x28, 0x7a,
+ 0x73, 0x21, 0xd7, 0x85, 0x26, 0x74, 0x82, 0xd0,
+ 0x90, 0xc2, 0x34, 0x66, 0xc5, 0x97, 0x61, 0x33,
+ 0x3a, 0x68, 0x9e, 0xcc, 0x6f, 0x3d, 0xcb, 0x99,
+ },
+ {
+ 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+ 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06,
+ 0x59, 0x0a, 0xff, 0xac, 0x08, 0x5b, 0xae, 0xfd,
+ 0xfb, 0xa8, 0x5d, 0x0e, 0xaa, 0xf9, 0x0c, 0x5f,
+ 0xb2, 0xe1, 0x14, 0x47, 0xe3, 0xb0, 0x45, 0x16,
+ 0x10, 0x43, 0xb6, 0xe5, 0x41, 0x12, 0xe7, 0xb4,
+ 0xeb, 0xb8, 0x4d, 0x1e, 0xba, 0xe9, 0x1c, 0x4f,
+ 0x49, 0x1a, 0xef, 0xbc, 0x18, 0x4b, 0xbe, 0xed,
+ 0x79, 0x2a, 0xdf, 0x8c, 0x28, 0x7b, 0x8e, 0xdd,
+ 0xdb, 0x88, 0x7d, 0x2e, 0x8a, 0xd9, 0x2c, 0x7f,
+ 0x20, 0x73, 0x86, 0xd5, 0x71, 0x22, 0xd7, 0x84,
+ 0x82, 0xd1, 0x24, 0x77, 0xd3, 0x80, 0x75, 0x26,
+ 0xcb, 0x98, 0x6d, 0x3e, 0x9a, 0xc9, 0x3c, 0x6f,
+ 0x69, 0x3a, 0xcf, 0x9c, 0x38, 0x6b, 0x9e, 0xcd,
+ 0x92, 0xc1, 0x34, 0x67, 0xc3, 0x90, 0x65, 0x36,
+ 0x30, 0x63, 0x96, 0xc5, 0x61, 0x32, 0xc7, 0x94,
+ 0xf2, 0xa1, 0x54, 0x07, 0xa3, 0xf0, 0x05, 0x56,
+ 0x50, 0x03, 0xf6, 0xa5, 0x01, 0x52, 0xa7, 0xf4,
+ 0xab, 0xf8, 0x0d, 0x5e, 0xfa, 0xa9, 0x5c, 0x0f,
+ 0x09, 0x5a, 0xaf, 0xfc, 0x58, 0x0b, 0xfe, 0xad,
+ 0x40, 0x13, 0xe6, 0xb5, 0x11, 0x42, 0xb7, 0xe4,
+ 0xe2, 0xb1, 0x44, 0x17, 0xb3, 0xe0, 0x15, 0x46,
+ 0x19, 0x4a, 0xbf, 0xec, 0x48, 0x1b, 0xee, 0xbd,
+ 0xbb, 0xe8, 0x1d, 0x4e, 0xea, 0xb9, 0x4c, 0x1f,
+ 0x8b, 0xd8, 0x2d, 0x7e, 0xda, 0x89, 0x7c, 0x2f,
+ 0x29, 0x7a, 0x8f, 0xdc, 0x78, 0x2b, 0xde, 0x8d,
+ 0xd2, 0x81, 0x74, 0x27, 0x83, 0xd0, 0x25, 0x76,
+ 0x70, 0x23, 0xd6, 0x85, 0x21, 0x72, 0x87, 0xd4,
+ 0x39, 0x6a, 0x9f, 0xcc, 0x68, 0x3b, 0xce, 0x9d,
+ 0x9b, 0xc8, 0x3d, 0x6e, 0xca, 0x99, 0x6c, 0x3f,
+ 0x60, 0x33, 0xc6, 0x95, 0x31, 0x62, 0x97, 0xc4,
+ 0xc2, 0x91, 0x64, 0x37, 0x93, 0xc0, 0x35, 0x66,
+ },
+ {
+ 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1,
+ 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b,
+ 0x29, 0x7d, 0x81, 0xd5, 0x64, 0x30, 0xcc, 0x98,
+ 0xb3, 0xe7, 0x1b, 0x4f, 0xfe, 0xaa, 0x56, 0x02,
+ 0x52, 0x06, 0xfa, 0xae, 0x1f, 0x4b, 0xb7, 0xe3,
+ 0xc8, 0x9c, 0x60, 0x34, 0x85, 0xd1, 0x2d, 0x79,
+ 0x7b, 0x2f, 0xd3, 0x87, 0x36, 0x62, 0x9e, 0xca,
+ 0xe1, 0xb5, 0x49, 0x1d, 0xac, 0xf8, 0x04, 0x50,
+ 0xa4, 0xf0, 0x0c, 0x58, 0xe9, 0xbd, 0x41, 0x15,
+ 0x3e, 0x6a, 0x96, 0xc2, 0x73, 0x27, 0xdb, 0x8f,
+ 0x8d, 0xd9, 0x25, 0x71, 0xc0, 0x94, 0x68, 0x3c,
+ 0x17, 0x43, 0xbf, 0xeb, 0x5a, 0x0e, 0xf2, 0xa6,
+ 0xf6, 0xa2, 0x5e, 0x0a, 0xbb, 0xef, 0x13, 0x47,
+ 0x6c, 0x38, 0xc4, 0x90, 0x21, 0x75, 0x89, 0xdd,
+ 0xdf, 0x8b, 0x77, 0x23, 0x92, 0xc6, 0x3a, 0x6e,
+ 0x45, 0x11, 0xed, 0xb9, 0x08, 0x5c, 0xa0, 0xf4,
+ 0x55, 0x01, 0xfd, 0xa9, 0x18, 0x4c, 0xb0, 0xe4,
+ 0xcf, 0x9b, 0x67, 0x33, 0x82, 0xd6, 0x2a, 0x7e,
+ 0x7c, 0x28, 0xd4, 0x80, 0x31, 0x65, 0x99, 0xcd,
+ 0xe6, 0xb2, 0x4e, 0x1a, 0xab, 0xff, 0x03, 0x57,
+ 0x07, 0x53, 0xaf, 0xfb, 0x4a, 0x1e, 0xe2, 0xb6,
+ 0x9d, 0xc9, 0x35, 0x61, 0xd0, 0x84, 0x78, 0x2c,
+ 0x2e, 0x7a, 0x86, 0xd2, 0x63, 0x37, 0xcb, 0x9f,
+ 0xb4, 0xe0, 0x1c, 0x48, 0xf9, 0xad, 0x51, 0x05,
+ 0xf1, 0xa5, 0x59, 0x0d, 0xbc, 0xe8, 0x14, 0x40,
+ 0x6b, 0x3f, 0xc3, 0x97, 0x26, 0x72, 0x8e, 0xda,
+ 0xd8, 0x8c, 0x70, 0x24, 0x95, 0xc1, 0x3d, 0x69,
+ 0x42, 0x16, 0xea, 0xbe, 0x0f, 0x5b, 0xa7, 0xf3,
+ 0xa3, 0xf7, 0x0b, 0x5f, 0xee, 0xba, 0x46, 0x12,
+ 0x39, 0x6d, 0x91, 0xc5, 0x74, 0x20, 0xdc, 0x88,
+ 0x8a, 0xde, 0x22, 0x76, 0xc7, 0x93, 0x6f, 0x3b,
+ 0x10, 0x44, 0xb8, 0xec, 0x5d, 0x09, 0xf5, 0xa1,
+ },
+ {
+ 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6,
+ 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24,
+ 0x39, 0x6c, 0x93, 0xc6, 0x70, 0x25, 0xda, 0x8f,
+ 0xab, 0xfe, 0x01, 0x54, 0xe2, 0xb7, 0x48, 0x1d,
+ 0x72, 0x27, 0xd8, 0x8d, 0x3b, 0x6e, 0x91, 0xc4,
+ 0xe0, 0xb5, 0x4a, 0x1f, 0xa9, 0xfc, 0x03, 0x56,
+ 0x4b, 0x1e, 0xe1, 0xb4, 0x02, 0x57, 0xa8, 0xfd,
+ 0xd9, 0x8c, 0x73, 0x26, 0x90, 0xc5, 0x3a, 0x6f,
+ 0xe4, 0xb1, 0x4e, 0x1b, 0xad, 0xf8, 0x07, 0x52,
+ 0x76, 0x23, 0xdc, 0x89, 0x3f, 0x6a, 0x95, 0xc0,
+ 0xdd, 0x88, 0x77, 0x22, 0x94, 0xc1, 0x3e, 0x6b,
+ 0x4f, 0x1a, 0xe5, 0xb0, 0x06, 0x53, 0xac, 0xf9,
+ 0x96, 0xc3, 0x3c, 0x69, 0xdf, 0x8a, 0x75, 0x20,
+ 0x04, 0x51, 0xae, 0xfb, 0x4d, 0x18, 0xe7, 0xb2,
+ 0xaf, 0xfa, 0x05, 0x50, 0xe6, 0xb3, 0x4c, 0x19,
+ 0x3d, 0x68, 0x97, 0xc2, 0x74, 0x21, 0xde, 0x8b,
+ 0xd5, 0x80, 0x7f, 0x2a, 0x9c, 0xc9, 0x36, 0x63,
+ 0x47, 0x12, 0xed, 0xb8, 0x0e, 0x5b, 0xa4, 0xf1,
+ 0xec, 0xb9, 0x46, 0x13, 0xa5, 0xf0, 0x0f, 0x5a,
+ 0x7e, 0x2b, 0xd4, 0x81, 0x37, 0x62, 0x9d, 0xc8,
+ 0xa7, 0xf2, 0x0d, 0x58, 0xee, 0xbb, 0x44, 0x11,
+ 0x35, 0x60, 0x9f, 0xca, 0x7c, 0x29, 0xd6, 0x83,
+ 0x9e, 0xcb, 0x34, 0x61, 0xd7, 0x82, 0x7d, 0x28,
+ 0x0c, 0x59, 0xa6, 0xf3, 0x45, 0x10, 0xef, 0xba,
+ 0x31, 0x64, 0x9b, 0xce, 0x78, 0x2d, 0xd2, 0x87,
+ 0xa3, 0xf6, 0x09, 0x5c, 0xea, 0xbf, 0x40, 0x15,
+ 0x08, 0x5d, 0xa2, 0xf7, 0x41, 0x14, 0xeb, 0xbe,
+ 0x9a, 0xcf, 0x30, 0x65, 0xd3, 0x86, 0x79, 0x2c,
+ 0x43, 0x16, 0xe9, 0xbc, 0x0a, 0x5f, 0xa0, 0xf5,
+ 0xd1, 0x84, 0x7b, 0x2e, 0x98, 0xcd, 0x32, 0x67,
+ 0x7a, 0x2f, 0xd0, 0x85, 0x33, 0x66, 0x99, 0xcc,
+ 0xe8, 0xbd, 0x42, 0x17, 0xa1, 0xf4, 0x0b, 0x5e,
+ },
+ {
+ 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf,
+ 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35,
+ 0x09, 0x5f, 0xa5, 0xf3, 0x4c, 0x1a, 0xe0, 0xb6,
+ 0x83, 0xd5, 0x2f, 0x79, 0xc6, 0x90, 0x6a, 0x3c,
+ 0x12, 0x44, 0xbe, 0xe8, 0x57, 0x01, 0xfb, 0xad,
+ 0x98, 0xce, 0x34, 0x62, 0xdd, 0x8b, 0x71, 0x27,
+ 0x1b, 0x4d, 0xb7, 0xe1, 0x5e, 0x08, 0xf2, 0xa4,
+ 0x91, 0xc7, 0x3d, 0x6b, 0xd4, 0x82, 0x78, 0x2e,
+ 0x24, 0x72, 0x88, 0xde, 0x61, 0x37, 0xcd, 0x9b,
+ 0xae, 0xf8, 0x02, 0x54, 0xeb, 0xbd, 0x47, 0x11,
+ 0x2d, 0x7b, 0x81, 0xd7, 0x68, 0x3e, 0xc4, 0x92,
+ 0xa7, 0xf1, 0x0b, 0x5d, 0xe2, 0xb4, 0x4e, 0x18,
+ 0x36, 0x60, 0x9a, 0xcc, 0x73, 0x25, 0xdf, 0x89,
+ 0xbc, 0xea, 0x10, 0x46, 0xf9, 0xaf, 0x55, 0x03,
+ 0x3f, 0x69, 0x93, 0xc5, 0x7a, 0x2c, 0xd6, 0x80,
+ 0xb5, 0xe3, 0x19, 0x4f, 0xf0, 0xa6, 0x5c, 0x0a,
+ 0x48, 0x1e, 0xe4, 0xb2, 0x0d, 0x5b, 0xa1, 0xf7,
+ 0xc2, 0x94, 0x6e, 0x38, 0x87, 0xd1, 0x2b, 0x7d,
+ 0x41, 0x17, 0xed, 0xbb, 0x04, 0x52, 0xa8, 0xfe,
+ 0xcb, 0x9d, 0x67, 0x31, 0x8e, 0xd8, 0x22, 0x74,
+ 0x5a, 0x0c, 0xf6, 0xa0, 0x1f, 0x49, 0xb3, 0xe5,
+ 0xd0, 0x86, 0x7c, 0x2a, 0x95, 0xc3, 0x39, 0x6f,
+ 0x53, 0x05, 0xff, 0xa9, 0x16, 0x40, 0xba, 0xec,
+ 0xd9, 0x8f, 0x75, 0x23, 0x9c, 0xca, 0x30, 0x66,
+ 0x6c, 0x3a, 0xc0, 0x96, 0x29, 0x7f, 0x85, 0xd3,
+ 0xe6, 0xb0, 0x4a, 0x1c, 0xa3, 0xf5, 0x0f, 0x59,
+ 0x65, 0x33, 0xc9, 0x9f, 0x20, 0x76, 0x8c, 0xda,
+ 0xef, 0xb9, 0x43, 0x15, 0xaa, 0xfc, 0x06, 0x50,
+ 0x7e, 0x28, 0xd2, 0x84, 0x3b, 0x6d, 0x97, 0xc1,
+ 0xf4, 0xa2, 0x58, 0x0e, 0xb1, 0xe7, 0x1d, 0x4b,
+ 0x77, 0x21, 0xdb, 0x8d, 0x32, 0x64, 0x9e, 0xc8,
+ 0xfd, 0xab, 0x51, 0x07, 0xb8, 0xee, 0x14, 0x42,
+ },
+ {
+ 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8,
+ 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a,
+ 0x19, 0x4e, 0xb7, 0xe0, 0x58, 0x0f, 0xf6, 0xa1,
+ 0x9b, 0xcc, 0x35, 0x62, 0xda, 0x8d, 0x74, 0x23,
+ 0x32, 0x65, 0x9c, 0xcb, 0x73, 0x24, 0xdd, 0x8a,
+ 0xb0, 0xe7, 0x1e, 0x49, 0xf1, 0xa6, 0x5f, 0x08,
+ 0x2b, 0x7c, 0x85, 0xd2, 0x6a, 0x3d, 0xc4, 0x93,
+ 0xa9, 0xfe, 0x07, 0x50, 0xe8, 0xbf, 0x46, 0x11,
+ 0x64, 0x33, 0xca, 0x9d, 0x25, 0x72, 0x8b, 0xdc,
+ 0xe6, 0xb1, 0x48, 0x1f, 0xa7, 0xf0, 0x09, 0x5e,
+ 0x7d, 0x2a, 0xd3, 0x84, 0x3c, 0x6b, 0x92, 0xc5,
+ 0xff, 0xa8, 0x51, 0x06, 0xbe, 0xe9, 0x10, 0x47,
+ 0x56, 0x01, 0xf8, 0xaf, 0x17, 0x40, 0xb9, 0xee,
+ 0xd4, 0x83, 0x7a, 0x2d, 0x95, 0xc2, 0x3b, 0x6c,
+ 0x4f, 0x18, 0xe1, 0xb6, 0x0e, 0x59, 0xa0, 0xf7,
+ 0xcd, 0x9a, 0x63, 0x34, 0x8c, 0xdb, 0x22, 0x75,
+ 0xc8, 0x9f, 0x66, 0x31, 0x89, 0xde, 0x27, 0x70,
+ 0x4a, 0x1d, 0xe4, 0xb3, 0x0b, 0x5c, 0xa5, 0xf2,
+ 0xd1, 0x86, 0x7f, 0x28, 0x90, 0xc7, 0x3e, 0x69,
+ 0x53, 0x04, 0xfd, 0xaa, 0x12, 0x45, 0xbc, 0xeb,
+ 0xfa, 0xad, 0x54, 0x03, 0xbb, 0xec, 0x15, 0x42,
+ 0x78, 0x2f, 0xd6, 0x81, 0x39, 0x6e, 0x97, 0xc0,
+ 0xe3, 0xb4, 0x4d, 0x1a, 0xa2, 0xf5, 0x0c, 0x5b,
+ 0x61, 0x36, 0xcf, 0x98, 0x20, 0x77, 0x8e, 0xd9,
+ 0xac, 0xfb, 0x02, 0x55, 0xed, 0xba, 0x43, 0x14,
+ 0x2e, 0x79, 0x80, 0xd7, 0x6f, 0x38, 0xc1, 0x96,
+ 0xb5, 0xe2, 0x1b, 0x4c, 0xf4, 0xa3, 0x5a, 0x0d,
+ 0x37, 0x60, 0x99, 0xce, 0x76, 0x21, 0xd8, 0x8f,
+ 0x9e, 0xc9, 0x30, 0x67, 0xdf, 0x88, 0x71, 0x26,
+ 0x1c, 0x4b, 0xb2, 0xe5, 0x5d, 0x0a, 0xf3, 0xa4,
+ 0x87, 0xd0, 0x29, 0x7e, 0xc6, 0x91, 0x68, 0x3f,
+ 0x05, 0x52, 0xab, 0xfc, 0x44, 0x13, 0xea, 0xbd,
+ },
+ {
+ 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95,
+ 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f,
+ 0xe9, 0xb1, 0x59, 0x01, 0x94, 0xcc, 0x24, 0x7c,
+ 0x13, 0x4b, 0xa3, 0xfb, 0x6e, 0x36, 0xde, 0x86,
+ 0xcf, 0x97, 0x7f, 0x27, 0xb2, 0xea, 0x02, 0x5a,
+ 0x35, 0x6d, 0x85, 0xdd, 0x48, 0x10, 0xf8, 0xa0,
+ 0x26, 0x7e, 0x96, 0xce, 0x5b, 0x03, 0xeb, 0xb3,
+ 0xdc, 0x84, 0x6c, 0x34, 0xa1, 0xf9, 0x11, 0x49,
+ 0x83, 0xdb, 0x33, 0x6b, 0xfe, 0xa6, 0x4e, 0x16,
+ 0x79, 0x21, 0xc9, 0x91, 0x04, 0x5c, 0xb4, 0xec,
+ 0x6a, 0x32, 0xda, 0x82, 0x17, 0x4f, 0xa7, 0xff,
+ 0x90, 0xc8, 0x20, 0x78, 0xed, 0xb5, 0x5d, 0x05,
+ 0x4c, 0x14, 0xfc, 0xa4, 0x31, 0x69, 0x81, 0xd9,
+ 0xb6, 0xee, 0x06, 0x5e, 0xcb, 0x93, 0x7b, 0x23,
+ 0xa5, 0xfd, 0x15, 0x4d, 0xd8, 0x80, 0x68, 0x30,
+ 0x5f, 0x07, 0xef, 0xb7, 0x22, 0x7a, 0x92, 0xca,
+ 0x1b, 0x43, 0xab, 0xf3, 0x66, 0x3e, 0xd6, 0x8e,
+ 0xe1, 0xb9, 0x51, 0x09, 0x9c, 0xc4, 0x2c, 0x74,
+ 0xf2, 0xaa, 0x42, 0x1a, 0x8f, 0xd7, 0x3f, 0x67,
+ 0x08, 0x50, 0xb8, 0xe0, 0x75, 0x2d, 0xc5, 0x9d,
+ 0xd4, 0x8c, 0x64, 0x3c, 0xa9, 0xf1, 0x19, 0x41,
+ 0x2e, 0x76, 0x9e, 0xc6, 0x53, 0x0b, 0xe3, 0xbb,
+ 0x3d, 0x65, 0x8d, 0xd5, 0x40, 0x18, 0xf0, 0xa8,
+ 0xc7, 0x9f, 0x77, 0x2f, 0xba, 0xe2, 0x0a, 0x52,
+ 0x98, 0xc0, 0x28, 0x70, 0xe5, 0xbd, 0x55, 0x0d,
+ 0x62, 0x3a, 0xd2, 0x8a, 0x1f, 0x47, 0xaf, 0xf7,
+ 0x71, 0x29, 0xc1, 0x99, 0x0c, 0x54, 0xbc, 0xe4,
+ 0x8b, 0xd3, 0x3b, 0x63, 0xf6, 0xae, 0x46, 0x1e,
+ 0x57, 0x0f, 0xe7, 0xbf, 0x2a, 0x72, 0x9a, 0xc2,
+ 0xad, 0xf5, 0x1d, 0x45, 0xd0, 0x88, 0x60, 0x38,
+ 0xbe, 0xe6, 0x0e, 0x56, 0xc3, 0x9b, 0x73, 0x2b,
+ 0x44, 0x1c, 0xf4, 0xac, 0x39, 0x61, 0x89, 0xd1,
+ },
+ {
+ 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92,
+ 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60,
+ 0xf9, 0xa0, 0x4b, 0x12, 0x80, 0xd9, 0x32, 0x6b,
+ 0x0b, 0x52, 0xb9, 0xe0, 0x72, 0x2b, 0xc0, 0x99,
+ 0xef, 0xb6, 0x5d, 0x04, 0x96, 0xcf, 0x24, 0x7d,
+ 0x1d, 0x44, 0xaf, 0xf6, 0x64, 0x3d, 0xd6, 0x8f,
+ 0x16, 0x4f, 0xa4, 0xfd, 0x6f, 0x36, 0xdd, 0x84,
+ 0xe4, 0xbd, 0x56, 0x0f, 0x9d, 0xc4, 0x2f, 0x76,
+ 0xc3, 0x9a, 0x71, 0x28, 0xba, 0xe3, 0x08, 0x51,
+ 0x31, 0x68, 0x83, 0xda, 0x48, 0x11, 0xfa, 0xa3,
+ 0x3a, 0x63, 0x88, 0xd1, 0x43, 0x1a, 0xf1, 0xa8,
+ 0xc8, 0x91, 0x7a, 0x23, 0xb1, 0xe8, 0x03, 0x5a,
+ 0x2c, 0x75, 0x9e, 0xc7, 0x55, 0x0c, 0xe7, 0xbe,
+ 0xde, 0x87, 0x6c, 0x35, 0xa7, 0xfe, 0x15, 0x4c,
+ 0xd5, 0x8c, 0x67, 0x3e, 0xac, 0xf5, 0x1e, 0x47,
+ 0x27, 0x7e, 0x95, 0xcc, 0x5e, 0x07, 0xec, 0xb5,
+ 0x9b, 0xc2, 0x29, 0x70, 0xe2, 0xbb, 0x50, 0x09,
+ 0x69, 0x30, 0xdb, 0x82, 0x10, 0x49, 0xa2, 0xfb,
+ 0x62, 0x3b, 0xd0, 0x89, 0x1b, 0x42, 0xa9, 0xf0,
+ 0x90, 0xc9, 0x22, 0x7b, 0xe9, 0xb0, 0x5b, 0x02,
+ 0x74, 0x2d, 0xc6, 0x9f, 0x0d, 0x54, 0xbf, 0xe6,
+ 0x86, 0xdf, 0x34, 0x6d, 0xff, 0xa6, 0x4d, 0x14,
+ 0x8d, 0xd4, 0x3f, 0x66, 0xf4, 0xad, 0x46, 0x1f,
+ 0x7f, 0x26, 0xcd, 0x94, 0x06, 0x5f, 0xb4, 0xed,
+ 0x58, 0x01, 0xea, 0xb3, 0x21, 0x78, 0x93, 0xca,
+ 0xaa, 0xf3, 0x18, 0x41, 0xd3, 0x8a, 0x61, 0x38,
+ 0xa1, 0xf8, 0x13, 0x4a, 0xd8, 0x81, 0x6a, 0x33,
+ 0x53, 0x0a, 0xe1, 0xb8, 0x2a, 0x73, 0x98, 0xc1,
+ 0xb7, 0xee, 0x05, 0x5c, 0xce, 0x97, 0x7c, 0x25,
+ 0x45, 0x1c, 0xf7, 0xae, 0x3c, 0x65, 0x8e, 0xd7,
+ 0x4e, 0x17, 0xfc, 0xa5, 0x37, 0x6e, 0x85, 0xdc,
+ 0xbc, 0xe5, 0x0e, 0x57, 0xc5, 0x9c, 0x77, 0x2e,
+ },
+ {
+ 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b,
+ 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71,
+ 0xc9, 0x93, 0x7d, 0x27, 0xbc, 0xe6, 0x08, 0x52,
+ 0x23, 0x79, 0x97, 0xcd, 0x56, 0x0c, 0xe2, 0xb8,
+ 0x8f, 0xd5, 0x3b, 0x61, 0xfa, 0xa0, 0x4e, 0x14,
+ 0x65, 0x3f, 0xd1, 0x8b, 0x10, 0x4a, 0xa4, 0xfe,
+ 0x46, 0x1c, 0xf2, 0xa8, 0x33, 0x69, 0x87, 0xdd,
+ 0xac, 0xf6, 0x18, 0x42, 0xd9, 0x83, 0x6d, 0x37,
+ 0x03, 0x59, 0xb7, 0xed, 0x76, 0x2c, 0xc2, 0x98,
+ 0xe9, 0xb3, 0x5d, 0x07, 0x9c, 0xc6, 0x28, 0x72,
+ 0xca, 0x90, 0x7e, 0x24, 0xbf, 0xe5, 0x0b, 0x51,
+ 0x20, 0x7a, 0x94, 0xce, 0x55, 0x0f, 0xe1, 0xbb,
+ 0x8c, 0xd6, 0x38, 0x62, 0xf9, 0xa3, 0x4d, 0x17,
+ 0x66, 0x3c, 0xd2, 0x88, 0x13, 0x49, 0xa7, 0xfd,
+ 0x45, 0x1f, 0xf1, 0xab, 0x30, 0x6a, 0x84, 0xde,
+ 0xaf, 0xf5, 0x1b, 0x41, 0xda, 0x80, 0x6e, 0x34,
+ 0x06, 0x5c, 0xb2, 0xe8, 0x73, 0x29, 0xc7, 0x9d,
+ 0xec, 0xb6, 0x58, 0x02, 0x99, 0xc3, 0x2d, 0x77,
+ 0xcf, 0x95, 0x7b, 0x21, 0xba, 0xe0, 0x0e, 0x54,
+ 0x25, 0x7f, 0x91, 0xcb, 0x50, 0x0a, 0xe4, 0xbe,
+ 0x89, 0xd3, 0x3d, 0x67, 0xfc, 0xa6, 0x48, 0x12,
+ 0x63, 0x39, 0xd7, 0x8d, 0x16, 0x4c, 0xa2, 0xf8,
+ 0x40, 0x1a, 0xf4, 0xae, 0x35, 0x6f, 0x81, 0xdb,
+ 0xaa, 0xf0, 0x1e, 0x44, 0xdf, 0x85, 0x6b, 0x31,
+ 0x05, 0x5f, 0xb1, 0xeb, 0x70, 0x2a, 0xc4, 0x9e,
+ 0xef, 0xb5, 0x5b, 0x01, 0x9a, 0xc0, 0x2e, 0x74,
+ 0xcc, 0x96, 0x78, 0x22, 0xb9, 0xe3, 0x0d, 0x57,
+ 0x26, 0x7c, 0x92, 0xc8, 0x53, 0x09, 0xe7, 0xbd,
+ 0x8a, 0xd0, 0x3e, 0x64, 0xff, 0xa5, 0x4b, 0x11,
+ 0x60, 0x3a, 0xd4, 0x8e, 0x15, 0x4f, 0xa1, 0xfb,
+ 0x43, 0x19, 0xf7, 0xad, 0x36, 0x6c, 0x82, 0xd8,
+ 0xa9, 0xf3, 0x1d, 0x47, 0xdc, 0x86, 0x68, 0x32,
+ },
+ {
+ 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c,
+ 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e,
+ 0xd9, 0x82, 0x6f, 0x34, 0xa8, 0xf3, 0x1e, 0x45,
+ 0x3b, 0x60, 0x8d, 0xd6, 0x4a, 0x11, 0xfc, 0xa7,
+ 0xaf, 0xf4, 0x19, 0x42, 0xde, 0x85, 0x68, 0x33,
+ 0x4d, 0x16, 0xfb, 0xa0, 0x3c, 0x67, 0x8a, 0xd1,
+ 0x76, 0x2d, 0xc0, 0x9b, 0x07, 0x5c, 0xb1, 0xea,
+ 0x94, 0xcf, 0x22, 0x79, 0xe5, 0xbe, 0x53, 0x08,
+ 0x43, 0x18, 0xf5, 0xae, 0x32, 0x69, 0x84, 0xdf,
+ 0xa1, 0xfa, 0x17, 0x4c, 0xd0, 0x8b, 0x66, 0x3d,
+ 0x9a, 0xc1, 0x2c, 0x77, 0xeb, 0xb0, 0x5d, 0x06,
+ 0x78, 0x23, 0xce, 0x95, 0x09, 0x52, 0xbf, 0xe4,
+ 0xec, 0xb7, 0x5a, 0x01, 0x9d, 0xc6, 0x2b, 0x70,
+ 0x0e, 0x55, 0xb8, 0xe3, 0x7f, 0x24, 0xc9, 0x92,
+ 0x35, 0x6e, 0x83, 0xd8, 0x44, 0x1f, 0xf2, 0xa9,
+ 0xd7, 0x8c, 0x61, 0x3a, 0xa6, 0xfd, 0x10, 0x4b,
+ 0x86, 0xdd, 0x30, 0x6b, 0xf7, 0xac, 0x41, 0x1a,
+ 0x64, 0x3f, 0xd2, 0x89, 0x15, 0x4e, 0xa3, 0xf8,
+ 0x5f, 0x04, 0xe9, 0xb2, 0x2e, 0x75, 0x98, 0xc3,
+ 0xbd, 0xe6, 0x0b, 0x50, 0xcc, 0x97, 0x7a, 0x21,
+ 0x29, 0x72, 0x9f, 0xc4, 0x58, 0x03, 0xee, 0xb5,
+ 0xcb, 0x90, 0x7d, 0x26, 0xba, 0xe1, 0x0c, 0x57,
+ 0xf0, 0xab, 0x46, 0x1d, 0x81, 0xda, 0x37, 0x6c,
+ 0x12, 0x49, 0xa4, 0xff, 0x63, 0x38, 0xd5, 0x8e,
+ 0xc5, 0x9e, 0x73, 0x28, 0xb4, 0xef, 0x02, 0x59,
+ 0x27, 0x7c, 0x91, 0xca, 0x56, 0x0d, 0xe0, 0xbb,
+ 0x1c, 0x47, 0xaa, 0xf1, 0x6d, 0x36, 0xdb, 0x80,
+ 0xfe, 0xa5, 0x48, 0x13, 0x8f, 0xd4, 0x39, 0x62,
+ 0x6a, 0x31, 0xdc, 0x87, 0x1b, 0x40, 0xad, 0xf6,
+ 0x88, 0xd3, 0x3e, 0x65, 0xf9, 0xa2, 0x4f, 0x14,
+ 0xb3, 0xe8, 0x05, 0x5e, 0xc2, 0x99, 0x74, 0x2f,
+ 0x51, 0x0a, 0xe7, 0xbc, 0x20, 0x7b, 0x96, 0xcd,
+ },
+ {
+ 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89,
+ 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53,
+ 0xa9, 0xf5, 0x11, 0x4d, 0xc4, 0x98, 0x7c, 0x20,
+ 0x73, 0x2f, 0xcb, 0x97, 0x1e, 0x42, 0xa6, 0xfa,
+ 0x4f, 0x13, 0xf7, 0xab, 0x22, 0x7e, 0x9a, 0xc6,
+ 0x95, 0xc9, 0x2d, 0x71, 0xf8, 0xa4, 0x40, 0x1c,
+ 0xe6, 0xba, 0x5e, 0x02, 0x8b, 0xd7, 0x33, 0x6f,
+ 0x3c, 0x60, 0x84, 0xd8, 0x51, 0x0d, 0xe9, 0xb5,
+ 0x9e, 0xc2, 0x26, 0x7a, 0xf3, 0xaf, 0x4b, 0x17,
+ 0x44, 0x18, 0xfc, 0xa0, 0x29, 0x75, 0x91, 0xcd,
+ 0x37, 0x6b, 0x8f, 0xd3, 0x5a, 0x06, 0xe2, 0xbe,
+ 0xed, 0xb1, 0x55, 0x09, 0x80, 0xdc, 0x38, 0x64,
+ 0xd1, 0x8d, 0x69, 0x35, 0xbc, 0xe0, 0x04, 0x58,
+ 0x0b, 0x57, 0xb3, 0xef, 0x66, 0x3a, 0xde, 0x82,
+ 0x78, 0x24, 0xc0, 0x9c, 0x15, 0x49, 0xad, 0xf1,
+ 0xa2, 0xfe, 0x1a, 0x46, 0xcf, 0x93, 0x77, 0x2b,
+ 0x21, 0x7d, 0x99, 0xc5, 0x4c, 0x10, 0xf4, 0xa8,
+ 0xfb, 0xa7, 0x43, 0x1f, 0x96, 0xca, 0x2e, 0x72,
+ 0x88, 0xd4, 0x30, 0x6c, 0xe5, 0xb9, 0x5d, 0x01,
+ 0x52, 0x0e, 0xea, 0xb6, 0x3f, 0x63, 0x87, 0xdb,
+ 0x6e, 0x32, 0xd6, 0x8a, 0x03, 0x5f, 0xbb, 0xe7,
+ 0xb4, 0xe8, 0x0c, 0x50, 0xd9, 0x85, 0x61, 0x3d,
+ 0xc7, 0x9b, 0x7f, 0x23, 0xaa, 0xf6, 0x12, 0x4e,
+ 0x1d, 0x41, 0xa5, 0xf9, 0x70, 0x2c, 0xc8, 0x94,
+ 0xbf, 0xe3, 0x07, 0x5b, 0xd2, 0x8e, 0x6a, 0x36,
+ 0x65, 0x39, 0xdd, 0x81, 0x08, 0x54, 0xb0, 0xec,
+ 0x16, 0x4a, 0xae, 0xf2, 0x7b, 0x27, 0xc3, 0x9f,
+ 0xcc, 0x90, 0x74, 0x28, 0xa1, 0xfd, 0x19, 0x45,
+ 0xf0, 0xac, 0x48, 0x14, 0x9d, 0xc1, 0x25, 0x79,
+ 0x2a, 0x76, 0x92, 0xce, 0x47, 0x1b, 0xff, 0xa3,
+ 0x59, 0x05, 0xe1, 0xbd, 0x34, 0x68, 0x8c, 0xd0,
+ 0x83, 0xdf, 0x3b, 0x67, 0xee, 0xb2, 0x56, 0x0a,
+ },
+ {
+ 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e,
+ 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c,
+ 0xb9, 0xe4, 0x03, 0x5e, 0xd0, 0x8d, 0x6a, 0x37,
+ 0x6b, 0x36, 0xd1, 0x8c, 0x02, 0x5f, 0xb8, 0xe5,
+ 0x6f, 0x32, 0xd5, 0x88, 0x06, 0x5b, 0xbc, 0xe1,
+ 0xbd, 0xe0, 0x07, 0x5a, 0xd4, 0x89, 0x6e, 0x33,
+ 0xd6, 0x8b, 0x6c, 0x31, 0xbf, 0xe2, 0x05, 0x58,
+ 0x04, 0x59, 0xbe, 0xe3, 0x6d, 0x30, 0xd7, 0x8a,
+ 0xde, 0x83, 0x64, 0x39, 0xb7, 0xea, 0x0d, 0x50,
+ 0x0c, 0x51, 0xb6, 0xeb, 0x65, 0x38, 0xdf, 0x82,
+ 0x67, 0x3a, 0xdd, 0x80, 0x0e, 0x53, 0xb4, 0xe9,
+ 0xb5, 0xe8, 0x0f, 0x52, 0xdc, 0x81, 0x66, 0x3b,
+ 0xb1, 0xec, 0x0b, 0x56, 0xd8, 0x85, 0x62, 0x3f,
+ 0x63, 0x3e, 0xd9, 0x84, 0x0a, 0x57, 0xb0, 0xed,
+ 0x08, 0x55, 0xb2, 0xef, 0x61, 0x3c, 0xdb, 0x86,
+ 0xda, 0x87, 0x60, 0x3d, 0xb3, 0xee, 0x09, 0x54,
+ 0xa1, 0xfc, 0x1b, 0x46, 0xc8, 0x95, 0x72, 0x2f,
+ 0x73, 0x2e, 0xc9, 0x94, 0x1a, 0x47, 0xa0, 0xfd,
+ 0x18, 0x45, 0xa2, 0xff, 0x71, 0x2c, 0xcb, 0x96,
+ 0xca, 0x97, 0x70, 0x2d, 0xa3, 0xfe, 0x19, 0x44,
+ 0xce, 0x93, 0x74, 0x29, 0xa7, 0xfa, 0x1d, 0x40,
+ 0x1c, 0x41, 0xa6, 0xfb, 0x75, 0x28, 0xcf, 0x92,
+ 0x77, 0x2a, 0xcd, 0x90, 0x1e, 0x43, 0xa4, 0xf9,
+ 0xa5, 0xf8, 0x1f, 0x42, 0xcc, 0x91, 0x76, 0x2b,
+ 0x7f, 0x22, 0xc5, 0x98, 0x16, 0x4b, 0xac, 0xf1,
+ 0xad, 0xf0, 0x17, 0x4a, 0xc4, 0x99, 0x7e, 0x23,
+ 0xc6, 0x9b, 0x7c, 0x21, 0xaf, 0xf2, 0x15, 0x48,
+ 0x14, 0x49, 0xae, 0xf3, 0x7d, 0x20, 0xc7, 0x9a,
+ 0x10, 0x4d, 0xaa, 0xf7, 0x79, 0x24, 0xc3, 0x9e,
+ 0xc2, 0x9f, 0x78, 0x25, 0xab, 0xf6, 0x11, 0x4c,
+ 0xa9, 0xf4, 0x13, 0x4e, 0xc0, 0x9d, 0x7a, 0x27,
+ 0x7b, 0x26, 0xc1, 0x9c, 0x12, 0x4f, 0xa8, 0xf5,
+ },
+ {
+ 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87,
+ 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d,
+ 0x89, 0xd7, 0x35, 0x6b, 0xec, 0xb2, 0x50, 0x0e,
+ 0x43, 0x1d, 0xff, 0xa1, 0x26, 0x78, 0x9a, 0xc4,
+ 0x0f, 0x51, 0xb3, 0xed, 0x6a, 0x34, 0xd6, 0x88,
+ 0xc5, 0x9b, 0x79, 0x27, 0xa0, 0xfe, 0x1c, 0x42,
+ 0x86, 0xd8, 0x3a, 0x64, 0xe3, 0xbd, 0x5f, 0x01,
+ 0x4c, 0x12, 0xf0, 0xae, 0x29, 0x77, 0x95, 0xcb,
+ 0x1e, 0x40, 0xa2, 0xfc, 0x7b, 0x25, 0xc7, 0x99,
+ 0xd4, 0x8a, 0x68, 0x36, 0xb1, 0xef, 0x0d, 0x53,
+ 0x97, 0xc9, 0x2b, 0x75, 0xf2, 0xac, 0x4e, 0x10,
+ 0x5d, 0x03, 0xe1, 0xbf, 0x38, 0x66, 0x84, 0xda,
+ 0x11, 0x4f, 0xad, 0xf3, 0x74, 0x2a, 0xc8, 0x96,
+ 0xdb, 0x85, 0x67, 0x39, 0xbe, 0xe0, 0x02, 0x5c,
+ 0x98, 0xc6, 0x24, 0x7a, 0xfd, 0xa3, 0x41, 0x1f,
+ 0x52, 0x0c, 0xee, 0xb0, 0x37, 0x69, 0x8b, 0xd5,
+ 0x3c, 0x62, 0x80, 0xde, 0x59, 0x07, 0xe5, 0xbb,
+ 0xf6, 0xa8, 0x4a, 0x14, 0x93, 0xcd, 0x2f, 0x71,
+ 0xb5, 0xeb, 0x09, 0x57, 0xd0, 0x8e, 0x6c, 0x32,
+ 0x7f, 0x21, 0xc3, 0x9d, 0x1a, 0x44, 0xa6, 0xf8,
+ 0x33, 0x6d, 0x8f, 0xd1, 0x56, 0x08, 0xea, 0xb4,
+ 0xf9, 0xa7, 0x45, 0x1b, 0x9c, 0xc2, 0x20, 0x7e,
+ 0xba, 0xe4, 0x06, 0x58, 0xdf, 0x81, 0x63, 0x3d,
+ 0x70, 0x2e, 0xcc, 0x92, 0x15, 0x4b, 0xa9, 0xf7,
+ 0x22, 0x7c, 0x9e, 0xc0, 0x47, 0x19, 0xfb, 0xa5,
+ 0xe8, 0xb6, 0x54, 0x0a, 0x8d, 0xd3, 0x31, 0x6f,
+ 0xab, 0xf5, 0x17, 0x49, 0xce, 0x90, 0x72, 0x2c,
+ 0x61, 0x3f, 0xdd, 0x83, 0x04, 0x5a, 0xb8, 0xe6,
+ 0x2d, 0x73, 0x91, 0xcf, 0x48, 0x16, 0xf4, 0xaa,
+ 0xe7, 0xb9, 0x5b, 0x05, 0x82, 0xdc, 0x3e, 0x60,
+ 0xa4, 0xfa, 0x18, 0x46, 0xc1, 0x9f, 0x7d, 0x23,
+ 0x6e, 0x30, 0xd2, 0x8c, 0x0b, 0x55, 0xb7, 0xe9,
+ },
+ {
+ 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80,
+ 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42,
+ 0x99, 0xc6, 0x27, 0x78, 0xf8, 0xa7, 0x46, 0x19,
+ 0x5b, 0x04, 0xe5, 0xba, 0x3a, 0x65, 0x84, 0xdb,
+ 0x2f, 0x70, 0x91, 0xce, 0x4e, 0x11, 0xf0, 0xaf,
+ 0xed, 0xb2, 0x53, 0x0c, 0x8c, 0xd3, 0x32, 0x6d,
+ 0xb6, 0xe9, 0x08, 0x57, 0xd7, 0x88, 0x69, 0x36,
+ 0x74, 0x2b, 0xca, 0x95, 0x15, 0x4a, 0xab, 0xf4,
+ 0x5e, 0x01, 0xe0, 0xbf, 0x3f, 0x60, 0x81, 0xde,
+ 0x9c, 0xc3, 0x22, 0x7d, 0xfd, 0xa2, 0x43, 0x1c,
+ 0xc7, 0x98, 0x79, 0x26, 0xa6, 0xf9, 0x18, 0x47,
+ 0x05, 0x5a, 0xbb, 0xe4, 0x64, 0x3b, 0xda, 0x85,
+ 0x71, 0x2e, 0xcf, 0x90, 0x10, 0x4f, 0xae, 0xf1,
+ 0xb3, 0xec, 0x0d, 0x52, 0xd2, 0x8d, 0x6c, 0x33,
+ 0xe8, 0xb7, 0x56, 0x09, 0x89, 0xd6, 0x37, 0x68,
+ 0x2a, 0x75, 0x94, 0xcb, 0x4b, 0x14, 0xf5, 0xaa,
+ 0xbc, 0xe3, 0x02, 0x5d, 0xdd, 0x82, 0x63, 0x3c,
+ 0x7e, 0x21, 0xc0, 0x9f, 0x1f, 0x40, 0xa1, 0xfe,
+ 0x25, 0x7a, 0x9b, 0xc4, 0x44, 0x1b, 0xfa, 0xa5,
+ 0xe7, 0xb8, 0x59, 0x06, 0x86, 0xd9, 0x38, 0x67,
+ 0x93, 0xcc, 0x2d, 0x72, 0xf2, 0xad, 0x4c, 0x13,
+ 0x51, 0x0e, 0xef, 0xb0, 0x30, 0x6f, 0x8e, 0xd1,
+ 0x0a, 0x55, 0xb4, 0xeb, 0x6b, 0x34, 0xd5, 0x8a,
+ 0xc8, 0x97, 0x76, 0x29, 0xa9, 0xf6, 0x17, 0x48,
+ 0xe2, 0xbd, 0x5c, 0x03, 0x83, 0xdc, 0x3d, 0x62,
+ 0x20, 0x7f, 0x9e, 0xc1, 0x41, 0x1e, 0xff, 0xa0,
+ 0x7b, 0x24, 0xc5, 0x9a, 0x1a, 0x45, 0xa4, 0xfb,
+ 0xb9, 0xe6, 0x07, 0x58, 0xd8, 0x87, 0x66, 0x39,
+ 0xcd, 0x92, 0x73, 0x2c, 0xac, 0xf3, 0x12, 0x4d,
+ 0x0f, 0x50, 0xb1, 0xee, 0x6e, 0x31, 0xd0, 0x8f,
+ 0x54, 0x0b, 0xea, 0xb5, 0x35, 0x6a, 0x8b, 0xd4,
+ 0x96, 0xc9, 0x28, 0x77, 0xf7, 0xa8, 0x49, 0x16,
+ },
+ {
+ 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d,
+ 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a,
+ 0x4e, 0x2e, 0x8e, 0xee, 0xd3, 0xb3, 0x13, 0x73,
+ 0x69, 0x09, 0xa9, 0xc9, 0xf4, 0x94, 0x34, 0x54,
+ 0x9c, 0xfc, 0x5c, 0x3c, 0x01, 0x61, 0xc1, 0xa1,
+ 0xbb, 0xdb, 0x7b, 0x1b, 0x26, 0x46, 0xe6, 0x86,
+ 0xd2, 0xb2, 0x12, 0x72, 0x4f, 0x2f, 0x8f, 0xef,
+ 0xf5, 0x95, 0x35, 0x55, 0x68, 0x08, 0xa8, 0xc8,
+ 0x25, 0x45, 0xe5, 0x85, 0xb8, 0xd8, 0x78, 0x18,
+ 0x02, 0x62, 0xc2, 0xa2, 0x9f, 0xff, 0x5f, 0x3f,
+ 0x6b, 0x0b, 0xab, 0xcb, 0xf6, 0x96, 0x36, 0x56,
+ 0x4c, 0x2c, 0x8c, 0xec, 0xd1, 0xb1, 0x11, 0x71,
+ 0xb9, 0xd9, 0x79, 0x19, 0x24, 0x44, 0xe4, 0x84,
+ 0x9e, 0xfe, 0x5e, 0x3e, 0x03, 0x63, 0xc3, 0xa3,
+ 0xf7, 0x97, 0x37, 0x57, 0x6a, 0x0a, 0xaa, 0xca,
+ 0xd0, 0xb0, 0x10, 0x70, 0x4d, 0x2d, 0x8d, 0xed,
+ 0x4a, 0x2a, 0x8a, 0xea, 0xd7, 0xb7, 0x17, 0x77,
+ 0x6d, 0x0d, 0xad, 0xcd, 0xf0, 0x90, 0x30, 0x50,
+ 0x04, 0x64, 0xc4, 0xa4, 0x99, 0xf9, 0x59, 0x39,
+ 0x23, 0x43, 0xe3, 0x83, 0xbe, 0xde, 0x7e, 0x1e,
+ 0xd6, 0xb6, 0x16, 0x76, 0x4b, 0x2b, 0x8b, 0xeb,
+ 0xf1, 0x91, 0x31, 0x51, 0x6c, 0x0c, 0xac, 0xcc,
+ 0x98, 0xf8, 0x58, 0x38, 0x05, 0x65, 0xc5, 0xa5,
+ 0xbf, 0xdf, 0x7f, 0x1f, 0x22, 0x42, 0xe2, 0x82,
+ 0x6f, 0x0f, 0xaf, 0xcf, 0xf2, 0x92, 0x32, 0x52,
+ 0x48, 0x28, 0x88, 0xe8, 0xd5, 0xb5, 0x15, 0x75,
+ 0x21, 0x41, 0xe1, 0x81, 0xbc, 0xdc, 0x7c, 0x1c,
+ 0x06, 0x66, 0xc6, 0xa6, 0x9b, 0xfb, 0x5b, 0x3b,
+ 0xf3, 0x93, 0x33, 0x53, 0x6e, 0x0e, 0xae, 0xce,
+ 0xd4, 0xb4, 0x14, 0x74, 0x49, 0x29, 0x89, 0xe9,
+ 0xbd, 0xdd, 0x7d, 0x1d, 0x20, 0x40, 0xe0, 0x80,
+ 0x9a, 0xfa, 0x5a, 0x3a, 0x07, 0x67, 0xc7, 0xa7,
+ },
+ {
+ 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a,
+ 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15,
+ 0x5e, 0x3f, 0x9c, 0xfd, 0xc7, 0xa6, 0x05, 0x64,
+ 0x71, 0x10, 0xb3, 0xd2, 0xe8, 0x89, 0x2a, 0x4b,
+ 0xbc, 0xdd, 0x7e, 0x1f, 0x25, 0x44, 0xe7, 0x86,
+ 0x93, 0xf2, 0x51, 0x30, 0x0a, 0x6b, 0xc8, 0xa9,
+ 0xe2, 0x83, 0x20, 0x41, 0x7b, 0x1a, 0xb9, 0xd8,
+ 0xcd, 0xac, 0x0f, 0x6e, 0x54, 0x35, 0x96, 0xf7,
+ 0x65, 0x04, 0xa7, 0xc6, 0xfc, 0x9d, 0x3e, 0x5f,
+ 0x4a, 0x2b, 0x88, 0xe9, 0xd3, 0xb2, 0x11, 0x70,
+ 0x3b, 0x5a, 0xf9, 0x98, 0xa2, 0xc3, 0x60, 0x01,
+ 0x14, 0x75, 0xd6, 0xb7, 0x8d, 0xec, 0x4f, 0x2e,
+ 0xd9, 0xb8, 0x1b, 0x7a, 0x40, 0x21, 0x82, 0xe3,
+ 0xf6, 0x97, 0x34, 0x55, 0x6f, 0x0e, 0xad, 0xcc,
+ 0x87, 0xe6, 0x45, 0x24, 0x1e, 0x7f, 0xdc, 0xbd,
+ 0xa8, 0xc9, 0x6a, 0x0b, 0x31, 0x50, 0xf3, 0x92,
+ 0xca, 0xab, 0x08, 0x69, 0x53, 0x32, 0x91, 0xf0,
+ 0xe5, 0x84, 0x27, 0x46, 0x7c, 0x1d, 0xbe, 0xdf,
+ 0x94, 0xf5, 0x56, 0x37, 0x0d, 0x6c, 0xcf, 0xae,
+ 0xbb, 0xda, 0x79, 0x18, 0x22, 0x43, 0xe0, 0x81,
+ 0x76, 0x17, 0xb4, 0xd5, 0xef, 0x8e, 0x2d, 0x4c,
+ 0x59, 0x38, 0x9b, 0xfa, 0xc0, 0xa1, 0x02, 0x63,
+ 0x28, 0x49, 0xea, 0x8b, 0xb1, 0xd0, 0x73, 0x12,
+ 0x07, 0x66, 0xc5, 0xa4, 0x9e, 0xff, 0x5c, 0x3d,
+ 0xaf, 0xce, 0x6d, 0x0c, 0x36, 0x57, 0xf4, 0x95,
+ 0x80, 0xe1, 0x42, 0x23, 0x19, 0x78, 0xdb, 0xba,
+ 0xf1, 0x90, 0x33, 0x52, 0x68, 0x09, 0xaa, 0xcb,
+ 0xde, 0xbf, 0x1c, 0x7d, 0x47, 0x26, 0x85, 0xe4,
+ 0x13, 0x72, 0xd1, 0xb0, 0x8a, 0xeb, 0x48, 0x29,
+ 0x3c, 0x5d, 0xfe, 0x9f, 0xa5, 0xc4, 0x67, 0x06,
+ 0x4d, 0x2c, 0x8f, 0xee, 0xd4, 0xb5, 0x16, 0x77,
+ 0x62, 0x03, 0xa0, 0xc1, 0xfb, 0x9a, 0x39, 0x58,
+ },
+ {
+ 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33,
+ 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04,
+ 0x6e, 0x0c, 0xaa, 0xc8, 0xfb, 0x99, 0x3f, 0x5d,
+ 0x59, 0x3b, 0x9d, 0xff, 0xcc, 0xae, 0x08, 0x6a,
+ 0xdc, 0xbe, 0x18, 0x7a, 0x49, 0x2b, 0x8d, 0xef,
+ 0xeb, 0x89, 0x2f, 0x4d, 0x7e, 0x1c, 0xba, 0xd8,
+ 0xb2, 0xd0, 0x76, 0x14, 0x27, 0x45, 0xe3, 0x81,
+ 0x85, 0xe7, 0x41, 0x23, 0x10, 0x72, 0xd4, 0xb6,
+ 0xa5, 0xc7, 0x61, 0x03, 0x30, 0x52, 0xf4, 0x96,
+ 0x92, 0xf0, 0x56, 0x34, 0x07, 0x65, 0xc3, 0xa1,
+ 0xcb, 0xa9, 0x0f, 0x6d, 0x5e, 0x3c, 0x9a, 0xf8,
+ 0xfc, 0x9e, 0x38, 0x5a, 0x69, 0x0b, 0xad, 0xcf,
+ 0x79, 0x1b, 0xbd, 0xdf, 0xec, 0x8e, 0x28, 0x4a,
+ 0x4e, 0x2c, 0x8a, 0xe8, 0xdb, 0xb9, 0x1f, 0x7d,
+ 0x17, 0x75, 0xd3, 0xb1, 0x82, 0xe0, 0x46, 0x24,
+ 0x20, 0x42, 0xe4, 0x86, 0xb5, 0xd7, 0x71, 0x13,
+ 0x57, 0x35, 0x93, 0xf1, 0xc2, 0xa0, 0x06, 0x64,
+ 0x60, 0x02, 0xa4, 0xc6, 0xf5, 0x97, 0x31, 0x53,
+ 0x39, 0x5b, 0xfd, 0x9f, 0xac, 0xce, 0x68, 0x0a,
+ 0x0e, 0x6c, 0xca, 0xa8, 0x9b, 0xf9, 0x5f, 0x3d,
+ 0x8b, 0xe9, 0x4f, 0x2d, 0x1e, 0x7c, 0xda, 0xb8,
+ 0xbc, 0xde, 0x78, 0x1a, 0x29, 0x4b, 0xed, 0x8f,
+ 0xe5, 0x87, 0x21, 0x43, 0x70, 0x12, 0xb4, 0xd6,
+ 0xd2, 0xb0, 0x16, 0x74, 0x47, 0x25, 0x83, 0xe1,
+ 0xf2, 0x90, 0x36, 0x54, 0x67, 0x05, 0xa3, 0xc1,
+ 0xc5, 0xa7, 0x01, 0x63, 0x50, 0x32, 0x94, 0xf6,
+ 0x9c, 0xfe, 0x58, 0x3a, 0x09, 0x6b, 0xcd, 0xaf,
+ 0xab, 0xc9, 0x6f, 0x0d, 0x3e, 0x5c, 0xfa, 0x98,
+ 0x2e, 0x4c, 0xea, 0x88, 0xbb, 0xd9, 0x7f, 0x1d,
+ 0x19, 0x7b, 0xdd, 0xbf, 0x8c, 0xee, 0x48, 0x2a,
+ 0x40, 0x22, 0x84, 0xe6, 0xd5, 0xb7, 0x11, 0x73,
+ 0x77, 0x15, 0xb3, 0xd1, 0xe2, 0x80, 0x26, 0x44,
+ },
+ {
+ 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34,
+ 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b,
+ 0x7e, 0x1d, 0xb8, 0xdb, 0xef, 0x8c, 0x29, 0x4a,
+ 0x41, 0x22, 0x87, 0xe4, 0xd0, 0xb3, 0x16, 0x75,
+ 0xfc, 0x9f, 0x3a, 0x59, 0x6d, 0x0e, 0xab, 0xc8,
+ 0xc3, 0xa0, 0x05, 0x66, 0x52, 0x31, 0x94, 0xf7,
+ 0x82, 0xe1, 0x44, 0x27, 0x13, 0x70, 0xd5, 0xb6,
+ 0xbd, 0xde, 0x7b, 0x18, 0x2c, 0x4f, 0xea, 0x89,
+ 0xe5, 0x86, 0x23, 0x40, 0x74, 0x17, 0xb2, 0xd1,
+ 0xda, 0xb9, 0x1c, 0x7f, 0x4b, 0x28, 0x8d, 0xee,
+ 0x9b, 0xf8, 0x5d, 0x3e, 0x0a, 0x69, 0xcc, 0xaf,
+ 0xa4, 0xc7, 0x62, 0x01, 0x35, 0x56, 0xf3, 0x90,
+ 0x19, 0x7a, 0xdf, 0xbc, 0x88, 0xeb, 0x4e, 0x2d,
+ 0x26, 0x45, 0xe0, 0x83, 0xb7, 0xd4, 0x71, 0x12,
+ 0x67, 0x04, 0xa1, 0xc2, 0xf6, 0x95, 0x30, 0x53,
+ 0x58, 0x3b, 0x9e, 0xfd, 0xc9, 0xaa, 0x0f, 0x6c,
+ 0xd7, 0xb4, 0x11, 0x72, 0x46, 0x25, 0x80, 0xe3,
+ 0xe8, 0x8b, 0x2e, 0x4d, 0x79, 0x1a, 0xbf, 0xdc,
+ 0xa9, 0xca, 0x6f, 0x0c, 0x38, 0x5b, 0xfe, 0x9d,
+ 0x96, 0xf5, 0x50, 0x33, 0x07, 0x64, 0xc1, 0xa2,
+ 0x2b, 0x48, 0xed, 0x8e, 0xba, 0xd9, 0x7c, 0x1f,
+ 0x14, 0x77, 0xd2, 0xb1, 0x85, 0xe6, 0x43, 0x20,
+ 0x55, 0x36, 0x93, 0xf0, 0xc4, 0xa7, 0x02, 0x61,
+ 0x6a, 0x09, 0xac, 0xcf, 0xfb, 0x98, 0x3d, 0x5e,
+ 0x32, 0x51, 0xf4, 0x97, 0xa3, 0xc0, 0x65, 0x06,
+ 0x0d, 0x6e, 0xcb, 0xa8, 0x9c, 0xff, 0x5a, 0x39,
+ 0x4c, 0x2f, 0x8a, 0xe9, 0xdd, 0xbe, 0x1b, 0x78,
+ 0x73, 0x10, 0xb5, 0xd6, 0xe2, 0x81, 0x24, 0x47,
+ 0xce, 0xad, 0x08, 0x6b, 0x5f, 0x3c, 0x99, 0xfa,
+ 0xf1, 0x92, 0x37, 0x54, 0x60, 0x03, 0xa6, 0xc5,
+ 0xb0, 0xd3, 0x76, 0x15, 0x21, 0x42, 0xe7, 0x84,
+ 0x8f, 0xec, 0x49, 0x2a, 0x1e, 0x7d, 0xd8, 0xbb,
+ },
+ {
+ 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21,
+ 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26,
+ 0x0e, 0x6a, 0xc6, 0xa2, 0x83, 0xe7, 0x4b, 0x2f,
+ 0x09, 0x6d, 0xc1, 0xa5, 0x84, 0xe0, 0x4c, 0x28,
+ 0x1c, 0x78, 0xd4, 0xb0, 0x91, 0xf5, 0x59, 0x3d,
+ 0x1b, 0x7f, 0xd3, 0xb7, 0x96, 0xf2, 0x5e, 0x3a,
+ 0x12, 0x76, 0xda, 0xbe, 0x9f, 0xfb, 0x57, 0x33,
+ 0x15, 0x71, 0xdd, 0xb9, 0x98, 0xfc, 0x50, 0x34,
+ 0x38, 0x5c, 0xf0, 0x94, 0xb5, 0xd1, 0x7d, 0x19,
+ 0x3f, 0x5b, 0xf7, 0x93, 0xb2, 0xd6, 0x7a, 0x1e,
+ 0x36, 0x52, 0xfe, 0x9a, 0xbb, 0xdf, 0x73, 0x17,
+ 0x31, 0x55, 0xf9, 0x9d, 0xbc, 0xd8, 0x74, 0x10,
+ 0x24, 0x40, 0xec, 0x88, 0xa9, 0xcd, 0x61, 0x05,
+ 0x23, 0x47, 0xeb, 0x8f, 0xae, 0xca, 0x66, 0x02,
+ 0x2a, 0x4e, 0xe2, 0x86, 0xa7, 0xc3, 0x6f, 0x0b,
+ 0x2d, 0x49, 0xe5, 0x81, 0xa0, 0xc4, 0x68, 0x0c,
+ 0x70, 0x14, 0xb8, 0xdc, 0xfd, 0x99, 0x35, 0x51,
+ 0x77, 0x13, 0xbf, 0xdb, 0xfa, 0x9e, 0x32, 0x56,
+ 0x7e, 0x1a, 0xb6, 0xd2, 0xf3, 0x97, 0x3b, 0x5f,
+ 0x79, 0x1d, 0xb1, 0xd5, 0xf4, 0x90, 0x3c, 0x58,
+ 0x6c, 0x08, 0xa4, 0xc0, 0xe1, 0x85, 0x29, 0x4d,
+ 0x6b, 0x0f, 0xa3, 0xc7, 0xe6, 0x82, 0x2e, 0x4a,
+ 0x62, 0x06, 0xaa, 0xce, 0xef, 0x8b, 0x27, 0x43,
+ 0x65, 0x01, 0xad, 0xc9, 0xe8, 0x8c, 0x20, 0x44,
+ 0x48, 0x2c, 0x80, 0xe4, 0xc5, 0xa1, 0x0d, 0x69,
+ 0x4f, 0x2b, 0x87, 0xe3, 0xc2, 0xa6, 0x0a, 0x6e,
+ 0x46, 0x22, 0x8e, 0xea, 0xcb, 0xaf, 0x03, 0x67,
+ 0x41, 0x25, 0x89, 0xed, 0xcc, 0xa8, 0x04, 0x60,
+ 0x54, 0x30, 0x9c, 0xf8, 0xd9, 0xbd, 0x11, 0x75,
+ 0x53, 0x37, 0x9b, 0xff, 0xde, 0xba, 0x16, 0x72,
+ 0x5a, 0x3e, 0x92, 0xf6, 0xd7, 0xb3, 0x1f, 0x7b,
+ 0x5d, 0x39, 0x95, 0xf1, 0xd0, 0xb4, 0x18, 0x7c,
+ },
+ {
+ 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26,
+ 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29,
+ 0x1e, 0x7b, 0xd4, 0xb1, 0x97, 0xf2, 0x5d, 0x38,
+ 0x11, 0x74, 0xdb, 0xbe, 0x98, 0xfd, 0x52, 0x37,
+ 0x3c, 0x59, 0xf6, 0x93, 0xb5, 0xd0, 0x7f, 0x1a,
+ 0x33, 0x56, 0xf9, 0x9c, 0xba, 0xdf, 0x70, 0x15,
+ 0x22, 0x47, 0xe8, 0x8d, 0xab, 0xce, 0x61, 0x04,
+ 0x2d, 0x48, 0xe7, 0x82, 0xa4, 0xc1, 0x6e, 0x0b,
+ 0x78, 0x1d, 0xb2, 0xd7, 0xf1, 0x94, 0x3b, 0x5e,
+ 0x77, 0x12, 0xbd, 0xd8, 0xfe, 0x9b, 0x34, 0x51,
+ 0x66, 0x03, 0xac, 0xc9, 0xef, 0x8a, 0x25, 0x40,
+ 0x69, 0x0c, 0xa3, 0xc6, 0xe0, 0x85, 0x2a, 0x4f,
+ 0x44, 0x21, 0x8e, 0xeb, 0xcd, 0xa8, 0x07, 0x62,
+ 0x4b, 0x2e, 0x81, 0xe4, 0xc2, 0xa7, 0x08, 0x6d,
+ 0x5a, 0x3f, 0x90, 0xf5, 0xd3, 0xb6, 0x19, 0x7c,
+ 0x55, 0x30, 0x9f, 0xfa, 0xdc, 0xb9, 0x16, 0x73,
+ 0xf0, 0x95, 0x3a, 0x5f, 0x79, 0x1c, 0xb3, 0xd6,
+ 0xff, 0x9a, 0x35, 0x50, 0x76, 0x13, 0xbc, 0xd9,
+ 0xee, 0x8b, 0x24, 0x41, 0x67, 0x02, 0xad, 0xc8,
+ 0xe1, 0x84, 0x2b, 0x4e, 0x68, 0x0d, 0xa2, 0xc7,
+ 0xcc, 0xa9, 0x06, 0x63, 0x45, 0x20, 0x8f, 0xea,
+ 0xc3, 0xa6, 0x09, 0x6c, 0x4a, 0x2f, 0x80, 0xe5,
+ 0xd2, 0xb7, 0x18, 0x7d, 0x5b, 0x3e, 0x91, 0xf4,
+ 0xdd, 0xb8, 0x17, 0x72, 0x54, 0x31, 0x9e, 0xfb,
+ 0x88, 0xed, 0x42, 0x27, 0x01, 0x64, 0xcb, 0xae,
+ 0x87, 0xe2, 0x4d, 0x28, 0x0e, 0x6b, 0xc4, 0xa1,
+ 0x96, 0xf3, 0x5c, 0x39, 0x1f, 0x7a, 0xd5, 0xb0,
+ 0x99, 0xfc, 0x53, 0x36, 0x10, 0x75, 0xda, 0xbf,
+ 0xb4, 0xd1, 0x7e, 0x1b, 0x3d, 0x58, 0xf7, 0x92,
+ 0xbb, 0xde, 0x71, 0x14, 0x32, 0x57, 0xf8, 0x9d,
+ 0xaa, 0xcf, 0x60, 0x05, 0x23, 0x46, 0xe9, 0x8c,
+ 0xa5, 0xc0, 0x6f, 0x0a, 0x2c, 0x49, 0xe6, 0x83,
+ },
+ {
+ 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f,
+ 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38,
+ 0x2e, 0x48, 0xe2, 0x84, 0xab, 0xcd, 0x67, 0x01,
+ 0x39, 0x5f, 0xf5, 0x93, 0xbc, 0xda, 0x70, 0x16,
+ 0x5c, 0x3a, 0x90, 0xf6, 0xd9, 0xbf, 0x15, 0x73,
+ 0x4b, 0x2d, 0x87, 0xe1, 0xce, 0xa8, 0x02, 0x64,
+ 0x72, 0x14, 0xbe, 0xd8, 0xf7, 0x91, 0x3b, 0x5d,
+ 0x65, 0x03, 0xa9, 0xcf, 0xe0, 0x86, 0x2c, 0x4a,
+ 0xb8, 0xde, 0x74, 0x12, 0x3d, 0x5b, 0xf1, 0x97,
+ 0xaf, 0xc9, 0x63, 0x05, 0x2a, 0x4c, 0xe6, 0x80,
+ 0x96, 0xf0, 0x5a, 0x3c, 0x13, 0x75, 0xdf, 0xb9,
+ 0x81, 0xe7, 0x4d, 0x2b, 0x04, 0x62, 0xc8, 0xae,
+ 0xe4, 0x82, 0x28, 0x4e, 0x61, 0x07, 0xad, 0xcb,
+ 0xf3, 0x95, 0x3f, 0x59, 0x76, 0x10, 0xba, 0xdc,
+ 0xca, 0xac, 0x06, 0x60, 0x4f, 0x29, 0x83, 0xe5,
+ 0xdd, 0xbb, 0x11, 0x77, 0x58, 0x3e, 0x94, 0xf2,
+ 0x6d, 0x0b, 0xa1, 0xc7, 0xe8, 0x8e, 0x24, 0x42,
+ 0x7a, 0x1c, 0xb6, 0xd0, 0xff, 0x99, 0x33, 0x55,
+ 0x43, 0x25, 0x8f, 0xe9, 0xc6, 0xa0, 0x0a, 0x6c,
+ 0x54, 0x32, 0x98, 0xfe, 0xd1, 0xb7, 0x1d, 0x7b,
+ 0x31, 0x57, 0xfd, 0x9b, 0xb4, 0xd2, 0x78, 0x1e,
+ 0x26, 0x40, 0xea, 0x8c, 0xa3, 0xc5, 0x6f, 0x09,
+ 0x1f, 0x79, 0xd3, 0xb5, 0x9a, 0xfc, 0x56, 0x30,
+ 0x08, 0x6e, 0xc4, 0xa2, 0x8d, 0xeb, 0x41, 0x27,
+ 0xd5, 0xb3, 0x19, 0x7f, 0x50, 0x36, 0x9c, 0xfa,
+ 0xc2, 0xa4, 0x0e, 0x68, 0x47, 0x21, 0x8b, 0xed,
+ 0xfb, 0x9d, 0x37, 0x51, 0x7e, 0x18, 0xb2, 0xd4,
+ 0xec, 0x8a, 0x20, 0x46, 0x69, 0x0f, 0xa5, 0xc3,
+ 0x89, 0xef, 0x45, 0x23, 0x0c, 0x6a, 0xc0, 0xa6,
+ 0x9e, 0xf8, 0x52, 0x34, 0x1b, 0x7d, 0xd7, 0xb1,
+ 0xa7, 0xc1, 0x6b, 0x0d, 0x22, 0x44, 0xee, 0x88,
+ 0xb0, 0xd6, 0x7c, 0x1a, 0x35, 0x53, 0xf9, 0x9f,
+ },
+ {
+ 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28,
+ 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37,
+ 0x3e, 0x59, 0xf0, 0x97, 0xbf, 0xd8, 0x71, 0x16,
+ 0x21, 0x46, 0xef, 0x88, 0xa0, 0xc7, 0x6e, 0x09,
+ 0x7c, 0x1b, 0xb2, 0xd5, 0xfd, 0x9a, 0x33, 0x54,
+ 0x63, 0x04, 0xad, 0xca, 0xe2, 0x85, 0x2c, 0x4b,
+ 0x42, 0x25, 0x8c, 0xeb, 0xc3, 0xa4, 0x0d, 0x6a,
+ 0x5d, 0x3a, 0x93, 0xf4, 0xdc, 0xbb, 0x12, 0x75,
+ 0xf8, 0x9f, 0x36, 0x51, 0x79, 0x1e, 0xb7, 0xd0,
+ 0xe7, 0x80, 0x29, 0x4e, 0x66, 0x01, 0xa8, 0xcf,
+ 0xc6, 0xa1, 0x08, 0x6f, 0x47, 0x20, 0x89, 0xee,
+ 0xd9, 0xbe, 0x17, 0x70, 0x58, 0x3f, 0x96, 0xf1,
+ 0x84, 0xe3, 0x4a, 0x2d, 0x05, 0x62, 0xcb, 0xac,
+ 0x9b, 0xfc, 0x55, 0x32, 0x1a, 0x7d, 0xd4, 0xb3,
+ 0xba, 0xdd, 0x74, 0x13, 0x3b, 0x5c, 0xf5, 0x92,
+ 0xa5, 0xc2, 0x6b, 0x0c, 0x24, 0x43, 0xea, 0x8d,
+ 0xed, 0x8a, 0x23, 0x44, 0x6c, 0x0b, 0xa2, 0xc5,
+ 0xf2, 0x95, 0x3c, 0x5b, 0x73, 0x14, 0xbd, 0xda,
+ 0xd3, 0xb4, 0x1d, 0x7a, 0x52, 0x35, 0x9c, 0xfb,
+ 0xcc, 0xab, 0x02, 0x65, 0x4d, 0x2a, 0x83, 0xe4,
+ 0x91, 0xf6, 0x5f, 0x38, 0x10, 0x77, 0xde, 0xb9,
+ 0x8e, 0xe9, 0x40, 0x27, 0x0f, 0x68, 0xc1, 0xa6,
+ 0xaf, 0xc8, 0x61, 0x06, 0x2e, 0x49, 0xe0, 0x87,
+ 0xb0, 0xd7, 0x7e, 0x19, 0x31, 0x56, 0xff, 0x98,
+ 0x15, 0x72, 0xdb, 0xbc, 0x94, 0xf3, 0x5a, 0x3d,
+ 0x0a, 0x6d, 0xc4, 0xa3, 0x8b, 0xec, 0x45, 0x22,
+ 0x2b, 0x4c, 0xe5, 0x82, 0xaa, 0xcd, 0x64, 0x03,
+ 0x34, 0x53, 0xfa, 0x9d, 0xb5, 0xd2, 0x7b, 0x1c,
+ 0x69, 0x0e, 0xa7, 0xc0, 0xe8, 0x8f, 0x26, 0x41,
+ 0x76, 0x11, 0xb8, 0xdf, 0xf7, 0x90, 0x39, 0x5e,
+ 0x57, 0x30, 0x99, 0xfe, 0xd6, 0xb1, 0x18, 0x7f,
+ 0x48, 0x2f, 0x86, 0xe1, 0xc9, 0xae, 0x07, 0x60,
+ },
+ {
+ 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05,
+ 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62,
+ 0xce, 0xa6, 0x1e, 0x76, 0x73, 0x1b, 0xa3, 0xcb,
+ 0xa9, 0xc1, 0x79, 0x11, 0x14, 0x7c, 0xc4, 0xac,
+ 0x81, 0xe9, 0x51, 0x39, 0x3c, 0x54, 0xec, 0x84,
+ 0xe6, 0x8e, 0x36, 0x5e, 0x5b, 0x33, 0x8b, 0xe3,
+ 0x4f, 0x27, 0x9f, 0xf7, 0xf2, 0x9a, 0x22, 0x4a,
+ 0x28, 0x40, 0xf8, 0x90, 0x95, 0xfd, 0x45, 0x2d,
+ 0x1f, 0x77, 0xcf, 0xa7, 0xa2, 0xca, 0x72, 0x1a,
+ 0x78, 0x10, 0xa8, 0xc0, 0xc5, 0xad, 0x15, 0x7d,
+ 0xd1, 0xb9, 0x01, 0x69, 0x6c, 0x04, 0xbc, 0xd4,
+ 0xb6, 0xde, 0x66, 0x0e, 0x0b, 0x63, 0xdb, 0xb3,
+ 0x9e, 0xf6, 0x4e, 0x26, 0x23, 0x4b, 0xf3, 0x9b,
+ 0xf9, 0x91, 0x29, 0x41, 0x44, 0x2c, 0x94, 0xfc,
+ 0x50, 0x38, 0x80, 0xe8, 0xed, 0x85, 0x3d, 0x55,
+ 0x37, 0x5f, 0xe7, 0x8f, 0x8a, 0xe2, 0x5a, 0x32,
+ 0x3e, 0x56, 0xee, 0x86, 0x83, 0xeb, 0x53, 0x3b,
+ 0x59, 0x31, 0x89, 0xe1, 0xe4, 0x8c, 0x34, 0x5c,
+ 0xf0, 0x98, 0x20, 0x48, 0x4d, 0x25, 0x9d, 0xf5,
+ 0x97, 0xff, 0x47, 0x2f, 0x2a, 0x42, 0xfa, 0x92,
+ 0xbf, 0xd7, 0x6f, 0x07, 0x02, 0x6a, 0xd2, 0xba,
+ 0xd8, 0xb0, 0x08, 0x60, 0x65, 0x0d, 0xb5, 0xdd,
+ 0x71, 0x19, 0xa1, 0xc9, 0xcc, 0xa4, 0x1c, 0x74,
+ 0x16, 0x7e, 0xc6, 0xae, 0xab, 0xc3, 0x7b, 0x13,
+ 0x21, 0x49, 0xf1, 0x99, 0x9c, 0xf4, 0x4c, 0x24,
+ 0x46, 0x2e, 0x96, 0xfe, 0xfb, 0x93, 0x2b, 0x43,
+ 0xef, 0x87, 0x3f, 0x57, 0x52, 0x3a, 0x82, 0xea,
+ 0x88, 0xe0, 0x58, 0x30, 0x35, 0x5d, 0xe5, 0x8d,
+ 0xa0, 0xc8, 0x70, 0x18, 0x1d, 0x75, 0xcd, 0xa5,
+ 0xc7, 0xaf, 0x17, 0x7f, 0x7a, 0x12, 0xaa, 0xc2,
+ 0x6e, 0x06, 0xbe, 0xd6, 0xd3, 0xbb, 0x03, 0x6b,
+ 0x09, 0x61, 0xd9, 0xb1, 0xb4, 0xdc, 0x64, 0x0c,
+ },
+ {
+ 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+ 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d,
+ 0xde, 0xb7, 0x0c, 0x65, 0x67, 0x0e, 0xb5, 0xdc,
+ 0xb1, 0xd8, 0x63, 0x0a, 0x08, 0x61, 0xda, 0xb3,
+ 0xa1, 0xc8, 0x73, 0x1a, 0x18, 0x71, 0xca, 0xa3,
+ 0xce, 0xa7, 0x1c, 0x75, 0x77, 0x1e, 0xa5, 0xcc,
+ 0x7f, 0x16, 0xad, 0xc4, 0xc6, 0xaf, 0x14, 0x7d,
+ 0x10, 0x79, 0xc2, 0xab, 0xa9, 0xc0, 0x7b, 0x12,
+ 0x5f, 0x36, 0x8d, 0xe4, 0xe6, 0x8f, 0x34, 0x5d,
+ 0x30, 0x59, 0xe2, 0x8b, 0x89, 0xe0, 0x5b, 0x32,
+ 0x81, 0xe8, 0x53, 0x3a, 0x38, 0x51, 0xea, 0x83,
+ 0xee, 0x87, 0x3c, 0x55, 0x57, 0x3e, 0x85, 0xec,
+ 0xfe, 0x97, 0x2c, 0x45, 0x47, 0x2e, 0x95, 0xfc,
+ 0x91, 0xf8, 0x43, 0x2a, 0x28, 0x41, 0xfa, 0x93,
+ 0x20, 0x49, 0xf2, 0x9b, 0x99, 0xf0, 0x4b, 0x22,
+ 0x4f, 0x26, 0x9d, 0xf4, 0xf6, 0x9f, 0x24, 0x4d,
+ 0xbe, 0xd7, 0x6c, 0x05, 0x07, 0x6e, 0xd5, 0xbc,
+ 0xd1, 0xb8, 0x03, 0x6a, 0x68, 0x01, 0xba, 0xd3,
+ 0x60, 0x09, 0xb2, 0xdb, 0xd9, 0xb0, 0x0b, 0x62,
+ 0x0f, 0x66, 0xdd, 0xb4, 0xb6, 0xdf, 0x64, 0x0d,
+ 0x1f, 0x76, 0xcd, 0xa4, 0xa6, 0xcf, 0x74, 0x1d,
+ 0x70, 0x19, 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72,
+ 0xc1, 0xa8, 0x13, 0x7a, 0x78, 0x11, 0xaa, 0xc3,
+ 0xae, 0xc7, 0x7c, 0x15, 0x17, 0x7e, 0xc5, 0xac,
+ 0xe1, 0x88, 0x33, 0x5a, 0x58, 0x31, 0x8a, 0xe3,
+ 0x8e, 0xe7, 0x5c, 0x35, 0x37, 0x5e, 0xe5, 0x8c,
+ 0x3f, 0x56, 0xed, 0x84, 0x86, 0xef, 0x54, 0x3d,
+ 0x50, 0x39, 0x82, 0xeb, 0xe9, 0x80, 0x3b, 0x52,
+ 0x40, 0x29, 0x92, 0xfb, 0xf9, 0x90, 0x2b, 0x42,
+ 0x2f, 0x46, 0xfd, 0x94, 0x96, 0xff, 0x44, 0x2d,
+ 0x9e, 0xf7, 0x4c, 0x25, 0x27, 0x4e, 0xf5, 0x9c,
+ 0xf1, 0x98, 0x23, 0x4a, 0x48, 0x21, 0x9a, 0xf3,
+ },
+ {
+ 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b,
+ 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c,
+ 0xee, 0x84, 0x3a, 0x50, 0x5b, 0x31, 0x8f, 0xe5,
+ 0x99, 0xf3, 0x4d, 0x27, 0x2c, 0x46, 0xf8, 0x92,
+ 0xc1, 0xab, 0x15, 0x7f, 0x74, 0x1e, 0xa0, 0xca,
+ 0xb6, 0xdc, 0x62, 0x08, 0x03, 0x69, 0xd7, 0xbd,
+ 0x2f, 0x45, 0xfb, 0x91, 0x9a, 0xf0, 0x4e, 0x24,
+ 0x58, 0x32, 0x8c, 0xe6, 0xed, 0x87, 0x39, 0x53,
+ 0x9f, 0xf5, 0x4b, 0x21, 0x2a, 0x40, 0xfe, 0x94,
+ 0xe8, 0x82, 0x3c, 0x56, 0x5d, 0x37, 0x89, 0xe3,
+ 0x71, 0x1b, 0xa5, 0xcf, 0xc4, 0xae, 0x10, 0x7a,
+ 0x06, 0x6c, 0xd2, 0xb8, 0xb3, 0xd9, 0x67, 0x0d,
+ 0x5e, 0x34, 0x8a, 0xe0, 0xeb, 0x81, 0x3f, 0x55,
+ 0x29, 0x43, 0xfd, 0x97, 0x9c, 0xf6, 0x48, 0x22,
+ 0xb0, 0xda, 0x64, 0x0e, 0x05, 0x6f, 0xd1, 0xbb,
+ 0xc7, 0xad, 0x13, 0x79, 0x72, 0x18, 0xa6, 0xcc,
+ 0x23, 0x49, 0xf7, 0x9d, 0x96, 0xfc, 0x42, 0x28,
+ 0x54, 0x3e, 0x80, 0xea, 0xe1, 0x8b, 0x35, 0x5f,
+ 0xcd, 0xa7, 0x19, 0x73, 0x78, 0x12, 0xac, 0xc6,
+ 0xba, 0xd0, 0x6e, 0x04, 0x0f, 0x65, 0xdb, 0xb1,
+ 0xe2, 0x88, 0x36, 0x5c, 0x57, 0x3d, 0x83, 0xe9,
+ 0x95, 0xff, 0x41, 0x2b, 0x20, 0x4a, 0xf4, 0x9e,
+ 0x0c, 0x66, 0xd8, 0xb2, 0xb9, 0xd3, 0x6d, 0x07,
+ 0x7b, 0x11, 0xaf, 0xc5, 0xce, 0xa4, 0x1a, 0x70,
+ 0xbc, 0xd6, 0x68, 0x02, 0x09, 0x63, 0xdd, 0xb7,
+ 0xcb, 0xa1, 0x1f, 0x75, 0x7e, 0x14, 0xaa, 0xc0,
+ 0x52, 0x38, 0x86, 0xec, 0xe7, 0x8d, 0x33, 0x59,
+ 0x25, 0x4f, 0xf1, 0x9b, 0x90, 0xfa, 0x44, 0x2e,
+ 0x7d, 0x17, 0xa9, 0xc3, 0xc8, 0xa2, 0x1c, 0x76,
+ 0x0a, 0x60, 0xde, 0xb4, 0xbf, 0xd5, 0x6b, 0x01,
+ 0x93, 0xf9, 0x47, 0x2d, 0x26, 0x4c, 0xf2, 0x98,
+ 0xe4, 0x8e, 0x30, 0x5a, 0x51, 0x3b, 0x85, 0xef,
+ },
+ {
+ 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c,
+ 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73,
+ 0xfe, 0x95, 0x28, 0x43, 0x4f, 0x24, 0x99, 0xf2,
+ 0x81, 0xea, 0x57, 0x3c, 0x30, 0x5b, 0xe6, 0x8d,
+ 0xe1, 0x8a, 0x37, 0x5c, 0x50, 0x3b, 0x86, 0xed,
+ 0x9e, 0xf5, 0x48, 0x23, 0x2f, 0x44, 0xf9, 0x92,
+ 0x1f, 0x74, 0xc9, 0xa2, 0xae, 0xc5, 0x78, 0x13,
+ 0x60, 0x0b, 0xb6, 0xdd, 0xd1, 0xba, 0x07, 0x6c,
+ 0xdf, 0xb4, 0x09, 0x62, 0x6e, 0x05, 0xb8, 0xd3,
+ 0xa0, 0xcb, 0x76, 0x1d, 0x11, 0x7a, 0xc7, 0xac,
+ 0x21, 0x4a, 0xf7, 0x9c, 0x90, 0xfb, 0x46, 0x2d,
+ 0x5e, 0x35, 0x88, 0xe3, 0xef, 0x84, 0x39, 0x52,
+ 0x3e, 0x55, 0xe8, 0x83, 0x8f, 0xe4, 0x59, 0x32,
+ 0x41, 0x2a, 0x97, 0xfc, 0xf0, 0x9b, 0x26, 0x4d,
+ 0xc0, 0xab, 0x16, 0x7d, 0x71, 0x1a, 0xa7, 0xcc,
+ 0xbf, 0xd4, 0x69, 0x02, 0x0e, 0x65, 0xd8, 0xb3,
+ 0xa3, 0xc8, 0x75, 0x1e, 0x12, 0x79, 0xc4, 0xaf,
+ 0xdc, 0xb7, 0x0a, 0x61, 0x6d, 0x06, 0xbb, 0xd0,
+ 0x5d, 0x36, 0x8b, 0xe0, 0xec, 0x87, 0x3a, 0x51,
+ 0x22, 0x49, 0xf4, 0x9f, 0x93, 0xf8, 0x45, 0x2e,
+ 0x42, 0x29, 0x94, 0xff, 0xf3, 0x98, 0x25, 0x4e,
+ 0x3d, 0x56, 0xeb, 0x80, 0x8c, 0xe7, 0x5a, 0x31,
+ 0xbc, 0xd7, 0x6a, 0x01, 0x0d, 0x66, 0xdb, 0xb0,
+ 0xc3, 0xa8, 0x15, 0x7e, 0x72, 0x19, 0xa4, 0xcf,
+ 0x7c, 0x17, 0xaa, 0xc1, 0xcd, 0xa6, 0x1b, 0x70,
+ 0x03, 0x68, 0xd5, 0xbe, 0xb2, 0xd9, 0x64, 0x0f,
+ 0x82, 0xe9, 0x54, 0x3f, 0x33, 0x58, 0xe5, 0x8e,
+ 0xfd, 0x96, 0x2b, 0x40, 0x4c, 0x27, 0x9a, 0xf1,
+ 0x9d, 0xf6, 0x4b, 0x20, 0x2c, 0x47, 0xfa, 0x91,
+ 0xe2, 0x89, 0x34, 0x5f, 0x53, 0x38, 0x85, 0xee,
+ 0x63, 0x08, 0xb5, 0xde, 0xd2, 0xb9, 0x04, 0x6f,
+ 0x1c, 0x77, 0xca, 0xa1, 0xad, 0xc6, 0x7b, 0x10,
+ },
+ {
+ 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19,
+ 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e,
+ 0x8e, 0xe2, 0x56, 0x3a, 0x23, 0x4f, 0xfb, 0x97,
+ 0xc9, 0xa5, 0x11, 0x7d, 0x64, 0x08, 0xbc, 0xd0,
+ 0x01, 0x6d, 0xd9, 0xb5, 0xac, 0xc0, 0x74, 0x18,
+ 0x46, 0x2a, 0x9e, 0xf2, 0xeb, 0x87, 0x33, 0x5f,
+ 0x8f, 0xe3, 0x57, 0x3b, 0x22, 0x4e, 0xfa, 0x96,
+ 0xc8, 0xa4, 0x10, 0x7c, 0x65, 0x09, 0xbd, 0xd1,
+ 0x02, 0x6e, 0xda, 0xb6, 0xaf, 0xc3, 0x77, 0x1b,
+ 0x45, 0x29, 0x9d, 0xf1, 0xe8, 0x84, 0x30, 0x5c,
+ 0x8c, 0xe0, 0x54, 0x38, 0x21, 0x4d, 0xf9, 0x95,
+ 0xcb, 0xa7, 0x13, 0x7f, 0x66, 0x0a, 0xbe, 0xd2,
+ 0x03, 0x6f, 0xdb, 0xb7, 0xae, 0xc2, 0x76, 0x1a,
+ 0x44, 0x28, 0x9c, 0xf0, 0xe9, 0x85, 0x31, 0x5d,
+ 0x8d, 0xe1, 0x55, 0x39, 0x20, 0x4c, 0xf8, 0x94,
+ 0xca, 0xa6, 0x12, 0x7e, 0x67, 0x0b, 0xbf, 0xd3,
+ 0x04, 0x68, 0xdc, 0xb0, 0xa9, 0xc5, 0x71, 0x1d,
+ 0x43, 0x2f, 0x9b, 0xf7, 0xee, 0x82, 0x36, 0x5a,
+ 0x8a, 0xe6, 0x52, 0x3e, 0x27, 0x4b, 0xff, 0x93,
+ 0xcd, 0xa1, 0x15, 0x79, 0x60, 0x0c, 0xb8, 0xd4,
+ 0x05, 0x69, 0xdd, 0xb1, 0xa8, 0xc4, 0x70, 0x1c,
+ 0x42, 0x2e, 0x9a, 0xf6, 0xef, 0x83, 0x37, 0x5b,
+ 0x8b, 0xe7, 0x53, 0x3f, 0x26, 0x4a, 0xfe, 0x92,
+ 0xcc, 0xa0, 0x14, 0x78, 0x61, 0x0d, 0xb9, 0xd5,
+ 0x06, 0x6a, 0xde, 0xb2, 0xab, 0xc7, 0x73, 0x1f,
+ 0x41, 0x2d, 0x99, 0xf5, 0xec, 0x80, 0x34, 0x58,
+ 0x88, 0xe4, 0x50, 0x3c, 0x25, 0x49, 0xfd, 0x91,
+ 0xcf, 0xa3, 0x17, 0x7b, 0x62, 0x0e, 0xba, 0xd6,
+ 0x07, 0x6b, 0xdf, 0xb3, 0xaa, 0xc6, 0x72, 0x1e,
+ 0x40, 0x2c, 0x98, 0xf4, 0xed, 0x81, 0x35, 0x59,
+ 0x89, 0xe5, 0x51, 0x3d, 0x24, 0x48, 0xfc, 0x90,
+ 0xce, 0xa2, 0x16, 0x7a, 0x63, 0x0f, 0xbb, 0xd7,
+ },
+ {
+ 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e,
+ 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51,
+ 0x9e, 0xf3, 0x44, 0x29, 0x37, 0x5a, 0xed, 0x80,
+ 0xd1, 0xbc, 0x0b, 0x66, 0x78, 0x15, 0xa2, 0xcf,
+ 0x21, 0x4c, 0xfb, 0x96, 0x88, 0xe5, 0x52, 0x3f,
+ 0x6e, 0x03, 0xb4, 0xd9, 0xc7, 0xaa, 0x1d, 0x70,
+ 0xbf, 0xd2, 0x65, 0x08, 0x16, 0x7b, 0xcc, 0xa1,
+ 0xf0, 0x9d, 0x2a, 0x47, 0x59, 0x34, 0x83, 0xee,
+ 0x42, 0x2f, 0x98, 0xf5, 0xeb, 0x86, 0x31, 0x5c,
+ 0x0d, 0x60, 0xd7, 0xba, 0xa4, 0xc9, 0x7e, 0x13,
+ 0xdc, 0xb1, 0x06, 0x6b, 0x75, 0x18, 0xaf, 0xc2,
+ 0x93, 0xfe, 0x49, 0x24, 0x3a, 0x57, 0xe0, 0x8d,
+ 0x63, 0x0e, 0xb9, 0xd4, 0xca, 0xa7, 0x10, 0x7d,
+ 0x2c, 0x41, 0xf6, 0x9b, 0x85, 0xe8, 0x5f, 0x32,
+ 0xfd, 0x90, 0x27, 0x4a, 0x54, 0x39, 0x8e, 0xe3,
+ 0xb2, 0xdf, 0x68, 0x05, 0x1b, 0x76, 0xc1, 0xac,
+ 0x84, 0xe9, 0x5e, 0x33, 0x2d, 0x40, 0xf7, 0x9a,
+ 0xcb, 0xa6, 0x11, 0x7c, 0x62, 0x0f, 0xb8, 0xd5,
+ 0x1a, 0x77, 0xc0, 0xad, 0xb3, 0xde, 0x69, 0x04,
+ 0x55, 0x38, 0x8f, 0xe2, 0xfc, 0x91, 0x26, 0x4b,
+ 0xa5, 0xc8, 0x7f, 0x12, 0x0c, 0x61, 0xd6, 0xbb,
+ 0xea, 0x87, 0x30, 0x5d, 0x43, 0x2e, 0x99, 0xf4,
+ 0x3b, 0x56, 0xe1, 0x8c, 0x92, 0xff, 0x48, 0x25,
+ 0x74, 0x19, 0xae, 0xc3, 0xdd, 0xb0, 0x07, 0x6a,
+ 0xc6, 0xab, 0x1c, 0x71, 0x6f, 0x02, 0xb5, 0xd8,
+ 0x89, 0xe4, 0x53, 0x3e, 0x20, 0x4d, 0xfa, 0x97,
+ 0x58, 0x35, 0x82, 0xef, 0xf1, 0x9c, 0x2b, 0x46,
+ 0x17, 0x7a, 0xcd, 0xa0, 0xbe, 0xd3, 0x64, 0x09,
+ 0xe7, 0x8a, 0x3d, 0x50, 0x4e, 0x23, 0x94, 0xf9,
+ 0xa8, 0xc5, 0x72, 0x1f, 0x01, 0x6c, 0xdb, 0xb6,
+ 0x79, 0x14, 0xa3, 0xce, 0xd0, 0xbd, 0x0a, 0x67,
+ 0x36, 0x5b, 0xec, 0x81, 0x9f, 0xf2, 0x45, 0x28,
+ },
+ {
+ 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17,
+ 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40,
+ 0xae, 0xc0, 0x72, 0x1c, 0x0b, 0x65, 0xd7, 0xb9,
+ 0xf9, 0x97, 0x25, 0x4b, 0x5c, 0x32, 0x80, 0xee,
+ 0x41, 0x2f, 0x9d, 0xf3, 0xe4, 0x8a, 0x38, 0x56,
+ 0x16, 0x78, 0xca, 0xa4, 0xb3, 0xdd, 0x6f, 0x01,
+ 0xef, 0x81, 0x33, 0x5d, 0x4a, 0x24, 0x96, 0xf8,
+ 0xb8, 0xd6, 0x64, 0x0a, 0x1d, 0x73, 0xc1, 0xaf,
+ 0x82, 0xec, 0x5e, 0x30, 0x27, 0x49, 0xfb, 0x95,
+ 0xd5, 0xbb, 0x09, 0x67, 0x70, 0x1e, 0xac, 0xc2,
+ 0x2c, 0x42, 0xf0, 0x9e, 0x89, 0xe7, 0x55, 0x3b,
+ 0x7b, 0x15, 0xa7, 0xc9, 0xde, 0xb0, 0x02, 0x6c,
+ 0xc3, 0xad, 0x1f, 0x71, 0x66, 0x08, 0xba, 0xd4,
+ 0x94, 0xfa, 0x48, 0x26, 0x31, 0x5f, 0xed, 0x83,
+ 0x6d, 0x03, 0xb1, 0xdf, 0xc8, 0xa6, 0x14, 0x7a,
+ 0x3a, 0x54, 0xe6, 0x88, 0x9f, 0xf1, 0x43, 0x2d,
+ 0x19, 0x77, 0xc5, 0xab, 0xbc, 0xd2, 0x60, 0x0e,
+ 0x4e, 0x20, 0x92, 0xfc, 0xeb, 0x85, 0x37, 0x59,
+ 0xb7, 0xd9, 0x6b, 0x05, 0x12, 0x7c, 0xce, 0xa0,
+ 0xe0, 0x8e, 0x3c, 0x52, 0x45, 0x2b, 0x99, 0xf7,
+ 0x58, 0x36, 0x84, 0xea, 0xfd, 0x93, 0x21, 0x4f,
+ 0x0f, 0x61, 0xd3, 0xbd, 0xaa, 0xc4, 0x76, 0x18,
+ 0xf6, 0x98, 0x2a, 0x44, 0x53, 0x3d, 0x8f, 0xe1,
+ 0xa1, 0xcf, 0x7d, 0x13, 0x04, 0x6a, 0xd8, 0xb6,
+ 0x9b, 0xf5, 0x47, 0x29, 0x3e, 0x50, 0xe2, 0x8c,
+ 0xcc, 0xa2, 0x10, 0x7e, 0x69, 0x07, 0xb5, 0xdb,
+ 0x35, 0x5b, 0xe9, 0x87, 0x90, 0xfe, 0x4c, 0x22,
+ 0x62, 0x0c, 0xbe, 0xd0, 0xc7, 0xa9, 0x1b, 0x75,
+ 0xda, 0xb4, 0x06, 0x68, 0x7f, 0x11, 0xa3, 0xcd,
+ 0x8d, 0xe3, 0x51, 0x3f, 0x28, 0x46, 0xf4, 0x9a,
+ 0x74, 0x1a, 0xa8, 0xc6, 0xd1, 0xbf, 0x0d, 0x63,
+ 0x23, 0x4d, 0xff, 0x91, 0x86, 0xe8, 0x5a, 0x34,
+ },
+ {
+ 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10,
+ 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f,
+ 0xbe, 0xd1, 0x60, 0x0f, 0x1f, 0x70, 0xc1, 0xae,
+ 0xe1, 0x8e, 0x3f, 0x50, 0x40, 0x2f, 0x9e, 0xf1,
+ 0x61, 0x0e, 0xbf, 0xd0, 0xc0, 0xaf, 0x1e, 0x71,
+ 0x3e, 0x51, 0xe0, 0x8f, 0x9f, 0xf0, 0x41, 0x2e,
+ 0xdf, 0xb0, 0x01, 0x6e, 0x7e, 0x11, 0xa0, 0xcf,
+ 0x80, 0xef, 0x5e, 0x31, 0x21, 0x4e, 0xff, 0x90,
+ 0xc2, 0xad, 0x1c, 0x73, 0x63, 0x0c, 0xbd, 0xd2,
+ 0x9d, 0xf2, 0x43, 0x2c, 0x3c, 0x53, 0xe2, 0x8d,
+ 0x7c, 0x13, 0xa2, 0xcd, 0xdd, 0xb2, 0x03, 0x6c,
+ 0x23, 0x4c, 0xfd, 0x92, 0x82, 0xed, 0x5c, 0x33,
+ 0xa3, 0xcc, 0x7d, 0x12, 0x02, 0x6d, 0xdc, 0xb3,
+ 0xfc, 0x93, 0x22, 0x4d, 0x5d, 0x32, 0x83, 0xec,
+ 0x1d, 0x72, 0xc3, 0xac, 0xbc, 0xd3, 0x62, 0x0d,
+ 0x42, 0x2d, 0x9c, 0xf3, 0xe3, 0x8c, 0x3d, 0x52,
+ 0x99, 0xf6, 0x47, 0x28, 0x38, 0x57, 0xe6, 0x89,
+ 0xc6, 0xa9, 0x18, 0x77, 0x67, 0x08, 0xb9, 0xd6,
+ 0x27, 0x48, 0xf9, 0x96, 0x86, 0xe9, 0x58, 0x37,
+ 0x78, 0x17, 0xa6, 0xc9, 0xd9, 0xb6, 0x07, 0x68,
+ 0xf8, 0x97, 0x26, 0x49, 0x59, 0x36, 0x87, 0xe8,
+ 0xa7, 0xc8, 0x79, 0x16, 0x06, 0x69, 0xd8, 0xb7,
+ 0x46, 0x29, 0x98, 0xf7, 0xe7, 0x88, 0x39, 0x56,
+ 0x19, 0x76, 0xc7, 0xa8, 0xb8, 0xd7, 0x66, 0x09,
+ 0x5b, 0x34, 0x85, 0xea, 0xfa, 0x95, 0x24, 0x4b,
+ 0x04, 0x6b, 0xda, 0xb5, 0xa5, 0xca, 0x7b, 0x14,
+ 0xe5, 0x8a, 0x3b, 0x54, 0x44, 0x2b, 0x9a, 0xf5,
+ 0xba, 0xd5, 0x64, 0x0b, 0x1b, 0x74, 0xc5, 0xaa,
+ 0x3a, 0x55, 0xe4, 0x8b, 0x9b, 0xf4, 0x45, 0x2a,
+ 0x65, 0x0a, 0xbb, 0xd4, 0xc4, 0xab, 0x1a, 0x75,
+ 0x84, 0xeb, 0x5a, 0x35, 0x25, 0x4a, 0xfb, 0x94,
+ 0xdb, 0xb4, 0x05, 0x6a, 0x7a, 0x15, 0xa4, 0xcb,
+ },
+ {
+ 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d,
+ 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea,
+ 0x53, 0x23, 0xb3, 0xc3, 0x8e, 0xfe, 0x6e, 0x1e,
+ 0xf4, 0x84, 0x14, 0x64, 0x29, 0x59, 0xc9, 0xb9,
+ 0xa6, 0xd6, 0x46, 0x36, 0x7b, 0x0b, 0x9b, 0xeb,
+ 0x01, 0x71, 0xe1, 0x91, 0xdc, 0xac, 0x3c, 0x4c,
+ 0xf5, 0x85, 0x15, 0x65, 0x28, 0x58, 0xc8, 0xb8,
+ 0x52, 0x22, 0xb2, 0xc2, 0x8f, 0xff, 0x6f, 0x1f,
+ 0x51, 0x21, 0xb1, 0xc1, 0x8c, 0xfc, 0x6c, 0x1c,
+ 0xf6, 0x86, 0x16, 0x66, 0x2b, 0x5b, 0xcb, 0xbb,
+ 0x02, 0x72, 0xe2, 0x92, 0xdf, 0xaf, 0x3f, 0x4f,
+ 0xa5, 0xd5, 0x45, 0x35, 0x78, 0x08, 0x98, 0xe8,
+ 0xf7, 0x87, 0x17, 0x67, 0x2a, 0x5a, 0xca, 0xba,
+ 0x50, 0x20, 0xb0, 0xc0, 0x8d, 0xfd, 0x6d, 0x1d,
+ 0xa4, 0xd4, 0x44, 0x34, 0x79, 0x09, 0x99, 0xe9,
+ 0x03, 0x73, 0xe3, 0x93, 0xde, 0xae, 0x3e, 0x4e,
+ 0xa2, 0xd2, 0x42, 0x32, 0x7f, 0x0f, 0x9f, 0xef,
+ 0x05, 0x75, 0xe5, 0x95, 0xd8, 0xa8, 0x38, 0x48,
+ 0xf1, 0x81, 0x11, 0x61, 0x2c, 0x5c, 0xcc, 0xbc,
+ 0x56, 0x26, 0xb6, 0xc6, 0x8b, 0xfb, 0x6b, 0x1b,
+ 0x04, 0x74, 0xe4, 0x94, 0xd9, 0xa9, 0x39, 0x49,
+ 0xa3, 0xd3, 0x43, 0x33, 0x7e, 0x0e, 0x9e, 0xee,
+ 0x57, 0x27, 0xb7, 0xc7, 0x8a, 0xfa, 0x6a, 0x1a,
+ 0xf0, 0x80, 0x10, 0x60, 0x2d, 0x5d, 0xcd, 0xbd,
+ 0xf3, 0x83, 0x13, 0x63, 0x2e, 0x5e, 0xce, 0xbe,
+ 0x54, 0x24, 0xb4, 0xc4, 0x89, 0xf9, 0x69, 0x19,
+ 0xa0, 0xd0, 0x40, 0x30, 0x7d, 0x0d, 0x9d, 0xed,
+ 0x07, 0x77, 0xe7, 0x97, 0xda, 0xaa, 0x3a, 0x4a,
+ 0x55, 0x25, 0xb5, 0xc5, 0x88, 0xf8, 0x68, 0x18,
+ 0xf2, 0x82, 0x12, 0x62, 0x2f, 0x5f, 0xcf, 0xbf,
+ 0x06, 0x76, 0xe6, 0x96, 0xdb, 0xab, 0x3b, 0x4b,
+ 0xa1, 0xd1, 0x41, 0x31, 0x7c, 0x0c, 0x9c, 0xec,
+ },
+ {
+ 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a,
+ 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5,
+ 0x43, 0x32, 0xa1, 0xd0, 0x9a, 0xeb, 0x78, 0x09,
+ 0xec, 0x9d, 0x0e, 0x7f, 0x35, 0x44, 0xd7, 0xa6,
+ 0x86, 0xf7, 0x64, 0x15, 0x5f, 0x2e, 0xbd, 0xcc,
+ 0x29, 0x58, 0xcb, 0xba, 0xf0, 0x81, 0x12, 0x63,
+ 0xc5, 0xb4, 0x27, 0x56, 0x1c, 0x6d, 0xfe, 0x8f,
+ 0x6a, 0x1b, 0x88, 0xf9, 0xb3, 0xc2, 0x51, 0x20,
+ 0x11, 0x60, 0xf3, 0x82, 0xc8, 0xb9, 0x2a, 0x5b,
+ 0xbe, 0xcf, 0x5c, 0x2d, 0x67, 0x16, 0x85, 0xf4,
+ 0x52, 0x23, 0xb0, 0xc1, 0x8b, 0xfa, 0x69, 0x18,
+ 0xfd, 0x8c, 0x1f, 0x6e, 0x24, 0x55, 0xc6, 0xb7,
+ 0x97, 0xe6, 0x75, 0x04, 0x4e, 0x3f, 0xac, 0xdd,
+ 0x38, 0x49, 0xda, 0xab, 0xe1, 0x90, 0x03, 0x72,
+ 0xd4, 0xa5, 0x36, 0x47, 0x0d, 0x7c, 0xef, 0x9e,
+ 0x7b, 0x0a, 0x99, 0xe8, 0xa2, 0xd3, 0x40, 0x31,
+ 0x22, 0x53, 0xc0, 0xb1, 0xfb, 0x8a, 0x19, 0x68,
+ 0x8d, 0xfc, 0x6f, 0x1e, 0x54, 0x25, 0xb6, 0xc7,
+ 0x61, 0x10, 0x83, 0xf2, 0xb8, 0xc9, 0x5a, 0x2b,
+ 0xce, 0xbf, 0x2c, 0x5d, 0x17, 0x66, 0xf5, 0x84,
+ 0xa4, 0xd5, 0x46, 0x37, 0x7d, 0x0c, 0x9f, 0xee,
+ 0x0b, 0x7a, 0xe9, 0x98, 0xd2, 0xa3, 0x30, 0x41,
+ 0xe7, 0x96, 0x05, 0x74, 0x3e, 0x4f, 0xdc, 0xad,
+ 0x48, 0x39, 0xaa, 0xdb, 0x91, 0xe0, 0x73, 0x02,
+ 0x33, 0x42, 0xd1, 0xa0, 0xea, 0x9b, 0x08, 0x79,
+ 0x9c, 0xed, 0x7e, 0x0f, 0x45, 0x34, 0xa7, 0xd6,
+ 0x70, 0x01, 0x92, 0xe3, 0xa9, 0xd8, 0x4b, 0x3a,
+ 0xdf, 0xae, 0x3d, 0x4c, 0x06, 0x77, 0xe4, 0x95,
+ 0xb5, 0xc4, 0x57, 0x26, 0x6c, 0x1d, 0x8e, 0xff,
+ 0x1a, 0x6b, 0xf8, 0x89, 0xc3, 0xb2, 0x21, 0x50,
+ 0xf6, 0x87, 0x14, 0x65, 0x2f, 0x5e, 0xcd, 0xbc,
+ 0x59, 0x28, 0xbb, 0xca, 0x80, 0xf1, 0x62, 0x13,
+ },
+ {
+ 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43,
+ 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4,
+ 0x73, 0x01, 0x97, 0xe5, 0xa6, 0xd4, 0x42, 0x30,
+ 0xc4, 0xb6, 0x20, 0x52, 0x11, 0x63, 0xf5, 0x87,
+ 0xe6, 0x94, 0x02, 0x70, 0x33, 0x41, 0xd7, 0xa5,
+ 0x51, 0x23, 0xb5, 0xc7, 0x84, 0xf6, 0x60, 0x12,
+ 0x95, 0xe7, 0x71, 0x03, 0x40, 0x32, 0xa4, 0xd6,
+ 0x22, 0x50, 0xc6, 0xb4, 0xf7, 0x85, 0x13, 0x61,
+ 0xd1, 0xa3, 0x35, 0x47, 0x04, 0x76, 0xe0, 0x92,
+ 0x66, 0x14, 0x82, 0xf0, 0xb3, 0xc1, 0x57, 0x25,
+ 0xa2, 0xd0, 0x46, 0x34, 0x77, 0x05, 0x93, 0xe1,
+ 0x15, 0x67, 0xf1, 0x83, 0xc0, 0xb2, 0x24, 0x56,
+ 0x37, 0x45, 0xd3, 0xa1, 0xe2, 0x90, 0x06, 0x74,
+ 0x80, 0xf2, 0x64, 0x16, 0x55, 0x27, 0xb1, 0xc3,
+ 0x44, 0x36, 0xa0, 0xd2, 0x91, 0xe3, 0x75, 0x07,
+ 0xf3, 0x81, 0x17, 0x65, 0x26, 0x54, 0xc2, 0xb0,
+ 0xbf, 0xcd, 0x5b, 0x29, 0x6a, 0x18, 0x8e, 0xfc,
+ 0x08, 0x7a, 0xec, 0x9e, 0xdd, 0xaf, 0x39, 0x4b,
+ 0xcc, 0xbe, 0x28, 0x5a, 0x19, 0x6b, 0xfd, 0x8f,
+ 0x7b, 0x09, 0x9f, 0xed, 0xae, 0xdc, 0x4a, 0x38,
+ 0x59, 0x2b, 0xbd, 0xcf, 0x8c, 0xfe, 0x68, 0x1a,
+ 0xee, 0x9c, 0x0a, 0x78, 0x3b, 0x49, 0xdf, 0xad,
+ 0x2a, 0x58, 0xce, 0xbc, 0xff, 0x8d, 0x1b, 0x69,
+ 0x9d, 0xef, 0x79, 0x0b, 0x48, 0x3a, 0xac, 0xde,
+ 0x6e, 0x1c, 0x8a, 0xf8, 0xbb, 0xc9, 0x5f, 0x2d,
+ 0xd9, 0xab, 0x3d, 0x4f, 0x0c, 0x7e, 0xe8, 0x9a,
+ 0x1d, 0x6f, 0xf9, 0x8b, 0xc8, 0xba, 0x2c, 0x5e,
+ 0xaa, 0xd8, 0x4e, 0x3c, 0x7f, 0x0d, 0x9b, 0xe9,
+ 0x88, 0xfa, 0x6c, 0x1e, 0x5d, 0x2f, 0xb9, 0xcb,
+ 0x3f, 0x4d, 0xdb, 0xa9, 0xea, 0x98, 0x0e, 0x7c,
+ 0xfb, 0x89, 0x1f, 0x6d, 0x2e, 0x5c, 0xca, 0xb8,
+ 0x4c, 0x3e, 0xa8, 0xda, 0x99, 0xeb, 0x7d, 0x0f,
+ },
+ {
+ 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44,
+ 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb,
+ 0x63, 0x10, 0x85, 0xf6, 0xb2, 0xc1, 0x54, 0x27,
+ 0xdc, 0xaf, 0x3a, 0x49, 0x0d, 0x7e, 0xeb, 0x98,
+ 0xc6, 0xb5, 0x20, 0x53, 0x17, 0x64, 0xf1, 0x82,
+ 0x79, 0x0a, 0x9f, 0xec, 0xa8, 0xdb, 0x4e, 0x3d,
+ 0xa5, 0xd6, 0x43, 0x30, 0x74, 0x07, 0x92, 0xe1,
+ 0x1a, 0x69, 0xfc, 0x8f, 0xcb, 0xb8, 0x2d, 0x5e,
+ 0x91, 0xe2, 0x77, 0x04, 0x40, 0x33, 0xa6, 0xd5,
+ 0x2e, 0x5d, 0xc8, 0xbb, 0xff, 0x8c, 0x19, 0x6a,
+ 0xf2, 0x81, 0x14, 0x67, 0x23, 0x50, 0xc5, 0xb6,
+ 0x4d, 0x3e, 0xab, 0xd8, 0x9c, 0xef, 0x7a, 0x09,
+ 0x57, 0x24, 0xb1, 0xc2, 0x86, 0xf5, 0x60, 0x13,
+ 0xe8, 0x9b, 0x0e, 0x7d, 0x39, 0x4a, 0xdf, 0xac,
+ 0x34, 0x47, 0xd2, 0xa1, 0xe5, 0x96, 0x03, 0x70,
+ 0x8b, 0xf8, 0x6d, 0x1e, 0x5a, 0x29, 0xbc, 0xcf,
+ 0x3f, 0x4c, 0xd9, 0xaa, 0xee, 0x9d, 0x08, 0x7b,
+ 0x80, 0xf3, 0x66, 0x15, 0x51, 0x22, 0xb7, 0xc4,
+ 0x5c, 0x2f, 0xba, 0xc9, 0x8d, 0xfe, 0x6b, 0x18,
+ 0xe3, 0x90, 0x05, 0x76, 0x32, 0x41, 0xd4, 0xa7,
+ 0xf9, 0x8a, 0x1f, 0x6c, 0x28, 0x5b, 0xce, 0xbd,
+ 0x46, 0x35, 0xa0, 0xd3, 0x97, 0xe4, 0x71, 0x02,
+ 0x9a, 0xe9, 0x7c, 0x0f, 0x4b, 0x38, 0xad, 0xde,
+ 0x25, 0x56, 0xc3, 0xb0, 0xf4, 0x87, 0x12, 0x61,
+ 0xae, 0xdd, 0x48, 0x3b, 0x7f, 0x0c, 0x99, 0xea,
+ 0x11, 0x62, 0xf7, 0x84, 0xc0, 0xb3, 0x26, 0x55,
+ 0xcd, 0xbe, 0x2b, 0x58, 0x1c, 0x6f, 0xfa, 0x89,
+ 0x72, 0x01, 0x94, 0xe7, 0xa3, 0xd0, 0x45, 0x36,
+ 0x68, 0x1b, 0x8e, 0xfd, 0xb9, 0xca, 0x5f, 0x2c,
+ 0xd7, 0xa4, 0x31, 0x42, 0x06, 0x75, 0xe0, 0x93,
+ 0x0b, 0x78, 0xed, 0x9e, 0xda, 0xa9, 0x3c, 0x4f,
+ 0xb4, 0xc7, 0x52, 0x21, 0x65, 0x16, 0x83, 0xf0,
+ },
+ {
+ 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+ 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6,
+ 0x13, 0x67, 0xfb, 0x8f, 0xde, 0xaa, 0x36, 0x42,
+ 0x94, 0xe0, 0x7c, 0x08, 0x59, 0x2d, 0xb1, 0xc5,
+ 0x26, 0x52, 0xce, 0xba, 0xeb, 0x9f, 0x03, 0x77,
+ 0xa1, 0xd5, 0x49, 0x3d, 0x6c, 0x18, 0x84, 0xf0,
+ 0x35, 0x41, 0xdd, 0xa9, 0xf8, 0x8c, 0x10, 0x64,
+ 0xb2, 0xc6, 0x5a, 0x2e, 0x7f, 0x0b, 0x97, 0xe3,
+ 0x4c, 0x38, 0xa4, 0xd0, 0x81, 0xf5, 0x69, 0x1d,
+ 0xcb, 0xbf, 0x23, 0x57, 0x06, 0x72, 0xee, 0x9a,
+ 0x5f, 0x2b, 0xb7, 0xc3, 0x92, 0xe6, 0x7a, 0x0e,
+ 0xd8, 0xac, 0x30, 0x44, 0x15, 0x61, 0xfd, 0x89,
+ 0x6a, 0x1e, 0x82, 0xf6, 0xa7, 0xd3, 0x4f, 0x3b,
+ 0xed, 0x99, 0x05, 0x71, 0x20, 0x54, 0xc8, 0xbc,
+ 0x79, 0x0d, 0x91, 0xe5, 0xb4, 0xc0, 0x5c, 0x28,
+ 0xfe, 0x8a, 0x16, 0x62, 0x33, 0x47, 0xdb, 0xaf,
+ 0x98, 0xec, 0x70, 0x04, 0x55, 0x21, 0xbd, 0xc9,
+ 0x1f, 0x6b, 0xf7, 0x83, 0xd2, 0xa6, 0x3a, 0x4e,
+ 0x8b, 0xff, 0x63, 0x17, 0x46, 0x32, 0xae, 0xda,
+ 0x0c, 0x78, 0xe4, 0x90, 0xc1, 0xb5, 0x29, 0x5d,
+ 0xbe, 0xca, 0x56, 0x22, 0x73, 0x07, 0x9b, 0xef,
+ 0x39, 0x4d, 0xd1, 0xa5, 0xf4, 0x80, 0x1c, 0x68,
+ 0xad, 0xd9, 0x45, 0x31, 0x60, 0x14, 0x88, 0xfc,
+ 0x2a, 0x5e, 0xc2, 0xb6, 0xe7, 0x93, 0x0f, 0x7b,
+ 0xd4, 0xa0, 0x3c, 0x48, 0x19, 0x6d, 0xf1, 0x85,
+ 0x53, 0x27, 0xbb, 0xcf, 0x9e, 0xea, 0x76, 0x02,
+ 0xc7, 0xb3, 0x2f, 0x5b, 0x0a, 0x7e, 0xe2, 0x96,
+ 0x40, 0x34, 0xa8, 0xdc, 0x8d, 0xf9, 0x65, 0x11,
+ 0xf2, 0x86, 0x1a, 0x6e, 0x3f, 0x4b, 0xd7, 0xa3,
+ 0x75, 0x01, 0x9d, 0xe9, 0xb8, 0xcc, 0x50, 0x24,
+ 0xe1, 0x95, 0x09, 0x7d, 0x2c, 0x58, 0xc4, 0xb0,
+ 0x66, 0x12, 0x8e, 0xfa, 0xab, 0xdf, 0x43, 0x37,
+ },
+ {
+ 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56,
+ 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9,
+ 0x03, 0x76, 0xe9, 0x9c, 0xca, 0xbf, 0x20, 0x55,
+ 0x8c, 0xf9, 0x66, 0x13, 0x45, 0x30, 0xaf, 0xda,
+ 0x06, 0x73, 0xec, 0x99, 0xcf, 0xba, 0x25, 0x50,
+ 0x89, 0xfc, 0x63, 0x16, 0x40, 0x35, 0xaa, 0xdf,
+ 0x05, 0x70, 0xef, 0x9a, 0xcc, 0xb9, 0x26, 0x53,
+ 0x8a, 0xff, 0x60, 0x15, 0x43, 0x36, 0xa9, 0xdc,
+ 0x0c, 0x79, 0xe6, 0x93, 0xc5, 0xb0, 0x2f, 0x5a,
+ 0x83, 0xf6, 0x69, 0x1c, 0x4a, 0x3f, 0xa0, 0xd5,
+ 0x0f, 0x7a, 0xe5, 0x90, 0xc6, 0xb3, 0x2c, 0x59,
+ 0x80, 0xf5, 0x6a, 0x1f, 0x49, 0x3c, 0xa3, 0xd6,
+ 0x0a, 0x7f, 0xe0, 0x95, 0xc3, 0xb6, 0x29, 0x5c,
+ 0x85, 0xf0, 0x6f, 0x1a, 0x4c, 0x39, 0xa6, 0xd3,
+ 0x09, 0x7c, 0xe3, 0x96, 0xc0, 0xb5, 0x2a, 0x5f,
+ 0x86, 0xf3, 0x6c, 0x19, 0x4f, 0x3a, 0xa5, 0xd0,
+ 0x18, 0x6d, 0xf2, 0x87, 0xd1, 0xa4, 0x3b, 0x4e,
+ 0x97, 0xe2, 0x7d, 0x08, 0x5e, 0x2b, 0xb4, 0xc1,
+ 0x1b, 0x6e, 0xf1, 0x84, 0xd2, 0xa7, 0x38, 0x4d,
+ 0x94, 0xe1, 0x7e, 0x0b, 0x5d, 0x28, 0xb7, 0xc2,
+ 0x1e, 0x6b, 0xf4, 0x81, 0xd7, 0xa2, 0x3d, 0x48,
+ 0x91, 0xe4, 0x7b, 0x0e, 0x58, 0x2d, 0xb2, 0xc7,
+ 0x1d, 0x68, 0xf7, 0x82, 0xd4, 0xa1, 0x3e, 0x4b,
+ 0x92, 0xe7, 0x78, 0x0d, 0x5b, 0x2e, 0xb1, 0xc4,
+ 0x14, 0x61, 0xfe, 0x8b, 0xdd, 0xa8, 0x37, 0x42,
+ 0x9b, 0xee, 0x71, 0x04, 0x52, 0x27, 0xb8, 0xcd,
+ 0x17, 0x62, 0xfd, 0x88, 0xde, 0xab, 0x34, 0x41,
+ 0x98, 0xed, 0x72, 0x07, 0x51, 0x24, 0xbb, 0xce,
+ 0x12, 0x67, 0xf8, 0x8d, 0xdb, 0xae, 0x31, 0x44,
+ 0x9d, 0xe8, 0x77, 0x02, 0x54, 0x21, 0xbe, 0xcb,
+ 0x11, 0x64, 0xfb, 0x8e, 0xd8, 0xad, 0x32, 0x47,
+ 0x9e, 0xeb, 0x74, 0x01, 0x57, 0x22, 0xbd, 0xc8,
+ },
+ {
+ 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f,
+ 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8,
+ 0x33, 0x45, 0xdf, 0xa9, 0xf6, 0x80, 0x1a, 0x6c,
+ 0xa4, 0xd2, 0x48, 0x3e, 0x61, 0x17, 0x8d, 0xfb,
+ 0x66, 0x10, 0x8a, 0xfc, 0xa3, 0xd5, 0x4f, 0x39,
+ 0xf1, 0x87, 0x1d, 0x6b, 0x34, 0x42, 0xd8, 0xae,
+ 0x55, 0x23, 0xb9, 0xcf, 0x90, 0xe6, 0x7c, 0x0a,
+ 0xc2, 0xb4, 0x2e, 0x58, 0x07, 0x71, 0xeb, 0x9d,
+ 0xcc, 0xba, 0x20, 0x56, 0x09, 0x7f, 0xe5, 0x93,
+ 0x5b, 0x2d, 0xb7, 0xc1, 0x9e, 0xe8, 0x72, 0x04,
+ 0xff, 0x89, 0x13, 0x65, 0x3a, 0x4c, 0xd6, 0xa0,
+ 0x68, 0x1e, 0x84, 0xf2, 0xad, 0xdb, 0x41, 0x37,
+ 0xaa, 0xdc, 0x46, 0x30, 0x6f, 0x19, 0x83, 0xf5,
+ 0x3d, 0x4b, 0xd1, 0xa7, 0xf8, 0x8e, 0x14, 0x62,
+ 0x99, 0xef, 0x75, 0x03, 0x5c, 0x2a, 0xb0, 0xc6,
+ 0x0e, 0x78, 0xe2, 0x94, 0xcb, 0xbd, 0x27, 0x51,
+ 0x85, 0xf3, 0x69, 0x1f, 0x40, 0x36, 0xac, 0xda,
+ 0x12, 0x64, 0xfe, 0x88, 0xd7, 0xa1, 0x3b, 0x4d,
+ 0xb6, 0xc0, 0x5a, 0x2c, 0x73, 0x05, 0x9f, 0xe9,
+ 0x21, 0x57, 0xcd, 0xbb, 0xe4, 0x92, 0x08, 0x7e,
+ 0xe3, 0x95, 0x0f, 0x79, 0x26, 0x50, 0xca, 0xbc,
+ 0x74, 0x02, 0x98, 0xee, 0xb1, 0xc7, 0x5d, 0x2b,
+ 0xd0, 0xa6, 0x3c, 0x4a, 0x15, 0x63, 0xf9, 0x8f,
+ 0x47, 0x31, 0xab, 0xdd, 0x82, 0xf4, 0x6e, 0x18,
+ 0x49, 0x3f, 0xa5, 0xd3, 0x8c, 0xfa, 0x60, 0x16,
+ 0xde, 0xa8, 0x32, 0x44, 0x1b, 0x6d, 0xf7, 0x81,
+ 0x7a, 0x0c, 0x96, 0xe0, 0xbf, 0xc9, 0x53, 0x25,
+ 0xed, 0x9b, 0x01, 0x77, 0x28, 0x5e, 0xc4, 0xb2,
+ 0x2f, 0x59, 0xc3, 0xb5, 0xea, 0x9c, 0x06, 0x70,
+ 0xb8, 0xce, 0x54, 0x22, 0x7d, 0x0b, 0x91, 0xe7,
+ 0x1c, 0x6a, 0xf0, 0x86, 0xd9, 0xaf, 0x35, 0x43,
+ 0x8b, 0xfd, 0x67, 0x11, 0x4e, 0x38, 0xa2, 0xd4,
+ },
+ {
+ 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58,
+ 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7,
+ 0x23, 0x54, 0xcd, 0xba, 0xe2, 0x95, 0x0c, 0x7b,
+ 0xbc, 0xcb, 0x52, 0x25, 0x7d, 0x0a, 0x93, 0xe4,
+ 0x46, 0x31, 0xa8, 0xdf, 0x87, 0xf0, 0x69, 0x1e,
+ 0xd9, 0xae, 0x37, 0x40, 0x18, 0x6f, 0xf6, 0x81,
+ 0x65, 0x12, 0x8b, 0xfc, 0xa4, 0xd3, 0x4a, 0x3d,
+ 0xfa, 0x8d, 0x14, 0x63, 0x3b, 0x4c, 0xd5, 0xa2,
+ 0x8c, 0xfb, 0x62, 0x15, 0x4d, 0x3a, 0xa3, 0xd4,
+ 0x13, 0x64, 0xfd, 0x8a, 0xd2, 0xa5, 0x3c, 0x4b,
+ 0xaf, 0xd8, 0x41, 0x36, 0x6e, 0x19, 0x80, 0xf7,
+ 0x30, 0x47, 0xde, 0xa9, 0xf1, 0x86, 0x1f, 0x68,
+ 0xca, 0xbd, 0x24, 0x53, 0x0b, 0x7c, 0xe5, 0x92,
+ 0x55, 0x22, 0xbb, 0xcc, 0x94, 0xe3, 0x7a, 0x0d,
+ 0xe9, 0x9e, 0x07, 0x70, 0x28, 0x5f, 0xc6, 0xb1,
+ 0x76, 0x01, 0x98, 0xef, 0xb7, 0xc0, 0x59, 0x2e,
+ 0x05, 0x72, 0xeb, 0x9c, 0xc4, 0xb3, 0x2a, 0x5d,
+ 0x9a, 0xed, 0x74, 0x03, 0x5b, 0x2c, 0xb5, 0xc2,
+ 0x26, 0x51, 0xc8, 0xbf, 0xe7, 0x90, 0x09, 0x7e,
+ 0xb9, 0xce, 0x57, 0x20, 0x78, 0x0f, 0x96, 0xe1,
+ 0x43, 0x34, 0xad, 0xda, 0x82, 0xf5, 0x6c, 0x1b,
+ 0xdc, 0xab, 0x32, 0x45, 0x1d, 0x6a, 0xf3, 0x84,
+ 0x60, 0x17, 0x8e, 0xf9, 0xa1, 0xd6, 0x4f, 0x38,
+ 0xff, 0x88, 0x11, 0x66, 0x3e, 0x49, 0xd0, 0xa7,
+ 0x89, 0xfe, 0x67, 0x10, 0x48, 0x3f, 0xa6, 0xd1,
+ 0x16, 0x61, 0xf8, 0x8f, 0xd7, 0xa0, 0x39, 0x4e,
+ 0xaa, 0xdd, 0x44, 0x33, 0x6b, 0x1c, 0x85, 0xf2,
+ 0x35, 0x42, 0xdb, 0xac, 0xf4, 0x83, 0x1a, 0x6d,
+ 0xcf, 0xb8, 0x21, 0x56, 0x0e, 0x79, 0xe0, 0x97,
+ 0x50, 0x27, 0xbe, 0xc9, 0x91, 0xe6, 0x7f, 0x08,
+ 0xec, 0x9b, 0x02, 0x75, 0x2d, 0x5a, 0xc3, 0xb4,
+ 0x73, 0x04, 0x9d, 0xea, 0xb2, 0xc5, 0x5c, 0x2b,
+ },
+ {
+ 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75,
+ 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92,
+ 0xd3, 0xab, 0x23, 0x5b, 0x2e, 0x56, 0xde, 0xa6,
+ 0x34, 0x4c, 0xc4, 0xbc, 0xc9, 0xb1, 0x39, 0x41,
+ 0xbb, 0xc3, 0x4b, 0x33, 0x46, 0x3e, 0xb6, 0xce,
+ 0x5c, 0x24, 0xac, 0xd4, 0xa1, 0xd9, 0x51, 0x29,
+ 0x68, 0x10, 0x98, 0xe0, 0x95, 0xed, 0x65, 0x1d,
+ 0x8f, 0xf7, 0x7f, 0x07, 0x72, 0x0a, 0x82, 0xfa,
+ 0x6b, 0x13, 0x9b, 0xe3, 0x96, 0xee, 0x66, 0x1e,
+ 0x8c, 0xf4, 0x7c, 0x04, 0x71, 0x09, 0x81, 0xf9,
+ 0xb8, 0xc0, 0x48, 0x30, 0x45, 0x3d, 0xb5, 0xcd,
+ 0x5f, 0x27, 0xaf, 0xd7, 0xa2, 0xda, 0x52, 0x2a,
+ 0xd0, 0xa8, 0x20, 0x58, 0x2d, 0x55, 0xdd, 0xa5,
+ 0x37, 0x4f, 0xc7, 0xbf, 0xca, 0xb2, 0x3a, 0x42,
+ 0x03, 0x7b, 0xf3, 0x8b, 0xfe, 0x86, 0x0e, 0x76,
+ 0xe4, 0x9c, 0x14, 0x6c, 0x19, 0x61, 0xe9, 0x91,
+ 0xd6, 0xae, 0x26, 0x5e, 0x2b, 0x53, 0xdb, 0xa3,
+ 0x31, 0x49, 0xc1, 0xb9, 0xcc, 0xb4, 0x3c, 0x44,
+ 0x05, 0x7d, 0xf5, 0x8d, 0xf8, 0x80, 0x08, 0x70,
+ 0xe2, 0x9a, 0x12, 0x6a, 0x1f, 0x67, 0xef, 0x97,
+ 0x6d, 0x15, 0x9d, 0xe5, 0x90, 0xe8, 0x60, 0x18,
+ 0x8a, 0xf2, 0x7a, 0x02, 0x77, 0x0f, 0x87, 0xff,
+ 0xbe, 0xc6, 0x4e, 0x36, 0x43, 0x3b, 0xb3, 0xcb,
+ 0x59, 0x21, 0xa9, 0xd1, 0xa4, 0xdc, 0x54, 0x2c,
+ 0xbd, 0xc5, 0x4d, 0x35, 0x40, 0x38, 0xb0, 0xc8,
+ 0x5a, 0x22, 0xaa, 0xd2, 0xa7, 0xdf, 0x57, 0x2f,
+ 0x6e, 0x16, 0x9e, 0xe6, 0x93, 0xeb, 0x63, 0x1b,
+ 0x89, 0xf1, 0x79, 0x01, 0x74, 0x0c, 0x84, 0xfc,
+ 0x06, 0x7e, 0xf6, 0x8e, 0xfb, 0x83, 0x0b, 0x73,
+ 0xe1, 0x99, 0x11, 0x69, 0x1c, 0x64, 0xec, 0x94,
+ 0xd5, 0xad, 0x25, 0x5d, 0x28, 0x50, 0xd8, 0xa0,
+ 0x32, 0x4a, 0xc2, 0xba, 0xcf, 0xb7, 0x3f, 0x47,
+ },
+ {
+ 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72,
+ 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d,
+ 0xc3, 0xba, 0x31, 0x48, 0x3a, 0x43, 0xc8, 0xb1,
+ 0x2c, 0x55, 0xde, 0xa7, 0xd5, 0xac, 0x27, 0x5e,
+ 0x9b, 0xe2, 0x69, 0x10, 0x62, 0x1b, 0x90, 0xe9,
+ 0x74, 0x0d, 0x86, 0xff, 0x8d, 0xf4, 0x7f, 0x06,
+ 0x58, 0x21, 0xaa, 0xd3, 0xa1, 0xd8, 0x53, 0x2a,
+ 0xb7, 0xce, 0x45, 0x3c, 0x4e, 0x37, 0xbc, 0xc5,
+ 0x2b, 0x52, 0xd9, 0xa0, 0xd2, 0xab, 0x20, 0x59,
+ 0xc4, 0xbd, 0x36, 0x4f, 0x3d, 0x44, 0xcf, 0xb6,
+ 0xe8, 0x91, 0x1a, 0x63, 0x11, 0x68, 0xe3, 0x9a,
+ 0x07, 0x7e, 0xf5, 0x8c, 0xfe, 0x87, 0x0c, 0x75,
+ 0xb0, 0xc9, 0x42, 0x3b, 0x49, 0x30, 0xbb, 0xc2,
+ 0x5f, 0x26, 0xad, 0xd4, 0xa6, 0xdf, 0x54, 0x2d,
+ 0x73, 0x0a, 0x81, 0xf8, 0x8a, 0xf3, 0x78, 0x01,
+ 0x9c, 0xe5, 0x6e, 0x17, 0x65, 0x1c, 0x97, 0xee,
+ 0x56, 0x2f, 0xa4, 0xdd, 0xaf, 0xd6, 0x5d, 0x24,
+ 0xb9, 0xc0, 0x4b, 0x32, 0x40, 0x39, 0xb2, 0xcb,
+ 0x95, 0xec, 0x67, 0x1e, 0x6c, 0x15, 0x9e, 0xe7,
+ 0x7a, 0x03, 0x88, 0xf1, 0x83, 0xfa, 0x71, 0x08,
+ 0xcd, 0xb4, 0x3f, 0x46, 0x34, 0x4d, 0xc6, 0xbf,
+ 0x22, 0x5b, 0xd0, 0xa9, 0xdb, 0xa2, 0x29, 0x50,
+ 0x0e, 0x77, 0xfc, 0x85, 0xf7, 0x8e, 0x05, 0x7c,
+ 0xe1, 0x98, 0x13, 0x6a, 0x18, 0x61, 0xea, 0x93,
+ 0x7d, 0x04, 0x8f, 0xf6, 0x84, 0xfd, 0x76, 0x0f,
+ 0x92, 0xeb, 0x60, 0x19, 0x6b, 0x12, 0x99, 0xe0,
+ 0xbe, 0xc7, 0x4c, 0x35, 0x47, 0x3e, 0xb5, 0xcc,
+ 0x51, 0x28, 0xa3, 0xda, 0xa8, 0xd1, 0x5a, 0x23,
+ 0xe6, 0x9f, 0x14, 0x6d, 0x1f, 0x66, 0xed, 0x94,
+ 0x09, 0x70, 0xfb, 0x82, 0xf0, 0x89, 0x02, 0x7b,
+ 0x25, 0x5c, 0xd7, 0xae, 0xdc, 0xa5, 0x2e, 0x57,
+ 0xca, 0xb3, 0x38, 0x41, 0x33, 0x4a, 0xc1, 0xb8,
+ },
+ {
+ 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b,
+ 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c,
+ 0xf3, 0x89, 0x07, 0x7d, 0x06, 0x7c, 0xf2, 0x88,
+ 0x04, 0x7e, 0xf0, 0x8a, 0xf1, 0x8b, 0x05, 0x7f,
+ 0xfb, 0x81, 0x0f, 0x75, 0x0e, 0x74, 0xfa, 0x80,
+ 0x0c, 0x76, 0xf8, 0x82, 0xf9, 0x83, 0x0d, 0x77,
+ 0x08, 0x72, 0xfc, 0x86, 0xfd, 0x87, 0x09, 0x73,
+ 0xff, 0x85, 0x0b, 0x71, 0x0a, 0x70, 0xfe, 0x84,
+ 0xeb, 0x91, 0x1f, 0x65, 0x1e, 0x64, 0xea, 0x90,
+ 0x1c, 0x66, 0xe8, 0x92, 0xe9, 0x93, 0x1d, 0x67,
+ 0x18, 0x62, 0xec, 0x96, 0xed, 0x97, 0x19, 0x63,
+ 0xef, 0x95, 0x1b, 0x61, 0x1a, 0x60, 0xee, 0x94,
+ 0x10, 0x6a, 0xe4, 0x9e, 0xe5, 0x9f, 0x11, 0x6b,
+ 0xe7, 0x9d, 0x13, 0x69, 0x12, 0x68, 0xe6, 0x9c,
+ 0xe3, 0x99, 0x17, 0x6d, 0x16, 0x6c, 0xe2, 0x98,
+ 0x14, 0x6e, 0xe0, 0x9a, 0xe1, 0x9b, 0x15, 0x6f,
+ 0xcb, 0xb1, 0x3f, 0x45, 0x3e, 0x44, 0xca, 0xb0,
+ 0x3c, 0x46, 0xc8, 0xb2, 0xc9, 0xb3, 0x3d, 0x47,
+ 0x38, 0x42, 0xcc, 0xb6, 0xcd, 0xb7, 0x39, 0x43,
+ 0xcf, 0xb5, 0x3b, 0x41, 0x3a, 0x40, 0xce, 0xb4,
+ 0x30, 0x4a, 0xc4, 0xbe, 0xc5, 0xbf, 0x31, 0x4b,
+ 0xc7, 0xbd, 0x33, 0x49, 0x32, 0x48, 0xc6, 0xbc,
+ 0xc3, 0xb9, 0x37, 0x4d, 0x36, 0x4c, 0xc2, 0xb8,
+ 0x34, 0x4e, 0xc0, 0xba, 0xc1, 0xbb, 0x35, 0x4f,
+ 0x20, 0x5a, 0xd4, 0xae, 0xd5, 0xaf, 0x21, 0x5b,
+ 0xd7, 0xad, 0x23, 0x59, 0x22, 0x58, 0xd6, 0xac,
+ 0xd3, 0xa9, 0x27, 0x5d, 0x26, 0x5c, 0xd2, 0xa8,
+ 0x24, 0x5e, 0xd0, 0xaa, 0xd1, 0xab, 0x25, 0x5f,
+ 0xdb, 0xa1, 0x2f, 0x55, 0x2e, 0x54, 0xda, 0xa0,
+ 0x2c, 0x56, 0xd8, 0xa2, 0xd9, 0xa3, 0x2d, 0x57,
+ 0x28, 0x52, 0xdc, 0xa6, 0xdd, 0xa7, 0x29, 0x53,
+ 0xdf, 0xa5, 0x2b, 0x51, 0x2a, 0x50, 0xde, 0xa4,
+ },
+ {
+ 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c,
+ 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83,
+ 0xe3, 0x98, 0x15, 0x6e, 0x12, 0x69, 0xe4, 0x9f,
+ 0x1c, 0x67, 0xea, 0x91, 0xed, 0x96, 0x1b, 0x60,
+ 0xdb, 0xa0, 0x2d, 0x56, 0x2a, 0x51, 0xdc, 0xa7,
+ 0x24, 0x5f, 0xd2, 0xa9, 0xd5, 0xae, 0x23, 0x58,
+ 0x38, 0x43, 0xce, 0xb5, 0xc9, 0xb2, 0x3f, 0x44,
+ 0xc7, 0xbc, 0x31, 0x4a, 0x36, 0x4d, 0xc0, 0xbb,
+ 0xab, 0xd0, 0x5d, 0x26, 0x5a, 0x21, 0xac, 0xd7,
+ 0x54, 0x2f, 0xa2, 0xd9, 0xa5, 0xde, 0x53, 0x28,
+ 0x48, 0x33, 0xbe, 0xc5, 0xb9, 0xc2, 0x4f, 0x34,
+ 0xb7, 0xcc, 0x41, 0x3a, 0x46, 0x3d, 0xb0, 0xcb,
+ 0x70, 0x0b, 0x86, 0xfd, 0x81, 0xfa, 0x77, 0x0c,
+ 0x8f, 0xf4, 0x79, 0x02, 0x7e, 0x05, 0x88, 0xf3,
+ 0x93, 0xe8, 0x65, 0x1e, 0x62, 0x19, 0x94, 0xef,
+ 0x6c, 0x17, 0x9a, 0xe1, 0x9d, 0xe6, 0x6b, 0x10,
+ 0x4b, 0x30, 0xbd, 0xc6, 0xba, 0xc1, 0x4c, 0x37,
+ 0xb4, 0xcf, 0x42, 0x39, 0x45, 0x3e, 0xb3, 0xc8,
+ 0xa8, 0xd3, 0x5e, 0x25, 0x59, 0x22, 0xaf, 0xd4,
+ 0x57, 0x2c, 0xa1, 0xda, 0xa6, 0xdd, 0x50, 0x2b,
+ 0x90, 0xeb, 0x66, 0x1d, 0x61, 0x1a, 0x97, 0xec,
+ 0x6f, 0x14, 0x99, 0xe2, 0x9e, 0xe5, 0x68, 0x13,
+ 0x73, 0x08, 0x85, 0xfe, 0x82, 0xf9, 0x74, 0x0f,
+ 0x8c, 0xf7, 0x7a, 0x01, 0x7d, 0x06, 0x8b, 0xf0,
+ 0xe0, 0x9b, 0x16, 0x6d, 0x11, 0x6a, 0xe7, 0x9c,
+ 0x1f, 0x64, 0xe9, 0x92, 0xee, 0x95, 0x18, 0x63,
+ 0x03, 0x78, 0xf5, 0x8e, 0xf2, 0x89, 0x04, 0x7f,
+ 0xfc, 0x87, 0x0a, 0x71, 0x0d, 0x76, 0xfb, 0x80,
+ 0x3b, 0x40, 0xcd, 0xb6, 0xca, 0xb1, 0x3c, 0x47,
+ 0xc4, 0xbf, 0x32, 0x49, 0x35, 0x4e, 0xc3, 0xb8,
+ 0xd8, 0xa3, 0x2e, 0x55, 0x29, 0x52, 0xdf, 0xa4,
+ 0x27, 0x5c, 0xd1, 0xaa, 0xd6, 0xad, 0x20, 0x5b,
+ },
+ {
+ 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69,
+ 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae,
+ 0x93, 0xef, 0x6b, 0x17, 0x7e, 0x02, 0x86, 0xfa,
+ 0x54, 0x28, 0xac, 0xd0, 0xb9, 0xc5, 0x41, 0x3d,
+ 0x3b, 0x47, 0xc3, 0xbf, 0xd6, 0xaa, 0x2e, 0x52,
+ 0xfc, 0x80, 0x04, 0x78, 0x11, 0x6d, 0xe9, 0x95,
+ 0xa8, 0xd4, 0x50, 0x2c, 0x45, 0x39, 0xbd, 0xc1,
+ 0x6f, 0x13, 0x97, 0xeb, 0x82, 0xfe, 0x7a, 0x06,
+ 0x76, 0x0a, 0x8e, 0xf2, 0x9b, 0xe7, 0x63, 0x1f,
+ 0xb1, 0xcd, 0x49, 0x35, 0x5c, 0x20, 0xa4, 0xd8,
+ 0xe5, 0x99, 0x1d, 0x61, 0x08, 0x74, 0xf0, 0x8c,
+ 0x22, 0x5e, 0xda, 0xa6, 0xcf, 0xb3, 0x37, 0x4b,
+ 0x4d, 0x31, 0xb5, 0xc9, 0xa0, 0xdc, 0x58, 0x24,
+ 0x8a, 0xf6, 0x72, 0x0e, 0x67, 0x1b, 0x9f, 0xe3,
+ 0xde, 0xa2, 0x26, 0x5a, 0x33, 0x4f, 0xcb, 0xb7,
+ 0x19, 0x65, 0xe1, 0x9d, 0xf4, 0x88, 0x0c, 0x70,
+ 0xec, 0x90, 0x14, 0x68, 0x01, 0x7d, 0xf9, 0x85,
+ 0x2b, 0x57, 0xd3, 0xaf, 0xc6, 0xba, 0x3e, 0x42,
+ 0x7f, 0x03, 0x87, 0xfb, 0x92, 0xee, 0x6a, 0x16,
+ 0xb8, 0xc4, 0x40, 0x3c, 0x55, 0x29, 0xad, 0xd1,
+ 0xd7, 0xab, 0x2f, 0x53, 0x3a, 0x46, 0xc2, 0xbe,
+ 0x10, 0x6c, 0xe8, 0x94, 0xfd, 0x81, 0x05, 0x79,
+ 0x44, 0x38, 0xbc, 0xc0, 0xa9, 0xd5, 0x51, 0x2d,
+ 0x83, 0xff, 0x7b, 0x07, 0x6e, 0x12, 0x96, 0xea,
+ 0x9a, 0xe6, 0x62, 0x1e, 0x77, 0x0b, 0x8f, 0xf3,
+ 0x5d, 0x21, 0xa5, 0xd9, 0xb0, 0xcc, 0x48, 0x34,
+ 0x09, 0x75, 0xf1, 0x8d, 0xe4, 0x98, 0x1c, 0x60,
+ 0xce, 0xb2, 0x36, 0x4a, 0x23, 0x5f, 0xdb, 0xa7,
+ 0xa1, 0xdd, 0x59, 0x25, 0x4c, 0x30, 0xb4, 0xc8,
+ 0x66, 0x1a, 0x9e, 0xe2, 0x8b, 0xf7, 0x73, 0x0f,
+ 0x32, 0x4e, 0xca, 0xb6, 0xdf, 0xa3, 0x27, 0x5b,
+ 0xf5, 0x89, 0x0d, 0x71, 0x18, 0x64, 0xe0, 0x9c,
+ },
+ {
+ 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e,
+ 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1,
+ 0x83, 0xfe, 0x79, 0x04, 0x6a, 0x17, 0x90, 0xed,
+ 0x4c, 0x31, 0xb6, 0xcb, 0xa5, 0xd8, 0x5f, 0x22,
+ 0x1b, 0x66, 0xe1, 0x9c, 0xf2, 0x8f, 0x08, 0x75,
+ 0xd4, 0xa9, 0x2e, 0x53, 0x3d, 0x40, 0xc7, 0xba,
+ 0x98, 0xe5, 0x62, 0x1f, 0x71, 0x0c, 0x8b, 0xf6,
+ 0x57, 0x2a, 0xad, 0xd0, 0xbe, 0xc3, 0x44, 0x39,
+ 0x36, 0x4b, 0xcc, 0xb1, 0xdf, 0xa2, 0x25, 0x58,
+ 0xf9, 0x84, 0x03, 0x7e, 0x10, 0x6d, 0xea, 0x97,
+ 0xb5, 0xc8, 0x4f, 0x32, 0x5c, 0x21, 0xa6, 0xdb,
+ 0x7a, 0x07, 0x80, 0xfd, 0x93, 0xee, 0x69, 0x14,
+ 0x2d, 0x50, 0xd7, 0xaa, 0xc4, 0xb9, 0x3e, 0x43,
+ 0xe2, 0x9f, 0x18, 0x65, 0x0b, 0x76, 0xf1, 0x8c,
+ 0xae, 0xd3, 0x54, 0x29, 0x47, 0x3a, 0xbd, 0xc0,
+ 0x61, 0x1c, 0x9b, 0xe6, 0x88, 0xf5, 0x72, 0x0f,
+ 0x6c, 0x11, 0x96, 0xeb, 0x85, 0xf8, 0x7f, 0x02,
+ 0xa3, 0xde, 0x59, 0x24, 0x4a, 0x37, 0xb0, 0xcd,
+ 0xef, 0x92, 0x15, 0x68, 0x06, 0x7b, 0xfc, 0x81,
+ 0x20, 0x5d, 0xda, 0xa7, 0xc9, 0xb4, 0x33, 0x4e,
+ 0x77, 0x0a, 0x8d, 0xf0, 0x9e, 0xe3, 0x64, 0x19,
+ 0xb8, 0xc5, 0x42, 0x3f, 0x51, 0x2c, 0xab, 0xd6,
+ 0xf4, 0x89, 0x0e, 0x73, 0x1d, 0x60, 0xe7, 0x9a,
+ 0x3b, 0x46, 0xc1, 0xbc, 0xd2, 0xaf, 0x28, 0x55,
+ 0x5a, 0x27, 0xa0, 0xdd, 0xb3, 0xce, 0x49, 0x34,
+ 0x95, 0xe8, 0x6f, 0x12, 0x7c, 0x01, 0x86, 0xfb,
+ 0xd9, 0xa4, 0x23, 0x5e, 0x30, 0x4d, 0xca, 0xb7,
+ 0x16, 0x6b, 0xec, 0x91, 0xff, 0x82, 0x05, 0x78,
+ 0x41, 0x3c, 0xbb, 0xc6, 0xa8, 0xd5, 0x52, 0x2f,
+ 0x8e, 0xf3, 0x74, 0x09, 0x67, 0x1a, 0x9d, 0xe0,
+ 0xc2, 0xbf, 0x38, 0x45, 0x2b, 0x56, 0xd1, 0xac,
+ 0x0d, 0x70, 0xf7, 0x8a, 0xe4, 0x99, 0x1e, 0x63,
+ },
+ {
+ 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67,
+ 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0,
+ 0xb3, 0xcd, 0x4f, 0x31, 0x56, 0x28, 0xaa, 0xd4,
+ 0x64, 0x1a, 0x98, 0xe6, 0x81, 0xff, 0x7d, 0x03,
+ 0x7b, 0x05, 0x87, 0xf9, 0x9e, 0xe0, 0x62, 0x1c,
+ 0xac, 0xd2, 0x50, 0x2e, 0x49, 0x37, 0xb5, 0xcb,
+ 0xc8, 0xb6, 0x34, 0x4a, 0x2d, 0x53, 0xd1, 0xaf,
+ 0x1f, 0x61, 0xe3, 0x9d, 0xfa, 0x84, 0x06, 0x78,
+ 0xf6, 0x88, 0x0a, 0x74, 0x13, 0x6d, 0xef, 0x91,
+ 0x21, 0x5f, 0xdd, 0xa3, 0xc4, 0xba, 0x38, 0x46,
+ 0x45, 0x3b, 0xb9, 0xc7, 0xa0, 0xde, 0x5c, 0x22,
+ 0x92, 0xec, 0x6e, 0x10, 0x77, 0x09, 0x8b, 0xf5,
+ 0x8d, 0xf3, 0x71, 0x0f, 0x68, 0x16, 0x94, 0xea,
+ 0x5a, 0x24, 0xa6, 0xd8, 0xbf, 0xc1, 0x43, 0x3d,
+ 0x3e, 0x40, 0xc2, 0xbc, 0xdb, 0xa5, 0x27, 0x59,
+ 0xe9, 0x97, 0x15, 0x6b, 0x0c, 0x72, 0xf0, 0x8e,
+ 0xf1, 0x8f, 0x0d, 0x73, 0x14, 0x6a, 0xe8, 0x96,
+ 0x26, 0x58, 0xda, 0xa4, 0xc3, 0xbd, 0x3f, 0x41,
+ 0x42, 0x3c, 0xbe, 0xc0, 0xa7, 0xd9, 0x5b, 0x25,
+ 0x95, 0xeb, 0x69, 0x17, 0x70, 0x0e, 0x8c, 0xf2,
+ 0x8a, 0xf4, 0x76, 0x08, 0x6f, 0x11, 0x93, 0xed,
+ 0x5d, 0x23, 0xa1, 0xdf, 0xb8, 0xc6, 0x44, 0x3a,
+ 0x39, 0x47, 0xc5, 0xbb, 0xdc, 0xa2, 0x20, 0x5e,
+ 0xee, 0x90, 0x12, 0x6c, 0x0b, 0x75, 0xf7, 0x89,
+ 0x07, 0x79, 0xfb, 0x85, 0xe2, 0x9c, 0x1e, 0x60,
+ 0xd0, 0xae, 0x2c, 0x52, 0x35, 0x4b, 0xc9, 0xb7,
+ 0xb4, 0xca, 0x48, 0x36, 0x51, 0x2f, 0xad, 0xd3,
+ 0x63, 0x1d, 0x9f, 0xe1, 0x86, 0xf8, 0x7a, 0x04,
+ 0x7c, 0x02, 0x80, 0xfe, 0x99, 0xe7, 0x65, 0x1b,
+ 0xab, 0xd5, 0x57, 0x29, 0x4e, 0x30, 0xb2, 0xcc,
+ 0xcf, 0xb1, 0x33, 0x4d, 0x2a, 0x54, 0xd6, 0xa8,
+ 0x18, 0x66, 0xe4, 0x9a, 0xfd, 0x83, 0x01, 0x7f,
+ },
+ {
+ 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60,
+ 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf,
+ 0xa3, 0xdc, 0x5d, 0x22, 0x42, 0x3d, 0xbc, 0xc3,
+ 0x7c, 0x03, 0x82, 0xfd, 0x9d, 0xe2, 0x63, 0x1c,
+ 0x5b, 0x24, 0xa5, 0xda, 0xba, 0xc5, 0x44, 0x3b,
+ 0x84, 0xfb, 0x7a, 0x05, 0x65, 0x1a, 0x9b, 0xe4,
+ 0xf8, 0x87, 0x06, 0x79, 0x19, 0x66, 0xe7, 0x98,
+ 0x27, 0x58, 0xd9, 0xa6, 0xc6, 0xb9, 0x38, 0x47,
+ 0xb6, 0xc9, 0x48, 0x37, 0x57, 0x28, 0xa9, 0xd6,
+ 0x69, 0x16, 0x97, 0xe8, 0x88, 0xf7, 0x76, 0x09,
+ 0x15, 0x6a, 0xeb, 0x94, 0xf4, 0x8b, 0x0a, 0x75,
+ 0xca, 0xb5, 0x34, 0x4b, 0x2b, 0x54, 0xd5, 0xaa,
+ 0xed, 0x92, 0x13, 0x6c, 0x0c, 0x73, 0xf2, 0x8d,
+ 0x32, 0x4d, 0xcc, 0xb3, 0xd3, 0xac, 0x2d, 0x52,
+ 0x4e, 0x31, 0xb0, 0xcf, 0xaf, 0xd0, 0x51, 0x2e,
+ 0x91, 0xee, 0x6f, 0x10, 0x70, 0x0f, 0x8e, 0xf1,
+ 0x71, 0x0e, 0x8f, 0xf0, 0x90, 0xef, 0x6e, 0x11,
+ 0xae, 0xd1, 0x50, 0x2f, 0x4f, 0x30, 0xb1, 0xce,
+ 0xd2, 0xad, 0x2c, 0x53, 0x33, 0x4c, 0xcd, 0xb2,
+ 0x0d, 0x72, 0xf3, 0x8c, 0xec, 0x93, 0x12, 0x6d,
+ 0x2a, 0x55, 0xd4, 0xab, 0xcb, 0xb4, 0x35, 0x4a,
+ 0xf5, 0x8a, 0x0b, 0x74, 0x14, 0x6b, 0xea, 0x95,
+ 0x89, 0xf6, 0x77, 0x08, 0x68, 0x17, 0x96, 0xe9,
+ 0x56, 0x29, 0xa8, 0xd7, 0xb7, 0xc8, 0x49, 0x36,
+ 0xc7, 0xb8, 0x39, 0x46, 0x26, 0x59, 0xd8, 0xa7,
+ 0x18, 0x67, 0xe6, 0x99, 0xf9, 0x86, 0x07, 0x78,
+ 0x64, 0x1b, 0x9a, 0xe5, 0x85, 0xfa, 0x7b, 0x04,
+ 0xbb, 0xc4, 0x45, 0x3a, 0x5a, 0x25, 0xa4, 0xdb,
+ 0x9c, 0xe3, 0x62, 0x1d, 0x7d, 0x02, 0x83, 0xfc,
+ 0x43, 0x3c, 0xbd, 0xc2, 0xa2, 0xdd, 0x5c, 0x23,
+ 0x3f, 0x40, 0xc1, 0xbe, 0xde, 0xa1, 0x20, 0x5f,
+ 0xe0, 0x9f, 0x1e, 0x61, 0x01, 0x7e, 0xff, 0x80,
+ },
+ {
+ 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7,
+ 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3,
+ 0xe8, 0x68, 0xf5, 0x75, 0xd2, 0x52, 0xcf, 0x4f,
+ 0x9c, 0x1c, 0x81, 0x01, 0xa6, 0x26, 0xbb, 0x3b,
+ 0xcd, 0x4d, 0xd0, 0x50, 0xf7, 0x77, 0xea, 0x6a,
+ 0xb9, 0x39, 0xa4, 0x24, 0x83, 0x03, 0x9e, 0x1e,
+ 0x25, 0xa5, 0x38, 0xb8, 0x1f, 0x9f, 0x02, 0x82,
+ 0x51, 0xd1, 0x4c, 0xcc, 0x6b, 0xeb, 0x76, 0xf6,
+ 0x87, 0x07, 0x9a, 0x1a, 0xbd, 0x3d, 0xa0, 0x20,
+ 0xf3, 0x73, 0xee, 0x6e, 0xc9, 0x49, 0xd4, 0x54,
+ 0x6f, 0xef, 0x72, 0xf2, 0x55, 0xd5, 0x48, 0xc8,
+ 0x1b, 0x9b, 0x06, 0x86, 0x21, 0xa1, 0x3c, 0xbc,
+ 0x4a, 0xca, 0x57, 0xd7, 0x70, 0xf0, 0x6d, 0xed,
+ 0x3e, 0xbe, 0x23, 0xa3, 0x04, 0x84, 0x19, 0x99,
+ 0xa2, 0x22, 0xbf, 0x3f, 0x98, 0x18, 0x85, 0x05,
+ 0xd6, 0x56, 0xcb, 0x4b, 0xec, 0x6c, 0xf1, 0x71,
+ 0x13, 0x93, 0x0e, 0x8e, 0x29, 0xa9, 0x34, 0xb4,
+ 0x67, 0xe7, 0x7a, 0xfa, 0x5d, 0xdd, 0x40, 0xc0,
+ 0xfb, 0x7b, 0xe6, 0x66, 0xc1, 0x41, 0xdc, 0x5c,
+ 0x8f, 0x0f, 0x92, 0x12, 0xb5, 0x35, 0xa8, 0x28,
+ 0xde, 0x5e, 0xc3, 0x43, 0xe4, 0x64, 0xf9, 0x79,
+ 0xaa, 0x2a, 0xb7, 0x37, 0x90, 0x10, 0x8d, 0x0d,
+ 0x36, 0xb6, 0x2b, 0xab, 0x0c, 0x8c, 0x11, 0x91,
+ 0x42, 0xc2, 0x5f, 0xdf, 0x78, 0xf8, 0x65, 0xe5,
+ 0x94, 0x14, 0x89, 0x09, 0xae, 0x2e, 0xb3, 0x33,
+ 0xe0, 0x60, 0xfd, 0x7d, 0xda, 0x5a, 0xc7, 0x47,
+ 0x7c, 0xfc, 0x61, 0xe1, 0x46, 0xc6, 0x5b, 0xdb,
+ 0x08, 0x88, 0x15, 0x95, 0x32, 0xb2, 0x2f, 0xaf,
+ 0x59, 0xd9, 0x44, 0xc4, 0x63, 0xe3, 0x7e, 0xfe,
+ 0x2d, 0xad, 0x30, 0xb0, 0x17, 0x97, 0x0a, 0x8a,
+ 0xb1, 0x31, 0xac, 0x2c, 0x8b, 0x0b, 0x96, 0x16,
+ 0xc5, 0x45, 0xd8, 0x58, 0xff, 0x7f, 0xe2, 0x62,
+ },
+ {
+ 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+ 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc,
+ 0xf8, 0x79, 0xe7, 0x66, 0xc6, 0x47, 0xd9, 0x58,
+ 0x84, 0x05, 0x9b, 0x1a, 0xba, 0x3b, 0xa5, 0x24,
+ 0xed, 0x6c, 0xf2, 0x73, 0xd3, 0x52, 0xcc, 0x4d,
+ 0x91, 0x10, 0x8e, 0x0f, 0xaf, 0x2e, 0xb0, 0x31,
+ 0x15, 0x94, 0x0a, 0x8b, 0x2b, 0xaa, 0x34, 0xb5,
+ 0x69, 0xe8, 0x76, 0xf7, 0x57, 0xd6, 0x48, 0xc9,
+ 0xc7, 0x46, 0xd8, 0x59, 0xf9, 0x78, 0xe6, 0x67,
+ 0xbb, 0x3a, 0xa4, 0x25, 0x85, 0x04, 0x9a, 0x1b,
+ 0x3f, 0xbe, 0x20, 0xa1, 0x01, 0x80, 0x1e, 0x9f,
+ 0x43, 0xc2, 0x5c, 0xdd, 0x7d, 0xfc, 0x62, 0xe3,
+ 0x2a, 0xab, 0x35, 0xb4, 0x14, 0x95, 0x0b, 0x8a,
+ 0x56, 0xd7, 0x49, 0xc8, 0x68, 0xe9, 0x77, 0xf6,
+ 0xd2, 0x53, 0xcd, 0x4c, 0xec, 0x6d, 0xf3, 0x72,
+ 0xae, 0x2f, 0xb1, 0x30, 0x90, 0x11, 0x8f, 0x0e,
+ 0x93, 0x12, 0x8c, 0x0d, 0xad, 0x2c, 0xb2, 0x33,
+ 0xef, 0x6e, 0xf0, 0x71, 0xd1, 0x50, 0xce, 0x4f,
+ 0x6b, 0xea, 0x74, 0xf5, 0x55, 0xd4, 0x4a, 0xcb,
+ 0x17, 0x96, 0x08, 0x89, 0x29, 0xa8, 0x36, 0xb7,
+ 0x7e, 0xff, 0x61, 0xe0, 0x40, 0xc1, 0x5f, 0xde,
+ 0x02, 0x83, 0x1d, 0x9c, 0x3c, 0xbd, 0x23, 0xa2,
+ 0x86, 0x07, 0x99, 0x18, 0xb8, 0x39, 0xa7, 0x26,
+ 0xfa, 0x7b, 0xe5, 0x64, 0xc4, 0x45, 0xdb, 0x5a,
+ 0x54, 0xd5, 0x4b, 0xca, 0x6a, 0xeb, 0x75, 0xf4,
+ 0x28, 0xa9, 0x37, 0xb6, 0x16, 0x97, 0x09, 0x88,
+ 0xac, 0x2d, 0xb3, 0x32, 0x92, 0x13, 0x8d, 0x0c,
+ 0xd0, 0x51, 0xcf, 0x4e, 0xee, 0x6f, 0xf1, 0x70,
+ 0xb9, 0x38, 0xa6, 0x27, 0x87, 0x06, 0x98, 0x19,
+ 0xc5, 0x44, 0xda, 0x5b, 0xfb, 0x7a, 0xe4, 0x65,
+ 0x41, 0xc0, 0x5e, 0xdf, 0x7f, 0xfe, 0x60, 0xe1,
+ 0x3d, 0xbc, 0x22, 0xa3, 0x03, 0x82, 0x1c, 0x9d,
+ },
+ {
+ 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9,
+ 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd,
+ 0xc8, 0x4a, 0xd1, 0x53, 0xfa, 0x78, 0xe3, 0x61,
+ 0xac, 0x2e, 0xb5, 0x37, 0x9e, 0x1c, 0x87, 0x05,
+ 0x8d, 0x0f, 0x94, 0x16, 0xbf, 0x3d, 0xa6, 0x24,
+ 0xe9, 0x6b, 0xf0, 0x72, 0xdb, 0x59, 0xc2, 0x40,
+ 0x45, 0xc7, 0x5c, 0xde, 0x77, 0xf5, 0x6e, 0xec,
+ 0x21, 0xa3, 0x38, 0xba, 0x13, 0x91, 0x0a, 0x88,
+ 0x07, 0x85, 0x1e, 0x9c, 0x35, 0xb7, 0x2c, 0xae,
+ 0x63, 0xe1, 0x7a, 0xf8, 0x51, 0xd3, 0x48, 0xca,
+ 0xcf, 0x4d, 0xd6, 0x54, 0xfd, 0x7f, 0xe4, 0x66,
+ 0xab, 0x29, 0xb2, 0x30, 0x99, 0x1b, 0x80, 0x02,
+ 0x8a, 0x08, 0x93, 0x11, 0xb8, 0x3a, 0xa1, 0x23,
+ 0xee, 0x6c, 0xf7, 0x75, 0xdc, 0x5e, 0xc5, 0x47,
+ 0x42, 0xc0, 0x5b, 0xd9, 0x70, 0xf2, 0x69, 0xeb,
+ 0x26, 0xa4, 0x3f, 0xbd, 0x14, 0x96, 0x0d, 0x8f,
+ 0x0e, 0x8c, 0x17, 0x95, 0x3c, 0xbe, 0x25, 0xa7,
+ 0x6a, 0xe8, 0x73, 0xf1, 0x58, 0xda, 0x41, 0xc3,
+ 0xc6, 0x44, 0xdf, 0x5d, 0xf4, 0x76, 0xed, 0x6f,
+ 0xa2, 0x20, 0xbb, 0x39, 0x90, 0x12, 0x89, 0x0b,
+ 0x83, 0x01, 0x9a, 0x18, 0xb1, 0x33, 0xa8, 0x2a,
+ 0xe7, 0x65, 0xfe, 0x7c, 0xd5, 0x57, 0xcc, 0x4e,
+ 0x4b, 0xc9, 0x52, 0xd0, 0x79, 0xfb, 0x60, 0xe2,
+ 0x2f, 0xad, 0x36, 0xb4, 0x1d, 0x9f, 0x04, 0x86,
+ 0x09, 0x8b, 0x10, 0x92, 0x3b, 0xb9, 0x22, 0xa0,
+ 0x6d, 0xef, 0x74, 0xf6, 0x5f, 0xdd, 0x46, 0xc4,
+ 0xc1, 0x43, 0xd8, 0x5a, 0xf3, 0x71, 0xea, 0x68,
+ 0xa5, 0x27, 0xbc, 0x3e, 0x97, 0x15, 0x8e, 0x0c,
+ 0x84, 0x06, 0x9d, 0x1f, 0xb6, 0x34, 0xaf, 0x2d,
+ 0xe0, 0x62, 0xf9, 0x7b, 0xd2, 0x50, 0xcb, 0x49,
+ 0x4c, 0xce, 0x55, 0xd7, 0x7e, 0xfc, 0x67, 0xe5,
+ 0x28, 0xaa, 0x31, 0xb3, 0x1a, 0x98, 0x03, 0x81,
+ },
+ {
+ 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae,
+ 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2,
+ 0xd8, 0x5b, 0xc3, 0x40, 0xee, 0x6d, 0xf5, 0x76,
+ 0xb4, 0x37, 0xaf, 0x2c, 0x82, 0x01, 0x99, 0x1a,
+ 0xad, 0x2e, 0xb6, 0x35, 0x9b, 0x18, 0x80, 0x03,
+ 0xc1, 0x42, 0xda, 0x59, 0xf7, 0x74, 0xec, 0x6f,
+ 0x75, 0xf6, 0x6e, 0xed, 0x43, 0xc0, 0x58, 0xdb,
+ 0x19, 0x9a, 0x02, 0x81, 0x2f, 0xac, 0x34, 0xb7,
+ 0x47, 0xc4, 0x5c, 0xdf, 0x71, 0xf2, 0x6a, 0xe9,
+ 0x2b, 0xa8, 0x30, 0xb3, 0x1d, 0x9e, 0x06, 0x85,
+ 0x9f, 0x1c, 0x84, 0x07, 0xa9, 0x2a, 0xb2, 0x31,
+ 0xf3, 0x70, 0xe8, 0x6b, 0xc5, 0x46, 0xde, 0x5d,
+ 0xea, 0x69, 0xf1, 0x72, 0xdc, 0x5f, 0xc7, 0x44,
+ 0x86, 0x05, 0x9d, 0x1e, 0xb0, 0x33, 0xab, 0x28,
+ 0x32, 0xb1, 0x29, 0xaa, 0x04, 0x87, 0x1f, 0x9c,
+ 0x5e, 0xdd, 0x45, 0xc6, 0x68, 0xeb, 0x73, 0xf0,
+ 0x8e, 0x0d, 0x95, 0x16, 0xb8, 0x3b, 0xa3, 0x20,
+ 0xe2, 0x61, 0xf9, 0x7a, 0xd4, 0x57, 0xcf, 0x4c,
+ 0x56, 0xd5, 0x4d, 0xce, 0x60, 0xe3, 0x7b, 0xf8,
+ 0x3a, 0xb9, 0x21, 0xa2, 0x0c, 0x8f, 0x17, 0x94,
+ 0x23, 0xa0, 0x38, 0xbb, 0x15, 0x96, 0x0e, 0x8d,
+ 0x4f, 0xcc, 0x54, 0xd7, 0x79, 0xfa, 0x62, 0xe1,
+ 0xfb, 0x78, 0xe0, 0x63, 0xcd, 0x4e, 0xd6, 0x55,
+ 0x97, 0x14, 0x8c, 0x0f, 0xa1, 0x22, 0xba, 0x39,
+ 0xc9, 0x4a, 0xd2, 0x51, 0xff, 0x7c, 0xe4, 0x67,
+ 0xa5, 0x26, 0xbe, 0x3d, 0x93, 0x10, 0x88, 0x0b,
+ 0x11, 0x92, 0x0a, 0x89, 0x27, 0xa4, 0x3c, 0xbf,
+ 0x7d, 0xfe, 0x66, 0xe5, 0x4b, 0xc8, 0x50, 0xd3,
+ 0x64, 0xe7, 0x7f, 0xfc, 0x52, 0xd1, 0x49, 0xca,
+ 0x08, 0x8b, 0x13, 0x90, 0x3e, 0xbd, 0x25, 0xa6,
+ 0xbc, 0x3f, 0xa7, 0x24, 0x8a, 0x09, 0x91, 0x12,
+ 0xd0, 0x53, 0xcb, 0x48, 0xe6, 0x65, 0xfd, 0x7e,
+ },
+ {
+ 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb,
+ 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef,
+ 0xa8, 0x2c, 0xbd, 0x39, 0x82, 0x06, 0x97, 0x13,
+ 0xfc, 0x78, 0xe9, 0x6d, 0xd6, 0x52, 0xc3, 0x47,
+ 0x4d, 0xc9, 0x58, 0xdc, 0x67, 0xe3, 0x72, 0xf6,
+ 0x19, 0x9d, 0x0c, 0x88, 0x33, 0xb7, 0x26, 0xa2,
+ 0xe5, 0x61, 0xf0, 0x74, 0xcf, 0x4b, 0xda, 0x5e,
+ 0xb1, 0x35, 0xa4, 0x20, 0x9b, 0x1f, 0x8e, 0x0a,
+ 0x9a, 0x1e, 0x8f, 0x0b, 0xb0, 0x34, 0xa5, 0x21,
+ 0xce, 0x4a, 0xdb, 0x5f, 0xe4, 0x60, 0xf1, 0x75,
+ 0x32, 0xb6, 0x27, 0xa3, 0x18, 0x9c, 0x0d, 0x89,
+ 0x66, 0xe2, 0x73, 0xf7, 0x4c, 0xc8, 0x59, 0xdd,
+ 0xd7, 0x53, 0xc2, 0x46, 0xfd, 0x79, 0xe8, 0x6c,
+ 0x83, 0x07, 0x96, 0x12, 0xa9, 0x2d, 0xbc, 0x38,
+ 0x7f, 0xfb, 0x6a, 0xee, 0x55, 0xd1, 0x40, 0xc4,
+ 0x2b, 0xaf, 0x3e, 0xba, 0x01, 0x85, 0x14, 0x90,
+ 0x29, 0xad, 0x3c, 0xb8, 0x03, 0x87, 0x16, 0x92,
+ 0x7d, 0xf9, 0x68, 0xec, 0x57, 0xd3, 0x42, 0xc6,
+ 0x81, 0x05, 0x94, 0x10, 0xab, 0x2f, 0xbe, 0x3a,
+ 0xd5, 0x51, 0xc0, 0x44, 0xff, 0x7b, 0xea, 0x6e,
+ 0x64, 0xe0, 0x71, 0xf5, 0x4e, 0xca, 0x5b, 0xdf,
+ 0x30, 0xb4, 0x25, 0xa1, 0x1a, 0x9e, 0x0f, 0x8b,
+ 0xcc, 0x48, 0xd9, 0x5d, 0xe6, 0x62, 0xf3, 0x77,
+ 0x98, 0x1c, 0x8d, 0x09, 0xb2, 0x36, 0xa7, 0x23,
+ 0xb3, 0x37, 0xa6, 0x22, 0x99, 0x1d, 0x8c, 0x08,
+ 0xe7, 0x63, 0xf2, 0x76, 0xcd, 0x49, 0xd8, 0x5c,
+ 0x1b, 0x9f, 0x0e, 0x8a, 0x31, 0xb5, 0x24, 0xa0,
+ 0x4f, 0xcb, 0x5a, 0xde, 0x65, 0xe1, 0x70, 0xf4,
+ 0xfe, 0x7a, 0xeb, 0x6f, 0xd4, 0x50, 0xc1, 0x45,
+ 0xaa, 0x2e, 0xbf, 0x3b, 0x80, 0x04, 0x95, 0x11,
+ 0x56, 0xd2, 0x43, 0xc7, 0x7c, 0xf8, 0x69, 0xed,
+ 0x02, 0x86, 0x17, 0x93, 0x28, 0xac, 0x3d, 0xb9,
+ },
+ {
+ 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc,
+ 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0,
+ 0xb8, 0x3d, 0xaf, 0x2a, 0x96, 0x13, 0x81, 0x04,
+ 0xe4, 0x61, 0xf3, 0x76, 0xca, 0x4f, 0xdd, 0x58,
+ 0x6d, 0xe8, 0x7a, 0xff, 0x43, 0xc6, 0x54, 0xd1,
+ 0x31, 0xb4, 0x26, 0xa3, 0x1f, 0x9a, 0x08, 0x8d,
+ 0xd5, 0x50, 0xc2, 0x47, 0xfb, 0x7e, 0xec, 0x69,
+ 0x89, 0x0c, 0x9e, 0x1b, 0xa7, 0x22, 0xb0, 0x35,
+ 0xda, 0x5f, 0xcd, 0x48, 0xf4, 0x71, 0xe3, 0x66,
+ 0x86, 0x03, 0x91, 0x14, 0xa8, 0x2d, 0xbf, 0x3a,
+ 0x62, 0xe7, 0x75, 0xf0, 0x4c, 0xc9, 0x5b, 0xde,
+ 0x3e, 0xbb, 0x29, 0xac, 0x10, 0x95, 0x07, 0x82,
+ 0xb7, 0x32, 0xa0, 0x25, 0x99, 0x1c, 0x8e, 0x0b,
+ 0xeb, 0x6e, 0xfc, 0x79, 0xc5, 0x40, 0xd2, 0x57,
+ 0x0f, 0x8a, 0x18, 0x9d, 0x21, 0xa4, 0x36, 0xb3,
+ 0x53, 0xd6, 0x44, 0xc1, 0x7d, 0xf8, 0x6a, 0xef,
+ 0xa9, 0x2c, 0xbe, 0x3b, 0x87, 0x02, 0x90, 0x15,
+ 0xf5, 0x70, 0xe2, 0x67, 0xdb, 0x5e, 0xcc, 0x49,
+ 0x11, 0x94, 0x06, 0x83, 0x3f, 0xba, 0x28, 0xad,
+ 0x4d, 0xc8, 0x5a, 0xdf, 0x63, 0xe6, 0x74, 0xf1,
+ 0xc4, 0x41, 0xd3, 0x56, 0xea, 0x6f, 0xfd, 0x78,
+ 0x98, 0x1d, 0x8f, 0x0a, 0xb6, 0x33, 0xa1, 0x24,
+ 0x7c, 0xf9, 0x6b, 0xee, 0x52, 0xd7, 0x45, 0xc0,
+ 0x20, 0xa5, 0x37, 0xb2, 0x0e, 0x8b, 0x19, 0x9c,
+ 0x73, 0xf6, 0x64, 0xe1, 0x5d, 0xd8, 0x4a, 0xcf,
+ 0x2f, 0xaa, 0x38, 0xbd, 0x01, 0x84, 0x16, 0x93,
+ 0xcb, 0x4e, 0xdc, 0x59, 0xe5, 0x60, 0xf2, 0x77,
+ 0x97, 0x12, 0x80, 0x05, 0xb9, 0x3c, 0xae, 0x2b,
+ 0x1e, 0x9b, 0x09, 0x8c, 0x30, 0xb5, 0x27, 0xa2,
+ 0x42, 0xc7, 0x55, 0xd0, 0x6c, 0xe9, 0x7b, 0xfe,
+ 0xa6, 0x23, 0xb1, 0x34, 0x88, 0x0d, 0x9f, 0x1a,
+ 0xfa, 0x7f, 0xed, 0x68, 0xd4, 0x51, 0xc3, 0x46,
+ },
+ {
+ 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5,
+ 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1,
+ 0x88, 0x0e, 0x99, 0x1f, 0xaa, 0x2c, 0xbb, 0x3d,
+ 0xcc, 0x4a, 0xdd, 0x5b, 0xee, 0x68, 0xff, 0x79,
+ 0x0d, 0x8b, 0x1c, 0x9a, 0x2f, 0xa9, 0x3e, 0xb8,
+ 0x49, 0xcf, 0x58, 0xde, 0x6b, 0xed, 0x7a, 0xfc,
+ 0x85, 0x03, 0x94, 0x12, 0xa7, 0x21, 0xb6, 0x30,
+ 0xc1, 0x47, 0xd0, 0x56, 0xe3, 0x65, 0xf2, 0x74,
+ 0x1a, 0x9c, 0x0b, 0x8d, 0x38, 0xbe, 0x29, 0xaf,
+ 0x5e, 0xd8, 0x4f, 0xc9, 0x7c, 0xfa, 0x6d, 0xeb,
+ 0x92, 0x14, 0x83, 0x05, 0xb0, 0x36, 0xa1, 0x27,
+ 0xd6, 0x50, 0xc7, 0x41, 0xf4, 0x72, 0xe5, 0x63,
+ 0x17, 0x91, 0x06, 0x80, 0x35, 0xb3, 0x24, 0xa2,
+ 0x53, 0xd5, 0x42, 0xc4, 0x71, 0xf7, 0x60, 0xe6,
+ 0x9f, 0x19, 0x8e, 0x08, 0xbd, 0x3b, 0xac, 0x2a,
+ 0xdb, 0x5d, 0xca, 0x4c, 0xf9, 0x7f, 0xe8, 0x6e,
+ 0x34, 0xb2, 0x25, 0xa3, 0x16, 0x90, 0x07, 0x81,
+ 0x70, 0xf6, 0x61, 0xe7, 0x52, 0xd4, 0x43, 0xc5,
+ 0xbc, 0x3a, 0xad, 0x2b, 0x9e, 0x18, 0x8f, 0x09,
+ 0xf8, 0x7e, 0xe9, 0x6f, 0xda, 0x5c, 0xcb, 0x4d,
+ 0x39, 0xbf, 0x28, 0xae, 0x1b, 0x9d, 0x0a, 0x8c,
+ 0x7d, 0xfb, 0x6c, 0xea, 0x5f, 0xd9, 0x4e, 0xc8,
+ 0xb1, 0x37, 0xa0, 0x26, 0x93, 0x15, 0x82, 0x04,
+ 0xf5, 0x73, 0xe4, 0x62, 0xd7, 0x51, 0xc6, 0x40,
+ 0x2e, 0xa8, 0x3f, 0xb9, 0x0c, 0x8a, 0x1d, 0x9b,
+ 0x6a, 0xec, 0x7b, 0xfd, 0x48, 0xce, 0x59, 0xdf,
+ 0xa6, 0x20, 0xb7, 0x31, 0x84, 0x02, 0x95, 0x13,
+ 0xe2, 0x64, 0xf3, 0x75, 0xc0, 0x46, 0xd1, 0x57,
+ 0x23, 0xa5, 0x32, 0xb4, 0x01, 0x87, 0x10, 0x96,
+ 0x67, 0xe1, 0x76, 0xf0, 0x45, 0xc3, 0x54, 0xd2,
+ 0xab, 0x2d, 0xba, 0x3c, 0x89, 0x0f, 0x98, 0x1e,
+ 0xef, 0x69, 0xfe, 0x78, 0xcd, 0x4b, 0xdc, 0x5a,
+ },
+ {
+ 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2,
+ 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe,
+ 0x98, 0x1f, 0x8b, 0x0c, 0xbe, 0x39, 0xad, 0x2a,
+ 0xd4, 0x53, 0xc7, 0x40, 0xf2, 0x75, 0xe1, 0x66,
+ 0x2d, 0xaa, 0x3e, 0xb9, 0x0b, 0x8c, 0x18, 0x9f,
+ 0x61, 0xe6, 0x72, 0xf5, 0x47, 0xc0, 0x54, 0xd3,
+ 0xb5, 0x32, 0xa6, 0x21, 0x93, 0x14, 0x80, 0x07,
+ 0xf9, 0x7e, 0xea, 0x6d, 0xdf, 0x58, 0xcc, 0x4b,
+ 0x5a, 0xdd, 0x49, 0xce, 0x7c, 0xfb, 0x6f, 0xe8,
+ 0x16, 0x91, 0x05, 0x82, 0x30, 0xb7, 0x23, 0xa4,
+ 0xc2, 0x45, 0xd1, 0x56, 0xe4, 0x63, 0xf7, 0x70,
+ 0x8e, 0x09, 0x9d, 0x1a, 0xa8, 0x2f, 0xbb, 0x3c,
+ 0x77, 0xf0, 0x64, 0xe3, 0x51, 0xd6, 0x42, 0xc5,
+ 0x3b, 0xbc, 0x28, 0xaf, 0x1d, 0x9a, 0x0e, 0x89,
+ 0xef, 0x68, 0xfc, 0x7b, 0xc9, 0x4e, 0xda, 0x5d,
+ 0xa3, 0x24, 0xb0, 0x37, 0x85, 0x02, 0x96, 0x11,
+ 0xb4, 0x33, 0xa7, 0x20, 0x92, 0x15, 0x81, 0x06,
+ 0xf8, 0x7f, 0xeb, 0x6c, 0xde, 0x59, 0xcd, 0x4a,
+ 0x2c, 0xab, 0x3f, 0xb8, 0x0a, 0x8d, 0x19, 0x9e,
+ 0x60, 0xe7, 0x73, 0xf4, 0x46, 0xc1, 0x55, 0xd2,
+ 0x99, 0x1e, 0x8a, 0x0d, 0xbf, 0x38, 0xac, 0x2b,
+ 0xd5, 0x52, 0xc6, 0x41, 0xf3, 0x74, 0xe0, 0x67,
+ 0x01, 0x86, 0x12, 0x95, 0x27, 0xa0, 0x34, 0xb3,
+ 0x4d, 0xca, 0x5e, 0xd9, 0x6b, 0xec, 0x78, 0xff,
+ 0xee, 0x69, 0xfd, 0x7a, 0xc8, 0x4f, 0xdb, 0x5c,
+ 0xa2, 0x25, 0xb1, 0x36, 0x84, 0x03, 0x97, 0x10,
+ 0x76, 0xf1, 0x65, 0xe2, 0x50, 0xd7, 0x43, 0xc4,
+ 0x3a, 0xbd, 0x29, 0xae, 0x1c, 0x9b, 0x0f, 0x88,
+ 0xc3, 0x44, 0xd0, 0x57, 0xe5, 0x62, 0xf6, 0x71,
+ 0x8f, 0x08, 0x9c, 0x1b, 0xa9, 0x2e, 0xba, 0x3d,
+ 0x5b, 0xdc, 0x48, 0xcf, 0x7d, 0xfa, 0x6e, 0xe9,
+ 0x17, 0x90, 0x04, 0x83, 0x31, 0xb6, 0x22, 0xa5,
+ },
+ {
+ 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f,
+ 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab,
+ 0x68, 0xe0, 0x65, 0xed, 0x72, 0xfa, 0x7f, 0xf7,
+ 0x5c, 0xd4, 0x51, 0xd9, 0x46, 0xce, 0x4b, 0xc3,
+ 0xd0, 0x58, 0xdd, 0x55, 0xca, 0x42, 0xc7, 0x4f,
+ 0xe4, 0x6c, 0xe9, 0x61, 0xfe, 0x76, 0xf3, 0x7b,
+ 0xb8, 0x30, 0xb5, 0x3d, 0xa2, 0x2a, 0xaf, 0x27,
+ 0x8c, 0x04, 0x81, 0x09, 0x96, 0x1e, 0x9b, 0x13,
+ 0xbd, 0x35, 0xb0, 0x38, 0xa7, 0x2f, 0xaa, 0x22,
+ 0x89, 0x01, 0x84, 0x0c, 0x93, 0x1b, 0x9e, 0x16,
+ 0xd5, 0x5d, 0xd8, 0x50, 0xcf, 0x47, 0xc2, 0x4a,
+ 0xe1, 0x69, 0xec, 0x64, 0xfb, 0x73, 0xf6, 0x7e,
+ 0x6d, 0xe5, 0x60, 0xe8, 0x77, 0xff, 0x7a, 0xf2,
+ 0x59, 0xd1, 0x54, 0xdc, 0x43, 0xcb, 0x4e, 0xc6,
+ 0x05, 0x8d, 0x08, 0x80, 0x1f, 0x97, 0x12, 0x9a,
+ 0x31, 0xb9, 0x3c, 0xb4, 0x2b, 0xa3, 0x26, 0xae,
+ 0x67, 0xef, 0x6a, 0xe2, 0x7d, 0xf5, 0x70, 0xf8,
+ 0x53, 0xdb, 0x5e, 0xd6, 0x49, 0xc1, 0x44, 0xcc,
+ 0x0f, 0x87, 0x02, 0x8a, 0x15, 0x9d, 0x18, 0x90,
+ 0x3b, 0xb3, 0x36, 0xbe, 0x21, 0xa9, 0x2c, 0xa4,
+ 0xb7, 0x3f, 0xba, 0x32, 0xad, 0x25, 0xa0, 0x28,
+ 0x83, 0x0b, 0x8e, 0x06, 0x99, 0x11, 0x94, 0x1c,
+ 0xdf, 0x57, 0xd2, 0x5a, 0xc5, 0x4d, 0xc8, 0x40,
+ 0xeb, 0x63, 0xe6, 0x6e, 0xf1, 0x79, 0xfc, 0x74,
+ 0xda, 0x52, 0xd7, 0x5f, 0xc0, 0x48, 0xcd, 0x45,
+ 0xee, 0x66, 0xe3, 0x6b, 0xf4, 0x7c, 0xf9, 0x71,
+ 0xb2, 0x3a, 0xbf, 0x37, 0xa8, 0x20, 0xa5, 0x2d,
+ 0x86, 0x0e, 0x8b, 0x03, 0x9c, 0x14, 0x91, 0x19,
+ 0x0a, 0x82, 0x07, 0x8f, 0x10, 0x98, 0x1d, 0x95,
+ 0x3e, 0xb6, 0x33, 0xbb, 0x24, 0xac, 0x29, 0xa1,
+ 0x62, 0xea, 0x6f, 0xe7, 0x78, 0xf0, 0x75, 0xfd,
+ 0x56, 0xde, 0x5b, 0xd3, 0x4c, 0xc4, 0x41, 0xc9,
+ },
+ {
+ 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98,
+ 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4,
+ 0x78, 0xf1, 0x77, 0xfe, 0x66, 0xef, 0x69, 0xe0,
+ 0x44, 0xcd, 0x4b, 0xc2, 0x5a, 0xd3, 0x55, 0xdc,
+ 0xf0, 0x79, 0xff, 0x76, 0xee, 0x67, 0xe1, 0x68,
+ 0xcc, 0x45, 0xc3, 0x4a, 0xd2, 0x5b, 0xdd, 0x54,
+ 0x88, 0x01, 0x87, 0x0e, 0x96, 0x1f, 0x99, 0x10,
+ 0xb4, 0x3d, 0xbb, 0x32, 0xaa, 0x23, 0xa5, 0x2c,
+ 0xfd, 0x74, 0xf2, 0x7b, 0xe3, 0x6a, 0xec, 0x65,
+ 0xc1, 0x48, 0xce, 0x47, 0xdf, 0x56, 0xd0, 0x59,
+ 0x85, 0x0c, 0x8a, 0x03, 0x9b, 0x12, 0x94, 0x1d,
+ 0xb9, 0x30, 0xb6, 0x3f, 0xa7, 0x2e, 0xa8, 0x21,
+ 0x0d, 0x84, 0x02, 0x8b, 0x13, 0x9a, 0x1c, 0x95,
+ 0x31, 0xb8, 0x3e, 0xb7, 0x2f, 0xa6, 0x20, 0xa9,
+ 0x75, 0xfc, 0x7a, 0xf3, 0x6b, 0xe2, 0x64, 0xed,
+ 0x49, 0xc0, 0x46, 0xcf, 0x57, 0xde, 0x58, 0xd1,
+ 0xe7, 0x6e, 0xe8, 0x61, 0xf9, 0x70, 0xf6, 0x7f,
+ 0xdb, 0x52, 0xd4, 0x5d, 0xc5, 0x4c, 0xca, 0x43,
+ 0x9f, 0x16, 0x90, 0x19, 0x81, 0x08, 0x8e, 0x07,
+ 0xa3, 0x2a, 0xac, 0x25, 0xbd, 0x34, 0xb2, 0x3b,
+ 0x17, 0x9e, 0x18, 0x91, 0x09, 0x80, 0x06, 0x8f,
+ 0x2b, 0xa2, 0x24, 0xad, 0x35, 0xbc, 0x3a, 0xb3,
+ 0x6f, 0xe6, 0x60, 0xe9, 0x71, 0xf8, 0x7e, 0xf7,
+ 0x53, 0xda, 0x5c, 0xd5, 0x4d, 0xc4, 0x42, 0xcb,
+ 0x1a, 0x93, 0x15, 0x9c, 0x04, 0x8d, 0x0b, 0x82,
+ 0x26, 0xaf, 0x29, 0xa0, 0x38, 0xb1, 0x37, 0xbe,
+ 0x62, 0xeb, 0x6d, 0xe4, 0x7c, 0xf5, 0x73, 0xfa,
+ 0x5e, 0xd7, 0x51, 0xd8, 0x40, 0xc9, 0x4f, 0xc6,
+ 0xea, 0x63, 0xe5, 0x6c, 0xf4, 0x7d, 0xfb, 0x72,
+ 0xd6, 0x5f, 0xd9, 0x50, 0xc8, 0x41, 0xc7, 0x4e,
+ 0x92, 0x1b, 0x9d, 0x14, 0x8c, 0x05, 0x83, 0x0a,
+ 0xae, 0x27, 0xa1, 0x28, 0xb0, 0x39, 0xbf, 0x36,
+ },
+ {
+ 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91,
+ 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5,
+ 0x48, 0xc2, 0x41, 0xcb, 0x5a, 0xd0, 0x53, 0xd9,
+ 0x6c, 0xe6, 0x65, 0xef, 0x7e, 0xf4, 0x77, 0xfd,
+ 0x90, 0x1a, 0x99, 0x13, 0x82, 0x08, 0x8b, 0x01,
+ 0xb4, 0x3e, 0xbd, 0x37, 0xa6, 0x2c, 0xaf, 0x25,
+ 0xd8, 0x52, 0xd1, 0x5b, 0xca, 0x40, 0xc3, 0x49,
+ 0xfc, 0x76, 0xf5, 0x7f, 0xee, 0x64, 0xe7, 0x6d,
+ 0x3d, 0xb7, 0x34, 0xbe, 0x2f, 0xa5, 0x26, 0xac,
+ 0x19, 0x93, 0x10, 0x9a, 0x0b, 0x81, 0x02, 0x88,
+ 0x75, 0xff, 0x7c, 0xf6, 0x67, 0xed, 0x6e, 0xe4,
+ 0x51, 0xdb, 0x58, 0xd2, 0x43, 0xc9, 0x4a, 0xc0,
+ 0xad, 0x27, 0xa4, 0x2e, 0xbf, 0x35, 0xb6, 0x3c,
+ 0x89, 0x03, 0x80, 0x0a, 0x9b, 0x11, 0x92, 0x18,
+ 0xe5, 0x6f, 0xec, 0x66, 0xf7, 0x7d, 0xfe, 0x74,
+ 0xc1, 0x4b, 0xc8, 0x42, 0xd3, 0x59, 0xda, 0x50,
+ 0x7a, 0xf0, 0x73, 0xf9, 0x68, 0xe2, 0x61, 0xeb,
+ 0x5e, 0xd4, 0x57, 0xdd, 0x4c, 0xc6, 0x45, 0xcf,
+ 0x32, 0xb8, 0x3b, 0xb1, 0x20, 0xaa, 0x29, 0xa3,
+ 0x16, 0x9c, 0x1f, 0x95, 0x04, 0x8e, 0x0d, 0x87,
+ 0xea, 0x60, 0xe3, 0x69, 0xf8, 0x72, 0xf1, 0x7b,
+ 0xce, 0x44, 0xc7, 0x4d, 0xdc, 0x56, 0xd5, 0x5f,
+ 0xa2, 0x28, 0xab, 0x21, 0xb0, 0x3a, 0xb9, 0x33,
+ 0x86, 0x0c, 0x8f, 0x05, 0x94, 0x1e, 0x9d, 0x17,
+ 0x47, 0xcd, 0x4e, 0xc4, 0x55, 0xdf, 0x5c, 0xd6,
+ 0x63, 0xe9, 0x6a, 0xe0, 0x71, 0xfb, 0x78, 0xf2,
+ 0x0f, 0x85, 0x06, 0x8c, 0x1d, 0x97, 0x14, 0x9e,
+ 0x2b, 0xa1, 0x22, 0xa8, 0x39, 0xb3, 0x30, 0xba,
+ 0xd7, 0x5d, 0xde, 0x54, 0xc5, 0x4f, 0xcc, 0x46,
+ 0xf3, 0x79, 0xfa, 0x70, 0xe1, 0x6b, 0xe8, 0x62,
+ 0x9f, 0x15, 0x96, 0x1c, 0x8d, 0x07, 0x84, 0x0e,
+ 0xbb, 0x31, 0xb2, 0x38, 0xa9, 0x23, 0xa0, 0x2a,
+ },
+ {
+ 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96,
+ 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba,
+ 0x58, 0xd3, 0x53, 0xd8, 0x4e, 0xc5, 0x45, 0xce,
+ 0x74, 0xff, 0x7f, 0xf4, 0x62, 0xe9, 0x69, 0xe2,
+ 0xb0, 0x3b, 0xbb, 0x30, 0xa6, 0x2d, 0xad, 0x26,
+ 0x9c, 0x17, 0x97, 0x1c, 0x8a, 0x01, 0x81, 0x0a,
+ 0xe8, 0x63, 0xe3, 0x68, 0xfe, 0x75, 0xf5, 0x7e,
+ 0xc4, 0x4f, 0xcf, 0x44, 0xd2, 0x59, 0xd9, 0x52,
+ 0x7d, 0xf6, 0x76, 0xfd, 0x6b, 0xe0, 0x60, 0xeb,
+ 0x51, 0xda, 0x5a, 0xd1, 0x47, 0xcc, 0x4c, 0xc7,
+ 0x25, 0xae, 0x2e, 0xa5, 0x33, 0xb8, 0x38, 0xb3,
+ 0x09, 0x82, 0x02, 0x89, 0x1f, 0x94, 0x14, 0x9f,
+ 0xcd, 0x46, 0xc6, 0x4d, 0xdb, 0x50, 0xd0, 0x5b,
+ 0xe1, 0x6a, 0xea, 0x61, 0xf7, 0x7c, 0xfc, 0x77,
+ 0x95, 0x1e, 0x9e, 0x15, 0x83, 0x08, 0x88, 0x03,
+ 0xb9, 0x32, 0xb2, 0x39, 0xaf, 0x24, 0xa4, 0x2f,
+ 0xfa, 0x71, 0xf1, 0x7a, 0xec, 0x67, 0xe7, 0x6c,
+ 0xd6, 0x5d, 0xdd, 0x56, 0xc0, 0x4b, 0xcb, 0x40,
+ 0xa2, 0x29, 0xa9, 0x22, 0xb4, 0x3f, 0xbf, 0x34,
+ 0x8e, 0x05, 0x85, 0x0e, 0x98, 0x13, 0x93, 0x18,
+ 0x4a, 0xc1, 0x41, 0xca, 0x5c, 0xd7, 0x57, 0xdc,
+ 0x66, 0xed, 0x6d, 0xe6, 0x70, 0xfb, 0x7b, 0xf0,
+ 0x12, 0x99, 0x19, 0x92, 0x04, 0x8f, 0x0f, 0x84,
+ 0x3e, 0xb5, 0x35, 0xbe, 0x28, 0xa3, 0x23, 0xa8,
+ 0x87, 0x0c, 0x8c, 0x07, 0x91, 0x1a, 0x9a, 0x11,
+ 0xab, 0x20, 0xa0, 0x2b, 0xbd, 0x36, 0xb6, 0x3d,
+ 0xdf, 0x54, 0xd4, 0x5f, 0xc9, 0x42, 0xc2, 0x49,
+ 0xf3, 0x78, 0xf8, 0x73, 0xe5, 0x6e, 0xee, 0x65,
+ 0x37, 0xbc, 0x3c, 0xb7, 0x21, 0xaa, 0x2a, 0xa1,
+ 0x1b, 0x90, 0x10, 0x9b, 0x0d, 0x86, 0x06, 0x8d,
+ 0x6f, 0xe4, 0x64, 0xef, 0x79, 0xf2, 0x72, 0xf9,
+ 0x43, 0xc8, 0x48, 0xc3, 0x55, 0xde, 0x5e, 0xd5,
+ },
+ {
+ 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83,
+ 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97,
+ 0x28, 0xa4, 0x2d, 0xa1, 0x22, 0xae, 0x27, 0xab,
+ 0x3c, 0xb0, 0x39, 0xb5, 0x36, 0xba, 0x33, 0xbf,
+ 0x50, 0xdc, 0x55, 0xd9, 0x5a, 0xd6, 0x5f, 0xd3,
+ 0x44, 0xc8, 0x41, 0xcd, 0x4e, 0xc2, 0x4b, 0xc7,
+ 0x78, 0xf4, 0x7d, 0xf1, 0x72, 0xfe, 0x77, 0xfb,
+ 0x6c, 0xe0, 0x69, 0xe5, 0x66, 0xea, 0x63, 0xef,
+ 0xa0, 0x2c, 0xa5, 0x29, 0xaa, 0x26, 0xaf, 0x23,
+ 0xb4, 0x38, 0xb1, 0x3d, 0xbe, 0x32, 0xbb, 0x37,
+ 0x88, 0x04, 0x8d, 0x01, 0x82, 0x0e, 0x87, 0x0b,
+ 0x9c, 0x10, 0x99, 0x15, 0x96, 0x1a, 0x93, 0x1f,
+ 0xf0, 0x7c, 0xf5, 0x79, 0xfa, 0x76, 0xff, 0x73,
+ 0xe4, 0x68, 0xe1, 0x6d, 0xee, 0x62, 0xeb, 0x67,
+ 0xd8, 0x54, 0xdd, 0x51, 0xd2, 0x5e, 0xd7, 0x5b,
+ 0xcc, 0x40, 0xc9, 0x45, 0xc6, 0x4a, 0xc3, 0x4f,
+ 0x5d, 0xd1, 0x58, 0xd4, 0x57, 0xdb, 0x52, 0xde,
+ 0x49, 0xc5, 0x4c, 0xc0, 0x43, 0xcf, 0x46, 0xca,
+ 0x75, 0xf9, 0x70, 0xfc, 0x7f, 0xf3, 0x7a, 0xf6,
+ 0x61, 0xed, 0x64, 0xe8, 0x6b, 0xe7, 0x6e, 0xe2,
+ 0x0d, 0x81, 0x08, 0x84, 0x07, 0x8b, 0x02, 0x8e,
+ 0x19, 0x95, 0x1c, 0x90, 0x13, 0x9f, 0x16, 0x9a,
+ 0x25, 0xa9, 0x20, 0xac, 0x2f, 0xa3, 0x2a, 0xa6,
+ 0x31, 0xbd, 0x34, 0xb8, 0x3b, 0xb7, 0x3e, 0xb2,
+ 0xfd, 0x71, 0xf8, 0x74, 0xf7, 0x7b, 0xf2, 0x7e,
+ 0xe9, 0x65, 0xec, 0x60, 0xe3, 0x6f, 0xe6, 0x6a,
+ 0xd5, 0x59, 0xd0, 0x5c, 0xdf, 0x53, 0xda, 0x56,
+ 0xc1, 0x4d, 0xc4, 0x48, 0xcb, 0x47, 0xce, 0x42,
+ 0xad, 0x21, 0xa8, 0x24, 0xa7, 0x2b, 0xa2, 0x2e,
+ 0xb9, 0x35, 0xbc, 0x30, 0xb3, 0x3f, 0xb6, 0x3a,
+ 0x85, 0x09, 0x80, 0x0c, 0x8f, 0x03, 0x8a, 0x06,
+ 0x91, 0x1d, 0x94, 0x18, 0x9b, 0x17, 0x9e, 0x12,
+ },
+ {
+ 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84,
+ 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98,
+ 0x38, 0xb5, 0x3f, 0xb2, 0x36, 0xbb, 0x31, 0xbc,
+ 0x24, 0xa9, 0x23, 0xae, 0x2a, 0xa7, 0x2d, 0xa0,
+ 0x70, 0xfd, 0x77, 0xfa, 0x7e, 0xf3, 0x79, 0xf4,
+ 0x6c, 0xe1, 0x6b, 0xe6, 0x62, 0xef, 0x65, 0xe8,
+ 0x48, 0xc5, 0x4f, 0xc2, 0x46, 0xcb, 0x41, 0xcc,
+ 0x54, 0xd9, 0x53, 0xde, 0x5a, 0xd7, 0x5d, 0xd0,
+ 0xe0, 0x6d, 0xe7, 0x6a, 0xee, 0x63, 0xe9, 0x64,
+ 0xfc, 0x71, 0xfb, 0x76, 0xf2, 0x7f, 0xf5, 0x78,
+ 0xd8, 0x55, 0xdf, 0x52, 0xd6, 0x5b, 0xd1, 0x5c,
+ 0xc4, 0x49, 0xc3, 0x4e, 0xca, 0x47, 0xcd, 0x40,
+ 0x90, 0x1d, 0x97, 0x1a, 0x9e, 0x13, 0x99, 0x14,
+ 0x8c, 0x01, 0x8b, 0x06, 0x82, 0x0f, 0x85, 0x08,
+ 0xa8, 0x25, 0xaf, 0x22, 0xa6, 0x2b, 0xa1, 0x2c,
+ 0xb4, 0x39, 0xb3, 0x3e, 0xba, 0x37, 0xbd, 0x30,
+ 0xdd, 0x50, 0xda, 0x57, 0xd3, 0x5e, 0xd4, 0x59,
+ 0xc1, 0x4c, 0xc6, 0x4b, 0xcf, 0x42, 0xc8, 0x45,
+ 0xe5, 0x68, 0xe2, 0x6f, 0xeb, 0x66, 0xec, 0x61,
+ 0xf9, 0x74, 0xfe, 0x73, 0xf7, 0x7a, 0xf0, 0x7d,
+ 0xad, 0x20, 0xaa, 0x27, 0xa3, 0x2e, 0xa4, 0x29,
+ 0xb1, 0x3c, 0xb6, 0x3b, 0xbf, 0x32, 0xb8, 0x35,
+ 0x95, 0x18, 0x92, 0x1f, 0x9b, 0x16, 0x9c, 0x11,
+ 0x89, 0x04, 0x8e, 0x03, 0x87, 0x0a, 0x80, 0x0d,
+ 0x3d, 0xb0, 0x3a, 0xb7, 0x33, 0xbe, 0x34, 0xb9,
+ 0x21, 0xac, 0x26, 0xab, 0x2f, 0xa2, 0x28, 0xa5,
+ 0x05, 0x88, 0x02, 0x8f, 0x0b, 0x86, 0x0c, 0x81,
+ 0x19, 0x94, 0x1e, 0x93, 0x17, 0x9a, 0x10, 0x9d,
+ 0x4d, 0xc0, 0x4a, 0xc7, 0x43, 0xce, 0x44, 0xc9,
+ 0x51, 0xdc, 0x56, 0xdb, 0x5f, 0xd2, 0x58, 0xd5,
+ 0x75, 0xf8, 0x72, 0xff, 0x7b, 0xf6, 0x7c, 0xf1,
+ 0x69, 0xe4, 0x6e, 0xe3, 0x67, 0xea, 0x60, 0xed,
+ },
+ {
+ 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d,
+ 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89,
+ 0x08, 0x86, 0x09, 0x87, 0x0a, 0x84, 0x0b, 0x85,
+ 0x0c, 0x82, 0x0d, 0x83, 0x0e, 0x80, 0x0f, 0x81,
+ 0x10, 0x9e, 0x11, 0x9f, 0x12, 0x9c, 0x13, 0x9d,
+ 0x14, 0x9a, 0x15, 0x9b, 0x16, 0x98, 0x17, 0x99,
+ 0x18, 0x96, 0x19, 0x97, 0x1a, 0x94, 0x1b, 0x95,
+ 0x1c, 0x92, 0x1d, 0x93, 0x1e, 0x90, 0x1f, 0x91,
+ 0x20, 0xae, 0x21, 0xaf, 0x22, 0xac, 0x23, 0xad,
+ 0x24, 0xaa, 0x25, 0xab, 0x26, 0xa8, 0x27, 0xa9,
+ 0x28, 0xa6, 0x29, 0xa7, 0x2a, 0xa4, 0x2b, 0xa5,
+ 0x2c, 0xa2, 0x2d, 0xa3, 0x2e, 0xa0, 0x2f, 0xa1,
+ 0x30, 0xbe, 0x31, 0xbf, 0x32, 0xbc, 0x33, 0xbd,
+ 0x34, 0xba, 0x35, 0xbb, 0x36, 0xb8, 0x37, 0xb9,
+ 0x38, 0xb6, 0x39, 0xb7, 0x3a, 0xb4, 0x3b, 0xb5,
+ 0x3c, 0xb2, 0x3d, 0xb3, 0x3e, 0xb0, 0x3f, 0xb1,
+ 0x40, 0xce, 0x41, 0xcf, 0x42, 0xcc, 0x43, 0xcd,
+ 0x44, 0xca, 0x45, 0xcb, 0x46, 0xc8, 0x47, 0xc9,
+ 0x48, 0xc6, 0x49, 0xc7, 0x4a, 0xc4, 0x4b, 0xc5,
+ 0x4c, 0xc2, 0x4d, 0xc3, 0x4e, 0xc0, 0x4f, 0xc1,
+ 0x50, 0xde, 0x51, 0xdf, 0x52, 0xdc, 0x53, 0xdd,
+ 0x54, 0xda, 0x55, 0xdb, 0x56, 0xd8, 0x57, 0xd9,
+ 0x58, 0xd6, 0x59, 0xd7, 0x5a, 0xd4, 0x5b, 0xd5,
+ 0x5c, 0xd2, 0x5d, 0xd3, 0x5e, 0xd0, 0x5f, 0xd1,
+ 0x60, 0xee, 0x61, 0xef, 0x62, 0xec, 0x63, 0xed,
+ 0x64, 0xea, 0x65, 0xeb, 0x66, 0xe8, 0x67, 0xe9,
+ 0x68, 0xe6, 0x69, 0xe7, 0x6a, 0xe4, 0x6b, 0xe5,
+ 0x6c, 0xe2, 0x6d, 0xe3, 0x6e, 0xe0, 0x6f, 0xe1,
+ 0x70, 0xfe, 0x71, 0xff, 0x72, 0xfc, 0x73, 0xfd,
+ 0x74, 0xfa, 0x75, 0xfb, 0x76, 0xf8, 0x77, 0xf9,
+ 0x78, 0xf6, 0x79, 0xf7, 0x7a, 0xf4, 0x7b, 0xf5,
+ 0x7c, 0xf2, 0x7d, 0xf3, 0x7e, 0xf0, 0x7f, 0xf1,
+ },
+ {
+ 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a,
+ 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86,
+ 0x18, 0x97, 0x1b, 0x94, 0x1e, 0x91, 0x1d, 0x92,
+ 0x14, 0x9b, 0x17, 0x98, 0x12, 0x9d, 0x11, 0x9e,
+ 0x30, 0xbf, 0x33, 0xbc, 0x36, 0xb9, 0x35, 0xba,
+ 0x3c, 0xb3, 0x3f, 0xb0, 0x3a, 0xb5, 0x39, 0xb6,
+ 0x28, 0xa7, 0x2b, 0xa4, 0x2e, 0xa1, 0x2d, 0xa2,
+ 0x24, 0xab, 0x27, 0xa8, 0x22, 0xad, 0x21, 0xae,
+ 0x60, 0xef, 0x63, 0xec, 0x66, 0xe9, 0x65, 0xea,
+ 0x6c, 0xe3, 0x6f, 0xe0, 0x6a, 0xe5, 0x69, 0xe6,
+ 0x78, 0xf7, 0x7b, 0xf4, 0x7e, 0xf1, 0x7d, 0xf2,
+ 0x74, 0xfb, 0x77, 0xf8, 0x72, 0xfd, 0x71, 0xfe,
+ 0x50, 0xdf, 0x53, 0xdc, 0x56, 0xd9, 0x55, 0xda,
+ 0x5c, 0xd3, 0x5f, 0xd0, 0x5a, 0xd5, 0x59, 0xd6,
+ 0x48, 0xc7, 0x4b, 0xc4, 0x4e, 0xc1, 0x4d, 0xc2,
+ 0x44, 0xcb, 0x47, 0xc8, 0x42, 0xcd, 0x41, 0xce,
+ 0xc0, 0x4f, 0xc3, 0x4c, 0xc6, 0x49, 0xc5, 0x4a,
+ 0xcc, 0x43, 0xcf, 0x40, 0xca, 0x45, 0xc9, 0x46,
+ 0xd8, 0x57, 0xdb, 0x54, 0xde, 0x51, 0xdd, 0x52,
+ 0xd4, 0x5b, 0xd7, 0x58, 0xd2, 0x5d, 0xd1, 0x5e,
+ 0xf0, 0x7f, 0xf3, 0x7c, 0xf6, 0x79, 0xf5, 0x7a,
+ 0xfc, 0x73, 0xff, 0x70, 0xfa, 0x75, 0xf9, 0x76,
+ 0xe8, 0x67, 0xeb, 0x64, 0xee, 0x61, 0xed, 0x62,
+ 0xe4, 0x6b, 0xe7, 0x68, 0xe2, 0x6d, 0xe1, 0x6e,
+ 0xa0, 0x2f, 0xa3, 0x2c, 0xa6, 0x29, 0xa5, 0x2a,
+ 0xac, 0x23, 0xaf, 0x20, 0xaa, 0x25, 0xa9, 0x26,
+ 0xb8, 0x37, 0xbb, 0x34, 0xbe, 0x31, 0xbd, 0x32,
+ 0xb4, 0x3b, 0xb7, 0x38, 0xb2, 0x3d, 0xb1, 0x3e,
+ 0x90, 0x1f, 0x93, 0x1c, 0x96, 0x19, 0x95, 0x1a,
+ 0x9c, 0x13, 0x9f, 0x10, 0x9a, 0x15, 0x99, 0x16,
+ 0x88, 0x07, 0x8b, 0x04, 0x8e, 0x01, 0x8d, 0x02,
+ 0x84, 0x0b, 0x87, 0x08, 0x82, 0x0d, 0x81, 0x0e,
+ },
+ {
+ 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7,
+ 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23,
+ 0xf5, 0x65, 0xc8, 0x58, 0x8f, 0x1f, 0xb2, 0x22,
+ 0x01, 0x91, 0x3c, 0xac, 0x7b, 0xeb, 0x46, 0xd6,
+ 0xf7, 0x67, 0xca, 0x5a, 0x8d, 0x1d, 0xb0, 0x20,
+ 0x03, 0x93, 0x3e, 0xae, 0x79, 0xe9, 0x44, 0xd4,
+ 0x02, 0x92, 0x3f, 0xaf, 0x78, 0xe8, 0x45, 0xd5,
+ 0xf6, 0x66, 0xcb, 0x5b, 0x8c, 0x1c, 0xb1, 0x21,
+ 0xf3, 0x63, 0xce, 0x5e, 0x89, 0x19, 0xb4, 0x24,
+ 0x07, 0x97, 0x3a, 0xaa, 0x7d, 0xed, 0x40, 0xd0,
+ 0x06, 0x96, 0x3b, 0xab, 0x7c, 0xec, 0x41, 0xd1,
+ 0xf2, 0x62, 0xcf, 0x5f, 0x88, 0x18, 0xb5, 0x25,
+ 0x04, 0x94, 0x39, 0xa9, 0x7e, 0xee, 0x43, 0xd3,
+ 0xf0, 0x60, 0xcd, 0x5d, 0x8a, 0x1a, 0xb7, 0x27,
+ 0xf1, 0x61, 0xcc, 0x5c, 0x8b, 0x1b, 0xb6, 0x26,
+ 0x05, 0x95, 0x38, 0xa8, 0x7f, 0xef, 0x42, 0xd2,
+ 0xfb, 0x6b, 0xc6, 0x56, 0x81, 0x11, 0xbc, 0x2c,
+ 0x0f, 0x9f, 0x32, 0xa2, 0x75, 0xe5, 0x48, 0xd8,
+ 0x0e, 0x9e, 0x33, 0xa3, 0x74, 0xe4, 0x49, 0xd9,
+ 0xfa, 0x6a, 0xc7, 0x57, 0x80, 0x10, 0xbd, 0x2d,
+ 0x0c, 0x9c, 0x31, 0xa1, 0x76, 0xe6, 0x4b, 0xdb,
+ 0xf8, 0x68, 0xc5, 0x55, 0x82, 0x12, 0xbf, 0x2f,
+ 0xf9, 0x69, 0xc4, 0x54, 0x83, 0x13, 0xbe, 0x2e,
+ 0x0d, 0x9d, 0x30, 0xa0, 0x77, 0xe7, 0x4a, 0xda,
+ 0x08, 0x98, 0x35, 0xa5, 0x72, 0xe2, 0x4f, 0xdf,
+ 0xfc, 0x6c, 0xc1, 0x51, 0x86, 0x16, 0xbb, 0x2b,
+ 0xfd, 0x6d, 0xc0, 0x50, 0x87, 0x17, 0xba, 0x2a,
+ 0x09, 0x99, 0x34, 0xa4, 0x73, 0xe3, 0x4e, 0xde,
+ 0xff, 0x6f, 0xc2, 0x52, 0x85, 0x15, 0xb8, 0x28,
+ 0x0b, 0x9b, 0x36, 0xa6, 0x71, 0xe1, 0x4c, 0xdc,
+ 0x0a, 0x9a, 0x37, 0xa7, 0x70, 0xe0, 0x4d, 0xdd,
+ 0xfe, 0x6e, 0xc3, 0x53, 0x84, 0x14, 0xb9, 0x29,
+ },
+ {
+ 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0,
+ 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c,
+ 0xe5, 0x74, 0xda, 0x4b, 0x9b, 0x0a, 0xa4, 0x35,
+ 0x19, 0x88, 0x26, 0xb7, 0x67, 0xf6, 0x58, 0xc9,
+ 0xd7, 0x46, 0xe8, 0x79, 0xa9, 0x38, 0x96, 0x07,
+ 0x2b, 0xba, 0x14, 0x85, 0x55, 0xc4, 0x6a, 0xfb,
+ 0x32, 0xa3, 0x0d, 0x9c, 0x4c, 0xdd, 0x73, 0xe2,
+ 0xce, 0x5f, 0xf1, 0x60, 0xb0, 0x21, 0x8f, 0x1e,
+ 0xb3, 0x22, 0x8c, 0x1d, 0xcd, 0x5c, 0xf2, 0x63,
+ 0x4f, 0xde, 0x70, 0xe1, 0x31, 0xa0, 0x0e, 0x9f,
+ 0x56, 0xc7, 0x69, 0xf8, 0x28, 0xb9, 0x17, 0x86,
+ 0xaa, 0x3b, 0x95, 0x04, 0xd4, 0x45, 0xeb, 0x7a,
+ 0x64, 0xf5, 0x5b, 0xca, 0x1a, 0x8b, 0x25, 0xb4,
+ 0x98, 0x09, 0xa7, 0x36, 0xe6, 0x77, 0xd9, 0x48,
+ 0x81, 0x10, 0xbe, 0x2f, 0xff, 0x6e, 0xc0, 0x51,
+ 0x7d, 0xec, 0x42, 0xd3, 0x03, 0x92, 0x3c, 0xad,
+ 0x7b, 0xea, 0x44, 0xd5, 0x05, 0x94, 0x3a, 0xab,
+ 0x87, 0x16, 0xb8, 0x29, 0xf9, 0x68, 0xc6, 0x57,
+ 0x9e, 0x0f, 0xa1, 0x30, 0xe0, 0x71, 0xdf, 0x4e,
+ 0x62, 0xf3, 0x5d, 0xcc, 0x1c, 0x8d, 0x23, 0xb2,
+ 0xac, 0x3d, 0x93, 0x02, 0xd2, 0x43, 0xed, 0x7c,
+ 0x50, 0xc1, 0x6f, 0xfe, 0x2e, 0xbf, 0x11, 0x80,
+ 0x49, 0xd8, 0x76, 0xe7, 0x37, 0xa6, 0x08, 0x99,
+ 0xb5, 0x24, 0x8a, 0x1b, 0xcb, 0x5a, 0xf4, 0x65,
+ 0xc8, 0x59, 0xf7, 0x66, 0xb6, 0x27, 0x89, 0x18,
+ 0x34, 0xa5, 0x0b, 0x9a, 0x4a, 0xdb, 0x75, 0xe4,
+ 0x2d, 0xbc, 0x12, 0x83, 0x53, 0xc2, 0x6c, 0xfd,
+ 0xd1, 0x40, 0xee, 0x7f, 0xaf, 0x3e, 0x90, 0x01,
+ 0x1f, 0x8e, 0x20, 0xb1, 0x61, 0xf0, 0x5e, 0xcf,
+ 0xe3, 0x72, 0xdc, 0x4d, 0x9d, 0x0c, 0xa2, 0x33,
+ 0xfa, 0x6b, 0xc5, 0x54, 0x84, 0x15, 0xbb, 0x2a,
+ 0x06, 0x97, 0x39, 0xa8, 0x78, 0xe9, 0x47, 0xd6,
+ },
+ {
+ 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9,
+ 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d,
+ 0xd5, 0x47, 0xec, 0x7e, 0xa7, 0x35, 0x9e, 0x0c,
+ 0x31, 0xa3, 0x08, 0x9a, 0x43, 0xd1, 0x7a, 0xe8,
+ 0xb7, 0x25, 0x8e, 0x1c, 0xc5, 0x57, 0xfc, 0x6e,
+ 0x53, 0xc1, 0x6a, 0xf8, 0x21, 0xb3, 0x18, 0x8a,
+ 0x62, 0xf0, 0x5b, 0xc9, 0x10, 0x82, 0x29, 0xbb,
+ 0x86, 0x14, 0xbf, 0x2d, 0xf4, 0x66, 0xcd, 0x5f,
+ 0x73, 0xe1, 0x4a, 0xd8, 0x01, 0x93, 0x38, 0xaa,
+ 0x97, 0x05, 0xae, 0x3c, 0xe5, 0x77, 0xdc, 0x4e,
+ 0xa6, 0x34, 0x9f, 0x0d, 0xd4, 0x46, 0xed, 0x7f,
+ 0x42, 0xd0, 0x7b, 0xe9, 0x30, 0xa2, 0x09, 0x9b,
+ 0xc4, 0x56, 0xfd, 0x6f, 0xb6, 0x24, 0x8f, 0x1d,
+ 0x20, 0xb2, 0x19, 0x8b, 0x52, 0xc0, 0x6b, 0xf9,
+ 0x11, 0x83, 0x28, 0xba, 0x63, 0xf1, 0x5a, 0xc8,
+ 0xf5, 0x67, 0xcc, 0x5e, 0x87, 0x15, 0xbe, 0x2c,
+ 0xe6, 0x74, 0xdf, 0x4d, 0x94, 0x06, 0xad, 0x3f,
+ 0x02, 0x90, 0x3b, 0xa9, 0x70, 0xe2, 0x49, 0xdb,
+ 0x33, 0xa1, 0x0a, 0x98, 0x41, 0xd3, 0x78, 0xea,
+ 0xd7, 0x45, 0xee, 0x7c, 0xa5, 0x37, 0x9c, 0x0e,
+ 0x51, 0xc3, 0x68, 0xfa, 0x23, 0xb1, 0x1a, 0x88,
+ 0xb5, 0x27, 0x8c, 0x1e, 0xc7, 0x55, 0xfe, 0x6c,
+ 0x84, 0x16, 0xbd, 0x2f, 0xf6, 0x64, 0xcf, 0x5d,
+ 0x60, 0xf2, 0x59, 0xcb, 0x12, 0x80, 0x2b, 0xb9,
+ 0x95, 0x07, 0xac, 0x3e, 0xe7, 0x75, 0xde, 0x4c,
+ 0x71, 0xe3, 0x48, 0xda, 0x03, 0x91, 0x3a, 0xa8,
+ 0x40, 0xd2, 0x79, 0xeb, 0x32, 0xa0, 0x0b, 0x99,
+ 0xa4, 0x36, 0x9d, 0x0f, 0xd6, 0x44, 0xef, 0x7d,
+ 0x22, 0xb0, 0x1b, 0x89, 0x50, 0xc2, 0x69, 0xfb,
+ 0xc6, 0x54, 0xff, 0x6d, 0xb4, 0x26, 0x8d, 0x1f,
+ 0xf7, 0x65, 0xce, 0x5c, 0x85, 0x17, 0xbc, 0x2e,
+ 0x13, 0x81, 0x2a, 0xb8, 0x61, 0xf3, 0x58, 0xca,
+ },
+ {
+ 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde,
+ 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32,
+ 0xc5, 0x56, 0xfe, 0x6d, 0xb3, 0x20, 0x88, 0x1b,
+ 0x29, 0xba, 0x12, 0x81, 0x5f, 0xcc, 0x64, 0xf7,
+ 0x97, 0x04, 0xac, 0x3f, 0xe1, 0x72, 0xda, 0x49,
+ 0x7b, 0xe8, 0x40, 0xd3, 0x0d, 0x9e, 0x36, 0xa5,
+ 0x52, 0xc1, 0x69, 0xfa, 0x24, 0xb7, 0x1f, 0x8c,
+ 0xbe, 0x2d, 0x85, 0x16, 0xc8, 0x5b, 0xf3, 0x60,
+ 0x33, 0xa0, 0x08, 0x9b, 0x45, 0xd6, 0x7e, 0xed,
+ 0xdf, 0x4c, 0xe4, 0x77, 0xa9, 0x3a, 0x92, 0x01,
+ 0xf6, 0x65, 0xcd, 0x5e, 0x80, 0x13, 0xbb, 0x28,
+ 0x1a, 0x89, 0x21, 0xb2, 0x6c, 0xff, 0x57, 0xc4,
+ 0xa4, 0x37, 0x9f, 0x0c, 0xd2, 0x41, 0xe9, 0x7a,
+ 0x48, 0xdb, 0x73, 0xe0, 0x3e, 0xad, 0x05, 0x96,
+ 0x61, 0xf2, 0x5a, 0xc9, 0x17, 0x84, 0x2c, 0xbf,
+ 0x8d, 0x1e, 0xb6, 0x25, 0xfb, 0x68, 0xc0, 0x53,
+ 0x66, 0xf5, 0x5d, 0xce, 0x10, 0x83, 0x2b, 0xb8,
+ 0x8a, 0x19, 0xb1, 0x22, 0xfc, 0x6f, 0xc7, 0x54,
+ 0xa3, 0x30, 0x98, 0x0b, 0xd5, 0x46, 0xee, 0x7d,
+ 0x4f, 0xdc, 0x74, 0xe7, 0x39, 0xaa, 0x02, 0x91,
+ 0xf1, 0x62, 0xca, 0x59, 0x87, 0x14, 0xbc, 0x2f,
+ 0x1d, 0x8e, 0x26, 0xb5, 0x6b, 0xf8, 0x50, 0xc3,
+ 0x34, 0xa7, 0x0f, 0x9c, 0x42, 0xd1, 0x79, 0xea,
+ 0xd8, 0x4b, 0xe3, 0x70, 0xae, 0x3d, 0x95, 0x06,
+ 0x55, 0xc6, 0x6e, 0xfd, 0x23, 0xb0, 0x18, 0x8b,
+ 0xb9, 0x2a, 0x82, 0x11, 0xcf, 0x5c, 0xf4, 0x67,
+ 0x90, 0x03, 0xab, 0x38, 0xe6, 0x75, 0xdd, 0x4e,
+ 0x7c, 0xef, 0x47, 0xd4, 0x0a, 0x99, 0x31, 0xa2,
+ 0xc2, 0x51, 0xf9, 0x6a, 0xb4, 0x27, 0x8f, 0x1c,
+ 0x2e, 0xbd, 0x15, 0x86, 0x58, 0xcb, 0x63, 0xf0,
+ 0x07, 0x94, 0x3c, 0xaf, 0x71, 0xe2, 0x4a, 0xd9,
+ 0xeb, 0x78, 0xd0, 0x43, 0x9d, 0x0e, 0xa6, 0x35,
+ },
+ {
+ 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb,
+ 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f,
+ 0xb5, 0x21, 0x80, 0x14, 0xdf, 0x4b, 0xea, 0x7e,
+ 0x61, 0xf5, 0x54, 0xc0, 0x0b, 0x9f, 0x3e, 0xaa,
+ 0x77, 0xe3, 0x42, 0xd6, 0x1d, 0x89, 0x28, 0xbc,
+ 0xa3, 0x37, 0x96, 0x02, 0xc9, 0x5d, 0xfc, 0x68,
+ 0xc2, 0x56, 0xf7, 0x63, 0xa8, 0x3c, 0x9d, 0x09,
+ 0x16, 0x82, 0x23, 0xb7, 0x7c, 0xe8, 0x49, 0xdd,
+ 0xee, 0x7a, 0xdb, 0x4f, 0x84, 0x10, 0xb1, 0x25,
+ 0x3a, 0xae, 0x0f, 0x9b, 0x50, 0xc4, 0x65, 0xf1,
+ 0x5b, 0xcf, 0x6e, 0xfa, 0x31, 0xa5, 0x04, 0x90,
+ 0x8f, 0x1b, 0xba, 0x2e, 0xe5, 0x71, 0xd0, 0x44,
+ 0x99, 0x0d, 0xac, 0x38, 0xf3, 0x67, 0xc6, 0x52,
+ 0x4d, 0xd9, 0x78, 0xec, 0x27, 0xb3, 0x12, 0x86,
+ 0x2c, 0xb8, 0x19, 0x8d, 0x46, 0xd2, 0x73, 0xe7,
+ 0xf8, 0x6c, 0xcd, 0x59, 0x92, 0x06, 0xa7, 0x33,
+ 0xc1, 0x55, 0xf4, 0x60, 0xab, 0x3f, 0x9e, 0x0a,
+ 0x15, 0x81, 0x20, 0xb4, 0x7f, 0xeb, 0x4a, 0xde,
+ 0x74, 0xe0, 0x41, 0xd5, 0x1e, 0x8a, 0x2b, 0xbf,
+ 0xa0, 0x34, 0x95, 0x01, 0xca, 0x5e, 0xff, 0x6b,
+ 0xb6, 0x22, 0x83, 0x17, 0xdc, 0x48, 0xe9, 0x7d,
+ 0x62, 0xf6, 0x57, 0xc3, 0x08, 0x9c, 0x3d, 0xa9,
+ 0x03, 0x97, 0x36, 0xa2, 0x69, 0xfd, 0x5c, 0xc8,
+ 0xd7, 0x43, 0xe2, 0x76, 0xbd, 0x29, 0x88, 0x1c,
+ 0x2f, 0xbb, 0x1a, 0x8e, 0x45, 0xd1, 0x70, 0xe4,
+ 0xfb, 0x6f, 0xce, 0x5a, 0x91, 0x05, 0xa4, 0x30,
+ 0x9a, 0x0e, 0xaf, 0x3b, 0xf0, 0x64, 0xc5, 0x51,
+ 0x4e, 0xda, 0x7b, 0xef, 0x24, 0xb0, 0x11, 0x85,
+ 0x58, 0xcc, 0x6d, 0xf9, 0x32, 0xa6, 0x07, 0x93,
+ 0x8c, 0x18, 0xb9, 0x2d, 0xe6, 0x72, 0xd3, 0x47,
+ 0xed, 0x79, 0xd8, 0x4c, 0x87, 0x13, 0xb2, 0x26,
+ 0x39, 0xad, 0x0c, 0x98, 0x53, 0xc7, 0x66, 0xf2,
+ },
+ {
+ 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc,
+ 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10,
+ 0xa5, 0x30, 0x92, 0x07, 0xcb, 0x5e, 0xfc, 0x69,
+ 0x79, 0xec, 0x4e, 0xdb, 0x17, 0x82, 0x20, 0xb5,
+ 0x57, 0xc2, 0x60, 0xf5, 0x39, 0xac, 0x0e, 0x9b,
+ 0x8b, 0x1e, 0xbc, 0x29, 0xe5, 0x70, 0xd2, 0x47,
+ 0xf2, 0x67, 0xc5, 0x50, 0x9c, 0x09, 0xab, 0x3e,
+ 0x2e, 0xbb, 0x19, 0x8c, 0x40, 0xd5, 0x77, 0xe2,
+ 0xae, 0x3b, 0x99, 0x0c, 0xc0, 0x55, 0xf7, 0x62,
+ 0x72, 0xe7, 0x45, 0xd0, 0x1c, 0x89, 0x2b, 0xbe,
+ 0x0b, 0x9e, 0x3c, 0xa9, 0x65, 0xf0, 0x52, 0xc7,
+ 0xd7, 0x42, 0xe0, 0x75, 0xb9, 0x2c, 0x8e, 0x1b,
+ 0xf9, 0x6c, 0xce, 0x5b, 0x97, 0x02, 0xa0, 0x35,
+ 0x25, 0xb0, 0x12, 0x87, 0x4b, 0xde, 0x7c, 0xe9,
+ 0x5c, 0xc9, 0x6b, 0xfe, 0x32, 0xa7, 0x05, 0x90,
+ 0x80, 0x15, 0xb7, 0x22, 0xee, 0x7b, 0xd9, 0x4c,
+ 0x41, 0xd4, 0x76, 0xe3, 0x2f, 0xba, 0x18, 0x8d,
+ 0x9d, 0x08, 0xaa, 0x3f, 0xf3, 0x66, 0xc4, 0x51,
+ 0xe4, 0x71, 0xd3, 0x46, 0x8a, 0x1f, 0xbd, 0x28,
+ 0x38, 0xad, 0x0f, 0x9a, 0x56, 0xc3, 0x61, 0xf4,
+ 0x16, 0x83, 0x21, 0xb4, 0x78, 0xed, 0x4f, 0xda,
+ 0xca, 0x5f, 0xfd, 0x68, 0xa4, 0x31, 0x93, 0x06,
+ 0xb3, 0x26, 0x84, 0x11, 0xdd, 0x48, 0xea, 0x7f,
+ 0x6f, 0xfa, 0x58, 0xcd, 0x01, 0x94, 0x36, 0xa3,
+ 0xef, 0x7a, 0xd8, 0x4d, 0x81, 0x14, 0xb6, 0x23,
+ 0x33, 0xa6, 0x04, 0x91, 0x5d, 0xc8, 0x6a, 0xff,
+ 0x4a, 0xdf, 0x7d, 0xe8, 0x24, 0xb1, 0x13, 0x86,
+ 0x96, 0x03, 0xa1, 0x34, 0xf8, 0x6d, 0xcf, 0x5a,
+ 0xb8, 0x2d, 0x8f, 0x1a, 0xd6, 0x43, 0xe1, 0x74,
+ 0x64, 0xf1, 0x53, 0xc6, 0x0a, 0x9f, 0x3d, 0xa8,
+ 0x1d, 0x88, 0x2a, 0xbf, 0x73, 0xe6, 0x44, 0xd1,
+ 0xc1, 0x54, 0xf6, 0x63, 0xaf, 0x3a, 0x98, 0x0d,
+ },
+ {
+ 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5,
+ 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01,
+ 0x95, 0x03, 0xa4, 0x32, 0xf7, 0x61, 0xc6, 0x50,
+ 0x51, 0xc7, 0x60, 0xf6, 0x33, 0xa5, 0x02, 0x94,
+ 0x37, 0xa1, 0x06, 0x90, 0x55, 0xc3, 0x64, 0xf2,
+ 0xf3, 0x65, 0xc2, 0x54, 0x91, 0x07, 0xa0, 0x36,
+ 0xa2, 0x34, 0x93, 0x05, 0xc0, 0x56, 0xf1, 0x67,
+ 0x66, 0xf0, 0x57, 0xc1, 0x04, 0x92, 0x35, 0xa3,
+ 0x6e, 0xf8, 0x5f, 0xc9, 0x0c, 0x9a, 0x3d, 0xab,
+ 0xaa, 0x3c, 0x9b, 0x0d, 0xc8, 0x5e, 0xf9, 0x6f,
+ 0xfb, 0x6d, 0xca, 0x5c, 0x99, 0x0f, 0xa8, 0x3e,
+ 0x3f, 0xa9, 0x0e, 0x98, 0x5d, 0xcb, 0x6c, 0xfa,
+ 0x59, 0xcf, 0x68, 0xfe, 0x3b, 0xad, 0x0a, 0x9c,
+ 0x9d, 0x0b, 0xac, 0x3a, 0xff, 0x69, 0xce, 0x58,
+ 0xcc, 0x5a, 0xfd, 0x6b, 0xae, 0x38, 0x9f, 0x09,
+ 0x08, 0x9e, 0x39, 0xaf, 0x6a, 0xfc, 0x5b, 0xcd,
+ 0xdc, 0x4a, 0xed, 0x7b, 0xbe, 0x28, 0x8f, 0x19,
+ 0x18, 0x8e, 0x29, 0xbf, 0x7a, 0xec, 0x4b, 0xdd,
+ 0x49, 0xdf, 0x78, 0xee, 0x2b, 0xbd, 0x1a, 0x8c,
+ 0x8d, 0x1b, 0xbc, 0x2a, 0xef, 0x79, 0xde, 0x48,
+ 0xeb, 0x7d, 0xda, 0x4c, 0x89, 0x1f, 0xb8, 0x2e,
+ 0x2f, 0xb9, 0x1e, 0x88, 0x4d, 0xdb, 0x7c, 0xea,
+ 0x7e, 0xe8, 0x4f, 0xd9, 0x1c, 0x8a, 0x2d, 0xbb,
+ 0xba, 0x2c, 0x8b, 0x1d, 0xd8, 0x4e, 0xe9, 0x7f,
+ 0xb2, 0x24, 0x83, 0x15, 0xd0, 0x46, 0xe1, 0x77,
+ 0x76, 0xe0, 0x47, 0xd1, 0x14, 0x82, 0x25, 0xb3,
+ 0x27, 0xb1, 0x16, 0x80, 0x45, 0xd3, 0x74, 0xe2,
+ 0xe3, 0x75, 0xd2, 0x44, 0x81, 0x17, 0xb0, 0x26,
+ 0x85, 0x13, 0xb4, 0x22, 0xe7, 0x71, 0xd6, 0x40,
+ 0x41, 0xd7, 0x70, 0xe6, 0x23, 0xb5, 0x12, 0x84,
+ 0x10, 0x86, 0x21, 0xb7, 0x72, 0xe4, 0x43, 0xd5,
+ 0xd4, 0x42, 0xe5, 0x73, 0xb6, 0x20, 0x87, 0x11,
+ },
+ {
+ 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2,
+ 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e,
+ 0x85, 0x12, 0xb6, 0x21, 0xe3, 0x74, 0xd0, 0x47,
+ 0x49, 0xde, 0x7a, 0xed, 0x2f, 0xb8, 0x1c, 0x8b,
+ 0x17, 0x80, 0x24, 0xb3, 0x71, 0xe6, 0x42, 0xd5,
+ 0xdb, 0x4c, 0xe8, 0x7f, 0xbd, 0x2a, 0x8e, 0x19,
+ 0x92, 0x05, 0xa1, 0x36, 0xf4, 0x63, 0xc7, 0x50,
+ 0x5e, 0xc9, 0x6d, 0xfa, 0x38, 0xaf, 0x0b, 0x9c,
+ 0x2e, 0xb9, 0x1d, 0x8a, 0x48, 0xdf, 0x7b, 0xec,
+ 0xe2, 0x75, 0xd1, 0x46, 0x84, 0x13, 0xb7, 0x20,
+ 0xab, 0x3c, 0x98, 0x0f, 0xcd, 0x5a, 0xfe, 0x69,
+ 0x67, 0xf0, 0x54, 0xc3, 0x01, 0x96, 0x32, 0xa5,
+ 0x39, 0xae, 0x0a, 0x9d, 0x5f, 0xc8, 0x6c, 0xfb,
+ 0xf5, 0x62, 0xc6, 0x51, 0x93, 0x04, 0xa0, 0x37,
+ 0xbc, 0x2b, 0x8f, 0x18, 0xda, 0x4d, 0xe9, 0x7e,
+ 0x70, 0xe7, 0x43, 0xd4, 0x16, 0x81, 0x25, 0xb2,
+ 0x5c, 0xcb, 0x6f, 0xf8, 0x3a, 0xad, 0x09, 0x9e,
+ 0x90, 0x07, 0xa3, 0x34, 0xf6, 0x61, 0xc5, 0x52,
+ 0xd9, 0x4e, 0xea, 0x7d, 0xbf, 0x28, 0x8c, 0x1b,
+ 0x15, 0x82, 0x26, 0xb1, 0x73, 0xe4, 0x40, 0xd7,
+ 0x4b, 0xdc, 0x78, 0xef, 0x2d, 0xba, 0x1e, 0x89,
+ 0x87, 0x10, 0xb4, 0x23, 0xe1, 0x76, 0xd2, 0x45,
+ 0xce, 0x59, 0xfd, 0x6a, 0xa8, 0x3f, 0x9b, 0x0c,
+ 0x02, 0x95, 0x31, 0xa6, 0x64, 0xf3, 0x57, 0xc0,
+ 0x72, 0xe5, 0x41, 0xd6, 0x14, 0x83, 0x27, 0xb0,
+ 0xbe, 0x29, 0x8d, 0x1a, 0xd8, 0x4f, 0xeb, 0x7c,
+ 0xf7, 0x60, 0xc4, 0x53, 0x91, 0x06, 0xa2, 0x35,
+ 0x3b, 0xac, 0x08, 0x9f, 0x5d, 0xca, 0x6e, 0xf9,
+ 0x65, 0xf2, 0x56, 0xc1, 0x03, 0x94, 0x30, 0xa7,
+ 0xa9, 0x3e, 0x9a, 0x0d, 0xcf, 0x58, 0xfc, 0x6b,
+ 0xe0, 0x77, 0xd3, 0x44, 0x86, 0x11, 0xb5, 0x22,
+ 0x2c, 0xbb, 0x1f, 0x88, 0x4a, 0xdd, 0x79, 0xee,
+ },
+ {
+ 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef,
+ 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b,
+ 0x75, 0xed, 0x58, 0xc0, 0x2f, 0xb7, 0x02, 0x9a,
+ 0xc1, 0x59, 0xec, 0x74, 0x9b, 0x03, 0xb6, 0x2e,
+ 0xea, 0x72, 0xc7, 0x5f, 0xb0, 0x28, 0x9d, 0x05,
+ 0x5e, 0xc6, 0x73, 0xeb, 0x04, 0x9c, 0x29, 0xb1,
+ 0x9f, 0x07, 0xb2, 0x2a, 0xc5, 0x5d, 0xe8, 0x70,
+ 0x2b, 0xb3, 0x06, 0x9e, 0x71, 0xe9, 0x5c, 0xc4,
+ 0xc9, 0x51, 0xe4, 0x7c, 0x93, 0x0b, 0xbe, 0x26,
+ 0x7d, 0xe5, 0x50, 0xc8, 0x27, 0xbf, 0x0a, 0x92,
+ 0xbc, 0x24, 0x91, 0x09, 0xe6, 0x7e, 0xcb, 0x53,
+ 0x08, 0x90, 0x25, 0xbd, 0x52, 0xca, 0x7f, 0xe7,
+ 0x23, 0xbb, 0x0e, 0x96, 0x79, 0xe1, 0x54, 0xcc,
+ 0x97, 0x0f, 0xba, 0x22, 0xcd, 0x55, 0xe0, 0x78,
+ 0x56, 0xce, 0x7b, 0xe3, 0x0c, 0x94, 0x21, 0xb9,
+ 0xe2, 0x7a, 0xcf, 0x57, 0xb8, 0x20, 0x95, 0x0d,
+ 0x8f, 0x17, 0xa2, 0x3a, 0xd5, 0x4d, 0xf8, 0x60,
+ 0x3b, 0xa3, 0x16, 0x8e, 0x61, 0xf9, 0x4c, 0xd4,
+ 0xfa, 0x62, 0xd7, 0x4f, 0xa0, 0x38, 0x8d, 0x15,
+ 0x4e, 0xd6, 0x63, 0xfb, 0x14, 0x8c, 0x39, 0xa1,
+ 0x65, 0xfd, 0x48, 0xd0, 0x3f, 0xa7, 0x12, 0x8a,
+ 0xd1, 0x49, 0xfc, 0x64, 0x8b, 0x13, 0xa6, 0x3e,
+ 0x10, 0x88, 0x3d, 0xa5, 0x4a, 0xd2, 0x67, 0xff,
+ 0xa4, 0x3c, 0x89, 0x11, 0xfe, 0x66, 0xd3, 0x4b,
+ 0x46, 0xde, 0x6b, 0xf3, 0x1c, 0x84, 0x31, 0xa9,
+ 0xf2, 0x6a, 0xdf, 0x47, 0xa8, 0x30, 0x85, 0x1d,
+ 0x33, 0xab, 0x1e, 0x86, 0x69, 0xf1, 0x44, 0xdc,
+ 0x87, 0x1f, 0xaa, 0x32, 0xdd, 0x45, 0xf0, 0x68,
+ 0xac, 0x34, 0x81, 0x19, 0xf6, 0x6e, 0xdb, 0x43,
+ 0x18, 0x80, 0x35, 0xad, 0x42, 0xda, 0x6f, 0xf7,
+ 0xd9, 0x41, 0xf4, 0x6c, 0x83, 0x1b, 0xae, 0x36,
+ 0x6d, 0xf5, 0x40, 0xd8, 0x37, 0xaf, 0x1a, 0x82,
+ },
+ {
+ 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8,
+ 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54,
+ 0x65, 0xfc, 0x4a, 0xd3, 0x3b, 0xa2, 0x14, 0x8d,
+ 0xd9, 0x40, 0xf6, 0x6f, 0x87, 0x1e, 0xa8, 0x31,
+ 0xca, 0x53, 0xe5, 0x7c, 0x94, 0x0d, 0xbb, 0x22,
+ 0x76, 0xef, 0x59, 0xc0, 0x28, 0xb1, 0x07, 0x9e,
+ 0xaf, 0x36, 0x80, 0x19, 0xf1, 0x68, 0xde, 0x47,
+ 0x13, 0x8a, 0x3c, 0xa5, 0x4d, 0xd4, 0x62, 0xfb,
+ 0x89, 0x10, 0xa6, 0x3f, 0xd7, 0x4e, 0xf8, 0x61,
+ 0x35, 0xac, 0x1a, 0x83, 0x6b, 0xf2, 0x44, 0xdd,
+ 0xec, 0x75, 0xc3, 0x5a, 0xb2, 0x2b, 0x9d, 0x04,
+ 0x50, 0xc9, 0x7f, 0xe6, 0x0e, 0x97, 0x21, 0xb8,
+ 0x43, 0xda, 0x6c, 0xf5, 0x1d, 0x84, 0x32, 0xab,
+ 0xff, 0x66, 0xd0, 0x49, 0xa1, 0x38, 0x8e, 0x17,
+ 0x26, 0xbf, 0x09, 0x90, 0x78, 0xe1, 0x57, 0xce,
+ 0x9a, 0x03, 0xb5, 0x2c, 0xc4, 0x5d, 0xeb, 0x72,
+ 0x0f, 0x96, 0x20, 0xb9, 0x51, 0xc8, 0x7e, 0xe7,
+ 0xb3, 0x2a, 0x9c, 0x05, 0xed, 0x74, 0xc2, 0x5b,
+ 0x6a, 0xf3, 0x45, 0xdc, 0x34, 0xad, 0x1b, 0x82,
+ 0xd6, 0x4f, 0xf9, 0x60, 0x88, 0x11, 0xa7, 0x3e,
+ 0xc5, 0x5c, 0xea, 0x73, 0x9b, 0x02, 0xb4, 0x2d,
+ 0x79, 0xe0, 0x56, 0xcf, 0x27, 0xbe, 0x08, 0x91,
+ 0xa0, 0x39, 0x8f, 0x16, 0xfe, 0x67, 0xd1, 0x48,
+ 0x1c, 0x85, 0x33, 0xaa, 0x42, 0xdb, 0x6d, 0xf4,
+ 0x86, 0x1f, 0xa9, 0x30, 0xd8, 0x41, 0xf7, 0x6e,
+ 0x3a, 0xa3, 0x15, 0x8c, 0x64, 0xfd, 0x4b, 0xd2,
+ 0xe3, 0x7a, 0xcc, 0x55, 0xbd, 0x24, 0x92, 0x0b,
+ 0x5f, 0xc6, 0x70, 0xe9, 0x01, 0x98, 0x2e, 0xb7,
+ 0x4c, 0xd5, 0x63, 0xfa, 0x12, 0x8b, 0x3d, 0xa4,
+ 0xf0, 0x69, 0xdf, 0x46, 0xae, 0x37, 0x81, 0x18,
+ 0x29, 0xb0, 0x06, 0x9f, 0x77, 0xee, 0x58, 0xc1,
+ 0x95, 0x0c, 0xba, 0x23, 0xcb, 0x52, 0xe4, 0x7d,
+ },
+ {
+ 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1,
+ 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45,
+ 0x55, 0xcf, 0x7c, 0xe6, 0x07, 0x9d, 0x2e, 0xb4,
+ 0xf1, 0x6b, 0xd8, 0x42, 0xa3, 0x39, 0x8a, 0x10,
+ 0xaa, 0x30, 0x83, 0x19, 0xf8, 0x62, 0xd1, 0x4b,
+ 0x0e, 0x94, 0x27, 0xbd, 0x5c, 0xc6, 0x75, 0xef,
+ 0xff, 0x65, 0xd6, 0x4c, 0xad, 0x37, 0x84, 0x1e,
+ 0x5b, 0xc1, 0x72, 0xe8, 0x09, 0x93, 0x20, 0xba,
+ 0x49, 0xd3, 0x60, 0xfa, 0x1b, 0x81, 0x32, 0xa8,
+ 0xed, 0x77, 0xc4, 0x5e, 0xbf, 0x25, 0x96, 0x0c,
+ 0x1c, 0x86, 0x35, 0xaf, 0x4e, 0xd4, 0x67, 0xfd,
+ 0xb8, 0x22, 0x91, 0x0b, 0xea, 0x70, 0xc3, 0x59,
+ 0xe3, 0x79, 0xca, 0x50, 0xb1, 0x2b, 0x98, 0x02,
+ 0x47, 0xdd, 0x6e, 0xf4, 0x15, 0x8f, 0x3c, 0xa6,
+ 0xb6, 0x2c, 0x9f, 0x05, 0xe4, 0x7e, 0xcd, 0x57,
+ 0x12, 0x88, 0x3b, 0xa1, 0x40, 0xda, 0x69, 0xf3,
+ 0x92, 0x08, 0xbb, 0x21, 0xc0, 0x5a, 0xe9, 0x73,
+ 0x36, 0xac, 0x1f, 0x85, 0x64, 0xfe, 0x4d, 0xd7,
+ 0xc7, 0x5d, 0xee, 0x74, 0x95, 0x0f, 0xbc, 0x26,
+ 0x63, 0xf9, 0x4a, 0xd0, 0x31, 0xab, 0x18, 0x82,
+ 0x38, 0xa2, 0x11, 0x8b, 0x6a, 0xf0, 0x43, 0xd9,
+ 0x9c, 0x06, 0xb5, 0x2f, 0xce, 0x54, 0xe7, 0x7d,
+ 0x6d, 0xf7, 0x44, 0xde, 0x3f, 0xa5, 0x16, 0x8c,
+ 0xc9, 0x53, 0xe0, 0x7a, 0x9b, 0x01, 0xb2, 0x28,
+ 0xdb, 0x41, 0xf2, 0x68, 0x89, 0x13, 0xa0, 0x3a,
+ 0x7f, 0xe5, 0x56, 0xcc, 0x2d, 0xb7, 0x04, 0x9e,
+ 0x8e, 0x14, 0xa7, 0x3d, 0xdc, 0x46, 0xf5, 0x6f,
+ 0x2a, 0xb0, 0x03, 0x99, 0x78, 0xe2, 0x51, 0xcb,
+ 0x71, 0xeb, 0x58, 0xc2, 0x23, 0xb9, 0x0a, 0x90,
+ 0xd5, 0x4f, 0xfc, 0x66, 0x87, 0x1d, 0xae, 0x34,
+ 0x24, 0xbe, 0x0d, 0x97, 0x76, 0xec, 0x5f, 0xc5,
+ 0x80, 0x1a, 0xa9, 0x33, 0xd2, 0x48, 0xfb, 0x61,
+ },
+ {
+ 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6,
+ 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a,
+ 0x45, 0xde, 0x6e, 0xf5, 0x13, 0x88, 0x38, 0xa3,
+ 0xe9, 0x72, 0xc2, 0x59, 0xbf, 0x24, 0x94, 0x0f,
+ 0x8a, 0x11, 0xa1, 0x3a, 0xdc, 0x47, 0xf7, 0x6c,
+ 0x26, 0xbd, 0x0d, 0x96, 0x70, 0xeb, 0x5b, 0xc0,
+ 0xcf, 0x54, 0xe4, 0x7f, 0x99, 0x02, 0xb2, 0x29,
+ 0x63, 0xf8, 0x48, 0xd3, 0x35, 0xae, 0x1e, 0x85,
+ 0x09, 0x92, 0x22, 0xb9, 0x5f, 0xc4, 0x74, 0xef,
+ 0xa5, 0x3e, 0x8e, 0x15, 0xf3, 0x68, 0xd8, 0x43,
+ 0x4c, 0xd7, 0x67, 0xfc, 0x1a, 0x81, 0x31, 0xaa,
+ 0xe0, 0x7b, 0xcb, 0x50, 0xb6, 0x2d, 0x9d, 0x06,
+ 0x83, 0x18, 0xa8, 0x33, 0xd5, 0x4e, 0xfe, 0x65,
+ 0x2f, 0xb4, 0x04, 0x9f, 0x79, 0xe2, 0x52, 0xc9,
+ 0xc6, 0x5d, 0xed, 0x76, 0x90, 0x0b, 0xbb, 0x20,
+ 0x6a, 0xf1, 0x41, 0xda, 0x3c, 0xa7, 0x17, 0x8c,
+ 0x12, 0x89, 0x39, 0xa2, 0x44, 0xdf, 0x6f, 0xf4,
+ 0xbe, 0x25, 0x95, 0x0e, 0xe8, 0x73, 0xc3, 0x58,
+ 0x57, 0xcc, 0x7c, 0xe7, 0x01, 0x9a, 0x2a, 0xb1,
+ 0xfb, 0x60, 0xd0, 0x4b, 0xad, 0x36, 0x86, 0x1d,
+ 0x98, 0x03, 0xb3, 0x28, 0xce, 0x55, 0xe5, 0x7e,
+ 0x34, 0xaf, 0x1f, 0x84, 0x62, 0xf9, 0x49, 0xd2,
+ 0xdd, 0x46, 0xf6, 0x6d, 0x8b, 0x10, 0xa0, 0x3b,
+ 0x71, 0xea, 0x5a, 0xc1, 0x27, 0xbc, 0x0c, 0x97,
+ 0x1b, 0x80, 0x30, 0xab, 0x4d, 0xd6, 0x66, 0xfd,
+ 0xb7, 0x2c, 0x9c, 0x07, 0xe1, 0x7a, 0xca, 0x51,
+ 0x5e, 0xc5, 0x75, 0xee, 0x08, 0x93, 0x23, 0xb8,
+ 0xf2, 0x69, 0xd9, 0x42, 0xa4, 0x3f, 0x8f, 0x14,
+ 0x91, 0x0a, 0xba, 0x21, 0xc7, 0x5c, 0xec, 0x77,
+ 0x3d, 0xa6, 0x16, 0x8d, 0x6b, 0xf0, 0x40, 0xdb,
+ 0xd4, 0x4f, 0xff, 0x64, 0x82, 0x19, 0xa9, 0x32,
+ 0x78, 0xe3, 0x53, 0xc8, 0x2e, 0xb5, 0x05, 0x9e,
+ },
+ {
+ 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+ 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67,
+ 0x35, 0xa9, 0x10, 0x8c, 0x7f, 0xe3, 0x5a, 0xc6,
+ 0xa1, 0x3d, 0x84, 0x18, 0xeb, 0x77, 0xce, 0x52,
+ 0x6a, 0xf6, 0x4f, 0xd3, 0x20, 0xbc, 0x05, 0x99,
+ 0xfe, 0x62, 0xdb, 0x47, 0xb4, 0x28, 0x91, 0x0d,
+ 0x5f, 0xc3, 0x7a, 0xe6, 0x15, 0x89, 0x30, 0xac,
+ 0xcb, 0x57, 0xee, 0x72, 0x81, 0x1d, 0xa4, 0x38,
+ 0xd4, 0x48, 0xf1, 0x6d, 0x9e, 0x02, 0xbb, 0x27,
+ 0x40, 0xdc, 0x65, 0xf9, 0x0a, 0x96, 0x2f, 0xb3,
+ 0xe1, 0x7d, 0xc4, 0x58, 0xab, 0x37, 0x8e, 0x12,
+ 0x75, 0xe9, 0x50, 0xcc, 0x3f, 0xa3, 0x1a, 0x86,
+ 0xbe, 0x22, 0x9b, 0x07, 0xf4, 0x68, 0xd1, 0x4d,
+ 0x2a, 0xb6, 0x0f, 0x93, 0x60, 0xfc, 0x45, 0xd9,
+ 0x8b, 0x17, 0xae, 0x32, 0xc1, 0x5d, 0xe4, 0x78,
+ 0x1f, 0x83, 0x3a, 0xa6, 0x55, 0xc9, 0x70, 0xec,
+ 0xb5, 0x29, 0x90, 0x0c, 0xff, 0x63, 0xda, 0x46,
+ 0x21, 0xbd, 0x04, 0x98, 0x6b, 0xf7, 0x4e, 0xd2,
+ 0x80, 0x1c, 0xa5, 0x39, 0xca, 0x56, 0xef, 0x73,
+ 0x14, 0x88, 0x31, 0xad, 0x5e, 0xc2, 0x7b, 0xe7,
+ 0xdf, 0x43, 0xfa, 0x66, 0x95, 0x09, 0xb0, 0x2c,
+ 0x4b, 0xd7, 0x6e, 0xf2, 0x01, 0x9d, 0x24, 0xb8,
+ 0xea, 0x76, 0xcf, 0x53, 0xa0, 0x3c, 0x85, 0x19,
+ 0x7e, 0xe2, 0x5b, 0xc7, 0x34, 0xa8, 0x11, 0x8d,
+ 0x61, 0xfd, 0x44, 0xd8, 0x2b, 0xb7, 0x0e, 0x92,
+ 0xf5, 0x69, 0xd0, 0x4c, 0xbf, 0x23, 0x9a, 0x06,
+ 0x54, 0xc8, 0x71, 0xed, 0x1e, 0x82, 0x3b, 0xa7,
+ 0xc0, 0x5c, 0xe5, 0x79, 0x8a, 0x16, 0xaf, 0x33,
+ 0x0b, 0x97, 0x2e, 0xb2, 0x41, 0xdd, 0x64, 0xf8,
+ 0x9f, 0x03, 0xba, 0x26, 0xd5, 0x49, 0xf0, 0x6c,
+ 0x3e, 0xa2, 0x1b, 0x87, 0x74, 0xe8, 0x51, 0xcd,
+ 0xaa, 0x36, 0x8f, 0x13, 0xe0, 0x7c, 0xc5, 0x59,
+ },
+ {
+ 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4,
+ 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68,
+ 0x25, 0xb8, 0x02, 0x9f, 0x6b, 0xf6, 0x4c, 0xd1,
+ 0xb9, 0x24, 0x9e, 0x03, 0xf7, 0x6a, 0xd0, 0x4d,
+ 0x4a, 0xd7, 0x6d, 0xf0, 0x04, 0x99, 0x23, 0xbe,
+ 0xd6, 0x4b, 0xf1, 0x6c, 0x98, 0x05, 0xbf, 0x22,
+ 0x6f, 0xf2, 0x48, 0xd5, 0x21, 0xbc, 0x06, 0x9b,
+ 0xf3, 0x6e, 0xd4, 0x49, 0xbd, 0x20, 0x9a, 0x07,
+ 0x94, 0x09, 0xb3, 0x2e, 0xda, 0x47, 0xfd, 0x60,
+ 0x08, 0x95, 0x2f, 0xb2, 0x46, 0xdb, 0x61, 0xfc,
+ 0xb1, 0x2c, 0x96, 0x0b, 0xff, 0x62, 0xd8, 0x45,
+ 0x2d, 0xb0, 0x0a, 0x97, 0x63, 0xfe, 0x44, 0xd9,
+ 0xde, 0x43, 0xf9, 0x64, 0x90, 0x0d, 0xb7, 0x2a,
+ 0x42, 0xdf, 0x65, 0xf8, 0x0c, 0x91, 0x2b, 0xb6,
+ 0xfb, 0x66, 0xdc, 0x41, 0xb5, 0x28, 0x92, 0x0f,
+ 0x67, 0xfa, 0x40, 0xdd, 0x29, 0xb4, 0x0e, 0x93,
+ 0x35, 0xa8, 0x12, 0x8f, 0x7b, 0xe6, 0x5c, 0xc1,
+ 0xa9, 0x34, 0x8e, 0x13, 0xe7, 0x7a, 0xc0, 0x5d,
+ 0x10, 0x8d, 0x37, 0xaa, 0x5e, 0xc3, 0x79, 0xe4,
+ 0x8c, 0x11, 0xab, 0x36, 0xc2, 0x5f, 0xe5, 0x78,
+ 0x7f, 0xe2, 0x58, 0xc5, 0x31, 0xac, 0x16, 0x8b,
+ 0xe3, 0x7e, 0xc4, 0x59, 0xad, 0x30, 0x8a, 0x17,
+ 0x5a, 0xc7, 0x7d, 0xe0, 0x14, 0x89, 0x33, 0xae,
+ 0xc6, 0x5b, 0xe1, 0x7c, 0x88, 0x15, 0xaf, 0x32,
+ 0xa1, 0x3c, 0x86, 0x1b, 0xef, 0x72, 0xc8, 0x55,
+ 0x3d, 0xa0, 0x1a, 0x87, 0x73, 0xee, 0x54, 0xc9,
+ 0x84, 0x19, 0xa3, 0x3e, 0xca, 0x57, 0xed, 0x70,
+ 0x18, 0x85, 0x3f, 0xa2, 0x56, 0xcb, 0x71, 0xec,
+ 0xeb, 0x76, 0xcc, 0x51, 0xa5, 0x38, 0x82, 0x1f,
+ 0x77, 0xea, 0x50, 0xcd, 0x39, 0xa4, 0x1e, 0x83,
+ 0xce, 0x53, 0xe9, 0x74, 0x80, 0x1d, 0xa7, 0x3a,
+ 0x52, 0xcf, 0x75, 0xe8, 0x1c, 0x81, 0x3b, 0xa6,
+ },
+ {
+ 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd,
+ 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79,
+ 0x15, 0x8b, 0x34, 0xaa, 0x57, 0xc9, 0x76, 0xe8,
+ 0x91, 0x0f, 0xb0, 0x2e, 0xd3, 0x4d, 0xf2, 0x6c,
+ 0x2a, 0xb4, 0x0b, 0x95, 0x68, 0xf6, 0x49, 0xd7,
+ 0xae, 0x30, 0x8f, 0x11, 0xec, 0x72, 0xcd, 0x53,
+ 0x3f, 0xa1, 0x1e, 0x80, 0x7d, 0xe3, 0x5c, 0xc2,
+ 0xbb, 0x25, 0x9a, 0x04, 0xf9, 0x67, 0xd8, 0x46,
+ 0x54, 0xca, 0x75, 0xeb, 0x16, 0x88, 0x37, 0xa9,
+ 0xd0, 0x4e, 0xf1, 0x6f, 0x92, 0x0c, 0xb3, 0x2d,
+ 0x41, 0xdf, 0x60, 0xfe, 0x03, 0x9d, 0x22, 0xbc,
+ 0xc5, 0x5b, 0xe4, 0x7a, 0x87, 0x19, 0xa6, 0x38,
+ 0x7e, 0xe0, 0x5f, 0xc1, 0x3c, 0xa2, 0x1d, 0x83,
+ 0xfa, 0x64, 0xdb, 0x45, 0xb8, 0x26, 0x99, 0x07,
+ 0x6b, 0xf5, 0x4a, 0xd4, 0x29, 0xb7, 0x08, 0x96,
+ 0xef, 0x71, 0xce, 0x50, 0xad, 0x33, 0x8c, 0x12,
+ 0xa8, 0x36, 0x89, 0x17, 0xea, 0x74, 0xcb, 0x55,
+ 0x2c, 0xb2, 0x0d, 0x93, 0x6e, 0xf0, 0x4f, 0xd1,
+ 0xbd, 0x23, 0x9c, 0x02, 0xff, 0x61, 0xde, 0x40,
+ 0x39, 0xa7, 0x18, 0x86, 0x7b, 0xe5, 0x5a, 0xc4,
+ 0x82, 0x1c, 0xa3, 0x3d, 0xc0, 0x5e, 0xe1, 0x7f,
+ 0x06, 0x98, 0x27, 0xb9, 0x44, 0xda, 0x65, 0xfb,
+ 0x97, 0x09, 0xb6, 0x28, 0xd5, 0x4b, 0xf4, 0x6a,
+ 0x13, 0x8d, 0x32, 0xac, 0x51, 0xcf, 0x70, 0xee,
+ 0xfc, 0x62, 0xdd, 0x43, 0xbe, 0x20, 0x9f, 0x01,
+ 0x78, 0xe6, 0x59, 0xc7, 0x3a, 0xa4, 0x1b, 0x85,
+ 0xe9, 0x77, 0xc8, 0x56, 0xab, 0x35, 0x8a, 0x14,
+ 0x6d, 0xf3, 0x4c, 0xd2, 0x2f, 0xb1, 0x0e, 0x90,
+ 0xd6, 0x48, 0xf7, 0x69, 0x94, 0x0a, 0xb5, 0x2b,
+ 0x52, 0xcc, 0x73, 0xed, 0x10, 0x8e, 0x31, 0xaf,
+ 0xc3, 0x5d, 0xe2, 0x7c, 0x81, 0x1f, 0xa0, 0x3e,
+ 0x47, 0xd9, 0x66, 0xf8, 0x05, 0x9b, 0x24, 0xba,
+ },
+ {
+ 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa,
+ 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76,
+ 0x05, 0x9a, 0x26, 0xb9, 0x43, 0xdc, 0x60, 0xff,
+ 0x89, 0x16, 0xaa, 0x35, 0xcf, 0x50, 0xec, 0x73,
+ 0x0a, 0x95, 0x29, 0xb6, 0x4c, 0xd3, 0x6f, 0xf0,
+ 0x86, 0x19, 0xa5, 0x3a, 0xc0, 0x5f, 0xe3, 0x7c,
+ 0x0f, 0x90, 0x2c, 0xb3, 0x49, 0xd6, 0x6a, 0xf5,
+ 0x83, 0x1c, 0xa0, 0x3f, 0xc5, 0x5a, 0xe6, 0x79,
+ 0x14, 0x8b, 0x37, 0xa8, 0x52, 0xcd, 0x71, 0xee,
+ 0x98, 0x07, 0xbb, 0x24, 0xde, 0x41, 0xfd, 0x62,
+ 0x11, 0x8e, 0x32, 0xad, 0x57, 0xc8, 0x74, 0xeb,
+ 0x9d, 0x02, 0xbe, 0x21, 0xdb, 0x44, 0xf8, 0x67,
+ 0x1e, 0x81, 0x3d, 0xa2, 0x58, 0xc7, 0x7b, 0xe4,
+ 0x92, 0x0d, 0xb1, 0x2e, 0xd4, 0x4b, 0xf7, 0x68,
+ 0x1b, 0x84, 0x38, 0xa7, 0x5d, 0xc2, 0x7e, 0xe1,
+ 0x97, 0x08, 0xb4, 0x2b, 0xd1, 0x4e, 0xf2, 0x6d,
+ 0x28, 0xb7, 0x0b, 0x94, 0x6e, 0xf1, 0x4d, 0xd2,
+ 0xa4, 0x3b, 0x87, 0x18, 0xe2, 0x7d, 0xc1, 0x5e,
+ 0x2d, 0xb2, 0x0e, 0x91, 0x6b, 0xf4, 0x48, 0xd7,
+ 0xa1, 0x3e, 0x82, 0x1d, 0xe7, 0x78, 0xc4, 0x5b,
+ 0x22, 0xbd, 0x01, 0x9e, 0x64, 0xfb, 0x47, 0xd8,
+ 0xae, 0x31, 0x8d, 0x12, 0xe8, 0x77, 0xcb, 0x54,
+ 0x27, 0xb8, 0x04, 0x9b, 0x61, 0xfe, 0x42, 0xdd,
+ 0xab, 0x34, 0x88, 0x17, 0xed, 0x72, 0xce, 0x51,
+ 0x3c, 0xa3, 0x1f, 0x80, 0x7a, 0xe5, 0x59, 0xc6,
+ 0xb0, 0x2f, 0x93, 0x0c, 0xf6, 0x69, 0xd5, 0x4a,
+ 0x39, 0xa6, 0x1a, 0x85, 0x7f, 0xe0, 0x5c, 0xc3,
+ 0xb5, 0x2a, 0x96, 0x09, 0xf3, 0x6c, 0xd0, 0x4f,
+ 0x36, 0xa9, 0x15, 0x8a, 0x70, 0xef, 0x53, 0xcc,
+ 0xba, 0x25, 0x99, 0x06, 0xfc, 0x63, 0xdf, 0x40,
+ 0x33, 0xac, 0x10, 0x8f, 0x75, 0xea, 0x56, 0xc9,
+ 0xbf, 0x20, 0x9c, 0x03, 0xf9, 0x66, 0xda, 0x45,
+ },
+ {
+ 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47,
+ 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e,
+ 0xd2, 0x72, 0x8f, 0x2f, 0x68, 0xc8, 0x35, 0x95,
+ 0xbb, 0x1b, 0xe6, 0x46, 0x01, 0xa1, 0x5c, 0xfc,
+ 0xb9, 0x19, 0xe4, 0x44, 0x03, 0xa3, 0x5e, 0xfe,
+ 0xd0, 0x70, 0x8d, 0x2d, 0x6a, 0xca, 0x37, 0x97,
+ 0x6b, 0xcb, 0x36, 0x96, 0xd1, 0x71, 0x8c, 0x2c,
+ 0x02, 0xa2, 0x5f, 0xff, 0xb8, 0x18, 0xe5, 0x45,
+ 0x6f, 0xcf, 0x32, 0x92, 0xd5, 0x75, 0x88, 0x28,
+ 0x06, 0xa6, 0x5b, 0xfb, 0xbc, 0x1c, 0xe1, 0x41,
+ 0xbd, 0x1d, 0xe0, 0x40, 0x07, 0xa7, 0x5a, 0xfa,
+ 0xd4, 0x74, 0x89, 0x29, 0x6e, 0xce, 0x33, 0x93,
+ 0xd6, 0x76, 0x8b, 0x2b, 0x6c, 0xcc, 0x31, 0x91,
+ 0xbf, 0x1f, 0xe2, 0x42, 0x05, 0xa5, 0x58, 0xf8,
+ 0x04, 0xa4, 0x59, 0xf9, 0xbe, 0x1e, 0xe3, 0x43,
+ 0x6d, 0xcd, 0x30, 0x90, 0xd7, 0x77, 0x8a, 0x2a,
+ 0xde, 0x7e, 0x83, 0x23, 0x64, 0xc4, 0x39, 0x99,
+ 0xb7, 0x17, 0xea, 0x4a, 0x0d, 0xad, 0x50, 0xf0,
+ 0x0c, 0xac, 0x51, 0xf1, 0xb6, 0x16, 0xeb, 0x4b,
+ 0x65, 0xc5, 0x38, 0x98, 0xdf, 0x7f, 0x82, 0x22,
+ 0x67, 0xc7, 0x3a, 0x9a, 0xdd, 0x7d, 0x80, 0x20,
+ 0x0e, 0xae, 0x53, 0xf3, 0xb4, 0x14, 0xe9, 0x49,
+ 0xb5, 0x15, 0xe8, 0x48, 0x0f, 0xaf, 0x52, 0xf2,
+ 0xdc, 0x7c, 0x81, 0x21, 0x66, 0xc6, 0x3b, 0x9b,
+ 0xb1, 0x11, 0xec, 0x4c, 0x0b, 0xab, 0x56, 0xf6,
+ 0xd8, 0x78, 0x85, 0x25, 0x62, 0xc2, 0x3f, 0x9f,
+ 0x63, 0xc3, 0x3e, 0x9e, 0xd9, 0x79, 0x84, 0x24,
+ 0x0a, 0xaa, 0x57, 0xf7, 0xb0, 0x10, 0xed, 0x4d,
+ 0x08, 0xa8, 0x55, 0xf5, 0xb2, 0x12, 0xef, 0x4f,
+ 0x61, 0xc1, 0x3c, 0x9c, 0xdb, 0x7b, 0x86, 0x26,
+ 0xda, 0x7a, 0x87, 0x27, 0x60, 0xc0, 0x3d, 0x9d,
+ 0xb3, 0x13, 0xee, 0x4e, 0x09, 0xa9, 0x54, 0xf4,
+ },
+ {
+ 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40,
+ 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21,
+ 0xc2, 0x63, 0x9d, 0x3c, 0x7c, 0xdd, 0x23, 0x82,
+ 0xa3, 0x02, 0xfc, 0x5d, 0x1d, 0xbc, 0x42, 0xe3,
+ 0x99, 0x38, 0xc6, 0x67, 0x27, 0x86, 0x78, 0xd9,
+ 0xf8, 0x59, 0xa7, 0x06, 0x46, 0xe7, 0x19, 0xb8,
+ 0x5b, 0xfa, 0x04, 0xa5, 0xe5, 0x44, 0xba, 0x1b,
+ 0x3a, 0x9b, 0x65, 0xc4, 0x84, 0x25, 0xdb, 0x7a,
+ 0x2f, 0x8e, 0x70, 0xd1, 0x91, 0x30, 0xce, 0x6f,
+ 0x4e, 0xef, 0x11, 0xb0, 0xf0, 0x51, 0xaf, 0x0e,
+ 0xed, 0x4c, 0xb2, 0x13, 0x53, 0xf2, 0x0c, 0xad,
+ 0x8c, 0x2d, 0xd3, 0x72, 0x32, 0x93, 0x6d, 0xcc,
+ 0xb6, 0x17, 0xe9, 0x48, 0x08, 0xa9, 0x57, 0xf6,
+ 0xd7, 0x76, 0x88, 0x29, 0x69, 0xc8, 0x36, 0x97,
+ 0x74, 0xd5, 0x2b, 0x8a, 0xca, 0x6b, 0x95, 0x34,
+ 0x15, 0xb4, 0x4a, 0xeb, 0xab, 0x0a, 0xf4, 0x55,
+ 0x5e, 0xff, 0x01, 0xa0, 0xe0, 0x41, 0xbf, 0x1e,
+ 0x3f, 0x9e, 0x60, 0xc1, 0x81, 0x20, 0xde, 0x7f,
+ 0x9c, 0x3d, 0xc3, 0x62, 0x22, 0x83, 0x7d, 0xdc,
+ 0xfd, 0x5c, 0xa2, 0x03, 0x43, 0xe2, 0x1c, 0xbd,
+ 0xc7, 0x66, 0x98, 0x39, 0x79, 0xd8, 0x26, 0x87,
+ 0xa6, 0x07, 0xf9, 0x58, 0x18, 0xb9, 0x47, 0xe6,
+ 0x05, 0xa4, 0x5a, 0xfb, 0xbb, 0x1a, 0xe4, 0x45,
+ 0x64, 0xc5, 0x3b, 0x9a, 0xda, 0x7b, 0x85, 0x24,
+ 0x71, 0xd0, 0x2e, 0x8f, 0xcf, 0x6e, 0x90, 0x31,
+ 0x10, 0xb1, 0x4f, 0xee, 0xae, 0x0f, 0xf1, 0x50,
+ 0xb3, 0x12, 0xec, 0x4d, 0x0d, 0xac, 0x52, 0xf3,
+ 0xd2, 0x73, 0x8d, 0x2c, 0x6c, 0xcd, 0x33, 0x92,
+ 0xe8, 0x49, 0xb7, 0x16, 0x56, 0xf7, 0x09, 0xa8,
+ 0x89, 0x28, 0xd6, 0x77, 0x37, 0x96, 0x68, 0xc9,
+ 0x2a, 0x8b, 0x75, 0xd4, 0x94, 0x35, 0xcb, 0x6a,
+ 0x4b, 0xea, 0x14, 0xb5, 0xf5, 0x54, 0xaa, 0x0b,
+ },
+ {
+ 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49,
+ 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30,
+ 0xf2, 0x50, 0xab, 0x09, 0x40, 0xe2, 0x19, 0xbb,
+ 0x8b, 0x29, 0xd2, 0x70, 0x39, 0x9b, 0x60, 0xc2,
+ 0xf9, 0x5b, 0xa0, 0x02, 0x4b, 0xe9, 0x12, 0xb0,
+ 0x80, 0x22, 0xd9, 0x7b, 0x32, 0x90, 0x6b, 0xc9,
+ 0x0b, 0xa9, 0x52, 0xf0, 0xb9, 0x1b, 0xe0, 0x42,
+ 0x72, 0xd0, 0x2b, 0x89, 0xc0, 0x62, 0x99, 0x3b,
+ 0xef, 0x4d, 0xb6, 0x14, 0x5d, 0xff, 0x04, 0xa6,
+ 0x96, 0x34, 0xcf, 0x6d, 0x24, 0x86, 0x7d, 0xdf,
+ 0x1d, 0xbf, 0x44, 0xe6, 0xaf, 0x0d, 0xf6, 0x54,
+ 0x64, 0xc6, 0x3d, 0x9f, 0xd6, 0x74, 0x8f, 0x2d,
+ 0x16, 0xb4, 0x4f, 0xed, 0xa4, 0x06, 0xfd, 0x5f,
+ 0x6f, 0xcd, 0x36, 0x94, 0xdd, 0x7f, 0x84, 0x26,
+ 0xe4, 0x46, 0xbd, 0x1f, 0x56, 0xf4, 0x0f, 0xad,
+ 0x9d, 0x3f, 0xc4, 0x66, 0x2f, 0x8d, 0x76, 0xd4,
+ 0xc3, 0x61, 0x9a, 0x38, 0x71, 0xd3, 0x28, 0x8a,
+ 0xba, 0x18, 0xe3, 0x41, 0x08, 0xaa, 0x51, 0xf3,
+ 0x31, 0x93, 0x68, 0xca, 0x83, 0x21, 0xda, 0x78,
+ 0x48, 0xea, 0x11, 0xb3, 0xfa, 0x58, 0xa3, 0x01,
+ 0x3a, 0x98, 0x63, 0xc1, 0x88, 0x2a, 0xd1, 0x73,
+ 0x43, 0xe1, 0x1a, 0xb8, 0xf1, 0x53, 0xa8, 0x0a,
+ 0xc8, 0x6a, 0x91, 0x33, 0x7a, 0xd8, 0x23, 0x81,
+ 0xb1, 0x13, 0xe8, 0x4a, 0x03, 0xa1, 0x5a, 0xf8,
+ 0x2c, 0x8e, 0x75, 0xd7, 0x9e, 0x3c, 0xc7, 0x65,
+ 0x55, 0xf7, 0x0c, 0xae, 0xe7, 0x45, 0xbe, 0x1c,
+ 0xde, 0x7c, 0x87, 0x25, 0x6c, 0xce, 0x35, 0x97,
+ 0xa7, 0x05, 0xfe, 0x5c, 0x15, 0xb7, 0x4c, 0xee,
+ 0xd5, 0x77, 0x8c, 0x2e, 0x67, 0xc5, 0x3e, 0x9c,
+ 0xac, 0x0e, 0xf5, 0x57, 0x1e, 0xbc, 0x47, 0xe5,
+ 0x27, 0x85, 0x7e, 0xdc, 0x95, 0x37, 0xcc, 0x6e,
+ 0x5e, 0xfc, 0x07, 0xa5, 0xec, 0x4e, 0xb5, 0x17,
+ },
+ {
+ 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e,
+ 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f,
+ 0xe2, 0x41, 0xb9, 0x1a, 0x54, 0xf7, 0x0f, 0xac,
+ 0x93, 0x30, 0xc8, 0x6b, 0x25, 0x86, 0x7e, 0xdd,
+ 0xd9, 0x7a, 0x82, 0x21, 0x6f, 0xcc, 0x34, 0x97,
+ 0xa8, 0x0b, 0xf3, 0x50, 0x1e, 0xbd, 0x45, 0xe6,
+ 0x3b, 0x98, 0x60, 0xc3, 0x8d, 0x2e, 0xd6, 0x75,
+ 0x4a, 0xe9, 0x11, 0xb2, 0xfc, 0x5f, 0xa7, 0x04,
+ 0xaf, 0x0c, 0xf4, 0x57, 0x19, 0xba, 0x42, 0xe1,
+ 0xde, 0x7d, 0x85, 0x26, 0x68, 0xcb, 0x33, 0x90,
+ 0x4d, 0xee, 0x16, 0xb5, 0xfb, 0x58, 0xa0, 0x03,
+ 0x3c, 0x9f, 0x67, 0xc4, 0x8a, 0x29, 0xd1, 0x72,
+ 0x76, 0xd5, 0x2d, 0x8e, 0xc0, 0x63, 0x9b, 0x38,
+ 0x07, 0xa4, 0x5c, 0xff, 0xb1, 0x12, 0xea, 0x49,
+ 0x94, 0x37, 0xcf, 0x6c, 0x22, 0x81, 0x79, 0xda,
+ 0xe5, 0x46, 0xbe, 0x1d, 0x53, 0xf0, 0x08, 0xab,
+ 0x43, 0xe0, 0x18, 0xbb, 0xf5, 0x56, 0xae, 0x0d,
+ 0x32, 0x91, 0x69, 0xca, 0x84, 0x27, 0xdf, 0x7c,
+ 0xa1, 0x02, 0xfa, 0x59, 0x17, 0xb4, 0x4c, 0xef,
+ 0xd0, 0x73, 0x8b, 0x28, 0x66, 0xc5, 0x3d, 0x9e,
+ 0x9a, 0x39, 0xc1, 0x62, 0x2c, 0x8f, 0x77, 0xd4,
+ 0xeb, 0x48, 0xb0, 0x13, 0x5d, 0xfe, 0x06, 0xa5,
+ 0x78, 0xdb, 0x23, 0x80, 0xce, 0x6d, 0x95, 0x36,
+ 0x09, 0xaa, 0x52, 0xf1, 0xbf, 0x1c, 0xe4, 0x47,
+ 0xec, 0x4f, 0xb7, 0x14, 0x5a, 0xf9, 0x01, 0xa2,
+ 0x9d, 0x3e, 0xc6, 0x65, 0x2b, 0x88, 0x70, 0xd3,
+ 0x0e, 0xad, 0x55, 0xf6, 0xb8, 0x1b, 0xe3, 0x40,
+ 0x7f, 0xdc, 0x24, 0x87, 0xc9, 0x6a, 0x92, 0x31,
+ 0x35, 0x96, 0x6e, 0xcd, 0x83, 0x20, 0xd8, 0x7b,
+ 0x44, 0xe7, 0x1f, 0xbc, 0xf2, 0x51, 0xa9, 0x0a,
+ 0xd7, 0x74, 0x8c, 0x2f, 0x61, 0xc2, 0x3a, 0x99,
+ 0xa6, 0x05, 0xfd, 0x5e, 0x10, 0xb3, 0x4b, 0xe8,
+ },
+ {
+ 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b,
+ 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12,
+ 0x92, 0x36, 0xc7, 0x63, 0x38, 0x9c, 0x6d, 0xc9,
+ 0xdb, 0x7f, 0x8e, 0x2a, 0x71, 0xd5, 0x24, 0x80,
+ 0x39, 0x9d, 0x6c, 0xc8, 0x93, 0x37, 0xc6, 0x62,
+ 0x70, 0xd4, 0x25, 0x81, 0xda, 0x7e, 0x8f, 0x2b,
+ 0xab, 0x0f, 0xfe, 0x5a, 0x01, 0xa5, 0x54, 0xf0,
+ 0xe2, 0x46, 0xb7, 0x13, 0x48, 0xec, 0x1d, 0xb9,
+ 0x72, 0xd6, 0x27, 0x83, 0xd8, 0x7c, 0x8d, 0x29,
+ 0x3b, 0x9f, 0x6e, 0xca, 0x91, 0x35, 0xc4, 0x60,
+ 0xe0, 0x44, 0xb5, 0x11, 0x4a, 0xee, 0x1f, 0xbb,
+ 0xa9, 0x0d, 0xfc, 0x58, 0x03, 0xa7, 0x56, 0xf2,
+ 0x4b, 0xef, 0x1e, 0xba, 0xe1, 0x45, 0xb4, 0x10,
+ 0x02, 0xa6, 0x57, 0xf3, 0xa8, 0x0c, 0xfd, 0x59,
+ 0xd9, 0x7d, 0x8c, 0x28, 0x73, 0xd7, 0x26, 0x82,
+ 0x90, 0x34, 0xc5, 0x61, 0x3a, 0x9e, 0x6f, 0xcb,
+ 0xe4, 0x40, 0xb1, 0x15, 0x4e, 0xea, 0x1b, 0xbf,
+ 0xad, 0x09, 0xf8, 0x5c, 0x07, 0xa3, 0x52, 0xf6,
+ 0x76, 0xd2, 0x23, 0x87, 0xdc, 0x78, 0x89, 0x2d,
+ 0x3f, 0x9b, 0x6a, 0xce, 0x95, 0x31, 0xc0, 0x64,
+ 0xdd, 0x79, 0x88, 0x2c, 0x77, 0xd3, 0x22, 0x86,
+ 0x94, 0x30, 0xc1, 0x65, 0x3e, 0x9a, 0x6b, 0xcf,
+ 0x4f, 0xeb, 0x1a, 0xbe, 0xe5, 0x41, 0xb0, 0x14,
+ 0x06, 0xa2, 0x53, 0xf7, 0xac, 0x08, 0xf9, 0x5d,
+ 0x96, 0x32, 0xc3, 0x67, 0x3c, 0x98, 0x69, 0xcd,
+ 0xdf, 0x7b, 0x8a, 0x2e, 0x75, 0xd1, 0x20, 0x84,
+ 0x04, 0xa0, 0x51, 0xf5, 0xae, 0x0a, 0xfb, 0x5f,
+ 0x4d, 0xe9, 0x18, 0xbc, 0xe7, 0x43, 0xb2, 0x16,
+ 0xaf, 0x0b, 0xfa, 0x5e, 0x05, 0xa1, 0x50, 0xf4,
+ 0xe6, 0x42, 0xb3, 0x17, 0x4c, 0xe8, 0x19, 0xbd,
+ 0x3d, 0x99, 0x68, 0xcc, 0x97, 0x33, 0xc2, 0x66,
+ 0x74, 0xd0, 0x21, 0x85, 0xde, 0x7a, 0x8b, 0x2f,
+ },
+ {
+ 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c,
+ 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d,
+ 0x82, 0x27, 0xd5, 0x70, 0x2c, 0x89, 0x7b, 0xde,
+ 0xc3, 0x66, 0x94, 0x31, 0x6d, 0xc8, 0x3a, 0x9f,
+ 0x19, 0xbc, 0x4e, 0xeb, 0xb7, 0x12, 0xe0, 0x45,
+ 0x58, 0xfd, 0x0f, 0xaa, 0xf6, 0x53, 0xa1, 0x04,
+ 0x9b, 0x3e, 0xcc, 0x69, 0x35, 0x90, 0x62, 0xc7,
+ 0xda, 0x7f, 0x8d, 0x28, 0x74, 0xd1, 0x23, 0x86,
+ 0x32, 0x97, 0x65, 0xc0, 0x9c, 0x39, 0xcb, 0x6e,
+ 0x73, 0xd6, 0x24, 0x81, 0xdd, 0x78, 0x8a, 0x2f,
+ 0xb0, 0x15, 0xe7, 0x42, 0x1e, 0xbb, 0x49, 0xec,
+ 0xf1, 0x54, 0xa6, 0x03, 0x5f, 0xfa, 0x08, 0xad,
+ 0x2b, 0x8e, 0x7c, 0xd9, 0x85, 0x20, 0xd2, 0x77,
+ 0x6a, 0xcf, 0x3d, 0x98, 0xc4, 0x61, 0x93, 0x36,
+ 0xa9, 0x0c, 0xfe, 0x5b, 0x07, 0xa2, 0x50, 0xf5,
+ 0xe8, 0x4d, 0xbf, 0x1a, 0x46, 0xe3, 0x11, 0xb4,
+ 0x64, 0xc1, 0x33, 0x96, 0xca, 0x6f, 0x9d, 0x38,
+ 0x25, 0x80, 0x72, 0xd7, 0x8b, 0x2e, 0xdc, 0x79,
+ 0xe6, 0x43, 0xb1, 0x14, 0x48, 0xed, 0x1f, 0xba,
+ 0xa7, 0x02, 0xf0, 0x55, 0x09, 0xac, 0x5e, 0xfb,
+ 0x7d, 0xd8, 0x2a, 0x8f, 0xd3, 0x76, 0x84, 0x21,
+ 0x3c, 0x99, 0x6b, 0xce, 0x92, 0x37, 0xc5, 0x60,
+ 0xff, 0x5a, 0xa8, 0x0d, 0x51, 0xf4, 0x06, 0xa3,
+ 0xbe, 0x1b, 0xe9, 0x4c, 0x10, 0xb5, 0x47, 0xe2,
+ 0x56, 0xf3, 0x01, 0xa4, 0xf8, 0x5d, 0xaf, 0x0a,
+ 0x17, 0xb2, 0x40, 0xe5, 0xb9, 0x1c, 0xee, 0x4b,
+ 0xd4, 0x71, 0x83, 0x26, 0x7a, 0xdf, 0x2d, 0x88,
+ 0x95, 0x30, 0xc2, 0x67, 0x3b, 0x9e, 0x6c, 0xc9,
+ 0x4f, 0xea, 0x18, 0xbd, 0xe1, 0x44, 0xb6, 0x13,
+ 0x0e, 0xab, 0x59, 0xfc, 0xa0, 0x05, 0xf7, 0x52,
+ 0xcd, 0x68, 0x9a, 0x3f, 0x63, 0xc6, 0x34, 0x91,
+ 0x8c, 0x29, 0xdb, 0x7e, 0x22, 0x87, 0x75, 0xd0,
+ },
+ {
+ 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+ 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c,
+ 0xb2, 0x14, 0xe3, 0x45, 0x10, 0xb6, 0x41, 0xe7,
+ 0xeb, 0x4d, 0xba, 0x1c, 0x49, 0xef, 0x18, 0xbe,
+ 0x79, 0xdf, 0x28, 0x8e, 0xdb, 0x7d, 0x8a, 0x2c,
+ 0x20, 0x86, 0x71, 0xd7, 0x82, 0x24, 0xd3, 0x75,
+ 0xcb, 0x6d, 0x9a, 0x3c, 0x69, 0xcf, 0x38, 0x9e,
+ 0x92, 0x34, 0xc3, 0x65, 0x30, 0x96, 0x61, 0xc7,
+ 0xf2, 0x54, 0xa3, 0x05, 0x50, 0xf6, 0x01, 0xa7,
+ 0xab, 0x0d, 0xfa, 0x5c, 0x09, 0xaf, 0x58, 0xfe,
+ 0x40, 0xe6, 0x11, 0xb7, 0xe2, 0x44, 0xb3, 0x15,
+ 0x19, 0xbf, 0x48, 0xee, 0xbb, 0x1d, 0xea, 0x4c,
+ 0x8b, 0x2d, 0xda, 0x7c, 0x29, 0x8f, 0x78, 0xde,
+ 0xd2, 0x74, 0x83, 0x25, 0x70, 0xd6, 0x21, 0x87,
+ 0x39, 0x9f, 0x68, 0xce, 0x9b, 0x3d, 0xca, 0x6c,
+ 0x60, 0xc6, 0x31, 0x97, 0xc2, 0x64, 0x93, 0x35,
+ 0xf9, 0x5f, 0xa8, 0x0e, 0x5b, 0xfd, 0x0a, 0xac,
+ 0xa0, 0x06, 0xf1, 0x57, 0x02, 0xa4, 0x53, 0xf5,
+ 0x4b, 0xed, 0x1a, 0xbc, 0xe9, 0x4f, 0xb8, 0x1e,
+ 0x12, 0xb4, 0x43, 0xe5, 0xb0, 0x16, 0xe1, 0x47,
+ 0x80, 0x26, 0xd1, 0x77, 0x22, 0x84, 0x73, 0xd5,
+ 0xd9, 0x7f, 0x88, 0x2e, 0x7b, 0xdd, 0x2a, 0x8c,
+ 0x32, 0x94, 0x63, 0xc5, 0x90, 0x36, 0xc1, 0x67,
+ 0x6b, 0xcd, 0x3a, 0x9c, 0xc9, 0x6f, 0x98, 0x3e,
+ 0x0b, 0xad, 0x5a, 0xfc, 0xa9, 0x0f, 0xf8, 0x5e,
+ 0x52, 0xf4, 0x03, 0xa5, 0xf0, 0x56, 0xa1, 0x07,
+ 0xb9, 0x1f, 0xe8, 0x4e, 0x1b, 0xbd, 0x4a, 0xec,
+ 0xe0, 0x46, 0xb1, 0x17, 0x42, 0xe4, 0x13, 0xb5,
+ 0x72, 0xd4, 0x23, 0x85, 0xd0, 0x76, 0x81, 0x27,
+ 0x2b, 0x8d, 0x7a, 0xdc, 0x89, 0x2f, 0xd8, 0x7e,
+ 0xc0, 0x66, 0x91, 0x37, 0x62, 0xc4, 0x33, 0x95,
+ 0x99, 0x3f, 0xc8, 0x6e, 0x3b, 0x9d, 0x6a, 0xcc,
+ },
+ {
+ 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52,
+ 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03,
+ 0xa2, 0x05, 0xf1, 0x56, 0x04, 0xa3, 0x57, 0xf0,
+ 0xf3, 0x54, 0xa0, 0x07, 0x55, 0xf2, 0x06, 0xa1,
+ 0x59, 0xfe, 0x0a, 0xad, 0xff, 0x58, 0xac, 0x0b,
+ 0x08, 0xaf, 0x5b, 0xfc, 0xae, 0x09, 0xfd, 0x5a,
+ 0xfb, 0x5c, 0xa8, 0x0f, 0x5d, 0xfa, 0x0e, 0xa9,
+ 0xaa, 0x0d, 0xf9, 0x5e, 0x0c, 0xab, 0x5f, 0xf8,
+ 0xb2, 0x15, 0xe1, 0x46, 0x14, 0xb3, 0x47, 0xe0,
+ 0xe3, 0x44, 0xb0, 0x17, 0x45, 0xe2, 0x16, 0xb1,
+ 0x10, 0xb7, 0x43, 0xe4, 0xb6, 0x11, 0xe5, 0x42,
+ 0x41, 0xe6, 0x12, 0xb5, 0xe7, 0x40, 0xb4, 0x13,
+ 0xeb, 0x4c, 0xb8, 0x1f, 0x4d, 0xea, 0x1e, 0xb9,
+ 0xba, 0x1d, 0xe9, 0x4e, 0x1c, 0xbb, 0x4f, 0xe8,
+ 0x49, 0xee, 0x1a, 0xbd, 0xef, 0x48, 0xbc, 0x1b,
+ 0x18, 0xbf, 0x4b, 0xec, 0xbe, 0x19, 0xed, 0x4a,
+ 0x79, 0xde, 0x2a, 0x8d, 0xdf, 0x78, 0x8c, 0x2b,
+ 0x28, 0x8f, 0x7b, 0xdc, 0x8e, 0x29, 0xdd, 0x7a,
+ 0xdb, 0x7c, 0x88, 0x2f, 0x7d, 0xda, 0x2e, 0x89,
+ 0x8a, 0x2d, 0xd9, 0x7e, 0x2c, 0x8b, 0x7f, 0xd8,
+ 0x20, 0x87, 0x73, 0xd4, 0x86, 0x21, 0xd5, 0x72,
+ 0x71, 0xd6, 0x22, 0x85, 0xd7, 0x70, 0x84, 0x23,
+ 0x82, 0x25, 0xd1, 0x76, 0x24, 0x83, 0x77, 0xd0,
+ 0xd3, 0x74, 0x80, 0x27, 0x75, 0xd2, 0x26, 0x81,
+ 0xcb, 0x6c, 0x98, 0x3f, 0x6d, 0xca, 0x3e, 0x99,
+ 0x9a, 0x3d, 0xc9, 0x6e, 0x3c, 0x9b, 0x6f, 0xc8,
+ 0x69, 0xce, 0x3a, 0x9d, 0xcf, 0x68, 0x9c, 0x3b,
+ 0x38, 0x9f, 0x6b, 0xcc, 0x9e, 0x39, 0xcd, 0x6a,
+ 0x92, 0x35, 0xc1, 0x66, 0x34, 0x93, 0x67, 0xc0,
+ 0xc3, 0x64, 0x90, 0x37, 0x65, 0xc2, 0x36, 0x91,
+ 0x30, 0x97, 0x63, 0xc4, 0x96, 0x31, 0xc5, 0x62,
+ 0x61, 0xc6, 0x32, 0x95, 0xc7, 0x60, 0x94, 0x33,
+ },
+ {
+ 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f,
+ 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56,
+ 0x52, 0xfa, 0x1f, 0xb7, 0xc8, 0x60, 0x85, 0x2d,
+ 0x7b, 0xd3, 0x36, 0x9e, 0xe1, 0x49, 0xac, 0x04,
+ 0xa4, 0x0c, 0xe9, 0x41, 0x3e, 0x96, 0x73, 0xdb,
+ 0x8d, 0x25, 0xc0, 0x68, 0x17, 0xbf, 0x5a, 0xf2,
+ 0xf6, 0x5e, 0xbb, 0x13, 0x6c, 0xc4, 0x21, 0x89,
+ 0xdf, 0x77, 0x92, 0x3a, 0x45, 0xed, 0x08, 0xa0,
+ 0x55, 0xfd, 0x18, 0xb0, 0xcf, 0x67, 0x82, 0x2a,
+ 0x7c, 0xd4, 0x31, 0x99, 0xe6, 0x4e, 0xab, 0x03,
+ 0x07, 0xaf, 0x4a, 0xe2, 0x9d, 0x35, 0xd0, 0x78,
+ 0x2e, 0x86, 0x63, 0xcb, 0xb4, 0x1c, 0xf9, 0x51,
+ 0xf1, 0x59, 0xbc, 0x14, 0x6b, 0xc3, 0x26, 0x8e,
+ 0xd8, 0x70, 0x95, 0x3d, 0x42, 0xea, 0x0f, 0xa7,
+ 0xa3, 0x0b, 0xee, 0x46, 0x39, 0x91, 0x74, 0xdc,
+ 0x8a, 0x22, 0xc7, 0x6f, 0x10, 0xb8, 0x5d, 0xf5,
+ 0xaa, 0x02, 0xe7, 0x4f, 0x30, 0x98, 0x7d, 0xd5,
+ 0x83, 0x2b, 0xce, 0x66, 0x19, 0xb1, 0x54, 0xfc,
+ 0xf8, 0x50, 0xb5, 0x1d, 0x62, 0xca, 0x2f, 0x87,
+ 0xd1, 0x79, 0x9c, 0x34, 0x4b, 0xe3, 0x06, 0xae,
+ 0x0e, 0xa6, 0x43, 0xeb, 0x94, 0x3c, 0xd9, 0x71,
+ 0x27, 0x8f, 0x6a, 0xc2, 0xbd, 0x15, 0xf0, 0x58,
+ 0x5c, 0xf4, 0x11, 0xb9, 0xc6, 0x6e, 0x8b, 0x23,
+ 0x75, 0xdd, 0x38, 0x90, 0xef, 0x47, 0xa2, 0x0a,
+ 0xff, 0x57, 0xb2, 0x1a, 0x65, 0xcd, 0x28, 0x80,
+ 0xd6, 0x7e, 0x9b, 0x33, 0x4c, 0xe4, 0x01, 0xa9,
+ 0xad, 0x05, 0xe0, 0x48, 0x37, 0x9f, 0x7a, 0xd2,
+ 0x84, 0x2c, 0xc9, 0x61, 0x1e, 0xb6, 0x53, 0xfb,
+ 0x5b, 0xf3, 0x16, 0xbe, 0xc1, 0x69, 0x8c, 0x24,
+ 0x72, 0xda, 0x3f, 0x97, 0xe8, 0x40, 0xa5, 0x0d,
+ 0x09, 0xa1, 0x44, 0xec, 0x93, 0x3b, 0xde, 0x76,
+ 0x20, 0x88, 0x6d, 0xc5, 0xba, 0x12, 0xf7, 0x5f,
+ },
+ {
+ 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78,
+ 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59,
+ 0x42, 0xeb, 0x0d, 0xa4, 0xdc, 0x75, 0x93, 0x3a,
+ 0x63, 0xca, 0x2c, 0x85, 0xfd, 0x54, 0xb2, 0x1b,
+ 0x84, 0x2d, 0xcb, 0x62, 0x1a, 0xb3, 0x55, 0xfc,
+ 0xa5, 0x0c, 0xea, 0x43, 0x3b, 0x92, 0x74, 0xdd,
+ 0xc6, 0x6f, 0x89, 0x20, 0x58, 0xf1, 0x17, 0xbe,
+ 0xe7, 0x4e, 0xa8, 0x01, 0x79, 0xd0, 0x36, 0x9f,
+ 0x15, 0xbc, 0x5a, 0xf3, 0x8b, 0x22, 0xc4, 0x6d,
+ 0x34, 0x9d, 0x7b, 0xd2, 0xaa, 0x03, 0xe5, 0x4c,
+ 0x57, 0xfe, 0x18, 0xb1, 0xc9, 0x60, 0x86, 0x2f,
+ 0x76, 0xdf, 0x39, 0x90, 0xe8, 0x41, 0xa7, 0x0e,
+ 0x91, 0x38, 0xde, 0x77, 0x0f, 0xa6, 0x40, 0xe9,
+ 0xb0, 0x19, 0xff, 0x56, 0x2e, 0x87, 0x61, 0xc8,
+ 0xd3, 0x7a, 0x9c, 0x35, 0x4d, 0xe4, 0x02, 0xab,
+ 0xf2, 0x5b, 0xbd, 0x14, 0x6c, 0xc5, 0x23, 0x8a,
+ 0x2a, 0x83, 0x65, 0xcc, 0xb4, 0x1d, 0xfb, 0x52,
+ 0x0b, 0xa2, 0x44, 0xed, 0x95, 0x3c, 0xda, 0x73,
+ 0x68, 0xc1, 0x27, 0x8e, 0xf6, 0x5f, 0xb9, 0x10,
+ 0x49, 0xe0, 0x06, 0xaf, 0xd7, 0x7e, 0x98, 0x31,
+ 0xae, 0x07, 0xe1, 0x48, 0x30, 0x99, 0x7f, 0xd6,
+ 0x8f, 0x26, 0xc0, 0x69, 0x11, 0xb8, 0x5e, 0xf7,
+ 0xec, 0x45, 0xa3, 0x0a, 0x72, 0xdb, 0x3d, 0x94,
+ 0xcd, 0x64, 0x82, 0x2b, 0x53, 0xfa, 0x1c, 0xb5,
+ 0x3f, 0x96, 0x70, 0xd9, 0xa1, 0x08, 0xee, 0x47,
+ 0x1e, 0xb7, 0x51, 0xf8, 0x80, 0x29, 0xcf, 0x66,
+ 0x7d, 0xd4, 0x32, 0x9b, 0xe3, 0x4a, 0xac, 0x05,
+ 0x5c, 0xf5, 0x13, 0xba, 0xc2, 0x6b, 0x8d, 0x24,
+ 0xbb, 0x12, 0xf4, 0x5d, 0x25, 0x8c, 0x6a, 0xc3,
+ 0x9a, 0x33, 0xd5, 0x7c, 0x04, 0xad, 0x4b, 0xe2,
+ 0xf9, 0x50, 0xb6, 0x1f, 0x67, 0xce, 0x28, 0x81,
+ 0xd8, 0x71, 0x97, 0x3e, 0x46, 0xef, 0x09, 0xa0,
+ },
+ {
+ 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71,
+ 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48,
+ 0x72, 0xd8, 0x3b, 0x91, 0xe0, 0x4a, 0xa9, 0x03,
+ 0x4b, 0xe1, 0x02, 0xa8, 0xd9, 0x73, 0x90, 0x3a,
+ 0xe4, 0x4e, 0xad, 0x07, 0x76, 0xdc, 0x3f, 0x95,
+ 0xdd, 0x77, 0x94, 0x3e, 0x4f, 0xe5, 0x06, 0xac,
+ 0x96, 0x3c, 0xdf, 0x75, 0x04, 0xae, 0x4d, 0xe7,
+ 0xaf, 0x05, 0xe6, 0x4c, 0x3d, 0x97, 0x74, 0xde,
+ 0xd5, 0x7f, 0x9c, 0x36, 0x47, 0xed, 0x0e, 0xa4,
+ 0xec, 0x46, 0xa5, 0x0f, 0x7e, 0xd4, 0x37, 0x9d,
+ 0xa7, 0x0d, 0xee, 0x44, 0x35, 0x9f, 0x7c, 0xd6,
+ 0x9e, 0x34, 0xd7, 0x7d, 0x0c, 0xa6, 0x45, 0xef,
+ 0x31, 0x9b, 0x78, 0xd2, 0xa3, 0x09, 0xea, 0x40,
+ 0x08, 0xa2, 0x41, 0xeb, 0x9a, 0x30, 0xd3, 0x79,
+ 0x43, 0xe9, 0x0a, 0xa0, 0xd1, 0x7b, 0x98, 0x32,
+ 0x7a, 0xd0, 0x33, 0x99, 0xe8, 0x42, 0xa1, 0x0b,
+ 0xb7, 0x1d, 0xfe, 0x54, 0x25, 0x8f, 0x6c, 0xc6,
+ 0x8e, 0x24, 0xc7, 0x6d, 0x1c, 0xb6, 0x55, 0xff,
+ 0xc5, 0x6f, 0x8c, 0x26, 0x57, 0xfd, 0x1e, 0xb4,
+ 0xfc, 0x56, 0xb5, 0x1f, 0x6e, 0xc4, 0x27, 0x8d,
+ 0x53, 0xf9, 0x1a, 0xb0, 0xc1, 0x6b, 0x88, 0x22,
+ 0x6a, 0xc0, 0x23, 0x89, 0xf8, 0x52, 0xb1, 0x1b,
+ 0x21, 0x8b, 0x68, 0xc2, 0xb3, 0x19, 0xfa, 0x50,
+ 0x18, 0xb2, 0x51, 0xfb, 0x8a, 0x20, 0xc3, 0x69,
+ 0x62, 0xc8, 0x2b, 0x81, 0xf0, 0x5a, 0xb9, 0x13,
+ 0x5b, 0xf1, 0x12, 0xb8, 0xc9, 0x63, 0x80, 0x2a,
+ 0x10, 0xba, 0x59, 0xf3, 0x82, 0x28, 0xcb, 0x61,
+ 0x29, 0x83, 0x60, 0xca, 0xbb, 0x11, 0xf2, 0x58,
+ 0x86, 0x2c, 0xcf, 0x65, 0x14, 0xbe, 0x5d, 0xf7,
+ 0xbf, 0x15, 0xf6, 0x5c, 0x2d, 0x87, 0x64, 0xce,
+ 0xf4, 0x5e, 0xbd, 0x17, 0x66, 0xcc, 0x2f, 0x85,
+ 0xcd, 0x67, 0x84, 0x2e, 0x5f, 0xf5, 0x16, 0xbc,
+ },
+ {
+ 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76,
+ 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47,
+ 0x62, 0xc9, 0x29, 0x82, 0xf4, 0x5f, 0xbf, 0x14,
+ 0x53, 0xf8, 0x18, 0xb3, 0xc5, 0x6e, 0x8e, 0x25,
+ 0xc4, 0x6f, 0x8f, 0x24, 0x52, 0xf9, 0x19, 0xb2,
+ 0xf5, 0x5e, 0xbe, 0x15, 0x63, 0xc8, 0x28, 0x83,
+ 0xa6, 0x0d, 0xed, 0x46, 0x30, 0x9b, 0x7b, 0xd0,
+ 0x97, 0x3c, 0xdc, 0x77, 0x01, 0xaa, 0x4a, 0xe1,
+ 0x95, 0x3e, 0xde, 0x75, 0x03, 0xa8, 0x48, 0xe3,
+ 0xa4, 0x0f, 0xef, 0x44, 0x32, 0x99, 0x79, 0xd2,
+ 0xf7, 0x5c, 0xbc, 0x17, 0x61, 0xca, 0x2a, 0x81,
+ 0xc6, 0x6d, 0x8d, 0x26, 0x50, 0xfb, 0x1b, 0xb0,
+ 0x51, 0xfa, 0x1a, 0xb1, 0xc7, 0x6c, 0x8c, 0x27,
+ 0x60, 0xcb, 0x2b, 0x80, 0xf6, 0x5d, 0xbd, 0x16,
+ 0x33, 0x98, 0x78, 0xd3, 0xa5, 0x0e, 0xee, 0x45,
+ 0x02, 0xa9, 0x49, 0xe2, 0x94, 0x3f, 0xdf, 0x74,
+ 0x37, 0x9c, 0x7c, 0xd7, 0xa1, 0x0a, 0xea, 0x41,
+ 0x06, 0xad, 0x4d, 0xe6, 0x90, 0x3b, 0xdb, 0x70,
+ 0x55, 0xfe, 0x1e, 0xb5, 0xc3, 0x68, 0x88, 0x23,
+ 0x64, 0xcf, 0x2f, 0x84, 0xf2, 0x59, 0xb9, 0x12,
+ 0xf3, 0x58, 0xb8, 0x13, 0x65, 0xce, 0x2e, 0x85,
+ 0xc2, 0x69, 0x89, 0x22, 0x54, 0xff, 0x1f, 0xb4,
+ 0x91, 0x3a, 0xda, 0x71, 0x07, 0xac, 0x4c, 0xe7,
+ 0xa0, 0x0b, 0xeb, 0x40, 0x36, 0x9d, 0x7d, 0xd6,
+ 0xa2, 0x09, 0xe9, 0x42, 0x34, 0x9f, 0x7f, 0xd4,
+ 0x93, 0x38, 0xd8, 0x73, 0x05, 0xae, 0x4e, 0xe5,
+ 0xc0, 0x6b, 0x8b, 0x20, 0x56, 0xfd, 0x1d, 0xb6,
+ 0xf1, 0x5a, 0xba, 0x11, 0x67, 0xcc, 0x2c, 0x87,
+ 0x66, 0xcd, 0x2d, 0x86, 0xf0, 0x5b, 0xbb, 0x10,
+ 0x57, 0xfc, 0x1c, 0xb7, 0xc1, 0x6a, 0x8a, 0x21,
+ 0x04, 0xaf, 0x4f, 0xe4, 0x92, 0x39, 0xd9, 0x72,
+ 0x35, 0x9e, 0x7e, 0xd5, 0xa3, 0x08, 0xe8, 0x43,
+ },
+ {
+ 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63,
+ 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a,
+ 0x12, 0xbe, 0x57, 0xfb, 0x98, 0x34, 0xdd, 0x71,
+ 0x1b, 0xb7, 0x5e, 0xf2, 0x91, 0x3d, 0xd4, 0x78,
+ 0x24, 0x88, 0x61, 0xcd, 0xae, 0x02, 0xeb, 0x47,
+ 0x2d, 0x81, 0x68, 0xc4, 0xa7, 0x0b, 0xe2, 0x4e,
+ 0x36, 0x9a, 0x73, 0xdf, 0xbc, 0x10, 0xf9, 0x55,
+ 0x3f, 0x93, 0x7a, 0xd6, 0xb5, 0x19, 0xf0, 0x5c,
+ 0x48, 0xe4, 0x0d, 0xa1, 0xc2, 0x6e, 0x87, 0x2b,
+ 0x41, 0xed, 0x04, 0xa8, 0xcb, 0x67, 0x8e, 0x22,
+ 0x5a, 0xf6, 0x1f, 0xb3, 0xd0, 0x7c, 0x95, 0x39,
+ 0x53, 0xff, 0x16, 0xba, 0xd9, 0x75, 0x9c, 0x30,
+ 0x6c, 0xc0, 0x29, 0x85, 0xe6, 0x4a, 0xa3, 0x0f,
+ 0x65, 0xc9, 0x20, 0x8c, 0xef, 0x43, 0xaa, 0x06,
+ 0x7e, 0xd2, 0x3b, 0x97, 0xf4, 0x58, 0xb1, 0x1d,
+ 0x77, 0xdb, 0x32, 0x9e, 0xfd, 0x51, 0xb8, 0x14,
+ 0x90, 0x3c, 0xd5, 0x79, 0x1a, 0xb6, 0x5f, 0xf3,
+ 0x99, 0x35, 0xdc, 0x70, 0x13, 0xbf, 0x56, 0xfa,
+ 0x82, 0x2e, 0xc7, 0x6b, 0x08, 0xa4, 0x4d, 0xe1,
+ 0x8b, 0x27, 0xce, 0x62, 0x01, 0xad, 0x44, 0xe8,
+ 0xb4, 0x18, 0xf1, 0x5d, 0x3e, 0x92, 0x7b, 0xd7,
+ 0xbd, 0x11, 0xf8, 0x54, 0x37, 0x9b, 0x72, 0xde,
+ 0xa6, 0x0a, 0xe3, 0x4f, 0x2c, 0x80, 0x69, 0xc5,
+ 0xaf, 0x03, 0xea, 0x46, 0x25, 0x89, 0x60, 0xcc,
+ 0xd8, 0x74, 0x9d, 0x31, 0x52, 0xfe, 0x17, 0xbb,
+ 0xd1, 0x7d, 0x94, 0x38, 0x5b, 0xf7, 0x1e, 0xb2,
+ 0xca, 0x66, 0x8f, 0x23, 0x40, 0xec, 0x05, 0xa9,
+ 0xc3, 0x6f, 0x86, 0x2a, 0x49, 0xe5, 0x0c, 0xa0,
+ 0xfc, 0x50, 0xb9, 0x15, 0x76, 0xda, 0x33, 0x9f,
+ 0xf5, 0x59, 0xb0, 0x1c, 0x7f, 0xd3, 0x3a, 0x96,
+ 0xee, 0x42, 0xab, 0x07, 0x64, 0xc8, 0x21, 0x8d,
+ 0xe7, 0x4b, 0xa2, 0x0e, 0x6d, 0xc1, 0x28, 0x84,
+ },
+ {
+ 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64,
+ 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65,
+ 0x02, 0xaf, 0x45, 0xe8, 0x8c, 0x21, 0xcb, 0x66,
+ 0x03, 0xae, 0x44, 0xe9, 0x8d, 0x20, 0xca, 0x67,
+ 0x04, 0xa9, 0x43, 0xee, 0x8a, 0x27, 0xcd, 0x60,
+ 0x05, 0xa8, 0x42, 0xef, 0x8b, 0x26, 0xcc, 0x61,
+ 0x06, 0xab, 0x41, 0xec, 0x88, 0x25, 0xcf, 0x62,
+ 0x07, 0xaa, 0x40, 0xed, 0x89, 0x24, 0xce, 0x63,
+ 0x08, 0xa5, 0x4f, 0xe2, 0x86, 0x2b, 0xc1, 0x6c,
+ 0x09, 0xa4, 0x4e, 0xe3, 0x87, 0x2a, 0xc0, 0x6d,
+ 0x0a, 0xa7, 0x4d, 0xe0, 0x84, 0x29, 0xc3, 0x6e,
+ 0x0b, 0xa6, 0x4c, 0xe1, 0x85, 0x28, 0xc2, 0x6f,
+ 0x0c, 0xa1, 0x4b, 0xe6, 0x82, 0x2f, 0xc5, 0x68,
+ 0x0d, 0xa0, 0x4a, 0xe7, 0x83, 0x2e, 0xc4, 0x69,
+ 0x0e, 0xa3, 0x49, 0xe4, 0x80, 0x2d, 0xc7, 0x6a,
+ 0x0f, 0xa2, 0x48, 0xe5, 0x81, 0x2c, 0xc6, 0x6b,
+ 0x10, 0xbd, 0x57, 0xfa, 0x9e, 0x33, 0xd9, 0x74,
+ 0x11, 0xbc, 0x56, 0xfb, 0x9f, 0x32, 0xd8, 0x75,
+ 0x12, 0xbf, 0x55, 0xf8, 0x9c, 0x31, 0xdb, 0x76,
+ 0x13, 0xbe, 0x54, 0xf9, 0x9d, 0x30, 0xda, 0x77,
+ 0x14, 0xb9, 0x53, 0xfe, 0x9a, 0x37, 0xdd, 0x70,
+ 0x15, 0xb8, 0x52, 0xff, 0x9b, 0x36, 0xdc, 0x71,
+ 0x16, 0xbb, 0x51, 0xfc, 0x98, 0x35, 0xdf, 0x72,
+ 0x17, 0xba, 0x50, 0xfd, 0x99, 0x34, 0xde, 0x73,
+ 0x18, 0xb5, 0x5f, 0xf2, 0x96, 0x3b, 0xd1, 0x7c,
+ 0x19, 0xb4, 0x5e, 0xf3, 0x97, 0x3a, 0xd0, 0x7d,
+ 0x1a, 0xb7, 0x5d, 0xf0, 0x94, 0x39, 0xd3, 0x7e,
+ 0x1b, 0xb6, 0x5c, 0xf1, 0x95, 0x38, 0xd2, 0x7f,
+ 0x1c, 0xb1, 0x5b, 0xf6, 0x92, 0x3f, 0xd5, 0x78,
+ 0x1d, 0xb0, 0x5a, 0xf7, 0x93, 0x3e, 0xd4, 0x79,
+ 0x1e, 0xb3, 0x59, 0xf4, 0x90, 0x3d, 0xd7, 0x7a,
+ 0x1f, 0xb2, 0x58, 0xf5, 0x91, 0x3c, 0xd6, 0x7b,
+ },
+ {
+ 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d,
+ 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74,
+ 0x32, 0x9c, 0x73, 0xdd, 0xb0, 0x1e, 0xf1, 0x5f,
+ 0x2b, 0x85, 0x6a, 0xc4, 0xa9, 0x07, 0xe8, 0x46,
+ 0x64, 0xca, 0x25, 0x8b, 0xe6, 0x48, 0xa7, 0x09,
+ 0x7d, 0xd3, 0x3c, 0x92, 0xff, 0x51, 0xbe, 0x10,
+ 0x56, 0xf8, 0x17, 0xb9, 0xd4, 0x7a, 0x95, 0x3b,
+ 0x4f, 0xe1, 0x0e, 0xa0, 0xcd, 0x63, 0x8c, 0x22,
+ 0xc8, 0x66, 0x89, 0x27, 0x4a, 0xe4, 0x0b, 0xa5,
+ 0xd1, 0x7f, 0x90, 0x3e, 0x53, 0xfd, 0x12, 0xbc,
+ 0xfa, 0x54, 0xbb, 0x15, 0x78, 0xd6, 0x39, 0x97,
+ 0xe3, 0x4d, 0xa2, 0x0c, 0x61, 0xcf, 0x20, 0x8e,
+ 0xac, 0x02, 0xed, 0x43, 0x2e, 0x80, 0x6f, 0xc1,
+ 0xb5, 0x1b, 0xf4, 0x5a, 0x37, 0x99, 0x76, 0xd8,
+ 0x9e, 0x30, 0xdf, 0x71, 0x1c, 0xb2, 0x5d, 0xf3,
+ 0x87, 0x29, 0xc6, 0x68, 0x05, 0xab, 0x44, 0xea,
+ 0x8d, 0x23, 0xcc, 0x62, 0x0f, 0xa1, 0x4e, 0xe0,
+ 0x94, 0x3a, 0xd5, 0x7b, 0x16, 0xb8, 0x57, 0xf9,
+ 0xbf, 0x11, 0xfe, 0x50, 0x3d, 0x93, 0x7c, 0xd2,
+ 0xa6, 0x08, 0xe7, 0x49, 0x24, 0x8a, 0x65, 0xcb,
+ 0xe9, 0x47, 0xa8, 0x06, 0x6b, 0xc5, 0x2a, 0x84,
+ 0xf0, 0x5e, 0xb1, 0x1f, 0x72, 0xdc, 0x33, 0x9d,
+ 0xdb, 0x75, 0x9a, 0x34, 0x59, 0xf7, 0x18, 0xb6,
+ 0xc2, 0x6c, 0x83, 0x2d, 0x40, 0xee, 0x01, 0xaf,
+ 0x45, 0xeb, 0x04, 0xaa, 0xc7, 0x69, 0x86, 0x28,
+ 0x5c, 0xf2, 0x1d, 0xb3, 0xde, 0x70, 0x9f, 0x31,
+ 0x77, 0xd9, 0x36, 0x98, 0xf5, 0x5b, 0xb4, 0x1a,
+ 0x6e, 0xc0, 0x2f, 0x81, 0xec, 0x42, 0xad, 0x03,
+ 0x21, 0x8f, 0x60, 0xce, 0xa3, 0x0d, 0xe2, 0x4c,
+ 0x38, 0x96, 0x79, 0xd7, 0xba, 0x14, 0xfb, 0x55,
+ 0x13, 0xbd, 0x52, 0xfc, 0x91, 0x3f, 0xd0, 0x7e,
+ 0x0a, 0xa4, 0x4b, 0xe5, 0x88, 0x26, 0xc9, 0x67,
+ },
+ {
+ 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a,
+ 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b,
+ 0x22, 0x8d, 0x61, 0xce, 0xa4, 0x0b, 0xe7, 0x48,
+ 0x33, 0x9c, 0x70, 0xdf, 0xb5, 0x1a, 0xf6, 0x59,
+ 0x44, 0xeb, 0x07, 0xa8, 0xc2, 0x6d, 0x81, 0x2e,
+ 0x55, 0xfa, 0x16, 0xb9, 0xd3, 0x7c, 0x90, 0x3f,
+ 0x66, 0xc9, 0x25, 0x8a, 0xe0, 0x4f, 0xa3, 0x0c,
+ 0x77, 0xd8, 0x34, 0x9b, 0xf1, 0x5e, 0xb2, 0x1d,
+ 0x88, 0x27, 0xcb, 0x64, 0x0e, 0xa1, 0x4d, 0xe2,
+ 0x99, 0x36, 0xda, 0x75, 0x1f, 0xb0, 0x5c, 0xf3,
+ 0xaa, 0x05, 0xe9, 0x46, 0x2c, 0x83, 0x6f, 0xc0,
+ 0xbb, 0x14, 0xf8, 0x57, 0x3d, 0x92, 0x7e, 0xd1,
+ 0xcc, 0x63, 0x8f, 0x20, 0x4a, 0xe5, 0x09, 0xa6,
+ 0xdd, 0x72, 0x9e, 0x31, 0x5b, 0xf4, 0x18, 0xb7,
+ 0xee, 0x41, 0xad, 0x02, 0x68, 0xc7, 0x2b, 0x84,
+ 0xff, 0x50, 0xbc, 0x13, 0x79, 0xd6, 0x3a, 0x95,
+ 0x0d, 0xa2, 0x4e, 0xe1, 0x8b, 0x24, 0xc8, 0x67,
+ 0x1c, 0xb3, 0x5f, 0xf0, 0x9a, 0x35, 0xd9, 0x76,
+ 0x2f, 0x80, 0x6c, 0xc3, 0xa9, 0x06, 0xea, 0x45,
+ 0x3e, 0x91, 0x7d, 0xd2, 0xb8, 0x17, 0xfb, 0x54,
+ 0x49, 0xe6, 0x0a, 0xa5, 0xcf, 0x60, 0x8c, 0x23,
+ 0x58, 0xf7, 0x1b, 0xb4, 0xde, 0x71, 0x9d, 0x32,
+ 0x6b, 0xc4, 0x28, 0x87, 0xed, 0x42, 0xae, 0x01,
+ 0x7a, 0xd5, 0x39, 0x96, 0xfc, 0x53, 0xbf, 0x10,
+ 0x85, 0x2a, 0xc6, 0x69, 0x03, 0xac, 0x40, 0xef,
+ 0x94, 0x3b, 0xd7, 0x78, 0x12, 0xbd, 0x51, 0xfe,
+ 0xa7, 0x08, 0xe4, 0x4b, 0x21, 0x8e, 0x62, 0xcd,
+ 0xb6, 0x19, 0xf5, 0x5a, 0x30, 0x9f, 0x73, 0xdc,
+ 0xc1, 0x6e, 0x82, 0x2d, 0x47, 0xe8, 0x04, 0xab,
+ 0xd0, 0x7f, 0x93, 0x3c, 0x56, 0xf9, 0x15, 0xba,
+ 0xe3, 0x4c, 0xa0, 0x0f, 0x65, 0xca, 0x26, 0x89,
+ 0xf2, 0x5d, 0xb1, 0x1e, 0x74, 0xdb, 0x37, 0x98,
+ },
+ {
+ 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37,
+ 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde,
+ 0xcf, 0x7f, 0xb2, 0x02, 0x35, 0x85, 0x48, 0xf8,
+ 0x26, 0x96, 0x5b, 0xeb, 0xdc, 0x6c, 0xa1, 0x11,
+ 0x83, 0x33, 0xfe, 0x4e, 0x79, 0xc9, 0x04, 0xb4,
+ 0x6a, 0xda, 0x17, 0xa7, 0x90, 0x20, 0xed, 0x5d,
+ 0x4c, 0xfc, 0x31, 0x81, 0xb6, 0x06, 0xcb, 0x7b,
+ 0xa5, 0x15, 0xd8, 0x68, 0x5f, 0xef, 0x22, 0x92,
+ 0x1b, 0xab, 0x66, 0xd6, 0xe1, 0x51, 0x9c, 0x2c,
+ 0xf2, 0x42, 0x8f, 0x3f, 0x08, 0xb8, 0x75, 0xc5,
+ 0xd4, 0x64, 0xa9, 0x19, 0x2e, 0x9e, 0x53, 0xe3,
+ 0x3d, 0x8d, 0x40, 0xf0, 0xc7, 0x77, 0xba, 0x0a,
+ 0x98, 0x28, 0xe5, 0x55, 0x62, 0xd2, 0x1f, 0xaf,
+ 0x71, 0xc1, 0x0c, 0xbc, 0x8b, 0x3b, 0xf6, 0x46,
+ 0x57, 0xe7, 0x2a, 0x9a, 0xad, 0x1d, 0xd0, 0x60,
+ 0xbe, 0x0e, 0xc3, 0x73, 0x44, 0xf4, 0x39, 0x89,
+ 0x36, 0x86, 0x4b, 0xfb, 0xcc, 0x7c, 0xb1, 0x01,
+ 0xdf, 0x6f, 0xa2, 0x12, 0x25, 0x95, 0x58, 0xe8,
+ 0xf9, 0x49, 0x84, 0x34, 0x03, 0xb3, 0x7e, 0xce,
+ 0x10, 0xa0, 0x6d, 0xdd, 0xea, 0x5a, 0x97, 0x27,
+ 0xb5, 0x05, 0xc8, 0x78, 0x4f, 0xff, 0x32, 0x82,
+ 0x5c, 0xec, 0x21, 0x91, 0xa6, 0x16, 0xdb, 0x6b,
+ 0x7a, 0xca, 0x07, 0xb7, 0x80, 0x30, 0xfd, 0x4d,
+ 0x93, 0x23, 0xee, 0x5e, 0x69, 0xd9, 0x14, 0xa4,
+ 0x2d, 0x9d, 0x50, 0xe0, 0xd7, 0x67, 0xaa, 0x1a,
+ 0xc4, 0x74, 0xb9, 0x09, 0x3e, 0x8e, 0x43, 0xf3,
+ 0xe2, 0x52, 0x9f, 0x2f, 0x18, 0xa8, 0x65, 0xd5,
+ 0x0b, 0xbb, 0x76, 0xc6, 0xf1, 0x41, 0x8c, 0x3c,
+ 0xae, 0x1e, 0xd3, 0x63, 0x54, 0xe4, 0x29, 0x99,
+ 0x47, 0xf7, 0x3a, 0x8a, 0xbd, 0x0d, 0xc0, 0x70,
+ 0x61, 0xd1, 0x1c, 0xac, 0x9b, 0x2b, 0xe6, 0x56,
+ 0x88, 0x38, 0xf5, 0x45, 0x72, 0xc2, 0x0f, 0xbf,
+ },
+ {
+ 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30,
+ 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1,
+ 0xdf, 0x6e, 0xa0, 0x11, 0x21, 0x90, 0x5e, 0xef,
+ 0x3e, 0x8f, 0x41, 0xf0, 0xc0, 0x71, 0xbf, 0x0e,
+ 0xa3, 0x12, 0xdc, 0x6d, 0x5d, 0xec, 0x22, 0x93,
+ 0x42, 0xf3, 0x3d, 0x8c, 0xbc, 0x0d, 0xc3, 0x72,
+ 0x7c, 0xcd, 0x03, 0xb2, 0x82, 0x33, 0xfd, 0x4c,
+ 0x9d, 0x2c, 0xe2, 0x53, 0x63, 0xd2, 0x1c, 0xad,
+ 0x5b, 0xea, 0x24, 0x95, 0xa5, 0x14, 0xda, 0x6b,
+ 0xba, 0x0b, 0xc5, 0x74, 0x44, 0xf5, 0x3b, 0x8a,
+ 0x84, 0x35, 0xfb, 0x4a, 0x7a, 0xcb, 0x05, 0xb4,
+ 0x65, 0xd4, 0x1a, 0xab, 0x9b, 0x2a, 0xe4, 0x55,
+ 0xf8, 0x49, 0x87, 0x36, 0x06, 0xb7, 0x79, 0xc8,
+ 0x19, 0xa8, 0x66, 0xd7, 0xe7, 0x56, 0x98, 0x29,
+ 0x27, 0x96, 0x58, 0xe9, 0xd9, 0x68, 0xa6, 0x17,
+ 0xc6, 0x77, 0xb9, 0x08, 0x38, 0x89, 0x47, 0xf6,
+ 0xb6, 0x07, 0xc9, 0x78, 0x48, 0xf9, 0x37, 0x86,
+ 0x57, 0xe6, 0x28, 0x99, 0xa9, 0x18, 0xd6, 0x67,
+ 0x69, 0xd8, 0x16, 0xa7, 0x97, 0x26, 0xe8, 0x59,
+ 0x88, 0x39, 0xf7, 0x46, 0x76, 0xc7, 0x09, 0xb8,
+ 0x15, 0xa4, 0x6a, 0xdb, 0xeb, 0x5a, 0x94, 0x25,
+ 0xf4, 0x45, 0x8b, 0x3a, 0x0a, 0xbb, 0x75, 0xc4,
+ 0xca, 0x7b, 0xb5, 0x04, 0x34, 0x85, 0x4b, 0xfa,
+ 0x2b, 0x9a, 0x54, 0xe5, 0xd5, 0x64, 0xaa, 0x1b,
+ 0xed, 0x5c, 0x92, 0x23, 0x13, 0xa2, 0x6c, 0xdd,
+ 0x0c, 0xbd, 0x73, 0xc2, 0xf2, 0x43, 0x8d, 0x3c,
+ 0x32, 0x83, 0x4d, 0xfc, 0xcc, 0x7d, 0xb3, 0x02,
+ 0xd3, 0x62, 0xac, 0x1d, 0x2d, 0x9c, 0x52, 0xe3,
+ 0x4e, 0xff, 0x31, 0x80, 0xb0, 0x01, 0xcf, 0x7e,
+ 0xaf, 0x1e, 0xd0, 0x61, 0x51, 0xe0, 0x2e, 0x9f,
+ 0x91, 0x20, 0xee, 0x5f, 0x6f, 0xde, 0x10, 0xa1,
+ 0x70, 0xc1, 0x0f, 0xbe, 0x8e, 0x3f, 0xf1, 0x40,
+ },
+ {
+ 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39,
+ 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0,
+ 0xef, 0x5d, 0x96, 0x24, 0x1d, 0xaf, 0x64, 0xd6,
+ 0x16, 0xa4, 0x6f, 0xdd, 0xe4, 0x56, 0x9d, 0x2f,
+ 0xc3, 0x71, 0xba, 0x08, 0x31, 0x83, 0x48, 0xfa,
+ 0x3a, 0x88, 0x43, 0xf1, 0xc8, 0x7a, 0xb1, 0x03,
+ 0x2c, 0x9e, 0x55, 0xe7, 0xde, 0x6c, 0xa7, 0x15,
+ 0xd5, 0x67, 0xac, 0x1e, 0x27, 0x95, 0x5e, 0xec,
+ 0x9b, 0x29, 0xe2, 0x50, 0x69, 0xdb, 0x10, 0xa2,
+ 0x62, 0xd0, 0x1b, 0xa9, 0x90, 0x22, 0xe9, 0x5b,
+ 0x74, 0xc6, 0x0d, 0xbf, 0x86, 0x34, 0xff, 0x4d,
+ 0x8d, 0x3f, 0xf4, 0x46, 0x7f, 0xcd, 0x06, 0xb4,
+ 0x58, 0xea, 0x21, 0x93, 0xaa, 0x18, 0xd3, 0x61,
+ 0xa1, 0x13, 0xd8, 0x6a, 0x53, 0xe1, 0x2a, 0x98,
+ 0xb7, 0x05, 0xce, 0x7c, 0x45, 0xf7, 0x3c, 0x8e,
+ 0x4e, 0xfc, 0x37, 0x85, 0xbc, 0x0e, 0xc5, 0x77,
+ 0x2b, 0x99, 0x52, 0xe0, 0xd9, 0x6b, 0xa0, 0x12,
+ 0xd2, 0x60, 0xab, 0x19, 0x20, 0x92, 0x59, 0xeb,
+ 0xc4, 0x76, 0xbd, 0x0f, 0x36, 0x84, 0x4f, 0xfd,
+ 0x3d, 0x8f, 0x44, 0xf6, 0xcf, 0x7d, 0xb6, 0x04,
+ 0xe8, 0x5a, 0x91, 0x23, 0x1a, 0xa8, 0x63, 0xd1,
+ 0x11, 0xa3, 0x68, 0xda, 0xe3, 0x51, 0x9a, 0x28,
+ 0x07, 0xb5, 0x7e, 0xcc, 0xf5, 0x47, 0x8c, 0x3e,
+ 0xfe, 0x4c, 0x87, 0x35, 0x0c, 0xbe, 0x75, 0xc7,
+ 0xb0, 0x02, 0xc9, 0x7b, 0x42, 0xf0, 0x3b, 0x89,
+ 0x49, 0xfb, 0x30, 0x82, 0xbb, 0x09, 0xc2, 0x70,
+ 0x5f, 0xed, 0x26, 0x94, 0xad, 0x1f, 0xd4, 0x66,
+ 0xa6, 0x14, 0xdf, 0x6d, 0x54, 0xe6, 0x2d, 0x9f,
+ 0x73, 0xc1, 0x0a, 0xb8, 0x81, 0x33, 0xf8, 0x4a,
+ 0x8a, 0x38, 0xf3, 0x41, 0x78, 0xca, 0x01, 0xb3,
+ 0x9c, 0x2e, 0xe5, 0x57, 0x6e, 0xdc, 0x17, 0xa5,
+ 0x65, 0xd7, 0x1c, 0xae, 0x97, 0x25, 0xee, 0x5c,
+ },
+ {
+ 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e,
+ 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf,
+ 0xff, 0x4c, 0x84, 0x37, 0x09, 0xba, 0x72, 0xc1,
+ 0x0e, 0xbd, 0x75, 0xc6, 0xf8, 0x4b, 0x83, 0x30,
+ 0xe3, 0x50, 0x98, 0x2b, 0x15, 0xa6, 0x6e, 0xdd,
+ 0x12, 0xa1, 0x69, 0xda, 0xe4, 0x57, 0x9f, 0x2c,
+ 0x1c, 0xaf, 0x67, 0xd4, 0xea, 0x59, 0x91, 0x22,
+ 0xed, 0x5e, 0x96, 0x25, 0x1b, 0xa8, 0x60, 0xd3,
+ 0xdb, 0x68, 0xa0, 0x13, 0x2d, 0x9e, 0x56, 0xe5,
+ 0x2a, 0x99, 0x51, 0xe2, 0xdc, 0x6f, 0xa7, 0x14,
+ 0x24, 0x97, 0x5f, 0xec, 0xd2, 0x61, 0xa9, 0x1a,
+ 0xd5, 0x66, 0xae, 0x1d, 0x23, 0x90, 0x58, 0xeb,
+ 0x38, 0x8b, 0x43, 0xf0, 0xce, 0x7d, 0xb5, 0x06,
+ 0xc9, 0x7a, 0xb2, 0x01, 0x3f, 0x8c, 0x44, 0xf7,
+ 0xc7, 0x74, 0xbc, 0x0f, 0x31, 0x82, 0x4a, 0xf9,
+ 0x36, 0x85, 0x4d, 0xfe, 0xc0, 0x73, 0xbb, 0x08,
+ 0xab, 0x18, 0xd0, 0x63, 0x5d, 0xee, 0x26, 0x95,
+ 0x5a, 0xe9, 0x21, 0x92, 0xac, 0x1f, 0xd7, 0x64,
+ 0x54, 0xe7, 0x2f, 0x9c, 0xa2, 0x11, 0xd9, 0x6a,
+ 0xa5, 0x16, 0xde, 0x6d, 0x53, 0xe0, 0x28, 0x9b,
+ 0x48, 0xfb, 0x33, 0x80, 0xbe, 0x0d, 0xc5, 0x76,
+ 0xb9, 0x0a, 0xc2, 0x71, 0x4f, 0xfc, 0x34, 0x87,
+ 0xb7, 0x04, 0xcc, 0x7f, 0x41, 0xf2, 0x3a, 0x89,
+ 0x46, 0xf5, 0x3d, 0x8e, 0xb0, 0x03, 0xcb, 0x78,
+ 0x70, 0xc3, 0x0b, 0xb8, 0x86, 0x35, 0xfd, 0x4e,
+ 0x81, 0x32, 0xfa, 0x49, 0x77, 0xc4, 0x0c, 0xbf,
+ 0x8f, 0x3c, 0xf4, 0x47, 0x79, 0xca, 0x02, 0xb1,
+ 0x7e, 0xcd, 0x05, 0xb6, 0x88, 0x3b, 0xf3, 0x40,
+ 0x93, 0x20, 0xe8, 0x5b, 0x65, 0xd6, 0x1e, 0xad,
+ 0x62, 0xd1, 0x19, 0xaa, 0x94, 0x27, 0xef, 0x5c,
+ 0x6c, 0xdf, 0x17, 0xa4, 0x9a, 0x29, 0xe1, 0x52,
+ 0x9d, 0x2e, 0xe6, 0x55, 0x6b, 0xd8, 0x10, 0xa3,
+ },
+ {
+ 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b,
+ 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2,
+ 0x8f, 0x3b, 0xfa, 0x4e, 0x65, 0xd1, 0x10, 0xa4,
+ 0x46, 0xf2, 0x33, 0x87, 0xac, 0x18, 0xd9, 0x6d,
+ 0x03, 0xb7, 0x76, 0xc2, 0xe9, 0x5d, 0x9c, 0x28,
+ 0xca, 0x7e, 0xbf, 0x0b, 0x20, 0x94, 0x55, 0xe1,
+ 0x8c, 0x38, 0xf9, 0x4d, 0x66, 0xd2, 0x13, 0xa7,
+ 0x45, 0xf1, 0x30, 0x84, 0xaf, 0x1b, 0xda, 0x6e,
+ 0x06, 0xb2, 0x73, 0xc7, 0xec, 0x58, 0x99, 0x2d,
+ 0xcf, 0x7b, 0xba, 0x0e, 0x25, 0x91, 0x50, 0xe4,
+ 0x89, 0x3d, 0xfc, 0x48, 0x63, 0xd7, 0x16, 0xa2,
+ 0x40, 0xf4, 0x35, 0x81, 0xaa, 0x1e, 0xdf, 0x6b,
+ 0x05, 0xb1, 0x70, 0xc4, 0xef, 0x5b, 0x9a, 0x2e,
+ 0xcc, 0x78, 0xb9, 0x0d, 0x26, 0x92, 0x53, 0xe7,
+ 0x8a, 0x3e, 0xff, 0x4b, 0x60, 0xd4, 0x15, 0xa1,
+ 0x43, 0xf7, 0x36, 0x82, 0xa9, 0x1d, 0xdc, 0x68,
+ 0x0c, 0xb8, 0x79, 0xcd, 0xe6, 0x52, 0x93, 0x27,
+ 0xc5, 0x71, 0xb0, 0x04, 0x2f, 0x9b, 0x5a, 0xee,
+ 0x83, 0x37, 0xf6, 0x42, 0x69, 0xdd, 0x1c, 0xa8,
+ 0x4a, 0xfe, 0x3f, 0x8b, 0xa0, 0x14, 0xd5, 0x61,
+ 0x0f, 0xbb, 0x7a, 0xce, 0xe5, 0x51, 0x90, 0x24,
+ 0xc6, 0x72, 0xb3, 0x07, 0x2c, 0x98, 0x59, 0xed,
+ 0x80, 0x34, 0xf5, 0x41, 0x6a, 0xde, 0x1f, 0xab,
+ 0x49, 0xfd, 0x3c, 0x88, 0xa3, 0x17, 0xd6, 0x62,
+ 0x0a, 0xbe, 0x7f, 0xcb, 0xe0, 0x54, 0x95, 0x21,
+ 0xc3, 0x77, 0xb6, 0x02, 0x29, 0x9d, 0x5c, 0xe8,
+ 0x85, 0x31, 0xf0, 0x44, 0x6f, 0xdb, 0x1a, 0xae,
+ 0x4c, 0xf8, 0x39, 0x8d, 0xa6, 0x12, 0xd3, 0x67,
+ 0x09, 0xbd, 0x7c, 0xc8, 0xe3, 0x57, 0x96, 0x22,
+ 0xc0, 0x74, 0xb5, 0x01, 0x2a, 0x9e, 0x5f, 0xeb,
+ 0x86, 0x32, 0xf3, 0x47, 0x6c, 0xd8, 0x19, 0xad,
+ 0x4f, 0xfb, 0x3a, 0x8e, 0xa5, 0x11, 0xd0, 0x64,
+ },
+ {
+ 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c,
+ 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed,
+ 0x9f, 0x2a, 0xe8, 0x5d, 0x71, 0xc4, 0x06, 0xb3,
+ 0x5e, 0xeb, 0x29, 0x9c, 0xb0, 0x05, 0xc7, 0x72,
+ 0x23, 0x96, 0x54, 0xe1, 0xcd, 0x78, 0xba, 0x0f,
+ 0xe2, 0x57, 0x95, 0x20, 0x0c, 0xb9, 0x7b, 0xce,
+ 0xbc, 0x09, 0xcb, 0x7e, 0x52, 0xe7, 0x25, 0x90,
+ 0x7d, 0xc8, 0x0a, 0xbf, 0x93, 0x26, 0xe4, 0x51,
+ 0x46, 0xf3, 0x31, 0x84, 0xa8, 0x1d, 0xdf, 0x6a,
+ 0x87, 0x32, 0xf0, 0x45, 0x69, 0xdc, 0x1e, 0xab,
+ 0xd9, 0x6c, 0xae, 0x1b, 0x37, 0x82, 0x40, 0xf5,
+ 0x18, 0xad, 0x6f, 0xda, 0xf6, 0x43, 0x81, 0x34,
+ 0x65, 0xd0, 0x12, 0xa7, 0x8b, 0x3e, 0xfc, 0x49,
+ 0xa4, 0x11, 0xd3, 0x66, 0x4a, 0xff, 0x3d, 0x88,
+ 0xfa, 0x4f, 0x8d, 0x38, 0x14, 0xa1, 0x63, 0xd6,
+ 0x3b, 0x8e, 0x4c, 0xf9, 0xd5, 0x60, 0xa2, 0x17,
+ 0x8c, 0x39, 0xfb, 0x4e, 0x62, 0xd7, 0x15, 0xa0,
+ 0x4d, 0xf8, 0x3a, 0x8f, 0xa3, 0x16, 0xd4, 0x61,
+ 0x13, 0xa6, 0x64, 0xd1, 0xfd, 0x48, 0x8a, 0x3f,
+ 0xd2, 0x67, 0xa5, 0x10, 0x3c, 0x89, 0x4b, 0xfe,
+ 0xaf, 0x1a, 0xd8, 0x6d, 0x41, 0xf4, 0x36, 0x83,
+ 0x6e, 0xdb, 0x19, 0xac, 0x80, 0x35, 0xf7, 0x42,
+ 0x30, 0x85, 0x47, 0xf2, 0xde, 0x6b, 0xa9, 0x1c,
+ 0xf1, 0x44, 0x86, 0x33, 0x1f, 0xaa, 0x68, 0xdd,
+ 0xca, 0x7f, 0xbd, 0x08, 0x24, 0x91, 0x53, 0xe6,
+ 0x0b, 0xbe, 0x7c, 0xc9, 0xe5, 0x50, 0x92, 0x27,
+ 0x55, 0xe0, 0x22, 0x97, 0xbb, 0x0e, 0xcc, 0x79,
+ 0x94, 0x21, 0xe3, 0x56, 0x7a, 0xcf, 0x0d, 0xb8,
+ 0xe9, 0x5c, 0x9e, 0x2b, 0x07, 0xb2, 0x70, 0xc5,
+ 0x28, 0x9d, 0x5f, 0xea, 0xc6, 0x73, 0xb1, 0x04,
+ 0x76, 0xc3, 0x01, 0xb4, 0x98, 0x2d, 0xef, 0x5a,
+ 0xb7, 0x02, 0xc0, 0x75, 0x59, 0xec, 0x2e, 0x9b,
+ },
+ {
+ 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25,
+ 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc,
+ 0xaf, 0x19, 0xde, 0x68, 0x4d, 0xfb, 0x3c, 0x8a,
+ 0x76, 0xc0, 0x07, 0xb1, 0x94, 0x22, 0xe5, 0x53,
+ 0x43, 0xf5, 0x32, 0x84, 0xa1, 0x17, 0xd0, 0x66,
+ 0x9a, 0x2c, 0xeb, 0x5d, 0x78, 0xce, 0x09, 0xbf,
+ 0xec, 0x5a, 0x9d, 0x2b, 0x0e, 0xb8, 0x7f, 0xc9,
+ 0x35, 0x83, 0x44, 0xf2, 0xd7, 0x61, 0xa6, 0x10,
+ 0x86, 0x30, 0xf7, 0x41, 0x64, 0xd2, 0x15, 0xa3,
+ 0x5f, 0xe9, 0x2e, 0x98, 0xbd, 0x0b, 0xcc, 0x7a,
+ 0x29, 0x9f, 0x58, 0xee, 0xcb, 0x7d, 0xba, 0x0c,
+ 0xf0, 0x46, 0x81, 0x37, 0x12, 0xa4, 0x63, 0xd5,
+ 0xc5, 0x73, 0xb4, 0x02, 0x27, 0x91, 0x56, 0xe0,
+ 0x1c, 0xaa, 0x6d, 0xdb, 0xfe, 0x48, 0x8f, 0x39,
+ 0x6a, 0xdc, 0x1b, 0xad, 0x88, 0x3e, 0xf9, 0x4f,
+ 0xb3, 0x05, 0xc2, 0x74, 0x51, 0xe7, 0x20, 0x96,
+ 0x11, 0xa7, 0x60, 0xd6, 0xf3, 0x45, 0x82, 0x34,
+ 0xc8, 0x7e, 0xb9, 0x0f, 0x2a, 0x9c, 0x5b, 0xed,
+ 0xbe, 0x08, 0xcf, 0x79, 0x5c, 0xea, 0x2d, 0x9b,
+ 0x67, 0xd1, 0x16, 0xa0, 0x85, 0x33, 0xf4, 0x42,
+ 0x52, 0xe4, 0x23, 0x95, 0xb0, 0x06, 0xc1, 0x77,
+ 0x8b, 0x3d, 0xfa, 0x4c, 0x69, 0xdf, 0x18, 0xae,
+ 0xfd, 0x4b, 0x8c, 0x3a, 0x1f, 0xa9, 0x6e, 0xd8,
+ 0x24, 0x92, 0x55, 0xe3, 0xc6, 0x70, 0xb7, 0x01,
+ 0x97, 0x21, 0xe6, 0x50, 0x75, 0xc3, 0x04, 0xb2,
+ 0x4e, 0xf8, 0x3f, 0x89, 0xac, 0x1a, 0xdd, 0x6b,
+ 0x38, 0x8e, 0x49, 0xff, 0xda, 0x6c, 0xab, 0x1d,
+ 0xe1, 0x57, 0x90, 0x26, 0x03, 0xb5, 0x72, 0xc4,
+ 0xd4, 0x62, 0xa5, 0x13, 0x36, 0x80, 0x47, 0xf1,
+ 0x0d, 0xbb, 0x7c, 0xca, 0xef, 0x59, 0x9e, 0x28,
+ 0x7b, 0xcd, 0x0a, 0xbc, 0x99, 0x2f, 0xe8, 0x5e,
+ 0xa2, 0x14, 0xd3, 0x65, 0x40, 0xf6, 0x31, 0x87,
+ },
+ {
+ 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22,
+ 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3,
+ 0xbf, 0x08, 0xcc, 0x7b, 0x59, 0xee, 0x2a, 0x9d,
+ 0x6e, 0xd9, 0x1d, 0xaa, 0x88, 0x3f, 0xfb, 0x4c,
+ 0x63, 0xd4, 0x10, 0xa7, 0x85, 0x32, 0xf6, 0x41,
+ 0xb2, 0x05, 0xc1, 0x76, 0x54, 0xe3, 0x27, 0x90,
+ 0xdc, 0x6b, 0xaf, 0x18, 0x3a, 0x8d, 0x49, 0xfe,
+ 0x0d, 0xba, 0x7e, 0xc9, 0xeb, 0x5c, 0x98, 0x2f,
+ 0xc6, 0x71, 0xb5, 0x02, 0x20, 0x97, 0x53, 0xe4,
+ 0x17, 0xa0, 0x64, 0xd3, 0xf1, 0x46, 0x82, 0x35,
+ 0x79, 0xce, 0x0a, 0xbd, 0x9f, 0x28, 0xec, 0x5b,
+ 0xa8, 0x1f, 0xdb, 0x6c, 0x4e, 0xf9, 0x3d, 0x8a,
+ 0xa5, 0x12, 0xd6, 0x61, 0x43, 0xf4, 0x30, 0x87,
+ 0x74, 0xc3, 0x07, 0xb0, 0x92, 0x25, 0xe1, 0x56,
+ 0x1a, 0xad, 0x69, 0xde, 0xfc, 0x4b, 0x8f, 0x38,
+ 0xcb, 0x7c, 0xb8, 0x0f, 0x2d, 0x9a, 0x5e, 0xe9,
+ 0x91, 0x26, 0xe2, 0x55, 0x77, 0xc0, 0x04, 0xb3,
+ 0x40, 0xf7, 0x33, 0x84, 0xa6, 0x11, 0xd5, 0x62,
+ 0x2e, 0x99, 0x5d, 0xea, 0xc8, 0x7f, 0xbb, 0x0c,
+ 0xff, 0x48, 0x8c, 0x3b, 0x19, 0xae, 0x6a, 0xdd,
+ 0xf2, 0x45, 0x81, 0x36, 0x14, 0xa3, 0x67, 0xd0,
+ 0x23, 0x94, 0x50, 0xe7, 0xc5, 0x72, 0xb6, 0x01,
+ 0x4d, 0xfa, 0x3e, 0x89, 0xab, 0x1c, 0xd8, 0x6f,
+ 0x9c, 0x2b, 0xef, 0x58, 0x7a, 0xcd, 0x09, 0xbe,
+ 0x57, 0xe0, 0x24, 0x93, 0xb1, 0x06, 0xc2, 0x75,
+ 0x86, 0x31, 0xf5, 0x42, 0x60, 0xd7, 0x13, 0xa4,
+ 0xe8, 0x5f, 0x9b, 0x2c, 0x0e, 0xb9, 0x7d, 0xca,
+ 0x39, 0x8e, 0x4a, 0xfd, 0xdf, 0x68, 0xac, 0x1b,
+ 0x34, 0x83, 0x47, 0xf0, 0xd2, 0x65, 0xa1, 0x16,
+ 0xe5, 0x52, 0x96, 0x21, 0x03, 0xb4, 0x70, 0xc7,
+ 0x8b, 0x3c, 0xf8, 0x4f, 0x6d, 0xda, 0x1e, 0xa9,
+ 0x5a, 0xed, 0x29, 0x9e, 0xbc, 0x0b, 0xcf, 0x78,
+ },
+ {
+ 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f,
+ 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6,
+ 0x4f, 0xf7, 0x22, 0x9a, 0x95, 0x2d, 0xf8, 0x40,
+ 0xe6, 0x5e, 0x8b, 0x33, 0x3c, 0x84, 0x51, 0xe9,
+ 0x9e, 0x26, 0xf3, 0x4b, 0x44, 0xfc, 0x29, 0x91,
+ 0x37, 0x8f, 0x5a, 0xe2, 0xed, 0x55, 0x80, 0x38,
+ 0xd1, 0x69, 0xbc, 0x04, 0x0b, 0xb3, 0x66, 0xde,
+ 0x78, 0xc0, 0x15, 0xad, 0xa2, 0x1a, 0xcf, 0x77,
+ 0x21, 0x99, 0x4c, 0xf4, 0xfb, 0x43, 0x96, 0x2e,
+ 0x88, 0x30, 0xe5, 0x5d, 0x52, 0xea, 0x3f, 0x87,
+ 0x6e, 0xd6, 0x03, 0xbb, 0xb4, 0x0c, 0xd9, 0x61,
+ 0xc7, 0x7f, 0xaa, 0x12, 0x1d, 0xa5, 0x70, 0xc8,
+ 0xbf, 0x07, 0xd2, 0x6a, 0x65, 0xdd, 0x08, 0xb0,
+ 0x16, 0xae, 0x7b, 0xc3, 0xcc, 0x74, 0xa1, 0x19,
+ 0xf0, 0x48, 0x9d, 0x25, 0x2a, 0x92, 0x47, 0xff,
+ 0x59, 0xe1, 0x34, 0x8c, 0x83, 0x3b, 0xee, 0x56,
+ 0x42, 0xfa, 0x2f, 0x97, 0x98, 0x20, 0xf5, 0x4d,
+ 0xeb, 0x53, 0x86, 0x3e, 0x31, 0x89, 0x5c, 0xe4,
+ 0x0d, 0xb5, 0x60, 0xd8, 0xd7, 0x6f, 0xba, 0x02,
+ 0xa4, 0x1c, 0xc9, 0x71, 0x7e, 0xc6, 0x13, 0xab,
+ 0xdc, 0x64, 0xb1, 0x09, 0x06, 0xbe, 0x6b, 0xd3,
+ 0x75, 0xcd, 0x18, 0xa0, 0xaf, 0x17, 0xc2, 0x7a,
+ 0x93, 0x2b, 0xfe, 0x46, 0x49, 0xf1, 0x24, 0x9c,
+ 0x3a, 0x82, 0x57, 0xef, 0xe0, 0x58, 0x8d, 0x35,
+ 0x63, 0xdb, 0x0e, 0xb6, 0xb9, 0x01, 0xd4, 0x6c,
+ 0xca, 0x72, 0xa7, 0x1f, 0x10, 0xa8, 0x7d, 0xc5,
+ 0x2c, 0x94, 0x41, 0xf9, 0xf6, 0x4e, 0x9b, 0x23,
+ 0x85, 0x3d, 0xe8, 0x50, 0x5f, 0xe7, 0x32, 0x8a,
+ 0xfd, 0x45, 0x90, 0x28, 0x27, 0x9f, 0x4a, 0xf2,
+ 0x54, 0xec, 0x39, 0x81, 0x8e, 0x36, 0xe3, 0x5b,
+ 0xb2, 0x0a, 0xdf, 0x67, 0x68, 0xd0, 0x05, 0xbd,
+ 0x1b, 0xa3, 0x76, 0xce, 0xc1, 0x79, 0xac, 0x14,
+ },
+ {
+ 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08,
+ 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9,
+ 0x5f, 0xe6, 0x30, 0x89, 0x81, 0x38, 0xee, 0x57,
+ 0xfe, 0x47, 0x91, 0x28, 0x20, 0x99, 0x4f, 0xf6,
+ 0xbe, 0x07, 0xd1, 0x68, 0x60, 0xd9, 0x0f, 0xb6,
+ 0x1f, 0xa6, 0x70, 0xc9, 0xc1, 0x78, 0xae, 0x17,
+ 0xe1, 0x58, 0x8e, 0x37, 0x3f, 0x86, 0x50, 0xe9,
+ 0x40, 0xf9, 0x2f, 0x96, 0x9e, 0x27, 0xf1, 0x48,
+ 0x61, 0xd8, 0x0e, 0xb7, 0xbf, 0x06, 0xd0, 0x69,
+ 0xc0, 0x79, 0xaf, 0x16, 0x1e, 0xa7, 0x71, 0xc8,
+ 0x3e, 0x87, 0x51, 0xe8, 0xe0, 0x59, 0x8f, 0x36,
+ 0x9f, 0x26, 0xf0, 0x49, 0x41, 0xf8, 0x2e, 0x97,
+ 0xdf, 0x66, 0xb0, 0x09, 0x01, 0xb8, 0x6e, 0xd7,
+ 0x7e, 0xc7, 0x11, 0xa8, 0xa0, 0x19, 0xcf, 0x76,
+ 0x80, 0x39, 0xef, 0x56, 0x5e, 0xe7, 0x31, 0x88,
+ 0x21, 0x98, 0x4e, 0xf7, 0xff, 0x46, 0x90, 0x29,
+ 0xc2, 0x7b, 0xad, 0x14, 0x1c, 0xa5, 0x73, 0xca,
+ 0x63, 0xda, 0x0c, 0xb5, 0xbd, 0x04, 0xd2, 0x6b,
+ 0x9d, 0x24, 0xf2, 0x4b, 0x43, 0xfa, 0x2c, 0x95,
+ 0x3c, 0x85, 0x53, 0xea, 0xe2, 0x5b, 0x8d, 0x34,
+ 0x7c, 0xc5, 0x13, 0xaa, 0xa2, 0x1b, 0xcd, 0x74,
+ 0xdd, 0x64, 0xb2, 0x0b, 0x03, 0xba, 0x6c, 0xd5,
+ 0x23, 0x9a, 0x4c, 0xf5, 0xfd, 0x44, 0x92, 0x2b,
+ 0x82, 0x3b, 0xed, 0x54, 0x5c, 0xe5, 0x33, 0x8a,
+ 0xa3, 0x1a, 0xcc, 0x75, 0x7d, 0xc4, 0x12, 0xab,
+ 0x02, 0xbb, 0x6d, 0xd4, 0xdc, 0x65, 0xb3, 0x0a,
+ 0xfc, 0x45, 0x93, 0x2a, 0x22, 0x9b, 0x4d, 0xf4,
+ 0x5d, 0xe4, 0x32, 0x8b, 0x83, 0x3a, 0xec, 0x55,
+ 0x1d, 0xa4, 0x72, 0xcb, 0xc3, 0x7a, 0xac, 0x15,
+ 0xbc, 0x05, 0xd3, 0x6a, 0x62, 0xdb, 0x0d, 0xb4,
+ 0x42, 0xfb, 0x2d, 0x94, 0x9c, 0x25, 0xf3, 0x4a,
+ 0xe3, 0x5a, 0x8c, 0x35, 0x3d, 0x84, 0x52, 0xeb,
+ },
+ {
+ 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01,
+ 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8,
+ 0x6f, 0xd5, 0x06, 0xbc, 0xbd, 0x07, 0xd4, 0x6e,
+ 0xd6, 0x6c, 0xbf, 0x05, 0x04, 0xbe, 0x6d, 0xd7,
+ 0xde, 0x64, 0xb7, 0x0d, 0x0c, 0xb6, 0x65, 0xdf,
+ 0x67, 0xdd, 0x0e, 0xb4, 0xb5, 0x0f, 0xdc, 0x66,
+ 0xb1, 0x0b, 0xd8, 0x62, 0x63, 0xd9, 0x0a, 0xb0,
+ 0x08, 0xb2, 0x61, 0xdb, 0xda, 0x60, 0xb3, 0x09,
+ 0xa1, 0x1b, 0xc8, 0x72, 0x73, 0xc9, 0x1a, 0xa0,
+ 0x18, 0xa2, 0x71, 0xcb, 0xca, 0x70, 0xa3, 0x19,
+ 0xce, 0x74, 0xa7, 0x1d, 0x1c, 0xa6, 0x75, 0xcf,
+ 0x77, 0xcd, 0x1e, 0xa4, 0xa5, 0x1f, 0xcc, 0x76,
+ 0x7f, 0xc5, 0x16, 0xac, 0xad, 0x17, 0xc4, 0x7e,
+ 0xc6, 0x7c, 0xaf, 0x15, 0x14, 0xae, 0x7d, 0xc7,
+ 0x10, 0xaa, 0x79, 0xc3, 0xc2, 0x78, 0xab, 0x11,
+ 0xa9, 0x13, 0xc0, 0x7a, 0x7b, 0xc1, 0x12, 0xa8,
+ 0x5f, 0xe5, 0x36, 0x8c, 0x8d, 0x37, 0xe4, 0x5e,
+ 0xe6, 0x5c, 0x8f, 0x35, 0x34, 0x8e, 0x5d, 0xe7,
+ 0x30, 0x8a, 0x59, 0xe3, 0xe2, 0x58, 0x8b, 0x31,
+ 0x89, 0x33, 0xe0, 0x5a, 0x5b, 0xe1, 0x32, 0x88,
+ 0x81, 0x3b, 0xe8, 0x52, 0x53, 0xe9, 0x3a, 0x80,
+ 0x38, 0x82, 0x51, 0xeb, 0xea, 0x50, 0x83, 0x39,
+ 0xee, 0x54, 0x87, 0x3d, 0x3c, 0x86, 0x55, 0xef,
+ 0x57, 0xed, 0x3e, 0x84, 0x85, 0x3f, 0xec, 0x56,
+ 0xfe, 0x44, 0x97, 0x2d, 0x2c, 0x96, 0x45, 0xff,
+ 0x47, 0xfd, 0x2e, 0x94, 0x95, 0x2f, 0xfc, 0x46,
+ 0x91, 0x2b, 0xf8, 0x42, 0x43, 0xf9, 0x2a, 0x90,
+ 0x28, 0x92, 0x41, 0xfb, 0xfa, 0x40, 0x93, 0x29,
+ 0x20, 0x9a, 0x49, 0xf3, 0xf2, 0x48, 0x9b, 0x21,
+ 0x99, 0x23, 0xf0, 0x4a, 0x4b, 0xf1, 0x22, 0x98,
+ 0x4f, 0xf5, 0x26, 0x9c, 0x9d, 0x27, 0xf4, 0x4e,
+ 0xf6, 0x4c, 0x9f, 0x25, 0x24, 0x9e, 0x4d, 0xf7,
+ },
+ {
+ 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+ 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7,
+ 0x7f, 0xc4, 0x14, 0xaf, 0xa9, 0x12, 0xc2, 0x79,
+ 0xce, 0x75, 0xa5, 0x1e, 0x18, 0xa3, 0x73, 0xc8,
+ 0xfe, 0x45, 0x95, 0x2e, 0x28, 0x93, 0x43, 0xf8,
+ 0x4f, 0xf4, 0x24, 0x9f, 0x99, 0x22, 0xf2, 0x49,
+ 0x81, 0x3a, 0xea, 0x51, 0x57, 0xec, 0x3c, 0x87,
+ 0x30, 0x8b, 0x5b, 0xe0, 0xe6, 0x5d, 0x8d, 0x36,
+ 0xe1, 0x5a, 0x8a, 0x31, 0x37, 0x8c, 0x5c, 0xe7,
+ 0x50, 0xeb, 0x3b, 0x80, 0x86, 0x3d, 0xed, 0x56,
+ 0x9e, 0x25, 0xf5, 0x4e, 0x48, 0xf3, 0x23, 0x98,
+ 0x2f, 0x94, 0x44, 0xff, 0xf9, 0x42, 0x92, 0x29,
+ 0x1f, 0xa4, 0x74, 0xcf, 0xc9, 0x72, 0xa2, 0x19,
+ 0xae, 0x15, 0xc5, 0x7e, 0x78, 0xc3, 0x13, 0xa8,
+ 0x60, 0xdb, 0x0b, 0xb0, 0xb6, 0x0d, 0xdd, 0x66,
+ 0xd1, 0x6a, 0xba, 0x01, 0x07, 0xbc, 0x6c, 0xd7,
+ 0xdf, 0x64, 0xb4, 0x0f, 0x09, 0xb2, 0x62, 0xd9,
+ 0x6e, 0xd5, 0x05, 0xbe, 0xb8, 0x03, 0xd3, 0x68,
+ 0xa0, 0x1b, 0xcb, 0x70, 0x76, 0xcd, 0x1d, 0xa6,
+ 0x11, 0xaa, 0x7a, 0xc1, 0xc7, 0x7c, 0xac, 0x17,
+ 0x21, 0x9a, 0x4a, 0xf1, 0xf7, 0x4c, 0x9c, 0x27,
+ 0x90, 0x2b, 0xfb, 0x40, 0x46, 0xfd, 0x2d, 0x96,
+ 0x5e, 0xe5, 0x35, 0x8e, 0x88, 0x33, 0xe3, 0x58,
+ 0xef, 0x54, 0x84, 0x3f, 0x39, 0x82, 0x52, 0xe9,
+ 0x3e, 0x85, 0x55, 0xee, 0xe8, 0x53, 0x83, 0x38,
+ 0x8f, 0x34, 0xe4, 0x5f, 0x59, 0xe2, 0x32, 0x89,
+ 0x41, 0xfa, 0x2a, 0x91, 0x97, 0x2c, 0xfc, 0x47,
+ 0xf0, 0x4b, 0x9b, 0x20, 0x26, 0x9d, 0x4d, 0xf6,
+ 0xc0, 0x7b, 0xab, 0x10, 0x16, 0xad, 0x7d, 0xc6,
+ 0x71, 0xca, 0x1a, 0xa1, 0xa7, 0x1c, 0xcc, 0x77,
+ 0xbf, 0x04, 0xd4, 0x6f, 0x69, 0xd2, 0x02, 0xb9,
+ 0x0e, 0xb5, 0x65, 0xde, 0xd8, 0x63, 0xb3, 0x08,
+ },
+ {
+ 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13,
+ 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a,
+ 0x0f, 0xb3, 0x6a, 0xd6, 0xc5, 0x79, 0xa0, 0x1c,
+ 0x86, 0x3a, 0xe3, 0x5f, 0x4c, 0xf0, 0x29, 0x95,
+ 0x1e, 0xa2, 0x7b, 0xc7, 0xd4, 0x68, 0xb1, 0x0d,
+ 0x97, 0x2b, 0xf2, 0x4e, 0x5d, 0xe1, 0x38, 0x84,
+ 0x11, 0xad, 0x74, 0xc8, 0xdb, 0x67, 0xbe, 0x02,
+ 0x98, 0x24, 0xfd, 0x41, 0x52, 0xee, 0x37, 0x8b,
+ 0x3c, 0x80, 0x59, 0xe5, 0xf6, 0x4a, 0x93, 0x2f,
+ 0xb5, 0x09, 0xd0, 0x6c, 0x7f, 0xc3, 0x1a, 0xa6,
+ 0x33, 0x8f, 0x56, 0xea, 0xf9, 0x45, 0x9c, 0x20,
+ 0xba, 0x06, 0xdf, 0x63, 0x70, 0xcc, 0x15, 0xa9,
+ 0x22, 0x9e, 0x47, 0xfb, 0xe8, 0x54, 0x8d, 0x31,
+ 0xab, 0x17, 0xce, 0x72, 0x61, 0xdd, 0x04, 0xb8,
+ 0x2d, 0x91, 0x48, 0xf4, 0xe7, 0x5b, 0x82, 0x3e,
+ 0xa4, 0x18, 0xc1, 0x7d, 0x6e, 0xd2, 0x0b, 0xb7,
+ 0x78, 0xc4, 0x1d, 0xa1, 0xb2, 0x0e, 0xd7, 0x6b,
+ 0xf1, 0x4d, 0x94, 0x28, 0x3b, 0x87, 0x5e, 0xe2,
+ 0x77, 0xcb, 0x12, 0xae, 0xbd, 0x01, 0xd8, 0x64,
+ 0xfe, 0x42, 0x9b, 0x27, 0x34, 0x88, 0x51, 0xed,
+ 0x66, 0xda, 0x03, 0xbf, 0xac, 0x10, 0xc9, 0x75,
+ 0xef, 0x53, 0x8a, 0x36, 0x25, 0x99, 0x40, 0xfc,
+ 0x69, 0xd5, 0x0c, 0xb0, 0xa3, 0x1f, 0xc6, 0x7a,
+ 0xe0, 0x5c, 0x85, 0x39, 0x2a, 0x96, 0x4f, 0xf3,
+ 0x44, 0xf8, 0x21, 0x9d, 0x8e, 0x32, 0xeb, 0x57,
+ 0xcd, 0x71, 0xa8, 0x14, 0x07, 0xbb, 0x62, 0xde,
+ 0x4b, 0xf7, 0x2e, 0x92, 0x81, 0x3d, 0xe4, 0x58,
+ 0xc2, 0x7e, 0xa7, 0x1b, 0x08, 0xb4, 0x6d, 0xd1,
+ 0x5a, 0xe6, 0x3f, 0x83, 0x90, 0x2c, 0xf5, 0x49,
+ 0xd3, 0x6f, 0xb6, 0x0a, 0x19, 0xa5, 0x7c, 0xc0,
+ 0x55, 0xe9, 0x30, 0x8c, 0x9f, 0x23, 0xfa, 0x46,
+ 0xdc, 0x60, 0xb9, 0x05, 0x16, 0xaa, 0x73, 0xcf,
+ },
+ {
+ 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14,
+ 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95,
+ 0x1f, 0xa2, 0x78, 0xc5, 0xd1, 0x6c, 0xb6, 0x0b,
+ 0x9e, 0x23, 0xf9, 0x44, 0x50, 0xed, 0x37, 0x8a,
+ 0x3e, 0x83, 0x59, 0xe4, 0xf0, 0x4d, 0x97, 0x2a,
+ 0xbf, 0x02, 0xd8, 0x65, 0x71, 0xcc, 0x16, 0xab,
+ 0x21, 0x9c, 0x46, 0xfb, 0xef, 0x52, 0x88, 0x35,
+ 0xa0, 0x1d, 0xc7, 0x7a, 0x6e, 0xd3, 0x09, 0xb4,
+ 0x7c, 0xc1, 0x1b, 0xa6, 0xb2, 0x0f, 0xd5, 0x68,
+ 0xfd, 0x40, 0x9a, 0x27, 0x33, 0x8e, 0x54, 0xe9,
+ 0x63, 0xde, 0x04, 0xb9, 0xad, 0x10, 0xca, 0x77,
+ 0xe2, 0x5f, 0x85, 0x38, 0x2c, 0x91, 0x4b, 0xf6,
+ 0x42, 0xff, 0x25, 0x98, 0x8c, 0x31, 0xeb, 0x56,
+ 0xc3, 0x7e, 0xa4, 0x19, 0x0d, 0xb0, 0x6a, 0xd7,
+ 0x5d, 0xe0, 0x3a, 0x87, 0x93, 0x2e, 0xf4, 0x49,
+ 0xdc, 0x61, 0xbb, 0x06, 0x12, 0xaf, 0x75, 0xc8,
+ 0xf8, 0x45, 0x9f, 0x22, 0x36, 0x8b, 0x51, 0xec,
+ 0x79, 0xc4, 0x1e, 0xa3, 0xb7, 0x0a, 0xd0, 0x6d,
+ 0xe7, 0x5a, 0x80, 0x3d, 0x29, 0x94, 0x4e, 0xf3,
+ 0x66, 0xdb, 0x01, 0xbc, 0xa8, 0x15, 0xcf, 0x72,
+ 0xc6, 0x7b, 0xa1, 0x1c, 0x08, 0xb5, 0x6f, 0xd2,
+ 0x47, 0xfa, 0x20, 0x9d, 0x89, 0x34, 0xee, 0x53,
+ 0xd9, 0x64, 0xbe, 0x03, 0x17, 0xaa, 0x70, 0xcd,
+ 0x58, 0xe5, 0x3f, 0x82, 0x96, 0x2b, 0xf1, 0x4c,
+ 0x84, 0x39, 0xe3, 0x5e, 0x4a, 0xf7, 0x2d, 0x90,
+ 0x05, 0xb8, 0x62, 0xdf, 0xcb, 0x76, 0xac, 0x11,
+ 0x9b, 0x26, 0xfc, 0x41, 0x55, 0xe8, 0x32, 0x8f,
+ 0x1a, 0xa7, 0x7d, 0xc0, 0xd4, 0x69, 0xb3, 0x0e,
+ 0xba, 0x07, 0xdd, 0x60, 0x74, 0xc9, 0x13, 0xae,
+ 0x3b, 0x86, 0x5c, 0xe1, 0xf5, 0x48, 0x92, 0x2f,
+ 0xa5, 0x18, 0xc2, 0x7f, 0x6b, 0xd6, 0x0c, 0xb1,
+ 0x24, 0x99, 0x43, 0xfe, 0xea, 0x57, 0x8d, 0x30,
+ },
+ {
+ 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d,
+ 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84,
+ 0x2f, 0x91, 0x4e, 0xf0, 0xed, 0x53, 0x8c, 0x32,
+ 0xb6, 0x08, 0xd7, 0x69, 0x74, 0xca, 0x15, 0xab,
+ 0x5e, 0xe0, 0x3f, 0x81, 0x9c, 0x22, 0xfd, 0x43,
+ 0xc7, 0x79, 0xa6, 0x18, 0x05, 0xbb, 0x64, 0xda,
+ 0x71, 0xcf, 0x10, 0xae, 0xb3, 0x0d, 0xd2, 0x6c,
+ 0xe8, 0x56, 0x89, 0x37, 0x2a, 0x94, 0x4b, 0xf5,
+ 0xbc, 0x02, 0xdd, 0x63, 0x7e, 0xc0, 0x1f, 0xa1,
+ 0x25, 0x9b, 0x44, 0xfa, 0xe7, 0x59, 0x86, 0x38,
+ 0x93, 0x2d, 0xf2, 0x4c, 0x51, 0xef, 0x30, 0x8e,
+ 0x0a, 0xb4, 0x6b, 0xd5, 0xc8, 0x76, 0xa9, 0x17,
+ 0xe2, 0x5c, 0x83, 0x3d, 0x20, 0x9e, 0x41, 0xff,
+ 0x7b, 0xc5, 0x1a, 0xa4, 0xb9, 0x07, 0xd8, 0x66,
+ 0xcd, 0x73, 0xac, 0x12, 0x0f, 0xb1, 0x6e, 0xd0,
+ 0x54, 0xea, 0x35, 0x8b, 0x96, 0x28, 0xf7, 0x49,
+ 0x65, 0xdb, 0x04, 0xba, 0xa7, 0x19, 0xc6, 0x78,
+ 0xfc, 0x42, 0x9d, 0x23, 0x3e, 0x80, 0x5f, 0xe1,
+ 0x4a, 0xf4, 0x2b, 0x95, 0x88, 0x36, 0xe9, 0x57,
+ 0xd3, 0x6d, 0xb2, 0x0c, 0x11, 0xaf, 0x70, 0xce,
+ 0x3b, 0x85, 0x5a, 0xe4, 0xf9, 0x47, 0x98, 0x26,
+ 0xa2, 0x1c, 0xc3, 0x7d, 0x60, 0xde, 0x01, 0xbf,
+ 0x14, 0xaa, 0x75, 0xcb, 0xd6, 0x68, 0xb7, 0x09,
+ 0x8d, 0x33, 0xec, 0x52, 0x4f, 0xf1, 0x2e, 0x90,
+ 0xd9, 0x67, 0xb8, 0x06, 0x1b, 0xa5, 0x7a, 0xc4,
+ 0x40, 0xfe, 0x21, 0x9f, 0x82, 0x3c, 0xe3, 0x5d,
+ 0xf6, 0x48, 0x97, 0x29, 0x34, 0x8a, 0x55, 0xeb,
+ 0x6f, 0xd1, 0x0e, 0xb0, 0xad, 0x13, 0xcc, 0x72,
+ 0x87, 0x39, 0xe6, 0x58, 0x45, 0xfb, 0x24, 0x9a,
+ 0x1e, 0xa0, 0x7f, 0xc1, 0xdc, 0x62, 0xbd, 0x03,
+ 0xa8, 0x16, 0xc9, 0x77, 0x6a, 0xd4, 0x0b, 0xb5,
+ 0x31, 0x8f, 0x50, 0xee, 0xf3, 0x4d, 0x92, 0x2c,
+ },
+ {
+ 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a,
+ 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b,
+ 0x3f, 0x80, 0x5c, 0xe3, 0xf9, 0x46, 0x9a, 0x25,
+ 0xae, 0x11, 0xcd, 0x72, 0x68, 0xd7, 0x0b, 0xb4,
+ 0x7e, 0xc1, 0x1d, 0xa2, 0xb8, 0x07, 0xdb, 0x64,
+ 0xef, 0x50, 0x8c, 0x33, 0x29, 0x96, 0x4a, 0xf5,
+ 0x41, 0xfe, 0x22, 0x9d, 0x87, 0x38, 0xe4, 0x5b,
+ 0xd0, 0x6f, 0xb3, 0x0c, 0x16, 0xa9, 0x75, 0xca,
+ 0xfc, 0x43, 0x9f, 0x20, 0x3a, 0x85, 0x59, 0xe6,
+ 0x6d, 0xd2, 0x0e, 0xb1, 0xab, 0x14, 0xc8, 0x77,
+ 0xc3, 0x7c, 0xa0, 0x1f, 0x05, 0xba, 0x66, 0xd9,
+ 0x52, 0xed, 0x31, 0x8e, 0x94, 0x2b, 0xf7, 0x48,
+ 0x82, 0x3d, 0xe1, 0x5e, 0x44, 0xfb, 0x27, 0x98,
+ 0x13, 0xac, 0x70, 0xcf, 0xd5, 0x6a, 0xb6, 0x09,
+ 0xbd, 0x02, 0xde, 0x61, 0x7b, 0xc4, 0x18, 0xa7,
+ 0x2c, 0x93, 0x4f, 0xf0, 0xea, 0x55, 0x89, 0x36,
+ 0xe5, 0x5a, 0x86, 0x39, 0x23, 0x9c, 0x40, 0xff,
+ 0x74, 0xcb, 0x17, 0xa8, 0xb2, 0x0d, 0xd1, 0x6e,
+ 0xda, 0x65, 0xb9, 0x06, 0x1c, 0xa3, 0x7f, 0xc0,
+ 0x4b, 0xf4, 0x28, 0x97, 0x8d, 0x32, 0xee, 0x51,
+ 0x9b, 0x24, 0xf8, 0x47, 0x5d, 0xe2, 0x3e, 0x81,
+ 0x0a, 0xb5, 0x69, 0xd6, 0xcc, 0x73, 0xaf, 0x10,
+ 0xa4, 0x1b, 0xc7, 0x78, 0x62, 0xdd, 0x01, 0xbe,
+ 0x35, 0x8a, 0x56, 0xe9, 0xf3, 0x4c, 0x90, 0x2f,
+ 0x19, 0xa6, 0x7a, 0xc5, 0xdf, 0x60, 0xbc, 0x03,
+ 0x88, 0x37, 0xeb, 0x54, 0x4e, 0xf1, 0x2d, 0x92,
+ 0x26, 0x99, 0x45, 0xfa, 0xe0, 0x5f, 0x83, 0x3c,
+ 0xb7, 0x08, 0xd4, 0x6b, 0x71, 0xce, 0x12, 0xad,
+ 0x67, 0xd8, 0x04, 0xbb, 0xa1, 0x1e, 0xc2, 0x7d,
+ 0xf6, 0x49, 0x95, 0x2a, 0x30, 0x8f, 0x53, 0xec,
+ 0x58, 0xe7, 0x3b, 0x84, 0x9e, 0x21, 0xfd, 0x42,
+ 0xc9, 0x76, 0xaa, 0x15, 0x0f, 0xb0, 0x6c, 0xd3,
+ },
+ {
+ 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a,
+ 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34,
+ 0x9c, 0x5c, 0x01, 0xc1, 0xbb, 0x7b, 0x26, 0xe6,
+ 0xd2, 0x12, 0x4f, 0x8f, 0xf5, 0x35, 0x68, 0xa8,
+ 0x25, 0xe5, 0xb8, 0x78, 0x02, 0xc2, 0x9f, 0x5f,
+ 0x6b, 0xab, 0xf6, 0x36, 0x4c, 0x8c, 0xd1, 0x11,
+ 0xb9, 0x79, 0x24, 0xe4, 0x9e, 0x5e, 0x03, 0xc3,
+ 0xf7, 0x37, 0x6a, 0xaa, 0xd0, 0x10, 0x4d, 0x8d,
+ 0x4a, 0x8a, 0xd7, 0x17, 0x6d, 0xad, 0xf0, 0x30,
+ 0x04, 0xc4, 0x99, 0x59, 0x23, 0xe3, 0xbe, 0x7e,
+ 0xd6, 0x16, 0x4b, 0x8b, 0xf1, 0x31, 0x6c, 0xac,
+ 0x98, 0x58, 0x05, 0xc5, 0xbf, 0x7f, 0x22, 0xe2,
+ 0x6f, 0xaf, 0xf2, 0x32, 0x48, 0x88, 0xd5, 0x15,
+ 0x21, 0xe1, 0xbc, 0x7c, 0x06, 0xc6, 0x9b, 0x5b,
+ 0xf3, 0x33, 0x6e, 0xae, 0xd4, 0x14, 0x49, 0x89,
+ 0xbd, 0x7d, 0x20, 0xe0, 0x9a, 0x5a, 0x07, 0xc7,
+ 0x94, 0x54, 0x09, 0xc9, 0xb3, 0x73, 0x2e, 0xee,
+ 0xda, 0x1a, 0x47, 0x87, 0xfd, 0x3d, 0x60, 0xa0,
+ 0x08, 0xc8, 0x95, 0x55, 0x2f, 0xef, 0xb2, 0x72,
+ 0x46, 0x86, 0xdb, 0x1b, 0x61, 0xa1, 0xfc, 0x3c,
+ 0xb1, 0x71, 0x2c, 0xec, 0x96, 0x56, 0x0b, 0xcb,
+ 0xff, 0x3f, 0x62, 0xa2, 0xd8, 0x18, 0x45, 0x85,
+ 0x2d, 0xed, 0xb0, 0x70, 0x0a, 0xca, 0x97, 0x57,
+ 0x63, 0xa3, 0xfe, 0x3e, 0x44, 0x84, 0xd9, 0x19,
+ 0xde, 0x1e, 0x43, 0x83, 0xf9, 0x39, 0x64, 0xa4,
+ 0x90, 0x50, 0x0d, 0xcd, 0xb7, 0x77, 0x2a, 0xea,
+ 0x42, 0x82, 0xdf, 0x1f, 0x65, 0xa5, 0xf8, 0x38,
+ 0x0c, 0xcc, 0x91, 0x51, 0x2b, 0xeb, 0xb6, 0x76,
+ 0xfb, 0x3b, 0x66, 0xa6, 0xdc, 0x1c, 0x41, 0x81,
+ 0xb5, 0x75, 0x28, 0xe8, 0x92, 0x52, 0x0f, 0xcf,
+ 0x67, 0xa7, 0xfa, 0x3a, 0x40, 0x80, 0xdd, 0x1d,
+ 0x29, 0xe9, 0xb4, 0x74, 0x0e, 0xce, 0x93, 0x53,
+ },
+ {
+ 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d,
+ 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b,
+ 0x8c, 0x4d, 0x13, 0xd2, 0xaf, 0x6e, 0x30, 0xf1,
+ 0xca, 0x0b, 0x55, 0x94, 0xe9, 0x28, 0x76, 0xb7,
+ 0x05, 0xc4, 0x9a, 0x5b, 0x26, 0xe7, 0xb9, 0x78,
+ 0x43, 0x82, 0xdc, 0x1d, 0x60, 0xa1, 0xff, 0x3e,
+ 0x89, 0x48, 0x16, 0xd7, 0xaa, 0x6b, 0x35, 0xf4,
+ 0xcf, 0x0e, 0x50, 0x91, 0xec, 0x2d, 0x73, 0xb2,
+ 0x0a, 0xcb, 0x95, 0x54, 0x29, 0xe8, 0xb6, 0x77,
+ 0x4c, 0x8d, 0xd3, 0x12, 0x6f, 0xae, 0xf0, 0x31,
+ 0x86, 0x47, 0x19, 0xd8, 0xa5, 0x64, 0x3a, 0xfb,
+ 0xc0, 0x01, 0x5f, 0x9e, 0xe3, 0x22, 0x7c, 0xbd,
+ 0x0f, 0xce, 0x90, 0x51, 0x2c, 0xed, 0xb3, 0x72,
+ 0x49, 0x88, 0xd6, 0x17, 0x6a, 0xab, 0xf5, 0x34,
+ 0x83, 0x42, 0x1c, 0xdd, 0xa0, 0x61, 0x3f, 0xfe,
+ 0xc5, 0x04, 0x5a, 0x9b, 0xe6, 0x27, 0x79, 0xb8,
+ 0x14, 0xd5, 0x8b, 0x4a, 0x37, 0xf6, 0xa8, 0x69,
+ 0x52, 0x93, 0xcd, 0x0c, 0x71, 0xb0, 0xee, 0x2f,
+ 0x98, 0x59, 0x07, 0xc6, 0xbb, 0x7a, 0x24, 0xe5,
+ 0xde, 0x1f, 0x41, 0x80, 0xfd, 0x3c, 0x62, 0xa3,
+ 0x11, 0xd0, 0x8e, 0x4f, 0x32, 0xf3, 0xad, 0x6c,
+ 0x57, 0x96, 0xc8, 0x09, 0x74, 0xb5, 0xeb, 0x2a,
+ 0x9d, 0x5c, 0x02, 0xc3, 0xbe, 0x7f, 0x21, 0xe0,
+ 0xdb, 0x1a, 0x44, 0x85, 0xf8, 0x39, 0x67, 0xa6,
+ 0x1e, 0xdf, 0x81, 0x40, 0x3d, 0xfc, 0xa2, 0x63,
+ 0x58, 0x99, 0xc7, 0x06, 0x7b, 0xba, 0xe4, 0x25,
+ 0x92, 0x53, 0x0d, 0xcc, 0xb1, 0x70, 0x2e, 0xef,
+ 0xd4, 0x15, 0x4b, 0x8a, 0xf7, 0x36, 0x68, 0xa9,
+ 0x1b, 0xda, 0x84, 0x45, 0x38, 0xf9, 0xa7, 0x66,
+ 0x5d, 0x9c, 0xc2, 0x03, 0x7e, 0xbf, 0xe1, 0x20,
+ 0x97, 0x56, 0x08, 0xc9, 0xb4, 0x75, 0x2b, 0xea,
+ 0xd1, 0x10, 0x4e, 0x8f, 0xf2, 0x33, 0x6d, 0xac,
+ },
+ {
+ 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74,
+ 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a,
+ 0xbc, 0x7e, 0x25, 0xe7, 0x93, 0x51, 0x0a, 0xc8,
+ 0xe2, 0x20, 0x7b, 0xb9, 0xcd, 0x0f, 0x54, 0x96,
+ 0x65, 0xa7, 0xfc, 0x3e, 0x4a, 0x88, 0xd3, 0x11,
+ 0x3b, 0xf9, 0xa2, 0x60, 0x14, 0xd6, 0x8d, 0x4f,
+ 0xd9, 0x1b, 0x40, 0x82, 0xf6, 0x34, 0x6f, 0xad,
+ 0x87, 0x45, 0x1e, 0xdc, 0xa8, 0x6a, 0x31, 0xf3,
+ 0xca, 0x08, 0x53, 0x91, 0xe5, 0x27, 0x7c, 0xbe,
+ 0x94, 0x56, 0x0d, 0xcf, 0xbb, 0x79, 0x22, 0xe0,
+ 0x76, 0xb4, 0xef, 0x2d, 0x59, 0x9b, 0xc0, 0x02,
+ 0x28, 0xea, 0xb1, 0x73, 0x07, 0xc5, 0x9e, 0x5c,
+ 0xaf, 0x6d, 0x36, 0xf4, 0x80, 0x42, 0x19, 0xdb,
+ 0xf1, 0x33, 0x68, 0xaa, 0xde, 0x1c, 0x47, 0x85,
+ 0x13, 0xd1, 0x8a, 0x48, 0x3c, 0xfe, 0xa5, 0x67,
+ 0x4d, 0x8f, 0xd4, 0x16, 0x62, 0xa0, 0xfb, 0x39,
+ 0x89, 0x4b, 0x10, 0xd2, 0xa6, 0x64, 0x3f, 0xfd,
+ 0xd7, 0x15, 0x4e, 0x8c, 0xf8, 0x3a, 0x61, 0xa3,
+ 0x35, 0xf7, 0xac, 0x6e, 0x1a, 0xd8, 0x83, 0x41,
+ 0x6b, 0xa9, 0xf2, 0x30, 0x44, 0x86, 0xdd, 0x1f,
+ 0xec, 0x2e, 0x75, 0xb7, 0xc3, 0x01, 0x5a, 0x98,
+ 0xb2, 0x70, 0x2b, 0xe9, 0x9d, 0x5f, 0x04, 0xc6,
+ 0x50, 0x92, 0xc9, 0x0b, 0x7f, 0xbd, 0xe6, 0x24,
+ 0x0e, 0xcc, 0x97, 0x55, 0x21, 0xe3, 0xb8, 0x7a,
+ 0x43, 0x81, 0xda, 0x18, 0x6c, 0xae, 0xf5, 0x37,
+ 0x1d, 0xdf, 0x84, 0x46, 0x32, 0xf0, 0xab, 0x69,
+ 0xff, 0x3d, 0x66, 0xa4, 0xd0, 0x12, 0x49, 0x8b,
+ 0xa1, 0x63, 0x38, 0xfa, 0x8e, 0x4c, 0x17, 0xd5,
+ 0x26, 0xe4, 0xbf, 0x7d, 0x09, 0xcb, 0x90, 0x52,
+ 0x78, 0xba, 0xe1, 0x23, 0x57, 0x95, 0xce, 0x0c,
+ 0x9a, 0x58, 0x03, 0xc1, 0xb5, 0x77, 0x2c, 0xee,
+ 0xc4, 0x06, 0x5d, 0x9f, 0xeb, 0x29, 0x72, 0xb0,
+ },
+ {
+ 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73,
+ 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25,
+ 0xac, 0x6f, 0x37, 0xf4, 0x87, 0x44, 0x1c, 0xdf,
+ 0xfa, 0x39, 0x61, 0xa2, 0xd1, 0x12, 0x4a, 0x89,
+ 0x45, 0x86, 0xde, 0x1d, 0x6e, 0xad, 0xf5, 0x36,
+ 0x13, 0xd0, 0x88, 0x4b, 0x38, 0xfb, 0xa3, 0x60,
+ 0xe9, 0x2a, 0x72, 0xb1, 0xc2, 0x01, 0x59, 0x9a,
+ 0xbf, 0x7c, 0x24, 0xe7, 0x94, 0x57, 0x0f, 0xcc,
+ 0x8a, 0x49, 0x11, 0xd2, 0xa1, 0x62, 0x3a, 0xf9,
+ 0xdc, 0x1f, 0x47, 0x84, 0xf7, 0x34, 0x6c, 0xaf,
+ 0x26, 0xe5, 0xbd, 0x7e, 0x0d, 0xce, 0x96, 0x55,
+ 0x70, 0xb3, 0xeb, 0x28, 0x5b, 0x98, 0xc0, 0x03,
+ 0xcf, 0x0c, 0x54, 0x97, 0xe4, 0x27, 0x7f, 0xbc,
+ 0x99, 0x5a, 0x02, 0xc1, 0xb2, 0x71, 0x29, 0xea,
+ 0x63, 0xa0, 0xf8, 0x3b, 0x48, 0x8b, 0xd3, 0x10,
+ 0x35, 0xf6, 0xae, 0x6d, 0x1e, 0xdd, 0x85, 0x46,
+ 0x09, 0xca, 0x92, 0x51, 0x22, 0xe1, 0xb9, 0x7a,
+ 0x5f, 0x9c, 0xc4, 0x07, 0x74, 0xb7, 0xef, 0x2c,
+ 0xa5, 0x66, 0x3e, 0xfd, 0x8e, 0x4d, 0x15, 0xd6,
+ 0xf3, 0x30, 0x68, 0xab, 0xd8, 0x1b, 0x43, 0x80,
+ 0x4c, 0x8f, 0xd7, 0x14, 0x67, 0xa4, 0xfc, 0x3f,
+ 0x1a, 0xd9, 0x81, 0x42, 0x31, 0xf2, 0xaa, 0x69,
+ 0xe0, 0x23, 0x7b, 0xb8, 0xcb, 0x08, 0x50, 0x93,
+ 0xb6, 0x75, 0x2d, 0xee, 0x9d, 0x5e, 0x06, 0xc5,
+ 0x83, 0x40, 0x18, 0xdb, 0xa8, 0x6b, 0x33, 0xf0,
+ 0xd5, 0x16, 0x4e, 0x8d, 0xfe, 0x3d, 0x65, 0xa6,
+ 0x2f, 0xec, 0xb4, 0x77, 0x04, 0xc7, 0x9f, 0x5c,
+ 0x79, 0xba, 0xe2, 0x21, 0x52, 0x91, 0xc9, 0x0a,
+ 0xc6, 0x05, 0x5d, 0x9e, 0xed, 0x2e, 0x76, 0xb5,
+ 0x90, 0x53, 0x0b, 0xc8, 0xbb, 0x78, 0x20, 0xe3,
+ 0x6a, 0xa9, 0xf1, 0x32, 0x41, 0x82, 0xda, 0x19,
+ 0x3c, 0xff, 0xa7, 0x64, 0x17, 0xd4, 0x8c, 0x4f,
+ },
+ {
+ 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66,
+ 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08,
+ 0xdc, 0x18, 0x49, 0x8d, 0xeb, 0x2f, 0x7e, 0xba,
+ 0xb2, 0x76, 0x27, 0xe3, 0x85, 0x41, 0x10, 0xd4,
+ 0xa5, 0x61, 0x30, 0xf4, 0x92, 0x56, 0x07, 0xc3,
+ 0xcb, 0x0f, 0x5e, 0x9a, 0xfc, 0x38, 0x69, 0xad,
+ 0x79, 0xbd, 0xec, 0x28, 0x4e, 0x8a, 0xdb, 0x1f,
+ 0x17, 0xd3, 0x82, 0x46, 0x20, 0xe4, 0xb5, 0x71,
+ 0x57, 0x93, 0xc2, 0x06, 0x60, 0xa4, 0xf5, 0x31,
+ 0x39, 0xfd, 0xac, 0x68, 0x0e, 0xca, 0x9b, 0x5f,
+ 0x8b, 0x4f, 0x1e, 0xda, 0xbc, 0x78, 0x29, 0xed,
+ 0xe5, 0x21, 0x70, 0xb4, 0xd2, 0x16, 0x47, 0x83,
+ 0xf2, 0x36, 0x67, 0xa3, 0xc5, 0x01, 0x50, 0x94,
+ 0x9c, 0x58, 0x09, 0xcd, 0xab, 0x6f, 0x3e, 0xfa,
+ 0x2e, 0xea, 0xbb, 0x7f, 0x19, 0xdd, 0x8c, 0x48,
+ 0x40, 0x84, 0xd5, 0x11, 0x77, 0xb3, 0xe2, 0x26,
+ 0xae, 0x6a, 0x3b, 0xff, 0x99, 0x5d, 0x0c, 0xc8,
+ 0xc0, 0x04, 0x55, 0x91, 0xf7, 0x33, 0x62, 0xa6,
+ 0x72, 0xb6, 0xe7, 0x23, 0x45, 0x81, 0xd0, 0x14,
+ 0x1c, 0xd8, 0x89, 0x4d, 0x2b, 0xef, 0xbe, 0x7a,
+ 0x0b, 0xcf, 0x9e, 0x5a, 0x3c, 0xf8, 0xa9, 0x6d,
+ 0x65, 0xa1, 0xf0, 0x34, 0x52, 0x96, 0xc7, 0x03,
+ 0xd7, 0x13, 0x42, 0x86, 0xe0, 0x24, 0x75, 0xb1,
+ 0xb9, 0x7d, 0x2c, 0xe8, 0x8e, 0x4a, 0x1b, 0xdf,
+ 0xf9, 0x3d, 0x6c, 0xa8, 0xce, 0x0a, 0x5b, 0x9f,
+ 0x97, 0x53, 0x02, 0xc6, 0xa0, 0x64, 0x35, 0xf1,
+ 0x25, 0xe1, 0xb0, 0x74, 0x12, 0xd6, 0x87, 0x43,
+ 0x4b, 0x8f, 0xde, 0x1a, 0x7c, 0xb8, 0xe9, 0x2d,
+ 0x5c, 0x98, 0xc9, 0x0d, 0x6b, 0xaf, 0xfe, 0x3a,
+ 0x32, 0xf6, 0xa7, 0x63, 0x05, 0xc1, 0x90, 0x54,
+ 0x80, 0x44, 0x15, 0xd1, 0xb7, 0x73, 0x22, 0xe6,
+ 0xee, 0x2a, 0x7b, 0xbf, 0xd9, 0x1d, 0x4c, 0x88,
+ },
+ {
+ 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61,
+ 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07,
+ 0xcc, 0x09, 0x5b, 0x9e, 0xff, 0x3a, 0x68, 0xad,
+ 0xaa, 0x6f, 0x3d, 0xf8, 0x99, 0x5c, 0x0e, 0xcb,
+ 0x85, 0x40, 0x12, 0xd7, 0xb6, 0x73, 0x21, 0xe4,
+ 0xe3, 0x26, 0x74, 0xb1, 0xd0, 0x15, 0x47, 0x82,
+ 0x49, 0x8c, 0xde, 0x1b, 0x7a, 0xbf, 0xed, 0x28,
+ 0x2f, 0xea, 0xb8, 0x7d, 0x1c, 0xd9, 0x8b, 0x4e,
+ 0x17, 0xd2, 0x80, 0x45, 0x24, 0xe1, 0xb3, 0x76,
+ 0x71, 0xb4, 0xe6, 0x23, 0x42, 0x87, 0xd5, 0x10,
+ 0xdb, 0x1e, 0x4c, 0x89, 0xe8, 0x2d, 0x7f, 0xba,
+ 0xbd, 0x78, 0x2a, 0xef, 0x8e, 0x4b, 0x19, 0xdc,
+ 0x92, 0x57, 0x05, 0xc0, 0xa1, 0x64, 0x36, 0xf3,
+ 0xf4, 0x31, 0x63, 0xa6, 0xc7, 0x02, 0x50, 0x95,
+ 0x5e, 0x9b, 0xc9, 0x0c, 0x6d, 0xa8, 0xfa, 0x3f,
+ 0x38, 0xfd, 0xaf, 0x6a, 0x0b, 0xce, 0x9c, 0x59,
+ 0x2e, 0xeb, 0xb9, 0x7c, 0x1d, 0xd8, 0x8a, 0x4f,
+ 0x48, 0x8d, 0xdf, 0x1a, 0x7b, 0xbe, 0xec, 0x29,
+ 0xe2, 0x27, 0x75, 0xb0, 0xd1, 0x14, 0x46, 0x83,
+ 0x84, 0x41, 0x13, 0xd6, 0xb7, 0x72, 0x20, 0xe5,
+ 0xab, 0x6e, 0x3c, 0xf9, 0x98, 0x5d, 0x0f, 0xca,
+ 0xcd, 0x08, 0x5a, 0x9f, 0xfe, 0x3b, 0x69, 0xac,
+ 0x67, 0xa2, 0xf0, 0x35, 0x54, 0x91, 0xc3, 0x06,
+ 0x01, 0xc4, 0x96, 0x53, 0x32, 0xf7, 0xa5, 0x60,
+ 0x39, 0xfc, 0xae, 0x6b, 0x0a, 0xcf, 0x9d, 0x58,
+ 0x5f, 0x9a, 0xc8, 0x0d, 0x6c, 0xa9, 0xfb, 0x3e,
+ 0xf5, 0x30, 0x62, 0xa7, 0xc6, 0x03, 0x51, 0x94,
+ 0x93, 0x56, 0x04, 0xc1, 0xa0, 0x65, 0x37, 0xf2,
+ 0xbc, 0x79, 0x2b, 0xee, 0x8f, 0x4a, 0x18, 0xdd,
+ 0xda, 0x1f, 0x4d, 0x88, 0xe9, 0x2c, 0x7e, 0xbb,
+ 0x70, 0xb5, 0xe7, 0x22, 0x43, 0x86, 0xd4, 0x11,
+ 0x16, 0xd3, 0x81, 0x44, 0x25, 0xe0, 0xb2, 0x77,
+ },
+ {
+ 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68,
+ 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16,
+ 0xfc, 0x3a, 0x6d, 0xab, 0xc3, 0x05, 0x52, 0x94,
+ 0x82, 0x44, 0x13, 0xd5, 0xbd, 0x7b, 0x2c, 0xea,
+ 0xe5, 0x23, 0x74, 0xb2, 0xda, 0x1c, 0x4b, 0x8d,
+ 0x9b, 0x5d, 0x0a, 0xcc, 0xa4, 0x62, 0x35, 0xf3,
+ 0x19, 0xdf, 0x88, 0x4e, 0x26, 0xe0, 0xb7, 0x71,
+ 0x67, 0xa1, 0xf6, 0x30, 0x58, 0x9e, 0xc9, 0x0f,
+ 0xd7, 0x11, 0x46, 0x80, 0xe8, 0x2e, 0x79, 0xbf,
+ 0xa9, 0x6f, 0x38, 0xfe, 0x96, 0x50, 0x07, 0xc1,
+ 0x2b, 0xed, 0xba, 0x7c, 0x14, 0xd2, 0x85, 0x43,
+ 0x55, 0x93, 0xc4, 0x02, 0x6a, 0xac, 0xfb, 0x3d,
+ 0x32, 0xf4, 0xa3, 0x65, 0x0d, 0xcb, 0x9c, 0x5a,
+ 0x4c, 0x8a, 0xdd, 0x1b, 0x73, 0xb5, 0xe2, 0x24,
+ 0xce, 0x08, 0x5f, 0x99, 0xf1, 0x37, 0x60, 0xa6,
+ 0xb0, 0x76, 0x21, 0xe7, 0x8f, 0x49, 0x1e, 0xd8,
+ 0xb3, 0x75, 0x22, 0xe4, 0x8c, 0x4a, 0x1d, 0xdb,
+ 0xcd, 0x0b, 0x5c, 0x9a, 0xf2, 0x34, 0x63, 0xa5,
+ 0x4f, 0x89, 0xde, 0x18, 0x70, 0xb6, 0xe1, 0x27,
+ 0x31, 0xf7, 0xa0, 0x66, 0x0e, 0xc8, 0x9f, 0x59,
+ 0x56, 0x90, 0xc7, 0x01, 0x69, 0xaf, 0xf8, 0x3e,
+ 0x28, 0xee, 0xb9, 0x7f, 0x17, 0xd1, 0x86, 0x40,
+ 0xaa, 0x6c, 0x3b, 0xfd, 0x95, 0x53, 0x04, 0xc2,
+ 0xd4, 0x12, 0x45, 0x83, 0xeb, 0x2d, 0x7a, 0xbc,
+ 0x64, 0xa2, 0xf5, 0x33, 0x5b, 0x9d, 0xca, 0x0c,
+ 0x1a, 0xdc, 0x8b, 0x4d, 0x25, 0xe3, 0xb4, 0x72,
+ 0x98, 0x5e, 0x09, 0xcf, 0xa7, 0x61, 0x36, 0xf0,
+ 0xe6, 0x20, 0x77, 0xb1, 0xd9, 0x1f, 0x48, 0x8e,
+ 0x81, 0x47, 0x10, 0xd6, 0xbe, 0x78, 0x2f, 0xe9,
+ 0xff, 0x39, 0x6e, 0xa8, 0xc0, 0x06, 0x51, 0x97,
+ 0x7d, 0xbb, 0xec, 0x2a, 0x42, 0x84, 0xd3, 0x15,
+ 0x03, 0xc5, 0x92, 0x54, 0x3c, 0xfa, 0xad, 0x6b,
+ },
+ {
+ 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f,
+ 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19,
+ 0xec, 0x2b, 0x7f, 0xb8, 0xd7, 0x10, 0x44, 0x83,
+ 0x9a, 0x5d, 0x09, 0xce, 0xa1, 0x66, 0x32, 0xf5,
+ 0xc5, 0x02, 0x56, 0x91, 0xfe, 0x39, 0x6d, 0xaa,
+ 0xb3, 0x74, 0x20, 0xe7, 0x88, 0x4f, 0x1b, 0xdc,
+ 0x29, 0xee, 0xba, 0x7d, 0x12, 0xd5, 0x81, 0x46,
+ 0x5f, 0x98, 0xcc, 0x0b, 0x64, 0xa3, 0xf7, 0x30,
+ 0x97, 0x50, 0x04, 0xc3, 0xac, 0x6b, 0x3f, 0xf8,
+ 0xe1, 0x26, 0x72, 0xb5, 0xda, 0x1d, 0x49, 0x8e,
+ 0x7b, 0xbc, 0xe8, 0x2f, 0x40, 0x87, 0xd3, 0x14,
+ 0x0d, 0xca, 0x9e, 0x59, 0x36, 0xf1, 0xa5, 0x62,
+ 0x52, 0x95, 0xc1, 0x06, 0x69, 0xae, 0xfa, 0x3d,
+ 0x24, 0xe3, 0xb7, 0x70, 0x1f, 0xd8, 0x8c, 0x4b,
+ 0xbe, 0x79, 0x2d, 0xea, 0x85, 0x42, 0x16, 0xd1,
+ 0xc8, 0x0f, 0x5b, 0x9c, 0xf3, 0x34, 0x60, 0xa7,
+ 0x33, 0xf4, 0xa0, 0x67, 0x08, 0xcf, 0x9b, 0x5c,
+ 0x45, 0x82, 0xd6, 0x11, 0x7e, 0xb9, 0xed, 0x2a,
+ 0xdf, 0x18, 0x4c, 0x8b, 0xe4, 0x23, 0x77, 0xb0,
+ 0xa9, 0x6e, 0x3a, 0xfd, 0x92, 0x55, 0x01, 0xc6,
+ 0xf6, 0x31, 0x65, 0xa2, 0xcd, 0x0a, 0x5e, 0x99,
+ 0x80, 0x47, 0x13, 0xd4, 0xbb, 0x7c, 0x28, 0xef,
+ 0x1a, 0xdd, 0x89, 0x4e, 0x21, 0xe6, 0xb2, 0x75,
+ 0x6c, 0xab, 0xff, 0x38, 0x57, 0x90, 0xc4, 0x03,
+ 0xa4, 0x63, 0x37, 0xf0, 0x9f, 0x58, 0x0c, 0xcb,
+ 0xd2, 0x15, 0x41, 0x86, 0xe9, 0x2e, 0x7a, 0xbd,
+ 0x48, 0x8f, 0xdb, 0x1c, 0x73, 0xb4, 0xe0, 0x27,
+ 0x3e, 0xf9, 0xad, 0x6a, 0x05, 0xc2, 0x96, 0x51,
+ 0x61, 0xa6, 0xf2, 0x35, 0x5a, 0x9d, 0xc9, 0x0e,
+ 0x17, 0xd0, 0x84, 0x43, 0x2c, 0xeb, 0xbf, 0x78,
+ 0x8d, 0x4a, 0x1e, 0xd9, 0xb6, 0x71, 0x25, 0xe2,
+ 0xfb, 0x3c, 0x68, 0xaf, 0xc0, 0x07, 0x53, 0x94,
+ },
+ {
+ 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42,
+ 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c,
+ 0x1c, 0xd4, 0x91, 0x59, 0x1b, 0xd3, 0x96, 0x5e,
+ 0x12, 0xda, 0x9f, 0x57, 0x15, 0xdd, 0x98, 0x50,
+ 0x38, 0xf0, 0xb5, 0x7d, 0x3f, 0xf7, 0xb2, 0x7a,
+ 0x36, 0xfe, 0xbb, 0x73, 0x31, 0xf9, 0xbc, 0x74,
+ 0x24, 0xec, 0xa9, 0x61, 0x23, 0xeb, 0xae, 0x66,
+ 0x2a, 0xe2, 0xa7, 0x6f, 0x2d, 0xe5, 0xa0, 0x68,
+ 0x70, 0xb8, 0xfd, 0x35, 0x77, 0xbf, 0xfa, 0x32,
+ 0x7e, 0xb6, 0xf3, 0x3b, 0x79, 0xb1, 0xf4, 0x3c,
+ 0x6c, 0xa4, 0xe1, 0x29, 0x6b, 0xa3, 0xe6, 0x2e,
+ 0x62, 0xaa, 0xef, 0x27, 0x65, 0xad, 0xe8, 0x20,
+ 0x48, 0x80, 0xc5, 0x0d, 0x4f, 0x87, 0xc2, 0x0a,
+ 0x46, 0x8e, 0xcb, 0x03, 0x41, 0x89, 0xcc, 0x04,
+ 0x54, 0x9c, 0xd9, 0x11, 0x53, 0x9b, 0xde, 0x16,
+ 0x5a, 0x92, 0xd7, 0x1f, 0x5d, 0x95, 0xd0, 0x18,
+ 0xe0, 0x28, 0x6d, 0xa5, 0xe7, 0x2f, 0x6a, 0xa2,
+ 0xee, 0x26, 0x63, 0xab, 0xe9, 0x21, 0x64, 0xac,
+ 0xfc, 0x34, 0x71, 0xb9, 0xfb, 0x33, 0x76, 0xbe,
+ 0xf2, 0x3a, 0x7f, 0xb7, 0xf5, 0x3d, 0x78, 0xb0,
+ 0xd8, 0x10, 0x55, 0x9d, 0xdf, 0x17, 0x52, 0x9a,
+ 0xd6, 0x1e, 0x5b, 0x93, 0xd1, 0x19, 0x5c, 0x94,
+ 0xc4, 0x0c, 0x49, 0x81, 0xc3, 0x0b, 0x4e, 0x86,
+ 0xca, 0x02, 0x47, 0x8f, 0xcd, 0x05, 0x40, 0x88,
+ 0x90, 0x58, 0x1d, 0xd5, 0x97, 0x5f, 0x1a, 0xd2,
+ 0x9e, 0x56, 0x13, 0xdb, 0x99, 0x51, 0x14, 0xdc,
+ 0x8c, 0x44, 0x01, 0xc9, 0x8b, 0x43, 0x06, 0xce,
+ 0x82, 0x4a, 0x0f, 0xc7, 0x85, 0x4d, 0x08, 0xc0,
+ 0xa8, 0x60, 0x25, 0xed, 0xaf, 0x67, 0x22, 0xea,
+ 0xa6, 0x6e, 0x2b, 0xe3, 0xa1, 0x69, 0x2c, 0xe4,
+ 0xb4, 0x7c, 0x39, 0xf1, 0xb3, 0x7b, 0x3e, 0xf6,
+ 0xba, 0x72, 0x37, 0xff, 0xbd, 0x75, 0x30, 0xf8,
+ },
+ {
+ 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45,
+ 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43,
+ 0x0c, 0xc5, 0x83, 0x4a, 0x0f, 0xc6, 0x80, 0x49,
+ 0x0a, 0xc3, 0x85, 0x4c, 0x09, 0xc0, 0x86, 0x4f,
+ 0x18, 0xd1, 0x97, 0x5e, 0x1b, 0xd2, 0x94, 0x5d,
+ 0x1e, 0xd7, 0x91, 0x58, 0x1d, 0xd4, 0x92, 0x5b,
+ 0x14, 0xdd, 0x9b, 0x52, 0x17, 0xde, 0x98, 0x51,
+ 0x12, 0xdb, 0x9d, 0x54, 0x11, 0xd8, 0x9e, 0x57,
+ 0x30, 0xf9, 0xbf, 0x76, 0x33, 0xfa, 0xbc, 0x75,
+ 0x36, 0xff, 0xb9, 0x70, 0x35, 0xfc, 0xba, 0x73,
+ 0x3c, 0xf5, 0xb3, 0x7a, 0x3f, 0xf6, 0xb0, 0x79,
+ 0x3a, 0xf3, 0xb5, 0x7c, 0x39, 0xf0, 0xb6, 0x7f,
+ 0x28, 0xe1, 0xa7, 0x6e, 0x2b, 0xe2, 0xa4, 0x6d,
+ 0x2e, 0xe7, 0xa1, 0x68, 0x2d, 0xe4, 0xa2, 0x6b,
+ 0x24, 0xed, 0xab, 0x62, 0x27, 0xee, 0xa8, 0x61,
+ 0x22, 0xeb, 0xad, 0x64, 0x21, 0xe8, 0xae, 0x67,
+ 0x60, 0xa9, 0xef, 0x26, 0x63, 0xaa, 0xec, 0x25,
+ 0x66, 0xaf, 0xe9, 0x20, 0x65, 0xac, 0xea, 0x23,
+ 0x6c, 0xa5, 0xe3, 0x2a, 0x6f, 0xa6, 0xe0, 0x29,
+ 0x6a, 0xa3, 0xe5, 0x2c, 0x69, 0xa0, 0xe6, 0x2f,
+ 0x78, 0xb1, 0xf7, 0x3e, 0x7b, 0xb2, 0xf4, 0x3d,
+ 0x7e, 0xb7, 0xf1, 0x38, 0x7d, 0xb4, 0xf2, 0x3b,
+ 0x74, 0xbd, 0xfb, 0x32, 0x77, 0xbe, 0xf8, 0x31,
+ 0x72, 0xbb, 0xfd, 0x34, 0x71, 0xb8, 0xfe, 0x37,
+ 0x50, 0x99, 0xdf, 0x16, 0x53, 0x9a, 0xdc, 0x15,
+ 0x56, 0x9f, 0xd9, 0x10, 0x55, 0x9c, 0xda, 0x13,
+ 0x5c, 0x95, 0xd3, 0x1a, 0x5f, 0x96, 0xd0, 0x19,
+ 0x5a, 0x93, 0xd5, 0x1c, 0x59, 0x90, 0xd6, 0x1f,
+ 0x48, 0x81, 0xc7, 0x0e, 0x4b, 0x82, 0xc4, 0x0d,
+ 0x4e, 0x87, 0xc1, 0x08, 0x4d, 0x84, 0xc2, 0x0b,
+ 0x44, 0x8d, 0xcb, 0x02, 0x47, 0x8e, 0xc8, 0x01,
+ 0x42, 0x8b, 0xcd, 0x04, 0x41, 0x88, 0xce, 0x07,
+ },
+ {
+ 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c,
+ 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52,
+ 0x3c, 0xf6, 0xb5, 0x7f, 0x33, 0xf9, 0xba, 0x70,
+ 0x22, 0xe8, 0xab, 0x61, 0x2d, 0xe7, 0xa4, 0x6e,
+ 0x78, 0xb2, 0xf1, 0x3b, 0x77, 0xbd, 0xfe, 0x34,
+ 0x66, 0xac, 0xef, 0x25, 0x69, 0xa3, 0xe0, 0x2a,
+ 0x44, 0x8e, 0xcd, 0x07, 0x4b, 0x81, 0xc2, 0x08,
+ 0x5a, 0x90, 0xd3, 0x19, 0x55, 0x9f, 0xdc, 0x16,
+ 0xf0, 0x3a, 0x79, 0xb3, 0xff, 0x35, 0x76, 0xbc,
+ 0xee, 0x24, 0x67, 0xad, 0xe1, 0x2b, 0x68, 0xa2,
+ 0xcc, 0x06, 0x45, 0x8f, 0xc3, 0x09, 0x4a, 0x80,
+ 0xd2, 0x18, 0x5b, 0x91, 0xdd, 0x17, 0x54, 0x9e,
+ 0x88, 0x42, 0x01, 0xcb, 0x87, 0x4d, 0x0e, 0xc4,
+ 0x96, 0x5c, 0x1f, 0xd5, 0x99, 0x53, 0x10, 0xda,
+ 0xb4, 0x7e, 0x3d, 0xf7, 0xbb, 0x71, 0x32, 0xf8,
+ 0xaa, 0x60, 0x23, 0xe9, 0xa5, 0x6f, 0x2c, 0xe6,
+ 0xfd, 0x37, 0x74, 0xbe, 0xf2, 0x38, 0x7b, 0xb1,
+ 0xe3, 0x29, 0x6a, 0xa0, 0xec, 0x26, 0x65, 0xaf,
+ 0xc1, 0x0b, 0x48, 0x82, 0xce, 0x04, 0x47, 0x8d,
+ 0xdf, 0x15, 0x56, 0x9c, 0xd0, 0x1a, 0x59, 0x93,
+ 0x85, 0x4f, 0x0c, 0xc6, 0x8a, 0x40, 0x03, 0xc9,
+ 0x9b, 0x51, 0x12, 0xd8, 0x94, 0x5e, 0x1d, 0xd7,
+ 0xb9, 0x73, 0x30, 0xfa, 0xb6, 0x7c, 0x3f, 0xf5,
+ 0xa7, 0x6d, 0x2e, 0xe4, 0xa8, 0x62, 0x21, 0xeb,
+ 0x0d, 0xc7, 0x84, 0x4e, 0x02, 0xc8, 0x8b, 0x41,
+ 0x13, 0xd9, 0x9a, 0x50, 0x1c, 0xd6, 0x95, 0x5f,
+ 0x31, 0xfb, 0xb8, 0x72, 0x3e, 0xf4, 0xb7, 0x7d,
+ 0x2f, 0xe5, 0xa6, 0x6c, 0x20, 0xea, 0xa9, 0x63,
+ 0x75, 0xbf, 0xfc, 0x36, 0x7a, 0xb0, 0xf3, 0x39,
+ 0x6b, 0xa1, 0xe2, 0x28, 0x64, 0xae, 0xed, 0x27,
+ 0x49, 0x83, 0xc0, 0x0a, 0x46, 0x8c, 0xcf, 0x05,
+ 0x57, 0x9d, 0xde, 0x14, 0x58, 0x92, 0xd1, 0x1b,
+ },
+ {
+ 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b,
+ 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d,
+ 0x2c, 0xe7, 0xa7, 0x6c, 0x27, 0xec, 0xac, 0x67,
+ 0x3a, 0xf1, 0xb1, 0x7a, 0x31, 0xfa, 0xba, 0x71,
+ 0x58, 0x93, 0xd3, 0x18, 0x53, 0x98, 0xd8, 0x13,
+ 0x4e, 0x85, 0xc5, 0x0e, 0x45, 0x8e, 0xce, 0x05,
+ 0x74, 0xbf, 0xff, 0x34, 0x7f, 0xb4, 0xf4, 0x3f,
+ 0x62, 0xa9, 0xe9, 0x22, 0x69, 0xa2, 0xe2, 0x29,
+ 0xb0, 0x7b, 0x3b, 0xf0, 0xbb, 0x70, 0x30, 0xfb,
+ 0xa6, 0x6d, 0x2d, 0xe6, 0xad, 0x66, 0x26, 0xed,
+ 0x9c, 0x57, 0x17, 0xdc, 0x97, 0x5c, 0x1c, 0xd7,
+ 0x8a, 0x41, 0x01, 0xca, 0x81, 0x4a, 0x0a, 0xc1,
+ 0xe8, 0x23, 0x63, 0xa8, 0xe3, 0x28, 0x68, 0xa3,
+ 0xfe, 0x35, 0x75, 0xbe, 0xf5, 0x3e, 0x7e, 0xb5,
+ 0xc4, 0x0f, 0x4f, 0x84, 0xcf, 0x04, 0x44, 0x8f,
+ 0xd2, 0x19, 0x59, 0x92, 0xd9, 0x12, 0x52, 0x99,
+ 0x7d, 0xb6, 0xf6, 0x3d, 0x76, 0xbd, 0xfd, 0x36,
+ 0x6b, 0xa0, 0xe0, 0x2b, 0x60, 0xab, 0xeb, 0x20,
+ 0x51, 0x9a, 0xda, 0x11, 0x5a, 0x91, 0xd1, 0x1a,
+ 0x47, 0x8c, 0xcc, 0x07, 0x4c, 0x87, 0xc7, 0x0c,
+ 0x25, 0xee, 0xae, 0x65, 0x2e, 0xe5, 0xa5, 0x6e,
+ 0x33, 0xf8, 0xb8, 0x73, 0x38, 0xf3, 0xb3, 0x78,
+ 0x09, 0xc2, 0x82, 0x49, 0x02, 0xc9, 0x89, 0x42,
+ 0x1f, 0xd4, 0x94, 0x5f, 0x14, 0xdf, 0x9f, 0x54,
+ 0xcd, 0x06, 0x46, 0x8d, 0xc6, 0x0d, 0x4d, 0x86,
+ 0xdb, 0x10, 0x50, 0x9b, 0xd0, 0x1b, 0x5b, 0x90,
+ 0xe1, 0x2a, 0x6a, 0xa1, 0xea, 0x21, 0x61, 0xaa,
+ 0xf7, 0x3c, 0x7c, 0xb7, 0xfc, 0x37, 0x77, 0xbc,
+ 0x95, 0x5e, 0x1e, 0xd5, 0x9e, 0x55, 0x15, 0xde,
+ 0x83, 0x48, 0x08, 0xc3, 0x88, 0x43, 0x03, 0xc8,
+ 0xb9, 0x72, 0x32, 0xf9, 0xb2, 0x79, 0x39, 0xf2,
+ 0xaf, 0x64, 0x24, 0xef, 0xa4, 0x6f, 0x2f, 0xe4,
+ },
+ {
+ 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e,
+ 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70,
+ 0x5c, 0x90, 0xd9, 0x15, 0x4b, 0x87, 0xce, 0x02,
+ 0x72, 0xbe, 0xf7, 0x3b, 0x65, 0xa9, 0xe0, 0x2c,
+ 0xb8, 0x74, 0x3d, 0xf1, 0xaf, 0x63, 0x2a, 0xe6,
+ 0x96, 0x5a, 0x13, 0xdf, 0x81, 0x4d, 0x04, 0xc8,
+ 0xe4, 0x28, 0x61, 0xad, 0xf3, 0x3f, 0x76, 0xba,
+ 0xca, 0x06, 0x4f, 0x83, 0xdd, 0x11, 0x58, 0x94,
+ 0x6d, 0xa1, 0xe8, 0x24, 0x7a, 0xb6, 0xff, 0x33,
+ 0x43, 0x8f, 0xc6, 0x0a, 0x54, 0x98, 0xd1, 0x1d,
+ 0x31, 0xfd, 0xb4, 0x78, 0x26, 0xea, 0xa3, 0x6f,
+ 0x1f, 0xd3, 0x9a, 0x56, 0x08, 0xc4, 0x8d, 0x41,
+ 0xd5, 0x19, 0x50, 0x9c, 0xc2, 0x0e, 0x47, 0x8b,
+ 0xfb, 0x37, 0x7e, 0xb2, 0xec, 0x20, 0x69, 0xa5,
+ 0x89, 0x45, 0x0c, 0xc0, 0x9e, 0x52, 0x1b, 0xd7,
+ 0xa7, 0x6b, 0x22, 0xee, 0xb0, 0x7c, 0x35, 0xf9,
+ 0xda, 0x16, 0x5f, 0x93, 0xcd, 0x01, 0x48, 0x84,
+ 0xf4, 0x38, 0x71, 0xbd, 0xe3, 0x2f, 0x66, 0xaa,
+ 0x86, 0x4a, 0x03, 0xcf, 0x91, 0x5d, 0x14, 0xd8,
+ 0xa8, 0x64, 0x2d, 0xe1, 0xbf, 0x73, 0x3a, 0xf6,
+ 0x62, 0xae, 0xe7, 0x2b, 0x75, 0xb9, 0xf0, 0x3c,
+ 0x4c, 0x80, 0xc9, 0x05, 0x5b, 0x97, 0xde, 0x12,
+ 0x3e, 0xf2, 0xbb, 0x77, 0x29, 0xe5, 0xac, 0x60,
+ 0x10, 0xdc, 0x95, 0x59, 0x07, 0xcb, 0x82, 0x4e,
+ 0xb7, 0x7b, 0x32, 0xfe, 0xa0, 0x6c, 0x25, 0xe9,
+ 0x99, 0x55, 0x1c, 0xd0, 0x8e, 0x42, 0x0b, 0xc7,
+ 0xeb, 0x27, 0x6e, 0xa2, 0xfc, 0x30, 0x79, 0xb5,
+ 0xc5, 0x09, 0x40, 0x8c, 0xd2, 0x1e, 0x57, 0x9b,
+ 0x0f, 0xc3, 0x8a, 0x46, 0x18, 0xd4, 0x9d, 0x51,
+ 0x21, 0xed, 0xa4, 0x68, 0x36, 0xfa, 0xb3, 0x7f,
+ 0x53, 0x9f, 0xd6, 0x1a, 0x44, 0x88, 0xc1, 0x0d,
+ 0x7d, 0xb1, 0xf8, 0x34, 0x6a, 0xa6, 0xef, 0x23,
+ },
+ {
+ 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59,
+ 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f,
+ 0x4c, 0x81, 0xcb, 0x06, 0x5f, 0x92, 0xd8, 0x15,
+ 0x6a, 0xa7, 0xed, 0x20, 0x79, 0xb4, 0xfe, 0x33,
+ 0x98, 0x55, 0x1f, 0xd2, 0x8b, 0x46, 0x0c, 0xc1,
+ 0xbe, 0x73, 0x39, 0xf4, 0xad, 0x60, 0x2a, 0xe7,
+ 0xd4, 0x19, 0x53, 0x9e, 0xc7, 0x0a, 0x40, 0x8d,
+ 0xf2, 0x3f, 0x75, 0xb8, 0xe1, 0x2c, 0x66, 0xab,
+ 0x2d, 0xe0, 0xaa, 0x67, 0x3e, 0xf3, 0xb9, 0x74,
+ 0x0b, 0xc6, 0x8c, 0x41, 0x18, 0xd5, 0x9f, 0x52,
+ 0x61, 0xac, 0xe6, 0x2b, 0x72, 0xbf, 0xf5, 0x38,
+ 0x47, 0x8a, 0xc0, 0x0d, 0x54, 0x99, 0xd3, 0x1e,
+ 0xb5, 0x78, 0x32, 0xff, 0xa6, 0x6b, 0x21, 0xec,
+ 0x93, 0x5e, 0x14, 0xd9, 0x80, 0x4d, 0x07, 0xca,
+ 0xf9, 0x34, 0x7e, 0xb3, 0xea, 0x27, 0x6d, 0xa0,
+ 0xdf, 0x12, 0x58, 0x95, 0xcc, 0x01, 0x4b, 0x86,
+ 0x5a, 0x97, 0xdd, 0x10, 0x49, 0x84, 0xce, 0x03,
+ 0x7c, 0xb1, 0xfb, 0x36, 0x6f, 0xa2, 0xe8, 0x25,
+ 0x16, 0xdb, 0x91, 0x5c, 0x05, 0xc8, 0x82, 0x4f,
+ 0x30, 0xfd, 0xb7, 0x7a, 0x23, 0xee, 0xa4, 0x69,
+ 0xc2, 0x0f, 0x45, 0x88, 0xd1, 0x1c, 0x56, 0x9b,
+ 0xe4, 0x29, 0x63, 0xae, 0xf7, 0x3a, 0x70, 0xbd,
+ 0x8e, 0x43, 0x09, 0xc4, 0x9d, 0x50, 0x1a, 0xd7,
+ 0xa8, 0x65, 0x2f, 0xe2, 0xbb, 0x76, 0x3c, 0xf1,
+ 0x77, 0xba, 0xf0, 0x3d, 0x64, 0xa9, 0xe3, 0x2e,
+ 0x51, 0x9c, 0xd6, 0x1b, 0x42, 0x8f, 0xc5, 0x08,
+ 0x3b, 0xf6, 0xbc, 0x71, 0x28, 0xe5, 0xaf, 0x62,
+ 0x1d, 0xd0, 0x9a, 0x57, 0x0e, 0xc3, 0x89, 0x44,
+ 0xef, 0x22, 0x68, 0xa5, 0xfc, 0x31, 0x7b, 0xb6,
+ 0xc9, 0x04, 0x4e, 0x83, 0xda, 0x17, 0x5d, 0x90,
+ 0xa3, 0x6e, 0x24, 0xe9, 0xb0, 0x7d, 0x37, 0xfa,
+ 0x85, 0x48, 0x02, 0xcf, 0x96, 0x5b, 0x11, 0xdc,
+ },
+ {
+ 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50,
+ 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e,
+ 0x7c, 0xb2, 0xfd, 0x33, 0x63, 0xad, 0xe2, 0x2c,
+ 0x42, 0x8c, 0xc3, 0x0d, 0x5d, 0x93, 0xdc, 0x12,
+ 0xf8, 0x36, 0x79, 0xb7, 0xe7, 0x29, 0x66, 0xa8,
+ 0xc6, 0x08, 0x47, 0x89, 0xd9, 0x17, 0x58, 0x96,
+ 0x84, 0x4a, 0x05, 0xcb, 0x9b, 0x55, 0x1a, 0xd4,
+ 0xba, 0x74, 0x3b, 0xf5, 0xa5, 0x6b, 0x24, 0xea,
+ 0xed, 0x23, 0x6c, 0xa2, 0xf2, 0x3c, 0x73, 0xbd,
+ 0xd3, 0x1d, 0x52, 0x9c, 0xcc, 0x02, 0x4d, 0x83,
+ 0x91, 0x5f, 0x10, 0xde, 0x8e, 0x40, 0x0f, 0xc1,
+ 0xaf, 0x61, 0x2e, 0xe0, 0xb0, 0x7e, 0x31, 0xff,
+ 0x15, 0xdb, 0x94, 0x5a, 0x0a, 0xc4, 0x8b, 0x45,
+ 0x2b, 0xe5, 0xaa, 0x64, 0x34, 0xfa, 0xb5, 0x7b,
+ 0x69, 0xa7, 0xe8, 0x26, 0x76, 0xb8, 0xf7, 0x39,
+ 0x57, 0x99, 0xd6, 0x18, 0x48, 0x86, 0xc9, 0x07,
+ 0xc7, 0x09, 0x46, 0x88, 0xd8, 0x16, 0x59, 0x97,
+ 0xf9, 0x37, 0x78, 0xb6, 0xe6, 0x28, 0x67, 0xa9,
+ 0xbb, 0x75, 0x3a, 0xf4, 0xa4, 0x6a, 0x25, 0xeb,
+ 0x85, 0x4b, 0x04, 0xca, 0x9a, 0x54, 0x1b, 0xd5,
+ 0x3f, 0xf1, 0xbe, 0x70, 0x20, 0xee, 0xa1, 0x6f,
+ 0x01, 0xcf, 0x80, 0x4e, 0x1e, 0xd0, 0x9f, 0x51,
+ 0x43, 0x8d, 0xc2, 0x0c, 0x5c, 0x92, 0xdd, 0x13,
+ 0x7d, 0xb3, 0xfc, 0x32, 0x62, 0xac, 0xe3, 0x2d,
+ 0x2a, 0xe4, 0xab, 0x65, 0x35, 0xfb, 0xb4, 0x7a,
+ 0x14, 0xda, 0x95, 0x5b, 0x0b, 0xc5, 0x8a, 0x44,
+ 0x56, 0x98, 0xd7, 0x19, 0x49, 0x87, 0xc8, 0x06,
+ 0x68, 0xa6, 0xe9, 0x27, 0x77, 0xb9, 0xf6, 0x38,
+ 0xd2, 0x1c, 0x53, 0x9d, 0xcd, 0x03, 0x4c, 0x82,
+ 0xec, 0x22, 0x6d, 0xa3, 0xf3, 0x3d, 0x72, 0xbc,
+ 0xae, 0x60, 0x2f, 0xe1, 0xb1, 0x7f, 0x30, 0xfe,
+ 0x90, 0x5e, 0x11, 0xdf, 0x8f, 0x41, 0x0e, 0xc0,
+ },
+ {
+ 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+ 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61,
+ 0x6c, 0xa3, 0xef, 0x20, 0x77, 0xb8, 0xf4, 0x3b,
+ 0x5a, 0x95, 0xd9, 0x16, 0x41, 0x8e, 0xc2, 0x0d,
+ 0xd8, 0x17, 0x5b, 0x94, 0xc3, 0x0c, 0x40, 0x8f,
+ 0xee, 0x21, 0x6d, 0xa2, 0xf5, 0x3a, 0x76, 0xb9,
+ 0xb4, 0x7b, 0x37, 0xf8, 0xaf, 0x60, 0x2c, 0xe3,
+ 0x82, 0x4d, 0x01, 0xce, 0x99, 0x56, 0x1a, 0xd5,
+ 0xad, 0x62, 0x2e, 0xe1, 0xb6, 0x79, 0x35, 0xfa,
+ 0x9b, 0x54, 0x18, 0xd7, 0x80, 0x4f, 0x03, 0xcc,
+ 0xc1, 0x0e, 0x42, 0x8d, 0xda, 0x15, 0x59, 0x96,
+ 0xf7, 0x38, 0x74, 0xbb, 0xec, 0x23, 0x6f, 0xa0,
+ 0x75, 0xba, 0xf6, 0x39, 0x6e, 0xa1, 0xed, 0x22,
+ 0x43, 0x8c, 0xc0, 0x0f, 0x58, 0x97, 0xdb, 0x14,
+ 0x19, 0xd6, 0x9a, 0x55, 0x02, 0xcd, 0x81, 0x4e,
+ 0x2f, 0xe0, 0xac, 0x63, 0x34, 0xfb, 0xb7, 0x78,
+ 0x47, 0x88, 0xc4, 0x0b, 0x5c, 0x93, 0xdf, 0x10,
+ 0x71, 0xbe, 0xf2, 0x3d, 0x6a, 0xa5, 0xe9, 0x26,
+ 0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c,
+ 0x1d, 0xd2, 0x9e, 0x51, 0x06, 0xc9, 0x85, 0x4a,
+ 0x9f, 0x50, 0x1c, 0xd3, 0x84, 0x4b, 0x07, 0xc8,
+ 0xa9, 0x66, 0x2a, 0xe5, 0xb2, 0x7d, 0x31, 0xfe,
+ 0xf3, 0x3c, 0x70, 0xbf, 0xe8, 0x27, 0x6b, 0xa4,
+ 0xc5, 0x0a, 0x46, 0x89, 0xde, 0x11, 0x5d, 0x92,
+ 0xea, 0x25, 0x69, 0xa6, 0xf1, 0x3e, 0x72, 0xbd,
+ 0xdc, 0x13, 0x5f, 0x90, 0xc7, 0x08, 0x44, 0x8b,
+ 0x86, 0x49, 0x05, 0xca, 0x9d, 0x52, 0x1e, 0xd1,
+ 0xb0, 0x7f, 0x33, 0xfc, 0xab, 0x64, 0x28, 0xe7,
+ 0x32, 0xfd, 0xb1, 0x7e, 0x29, 0xe6, 0xaa, 0x65,
+ 0x04, 0xcb, 0x87, 0x48, 0x1f, 0xd0, 0x9c, 0x53,
+ 0x5e, 0x91, 0xdd, 0x12, 0x45, 0x8a, 0xc6, 0x09,
+ 0x68, 0xa7, 0xeb, 0x24, 0x73, 0xbc, 0xf0, 0x3f,
+ },
+ {
+ 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a,
+ 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4,
+ 0x81, 0x51, 0x3c, 0xec, 0xe6, 0x36, 0x5b, 0x8b,
+ 0x4f, 0x9f, 0xf2, 0x22, 0x28, 0xf8, 0x95, 0x45,
+ 0x1f, 0xcf, 0xa2, 0x72, 0x78, 0xa8, 0xc5, 0x15,
+ 0xd1, 0x01, 0x6c, 0xbc, 0xb6, 0x66, 0x0b, 0xdb,
+ 0x9e, 0x4e, 0x23, 0xf3, 0xf9, 0x29, 0x44, 0x94,
+ 0x50, 0x80, 0xed, 0x3d, 0x37, 0xe7, 0x8a, 0x5a,
+ 0x3e, 0xee, 0x83, 0x53, 0x59, 0x89, 0xe4, 0x34,
+ 0xf0, 0x20, 0x4d, 0x9d, 0x97, 0x47, 0x2a, 0xfa,
+ 0xbf, 0x6f, 0x02, 0xd2, 0xd8, 0x08, 0x65, 0xb5,
+ 0x71, 0xa1, 0xcc, 0x1c, 0x16, 0xc6, 0xab, 0x7b,
+ 0x21, 0xf1, 0x9c, 0x4c, 0x46, 0x96, 0xfb, 0x2b,
+ 0xef, 0x3f, 0x52, 0x82, 0x88, 0x58, 0x35, 0xe5,
+ 0xa0, 0x70, 0x1d, 0xcd, 0xc7, 0x17, 0x7a, 0xaa,
+ 0x6e, 0xbe, 0xd3, 0x03, 0x09, 0xd9, 0xb4, 0x64,
+ 0x7c, 0xac, 0xc1, 0x11, 0x1b, 0xcb, 0xa6, 0x76,
+ 0xb2, 0x62, 0x0f, 0xdf, 0xd5, 0x05, 0x68, 0xb8,
+ 0xfd, 0x2d, 0x40, 0x90, 0x9a, 0x4a, 0x27, 0xf7,
+ 0x33, 0xe3, 0x8e, 0x5e, 0x54, 0x84, 0xe9, 0x39,
+ 0x63, 0xb3, 0xde, 0x0e, 0x04, 0xd4, 0xb9, 0x69,
+ 0xad, 0x7d, 0x10, 0xc0, 0xca, 0x1a, 0x77, 0xa7,
+ 0xe2, 0x32, 0x5f, 0x8f, 0x85, 0x55, 0x38, 0xe8,
+ 0x2c, 0xfc, 0x91, 0x41, 0x4b, 0x9b, 0xf6, 0x26,
+ 0x42, 0x92, 0xff, 0x2f, 0x25, 0xf5, 0x98, 0x48,
+ 0x8c, 0x5c, 0x31, 0xe1, 0xeb, 0x3b, 0x56, 0x86,
+ 0xc3, 0x13, 0x7e, 0xae, 0xa4, 0x74, 0x19, 0xc9,
+ 0x0d, 0xdd, 0xb0, 0x60, 0x6a, 0xba, 0xd7, 0x07,
+ 0x5d, 0x8d, 0xe0, 0x30, 0x3a, 0xea, 0x87, 0x57,
+ 0x93, 0x43, 0x2e, 0xfe, 0xf4, 0x24, 0x49, 0x99,
+ 0xdc, 0x0c, 0x61, 0xb1, 0xbb, 0x6b, 0x06, 0xd6,
+ 0x12, 0xc2, 0xaf, 0x7f, 0x75, 0xa5, 0xc8, 0x18,
+ },
+ {
+ 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d,
+ 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb,
+ 0x91, 0x40, 0x2e, 0xff, 0xf2, 0x23, 0x4d, 0x9c,
+ 0x57, 0x86, 0xe8, 0x39, 0x34, 0xe5, 0x8b, 0x5a,
+ 0x3f, 0xee, 0x80, 0x51, 0x5c, 0x8d, 0xe3, 0x32,
+ 0xf9, 0x28, 0x46, 0x97, 0x9a, 0x4b, 0x25, 0xf4,
+ 0xae, 0x7f, 0x11, 0xc0, 0xcd, 0x1c, 0x72, 0xa3,
+ 0x68, 0xb9, 0xd7, 0x06, 0x0b, 0xda, 0xb4, 0x65,
+ 0x7e, 0xaf, 0xc1, 0x10, 0x1d, 0xcc, 0xa2, 0x73,
+ 0xb8, 0x69, 0x07, 0xd6, 0xdb, 0x0a, 0x64, 0xb5,
+ 0xef, 0x3e, 0x50, 0x81, 0x8c, 0x5d, 0x33, 0xe2,
+ 0x29, 0xf8, 0x96, 0x47, 0x4a, 0x9b, 0xf5, 0x24,
+ 0x41, 0x90, 0xfe, 0x2f, 0x22, 0xf3, 0x9d, 0x4c,
+ 0x87, 0x56, 0x38, 0xe9, 0xe4, 0x35, 0x5b, 0x8a,
+ 0xd0, 0x01, 0x6f, 0xbe, 0xb3, 0x62, 0x0c, 0xdd,
+ 0x16, 0xc7, 0xa9, 0x78, 0x75, 0xa4, 0xca, 0x1b,
+ 0xfc, 0x2d, 0x43, 0x92, 0x9f, 0x4e, 0x20, 0xf1,
+ 0x3a, 0xeb, 0x85, 0x54, 0x59, 0x88, 0xe6, 0x37,
+ 0x6d, 0xbc, 0xd2, 0x03, 0x0e, 0xdf, 0xb1, 0x60,
+ 0xab, 0x7a, 0x14, 0xc5, 0xc8, 0x19, 0x77, 0xa6,
+ 0xc3, 0x12, 0x7c, 0xad, 0xa0, 0x71, 0x1f, 0xce,
+ 0x05, 0xd4, 0xba, 0x6b, 0x66, 0xb7, 0xd9, 0x08,
+ 0x52, 0x83, 0xed, 0x3c, 0x31, 0xe0, 0x8e, 0x5f,
+ 0x94, 0x45, 0x2b, 0xfa, 0xf7, 0x26, 0x48, 0x99,
+ 0x82, 0x53, 0x3d, 0xec, 0xe1, 0x30, 0x5e, 0x8f,
+ 0x44, 0x95, 0xfb, 0x2a, 0x27, 0xf6, 0x98, 0x49,
+ 0x13, 0xc2, 0xac, 0x7d, 0x70, 0xa1, 0xcf, 0x1e,
+ 0xd5, 0x04, 0x6a, 0xbb, 0xb6, 0x67, 0x09, 0xd8,
+ 0xbd, 0x6c, 0x02, 0xd3, 0xde, 0x0f, 0x61, 0xb0,
+ 0x7b, 0xaa, 0xc4, 0x15, 0x18, 0xc9, 0xa7, 0x76,
+ 0x2c, 0xfd, 0x93, 0x42, 0x4f, 0x9e, 0xf0, 0x21,
+ 0xea, 0x3b, 0x55, 0x84, 0x89, 0x58, 0x36, 0xe7,
+ },
+ {
+ 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+ 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda,
+ 0xa1, 0x73, 0x18, 0xca, 0xce, 0x1c, 0x77, 0xa5,
+ 0x7f, 0xad, 0xc6, 0x14, 0x10, 0xc2, 0xa9, 0x7b,
+ 0x5f, 0x8d, 0xe6, 0x34, 0x30, 0xe2, 0x89, 0x5b,
+ 0x81, 0x53, 0x38, 0xea, 0xee, 0x3c, 0x57, 0x85,
+ 0xfe, 0x2c, 0x47, 0x95, 0x91, 0x43, 0x28, 0xfa,
+ 0x20, 0xf2, 0x99, 0x4b, 0x4f, 0x9d, 0xf6, 0x24,
+ 0xbe, 0x6c, 0x07, 0xd5, 0xd1, 0x03, 0x68, 0xba,
+ 0x60, 0xb2, 0xd9, 0x0b, 0x0f, 0xdd, 0xb6, 0x64,
+ 0x1f, 0xcd, 0xa6, 0x74, 0x70, 0xa2, 0xc9, 0x1b,
+ 0xc1, 0x13, 0x78, 0xaa, 0xae, 0x7c, 0x17, 0xc5,
+ 0xe1, 0x33, 0x58, 0x8a, 0x8e, 0x5c, 0x37, 0xe5,
+ 0x3f, 0xed, 0x86, 0x54, 0x50, 0x82, 0xe9, 0x3b,
+ 0x40, 0x92, 0xf9, 0x2b, 0x2f, 0xfd, 0x96, 0x44,
+ 0x9e, 0x4c, 0x27, 0xf5, 0xf1, 0x23, 0x48, 0x9a,
+ 0x61, 0xb3, 0xd8, 0x0a, 0x0e, 0xdc, 0xb7, 0x65,
+ 0xbf, 0x6d, 0x06, 0xd4, 0xd0, 0x02, 0x69, 0xbb,
+ 0xc0, 0x12, 0x79, 0xab, 0xaf, 0x7d, 0x16, 0xc4,
+ 0x1e, 0xcc, 0xa7, 0x75, 0x71, 0xa3, 0xc8, 0x1a,
+ 0x3e, 0xec, 0x87, 0x55, 0x51, 0x83, 0xe8, 0x3a,
+ 0xe0, 0x32, 0x59, 0x8b, 0x8f, 0x5d, 0x36, 0xe4,
+ 0x9f, 0x4d, 0x26, 0xf4, 0xf0, 0x22, 0x49, 0x9b,
+ 0x41, 0x93, 0xf8, 0x2a, 0x2e, 0xfc, 0x97, 0x45,
+ 0xdf, 0x0d, 0x66, 0xb4, 0xb0, 0x62, 0x09, 0xdb,
+ 0x01, 0xd3, 0xb8, 0x6a, 0x6e, 0xbc, 0xd7, 0x05,
+ 0x7e, 0xac, 0xc7, 0x15, 0x11, 0xc3, 0xa8, 0x7a,
+ 0xa0, 0x72, 0x19, 0xcb, 0xcf, 0x1d, 0x76, 0xa4,
+ 0x80, 0x52, 0x39, 0xeb, 0xef, 0x3d, 0x56, 0x84,
+ 0x5e, 0x8c, 0xe7, 0x35, 0x31, 0xe3, 0x88, 0x5a,
+ 0x21, 0xf3, 0x98, 0x4a, 0x4e, 0x9c, 0xf7, 0x25,
+ 0xff, 0x2d, 0x46, 0x94, 0x90, 0x42, 0x29, 0xfb,
+ },
+ {
+ 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03,
+ 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5,
+ 0xb1, 0x62, 0x0a, 0xd9, 0xda, 0x09, 0x61, 0xb2,
+ 0x67, 0xb4, 0xdc, 0x0f, 0x0c, 0xdf, 0xb7, 0x64,
+ 0x7f, 0xac, 0xc4, 0x17, 0x14, 0xc7, 0xaf, 0x7c,
+ 0xa9, 0x7a, 0x12, 0xc1, 0xc2, 0x11, 0x79, 0xaa,
+ 0xce, 0x1d, 0x75, 0xa6, 0xa5, 0x76, 0x1e, 0xcd,
+ 0x18, 0xcb, 0xa3, 0x70, 0x73, 0xa0, 0xc8, 0x1b,
+ 0xfe, 0x2d, 0x45, 0x96, 0x95, 0x46, 0x2e, 0xfd,
+ 0x28, 0xfb, 0x93, 0x40, 0x43, 0x90, 0xf8, 0x2b,
+ 0x4f, 0x9c, 0xf4, 0x27, 0x24, 0xf7, 0x9f, 0x4c,
+ 0x99, 0x4a, 0x22, 0xf1, 0xf2, 0x21, 0x49, 0x9a,
+ 0x81, 0x52, 0x3a, 0xe9, 0xea, 0x39, 0x51, 0x82,
+ 0x57, 0x84, 0xec, 0x3f, 0x3c, 0xef, 0x87, 0x54,
+ 0x30, 0xe3, 0x8b, 0x58, 0x5b, 0x88, 0xe0, 0x33,
+ 0xe6, 0x35, 0x5d, 0x8e, 0x8d, 0x5e, 0x36, 0xe5,
+ 0xe1, 0x32, 0x5a, 0x89, 0x8a, 0x59, 0x31, 0xe2,
+ 0x37, 0xe4, 0x8c, 0x5f, 0x5c, 0x8f, 0xe7, 0x34,
+ 0x50, 0x83, 0xeb, 0x38, 0x3b, 0xe8, 0x80, 0x53,
+ 0x86, 0x55, 0x3d, 0xee, 0xed, 0x3e, 0x56, 0x85,
+ 0x9e, 0x4d, 0x25, 0xf6, 0xf5, 0x26, 0x4e, 0x9d,
+ 0x48, 0x9b, 0xf3, 0x20, 0x23, 0xf0, 0x98, 0x4b,
+ 0x2f, 0xfc, 0x94, 0x47, 0x44, 0x97, 0xff, 0x2c,
+ 0xf9, 0x2a, 0x42, 0x91, 0x92, 0x41, 0x29, 0xfa,
+ 0x1f, 0xcc, 0xa4, 0x77, 0x74, 0xa7, 0xcf, 0x1c,
+ 0xc9, 0x1a, 0x72, 0xa1, 0xa2, 0x71, 0x19, 0xca,
+ 0xae, 0x7d, 0x15, 0xc6, 0xc5, 0x16, 0x7e, 0xad,
+ 0x78, 0xab, 0xc3, 0x10, 0x13, 0xc0, 0xa8, 0x7b,
+ 0x60, 0xb3, 0xdb, 0x08, 0x0b, 0xd8, 0xb0, 0x63,
+ 0xb6, 0x65, 0x0d, 0xde, 0xdd, 0x0e, 0x66, 0xb5,
+ 0xd1, 0x02, 0x6a, 0xb9, 0xba, 0x69, 0x01, 0xd2,
+ 0x07, 0xd4, 0xbc, 0x6f, 0x6c, 0xbf, 0xd7, 0x04,
+ },
+ {
+ 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16,
+ 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8,
+ 0xc1, 0x15, 0x74, 0xa0, 0xb6, 0x62, 0x03, 0xd7,
+ 0x2f, 0xfb, 0x9a, 0x4e, 0x58, 0x8c, 0xed, 0x39,
+ 0x9f, 0x4b, 0x2a, 0xfe, 0xe8, 0x3c, 0x5d, 0x89,
+ 0x71, 0xa5, 0xc4, 0x10, 0x06, 0xd2, 0xb3, 0x67,
+ 0x5e, 0x8a, 0xeb, 0x3f, 0x29, 0xfd, 0x9c, 0x48,
+ 0xb0, 0x64, 0x05, 0xd1, 0xc7, 0x13, 0x72, 0xa6,
+ 0x23, 0xf7, 0x96, 0x42, 0x54, 0x80, 0xe1, 0x35,
+ 0xcd, 0x19, 0x78, 0xac, 0xba, 0x6e, 0x0f, 0xdb,
+ 0xe2, 0x36, 0x57, 0x83, 0x95, 0x41, 0x20, 0xf4,
+ 0x0c, 0xd8, 0xb9, 0x6d, 0x7b, 0xaf, 0xce, 0x1a,
+ 0xbc, 0x68, 0x09, 0xdd, 0xcb, 0x1f, 0x7e, 0xaa,
+ 0x52, 0x86, 0xe7, 0x33, 0x25, 0xf1, 0x90, 0x44,
+ 0x7d, 0xa9, 0xc8, 0x1c, 0x0a, 0xde, 0xbf, 0x6b,
+ 0x93, 0x47, 0x26, 0xf2, 0xe4, 0x30, 0x51, 0x85,
+ 0x46, 0x92, 0xf3, 0x27, 0x31, 0xe5, 0x84, 0x50,
+ 0xa8, 0x7c, 0x1d, 0xc9, 0xdf, 0x0b, 0x6a, 0xbe,
+ 0x87, 0x53, 0x32, 0xe6, 0xf0, 0x24, 0x45, 0x91,
+ 0x69, 0xbd, 0xdc, 0x08, 0x1e, 0xca, 0xab, 0x7f,
+ 0xd9, 0x0d, 0x6c, 0xb8, 0xae, 0x7a, 0x1b, 0xcf,
+ 0x37, 0xe3, 0x82, 0x56, 0x40, 0x94, 0xf5, 0x21,
+ 0x18, 0xcc, 0xad, 0x79, 0x6f, 0xbb, 0xda, 0x0e,
+ 0xf6, 0x22, 0x43, 0x97, 0x81, 0x55, 0x34, 0xe0,
+ 0x65, 0xb1, 0xd0, 0x04, 0x12, 0xc6, 0xa7, 0x73,
+ 0x8b, 0x5f, 0x3e, 0xea, 0xfc, 0x28, 0x49, 0x9d,
+ 0xa4, 0x70, 0x11, 0xc5, 0xd3, 0x07, 0x66, 0xb2,
+ 0x4a, 0x9e, 0xff, 0x2b, 0x3d, 0xe9, 0x88, 0x5c,
+ 0xfa, 0x2e, 0x4f, 0x9b, 0x8d, 0x59, 0x38, 0xec,
+ 0x14, 0xc0, 0xa1, 0x75, 0x63, 0xb7, 0xd6, 0x02,
+ 0x3b, 0xef, 0x8e, 0x5a, 0x4c, 0x98, 0xf9, 0x2d,
+ 0xd5, 0x01, 0x60, 0xb4, 0xa2, 0x76, 0x17, 0xc3,
+ },
+ {
+ 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11,
+ 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7,
+ 0xd1, 0x04, 0x66, 0xb3, 0xa2, 0x77, 0x15, 0xc0,
+ 0x37, 0xe2, 0x80, 0x55, 0x44, 0x91, 0xf3, 0x26,
+ 0xbf, 0x6a, 0x08, 0xdd, 0xcc, 0x19, 0x7b, 0xae,
+ 0x59, 0x8c, 0xee, 0x3b, 0x2a, 0xff, 0x9d, 0x48,
+ 0x6e, 0xbb, 0xd9, 0x0c, 0x1d, 0xc8, 0xaa, 0x7f,
+ 0x88, 0x5d, 0x3f, 0xea, 0xfb, 0x2e, 0x4c, 0x99,
+ 0x63, 0xb6, 0xd4, 0x01, 0x10, 0xc5, 0xa7, 0x72,
+ 0x85, 0x50, 0x32, 0xe7, 0xf6, 0x23, 0x41, 0x94,
+ 0xb2, 0x67, 0x05, 0xd0, 0xc1, 0x14, 0x76, 0xa3,
+ 0x54, 0x81, 0xe3, 0x36, 0x27, 0xf2, 0x90, 0x45,
+ 0xdc, 0x09, 0x6b, 0xbe, 0xaf, 0x7a, 0x18, 0xcd,
+ 0x3a, 0xef, 0x8d, 0x58, 0x49, 0x9c, 0xfe, 0x2b,
+ 0x0d, 0xd8, 0xba, 0x6f, 0x7e, 0xab, 0xc9, 0x1c,
+ 0xeb, 0x3e, 0x5c, 0x89, 0x98, 0x4d, 0x2f, 0xfa,
+ 0xc6, 0x13, 0x71, 0xa4, 0xb5, 0x60, 0x02, 0xd7,
+ 0x20, 0xf5, 0x97, 0x42, 0x53, 0x86, 0xe4, 0x31,
+ 0x17, 0xc2, 0xa0, 0x75, 0x64, 0xb1, 0xd3, 0x06,
+ 0xf1, 0x24, 0x46, 0x93, 0x82, 0x57, 0x35, 0xe0,
+ 0x79, 0xac, 0xce, 0x1b, 0x0a, 0xdf, 0xbd, 0x68,
+ 0x9f, 0x4a, 0x28, 0xfd, 0xec, 0x39, 0x5b, 0x8e,
+ 0xa8, 0x7d, 0x1f, 0xca, 0xdb, 0x0e, 0x6c, 0xb9,
+ 0x4e, 0x9b, 0xf9, 0x2c, 0x3d, 0xe8, 0x8a, 0x5f,
+ 0xa5, 0x70, 0x12, 0xc7, 0xd6, 0x03, 0x61, 0xb4,
+ 0x43, 0x96, 0xf4, 0x21, 0x30, 0xe5, 0x87, 0x52,
+ 0x74, 0xa1, 0xc3, 0x16, 0x07, 0xd2, 0xb0, 0x65,
+ 0x92, 0x47, 0x25, 0xf0, 0xe1, 0x34, 0x56, 0x83,
+ 0x1a, 0xcf, 0xad, 0x78, 0x69, 0xbc, 0xde, 0x0b,
+ 0xfc, 0x29, 0x4b, 0x9e, 0x8f, 0x5a, 0x38, 0xed,
+ 0xcb, 0x1e, 0x7c, 0xa9, 0xb8, 0x6d, 0x0f, 0xda,
+ 0x2d, 0xf8, 0x9a, 0x4f, 0x5e, 0x8b, 0xe9, 0x3c,
+ },
+ {
+ 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18,
+ 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6,
+ 0xe1, 0x37, 0x50, 0x86, 0x9e, 0x48, 0x2f, 0xf9,
+ 0x1f, 0xc9, 0xae, 0x78, 0x60, 0xb6, 0xd1, 0x07,
+ 0xdf, 0x09, 0x6e, 0xb8, 0xa0, 0x76, 0x11, 0xc7,
+ 0x21, 0xf7, 0x90, 0x46, 0x5e, 0x88, 0xef, 0x39,
+ 0x3e, 0xe8, 0x8f, 0x59, 0x41, 0x97, 0xf0, 0x26,
+ 0xc0, 0x16, 0x71, 0xa7, 0xbf, 0x69, 0x0e, 0xd8,
+ 0xa3, 0x75, 0x12, 0xc4, 0xdc, 0x0a, 0x6d, 0xbb,
+ 0x5d, 0x8b, 0xec, 0x3a, 0x22, 0xf4, 0x93, 0x45,
+ 0x42, 0x94, 0xf3, 0x25, 0x3d, 0xeb, 0x8c, 0x5a,
+ 0xbc, 0x6a, 0x0d, 0xdb, 0xc3, 0x15, 0x72, 0xa4,
+ 0x7c, 0xaa, 0xcd, 0x1b, 0x03, 0xd5, 0xb2, 0x64,
+ 0x82, 0x54, 0x33, 0xe5, 0xfd, 0x2b, 0x4c, 0x9a,
+ 0x9d, 0x4b, 0x2c, 0xfa, 0xe2, 0x34, 0x53, 0x85,
+ 0x63, 0xb5, 0xd2, 0x04, 0x1c, 0xca, 0xad, 0x7b,
+ 0x5b, 0x8d, 0xea, 0x3c, 0x24, 0xf2, 0x95, 0x43,
+ 0xa5, 0x73, 0x14, 0xc2, 0xda, 0x0c, 0x6b, 0xbd,
+ 0xba, 0x6c, 0x0b, 0xdd, 0xc5, 0x13, 0x74, 0xa2,
+ 0x44, 0x92, 0xf5, 0x23, 0x3b, 0xed, 0x8a, 0x5c,
+ 0x84, 0x52, 0x35, 0xe3, 0xfb, 0x2d, 0x4a, 0x9c,
+ 0x7a, 0xac, 0xcb, 0x1d, 0x05, 0xd3, 0xb4, 0x62,
+ 0x65, 0xb3, 0xd4, 0x02, 0x1a, 0xcc, 0xab, 0x7d,
+ 0x9b, 0x4d, 0x2a, 0xfc, 0xe4, 0x32, 0x55, 0x83,
+ 0xf8, 0x2e, 0x49, 0x9f, 0x87, 0x51, 0x36, 0xe0,
+ 0x06, 0xd0, 0xb7, 0x61, 0x79, 0xaf, 0xc8, 0x1e,
+ 0x19, 0xcf, 0xa8, 0x7e, 0x66, 0xb0, 0xd7, 0x01,
+ 0xe7, 0x31, 0x56, 0x80, 0x98, 0x4e, 0x29, 0xff,
+ 0x27, 0xf1, 0x96, 0x40, 0x58, 0x8e, 0xe9, 0x3f,
+ 0xd9, 0x0f, 0x68, 0xbe, 0xa6, 0x70, 0x17, 0xc1,
+ 0xc6, 0x10, 0x77, 0xa1, 0xb9, 0x6f, 0x08, 0xde,
+ 0x38, 0xee, 0x89, 0x5f, 0x47, 0x91, 0xf6, 0x20,
+ },
+ {
+ 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f,
+ 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9,
+ 0xf1, 0x26, 0x42, 0x95, 0x8a, 0x5d, 0x39, 0xee,
+ 0x07, 0xd0, 0xb4, 0x63, 0x7c, 0xab, 0xcf, 0x18,
+ 0xff, 0x28, 0x4c, 0x9b, 0x84, 0x53, 0x37, 0xe0,
+ 0x09, 0xde, 0xba, 0x6d, 0x72, 0xa5, 0xc1, 0x16,
+ 0x0e, 0xd9, 0xbd, 0x6a, 0x75, 0xa2, 0xc6, 0x11,
+ 0xf8, 0x2f, 0x4b, 0x9c, 0x83, 0x54, 0x30, 0xe7,
+ 0xe3, 0x34, 0x50, 0x87, 0x98, 0x4f, 0x2b, 0xfc,
+ 0x15, 0xc2, 0xa6, 0x71, 0x6e, 0xb9, 0xdd, 0x0a,
+ 0x12, 0xc5, 0xa1, 0x76, 0x69, 0xbe, 0xda, 0x0d,
+ 0xe4, 0x33, 0x57, 0x80, 0x9f, 0x48, 0x2c, 0xfb,
+ 0x1c, 0xcb, 0xaf, 0x78, 0x67, 0xb0, 0xd4, 0x03,
+ 0xea, 0x3d, 0x59, 0x8e, 0x91, 0x46, 0x22, 0xf5,
+ 0xed, 0x3a, 0x5e, 0x89, 0x96, 0x41, 0x25, 0xf2,
+ 0x1b, 0xcc, 0xa8, 0x7f, 0x60, 0xb7, 0xd3, 0x04,
+ 0xdb, 0x0c, 0x68, 0xbf, 0xa0, 0x77, 0x13, 0xc4,
+ 0x2d, 0xfa, 0x9e, 0x49, 0x56, 0x81, 0xe5, 0x32,
+ 0x2a, 0xfd, 0x99, 0x4e, 0x51, 0x86, 0xe2, 0x35,
+ 0xdc, 0x0b, 0x6f, 0xb8, 0xa7, 0x70, 0x14, 0xc3,
+ 0x24, 0xf3, 0x97, 0x40, 0x5f, 0x88, 0xec, 0x3b,
+ 0xd2, 0x05, 0x61, 0xb6, 0xa9, 0x7e, 0x1a, 0xcd,
+ 0xd5, 0x02, 0x66, 0xb1, 0xae, 0x79, 0x1d, 0xca,
+ 0x23, 0xf4, 0x90, 0x47, 0x58, 0x8f, 0xeb, 0x3c,
+ 0x38, 0xef, 0x8b, 0x5c, 0x43, 0x94, 0xf0, 0x27,
+ 0xce, 0x19, 0x7d, 0xaa, 0xb5, 0x62, 0x06, 0xd1,
+ 0xc9, 0x1e, 0x7a, 0xad, 0xb2, 0x65, 0x01, 0xd6,
+ 0x3f, 0xe8, 0x8c, 0x5b, 0x44, 0x93, 0xf7, 0x20,
+ 0xc7, 0x10, 0x74, 0xa3, 0xbc, 0x6b, 0x0f, 0xd8,
+ 0x31, 0xe6, 0x82, 0x55, 0x4a, 0x9d, 0xf9, 0x2e,
+ 0x36, 0xe1, 0x85, 0x52, 0x4d, 0x9a, 0xfe, 0x29,
+ 0xc0, 0x17, 0x73, 0xa4, 0xbb, 0x6c, 0x08, 0xdf,
+ },
+ {
+ 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32,
+ 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc,
+ 0x01, 0xd9, 0xac, 0x74, 0x46, 0x9e, 0xeb, 0x33,
+ 0x8f, 0x57, 0x22, 0xfa, 0xc8, 0x10, 0x65, 0xbd,
+ 0x02, 0xda, 0xaf, 0x77, 0x45, 0x9d, 0xe8, 0x30,
+ 0x8c, 0x54, 0x21, 0xf9, 0xcb, 0x13, 0x66, 0xbe,
+ 0x03, 0xdb, 0xae, 0x76, 0x44, 0x9c, 0xe9, 0x31,
+ 0x8d, 0x55, 0x20, 0xf8, 0xca, 0x12, 0x67, 0xbf,
+ 0x04, 0xdc, 0xa9, 0x71, 0x43, 0x9b, 0xee, 0x36,
+ 0x8a, 0x52, 0x27, 0xff, 0xcd, 0x15, 0x60, 0xb8,
+ 0x05, 0xdd, 0xa8, 0x70, 0x42, 0x9a, 0xef, 0x37,
+ 0x8b, 0x53, 0x26, 0xfe, 0xcc, 0x14, 0x61, 0xb9,
+ 0x06, 0xde, 0xab, 0x73, 0x41, 0x99, 0xec, 0x34,
+ 0x88, 0x50, 0x25, 0xfd, 0xcf, 0x17, 0x62, 0xba,
+ 0x07, 0xdf, 0xaa, 0x72, 0x40, 0x98, 0xed, 0x35,
+ 0x89, 0x51, 0x24, 0xfc, 0xce, 0x16, 0x63, 0xbb,
+ 0x08, 0xd0, 0xa5, 0x7d, 0x4f, 0x97, 0xe2, 0x3a,
+ 0x86, 0x5e, 0x2b, 0xf3, 0xc1, 0x19, 0x6c, 0xb4,
+ 0x09, 0xd1, 0xa4, 0x7c, 0x4e, 0x96, 0xe3, 0x3b,
+ 0x87, 0x5f, 0x2a, 0xf2, 0xc0, 0x18, 0x6d, 0xb5,
+ 0x0a, 0xd2, 0xa7, 0x7f, 0x4d, 0x95, 0xe0, 0x38,
+ 0x84, 0x5c, 0x29, 0xf1, 0xc3, 0x1b, 0x6e, 0xb6,
+ 0x0b, 0xd3, 0xa6, 0x7e, 0x4c, 0x94, 0xe1, 0x39,
+ 0x85, 0x5d, 0x28, 0xf0, 0xc2, 0x1a, 0x6f, 0xb7,
+ 0x0c, 0xd4, 0xa1, 0x79, 0x4b, 0x93, 0xe6, 0x3e,
+ 0x82, 0x5a, 0x2f, 0xf7, 0xc5, 0x1d, 0x68, 0xb0,
+ 0x0d, 0xd5, 0xa0, 0x78, 0x4a, 0x92, 0xe7, 0x3f,
+ 0x83, 0x5b, 0x2e, 0xf6, 0xc4, 0x1c, 0x69, 0xb1,
+ 0x0e, 0xd6, 0xa3, 0x7b, 0x49, 0x91, 0xe4, 0x3c,
+ 0x80, 0x58, 0x2d, 0xf5, 0xc7, 0x1f, 0x6a, 0xb2,
+ 0x0f, 0xd7, 0xa2, 0x7a, 0x48, 0x90, 0xe5, 0x3d,
+ 0x81, 0x59, 0x2c, 0xf4, 0xc6, 0x1e, 0x6b, 0xb3,
+ },
+ {
+ 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35,
+ 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3,
+ 0x11, 0xc8, 0xbe, 0x67, 0x52, 0x8b, 0xfd, 0x24,
+ 0x97, 0x4e, 0x38, 0xe1, 0xd4, 0x0d, 0x7b, 0xa2,
+ 0x22, 0xfb, 0x8d, 0x54, 0x61, 0xb8, 0xce, 0x17,
+ 0xa4, 0x7d, 0x0b, 0xd2, 0xe7, 0x3e, 0x48, 0x91,
+ 0x33, 0xea, 0x9c, 0x45, 0x70, 0xa9, 0xdf, 0x06,
+ 0xb5, 0x6c, 0x1a, 0xc3, 0xf6, 0x2f, 0x59, 0x80,
+ 0x44, 0x9d, 0xeb, 0x32, 0x07, 0xde, 0xa8, 0x71,
+ 0xc2, 0x1b, 0x6d, 0xb4, 0x81, 0x58, 0x2e, 0xf7,
+ 0x55, 0x8c, 0xfa, 0x23, 0x16, 0xcf, 0xb9, 0x60,
+ 0xd3, 0x0a, 0x7c, 0xa5, 0x90, 0x49, 0x3f, 0xe6,
+ 0x66, 0xbf, 0xc9, 0x10, 0x25, 0xfc, 0x8a, 0x53,
+ 0xe0, 0x39, 0x4f, 0x96, 0xa3, 0x7a, 0x0c, 0xd5,
+ 0x77, 0xae, 0xd8, 0x01, 0x34, 0xed, 0x9b, 0x42,
+ 0xf1, 0x28, 0x5e, 0x87, 0xb2, 0x6b, 0x1d, 0xc4,
+ 0x88, 0x51, 0x27, 0xfe, 0xcb, 0x12, 0x64, 0xbd,
+ 0x0e, 0xd7, 0xa1, 0x78, 0x4d, 0x94, 0xe2, 0x3b,
+ 0x99, 0x40, 0x36, 0xef, 0xda, 0x03, 0x75, 0xac,
+ 0x1f, 0xc6, 0xb0, 0x69, 0x5c, 0x85, 0xf3, 0x2a,
+ 0xaa, 0x73, 0x05, 0xdc, 0xe9, 0x30, 0x46, 0x9f,
+ 0x2c, 0xf5, 0x83, 0x5a, 0x6f, 0xb6, 0xc0, 0x19,
+ 0xbb, 0x62, 0x14, 0xcd, 0xf8, 0x21, 0x57, 0x8e,
+ 0x3d, 0xe4, 0x92, 0x4b, 0x7e, 0xa7, 0xd1, 0x08,
+ 0xcc, 0x15, 0x63, 0xba, 0x8f, 0x56, 0x20, 0xf9,
+ 0x4a, 0x93, 0xe5, 0x3c, 0x09, 0xd0, 0xa6, 0x7f,
+ 0xdd, 0x04, 0x72, 0xab, 0x9e, 0x47, 0x31, 0xe8,
+ 0x5b, 0x82, 0xf4, 0x2d, 0x18, 0xc1, 0xb7, 0x6e,
+ 0xee, 0x37, 0x41, 0x98, 0xad, 0x74, 0x02, 0xdb,
+ 0x68, 0xb1, 0xc7, 0x1e, 0x2b, 0xf2, 0x84, 0x5d,
+ 0xff, 0x26, 0x50, 0x89, 0xbc, 0x65, 0x13, 0xca,
+ 0x79, 0xa0, 0xd6, 0x0f, 0x3a, 0xe3, 0x95, 0x4c,
+ },
+ {
+ 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c,
+ 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2,
+ 0x21, 0xfb, 0x88, 0x52, 0x6e, 0xb4, 0xc7, 0x1d,
+ 0xbf, 0x65, 0x16, 0xcc, 0xf0, 0x2a, 0x59, 0x83,
+ 0x42, 0x98, 0xeb, 0x31, 0x0d, 0xd7, 0xa4, 0x7e,
+ 0xdc, 0x06, 0x75, 0xaf, 0x93, 0x49, 0x3a, 0xe0,
+ 0x63, 0xb9, 0xca, 0x10, 0x2c, 0xf6, 0x85, 0x5f,
+ 0xfd, 0x27, 0x54, 0x8e, 0xb2, 0x68, 0x1b, 0xc1,
+ 0x84, 0x5e, 0x2d, 0xf7, 0xcb, 0x11, 0x62, 0xb8,
+ 0x1a, 0xc0, 0xb3, 0x69, 0x55, 0x8f, 0xfc, 0x26,
+ 0xa5, 0x7f, 0x0c, 0xd6, 0xea, 0x30, 0x43, 0x99,
+ 0x3b, 0xe1, 0x92, 0x48, 0x74, 0xae, 0xdd, 0x07,
+ 0xc6, 0x1c, 0x6f, 0xb5, 0x89, 0x53, 0x20, 0xfa,
+ 0x58, 0x82, 0xf1, 0x2b, 0x17, 0xcd, 0xbe, 0x64,
+ 0xe7, 0x3d, 0x4e, 0x94, 0xa8, 0x72, 0x01, 0xdb,
+ 0x79, 0xa3, 0xd0, 0x0a, 0x36, 0xec, 0x9f, 0x45,
+ 0x15, 0xcf, 0xbc, 0x66, 0x5a, 0x80, 0xf3, 0x29,
+ 0x8b, 0x51, 0x22, 0xf8, 0xc4, 0x1e, 0x6d, 0xb7,
+ 0x34, 0xee, 0x9d, 0x47, 0x7b, 0xa1, 0xd2, 0x08,
+ 0xaa, 0x70, 0x03, 0xd9, 0xe5, 0x3f, 0x4c, 0x96,
+ 0x57, 0x8d, 0xfe, 0x24, 0x18, 0xc2, 0xb1, 0x6b,
+ 0xc9, 0x13, 0x60, 0xba, 0x86, 0x5c, 0x2f, 0xf5,
+ 0x76, 0xac, 0xdf, 0x05, 0x39, 0xe3, 0x90, 0x4a,
+ 0xe8, 0x32, 0x41, 0x9b, 0xa7, 0x7d, 0x0e, 0xd4,
+ 0x91, 0x4b, 0x38, 0xe2, 0xde, 0x04, 0x77, 0xad,
+ 0x0f, 0xd5, 0xa6, 0x7c, 0x40, 0x9a, 0xe9, 0x33,
+ 0xb0, 0x6a, 0x19, 0xc3, 0xff, 0x25, 0x56, 0x8c,
+ 0x2e, 0xf4, 0x87, 0x5d, 0x61, 0xbb, 0xc8, 0x12,
+ 0xd3, 0x09, 0x7a, 0xa0, 0x9c, 0x46, 0x35, 0xef,
+ 0x4d, 0x97, 0xe4, 0x3e, 0x02, 0xd8, 0xab, 0x71,
+ 0xf2, 0x28, 0x5b, 0x81, 0xbd, 0x67, 0x14, 0xce,
+ 0x6c, 0xb6, 0xc5, 0x1f, 0x23, 0xf9, 0x8a, 0x50,
+ },
+ {
+ 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b,
+ 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad,
+ 0x31, 0xea, 0x9a, 0x41, 0x7a, 0xa1, 0xd1, 0x0a,
+ 0xa7, 0x7c, 0x0c, 0xd7, 0xec, 0x37, 0x47, 0x9c,
+ 0x62, 0xb9, 0xc9, 0x12, 0x29, 0xf2, 0x82, 0x59,
+ 0xf4, 0x2f, 0x5f, 0x84, 0xbf, 0x64, 0x14, 0xcf,
+ 0x53, 0x88, 0xf8, 0x23, 0x18, 0xc3, 0xb3, 0x68,
+ 0xc5, 0x1e, 0x6e, 0xb5, 0x8e, 0x55, 0x25, 0xfe,
+ 0xc4, 0x1f, 0x6f, 0xb4, 0x8f, 0x54, 0x24, 0xff,
+ 0x52, 0x89, 0xf9, 0x22, 0x19, 0xc2, 0xb2, 0x69,
+ 0xf5, 0x2e, 0x5e, 0x85, 0xbe, 0x65, 0x15, 0xce,
+ 0x63, 0xb8, 0xc8, 0x13, 0x28, 0xf3, 0x83, 0x58,
+ 0xa6, 0x7d, 0x0d, 0xd6, 0xed, 0x36, 0x46, 0x9d,
+ 0x30, 0xeb, 0x9b, 0x40, 0x7b, 0xa0, 0xd0, 0x0b,
+ 0x97, 0x4c, 0x3c, 0xe7, 0xdc, 0x07, 0x77, 0xac,
+ 0x01, 0xda, 0xaa, 0x71, 0x4a, 0x91, 0xe1, 0x3a,
+ 0x95, 0x4e, 0x3e, 0xe5, 0xde, 0x05, 0x75, 0xae,
+ 0x03, 0xd8, 0xa8, 0x73, 0x48, 0x93, 0xe3, 0x38,
+ 0xa4, 0x7f, 0x0f, 0xd4, 0xef, 0x34, 0x44, 0x9f,
+ 0x32, 0xe9, 0x99, 0x42, 0x79, 0xa2, 0xd2, 0x09,
+ 0xf7, 0x2c, 0x5c, 0x87, 0xbc, 0x67, 0x17, 0xcc,
+ 0x61, 0xba, 0xca, 0x11, 0x2a, 0xf1, 0x81, 0x5a,
+ 0xc6, 0x1d, 0x6d, 0xb6, 0x8d, 0x56, 0x26, 0xfd,
+ 0x50, 0x8b, 0xfb, 0x20, 0x1b, 0xc0, 0xb0, 0x6b,
+ 0x51, 0x8a, 0xfa, 0x21, 0x1a, 0xc1, 0xb1, 0x6a,
+ 0xc7, 0x1c, 0x6c, 0xb7, 0x8c, 0x57, 0x27, 0xfc,
+ 0x60, 0xbb, 0xcb, 0x10, 0x2b, 0xf0, 0x80, 0x5b,
+ 0xf6, 0x2d, 0x5d, 0x86, 0xbd, 0x66, 0x16, 0xcd,
+ 0x33, 0xe8, 0x98, 0x43, 0x78, 0xa3, 0xd3, 0x08,
+ 0xa5, 0x7e, 0x0e, 0xd5, 0xee, 0x35, 0x45, 0x9e,
+ 0x02, 0xd9, 0xa9, 0x72, 0x49, 0x92, 0xe2, 0x39,
+ 0x94, 0x4f, 0x3f, 0xe4, 0xdf, 0x04, 0x74, 0xaf,
+ },
+ {
+ 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e,
+ 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80,
+ 0x41, 0x9d, 0xe4, 0x38, 0x16, 0xca, 0xb3, 0x6f,
+ 0xef, 0x33, 0x4a, 0x96, 0xb8, 0x64, 0x1d, 0xc1,
+ 0x82, 0x5e, 0x27, 0xfb, 0xd5, 0x09, 0x70, 0xac,
+ 0x2c, 0xf0, 0x89, 0x55, 0x7b, 0xa7, 0xde, 0x02,
+ 0xc3, 0x1f, 0x66, 0xba, 0x94, 0x48, 0x31, 0xed,
+ 0x6d, 0xb1, 0xc8, 0x14, 0x3a, 0xe6, 0x9f, 0x43,
+ 0x19, 0xc5, 0xbc, 0x60, 0x4e, 0x92, 0xeb, 0x37,
+ 0xb7, 0x6b, 0x12, 0xce, 0xe0, 0x3c, 0x45, 0x99,
+ 0x58, 0x84, 0xfd, 0x21, 0x0f, 0xd3, 0xaa, 0x76,
+ 0xf6, 0x2a, 0x53, 0x8f, 0xa1, 0x7d, 0x04, 0xd8,
+ 0x9b, 0x47, 0x3e, 0xe2, 0xcc, 0x10, 0x69, 0xb5,
+ 0x35, 0xe9, 0x90, 0x4c, 0x62, 0xbe, 0xc7, 0x1b,
+ 0xda, 0x06, 0x7f, 0xa3, 0x8d, 0x51, 0x28, 0xf4,
+ 0x74, 0xa8, 0xd1, 0x0d, 0x23, 0xff, 0x86, 0x5a,
+ 0x32, 0xee, 0x97, 0x4b, 0x65, 0xb9, 0xc0, 0x1c,
+ 0x9c, 0x40, 0x39, 0xe5, 0xcb, 0x17, 0x6e, 0xb2,
+ 0x73, 0xaf, 0xd6, 0x0a, 0x24, 0xf8, 0x81, 0x5d,
+ 0xdd, 0x01, 0x78, 0xa4, 0x8a, 0x56, 0x2f, 0xf3,
+ 0xb0, 0x6c, 0x15, 0xc9, 0xe7, 0x3b, 0x42, 0x9e,
+ 0x1e, 0xc2, 0xbb, 0x67, 0x49, 0x95, 0xec, 0x30,
+ 0xf1, 0x2d, 0x54, 0x88, 0xa6, 0x7a, 0x03, 0xdf,
+ 0x5f, 0x83, 0xfa, 0x26, 0x08, 0xd4, 0xad, 0x71,
+ 0x2b, 0xf7, 0x8e, 0x52, 0x7c, 0xa0, 0xd9, 0x05,
+ 0x85, 0x59, 0x20, 0xfc, 0xd2, 0x0e, 0x77, 0xab,
+ 0x6a, 0xb6, 0xcf, 0x13, 0x3d, 0xe1, 0x98, 0x44,
+ 0xc4, 0x18, 0x61, 0xbd, 0x93, 0x4f, 0x36, 0xea,
+ 0xa9, 0x75, 0x0c, 0xd0, 0xfe, 0x22, 0x5b, 0x87,
+ 0x07, 0xdb, 0xa2, 0x7e, 0x50, 0x8c, 0xf5, 0x29,
+ 0xe8, 0x34, 0x4d, 0x91, 0xbf, 0x63, 0x1a, 0xc6,
+ 0x46, 0x9a, 0xe3, 0x3f, 0x11, 0xcd, 0xb4, 0x68,
+ },
+ {
+ 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29,
+ 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f,
+ 0x51, 0x8c, 0xf6, 0x2b, 0x02, 0xdf, 0xa5, 0x78,
+ 0xf7, 0x2a, 0x50, 0x8d, 0xa4, 0x79, 0x03, 0xde,
+ 0xa2, 0x7f, 0x05, 0xd8, 0xf1, 0x2c, 0x56, 0x8b,
+ 0x04, 0xd9, 0xa3, 0x7e, 0x57, 0x8a, 0xf0, 0x2d,
+ 0xf3, 0x2e, 0x54, 0x89, 0xa0, 0x7d, 0x07, 0xda,
+ 0x55, 0x88, 0xf2, 0x2f, 0x06, 0xdb, 0xa1, 0x7c,
+ 0x59, 0x84, 0xfe, 0x23, 0x0a, 0xd7, 0xad, 0x70,
+ 0xff, 0x22, 0x58, 0x85, 0xac, 0x71, 0x0b, 0xd6,
+ 0x08, 0xd5, 0xaf, 0x72, 0x5b, 0x86, 0xfc, 0x21,
+ 0xae, 0x73, 0x09, 0xd4, 0xfd, 0x20, 0x5a, 0x87,
+ 0xfb, 0x26, 0x5c, 0x81, 0xa8, 0x75, 0x0f, 0xd2,
+ 0x5d, 0x80, 0xfa, 0x27, 0x0e, 0xd3, 0xa9, 0x74,
+ 0xaa, 0x77, 0x0d, 0xd0, 0xf9, 0x24, 0x5e, 0x83,
+ 0x0c, 0xd1, 0xab, 0x76, 0x5f, 0x82, 0xf8, 0x25,
+ 0xb2, 0x6f, 0x15, 0xc8, 0xe1, 0x3c, 0x46, 0x9b,
+ 0x14, 0xc9, 0xb3, 0x6e, 0x47, 0x9a, 0xe0, 0x3d,
+ 0xe3, 0x3e, 0x44, 0x99, 0xb0, 0x6d, 0x17, 0xca,
+ 0x45, 0x98, 0xe2, 0x3f, 0x16, 0xcb, 0xb1, 0x6c,
+ 0x10, 0xcd, 0xb7, 0x6a, 0x43, 0x9e, 0xe4, 0x39,
+ 0xb6, 0x6b, 0x11, 0xcc, 0xe5, 0x38, 0x42, 0x9f,
+ 0x41, 0x9c, 0xe6, 0x3b, 0x12, 0xcf, 0xb5, 0x68,
+ 0xe7, 0x3a, 0x40, 0x9d, 0xb4, 0x69, 0x13, 0xce,
+ 0xeb, 0x36, 0x4c, 0x91, 0xb8, 0x65, 0x1f, 0xc2,
+ 0x4d, 0x90, 0xea, 0x37, 0x1e, 0xc3, 0xb9, 0x64,
+ 0xba, 0x67, 0x1d, 0xc0, 0xe9, 0x34, 0x4e, 0x93,
+ 0x1c, 0xc1, 0xbb, 0x66, 0x4f, 0x92, 0xe8, 0x35,
+ 0x49, 0x94, 0xee, 0x33, 0x1a, 0xc7, 0xbd, 0x60,
+ 0xef, 0x32, 0x48, 0x95, 0xbc, 0x61, 0x1b, 0xc6,
+ 0x18, 0xc5, 0xbf, 0x62, 0x4b, 0x96, 0xec, 0x31,
+ 0xbe, 0x63, 0x19, 0xc4, 0xed, 0x30, 0x4a, 0x97,
+ },
+ {
+ 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20,
+ 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e,
+ 0x61, 0xbf, 0xc0, 0x1e, 0x3e, 0xe0, 0x9f, 0x41,
+ 0xdf, 0x01, 0x7e, 0xa0, 0x80, 0x5e, 0x21, 0xff,
+ 0xc2, 0x1c, 0x63, 0xbd, 0x9d, 0x43, 0x3c, 0xe2,
+ 0x7c, 0xa2, 0xdd, 0x03, 0x23, 0xfd, 0x82, 0x5c,
+ 0xa3, 0x7d, 0x02, 0xdc, 0xfc, 0x22, 0x5d, 0x83,
+ 0x1d, 0xc3, 0xbc, 0x62, 0x42, 0x9c, 0xe3, 0x3d,
+ 0x99, 0x47, 0x38, 0xe6, 0xc6, 0x18, 0x67, 0xb9,
+ 0x27, 0xf9, 0x86, 0x58, 0x78, 0xa6, 0xd9, 0x07,
+ 0xf8, 0x26, 0x59, 0x87, 0xa7, 0x79, 0x06, 0xd8,
+ 0x46, 0x98, 0xe7, 0x39, 0x19, 0xc7, 0xb8, 0x66,
+ 0x5b, 0x85, 0xfa, 0x24, 0x04, 0xda, 0xa5, 0x7b,
+ 0xe5, 0x3b, 0x44, 0x9a, 0xba, 0x64, 0x1b, 0xc5,
+ 0x3a, 0xe4, 0x9b, 0x45, 0x65, 0xbb, 0xc4, 0x1a,
+ 0x84, 0x5a, 0x25, 0xfb, 0xdb, 0x05, 0x7a, 0xa4,
+ 0x2f, 0xf1, 0x8e, 0x50, 0x70, 0xae, 0xd1, 0x0f,
+ 0x91, 0x4f, 0x30, 0xee, 0xce, 0x10, 0x6f, 0xb1,
+ 0x4e, 0x90, 0xef, 0x31, 0x11, 0xcf, 0xb0, 0x6e,
+ 0xf0, 0x2e, 0x51, 0x8f, 0xaf, 0x71, 0x0e, 0xd0,
+ 0xed, 0x33, 0x4c, 0x92, 0xb2, 0x6c, 0x13, 0xcd,
+ 0x53, 0x8d, 0xf2, 0x2c, 0x0c, 0xd2, 0xad, 0x73,
+ 0x8c, 0x52, 0x2d, 0xf3, 0xd3, 0x0d, 0x72, 0xac,
+ 0x32, 0xec, 0x93, 0x4d, 0x6d, 0xb3, 0xcc, 0x12,
+ 0xb6, 0x68, 0x17, 0xc9, 0xe9, 0x37, 0x48, 0x96,
+ 0x08, 0xd6, 0xa9, 0x77, 0x57, 0x89, 0xf6, 0x28,
+ 0xd7, 0x09, 0x76, 0xa8, 0x88, 0x56, 0x29, 0xf7,
+ 0x69, 0xb7, 0xc8, 0x16, 0x36, 0xe8, 0x97, 0x49,
+ 0x74, 0xaa, 0xd5, 0x0b, 0x2b, 0xf5, 0x8a, 0x54,
+ 0xca, 0x14, 0x6b, 0xb5, 0x95, 0x4b, 0x34, 0xea,
+ 0x15, 0xcb, 0xb4, 0x6a, 0x4a, 0x94, 0xeb, 0x35,
+ 0xab, 0x75, 0x0a, 0xd4, 0xf4, 0x2a, 0x55, 0x8b,
+ },
+ {
+ 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27,
+ 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91,
+ 0x71, 0xae, 0xd2, 0x0d, 0x2a, 0xf5, 0x89, 0x56,
+ 0xc7, 0x18, 0x64, 0xbb, 0x9c, 0x43, 0x3f, 0xe0,
+ 0xe2, 0x3d, 0x41, 0x9e, 0xb9, 0x66, 0x1a, 0xc5,
+ 0x54, 0x8b, 0xf7, 0x28, 0x0f, 0xd0, 0xac, 0x73,
+ 0x93, 0x4c, 0x30, 0xef, 0xc8, 0x17, 0x6b, 0xb4,
+ 0x25, 0xfa, 0x86, 0x59, 0x7e, 0xa1, 0xdd, 0x02,
+ 0xd9, 0x06, 0x7a, 0xa5, 0x82, 0x5d, 0x21, 0xfe,
+ 0x6f, 0xb0, 0xcc, 0x13, 0x34, 0xeb, 0x97, 0x48,
+ 0xa8, 0x77, 0x0b, 0xd4, 0xf3, 0x2c, 0x50, 0x8f,
+ 0x1e, 0xc1, 0xbd, 0x62, 0x45, 0x9a, 0xe6, 0x39,
+ 0x3b, 0xe4, 0x98, 0x47, 0x60, 0xbf, 0xc3, 0x1c,
+ 0x8d, 0x52, 0x2e, 0xf1, 0xd6, 0x09, 0x75, 0xaa,
+ 0x4a, 0x95, 0xe9, 0x36, 0x11, 0xce, 0xb2, 0x6d,
+ 0xfc, 0x23, 0x5f, 0x80, 0xa7, 0x78, 0x04, 0xdb,
+ 0xaf, 0x70, 0x0c, 0xd3, 0xf4, 0x2b, 0x57, 0x88,
+ 0x19, 0xc6, 0xba, 0x65, 0x42, 0x9d, 0xe1, 0x3e,
+ 0xde, 0x01, 0x7d, 0xa2, 0x85, 0x5a, 0x26, 0xf9,
+ 0x68, 0xb7, 0xcb, 0x14, 0x33, 0xec, 0x90, 0x4f,
+ 0x4d, 0x92, 0xee, 0x31, 0x16, 0xc9, 0xb5, 0x6a,
+ 0xfb, 0x24, 0x58, 0x87, 0xa0, 0x7f, 0x03, 0xdc,
+ 0x3c, 0xe3, 0x9f, 0x40, 0x67, 0xb8, 0xc4, 0x1b,
+ 0x8a, 0x55, 0x29, 0xf6, 0xd1, 0x0e, 0x72, 0xad,
+ 0x76, 0xa9, 0xd5, 0x0a, 0x2d, 0xf2, 0x8e, 0x51,
+ 0xc0, 0x1f, 0x63, 0xbc, 0x9b, 0x44, 0x38, 0xe7,
+ 0x07, 0xd8, 0xa4, 0x7b, 0x5c, 0x83, 0xff, 0x20,
+ 0xb1, 0x6e, 0x12, 0xcd, 0xea, 0x35, 0x49, 0x96,
+ 0x94, 0x4b, 0x37, 0xe8, 0xcf, 0x10, 0x6c, 0xb3,
+ 0x22, 0xfd, 0x81, 0x5e, 0x79, 0xa6, 0xda, 0x05,
+ 0xe5, 0x3a, 0x46, 0x99, 0xbe, 0x61, 0x1d, 0xc2,
+ 0x53, 0x8c, 0xf0, 0x2f, 0x08, 0xd7, 0xab, 0x74,
+ },
+ {
+ 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a,
+ 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9,
+ 0xa6, 0x46, 0x7b, 0x9b, 0x01, 0xe1, 0xdc, 0x3c,
+ 0xf5, 0x15, 0x28, 0xc8, 0x52, 0xb2, 0x8f, 0x6f,
+ 0x51, 0xb1, 0x8c, 0x6c, 0xf6, 0x16, 0x2b, 0xcb,
+ 0x02, 0xe2, 0xdf, 0x3f, 0xa5, 0x45, 0x78, 0x98,
+ 0xf7, 0x17, 0x2a, 0xca, 0x50, 0xb0, 0x8d, 0x6d,
+ 0xa4, 0x44, 0x79, 0x99, 0x03, 0xe3, 0xde, 0x3e,
+ 0xa2, 0x42, 0x7f, 0x9f, 0x05, 0xe5, 0xd8, 0x38,
+ 0xf1, 0x11, 0x2c, 0xcc, 0x56, 0xb6, 0x8b, 0x6b,
+ 0x04, 0xe4, 0xd9, 0x39, 0xa3, 0x43, 0x7e, 0x9e,
+ 0x57, 0xb7, 0x8a, 0x6a, 0xf0, 0x10, 0x2d, 0xcd,
+ 0xf3, 0x13, 0x2e, 0xce, 0x54, 0xb4, 0x89, 0x69,
+ 0xa0, 0x40, 0x7d, 0x9d, 0x07, 0xe7, 0xda, 0x3a,
+ 0x55, 0xb5, 0x88, 0x68, 0xf2, 0x12, 0x2f, 0xcf,
+ 0x06, 0xe6, 0xdb, 0x3b, 0xa1, 0x41, 0x7c, 0x9c,
+ 0x59, 0xb9, 0x84, 0x64, 0xfe, 0x1e, 0x23, 0xc3,
+ 0x0a, 0xea, 0xd7, 0x37, 0xad, 0x4d, 0x70, 0x90,
+ 0xff, 0x1f, 0x22, 0xc2, 0x58, 0xb8, 0x85, 0x65,
+ 0xac, 0x4c, 0x71, 0x91, 0x0b, 0xeb, 0xd6, 0x36,
+ 0x08, 0xe8, 0xd5, 0x35, 0xaf, 0x4f, 0x72, 0x92,
+ 0x5b, 0xbb, 0x86, 0x66, 0xfc, 0x1c, 0x21, 0xc1,
+ 0xae, 0x4e, 0x73, 0x93, 0x09, 0xe9, 0xd4, 0x34,
+ 0xfd, 0x1d, 0x20, 0xc0, 0x5a, 0xba, 0x87, 0x67,
+ 0xfb, 0x1b, 0x26, 0xc6, 0x5c, 0xbc, 0x81, 0x61,
+ 0xa8, 0x48, 0x75, 0x95, 0x0f, 0xef, 0xd2, 0x32,
+ 0x5d, 0xbd, 0x80, 0x60, 0xfa, 0x1a, 0x27, 0xc7,
+ 0x0e, 0xee, 0xd3, 0x33, 0xa9, 0x49, 0x74, 0x94,
+ 0xaa, 0x4a, 0x77, 0x97, 0x0d, 0xed, 0xd0, 0x30,
+ 0xf9, 0x19, 0x24, 0xc4, 0x5e, 0xbe, 0x83, 0x63,
+ 0x0c, 0xec, 0xd1, 0x31, 0xab, 0x4b, 0x76, 0x96,
+ 0x5f, 0xbf, 0x82, 0x62, 0xf8, 0x18, 0x25, 0xc5,
+ },
+ {
+ 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d,
+ 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6,
+ 0xb6, 0x57, 0x69, 0x88, 0x15, 0xf4, 0xca, 0x2b,
+ 0xed, 0x0c, 0x32, 0xd3, 0x4e, 0xaf, 0x91, 0x70,
+ 0x71, 0x90, 0xae, 0x4f, 0xd2, 0x33, 0x0d, 0xec,
+ 0x2a, 0xcb, 0xf5, 0x14, 0x89, 0x68, 0x56, 0xb7,
+ 0xc7, 0x26, 0x18, 0xf9, 0x64, 0x85, 0xbb, 0x5a,
+ 0x9c, 0x7d, 0x43, 0xa2, 0x3f, 0xde, 0xe0, 0x01,
+ 0xe2, 0x03, 0x3d, 0xdc, 0x41, 0xa0, 0x9e, 0x7f,
+ 0xb9, 0x58, 0x66, 0x87, 0x1a, 0xfb, 0xc5, 0x24,
+ 0x54, 0xb5, 0x8b, 0x6a, 0xf7, 0x16, 0x28, 0xc9,
+ 0x0f, 0xee, 0xd0, 0x31, 0xac, 0x4d, 0x73, 0x92,
+ 0x93, 0x72, 0x4c, 0xad, 0x30, 0xd1, 0xef, 0x0e,
+ 0xc8, 0x29, 0x17, 0xf6, 0x6b, 0x8a, 0xb4, 0x55,
+ 0x25, 0xc4, 0xfa, 0x1b, 0x86, 0x67, 0x59, 0xb8,
+ 0x7e, 0x9f, 0xa1, 0x40, 0xdd, 0x3c, 0x02, 0xe3,
+ 0xd9, 0x38, 0x06, 0xe7, 0x7a, 0x9b, 0xa5, 0x44,
+ 0x82, 0x63, 0x5d, 0xbc, 0x21, 0xc0, 0xfe, 0x1f,
+ 0x6f, 0x8e, 0xb0, 0x51, 0xcc, 0x2d, 0x13, 0xf2,
+ 0x34, 0xd5, 0xeb, 0x0a, 0x97, 0x76, 0x48, 0xa9,
+ 0xa8, 0x49, 0x77, 0x96, 0x0b, 0xea, 0xd4, 0x35,
+ 0xf3, 0x12, 0x2c, 0xcd, 0x50, 0xb1, 0x8f, 0x6e,
+ 0x1e, 0xff, 0xc1, 0x20, 0xbd, 0x5c, 0x62, 0x83,
+ 0x45, 0xa4, 0x9a, 0x7b, 0xe6, 0x07, 0x39, 0xd8,
+ 0x3b, 0xda, 0xe4, 0x05, 0x98, 0x79, 0x47, 0xa6,
+ 0x60, 0x81, 0xbf, 0x5e, 0xc3, 0x22, 0x1c, 0xfd,
+ 0x8d, 0x6c, 0x52, 0xb3, 0x2e, 0xcf, 0xf1, 0x10,
+ 0xd6, 0x37, 0x09, 0xe8, 0x75, 0x94, 0xaa, 0x4b,
+ 0x4a, 0xab, 0x95, 0x74, 0xe9, 0x08, 0x36, 0xd7,
+ 0x11, 0xf0, 0xce, 0x2f, 0xb2, 0x53, 0x6d, 0x8c,
+ 0xfc, 0x1d, 0x23, 0xc2, 0x5f, 0xbe, 0x80, 0x61,
+ 0xa7, 0x46, 0x78, 0x99, 0x04, 0xe5, 0xdb, 0x3a,
+ },
+ {
+ 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94,
+ 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7,
+ 0x86, 0x64, 0x5f, 0xbd, 0x29, 0xcb, 0xf0, 0x12,
+ 0xc5, 0x27, 0x1c, 0xfe, 0x6a, 0x88, 0xb3, 0x51,
+ 0x11, 0xf3, 0xc8, 0x2a, 0xbe, 0x5c, 0x67, 0x85,
+ 0x52, 0xb0, 0x8b, 0x69, 0xfd, 0x1f, 0x24, 0xc6,
+ 0x97, 0x75, 0x4e, 0xac, 0x38, 0xda, 0xe1, 0x03,
+ 0xd4, 0x36, 0x0d, 0xef, 0x7b, 0x99, 0xa2, 0x40,
+ 0x22, 0xc0, 0xfb, 0x19, 0x8d, 0x6f, 0x54, 0xb6,
+ 0x61, 0x83, 0xb8, 0x5a, 0xce, 0x2c, 0x17, 0xf5,
+ 0xa4, 0x46, 0x7d, 0x9f, 0x0b, 0xe9, 0xd2, 0x30,
+ 0xe7, 0x05, 0x3e, 0xdc, 0x48, 0xaa, 0x91, 0x73,
+ 0x33, 0xd1, 0xea, 0x08, 0x9c, 0x7e, 0x45, 0xa7,
+ 0x70, 0x92, 0xa9, 0x4b, 0xdf, 0x3d, 0x06, 0xe4,
+ 0xb5, 0x57, 0x6c, 0x8e, 0x1a, 0xf8, 0xc3, 0x21,
+ 0xf6, 0x14, 0x2f, 0xcd, 0x59, 0xbb, 0x80, 0x62,
+ 0x44, 0xa6, 0x9d, 0x7f, 0xeb, 0x09, 0x32, 0xd0,
+ 0x07, 0xe5, 0xde, 0x3c, 0xa8, 0x4a, 0x71, 0x93,
+ 0xc2, 0x20, 0x1b, 0xf9, 0x6d, 0x8f, 0xb4, 0x56,
+ 0x81, 0x63, 0x58, 0xba, 0x2e, 0xcc, 0xf7, 0x15,
+ 0x55, 0xb7, 0x8c, 0x6e, 0xfa, 0x18, 0x23, 0xc1,
+ 0x16, 0xf4, 0xcf, 0x2d, 0xb9, 0x5b, 0x60, 0x82,
+ 0xd3, 0x31, 0x0a, 0xe8, 0x7c, 0x9e, 0xa5, 0x47,
+ 0x90, 0x72, 0x49, 0xab, 0x3f, 0xdd, 0xe6, 0x04,
+ 0x66, 0x84, 0xbf, 0x5d, 0xc9, 0x2b, 0x10, 0xf2,
+ 0x25, 0xc7, 0xfc, 0x1e, 0x8a, 0x68, 0x53, 0xb1,
+ 0xe0, 0x02, 0x39, 0xdb, 0x4f, 0xad, 0x96, 0x74,
+ 0xa3, 0x41, 0x7a, 0x98, 0x0c, 0xee, 0xd5, 0x37,
+ 0x77, 0x95, 0xae, 0x4c, 0xd8, 0x3a, 0x01, 0xe3,
+ 0x34, 0xd6, 0xed, 0x0f, 0x9b, 0x79, 0x42, 0xa0,
+ 0xf1, 0x13, 0x28, 0xca, 0x5e, 0xbc, 0x87, 0x65,
+ 0xb2, 0x50, 0x6b, 0x89, 0x1d, 0xff, 0xc4, 0x26,
+ },
+ {
+ 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93,
+ 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8,
+ 0x96, 0x75, 0x4d, 0xae, 0x3d, 0xde, 0xe6, 0x05,
+ 0xdd, 0x3e, 0x06, 0xe5, 0x76, 0x95, 0xad, 0x4e,
+ 0x31, 0xd2, 0xea, 0x09, 0x9a, 0x79, 0x41, 0xa2,
+ 0x7a, 0x99, 0xa1, 0x42, 0xd1, 0x32, 0x0a, 0xe9,
+ 0xa7, 0x44, 0x7c, 0x9f, 0x0c, 0xef, 0xd7, 0x34,
+ 0xec, 0x0f, 0x37, 0xd4, 0x47, 0xa4, 0x9c, 0x7f,
+ 0x62, 0x81, 0xb9, 0x5a, 0xc9, 0x2a, 0x12, 0xf1,
+ 0x29, 0xca, 0xf2, 0x11, 0x82, 0x61, 0x59, 0xba,
+ 0xf4, 0x17, 0x2f, 0xcc, 0x5f, 0xbc, 0x84, 0x67,
+ 0xbf, 0x5c, 0x64, 0x87, 0x14, 0xf7, 0xcf, 0x2c,
+ 0x53, 0xb0, 0x88, 0x6b, 0xf8, 0x1b, 0x23, 0xc0,
+ 0x18, 0xfb, 0xc3, 0x20, 0xb3, 0x50, 0x68, 0x8b,
+ 0xc5, 0x26, 0x1e, 0xfd, 0x6e, 0x8d, 0xb5, 0x56,
+ 0x8e, 0x6d, 0x55, 0xb6, 0x25, 0xc6, 0xfe, 0x1d,
+ 0xc4, 0x27, 0x1f, 0xfc, 0x6f, 0x8c, 0xb4, 0x57,
+ 0x8f, 0x6c, 0x54, 0xb7, 0x24, 0xc7, 0xff, 0x1c,
+ 0x52, 0xb1, 0x89, 0x6a, 0xf9, 0x1a, 0x22, 0xc1,
+ 0x19, 0xfa, 0xc2, 0x21, 0xb2, 0x51, 0x69, 0x8a,
+ 0xf5, 0x16, 0x2e, 0xcd, 0x5e, 0xbd, 0x85, 0x66,
+ 0xbe, 0x5d, 0x65, 0x86, 0x15, 0xf6, 0xce, 0x2d,
+ 0x63, 0x80, 0xb8, 0x5b, 0xc8, 0x2b, 0x13, 0xf0,
+ 0x28, 0xcb, 0xf3, 0x10, 0x83, 0x60, 0x58, 0xbb,
+ 0xa6, 0x45, 0x7d, 0x9e, 0x0d, 0xee, 0xd6, 0x35,
+ 0xed, 0x0e, 0x36, 0xd5, 0x46, 0xa5, 0x9d, 0x7e,
+ 0x30, 0xd3, 0xeb, 0x08, 0x9b, 0x78, 0x40, 0xa3,
+ 0x7b, 0x98, 0xa0, 0x43, 0xd0, 0x33, 0x0b, 0xe8,
+ 0x97, 0x74, 0x4c, 0xaf, 0x3c, 0xdf, 0xe7, 0x04,
+ 0xdc, 0x3f, 0x07, 0xe4, 0x77, 0x94, 0xac, 0x4f,
+ 0x01, 0xe2, 0xda, 0x39, 0xaa, 0x49, 0x71, 0x92,
+ 0x4a, 0xa9, 0x91, 0x72, 0xe1, 0x02, 0x3a, 0xd9,
+ },
+ {
+ 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86,
+ 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5,
+ 0xe6, 0x02, 0x33, 0xd7, 0x51, 0xb5, 0x84, 0x60,
+ 0x95, 0x71, 0x40, 0xa4, 0x22, 0xc6, 0xf7, 0x13,
+ 0xd1, 0x35, 0x04, 0xe0, 0x66, 0x82, 0xb3, 0x57,
+ 0xa2, 0x46, 0x77, 0x93, 0x15, 0xf1, 0xc0, 0x24,
+ 0x37, 0xd3, 0xe2, 0x06, 0x80, 0x64, 0x55, 0xb1,
+ 0x44, 0xa0, 0x91, 0x75, 0xf3, 0x17, 0x26, 0xc2,
+ 0xbf, 0x5b, 0x6a, 0x8e, 0x08, 0xec, 0xdd, 0x39,
+ 0xcc, 0x28, 0x19, 0xfd, 0x7b, 0x9f, 0xae, 0x4a,
+ 0x59, 0xbd, 0x8c, 0x68, 0xee, 0x0a, 0x3b, 0xdf,
+ 0x2a, 0xce, 0xff, 0x1b, 0x9d, 0x79, 0x48, 0xac,
+ 0x6e, 0x8a, 0xbb, 0x5f, 0xd9, 0x3d, 0x0c, 0xe8,
+ 0x1d, 0xf9, 0xc8, 0x2c, 0xaa, 0x4e, 0x7f, 0x9b,
+ 0x88, 0x6c, 0x5d, 0xb9, 0x3f, 0xdb, 0xea, 0x0e,
+ 0xfb, 0x1f, 0x2e, 0xca, 0x4c, 0xa8, 0x99, 0x7d,
+ 0x63, 0x87, 0xb6, 0x52, 0xd4, 0x30, 0x01, 0xe5,
+ 0x10, 0xf4, 0xc5, 0x21, 0xa7, 0x43, 0x72, 0x96,
+ 0x85, 0x61, 0x50, 0xb4, 0x32, 0xd6, 0xe7, 0x03,
+ 0xf6, 0x12, 0x23, 0xc7, 0x41, 0xa5, 0x94, 0x70,
+ 0xb2, 0x56, 0x67, 0x83, 0x05, 0xe1, 0xd0, 0x34,
+ 0xc1, 0x25, 0x14, 0xf0, 0x76, 0x92, 0xa3, 0x47,
+ 0x54, 0xb0, 0x81, 0x65, 0xe3, 0x07, 0x36, 0xd2,
+ 0x27, 0xc3, 0xf2, 0x16, 0x90, 0x74, 0x45, 0xa1,
+ 0xdc, 0x38, 0x09, 0xed, 0x6b, 0x8f, 0xbe, 0x5a,
+ 0xaf, 0x4b, 0x7a, 0x9e, 0x18, 0xfc, 0xcd, 0x29,
+ 0x3a, 0xde, 0xef, 0x0b, 0x8d, 0x69, 0x58, 0xbc,
+ 0x49, 0xad, 0x9c, 0x78, 0xfe, 0x1a, 0x2b, 0xcf,
+ 0x0d, 0xe9, 0xd8, 0x3c, 0xba, 0x5e, 0x6f, 0x8b,
+ 0x7e, 0x9a, 0xab, 0x4f, 0xc9, 0x2d, 0x1c, 0xf8,
+ 0xeb, 0x0f, 0x3e, 0xda, 0x5c, 0xb8, 0x89, 0x6d,
+ 0x98, 0x7c, 0x4d, 0xa9, 0x2f, 0xcb, 0xfa, 0x1e,
+ },
+ {
+ 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81,
+ 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa,
+ 0xf6, 0x13, 0x21, 0xc4, 0x45, 0xa0, 0x92, 0x77,
+ 0x8d, 0x68, 0x5a, 0xbf, 0x3e, 0xdb, 0xe9, 0x0c,
+ 0xf1, 0x14, 0x26, 0xc3, 0x42, 0xa7, 0x95, 0x70,
+ 0x8a, 0x6f, 0x5d, 0xb8, 0x39, 0xdc, 0xee, 0x0b,
+ 0x07, 0xe2, 0xd0, 0x35, 0xb4, 0x51, 0x63, 0x86,
+ 0x7c, 0x99, 0xab, 0x4e, 0xcf, 0x2a, 0x18, 0xfd,
+ 0xff, 0x1a, 0x28, 0xcd, 0x4c, 0xa9, 0x9b, 0x7e,
+ 0x84, 0x61, 0x53, 0xb6, 0x37, 0xd2, 0xe0, 0x05,
+ 0x09, 0xec, 0xde, 0x3b, 0xba, 0x5f, 0x6d, 0x88,
+ 0x72, 0x97, 0xa5, 0x40, 0xc1, 0x24, 0x16, 0xf3,
+ 0x0e, 0xeb, 0xd9, 0x3c, 0xbd, 0x58, 0x6a, 0x8f,
+ 0x75, 0x90, 0xa2, 0x47, 0xc6, 0x23, 0x11, 0xf4,
+ 0xf8, 0x1d, 0x2f, 0xca, 0x4b, 0xae, 0x9c, 0x79,
+ 0x83, 0x66, 0x54, 0xb1, 0x30, 0xd5, 0xe7, 0x02,
+ 0xe3, 0x06, 0x34, 0xd1, 0x50, 0xb5, 0x87, 0x62,
+ 0x98, 0x7d, 0x4f, 0xaa, 0x2b, 0xce, 0xfc, 0x19,
+ 0x15, 0xf0, 0xc2, 0x27, 0xa6, 0x43, 0x71, 0x94,
+ 0x6e, 0x8b, 0xb9, 0x5c, 0xdd, 0x38, 0x0a, 0xef,
+ 0x12, 0xf7, 0xc5, 0x20, 0xa1, 0x44, 0x76, 0x93,
+ 0x69, 0x8c, 0xbe, 0x5b, 0xda, 0x3f, 0x0d, 0xe8,
+ 0xe4, 0x01, 0x33, 0xd6, 0x57, 0xb2, 0x80, 0x65,
+ 0x9f, 0x7a, 0x48, 0xad, 0x2c, 0xc9, 0xfb, 0x1e,
+ 0x1c, 0xf9, 0xcb, 0x2e, 0xaf, 0x4a, 0x78, 0x9d,
+ 0x67, 0x82, 0xb0, 0x55, 0xd4, 0x31, 0x03, 0xe6,
+ 0xea, 0x0f, 0x3d, 0xd8, 0x59, 0xbc, 0x8e, 0x6b,
+ 0x91, 0x74, 0x46, 0xa3, 0x22, 0xc7, 0xf5, 0x10,
+ 0xed, 0x08, 0x3a, 0xdf, 0x5e, 0xbb, 0x89, 0x6c,
+ 0x96, 0x73, 0x41, 0xa4, 0x25, 0xc0, 0xf2, 0x17,
+ 0x1b, 0xfe, 0xcc, 0x29, 0xa8, 0x4d, 0x7f, 0x9a,
+ 0x60, 0x85, 0xb7, 0x52, 0xd3, 0x36, 0x04, 0xe1,
+ },
+ {
+ 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88,
+ 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb,
+ 0xc6, 0x20, 0x17, 0xf1, 0x79, 0x9f, 0xa8, 0x4e,
+ 0xa5, 0x43, 0x74, 0x92, 0x1a, 0xfc, 0xcb, 0x2d,
+ 0x91, 0x77, 0x40, 0xa6, 0x2e, 0xc8, 0xff, 0x19,
+ 0xf2, 0x14, 0x23, 0xc5, 0x4d, 0xab, 0x9c, 0x7a,
+ 0x57, 0xb1, 0x86, 0x60, 0xe8, 0x0e, 0x39, 0xdf,
+ 0x34, 0xd2, 0xe5, 0x03, 0x8b, 0x6d, 0x5a, 0xbc,
+ 0x3f, 0xd9, 0xee, 0x08, 0x80, 0x66, 0x51, 0xb7,
+ 0x5c, 0xba, 0x8d, 0x6b, 0xe3, 0x05, 0x32, 0xd4,
+ 0xf9, 0x1f, 0x28, 0xce, 0x46, 0xa0, 0x97, 0x71,
+ 0x9a, 0x7c, 0x4b, 0xad, 0x25, 0xc3, 0xf4, 0x12,
+ 0xae, 0x48, 0x7f, 0x99, 0x11, 0xf7, 0xc0, 0x26,
+ 0xcd, 0x2b, 0x1c, 0xfa, 0x72, 0x94, 0xa3, 0x45,
+ 0x68, 0x8e, 0xb9, 0x5f, 0xd7, 0x31, 0x06, 0xe0,
+ 0x0b, 0xed, 0xda, 0x3c, 0xb4, 0x52, 0x65, 0x83,
+ 0x7e, 0x98, 0xaf, 0x49, 0xc1, 0x27, 0x10, 0xf6,
+ 0x1d, 0xfb, 0xcc, 0x2a, 0xa2, 0x44, 0x73, 0x95,
+ 0xb8, 0x5e, 0x69, 0x8f, 0x07, 0xe1, 0xd6, 0x30,
+ 0xdb, 0x3d, 0x0a, 0xec, 0x64, 0x82, 0xb5, 0x53,
+ 0xef, 0x09, 0x3e, 0xd8, 0x50, 0xb6, 0x81, 0x67,
+ 0x8c, 0x6a, 0x5d, 0xbb, 0x33, 0xd5, 0xe2, 0x04,
+ 0x29, 0xcf, 0xf8, 0x1e, 0x96, 0x70, 0x47, 0xa1,
+ 0x4a, 0xac, 0x9b, 0x7d, 0xf5, 0x13, 0x24, 0xc2,
+ 0x41, 0xa7, 0x90, 0x76, 0xfe, 0x18, 0x2f, 0xc9,
+ 0x22, 0xc4, 0xf3, 0x15, 0x9d, 0x7b, 0x4c, 0xaa,
+ 0x87, 0x61, 0x56, 0xb0, 0x38, 0xde, 0xe9, 0x0f,
+ 0xe4, 0x02, 0x35, 0xd3, 0x5b, 0xbd, 0x8a, 0x6c,
+ 0xd0, 0x36, 0x01, 0xe7, 0x6f, 0x89, 0xbe, 0x58,
+ 0xb3, 0x55, 0x62, 0x84, 0x0c, 0xea, 0xdd, 0x3b,
+ 0x16, 0xf0, 0xc7, 0x21, 0xa9, 0x4f, 0x78, 0x9e,
+ 0x75, 0x93, 0xa4, 0x42, 0xca, 0x2c, 0x1b, 0xfd,
+ },
+ {
+ 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f,
+ 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4,
+ 0xd6, 0x31, 0x05, 0xe2, 0x6d, 0x8a, 0xbe, 0x59,
+ 0xbd, 0x5a, 0x6e, 0x89, 0x06, 0xe1, 0xd5, 0x32,
+ 0xb1, 0x56, 0x62, 0x85, 0x0a, 0xed, 0xd9, 0x3e,
+ 0xda, 0x3d, 0x09, 0xee, 0x61, 0x86, 0xb2, 0x55,
+ 0x67, 0x80, 0xb4, 0x53, 0xdc, 0x3b, 0x0f, 0xe8,
+ 0x0c, 0xeb, 0xdf, 0x38, 0xb7, 0x50, 0x64, 0x83,
+ 0x7f, 0x98, 0xac, 0x4b, 0xc4, 0x23, 0x17, 0xf0,
+ 0x14, 0xf3, 0xc7, 0x20, 0xaf, 0x48, 0x7c, 0x9b,
+ 0xa9, 0x4e, 0x7a, 0x9d, 0x12, 0xf5, 0xc1, 0x26,
+ 0xc2, 0x25, 0x11, 0xf6, 0x79, 0x9e, 0xaa, 0x4d,
+ 0xce, 0x29, 0x1d, 0xfa, 0x75, 0x92, 0xa6, 0x41,
+ 0xa5, 0x42, 0x76, 0x91, 0x1e, 0xf9, 0xcd, 0x2a,
+ 0x18, 0xff, 0xcb, 0x2c, 0xa3, 0x44, 0x70, 0x97,
+ 0x73, 0x94, 0xa0, 0x47, 0xc8, 0x2f, 0x1b, 0xfc,
+ 0xfe, 0x19, 0x2d, 0xca, 0x45, 0xa2, 0x96, 0x71,
+ 0x95, 0x72, 0x46, 0xa1, 0x2e, 0xc9, 0xfd, 0x1a,
+ 0x28, 0xcf, 0xfb, 0x1c, 0x93, 0x74, 0x40, 0xa7,
+ 0x43, 0xa4, 0x90, 0x77, 0xf8, 0x1f, 0x2b, 0xcc,
+ 0x4f, 0xa8, 0x9c, 0x7b, 0xf4, 0x13, 0x27, 0xc0,
+ 0x24, 0xc3, 0xf7, 0x10, 0x9f, 0x78, 0x4c, 0xab,
+ 0x99, 0x7e, 0x4a, 0xad, 0x22, 0xc5, 0xf1, 0x16,
+ 0xf2, 0x15, 0x21, 0xc6, 0x49, 0xae, 0x9a, 0x7d,
+ 0x81, 0x66, 0x52, 0xb5, 0x3a, 0xdd, 0xe9, 0x0e,
+ 0xea, 0x0d, 0x39, 0xde, 0x51, 0xb6, 0x82, 0x65,
+ 0x57, 0xb0, 0x84, 0x63, 0xec, 0x0b, 0x3f, 0xd8,
+ 0x3c, 0xdb, 0xef, 0x08, 0x87, 0x60, 0x54, 0xb3,
+ 0x30, 0xd7, 0xe3, 0x04, 0x8b, 0x6c, 0x58, 0xbf,
+ 0x5b, 0xbc, 0x88, 0x6f, 0xe0, 0x07, 0x33, 0xd4,
+ 0xe6, 0x01, 0x35, 0xd2, 0x5d, 0xba, 0x8e, 0x69,
+ 0x8d, 0x6a, 0x5e, 0xb9, 0x36, 0xd1, 0xe5, 0x02,
+ },
+ {
+ 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+ 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1,
+ 0x26, 0xce, 0xeb, 0x03, 0xa1, 0x49, 0x6c, 0x84,
+ 0x35, 0xdd, 0xf8, 0x10, 0xb2, 0x5a, 0x7f, 0x97,
+ 0x4c, 0xa4, 0x81, 0x69, 0xcb, 0x23, 0x06, 0xee,
+ 0x5f, 0xb7, 0x92, 0x7a, 0xd8, 0x30, 0x15, 0xfd,
+ 0x6a, 0x82, 0xa7, 0x4f, 0xed, 0x05, 0x20, 0xc8,
+ 0x79, 0x91, 0xb4, 0x5c, 0xfe, 0x16, 0x33, 0xdb,
+ 0x98, 0x70, 0x55, 0xbd, 0x1f, 0xf7, 0xd2, 0x3a,
+ 0x8b, 0x63, 0x46, 0xae, 0x0c, 0xe4, 0xc1, 0x29,
+ 0xbe, 0x56, 0x73, 0x9b, 0x39, 0xd1, 0xf4, 0x1c,
+ 0xad, 0x45, 0x60, 0x88, 0x2a, 0xc2, 0xe7, 0x0f,
+ 0xd4, 0x3c, 0x19, 0xf1, 0x53, 0xbb, 0x9e, 0x76,
+ 0xc7, 0x2f, 0x0a, 0xe2, 0x40, 0xa8, 0x8d, 0x65,
+ 0xf2, 0x1a, 0x3f, 0xd7, 0x75, 0x9d, 0xb8, 0x50,
+ 0xe1, 0x09, 0x2c, 0xc4, 0x66, 0x8e, 0xab, 0x43,
+ 0x2d, 0xc5, 0xe0, 0x08, 0xaa, 0x42, 0x67, 0x8f,
+ 0x3e, 0xd6, 0xf3, 0x1b, 0xb9, 0x51, 0x74, 0x9c,
+ 0x0b, 0xe3, 0xc6, 0x2e, 0x8c, 0x64, 0x41, 0xa9,
+ 0x18, 0xf0, 0xd5, 0x3d, 0x9f, 0x77, 0x52, 0xba,
+ 0x61, 0x89, 0xac, 0x44, 0xe6, 0x0e, 0x2b, 0xc3,
+ 0x72, 0x9a, 0xbf, 0x57, 0xf5, 0x1d, 0x38, 0xd0,
+ 0x47, 0xaf, 0x8a, 0x62, 0xc0, 0x28, 0x0d, 0xe5,
+ 0x54, 0xbc, 0x99, 0x71, 0xd3, 0x3b, 0x1e, 0xf6,
+ 0xb5, 0x5d, 0x78, 0x90, 0x32, 0xda, 0xff, 0x17,
+ 0xa6, 0x4e, 0x6b, 0x83, 0x21, 0xc9, 0xec, 0x04,
+ 0x93, 0x7b, 0x5e, 0xb6, 0x14, 0xfc, 0xd9, 0x31,
+ 0x80, 0x68, 0x4d, 0xa5, 0x07, 0xef, 0xca, 0x22,
+ 0xf9, 0x11, 0x34, 0xdc, 0x7e, 0x96, 0xb3, 0x5b,
+ 0xea, 0x02, 0x27, 0xcf, 0x6d, 0x85, 0xa0, 0x48,
+ 0xdf, 0x37, 0x12, 0xfa, 0x58, 0xb0, 0x95, 0x7d,
+ 0xcc, 0x24, 0x01, 0xe9, 0x4b, 0xa3, 0x86, 0x6e,
+ },
+ {
+ 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5,
+ 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe,
+ 0x36, 0xdf, 0xf9, 0x10, 0xb5, 0x5c, 0x7a, 0x93,
+ 0x2d, 0xc4, 0xe2, 0x0b, 0xae, 0x47, 0x61, 0x88,
+ 0x6c, 0x85, 0xa3, 0x4a, 0xef, 0x06, 0x20, 0xc9,
+ 0x77, 0x9e, 0xb8, 0x51, 0xf4, 0x1d, 0x3b, 0xd2,
+ 0x5a, 0xb3, 0x95, 0x7c, 0xd9, 0x30, 0x16, 0xff,
+ 0x41, 0xa8, 0x8e, 0x67, 0xc2, 0x2b, 0x0d, 0xe4,
+ 0xd8, 0x31, 0x17, 0xfe, 0x5b, 0xb2, 0x94, 0x7d,
+ 0xc3, 0x2a, 0x0c, 0xe5, 0x40, 0xa9, 0x8f, 0x66,
+ 0xee, 0x07, 0x21, 0xc8, 0x6d, 0x84, 0xa2, 0x4b,
+ 0xf5, 0x1c, 0x3a, 0xd3, 0x76, 0x9f, 0xb9, 0x50,
+ 0xb4, 0x5d, 0x7b, 0x92, 0x37, 0xde, 0xf8, 0x11,
+ 0xaf, 0x46, 0x60, 0x89, 0x2c, 0xc5, 0xe3, 0x0a,
+ 0x82, 0x6b, 0x4d, 0xa4, 0x01, 0xe8, 0xce, 0x27,
+ 0x99, 0x70, 0x56, 0xbf, 0x1a, 0xf3, 0xd5, 0x3c,
+ 0xad, 0x44, 0x62, 0x8b, 0x2e, 0xc7, 0xe1, 0x08,
+ 0xb6, 0x5f, 0x79, 0x90, 0x35, 0xdc, 0xfa, 0x13,
+ 0x9b, 0x72, 0x54, 0xbd, 0x18, 0xf1, 0xd7, 0x3e,
+ 0x80, 0x69, 0x4f, 0xa6, 0x03, 0xea, 0xcc, 0x25,
+ 0xc1, 0x28, 0x0e, 0xe7, 0x42, 0xab, 0x8d, 0x64,
+ 0xda, 0x33, 0x15, 0xfc, 0x59, 0xb0, 0x96, 0x7f,
+ 0xf7, 0x1e, 0x38, 0xd1, 0x74, 0x9d, 0xbb, 0x52,
+ 0xec, 0x05, 0x23, 0xca, 0x6f, 0x86, 0xa0, 0x49,
+ 0x75, 0x9c, 0xba, 0x53, 0xf6, 0x1f, 0x39, 0xd0,
+ 0x6e, 0x87, 0xa1, 0x48, 0xed, 0x04, 0x22, 0xcb,
+ 0x43, 0xaa, 0x8c, 0x65, 0xc0, 0x29, 0x0f, 0xe6,
+ 0x58, 0xb1, 0x97, 0x7e, 0xdb, 0x32, 0x14, 0xfd,
+ 0x19, 0xf0, 0xd6, 0x3f, 0x9a, 0x73, 0x55, 0xbc,
+ 0x02, 0xeb, 0xcd, 0x24, 0x81, 0x68, 0x4e, 0xa7,
+ 0x2f, 0xc6, 0xe0, 0x09, 0xac, 0x45, 0x63, 0x8a,
+ 0x34, 0xdd, 0xfb, 0x12, 0xb7, 0x5e, 0x78, 0x91,
+ },
+ {
+ 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac,
+ 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf,
+ 0x06, 0xec, 0xcf, 0x25, 0x89, 0x63, 0x40, 0xaa,
+ 0x05, 0xef, 0xcc, 0x26, 0x8a, 0x60, 0x43, 0xa9,
+ 0x0c, 0xe6, 0xc5, 0x2f, 0x83, 0x69, 0x4a, 0xa0,
+ 0x0f, 0xe5, 0xc6, 0x2c, 0x80, 0x6a, 0x49, 0xa3,
+ 0x0a, 0xe0, 0xc3, 0x29, 0x85, 0x6f, 0x4c, 0xa6,
+ 0x09, 0xe3, 0xc0, 0x2a, 0x86, 0x6c, 0x4f, 0xa5,
+ 0x18, 0xf2, 0xd1, 0x3b, 0x97, 0x7d, 0x5e, 0xb4,
+ 0x1b, 0xf1, 0xd2, 0x38, 0x94, 0x7e, 0x5d, 0xb7,
+ 0x1e, 0xf4, 0xd7, 0x3d, 0x91, 0x7b, 0x58, 0xb2,
+ 0x1d, 0xf7, 0xd4, 0x3e, 0x92, 0x78, 0x5b, 0xb1,
+ 0x14, 0xfe, 0xdd, 0x37, 0x9b, 0x71, 0x52, 0xb8,
+ 0x17, 0xfd, 0xde, 0x34, 0x98, 0x72, 0x51, 0xbb,
+ 0x12, 0xf8, 0xdb, 0x31, 0x9d, 0x77, 0x54, 0xbe,
+ 0x11, 0xfb, 0xd8, 0x32, 0x9e, 0x74, 0x57, 0xbd,
+ 0x30, 0xda, 0xf9, 0x13, 0xbf, 0x55, 0x76, 0x9c,
+ 0x33, 0xd9, 0xfa, 0x10, 0xbc, 0x56, 0x75, 0x9f,
+ 0x36, 0xdc, 0xff, 0x15, 0xb9, 0x53, 0x70, 0x9a,
+ 0x35, 0xdf, 0xfc, 0x16, 0xba, 0x50, 0x73, 0x99,
+ 0x3c, 0xd6, 0xf5, 0x1f, 0xb3, 0x59, 0x7a, 0x90,
+ 0x3f, 0xd5, 0xf6, 0x1c, 0xb0, 0x5a, 0x79, 0x93,
+ 0x3a, 0xd0, 0xf3, 0x19, 0xb5, 0x5f, 0x7c, 0x96,
+ 0x39, 0xd3, 0xf0, 0x1a, 0xb6, 0x5c, 0x7f, 0x95,
+ 0x28, 0xc2, 0xe1, 0x0b, 0xa7, 0x4d, 0x6e, 0x84,
+ 0x2b, 0xc1, 0xe2, 0x08, 0xa4, 0x4e, 0x6d, 0x87,
+ 0x2e, 0xc4, 0xe7, 0x0d, 0xa1, 0x4b, 0x68, 0x82,
+ 0x2d, 0xc7, 0xe4, 0x0e, 0xa2, 0x48, 0x6b, 0x81,
+ 0x24, 0xce, 0xed, 0x07, 0xab, 0x41, 0x62, 0x88,
+ 0x27, 0xcd, 0xee, 0x04, 0xa8, 0x42, 0x61, 0x8b,
+ 0x22, 0xc8, 0xeb, 0x01, 0xad, 0x47, 0x64, 0x8e,
+ 0x21, 0xcb, 0xe8, 0x02, 0xae, 0x44, 0x67, 0x8d,
+ },
+ {
+ 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab,
+ 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0,
+ 0x16, 0xfd, 0xdd, 0x36, 0x9d, 0x76, 0x56, 0xbd,
+ 0x1d, 0xf6, 0xd6, 0x3d, 0x96, 0x7d, 0x5d, 0xb6,
+ 0x2c, 0xc7, 0xe7, 0x0c, 0xa7, 0x4c, 0x6c, 0x87,
+ 0x27, 0xcc, 0xec, 0x07, 0xac, 0x47, 0x67, 0x8c,
+ 0x3a, 0xd1, 0xf1, 0x1a, 0xb1, 0x5a, 0x7a, 0x91,
+ 0x31, 0xda, 0xfa, 0x11, 0xba, 0x51, 0x71, 0x9a,
+ 0x58, 0xb3, 0x93, 0x78, 0xd3, 0x38, 0x18, 0xf3,
+ 0x53, 0xb8, 0x98, 0x73, 0xd8, 0x33, 0x13, 0xf8,
+ 0x4e, 0xa5, 0x85, 0x6e, 0xc5, 0x2e, 0x0e, 0xe5,
+ 0x45, 0xae, 0x8e, 0x65, 0xce, 0x25, 0x05, 0xee,
+ 0x74, 0x9f, 0xbf, 0x54, 0xff, 0x14, 0x34, 0xdf,
+ 0x7f, 0x94, 0xb4, 0x5f, 0xf4, 0x1f, 0x3f, 0xd4,
+ 0x62, 0x89, 0xa9, 0x42, 0xe9, 0x02, 0x22, 0xc9,
+ 0x69, 0x82, 0xa2, 0x49, 0xe2, 0x09, 0x29, 0xc2,
+ 0xb0, 0x5b, 0x7b, 0x90, 0x3b, 0xd0, 0xf0, 0x1b,
+ 0xbb, 0x50, 0x70, 0x9b, 0x30, 0xdb, 0xfb, 0x10,
+ 0xa6, 0x4d, 0x6d, 0x86, 0x2d, 0xc6, 0xe6, 0x0d,
+ 0xad, 0x46, 0x66, 0x8d, 0x26, 0xcd, 0xed, 0x06,
+ 0x9c, 0x77, 0x57, 0xbc, 0x17, 0xfc, 0xdc, 0x37,
+ 0x97, 0x7c, 0x5c, 0xb7, 0x1c, 0xf7, 0xd7, 0x3c,
+ 0x8a, 0x61, 0x41, 0xaa, 0x01, 0xea, 0xca, 0x21,
+ 0x81, 0x6a, 0x4a, 0xa1, 0x0a, 0xe1, 0xc1, 0x2a,
+ 0xe8, 0x03, 0x23, 0xc8, 0x63, 0x88, 0xa8, 0x43,
+ 0xe3, 0x08, 0x28, 0xc3, 0x68, 0x83, 0xa3, 0x48,
+ 0xfe, 0x15, 0x35, 0xde, 0x75, 0x9e, 0xbe, 0x55,
+ 0xf5, 0x1e, 0x3e, 0xd5, 0x7e, 0x95, 0xb5, 0x5e,
+ 0xc4, 0x2f, 0x0f, 0xe4, 0x4f, 0xa4, 0x84, 0x6f,
+ 0xcf, 0x24, 0x04, 0xef, 0x44, 0xaf, 0x8f, 0x64,
+ 0xd2, 0x39, 0x19, 0xf2, 0x59, 0xb2, 0x92, 0x79,
+ 0xd9, 0x32, 0x12, 0xf9, 0x52, 0xb9, 0x99, 0x72,
+ },
+ {
+ 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe,
+ 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d,
+ 0x66, 0x8a, 0xa3, 0x4f, 0xf1, 0x1d, 0x34, 0xd8,
+ 0x55, 0xb9, 0x90, 0x7c, 0xc2, 0x2e, 0x07, 0xeb,
+ 0xcc, 0x20, 0x09, 0xe5, 0x5b, 0xb7, 0x9e, 0x72,
+ 0xff, 0x13, 0x3a, 0xd6, 0x68, 0x84, 0xad, 0x41,
+ 0xaa, 0x46, 0x6f, 0x83, 0x3d, 0xd1, 0xf8, 0x14,
+ 0x99, 0x75, 0x5c, 0xb0, 0x0e, 0xe2, 0xcb, 0x27,
+ 0x85, 0x69, 0x40, 0xac, 0x12, 0xfe, 0xd7, 0x3b,
+ 0xb6, 0x5a, 0x73, 0x9f, 0x21, 0xcd, 0xe4, 0x08,
+ 0xe3, 0x0f, 0x26, 0xca, 0x74, 0x98, 0xb1, 0x5d,
+ 0xd0, 0x3c, 0x15, 0xf9, 0x47, 0xab, 0x82, 0x6e,
+ 0x49, 0xa5, 0x8c, 0x60, 0xde, 0x32, 0x1b, 0xf7,
+ 0x7a, 0x96, 0xbf, 0x53, 0xed, 0x01, 0x28, 0xc4,
+ 0x2f, 0xc3, 0xea, 0x06, 0xb8, 0x54, 0x7d, 0x91,
+ 0x1c, 0xf0, 0xd9, 0x35, 0x8b, 0x67, 0x4e, 0xa2,
+ 0x17, 0xfb, 0xd2, 0x3e, 0x80, 0x6c, 0x45, 0xa9,
+ 0x24, 0xc8, 0xe1, 0x0d, 0xb3, 0x5f, 0x76, 0x9a,
+ 0x71, 0x9d, 0xb4, 0x58, 0xe6, 0x0a, 0x23, 0xcf,
+ 0x42, 0xae, 0x87, 0x6b, 0xd5, 0x39, 0x10, 0xfc,
+ 0xdb, 0x37, 0x1e, 0xf2, 0x4c, 0xa0, 0x89, 0x65,
+ 0xe8, 0x04, 0x2d, 0xc1, 0x7f, 0x93, 0xba, 0x56,
+ 0xbd, 0x51, 0x78, 0x94, 0x2a, 0xc6, 0xef, 0x03,
+ 0x8e, 0x62, 0x4b, 0xa7, 0x19, 0xf5, 0xdc, 0x30,
+ 0x92, 0x7e, 0x57, 0xbb, 0x05, 0xe9, 0xc0, 0x2c,
+ 0xa1, 0x4d, 0x64, 0x88, 0x36, 0xda, 0xf3, 0x1f,
+ 0xf4, 0x18, 0x31, 0xdd, 0x63, 0x8f, 0xa6, 0x4a,
+ 0xc7, 0x2b, 0x02, 0xee, 0x50, 0xbc, 0x95, 0x79,
+ 0x5e, 0xb2, 0x9b, 0x77, 0xc9, 0x25, 0x0c, 0xe0,
+ 0x6d, 0x81, 0xa8, 0x44, 0xfa, 0x16, 0x3f, 0xd3,
+ 0x38, 0xd4, 0xfd, 0x11, 0xaf, 0x43, 0x6a, 0x86,
+ 0x0b, 0xe7, 0xce, 0x22, 0x9c, 0x70, 0x59, 0xb5,
+ },
+ {
+ 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9,
+ 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82,
+ 0x76, 0x9b, 0xb1, 0x5c, 0xe5, 0x08, 0x22, 0xcf,
+ 0x4d, 0xa0, 0x8a, 0x67, 0xde, 0x33, 0x19, 0xf4,
+ 0xec, 0x01, 0x2b, 0xc6, 0x7f, 0x92, 0xb8, 0x55,
+ 0xd7, 0x3a, 0x10, 0xfd, 0x44, 0xa9, 0x83, 0x6e,
+ 0x9a, 0x77, 0x5d, 0xb0, 0x09, 0xe4, 0xce, 0x23,
+ 0xa1, 0x4c, 0x66, 0x8b, 0x32, 0xdf, 0xf5, 0x18,
+ 0xc5, 0x28, 0x02, 0xef, 0x56, 0xbb, 0x91, 0x7c,
+ 0xfe, 0x13, 0x39, 0xd4, 0x6d, 0x80, 0xaa, 0x47,
+ 0xb3, 0x5e, 0x74, 0x99, 0x20, 0xcd, 0xe7, 0x0a,
+ 0x88, 0x65, 0x4f, 0xa2, 0x1b, 0xf6, 0xdc, 0x31,
+ 0x29, 0xc4, 0xee, 0x03, 0xba, 0x57, 0x7d, 0x90,
+ 0x12, 0xff, 0xd5, 0x38, 0x81, 0x6c, 0x46, 0xab,
+ 0x5f, 0xb2, 0x98, 0x75, 0xcc, 0x21, 0x0b, 0xe6,
+ 0x64, 0x89, 0xa3, 0x4e, 0xf7, 0x1a, 0x30, 0xdd,
+ 0x97, 0x7a, 0x50, 0xbd, 0x04, 0xe9, 0xc3, 0x2e,
+ 0xac, 0x41, 0x6b, 0x86, 0x3f, 0xd2, 0xf8, 0x15,
+ 0xe1, 0x0c, 0x26, 0xcb, 0x72, 0x9f, 0xb5, 0x58,
+ 0xda, 0x37, 0x1d, 0xf0, 0x49, 0xa4, 0x8e, 0x63,
+ 0x7b, 0x96, 0xbc, 0x51, 0xe8, 0x05, 0x2f, 0xc2,
+ 0x40, 0xad, 0x87, 0x6a, 0xd3, 0x3e, 0x14, 0xf9,
+ 0x0d, 0xe0, 0xca, 0x27, 0x9e, 0x73, 0x59, 0xb4,
+ 0x36, 0xdb, 0xf1, 0x1c, 0xa5, 0x48, 0x62, 0x8f,
+ 0x52, 0xbf, 0x95, 0x78, 0xc1, 0x2c, 0x06, 0xeb,
+ 0x69, 0x84, 0xae, 0x43, 0xfa, 0x17, 0x3d, 0xd0,
+ 0x24, 0xc9, 0xe3, 0x0e, 0xb7, 0x5a, 0x70, 0x9d,
+ 0x1f, 0xf2, 0xd8, 0x35, 0x8c, 0x61, 0x4b, 0xa6,
+ 0xbe, 0x53, 0x79, 0x94, 0x2d, 0xc0, 0xea, 0x07,
+ 0x85, 0x68, 0x42, 0xaf, 0x16, 0xfb, 0xd1, 0x3c,
+ 0xc8, 0x25, 0x0f, 0xe2, 0x5b, 0xb6, 0x9c, 0x71,
+ 0xf3, 0x1e, 0x34, 0xd9, 0x60, 0x8d, 0xa7, 0x4a,
+ },
+ {
+ 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0,
+ 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93,
+ 0x46, 0xa8, 0x87, 0x69, 0xd9, 0x37, 0x18, 0xf6,
+ 0x65, 0x8b, 0xa4, 0x4a, 0xfa, 0x14, 0x3b, 0xd5,
+ 0x8c, 0x62, 0x4d, 0xa3, 0x13, 0xfd, 0xd2, 0x3c,
+ 0xaf, 0x41, 0x6e, 0x80, 0x30, 0xde, 0xf1, 0x1f,
+ 0xca, 0x24, 0x0b, 0xe5, 0x55, 0xbb, 0x94, 0x7a,
+ 0xe9, 0x07, 0x28, 0xc6, 0x76, 0x98, 0xb7, 0x59,
+ 0x05, 0xeb, 0xc4, 0x2a, 0x9a, 0x74, 0x5b, 0xb5,
+ 0x26, 0xc8, 0xe7, 0x09, 0xb9, 0x57, 0x78, 0x96,
+ 0x43, 0xad, 0x82, 0x6c, 0xdc, 0x32, 0x1d, 0xf3,
+ 0x60, 0x8e, 0xa1, 0x4f, 0xff, 0x11, 0x3e, 0xd0,
+ 0x89, 0x67, 0x48, 0xa6, 0x16, 0xf8, 0xd7, 0x39,
+ 0xaa, 0x44, 0x6b, 0x85, 0x35, 0xdb, 0xf4, 0x1a,
+ 0xcf, 0x21, 0x0e, 0xe0, 0x50, 0xbe, 0x91, 0x7f,
+ 0xec, 0x02, 0x2d, 0xc3, 0x73, 0x9d, 0xb2, 0x5c,
+ 0x0a, 0xe4, 0xcb, 0x25, 0x95, 0x7b, 0x54, 0xba,
+ 0x29, 0xc7, 0xe8, 0x06, 0xb6, 0x58, 0x77, 0x99,
+ 0x4c, 0xa2, 0x8d, 0x63, 0xd3, 0x3d, 0x12, 0xfc,
+ 0x6f, 0x81, 0xae, 0x40, 0xf0, 0x1e, 0x31, 0xdf,
+ 0x86, 0x68, 0x47, 0xa9, 0x19, 0xf7, 0xd8, 0x36,
+ 0xa5, 0x4b, 0x64, 0x8a, 0x3a, 0xd4, 0xfb, 0x15,
+ 0xc0, 0x2e, 0x01, 0xef, 0x5f, 0xb1, 0x9e, 0x70,
+ 0xe3, 0x0d, 0x22, 0xcc, 0x7c, 0x92, 0xbd, 0x53,
+ 0x0f, 0xe1, 0xce, 0x20, 0x90, 0x7e, 0x51, 0xbf,
+ 0x2c, 0xc2, 0xed, 0x03, 0xb3, 0x5d, 0x72, 0x9c,
+ 0x49, 0xa7, 0x88, 0x66, 0xd6, 0x38, 0x17, 0xf9,
+ 0x6a, 0x84, 0xab, 0x45, 0xf5, 0x1b, 0x34, 0xda,
+ 0x83, 0x6d, 0x42, 0xac, 0x1c, 0xf2, 0xdd, 0x33,
+ 0xa0, 0x4e, 0x61, 0x8f, 0x3f, 0xd1, 0xfe, 0x10,
+ 0xc5, 0x2b, 0x04, 0xea, 0x5a, 0xb4, 0x9b, 0x75,
+ 0xe6, 0x08, 0x27, 0xc9, 0x79, 0x97, 0xb8, 0x56,
+ },
+ {
+ 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7,
+ 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c,
+ 0x56, 0xb9, 0x95, 0x7a, 0xcd, 0x22, 0x0e, 0xe1,
+ 0x7d, 0x92, 0xbe, 0x51, 0xe6, 0x09, 0x25, 0xca,
+ 0xac, 0x43, 0x6f, 0x80, 0x37, 0xd8, 0xf4, 0x1b,
+ 0x87, 0x68, 0x44, 0xab, 0x1c, 0xf3, 0xdf, 0x30,
+ 0xfa, 0x15, 0x39, 0xd6, 0x61, 0x8e, 0xa2, 0x4d,
+ 0xd1, 0x3e, 0x12, 0xfd, 0x4a, 0xa5, 0x89, 0x66,
+ 0x45, 0xaa, 0x86, 0x69, 0xde, 0x31, 0x1d, 0xf2,
+ 0x6e, 0x81, 0xad, 0x42, 0xf5, 0x1a, 0x36, 0xd9,
+ 0x13, 0xfc, 0xd0, 0x3f, 0x88, 0x67, 0x4b, 0xa4,
+ 0x38, 0xd7, 0xfb, 0x14, 0xa3, 0x4c, 0x60, 0x8f,
+ 0xe9, 0x06, 0x2a, 0xc5, 0x72, 0x9d, 0xb1, 0x5e,
+ 0xc2, 0x2d, 0x01, 0xee, 0x59, 0xb6, 0x9a, 0x75,
+ 0xbf, 0x50, 0x7c, 0x93, 0x24, 0xcb, 0xe7, 0x08,
+ 0x94, 0x7b, 0x57, 0xb8, 0x0f, 0xe0, 0xcc, 0x23,
+ 0x8a, 0x65, 0x49, 0xa6, 0x11, 0xfe, 0xd2, 0x3d,
+ 0xa1, 0x4e, 0x62, 0x8d, 0x3a, 0xd5, 0xf9, 0x16,
+ 0xdc, 0x33, 0x1f, 0xf0, 0x47, 0xa8, 0x84, 0x6b,
+ 0xf7, 0x18, 0x34, 0xdb, 0x6c, 0x83, 0xaf, 0x40,
+ 0x26, 0xc9, 0xe5, 0x0a, 0xbd, 0x52, 0x7e, 0x91,
+ 0x0d, 0xe2, 0xce, 0x21, 0x96, 0x79, 0x55, 0xba,
+ 0x70, 0x9f, 0xb3, 0x5c, 0xeb, 0x04, 0x28, 0xc7,
+ 0x5b, 0xb4, 0x98, 0x77, 0xc0, 0x2f, 0x03, 0xec,
+ 0xcf, 0x20, 0x0c, 0xe3, 0x54, 0xbb, 0x97, 0x78,
+ 0xe4, 0x0b, 0x27, 0xc8, 0x7f, 0x90, 0xbc, 0x53,
+ 0x99, 0x76, 0x5a, 0xb5, 0x02, 0xed, 0xc1, 0x2e,
+ 0xb2, 0x5d, 0x71, 0x9e, 0x29, 0xc6, 0xea, 0x05,
+ 0x63, 0x8c, 0xa0, 0x4f, 0xf8, 0x17, 0x3b, 0xd4,
+ 0x48, 0xa7, 0x8b, 0x64, 0xd3, 0x3c, 0x10, 0xff,
+ 0x35, 0xda, 0xf6, 0x19, 0xae, 0x41, 0x6d, 0x82,
+ 0x1e, 0xf1, 0xdd, 0x32, 0x85, 0x6a, 0x46, 0xa9,
+ },
+ {
+ 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea,
+ 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39,
+ 0xbb, 0x4b, 0x46, 0xb6, 0x5c, 0xac, 0xa1, 0x51,
+ 0x68, 0x98, 0x95, 0x65, 0x8f, 0x7f, 0x72, 0x82,
+ 0x6b, 0x9b, 0x96, 0x66, 0x8c, 0x7c, 0x71, 0x81,
+ 0xb8, 0x48, 0x45, 0xb5, 0x5f, 0xaf, 0xa2, 0x52,
+ 0xd0, 0x20, 0x2d, 0xdd, 0x37, 0xc7, 0xca, 0x3a,
+ 0x03, 0xf3, 0xfe, 0x0e, 0xe4, 0x14, 0x19, 0xe9,
+ 0xd6, 0x26, 0x2b, 0xdb, 0x31, 0xc1, 0xcc, 0x3c,
+ 0x05, 0xf5, 0xf8, 0x08, 0xe2, 0x12, 0x1f, 0xef,
+ 0x6d, 0x9d, 0x90, 0x60, 0x8a, 0x7a, 0x77, 0x87,
+ 0xbe, 0x4e, 0x43, 0xb3, 0x59, 0xa9, 0xa4, 0x54,
+ 0xbd, 0x4d, 0x40, 0xb0, 0x5a, 0xaa, 0xa7, 0x57,
+ 0x6e, 0x9e, 0x93, 0x63, 0x89, 0x79, 0x74, 0x84,
+ 0x06, 0xf6, 0xfb, 0x0b, 0xe1, 0x11, 0x1c, 0xec,
+ 0xd5, 0x25, 0x28, 0xd8, 0x32, 0xc2, 0xcf, 0x3f,
+ 0xb1, 0x41, 0x4c, 0xbc, 0x56, 0xa6, 0xab, 0x5b,
+ 0x62, 0x92, 0x9f, 0x6f, 0x85, 0x75, 0x78, 0x88,
+ 0x0a, 0xfa, 0xf7, 0x07, 0xed, 0x1d, 0x10, 0xe0,
+ 0xd9, 0x29, 0x24, 0xd4, 0x3e, 0xce, 0xc3, 0x33,
+ 0xda, 0x2a, 0x27, 0xd7, 0x3d, 0xcd, 0xc0, 0x30,
+ 0x09, 0xf9, 0xf4, 0x04, 0xee, 0x1e, 0x13, 0xe3,
+ 0x61, 0x91, 0x9c, 0x6c, 0x86, 0x76, 0x7b, 0x8b,
+ 0xb2, 0x42, 0x4f, 0xbf, 0x55, 0xa5, 0xa8, 0x58,
+ 0x67, 0x97, 0x9a, 0x6a, 0x80, 0x70, 0x7d, 0x8d,
+ 0xb4, 0x44, 0x49, 0xb9, 0x53, 0xa3, 0xae, 0x5e,
+ 0xdc, 0x2c, 0x21, 0xd1, 0x3b, 0xcb, 0xc6, 0x36,
+ 0x0f, 0xff, 0xf2, 0x02, 0xe8, 0x18, 0x15, 0xe5,
+ 0x0c, 0xfc, 0xf1, 0x01, 0xeb, 0x1b, 0x16, 0xe6,
+ 0xdf, 0x2f, 0x22, 0xd2, 0x38, 0xc8, 0xc5, 0x35,
+ 0xb7, 0x47, 0x4a, 0xba, 0x50, 0xa0, 0xad, 0x5d,
+ 0x64, 0x94, 0x99, 0x69, 0x83, 0x73, 0x7e, 0x8e,
+ },
+ {
+ 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed,
+ 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36,
+ 0xab, 0x5a, 0x54, 0xa5, 0x48, 0xb9, 0xb7, 0x46,
+ 0x70, 0x81, 0x8f, 0x7e, 0x93, 0x62, 0x6c, 0x9d,
+ 0x4b, 0xba, 0xb4, 0x45, 0xa8, 0x59, 0x57, 0xa6,
+ 0x90, 0x61, 0x6f, 0x9e, 0x73, 0x82, 0x8c, 0x7d,
+ 0xe0, 0x11, 0x1f, 0xee, 0x03, 0xf2, 0xfc, 0x0d,
+ 0x3b, 0xca, 0xc4, 0x35, 0xd8, 0x29, 0x27, 0xd6,
+ 0x96, 0x67, 0x69, 0x98, 0x75, 0x84, 0x8a, 0x7b,
+ 0x4d, 0xbc, 0xb2, 0x43, 0xae, 0x5f, 0x51, 0xa0,
+ 0x3d, 0xcc, 0xc2, 0x33, 0xde, 0x2f, 0x21, 0xd0,
+ 0xe6, 0x17, 0x19, 0xe8, 0x05, 0xf4, 0xfa, 0x0b,
+ 0xdd, 0x2c, 0x22, 0xd3, 0x3e, 0xcf, 0xc1, 0x30,
+ 0x06, 0xf7, 0xf9, 0x08, 0xe5, 0x14, 0x1a, 0xeb,
+ 0x76, 0x87, 0x89, 0x78, 0x95, 0x64, 0x6a, 0x9b,
+ 0xad, 0x5c, 0x52, 0xa3, 0x4e, 0xbf, 0xb1, 0x40,
+ 0x31, 0xc0, 0xce, 0x3f, 0xd2, 0x23, 0x2d, 0xdc,
+ 0xea, 0x1b, 0x15, 0xe4, 0x09, 0xf8, 0xf6, 0x07,
+ 0x9a, 0x6b, 0x65, 0x94, 0x79, 0x88, 0x86, 0x77,
+ 0x41, 0xb0, 0xbe, 0x4f, 0xa2, 0x53, 0x5d, 0xac,
+ 0x7a, 0x8b, 0x85, 0x74, 0x99, 0x68, 0x66, 0x97,
+ 0xa1, 0x50, 0x5e, 0xaf, 0x42, 0xb3, 0xbd, 0x4c,
+ 0xd1, 0x20, 0x2e, 0xdf, 0x32, 0xc3, 0xcd, 0x3c,
+ 0x0a, 0xfb, 0xf5, 0x04, 0xe9, 0x18, 0x16, 0xe7,
+ 0xa7, 0x56, 0x58, 0xa9, 0x44, 0xb5, 0xbb, 0x4a,
+ 0x7c, 0x8d, 0x83, 0x72, 0x9f, 0x6e, 0x60, 0x91,
+ 0x0c, 0xfd, 0xf3, 0x02, 0xef, 0x1e, 0x10, 0xe1,
+ 0xd7, 0x26, 0x28, 0xd9, 0x34, 0xc5, 0xcb, 0x3a,
+ 0xec, 0x1d, 0x13, 0xe2, 0x0f, 0xfe, 0xf0, 0x01,
+ 0x37, 0xc6, 0xc8, 0x39, 0xd4, 0x25, 0x2b, 0xda,
+ 0x47, 0xb6, 0xb8, 0x49, 0xa4, 0x55, 0x5b, 0xaa,
+ 0x9c, 0x6d, 0x63, 0x92, 0x7f, 0x8e, 0x80, 0x71,
+ },
+ {
+ 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4,
+ 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27,
+ 0x9b, 0x69, 0x62, 0x90, 0x74, 0x86, 0x8d, 0x7f,
+ 0x58, 0xaa, 0xa1, 0x53, 0xb7, 0x45, 0x4e, 0xbc,
+ 0x2b, 0xd9, 0xd2, 0x20, 0xc4, 0x36, 0x3d, 0xcf,
+ 0xe8, 0x1a, 0x11, 0xe3, 0x07, 0xf5, 0xfe, 0x0c,
+ 0xb0, 0x42, 0x49, 0xbb, 0x5f, 0xad, 0xa6, 0x54,
+ 0x73, 0x81, 0x8a, 0x78, 0x9c, 0x6e, 0x65, 0x97,
+ 0x56, 0xa4, 0xaf, 0x5d, 0xb9, 0x4b, 0x40, 0xb2,
+ 0x95, 0x67, 0x6c, 0x9e, 0x7a, 0x88, 0x83, 0x71,
+ 0xcd, 0x3f, 0x34, 0xc6, 0x22, 0xd0, 0xdb, 0x29,
+ 0x0e, 0xfc, 0xf7, 0x05, 0xe1, 0x13, 0x18, 0xea,
+ 0x7d, 0x8f, 0x84, 0x76, 0x92, 0x60, 0x6b, 0x99,
+ 0xbe, 0x4c, 0x47, 0xb5, 0x51, 0xa3, 0xa8, 0x5a,
+ 0xe6, 0x14, 0x1f, 0xed, 0x09, 0xfb, 0xf0, 0x02,
+ 0x25, 0xd7, 0xdc, 0x2e, 0xca, 0x38, 0x33, 0xc1,
+ 0xac, 0x5e, 0x55, 0xa7, 0x43, 0xb1, 0xba, 0x48,
+ 0x6f, 0x9d, 0x96, 0x64, 0x80, 0x72, 0x79, 0x8b,
+ 0x37, 0xc5, 0xce, 0x3c, 0xd8, 0x2a, 0x21, 0xd3,
+ 0xf4, 0x06, 0x0d, 0xff, 0x1b, 0xe9, 0xe2, 0x10,
+ 0x87, 0x75, 0x7e, 0x8c, 0x68, 0x9a, 0x91, 0x63,
+ 0x44, 0xb6, 0xbd, 0x4f, 0xab, 0x59, 0x52, 0xa0,
+ 0x1c, 0xee, 0xe5, 0x17, 0xf3, 0x01, 0x0a, 0xf8,
+ 0xdf, 0x2d, 0x26, 0xd4, 0x30, 0xc2, 0xc9, 0x3b,
+ 0xfa, 0x08, 0x03, 0xf1, 0x15, 0xe7, 0xec, 0x1e,
+ 0x39, 0xcb, 0xc0, 0x32, 0xd6, 0x24, 0x2f, 0xdd,
+ 0x61, 0x93, 0x98, 0x6a, 0x8e, 0x7c, 0x77, 0x85,
+ 0xa2, 0x50, 0x5b, 0xa9, 0x4d, 0xbf, 0xb4, 0x46,
+ 0xd1, 0x23, 0x28, 0xda, 0x3e, 0xcc, 0xc7, 0x35,
+ 0x12, 0xe0, 0xeb, 0x19, 0xfd, 0x0f, 0x04, 0xf6,
+ 0x4a, 0xb8, 0xb3, 0x41, 0xa5, 0x57, 0x5c, 0xae,
+ 0x89, 0x7b, 0x70, 0x82, 0x66, 0x94, 0x9f, 0x6d,
+ },
+ {
+ 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3,
+ 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28,
+ 0x8b, 0x78, 0x70, 0x83, 0x60, 0x93, 0x9b, 0x68,
+ 0x40, 0xb3, 0xbb, 0x48, 0xab, 0x58, 0x50, 0xa3,
+ 0x0b, 0xf8, 0xf0, 0x03, 0xe0, 0x13, 0x1b, 0xe8,
+ 0xc0, 0x33, 0x3b, 0xc8, 0x2b, 0xd8, 0xd0, 0x23,
+ 0x80, 0x73, 0x7b, 0x88, 0x6b, 0x98, 0x90, 0x63,
+ 0x4b, 0xb8, 0xb0, 0x43, 0xa0, 0x53, 0x5b, 0xa8,
+ 0x16, 0xe5, 0xed, 0x1e, 0xfd, 0x0e, 0x06, 0xf5,
+ 0xdd, 0x2e, 0x26, 0xd5, 0x36, 0xc5, 0xcd, 0x3e,
+ 0x9d, 0x6e, 0x66, 0x95, 0x76, 0x85, 0x8d, 0x7e,
+ 0x56, 0xa5, 0xad, 0x5e, 0xbd, 0x4e, 0x46, 0xb5,
+ 0x1d, 0xee, 0xe6, 0x15, 0xf6, 0x05, 0x0d, 0xfe,
+ 0xd6, 0x25, 0x2d, 0xde, 0x3d, 0xce, 0xc6, 0x35,
+ 0x96, 0x65, 0x6d, 0x9e, 0x7d, 0x8e, 0x86, 0x75,
+ 0x5d, 0xae, 0xa6, 0x55, 0xb6, 0x45, 0x4d, 0xbe,
+ 0x2c, 0xdf, 0xd7, 0x24, 0xc7, 0x34, 0x3c, 0xcf,
+ 0xe7, 0x14, 0x1c, 0xef, 0x0c, 0xff, 0xf7, 0x04,
+ 0xa7, 0x54, 0x5c, 0xaf, 0x4c, 0xbf, 0xb7, 0x44,
+ 0x6c, 0x9f, 0x97, 0x64, 0x87, 0x74, 0x7c, 0x8f,
+ 0x27, 0xd4, 0xdc, 0x2f, 0xcc, 0x3f, 0x37, 0xc4,
+ 0xec, 0x1f, 0x17, 0xe4, 0x07, 0xf4, 0xfc, 0x0f,
+ 0xac, 0x5f, 0x57, 0xa4, 0x47, 0xb4, 0xbc, 0x4f,
+ 0x67, 0x94, 0x9c, 0x6f, 0x8c, 0x7f, 0x77, 0x84,
+ 0x3a, 0xc9, 0xc1, 0x32, 0xd1, 0x22, 0x2a, 0xd9,
+ 0xf1, 0x02, 0x0a, 0xf9, 0x1a, 0xe9, 0xe1, 0x12,
+ 0xb1, 0x42, 0x4a, 0xb9, 0x5a, 0xa9, 0xa1, 0x52,
+ 0x7a, 0x89, 0x81, 0x72, 0x91, 0x62, 0x6a, 0x99,
+ 0x31, 0xc2, 0xca, 0x39, 0xda, 0x29, 0x21, 0xd2,
+ 0xfa, 0x09, 0x01, 0xf2, 0x11, 0xe2, 0xea, 0x19,
+ 0xba, 0x49, 0x41, 0xb2, 0x51, 0xa2, 0xaa, 0x59,
+ 0x71, 0x82, 0x8a, 0x79, 0x9a, 0x69, 0x61, 0x92,
+ },
+ {
+ 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6,
+ 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05,
+ 0xfb, 0x0f, 0x0e, 0xfa, 0x0c, 0xf8, 0xf9, 0x0d,
+ 0x08, 0xfc, 0xfd, 0x09, 0xff, 0x0b, 0x0a, 0xfe,
+ 0xeb, 0x1f, 0x1e, 0xea, 0x1c, 0xe8, 0xe9, 0x1d,
+ 0x18, 0xec, 0xed, 0x19, 0xef, 0x1b, 0x1a, 0xee,
+ 0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6,
+ 0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15,
+ 0xcb, 0x3f, 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d,
+ 0x38, 0xcc, 0xcd, 0x39, 0xcf, 0x3b, 0x3a, 0xce,
+ 0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33, 0x32, 0xc6,
+ 0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35,
+ 0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6,
+ 0xd3, 0x27, 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25,
+ 0xdb, 0x2f, 0x2e, 0xda, 0x2c, 0xd8, 0xd9, 0x2d,
+ 0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b, 0x2a, 0xde,
+ 0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d,
+ 0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e,
+ 0x70, 0x84, 0x85, 0x71, 0x87, 0x73, 0x72, 0x86,
+ 0x83, 0x77, 0x76, 0x82, 0x74, 0x80, 0x81, 0x75,
+ 0x60, 0x94, 0x95, 0x61, 0x97, 0x63, 0x62, 0x96,
+ 0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65,
+ 0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d,
+ 0x68, 0x9c, 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e,
+ 0x40, 0xb4, 0xb5, 0x41, 0xb7, 0x43, 0x42, 0xb6,
+ 0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0, 0xb1, 0x45,
+ 0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d,
+ 0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe,
+ 0xab, 0x5f, 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d,
+ 0x58, 0xac, 0xad, 0x59, 0xaf, 0x5b, 0x5a, 0xae,
+ 0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53, 0x52, 0xa6,
+ 0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55,
+ },
+ {
+ 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+ 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a,
+ 0xeb, 0x1e, 0x1c, 0xe9, 0x18, 0xed, 0xef, 0x1a,
+ 0x10, 0xe5, 0xe7, 0x12, 0xe3, 0x16, 0x14, 0xe1,
+ 0xcb, 0x3e, 0x3c, 0xc9, 0x38, 0xcd, 0xcf, 0x3a,
+ 0x30, 0xc5, 0xc7, 0x32, 0xc3, 0x36, 0x34, 0xc1,
+ 0x20, 0xd5, 0xd7, 0x22, 0xd3, 0x26, 0x24, 0xd1,
+ 0xdb, 0x2e, 0x2c, 0xd9, 0x28, 0xdd, 0xdf, 0x2a,
+ 0x8b, 0x7e, 0x7c, 0x89, 0x78, 0x8d, 0x8f, 0x7a,
+ 0x70, 0x85, 0x87, 0x72, 0x83, 0x76, 0x74, 0x81,
+ 0x60, 0x95, 0x97, 0x62, 0x93, 0x66, 0x64, 0x91,
+ 0x9b, 0x6e, 0x6c, 0x99, 0x68, 0x9d, 0x9f, 0x6a,
+ 0x40, 0xb5, 0xb7, 0x42, 0xb3, 0x46, 0x44, 0xb1,
+ 0xbb, 0x4e, 0x4c, 0xb9, 0x48, 0xbd, 0xbf, 0x4a,
+ 0xab, 0x5e, 0x5c, 0xa9, 0x58, 0xad, 0xaf, 0x5a,
+ 0x50, 0xa5, 0xa7, 0x52, 0xa3, 0x56, 0x54, 0xa1,
+ 0x0b, 0xfe, 0xfc, 0x09, 0xf8, 0x0d, 0x0f, 0xfa,
+ 0xf0, 0x05, 0x07, 0xf2, 0x03, 0xf6, 0xf4, 0x01,
+ 0xe0, 0x15, 0x17, 0xe2, 0x13, 0xe6, 0xe4, 0x11,
+ 0x1b, 0xee, 0xec, 0x19, 0xe8, 0x1d, 0x1f, 0xea,
+ 0xc0, 0x35, 0x37, 0xc2, 0x33, 0xc6, 0xc4, 0x31,
+ 0x3b, 0xce, 0xcc, 0x39, 0xc8, 0x3d, 0x3f, 0xca,
+ 0x2b, 0xde, 0xdc, 0x29, 0xd8, 0x2d, 0x2f, 0xda,
+ 0xd0, 0x25, 0x27, 0xd2, 0x23, 0xd6, 0xd4, 0x21,
+ 0x80, 0x75, 0x77, 0x82, 0x73, 0x86, 0x84, 0x71,
+ 0x7b, 0x8e, 0x8c, 0x79, 0x88, 0x7d, 0x7f, 0x8a,
+ 0x6b, 0x9e, 0x9c, 0x69, 0x98, 0x6d, 0x6f, 0x9a,
+ 0x90, 0x65, 0x67, 0x92, 0x63, 0x96, 0x94, 0x61,
+ 0x4b, 0xbe, 0xbc, 0x49, 0xb8, 0x4d, 0x4f, 0xba,
+ 0xb0, 0x45, 0x47, 0xb2, 0x43, 0xb6, 0xb4, 0x41,
+ 0xa0, 0x55, 0x57, 0xa2, 0x53, 0xa6, 0xa4, 0x51,
+ 0x5b, 0xae, 0xac, 0x59, 0xa8, 0x5d, 0x5f, 0xaa,
+ },
+ {
+ 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8,
+ 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b,
+ 0xdb, 0x2d, 0x2a, 0xdc, 0x24, 0xd2, 0xd5, 0x23,
+ 0x38, 0xce, 0xc9, 0x3f, 0xc7, 0x31, 0x36, 0xc0,
+ 0xab, 0x5d, 0x5a, 0xac, 0x54, 0xa2, 0xa5, 0x53,
+ 0x48, 0xbe, 0xb9, 0x4f, 0xb7, 0x41, 0x46, 0xb0,
+ 0x70, 0x86, 0x81, 0x77, 0x8f, 0x79, 0x7e, 0x88,
+ 0x93, 0x65, 0x62, 0x94, 0x6c, 0x9a, 0x9d, 0x6b,
+ 0x4b, 0xbd, 0xba, 0x4c, 0xb4, 0x42, 0x45, 0xb3,
+ 0xa8, 0x5e, 0x59, 0xaf, 0x57, 0xa1, 0xa6, 0x50,
+ 0x90, 0x66, 0x61, 0x97, 0x6f, 0x99, 0x9e, 0x68,
+ 0x73, 0x85, 0x82, 0x74, 0x8c, 0x7a, 0x7d, 0x8b,
+ 0xe0, 0x16, 0x11, 0xe7, 0x1f, 0xe9, 0xee, 0x18,
+ 0x03, 0xf5, 0xf2, 0x04, 0xfc, 0x0a, 0x0d, 0xfb,
+ 0x3b, 0xcd, 0xca, 0x3c, 0xc4, 0x32, 0x35, 0xc3,
+ 0xd8, 0x2e, 0x29, 0xdf, 0x27, 0xd1, 0xd6, 0x20,
+ 0x96, 0x60, 0x67, 0x91, 0x69, 0x9f, 0x98, 0x6e,
+ 0x75, 0x83, 0x84, 0x72, 0x8a, 0x7c, 0x7b, 0x8d,
+ 0x4d, 0xbb, 0xbc, 0x4a, 0xb2, 0x44, 0x43, 0xb5,
+ 0xae, 0x58, 0x5f, 0xa9, 0x51, 0xa7, 0xa0, 0x56,
+ 0x3d, 0xcb, 0xcc, 0x3a, 0xc2, 0x34, 0x33, 0xc5,
+ 0xde, 0x28, 0x2f, 0xd9, 0x21, 0xd7, 0xd0, 0x26,
+ 0xe6, 0x10, 0x17, 0xe1, 0x19, 0xef, 0xe8, 0x1e,
+ 0x05, 0xf3, 0xf4, 0x02, 0xfa, 0x0c, 0x0b, 0xfd,
+ 0xdd, 0x2b, 0x2c, 0xda, 0x22, 0xd4, 0xd3, 0x25,
+ 0x3e, 0xc8, 0xcf, 0x39, 0xc1, 0x37, 0x30, 0xc6,
+ 0x06, 0xf0, 0xf7, 0x01, 0xf9, 0x0f, 0x08, 0xfe,
+ 0xe5, 0x13, 0x14, 0xe2, 0x1a, 0xec, 0xeb, 0x1d,
+ 0x76, 0x80, 0x87, 0x71, 0x89, 0x7f, 0x78, 0x8e,
+ 0x95, 0x63, 0x64, 0x92, 0x6a, 0x9c, 0x9b, 0x6d,
+ 0xad, 0x5b, 0x5c, 0xaa, 0x52, 0xa4, 0xa3, 0x55,
+ 0x4e, 0xb8, 0xbf, 0x49, 0xb1, 0x47, 0x40, 0xb6,
+ },
+ {
+ 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff,
+ 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14,
+ 0xcb, 0x3c, 0x38, 0xcf, 0x30, 0xc7, 0xc3, 0x34,
+ 0x20, 0xd7, 0xd3, 0x24, 0xdb, 0x2c, 0x28, 0xdf,
+ 0x8b, 0x7c, 0x78, 0x8f, 0x70, 0x87, 0x83, 0x74,
+ 0x60, 0x97, 0x93, 0x64, 0x9b, 0x6c, 0x68, 0x9f,
+ 0x40, 0xb7, 0xb3, 0x44, 0xbb, 0x4c, 0x48, 0xbf,
+ 0xab, 0x5c, 0x58, 0xaf, 0x50, 0xa7, 0xa3, 0x54,
+ 0x0b, 0xfc, 0xf8, 0x0f, 0xf0, 0x07, 0x03, 0xf4,
+ 0xe0, 0x17, 0x13, 0xe4, 0x1b, 0xec, 0xe8, 0x1f,
+ 0xc0, 0x37, 0x33, 0xc4, 0x3b, 0xcc, 0xc8, 0x3f,
+ 0x2b, 0xdc, 0xd8, 0x2f, 0xd0, 0x27, 0x23, 0xd4,
+ 0x80, 0x77, 0x73, 0x84, 0x7b, 0x8c, 0x88, 0x7f,
+ 0x6b, 0x9c, 0x98, 0x6f, 0x90, 0x67, 0x63, 0x94,
+ 0x4b, 0xbc, 0xb8, 0x4f, 0xb0, 0x47, 0x43, 0xb4,
+ 0xa0, 0x57, 0x53, 0xa4, 0x5b, 0xac, 0xa8, 0x5f,
+ 0x16, 0xe1, 0xe5, 0x12, 0xed, 0x1a, 0x1e, 0xe9,
+ 0xfd, 0x0a, 0x0e, 0xf9, 0x06, 0xf1, 0xf5, 0x02,
+ 0xdd, 0x2a, 0x2e, 0xd9, 0x26, 0xd1, 0xd5, 0x22,
+ 0x36, 0xc1, 0xc5, 0x32, 0xcd, 0x3a, 0x3e, 0xc9,
+ 0x9d, 0x6a, 0x6e, 0x99, 0x66, 0x91, 0x95, 0x62,
+ 0x76, 0x81, 0x85, 0x72, 0x8d, 0x7a, 0x7e, 0x89,
+ 0x56, 0xa1, 0xa5, 0x52, 0xad, 0x5a, 0x5e, 0xa9,
+ 0xbd, 0x4a, 0x4e, 0xb9, 0x46, 0xb1, 0xb5, 0x42,
+ 0x1d, 0xea, 0xee, 0x19, 0xe6, 0x11, 0x15, 0xe2,
+ 0xf6, 0x01, 0x05, 0xf2, 0x0d, 0xfa, 0xfe, 0x09,
+ 0xd6, 0x21, 0x25, 0xd2, 0x2d, 0xda, 0xde, 0x29,
+ 0x3d, 0xca, 0xce, 0x39, 0xc6, 0x31, 0x35, 0xc2,
+ 0x96, 0x61, 0x65, 0x92, 0x6d, 0x9a, 0x9e, 0x69,
+ 0x7d, 0x8a, 0x8e, 0x79, 0x86, 0x71, 0x75, 0x82,
+ 0x5d, 0xaa, 0xae, 0x59, 0xa6, 0x51, 0x55, 0xa2,
+ 0xb6, 0x41, 0x45, 0xb2, 0x4d, 0xba, 0xbe, 0x49,
+ },
+ {
+ 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2,
+ 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41,
+ 0x3b, 0xc3, 0xd6, 0x2e, 0xfc, 0x04, 0x11, 0xe9,
+ 0xa8, 0x50, 0x45, 0xbd, 0x6f, 0x97, 0x82, 0x7a,
+ 0x76, 0x8e, 0x9b, 0x63, 0xb1, 0x49, 0x5c, 0xa4,
+ 0xe5, 0x1d, 0x08, 0xf0, 0x22, 0xda, 0xcf, 0x37,
+ 0x4d, 0xb5, 0xa0, 0x58, 0x8a, 0x72, 0x67, 0x9f,
+ 0xde, 0x26, 0x33, 0xcb, 0x19, 0xe1, 0xf4, 0x0c,
+ 0xec, 0x14, 0x01, 0xf9, 0x2b, 0xd3, 0xc6, 0x3e,
+ 0x7f, 0x87, 0x92, 0x6a, 0xb8, 0x40, 0x55, 0xad,
+ 0xd7, 0x2f, 0x3a, 0xc2, 0x10, 0xe8, 0xfd, 0x05,
+ 0x44, 0xbc, 0xa9, 0x51, 0x83, 0x7b, 0x6e, 0x96,
+ 0x9a, 0x62, 0x77, 0x8f, 0x5d, 0xa5, 0xb0, 0x48,
+ 0x09, 0xf1, 0xe4, 0x1c, 0xce, 0x36, 0x23, 0xdb,
+ 0xa1, 0x59, 0x4c, 0xb4, 0x66, 0x9e, 0x8b, 0x73,
+ 0x32, 0xca, 0xdf, 0x27, 0xf5, 0x0d, 0x18, 0xe0,
+ 0xc5, 0x3d, 0x28, 0xd0, 0x02, 0xfa, 0xef, 0x17,
+ 0x56, 0xae, 0xbb, 0x43, 0x91, 0x69, 0x7c, 0x84,
+ 0xfe, 0x06, 0x13, 0xeb, 0x39, 0xc1, 0xd4, 0x2c,
+ 0x6d, 0x95, 0x80, 0x78, 0xaa, 0x52, 0x47, 0xbf,
+ 0xb3, 0x4b, 0x5e, 0xa6, 0x74, 0x8c, 0x99, 0x61,
+ 0x20, 0xd8, 0xcd, 0x35, 0xe7, 0x1f, 0x0a, 0xf2,
+ 0x88, 0x70, 0x65, 0x9d, 0x4f, 0xb7, 0xa2, 0x5a,
+ 0x1b, 0xe3, 0xf6, 0x0e, 0xdc, 0x24, 0x31, 0xc9,
+ 0x29, 0xd1, 0xc4, 0x3c, 0xee, 0x16, 0x03, 0xfb,
+ 0xba, 0x42, 0x57, 0xaf, 0x7d, 0x85, 0x90, 0x68,
+ 0x12, 0xea, 0xff, 0x07, 0xd5, 0x2d, 0x38, 0xc0,
+ 0x81, 0x79, 0x6c, 0x94, 0x46, 0xbe, 0xab, 0x53,
+ 0x5f, 0xa7, 0xb2, 0x4a, 0x98, 0x60, 0x75, 0x8d,
+ 0xcc, 0x34, 0x21, 0xd9, 0x0b, 0xf3, 0xe6, 0x1e,
+ 0x64, 0x9c, 0x89, 0x71, 0xa3, 0x5b, 0x4e, 0xb6,
+ 0xf7, 0x0f, 0x1a, 0xe2, 0x30, 0xc8, 0xdd, 0x25,
+ },
+ {
+ 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5,
+ 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e,
+ 0x2b, 0xd2, 0xc4, 0x3d, 0xe8, 0x11, 0x07, 0xfe,
+ 0xb0, 0x49, 0x5f, 0xa6, 0x73, 0x8a, 0x9c, 0x65,
+ 0x56, 0xaf, 0xb9, 0x40, 0x95, 0x6c, 0x7a, 0x83,
+ 0xcd, 0x34, 0x22, 0xdb, 0x0e, 0xf7, 0xe1, 0x18,
+ 0x7d, 0x84, 0x92, 0x6b, 0xbe, 0x47, 0x51, 0xa8,
+ 0xe6, 0x1f, 0x09, 0xf0, 0x25, 0xdc, 0xca, 0x33,
+ 0xac, 0x55, 0x43, 0xba, 0x6f, 0x96, 0x80, 0x79,
+ 0x37, 0xce, 0xd8, 0x21, 0xf4, 0x0d, 0x1b, 0xe2,
+ 0x87, 0x7e, 0x68, 0x91, 0x44, 0xbd, 0xab, 0x52,
+ 0x1c, 0xe5, 0xf3, 0x0a, 0xdf, 0x26, 0x30, 0xc9,
+ 0xfa, 0x03, 0x15, 0xec, 0x39, 0xc0, 0xd6, 0x2f,
+ 0x61, 0x98, 0x8e, 0x77, 0xa2, 0x5b, 0x4d, 0xb4,
+ 0xd1, 0x28, 0x3e, 0xc7, 0x12, 0xeb, 0xfd, 0x04,
+ 0x4a, 0xb3, 0xa5, 0x5c, 0x89, 0x70, 0x66, 0x9f,
+ 0x45, 0xbc, 0xaa, 0x53, 0x86, 0x7f, 0x69, 0x90,
+ 0xde, 0x27, 0x31, 0xc8, 0x1d, 0xe4, 0xf2, 0x0b,
+ 0x6e, 0x97, 0x81, 0x78, 0xad, 0x54, 0x42, 0xbb,
+ 0xf5, 0x0c, 0x1a, 0xe3, 0x36, 0xcf, 0xd9, 0x20,
+ 0x13, 0xea, 0xfc, 0x05, 0xd0, 0x29, 0x3f, 0xc6,
+ 0x88, 0x71, 0x67, 0x9e, 0x4b, 0xb2, 0xa4, 0x5d,
+ 0x38, 0xc1, 0xd7, 0x2e, 0xfb, 0x02, 0x14, 0xed,
+ 0xa3, 0x5a, 0x4c, 0xb5, 0x60, 0x99, 0x8f, 0x76,
+ 0xe9, 0x10, 0x06, 0xff, 0x2a, 0xd3, 0xc5, 0x3c,
+ 0x72, 0x8b, 0x9d, 0x64, 0xb1, 0x48, 0x5e, 0xa7,
+ 0xc2, 0x3b, 0x2d, 0xd4, 0x01, 0xf8, 0xee, 0x17,
+ 0x59, 0xa0, 0xb6, 0x4f, 0x9a, 0x63, 0x75, 0x8c,
+ 0xbf, 0x46, 0x50, 0xa9, 0x7c, 0x85, 0x93, 0x6a,
+ 0x24, 0xdd, 0xcb, 0x32, 0xe7, 0x1e, 0x08, 0xf1,
+ 0x94, 0x6d, 0x7b, 0x82, 0x57, 0xae, 0xb8, 0x41,
+ 0x0f, 0xf6, 0xe0, 0x19, 0xcc, 0x35, 0x23, 0xda,
+ },
+ {
+ 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc,
+ 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f,
+ 0x1b, 0xe1, 0xf2, 0x08, 0xd4, 0x2e, 0x3d, 0xc7,
+ 0x98, 0x62, 0x71, 0x8b, 0x57, 0xad, 0xbe, 0x44,
+ 0x36, 0xcc, 0xdf, 0x25, 0xf9, 0x03, 0x10, 0xea,
+ 0xb5, 0x4f, 0x5c, 0xa6, 0x7a, 0x80, 0x93, 0x69,
+ 0x2d, 0xd7, 0xc4, 0x3e, 0xe2, 0x18, 0x0b, 0xf1,
+ 0xae, 0x54, 0x47, 0xbd, 0x61, 0x9b, 0x88, 0x72,
+ 0x6c, 0x96, 0x85, 0x7f, 0xa3, 0x59, 0x4a, 0xb0,
+ 0xef, 0x15, 0x06, 0xfc, 0x20, 0xda, 0xc9, 0x33,
+ 0x77, 0x8d, 0x9e, 0x64, 0xb8, 0x42, 0x51, 0xab,
+ 0xf4, 0x0e, 0x1d, 0xe7, 0x3b, 0xc1, 0xd2, 0x28,
+ 0x5a, 0xa0, 0xb3, 0x49, 0x95, 0x6f, 0x7c, 0x86,
+ 0xd9, 0x23, 0x30, 0xca, 0x16, 0xec, 0xff, 0x05,
+ 0x41, 0xbb, 0xa8, 0x52, 0x8e, 0x74, 0x67, 0x9d,
+ 0xc2, 0x38, 0x2b, 0xd1, 0x0d, 0xf7, 0xe4, 0x1e,
+ 0xd8, 0x22, 0x31, 0xcb, 0x17, 0xed, 0xfe, 0x04,
+ 0x5b, 0xa1, 0xb2, 0x48, 0x94, 0x6e, 0x7d, 0x87,
+ 0xc3, 0x39, 0x2a, 0xd0, 0x0c, 0xf6, 0xe5, 0x1f,
+ 0x40, 0xba, 0xa9, 0x53, 0x8f, 0x75, 0x66, 0x9c,
+ 0xee, 0x14, 0x07, 0xfd, 0x21, 0xdb, 0xc8, 0x32,
+ 0x6d, 0x97, 0x84, 0x7e, 0xa2, 0x58, 0x4b, 0xb1,
+ 0xf5, 0x0f, 0x1c, 0xe6, 0x3a, 0xc0, 0xd3, 0x29,
+ 0x76, 0x8c, 0x9f, 0x65, 0xb9, 0x43, 0x50, 0xaa,
+ 0xb4, 0x4e, 0x5d, 0xa7, 0x7b, 0x81, 0x92, 0x68,
+ 0x37, 0xcd, 0xde, 0x24, 0xf8, 0x02, 0x11, 0xeb,
+ 0xaf, 0x55, 0x46, 0xbc, 0x60, 0x9a, 0x89, 0x73,
+ 0x2c, 0xd6, 0xc5, 0x3f, 0xe3, 0x19, 0x0a, 0xf0,
+ 0x82, 0x78, 0x6b, 0x91, 0x4d, 0xb7, 0xa4, 0x5e,
+ 0x01, 0xfb, 0xe8, 0x12, 0xce, 0x34, 0x27, 0xdd,
+ 0x99, 0x63, 0x70, 0x8a, 0x56, 0xac, 0xbf, 0x45,
+ 0x1a, 0xe0, 0xf3, 0x09, 0xd5, 0x2f, 0x3c, 0xc6,
+ },
+ {
+ 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb,
+ 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50,
+ 0x0b, 0xf0, 0xe0, 0x1b, 0xc0, 0x3b, 0x2b, 0xd0,
+ 0x80, 0x7b, 0x6b, 0x90, 0x4b, 0xb0, 0xa0, 0x5b,
+ 0x16, 0xed, 0xfd, 0x06, 0xdd, 0x26, 0x36, 0xcd,
+ 0x9d, 0x66, 0x76, 0x8d, 0x56, 0xad, 0xbd, 0x46,
+ 0x1d, 0xe6, 0xf6, 0x0d, 0xd6, 0x2d, 0x3d, 0xc6,
+ 0x96, 0x6d, 0x7d, 0x86, 0x5d, 0xa6, 0xb6, 0x4d,
+ 0x2c, 0xd7, 0xc7, 0x3c, 0xe7, 0x1c, 0x0c, 0xf7,
+ 0xa7, 0x5c, 0x4c, 0xb7, 0x6c, 0x97, 0x87, 0x7c,
+ 0x27, 0xdc, 0xcc, 0x37, 0xec, 0x17, 0x07, 0xfc,
+ 0xac, 0x57, 0x47, 0xbc, 0x67, 0x9c, 0x8c, 0x77,
+ 0x3a, 0xc1, 0xd1, 0x2a, 0xf1, 0x0a, 0x1a, 0xe1,
+ 0xb1, 0x4a, 0x5a, 0xa1, 0x7a, 0x81, 0x91, 0x6a,
+ 0x31, 0xca, 0xda, 0x21, 0xfa, 0x01, 0x11, 0xea,
+ 0xba, 0x41, 0x51, 0xaa, 0x71, 0x8a, 0x9a, 0x61,
+ 0x58, 0xa3, 0xb3, 0x48, 0x93, 0x68, 0x78, 0x83,
+ 0xd3, 0x28, 0x38, 0xc3, 0x18, 0xe3, 0xf3, 0x08,
+ 0x53, 0xa8, 0xb8, 0x43, 0x98, 0x63, 0x73, 0x88,
+ 0xd8, 0x23, 0x33, 0xc8, 0x13, 0xe8, 0xf8, 0x03,
+ 0x4e, 0xb5, 0xa5, 0x5e, 0x85, 0x7e, 0x6e, 0x95,
+ 0xc5, 0x3e, 0x2e, 0xd5, 0x0e, 0xf5, 0xe5, 0x1e,
+ 0x45, 0xbe, 0xae, 0x55, 0x8e, 0x75, 0x65, 0x9e,
+ 0xce, 0x35, 0x25, 0xde, 0x05, 0xfe, 0xee, 0x15,
+ 0x74, 0x8f, 0x9f, 0x64, 0xbf, 0x44, 0x54, 0xaf,
+ 0xff, 0x04, 0x14, 0xef, 0x34, 0xcf, 0xdf, 0x24,
+ 0x7f, 0x84, 0x94, 0x6f, 0xb4, 0x4f, 0x5f, 0xa4,
+ 0xf4, 0x0f, 0x1f, 0xe4, 0x3f, 0xc4, 0xd4, 0x2f,
+ 0x62, 0x99, 0x89, 0x72, 0xa9, 0x52, 0x42, 0xb9,
+ 0xe9, 0x12, 0x02, 0xf9, 0x22, 0xd9, 0xc9, 0x32,
+ 0x69, 0x92, 0x82, 0x79, 0xa2, 0x59, 0x49, 0xb2,
+ 0xe2, 0x19, 0x09, 0xf2, 0x29, 0xd2, 0xc2, 0x39,
+ },
+ {
+ 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce,
+ 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d,
+ 0x7b, 0x87, 0x9e, 0x62, 0xac, 0x50, 0x49, 0xb5,
+ 0xc8, 0x34, 0x2d, 0xd1, 0x1f, 0xe3, 0xfa, 0x06,
+ 0xf6, 0x0a, 0x13, 0xef, 0x21, 0xdd, 0xc4, 0x38,
+ 0x45, 0xb9, 0xa0, 0x5c, 0x92, 0x6e, 0x77, 0x8b,
+ 0x8d, 0x71, 0x68, 0x94, 0x5a, 0xa6, 0xbf, 0x43,
+ 0x3e, 0xc2, 0xdb, 0x27, 0xe9, 0x15, 0x0c, 0xf0,
+ 0xf1, 0x0d, 0x14, 0xe8, 0x26, 0xda, 0xc3, 0x3f,
+ 0x42, 0xbe, 0xa7, 0x5b, 0x95, 0x69, 0x70, 0x8c,
+ 0x8a, 0x76, 0x6f, 0x93, 0x5d, 0xa1, 0xb8, 0x44,
+ 0x39, 0xc5, 0xdc, 0x20, 0xee, 0x12, 0x0b, 0xf7,
+ 0x07, 0xfb, 0xe2, 0x1e, 0xd0, 0x2c, 0x35, 0xc9,
+ 0xb4, 0x48, 0x51, 0xad, 0x63, 0x9f, 0x86, 0x7a,
+ 0x7c, 0x80, 0x99, 0x65, 0xab, 0x57, 0x4e, 0xb2,
+ 0xcf, 0x33, 0x2a, 0xd6, 0x18, 0xe4, 0xfd, 0x01,
+ 0xff, 0x03, 0x1a, 0xe6, 0x28, 0xd4, 0xcd, 0x31,
+ 0x4c, 0xb0, 0xa9, 0x55, 0x9b, 0x67, 0x7e, 0x82,
+ 0x84, 0x78, 0x61, 0x9d, 0x53, 0xaf, 0xb6, 0x4a,
+ 0x37, 0xcb, 0xd2, 0x2e, 0xe0, 0x1c, 0x05, 0xf9,
+ 0x09, 0xf5, 0xec, 0x10, 0xde, 0x22, 0x3b, 0xc7,
+ 0xba, 0x46, 0x5f, 0xa3, 0x6d, 0x91, 0x88, 0x74,
+ 0x72, 0x8e, 0x97, 0x6b, 0xa5, 0x59, 0x40, 0xbc,
+ 0xc1, 0x3d, 0x24, 0xd8, 0x16, 0xea, 0xf3, 0x0f,
+ 0x0e, 0xf2, 0xeb, 0x17, 0xd9, 0x25, 0x3c, 0xc0,
+ 0xbd, 0x41, 0x58, 0xa4, 0x6a, 0x96, 0x8f, 0x73,
+ 0x75, 0x89, 0x90, 0x6c, 0xa2, 0x5e, 0x47, 0xbb,
+ 0xc6, 0x3a, 0x23, 0xdf, 0x11, 0xed, 0xf4, 0x08,
+ 0xf8, 0x04, 0x1d, 0xe1, 0x2f, 0xd3, 0xca, 0x36,
+ 0x4b, 0xb7, 0xae, 0x52, 0x9c, 0x60, 0x79, 0x85,
+ 0x83, 0x7f, 0x66, 0x9a, 0x54, 0xa8, 0xb1, 0x4d,
+ 0x30, 0xcc, 0xd5, 0x29, 0xe7, 0x1b, 0x02, 0xfe,
+ },
+ {
+ 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9,
+ 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72,
+ 0x6b, 0x96, 0x8c, 0x71, 0xb8, 0x45, 0x5f, 0xa2,
+ 0xd0, 0x2d, 0x37, 0xca, 0x03, 0xfe, 0xe4, 0x19,
+ 0xd6, 0x2b, 0x31, 0xcc, 0x05, 0xf8, 0xe2, 0x1f,
+ 0x6d, 0x90, 0x8a, 0x77, 0xbe, 0x43, 0x59, 0xa4,
+ 0xbd, 0x40, 0x5a, 0xa7, 0x6e, 0x93, 0x89, 0x74,
+ 0x06, 0xfb, 0xe1, 0x1c, 0xd5, 0x28, 0x32, 0xcf,
+ 0xb1, 0x4c, 0x56, 0xab, 0x62, 0x9f, 0x85, 0x78,
+ 0x0a, 0xf7, 0xed, 0x10, 0xd9, 0x24, 0x3e, 0xc3,
+ 0xda, 0x27, 0x3d, 0xc0, 0x09, 0xf4, 0xee, 0x13,
+ 0x61, 0x9c, 0x86, 0x7b, 0xb2, 0x4f, 0x55, 0xa8,
+ 0x67, 0x9a, 0x80, 0x7d, 0xb4, 0x49, 0x53, 0xae,
+ 0xdc, 0x21, 0x3b, 0xc6, 0x0f, 0xf2, 0xe8, 0x15,
+ 0x0c, 0xf1, 0xeb, 0x16, 0xdf, 0x22, 0x38, 0xc5,
+ 0xb7, 0x4a, 0x50, 0xad, 0x64, 0x99, 0x83, 0x7e,
+ 0x7f, 0x82, 0x98, 0x65, 0xac, 0x51, 0x4b, 0xb6,
+ 0xc4, 0x39, 0x23, 0xde, 0x17, 0xea, 0xf0, 0x0d,
+ 0x14, 0xe9, 0xf3, 0x0e, 0xc7, 0x3a, 0x20, 0xdd,
+ 0xaf, 0x52, 0x48, 0xb5, 0x7c, 0x81, 0x9b, 0x66,
+ 0xa9, 0x54, 0x4e, 0xb3, 0x7a, 0x87, 0x9d, 0x60,
+ 0x12, 0xef, 0xf5, 0x08, 0xc1, 0x3c, 0x26, 0xdb,
+ 0xc2, 0x3f, 0x25, 0xd8, 0x11, 0xec, 0xf6, 0x0b,
+ 0x79, 0x84, 0x9e, 0x63, 0xaa, 0x57, 0x4d, 0xb0,
+ 0xce, 0x33, 0x29, 0xd4, 0x1d, 0xe0, 0xfa, 0x07,
+ 0x75, 0x88, 0x92, 0x6f, 0xa6, 0x5b, 0x41, 0xbc,
+ 0xa5, 0x58, 0x42, 0xbf, 0x76, 0x8b, 0x91, 0x6c,
+ 0x1e, 0xe3, 0xf9, 0x04, 0xcd, 0x30, 0x2a, 0xd7,
+ 0x18, 0xe5, 0xff, 0x02, 0xcb, 0x36, 0x2c, 0xd1,
+ 0xa3, 0x5e, 0x44, 0xb9, 0x70, 0x8d, 0x97, 0x6a,
+ 0x73, 0x8e, 0x94, 0x69, 0xa0, 0x5d, 0x47, 0xba,
+ 0xc8, 0x35, 0x2f, 0xd2, 0x1b, 0xe6, 0xfc, 0x01,
+ },
+ {
+ 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0,
+ 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63,
+ 0x5b, 0xa5, 0xba, 0x44, 0x84, 0x7a, 0x65, 0x9b,
+ 0xf8, 0x06, 0x19, 0xe7, 0x27, 0xd9, 0xc6, 0x38,
+ 0xb6, 0x48, 0x57, 0xa9, 0x69, 0x97, 0x88, 0x76,
+ 0x15, 0xeb, 0xf4, 0x0a, 0xca, 0x34, 0x2b, 0xd5,
+ 0xed, 0x13, 0x0c, 0xf2, 0x32, 0xcc, 0xd3, 0x2d,
+ 0x4e, 0xb0, 0xaf, 0x51, 0x91, 0x6f, 0x70, 0x8e,
+ 0x71, 0x8f, 0x90, 0x6e, 0xae, 0x50, 0x4f, 0xb1,
+ 0xd2, 0x2c, 0x33, 0xcd, 0x0d, 0xf3, 0xec, 0x12,
+ 0x2a, 0xd4, 0xcb, 0x35, 0xf5, 0x0b, 0x14, 0xea,
+ 0x89, 0x77, 0x68, 0x96, 0x56, 0xa8, 0xb7, 0x49,
+ 0xc7, 0x39, 0x26, 0xd8, 0x18, 0xe6, 0xf9, 0x07,
+ 0x64, 0x9a, 0x85, 0x7b, 0xbb, 0x45, 0x5a, 0xa4,
+ 0x9c, 0x62, 0x7d, 0x83, 0x43, 0xbd, 0xa2, 0x5c,
+ 0x3f, 0xc1, 0xde, 0x20, 0xe0, 0x1e, 0x01, 0xff,
+ 0xe2, 0x1c, 0x03, 0xfd, 0x3d, 0xc3, 0xdc, 0x22,
+ 0x41, 0xbf, 0xa0, 0x5e, 0x9e, 0x60, 0x7f, 0x81,
+ 0xb9, 0x47, 0x58, 0xa6, 0x66, 0x98, 0x87, 0x79,
+ 0x1a, 0xe4, 0xfb, 0x05, 0xc5, 0x3b, 0x24, 0xda,
+ 0x54, 0xaa, 0xb5, 0x4b, 0x8b, 0x75, 0x6a, 0x94,
+ 0xf7, 0x09, 0x16, 0xe8, 0x28, 0xd6, 0xc9, 0x37,
+ 0x0f, 0xf1, 0xee, 0x10, 0xd0, 0x2e, 0x31, 0xcf,
+ 0xac, 0x52, 0x4d, 0xb3, 0x73, 0x8d, 0x92, 0x6c,
+ 0x93, 0x6d, 0x72, 0x8c, 0x4c, 0xb2, 0xad, 0x53,
+ 0x30, 0xce, 0xd1, 0x2f, 0xef, 0x11, 0x0e, 0xf0,
+ 0xc8, 0x36, 0x29, 0xd7, 0x17, 0xe9, 0xf6, 0x08,
+ 0x6b, 0x95, 0x8a, 0x74, 0xb4, 0x4a, 0x55, 0xab,
+ 0x25, 0xdb, 0xc4, 0x3a, 0xfa, 0x04, 0x1b, 0xe5,
+ 0x86, 0x78, 0x67, 0x99, 0x59, 0xa7, 0xb8, 0x46,
+ 0x7e, 0x80, 0x9f, 0x61, 0xa1, 0x5f, 0x40, 0xbe,
+ 0xdd, 0x23, 0x3c, 0xc2, 0x02, 0xfc, 0xe3, 0x1d,
+ },
+ {
+ 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7,
+ 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c,
+ 0x4b, 0xb4, 0xa8, 0x57, 0x90, 0x6f, 0x73, 0x8c,
+ 0xe0, 0x1f, 0x03, 0xfc, 0x3b, 0xc4, 0xd8, 0x27,
+ 0x96, 0x69, 0x75, 0x8a, 0x4d, 0xb2, 0xae, 0x51,
+ 0x3d, 0xc2, 0xde, 0x21, 0xe6, 0x19, 0x05, 0xfa,
+ 0xdd, 0x22, 0x3e, 0xc1, 0x06, 0xf9, 0xe5, 0x1a,
+ 0x76, 0x89, 0x95, 0x6a, 0xad, 0x52, 0x4e, 0xb1,
+ 0x31, 0xce, 0xd2, 0x2d, 0xea, 0x15, 0x09, 0xf6,
+ 0x9a, 0x65, 0x79, 0x86, 0x41, 0xbe, 0xa2, 0x5d,
+ 0x7a, 0x85, 0x99, 0x66, 0xa1, 0x5e, 0x42, 0xbd,
+ 0xd1, 0x2e, 0x32, 0xcd, 0x0a, 0xf5, 0xe9, 0x16,
+ 0xa7, 0x58, 0x44, 0xbb, 0x7c, 0x83, 0x9f, 0x60,
+ 0x0c, 0xf3, 0xef, 0x10, 0xd7, 0x28, 0x34, 0xcb,
+ 0xec, 0x13, 0x0f, 0xf0, 0x37, 0xc8, 0xd4, 0x2b,
+ 0x47, 0xb8, 0xa4, 0x5b, 0x9c, 0x63, 0x7f, 0x80,
+ 0x62, 0x9d, 0x81, 0x7e, 0xb9, 0x46, 0x5a, 0xa5,
+ 0xc9, 0x36, 0x2a, 0xd5, 0x12, 0xed, 0xf1, 0x0e,
+ 0x29, 0xd6, 0xca, 0x35, 0xf2, 0x0d, 0x11, 0xee,
+ 0x82, 0x7d, 0x61, 0x9e, 0x59, 0xa6, 0xba, 0x45,
+ 0xf4, 0x0b, 0x17, 0xe8, 0x2f, 0xd0, 0xcc, 0x33,
+ 0x5f, 0xa0, 0xbc, 0x43, 0x84, 0x7b, 0x67, 0x98,
+ 0xbf, 0x40, 0x5c, 0xa3, 0x64, 0x9b, 0x87, 0x78,
+ 0x14, 0xeb, 0xf7, 0x08, 0xcf, 0x30, 0x2c, 0xd3,
+ 0x53, 0xac, 0xb0, 0x4f, 0x88, 0x77, 0x6b, 0x94,
+ 0xf8, 0x07, 0x1b, 0xe4, 0x23, 0xdc, 0xc0, 0x3f,
+ 0x18, 0xe7, 0xfb, 0x04, 0xc3, 0x3c, 0x20, 0xdf,
+ 0xb3, 0x4c, 0x50, 0xaf, 0x68, 0x97, 0x8b, 0x74,
+ 0xc5, 0x3a, 0x26, 0xd9, 0x1e, 0xe1, 0xfd, 0x02,
+ 0x6e, 0x91, 0x8d, 0x72, 0xb5, 0x4a, 0x56, 0xa9,
+ 0x8e, 0x71, 0x6d, 0x92, 0x55, 0xaa, 0xb6, 0x49,
+ 0x25, 0xda, 0xc6, 0x39, 0xfe, 0x01, 0x1d, 0xe2,
+ },
+};
+
+const uint8_t __aligned(256) raid_gfexp[256] =
+{
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+ 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+ 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+ 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+ 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+ 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+ 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+ 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+ 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+ 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+ 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+ 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+ 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+ 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+ 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+ 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+ 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+ 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+ 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+ 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+ 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+ 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+ 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+ 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+ 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+ 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+ 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+ 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+ 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+ 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+ 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+ 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01,
+};
+
+const uint8_t __aligned(256) raid_gfinv[256] =
+{
+	/* note that the first element is not significant; 0 has no inverse */
+ 0x00, 0x01, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba,
+ 0xad, 0x9d, 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96,
+ 0xd8, 0x72, 0xc0, 0x58, 0xe0, 0x3e, 0x4c, 0x66,
+ 0x90, 0xde, 0x55, 0x80, 0xa0, 0x83, 0x4b, 0x2a,
+ 0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a,
+ 0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e,
+ 0x48, 0x89, 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e,
+ 0x50, 0x22, 0xcf, 0xa9, 0xab, 0x0c, 0x15, 0xe1,
+ 0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e, 0xa6, 0x04,
+ 0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93,
+ 0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61,
+ 0x13, 0xc1, 0xcb, 0x63, 0x97, 0x0e, 0x37, 0x41,
+ 0x24, 0x57, 0xca, 0x5b, 0xb9, 0xc4, 0x17, 0x4d,
+ 0x52, 0x8d, 0xef, 0xb3, 0x20, 0xec, 0x2f, 0x32,
+ 0x28, 0xd1, 0x11, 0xd9, 0xe9, 0xfb, 0xda, 0x79,
+ 0xdb, 0x77, 0x06, 0xbb, 0x84, 0xcd, 0xfe, 0xfc,
+ 0x1b, 0x54, 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0,
+ 0x49, 0x31, 0x27, 0x2d, 0x53, 0x69, 0x02, 0xf5,
+ 0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc, 0x0f, 0x5c,
+ 0x0b, 0xdc, 0xbd, 0x94, 0xac, 0x09, 0xc7, 0xa2,
+ 0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x05,
+ 0xce, 0x3b, 0x0d, 0x3c, 0x9c, 0x08, 0xbe, 0xb7,
+ 0x87, 0xe5, 0xee, 0x6b, 0xeb, 0xf2, 0xbf, 0xaf,
+ 0xc5, 0x64, 0x07, 0x7b, 0x95, 0x9a, 0xae, 0xb6,
+ 0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e,
+ 0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a,
+ 0x29, 0x71, 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6,
+ 0x10, 0x73, 0x76, 0x78, 0x99, 0x0a, 0x19, 0x91,
+ 0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1, 0xe2, 0xf1,
+ 0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a,
+ 0xe3, 0xe7, 0xb5, 0xea, 0x03, 0x8f, 0xd3, 0xc9,
+ 0x42, 0xd4, 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd,
+};
+
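A minimal sketch of how these two tables fit together, assuming GF(2^8) with the usual RAID polynomial x^8 + x^4 + x^3 + x^2 + 1 (0x11d): raid_gfexp[i] holds 2^i, and raid_gfinv[a] holds the multiplicative inverse of a (meaningless for a = 0), so a * raid_gfinv[a] = 1 for nonzero a. The gf_mul() helper below is hypothetical, written only for this check; it is not an API of this file.

#include <assert.h>
#include <stdint.h>

/* Hypothetical helper: shift-and-xor multiply in GF(2^8), reducing by the
 * RAID polynomial x^8 + x^4 + x^3 + x^2 + 1 (0x1d after dropping the x^8 bit). */
static uint8_t gf_mul(uint8_t a, uint8_t b)
{
	uint8_t p = 0;

	while (b) {
		if (b & 1)
			p ^= a;
		b >>= 1;
		a = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);
	}
	return p;
}

/* Sanity checks against the tables defined above (same translation unit). */
static void raid_gf_tables_check(void)
{
	/* raid_gfexp[i] == 2^i: each entry is the previous entry times 2. */
	for (unsigned i = 1; i < 256; i++)
		assert(raid_gfexp[i] == gf_mul(raid_gfexp[i - 1], 2));

	/* raid_gfinv[a] is the inverse of a for a != 0: a * inv(a) == 1. */
	for (unsigned a = 1; a < 256; a++)
		assert(gf_mul((uint8_t)a, raid_gfinv[a]) == 1);
}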
+/**
+ * Power matrix used to generate parity.
+ * This matrix is valid for up to 3 parity disks with up to 251 data disks.
+ *
+ * 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01
+ * 01 02 04 08 10 20 40 80 1d 3a 74 e8 cd 87 13 26 4c 98 2d 5a b4 75 ea c9 8f 03 06 0c 18 30 60 c0 9d 27 4e 9c 25 4a 94 35 6a d4 b5 77 ee c1 9f 23 46 8c 05 0a 14 28 50 a0 5d ba 69 d2 b9 6f de a1 5f be 61 c2 99 2f 5e bc 65 ca 89 0f 1e 3c 78 f0 fd e7 d3 bb 6b d6 b1 7f fe e1 df a3 5b b6 71 e2 d9 af 43 86 11 22 44 88 0d 1a 34 68 d0 bd 67 ce 81 1f 3e 7c f8 ed c7 93 3b 76 ec c5 97 33 66 cc 85 17 2e 5c b8 6d da a9 4f 9e 21 42 84 15 2a 54 a8 4d 9a 29 52 a4 55 aa 49 92 39 72 e4 d5 b7 73 e6 d1 bf 63 c6 91 3f 7e fc e5 d7 b3 7b f6 f1 ff e3 db ab 4b 96 31 62 c4 95 37 6e dc a5 57 ae 41 82 19 32 64 c8 8d 07 0e 1c 38 70 e0 dd a7 53 a6 51 a2 59 b2 79 f2 f9 ef c3 9b 2b 56 ac 45 8a 09 12 24 48 90 3d 7a f4 f5 f7 f3 fb eb cb 8b 0b 16 2c 58 b0 7d fa e9 cf 83 1b 36 6c
+ * 01 8e 47 ad d8 6c 36 1b 83 cf e9 fa 7d b0 58 2c 16 0b 8b cb eb fb f3 f7 f5 f4 7a 3d 90 48 24 12 09 8a 45 ac 56 2b 9b c3 ef f9 f2 79 b2 59 a2 51 a6 53 a7 dd e0 70 38 1c 0e 07 8d c8 64 32 19 82 41 ae 57 a5 dc 6e 37 95 c4 62 31 96 4b ab db e3 ff f1 f6 7b b3 d7 e5 fc 7e 3f 91 c6 63 bf d1 e6 73 b7 d5 e4 72 39 92 49 aa 55 a4 52 29 9a 4d a8 54 2a 15 84 42 21 9e 4f a9 da 6d b8 5c 2e 17 85 cc 66 33 97 c5 ec 76 3b 93 c7 ed f8 7c 3e 1f 81 ce 67 bd d0 68 34 1a 0d 88 44 22 11 86 43 af d9 e2 71 b6 5b a3 df e1 fe 7f b1 d6 6b bb d3 e7 fd f0 78 3c 1e 0f 89 ca 65 bc 5e 2f 99 c2 61 be 5f a1 de 6f b9 d2 69 ba 5d a0 50 28 14 0a 05 8c 46 23 9f c1 ee 77 b5 d4 6a 35 94 4a 25 9c 4e 27 9d c0 60 30 18 0c 06 03 8f c9 ea 75 b4 5a 2d 98 4c 26 13 87 cd e8 74 3a 1d 80 40 20
+ */
+const uint8_t __aligned(256) raid_gfvandermonde[3][256] =
+{
+ {
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01,
+ },
+ {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+ 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+ 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+ 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+ 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+ 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+ 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+ 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+ 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+ 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+ 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+ 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+ 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+ 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+ 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+ 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+ 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+ 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+ 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+ 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+ 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+ 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+ 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+ 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+ 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+ 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+ 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+ 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+ 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+ 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+ 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+ 0x1b, 0x36, 0x6c,
+ },
+ {
+ 0x01, 0x8e, 0x47, 0xad, 0xd8, 0x6c, 0x36, 0x1b,
+ 0x83, 0xcf, 0xe9, 0xfa, 0x7d, 0xb0, 0x58, 0x2c,
+ 0x16, 0x0b, 0x8b, 0xcb, 0xeb, 0xfb, 0xf3, 0xf7,
+ 0xf5, 0xf4, 0x7a, 0x3d, 0x90, 0x48, 0x24, 0x12,
+ 0x09, 0x8a, 0x45, 0xac, 0x56, 0x2b, 0x9b, 0xc3,
+ 0xef, 0xf9, 0xf2, 0x79, 0xb2, 0x59, 0xa2, 0x51,
+ 0xa6, 0x53, 0xa7, 0xdd, 0xe0, 0x70, 0x38, 0x1c,
+ 0x0e, 0x07, 0x8d, 0xc8, 0x64, 0x32, 0x19, 0x82,
+ 0x41, 0xae, 0x57, 0xa5, 0xdc, 0x6e, 0x37, 0x95,
+ 0xc4, 0x62, 0x31, 0x96, 0x4b, 0xab, 0xdb, 0xe3,
+ 0xff, 0xf1, 0xf6, 0x7b, 0xb3, 0xd7, 0xe5, 0xfc,
+ 0x7e, 0x3f, 0x91, 0xc6, 0x63, 0xbf, 0xd1, 0xe6,
+ 0x73, 0xb7, 0xd5, 0xe4, 0x72, 0x39, 0x92, 0x49,
+ 0xaa, 0x55, 0xa4, 0x52, 0x29, 0x9a, 0x4d, 0xa8,
+ 0x54, 0x2a, 0x15, 0x84, 0x42, 0x21, 0x9e, 0x4f,
+ 0xa9, 0xda, 0x6d, 0xb8, 0x5c, 0x2e, 0x17, 0x85,
+ 0xcc, 0x66, 0x33, 0x97, 0xc5, 0xec, 0x76, 0x3b,
+ 0x93, 0xc7, 0xed, 0xf8, 0x7c, 0x3e, 0x1f, 0x81,
+ 0xce, 0x67, 0xbd, 0xd0, 0x68, 0x34, 0x1a, 0x0d,
+ 0x88, 0x44, 0x22, 0x11, 0x86, 0x43, 0xaf, 0xd9,
+ 0xe2, 0x71, 0xb6, 0x5b, 0xa3, 0xdf, 0xe1, 0xfe,
+ 0x7f, 0xb1, 0xd6, 0x6b, 0xbb, 0xd3, 0xe7, 0xfd,
+ 0xf0, 0x78, 0x3c, 0x1e, 0x0f, 0x89, 0xca, 0x65,
+ 0xbc, 0x5e, 0x2f, 0x99, 0xc2, 0x61, 0xbe, 0x5f,
+ 0xa1, 0xde, 0x6f, 0xb9, 0xd2, 0x69, 0xba, 0x5d,
+ 0xa0, 0x50, 0x28, 0x14, 0x0a, 0x05, 0x8c, 0x46,
+ 0x23, 0x9f, 0xc1, 0xee, 0x77, 0xb5, 0xd4, 0x6a,
+ 0x35, 0x94, 0x4a, 0x25, 0x9c, 0x4e, 0x27, 0x9d,
+ 0xc0, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x03, 0x8f,
+ 0xc9, 0xea, 0x75, 0xb4, 0x5a, 0x2d, 0x98, 0x4c,
+ 0x26, 0x13, 0x87, 0xcd, 0xe8, 0x74, 0x3a, 0x1d,
+ 0x80, 0x40, 0x20,
+ },
+};
+
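A minimal sketch of how the rows of a generator matrix such as raid_gfvandermonde (or raid_gfcauchy below) turn data bytes into parity bytes: parity r is the GF(2^8) sum, over all data disks, of matrix[r][disk] * data[disk]. Because the first row is all ones, parity 0 reduces to a plain XOR of the data. gen_parity3() and its buffer layout are assumptions made for illustration; gf_mul() refers to the hypothetical multiply from the sketch after raid_gfinv, whereas the real implementation presumably uses the precomputed tables above and works on whole blocks.

#include <stddef.h>
#include <stdint.h>

/* GF(2^8) multiply, e.g. the bitwise helper from the earlier sketch or a
 * table lookup; declared here so this sketch compiles on its own. */
static uint8_t gf_mul(uint8_t a, uint8_t b);

/*
 * Illustration only: one parity byte per row of a 3-row generator matrix,
 * for one byte taken from each of ndisks data disks (ndisks <= 251).
 */
static void gen_parity3(const uint8_t matrix[3][256],
			const uint8_t *data, size_t ndisks,
			uint8_t parity[3])
{
	for (int r = 0; r < 3; r++) {
		uint8_t acc = 0;

		for (size_t i = 0; i < ndisks; i++)
			acc ^= gf_mul(matrix[r][i], data[i]);

		parity[r] = acc;	/* row 0 (all ones) gives plain XOR parity */
	}
}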
+/**
+ * Cauchy matrix used to generate parity.
+ * This matrix is valid for up to 6 parity disks with up to 251 data disks.
+ *
+ * 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01
+ * 01 02 04 08 10 20 40 80 1d 3a 74 e8 cd 87 13 26 4c 98 2d 5a b4 75 ea c9 8f 03 06 0c 18 30 60 c0 9d 27 4e 9c 25 4a 94 35 6a d4 b5 77 ee c1 9f 23 46 8c 05 0a 14 28 50 a0 5d ba 69 d2 b9 6f de a1 5f be 61 c2 99 2f 5e bc 65 ca 89 0f 1e 3c 78 f0 fd e7 d3 bb 6b d6 b1 7f fe e1 df a3 5b b6 71 e2 d9 af 43 86 11 22 44 88 0d 1a 34 68 d0 bd 67 ce 81 1f 3e 7c f8 ed c7 93 3b 76 ec c5 97 33 66 cc 85 17 2e 5c b8 6d da a9 4f 9e 21 42 84 15 2a 54 a8 4d 9a 29 52 a4 55 aa 49 92 39 72 e4 d5 b7 73 e6 d1 bf 63 c6 91 3f 7e fc e5 d7 b3 7b f6 f1 ff e3 db ab 4b 96 31 62 c4 95 37 6e dc a5 57 ae 41 82 19 32 64 c8 8d 07 0e 1c 38 70 e0 dd a7 53 a6 51 a2 59 b2 79 f2 f9 ef c3 9b 2b 56 ac 45 8a 09 12 24 48 90 3d 7a f4 f5 f7 f3 fb eb cb 8b 0b 16 2c 58 b0 7d fa e9 cf 83 1b 36 6c
+ * 01 f5 d2 c4 9a 71 f1 7f fc 87 c1 c6 19 2f 40 55 3d ba 53 04 9c 61 34 8c 46 68 70 3e cc 7d 74 75 b5 db 0c df 9e 6d 79 eb 63 9f 38 d0 94 a5 24 89 5c 65 5b ae 37 33 4c dd 47 f4 02 a6 39 d8 9d 2d 62 b9 2e 0f 2b 60 58 e4 f8 6c 72 b0 85 4d 95 41 1c 23 05 99 32 c5 0e 82 91 14 d1 af f9 b3 07 97 6e 0b 67 3b 78 e6 28 22 4f a3 ca 48 de 1d a8 17 6f 90 aa 31 5a f3 e9 a9 44 30 56 09 59 6a 42 cd e5 d6 86 d9 bf cb 26 66 7c d5 be 25 1f e0 98 27 92 51 c7 45 2c c0 ad a7 69 f7 b4 e8 84 e1 18 88 3c 76 20 5e 9b 1e 0d 81 4a bd 16 8a ac 93 ce 1a c2 0a 3f fd e3 77 6b d7 ef a4 80 a1 36 ed a2 12 57 b6 29 8d 7b c8 52 c3 bc b8 21 d4 ea d3 06 ab 2a 1b 5f b7 10 ec 64 f6 e2 11 50 83 54 3a fa fb f2 43 b1 ff e7 c9 03 bb ee 13 8b dc 35 b2 da cf a0 96 49 4e 08 73 f0 7e fe 15 4b
+ * 01 bb a6 d7 c7 07 ce 82 4a 2f a5 9b b6 60 f1 ad e7 f4 06 d2 df 2e ca 65 5c 48 21 aa cd 4e c1 61 38 0a 3e d1 d5 cb 10 dc 5e 24 b8 de 79 36 43 72 d9 f8 f9 a2 a4 6a 3d ea 8e 03 f5 ab b4 5d b5 53 6b 39 86 b0 50 74 96 84 5a 4b e8 49 e5 51 ef 12 bc 89 5b 2b 29 09 c3 57 1e 37 76 0b 64 8a 52 59 80 da a8 44 95 3c 33 e6 7c af 6c b1 9d fc 92 d6 d8 ff a7 77 04 13 73 66 28 7d 83 fb 5f 63 25 19 bd c5 3b 6e 20 35 55 42 31 e1 b9 9e 90 d4 ba db f7 2a e9 3a a0 75 7a d3 02 ee 9c c6 1f 14 cc 22 4d 30 71 58 11 85 4f 6f 6d 1d cf fa 54 a9 17 a3 0f ae 0d 1c c2 d0 32 16 f6 c0 7f 2d 15 f3 1b f2 ed b3 45 c8 ac 7b 2c e2 e4 bf be 9f 34 05 70 3f 98 fe 62 18 9a 56 8d 93 97 78 4c 7e 27 87 08 8b ec 67 0e 1a 23 8c 68 99 94 40 b2 a1 eb b7 26 f0 dd e3 69 0c c4 88 41 81 91 e0 fd
+ * 01 97 7f 9c 7c 18 bd a2 58 1a da 74 70 a3 e5 47 29 07 f5 80 23 e9 fa 46 54 a0 99 95 53 9b 0b c7 09 c0 78 89 92 e3 0d b0 2a 8c fb 17 3f 26 65 87 27 5c 66 61 79 4d 32 b3 8d 52 e2 82 3d f9 c5 02 bc 4c 73 48 62 af ba 41 d9 c4 2f b1 33 b8 15 7d cf 3a a9 5f 84 6d 34 1b 44 94 72 81 42 be cc 4b 0a 6f 5a 22 36 b5 3c 9d 13 7e 08 dd d6 5e 04 fc 5b ec ef f1 6e 1e 77 24 e6 c6 aa cb fd 51 67 06 6a 4a 88 db b2 c2 5d 43 40 f7 50 a8 f2 7a 71 a4 d2 bf 31 90 19 9a 8e f6 c3 a6 e7 60 12 ee 2d de 38 e8 b7 98 c1 28 f3 05 96 63 d1 b9 14 9f 1d 83 68 75 ed 16 03 ce e4 df e0 10 ae 69 55 91 2e 4e fe 21 1f 9e e1 d5 cd ca f0 8b 2b c9 8a 93 bb 57 20 86 1c a1 4f 3e 25 d4 6c a5 6b a7 37 ff 39 35 0c f8 ea 56 45 8f 2c 59 ab 85 eb 49 0f dc d8 76 b6 f4 0e 11 b4 d0 30 d3 3b ad d7
+ * 01 2b 3f cf 73 2c d6 ed cb 74 15 78 8a c1 17 c9 89 68 21 ab 76 3b 4b 5a 6e 0e b9 d3 b6 3e 36 86 bf a2 a7 30 14 eb c7 2d 96 67 20 b5 9a e0 a8 c6 80 04 8d fe 75 5e 23 ca 8f 48 99 0d df 8e b8 70 29 9c 44 69 3d a5 c2 90 d2 1c 9b 02 1d 98 93 ec 84 e8 64 4c 3a 8b 97 f3 e5 c0 7d 26 c8 08 a0 62 82 55 f7 33 f6 51 63 4d 77 da fd c3 38 6d ee 09 47 a3 05 de a6 f1 22 25 6a 0c 81 b2 6b 58 d5 b3 fc fb 28 7f 07 dc 7a 9e d0 37 b4 e1 1a 24 03 ae 94 ba 88 2f ea 2e 8c 5b bb 79 d1 11 ff a4 19 3c 2a 4e 52 e3 95 bd 31 5d 35 4a 41 c4 db 42 c5 0b 49 1b 7c e4 b0 9d 45 f0 a9 61 57 06 d4 40 91 56 13 fa 87 ac 27 54 dd 59 1f 71 39 43 6c f9 be 4f f4 1e 32 cd e9 7e 7b 66 5f ef e7 6f 0a 60 d7 b7 83 92 e2 af 72 f8 b1 50 10 ce 18 53 a1 cc ad 12 34 0f f5 aa 16 e6 f2 d8 85 9f bc
+ */
+const uint8_t __aligned(256) raid_gfcauchy[6][256] =
+{
+ {
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01,
+ },
+ {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+ 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+ 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+ 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+ 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+ 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+ 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+ 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+ 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+ 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+ 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+ 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+ 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+ 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+ 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+ 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+ 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+ 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+ 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+ 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+ 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+ 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+ 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+ 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+ 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+ 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+ 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+ 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+ 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+ 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+ 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+ 0x1b, 0x36, 0x6c,
+ },
+ {
+ 0x01, 0xf5, 0xd2, 0xc4, 0x9a, 0x71, 0xf1, 0x7f,
+ 0xfc, 0x87, 0xc1, 0xc6, 0x19, 0x2f, 0x40, 0x55,
+ 0x3d, 0xba, 0x53, 0x04, 0x9c, 0x61, 0x34, 0x8c,
+ 0x46, 0x68, 0x70, 0x3e, 0xcc, 0x7d, 0x74, 0x75,
+ 0xb5, 0xdb, 0x0c, 0xdf, 0x9e, 0x6d, 0x79, 0xeb,
+ 0x63, 0x9f, 0x38, 0xd0, 0x94, 0xa5, 0x24, 0x89,
+ 0x5c, 0x65, 0x5b, 0xae, 0x37, 0x33, 0x4c, 0xdd,
+ 0x47, 0xf4, 0x02, 0xa6, 0x39, 0xd8, 0x9d, 0x2d,
+ 0x62, 0xb9, 0x2e, 0x0f, 0x2b, 0x60, 0x58, 0xe4,
+ 0xf8, 0x6c, 0x72, 0xb0, 0x85, 0x4d, 0x95, 0x41,
+ 0x1c, 0x23, 0x05, 0x99, 0x32, 0xc5, 0x0e, 0x82,
+ 0x91, 0x14, 0xd1, 0xaf, 0xf9, 0xb3, 0x07, 0x97,
+ 0x6e, 0x0b, 0x67, 0x3b, 0x78, 0xe6, 0x28, 0x22,
+ 0x4f, 0xa3, 0xca, 0x48, 0xde, 0x1d, 0xa8, 0x17,
+ 0x6f, 0x90, 0xaa, 0x31, 0x5a, 0xf3, 0xe9, 0xa9,
+ 0x44, 0x30, 0x56, 0x09, 0x59, 0x6a, 0x42, 0xcd,
+ 0xe5, 0xd6, 0x86, 0xd9, 0xbf, 0xcb, 0x26, 0x66,
+ 0x7c, 0xd5, 0xbe, 0x25, 0x1f, 0xe0, 0x98, 0x27,
+ 0x92, 0x51, 0xc7, 0x45, 0x2c, 0xc0, 0xad, 0xa7,
+ 0x69, 0xf7, 0xb4, 0xe8, 0x84, 0xe1, 0x18, 0x88,
+ 0x3c, 0x76, 0x20, 0x5e, 0x9b, 0x1e, 0x0d, 0x81,
+ 0x4a, 0xbd, 0x16, 0x8a, 0xac, 0x93, 0xce, 0x1a,
+ 0xc2, 0x0a, 0x3f, 0xfd, 0xe3, 0x77, 0x6b, 0xd7,
+ 0xef, 0xa4, 0x80, 0xa1, 0x36, 0xed, 0xa2, 0x12,
+ 0x57, 0xb6, 0x29, 0x8d, 0x7b, 0xc8, 0x52, 0xc3,
+ 0xbc, 0xb8, 0x21, 0xd4, 0xea, 0xd3, 0x06, 0xab,
+ 0x2a, 0x1b, 0x5f, 0xb7, 0x10, 0xec, 0x64, 0xf6,
+ 0xe2, 0x11, 0x50, 0x83, 0x54, 0x3a, 0xfa, 0xfb,
+ 0xf2, 0x43, 0xb1, 0xff, 0xe7, 0xc9, 0x03, 0xbb,
+ 0xee, 0x13, 0x8b, 0xdc, 0x35, 0xb2, 0xda, 0xcf,
+ 0xa0, 0x96, 0x49, 0x4e, 0x08, 0x73, 0xf0, 0x7e,
+ 0xfe, 0x15, 0x4b,
+ },
+ {
+ 0x01, 0xbb, 0xa6, 0xd7, 0xc7, 0x07, 0xce, 0x82,
+ 0x4a, 0x2f, 0xa5, 0x9b, 0xb6, 0x60, 0xf1, 0xad,
+ 0xe7, 0xf4, 0x06, 0xd2, 0xdf, 0x2e, 0xca, 0x65,
+ 0x5c, 0x48, 0x21, 0xaa, 0xcd, 0x4e, 0xc1, 0x61,
+ 0x38, 0x0a, 0x3e, 0xd1, 0xd5, 0xcb, 0x10, 0xdc,
+ 0x5e, 0x24, 0xb8, 0xde, 0x79, 0x36, 0x43, 0x72,
+ 0xd9, 0xf8, 0xf9, 0xa2, 0xa4, 0x6a, 0x3d, 0xea,
+ 0x8e, 0x03, 0xf5, 0xab, 0xb4, 0x5d, 0xb5, 0x53,
+ 0x6b, 0x39, 0x86, 0xb0, 0x50, 0x74, 0x96, 0x84,
+ 0x5a, 0x4b, 0xe8, 0x49, 0xe5, 0x51, 0xef, 0x12,
+ 0xbc, 0x89, 0x5b, 0x2b, 0x29, 0x09, 0xc3, 0x57,
+ 0x1e, 0x37, 0x76, 0x0b, 0x64, 0x8a, 0x52, 0x59,
+ 0x80, 0xda, 0xa8, 0x44, 0x95, 0x3c, 0x33, 0xe6,
+ 0x7c, 0xaf, 0x6c, 0xb1, 0x9d, 0xfc, 0x92, 0xd6,
+ 0xd8, 0xff, 0xa7, 0x77, 0x04, 0x13, 0x73, 0x66,
+ 0x28, 0x7d, 0x83, 0xfb, 0x5f, 0x63, 0x25, 0x19,
+ 0xbd, 0xc5, 0x3b, 0x6e, 0x20, 0x35, 0x55, 0x42,
+ 0x31, 0xe1, 0xb9, 0x9e, 0x90, 0xd4, 0xba, 0xdb,
+ 0xf7, 0x2a, 0xe9, 0x3a, 0xa0, 0x75, 0x7a, 0xd3,
+ 0x02, 0xee, 0x9c, 0xc6, 0x1f, 0x14, 0xcc, 0x22,
+ 0x4d, 0x30, 0x71, 0x58, 0x11, 0x85, 0x4f, 0x6f,
+ 0x6d, 0x1d, 0xcf, 0xfa, 0x54, 0xa9, 0x17, 0xa3,
+ 0x0f, 0xae, 0x0d, 0x1c, 0xc2, 0xd0, 0x32, 0x16,
+ 0xf6, 0xc0, 0x7f, 0x2d, 0x15, 0xf3, 0x1b, 0xf2,
+ 0xed, 0xb3, 0x45, 0xc8, 0xac, 0x7b, 0x2c, 0xe2,
+ 0xe4, 0xbf, 0xbe, 0x9f, 0x34, 0x05, 0x70, 0x3f,
+ 0x98, 0xfe, 0x62, 0x18, 0x9a, 0x56, 0x8d, 0x93,
+ 0x97, 0x78, 0x4c, 0x7e, 0x27, 0x87, 0x08, 0x8b,
+ 0xec, 0x67, 0x0e, 0x1a, 0x23, 0x8c, 0x68, 0x99,
+ 0x94, 0x40, 0xb2, 0xa1, 0xeb, 0xb7, 0x26, 0xf0,
+ 0xdd, 0xe3, 0x69, 0x0c, 0xc4, 0x88, 0x41, 0x81,
+ 0x91, 0xe0, 0xfd,
+ },
+ {
+ 0x01, 0x97, 0x7f, 0x9c, 0x7c, 0x18, 0xbd, 0xa2,
+ 0x58, 0x1a, 0xda, 0x74, 0x70, 0xa3, 0xe5, 0x47,
+ 0x29, 0x07, 0xf5, 0x80, 0x23, 0xe9, 0xfa, 0x46,
+ 0x54, 0xa0, 0x99, 0x95, 0x53, 0x9b, 0x0b, 0xc7,
+ 0x09, 0xc0, 0x78, 0x89, 0x92, 0xe3, 0x0d, 0xb0,
+ 0x2a, 0x8c, 0xfb, 0x17, 0x3f, 0x26, 0x65, 0x87,
+ 0x27, 0x5c, 0x66, 0x61, 0x79, 0x4d, 0x32, 0xb3,
+ 0x8d, 0x52, 0xe2, 0x82, 0x3d, 0xf9, 0xc5, 0x02,
+ 0xbc, 0x4c, 0x73, 0x48, 0x62, 0xaf, 0xba, 0x41,
+ 0xd9, 0xc4, 0x2f, 0xb1, 0x33, 0xb8, 0x15, 0x7d,
+ 0xcf, 0x3a, 0xa9, 0x5f, 0x84, 0x6d, 0x34, 0x1b,
+ 0x44, 0x94, 0x72, 0x81, 0x42, 0xbe, 0xcc, 0x4b,
+ 0x0a, 0x6f, 0x5a, 0x22, 0x36, 0xb5, 0x3c, 0x9d,
+ 0x13, 0x7e, 0x08, 0xdd, 0xd6, 0x5e, 0x04, 0xfc,
+ 0x5b, 0xec, 0xef, 0xf1, 0x6e, 0x1e, 0x77, 0x24,
+ 0xe6, 0xc6, 0xaa, 0xcb, 0xfd, 0x51, 0x67, 0x06,
+ 0x6a, 0x4a, 0x88, 0xdb, 0xb2, 0xc2, 0x5d, 0x43,
+ 0x40, 0xf7, 0x50, 0xa8, 0xf2, 0x7a, 0x71, 0xa4,
+ 0xd2, 0xbf, 0x31, 0x90, 0x19, 0x9a, 0x8e, 0xf6,
+ 0xc3, 0xa6, 0xe7, 0x60, 0x12, 0xee, 0x2d, 0xde,
+ 0x38, 0xe8, 0xb7, 0x98, 0xc1, 0x28, 0xf3, 0x05,
+ 0x96, 0x63, 0xd1, 0xb9, 0x14, 0x9f, 0x1d, 0x83,
+ 0x68, 0x75, 0xed, 0x16, 0x03, 0xce, 0xe4, 0xdf,
+ 0xe0, 0x10, 0xae, 0x69, 0x55, 0x91, 0x2e, 0x4e,
+ 0xfe, 0x21, 0x1f, 0x9e, 0xe1, 0xd5, 0xcd, 0xca,
+ 0xf0, 0x8b, 0x2b, 0xc9, 0x8a, 0x93, 0xbb, 0x57,
+ 0x20, 0x86, 0x1c, 0xa1, 0x4f, 0x3e, 0x25, 0xd4,
+ 0x6c, 0xa5, 0x6b, 0xa7, 0x37, 0xff, 0x39, 0x35,
+ 0x0c, 0xf8, 0xea, 0x56, 0x45, 0x8f, 0x2c, 0x59,
+ 0xab, 0x85, 0xeb, 0x49, 0x0f, 0xdc, 0xd8, 0x76,
+ 0xb6, 0xf4, 0x0e, 0x11, 0xb4, 0xd0, 0x30, 0xd3,
+ 0x3b, 0xad, 0xd7,
+ },
+ {
+ 0x01, 0x2b, 0x3f, 0xcf, 0x73, 0x2c, 0xd6, 0xed,
+ 0xcb, 0x74, 0x15, 0x78, 0x8a, 0xc1, 0x17, 0xc9,
+ 0x89, 0x68, 0x21, 0xab, 0x76, 0x3b, 0x4b, 0x5a,
+ 0x6e, 0x0e, 0xb9, 0xd3, 0xb6, 0x3e, 0x36, 0x86,
+ 0xbf, 0xa2, 0xa7, 0x30, 0x14, 0xeb, 0xc7, 0x2d,
+ 0x96, 0x67, 0x20, 0xb5, 0x9a, 0xe0, 0xa8, 0xc6,
+ 0x80, 0x04, 0x8d, 0xfe, 0x75, 0x5e, 0x23, 0xca,
+ 0x8f, 0x48, 0x99, 0x0d, 0xdf, 0x8e, 0xb8, 0x70,
+ 0x29, 0x9c, 0x44, 0x69, 0x3d, 0xa5, 0xc2, 0x90,
+ 0xd2, 0x1c, 0x9b, 0x02, 0x1d, 0x98, 0x93, 0xec,
+ 0x84, 0xe8, 0x64, 0x4c, 0x3a, 0x8b, 0x97, 0xf3,
+ 0xe5, 0xc0, 0x7d, 0x26, 0xc8, 0x08, 0xa0, 0x62,
+ 0x82, 0x55, 0xf7, 0x33, 0xf6, 0x51, 0x63, 0x4d,
+ 0x77, 0xda, 0xfd, 0xc3, 0x38, 0x6d, 0xee, 0x09,
+ 0x47, 0xa3, 0x05, 0xde, 0xa6, 0xf1, 0x22, 0x25,
+ 0x6a, 0x0c, 0x81, 0xb2, 0x6b, 0x58, 0xd5, 0xb3,
+ 0xfc, 0xfb, 0x28, 0x7f, 0x07, 0xdc, 0x7a, 0x9e,
+ 0xd0, 0x37, 0xb4, 0xe1, 0x1a, 0x24, 0x03, 0xae,
+ 0x94, 0xba, 0x88, 0x2f, 0xea, 0x2e, 0x8c, 0x5b,
+ 0xbb, 0x79, 0xd1, 0x11, 0xff, 0xa4, 0x19, 0x3c,
+ 0x2a, 0x4e, 0x52, 0xe3, 0x95, 0xbd, 0x31, 0x5d,
+ 0x35, 0x4a, 0x41, 0xc4, 0xdb, 0x42, 0xc5, 0x0b,
+ 0x49, 0x1b, 0x7c, 0xe4, 0xb0, 0x9d, 0x45, 0xf0,
+ 0xa9, 0x61, 0x57, 0x06, 0xd4, 0x40, 0x91, 0x56,
+ 0x13, 0xfa, 0x87, 0xac, 0x27, 0x54, 0xdd, 0x59,
+ 0x1f, 0x71, 0x39, 0x43, 0x6c, 0xf9, 0xbe, 0x4f,
+ 0xf4, 0x1e, 0x32, 0xcd, 0xe9, 0x7e, 0x7b, 0x66,
+ 0x5f, 0xef, 0xe7, 0x6f, 0x0a, 0x60, 0xd7, 0xb7,
+ 0x83, 0x92, 0xe2, 0xaf, 0x72, 0xf8, 0xb1, 0x50,
+ 0x10, 0xce, 0x18, 0x53, 0xa1, 0xcc, 0xad, 0x12,
+ 0x34, 0x0f, 0xf5, 0xaa, 0x16, 0xe6, 0xf2, 0xd8,
+ 0x85, 0x9f, 0xbc,
+ },
+};
+
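For orientation, a minimal scalar sketch of how a coefficient table like raid_gfcauchy[p][d] would be consumed: row 0 is plain XOR parity, row 1 the usual RAID-6 Q syndrome, and each further row scales data disk d by its Cauchy coefficient before XORing into parity block p. This is only an illustration under the assumption that the field is GF(2^8) with reduction polynomial 0x11d (row 1 of the table is consecutive powers of 2 in that field); gf_mul() and gen_parity_sketch() are hypothetical helpers, not the optimized kernels this patch actually ships, and the sketch assumes raid_gfcauchy from this file is in scope.

#include <stdint.h>
#include <stddef.h>

/* Bitwise GF(2^8) multiply over the 0x11d polynomial (sketch only). */
static uint8_t gf_mul(uint8_t a, uint8_t b)
{
	uint8_t r = 0;

	while (b) {
		if (b & 1)
			r ^= a;
		b >>= 1;
		a = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);
	}
	return r;
}

/*
 * Parity block p is the XOR over all data disks of each byte scaled
 * by the disk's raid_gfcauchy[p][d] coefficient.
 */
static void gen_parity_sketch(int nparity, int ndata, size_t size,
			      uint8_t **data, uint8_t **parity)
{
	for (int p = 0; p < nparity; p++)
		for (size_t i = 0; i < size; i++) {
			uint8_t v = 0;

			for (int d = 0; d < ndata; d++)
				v ^= gf_mul(raid_gfcauchy[p][d], data[d][i]);
			parity[p][i] = v;
		}
}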
+#ifdef CONFIG_X86
+/**
+ * PSHUFB tables for the Cauchy matrix.
+ *
+ * Indexes are [DISK][PARITY - 2][LH], where DISK ranges from 0 to 250,
+ * PARITY from 2 to 5 and LH from 0 to 1.
+ */
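Each [LH] pair is effectively a pair of 16-entry multiplication tables: entry [0][n] appears to hold the disk's coefficient times n (the low nibble) and entry [1][n] the coefficient times n << 4 (the high nibble), so a full byte product is the XOR of the two lookups; PSHUFB performs sixteen such nibble lookups per instruction. A scalar sketch of the same operation, assuming the table below is in scope (illustrative only, not this patch's SSSE3 kernel; cauchy_mul_sketch() is a hypothetical helper):

/* Multiply byte x by the Cauchy coefficient of data disk d for parity p >= 2. */
static uint8_t cauchy_mul_sketch(int d, int p, uint8_t x)
{
	const uint8_t *lo = raid_gfcauchypshufb[d][p - 2][0]; /* low-nibble table */
	const uint8_t *hi = raid_gfcauchypshufb[d][p - 2][1]; /* high-nibble table */

	return lo[x & 0x0f] ^ hi[x >> 4];
}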
+const uint8_t __aligned(256) raid_gfcauchypshufb[251][4][2][16] =
+{
+ {
+ {
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ },
+ {
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ },
+ {
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ },
+ {
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+ },
+ {
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+ },
+ {
+ { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+ { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+ },
+ {
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+ { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+ },
+ {
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+ },
+ {
+ { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+ { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+ },
+ {
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+ { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+ { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+ },
+ {
+ { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+ { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+ },
+ {
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+ },
+ {
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+ },
+ },
+ {
+ {
+ { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+ { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+ },
+ {
+ { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+ { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+ },
+ {
+ { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+ { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+ },
+ {
+ { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+ { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+ },
+ },
+ {
+ {
+ { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+ { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+ },
+ {
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+ },
+ {
+ { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+ { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+ },
+ {
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+ { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+ },
+ },
+ {
+ {
+ { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+ { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+ },
+ {
+ { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+ { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+ },
+ {
+ { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ },
+ {
+ { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+ { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+ { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+ },
+ {
+ { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+ { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+ },
+ {
+ { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+ { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+ },
+ {
+ { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+ { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+ },
+ {
+ { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+ { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+ },
+ {
+ { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+ { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+ },
+ {
+ { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+ { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+ },
+ {
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+ { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+ },
+ {
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+ },
+ {
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+ { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+ },
+ {
+ { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+ { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+ },
+ {
+ { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+ },
+ {
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+ },
+ },
+ {
+ {
+ { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+ { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+ },
+ {
+ { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+ { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+ },
+ {
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ },
+ {
+ { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+ { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+ },
+ {
+ { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+ { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+ },
+ {
+ { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ },
+ {
+ { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+ { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+ },
+ },
+ {
+ {
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+ { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+ },
+ {
+ { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ },
+ {
+ { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+ { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+ },
+ {
+ { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+ { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ },
+ {
+ { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+ { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+ },
+ {
+ { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+ { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+ },
+ {
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+ },
+ {
+ { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ },
+ {
+ { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ },
+ {
+ { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+ { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+ },
+ {
+ { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+ { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+ },
+ {
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+ { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+ },
+ {
+ { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+ { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+ { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+ },
+ {
+ { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+ { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+ },
+ {
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+ },
+ {
+ { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+ { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+ },
+ },
+ {
+ {
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+ },
+ {
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+ },
+ {
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+ },
+ {
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+ },
+ },
+ {
+ {
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+ },
+ {
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+ },
+ {
+ { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ },
+ {
+ { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+ { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+ },
+ {
+ { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+ { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+ },
+ {
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ },
+ {
+ { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+ },
+ },
+ {
+ {
+ { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+ },
+ {
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+ { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+ },
+ {
+ { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+ },
+ {
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+ { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+ },
+ },
+ {
+ {
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+ { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+ },
+ {
+ { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+ },
+ {
+ { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ },
+ {
+ { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+ { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+ },
+ {
+ { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ },
+ {
+ { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+ { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+ },
+ {
+ { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+ { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+ { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+ },
+ {
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+ { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+ },
+ {
+ { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+ },
+ {
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+ { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+ { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+ },
+ {
+ { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+ { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+ },
+ {
+ { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ },
+ {
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ },
+ {
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+ },
+ {
+ { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+ { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+ },
+ {
+ { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+ { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+ },
+ {
+ { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+ { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+ },
+ {
+ { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+ { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+ },
+ {
+ { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+ { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+ },
+ {
+ { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+ { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+ },
+ {
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+ },
+ {
+ { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+ { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+ },
+ },
+ {
+ {
+ { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+ { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+ },
+ {
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+ },
+ {
+ { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+ { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+ },
+ {
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+ { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ },
+ {
+ { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+ { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+ },
+ {
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+ },
+ {
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+ { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+ },
+ },
+ {
+ {
+ { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ },
+ {
+ { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+ },
+ {
+ { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+ { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+ },
+ {
+ { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+ { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+ },
+ },
+ {
+ {
+ { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+ { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+ },
+ {
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+ { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+ },
+ {
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+ },
+ {
+ { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+ },
+ {
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+ },
+ {
+ { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ },
+ {
+ { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+ { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+ },
+ {
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+ { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+ },
+ {
+ { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+ { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+ },
+ {
+ { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+ { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+ { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+ },
+ {
+ { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+ { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+ },
+ {
+ { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+ { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+ },
+ {
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ },
+ {
+ { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+ { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+ },
+ {
+ { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+ { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+ },
+ {
+ { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+ { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+ },
+ },
+ {
+ {
+ { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+ { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+ },
+ {
+ { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+ },
+ {
+ { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+ { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+ },
+ {
+ { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+ { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+ },
+ {
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ },
+ {
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+ },
+ {
+ { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+ { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+ },
+ },
+ {
+ {
+ { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ },
+ {
+ { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+ { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+ },
+ {
+ { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ },
+ {
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+ { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+ },
+ },
+ {
+ {
+ { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+ { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+ },
+ {
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+ { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+ },
+ {
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+ { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+ },
+ {
+ { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+ { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ },
+ {
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+ },
+ {
+ { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+ },
+ {
+ { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+ { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+ },
+ {
+ { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+ },
+ {
+ { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ },
+ {
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ },
+ },
+ {
+ {
+ { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ },
+ {
+ { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+ { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+ },
+ {
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+ },
+ {
+ { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+ { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+ { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+ },
+ {
+ { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+ { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+ },
+ {
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+ { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+ },
+ {
+ { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+ { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+ { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+ },
+ {
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+ { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+ },
+ {
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+ { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+ },
+ {
+ { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ },
+ },
+ {
+ {
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+ },
+ {
+ { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+ { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+ },
+ {
+ { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ },
+ {
+ { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+ { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+ { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+ },
+ {
+ { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+ { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+ },
+ {
+ { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+ { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+ },
+ {
+ { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+ { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+ },
+ },
+ {
+ {
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+ { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+ },
+ {
+ { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ },
+ {
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+ },
+ {
+ { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ },
+ {
+ { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+ },
+ {
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+ { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+ },
+ {
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+ { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+ },
+ {
+ { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+ },
+ {
+ { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+ },
+ {
+ { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+ },
+ {
+ { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+ { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+ },
+ {
+ { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+ },
+ {
+ { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+ { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+ },
+ },
+ {
+ {
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+ { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+ },
+ {
+ { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+ { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+ },
+ {
+ { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+ { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+ },
+ {
+ { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ },
+ {
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+ { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+ },
+ {
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+ { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+ },
+ {
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+ { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+ { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+ },
+ {
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+ { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+ },
+ {
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ },
+ {
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+ { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+ },
+ {
+ { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ },
+ {
+ { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+ { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+ },
+ {
+ { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ },
+ {
+ { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ },
+ {
+ { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+ },
+ {
+ { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+ { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+ { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+ },
+ {
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+ },
+ {
+ { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+ { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+ },
+ {
+ { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+ { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+ },
+ {
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+ },
+ {
+ { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+ { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+ },
+ {
+ { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+ { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+ },
+ {
+ { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+ { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+ },
+ {
+ { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+ { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+ },
+ {
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+ { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+ },
+ {
+ { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+ { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+ },
+ {
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+ { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+ },
+ {
+ { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+ { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ },
+ {
+ { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+ { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+ },
+ {
+ { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+ },
+ {
+ { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+ },
+ {
+ { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+ { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+ },
+ {
+ { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+ { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+ },
+ {
+ { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+ { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+ },
+ {
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+ },
+ {
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+ },
+ {
+ { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+ },
+ {
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+ { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+ },
+ {
+ { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ },
+ {
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+ { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+ },
+ {
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+ { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+ },
+ {
+ { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+ { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+ },
+ {
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+ },
+ },
+ {
+ {
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+ { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+ },
+ {
+ { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+ { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+ },
+ {
+ { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+ { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+ },
+ {
+ { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+ },
+ {
+ { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ },
+ {
+ { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+ { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+ },
+ {
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+ },
+ },
+ {
+ {
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+ { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+ },
+ {
+ { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ },
+ {
+ { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+ },
+ {
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+ { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ },
+ {
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ },
+ {
+ { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+ },
+ {
+ { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+ { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+ },
+ },
+ {
+ {
+ { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+ { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+ },
+ {
+ { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+ { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+ },
+ {
+ { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+ { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+ },
+ {
+ { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+ { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+ },
+ },
+ {
+ {
+ { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+ { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+ },
+ {
+ { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+ { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+ },
+ {
+ { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+ { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+ },
+ {
+ { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ },
+ },
+ {
+ {
+ { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+ },
+ {
+ { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+ { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+ },
+ {
+ { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ },
+ {
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+ { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+ },
+ {
+ { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+ { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+ },
+ {
+ { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+ { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+ },
+ {
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+ },
+ },
+ {
+ {
+ { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+ { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+ },
+ {
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+ },
+ {
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+ { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+ },
+ {
+ { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+ { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ },
+ {
+ { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+ { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+ },
+ {
+ { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+ { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+ },
+ {
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+ },
+ },
+ {
+ {
+ { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+ { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+ },
+ {
+ { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+ { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+ },
+ {
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ },
+ {
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+ },
+ },
+ {
+ {
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+ { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+ },
+ {
+ { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+ { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+ },
+ {
+ { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+ },
+ {
+ { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+ { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+ { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+ },
+ {
+ { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+ { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+ },
+ {
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+ },
+ {
+ { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+ { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+ { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+ },
+ {
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+ },
+ {
+ { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+ { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+ },
+ {
+ { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+ { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+ },
+ {
+ { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ },
+ {
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+ },
+ {
+ { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+ { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ },
+ {
+ { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+ { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+ },
+ {
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+ },
+ {
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+ },
+ },
+ {
+ {
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+ },
+ {
+ { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+ { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+ },
+ {
+ { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+ { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+ },
+ {
+ { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ },
+ },
+ {
+ {
+ { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+ { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+ },
+ {
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+ { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+ },
+ {
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+ { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+ },
+ {
+ { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+ { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ },
+ {
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+ { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+ },
+ {
+ { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+ { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+ },
+ {
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+ },
+ },
+ {
+ {
+ { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+ { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+ },
+ {
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+ },
+ {
+ { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+ { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+ },
+ {
+ { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+ { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+ },
+ },
+ {
+ {
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+ },
+ {
+ { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+ { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+ },
+ {
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+ { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+ },
+ {
+ { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+ { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+ { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+ },
+ {
+ { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ },
+ {
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+ },
+ {
+ { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+ { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+ },
+ },
+ {
+ {
+ { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+ { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+ },
+ {
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+ },
+ {
+ { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+ },
+ {
+ { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+ { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+ },
+ },
+ {
+ {
+ { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+ { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+ },
+ {
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+ { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+ },
+ {
+ { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+ { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+ },
+ {
+ { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+ { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+ },
+ {
+ { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+ },
+ {
+ { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+ { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+ },
+ {
+ { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+ { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+ },
+ {
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+ },
+ {
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+ },
+ {
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+ { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+ },
+ {
+ { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ },
+ {
+ { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+ { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+ },
+ {
+ { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+ { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+ },
+ {
+ { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+ { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+ },
+ {
+ { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+ },
+ {
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+ },
+ {
+ { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+ { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+ },
+ {
+ { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+ },
+ {
+ { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ },
+ },
+ {
+ {
+ { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+ { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+ },
+ {
+ { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+ { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+ },
+ {
+ { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+ { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+ },
+ {
+ { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+ { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+ },
+ {
+ { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ },
+ {
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+ },
+ {
+ { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+ { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+ },
+ },
+ {
+ {
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+ },
+ {
+ { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+ },
+ {
+ { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+ { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+ },
+ {
+ { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+ },
+ },
+ {
+ {
+ { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+ },
+ {
+ { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+ { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+ },
+ {
+ { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+ { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+ },
+ {
+ { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+ { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+ },
+ },
+ {
+ {
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+ { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+ },
+ {
+ { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+ },
+ {
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ },
+ {
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ },
+ },
+ {
+ {
+ { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+ { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+ },
+ {
+ { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+ { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+ },
+ {
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+ { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+ },
+ {
+ { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+ { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+ },
+ },
+ {
+ {
+ { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+ { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+ },
+ {
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+ { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+ },
+ {
+ { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+ { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+ },
+ {
+ { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+ { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+ },
+ },
+ {
+ {
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+ { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+ },
+ {
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ },
+ {
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+ { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+ },
+ {
+ { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+ { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ },
+ {
+ { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+ { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+ },
+ {
+ { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+ },
+ {
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+ { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+ { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+ },
+ {
+ { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+ { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+ },
+ {
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+ },
+ {
+ { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+ },
+ },
+ {
+ {
+ { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+ { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+ },
+ {
+ { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+ },
+ {
+ { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+ { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+ },
+ {
+ { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+ },
+ {
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+ { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+ },
+ {
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+ },
+ {
+ { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+ { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+ },
+ {
+ { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+ { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+ },
+ {
+ { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+ { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+ },
+ {
+ { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+ { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+ },
+ },
+ {
+ {
+ { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+ { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+ },
+ {
+ { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+ },
+ {
+ { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+ { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+ },
+ {
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+ { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+ },
+ {
+ { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+ },
+ {
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+ { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+ },
+ {
+ { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+ { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+ { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+ },
+ {
+ { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+ { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+ },
+ {
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+ },
+ {
+ { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+ { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+ },
+ {
+ { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+ { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+ },
+ {
+ { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+ },
+ {
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+ { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+ },
+ {
+ { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ },
+ {
+ { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+ { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+ },
+ {
+ { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ },
+ },
+ {
+ {
+ { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ },
+ {
+ { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+ { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+ },
+ {
+ { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+ { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+ },
+ {
+ { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+ { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+ { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+ },
+ {
+ { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+ { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+ },
+ {
+ { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+ { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+ },
+ {
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+ },
+ {
+ { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+ },
+ {
+ { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+ { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+ },
+ {
+ { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+ { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+ { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+ },
+ {
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+ },
+ {
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+ { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+ },
+ {
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+ { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+ },
+ {
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+ },
+ {
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+ },
+ {
+ { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+ { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+ },
+ {
+ { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+ { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+ },
+ {
+ { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+ },
+ {
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+ { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+ },
+ {
+ { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+ },
+ {
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+ },
+ {
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+ },
+ },
+ {
+ {
+ { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+ },
+ {
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+ { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+ },
+ {
+ { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+ { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+ },
+ {
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+ { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ },
+ {
+ { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+ { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+ },
+ {
+ { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+ { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+ },
+ {
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ },
+ {
+ { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+ { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+ },
+ {
+ { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+ { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+ },
+ {
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+ },
+ {
+ { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ },
+ {
+ { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+ },
+ {
+ { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+ { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+ },
+ },
+ {
+ {
+ { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+ { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+ },
+ {
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+ { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+ },
+ {
+ { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+ },
+ {
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+ { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+ { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+ },
+ {
+ { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+ { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+ },
+ {
+ { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+ { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+ },
+ {
+ { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+ { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+ },
+ },
+ {
+ {
+ { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+ { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+ },
+ {
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+ },
+ {
+ { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+ },
+ {
+ { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+ { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+ },
+ },
+ {
+ {
+ { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+ { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+ },
+ {
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+ },
+ {
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+ },
+ {
+ { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+ { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+ },
+ },
+ {
+ {
+ { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+ { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+ },
+ {
+ { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ },
+ {
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+ { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+ },
+ {
+ { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+ { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+ },
+ {
+ { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+ { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+ },
+ {
+ { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+ { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+ },
+ {
+ { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+ { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+ },
+ {
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+ { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+ },
+ {
+ { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+ { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+ },
+ {
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+ { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ },
+ {
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+ { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+ },
+ {
+ { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+ },
+ {
+ { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+ { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+ },
+ },
+ {
+ {
+ { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+ },
+ {
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ },
+ {
+ { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+ { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+ },
+ {
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+ },
+ },
+ {
+ {
+ { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+ },
+ {
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+ { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+ },
+ {
+ { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+ { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+ },
+ {
+ { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+ { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+ { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+ },
+ {
+ { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+ },
+ {
+ { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+ { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+ },
+ {
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+ { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+ },
+ {
+ { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+ { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+ },
+ {
+ { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+ { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+ },
+ {
+ { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+ { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+ },
+ {
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+ },
+ {
+ { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ },
+ {
+ { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ },
+ },
+ {
+ {
+ { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+ { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+ },
+ {
+ { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+ { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+ },
+ {
+ { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+ { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+ },
+ {
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+ { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+ },
+ },
+ {
+ {
+ { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+ },
+ {
+ { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+ },
+ {
+ { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ },
+ {
+ { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+ { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+ },
+ {
+ { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ },
+ {
+ { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+ { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+ },
+ {
+ { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+ { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+ },
+ },
+ {
+ {
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+ },
+ {
+ { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ },
+ {
+ { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+ { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+ },
+ {
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ },
+ {
+ { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+ { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+ },
+ {
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+ { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+ },
+ {
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+ },
+ },
+ {
+ {
+ { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+ { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+ },
+ {
+ { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+ { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+ },
+ {
+ { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+ { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+ },
+ {
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+ },
+ },
+ {
+ {
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+ },
+ {
+ { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+ },
+ {
+ { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+ { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+ },
+ {
+ { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+ { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+ },
+ {
+ { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+ { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+ },
+ {
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+ },
+ {
+ { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+ { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+ },
+ },
+ {
+ {
+ { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+ { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+ },
+ {
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+ { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+ },
+ {
+ { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+ },
+ {
+ { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+ { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+ },
+ },
+ {
+ {
+ { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+ { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+ },
+ {
+ { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+ },
+ {
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+ },
+ {
+ { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+ { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+ },
+ {
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+ },
+ {
+ { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ },
+ {
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+ { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+ { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+ },
+ {
+ { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ },
+ {
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+ },
+ {
+ { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ },
+ {
+ { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ },
+ {
+ { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+ { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+ },
+ {
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+ { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ },
+ {
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+ { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+ },
+ {
+ { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ },
+ {
+ { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+ { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+ },
+ {
+ { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+ { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+ },
+ {
+ { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+ { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+ },
+ {
+ { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+ { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+ },
+ {
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+ },
+ {
+ { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+ { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+ },
+ {
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+ },
+ },
+ {
+ {
+ { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+ { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+ },
+ {
+ { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+ { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+ },
+ {
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+ },
+ {
+ { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+ { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+ { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+ },
+ {
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+ },
+ {
+ { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+ { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+ },
+ {
+ { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+ { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+ },
+ },
+ {
+ {
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+ },
+ {
+ { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+ { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+ },
+ {
+ { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ },
+ {
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ },
+ },
+ {
+ {
+ { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+ { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+ },
+ {
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+ },
+ {
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+ },
+ {
+ { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+ { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+ },
+ },
+ {
+ {
+ { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+ { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+ },
+ {
+ { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+ { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+ },
+ {
+ { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+ { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+ },
+ {
+ { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+ { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+ },
+ },
+ {
+ {
+ { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+ { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+ },
+ {
+ { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+ },
+ {
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+ { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+ },
+ {
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+ { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+ },
+ {
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ },
+ {
+ { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+ { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+ },
+ {
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+ { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+ { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+ },
+ {
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+ { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+ },
+ {
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+ { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+ },
+ {
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+ { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+ },
+ {
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ },
+ {
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+ },
+ {
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+ },
+ },
+ {
+ {
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ },
+ {
+ { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+ { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+ },
+ {
+ { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+ { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+ },
+ {
+ { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+ { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+ { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+ },
+ {
+ { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+ { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+ },
+ {
+ { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+ { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+ },
+ {
+ { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+ { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+ { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+ },
+ {
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ },
+ {
+ { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+ { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+ },
+ {
+ { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+ { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+ },
+ },
+ {
+ {
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+ },
+ {
+ { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+ { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+ },
+ {
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+ { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+ },
+ {
+ { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+ },
+ {
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+ { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+ },
+ {
+ { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+ { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+ },
+ {
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+ },
+ {
+ { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+ { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+ },
+ {
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+ },
+ {
+ { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+ { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+ { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+ },
+ {
+ { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+ { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+ },
+ {
+ { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+ { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+ },
+ {
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+ { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ },
+ {
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+ },
+ {
+ { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+ { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+ },
+ {
+ { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+ { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+ },
+ {
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+ },
+ {
+ { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+ { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+ },
+ {
+ { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+ { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+ { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+ },
+ {
+ { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ },
+ {
+ { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+ },
+ {
+ { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+ { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ },
+ {
+ { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+ },
+ {
+ { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+ { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+ },
+ {
+ { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+ { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+ },
+ {
+ { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+ { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+ },
+ {
+ { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ },
+ {
+ { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+ { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+ },
+ },
+ {
+ {
+ { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+ { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+ },
+ {
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+ },
+ {
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+ },
+ {
+ { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+ { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+ },
+ {
+ { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+ { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+ },
+ {
+ { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+ { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+ },
+ {
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+ },
+ },
+ {
+ {
+ { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+ { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+ },
+ {
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+ },
+ {
+ { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+ { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+ },
+ {
+ { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+ { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+ },
+ {
+ { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+ },
+ {
+ { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ },
+ {
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+ { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+ },
+ {
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+ },
+ {
+ { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+ { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+ },
+ {
+ { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+ { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+ },
+ {
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+ },
+ {
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+ },
+ {
+ { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+ { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+ },
+ },
+ {
+ {
+ { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+ { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+ },
+ {
+ { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+ { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+ },
+ {
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+ },
+ {
+ { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+ },
+ {
+ { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ },
+ {
+ { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+ { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+ },
+ {
+ { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+ },
+ },
+ {
+ {
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+ { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+ },
+ {
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ },
+ {
+ { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+ { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+ },
+ {
+ { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+ { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+ },
+ {
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+ },
+ {
+ { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+ { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+ },
+ {
+ { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+ { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+ },
+ {
+ { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+ { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+ },
+ {
+ { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ },
+ {
+ { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+ { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+ { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+ },
+ {
+ { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ },
+ {
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ },
+ {
+ { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+ },
+ },
+ {
+ {
+ { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ },
+ {
+ { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+ { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+ },
+ {
+ { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+ },
+ {
+ { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+ { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+ },
+ {
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+ { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+ },
+ {
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+ },
+ {
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+ },
+ },
+ {
+ {
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+ { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+ },
+ {
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+ },
+ {
+ { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+ },
+ {
+ { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+ { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+ },
+ },
+ {
+ {
+ { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+ { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+ },
+ {
+ { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+ { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+ },
+ {
+ { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+ { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+ },
+ {
+ { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+ { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+ },
+ {
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+ },
+ {
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+ { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+ },
+ {
+ { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+ { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+ },
+ },
+ {
+ {
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+ },
+ {
+ { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+ { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+ },
+ {
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+ },
+ {
+ { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ },
+ {
+ { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+ { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+ },
+ {
+ { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+ { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+ },
+ {
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+ { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+ },
+ {
+ { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+ { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+ },
+ {
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+ },
+ {
+ { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+ { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+ },
+ {
+ { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+ },
+ {
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+ },
+ {
+ { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+ { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+ },
+ },
+ {
+ {
+ { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+ },
+ {
+ { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ },
+ {
+ { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ },
+ {
+ { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ },
+ },
+ {
+ {
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+ { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+ },
+ {
+ { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ },
+ {
+ { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+ { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+ },
+ {
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ },
+ {
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+ { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+ },
+ {
+ { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+ { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+ },
+ {
+ { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+ },
+ },
+ {
+ {
+ { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+ { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+ },
+ {
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+ { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+ },
+ {
+ { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+ { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+ },
+ {
+ { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+ { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+ { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+ },
+ {
+ { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+ { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+ },
+ {
+ { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+ },
+ {
+ { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+ { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+ },
+ },
+ {
+ {
+ { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ },
+ {
+ { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+ { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+ },
+ {
+ { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ },
+ {
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+ },
+ {
+ { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+ },
+ {
+ { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+ { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+ },
+ {
+ { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+ { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+ },
+ {
+ { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+ },
+ {
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+ { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+ },
+ {
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+ { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+ { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+ },
+ {
+ { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ },
+ {
+ { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ },
+ {
+ { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+ { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+ },
+ },
+ {
+ {
+ { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ },
+ {
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+ { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+ },
+ {
+ { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+ { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+ },
+ {
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+ { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+ { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+ },
+ {
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+ },
+ {
+ { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+ { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+ },
+ {
+ { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+ },
+ {
+ { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ },
+ {
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+ },
+ {
+ { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+ { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+ },
+ {
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+ { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+ },
+ {
+ { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ },
+ {
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+ { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+ },
+ },
+ {
+ {
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+ { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+ },
+ {
+ { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+ { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+ },
+ {
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ },
+ {
+ { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+ { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+ },
+ {
+ { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+ { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+ },
+ {
+ { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+ { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+ },
+ {
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+ { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+ },
+ {
+ { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+ },
+ {
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+ },
+ {
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ },
+ },
+ {
+ {
+ { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+ { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+ },
+ {
+ { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+ { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+ },
+ {
+ { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+ { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+ },
+ {
+ { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+ { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ },
+ {
+ { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+ { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+ },
+ {
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+ { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+ },
+ {
+ { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+ },
+ },
+ {
+ {
+ { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+ { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+ },
+ {
+ { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ },
+ {
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+ { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+ },
+ {
+ { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+ { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+ },
+ },
+ {
+ {
+ { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ },
+ {
+ { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+ },
+ {
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+ },
+ {
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+ { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+ { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+ },
+ {
+ { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+ { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+ },
+ {
+ { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+ { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+ },
+ {
+ { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+ { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+ },
+ {
+ { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+ { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+ },
+ {
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+ { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+ },
+ {
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+ { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ },
+ {
+ { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+ { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+ },
+ {
+ { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+ { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+ },
+ {
+ { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+ { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ },
+ {
+ { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+ { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+ },
+ {
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+ { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+ },
+ {
+ { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+ { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+ { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+ },
+ {
+ { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+ { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+ },
+ {
+ { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+ { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+ },
+ {
+ { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+ { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+ },
+ {
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+ },
+ {
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+ { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+ },
+ {
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+ },
+ },
+ {
+ {
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+ },
+ {
+ { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+ { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+ },
+ {
+ { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+ { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+ },
+ {
+ { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ },
+ },
+ {
+ {
+ { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ },
+ {
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+ },
+ {
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+ { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+ },
+ {
+ { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+ { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ },
+ {
+ { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+ { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+ },
+ {
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+ { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+ },
+ {
+ { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+ { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+ },
+ },
+ {
+ {
+ { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+ { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+ },
+ {
+ { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+ { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+ },
+ {
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+ },
+ {
+ { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+ { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+ },
+ },
+ {
+ {
+ { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+ { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+ },
+ {
+ { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+ },
+ {
+ { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+ },
+ {
+ { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+ { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+ { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+ },
+ {
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+ },
+ {
+ { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ },
+ {
+ { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+ { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+ { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+ },
+ {
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+ },
+ {
+ { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ },
+ {
+ { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+ { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+ },
+ {
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ },
+ {
+ { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+ },
+ {
+ { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+ { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+ },
+ },
+ {
+ {
+ { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ },
+ {
+ { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+ },
+ {
+ { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+ { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+ },
+ {
+ { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+ },
+ {
+ { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+ { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+ },
+ {
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+ { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+ },
+ {
+ { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+ { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+ },
+ {
+ { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+ { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+ },
+ {
+ { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+ { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+ },
+ {
+ { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ },
+ },
+ {
+ {
+ { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+ { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+ },
+ {
+ { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+ { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+ },
+ {
+ { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+ { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+ },
+ {
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ },
+ },
+ {
+ {
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+ },
+ {
+ { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ },
+ {
+ { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+ { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+ },
+ {
+ { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+ { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+ },
+ },
+ {
+ {
+ { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+ { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+ },
+ {
+ { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+ { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+ },
+ {
+ { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ },
+ {
+ { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+ { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+ { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+ },
+ {
+ { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+ { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+ },
+ {
+ { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+ { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+ },
+ {
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+ { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+ },
+ {
+ { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ },
+ {
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+ },
+ {
+ { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+ { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+ },
+ },
+ {
+ {
+ { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+ { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+ },
+ {
+ { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+ { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+ },
+ {
+ { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+ { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+ },
+ {
+ { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+ },
+ {
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+ { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+ },
+ {
+ { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ },
+ {
+ { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ },
+ },
+ {
+ {
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+ },
+ {
+ { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ },
+ {
+ { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+ },
+ {
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ },
+ {
+ { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+ { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+ },
+ {
+ { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+ { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+ },
+ {
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+ { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+ { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+ },
+ {
+ { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+ { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+ },
+ {
+ { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+ { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+ },
+ {
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+ { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+ },
+ {
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+ },
+ {
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+ },
+ {
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+ },
+ {
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+ },
+ {
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ },
+ {
+ { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+ { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+ },
+ {
+ { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+ { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+ },
+ {
+ { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+ { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+ },
+ {
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+ { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+ },
+ {
+ { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+ { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+ },
+ {
+ { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ },
+ {
+ { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+ { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+ },
+ },
+ {
+ {
+ { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ },
+ {
+ { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+ { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+ },
+ {
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ },
+ {
+ { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+ { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+ },
+ },
+ {
+ {
+ { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+ { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+ },
+ {
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+ },
+ {
+ { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+ { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+ },
+ {
+ { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ },
+ },
+ {
+ {
+ { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+ { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+ },
+ {
+ { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+ { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+ },
+ {
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+ { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+ },
+ {
+ { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+ { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+ },
+ {
+ { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ },
+ {
+ { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ },
+ {
+ { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ },
+ },
+ {
+ {
+ { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+ { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+ },
+ {
+ { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+ },
+ {
+ { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+ { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+ },
+ {
+ { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ },
+ },
+};
+#endif
+
+#ifdef CONFIG_X86
+/**
+ * PSHUFB tables for generic multiplication.
+ *
+ * Indexes are [MULTIPLIER][LH],
+ * where MULTIPLIER is from 0 to 255 and LH from 0 to 1.
+ */
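+/*
+ * A minimal scalar sketch of how these tables are consumed (mul, x and p
+ * are illustrative names, not symbols from this file): entry [mul][0]
+ * holds the products of mul with every low nibble and [mul][1] the
+ * products with every high-nibble value, so for one byte x
+ *
+ *   p = raid_gfmulpshufb[mul][0][x & 0x0f] ^ raid_gfmulpshufb[mul][1][x >> 4];
+ *
+ * The PSHUFB kernels perform the same pair of lookups on 16 bytes at a time.
+ */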
+const uint8_t __aligned(256) raid_gfmulpshufb[256][2][16] =
+{
+ {
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ },
+ {
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ },
+ {
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+ },
+ {
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+ },
+ {
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+ },
+ {
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+ },
+ {
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+ },
+ {
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+ },
+ {
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+ },
+ {
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+ },
+ {
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+ },
+ {
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+ },
+ {
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+ },
+ {
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+ },
+ {
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+ },
+ {
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+ },
+ {
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ },
+ {
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ },
+ {
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+ },
+ {
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+ },
+ {
+ { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+ { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+ },
+ {
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+ },
+ {
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+ },
+ {
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+ },
+ {
+ { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+ { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+ },
+ {
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+ },
+ {
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+ },
+ {
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+ },
+ {
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+ },
+ {
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+ },
+ {
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+ },
+ {
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+ },
+ {
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ },
+ {
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+ },
+ {
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ },
+ {
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ },
+ {
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+ },
+ {
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+ },
+ {
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+ { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+ },
+ {
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+ },
+ {
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+ { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+ },
+ {
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+ { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+ },
+ {
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b },
+ { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+ },
+ {
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+ { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+ },
+ {
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+ { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+ },
+ {
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6 },
+ { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+ },
+ {
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+ { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+ },
+ {
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+ { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+ },
+ {
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0x0d },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ },
+ {
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+ },
+ {
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ },
+ {
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ },
+ {
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+ { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+ },
+ {
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+ { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+ },
+ {
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+ { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+ },
+ {
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20 },
+ { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+ },
+ {
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+ { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+ },
+ {
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+ { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+ },
+ {
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+ },
+ {
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+ { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+ },
+ {
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+ { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+ },
+ {
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, 0x7b, 0x46 },
+ { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+ },
+ {
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+ { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+ },
+ {
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+ { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+ },
+ {
+ { 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ },
+ {
+ { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+ { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+ },
+ {
+ { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+ { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+ },
+ {
+ { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+ { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+ },
+ {
+ { 0x00, 0x44, 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb },
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31 },
+ },
+ {
+ { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1 },
+ },
+ {
+ { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+ { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+ },
+ {
+ { 0x00, 0x47, 0x8e, 0xc9, 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, 0x8d, 0xca },
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ },
+ {
+ { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+ { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+ },
+ {
+ { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+ { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+ },
+ {
+ { 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81 },
+ { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+ },
+ {
+ { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+ { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+ },
+ {
+ { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+ { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+ },
+ {
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac },
+ { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+ },
+ {
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+ },
+ {
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+ { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+ },
+ {
+ { 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ },
+ {
+ { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+ { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+ },
+ {
+ { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+ { 0x00, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x02, 0xd9, 0x90 },
+ },
+ {
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+ },
+ {
+ { 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b },
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a },
+ },
+ {
+ { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a },
+ },
+ {
+ { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ },
+ {
+ { 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a },
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ },
+ {
+ { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+ { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+ },
+ {
+ { 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60 },
+ { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+ },
+ {
+ { 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71 },
+ { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+ },
+ {
+ { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+ { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+ },
+ {
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+ { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+ },
+ {
+ { 0x00, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x01, 0x5c },
+ { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+ },
+ {
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+ { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+ },
+ {
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+ { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+ },
+ {
+ { 0x00, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ },
+ {
+ { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d },
+ },
+ {
+ { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+ },
+ {
+ { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+ { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+ },
+ {
+ { 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26 },
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ },
+ {
+ { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ },
+ {
+ { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7 },
+ },
+ {
+ { 0x00, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37 },
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57 },
+ },
+ {
+ { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+ { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+ },
+ {
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+ },
+ {
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c },
+ { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+ },
+ {
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+ { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+ },
+ {
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+ { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+ },
+ {
+ { 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51 },
+ { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+ },
+ {
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40 },
+ { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+ },
+ {
+ { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+ { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+ },
+ {
+ { 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ },
+ {
+ { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+ { 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6 },
+ },
+ {
+ { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+ { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+ },
+ {
+ { 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb },
+ { 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b },
+ },
+ {
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ },
+ {
+ { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ },
+ {
+ { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c },
+ },
+ {
+ { 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7 },
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec },
+ },
+ {
+ { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+ { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+ },
+ {
+ { 0x00, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d },
+ { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+ },
+ {
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c },
+ { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+ },
+ {
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+ { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+ },
+ {
+ { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+ { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+ },
+ {
+ { 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1 },
+ { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+ },
+ {
+ { 0x00, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0 },
+ { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+ },
+ {
+ { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+ { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+ },
+ {
+ { 0x00, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ },
+ {
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+ },
+ {
+ { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+ { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+ },
+ {
+ { 0x00, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2 },
+ { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+ },
+ {
+ { 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef },
+ { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+ },
+ {
+ { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+ { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+ },
+ {
+ { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+ { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+ },
+ {
+ { 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe },
+ { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+ },
+ {
+ { 0x00, 0x88, 0x0d, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab },
+ { 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62 },
+ },
+ {
+ { 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4 },
+ { 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92 },
+ },
+ {
+ { 0x00, 0x8a, 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5 },
+ { 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f },
+ },
+ {
+ { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+ { 0x00, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f },
+ },
+ {
+ { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85 },
+ },
+ {
+ { 0x00, 0x8d, 0x07, 0x8a, 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98 },
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, 0x4d, 0x75 },
+ },
+ {
+ { 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89 },
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ },
+ {
+ { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+ { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+ },
+ {
+ { 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ },
+ {
+ { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+ { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+ },
+ {
+ { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+ { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+ },
+ {
+ { 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32 },
+ { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+ },
+ {
+ { 0x00, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f },
+ { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+ },
+ {
+ { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+ { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+ },
+ {
+ { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+ { 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10 },
+ },
+ {
+ { 0x00, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0x0e },
+ { 0x00, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0 },
+ },
+ {
+ { 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b },
+ { 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9 },
+ },
+ {
+ { 0x00, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, 0xe2, 0x7b, 0xcd, 0x54 },
+ { 0x00, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29 },
+ },
+ {
+ { 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45 },
+ { 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24 },
+ },
+ {
+ { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+ { 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4 },
+ },
+ {
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0x0b, 0x3e },
+ },
+ {
+ { 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68 },
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce },
+ },
+ {
+ { 0x00, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79 },
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ },
+ {
+ { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ },
+ {
+ { 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ },
+ {
+ { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+ { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+ },
+ {
+ { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+ { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+ },
+ {
+ { 0x00, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f },
+ { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+ },
+ {
+ { 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12 },
+ { 0x00, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, 0xaf, 0x3d },
+ },
+ {
+ { 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d },
+ { 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd },
+ },
+ {
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+ },
+ {
+ { 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03 },
+ { 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30 },
+ },
+ {
+ { 0x00, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56 },
+ { 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09 },
+ },
+ {
+ { 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59 },
+ { 0x00, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9 },
+ },
+ {
+ { 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48 },
+ { 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4 },
+ },
+ {
+ { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+ { 0x00, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x04 },
+ },
+ {
+ { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ },
+ {
+ { 0x00, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65 },
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ },
+ {
+ { 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74 },
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13 },
+ },
+ {
+ { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3 },
+ },
+ {
+ { 0x00, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ },
+ {
+ { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+ { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+ },
+ {
+ { 0x00, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0x0b, 0xb9, 0x72, 0xc0 },
+ { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+ },
+ {
+ { 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf },
+ { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+ },
+ {
+ { 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2 },
+ { 0x00, 0x8f, 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, 0x0a, 0x85, 0x09, 0x86 },
+ },
+ {
+ { 0x00, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, 0x58, 0xed },
+ { 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76 },
+ },
+ {
+ { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+ { 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b },
+ },
+ {
+ { 0x00, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3 },
+ { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+ },
+ {
+ { 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6 },
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2 },
+ },
+ {
+ { 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9 },
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42 },
+ },
+ {
+ { 0x00, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, 0x02, 0xb8 },
+ { 0x00, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f },
+ },
+ {
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ { 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf },
+ },
+ {
+ { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ },
+ {
+ { 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95 },
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ },
+ {
+ { 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84 },
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8 },
+ },
+ {
+ { 0x00, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b },
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58 },
+ },
+ {
+ { 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ },
+ {
+ { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+ { 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97 },
+ },
+ {
+ { 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a },
+ { 0x00, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a },
+ },
+ {
+ { 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25 },
+ { 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a },
+ },
+ {
+ { 0x00, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x08 },
+ { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+ },
+ {
+ { 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07 },
+ { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+ },
+ {
+ { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+ { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+ },
+ {
+ { 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19 },
+ { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+ },
+ {
+ { 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c },
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ },
+ {
+ { 0x00, 0xc9, 0x8f, 0x46, 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, 0x8a, 0x43 },
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ },
+ {
+ { 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52 },
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49 },
+ },
+ {
+ { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9 },
+ },
+ {
+ { 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70 },
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53 },
+ },
+ {
+ { 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f },
+ { 0x00, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3 },
+ },
+ {
+ { 0x00, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e },
+ { 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae },
+ },
+ {
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e },
+ },
+ {
+ { 0x00, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ },
+ {
+ { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+ { 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c },
+ },
+ {
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ { 0x00, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21 },
+ },
+ {
+ { 0x00, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, 0x06, 0xd5 },
+ { 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1 },
+ },
+ {
+ { 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8 },
+ { 0x00, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b },
+ },
+ {
+ { 0x00, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7 },
+ { 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb },
+ },
+ {
+ { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+ { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+ },
+ {
+ { 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9 },
+ { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+ },
+ {
+ { 0x00, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ },
+ {
+ { 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3 },
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ },
+ {
+ { 0x00, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0x0b, 0x78, 0xa2 },
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2 },
+ },
+ {
+ { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02 },
+ },
+ {
+ { 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80 },
+ { 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8 },
+ },
+ {
+ { 0x00, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, 0x52, 0x8f },
+ { 0x00, 0x51, 0xa2, 0xf3, 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18 },
+ },
+ {
+ { 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e },
+ { 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15 },
+ },
+ {
+ { 0x00, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91 },
+ { 0x00, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x07, 0x94, 0xe5 },
+ },
+ {
+ { 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ },
+ {
+ { 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6 },
+ { 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc },
+ },
+ {
+ { 0x00, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, 0x35, 0xd7 },
+ { 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1 },
+ },
+ {
+ { 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8 },
+ { 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01 },
+ },
+ {
+ { 0x00, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5 },
+ { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+ },
+ {
+ { 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa },
+ { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+ },
+ {
+ { 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb },
+ { 0x00, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16 },
+ },
+ {
+ { 0x00, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x03, 0xe4 },
+ { 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6 },
+ },
+ {
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf },
+ },
+ {
+ { 0x00, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe },
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f },
+ },
+ {
+ { 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf },
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ },
+ {
+ { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ },
+ {
+ { 0x00, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d },
+ { 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38 },
+ },
+ {
+ { 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82 },
+ { 0x00, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, 0x52, 0x24, 0xbe, 0xc8 },
+ },
+ {
+ { 0x00, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, 0xbc, 0x52, 0x7d, 0x93 },
+ { 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5 },
+ },
+ {
+ { 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c },
+ { 0x00, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35 },
+ },
+ {
+ { 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ },
+ {
+ { 0x00, 0xf1, 0xff, 0x0e, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36 },
+ { 0x00, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, 0xec, 0x47 },
+ },
+ {
+ { 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27 },
+ { 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a },
+ },
+ {
+ { 0x00, 0xf3, 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28 },
+ { 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba },
+ },
+ {
+ { 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05 },
+ { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+ },
+ {
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ { 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0 },
+ },
+ {
+ { 0x00, 0xf6, 0xf1, 0x07, 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b },
+ { 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad },
+ },
+ {
+ { 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14 },
+ { 0x00, 0xcb, 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d },
+ },
+ {
+ { 0x00, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41 },
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64 },
+ },
+ {
+ { 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e },
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94 },
+ },
+ {
+ { 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f },
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ },
+ {
+ { 0x00, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50 },
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ },
+ {
+ { 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d },
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, 0x0e, 0x75, 0xf8, 0x83 },
+ },
+ {
+ { 0x00, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72 },
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73 },
+ },
+ {
+ { 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63 },
+ { 0x00, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, 0x25, 0x7e },
+ },
+ {
+ { 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c },
+ { 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e },
+ },
+};
+#endif
+
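The 256 table pairs ending above are the nibble-split GF(2^8) multiplication tables used by the PSHUFB-style parity kernels: for each constant c, the first 16 bytes hold c times every low-nibble value and the second 16 bytes hold c times every high-nibble value, so multiplying a whole byte costs two lookups and an XOR. Below is a minimal scalar sketch of how one such pair is consumed, using the pair for the constant 0x11 copied from the data above and checking it against a plain bitwise multiply; the helper names are illustrative, and the 0x11d reduction polynomial is assumed to be the one these tables were generated for.

#include <stdint.h>
#include <stdio.h>

/*
 * One table pair copied from the data above (the row for the constant 0x11):
 * the first 16 bytes are 0x11 * n for a low nibble n, the second 16 bytes
 * are 0x11 * (n << 4) for a high nibble n.
 */
static const uint8_t mul11_lo[16] = {
	0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
	0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff
};
static const uint8_t mul11_hi[16] = {
	0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23,
	0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b
};

/* Multiply a byte by the fixed constant 0x11 using the split tables. */
static uint8_t gf_mul_by_0x11(uint8_t x)
{
	return mul11_lo[x & 0x0f] ^ mul11_hi[x >> 4];
}

/* Reference bit-at-a-time multiply in GF(2^8), reducing by 0x11d. */
static uint8_t gf_mul_ref(uint8_t a, uint8_t b)
{
	uint8_t r = 0;

	while (b) {
		if (b & 1)
			r ^= a;
		b >>= 1;
		a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1d : 0x00));
	}
	return r;
}

int main(void)
{
	unsigned x;

	for (x = 0; x < 256; x++) {
		if (gf_mul_by_0x11((uint8_t)x) != gf_mul_ref(0x11, (uint8_t)x)) {
			printf("mismatch at 0x%02x\n", x);
			return 1;
		}
	}
	printf("table pair for 0x11 matches the reference multiply\n");
	return 0;
}

The SSSE3 and AVX2 routines apply the same idea sixteen or thirty-two bytes at a time by feeding the two 16-byte rows to pshufb/vpshufb and XORing the results.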
diff --git a/c_src/raid/tag.c b/c_src/raid/tag.c
new file mode 100644
index 00000000..bfeefaad
--- /dev/null
+++ b/c_src/raid/tag.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+
+static struct raid_func {
+ const char *name;
+ void (*p)();
+} RAID_FUNC[] = {
+ { "int8", raid_gen3_int8 },
+ { "int8", raid_gen4_int8 },
+ { "int8", raid_gen5_int8 },
+ { "int8", raid_gen6_int8 },
+ { "int32", raid_gen1_int32 },
+ { "int64", raid_gen1_int64 },
+ { "int32", raid_gen2_int32 },
+ { "int64", raid_gen2_int64 },
+ { "int32", raid_genz_int32 },
+ { "int64", raid_genz_int64 },
+ { "int8", raid_rec1_int8 },
+ { "int8", raid_rec2_int8 },
+ { "int8", raid_recX_int8 },
+
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSE2
+ { "sse2", raid_gen1_sse2 },
+ { "sse2", raid_gen2_sse2 },
+ { "sse2", raid_genz_sse2 },
+#endif
+#ifdef CONFIG_SSSE3
+ { "ssse3", raid_gen3_ssse3 },
+ { "ssse3", raid_gen4_ssse3 },
+ { "ssse3", raid_gen5_ssse3 },
+ { "ssse3", raid_gen6_ssse3 },
+ { "ssse3", raid_rec1_ssse3 },
+ { "ssse3", raid_rec2_ssse3 },
+ { "ssse3", raid_recX_ssse3 },
+#endif
+#ifdef CONFIG_AVX2
+ { "avx2", raid_gen1_avx2 },
+ { "avx2", raid_gen2_avx2 },
+ { "avx2", raid_rec1_avx2 },
+ { "avx2", raid_rec2_avx2 },
+ { "avx2", raid_recX_avx2 },
+#endif
+#endif
+
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_SSE2
+ { "sse2e", raid_gen2_sse2ext },
+ { "sse2e", raid_genz_sse2ext },
+#endif
+#ifdef CONFIG_SSSE3
+ { "ssse3e", raid_gen3_ssse3ext },
+ { "ssse3e", raid_gen4_ssse3ext },
+ { "ssse3e", raid_gen5_ssse3ext },
+ { "ssse3e", raid_gen6_ssse3ext },
+#endif
+#ifdef CONFIG_AVX2
+ { "avx2e", raid_gen3_avx2ext },
+ { "avx2e", raid_genz_avx2ext },
+ { "avx2e", raid_gen4_avx2ext },
+ { "avx2e", raid_gen5_avx2ext },
+ { "avx2e", raid_gen6_avx2ext },
+#endif
+#endif
+ { 0, 0 }
+};
+
+static const char *raid_tag(void (*func)())
+{
+ struct raid_func *i = RAID_FUNC;
+
+ while (i->name != 0) {
+ if (i->p == func)
+ return i->name;
+ ++i;
+ }
+
+ /* LCOV_EXCL_START */
+ return "unknown";
+ /* LCOV_EXCL_STOP */
+}
+
+const char *raid_gen1_tag(void)
+{
+ return raid_tag(raid_gen_ptr[0]);
+}
+
+const char *raid_gen2_tag(void)
+{
+ return raid_tag(raid_gen_ptr[1]);
+}
+
+const char *raid_genz_tag(void)
+{
+ return raid_tag(raid_genz_ptr);
+}
+
+const char *raid_gen3_tag(void)
+{
+ return raid_tag(raid_gen_ptr[2]);
+}
+
+const char *raid_gen4_tag(void)
+{
+ return raid_tag(raid_gen_ptr[3]);
+}
+
+const char *raid_gen5_tag(void)
+{
+ return raid_tag(raid_gen_ptr[4]);
+}
+
+const char *raid_gen6_tag(void)
+{
+ return raid_tag(raid_gen_ptr[5]);
+}
+
+const char *raid_rec1_tag(void)
+{
+ return raid_tag(raid_rec_ptr[0]);
+}
+
+const char *raid_rec2_tag(void)
+{
+ return raid_tag(raid_rec_ptr[1]);
+}
+
+const char *raid_recX_tag(void)
+{
+ return raid_tag(raid_rec_ptr[2]);
+}
+
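tag.c above only maps the function pointers chosen at runtime back to a short label ("int8", "sse2", "ssse3e", "avx2", ...), which is useful for reporting which SIMD path the dispatcher picked. A small usage sketch follows; the header name and the raid_init() initializer are assumptions about how the rest of this imported library exposes its API, not identifiers shown in this diff.

#include <stdio.h>

#include "raid.h"	/* assumed public header for raid_init() and the *_tag() helpers */

int main(void)
{
	/* Assumed initializer: selects int8/SSE2/SSSE3/AVX2 kernels per the CPU. */
	raid_init();

	/* Each *_tag() walks RAID_FUNC in tag.c and returns the matching label. */
	printf("gen1: %s\n", raid_gen1_tag());
	printf("gen2: %s\n", raid_gen2_tag());
	printf("genz: %s\n", raid_genz_tag());
	printf("rec1: %s\n", raid_rec1_tag());
	printf("rec2: %s\n", raid_rec2_tag());
	printf("recX: %s\n", raid_recX_tag());

	return 0;
}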
diff --git a/c_src/raid/test.c b/c_src/raid/test.c
new file mode 100644
index 00000000..feb8a415
--- /dev/null
+++ b/c_src/raid/test.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "cpu.h"
+#include "combo.h"
+#include "memory.h"
+
+/**
+ * Binomial coefficient of n over r.
+ */
+static int ibc(int n, int r)
+{
+ if (r == 0 || n == r)
+ return 1;
+ else
+ return ibc(n - 1, r - 1) + ibc(n - 1, r);
+}
+
+/**
+ * Power n ^ r;
+ */
+static int ipow(int n, int r)
+{
+ int v = 1;
+
+ while (r) {
+ v *= n;
+ --r;
+ }
+ return v;
+}
+
+int raid_test_combo(void)
+{
+ int r;
+ int count;
+ int p[RAID_PARITY_MAX];
+
+ for (r = 1; r <= RAID_PARITY_MAX; ++r) {
+ /* count combination (r of RAID_PARITY_MAX) elements */
+ count = 0;
+ combination_first(r, RAID_PARITY_MAX, p);
+
+ do {
+ ++count;
+ } while (combination_next(r, RAID_PARITY_MAX, p));
+
+ if (count != ibc(RAID_PARITY_MAX, r)) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+
+ for (r = 1; r <= RAID_PARITY_MAX; ++r) {
+ /* count permutation (r of RAID_PARITY_MAX) elements */
+ count = 0;
+ permutation_first(r, RAID_PARITY_MAX, p);
+
+ do {
+ ++count;
+ } while (permutation_next(r, RAID_PARITY_MAX, p));
+
+ if (count != ipow(RAID_PARITY_MAX, r)) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+
+ return 0;
+}
+
+int raid_test_insert(void)
+{
+ int p[RAID_PARITY_MAX];
+ int r;
+
+ for (r = 1; r <= RAID_PARITY_MAX; ++r) {
+ permutation_first(r, RAID_PARITY_MAX, p);
+ do {
+ int i[RAID_PARITY_MAX];
+ int j;
+
+ /* insert in order */
+ for (j = 0; j < r; ++j)
+ raid_insert(j, i, p[j]);
+
+ /* check order */
+ for (j = 1; j < r; ++j) {
+ if (i[j - 1] > i[j]) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+ } while (permutation_next(r, RAID_PARITY_MAX, p));
+ }
+
+ return 0;
+}
+
+int raid_test_sort(void)
+{
+ int p[RAID_PARITY_MAX];
+ int r;
+
+ for (r = 1; r <= RAID_PARITY_MAX; ++r) {
+ permutation_first(r, RAID_PARITY_MAX, p);
+ do {
+ int i[RAID_PARITY_MAX];
+ int j;
+
+ /* make a copy */
+ for (j = 0; j < r; ++j)
+ i[j] = p[j];
+
+ raid_sort(r, i);
+
+ /* check order */
+ for (j = 1; j < r; ++j) {
+ if (i[j - 1] > i[j]) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+ } while (permutation_next(r, RAID_PARITY_MAX, p));
+ }
+
+ return 0;
+}
+
+int raid_test_rec(int mode, int nd, size_t size)
+{
+ void (*f[RAID_PARITY_MAX][4])(
+ int nr, int *id, int *ip, int nd, size_t size, void **vbuf);
+ void *v_alloc;
+ void **v;
+ void **data;
+ void **parity;
+ void **test;
+ void *data_save[RAID_PARITY_MAX];
+ void *parity_save[RAID_PARITY_MAX];
+ void *waste;
+ int nv;
+ int id[RAID_PARITY_MAX];
+ int ip[RAID_PARITY_MAX];
+ int i;
+ int j;
+ int nr;
+ int nf[RAID_PARITY_MAX];
+ int np;
+
+ raid_mode(mode);
+ if (mode == RAID_MODE_CAUCHY)
+ np = RAID_PARITY_MAX;
+ else
+ np = 3;
+
+ nv = nd + np * 2 + 2;
+
+ v = raid_malloc_vector(nd, nv, size, &v_alloc);
+ if (!v) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+
+ data = v;
+ parity = v + nd;
+ test = v + nd + np;
+
+ for (i = 0; i < np; ++i)
+ parity_save[i] = parity[i];
+
+ memset(v[nv - 2], 0, size);
+ raid_zero(v[nv - 2]);
+
+ waste = v[nv - 1];
+
+ /* fill with pseudo-random data with the arbitrary seed "1" */
+ raid_mrand_vector(1, nd, size, v);
+
+ /* setup recov functions */
+ for (i = 0; i < np; ++i) {
+ nf[i] = 0;
+ if (i == 0) {
+ f[i][nf[i]++] = raid_rec1_int8;
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSSE3
+ if (raid_cpu_has_ssse3())
+ f[i][nf[i]++] = raid_rec1_ssse3;
+#endif
+#ifdef CONFIG_AVX2
+ if (raid_cpu_has_avx2())
+ f[i][nf[i]++] = raid_rec1_avx2;
+#endif
+#endif
+ } else if (i == 1) {
+ f[i][nf[i]++] = raid_rec2_int8;
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSSE3
+ if (raid_cpu_has_ssse3())
+ f[i][nf[i]++] = raid_rec2_ssse3;
+#endif
+#ifdef CONFIG_AVX2
+ if (raid_cpu_has_avx2())
+ f[i][nf[i]++] = raid_rec2_avx2;
+#endif
+#endif
+ } else {
+ f[i][nf[i]++] = raid_recX_int8;
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSSE3
+ if (raid_cpu_has_ssse3())
+ f[i][nf[i]++] = raid_recX_ssse3;
+#endif
+#ifdef CONFIG_AVX2
+ if (raid_cpu_has_avx2())
+ f[i][nf[i]++] = raid_recX_avx2;
+#endif
+#endif
+ }
+ }
+
+ /* compute the parity */
+ raid_gen_ref(nd, np, size, v);
+
+ /* set all the parity to the waste v */
+ for (i = 0; i < np; ++i)
+ parity[i] = waste;
+
+ /* all parity levels */
+ for (nr = 1; nr <= np; ++nr) {
+ /* all combinations (nr of nd) disks */
+ combination_first(nr, nd, id);
+ do {
+ /* all combinations (nr of np) parities */
+ combination_first(nr, np, ip);
+ do {
+ /* for each recover function */
+ for (j = 0; j < nf[nr - 1]; ++j) {
+ /* set */
+ for (i = 0; i < nr; ++i) {
+ /* remove the missing data */
+ data_save[i] = data[id[i]];
+ data[id[i]] = test[i];
+ /* set the parity to use */
+ parity[ip[i]] = parity_save[ip[i]];
+ }
+
+ /* recover */
+ f[nr - 1][j](nr, id, ip, nd, size, v);
+
+ /* check */
+ for (i = 0; i < nr; ++i) {
+ if (memcmp(test[i], data_save[i], size) != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+
+ /* restore */
+ for (i = 0; i < nr; ++i) {
+ /* restore the data */
+ data[id[i]] = data_save[i];
+ /* restore the parity */
+ parity[ip[i]] = waste;
+ }
+ }
+ } while (combination_next(nr, np, ip));
+ } while (combination_next(nr, nd, id));
+ }
+
+ free(v_alloc);
+ free(v);
+ return 0;
+
+bail:
+ /* LCOV_EXCL_START */
+ free(v_alloc);
+ free(v);
+ return -1;
+ /* LCOV_EXCL_STOP */
+}
+
+int raid_test_par(int mode, int nd, size_t size)
+{
+ void (*f[64])(int nd, size_t size, void **vbuf);
+ void *v_alloc;
+ void **v;
+ int nv;
+ int i, j;
+ int nf;
+ int np;
+
+ raid_mode(mode);
+ if (mode == RAID_MODE_CAUCHY)
+ np = RAID_PARITY_MAX;
+ else
+ np = 3;
+
+ nv = nd + np * 2;
+
+ v = raid_malloc_vector(nd, nv, size, &v_alloc);
+ if (!v) {
+ /* LCOV_EXCL_START */
+ return -1;
+ /* LCOV_EXCL_STOP */
+ }
+
+ /* check memory */
+ if (raid_mtest_vector(nv, size, v) != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+
+ /* fill with pseudo-random data with the arbitrary seed "2" */
+ raid_mrand_vector(2, nv, size, v);
+
+ /* compute the parity */
+ raid_gen_ref(nd, np, size, v);
+
+ /* copy in back buffers */
+ for (i = 0; i < np; ++i)
+ memcpy(v[nd + np + i], v[nd + i], size);
+
+ /* load all the available functions */
+ nf = 0;
+
+ f[nf++] = raid_gen1_int32;
+ f[nf++] = raid_gen1_int64;
+ f[nf++] = raid_gen2_int32;
+ f[nf++] = raid_gen2_int64;
+
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSE2
+ if (raid_cpu_has_sse2()) {
+ f[nf++] = raid_gen1_sse2;
+ f[nf++] = raid_gen2_sse2;
+#ifdef CONFIG_X86_64
+ f[nf++] = raid_gen2_sse2ext;
+#endif
+ }
+#endif
+
+#ifdef CONFIG_AVX2
+ if (raid_cpu_has_avx2()) {
+ f[nf++] = raid_gen1_avx2;
+ f[nf++] = raid_gen2_avx2;
+ }
+#endif
+#endif /* CONFIG_X86 */
+
+ if (mode == RAID_MODE_CAUCHY) {
+ f[nf++] = raid_gen3_int8;
+ f[nf++] = raid_gen4_int8;
+ f[nf++] = raid_gen5_int8;
+ f[nf++] = raid_gen6_int8;
+
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSSE3
+ if (raid_cpu_has_ssse3()) {
+ f[nf++] = raid_gen3_ssse3;
+ f[nf++] = raid_gen4_ssse3;
+ f[nf++] = raid_gen5_ssse3;
+ f[nf++] = raid_gen6_ssse3;
+#ifdef CONFIG_X86_64
+ f[nf++] = raid_gen3_ssse3ext;
+ f[nf++] = raid_gen4_ssse3ext;
+ f[nf++] = raid_gen5_ssse3ext;
+ f[nf++] = raid_gen6_ssse3ext;
+#endif
+ }
+#endif
+
+#ifdef CONFIG_AVX2
+#ifdef CONFIG_X86_64
+ if (raid_cpu_has_avx2()) {
+ f[nf++] = raid_gen3_avx2ext;
+ f[nf++] = raid_gen4_avx2ext;
+ f[nf++] = raid_gen5_avx2ext;
+ f[nf++] = raid_gen6_avx2ext;
+ }
+#endif
+#endif
+#endif /* CONFIG_X86 */
+ } else {
+ f[nf++] = raid_genz_int32;
+ f[nf++] = raid_genz_int64;
+
+#ifdef CONFIG_X86
+#ifdef CONFIG_SSE2
+ if (raid_cpu_has_sse2()) {
+ f[nf++] = raid_genz_sse2;
+#ifdef CONFIG_X86_64
+ f[nf++] = raid_genz_sse2ext;
+#endif
+ }
+#endif
+
+#ifdef CONFIG_AVX2
+#ifdef CONFIG_X86_64
+ if (raid_cpu_has_avx2())
+ f[nf++] = raid_genz_avx2ext;
+#endif
+#endif
+#endif /* CONFIG_X86 */
+ }
+
+ /* check all the functions */
+ for (j = 0; j < nf; ++j) {
+ /* compute parity */
+ f[j](nd, size, v);
+
+ /* check it */
+ for (i = 0; i < np; ++i) {
+ if (memcmp(v[nd + np + i], v[nd + i], size) != 0) {
+ /* LCOV_EXCL_START */
+ goto bail;
+ /* LCOV_EXCL_STOP */
+ }
+ }
+ }
+
+ free(v_alloc);
+ free(v);
+ return 0;
+
+bail:
+ /* LCOV_EXCL_START */
+ free(v_alloc);
+ free(v);
+ return -1;
+ /* LCOV_EXCL_STOP */
+}
+
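Together these four entry points make up the library's self-test surface. A minimal driver is sketched below; it is illustrative only (the function name, the 8-disk count and the 256-byte block size are arbitrary choices, not part of this diff), but it shows one plausible calling order: cheap combinatorial checks first, then parity generation, then recovery.

/* illustrative self-test driver, not part of the library */
static int run_raid_self_tests(void)
{
	if (raid_test_combo() != 0)
		return -1;
	if (raid_test_insert() != 0)
		return -1;
	if (raid_test_sort() != 0)
		return -1;
	/* 8 data disks, 256-byte blocks: small enough to stay fast */
	if (raid_test_par(RAID_MODE_CAUCHY, 8, 256) != 0)
		return -1;
	if (raid_test_rec(RAID_MODE_CAUCHY, 8, 256) != 0)
		return -1;
	return 0;
}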
diff --git a/c_src/raid/test.h b/c_src/raid/test.h
new file mode 100644
index 00000000..6d902c7f
--- /dev/null
+++ b/c_src/raid/test.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __RAID_TEST_H
+#define __RAID_TEST_H
+
+/**
+ * Tests insertion function.
+ *
+ * Tests raid_insert() with all the possible permutations of elements to insert.
+ *
+ * Returns 0 on success.
+ */
+int raid_test_insert(void);
+
+/**
+ * Tests sorting function.
+ *
+ * Tests raid_sort() with all the possible permutations of elements to sort.
+ *
+ * Returns 0 on success.
+ */
+int raid_test_sort(void);
+
+/**
+ * Tests combination functions.
+ *
+ * Tests combination_first(), combination_next(), permutation_first() and
+ * permutation_next() for all the parity levels.
+ *
+ * Returns 0 on success.
+ */
+int raid_test_combo(void);
+
+/**
+ * Tests recovering functions.
+ *
+ * All the recovering functions are tested with all the combinations
+ * of failing disks and recovering parities.
+ *
+ * Note that the test time grows exponentially with the number of disks.
+ *
+ * Returns 0 on success.
+ */
+int raid_test_rec(int mode, int nd, size_t size);
+
+/**
+ * Tests parity generation functions.
+ *
+ * All the parity generation functions are tested with the specified
+ * number of disks.
+ *
+ * Returns 0 on success.
+ */
+int raid_test_par(int mode, int nd, size_t size);
+
+#endif
+
diff --git a/c_src/raid/x86.c b/c_src/raid/x86.c
new file mode 100644
index 00000000..84b12c1c
--- /dev/null
+++ b/c_src/raid/x86.c
@@ -0,0 +1,2452 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+#include "gf.h"
+
+/*
+ * For x86 optimizations you can see:
+ *
+ * Software optimization resources
+ * http://www.agner.org/optimize/
+ *
+ * x86, x64 Instruction Latency, Memory Latency and CPUID dumps
+ * http://users.atw.hu/instlatx64/
+ */
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+/*
+ * GEN1 (RAID5 with xor) SSE2 implementation
+ *
+ * Intentionally processes no more than 64 bytes per iteration: 64 bytes is the
+ * typical cache line, and widening to 128 bytes does not improve performance
+ * and in some cases even reduces it.
+ */
+void raid_gen1_sse2(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+
+ raid_sse_begin();
+
+ for (i = 0; i < size; i += 64) {
+ asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+ asm volatile ("movdqa %0,%%xmm1" : : "m" (v[l][i + 16]));
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (v[l][i + 32]));
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (v[l][i + 48]));
+ for (d = l - 1; d >= 0; --d) {
+ asm volatile ("pxor %0,%%xmm0" : : "m" (v[d][i]));
+ asm volatile ("pxor %0,%%xmm1" : : "m" (v[d][i + 16]));
+ asm volatile ("pxor %0,%%xmm2" : : "m" (v[d][i + 32]));
+ asm volatile ("pxor %0,%%xmm3" : : "m" (v[d][i + 48]));
+ }
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (p[i + 32]));
+ asm volatile ("movntdq %%xmm3,%0" : "=m" (p[i + 48]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
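For reference, the computation being vectorized here is plain XOR parity across the nd data buffers, with the parity buffer stored at v[nd]. The scalar sketch below is only an illustration of that loop structure (it is not one of the library's raid_gen1_int32/int64 implementations):

/* illustrative scalar GEN1: p[i] = v[0][i] ^ v[1][i] ^ ... ^ v[nd-1][i] */
static void gen1_scalar_sketch(int nd, size_t size, void **vv)
{
	uint8_t **v = (uint8_t **)vv;
	uint8_t *p = v[nd];		/* parity follows the data buffers */
	size_t i;
	int d;

	for (i = 0; i < size; ++i) {
		uint8_t x = v[0][i];

		for (d = 1; d < nd; ++d)
			x ^= v[d][i];
		p[i] = x;
	}
}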
+#if defined(CONFIG_X86) && defined(CONFIG_AVX2)
+/*
+ * GEN1 (RAID5 with xor) AVX2 implementation
+ *
+ * Intentionally processes no more than 64 bytes per iteration: 64 bytes is the
+ * typical cache line, and widening to 128 bytes does not improve performance
+ * and in some cases even reduces it.
+ */
+void raid_gen1_avx2(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+
+ raid_avx_begin();
+
+ for (i = 0; i < size; i += 64) {
+ asm volatile ("vmovdqa %0,%%ymm0" : : "m" (v[l][i]));
+ asm volatile ("vmovdqa %0,%%ymm1" : : "m" (v[l][i + 32]));
+ for (d = l - 1; d >= 0; --d) {
+ asm volatile ("vpxor %0,%%ymm0,%%ymm0" : : "m" (v[d][i]));
+ asm volatile ("vpxor %0,%%ymm1,%%ymm1" : : "m" (v[d][i + 32]));
+ }
+ asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+ asm volatile ("vmovntdq %%ymm1,%0" : "=m" (p[i + 32]));
+ }
+
+ raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+static const struct gfconst16 {
+ uint8_t poly[16];
+ uint8_t low4[16];
+} gfconst16 __aligned(32) = {
+ {
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d
+ },
+ {
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f
+ },
+};
+#endif
+
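The poly constant is the low byte (0x1d) of the GF(2^8) reduction polynomial x^8 + x^4 + x^3 + x^2 + 1, replicated across the vector, and low4 is the nibble mask used by the pshufb lookups further down. The pcmpgtb/paddb/pand/pxor sequences in the GEN2+ kernels are a branch-free multiply-by-2 in that field; a scalar equivalent, shown only for illustration, is:

/* multiply by 2 in GF(2^8): shift left, then xor in 0x1d when the
 * top bit was set, i.e. when the shift overflowed the field */
static inline uint8_t gf_mul2_sketch(uint8_t x)
{
	return (uint8_t)((x << 1) ^ ((x & 0x80) ? 0x1d : 0x00));
}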
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+/*
+ * GEN2 (RAID6 with powers of 2) SSE2 implementation
+ */
+void raid_gen2_sse2(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+
+ raid_sse_begin();
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+
+ for (i = 0; i < size; i += 32) {
+ asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+ asm volatile ("movdqa %0,%%xmm1" : : "m" (v[l][i + 16]));
+ asm volatile ("movdqa %xmm0,%xmm2");
+ asm volatile ("movdqa %xmm1,%xmm3");
+ for (d = l - 1; d >= 0; --d) {
+ asm volatile ("pxor %xmm4,%xmm4");
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pcmpgtb %xmm2,%xmm4");
+ asm volatile ("pcmpgtb %xmm3,%xmm5");
+ asm volatile ("paddb %xmm2,%xmm2");
+ asm volatile ("paddb %xmm3,%xmm3");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pxor %xmm4,%xmm2");
+ asm volatile ("pxor %xmm5,%xmm3");
+
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+ asm volatile ("movdqa %0,%%xmm5" : : "m" (v[d][i + 16]));
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm5,%xmm1");
+ asm volatile ("pxor %xmm4,%xmm2");
+ asm volatile ("pxor %xmm5,%xmm3");
+ }
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm3,%0" : "=m" (q[i + 16]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
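What the two register pairs accumulate here is the classic RAID6 syndrome pair: P is the plain XOR of all data blocks, while Q is built Horner-style, multiplying the running sum by 2 in GF(2^8) before folding in each earlier disk, so Q ends up as the sum of v[d] scaled by 2^d. A scalar sketch of a single byte offset (reusing the illustrative gf_mul2_sketch() above):

/* illustrative GEN2 for one byte offset i, walking disks from last to
 * first exactly as the SSE2 loop does with xmm0/xmm2 (and xmm1/xmm3) */
static void gen2_one_byte_sketch(int nd, size_t i, uint8_t **v,
				 uint8_t *p, uint8_t *q)
{
	int d = nd - 1;
	uint8_t pv = v[d][i];
	uint8_t qv = v[d][i];

	while (--d >= 0) {
		qv = gf_mul2_sketch(qv);	/* scale the running Q sum */
		pv ^= v[d][i];
		qv ^= v[d][i];
	}
	p[i] = pv;
	q[i] = qv;
}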
+#if defined(CONFIG_X86) && defined(CONFIG_AVX2)
+/*
+ * GEN2 (RAID6 with powers of 2) AVX2 implementation
+ */
+void raid_gen2_avx2(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+
+ raid_avx_begin();
+
+ asm volatile ("vbroadcasti128 %0, %%ymm7" : : "m" (gfconst16.poly[0]));
+ asm volatile ("vpxor %ymm6,%ymm6,%ymm6");
+
+ for (i = 0; i < size; i += 64) {
+ asm volatile ("vmovdqa %0,%%ymm0" : : "m" (v[l][i]));
+ asm volatile ("vmovdqa %0,%%ymm1" : : "m" (v[l][i + 32]));
+ asm volatile ("vmovdqa %ymm0,%ymm2");
+ asm volatile ("vmovdqa %ymm1,%ymm3");
+ for (d = l - 1; d >= 0; --d) {
+ asm volatile ("vpcmpgtb %ymm2,%ymm6,%ymm4");
+ asm volatile ("vpcmpgtb %ymm3,%ymm6,%ymm5");
+ asm volatile ("vpaddb %ymm2,%ymm2,%ymm2");
+ asm volatile ("vpaddb %ymm3,%ymm3,%ymm3");
+ asm volatile ("vpand %ymm7,%ymm4,%ymm4");
+ asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+ asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm5,%ymm3,%ymm3");
+
+ asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[d][i]));
+ asm volatile ("vmovdqa %0,%%ymm5" : : "m" (v[d][i + 32]));
+ asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm5,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm5,%ymm3,%ymm3");
+ }
+ asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+ asm volatile ("vmovntdq %%ymm1,%0" : "=m" (p[i + 32]));
+ asm volatile ("vmovntdq %%ymm2,%0" : "=m" (q[i]));
+ asm volatile ("vmovntdq %%ymm3,%0" : "=m" (q[i + 32]));
+ }
+
+ raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSE2)
+/*
+ * GEN2 (RAID6 with powers of 2) SSE2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen2_sse2ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+
+ raid_sse_begin();
+
+ asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.poly[0]));
+
+ for (i = 0; i < size; i += 64) {
+ asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+ asm volatile ("movdqa %0,%%xmm1" : : "m" (v[l][i + 16]));
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (v[l][i + 32]));
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (v[l][i + 48]));
+ asm volatile ("movdqa %xmm0,%xmm4");
+ asm volatile ("movdqa %xmm1,%xmm5");
+ asm volatile ("movdqa %xmm2,%xmm6");
+ asm volatile ("movdqa %xmm3,%xmm7");
+ for (d = l - 1; d >= 0; --d) {
+ asm volatile ("pxor %xmm8,%xmm8");
+ asm volatile ("pxor %xmm9,%xmm9");
+ asm volatile ("pxor %xmm10,%xmm10");
+ asm volatile ("pxor %xmm11,%xmm11");
+ asm volatile ("pcmpgtb %xmm4,%xmm8");
+ asm volatile ("pcmpgtb %xmm5,%xmm9");
+ asm volatile ("pcmpgtb %xmm6,%xmm10");
+ asm volatile ("pcmpgtb %xmm7,%xmm11");
+ asm volatile ("paddb %xmm4,%xmm4");
+ asm volatile ("paddb %xmm5,%xmm5");
+ asm volatile ("paddb %xmm6,%xmm6");
+ asm volatile ("paddb %xmm7,%xmm7");
+ asm volatile ("pand %xmm15,%xmm8");
+ asm volatile ("pand %xmm15,%xmm9");
+ asm volatile ("pand %xmm15,%xmm10");
+ asm volatile ("pand %xmm15,%xmm11");
+ asm volatile ("pxor %xmm8,%xmm4");
+ asm volatile ("pxor %xmm9,%xmm5");
+ asm volatile ("pxor %xmm10,%xmm6");
+ asm volatile ("pxor %xmm11,%xmm7");
+
+ asm volatile ("movdqa %0,%%xmm8" : : "m" (v[d][i]));
+ asm volatile ("movdqa %0,%%xmm9" : : "m" (v[d][i + 16]));
+ asm volatile ("movdqa %0,%%xmm10" : : "m" (v[d][i + 32]));
+ asm volatile ("movdqa %0,%%xmm11" : : "m" (v[d][i + 48]));
+ asm volatile ("pxor %xmm8,%xmm0");
+ asm volatile ("pxor %xmm9,%xmm1");
+ asm volatile ("pxor %xmm10,%xmm2");
+ asm volatile ("pxor %xmm11,%xmm3");
+ asm volatile ("pxor %xmm8,%xmm4");
+ asm volatile ("pxor %xmm9,%xmm5");
+ asm volatile ("pxor %xmm10,%xmm6");
+ asm volatile ("pxor %xmm11,%xmm7");
+ }
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (p[i + 16]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (p[i + 32]));
+ asm volatile ("movntdq %%xmm3,%0" : "=m" (p[i + 48]));
+ asm volatile ("movntdq %%xmm4,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm5,%0" : "=m" (q[i + 16]));
+ asm volatile ("movntdq %%xmm6,%0" : "=m" (q[i + 32]));
+ asm volatile ("movntdq %%xmm7,%0" : "=m" (q[i + 48]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * GEN3 (triple parity with Cauchy matrix) SSSE3 implementation
+ */
+void raid_gen3_ssse3(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 3; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_sse_begin();
+
+ /* generic case with at least two data disks */
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfconst16.poly[0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 16) {
+ /* last disk without the by two multiplication */
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+
+ asm volatile ("movdqa %xmm4,%xmm0");
+ asm volatile ("movdqa %xmm4,%xmm1");
+
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm5,%xmm6");
+ asm volatile ("pxor %xmm6,%xmm2");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pcmpgtb %xmm1,%xmm5");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("pand %xmm3,%xmm5");
+ asm volatile ("pxor %xmm5,%xmm1");
+
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pxor %xmm6,%xmm2");
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("pshufb %xmm5,%xmm6");
+ asm volatile ("pxor %xmm6,%xmm2");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pcmpgtb %xmm1,%xmm5");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("pand %xmm3,%xmm5");
+ asm volatile ("pxor %xmm5,%xmm1");
+
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm4,%xmm2");
+
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
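From GEN3 onward the kernels also need multiplications by arbitrary Cauchy-matrix coefficients, and that is what the gfgenpshufb tables provide: per disk and per extra parity row there is a 16-entry table for the product of the low nibble and another for the high nibble, combined by the pshufb/pshufb/pxor sequence on %xmm4/%xmm5 above. A scalar rendering of that lookup (the table layout is inferred from the indexing used here, so treat it as an assumption):

/* illustrative nibble-table multiply: the product of byte x by a fixed
 * GF(2^8) coefficient c is lo[x & 0xf] ^ hi[x >> 4], where lo[] and hi[]
 * hold c times every low-nibble and high-nibble value respectively */
static inline uint8_t gf_mul_by_tables_sketch(uint8_t x,
					      const uint8_t lo[16],
					      const uint8_t hi[16])
{
	return lo[x & 0x0f] ^ hi[x >> 4];
}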
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSSE3)
+/*
+ * GEN3 (triple parity with Cauchy matrix) SSSE3 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen3_ssse3ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 3; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_sse_begin();
+
+ /* generic case with at least two data disks */
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfconst16.poly[0]));
+ asm volatile ("movdqa %0,%%xmm11" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 32) {
+ /* last disk without the by two multiplication */
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (v[l][i + 16]));
+
+ asm volatile ("movdqa %xmm4,%xmm0");
+ asm volatile ("movdqa %xmm4,%xmm1");
+ asm volatile ("movdqa %xmm12,%xmm8");
+ asm volatile ("movdqa %xmm12,%xmm9");
+
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("movdqa %xmm12,%xmm13");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("psrlw $4,%xmm13");
+ asm volatile ("pand %xmm11,%xmm4");
+ asm volatile ("pand %xmm11,%xmm12");
+ asm volatile ("pand %xmm11,%xmm5");
+ asm volatile ("pand %xmm11,%xmm13");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("movdqa %xmm2,%xmm10");
+ asm volatile ("movdqa %xmm7,%xmm15");
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm12,%xmm10");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pshufb %xmm13,%xmm15");
+ asm volatile ("pxor %xmm7,%xmm2");
+ asm volatile ("pxor %xmm15,%xmm10");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (v[d][i + 16]));
+
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pxor %xmm13,%xmm13");
+ asm volatile ("pcmpgtb %xmm1,%xmm5");
+ asm volatile ("pcmpgtb %xmm9,%xmm13");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("paddb %xmm9,%xmm9");
+ asm volatile ("pand %xmm3,%xmm5");
+ asm volatile ("pand %xmm3,%xmm13");
+ asm volatile ("pxor %xmm5,%xmm1");
+ asm volatile ("pxor %xmm13,%xmm9");
+
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm12,%xmm8");
+ asm volatile ("pxor %xmm12,%xmm9");
+
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("movdqa %xmm12,%xmm13");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("psrlw $4,%xmm13");
+ asm volatile ("pand %xmm11,%xmm4");
+ asm volatile ("pand %xmm11,%xmm12");
+ asm volatile ("pand %xmm11,%xmm5");
+ asm volatile ("pand %xmm11,%xmm13");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("movdqa %xmm6,%xmm14");
+ asm volatile ("movdqa %xmm7,%xmm15");
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm12,%xmm14");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pshufb %xmm13,%xmm15");
+ asm volatile ("pxor %xmm6,%xmm2");
+ asm volatile ("pxor %xmm14,%xmm10");
+ asm volatile ("pxor %xmm7,%xmm2");
+ asm volatile ("pxor %xmm15,%xmm10");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (v[0][i + 16]));
+
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pxor %xmm13,%xmm13");
+ asm volatile ("pcmpgtb %xmm1,%xmm5");
+ asm volatile ("pcmpgtb %xmm9,%xmm13");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("paddb %xmm9,%xmm9");
+ asm volatile ("pand %xmm3,%xmm5");
+ asm volatile ("pand %xmm3,%xmm13");
+ asm volatile ("pxor %xmm5,%xmm1");
+ asm volatile ("pxor %xmm13,%xmm9");
+
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm4,%xmm2");
+ asm volatile ("pxor %xmm12,%xmm8");
+ asm volatile ("pxor %xmm12,%xmm9");
+ asm volatile ("pxor %xmm12,%xmm10");
+
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm8,%0" : "=m" (p[i + 16]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm9,%0" : "=m" (q[i + 16]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+ asm volatile ("movntdq %%xmm10,%0" : "=m" (r[i + 16]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
+/*
+ * GEN3 (triple parity with Cauchy matrix) AVX2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen3_avx2ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 3; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_avx_begin();
+
+ /* generic case with at least two data disks */
+ asm volatile ("vbroadcasti128 %0, %%ymm3" : : "m" (gfconst16.poly[0]));
+ asm volatile ("vbroadcasti128 %0, %%ymm11" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 64) {
+ /* last disk without the by two multiplication */
+ asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[l][i]));
+ asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[l][i + 32]));
+
+ asm volatile ("vmovdqa %ymm4,%ymm0");
+ asm volatile ("vmovdqa %ymm4,%ymm1");
+ asm volatile ("vmovdqa %ymm12,%ymm8");
+ asm volatile ("vmovdqa %ymm12,%ymm9");
+
+ asm volatile ("vpsrlw $4,%ymm4,%ymm5");
+ asm volatile ("vpsrlw $4,%ymm12,%ymm13");
+ asm volatile ("vpand %ymm11,%ymm4,%ymm4");
+ asm volatile ("vpand %ymm11,%ymm12,%ymm12");
+ asm volatile ("vpand %ymm11,%ymm5,%ymm5");
+ asm volatile ("vpand %ymm11,%ymm13,%ymm13");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm10" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("vpshufb %ymm4,%ymm10,%ymm2");
+ asm volatile ("vpshufb %ymm12,%ymm10,%ymm10");
+ asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+ asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+ asm volatile ("vpxor %ymm7,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm15,%ymm10,%ymm10");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[d][i]));
+ asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[d][i + 32]));
+
+ asm volatile ("vpxor %ymm5,%ymm5,%ymm5");
+ asm volatile ("vpxor %ymm13,%ymm13,%ymm13");
+ asm volatile ("vpcmpgtb %ymm1,%ymm5,%ymm5");
+ asm volatile ("vpcmpgtb %ymm9,%ymm13,%ymm13");
+ asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+ asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+ asm volatile ("vpand %ymm3,%ymm5,%ymm5");
+ asm volatile ("vpand %ymm3,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm5,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm13,%ymm9,%ymm9");
+
+ asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+ asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+
+ asm volatile ("vpsrlw $4,%ymm4,%ymm5");
+ asm volatile ("vpsrlw $4,%ymm12,%ymm13");
+ asm volatile ("vpand %ymm11,%ymm4,%ymm4");
+ asm volatile ("vpand %ymm11,%ymm12,%ymm12");
+ asm volatile ("vpand %ymm11,%ymm5,%ymm5");
+ asm volatile ("vpand %ymm11,%ymm13,%ymm13");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm14" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("vpshufb %ymm4,%ymm14,%ymm6");
+ asm volatile ("vpshufb %ymm12,%ymm14,%ymm14");
+ asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+ asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+ asm volatile ("vpxor %ymm6,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm14,%ymm10,%ymm10");
+ asm volatile ("vpxor %ymm7,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm15,%ymm10,%ymm10");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[0][i]));
+ asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[0][i + 32]));
+
+ asm volatile ("vpxor %ymm5,%ymm5,%ymm5");
+ asm volatile ("vpxor %ymm13,%ymm13,%ymm13");
+ asm volatile ("vpcmpgtb %ymm1,%ymm5,%ymm5");
+ asm volatile ("vpcmpgtb %ymm9,%ymm13,%ymm13");
+ asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+ asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+ asm volatile ("vpand %ymm3,%ymm5,%ymm5");
+ asm volatile ("vpand %ymm3,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm5,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm13,%ymm9,%ymm9");
+
+ asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+ asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+ asm volatile ("vpxor %ymm12,%ymm10,%ymm10");
+
+ asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+ asm volatile ("vmovntdq %%ymm8,%0" : "=m" (p[i + 32]));
+ asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+ asm volatile ("vmovntdq %%ymm9,%0" : "=m" (q[i + 32]));
+ asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+ asm volatile ("vmovntdq %%ymm10,%0" : "=m" (r[i + 32]));
+ }
+
+ raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * GEN4 (quad parity with Cauchy matrix) SSSE3 implementation
+ */
+void raid_gen4_ssse3(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 4; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_sse_begin();
+
+ /* generic case with at least two data disks */
+ for (i = 0; i < size; i += 16) {
+ /* last disk without the by two multiplication */
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+
+ asm volatile ("movdqa %xmm4,%xmm0");
+ asm volatile ("movdqa %xmm4,%xmm1");
+
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm7,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][1][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm3");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm7,%xmm3");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pcmpgtb %xmm1,%xmm5");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pxor %xmm5,%xmm1");
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm6,%xmm2");
+ asm volatile ("pxor %xmm7,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][1][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm6,%xmm3");
+ asm volatile ("pxor %xmm7,%xmm3");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pcmpgtb %xmm1,%xmm5");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pxor %xmm5,%xmm1");
+
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm4,%xmm2");
+ asm volatile ("pxor %xmm4,%xmm3");
+
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+ asm volatile ("movntdq %%xmm3,%0" : "=m" (s[i]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSSE3)
+/*
+ * GEN4 (quad parity with Cauchy matrix) SSSE3 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen4_ssse3ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 4; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_sse_begin();
+
+ /* generic case with at least two data disks */
+ for (i = 0; i < size; i += 32) {
+ /* last disk without the by two multiplication */
+ asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.low4[0]));
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (v[l][i + 16]));
+
+ asm volatile ("movdqa %xmm4,%xmm0");
+ asm volatile ("movdqa %xmm4,%xmm1");
+ asm volatile ("movdqa %xmm12,%xmm8");
+ asm volatile ("movdqa %xmm12,%xmm9");
+
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("movdqa %xmm12,%xmm13");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("psrlw $4,%xmm13");
+ asm volatile ("pand %xmm15,%xmm4");
+ asm volatile ("pand %xmm15,%xmm12");
+ asm volatile ("pand %xmm15,%xmm5");
+ asm volatile ("pand %xmm15,%xmm13");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("movdqa %xmm2,%xmm10");
+ asm volatile ("movdqa %xmm7,%xmm15");
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm12,%xmm10");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pshufb %xmm13,%xmm15");
+ asm volatile ("pxor %xmm7,%xmm2");
+ asm volatile ("pxor %xmm15,%xmm10");
+
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][1][1][0]));
+ asm volatile ("movdqa %xmm3,%xmm11");
+ asm volatile ("movdqa %xmm7,%xmm15");
+ asm volatile ("pshufb %xmm4,%xmm3");
+ asm volatile ("pshufb %xmm12,%xmm11");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pshufb %xmm13,%xmm15");
+ asm volatile ("pxor %xmm7,%xmm3");
+ asm volatile ("pxor %xmm15,%xmm11");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+ asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.low4[0]));
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (v[d][i + 16]));
+
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pxor %xmm13,%xmm13");
+ asm volatile ("pcmpgtb %xmm1,%xmm5");
+ asm volatile ("pcmpgtb %xmm9,%xmm13");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("paddb %xmm9,%xmm9");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pand %xmm7,%xmm13");
+ asm volatile ("pxor %xmm5,%xmm1");
+ asm volatile ("pxor %xmm13,%xmm9");
+
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm12,%xmm8");
+ asm volatile ("pxor %xmm12,%xmm9");
+
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("movdqa %xmm12,%xmm13");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("psrlw $4,%xmm13");
+ asm volatile ("pand %xmm15,%xmm4");
+ asm volatile ("pand %xmm15,%xmm12");
+ asm volatile ("pand %xmm15,%xmm5");
+ asm volatile ("pand %xmm15,%xmm13");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("movdqa %xmm6,%xmm14");
+ asm volatile ("movdqa %xmm7,%xmm15");
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm12,%xmm14");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pshufb %xmm13,%xmm15");
+ asm volatile ("pxor %xmm6,%xmm2");
+ asm volatile ("pxor %xmm14,%xmm10");
+ asm volatile ("pxor %xmm7,%xmm2");
+ asm volatile ("pxor %xmm15,%xmm10");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][1][1][0]));
+ asm volatile ("movdqa %xmm6,%xmm14");
+ asm volatile ("movdqa %xmm7,%xmm15");
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm12,%xmm14");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pshufb %xmm13,%xmm15");
+ asm volatile ("pxor %xmm6,%xmm3");
+ asm volatile ("pxor %xmm14,%xmm11");
+ asm volatile ("pxor %xmm7,%xmm3");
+ asm volatile ("pxor %xmm15,%xmm11");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+ asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.low4[0]));
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (v[0][i + 16]));
+
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pxor %xmm13,%xmm13");
+ asm volatile ("pcmpgtb %xmm1,%xmm5");
+ asm volatile ("pcmpgtb %xmm9,%xmm13");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("paddb %xmm9,%xmm9");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pand %xmm7,%xmm13");
+ asm volatile ("pxor %xmm5,%xmm1");
+ asm volatile ("pxor %xmm13,%xmm9");
+
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm4,%xmm2");
+ asm volatile ("pxor %xmm4,%xmm3");
+ asm volatile ("pxor %xmm12,%xmm8");
+ asm volatile ("pxor %xmm12,%xmm9");
+ asm volatile ("pxor %xmm12,%xmm10");
+ asm volatile ("pxor %xmm12,%xmm11");
+
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm8,%0" : "=m" (p[i + 16]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm9,%0" : "=m" (q[i + 16]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+ asm volatile ("movntdq %%xmm10,%0" : "=m" (r[i + 16]));
+ asm volatile ("movntdq %%xmm3,%0" : "=m" (s[i]));
+ asm volatile ("movntdq %%xmm11,%0" : "=m" (s[i + 16]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
+/*
+ * GEN4 (quad parity with Cauchy matrix) AVX2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen4_avx2ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 4; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_avx_begin();
+
+ /* generic case with at least two data disks */
+ for (i = 0; i < size; i += 64) {
+ /* last disk without the by two multiplication */
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfconst16.low4[0]));
+ asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[l][i]));
+ asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[l][i + 32]));
+
+ asm volatile ("vmovdqa %ymm4,%ymm0");
+ asm volatile ("vmovdqa %ymm4,%ymm1");
+ asm volatile ("vmovdqa %ymm12,%ymm8");
+ asm volatile ("vmovdqa %ymm12,%ymm9");
+
+ asm volatile ("vpsrlw $4,%ymm4,%ymm5");
+ asm volatile ("vpsrlw $4,%ymm12,%ymm13");
+ asm volatile ("vpand %ymm15,%ymm4,%ymm4");
+ asm volatile ("vpand %ymm15,%ymm12,%ymm12");
+ asm volatile ("vpand %ymm15,%ymm5,%ymm5");
+ asm volatile ("vpand %ymm15,%ymm13,%ymm13");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm10" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("vpshufb %ymm4,%ymm10,%ymm2");
+ asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+ asm volatile ("vpshufb %ymm12,%ymm10,%ymm10");
+ asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+ asm volatile ("vpxor %ymm7,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm15,%ymm10,%ymm10");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm11" : : "m" (gfgenpshufb[l][1][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[l][1][1][0]));
+ asm volatile ("vpshufb %ymm4,%ymm11,%ymm3");
+ asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+ asm volatile ("vpshufb %ymm12,%ymm11,%ymm11");
+ asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+ asm volatile ("vpxor %ymm7,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm15,%ymm11,%ymm11");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfconst16.poly[0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfconst16.low4[0]));
+ asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[d][i]));
+ asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[d][i + 32]));
+
+ asm volatile ("vpxor %ymm5,%ymm5,%ymm5");
+ asm volatile ("vpxor %ymm13,%ymm13,%ymm13");
+ asm volatile ("vpcmpgtb %ymm1,%ymm5,%ymm5");
+ asm volatile ("vpcmpgtb %ymm9,%ymm13,%ymm13");
+ asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+ asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+ asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+ asm volatile ("vpand %ymm7,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm5,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm13,%ymm9,%ymm9");
+
+ asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+ asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+
+ asm volatile ("vpsrlw $4,%ymm4,%ymm5");
+ asm volatile ("vpsrlw $4,%ymm12,%ymm13");
+ asm volatile ("vpand %ymm15,%ymm4,%ymm4");
+ asm volatile ("vpand %ymm15,%ymm12,%ymm12");
+ asm volatile ("vpand %ymm15,%ymm5,%ymm5");
+ asm volatile ("vpand %ymm15,%ymm13,%ymm13");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm14" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("vpshufb %ymm4,%ymm14,%ymm6");
+ asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+ asm volatile ("vpshufb %ymm12,%ymm14,%ymm14");
+ asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+ asm volatile ("vpxor %ymm6,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm14,%ymm10,%ymm10");
+ asm volatile ("vpxor %ymm7,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm15,%ymm10,%ymm10");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm14" : : "m" (gfgenpshufb[d][1][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfgenpshufb[d][1][1][0]));
+ asm volatile ("vpshufb %ymm4,%ymm14,%ymm6");
+ asm volatile ("vpshufb %ymm5,%ymm15,%ymm7");
+ asm volatile ("vpshufb %ymm12,%ymm14,%ymm14");
+ asm volatile ("vpshufb %ymm13,%ymm15,%ymm15");
+ asm volatile ("vpxor %ymm6,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm14,%ymm11,%ymm11");
+ asm volatile ("vpxor %ymm7,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm15,%ymm11,%ymm11");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfconst16.poly[0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfconst16.low4[0]));
+ asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[0][i]));
+ asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[0][i + 32]));
+
+ asm volatile ("vpxor %ymm5,%ymm5,%ymm5");
+ asm volatile ("vpxor %ymm13,%ymm13,%ymm13");
+ asm volatile ("vpcmpgtb %ymm1,%ymm5,%ymm5");
+ asm volatile ("vpcmpgtb %ymm9,%ymm13,%ymm13");
+ asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+ asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+ asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+ asm volatile ("vpand %ymm7,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm5,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm13,%ymm9,%ymm9");
+
+ asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm4,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+ asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+ asm volatile ("vpxor %ymm12,%ymm10,%ymm10");
+ asm volatile ("vpxor %ymm12,%ymm11,%ymm11");
+
+ asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+ asm volatile ("vmovntdq %%ymm8,%0" : "=m" (p[i + 32]));
+ asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+ asm volatile ("vmovntdq %%ymm9,%0" : "=m" (q[i + 32]));
+ asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+ asm volatile ("vmovntdq %%ymm10,%0" : "=m" (r[i + 32]));
+ asm volatile ("vmovntdq %%ymm3,%0" : "=m" (s[i]));
+ asm volatile ("vmovntdq %%ymm11,%0" : "=m" (s[i + 32]));
+ }
+
+ raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * GEN5 (penta parity with Cauchy matrix) SSSE3 implementation
+ */
+void raid_gen5_ssse3(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ uint8_t *t;
+ int d, l;
+ size_t i;
+ uint8_t buffer[16+16];
+ uint8_t *pd = __align_ptr(buffer, 16);
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+ t = v[nd + 4];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 5; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_sse_begin();
+
+ /* generic case with at least two data disks */
+ for (i = 0; i < size; i += 16) {
+ /* last disk without the by two multiplication */
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+
+ asm volatile ("movdqa %xmm4,%xmm0");
+ asm volatile ("movdqa %%xmm4,%0" : "=m" (pd[0]));
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+
+ asm volatile ("movdqa %0,%%xmm1" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm1");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm7,%xmm1");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][1][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm7,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][2][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][2][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm3");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm7,%xmm3");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (pd[0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pcmpgtb %xmm0,%xmm5");
+ asm volatile ("paddb %xmm0,%xmm0");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pxor %xmm5,%xmm0");
+
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm6");
+ asm volatile ("movdqa %%xmm6,%0" : "=m" (pd[0]));
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm6,%xmm1");
+ asm volatile ("pxor %xmm7,%xmm1");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][1][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm6,%xmm2");
+ asm volatile ("pxor %xmm7,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][2][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][2][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm6,%xmm3");
+ asm volatile ("pxor %xmm7,%xmm3");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (pd[0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pcmpgtb %xmm0,%xmm5");
+ asm volatile ("paddb %xmm0,%xmm0");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pxor %xmm5,%xmm0");
+
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm4,%xmm2");
+ asm volatile ("pxor %xmm4,%xmm3");
+ asm volatile ("pxor %xmm4,%xmm6");
+
+ asm volatile ("movntdq %%xmm6,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (r[i]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (s[i]));
+ asm volatile ("movntdq %%xmm3,%0" : "=m" (t[i]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSSE3)
+/*
+ * GEN5 (penta parity with Cauchy matrix) SSSE3 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen5_ssse3ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ uint8_t *t;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+ t = v[nd + 4];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 5; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_sse_begin();
+
+ /* generic case with at least two data disks */
+ asm volatile ("movdqa %0,%%xmm14" : : "m" (gfconst16.poly[0]));
+ asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 16) {
+ /* last disk without the by two multiplication */
+ asm volatile ("movdqa %0,%%xmm10" : : "m" (v[l][i]));
+
+ asm volatile ("movdqa %xmm10,%xmm0");
+ asm volatile ("movdqa %xmm10,%xmm1");
+
+ asm volatile ("movdqa %xmm10,%xmm11");
+ asm volatile ("psrlw $4,%xmm11");
+ asm volatile ("pand %xmm15,%xmm10");
+ asm volatile ("pand %xmm15,%xmm11");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm2");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm13,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][1][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm3");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm13,%xmm3");
+
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (gfgenpshufb[l][2][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][2][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm4");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm13,%xmm4");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("movdqa %0,%%xmm10" : : "m" (v[d][i]));
+
+ asm volatile ("pxor %xmm11,%xmm11");
+ asm volatile ("pcmpgtb %xmm1,%xmm11");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("pand %xmm14,%xmm11");
+ asm volatile ("pxor %xmm11,%xmm1");
+
+ asm volatile ("pxor %xmm10,%xmm0");
+ asm volatile ("pxor %xmm10,%xmm1");
+
+ asm volatile ("movdqa %xmm10,%xmm11");
+ asm volatile ("psrlw $4,%xmm11");
+ asm volatile ("pand %xmm15,%xmm10");
+ asm volatile ("pand %xmm15,%xmm11");
+
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm12");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm12,%xmm2");
+ asm volatile ("pxor %xmm13,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][1][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm12");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm12,%xmm3");
+ asm volatile ("pxor %xmm13,%xmm3");
+
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][2][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][2][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm12");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm12,%xmm4");
+ asm volatile ("pxor %xmm13,%xmm4");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("movdqa %0,%%xmm10" : : "m" (v[0][i]));
+
+ asm volatile ("pxor %xmm11,%xmm11");
+ asm volatile ("pcmpgtb %xmm1,%xmm11");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("pand %xmm14,%xmm11");
+ asm volatile ("pxor %xmm11,%xmm1");
+
+ asm volatile ("pxor %xmm10,%xmm0");
+ asm volatile ("pxor %xmm10,%xmm1");
+ asm volatile ("pxor %xmm10,%xmm2");
+ asm volatile ("pxor %xmm10,%xmm3");
+ asm volatile ("pxor %xmm10,%xmm4");
+
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+ asm volatile ("movntdq %%xmm3,%0" : "=m" (s[i]));
+ asm volatile ("movntdq %%xmm4,%0" : "=m" (t[i]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
+/*
+ * GEN5 (penta parity with Cauchy matrix) AVX2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen5_avx2ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ uint8_t *t;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+ t = v[nd + 4];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 5; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_avx_begin();
+
+ /* generic case with at least two data disks */
+ asm volatile ("vpxor %ymm8,%ymm8,%ymm8");
+ asm volatile ("vbroadcasti128 %0,%%ymm14" : : "m" (gfconst16.poly[0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 32) {
+ /* last disk without the by two multiplication */
+ asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[l][i]));
+
+ asm volatile ("vmovdqa %ymm10,%ymm0");
+ asm volatile ("vmovdqa %ymm10,%ymm1");
+
+ asm volatile ("vpsrlw $4,%ymm10,%ymm11");
+ asm volatile ("vpand %ymm15,%ymm10,%ymm10");
+ asm volatile ("vpand %ymm15,%ymm11,%ymm11");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm2,%ymm2");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm13,%ymm2,%ymm2");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfgenpshufb[l][1][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][1][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm3,%ymm3");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm13,%ymm3,%ymm3");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm4" : : "m" (gfgenpshufb[l][2][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][2][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm4,%ymm4");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm13,%ymm4,%ymm4");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[d][i]));
+
+ asm volatile ("vpcmpgtb %ymm1,%ymm8,%ymm11");
+ asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+ asm volatile ("vpand %ymm14,%ymm11,%ymm11");
+ asm volatile ("vpxor %ymm11,%ymm1,%ymm1");
+
+ asm volatile ("vpxor %ymm10,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm10,%ymm1,%ymm1");
+
+ asm volatile ("vpsrlw $4,%ymm10,%ymm11");
+ asm volatile ("vpand %ymm15,%ymm10,%ymm10");
+ asm volatile ("vpand %ymm15,%ymm11,%ymm11");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm12,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm13,%ymm2,%ymm2");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][1][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][1][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm12,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm13,%ymm3,%ymm3");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][2][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][2][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm12,%ymm4,%ymm4");
+ asm volatile ("vpxor %ymm13,%ymm4,%ymm4");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[0][i]));
+
+ asm volatile ("vpcmpgtb %ymm1,%ymm8,%ymm11");
+ asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+ asm volatile ("vpand %ymm14,%ymm11,%ymm11");
+ asm volatile ("vpxor %ymm11,%ymm1,%ymm1");
+
+ asm volatile ("vpxor %ymm10,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm10,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm10,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm10,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm10,%ymm4,%ymm4");
+
+ asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+ asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+ asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+ asm volatile ("vmovntdq %%ymm3,%0" : "=m" (s[i]));
+ asm volatile ("vmovntdq %%ymm4,%0" : "=m" (t[i]));
+ }
+
+ raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * GEN6 (hexa parity with Cauchy matrix) SSSE3 implementation
+ */
+void raid_gen6_ssse3(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ uint8_t *t;
+ uint8_t *u;
+ int d, l;
+ size_t i;
+ uint8_t buffer[2*16+16];
+ uint8_t *pd = __align_ptr(buffer, 16);
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+ t = v[nd + 4];
+ u = v[nd + 5];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 6; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_sse_begin();
+
+ /* generic case with at least two data disks */
+ for (i = 0; i < size; i += 16) {
+ /* last disk without the by two multiplication */
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[l][i]));
+
+ asm volatile ("movdqa %%xmm4,%0" : "=m" (pd[0]));
+ asm volatile ("movdqa %%xmm4,%0" : "=m" (pd[16]));
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+
+ asm volatile ("movdqa %0,%%xmm0" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm0");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm7,%xmm0");
+
+ asm volatile ("movdqa %0,%%xmm1" : : "m" (gfgenpshufb[l][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][1][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm1");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm7,%xmm1");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][2][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][2][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm7,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][3][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[l][3][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm3");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm7,%xmm3");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("movdqa %0,%%xmm5" : : "m" (pd[0]));
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (pd[16]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+
+ asm volatile ("pxor %xmm4,%xmm4");
+ asm volatile ("pcmpgtb %xmm6,%xmm4");
+ asm volatile ("paddb %xmm6,%xmm6");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pxor %xmm4,%xmm6");
+
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+
+ asm volatile ("pxor %xmm4,%xmm5");
+ asm volatile ("pxor %xmm4,%xmm6");
+ asm volatile ("movdqa %%xmm5,%0" : "=m" (pd[0]));
+ asm volatile ("movdqa %%xmm6,%0" : "=m" (pd[16]));
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm6,%xmm0");
+ asm volatile ("pxor %xmm7,%xmm0");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][1][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm6,%xmm1");
+ asm volatile ("pxor %xmm7,%xmm1");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][2][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][2][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm6,%xmm2");
+ asm volatile ("pxor %xmm7,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfgenpshufb[d][3][0][0]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfgenpshufb[d][3][1][0]));
+ asm volatile ("pshufb %xmm4,%xmm6");
+ asm volatile ("pshufb %xmm5,%xmm7");
+ asm volatile ("pxor %xmm6,%xmm3");
+ asm volatile ("pxor %xmm7,%xmm3");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("movdqa %0,%%xmm5" : : "m" (pd[0]));
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (pd[16]));
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.poly[0]));
+
+ asm volatile ("pxor %xmm4,%xmm4");
+ asm volatile ("pcmpgtb %xmm6,%xmm4");
+ asm volatile ("paddb %xmm6,%xmm6");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pxor %xmm4,%xmm6");
+
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[0][i]));
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm4,%xmm2");
+ asm volatile ("pxor %xmm4,%xmm3");
+ asm volatile ("pxor %xmm4,%xmm5");
+ asm volatile ("pxor %xmm4,%xmm6");
+
+ asm volatile ("movntdq %%xmm5,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm6,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (r[i]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (s[i]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (t[i]));
+ asm volatile ("movntdq %%xmm3,%0" : "=m" (u[i]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSSE3)
+/*
+ * GEN6 (hexa parity with Cauchy matrix) SSSE3 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen6_ssse3ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ uint8_t *t;
+ uint8_t *u;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+ t = v[nd + 4];
+ u = v[nd + 5];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 6; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_sse_begin();
+
+ /* generic case with at least two data disks */
+ asm volatile ("movdqa %0,%%xmm14" : : "m" (gfconst16.poly[0]));
+ asm volatile ("movdqa %0,%%xmm15" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 16) {
+ /* last disk without the by two multiplication */
+ asm volatile ("movdqa %0,%%xmm10" : : "m" (v[l][i]));
+
+ asm volatile ("movdqa %xmm10,%xmm0");
+ asm volatile ("movdqa %xmm10,%xmm1");
+
+ asm volatile ("movdqa %xmm10,%xmm11");
+ asm volatile ("psrlw $4,%xmm11");
+ asm volatile ("pand %xmm15,%xmm10");
+ asm volatile ("pand %xmm15,%xmm11");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm2");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm13,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfgenpshufb[l][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][1][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm3");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm13,%xmm3");
+
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (gfgenpshufb[l][2][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][2][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm4");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm13,%xmm4");
+
+ asm volatile ("movdqa %0,%%xmm5" : : "m" (gfgenpshufb[l][3][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[l][3][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm5");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm13,%xmm5");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("movdqa %0,%%xmm10" : : "m" (v[d][i]));
+
+ asm volatile ("pxor %xmm11,%xmm11");
+ asm volatile ("pcmpgtb %xmm1,%xmm11");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("pand %xmm14,%xmm11");
+ asm volatile ("pxor %xmm11,%xmm1");
+
+ asm volatile ("pxor %xmm10,%xmm0");
+ asm volatile ("pxor %xmm10,%xmm1");
+
+ asm volatile ("movdqa %xmm10,%xmm11");
+ asm volatile ("psrlw $4,%xmm11");
+ asm volatile ("pand %xmm15,%xmm10");
+ asm volatile ("pand %xmm15,%xmm11");
+
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm12");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm12,%xmm2");
+ asm volatile ("pxor %xmm13,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][1][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][1][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm12");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm12,%xmm3");
+ asm volatile ("pxor %xmm13,%xmm3");
+
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][2][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][2][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm12");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm12,%xmm4");
+ asm volatile ("pxor %xmm13,%xmm4");
+
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (gfgenpshufb[d][3][0][0]));
+ asm volatile ("movdqa %0,%%xmm13" : : "m" (gfgenpshufb[d][3][1][0]));
+ asm volatile ("pshufb %xmm10,%xmm12");
+ asm volatile ("pshufb %xmm11,%xmm13");
+ asm volatile ("pxor %xmm12,%xmm5");
+ asm volatile ("pxor %xmm13,%xmm5");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("movdqa %0,%%xmm10" : : "m" (v[0][i]));
+
+ asm volatile ("pxor %xmm11,%xmm11");
+ asm volatile ("pcmpgtb %xmm1,%xmm11");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("pand %xmm14,%xmm11");
+ asm volatile ("pxor %xmm11,%xmm1");
+
+ asm volatile ("pxor %xmm10,%xmm0");
+ asm volatile ("pxor %xmm10,%xmm1");
+ asm volatile ("pxor %xmm10,%xmm2");
+ asm volatile ("pxor %xmm10,%xmm3");
+ asm volatile ("pxor %xmm10,%xmm4");
+ asm volatile ("pxor %xmm10,%xmm5");
+
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+ asm volatile ("movntdq %%xmm3,%0" : "=m" (s[i]));
+ asm volatile ("movntdq %%xmm4,%0" : "=m" (t[i]));
+ asm volatile ("movntdq %%xmm5,%0" : "=m" (u[i]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
+/*
+ * GEN6 (hexa parity with Cauchy matrix) AVX2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_gen6_avx2ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ uint8_t *s;
+ uint8_t *t;
+ uint8_t *u;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+ s = v[nd + 3];
+ t = v[nd + 4];
+ u = v[nd + 5];
+
+ /* special case with only one data disk */
+ if (l == 0) {
+ for (i = 0; i < 6; ++i)
+ memcpy(v[1 + i], v[0], size);
+ return;
+ }
+
+ raid_avx_begin();
+
+ /* generic case with at least two data disks */
+ asm volatile ("vpxor %ymm8,%ymm8,%ymm8");
+ asm volatile ("vbroadcasti128 %0,%%ymm14" : : "m" (gfconst16.poly[0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm15" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 32) {
+ /* last disk without the by two multiplication */
+ asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[l][i]));
+
+ asm volatile ("vmovdqa %ymm10,%ymm0");
+ asm volatile ("vmovdqa %ymm10,%ymm1");
+
+ asm volatile ("vpsrlw $4,%ymm10,%ymm11");
+ asm volatile ("vpand %ymm15,%ymm10,%ymm10");
+ asm volatile ("vpand %ymm15,%ymm11,%ymm11");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfgenpshufb[l][0][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][0][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm2,%ymm2");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm13,%ymm2,%ymm2");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfgenpshufb[l][1][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][1][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm3,%ymm3");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm13,%ymm3,%ymm3");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm4" : : "m" (gfgenpshufb[l][2][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][2][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm4,%ymm4");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm13,%ymm4,%ymm4");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm5" : : "m" (gfgenpshufb[l][3][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[l][3][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm5,%ymm5");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm13,%ymm5,%ymm5");
+
+ /* intermediate disks */
+ for (d = l - 1; d > 0; --d) {
+ asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[d][i]));
+
+ asm volatile ("vpcmpgtb %ymm1,%ymm8,%ymm11");
+ asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+ asm volatile ("vpand %ymm14,%ymm11,%ymm11");
+ asm volatile ("vpxor %ymm11,%ymm1,%ymm1");
+
+ asm volatile ("vpxor %ymm10,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm10,%ymm1,%ymm1");
+
+ asm volatile ("vpsrlw $4,%ymm10,%ymm11");
+ asm volatile ("vpand %ymm15,%ymm10,%ymm10");
+ asm volatile ("vpand %ymm15,%ymm11,%ymm11");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][0][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][0][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm12,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm13,%ymm2,%ymm2");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][1][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][1][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm12,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm13,%ymm3,%ymm3");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][2][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][2][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm12,%ymm4,%ymm4");
+ asm volatile ("vpxor %ymm13,%ymm4,%ymm4");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm12" : : "m" (gfgenpshufb[d][3][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm13" : : "m" (gfgenpshufb[d][3][1][0]));
+ asm volatile ("vpshufb %ymm10,%ymm12,%ymm12");
+ asm volatile ("vpshufb %ymm11,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm12,%ymm5,%ymm5");
+ asm volatile ("vpxor %ymm13,%ymm5,%ymm5");
+ }
+
+ /* first disk with all coefficients at 1 */
+ asm volatile ("vmovdqa %0,%%ymm10" : : "m" (v[0][i]));
+
+ asm volatile ("vpcmpgtb %ymm1,%ymm8,%ymm11");
+ asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+ asm volatile ("vpand %ymm14,%ymm11,%ymm11");
+ asm volatile ("vpxor %ymm11,%ymm1,%ymm1");
+
+ asm volatile ("vpxor %ymm10,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm10,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm10,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm10,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm10,%ymm4,%ymm4");
+ asm volatile ("vpxor %ymm10,%ymm5,%ymm5");
+
+ asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+ asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+ asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+ asm volatile ("vmovntdq %%ymm3,%0" : "=m" (s[i]));
+ asm volatile ("vmovntdq %%ymm4,%0" : "=m" (t[i]));
+ asm volatile ("vmovntdq %%ymm5,%0" : "=m" (u[i]));
+ }
+
+ raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * RAID recovering for one disk SSSE3 implementation
+ */
+void raid_rec1_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *pa;
+ uint8_t G;
+ uint8_t V;
+ size_t i;
+
+ (void)nr; /* unused, it's always 1 */
+
+ /* if it's RAID5, use the faster function */
+ if (ip[0] == 0) {
+ raid_rec1of1(id, nd, size, vv);
+ return;
+ }
+
+ /* setup the coefficients matrix */
+ G = A(ip[0], id[0]);
+
+ /* invert it to solve the system of linear equations */
+ V = inv(G);
+
+ /* compute delta parity */
+ raid_delta_gen(1, id, ip, nd, size, vv);
+
+ p = v[nd + ip[0]];
+ pa = v[id[0]];
+
+ raid_sse_begin();
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (gfmulpshufb[V][0][0]));
+ asm volatile ("movdqa %0,%%xmm5" : : "m" (gfmulpshufb[V][1][0]));
+
+ for (i = 0; i < size; i += 16) {
+ asm volatile ("movdqa %0,%%xmm0" : : "m" (p[i]));
+ asm volatile ("movdqa %0,%%xmm1" : : "m" (pa[i]));
+ asm volatile ("movdqa %xmm4,%xmm2");
+ asm volatile ("movdqa %xmm5,%xmm3");
+ asm volatile ("pxor %xmm0,%xmm1");
+ asm volatile ("movdqa %xmm1,%xmm0");
+ asm volatile ("psrlw $4,%xmm1");
+ asm volatile ("pand %xmm7,%xmm0");
+ asm volatile ("pand %xmm7,%xmm1");
+ asm volatile ("pshufb %xmm0,%xmm2");
+ asm volatile ("pshufb %xmm1,%xmm3");
+ asm volatile ("pxor %xmm3,%xmm2");
+ asm volatile ("movdqa %%xmm2,%0" : "=m" (pa[i]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
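For reference, a minimal scalar sketch of what raid_rec1_ssse3 above computes per byte: the lost block is the parity delta scaled by the inverse of the single Cauchy coefficient. A() and inv() are the helpers used above; gfmul() is only an assumed stand-in for the gfmulpshufb table multiply.

static void rec1_scalar_sketch(uint8_t *p, uint8_t *pa, int ip0, int id0, size_t size)
{
        /* inverse of the single coefficient for (parity ip0, data disk id0) */
        uint8_t V = inv(A(ip0, id0));
        size_t i;

        for (i = 0; i < size; ++i)
                /* lost data byte = V * (stored parity ^ delta parity) */
                pa[i] = gfmul(V, p[i] ^ pa[i]);
}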
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * RAID recovering for two disks SSSE3 implementation
+ */
+void raid_rec2_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ const int N = 2;
+ uint8_t *p[N];
+ uint8_t *pa[N];
+ uint8_t G[N * N];
+ uint8_t V[N * N];
+ size_t i;
+ int j, k;
+
+ (void)nr; /* unused, it's always 2 */
+
+ /* setup the coefficients matrix */
+ for (j = 0; j < N; ++j)
+ for (k = 0; k < N; ++k)
+ G[j * N + k] = A(ip[j], id[k]);
+
+ /* invert it to solve the system of linear equations */
+ raid_invert(G, V, N);
+
+ /* compute delta parity */
+ raid_delta_gen(N, id, ip, nd, size, vv);
+
+ for (j = 0; j < N; ++j) {
+ p[j] = v[nd + ip[j]];
+ pa[j] = v[id[j]];
+ }
+
+ raid_sse_begin();
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 16) {
+ asm volatile ("movdqa %0,%%xmm0" : : "m" (p[0][i]));
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (pa[0][i]));
+ asm volatile ("movdqa %0,%%xmm1" : : "m" (p[1][i]));
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (pa[1][i]));
+ asm volatile ("pxor %xmm2,%xmm0");
+ asm volatile ("pxor %xmm3,%xmm1");
+
+ asm volatile ("pxor %xmm6,%xmm6");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfmulpshufb[V[0]][0][0]));
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfmulpshufb[V[0]][1][0]));
+ asm volatile ("movdqa %xmm0,%xmm4");
+ asm volatile ("movdqa %xmm0,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm5,%xmm3");
+ asm volatile ("pxor %xmm2,%xmm6");
+ asm volatile ("pxor %xmm3,%xmm6");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfmulpshufb[V[1]][0][0]));
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfmulpshufb[V[1]][1][0]));
+ asm volatile ("movdqa %xmm1,%xmm4");
+ asm volatile ("movdqa %xmm1,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm5,%xmm3");
+ asm volatile ("pxor %xmm2,%xmm6");
+ asm volatile ("pxor %xmm3,%xmm6");
+
+ asm volatile ("movdqa %%xmm6,%0" : "=m" (pa[0][i]));
+
+ asm volatile ("pxor %xmm6,%xmm6");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfmulpshufb[V[2]][0][0]));
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfmulpshufb[V[2]][1][0]));
+ asm volatile ("movdqa %xmm0,%xmm4");
+ asm volatile ("movdqa %xmm0,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm5,%xmm3");
+ asm volatile ("pxor %xmm2,%xmm6");
+ asm volatile ("pxor %xmm3,%xmm6");
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfmulpshufb[V[3]][0][0]));
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfmulpshufb[V[3]][1][0]));
+ asm volatile ("movdqa %xmm1,%xmm4");
+ asm volatile ("movdqa %xmm1,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm5,%xmm3");
+ asm volatile ("pxor %xmm2,%xmm6");
+ asm volatile ("pxor %xmm3,%xmm6");
+
+ asm volatile ("movdqa %%xmm6,%0" : "=m" (pa[1][i]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSSE3)
+/*
+ * RAID recovering SSSE3 implementation
+ */
+void raid_recX_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ int N = nr;
+ uint8_t *p[RAID_PARITY_MAX];
+ uint8_t *pa[RAID_PARITY_MAX];
+ uint8_t G[RAID_PARITY_MAX * RAID_PARITY_MAX];
+ uint8_t V[RAID_PARITY_MAX * RAID_PARITY_MAX];
+ uint8_t buffer[RAID_PARITY_MAX*16+16];
+ uint8_t *pd = __align_ptr(buffer, 16);
+ size_t i;
+ int j, k;
+
+ /* setup the coefficients matrix */
+ for (j = 0; j < N; ++j)
+ for (k = 0; k < N; ++k)
+ G[j * N + k] = A(ip[j], id[k]);
+
+ /* invert it to solve the system of linear equations */
+ raid_invert(G, V, N);
+
+ /* compute delta parity */
+ raid_delta_gen(N, id, ip, nd, size, vv);
+
+ for (j = 0; j < N; ++j) {
+ p[j] = v[nd + ip[j]];
+ pa[j] = v[id[j]];
+ }
+
+ raid_sse_begin();
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 16) {
+ /* delta */
+ for (j = 0; j < N; ++j) {
+ asm volatile ("movdqa %0,%%xmm0" : : "m" (p[j][i]));
+ asm volatile ("movdqa %0,%%xmm1" : : "m" (pa[j][i]));
+ asm volatile ("pxor %xmm1,%xmm0");
+ asm volatile ("movdqa %%xmm0,%0" : "=m" (pd[j*16]));
+ }
+
+ /* reconstruct */
+ for (j = 0; j < N; ++j) {
+ asm volatile ("pxor %xmm0,%xmm0");
+ asm volatile ("pxor %xmm1,%xmm1");
+
+ for (k = 0; k < N; ++k) {
+ uint8_t m = V[j * N + k];
+
+ asm volatile ("movdqa %0,%%xmm2" : : "m" (gfmulpshufb[m][0][0]));
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfmulpshufb[m][1][0]));
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (pd[k*16]));
+ asm volatile ("movdqa %xmm4,%xmm5");
+ asm volatile ("psrlw $4,%xmm5");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm5");
+ asm volatile ("pshufb %xmm4,%xmm2");
+ asm volatile ("pshufb %xmm5,%xmm3");
+ asm volatile ("pxor %xmm2,%xmm0");
+ asm volatile ("pxor %xmm3,%xmm1");
+ }
+
+ asm volatile ("pxor %xmm1,%xmm0");
+ asm volatile ("movdqa %%xmm0,%0" : "=m" (pa[j][i]));
+ }
+ }
+
+ raid_sse_end();
+}
+#endif
+
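Similarly, a scalar sketch of the inner step of raid_recX_ssse3 above, again with gfmul() standing in for the gfmulpshufb table multiply: first take the parity deltas, then each recovered block is a row of the inverted coefficient matrix applied to that delta vector.

        uint8_t pd[RAID_PARITY_MAX];

        /* delta parity for every failed disk at this offset */
        for (k = 0; k < N; ++k)
                pd[k] = p[k][i] ^ pa[k][i];

        /* recovered byte j = row j of V times the delta vector */
        for (j = 0; j < N; ++j) {
                uint8_t acc = 0;

                for (k = 0; k < N; ++k)
                        acc ^= gfmul(V[j * N + k], pd[k]);

                pa[j][i] = acc;
        }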
+#if defined(CONFIG_X86) && defined(CONFIG_AVX2)
+/*
+ * RAID recovering for one disk AVX2 implementation
+ */
+void raid_rec1_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ uint8_t *p;
+ uint8_t *pa;
+ uint8_t G;
+ uint8_t V;
+ size_t i;
+
+ (void)nr; /* unused, it's always 1 */
+
+ /* if it's RAID5, use the faster function */
+ if (ip[0] == 0) {
+ raid_rec1of1(id, nd, size, vv);
+ return;
+ }
+
+ /* setup the coefficients matrix */
+ G = A(ip[0], id[0]);
+
+ /* invert it to solve the system of linear equations */
+ V = inv(G);
+
+ /* compute delta parity */
+ raid_delta_gen(1, id, ip, nd, size, vv);
+
+ p = v[nd + ip[0]];
+ pa = v[id[0]];
+
+ raid_avx_begin();
+
+ asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfconst16.low4[0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm4" : : "m" (gfmulpshufb[V][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm5" : : "m" (gfmulpshufb[V][1][0]));
+
+ for (i = 0; i < size; i += 32) {
+ asm volatile ("vmovdqa %0,%%ymm0" : : "m" (p[i]));
+ asm volatile ("vmovdqa %0,%%ymm1" : : "m" (pa[i]));
+ asm volatile ("vpxor %ymm1,%ymm0,%ymm0");
+ asm volatile ("vpsrlw $4,%ymm0,%ymm1");
+ asm volatile ("vpand %ymm7,%ymm0,%ymm0");
+ asm volatile ("vpand %ymm7,%ymm1,%ymm1");
+ asm volatile ("vpshufb %ymm0,%ymm4,%ymm2");
+ asm volatile ("vpshufb %ymm1,%ymm5,%ymm3");
+ asm volatile ("vpxor %ymm3,%ymm2,%ymm2");
+ asm volatile ("vmovdqa %%ymm2,%0" : "=m" (pa[i]));
+ }
+
+ raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_AVX2)
+/*
+ * RAID recovering for two disks AVX2 implementation
+ */
+void raid_rec2_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ const int N = 2;
+ uint8_t *p[N];
+ uint8_t *pa[N];
+ uint8_t G[N * N];
+ uint8_t V[N * N];
+ size_t i;
+ int j, k;
+
+ (void)nr; /* unused, it's always 2 */
+
+ /* setup the coefficients matrix */
+ for (j = 0; j < N; ++j)
+ for (k = 0; k < N; ++k)
+ G[j * N + k] = A(ip[j], id[k]);
+
+ /* invert it to solve the system of linear equations */
+ raid_invert(G, V, N);
+
+ /* compute delta parity */
+ raid_delta_gen(N, id, ip, nd, size, vv);
+
+ for (j = 0; j < N; ++j) {
+ p[j] = v[nd + ip[j]];
+ pa[j] = v[id[j]];
+ }
+
+ raid_avx_begin();
+
+ asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 32) {
+ asm volatile ("vmovdqa %0,%%ymm0" : : "m" (p[0][i]));
+ asm volatile ("vmovdqa %0,%%ymm2" : : "m" (pa[0][i]));
+ asm volatile ("vmovdqa %0,%%ymm1" : : "m" (p[1][i]));
+ asm volatile ("vmovdqa %0,%%ymm3" : : "m" (pa[1][i]));
+ asm volatile ("vpxor %ymm2,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm3,%ymm1,%ymm1");
+
+ asm volatile ("vpxor %ymm6,%ymm6,%ymm6");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfmulpshufb[V[0]][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfmulpshufb[V[0]][1][0]));
+ asm volatile ("vpsrlw $4,%ymm0,%ymm5");
+ asm volatile ("vpand %ymm7,%ymm0,%ymm4");
+ asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+ asm volatile ("vpshufb %ymm4,%ymm2,%ymm2");
+ asm volatile ("vpshufb %ymm5,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm2,%ymm6,%ymm6");
+ asm volatile ("vpxor %ymm3,%ymm6,%ymm6");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfmulpshufb[V[1]][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfmulpshufb[V[1]][1][0]));
+ asm volatile ("vpsrlw $4,%ymm1,%ymm5");
+ asm volatile ("vpand %ymm7,%ymm1,%ymm4");
+ asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+ asm volatile ("vpshufb %ymm4,%ymm2,%ymm2");
+ asm volatile ("vpshufb %ymm5,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm2,%ymm6,%ymm6");
+ asm volatile ("vpxor %ymm3,%ymm6,%ymm6");
+
+ asm volatile ("vmovdqa %%ymm6,%0" : "=m" (pa[0][i]));
+
+ asm volatile ("vpxor %ymm6,%ymm6,%ymm6");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfmulpshufb[V[2]][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfmulpshufb[V[2]][1][0]));
+ asm volatile ("vpsrlw $4,%ymm0,%ymm5");
+ asm volatile ("vpand %ymm7,%ymm0,%ymm4");
+ asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+ asm volatile ("vpshufb %ymm4,%ymm2,%ymm2");
+ asm volatile ("vpshufb %ymm5,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm2,%ymm6,%ymm6");
+ asm volatile ("vpxor %ymm3,%ymm6,%ymm6");
+
+ asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfmulpshufb[V[3]][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfmulpshufb[V[3]][1][0]));
+ asm volatile ("vpsrlw $4,%ymm1,%ymm5");
+ asm volatile ("vpand %ymm7,%ymm1,%ymm4");
+ asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+ asm volatile ("vpshufb %ymm4,%ymm2,%ymm2");
+ asm volatile ("vpshufb %ymm5,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm2,%ymm6,%ymm6");
+ asm volatile ("vpxor %ymm3,%ymm6,%ymm6");
+
+ asm volatile ("vmovdqa %%ymm6,%0" : "=m" (pa[1][i]));
+ }
+
+ raid_avx_end();
+}
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_AVX2)
+/*
+ * RAID recovering AVX2 implementation
+ */
+void raid_recX_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t **)vv;
+ int N = nr;
+ uint8_t *p[RAID_PARITY_MAX];
+ uint8_t *pa[RAID_PARITY_MAX];
+ uint8_t G[RAID_PARITY_MAX * RAID_PARITY_MAX];
+ uint8_t V[RAID_PARITY_MAX * RAID_PARITY_MAX];
+ uint8_t buffer[RAID_PARITY_MAX*32+32];
+ uint8_t *pd = __align_ptr(buffer, 32);
+ size_t i;
+ int j, k;
+
+ /* setup the coefficients matrix */
+ for (j = 0; j < N; ++j)
+ for (k = 0; k < N; ++k)
+ G[j * N + k] = A(ip[j], id[k]);
+
+ /* invert it to solve the system of linear equations */
+ raid_invert(G, V, N);
+
+ /* compute delta parity */
+ raid_delta_gen(N, id, ip, nd, size, vv);
+
+ for (j = 0; j < N; ++j) {
+ p[j] = v[nd + ip[j]];
+ pa[j] = v[id[j]];
+ }
+
+ raid_avx_begin();
+
+ asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfconst16.low4[0]));
+
+ for (i = 0; i < size; i += 32) {
+ /* delta */
+ for (j = 0; j < N; ++j) {
+ asm volatile ("vmovdqa %0,%%ymm0" : : "m" (p[j][i]));
+ asm volatile ("vmovdqa %0,%%ymm1" : : "m" (pa[j][i]));
+ asm volatile ("vpxor %ymm1,%ymm0,%ymm0");
+ asm volatile ("vmovdqa %%ymm0,%0" : "=m" (pd[j*32]));
+ }
+
+ /* reconstruct */
+ for (j = 0; j < N; ++j) {
+ asm volatile ("vpxor %ymm0,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm1,%ymm1,%ymm1");
+
+ for (k = 0; k < N; ++k) {
+ uint8_t m = V[j * N + k];
+
+ asm volatile ("vbroadcasti128 %0,%%ymm2" : : "m" (gfmulpshufb[m][0][0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfmulpshufb[m][1][0]));
+ asm volatile ("vmovdqa %0,%%ymm4" : : "m" (pd[k*32]));
+ asm volatile ("vpsrlw $4,%ymm4,%ymm5");
+ asm volatile ("vpand %ymm7,%ymm4,%ymm4");
+ asm volatile ("vpand %ymm7,%ymm5,%ymm5");
+ asm volatile ("vpshufb %ymm4,%ymm2,%ymm2");
+ asm volatile ("vpshufb %ymm5,%ymm3,%ymm3");
+ asm volatile ("vpxor %ymm2,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm3,%ymm1,%ymm1");
+ }
+
+ asm volatile ("vpxor %ymm1,%ymm0,%ymm0");
+ asm volatile ("vmovdqa %%ymm0,%0" : "=m" (pa[j][i]));
+ }
+ }
+
+ raid_avx_end();
+}
+#endif
+
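For reference, a minimal scalar sketch of the two GF(2^8) primitives the SSSE3/AVX2 kernels above vectorize: the conditional-xor multiply-by-2 behind the pcmpgtb/paddb/pand/pxor sequences (0x1d matching gfconst16.poly), and the low/high nibble table multiply behind the paired pshufb lookups. The table argument here is only an assumed stand-in for the gfgenpshufb/gfmulpshufb layout.

static inline uint8_t gf_mul2(uint8_t x)
{
        /* shift left; fold in the 0x1d reduction constant if the top bit was set */
        return (x << 1) ^ (x & 0x80 ? 0x1d : 0);
}

static inline uint8_t gf_mul_nibble_tab(const uint8_t tab[2][16], uint8_t x)
{
        /* product = (coeff * low nibble) ^ (coeff * high nibble << 4), both
         * precomputed in 16-entry tables - exactly what the two pshufb's do */
        return tab[0][x & 0x0f] ^ tab[1][x >> 4];
}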
diff --git a/c_src/raid/x86z.c b/c_src/raid/x86z.c
new file mode 100644
index 00000000..1e3fe89a
--- /dev/null
+++ b/c_src/raid/x86z.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2013 Andrea Mazzoleni
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "internal.h"
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+static const struct gfzconst16 {
+ uint8_t poly[16];
+ uint8_t half[16];
+ uint8_t low7[16];
+} gfzconst16 __aligned(64) =
+{
+ {
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d
+ },
+ {
+ 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e,
+ 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e
+ },
+ {
+ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
+ }
+};
+#endif
+
+#if defined(CONFIG_X86) && defined(CONFIG_SSE2)
+/*
+ * GENz (triple parity with powers of 2^-1) SSE2 implementation
+ */
+void raid_genz_sse2(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t**)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+
+ raid_sse_begin();
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfzconst16.poly[0]));
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfzconst16.half[0]));
+ asm volatile ("movdqa %0,%%xmm6" : : "m" (gfzconst16.low7[0]));
+
+ for (i = 0; i < size; i += 16) {
+ asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+ asm volatile ("movdqa %xmm0,%xmm1");
+ asm volatile ("movdqa %xmm0,%xmm2");
+ for (d = l - 1; d >= 0; --d) {
+ asm volatile ("pxor %xmm4,%xmm4");
+ asm volatile ("pcmpgtb %xmm1,%xmm4");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pxor %xmm4,%xmm1");
+
+ asm volatile ("movdqa %xmm2,%xmm4");
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("psllw $7,%xmm4");
+ asm volatile ("psrlw $1,%xmm2");
+ asm volatile ("pcmpgtb %xmm4,%xmm5");
+ asm volatile ("pand %xmm6,%xmm2");
+ asm volatile ("pand %xmm3,%xmm5");
+ asm volatile ("pxor %xmm5,%xmm2");
+
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm4,%xmm2");
+ }
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SSE2)
+/*
+ * GENz (triple parity with powers of 2^-1) SSE2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_genz_sse2ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t**)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+
+ raid_sse_begin();
+
+ asm volatile ("movdqa %0,%%xmm7" : : "m" (gfzconst16.poly[0]));
+ asm volatile ("movdqa %0,%%xmm3" : : "m" (gfzconst16.half[0]));
+ asm volatile ("movdqa %0,%%xmm11" : : "m" (gfzconst16.low7[0]));
+
+ for (i = 0; i < size; i += 32) {
+ asm volatile ("movdqa %0,%%xmm0" : : "m" (v[l][i]));
+ asm volatile ("movdqa %0,%%xmm8" : : "m" (v[l][i + 16]));
+ asm volatile ("movdqa %xmm0,%xmm1");
+ asm volatile ("movdqa %xmm8,%xmm9");
+ asm volatile ("movdqa %xmm0,%xmm2");
+ asm volatile ("movdqa %xmm8,%xmm10");
+ for (d = l - 1; d >= 0; --d) {
+ asm volatile ("movdqa %xmm2,%xmm6");
+ asm volatile ("movdqa %xmm10,%xmm14");
+ asm volatile ("pxor %xmm4,%xmm4");
+ asm volatile ("pxor %xmm12,%xmm12");
+ asm volatile ("pxor %xmm5,%xmm5");
+ asm volatile ("pxor %xmm13,%xmm13");
+ asm volatile ("psllw $7,%xmm6");
+ asm volatile ("psllw $7,%xmm14");
+ asm volatile ("psrlw $1,%xmm2");
+ asm volatile ("psrlw $1,%xmm10");
+ asm volatile ("pcmpgtb %xmm1,%xmm4");
+ asm volatile ("pcmpgtb %xmm9,%xmm12");
+ asm volatile ("pcmpgtb %xmm6,%xmm5");
+ asm volatile ("pcmpgtb %xmm14,%xmm13");
+ asm volatile ("paddb %xmm1,%xmm1");
+ asm volatile ("paddb %xmm9,%xmm9");
+ asm volatile ("pand %xmm11,%xmm2");
+ asm volatile ("pand %xmm11,%xmm10");
+ asm volatile ("pand %xmm7,%xmm4");
+ asm volatile ("pand %xmm7,%xmm12");
+ asm volatile ("pand %xmm3,%xmm5");
+ asm volatile ("pand %xmm3,%xmm13");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm12,%xmm9");
+ asm volatile ("pxor %xmm5,%xmm2");
+ asm volatile ("pxor %xmm13,%xmm10");
+
+ asm volatile ("movdqa %0,%%xmm4" : : "m" (v[d][i]));
+ asm volatile ("movdqa %0,%%xmm12" : : "m" (v[d][i + 16]));
+ asm volatile ("pxor %xmm4,%xmm0");
+ asm volatile ("pxor %xmm4,%xmm1");
+ asm volatile ("pxor %xmm4,%xmm2");
+ asm volatile ("pxor %xmm12,%xmm8");
+ asm volatile ("pxor %xmm12,%xmm9");
+ asm volatile ("pxor %xmm12,%xmm10");
+ }
+ asm volatile ("movntdq %%xmm0,%0" : "=m" (p[i]));
+ asm volatile ("movntdq %%xmm8,%0" : "=m" (p[i + 16]));
+ asm volatile ("movntdq %%xmm1,%0" : "=m" (q[i]));
+ asm volatile ("movntdq %%xmm9,%0" : "=m" (q[i + 16]));
+ asm volatile ("movntdq %%xmm2,%0" : "=m" (r[i]));
+ asm volatile ("movntdq %%xmm10,%0" : "=m" (r[i + 16]));
+ }
+
+ raid_sse_end();
+}
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AVX2)
+/*
+ * GENz (triple parity with powers of 2^-1) AVX2 implementation
+ *
+ * Note that it uses 16 registers, meaning that x64 is required.
+ */
+void raid_genz_avx2ext(int nd, size_t size, void **vv)
+{
+ uint8_t **v = (uint8_t**)vv;
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *r;
+ int d, l;
+ size_t i;
+
+ l = nd - 1;
+ p = v[nd];
+ q = v[nd + 1];
+ r = v[nd + 2];
+
+ raid_avx_begin();
+
+ asm volatile ("vbroadcasti128 %0,%%ymm7" : : "m" (gfzconst16.poly[0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm3" : : "m" (gfzconst16.half[0]));
+ asm volatile ("vbroadcasti128 %0,%%ymm11" : : "m" (gfzconst16.low7[0]));
+ asm volatile ("vpxor %ymm15,%ymm15,%ymm15");
+
+ for (i = 0; i < size; i += 64) {
+ asm volatile ("vmovdqa %0,%%ymm0" : : "m" (v[l][i]));
+ asm volatile ("vmovdqa %0,%%ymm8" : : "m" (v[l][i + 32]));
+ asm volatile ("vmovdqa %ymm0,%ymm1");
+ asm volatile ("vmovdqa %ymm8,%ymm9");
+ asm volatile ("vmovdqa %ymm0,%ymm2");
+ asm volatile ("vmovdqa %ymm8,%ymm10");
+ for (d = l - 1; d >= 0; --d) {
+ asm volatile ("vpsllw $7,%ymm2,%ymm6");
+ asm volatile ("vpsllw $7,%ymm10,%ymm14");
+ asm volatile ("vpsrlw $1,%ymm2,%ymm2");
+ asm volatile ("vpsrlw $1,%ymm10,%ymm10");
+ asm volatile ("vpcmpgtb %ymm1,%ymm15,%ymm4");
+ asm volatile ("vpcmpgtb %ymm9,%ymm15,%ymm12");
+ asm volatile ("vpcmpgtb %ymm6,%ymm15,%ymm5");
+ asm volatile ("vpcmpgtb %ymm14,%ymm15,%ymm13");
+ asm volatile ("vpaddb %ymm1,%ymm1,%ymm1");
+ asm volatile ("vpaddb %ymm9,%ymm9,%ymm9");
+ asm volatile ("vpand %ymm11,%ymm2,%ymm2");
+ asm volatile ("vpand %ymm11,%ymm10,%ymm10");
+ asm volatile ("vpand %ymm7,%ymm4,%ymm4");
+ asm volatile ("vpand %ymm7,%ymm12,%ymm12");
+ asm volatile ("vpand %ymm3,%ymm5,%ymm5");
+ asm volatile ("vpand %ymm3,%ymm13,%ymm13");
+ asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+ asm volatile ("vpxor %ymm5,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm13,%ymm10,%ymm10");
+
+ asm volatile ("vmovdqa %0,%%ymm4" : : "m" (v[d][i]));
+ asm volatile ("vmovdqa %0,%%ymm12" : : "m" (v[d][i + 32]));
+ asm volatile ("vpxor %ymm4,%ymm0,%ymm0");
+ asm volatile ("vpxor %ymm4,%ymm1,%ymm1");
+ asm volatile ("vpxor %ymm4,%ymm2,%ymm2");
+ asm volatile ("vpxor %ymm12,%ymm8,%ymm8");
+ asm volatile ("vpxor %ymm12,%ymm9,%ymm9");
+ asm volatile ("vpxor %ymm12,%ymm10,%ymm10");
+ }
+ asm volatile ("vmovntdq %%ymm0,%0" : "=m" (p[i]));
+ asm volatile ("vmovntdq %%ymm8,%0" : "=m" (p[i + 32]));
+ asm volatile ("vmovntdq %%ymm1,%0" : "=m" (q[i]));
+ asm volatile ("vmovntdq %%ymm9,%0" : "=m" (q[i + 32]));
+ asm volatile ("vmovntdq %%ymm2,%0" : "=m" (r[i]));
+ asm volatile ("vmovntdq %%ymm10,%0" : "=m" (r[i + 32]));
+ }
+
+ raid_avx_end();
+}
+#endif
+
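For reference, the scalar operation behind the GENz kernels' psllw/psrlw/pcmpgtb sequence above is a multiply by 2^-1 in GF(2^8): shift right by one and, if the low bit was set, fold in gfzconst16.half (0x8e, i.e. 0x11d >> 1); low7 (0x7f) is the mask applied after the word-wide shift. A hedged scalar sketch:

static inline uint8_t gf_div2(uint8_t x)
{
        /* inverse of the multiply-by-2 used by the GEN2/GEN3 parities */
        return (x >> 1) ^ ((x & 1) ? 0x8e : 0);
}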
diff --git a/c_src/tools-util.c b/c_src/tools-util.c
new file mode 100644
index 00000000..a1bcd8eb
--- /dev/null
+++ b/c_src/tools-util.c
@@ -0,0 +1,667 @@
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/fs.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <blkid.h>
+#include <uuid/uuid.h>
+
+#include "libbcachefs.h"
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "linux/sort.h"
+#include "tools-util.h"
+#include "libbcachefs/util.h"
+
+void die(const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+ fputc('\n', stderr);
+
+ _exit(EXIT_FAILURE);
+}
+
+char *mprintf(const char *fmt, ...)
+{
+ va_list args;
+ char *str;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vasprintf(&str, fmt, args);
+ va_end(args);
+
+ if (ret < 0)
+ die("insufficient memory");
+
+ return str;
+}
+
+void xpread(int fd, void *buf, size_t count, off_t offset)
+{
+ while (count) {
+ ssize_t r = pread(fd, buf, count, offset);
+
+ if (r < 0)
+ die("read error: %m");
+ if (!r)
+ die("pread error: unexpected eof");
+ count -= r;
+ offset += r;
+ }
+}
+
+void xpwrite(int fd, const void *buf, size_t count, off_t offset, const char *msg)
+{
+ ssize_t r = pwrite(fd, buf, count, offset);
+
+ if (r != count)
+ die("error writing %s (ret %zi err %m)", msg, r);
+}
+
+struct stat xfstatat(int dirfd, const char *path, int flags)
+{
+ struct stat stat;
+ if (fstatat(dirfd, path, &stat, flags))
+ die("stat error: %m");
+ return stat;
+}
+
+struct stat xfstat(int fd)
+{
+ struct stat stat;
+ if (fstat(fd, &stat))
+ die("stat error: %m");
+ return stat;
+}
+
+struct stat xstat(const char *path)
+{
+ struct stat statbuf;
+ if (stat(path, &statbuf))
+ die("stat error statting %s: %m", path);
+ return statbuf;
+}
+
+/* File parsing (i.e. sysfs) */
+
+void write_file_str(int dirfd, const char *path, const char *str)
+{
+ int fd = xopenat(dirfd, path, O_WRONLY);
+ ssize_t wrote, len = strlen(str);
+
+ wrote = write(fd, str, len);
+ if (wrote != len)
+ die("read error: %m");
+ close(fd);
+}
+
+char *read_file_str(int dirfd, const char *path)
+{
+ int fd = xopenat(dirfd, path, O_RDONLY);
+ ssize_t len = xfstat(fd).st_size;
+
+ char *buf = xmalloc(len + 1);
+
+ len = read(fd, buf, len);
+ if (len < 0)
+ die("read error: %m");
+
+ buf[len] = '\0';
+ if (len && buf[len - 1] == '\n')
+ buf[len - 1] = '\0';
+ if (!strlen(buf)) {
+ free(buf);
+ buf = NULL;
+ }
+
+ close(fd);
+
+ return buf;
+}
+
+u64 read_file_u64(int dirfd, const char *path)
+{
+ char *buf = read_file_str(dirfd, path);
+ u64 v;
+ if (bch2_strtou64_h(buf, &v))
+ die("read_file_u64: error parsing %s (got %s)", path, buf);
+ free(buf);
+ return v;
+}
+
+/* String list options: */
+
+ssize_t read_string_list_or_die(const char *opt, const char * const list[],
+ const char *msg)
+{
+ ssize_t v = match_string(list, -1, opt);
+ if (v < 0)
+ die("Bad %s %s", msg, opt);
+
+ return v;
+}
+
+/* Returns size of file or block device: */
+u64 get_size(int fd)
+{
+ struct stat statbuf = xfstat(fd);
+
+ if (!S_ISBLK(statbuf.st_mode))
+ return statbuf.st_size;
+
+ u64 ret;
+ xioctl(fd, BLKGETSIZE64, &ret);
+ return ret;
+}
+
+/* Returns blocksize, in bytes: */
+unsigned get_blocksize(int fd)
+{
+ struct stat statbuf = xfstat(fd);
+
+ if (!S_ISBLK(statbuf.st_mode))
+ return statbuf.st_blksize;
+
+ unsigned ret;
+ xioctl(fd, BLKPBSZGET, &ret);
+ return ret;
+}
+
+/* Open a block device, do magic blkid stuff to probe for existing filesystems: */
+int open_for_format(struct dev_opts *dev, bool force)
+{
+ blkid_probe pr;
+ const char *fs_type = NULL, *fs_label = NULL;
+ size_t fs_type_len, fs_label_len;
+
+ dev->bdev = blkdev_get_by_path(dev->path,
+ BLK_OPEN_READ|BLK_OPEN_WRITE|BLK_OPEN_EXCL|BLK_OPEN_BUFFERED,
+ dev, NULL);
+ int ret = PTR_ERR_OR_ZERO(dev->bdev);
+ if (ret < 0)
+ die("Error opening device to format %s: %s", dev->path, strerror(-ret));
+
+ if (!(pr = blkid_new_probe()))
+ die("blkid error 1");
+ if (blkid_probe_set_device(pr, dev->bdev->bd_fd, 0, 0))
+ die("blkid error 2");
+ if (blkid_probe_enable_partitions(pr, true) ||
+ blkid_probe_enable_superblocks(pr, true) ||
+ blkid_probe_set_superblocks_flags(pr,
+ BLKID_SUBLKS_LABEL|BLKID_SUBLKS_TYPE|BLKID_SUBLKS_MAGIC))
+ die("blkid error 3");
+ if (blkid_do_fullprobe(pr) < 0)
+ die("blkid error 4");
+
+ blkid_probe_lookup_value(pr, "TYPE", &fs_type, &fs_type_len);
+ blkid_probe_lookup_value(pr, "LABEL", &fs_label, &fs_label_len);
+
+ if (fs_type) {
+ if (fs_label)
+ printf("%s contains a %s filesystem labelled '%s'\n",
+ dev->path, fs_type, fs_label);
+ else
+ printf("%s contains a %s filesystem\n",
+ dev->path, fs_type);
+ if (!force) {
+ fputs("Proceed anyway?", stdout);
+ if (!ask_yn())
+ exit(EXIT_FAILURE);
+ }
+ while (blkid_do_probe(pr) == 0) {
+ if (blkid_do_wipe(pr, 0))
+ die("Failed to wipe preexisting metadata.");
+ }
+ }
+
+ blkid_free_probe(pr);
+ return ret;
+}
+
+bool ask_yn(void)
+{
+ const char *short_yes = "yY";
+ char *buf = NULL;
+ size_t buflen = 0;
+ bool ret;
+
+ fputs(" (y,n) ", stdout);
+ fflush(stdout);
+
+ if (getline(&buf, &buflen, stdin) < 0)
+ die("error reading from standard input");
+
+ ret = strchr(short_yes, buf[0]);
+ free(buf);
+ return ret;
+}
+
+static int range_cmp(const void *_l, const void *_r)
+{
+ const struct range *l = _l, *r = _r;
+
+ if (l->start < r->start)
+ return -1;
+ if (l->start > r->start)
+ return 1;
+ return 0;
+}
+
+void ranges_sort_merge(ranges *r)
+{
+ ranges tmp = { 0 };
+
+ sort(r->data, r->nr, sizeof(r->data[0]), range_cmp, NULL);
+
+ /* Merge contiguous ranges: */
+ darray_for_each(*r, i) {
+ struct range *t = tmp.nr ? &tmp.data[tmp.nr - 1] : NULL;
+
+ if (t && t->end >= i->start)
+ t->end = max(t->end, i->end);
+ else
+ darray_push(&tmp, *i);
+ }
+
+ darray_exit(r);
+ *r = tmp;
+}
+
+void ranges_roundup(ranges *r, unsigned block_size)
+{
+ darray_for_each(*r, i) {
+ i->start = round_down(i->start, block_size);
+ i->end = round_up(i->end, block_size);
+ }
+}
+
+void ranges_rounddown(ranges *r, unsigned block_size)
+{
+ darray_for_each(*r, i) {
+ i->start = round_up(i->start, block_size);
+ i->end = round_down(i->end, block_size);
+ i->end = max(i->end, i->start);
+ }
+}
+
+struct fiemap_extent fiemap_iter_next(struct fiemap_iter *iter)
+{
+ struct fiemap_extent e;
+
+ BUG_ON(iter->idx > iter->f->fm_mapped_extents);
+
+ if (iter->idx == iter->f->fm_mapped_extents) {
+ xioctl(iter->fd, FS_IOC_FIEMAP, iter->f);
+
+ if (!iter->f->fm_mapped_extents)
+ return (struct fiemap_extent) { .fe_length = 0 };
+
+ iter->idx = 0;
+ }
+
+ e = iter->f->fm_extents[iter->idx++];
+ BUG_ON(!e.fe_length);
+
+ iter->f->fm_start = e.fe_logical + e.fe_length;
+
+ return e;
+}
+
+char *strcmp_prefix(char *a, const char *a_prefix)
+{
+ while (*a_prefix && *a == *a_prefix) {
+ a++;
+ a_prefix++;
+ }
+ return *a_prefix ? NULL : a;
+}
+
+/* crc32c */
+
+static u32 crc32c_default(u32 crc, const void *buf, size_t size)
+{
+ static const u32 crc32c_tab[] = {
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+ 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+ 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+ 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+ 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+ 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+ 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+ 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+ 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+ 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+ 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+ 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+ 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+ 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+ 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+ 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+ 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+ 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+ 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+ 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+ 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+ 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+ 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+ 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+ 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+ 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+ 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+ 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+ 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+ 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+ 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+ 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+ 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
+ };
+ const u8 *p = buf;
+
+ while (size--)
+ crc = crc32c_tab[(crc ^ *p++) & 0xFFL] ^ (crc >> 8);
+
+ return crc;
+}
+
+#include <linux/compiler.h>
+
+#ifdef __x86_64__
+
+#ifdef CONFIG_X86_64
+#define REX_PRE "0x48, "
+#else
+#define REX_PRE
+#endif
+
+static u32 crc32c_sse42(u32 crc, const void *buf, size_t size)
+{
+ while (size >= sizeof(long)) {
+ const unsigned long *d = buf;
+
+ __asm__ __volatile__(
+ ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+ :"=S"(crc)
+ :"0"(crc), "c"(*d)
+ );
+ buf += sizeof(long);
+ size -= sizeof(long);
+ }
+
+ while (size) {
+ const u8 *d = buf;
+
+ __asm__ __volatile__(
+ ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+ :"=S"(crc)
+ :"0"(crc), "c"(*d)
+ );
+ buf += 1;
+ size -= 1;
+ }
+
+ return crc;
+}
+
+#endif
+
+static void *resolve_crc32c(void)
+{
+#ifdef __x86_64__
+ if (__builtin_cpu_supports("sse4.2"))
+ return crc32c_sse42;
+#endif
+ return crc32c_default;
+}
+
+/*
+ * ifunc is buggy and I don't know what breaks it (LTO?)
+ */
+#ifdef HAVE_WORKING_IFUNC
+
+static void *ifunc_resolve_crc32c(void)
+{
+ __builtin_cpu_init();
+
+ return resolve_crc32c;
+}
+
+u32 crc32c(u32, const void *, size_t)
+ __attribute__((ifunc("ifunc_resolve_crc32c")));
+
+#else
+
+u32 crc32c(u32 crc, const void *buf, size_t size)
+{
+ static u32 (*real_crc32c)(u32, const void *, size_t);
+
+ if (unlikely(!real_crc32c))
+ real_crc32c = resolve_crc32c();
+
+ return real_crc32c(crc, buf, size);
+}
+
+#endif /* HAVE_WORKING_IFUNC */
+
+char *dev_to_name(dev_t dev)
+{
+ char *line = NULL, *name = NULL;
+ size_t n = 0;
+
+ FILE *f = fopen("/proc/partitions", "r");
+ if (!f)
+ die("error opening /proc/partitions: %m");
+
+ while (getline(&line, &n, f) != -1) {
+ unsigned ma, mi;
+ u64 sectors;
+
+ name = realloc(name, n + 1);
+
+ if (sscanf(line, " %u %u %llu %s", &ma, &mi, &sectors, name) == 4 &&
+ ma == major(dev) && mi == minor(dev))
+ goto found;
+ }
+
+ free(name);
+ name = NULL;
+found:
+ fclose(f);
+ free(line);
+ return name;
+}
+
+char *dev_to_path(dev_t dev)
+{
+ char *name = dev_to_name(dev);
+ if (!name)
+ return NULL;
+
+ char *path = mprintf("/dev/%s", name);
+
+ free(name);
+ return path;
+}
+
+struct mntent *dev_to_mount(char *dev)
+{
+ struct mntent *mnt, *ret = NULL;
+ FILE *f = setmntent("/proc/mounts", "r");
+ if (!f)
+ die("error opening /proc/mounts: %m");
+
+ struct stat d1 = xstat(dev);
+
+ while ((mnt = getmntent(f))) {
+ char *d, *p = mnt->mnt_fsname;
+
+ while ((d = strsep(&p, ":"))) {
+ struct stat d2;
+
+ if (stat(d, &d2))
+ continue;
+
+ if (S_ISBLK(d1.st_mode) != S_ISBLK(d2.st_mode))
+ continue;
+
+ if (S_ISBLK(d1.st_mode)) {
+ if (d1.st_rdev != d2.st_rdev)
+ continue;
+ } else {
+ if (d1.st_dev != d2.st_dev ||
+ d1.st_ino != d2.st_ino)
+ continue;
+ }
+
+ ret = mnt;
+ goto found;
+ }
+ }
+found:
+ fclose(f);
+ return ret;
+}
+
+int dev_mounted(char *dev)
+{
+ struct mntent *mnt = dev_to_mount(dev);
+
+ if (!mnt)
+ return 0;
+ if (hasmntopt(mnt, "ro"))
+ return 1;
+ return 2;
+}
+
+static int kstrtoull_symbolic(const char *s, unsigned int base, unsigned long long *res)
+{
+ if (!strcmp(s, "U64_MAX")) {
+ *res = U64_MAX;
+ return 0;
+ }
+
+ if (!strcmp(s, "U32_MAX")) {
+ *res = U32_MAX;
+ return 0;
+ }
+
+ return kstrtoull(s, base, res);
+}
+
+static int kstrtouint_symbolic(const char *s, unsigned int base, unsigned *res)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = kstrtoull_symbolic(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned long long)(unsigned int)tmp)
+ return -ERANGE;
+ *res = tmp;
+ return 0;
+}
+
+struct bpos bpos_parse(char *buf)
+{
+ char *orig = strdup(buf);
+ char *s = buf;
+
+ char *inode_s = strsep(&s, ":");
+ char *offset_s = strsep(&s, ":");
+ char *snapshot_s = strsep(&s, ":");
+
+ if (!inode_s || !offset_s || s)
+ die("invalid bpos %s", orig);
+ free(orig);
+
+ u64 inode_v = 0, offset_v = 0;
+ u32 snapshot_v = 0;
+ if (kstrtoull_symbolic(inode_s, 10, &inode_v))
+ die("invalid bpos.inode %s", inode_s);
+
+ if (kstrtoull_symbolic(offset_s, 10, &offset_v))
+ die("invalid bpos.offset %s", offset_s);
+
+ if (snapshot_s &&
+ kstrtouint_symbolic(snapshot_s, 10, &snapshot_v))
+ die("invalid bpos.snapshot %s", snapshot_s);
+
+ return (struct bpos) { .inode = inode_v, .offset = offset_v, .snapshot = snapshot_v };
+}
+
+struct bbpos bbpos_parse(char *buf)
+{
+ char *s = buf, *field;
+ struct bbpos ret;
+
+ if (!(field = strsep(&s, ":")))
+ die("invalid bbpos %s", buf);
+
+ ret.btree = read_string_list_or_die(field, __bch2_btree_ids, "btree id");
+
+ if (!s)
+ die("invalid bbpos %s", buf);
+
+ ret.pos = bpos_parse(s);
+ return ret;
+}
+
+darray_str get_or_split_cmdline_devs(int argc, char *argv[])
+{
+ darray_str ret = {};
+
+ if (argc == 1) {
+ bch2_split_devs(argv[0], &ret);
+ } else {
+ for (unsigned i = 0; i < argc; i++)
+ darray_push(&ret, strdup(argv[i]));
+ }
+
+ return ret;
+}
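bpos_parse() above accepts "inode:offset[:snapshot]" (with U64_MAX/U32_MAX accepted symbolically) and bbpos_parse() prefixes that with a btree name looked up in __bch2_btree_ids. A minimal usage sketch, noting that both parsers tokenize the buffer in place and that "extents" and the numbers are only example values:

        char spec[] = "extents:4096:0:U32_MAX";
        struct bbpos bb = bbpos_parse(spec);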
diff --git a/c_src/tools-util.h b/c_src/tools-util.h
new file mode 100644
index 00000000..56331384
--- /dev/null
+++ b/c_src/tools-util.h
@@ -0,0 +1,204 @@
+#ifndef _TOOLS_UTIL_H
+#define _TOOLS_UTIL_H
+
+#include <errno.h>
+#include <mntent.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/bug.h>
+#include <linux/byteorder.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/bbpos.h"
+#include "libbcachefs/darray.h"
+
+#define noreturn __attribute__((noreturn))
+
+void die(const char *, ...)
+ __attribute__ ((format (printf, 1, 2))) noreturn;
+char *mprintf(const char *, ...)
+ __attribute__ ((format (printf, 1, 2)));
+void xpread(int, void *, size_t, off_t);
+void xpwrite(int, const void *, size_t, off_t, const char *);
+struct stat xfstatat(int, const char *, int);
+struct stat xfstat(int);
+struct stat xstat(const char *);
+
+static inline void *xmalloc(size_t size)
+{
+ void *p = malloc(size);
+
+ if (!p)
+ die("insufficient memory");
+
+ memset(p, 0, size);
+ return p;
+}
+
+static inline void *xcalloc(size_t count, size_t size)
+{
+ void *p = calloc(count, size);
+
+ if (!p)
+ die("insufficient memory");
+
+ return p;
+}
+
+static inline void *xrealloc(void *p, size_t size)
+{
+ p = realloc(p, size);
+ if (!p)
+ die("insufficient memory");
+
+ return p;
+}
+
+#define xopenat(_dirfd, _path, ...) \
+({ \
+ int _fd = openat((_dirfd), (_path), __VA_ARGS__); \
+ if (_fd < 0) \
+ die("Error opening %s: %m", (_path)); \
+ _fd; \
+})
+
+#define xopen(...) xopenat(AT_FDCWD, __VA_ARGS__)
+
+#define xioctl(_fd, _nr, ...) \
+({ \
+ int _ret = ioctl((_fd), (_nr), ##__VA_ARGS__); \
+ if (_ret < 0) \
+ die(#_nr " ioctl error: %m"); \
+ _ret; \
+})
+
+void write_file_str(int, const char *, const char *);
+char *read_file_str(int, const char *);
+u64 read_file_u64(int, const char *);
+
+ssize_t read_string_list_or_die(const char *, const char * const[],
+ const char *);
+
+u64 get_size(int);
+unsigned get_blocksize(int);
+struct dev_opts;
+int open_for_format(struct dev_opts *, bool);
+
+bool ask_yn(void);
+
+struct range {
+ u64 start;
+ u64 end;
+};
+
+typedef DARRAY(struct range) ranges;
+
+static inline void range_add(ranges *data, u64 offset, u64 size)
+{
+ darray_push(data, ((struct range) {
+ .start = offset,
+ .end = offset + size
+ }));
+}
+
+void ranges_sort_merge(ranges *);
+void ranges_roundup(ranges *, unsigned);
+void ranges_rounddown(ranges *, unsigned);
+
+struct hole_iter {
+ ranges r;
+ size_t idx;
+ u64 end;
+};
+
+static inline struct range hole_iter_next(struct hole_iter *iter)
+{
+ struct range r = {
+ .start = iter->idx ? iter->r.data[iter->idx - 1].end : 0,
+ .end = iter->idx < iter->r.nr
+ ? iter->r.data[iter->idx].start : iter->end,
+ };
+
+ BUG_ON(r.start > r.end);
+
+ iter->idx++;
+ return r;
+}
+
+#define for_each_hole(_iter, _ranges, _end, _i) \
+ for (_iter = (struct hole_iter) { .r = _ranges, .end = _end }; \
+ (_iter.idx <= _iter.r.nr && \
+ (_i = hole_iter_next(&_iter), true));)
+
+#include <linux/fiemap.h>
+
+struct fiemap_iter {
+ struct fiemap *f;
+ unsigned idx;
+ int fd;
+};
+
+static inline void fiemap_iter_init(struct fiemap_iter *iter, int fd)
+{
+ memset(iter, 0, sizeof(*iter));
+
+ iter->f = xmalloc(sizeof(struct fiemap) +
+ sizeof(struct fiemap_extent) * 1024);
+
+ iter->f->fm_extent_count = 1024;
+ iter->f->fm_length = FIEMAP_MAX_OFFSET;
+ iter->fd = fd;
+}
+
+static inline void fiemap_iter_exit(struct fiemap_iter *iter)
+{
+ free(iter->f);
+ memset(iter, 0, sizeof(*iter));
+}
+
+struct fiemap_extent fiemap_iter_next(struct fiemap_iter *);
+
+#define fiemap_for_each(fd, iter, extent) \
+ for (fiemap_iter_init(&iter, fd); \
+ (extent = fiemap_iter_next(&iter)).fe_length;)
+
+char *strcmp_prefix(char *, const char *);
+
+u32 crc32c(u32, const void *, size_t);
+
+char *dev_to_name(dev_t);
+char *dev_to_path(dev_t);
+struct mntent *dev_to_mount(char *);
+int dev_mounted(char *);
+
+#define args_shift(_nr) \
+do { \
+ unsigned _n = min((_nr), argc); \
+ argc -= _n; \
+ argv += _n; \
+} while (0)
+
+#define arg_pop() \
+({ \
+ char *_ret = argc ? argv[0] : NULL; \
+ if (_ret) \
+ args_shift(1); \
+ _ret; \
+})
+
+struct bpos bpos_parse(char *);
+struct bbpos bbpos_parse(char *);
+
+darray_str get_or_split_cmdline_devs(int argc, char *argv[]);
+
+#endif /* _TOOLS_UTIL_H */
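A minimal sketch of how the fiemap iterator and the ranges helpers declared above compose: walk a file's mapped extents, collect them, then sort, merge and round them to a block size. The field choices (fe_logical, fe_length) are illustrative.

static ranges file_extents_sketch(int fd, unsigned block_size)
{
        struct fiemap_iter iter;
        struct fiemap_extent e;
        ranges r = {};

        fiemap_for_each(fd, iter, e)
                range_add(&r, e.fe_logical, e.fe_length);
        fiemap_iter_exit(&iter);

        ranges_sort_merge(&r);
        ranges_rounddown(&r, block_size);
        return r;
}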