-rw-r--r--  .bcachefs_revision                    2
-rw-r--r--  include/linux/zstd.h                449
-rw-r--r--  include/linux/zstd_errors.h          77
-rw-r--r--  libbcachefs/alloc_background.h        3
-rw-r--r--  libbcachefs/bkey_methods.h           27
-rw-r--r--  libbcachefs/btree_io.c                3
-rw-r--r--  libbcachefs/btree_iter.c            156
-rw-r--r--  libbcachefs/btree_iter.h             32
-rw-r--r--  libbcachefs/btree_key_cache.c         4
-rw-r--r--  libbcachefs/btree_locking.h          10
-rw-r--r--  libbcachefs/btree_types.h             5
-rw-r--r--  libbcachefs/btree_update_leaf.c      64
-rw-r--r--  libbcachefs/buckets.c               168
-rw-r--r--  libbcachefs/buckets.h                13
-rw-r--r--  libbcachefs/compress.c               22
-rw-r--r--  libbcachefs/dirent.c                 17
-rw-r--r--  libbcachefs/ec.h                      2
-rw-r--r--  libbcachefs/error.h                  20
-rw-r--r--  libbcachefs/extents.h                 8
-rw-r--r--  libbcachefs/fs-io.c                  29
-rw-r--r--  libbcachefs/fs.c                      6
-rw-r--r--  libbcachefs/inode.c                   4
-rw-r--r--  libbcachefs/inode.h                   4
-rw-r--r--  libbcachefs/journal.c                88
-rw-r--r--  libbcachefs/journal.h                 1
-rw-r--r--  libbcachefs/journal_reclaim.c        18
-rw-r--r--  libbcachefs/journal_types.h           4
-rw-r--r--  libbcachefs/recovery.c                3
-rw-r--r--  libbcachefs/reflink.h                 6
-rw-r--r--  libbcachefs/str_hash.h               21
-rw-r--r--  libbcachefs/sysfs.c                  24
-rw-r--r--  libbcachefs/tests.c                  14
-rw-r--r--  libbcachefs/util.h                    2
-rw-r--r--  libbcachefs/xattr.c                  10
-rw-r--r--  linux/zstd_compress_module.c        157
-rw-r--r--  linux/zstd_decompress_module.c      103
36 files changed, 1273 insertions, 303 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 8e1032a..74f5970 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-e318fabeb424d4b8fdd46329125c30aaa4f9006a
+e48731a188639563444d475622782b7963df4b47
diff --git a/include/linux/zstd.h b/include/linux/zstd.h
index 0dd1b02..b0fa1ed 100644
--- a/include/linux/zstd.h
+++ b/include/linux/zstd.h
@@ -1,10 +1,447 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd) and
+ * the GPLv2 (found in the COPYING file in the root directory of
+ * https://github.com/facebook/zstd). You may select, at your option, one of the
+ * above-listed licenses.
+ */
+
+#ifndef LINUX_ZSTD_H
+#define LINUX_ZSTD_H
+
+/**
+ * This is a kernel-style API that wraps the upstream zstd API, which cannot be
+ * used directly because the symbols aren't exported. It exposes the minimal
+ * functionality which is currently required by users of zstd in the kernel.
+ * Expose extra functions from lib/zstd/zstd.h as needed.
+ */
+
+/* ====== Dependency ====== */
+#include <linux/types.h>
#include <zstd.h>
+#include <linux/zstd_errors.h>
+
+/* ====== Helper Functions ====== */
+/**
+ * zstd_compress_bound() - maximum compressed size in worst case scenario
+ * @src_size: The size of the data to compress.
+ *
+ * Return: The maximum compressed size in the worst case scenario.
+ */
+size_t zstd_compress_bound(size_t src_size);
+
+/**
+ * zstd_is_error() - tells if a size_t function result is an error code
+ * @code: The function result to check for error.
+ *
+ * Return: Non-zero iff the code is an error.
+ */
+unsigned int zstd_is_error(size_t code);
+
+/**
+ * enum zstd_error_code - zstd error codes
+ */
+typedef ZSTD_ErrorCode zstd_error_code;
+
+/**
+ * zstd_get_error_code() - translates an error function result to an error code
+ * @code: The function result for which zstd_is_error(code) is true.
+ *
+ * Return: A unique error code for this error.
+ */
+zstd_error_code zstd_get_error_code(size_t code);
+
+/**
+ * zstd_get_error_name() - translates an error function result to a string
+ * @code: The function result for which zstd_is_error(code) is true.
+ *
+ * Return: An error string corresponding to the error code.
+ */
+const char *zstd_get_error_name(size_t code);
+
+/**
+ * zstd_min_clevel() - minimum allowed compression level
+ *
+ * Return: The minimum allowed compression level.
+ */
+int zstd_min_clevel(void);
+
+/**
+ * zstd_max_clevel() - maximum allowed compression level
+ *
+ * Return: The maximum allowed compression level.
+ */
+int zstd_max_clevel(void);
+
+/* ====== Parameter Selection ====== */
+
+/**
+ * enum zstd_strategy - zstd compression search strategy
+ *
+ * From faster to stronger. See zstd_lib.h.
+ */
+typedef ZSTD_strategy zstd_strategy;
+
+/**
+ * struct zstd_compression_parameters - zstd compression parameters
+ * @windowLog: Log of the largest match distance. Larger means more
+ * compression, and more memory needed during decompression.
+ * @chainLog: Fully searched segment. Larger means more compression,
+ * slower, and more memory (useless for fast).
+ * @hashLog: Dispatch table. Larger means more compression,
+ * slower, and more memory.
+ * @searchLog: Number of searches. Larger means more compression and slower.
+ * @searchLength: Match length searched. Larger means faster decompression,
+ * sometimes less compression.
+ * @targetLength: Acceptable match size for optimal parser (only). Larger means
+ * more compression, and slower.
+ * @strategy: The zstd compression strategy.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_compressionParameters zstd_compression_parameters;
+
+/**
+ * struct zstd_frame_parameters - zstd frame parameters
+ * @contentSizeFlag: Controls whether content size will be present in the
+ * frame header (when known).
+ * @checksumFlag: Controls whether a 32-bit checksum is generated at the
+ * end of the frame for error detection.
+ * @noDictIDFlag: Controls whether dictID will be saved into the frame
+ * header when using dictionary compression.
+ *
+ * The default value is all fields set to 0. See zstd_lib.h.
+ */
+typedef ZSTD_frameParameters zstd_frame_parameters;
+
+/**
+ * struct zstd_parameters - zstd parameters
+ * @cParams: The compression parameters.
+ * @fParams: The frame parameters.
+ */
+typedef ZSTD_parameters zstd_parameters;
+
+/**
+ * zstd_get_params() - returns zstd_parameters for selected level
+ * @level: The compression level
+ * @estimated_src_size: The estimated source size to compress or 0
+ * if unknown.
+ *
+ * Return: The selected zstd_parameters.
+ */
+zstd_parameters zstd_get_params(int level,
+ unsigned long long estimated_src_size);
+
+/* ====== Single-pass Compression ====== */
+
+typedef ZSTD_CCtx zstd_cctx;
+
+/**
+ * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
+ * @parameters: The compression parameters to be used.
+ *
+ * If multiple compression parameters might be used, the caller must call
+ * zstd_cctx_workspace_bound() for each set of parameters and use the maximum
+ * size.
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ * zstd_init_cctx().
+ */
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
+
+/**
+ * zstd_init_cctx() - initialize a zstd compression context
+ * @workspace: The workspace to emplace the context into. It must outlive
+ * the returned context.
+ * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to
+ * determine how large the workspace must be.
+ *
+ * Return: A zstd compression context or NULL on error.
+ */
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size);
+
+/**
+ * zstd_compress_cctx() - compress src into dst with the initialized parameters
+ * @cctx: The context. Must have been initialized with zstd_init_cctx().
+ * @dst: The buffer to compress src into.
+ * @dst_capacity: The size of the destination buffer. May be any size, but
+ * ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src: The data to compress.
+ * @src_size: The size of the data to compress.
+ * @parameters: The compression parameters to be used.
+ *
+ * Return: The compressed size or an error, which can be checked using
+ * zstd_is_error().
+ */
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size, const zstd_parameters *parameters);
+
+/* ====== Single-pass Decompression ====== */
+
+typedef ZSTD_DCtx zstd_dctx;
+
+/**
+ * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ * zstd_init_dctx().
+ */
+size_t zstd_dctx_workspace_bound(void);
+
+/**
+ * zstd_init_dctx() - initialize a zstd decompression context
+ * @workspace: The workspace to emplace the context into. It must outlive
+ * the returned context.
+ * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to
+ * determine how large the workspace must be.
+ *
+ * Return: A zstd decompression context or NULL on error.
+ */
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size);
+
+/**
+ * zstd_decompress_dctx() - decompress zstd compressed src into dst
+ * @dctx: The decompression context.
+ * @dst: The buffer to decompress src into.
+ * @dst_capacity: The size of the destination buffer. Must be at least as large
+ * as the decompressed size. If the caller cannot upper bound the
+ * decompressed size, then it's better to use the streaming API.
+ * @src: The zstd compressed data to decompress. Multiple concatenated
+ * frames and skippable frames are allowed.
+ * @src_size: The exact size of the data to decompress.
+ *
+ * Return: The decompressed size or an error, which can be checked using
+ * zstd_is_error().
+ */
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size);
+
+/* ====== Streaming Buffers ====== */
+
+/**
+ * struct zstd_in_buffer - input buffer for streaming
+ * @src: Start of the input buffer.
+ * @size: Size of the input buffer.
+ * @pos: Position where reading stopped. Will be updated.
+ * Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_inBuffer zstd_in_buffer;
+
+/**
+ * struct zstd_out_buffer - output buffer for streaming
+ * @dst: Start of the output buffer.
+ * @size: Size of the output buffer.
+ * @pos: Position where writing stopped. Will be updated.
+ * Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_outBuffer zstd_out_buffer;
+
+/* ====== Streaming Compression ====== */
+
+typedef ZSTD_CStream zstd_cstream;
+
+/**
+ * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
+ * @cparams: The compression parameters to be used for compression.
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ * zstd_init_cstream().
+ */
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
+
+/**
+ * zstd_init_cstream() - initialize a zstd streaming compression context
+ * @parameters:       The zstd parameters to use for compression.
+ * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller
+ * must pass the source size (zero means empty source).
+ * Otherwise, the caller may optionally pass the source
+ * size, or zero if unknown.
+ * @workspace: The workspace to emplace the context into. It must outlive
+ * the returned context.
+ * @workspace_size: The size of workspace.
+ * Use zstd_cstream_workspace_bound(params->cparams) to
+ * determine how large the workspace must be.
+ *
+ * Return: The zstd streaming compression context or NULL on error.
+ */
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+ unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
+
+/**
+ * zstd_reset_cstream() - reset the context using parameters from creation
+ * @cstream: The zstd streaming compression context to reset.
+ * @pledged_src_size: Optionally the source size, or zero if unknown.
+ *
+ * Resets the context using the parameters from creation. Skips dictionary
+ * loading, since it can be reused. If `pledged_src_size` is non-zero the frame
+ * content size is always written into the frame header.
+ *
+ * Return: Zero or an error, which can be checked using
+ * zstd_is_error().
+ */
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+ unsigned long long pledged_src_size);
+
+/**
+ * zstd_compress_stream() - streaming compress some of input into output
+ * @cstream: The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ * compressed data was written.
+ * @input: Source buffer. `input->pos` is updated to indicate how much data
+ * was read. Note that it may not consume the entire input, in which
+ * case `input->pos < input->size`, and it's up to the caller to
+ * present remaining data again.
+ *
+ * The `input` and `output` buffers may be any size. Guaranteed to make some
+ * forward progress if `input` and `output` are not empty.
+ *
+ * Return: A hint for the number of bytes to use as the input for the next
+ * function call or an error, which can be checked using
+ * zstd_is_error().
+ */
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+ zstd_in_buffer *input);
+
+/**
+ * zstd_flush_stream() - flush internal buffers into output
+ * @cstream: The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ * compressed data was written.
+ *
+ * zstd_flush_stream() must be called until it returns 0, meaning all the data
+ * has been flushed. Since zstd_flush_stream() causes a block to be ended,
+ * calling it too often will degrade the compression ratio.
+ *
+ * Return: The number of bytes still present within internal buffers or an
+ * error, which can be checked using zstd_is_error().
+ */
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
+
+/**
+ * zstd_end_stream() - flush internal buffers into output and end the frame
+ * @cstream: The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ * compressed data was written.
+ *
+ * zstd_end_stream() must be called until it returns 0, meaning all the data has
+ * been flushed and the frame epilogue has been written.
+ *
+ * Return: The number of bytes still present within internal buffers or an
+ * error, which can be checked using zstd_is_error().
+ */
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
+
+/* ====== Streaming Decompression ====== */
+
+typedef ZSTD_DStream zstd_dstream;
+
+/**
+ * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
+ * @max_window_size: The maximum window size allowed for compressed frames.
+ *
+ * Return: A lower bound on the size of the workspace that is passed
+ * to zstd_init_dstream().
+ */
+size_t zstd_dstream_workspace_bound(size_t max_window_size);
+
+/**
+ * zstd_init_dstream() - initialize a zstd streaming decompression context
+ * @max_window_size: The maximum window size allowed for compressed frames.
+ * @workspace: The workspace to emplace the context into. It must outlive
+ * the returned context.
+ * @workspace_size:  The size of workspace.
+ * Use zstd_dstream_workspace_bound(max_window_size) to
+ * determine how large the workspace must be.
+ *
+ * Return: The zstd streaming decompression context.
+ */
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+ size_t workspace_size);
+
+/**
+ * zstd_reset_dstream() - reset the context using parameters from creation
+ * @dstream: The zstd streaming decompression context to reset.
+ *
+ * Resets the context using the parameters from creation. Skips dictionary
+ * loading, since it can be reused.
+ *
+ * Return: Zero or an error, which can be checked using zstd_is_error().
+ */
+size_t zstd_reset_dstream(zstd_dstream *dstream);
+
+/**
+ * zstd_decompress_stream() - streaming decompress some of input into output
+ * @dstream: The zstd streaming decompression context.
+ * @output: Destination buffer. `output.pos` is updated to indicate how much
+ * decompressed data was written.
+ * @input: Source buffer. `input.pos` is updated to indicate how much data was
+ * read. Note that it may not consume the entire input, in which case
+ * `input.pos < input.size`, and it's up to the caller to present
+ * remaining data again.
+ *
+ * The `input` and `output` buffers may be any size. Guaranteed to make some
+ * forward progress if `input` and `output` are not empty.
+ * zstd_decompress_stream() will not consume the last byte of the frame until
+ * the entire frame is flushed.
+ *
+ * Return: Returns 0 iff a frame is completely decoded and fully flushed.
+ * Otherwise returns a hint for the number of bytes to use as the
+ * input for the next function call or an error, which can be checked
+ * using zstd_is_error(). The size hint will never load more than the
+ * frame.
+ */
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+ zstd_in_buffer *input);
+
+/* ====== Frame Inspection Functions ====== */
+
+/**
+ * zstd_find_frame_compressed_size() - returns the size of a compressed frame
+ * @src: Source buffer. It should point to the start of a zstd encoded
+ * frame or a skippable frame.
+ * @src_size: The size of the source buffer. It must be at least as large as the
+ * size of the frame.
+ *
+ * Return: The compressed size of the frame pointed to by `src` or an error,
+ *         which can be checked with zstd_is_error().
+ * Suitable to pass to ZSTD_decompress() or similar functions.
+ */
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
-#define ZSTD_initDCtx(w, s) ZSTD_initStaticDCtx(w, s)
-#define ZSTD_initCCtx(w, s) ZSTD_initStaticCCtx(w, s)
+/**
+ * struct zstd_frame_params - zstd frame parameters stored in the frame header
+ * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
+ * present.
+ * @windowSize: The window size, or 0 if the frame is a skippable frame.
+ * @blockSizeMax: The maximum block size.
+ * @frameType: The frame type (zstd or skippable)
+ * @headerSize: The size of the frame header.
+ * @dictID: The dictionary id, or 0 if not present.
+ * @checksumFlag: Whether a checksum was used.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_frameHeader zstd_frame_header;
-#define ZSTD_compressCCtx(w, dst, d_len, src, src_len, params) \
- ZSTD_compressCCtx(w, dst, d_len, src, src_len, 0)
+/**
+ * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
+ * @params: On success the frame parameters are written here.
+ * @src: The source buffer. It must point to a zstd or skippable frame.
+ * @src_size: The size of the source buffer.
+ *
+ * Return: 0 on success. If more data is required it returns how many bytes
+ * must be provided to make forward progress. Otherwise it returns
+ * an error, which can be checked using zstd_is_error().
+ */
+size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
+ size_t src_size);
-#define ZSTD_CCtxWorkspaceBound(p) ZSTD_estimateCCtxSize(0)
-#define ZSTD_DCtxWorkspaceBound() ZSTD_estimateDCtxSize()
+#endif /* LINUX_ZSTD_H */
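
Reading aid (not part of the commit): a minimal sketch of how a kernel caller might drive the single-pass API declared above. The example_compress() name, the kvmalloc() workspace management, and the errno mapping are illustrative assumptions, not bcachefs code.

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/zstd.h>

/* Sketch: one-shot compression with a caller-managed workspace. */
static ssize_t example_compress(void *dst, size_t dst_capacity,
				const void *src, size_t src_size)
{
	zstd_parameters params = zstd_get_params(0, src_size);
	size_t workspace_size = zstd_cctx_workspace_bound(&params.cParams);
	void *workspace;
	zstd_cctx *cctx;
	size_t ret;

	workspace = kvmalloc(workspace_size, GFP_KERNEL);
	if (!workspace)
		return -ENOMEM;

	cctx = zstd_init_cctx(workspace, workspace_size);
	if (!cctx) {
		kvfree(workspace);
		return -EINVAL;
	}

	ret = zstd_compress_cctx(cctx, dst, dst_capacity,
				 src, src_size, &params);
	kvfree(workspace);

	return zstd_is_error(ret) ? -EIO : (ssize_t) ret;
}
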
diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h
new file mode 100644
index 0000000..58b6dd4
--- /dev/null
+++ b/include/linux/zstd_errors.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
+
+
+/*===== dependency =====*/
+#include <linux/types.h> /* size_t */
+
+
+/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
+#define ZSTDERRORLIB_VISIBILITY
+#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+
+/*-*********************************************
+ * Error codes list
+ *-*********************************************
+ * Error codes _values_ are pinned down since v1.3.1 only.
+ * Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ * Only values < 100 are considered stable.
+ *
+ * note 1 : this API shall be used with static linking only.
+ * dynamic linking is not yet officially supported.
+ * note 2 : Prefer relying on the enum than on its value whenever possible
+ * This is the only supported way to use the error list < v1.3.1
+ * note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
+typedef enum {
+ ZSTD_error_no_error = 0,
+ ZSTD_error_GENERIC = 1,
+ ZSTD_error_prefix_unknown = 10,
+ ZSTD_error_version_unsupported = 12,
+ ZSTD_error_frameParameter_unsupported = 14,
+ ZSTD_error_frameParameter_windowTooLarge = 16,
+ ZSTD_error_corruption_detected = 20,
+ ZSTD_error_checksum_wrong = 22,
+ ZSTD_error_dictionary_corrupted = 30,
+ ZSTD_error_dictionary_wrong = 32,
+ ZSTD_error_dictionaryCreation_failed = 34,
+ ZSTD_error_parameter_unsupported = 40,
+ ZSTD_error_parameter_outOfBound = 42,
+ ZSTD_error_tableLog_tooLarge = 44,
+ ZSTD_error_maxSymbolValue_tooLarge = 46,
+ ZSTD_error_maxSymbolValue_tooSmall = 48,
+ ZSTD_error_stage_wrong = 60,
+ ZSTD_error_init_missing = 62,
+ ZSTD_error_memory_allocation = 64,
+ ZSTD_error_workSpace_tooSmall= 66,
+ ZSTD_error_dstSize_tooSmall = 70,
+ ZSTD_error_srcSize_wrong = 72,
+ ZSTD_error_dstBuffer_null = 74,
+ /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
+ ZSTD_error_frameIndex_tooLarge = 100,
+ ZSTD_error_seekableIO = 102,
+ ZSTD_error_dstBuffer_wrong = 104,
+ ZSTD_error_srcBuffer_wrong = 105,
+ ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+ convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+ which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
+
+
+
+#endif /* ZSTD_ERRORS_H_398273423 */
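
Reading aid: these enum values are meant to be reached through the zstd_get_error_code() wrapper declared in the header above. A sketch; the errno mapping is purely illustrative.

#include <linux/errno.h>
#include <linux/zstd.h>

/* Sketch: classify a size_t result from a zstd_* call. */
static int example_classify(size_t ret)
{
	if (!zstd_is_error(ret))
		return 0;

	switch (zstd_get_error_code(ret)) {
	case ZSTD_error_dstSize_tooSmall:
		return -ENOSPC;	/* retry with a larger destination buffer */
	case ZSTD_error_corruption_detected:
	case ZSTD_error_checksum_wrong:
		return -EIO;	/* damaged input */
	default:
		return -EINVAL;
	}
}
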
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 98c7866..3eaa6d2 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -65,16 +65,19 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_alloc (struct bkey_ops) { \
.key_invalid = bch2_alloc_v1_invalid, \
.val_to_text = bch2_alloc_to_text, \
+ .atomic_trigger = bch2_mark_alloc, \
}
#define bch2_bkey_ops_alloc_v2 (struct bkey_ops) { \
.key_invalid = bch2_alloc_v2_invalid, \
.val_to_text = bch2_alloc_to_text, \
+ .atomic_trigger = bch2_mark_alloc, \
}
#define bch2_bkey_ops_alloc_v3 (struct bkey_ops) { \
.key_invalid = bch2_alloc_v3_invalid, \
.val_to_text = bch2_alloc_to_text, \
+ .atomic_trigger = bch2_mark_alloc, \
}
static inline bool bkey_is_alloc(const struct bkey *k)
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index 4fdac54..2289a09 100644
--- a/libbcachefs/bkey_methods.h
+++ b/libbcachefs/bkey_methods.h
@@ -6,6 +6,7 @@
struct bch_fs;
struct btree;
+struct btree_trans;
struct bkey;
enum btree_node_type;
@@ -20,6 +21,10 @@ struct bkey_ops {
void (*swab)(struct bkey_s);
bool (*key_normalize)(struct bch_fs *, struct bkey_s);
bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
+ int (*trans_trigger)(struct btree_trans *, struct bkey_s_c,
+ struct bkey_i *, unsigned);
+ int (*atomic_trigger)(struct btree_trans *, struct bkey_s_c,
+ struct bkey_s_c, unsigned);
void (*compat)(enum btree_id id, unsigned version,
unsigned big_endian, int write,
struct bkey_s);
@@ -57,6 +62,28 @@ static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct b
bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
+static inline int bch2_mark_key(struct btree_trans *trans,
+ struct bkey_s_c old,
+ struct bkey_s_c new,
+ unsigned flags)
+{
+ const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new.k->type];
+
+ return ops->atomic_trigger
+ ? ops->atomic_trigger(trans, old, new, flags)
+ : 0;
+}
+
+static inline int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
+ struct bkey_i *new, unsigned flags)
+{
+ const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new->k.type];
+
+ return ops->trans_trigger
+ ? ops->trans_trigger(trans, old, new, flags)
+ : 0;
+}
+
void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned,
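
Reading note: the two new members turn trigger dispatch into a per-type method table; bch2_mark_key()/bch2_trans_mark_key() above look triggers up via bch2_bkey_ops[] instead of the switch statements removed from buckets.c later in this diff. A sketch of the wiring for a hypothetical key type (all *_example names are invented; the alloc_background.h and ec.h hunks in this diff do the same with bch2_mark_alloc and bch2_trans_mark_stripe/bch2_mark_stripe):

/* Hypothetical triggers matching the new bkey_ops member signatures: */
static int bch2_trans_mark_example(struct btree_trans *trans, struct bkey_s_c old,
				   struct bkey_i *new, unsigned flags)
{
	return 0;	/* transactional (pre-commit) accounting would go here */
}

static int bch2_mark_example(struct btree_trans *trans, struct bkey_s_c old,
			     struct bkey_s_c new, unsigned flags)
{
	return 0;	/* atomic (commit-time) accounting would go here */
}

#define bch2_bkey_ops_example (struct bkey_ops) {	\
	.trans_trigger	= bch2_trans_mark_example,	\
	.atomic_trigger	= bch2_mark_example,		\
}
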
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 4f0ad06..e6cea4c 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1626,6 +1626,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
if (new & (1U << BTREE_NODE_write_in_flight))
__bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED);
+ else
+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
}
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
@@ -2094,7 +2096,6 @@ restart:
rcu_read_unlock();
wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE);
goto restart;
-
}
rcu_read_unlock();
}
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 8186ee7..7fd0379 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -189,7 +189,7 @@ bool __bch2_btree_node_relock(struct btree_trans *trans,
if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
(btree_node_lock_seq_matches(path, b, level) &&
btree_node_lock_increment(trans, b, level, want))) {
- mark_btree_node_locked(path, level, want);
+ mark_btree_node_locked(trans, path, level, want);
return true;
}
fail:
@@ -240,7 +240,7 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
return false;
success:
- mark_btree_node_intent_locked(path, level);
+ mark_btree_node_intent_locked(trans, path, level);
return true;
}
@@ -486,14 +486,15 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
* before interior nodes - now that's handled by
* bch2_btree_path_traverse_all().
*/
- trans_for_each_path(trans, linked)
- if (linked != path &&
- linked->cached == path->cached &&
- linked->btree_id == path->btree_id &&
- linked->locks_want < new_locks_want) {
- linked->locks_want = new_locks_want;
- btree_path_get_locks(trans, linked, true);
- }
+ if (!path->cached && !trans->in_traverse_all)
+ trans_for_each_path(trans, linked)
+ if (linked != path &&
+ linked->cached == path->cached &&
+ linked->btree_id == path->btree_id &&
+ linked->locks_want < new_locks_want) {
+ linked->locks_want = new_locks_want;
+ btree_path_get_locks(trans, linked, true);
+ }
return false;
}
@@ -1167,7 +1168,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
t != BTREE_NODE_UNLOCKED) {
btree_node_unlock(path, b->c.level);
six_lock_increment(&b->c.lock, t);
- mark_btree_node_locked(path, b->c.level, t);
+ mark_btree_node_locked(trans, path, b->c.level, t);
}
btree_path_level_init(trans, path, b);
@@ -1244,7 +1245,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
path->l[i].b = NULL;
- mark_btree_node_locked(path, path->level, lock_type);
+ mark_btree_node_locked(trans, path, path->level, lock_type);
btree_path_level_init(trans, path, b);
return 0;
}
@@ -1409,7 +1410,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
if (unlikely(ret))
goto err;
- mark_btree_node_locked(path, level, lock_type);
+ mark_btree_node_locked(trans, path, level, lock_type);
btree_path_level_init(trans, path, b);
if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
@@ -1442,6 +1443,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
trans->in_traverse_all = true;
retry_all:
trans->restarted = false;
+ trans->traverse_all_idx = U8_MAX;
trans_for_each_path(trans, path)
path->should_be_locked = false;
@@ -1474,9 +1476,9 @@ retry_all:
}
/* Now, redo traversals in correct order: */
- i = 0;
- while (i < trans->nr_sorted) {
- path = trans->paths + trans->sorted[i];
+ trans->traverse_all_idx = 0;
+ while (trans->traverse_all_idx < trans->nr_sorted) {
+ path = trans->paths + trans->sorted[trans->traverse_all_idx];
/*
* Traversing a path can cause another path to be added at about
@@ -1484,10 +1486,13 @@ retry_all:
*/
if (path->uptodate) {
ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
- if (ret)
+ if (ret == -EINTR || ret == -ENOMEM)
goto retry_all;
+ if (ret)
+ goto err;
+ BUG_ON(path->uptodate);
} else {
- i++;
+ trans->traverse_all_idx++;
}
}
@@ -1498,7 +1503,7 @@ retry_all:
*/
trans_for_each_path(trans, path)
BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
-
+err:
bch2_btree_cache_cannibalize_unlock(c);
trans->in_traverse_all = false;
@@ -1807,18 +1812,48 @@ free:
__bch2_path_free(trans, path);
}
+void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
+{
+ struct btree_insert_entry *i;
+
+ pr_buf(buf, "transaction updates for %s journal seq %llu\n",
+ trans->fn, trans->journal_res.seq);
+
+ trans_for_each_update(trans, i) {
+ struct bkey_s_c old = { &i->old_k, i->old_v };
+
+ pr_buf(buf, "update: btree %s %pS\n old ",
+ bch2_btree_ids[i->btree_id],
+ (void *) i->ip_allocated);
+
+ bch2_bkey_val_to_text(buf, trans->c, old);
+ pr_buf(buf, "\n new ");
+ bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
+ pr_buf(buf, "\n");
+ }
+}
+
+noinline __cold
+void bch2_dump_trans_updates(struct btree_trans *trans)
+{
+ struct printbuf buf = PRINTBUF;
+
+ bch2_trans_updates_to_text(&buf, trans);
+ bch_err(trans->c, "%s", buf.buf);
+ printbuf_exit(&buf);
+}
+
noinline __cold
void bch2_dump_trans_paths_updates(struct btree_trans *trans)
{
struct btree_path *path;
- struct btree_insert_entry *i;
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
unsigned idx;
trans_for_each_path_inorder(trans, path, idx) {
- printbuf_reset(&buf1);
+ printbuf_reset(&buf);
- bch2_bpos_to_text(&buf1, path->pos);
+ bch2_bpos_to_text(&buf, path->pos);
printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n",
path->idx, path->ref, path->intent_ref,
@@ -1826,7 +1861,7 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
path->preserve ? " P" : "",
bch2_btree_ids[path->btree_id],
path->level,
- buf1.buf,
+ buf.buf,
path->nodes_locked,
#ifdef CONFIG_BCACHEFS_DEBUG
(void *) path->ip_allocated
@@ -1836,23 +1871,9 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
);
}
- trans_for_each_update(trans, i) {
- struct bkey u;
- struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u);
-
- printbuf_reset(&buf1);
- printbuf_reset(&buf2);
- bch2_bkey_val_to_text(&buf1, trans->c, old);
- bch2_bkey_val_to_text(&buf2, trans->c, bkey_i_to_s_c(i->k));
+ printbuf_exit(&buf);
- printk(KERN_ERR "update: btree %s %pS\n old %s\n new %s",
- bch2_btree_ids[i->btree_id],
- (void *) i->ip_allocated,
- buf1.buf, buf2.buf);
- }
-
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
+ bch2_dump_trans_updates(trans);
}
static struct btree_path *btree_path_alloc(struct btree_trans *trans,
@@ -2337,11 +2358,12 @@ out:
* bch2_btree_iter_peek: returns first key greater than or equal to iterator's
* current position
*/
-struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
{
struct btree_trans *trans = iter->trans;
struct bpos search_key = btree_iter_search_key(iter);
struct bkey_s_c k;
+ struct bpos iter_pos;
int ret;
if (iter->update_path) {
@@ -2357,6 +2379,24 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
if (!k.k || bkey_err(k))
goto out;
+ /*
+	 * iter->pos should be monotonically increasing, and always be
+ * equal to the key we just returned - except extents can
+ * straddle iter->pos:
+ */
+ if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+ iter_pos = k.k->p;
+ else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+ iter_pos = bkey_start_pos(k.k);
+ else
+ iter_pos = iter->pos;
+
+ if (bkey_cmp(iter_pos, end) > 0) {
+ bch2_btree_iter_set_pos(iter, end);
+ k = bkey_s_c_null;
+ goto out;
+ }
+
if (iter->update_path &&
bkey_cmp(iter->update_path->pos, k.k->p)) {
bch2_path_put(trans, iter->update_path,
@@ -2387,10 +2427,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
iter->update_path = bch2_btree_path_set_pos(trans,
iter->update_path, pos,
iter->flags & BTREE_ITER_INTENT,
- btree_iter_ip_allocated(iter));
-
- BUG_ON(!(iter->update_path->nodes_locked & 1));
- iter->update_path->should_be_locked = true;
+ _THIS_IP_);
}
/*
@@ -2414,14 +2451,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
break;
}
- /*
- * iter->pos should be mononotically increasing, and always be equal to
- * the key we just returned - except extents can straddle iter->pos:
- */
- if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
- iter->pos = k.k->p;
- else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
- iter->pos = bkey_start_pos(k.k);
+ iter->pos = iter_pos;
iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
iter->flags & BTREE_ITER_INTENT,
@@ -2429,8 +2459,13 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
BUG_ON(!iter->path->nodes_locked);
out:
if (iter->update_path) {
- BUG_ON(!(iter->update_path->nodes_locked & 1));
- iter->update_path->should_be_locked = true;
+ if (iter->update_path->uptodate &&
+ !bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_)) {
+ k = bkey_s_c_err(-EINTR);
+ } else {
+ BUG_ON(!(iter->update_path->nodes_locked & 1));
+ iter->update_path->should_be_locked = true;
+ }
}
iter->path->should_be_locked = true;
@@ -2661,9 +2696,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (iter->flags & BTREE_ITER_INTENT) {
struct btree_iter iter2;
+ struct bpos end = iter->pos;
+
+ if (iter->flags & BTREE_ITER_IS_EXTENTS)
+ end.offset = U64_MAX;
bch2_trans_copy_iter(&iter2, iter);
- k = bch2_btree_iter_peek(&iter2);
+ k = bch2_btree_iter_peek_upto(&iter2, end);
if (k.k && !bkey_err(k)) {
iter->k = iter2.k;
@@ -2831,6 +2870,11 @@ static inline void btree_path_list_add(struct btree_trans *trans,
path->sorted_idx = pos ? pos->sorted_idx + 1 : 0;
+ if (trans->in_traverse_all &&
+ trans->traverse_all_idx != U8_MAX &&
+ trans->traverse_all_idx >= path->sorted_idx)
+ trans->traverse_all_idx++;
+
array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
for (i = path->sorted_idx; i < trans->nr_sorted; i++)
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index d612aec..f670029 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -209,9 +209,14 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *);
struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_iter *);
-struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
+static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+{
+ return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
+}
+
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
@@ -306,13 +311,26 @@ static inline int bkey_err(struct bkey_s_c k)
}
static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
- unsigned flags)
+ unsigned flags)
{
return flags & BTREE_ITER_SLOTS
? bch2_btree_iter_peek_slot(iter)
: bch2_btree_iter_peek(iter);
}
+static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,
+ struct bpos end,
+ unsigned flags)
+{
+ if (!(flags & BTREE_ITER_SLOTS))
+ return bch2_btree_iter_peek_upto(iter, end);
+
+ if (bkey_cmp(iter->pos, end) > 0)
+ return bkey_s_c_null;
+
+ return bch2_btree_iter_peek_slot(iter);
+}
+
static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
return hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2
@@ -349,6 +367,14 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
+#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \
+ _start, _end, _flags, _k, _ret) \
+ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
+ (_start), (_flags)); \
+ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\
+ !((_ret) = bkey_err(_k)) && (_k).k; \
+ bch2_btree_iter_advance(&(_iter)))
+
#define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret) \
for (; \
(_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
@@ -363,6 +389,8 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
/* new multiple iterator interface: */
+void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
+void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);
void __bch2_trans_init(struct btree_trans *, struct bch_fs *,
unsigned, size_t, const char *);
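
Reading note: a sketch of how the new upto variants are meant to be used, modeled on the dirent.c conversion later in this diff. Here trans is a struct btree_trans * and dir_inum/snapshot are assumed to be set up by the caller.

	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	/* Scan one inode's dirents; the end position replaces a manual
	 * "did we walk past the inode?" check in the loop body. */
	for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
					  SPOS(dir_inum, 0, snapshot),
					  POS(dir_inum, U64_MAX), 0, k, ret) {
		if (k.k->type != KEY_TYPE_dirent)
			continue;
		/* ... use k ... */
	}
	bch2_trans_iter_exit(trans, &iter);
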
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index ee89b65..b1b7a30 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -309,7 +309,7 @@ retry:
if (!ck)
goto retry;
- mark_btree_node_locked(path, 0, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
path->locks_want = 1;
} else {
enum six_lock_type lock_want = __btree_lock_want(path, 0);
@@ -330,7 +330,7 @@ retry:
goto retry;
}
- mark_btree_node_locked(path, 0, lock_want);
+ mark_btree_node_locked(trans, path, 0, lock_want);
}
path->l[0].lock_seq = ck->c.lock.state.seq;
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index b4434ec..67c970d 100644
--- a/libbcachefs/btree_locking.h
+++ b/libbcachefs/btree_locking.h
@@ -58,7 +58,8 @@ static inline void mark_btree_node_unlocked(struct btree_path *path,
path->nodes_intent_locked &= ~(1 << level);
}
-static inline void mark_btree_node_locked(struct btree_path *path,
+static inline void mark_btree_node_locked(struct btree_trans *trans,
+ struct btree_path *path,
unsigned level,
enum six_lock_type type)
{
@@ -66,14 +67,17 @@ static inline void mark_btree_node_locked(struct btree_path *path,
BUILD_BUG_ON(SIX_LOCK_read != 0);
BUILD_BUG_ON(SIX_LOCK_intent != 1);
+ BUG_ON(trans->in_traverse_all && path->sorted_idx > trans->traverse_all_idx);
+
path->nodes_locked |= 1 << level;
path->nodes_intent_locked |= type << level;
}
-static inline void mark_btree_node_intent_locked(struct btree_path *path,
+static inline void mark_btree_node_intent_locked(struct btree_trans *trans,
+ struct btree_path *path,
unsigned level)
{
- mark_btree_node_locked(path, level, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path, level, SIX_LOCK_intent);
}
static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index c18e388..575635b 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -361,7 +361,11 @@ struct btree_insert_entry {
unsigned long ip_allocated;
};
+#ifndef CONFIG_LOCKDEP
#define BTREE_ITER_MAX 64
+#else
+#define BTREE_ITER_MAX 32
+#endif
struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
@@ -388,6 +392,7 @@ struct btree_trans {
u8 nr_sorted;
u8 nr_updates;
+ u8 traverse_all_idx;
bool used_mempool:1;
bool in_traverse_all:1;
bool restarted:1;
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index ee978f3..47623f3 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -213,7 +213,7 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c,
/**
* btree_insert_key - insert a key one key into a leaf node
*/
-static bool btree_insert_key_leaf(struct btree_trans *trans,
+static void btree_insert_key_leaf(struct btree_trans *trans,
struct btree_insert_entry *insert)
{
struct bch_fs *c = trans->c;
@@ -226,7 +226,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
&insert_l(insert)->iter, insert->k)))
- return false;
+ return;
i->journal_seq = cpu_to_le64(max(trans->journal_res.seq,
le64_to_cpu(i->journal_seq)));
@@ -247,8 +247,6 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
if (u64s_added > live_u64s_added &&
bch2_maybe_compact_whiteouts(c, b))
bch2_trans_node_reinit_iter(trans, b);
-
- return true;
}
/* Cached btree updates: */
@@ -400,18 +398,16 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
- bool did_work;
EBUG_ON(trans->journal_res.ref !=
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
i->k->k.needs_whiteout = false;
- did_work = !i->cached
- ? btree_insert_key_leaf(trans, i)
- : bch2_btree_insert_key_cached(trans, i->path, i->k);
- if (!did_work)
- return;
+ if (!i->cached)
+ btree_insert_key_leaf(trans, i);
+ else
+ bch2_btree_insert_key_cached(trans, i->path, i->k);
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
bch2_journal_add_keys(j, &trans->journal_res,
@@ -440,7 +436,8 @@ static int run_one_mem_trigger(struct btree_trans *trans,
if (!btree_node_type_needs_gc(i->btree_id))
return 0;
- if (old.k->type == new->k.type &&
+ if (bch2_bkey_ops[old.k->type].atomic_trigger ==
+ bch2_bkey_ops[i->k->k.type].atomic_trigger &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
ret = bch2_mark_key(trans, old, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
@@ -485,7 +482,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
if (overwrite) {
ret = bch2_trans_mark_old(trans, old, i->flags);
- } else if (old.k->type == i->k->k.type &&
+ } else if (bch2_bkey_ops[old.k->type].trans_trigger ==
+ bch2_bkey_ops[i->k->k.type].trans_trigger &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
i->overwrite_trigger_run = true;
ret = bch2_trans_mark_key(trans, old, i->k,
@@ -652,6 +650,32 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (btree_node_type_needs_gc(i->bkey_type))
marking = true;
+
+ /*
+ * Revalidate before calling mem triggers - XXX, ugly:
+ *
+ * - successful btree node splits don't cause transaction
+ * restarts and will have invalidated the pointer to the bkey
+ * value
+ * - btree_node_lock_for_insert() -> btree_node_prep_for_write()
+ * when it has to resort
+ * - btree_key_can_insert_cached() when it has to reallocate
+ *
+ * Ugly because we currently have no way to tell if the
+	 * pointer's been invalidated, which means it's debatable
+ * whether we should be stashing the old key at all.
+ */
+ i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v;
+
+ if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))) {
+ struct bkey_i *j_k =
+ bch2_journal_keys_peek(c, i->btree_id, i->level, i->k->k.p);
+
+ if (j_k && !bpos_cmp(j_k->k.p, i->k->k.p)) {
+ i->old_k = j_k->k;
+ i->old_v = &j_k->v;
+ }
+ }
}
/*
@@ -1217,7 +1241,7 @@ int bch2_trans_update_extent(struct btree_trans *trans,
BTREE_ITER_INTENT|
BTREE_ITER_WITH_UPDATES|
BTREE_ITER_NOT_EXTENTS);
- k = bch2_btree_iter_peek(&iter);
+ k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
if ((ret = bkey_err(k)))
goto err;
if (!k.k)
@@ -1369,7 +1393,8 @@ nomerge1:
goto out;
}
next:
- k = bch2_btree_iter_next(&iter);
+ bch2_btree_iter_advance(&iter);
+ k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
if ((ret = bkey_err(k)))
goto err;
if (!k.k)
@@ -1629,14 +1654,14 @@ int bch2_btree_delete_at(struct btree_trans *trans,
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
struct bpos start, struct bpos end,
- unsigned iter_flags,
+ unsigned update_flags,
u64 *journal_seq)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
- bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT|iter_flags);
+ bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
retry:
while ((bch2_trans_begin(trans),
(k = bch2_btree_iter_peek(&iter)).k) &&
@@ -1679,7 +1704,8 @@ retry:
ret = bch2_trans_update(trans, &iter, &delete, 0) ?:
bch2_trans_commit(trans, &disk_res, journal_seq,
- BTREE_INSERT_NOFAIL);
+ BTREE_INSERT_NOFAIL|
+ update_flags);
bch2_disk_reservation_put(trans->c, &disk_res);
if (ret)
break;
@@ -1701,10 +1727,10 @@ retry:
*/
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
struct bpos start, struct bpos end,
- unsigned iter_flags,
+ unsigned update_flags,
u64 *journal_seq)
{
return bch2_trans_do(c, NULL, journal_seq, 0,
bch2_btree_delete_range_trans(&trans, id, start, end,
- iter_flags, journal_seq));
+ update_flags, journal_seq));
}
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 2c3b71b..d522637 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -497,20 +497,25 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
BUG_ON(owned_by_allocator == old.owned_by_allocator);
}
-static int bch2_mark_alloc(struct btree_trans *trans,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
+int bch2_mark_alloc(struct btree_trans *trans,
+ struct bkey_s_c old, struct bkey_s_c new,
+ unsigned flags)
{
bool gc = flags & BTREE_TRIGGER_GC;
u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c;
struct bkey_alloc_unpacked old_u = bch2_alloc_unpack(old);
struct bkey_alloc_unpacked new_u = bch2_alloc_unpack(new);
- struct bch_dev *ca;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, new_u.dev);
struct bucket *g;
struct bucket_mark old_m, m;
int ret = 0;
+ if (bch2_trans_inconsistent_on(new_u.bucket < ca->mi.first_bucket ||
+ new_u.bucket >= ca->mi.nbuckets, trans,
+ "alloc key outside range of device's buckets"))
+ return -EIO;
+
/*
* alloc btree is read in by bch2_alloc_read, not gc:
*/
@@ -550,11 +555,6 @@ static int bch2_mark_alloc(struct btree_trans *trans,
}
}
- ca = bch_dev_bkey_exists(c, new_u.dev);
-
- if (new_u.bucket >= ca->mi.nbuckets)
- return 0;
-
percpu_down_read(&c->mark_lock);
if (!gc && new_u.gen != old_u.gen)
*bucket_gen(ca, new_u.bucket) = new_u.gen;
@@ -929,9 +929,9 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans,
return 0;
}
-static int bch2_mark_extent(struct btree_trans *trans,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
+int bch2_mark_extent(struct btree_trans *trans,
+ struct bkey_s_c old, struct bkey_s_c new,
+ unsigned flags)
{
u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c;
@@ -1011,9 +1011,9 @@ static int bch2_mark_extent(struct btree_trans *trans,
return 0;
}
-static int bch2_mark_stripe(struct btree_trans *trans,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
+int bch2_mark_stripe(struct btree_trans *trans,
+ struct bkey_s_c old, struct bkey_s_c new,
+ unsigned flags)
{
bool gc = flags & BTREE_TRIGGER_GC;
u64 journal_seq = trans->journal_res.seq;
@@ -1118,9 +1118,9 @@ static int bch2_mark_stripe(struct btree_trans *trans,
return 0;
}
-static int bch2_mark_inode(struct btree_trans *trans,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
+int bch2_mark_inode(struct btree_trans *trans,
+ struct bkey_s_c old, struct bkey_s_c new,
+ unsigned flags)
{
struct bch_fs *c = trans->c;
struct bch_fs_usage __percpu *fs_usage;
@@ -1149,9 +1149,9 @@ static int bch2_mark_inode(struct btree_trans *trans,
return 0;
}
-static int bch2_mark_reservation(struct btree_trans *trans,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
+int bch2_mark_reservation(struct btree_trans *trans,
+ struct bkey_s_c old, struct bkey_s_c new,
+ unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
@@ -1228,9 +1228,9 @@ fsck_err:
return ret;
}
-static int bch2_mark_reflink_p(struct btree_trans *trans,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
+int bch2_mark_reflink_p(struct btree_trans *trans,
+ struct bkey_s_c old, struct bkey_s_c new,
+ unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
@@ -1267,39 +1267,6 @@ static int bch2_mark_reflink_p(struct btree_trans *trans,
return ret;
}
-int bch2_mark_key(struct btree_trans *trans,
- struct bkey_s_c old,
- struct bkey_s_c new,
- unsigned flags)
-{
- struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
-
- switch (k.k->type) {
- case KEY_TYPE_alloc:
- case KEY_TYPE_alloc_v2:
- case KEY_TYPE_alloc_v3:
- return bch2_mark_alloc(trans, old, new, flags);
- case KEY_TYPE_btree_ptr:
- case KEY_TYPE_btree_ptr_v2:
- case KEY_TYPE_extent:
- case KEY_TYPE_reflink_v:
- return bch2_mark_extent(trans, old, new, flags);
- case KEY_TYPE_stripe:
- return bch2_mark_stripe(trans, old, new, flags);
- case KEY_TYPE_inode:
- case KEY_TYPE_inode_v2:
- return bch2_mark_inode(trans, old, new, flags);
- case KEY_TYPE_reservation:
- return bch2_mark_reservation(trans, old, new, flags);
- case KEY_TYPE_reflink_p:
- return bch2_mark_reflink_p(trans, old, new, flags);
- case KEY_TYPE_snapshot:
- return bch2_mark_snapshot(trans, old, new, flags);
- default:
- return 0;
- }
-}
-
static noinline __cold
void fs_usage_apply_warn(struct btree_trans *trans,
unsigned disk_res_sectors,
@@ -1462,7 +1429,6 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
struct extent_ptr_decoded p,
s64 sectors, enum bch_data_type data_type)
{
- struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_i_stripe *s;
@@ -1478,16 +1444,15 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
goto err;
if (k.k->type != KEY_TYPE_stripe) {
- bch2_fs_inconsistent(c,
+ bch2_trans_inconsistent(trans,
"pointer to nonexistent stripe %llu",
(u64) p.ec.idx);
- bch2_inconsistent_error(c);
ret = -EIO;
goto err;
}
if (!bch2_ptr_matches_stripe(bkey_s_c_to_stripe(k).v, p)) {
- bch2_fs_inconsistent(c,
+ bch2_trans_inconsistent(trans,
"stripe pointer doesn't match stripe %llu",
(u64) p.ec.idx);
ret = -EIO;
@@ -1516,10 +1481,14 @@ err:
return ret;
}
-static int bch2_trans_mark_extent(struct btree_trans *trans,
- struct bkey_s_c k, unsigned flags)
+int bch2_trans_mark_extent(struct btree_trans *trans,
+ struct bkey_s_c old, struct bkey_i *new,
+ unsigned flags)
{
struct bch_fs *c = trans->c;
+ struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
+ ? old
+ : bkey_i_to_s_c(new);
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
@@ -1601,8 +1570,8 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
goto err;
if (!deleting) {
- if (bch2_fs_inconsistent_on(u.stripe ||
- u.stripe_redundancy, c,
+ if (bch2_trans_inconsistent_on(u.stripe ||
+ u.stripe_redundancy, trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
iter.pos.inode, iter.pos.offset, u.gen,
bch2_data_types[u.data_type],
@@ -1612,7 +1581,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
goto err;
}
- if (bch2_fs_inconsistent_on(data_type && u.dirty_sectors, c,
+ if (bch2_trans_inconsistent_on(data_type && u.dirty_sectors, trans,
"bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
iter.pos.inode, iter.pos.offset, u.gen,
bch2_data_types[u.data_type],
@@ -1625,8 +1594,8 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
u.stripe = s.k->p.offset;
u.stripe_redundancy = s.v->nr_redundant;
} else {
- if (bch2_fs_inconsistent_on(u.stripe != s.k->p.offset ||
- u.stripe_redundancy != s.v->nr_redundant, c,
+ if (bch2_trans_inconsistent_on(u.stripe != s.k->p.offset ||
+ u.stripe_redundancy != s.v->nr_redundant, trans,
"bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
iter.pos.inode, iter.pos.offset, u.gen,
s.k->p.offset, u.stripe)) {
@@ -1650,9 +1619,9 @@ err:
return ret;
}
-static int bch2_trans_mark_stripe(struct btree_trans *trans,
- struct bkey_s_c old, struct bkey_i *new,
- unsigned flags)
+int bch2_trans_mark_stripe(struct btree_trans *trans,
+ struct bkey_s_c old, struct bkey_i *new,
+ unsigned flags)
{
const struct bch_stripe *old_s = NULL;
struct bch_stripe *new_s = NULL;
@@ -1720,10 +1689,10 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
return ret;
}
-static int bch2_trans_mark_inode(struct btree_trans *trans,
- struct bkey_s_c old,
- struct bkey_i *new,
- unsigned flags)
+int bch2_trans_mark_inode(struct btree_trans *trans,
+ struct bkey_s_c old,
+ struct bkey_i *new,
+ unsigned flags)
{
int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k);
@@ -1736,9 +1705,14 @@ static int bch2_trans_mark_inode(struct btree_trans *trans,
return 0;
}
-static int bch2_trans_mark_reservation(struct btree_trans *trans,
- struct bkey_s_c k, unsigned flags)
+int bch2_trans_mark_reservation(struct btree_trans *trans,
+ struct bkey_s_c old,
+ struct bkey_i *new,
+ unsigned flags)
{
+ struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
+ ? old
+ : bkey_i_to_s_c(new);
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
s64 sectors = (s64) k.k->size;
struct replicas_delta_list *d;
@@ -1787,7 +1761,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
refcount = bkey_refcount(n);
if (!refcount) {
bch2_bkey_val_to_text(&buf, c, p.s_c);
- bch2_fs_inconsistent(c,
+ bch2_trans_inconsistent(trans,
"nonexistent indirect extent at %llu while marking\n %s",
*idx, buf.buf);
ret = -EIO;
@@ -1796,7 +1770,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
bch2_bkey_val_to_text(&buf, c, p.s_c);
- bch2_fs_inconsistent(c,
+ bch2_trans_inconsistent(trans,
"indirect extent refcount underflow at %llu while marking\n %s",
*idx, buf.buf);
ret = -EIO;
@@ -1837,9 +1811,14 @@ err:
return ret;
}
-static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
- struct bkey_s_c k, unsigned flags)
+int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+ struct bkey_s_c old,
+ struct bkey_i *new,
+ unsigned flags)
{
+ struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
+ ? old
+ : bkey_i_to_s_c(new);
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
u64 idx, end_idx;
int ret = 0;
@@ -1860,33 +1839,6 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
return ret;
}
-int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
- struct bkey_i *new, unsigned flags)
-{
- struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
- ? old
- : bkey_i_to_s_c(new);
-
- switch (k.k->type) {
- case KEY_TYPE_btree_ptr:
- case KEY_TYPE_btree_ptr_v2:
- case KEY_TYPE_extent:
- case KEY_TYPE_reflink_v:
- return bch2_trans_mark_extent(trans, k, flags);
- case KEY_TYPE_stripe:
- return bch2_trans_mark_stripe(trans, old, new, flags);
- case KEY_TYPE_inode:
- case KEY_TYPE_inode_v2:
- return bch2_trans_mark_inode(trans, old, new, flags);
- case KEY_TYPE_reservation:
- return bch2_trans_mark_reservation(trans, k, flags);
- case KEY_TYPE_reflink_p:
- return bch2_trans_mark_reflink_p(trans, k, flags);
- default:
- return 0;
- }
-}
-
static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
struct bch_dev *ca, size_t b,
enum bch_data_type type,
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index daf79a4..392e03d 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -229,6 +229,19 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
size_t, enum bch_data_type, unsigned,
struct gc_pos, unsigned);
+int bch2_mark_alloc(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_extent(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_stripe(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+
+int bch2_trans_mark_extent(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_trans_mark_stripe(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_trans_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_trans_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_trans_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
+
int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index 8e4179d..7d9ebcc 100644
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -197,9 +197,11 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
goto err;
workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
- ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
+ ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
- ret = ZSTD_decompressDCtx(ctx,
+ src_len = le32_to_cpup(src_data.b);
+
+ ret = zstd_decompress_dctx(ctx,
dst_data, dst_len,
src_data.b + 4, real_src_len);
@@ -333,8 +335,8 @@ static int attempt_compress(struct bch_fs *c,
return strm.total_out;
}
case BCH_COMPRESSION_TYPE_zstd: {
- ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
- ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
+ ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
+ zstd_cctx_workspace_bound(&c->zstd_params.cParams));
/*
* ZSTD requires that when we decompress we pass in the exact
@@ -347,11 +349,11 @@ static int attempt_compress(struct bch_fs *c,
* factor (7 bytes) from the dst buffer size to account for
* that.
*/
- size_t len = ZSTD_compressCCtx(ctx,
+ size_t len = zstd_compress_cctx(ctx,
dst + 4, dst_len - 4 - 7,
src, src_len,
- c->zstd_params);
- if (ZSTD_isError(len))
+ &c->zstd_params);
+ if (zstd_is_error(len))
return 0;
*((__le32 *) dst) = cpu_to_le32(len);
@@ -546,7 +548,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
size_t decompress_workspace_size = 0;
bool decompress_workspace_needed;
- ZSTD_parameters params = ZSTD_getParams(0, c->opts.encoded_extent_max, 0);
+ ZSTD_parameters params = zstd_get_params(0, c->opts.encoded_extent_max);
struct {
unsigned feature;
unsigned type;
@@ -558,8 +560,8 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
zlib_inflate_workspacesize(), },
{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
- ZSTD_CCtxWorkspaceBound(params.cParams),
- ZSTD_DCtxWorkspaceBound() },
+ zstd_cctx_workspace_bound(&params.cParams),
+ zstd_dctx_workspace_bound() },
}, *i;
int ret = 0;
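
For reference, the framing these call sites rely on: the compressed payload is prefixed by its length as a 4-byte little-endian value, which is why the compress side writes to dst + 4 and reserves dst_len - 4 - 7 bytes (the extra 7 being the fudge factor described in the comment above). A minimal sketch of both halves, with names from the surrounding hunks (compressed_len is illustrative):

/* compress: record the exact compressed length in the prefix */
size_t len = zstd_compress_cctx(ctx, dst + 4, dst_len - 4 - 7,
				src, src_len, &c->zstd_params);
if (zstd_is_error(len))
	return 0;
*((__le32 *) dst) = cpu_to_le32(len);

/* decompress: read the stored length back out of the prefix */
size_t compressed_len = le32_to_cpup(src_data.b);
ret = zstd_decompress_dctx(ctx, dst_data, dst_len,
			   src_data.b + 4, compressed_len);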
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index a43a244..760e4f7 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -470,16 +470,13 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
if (ret)
return ret;
- for_each_btree_key_norestart(trans, iter, BTREE_ID_dirents,
- SPOS(dir.inum, 0, snapshot), 0, k, ret) {
- if (k.k->p.inode > dir.inum)
- break;
-
+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
+ SPOS(dir.inum, 0, snapshot),
+ POS(dir.inum, U64_MAX), 0, k, ret)
if (k.k->type == KEY_TYPE_dirent) {
ret = -ENOTEMPTY;
break;
}
- }
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -503,11 +500,9 @@ retry:
if (ret)
goto err;
- for_each_btree_key_norestart(&trans, iter, BTREE_ID_dirents,
- SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) {
- if (k.k->p.inode > inum.inum)
- break;
-
+ for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents,
+ SPOS(inum.inum, ctx->pos, snapshot),
+ POS(inum.inum, U64_MAX), 0, k, ret) {
if (k.k->type != KEY_TYPE_dirent)
continue;
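
The for_each_btree_key_upto_norestart() conversions here (and in fs.c, inode.c, str_hash.h and xattr.c below) fold the end-of-range check into the iterator itself. A minimal sketch of the equivalence, assuming the _upto_ variant stops once the iterator position passes end (process() is a stand-in for the loop body):

/* before: manual bound check inside the loop */
for_each_btree_key_norestart(trans, iter, btree_id, start, 0, k, ret) {
	if (bkey_cmp(k.k->p, end) > 0)
		break;
	process(k);
}

/* after: the bound is part of the iteration */
for_each_btree_key_upto_norestart(trans, iter, btree_id, start, end,
				  0, k, ret)
	process(k);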
diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h
index 78d468c..9d508a2 100644
--- a/libbcachefs/ec.h
+++ b/libbcachefs/ec.h
@@ -14,6 +14,8 @@ void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
.key_invalid = bch2_stripe_invalid, \
.val_to_text = bch2_stripe_to_text, \
.swab = bch2_ptr_swab, \
+ .trans_trigger = bch2_trans_mark_stripe, \
+ .atomic_trigger = bch2_mark_stripe, \
}
static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
diff --git a/libbcachefs/error.h b/libbcachefs/error.h
index 4ab3cfe..6e63c38 100644
--- a/libbcachefs/error.h
+++ b/libbcachefs/error.h
@@ -67,6 +67,26 @@ do { \
})
/*
+ * When a transaction update discovers or causes a filesystem inconsistency,
+ * it's helpful to also dump the pending updates:
+ */
+#define bch2_trans_inconsistent(trans, ...) \
+({ \
+ bch_err(trans->c, __VA_ARGS__); \
+ bch2_inconsistent_error(trans->c); \
+ bch2_dump_trans_updates(trans); \
+})
+
+#define bch2_trans_inconsistent_on(cond, trans, ...) \
+({ \
+ bool _ret = unlikely(!!(cond)); \
+ \
+ if (_ret) \
+ bch2_trans_inconsistent(trans, __VA_ARGS__); \
+ _ret; \
+})
+
+/*
* Fsck errors: inconsistency errors we detect at mount time, and should ideally
* be able to repair:
*/
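
bch2_trans_inconsistent_on() follows the existing bch2_fs_inconsistent_on() pattern: it evaluates the condition, logs and dumps the transaction's pending updates when it fires, and returns the condition so it can sit directly in an if. A sketch of a call site, mirroring the refcount-underflow hunk above:

if (bch2_trans_inconsistent_on(!*refcount, trans,
		"indirect extent refcount underflow at %llu", *idx)) {
	ret = -EIO;
	goto err;
}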
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 9c25672..ae65084 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -381,6 +381,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
.key_invalid = bch2_btree_ptr_invalid, \
.val_to_text = bch2_btree_ptr_to_text, \
.swab = bch2_ptr_swab, \
+ .trans_trigger = bch2_trans_mark_extent, \
+ .atomic_trigger = bch2_mark_extent, \
}
#define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) { \
@@ -388,6 +390,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
.val_to_text = bch2_btree_ptr_v2_to_text, \
.swab = bch2_ptr_swab, \
.compat = bch2_btree_ptr_v2_compat, \
+ .trans_trigger = bch2_trans_mark_extent, \
+ .atomic_trigger = bch2_mark_extent, \
}
/* KEY_TYPE_extent: */
@@ -402,6 +406,8 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
.swab = bch2_ptr_swab, \
.key_normalize = bch2_extent_normalize, \
.key_merge = bch2_extent_merge, \
+ .trans_trigger = bch2_trans_mark_extent, \
+ .atomic_trigger = bch2_mark_extent, \
}
/* KEY_TYPE_reservation: */
@@ -414,6 +420,8 @@ bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
.key_invalid = bch2_reservation_invalid, \
.val_to_text = bch2_reservation_to_text, \
.key_merge = bch2_reservation_merge, \
+ .trans_trigger = bch2_trans_mark_reservation, \
+ .atomic_trigger = bch2_mark_reservation, \
}
/* Extent checksum entries: */
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 1d0871f..b05d6e8 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -35,6 +35,15 @@
#include <trace/events/bcachefs.h>
#include <trace/events/writeback.h>
+static inline bool bio_full(struct bio *bio, unsigned len)
+{
+ if (bio->bi_vcnt >= bio->bi_max_vecs)
+ return true;
+ if (bio->bi_iter.bi_size > UINT_MAX - len)
+ return true;
+ return false;
+}
+
static inline struct address_space *faults_disabled_mapping(void)
{
return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL);
@@ -1808,11 +1817,11 @@ again:
* to check that the address is actually valid, when atomic
* usercopies are used, below.
*/
- if (unlikely(iov_iter_fault_in_readable(iter, bytes))) {
+ if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
bytes = min_t(unsigned long, iov_iter_count(iter),
PAGE_SIZE - offset);
- if (unlikely(iov_iter_fault_in_readable(iter, bytes))) {
+ if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
ret = -EFAULT;
break;
}
@@ -1870,7 +1879,7 @@ static void bch2_dio_read_complete(struct closure *cl)
{
struct dio_read *dio = container_of(cl, struct dio_read, cl);
- dio->req->ki_complete(dio->req, dio->ret, 0);
+ dio->req->ki_complete(dio->req, dio->ret);
bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
}
@@ -1919,7 +1928,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
iter->count -= shorten;
bio = bio_alloc_bioset(GFP_KERNEL,
- iov_iter_npages(iter, BIO_MAX_VECS),
+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
&c->dio_read_bioset);
bio->bi_end_io = bch2_direct_IO_read_endio;
@@ -1954,7 +1963,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
goto start;
while (iter->count) {
bio = bio_alloc_bioset(GFP_KERNEL,
- iov_iter_npages(iter, BIO_MAX_VECS),
+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
&c->bio_read);
bio->bi_end_io = bch2_direct_IO_read_split_endio;
start:
@@ -2101,7 +2110,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
while (1) {
iter_count = dio->iter.count;
- if (kthread)
+ if (kthread && dio->mm)
kthread_use_mm(dio->mm);
BUG_ON(current->faults_disabled_mapping);
current->faults_disabled_mapping = mapping;
@@ -2111,7 +2120,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
dropped_locks = fdm_dropped_locks();
current->faults_disabled_mapping = NULL;
- if (kthread)
+ if (kthread && dio->mm)
kthread_unuse_mm(dio->mm);
/*
@@ -2244,7 +2253,7 @@ err:
inode_dio_end(&inode->v);
if (!sync) {
- req->ki_complete(req, ret, 0);
+ req->ki_complete(req, ret);
ret = -EIOCBQUEUED;
}
return ret;
@@ -2304,9 +2313,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
}
bio = bio_alloc_bioset(GFP_KERNEL,
- iov_iter_is_bvec(iter)
- ? 0
- : iov_iter_npages(iter, BIO_MAX_VECS),
+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
&c->dio_write_bioset);
dio = container_of(bio, struct dio_write, op.wbio.bio);
init_completion(&dio->done);
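
bio_full(), added at the top of this file, reports whether a bio can take len more bytes: either the vector table is exhausted or bi_iter.bi_size would overflow. A hypothetical sketch of the pattern it supports when building a bio page by page (bioset and page are stand-ins):

if (bio_full(bio, PAGE_SIZE)) {
	submit_bio(bio);	/* flush the full bio... */
	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, bioset);
}				/* ...and start a fresh one */
bio_add_page(bio, page, PAGE_SIZE, 0);	/* now guaranteed to fit */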
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 30720c1..9fc6c39 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -30,6 +30,7 @@
#include <linux/pagemap.h>
#include <linux/posix_acl.h>
#include <linux/random.h>
+#include <linux/seq_file.h>
#include <linux/statfs.h>
#include <linux/string.h>
#include <linux/xattr.h>
@@ -934,9 +935,8 @@ retry:
SPOS(ei->v.i_ino, start, snapshot), 0);
while (!(ret = btree_trans_too_many_iters(&trans)) &&
- (k = bch2_btree_iter_peek(&iter)).k &&
- !(ret = bkey_err(k)) &&
- bkey_cmp(iter.pos, end) < 0) {
+ (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
+ !(ret = bkey_err(k))) {
enum btree_id data_btree = BTREE_ID_extents;
if (!bkey_extent_is_data(k.k) &&
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 78e2db6..14b0b59 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -606,12 +606,12 @@ static int bch2_inode_delete_keys(struct btree_trans *trans,
bch2_btree_iter_set_snapshot(&iter, snapshot);
- k = bch2_btree_iter_peek(&iter);
+ k = bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
ret = bkey_err(k);
if (ret)
goto err;
- if (!k.k || iter.pos.inode != inum.inum)
+ if (!k.k)
break;
bkey_init(&delete.k);
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 77957cc..2337ecf 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -13,11 +13,15 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_inode (struct bkey_ops) { \
.key_invalid = bch2_inode_invalid, \
.val_to_text = bch2_inode_to_text, \
+ .trans_trigger = bch2_trans_mark_inode, \
+ .atomic_trigger = bch2_mark_inode, \
}
#define bch2_bkey_ops_inode_v2 (struct bkey_ops) { \
.key_invalid = bch2_inode_v2_invalid, \
.val_to_text = bch2_inode_to_text, \
+ .trans_trigger = bch2_trans_mark_inode, \
+ .atomic_trigger = bch2_mark_inode, \
}
static inline bool bkey_is_inode(const struct bkey *k)
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index ded4b68..eb556ec 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -241,6 +241,9 @@ static int journal_entry_open(struct journal *j)
if (u64s <= 0)
return cur_entry_journal_full;
+ if (fifo_empty(&j->pin) && j->reclaim_thread)
+ wake_up_process(j->reclaim_thread);
+
/*
* The fifo_push() needs to happen at the same time as j->seq is
* incremented for journal_last_seq() to be calculated correctly
@@ -628,31 +631,6 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
return ret ?: ret2 < 0 ? ret2 : 0;
}
-int bch2_journal_meta(struct journal *j)
-{
- struct journal_buf *buf;
- struct journal_res res;
- int ret;
-
- memset(&res, 0, sizeof(res));
-
- ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
- if (ret)
- return ret;
-
- buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
- buf->must_flush = true;
-
- if (!buf->flush_time) {
- buf->flush_time = local_clock() ?: 1;
- buf->expires = jiffies;
- }
-
- bch2_journal_res_put(j, &res);
-
- return bch2_journal_flush_seq(j, res.seq);
-}
-
/*
* bch2_journal_flush_async - if there is an open journal entry, or a journal
* still being written, write it and wait for the write to complete
@@ -705,6 +683,64 @@ out:
return ret;
}
+int bch2_journal_meta(struct journal *j)
+{
+ struct journal_buf *buf;
+ struct journal_res res;
+ int ret;
+
+ memset(&res, 0, sizeof(res));
+
+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+ if (ret)
+ return ret;
+
+ buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
+ buf->must_flush = true;
+
+ if (!buf->flush_time) {
+ buf->flush_time = local_clock() ?: 1;
+ buf->expires = jiffies;
+ }
+
+ bch2_journal_res_put(j, &res);
+
+ return bch2_journal_flush_seq(j, res.seq);
+}
+
+int bch2_journal_log_msg(struct journal *j, const char *fmt, ...)
+{
+ struct jset_entry_log *entry;
+ struct journal_res res = { 0 };
+ unsigned msglen, u64s;
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ msglen = vsnprintf(NULL, 0, fmt, args) + 1;
+ va_end(args);
+
+ u64s = jset_u64s(DIV_ROUND_UP(msglen, sizeof(u64)));
+
+ ret = bch2_journal_res_get(j, &res, u64s, 0);
+ if (ret)
+ return ret;
+
+ entry = container_of(journal_res_entry(j, &res),
+ struct jset_entry_log, entry);
+ memset(entry, 0, u64s * sizeof(u64));
+ entry->entry.type = BCH_JSET_ENTRY_log;
+ entry->entry.u64s = u64s - 1;
+
+ va_start(args, fmt);
+ vsnprintf(entry->d, INT_MAX, fmt, args);
+ va_end(args);
+
+ bch2_journal_res_put(j, &res);
+
+ return bch2_journal_flush_seq(j, res.seq);
+}
+
/* block/unlock the journal: */
void bch2_journal_unblock(struct journal *j)
@@ -802,7 +838,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
* superblock before inserting into the journal array
*/
- pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
+ pos = ja->discard_idx ?: ja->nr;
__array_insert_item(ja->buckets, ja->nr, pos);
__array_insert_item(ja->bucket_seq, ja->nr, pos);
__array_insert_item(journal_buckets->buckets, ja->nr, pos);
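
bch2_journal_log_msg() sizes its reservation by rounding the formatted message (including the NUL) up to whole u64s, then adding the entry header via jset_u64s(); the entry->entry.u64s = u64s - 1 assignment above implies the header occupies one u64. For example, "journal replay finished" is 24 bytes with its NUL, so it needs DIV_ROUND_UP(24, 8) = 3 payload u64s. Usage is just printf-style (see the in-tree caller in recovery.c below):

bch2_journal_log_msg(&c->journal, "journal replay finished");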
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 1bb0e00..989c331 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -480,6 +480,7 @@ int bch2_journal_flush_seq(struct journal *, u64);
int bch2_journal_flush(struct journal *);
bool bch2_journal_noflush_seq(struct journal *, u64);
int bch2_journal_meta(struct journal *);
+int bch2_journal_log_msg(struct journal *, const char *, ...);
void bch2_journal_halt(struct journal *);
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index ec565ed..a920a11 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -670,6 +670,7 @@ static int bch2_journal_reclaim_thread(void *arg)
struct journal *j = arg;
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned long delay, now;
+ bool journal_empty;
int ret = 0;
set_freezable();
@@ -696,10 +697,17 @@ static int bch2_journal_reclaim_thread(void *arg)
break;
if (j->reclaim_kicked)
break;
- if (time_after_eq(jiffies, j->next_reclaim))
- break;
- freezable_schedule_timeout(j->next_reclaim - jiffies);
+ spin_lock(&j->lock);
+ journal_empty = fifo_empty(&j->pin);
+ spin_unlock(&j->lock);
+
+ if (journal_empty)
+ freezable_schedule();
+ else if (time_after(j->next_reclaim, jiffies))
+ freezable_schedule_timeout(j->next_reclaim - jiffies);
+ else
+ break;
}
__set_current_state(TASK_RUNNING);
}
@@ -809,10 +817,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
seq = 0;
spin_lock(&j->lock);
- while (!ret && seq < j->pin.back) {
+ while (!ret) {
struct bch_replicas_padded replicas;
seq = max(seq, journal_last_seq(j));
+ if (seq >= j->pin.back)
+ break;
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
journal_seq_pin(j, seq)->devs);
seq++;
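
Read together with the wake-up added in journal_entry_open() above, the reclaim thread can now park indefinitely instead of polling on a timeout. A condensed view of the pairing (both halves appear verbatim in this diff):

/* producer (journal.c): wake the parked thread just before the
 * empty pin fifo gains an entry */
if (fifo_empty(&j->pin) && j->reclaim_thread)
	wake_up_process(j->reclaim_thread);

/* consumer (this file): park when idle, else sleep until the next
 * scheduled reclaim */
if (journal_empty)
	freezable_schedule();
else if (time_after(j->next_reclaim, jiffies))
	freezable_schedule_timeout(j->next_reclaim - jiffies);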
diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h
index 6fd4581..071fcb4 100644
--- a/libbcachefs/journal_types.h
+++ b/libbcachefs/journal_types.h
@@ -240,6 +240,10 @@ struct journal {
spinlock_t err_lock;
struct mutex reclaim_lock;
+ /*
+ * Used for waiting until journal reclaim has freed up space in the
+ * journal:
+ */
wait_queue_head_t reclaim_wait;
struct task_struct *reclaim_thread;
bool reclaim_kicked;
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 6c4ffc5..8879715 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -578,6 +578,9 @@ static int bch2_journal_replay(struct bch_fs *c)
bch2_journal_set_replay_done(j);
bch2_journal_flush_all_pins(j);
ret = bch2_journal_error(j);
+
+ if (keys->nr && !ret)
+ bch2_journal_log_msg(&c->journal, "journal replay finished");
err:
kvfree(keys_sorted);
return ret;
diff --git a/libbcachefs/reflink.h b/libbcachefs/reflink.h
index 3745873..4da4330 100644
--- a/libbcachefs/reflink.h
+++ b/libbcachefs/reflink.h
@@ -10,7 +10,9 @@ bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
#define bch2_bkey_ops_reflink_p (struct bkey_ops) { \
.key_invalid = bch2_reflink_p_invalid, \
.val_to_text = bch2_reflink_p_to_text, \
- .key_merge = bch2_reflink_p_merge, \
+ .key_merge = bch2_reflink_p_merge, \
+ .trans_trigger = bch2_trans_mark_reflink_p, \
+ .atomic_trigger = bch2_mark_reflink_p, \
}
const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -21,6 +23,8 @@ void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
.key_invalid = bch2_reflink_v_invalid, \
.val_to_text = bch2_reflink_v_to_text, \
.swab = bch2_ptr_swab, \
+ .trans_trigger = bch2_trans_mark_extent, \
+ .atomic_trigger = bch2_mark_extent, \
}
const char *bch2_indirect_inline_data_invalid(const struct bch_fs *,
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index 57d6367..591bbb9 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -163,12 +163,10 @@ bch2_hash_lookup(struct btree_trans *trans,
if (ret)
return ret;
- for_each_btree_key_norestart(trans, *iter, desc.btree_id,
+ for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
SPOS(inum.inum, desc.hash_key(info, key), snapshot),
+ POS(inum.inum, U64_MAX),
BTREE_ITER_SLOTS|flags, k, ret) {
- if (iter->pos.inode != inum.inum)
- break;
-
if (is_visible_key(desc, inum, k)) {
if (!desc.cmp_key(k, key))
return 0;
@@ -199,15 +197,12 @@ bch2_hash_hole(struct btree_trans *trans,
if (ret)
return ret;
- for_each_btree_key_norestart(trans, *iter, desc.btree_id,
+ for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
SPOS(inum.inum, desc.hash_key(info, key), snapshot),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
- if (iter->pos.inode != inum.inum)
- break;
-
+ POS(inum.inum, U64_MAX),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret)
if (!is_visible_key(desc, inum, k))
return 0;
- }
bch2_trans_iter_exit(trans, iter);
return ret ?: -ENOSPC;
@@ -260,14 +255,12 @@ int bch2_hash_set(struct btree_trans *trans,
if (ret)
return ret;
- for_each_btree_key_norestart(trans, iter, desc.btree_id,
+ for_each_btree_key_upto_norestart(trans, iter, desc.btree_id,
SPOS(inum.inum,
desc.hash_bkey(info, bkey_i_to_s_c(insert)),
snapshot),
+ POS(inum.inum, U64_MAX),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
- if (iter.pos.inode != inum.inum)
- break;
-
if (is_visible_key(desc, inum, k)) {
if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert)))
goto found;
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 49e3885..3d6ece5 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -607,23 +607,32 @@ STORE(bch2_fs_opts_dir)
{
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
const struct bch_option *opt = container_of(attr, struct bch_option, attr);
- int ret, id = opt - bch2_opt_table;
+ int ret = size, id = opt - bch2_opt_table;
char *tmp;
u64 v;
+ /*
+ * We don't need to take c->writes for correctness, but it eliminates an
+ * unsightly error message in the dmesg log when we're RO:
+ */
+ if (unlikely(!percpu_ref_tryget(&c->writes)))
+ return -EROFS;
+
tmp = kstrdup(buf, GFP_KERNEL);
- if (!tmp)
- return -ENOMEM;
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto err;
+ }
ret = bch2_opt_parse(c, NULL, opt, strim(tmp), &v);
kfree(tmp);
if (ret < 0)
- return ret;
+ goto err;
ret = bch2_opt_check_may_set(c, id, v);
if (ret < 0)
- return ret;
+ goto err;
bch2_opt_set_sb(c, opt, v);
bch2_opt_set_by_id(&c->opts, id, v);
@@ -633,8 +642,9 @@ STORE(bch2_fs_opts_dir)
bch2_rebalance_add_work(c, S64_MAX);
rebalance_wakeup(c);
}
-
- return size;
+err:
+ percpu_ref_put(&c->writes);
+ return ret;
}
SYSFS_OPS(bch2_fs_opts_dir);
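
The tryget/put pair above is the standard percpu_ref guard: if c->writes is already dying because the filesystem is going read-only, the tryget fails and the store bails out early; otherwise every exit path must drop the ref, hence the single err label. The idiom in isolation (do_store() is a stand-in):

static ssize_t guarded_store(struct bch_fs *c, const char *buf, size_t size)
{
	ssize_t ret = size;

	if (unlikely(!percpu_ref_tryget(&c->writes)))
		return -EROFS;		/* fs is (going) read-only */

	if (do_store(c, buf) < 0)
		ret = -EINVAL;

	percpu_ref_put(&c->writes);	/* always dropped on exit */
	return ret;
}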
diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c
index 3addf40..4369bfc 100644
--- a/libbcachefs/tests.c
+++ b/libbcachefs/tests.c
@@ -15,15 +15,14 @@ static void delete_test_keys(struct bch_fs *c)
int ret;
ret = bch2_btree_delete_range(c, BTREE_ID_extents,
- POS_MIN, SPOS_MAX,
- BTREE_ITER_ALL_SNAPSHOTS,
+ SPOS(0, 0, U32_MAX), SPOS_MAX,
+ 0,
NULL);
BUG_ON(ret);
ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
- POS_MIN, SPOS_MAX,
- BTREE_ITER_ALL_SNAPSHOTS,
- NULL);
+ SPOS(0, 0, U32_MAX), SPOS_MAX,
+ 0, NULL);
BUG_ON(ret);
}
@@ -814,9 +813,8 @@ static int seq_delete(struct bch_fs *c, u64 nr)
int ret;
ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
- POS_MIN, SPOS_MAX,
- BTREE_ITER_ALL_SNAPSHOTS,
- NULL);
+ SPOS(0, 0, U32_MAX), SPOS_MAX,
+ 0, NULL);
if (ret)
bch_err(c, "error in seq_delete: %i", ret);
return ret;
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index 2c9e910..d6d7f1b 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -372,7 +372,7 @@ static inline void pr_time(struct printbuf *out, u64 _time)
#ifdef __KERNEL__
static inline void uuid_unparse_lower(u8 *uuid, char *out)
{
- sprintf(out, "%plU", uuid);
+ sprintf(out, "%pUb", uuid);
}
#else
#include <uuid/uuid.h>
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index c2e9520..1c680b1 100644
--- a/libbcachefs/xattr.c
+++ b/libbcachefs/xattr.c
@@ -311,13 +311,9 @@ retry:
if (ret)
goto err;
- for_each_btree_key_norestart(&trans, iter, BTREE_ID_xattrs,
- SPOS(inum, offset, snapshot), 0, k, ret) {
- BUG_ON(k.k->p.inode < inum);
-
- if (k.k->p.inode > inum)
- break;
-
+ for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_xattrs,
+ SPOS(inum, offset, snapshot),
+ POS(inum, U64_MAX), 0, k, ret) {
if (k.k->type != KEY_TYPE_xattr)
continue;
diff --git a/linux/zstd_compress_module.c b/linux/zstd_compress_module.c
new file mode 100644
index 0000000..35cc5cb
--- /dev/null
+++ b/linux/zstd_compress_module.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#define ZSTD_FORWARD_IF_ERR(ret) \
+ do { \
+ size_t const __ret = (ret); \
+ if (ZSTD_isError(__ret)) \
+ return __ret; \
+ } while (0)
+
+static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters,
+ unsigned long long pledged_src_size)
+{
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset(
+ cctx, ZSTD_reset_session_and_parameters));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize(
+ cctx, pledged_src_size));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_windowLog, parameters->cParams.windowLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_hashLog, parameters->cParams.hashLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_chainLog, parameters->cParams.chainLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_searchLog, parameters->cParams.searchLog));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_minMatch, parameters->cParams.minMatch));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_targetLength, parameters->cParams.targetLength));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_strategy, parameters->cParams.strategy));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag));
+ ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+ cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag));
+ return 0;
+}
+
+int zstd_min_clevel(void)
+{
+ return ZSTD_minCLevel();
+}
+EXPORT_SYMBOL(zstd_min_clevel);
+
+int zstd_max_clevel(void)
+{
+ return ZSTD_maxCLevel();
+}
+EXPORT_SYMBOL(zstd_max_clevel);
+
+size_t zstd_compress_bound(size_t src_size)
+{
+ return ZSTD_compressBound(src_size);
+}
+EXPORT_SYMBOL(zstd_compress_bound);
+
+zstd_parameters zstd_get_params(int level,
+ unsigned long long estimated_src_size)
+{
+ return ZSTD_getParams(level, estimated_src_size, 0);
+}
+EXPORT_SYMBOL(zstd_get_params);
+
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
+{
+ return ZSTD_estimateCCtxSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cctx_workspace_bound);
+
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ return ZSTD_initStaticCCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_cctx);
+
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size, const zstd_parameters *parameters)
+{
+ ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size));
+ return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_compress_cctx);
+
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
+{
+ return ZSTD_estimateCStreamSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cstream_workspace_bound);
+
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+ unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
+{
+ zstd_cstream *cstream;
+
+ if (workspace == NULL)
+ return NULL;
+
+ cstream = ZSTD_initStaticCStream(workspace, workspace_size);
+ if (cstream == NULL)
+ return NULL;
+
+ /* In the linux zstd API 0 means "unknown"; in the new zstd API it means an empty source */
+ if (pledged_src_size == 0)
+ pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
+
+ if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size)))
+ return NULL;
+
+ return cstream;
+}
+EXPORT_SYMBOL(zstd_init_cstream);
+
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+ unsigned long long pledged_src_size)
+{
+ return ZSTD_resetCStream(cstream, pledged_src_size);
+}
+EXPORT_SYMBOL(zstd_reset_cstream);
+
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+ zstd_in_buffer *input)
+{
+ return ZSTD_compressStream(cstream, output, input);
+}
+EXPORT_SYMBOL(zstd_compress_stream);
+
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+ return ZSTD_flushStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_flush_stream);
+
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+ return ZSTD_endStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_end_stream);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Compressor");
diff --git a/linux/zstd_decompress_module.c b/linux/zstd_decompress_module.c
new file mode 100644
index 0000000..7e8cd44
--- /dev/null
+++ b/linux/zstd_decompress_module.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+/* Common symbols. zstd_compress must depend on zstd_decompress. */
+
+unsigned int zstd_is_error(size_t code)
+{
+ return ZSTD_isError(code);
+}
+EXPORT_SYMBOL(zstd_is_error);
+
+zstd_error_code zstd_get_error_code(size_t code)
+{
+ return ZSTD_getErrorCode(code);
+}
+EXPORT_SYMBOL(zstd_get_error_code);
+
+const char *zstd_get_error_name(size_t code)
+{
+ return ZSTD_getErrorName(code);
+}
+EXPORT_SYMBOL(zstd_get_error_name);
+
+/* Decompression symbols. */
+
+size_t zstd_dctx_workspace_bound(void)
+{
+ return ZSTD_estimateDCtxSize();
+}
+EXPORT_SYMBOL(zstd_dctx_workspace_bound);
+
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ return ZSTD_initStaticDCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dctx);
+
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+ const void *src, size_t src_size)
+{
+ return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_decompress_dctx);
+
+size_t zstd_dstream_workspace_bound(size_t max_window_size)
+{
+ return ZSTD_estimateDStreamSize(max_window_size);
+}
+EXPORT_SYMBOL(zstd_dstream_workspace_bound);
+
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+ size_t workspace_size)
+{
+ if (workspace == NULL)
+ return NULL;
+ (void)max_window_size;
+ return ZSTD_initStaticDStream(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dstream);
+
+size_t zstd_reset_dstream(zstd_dstream *dstream)
+{
+ return ZSTD_resetDStream(dstream);
+}
+EXPORT_SYMBOL(zstd_reset_dstream);
+
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+ zstd_in_buffer *input)
+{
+ return ZSTD_decompressStream(dstream, output, input);
+}
+EXPORT_SYMBOL(zstd_decompress_stream);
+
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
+{
+ return ZSTD_findFrameCompressedSize(src, src_size);
+}
+EXPORT_SYMBOL(zstd_find_frame_compressed_size);
+
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
+ size_t src_size)
+{
+ return ZSTD_getFrameHeader(header, src, src_size);
+}
+EXPORT_SYMBOL(zstd_get_frame_header);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Decompressor");