summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 09:41:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 09:41:48 -0700
commit6dec9f406c1f2de6d750de0fc9d19872d9c4bf0d (patch)
tree4366d1a872855ce92df282963dc7441fa6abccff /include
parent92b7e4923fdbeda9b86b1398127449d5353f9123 (diff)
parent5e548b32018d96c377fda4bdac2bf511a448ca67 (diff)
Merge tag 'for-5.9-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "We don't have any big feature updates this time, there are lots of small enhacements or fixes. A highlight perhaps is the parallel fsync performance improvements, numbers below. Regarding the dio/iomap that was reverted last time, the required API changes are likely to land in the upcoming cycle, the btrfs part will be updated afterwards. User visible changes: - new mount option rescue= to group all recovery-related mount options so we don't have many specific options, currently introducing only aliases for existing options, future extensions are in development to allow read-only mount with partially damaged structures: - usebackuproot is an alias for rescue=usebackuproot - nologreplay is an alias for rescue=nologreplay - start deprecation of mount option inode_cache, removal scheduled to v5.11 - removed deprecated mount options alloc_start and subvolrootid - device stats corruption counter gets incremented when a checksum mismatch is found - qgroup information exported in /sys/fs/btrfs/<UUID>/qgroups/<id> using sysfs - add link /sys/fs/btrfs/<UUID>/bdi pointing to the associated backing dev info - FS_INFO ioctl enhancements: - add flags to request/describe newly added items - new item: numeric checksum type and checksum size - new item: generation - new item: metadata_uuid - seed device: with one new read-write device added, print the new device information in /proc/mounts - balance: detect cancellation by Ctrl-C in existing cancellation points Performance improvements: - optimized versions of various helpers on little-endian architectures, where we don't have to do LE/BE conversion from on-disk format - tree-log/fsync optimizations leading to lower max latency reported by dbench, reduced by about 12% - all chunk tree leaves are prefetched at mount time, can improve mount time on large (terabyte-sized) filesystems - speed up parallel fsync of files with reflinked/deduped extents, with jobs 16 to 1024 the throughput gets improved roughly by 50% on average and runtime decreased roughly by 30% on average, notable outlier is 128 jobs with +121.2% on throughput and -54.6% runtime - another speed up of parallel fsync, reduce number of checksum tree lookups and contention, the improvements start to show up with 2 tasks with +20% throughput and -16% runtime up to 64 with +200% throughput and -66% runtime Core: - umount-time qgroup leak checker - qgroups - add a way to unreserve partial range after failure, avoiding some EDQUOT errors - improved flushing logic when EDQUOT is hit - possible EINTR interruption caused by failed reservations after transaction start is better handled and documented - transaction abort errors are unified to EROFS in case it's not the original reason of abort or we don't have other way to determine the reason Fixes: - make truncate succeed on a NOCOW file even if data space is exhausted - fix cancelling balance on filesystem with exhausted metadata space - anon block device: - preallocate anon bdev when subvolume is created to report failure early - shorten time the anon bdev id is allocated - don't allocate anon bdev for internal roots - minor memory leak in ref-verify - refuse invalid combinations of compression and NOCOW file flags - lockdep fixes, updating the device locks - remove obsolete fallback logic for block group profile adjustments when switching from 1 to more devices, causing allocation of unwanted block groups Other cleanups, refactoring, simplifications: - conversions from struct inode to struct btrfs_inode in internal functions - removal of unused struct members" * tag 'for-5.9-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (151 commits) btrfs: do not set the full sync flag on the inode during page release btrfs: release old extent maps during page release btrfs: fix race between page release and a fast fsync btrfs: open-code remount flag setting in btrfs_remount btrfs: if we're restriping, use the target restripe profile btrfs: don't adjust bg flags and use default allocation profiles btrfs: fix lockdep splat from btrfs_dump_space_info btrfs: move the chunk_mutex in btrfs_read_chunk_tree btrfs: open device without device_list_mutex btrfs: sysfs: use NOFS for device creation btrfs: return EROFS for BTRFS_FS_STATE_ERROR cases btrfs: document special case error codes for fs errors btrfs: don't WARN if we abort a transaction with EROFS btrfs: reduce contention on log trees when logging checksums btrfs: remove done label in writepage_delalloc btrfs: add comments for btrfs_reserve_flush_enum btrfs: relocation: review the call sites which can be interrupted by signal btrfs: avoid possible signal interruption of btrfs_drop_snapshot() on relocation tree btrfs: relocation: allow signal to cancel balance btrfs: raid56: remove out label in __raid56_parity_recover ...
Diffstat (limited to 'include')
-rw-r--r--include/trace/events/btrfs.h137
-rw-r--r--include/uapi/linux/btrfs.h21
-rw-r--r--include/uapi/linux/btrfs_tree.h4
3 files changed, 98 insertions, 64 deletions
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 360b0f9d2220..863335ecb7e8 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -31,13 +31,6 @@ struct extent_io_tree;
struct prelim_ref;
struct btrfs_space_info;
-TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR);
-TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS);
-TRACE_DEFINE_ENUM(FLUSH_DELALLOC);
-TRACE_DEFINE_ENUM(FLUSH_DELALLOC_WAIT);
-TRACE_DEFINE_ENUM(ALLOC_CHUNK);
-TRACE_DEFINE_ENUM(COMMIT_TRANS);
-
#define show_ref_type(type) \
__print_symbolic(type, \
{ BTRFS_TREE_BLOCK_REF_KEY, "TREE_BLOCK_REF" }, \
@@ -67,30 +60,72 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
(obj >= BTRFS_ROOT_TREE_OBJECTID && \
obj <= BTRFS_QUOTA_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
-#define show_fi_type(type) \
- __print_symbolic(type, \
- { BTRFS_FILE_EXTENT_INLINE, "INLINE" }, \
- { BTRFS_FILE_EXTENT_REG, "REG" }, \
- { BTRFS_FILE_EXTENT_PREALLOC, "PREALLOC"})
+#define FLUSH_ACTIONS \
+ EM( BTRFS_RESERVE_NO_FLUSH, "BTRFS_RESERVE_NO_FLUSH") \
+ EM( BTRFS_RESERVE_FLUSH_LIMIT, "BTRFS_RESERVE_FLUSH_LIMIT") \
+ EM( BTRFS_RESERVE_FLUSH_ALL, "BTRFS_RESERVE_FLUSH_ALL") \
+ EMe(BTRFS_RESERVE_FLUSH_ALL_STEAL, "BTRFS_RESERVE_FLUSH_ALL_STEAL")
+
+#define FI_TYPES \
+ EM( BTRFS_FILE_EXTENT_INLINE, "INLINE") \
+ EM( BTRFS_FILE_EXTENT_REG, "REG") \
+ EMe(BTRFS_FILE_EXTENT_PREALLOC, "PREALLOC")
+
+#define QGROUP_RSV_TYPES \
+ EM( BTRFS_QGROUP_RSV_DATA, "DATA") \
+ EM( BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS") \
+ EMe(BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC")
+
+#define IO_TREE_OWNER \
+ EM( IO_TREE_FS_PINNED_EXTENTS, "PINNED_EXTENTS") \
+ EM( IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS") \
+ EM( IO_TREE_INODE_IO, "INODE_IO") \
+ EM( IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE") \
+ EM( IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS") \
+ EM( IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES") \
+ EM( IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES") \
+ EM( IO_TREE_INODE_FILE_EXTENT, "INODE_FILE_EXTENT") \
+ EM( IO_TREE_LOG_CSUM_RANGE, "LOG_CSUM_RANGE") \
+ EMe(IO_TREE_SELFTEST, "SELFTEST")
+
+#define FLUSH_STATES \
+ EM( FLUSH_DELAYED_ITEMS_NR, "FLUSH_DELAYED_ITEMS_NR") \
+ EM( FLUSH_DELAYED_ITEMS, "FLUSH_DELAYED_ITEMS") \
+ EM( FLUSH_DELALLOC, "FLUSH_DELALLOC") \
+ EM( FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT") \
+ EM( FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR") \
+ EM( FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS") \
+ EM( ALLOC_CHUNK, "ALLOC_CHUNK") \
+ EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \
+ EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \
+ EMe(COMMIT_TRANS, "COMMIT_TRANS")
+
+/*
+ * First define the enums in the above macros to be exported to userspace via
+ * TRACE_DEFINE_ENUM().
+ */
+
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+FLUSH_ACTIONS
+FI_TYPES
+QGROUP_RSV_TYPES
+IO_TREE_OWNER
+FLUSH_STATES
+
+/*
+ * Now redefine the EM and EMe macros to map the enums to the strings that will
+ * be printed in the output
+ */
+
+#undef EM
+#undef EMe
+#define EM(a, b) {a, b},
+#define EMe(a, b) {a, b}
-#define show_qgroup_rsv_type(type) \
- __print_symbolic(type, \
- { BTRFS_QGROUP_RSV_DATA, "DATA" }, \
- { BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS" }, \
- { BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC" })
-
-#define show_extent_io_tree_owner(owner) \
- __print_symbolic(owner, \
- { IO_TREE_FS_PINNED_EXTENTS, "PINNED_EXTENTS" }, \
- { IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS" }, \
- { IO_TREE_INODE_IO, "INODE_IO" }, \
- { IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE" }, \
- { IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \
- { IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES" }, \
- { IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES" }, \
- { IO_TREE_INODE_FILE_EXTENT, "INODE_FILE_EXTENT" }, \
- { IO_TREE_LOG_CSUM_RANGE, "LOG_CSUM_RANGE" }, \
- { IO_TREE_SELFTEST, "SELFTEST" })
#define BTRFS_GROUP_FLAGS \
{ BTRFS_BLOCK_GROUP_DATA, "DATA"}, \
@@ -380,7 +415,7 @@ DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
__entry->disk_isize, __entry->extent_start,
__entry->extent_end, __entry->num_bytes, __entry->ram_bytes,
__entry->disk_bytenr, __entry->disk_num_bytes,
- __entry->extent_offset, show_fi_type(__entry->extent_type),
+ __entry->extent_offset, __print_symbolic(__entry->extent_type, FI_TYPES),
__entry->compression)
);
@@ -421,7 +456,7 @@ DECLARE_EVENT_CLASS(
"extent_type=%s compression=%u",
show_root_type(__entry->root_obj), __entry->ino, __entry->isize,
__entry->disk_isize, __entry->extent_start,
- __entry->extent_end, show_fi_type(__entry->extent_type),
+ __entry->extent_end, __print_symbolic(__entry->extent_type, FI_TYPES),
__entry->compression)
);
@@ -1042,12 +1077,6 @@ TRACE_EVENT(btrfs_space_reservation,
__entry->bytes)
);
-#define show_flush_action(action) \
- __print_symbolic(action, \
- { BTRFS_RESERVE_NO_FLUSH, "BTRFS_RESERVE_NO_FLUSH"}, \
- { BTRFS_RESERVE_FLUSH_LIMIT, "BTRFS_RESERVE_FLUSH_LIMIT"}, \
- { BTRFS_RESERVE_FLUSH_ALL, "BTRFS_RESERVE_FLUSH_ALL"})
-
TRACE_EVENT(btrfs_trigger_flush,
TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 bytes,
@@ -1071,25 +1100,13 @@ TRACE_EVENT(btrfs_trigger_flush,
TP_printk_btrfs("%s: flush=%d(%s) flags=%llu(%s) bytes=%llu",
__get_str(reason), __entry->flush,
- show_flush_action(__entry->flush),
+ __print_symbolic(__entry->flush, FLUSH_ACTIONS),
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
__entry->bytes)
);
-#define show_flush_state(state) \
- __print_symbolic(state, \
- { FLUSH_DELAYED_ITEMS_NR, "FLUSH_DELAYED_ITEMS_NR"}, \
- { FLUSH_DELAYED_ITEMS, "FLUSH_DELAYED_ITEMS"}, \
- { FLUSH_DELALLOC, "FLUSH_DELALLOC"}, \
- { FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT"}, \
- { FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR"}, \
- { FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS"}, \
- { ALLOC_CHUNK, "ALLOC_CHUNK"}, \
- { ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE"}, \
- { RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS"}, \
- { COMMIT_TRANS, "COMMIT_TRANS"})
TRACE_EVENT(btrfs_flush_space,
@@ -1114,7 +1131,7 @@ TRACE_EVENT(btrfs_flush_space,
TP_printk_btrfs("state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d",
__entry->state,
- show_flush_state(__entry->state),
+ __print_symbolic(__entry->state, FLUSH_STATES),
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
@@ -1690,7 +1707,7 @@ TRACE_EVENT(qgroup_update_reserve,
),
TP_printk_btrfs("qgid=%llu type=%s cur_reserved=%llu diff=%lld",
- __entry->qgid, show_qgroup_rsv_type(__entry->type),
+ __entry->qgid, __print_symbolic(__entry->type, QGROUP_RSV_TYPES),
__entry->cur_reserved, __entry->diff)
);
@@ -1714,7 +1731,7 @@ TRACE_EVENT(qgroup_meta_reserve,
TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
show_root_type(__entry->refroot),
- show_qgroup_rsv_type(__entry->type), __entry->diff)
+ __print_symbolic(__entry->type, QGROUP_RSV_TYPES), __entry->diff)
);
TRACE_EVENT(qgroup_meta_convert,
@@ -1735,8 +1752,8 @@ TRACE_EVENT(qgroup_meta_convert,
TP_printk_btrfs("refroot=%llu(%s) type=%s->%s diff=%lld",
show_root_type(__entry->refroot),
- show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PREALLOC),
- show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PERTRANS),
+ __print_symbolic(BTRFS_QGROUP_RSV_META_PREALLOC, QGROUP_RSV_TYPES),
+ __print_symbolic(BTRFS_QGROUP_RSV_META_PERTRANS, QGROUP_RSV_TYPES),
__entry->diff)
);
@@ -1762,7 +1779,7 @@ TRACE_EVENT(qgroup_meta_free_all_pertrans,
TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
show_root_type(__entry->refroot),
- show_qgroup_rsv_type(__entry->type), __entry->diff)
+ __print_symbolic(__entry->type, QGROUP_RSV_TYPES), __entry->diff)
);
DECLARE_EVENT_CLASS(btrfs__prelim_ref,
@@ -1920,7 +1937,7 @@ TRACE_EVENT(btrfs_set_extent_bit,
TP_printk_btrfs(
"io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s",
- show_extent_io_tree_owner(__entry->owner), __entry->ino,
+ __print_symbolic(__entry->owner, IO_TREE_OWNER), __entry->ino,
__entry->rootid, __entry->start, __entry->len,
__print_flags(__entry->set_bits, "|", EXTENT_FLAGS))
);
@@ -1959,7 +1976,7 @@ TRACE_EVENT(btrfs_clear_extent_bit,
TP_printk_btrfs(
"io_tree=%s ino=%llu root=%llu start=%llu len=%llu clear_bits=%s",
- show_extent_io_tree_owner(__entry->owner), __entry->ino,
+ __print_symbolic(__entry->owner, IO_TREE_OWNER), __entry->ino,
__entry->rootid, __entry->start, __entry->len,
__print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
);
@@ -2000,7 +2017,7 @@ TRACE_EVENT(btrfs_convert_extent_bit,
TP_printk_btrfs(
"io_tree=%s ino=%llu root=%llu start=%llu len=%llu set_bits=%s clear_bits=%s",
- show_extent_io_tree_owner(__entry->owner), __entry->ino,
+ __print_symbolic(__entry->owner, IO_TREE_OWNER), __entry->ino,
__entry->rootid, __entry->start, __entry->len,
__print_flags(__entry->set_bits , "|", EXTENT_FLAGS),
__print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index e6b6cb0f8bc6..2c39d15a2beb 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -243,6 +243,18 @@ struct btrfs_ioctl_dev_info_args {
__u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */
};
+/*
+ * Retrieve information about the filesystem
+ */
+
+/* Request information about checksum type and size */
+#define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0)
+
+/* Request information about filesystem generation */
+#define BTRFS_FS_INFO_FLAG_GENERATION (1 << 1)
+/* Request information about filesystem metadata UUID */
+#define BTRFS_FS_INFO_FLAG_METADATA_UUID (1 << 2)
+
struct btrfs_ioctl_fs_info_args {
__u64 max_id; /* out */
__u64 num_devices; /* out */
@@ -250,8 +262,13 @@ struct btrfs_ioctl_fs_info_args {
__u32 nodesize; /* out */
__u32 sectorsize; /* out */
__u32 clone_alignment; /* out */
- __u32 reserved32;
- __u64 reserved[122]; /* pad to 1k */
+ /* See BTRFS_FS_INFO_FLAG_* */
+ __u16 csum_type; /* out */
+ __u16 csum_size; /* out */
+ __u64 flags; /* in/out */
+ __u64 generation; /* out */
+ __u8 metadata_uuid[BTRFS_FSID_SIZE]; /* out */
+ __u8 reserved[944]; /* pad to 1k */
};
/*
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index a3f3975df0de..9ba64ca6b4ac 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -913,9 +913,9 @@ struct btrfs_free_space_info {
#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
#define BTRFS_QGROUP_LEVEL_SHIFT 48
-static inline __u64 btrfs_qgroup_level(__u64 qgroupid)
+static inline __u16 btrfs_qgroup_level(__u64 qgroupid)
{
- return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+ return (__u16)(qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT);
}
/*