author     Qu Wenruo <wqu@suse.com>            2021-06-07 17:02:58 +0800
committer  David Sterba <dsterba@suse.com>     2021-06-21 15:19:10 +0200
commit     3d078efae6f3854eadf9def9cbb4f30389c0c504 (patch)
tree       ad107df2ef923c9b8bb67043735063ebc1b82f8a /fs/btrfs/subpage.c
parent     bcd77455d590eaa0422a5e84ae852007cfce574a (diff)
btrfs: subpage: fix a rare race between metadata endio and eb freeing
[BUG]
There is a very rare ASSERT() triggering during full fstests runs for
subpage rw support. No other reproducer so far.

The ASSERT() gets triggered for metadata read in
btrfs_page_set_uptodate() inside end_page_read().

[CAUSE]
There is still a small race window, for metadata only, which can
happen like this:

                T1                  |              T2
------------------------------------+-----------------------------
end_bio_extent_readpage()           |
|- btrfs_validate_metadata_buffer() |
|  |- free_extent_buffer()          |
|     Still have 2 refs             |
|- end_page_read()                  |
   |- if (unlikely(PagePrivate()))  |
   |  The page still has Private    |
   |                                | free_extent_buffer()
   |                                | |  Only one ref left, will be
   |                                | |  released
   |                                | |- detach_extent_buffer_page()
   |                                |    |- btrfs_detach_subpage()
   |- btrfs_set_page_uptodate()     |
      The page no longer has Private|
      >>> ASSERT() triggered <<<    |

This race window is super small and thus pretty hard to hit, even
across many fstests runs. But the window is still there, so we have to
solve it some other way than relying on the racy PagePrivate() check.

The data path is not affected, as it locks the page before reading and
unlocks it only after the last read has finished, leaving no race
window.

[FIX]
Fix the bug by repurposing btrfs_subpage::readers, which is now a
member shared by both metadata and data. For the metadata path we skip
the page unlock, as metadata relies on extent locking only.

At the same time, teach page_range_has_eb() to take
btrfs_subpage::readers into consideration, so that even if the last eb
of a page gets freed, page::private won't be detached as long as there
are still pending end_page_read() calls.

This eliminates the race window. It will slightly increase metadata
memory usage, as pages may not be released as frequently as before,
but that should not be a big deal.

The code was introduced in ("btrfs: submit read time repair only for
each corrupted sector"), but the fix is kept in a separate patch to
preserve the problem description, and since the crash is rare it
should not hurt bisectability.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
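The page_range_has_eb() side of this fix lives in fs/btrfs/extent_io.c,
so it is not visible in the diff below (which is limited to
fs/btrfs/subpage.c). As a minimal sketch of the check described above,
assuming the btrfs_subpage fields of this series (eb_refs counting
attached extent buffers, readers counting pending reads), it could look
roughly like this:

static bool page_range_has_eb(struct btrfs_fs_info *fs_info, struct page *page)
{
	struct btrfs_subpage *subpage;

	/* Sketch, not part of this diff: callers hold the mapping's private_lock. */
	lockdep_assert_held(&page->mapping->private_lock);

	if (PagePrivate(page)) {
		subpage = (struct btrfs_subpage *)page->private;

		/* Any live extent buffer keeps page::private attached. */
		if (atomic_read(&subpage->eb_refs))
			return true;
		/*
		 * Even with no eb refs left, a pending end_page_read()
		 * call may still rely on page::private.
		 */
		if (atomic_read(&subpage->readers))
			return true;
	}
	return false;
}

With readers taken into account, detach_extent_buffer_page() cannot
strip page::private while T1 in the diagram above still has a reader
pending, so the ASSERT() in btrfs_page_set_uptodate() can no longer
trip.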
Diffstat (limited to 'fs/btrfs/subpage.c')
-rw-r--r--  fs/btrfs/subpage.c  19
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 7d72eaf5f972..640bcd21bf28 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -3,6 +3,7 @@
 #include <linux/slab.h>
 #include "ctree.h"
 #include "subpage.h"
+#include "btrfs_inode.h"
 
 /*
  * Subpage (sectorsize < PAGE_SIZE) support overview:
@@ -185,12 +186,10 @@ void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
 	const int nbits = len >> fs_info->sectorsize_bits;
-	int ret;
 
 	btrfs_subpage_assert(fs_info, page, start, len);
 
-	ret = atomic_add_return(nbits, &subpage->readers);
-	ASSERT(ret == nbits);
+	atomic_add(nbits, &subpage->readers);
 }
 
 void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
@@ -198,10 +197,22 @@ void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
 	const int nbits = len >> fs_info->sectorsize_bits;
+	bool is_data;
+	bool last;
 
 	btrfs_subpage_assert(fs_info, page, start, len);
+	is_data = is_data_inode(page->mapping->host);
 	ASSERT(atomic_read(&subpage->readers) >= nbits);
-	if (atomic_sub_and_test(nbits, &subpage->readers))
+	last = atomic_sub_and_test(nbits, &subpage->readers);
+
+	/*
+	 * For data we need to unlock the page if the last read has finished.
+	 *
+	 * Please don't replace @last with an atomic_sub_and_test() call
+	 * inside the if () condition, as we want atomic_sub_and_test()
+	 * to always be executed.
+	 */
+	if (is_data && last)
 		unlock_page(page);
 }