summary | refs | log | tree | commit | diff
path: root/drivers/md/raid5-cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5-cache.c')
-rw-r--r-- drivers/md/raid5-cache.c 123
1 files changed, 85 insertions, 38 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index bfa1e907c472..f1c86d938502 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -23,7 +23,7 @@
#include <linux/types.h>
#include "md.h"
#include "raid5.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
#include "raid5-log.h"
/*
@@ -236,9 +236,10 @@ struct r5l_io_unit {
bool need_split_bio;
struct bio *split_bio;
- unsigned int has_flush:1; /* include flush request */
- unsigned int has_fua:1; /* include fua request */
- unsigned int has_null_flush:1; /* include empty flush request */
+ unsigned int has_flush:1; /* include flush request */
+ unsigned int has_fua:1; /* include fua request */
+ unsigned int has_null_flush:1; /* include null flush request */
+ unsigned int has_flush_payload:1; /* include flush payload */
/*
* io isn't sent yet, flush/fua request can only be submitted till it's
* the first IO in running_ios list
@@ -538,7 +539,7 @@ static void r5l_log_run_stripes(struct r5l_log *log)
{
struct r5l_io_unit *io, *next;
- assert_spin_locked(&log->io_list_lock);
+ lockdep_assert_held(&log->io_list_lock);
list_for_each_entry_safe(io, next, &log->running_ios, log_sibling) {
/* don't change list order */
@@ -554,7 +555,7 @@ static void r5l_move_to_end_ios(struct r5l_log *log)
{
struct r5l_io_unit *io, *next;
- assert_spin_locked(&log->io_list_lock);
+ lockdep_assert_held(&log->io_list_lock);
list_for_each_entry_safe(io, next, &log->running_ios, log_sibling) {
/* don't change list order */
@@ -571,6 +572,8 @@ static void r5l_log_endio(struct bio *bio)
struct r5l_io_unit *io_deferred;
struct r5l_log *log = io->log;
unsigned long flags;
+ bool has_null_flush;
+ bool has_flush_payload;
if (bio->bi_status)
md_error(log->rdev->mddev, log->rdev);
@@ -580,6 +583,16 @@ static void r5l_log_endio(struct bio *bio)
spin_lock_irqsave(&log->io_list_lock, flags);
__r5l_set_io_unit_state(io, IO_UNIT_IO_END);
+
+ /*
+ * if the io does not have null_flush or flush payload,
+ * it is not safe to access it after releasing io_list_lock.
+ * Therefore, it is necessary to check the condition with
+ * the lock held.
+ */
+ has_null_flush = io->has_null_flush;
+ has_flush_payload = io->has_flush_payload;
+
if (log->need_cache_flush && !list_empty(&io->stripe_list))
r5l_move_to_end_ios(log);
else
@@ -600,19 +613,23 @@ static void r5l_log_endio(struct bio *bio)
if (log->need_cache_flush)
md_wakeup_thread(log->rdev->mddev->thread);
- if (io->has_null_flush) {
+ /* finish flush only io_unit and PAYLOAD_FLUSH only io_unit */
+ if (has_null_flush) {
struct bio *bi;
WARN_ON(bio_list_empty(&io->flush_barriers));
while ((bi = bio_list_pop(&io->flush_barriers)) != NULL) {
bio_endio(bi);
- atomic_dec(&io->pending_stripe);
+ if (atomic_dec_and_test(&io->pending_stripe)) {
+ __r5l_stripe_write_finished(io);
+ return;
+ }
}
}
-
- /* finish flush only io_unit and PAYLOAD_FLUSH only io_unit */
- if (atomic_read(&io->pending_stripe) == 0)
- __r5l_stripe_write_finished(io);
+ /* decrease pending_stripe for flush payload */
+ if (has_flush_payload)
+ if (atomic_dec_and_test(&io->pending_stripe))
+ __r5l_stripe_write_finished(io);
}
static void r5l_do_submit_io(struct r5l_log *log, struct r5l_io_unit *io)
@@ -676,6 +693,8 @@ static void r5c_disable_writeback_async(struct work_struct *work)
struct r5l_log *log = container_of(work, struct r5l_log,
disable_writeback_work);
struct mddev *mddev = log->rdev->mddev;
+ struct r5conf *conf = mddev->private;
+ int locked = 0;
if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
return;
@@ -684,11 +703,15 @@ static void r5c_disable_writeback_async(struct work_struct *work)
/* wait superblock change before suspend */
wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
-
- mddev_suspend(mddev);
- log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
- mddev_resume(mddev);
+ conf->log == NULL ||
+ (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) &&
+ (locked = mddev_trylock(mddev))));
+ if (locked) {
+ mddev_suspend(mddev);
+ log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+ mddev_resume(mddev);
+ mddev_unlock(mddev);
+ }
}
static void r5l_submit_current_io(struct r5l_log *log)
@@ -728,7 +751,7 @@ static struct bio *r5l_bio_alloc(struct r5l_log *log)
struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, log->bs);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- bio->bi_bdev = log->rdev->bdev;
+ bio_set_dev(bio, log->rdev->bdev);
bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start;
return bio;
@@ -881,6 +904,11 @@ static void r5l_append_flush_payload(struct r5l_log *log, sector_t sect)
payload->size = cpu_to_le32(sizeof(__le64));
payload->flush_stripes[0] = cpu_to_le64(sect);
io->meta_offset += meta_size;
+ /* multiple flush payloads count as one pending_stripe */
+ if (!io->has_flush_payload) {
+ io->has_flush_payload = 1;
+ atomic_inc(&io->pending_stripe);
+ }
mutex_unlock(&log->io_mutex);
}
@@ -1172,7 +1200,7 @@ static void r5l_run_no_mem_stripe(struct r5l_log *log)
{
struct stripe_head *sh;
- assert_spin_locked(&log->io_list_lock);
+ lockdep_assert_held(&log->io_list_lock);
if (!list_empty(&log->no_mem_stripes)) {
sh = list_first_entry(&log->no_mem_stripes,
@@ -1188,7 +1216,7 @@ static bool r5l_complete_finished_ios(struct r5l_log *log)
struct r5l_io_unit *io, *next;
bool found = false;
- assert_spin_locked(&log->io_list_lock);
+ lockdep_assert_held(&log->io_list_lock);
list_for_each_entry_safe(io, next, &log->finished_ios, log_sibling) {
/* don't change list order */
@@ -1291,7 +1319,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log)
if (!do_flush)
return;
bio_reset(&log->flush_bio);
- log->flush_bio.bi_bdev = log->rdev->bdev;
+ bio_set_dev(&log->flush_bio, log->rdev->bdev);
log->flush_bio.bi_end_io = r5l_log_flush_endio;
log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
submit_bio(&log->flush_bio);
@@ -1360,7 +1388,7 @@ static void r5c_flush_stripe(struct r5conf *conf, struct stripe_head *sh)
* raid5_release_stripe() while holding conf->device_lock
*/
BUG_ON(test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
- assert_spin_locked(&conf->device_lock);
+ lockdep_assert_held(&conf->device_lock);
list_del_init(&sh->lru);
atomic_inc(&sh->count);
@@ -1387,7 +1415,7 @@ void r5c_flush_cache(struct r5conf *conf, int num)
int count;
struct stripe_head *sh, *next;
- assert_spin_locked(&conf->device_lock);
+ lockdep_assert_held(&conf->device_lock);
if (!conf->log)
return;
@@ -1561,21 +1589,21 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
md_wakeup_thread(log->reclaim_thread);
}
-void r5l_quiesce(struct r5l_log *log, int state)
+void r5l_quiesce(struct r5l_log *log, int quiesce)
{
struct mddev *mddev;
- if (!log || state == 2)
+ if (!log)
return;
- if (state == 0)
- kthread_unpark(log->reclaim_thread->tsk);
- else if (state == 1) {
+
+ if (quiesce) {
/* make sure r5l_write_super_and_discard_space exits */
mddev = log->rdev->mddev;
wake_up(&mddev->sb_wait);
kthread_park(log->reclaim_thread->tsk);
r5l_wake_reclaim(log, MaxSector);
r5l_do_reclaim(log);
- }
+ } else
+ kthread_unpark(log->reclaim_thread->tsk);
}
bool r5l_log_disk_error(struct r5conf *conf)
@@ -1669,7 +1697,7 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
sector_t offset)
{
bio_reset(ctx->ra_bio);
- ctx->ra_bio->bi_bdev = log->rdev->bdev;
+ bio_set_dev(ctx->ra_bio, log->rdev->bdev);
bio_set_op_attrs(ctx->ra_bio, REQ_OP_READ, 0);
ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset;
@@ -2507,11 +2535,18 @@ static void r5l_write_super(struct r5l_log *log, sector_t cp)
static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
{
- struct r5conf *conf = mddev->private;
+ struct r5conf *conf;
int ret;
- if (!conf->log)
+ ret = mddev_lock(mddev);
+ if (ret)
+ return ret;
+
+ conf = mddev->private;
+ if (!conf || !conf->log) {
+ mddev_unlock(mddev);
return 0;
+ }
switch (conf->log->r5c_journal_mode) {
case R5C_JOURNAL_MODE_WRITE_THROUGH:
@@ -2529,6 +2564,7 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
default:
ret = 0;
}
+ mddev_unlock(mddev);
return ret;
}
@@ -2540,23 +2576,32 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
*/
int r5c_journal_mode_set(struct mddev *mddev, int mode)
{
- struct r5conf *conf = mddev->private;
- struct r5l_log *log = conf->log;
-
- if (!log)
- return -ENODEV;
+ struct r5conf *conf;
+ int err;
if (mode < R5C_JOURNAL_MODE_WRITE_THROUGH ||
mode > R5C_JOURNAL_MODE_WRITE_BACK)
return -EINVAL;
+ err = mddev_lock(mddev);
+ if (err)
+ return err;
+ conf = mddev->private;
+ if (!conf || !conf->log) {
+ mddev_unlock(mddev);
+ return -ENODEV;
+ }
+
if (raid5_calc_degraded(conf) > 0 &&
- mode == R5C_JOURNAL_MODE_WRITE_BACK)
+ mode == R5C_JOURNAL_MODE_WRITE_BACK) {
+ mddev_unlock(mddev);
return -EINVAL;
+ }
mddev_suspend(mddev);
conf->log->r5c_journal_mode = mode;
mddev_resume(mddev);
+ mddev_unlock(mddev);
pr_debug("md/raid:%s: setting r5c cache mode to %d: %s\n",
mdname(mddev), mode, r5c_journal_mode_str[mode]);
@@ -3126,6 +3171,8 @@ void r5l_exit_log(struct r5conf *conf)
conf->log = NULL;
synchronize_rcu();
+ /* Ensure disable_writeback_work wakes up and exits */
+ wake_up(&conf->mddev->sb_wait);
flush_work(&log->disable_writeback_work);
md_unregister_thread(&log->reclaim_thread);
mempool_destroy(log->meta_pool);