diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-16 17:26:37 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-16 17:26:37 -0700 |
commit | 6968e6f8329d014920331dd2cf166ccd474b5299 (patch) | |
tree | a92c813c2f24b26c0eb6c294e1dc3d8d402211cc /drivers/md/dm-round-robin.c | |
parent | cae8da047b165aaf334fd87020c2ea7ee020c41c (diff) | |
parent | 98dbc9c6c61698792e3a66f32f3bf066201d42d7 (diff) |
Merge tag 'dm-4.6-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Most attention this cycle went to optimizing blk-mq request-based DM
(dm-mq) that is used exclusively by DM multipath:
- A stable fix for dm-mq that eliminates excessive context
switching offers the biggest performance improvement (for both
IOPS and throughput).
- But more work is needed, during the next cycle, to reduce
spinlock contention in DM multipath on large NUMA systems.
- A stable fix for a NULL pointer seen when DM stats is enabled on a DM
multipath device that must requeue an IO due to path failure.
- A stable fix for DM snapshot to disallow the COW and origin devices
from being identical. This amounts to graceful failure in the face
of userspace error because these devices shouldn't ever be identical.
- Stable fixes for DM cache and DM thin provisioning to address crashes
seen if/when their respective metadata device experiences failures
that cause the transition to 'fail_io' mode.
- The DM cache 'mq' policy is now an alias for the 'smq' policy. The
'smq' policy proved to be consistently better than 'mq'. As such
'mq', with all its complex user-facing tunables, has been eliminated.
- Improve DM thin provisioning to consistently return -ENOSPC once the
thin-pool's data volume is out of space.
- Improve DM core to properly handle error propagation if
bio_integrity_clone() fails in clone_bio().
- Other small cleanups and improvements to DM core.
* tag 'dm-4.6-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (41 commits)
dm: fix rq_end_stats() NULL pointer in dm_requeue_original_request()
dm thin: consistently return -ENOSPC if pool has run out of data space
dm cache: bump the target version
dm cache: make sure every metadata function checks fail_io
dm: add missing newline between DM_DEBUG_BLOCK_STACK_TRACING and DM_BUFIO
dm cache policy smq: clarify that mq registration failure was for 'mq'
dm: return error if bio_integrity_clone() fails in clone_bio()
dm thin metadata: don't issue prefetches if a transaction abort has failed
dm snapshot: disallow the COW and origin devices from being identical
dm cache: make the 'mq' policy an alias for 'smq'
dm: drop unnecessary assignment of md->queue
dm: reorder 'struct mapped_device' members to fix alignment and holes
dm: remove dummy definition of 'struct dm_table'
dm: add 'dm_numa_node' module parameter
dm thin metadata: remove needless newline from subtree_dec() DMERR message
dm mpath: cleanup reinstate_path() et al based on code review
dm mpath: remove __pgpath_busy forward declaration, rename to pgpath_busy
dm mpath: switch from 'unsigned' to 'bool' for flags where appropriate
dm round robin: use percpu 'repeat_count' and 'current_path'
dm path selector: remove 'repeat_count' return from .select_path hook
...
Diffstat (limited to 'drivers/md/dm-round-robin.c')
-rw-r--r-- | drivers/md/dm-round-robin.c | 85 |
1 file changed, 69 insertions, 16 deletions
diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c index 6ab1192cdd5f..4ace1da17db8 100644 --- a/drivers/md/dm-round-robin.c +++ b/drivers/md/dm-round-robin.c @@ -17,6 +17,8 @@ #include <linux/module.h> #define DM_MSG_PREFIX "multipath round-robin" +#define RR_MIN_IO 1000 +#define RR_VERSION "1.1.0" /*----------------------------------------------------------------- * Path-handling code, paths are held in lists @@ -41,23 +43,48 @@ static void free_paths(struct list_head *paths) * Round-robin selector *---------------------------------------------------------------*/ -#define RR_MIN_IO 1000 - struct selector { struct list_head valid_paths; struct list_head invalid_paths; + spinlock_t lock; + struct dm_path * __percpu *current_path; + struct percpu_counter repeat_count; }; +static void set_percpu_current_path(struct selector *s, struct dm_path *path) +{ + int cpu; + + for_each_possible_cpu(cpu) + *per_cpu_ptr(s->current_path, cpu) = path; +} + static struct selector *alloc_selector(void) { struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s) { - INIT_LIST_HEAD(&s->valid_paths); - INIT_LIST_HEAD(&s->invalid_paths); - } + if (!s) + return NULL; + + INIT_LIST_HEAD(&s->valid_paths); + INIT_LIST_HEAD(&s->invalid_paths); + spin_lock_init(&s->lock); + + s->current_path = alloc_percpu(struct dm_path *); + if (!s->current_path) + goto out_current_path; + set_percpu_current_path(s, NULL); + + if (percpu_counter_init(&s->repeat_count, 0, GFP_KERNEL)) + goto out_repeat_count; return s; + +out_repeat_count: + free_percpu(s->current_path); +out_current_path: + kfree(s); + return NULL;; } static int rr_create(struct path_selector *ps, unsigned argc, char **argv) @@ -74,10 +101,12 @@ static int rr_create(struct path_selector *ps, unsigned argc, char **argv) static void rr_destroy(struct path_selector *ps) { - struct selector *s = (struct selector *) ps->context; + struct selector *s = ps->context; free_paths(&s->valid_paths); 
free_paths(&s->invalid_paths); + free_percpu(s->current_path); + percpu_counter_destroy(&s->repeat_count); kfree(s); ps->context = NULL; } @@ -111,10 +140,11 @@ static int rr_status(struct path_selector *ps, struct dm_path *path, static int rr_add_path(struct path_selector *ps, struct dm_path *path, int argc, char **argv, char **error) { - struct selector *s = (struct selector *) ps->context; + struct selector *s = ps->context; struct path_info *pi; unsigned repeat_count = RR_MIN_IO; char dummy; + unsigned long flags; if (argc > 1) { *error = "round-robin ps: incorrect number of arguments"; @@ -139,42 +169,65 @@ static int rr_add_path(struct path_selector *ps, struct dm_path *path, path->pscontext = pi; + spin_lock_irqsave(&s->lock, flags); list_add_tail(&pi->list, &s->valid_paths); + spin_unlock_irqrestore(&s->lock, flags); return 0; } static void rr_fail_path(struct path_selector *ps, struct dm_path *p) { - struct selector *s = (struct selector *) ps->context; + unsigned long flags; + struct selector *s = ps->context; struct path_info *pi = p->pscontext; + spin_lock_irqsave(&s->lock, flags); + if (p == *this_cpu_ptr(s->current_path)) + set_percpu_current_path(s, NULL); + list_move(&pi->list, &s->invalid_paths); + spin_unlock_irqrestore(&s->lock, flags); } static int rr_reinstate_path(struct path_selector *ps, struct dm_path *p) { - struct selector *s = (struct selector *) ps->context; + unsigned long flags; + struct selector *s = ps->context; struct path_info *pi = p->pscontext; + spin_lock_irqsave(&s->lock, flags); list_move(&pi->list, &s->valid_paths); + spin_unlock_irqrestore(&s->lock, flags); return 0; } -static struct dm_path *rr_select_path(struct path_selector *ps, - unsigned *repeat_count, size_t nr_bytes) +static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes) { - struct selector *s = (struct selector *) ps->context; + unsigned long flags; + struct selector *s = ps->context; struct path_info *pi = NULL; + struct dm_path 
*current_path = NULL; + + current_path = *this_cpu_ptr(s->current_path); + if (current_path) { + percpu_counter_dec(&s->repeat_count); + if (percpu_counter_read_positive(&s->repeat_count) > 0) + return current_path; + } + spin_lock_irqsave(&s->lock, flags); if (!list_empty(&s->valid_paths)) { pi = list_entry(s->valid_paths.next, struct path_info, list); list_move_tail(&pi->list, &s->valid_paths); - *repeat_count = pi->repeat_count; + percpu_counter_set(&s->repeat_count, pi->repeat_count); + set_percpu_current_path(s, pi->path); + current_path = pi->path; } + spin_unlock_irqrestore(&s->lock, flags); - return pi ? pi->path : NULL; + return current_path; } static struct path_selector_type rr_ps = { @@ -198,7 +251,7 @@ static int __init dm_rr_init(void) if (r < 0) DMERR("register failed %d", r); - DMINFO("version 1.0.0 loaded"); + DMINFO("version " RR_VERSION " loaded"); return r; } |