summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2013-02-14 12:33:30 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2013-02-14 12:33:30 +1100
commit7080014ae36e7a6067afcf12f133595708d11db8 (patch)
tree925e631a4c87073282d58fadebc85a5aad37ae2c /drivers/md
parente083aec2cf2cb3fbea0b9f68cfbc7c548c36710f (diff)
parent70cc472f9c0dbd12f4ed4e3e869bf763a59a413f (diff)
Merge branch 'device-mapper/master'
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig24
-rw-r--r--drivers/md/dm-bio-prison.c146
-rw-r--r--drivers/md/dm-bio-prison.h47
-rw-r--r--drivers/md/dm-bufio.c2
-rw-r--r--drivers/md/dm-crypt.c6
-rw-r--r--drivers/md/dm-delay.c4
-rw-r--r--drivers/md/dm-flakey.c4
-rw-r--r--drivers/md/dm-ioctl.c116
-rw-r--r--drivers/md/dm-linear.c6
-rw-r--r--drivers/md/dm-mpath.c4
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/dm-raid1.c4
-rw-r--r--drivers/md/dm-snap.c12
-rw-r--r--drivers/md/dm-stripe.c20
-rw-r--r--drivers/md/dm-table.c11
-rw-r--r--drivers/md/dm-target.c2
-rw-r--r--drivers/md/dm-thin-metadata.c12
-rw-r--r--drivers/md/dm-thin.c192
-rw-r--r--drivers/md/dm-zero.c2
-rw-r--r--drivers/md/dm.c337
-rw-r--r--drivers/md/persistent-data/Kconfig2
-rw-r--r--drivers/md/persistent-data/Makefile2
-rw-r--r--drivers/md/persistent-data/dm-array.c808
-rw-r--r--drivers/md/persistent-data/dm-array.h166
-rw-r--r--drivers/md/persistent-data/dm-bitset.c163
-rw-r--r--drivers/md/persistent-data/dm-bitset.h165
-rw-r--r--drivers/md/persistent-data/dm-btree-internal.h1
-rw-r--r--drivers/md/persistent-data/dm-btree-spine.c7
-rw-r--r--drivers/md/persistent-data/dm-btree.c52
-rw-r--r--drivers/md/persistent-data/dm-btree.h15
30 files changed, 1940 insertions, 394 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 91a02eeeb319..7cdf359d6b23 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -210,7 +210,7 @@ config DM_DEBUG
config DM_BUFIO
tristate
- depends on BLK_DEV_DM && EXPERIMENTAL
+ depends on BLK_DEV_DM
---help---
This interface allows you to do buffered I/O on a device and acts
as a cache, holding recently-read blocks in memory and performing
@@ -218,7 +218,7 @@ config DM_BUFIO
config DM_BIO_PRISON
tristate
- depends on BLK_DEV_DM && EXPERIMENTAL
+ depends on BLK_DEV_DM
---help---
Some bio locking schemes used by other device-mapper targets
including thin provisioning.
@@ -251,8 +251,8 @@ config DM_SNAPSHOT
Allow volume managers to take writable snapshots of a device.
config DM_THIN_PROVISIONING
- tristate "Thin provisioning target (EXPERIMENTAL)"
- depends on BLK_DEV_DM && EXPERIMENTAL
+ tristate "Thin provisioning target"
+ depends on BLK_DEV_DM
select DM_PERSISTENT_DATA
select DM_BIO_PRISON
---help---
@@ -302,8 +302,8 @@ config DM_RAID
in one of the available parity distribution methods.
config DM_LOG_USERSPACE
- tristate "Mirror userspace logging (EXPERIMENTAL)"
- depends on DM_MIRROR && EXPERIMENTAL && NET
+ tristate "Mirror userspace logging"
+ depends on DM_MIRROR && NET
select CONNECTOR
---help---
The userspace logging module provides a mechanism for
@@ -350,8 +350,8 @@ config DM_MULTIPATH_ST
If unsure, say N.
config DM_DELAY
- tristate "I/O delaying target (EXPERIMENTAL)"
- depends on BLK_DEV_DM && EXPERIMENTAL
+ tristate "I/O delaying target"
+ depends on BLK_DEV_DM
---help---
A target that delays reads and/or writes and can send
them to different devices. Useful for testing.
@@ -365,14 +365,14 @@ config DM_UEVENT
Generate udev events for DM events.
config DM_FLAKEY
- tristate "Flakey target (EXPERIMENTAL)"
- depends on BLK_DEV_DM && EXPERIMENTAL
+ tristate "Flakey target"
+ depends on BLK_DEV_DM
---help---
A target that intermittently fails I/O for debugging purposes.
config DM_VERITY
- tristate "Verity target support (EXPERIMENTAL)"
- depends on BLK_DEV_DM && EXPERIMENTAL
+ tristate "Verity target support"
+ depends on BLK_DEV_DM
select CRYPTO
select CRYPTO_HASH
select DM_BUFIO
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index aefb78e3cbf9..4d670bbe963c 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c
@@ -14,14 +14,6 @@
/*----------------------------------------------------------------*/
-struct dm_bio_prison_cell {
- struct hlist_node list;
- struct dm_bio_prison *prison;
- struct dm_cell_key key;
- struct bio *holder;
- struct bio_list bios;
-};
-
struct dm_bio_prison {
spinlock_t lock;
mempool_t *cell_pool;
@@ -87,6 +79,19 @@ void dm_bio_prison_destroy(struct dm_bio_prison *prison)
}
EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);
+struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp)
+{
+ return mempool_alloc(prison->cell_pool, gfp);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell);
+
+void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell)
+{
+ mempool_free(cell, prison->cell_pool);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
+
static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
{
const unsigned long BIG_PRIME = 4294967291UL;
@@ -115,91 +120,86 @@ static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
return NULL;
}
-/*
- * This may block if a new cell needs allocating. You must ensure that
- * cells will be unlocked even if the calling thread is blocked.
- *
- * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
- */
-int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
- struct bio *inmate, struct dm_bio_prison_cell **ref)
+static void __setup_new_cell(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct bio *holder,
+ uint32_t hash,
+ struct dm_bio_prison_cell *cell)
{
- int r = 1;
- unsigned long flags;
- uint32_t hash = hash_key(prison, key);
- struct dm_bio_prison_cell *cell, *cell2;
-
- BUG_ON(hash > prison->nr_buckets);
-
- spin_lock_irqsave(&prison->lock, flags);
-
- cell = __search_bucket(prison->cells + hash, key);
- if (cell) {
- bio_list_add(&cell->bios, inmate);
- goto out;
- }
+ memcpy(&cell->key, key, sizeof(cell->key));
+ cell->holder = holder;
+ bio_list_init(&cell->bios);
+ hlist_add_head(&cell->list, prison->cells + hash);
+}
- /*
- * Allocate a new cell
- */
- spin_unlock_irqrestore(&prison->lock, flags);
- cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
- spin_lock_irqsave(&prison->lock, flags);
+static int __bio_detain(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct bio *inmate,
+ struct dm_bio_prison_cell *cell_prealloc,
+ struct dm_bio_prison_cell **cell_result)
+{
+ uint32_t hash = hash_key(prison, key);
+ struct dm_bio_prison_cell *cell;
- /*
- * We've been unlocked, so we have to double check that
- * nobody else has inserted this cell in the meantime.
- */
cell = __search_bucket(prison->cells + hash, key);
if (cell) {
- mempool_free(cell2, prison->cell_pool);
- bio_list_add(&cell->bios, inmate);
- goto out;
+ if (inmate)
+ bio_list_add(&cell->bios, inmate);
+ *cell_result = cell;
+ return 1;
}
- /*
- * Use new cell.
- */
- cell = cell2;
-
- cell->prison = prison;
- memcpy(&cell->key, key, sizeof(cell->key));
- cell->holder = inmate;
- bio_list_init(&cell->bios);
- hlist_add_head(&cell->list, prison->cells + hash);
+ __setup_new_cell(prison, key, inmate, hash, cell_prealloc);
+ *cell_result = cell_prealloc;
+ return 0;
+}
- r = 0;
+static int bio_detain(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct bio *inmate,
+ struct dm_bio_prison_cell *cell_prealloc,
+ struct dm_bio_prison_cell **cell_result)
+{
+ int r;
+ unsigned long flags;
-out:
+ spin_lock_irqsave(&prison->lock, flags);
+ r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
spin_unlock_irqrestore(&prison->lock, flags);
- *ref = cell;
-
return r;
}
+
+int dm_bio_detain(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct bio *inmate,
+ struct dm_bio_prison_cell *cell_prealloc,
+ struct dm_bio_prison_cell **cell_result)
+{
+ return bio_detain(prison, key, inmate, cell_prealloc, cell_result);
+}
EXPORT_SYMBOL_GPL(dm_bio_detain);
/*
* @inmates must have been initialised prior to this call
*/
-static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+static void __cell_release(struct dm_bio_prison_cell *cell,
+ struct bio_list *inmates)
{
- struct dm_bio_prison *prison = cell->prison;
-
hlist_del(&cell->list);
if (inmates) {
- bio_list_add(inmates, cell->holder);
+ if (cell->holder)
+ bio_list_add(inmates, cell->holder);
bio_list_merge(inmates, &cell->bios);
}
-
- mempool_free(cell, prison->cell_pool);
}
-void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
+void dm_cell_release(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell,
+ struct bio_list *bios)
{
unsigned long flags;
- struct dm_bio_prison *prison = cell->prison;
spin_lock_irqsave(&prison->lock, flags);
__cell_release(cell, bios);
@@ -210,20 +210,18 @@ EXPORT_SYMBOL_GPL(dm_cell_release);
/*
* Sometimes we don't want the holder, just the additional bios.
*/
-static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
+ struct bio_list *inmates)
{
- struct dm_bio_prison *prison = cell->prison;
-
hlist_del(&cell->list);
bio_list_merge(inmates, &cell->bios);
-
- mempool_free(cell, prison->cell_pool);
}
-void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+void dm_cell_release_no_holder(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell,
+ struct bio_list *inmates)
{
unsigned long flags;
- struct dm_bio_prison *prison = cell->prison;
spin_lock_irqsave(&prison->lock, flags);
__cell_release_no_holder(cell, inmates);
@@ -231,9 +229,9 @@ void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
-void dm_cell_error(struct dm_bio_prison_cell *cell)
+void dm_cell_error(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell)
{
- struct dm_bio_prison *prison = cell->prison;
struct bio_list bios;
struct bio *bio;
unsigned long flags;
diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h
index 53d1a7a84e2f..981a02d3a055 100644
--- a/drivers/md/dm-bio-prison.h
+++ b/drivers/md/dm-bio-prison.h
@@ -22,7 +22,6 @@
* subsequently unlocked the bios become available.
*/
struct dm_bio_prison;
-struct dm_bio_prison_cell;
/* FIXME: this needs to be more abstract */
struct dm_cell_key {
@@ -31,21 +30,51 @@ struct dm_cell_key {
dm_block_t block;
};
+/*
+ * Treat this as opaque, only in header so callers can manage allocation
+ * themselves.
+ */
+struct dm_bio_prison_cell {
+ struct hlist_node list;
+ struct dm_cell_key key;
+ struct bio *holder;
+ struct bio_list bios;
+};
+
struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells);
void dm_bio_prison_destroy(struct dm_bio_prison *prison);
/*
- * This may block if a new cell needs allocating. You must ensure that
- * cells will be unlocked even if the calling thread is blocked.
+ * These two functions just wrap a mempool. This is a transitory step:
+ * Eventually all bio prison clients should manage their own cell memory.
*
- * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
+ * Like mempool_alloc(), dm_bio_prison_alloc_cell() can only fail if called
+ * in interrupt context or passed GFP_NOWAIT.
*/
-int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
- struct bio *inmate, struct dm_bio_prison_cell **ref);
+struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison,
+ gfp_t gfp);
+void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell);
-void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios);
-void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates);
-void dm_cell_error(struct dm_bio_prison_cell *cell);
+/*
+ * An atomic op that combines retrieving a cell, and adding a bio to it.
+ *
+ * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
+ */
+int dm_bio_detain(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct bio *inmate,
+ struct dm_bio_prison_cell *cell_prealloc,
+ struct dm_bio_prison_cell **cell_result);
+
+void dm_cell_release(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell,
+ struct bio_list *bios);
+void dm_cell_release_no_holder(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell,
+ struct bio_list *inmates);
+void dm_cell_error(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell);
/*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 651ca79881dd..f12fdbcc4f76 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1193,7 +1193,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
int dm_bufio_issue_flush(struct dm_bufio_client *c)
{
struct dm_io_request io_req = {
- .bi_rw = REQ_FLUSH,
+ .bi_rw = WRITE_FLUSH,
.mem.type = DM_IO_KMEM,
.mem.ptr.addr = NULL,
.client = c->dm_io,
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index f7369f9d8595..2f8312a043de 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1651,7 +1651,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (opt_params == 1 && opt_string &&
!strcasecmp(opt_string, "allow_discards"))
- ti->num_discard_requests = 1;
+ ti->num_discard_bios = 1;
else if (opt_params) {
ret = -EINVAL;
ti->error = "Invalid feature arguments";
@@ -1679,7 +1679,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- ti->num_flush_requests = 1;
+ ti->num_flush_bios = 1;
ti->discard_zeroes_data_unsupported = true;
return 0;
@@ -1746,7 +1746,7 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
cc->dev->name, (unsigned long long)cc->start);
- if (ti->num_discard_requests)
+ if (ti->num_discard_bios)
DMEMIT(" 1 allow_discards");
break;
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index cc1bd048acb2..6ff7a7e390c7 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -198,8 +198,8 @@ out:
mutex_init(&dc->timer_lock);
atomic_set(&dc->may_delay, 1);
- ti->num_flush_requests = 1;
- ti->num_discard_requests = 1;
+ ti->num_flush_bios = 1;
+ ti->num_discard_bios = 1;
ti->private = dc;
return 0;
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 9721f2ffb1a2..12548de0ebc8 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -216,8 +216,8 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- ti->num_flush_requests = 1;
- ti->num_discard_requests = 1;
+ ti->num_flush_bios = 1;
+ ti->num_discard_bios = 1;
ti->per_bio_data_size = sizeof(struct per_bio_data);
ti->private = fc;
return 0;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 0666b5d14b88..0b37697585dc 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1474,39 +1474,52 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
return r;
}
+/*
+ * The ioctl parameter block consists of two parts, a dm_ioctl struct
+ * followed by a data buffer. This flag is set if the second part,
+ * which has a variable size, is not used by the function processing
+ * the ioctl.
+ */
+#define IOCTL_FLAGS_NO_PARAMS 1
+
/*-----------------------------------------------------------------
* Implementation of open/close/ioctl on the special char
* device.
*---------------------------------------------------------------*/
-static ioctl_fn lookup_ioctl(unsigned int cmd)
+static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
{
static struct {
int cmd;
+ int flags;
ioctl_fn fn;
} _ioctls[] = {
- {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */
- {DM_REMOVE_ALL_CMD, remove_all},
- {DM_LIST_DEVICES_CMD, list_devices},
-
- {DM_DEV_CREATE_CMD, dev_create},
- {DM_DEV_REMOVE_CMD, dev_remove},
- {DM_DEV_RENAME_CMD, dev_rename},
- {DM_DEV_SUSPEND_CMD, dev_suspend},
- {DM_DEV_STATUS_CMD, dev_status},
- {DM_DEV_WAIT_CMD, dev_wait},
-
- {DM_TABLE_LOAD_CMD, table_load},
- {DM_TABLE_CLEAR_CMD, table_clear},
- {DM_TABLE_DEPS_CMD, table_deps},
- {DM_TABLE_STATUS_CMD, table_status},
-
- {DM_LIST_VERSIONS_CMD, list_versions},
-
- {DM_TARGET_MSG_CMD, target_message},
- {DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry}
+ {DM_VERSION_CMD, 0, NULL}, /* version is dealt with elsewhere */
+ {DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS, remove_all},
+ {DM_LIST_DEVICES_CMD, 0, list_devices},
+
+ {DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_create},
+ {DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_remove},
+ {DM_DEV_RENAME_CMD, 0, dev_rename},
+ {DM_DEV_SUSPEND_CMD, IOCTL_FLAGS_NO_PARAMS, dev_suspend},
+ {DM_DEV_STATUS_CMD, IOCTL_FLAGS_NO_PARAMS, dev_status},
+ {DM_DEV_WAIT_CMD, 0, dev_wait},
+
+ {DM_TABLE_LOAD_CMD, 0, table_load},
+ {DM_TABLE_CLEAR_CMD, IOCTL_FLAGS_NO_PARAMS, table_clear},
+ {DM_TABLE_DEPS_CMD, 0, table_deps},
+ {DM_TABLE_STATUS_CMD, 0, table_status},
+
+ {DM_LIST_VERSIONS_CMD, 0, list_versions},
+
+ {DM_TARGET_MSG_CMD, 0, target_message},
+ {DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry}
};
- return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
+ if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
+ return NULL;
+
+ *ioctl_flags = _ioctls[cmd].flags;
+ return _ioctls[cmd].fn;
}
/*
@@ -1543,7 +1556,8 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user)
return r;
}
-#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */
+#define DM_PARAMS_KMALLOC 0x0001 /* Params alloced with kmalloc */
+#define DM_PARAMS_VMALLOC 0x0002 /* Params alloced with vmalloc */
#define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */
static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags)
@@ -1551,66 +1565,80 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla
if (param_flags & DM_WIPE_BUFFER)
memset(param, 0, param_size);
+ if (param_flags & DM_PARAMS_KMALLOC)
+ kfree(param);
if (param_flags & DM_PARAMS_VMALLOC)
vfree(param);
- else
- kfree(param);
}
-static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags)
+static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel,
+ int ioctl_flags,
+ struct dm_ioctl **param, int *param_flags)
{
- struct dm_ioctl tmp, *dmi;
+ struct dm_ioctl *dmi;
int secure_data;
+ const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data);
- if (copy_from_user(&tmp, user, sizeof(tmp) - sizeof(tmp.data)))
+ if (copy_from_user(param_kernel, user, minimum_data_size))
return -EFAULT;
- if (tmp.data_size < (sizeof(tmp) - sizeof(tmp.data)))
+ if (param_kernel->data_size < minimum_data_size)
return -EINVAL;
- secure_data = tmp.flags & DM_SECURE_DATA_FLAG;
+ secure_data = param_kernel->flags & DM_SECURE_DATA_FLAG;
*param_flags = secure_data ? DM_WIPE_BUFFER : 0;
+ if (ioctl_flags & IOCTL_FLAGS_NO_PARAMS) {
+ dmi = param_kernel;
+ dmi->data_size = minimum_data_size;
+ goto data_copied;
+ }
+
/*
* Try to avoid low memory issues when a device is suspended.
* Use kmalloc() rather than vmalloc() when we can.
*/
dmi = NULL;
- if (tmp.data_size <= KMALLOC_MAX_SIZE)
- dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+ if (param_kernel->data_size <= KMALLOC_MAX_SIZE) {
+ dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+ if (dmi)
+ *param_flags |= DM_PARAMS_KMALLOC;
+ }
if (!dmi) {
- dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL);
- *param_flags |= DM_PARAMS_VMALLOC;
+ dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL);
+ if (dmi)
+ *param_flags |= DM_PARAMS_VMALLOC;
}
if (!dmi) {
- if (secure_data && clear_user(user, tmp.data_size))
+ if (secure_data && clear_user(user, param_kernel->data_size))
return -EFAULT;
return -ENOMEM;
}
- if (copy_from_user(dmi, user, tmp.data_size))
+ if (copy_from_user(dmi, user, param_kernel->data_size))
goto bad;
+data_copied:
/*
* Abort if something changed the ioctl data while it was being copied.
*/
- if (dmi->data_size != tmp.data_size) {
+ if (dmi->data_size != param_kernel->data_size) {
DMERR("rejecting ioctl: data size modified while processing parameters");
goto bad;
}
/* Wipe the user buffer so we do not return it to userspace */
- if (secure_data && clear_user(user, tmp.data_size))
+ if (secure_data && clear_user(user, param_kernel->data_size))
goto bad;
*param = dmi;
return 0;
bad:
- free_params(dmi, tmp.data_size, *param_flags);
+ free_params(dmi, param_kernel->data_size, *param_flags);
return -EFAULT;
}
@@ -1648,11 +1676,13 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
{
int r = 0;
+ int ioctl_flags;
int param_flags;
unsigned int cmd;
struct dm_ioctl *uninitialized_var(param);
ioctl_fn fn = NULL;
size_t input_param_size;
+ struct dm_ioctl param_kernel;
/* only root can play with this */
if (!capable(CAP_SYS_ADMIN))
@@ -1677,7 +1707,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
if (cmd == DM_VERSION_CMD)
return 0;
- fn = lookup_ioctl(cmd);
+ fn = lookup_ioctl(cmd, &ioctl_flags);
if (!fn) {
DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
return -ENOTTY;
@@ -1686,7 +1716,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
/*
* Copy the parameters into kernel space.
*/
- r = copy_params(user, &param, &param_flags);
+ r = copy_params(user, &param_kernel, ioctl_flags, &param, &param_flags);
if (r)
return r;
@@ -1699,6 +1729,10 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
param->data_size = sizeof(*param);
r = fn(param, input_param_size);
+ if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) &&
+ unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS))
+ DMERR("ioctl %d tried to output some data but has IOCTL_FLAGS_NO_PARAMS set", cmd);
+
/*
* Copy the results back to userland.
*/
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 328cad5617ab..dd94a9866ba7 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -53,9 +53,9 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- ti->num_flush_requests = 1;
- ti->num_discard_requests = 1;
- ti->num_write_same_requests = 1;
+ ti->num_flush_bios = 1;
+ ti->num_discard_bios = 1;
+ ti->num_write_same_bios = 1;
ti->private = lc;
return 0;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 573bd04591bf..9522f47d4f2b 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -905,8 +905,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
goto bad;
}
- ti->num_flush_requests = 1;
- ti->num_discard_requests = 1;
+ ti->num_flush_bios = 1;
+ ti->num_discard_bios = 1;
return 0;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 9e58dbd8d8cb..a3e115b62186 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1151,7 +1151,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
INIT_WORK(&rs->md.event_work, do_table_event);
ti->private = rs;
- ti->num_flush_requests = 1;
+ ti->num_flush_bios = 1;
mutex_lock(&rs->md.reconfig_mutex);
ret = md_run(&rs->md);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index fa519185ebba..d6cdeef8c805 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1072,8 +1072,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (r)
goto err_free_context;
- ti->num_flush_requests = 1;
- ti->num_discard_requests = 1;
+ ti->num_flush_bios = 1;
+ ti->num_discard_bios = 1;
ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record);
ti->discard_zeroes_data_unsupported = true;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 59fc18ae52c2..9fe53a360b4e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1038,7 +1038,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
int i;
int r = -EINVAL;
char *origin_path, *cow_path;
- unsigned args_used, num_flush_requests = 1;
+ unsigned args_used, num_flush_bios = 1;
fmode_t origin_mode = FMODE_READ;
if (argc != 4) {
@@ -1048,7 +1048,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
if (dm_target_is_snapshot_merge(ti)) {
- num_flush_requests = 2;
+ num_flush_bios = 2;
origin_mode = FMODE_WRITE;
}
@@ -1128,7 +1128,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
spin_lock_init(&s->tracked_chunk_lock);
ti->private = s;
- ti->num_flush_requests = num_flush_requests;
+ ti->num_flush_bios = num_flush_bios;
ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk);
/* Add snapshot to the list of snapshots for this origin */
@@ -1692,7 +1692,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
init_tracked_chunk(bio);
if (bio->bi_rw & REQ_FLUSH) {
- if (!dm_bio_get_target_request_nr(bio))
+ if (!dm_bio_get_target_bio_nr(bio))
bio->bi_bdev = s->origin->bdev;
else
bio->bi_bdev = s->cow->bdev;
@@ -2105,7 +2105,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ti->private = dev;
- ti->num_flush_requests = 1;
+ ti->num_flush_bios = 1;
return 0;
}
@@ -2307,3 +2307,5 @@ module_exit(dm_snapshot_exit);
MODULE_DESCRIPTION(DM_NAME " snapshot target");
MODULE_AUTHOR("Joe Thornber");
MODULE_LICENSE("GPL");
+MODULE_ALIAS("dm-snapshot-origin");
+MODULE_ALIAS("dm-snapshot-merge");
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index c89cde86d400..afc547d63b03 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -160,9 +160,9 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (r)
return r;
- ti->num_flush_requests = stripes;
- ti->num_discard_requests = stripes;
- ti->num_write_same_requests = stripes;
+ ti->num_flush_bios = stripes;
+ ti->num_discard_bios = stripes;
+ ti->num_write_same_bios = stripes;
sc->chunk_size = chunk_size;
if (chunk_size & (chunk_size - 1))
@@ -276,19 +276,19 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
{
struct stripe_c *sc = ti->private;
uint32_t stripe;
- unsigned target_request_nr;
+ unsigned target_bio_nr;
if (bio->bi_rw & REQ_FLUSH) {
- target_request_nr = dm_bio_get_target_request_nr(bio);
- BUG_ON(target_request_nr >= sc->stripes);
- bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
+ target_bio_nr = dm_bio_get_target_bio_nr(bio);
+ BUG_ON(target_bio_nr >= sc->stripes);
+ bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev;
return DM_MAPIO_REMAPPED;
}
if (unlikely(bio->bi_rw & REQ_DISCARD) ||
unlikely(bio->bi_rw & REQ_WRITE_SAME)) {
- target_request_nr = dm_bio_get_target_request_nr(bio);
- BUG_ON(target_request_nr >= sc->stripes);
- return stripe_map_range(sc, bio, target_request_nr);
+ target_bio_nr = dm_bio_get_target_bio_nr(bio);
+ BUG_ON(target_bio_nr >= sc->stripes);
+ return stripe_map_range(sc, bio, target_bio_nr);
}
stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index daf25d0890b3..e50dad0c65f4 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -217,7 +217,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
if (alloc_targets(t, num_targets)) {
kfree(t);
- t = NULL;
return -ENOMEM;
}
@@ -823,8 +822,8 @@ int dm_table_add_target(struct dm_table *t, const char *type,
t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
- if (!tgt->num_discard_requests && tgt->discards_supported)
- DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.",
+ if (!tgt->num_discard_bios && tgt->discards_supported)
+ DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
dm_device_name(t->md), type);
return 0;
@@ -1360,7 +1359,7 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
while (i < dm_table_get_num_targets(t)) {
ti = dm_table_get_target(t, i++);
- if (!ti->num_flush_requests)
+ if (!ti->num_flush_bios)
continue;
if (ti->flush_supported)
@@ -1439,7 +1438,7 @@ static bool dm_table_supports_write_same(struct dm_table *t)
while (i < dm_table_get_num_targets(t)) {
ti = dm_table_get_target(t, i++);
- if (!ti->num_write_same_requests)
+ if (!ti->num_write_same_bios)
return false;
if (!ti->type->iterate_devices ||
@@ -1657,7 +1656,7 @@ bool dm_table_supports_discards(struct dm_table *t)
while (i < dm_table_get_num_targets(t)) {
ti = dm_table_get_target(t, i++);
- if (!ti->num_discard_requests)
+ if (!ti->num_discard_bios)
continue;
if (ti->discards_supported)
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 617d21a77256..37ba5db71cd9 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -116,7 +116,7 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
/*
* Return error for discards instead of -EOPNOTSUPP
*/
- tt->num_discard_requests = 1;
+ tt->num_discard_bios = 1;
return 0;
}
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 4d6e85367b84..00cee02f8fc9 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -280,7 +280,7 @@ static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
*t = v & ((1 << 24) - 1);
}
-static void data_block_inc(void *context, void *value_le)
+static void data_block_inc(void *context, const void *value_le)
{
struct dm_space_map *sm = context;
__le64 v_le;
@@ -292,7 +292,7 @@ static void data_block_inc(void *context, void *value_le)
dm_sm_inc_block(sm, b);
}
-static void data_block_dec(void *context, void *value_le)
+static void data_block_dec(void *context, const void *value_le)
{
struct dm_space_map *sm = context;
__le64 v_le;
@@ -304,7 +304,7 @@ static void data_block_dec(void *context, void *value_le)
dm_sm_dec_block(sm, b);
}
-static int data_block_equal(void *context, void *value1_le, void *value2_le)
+static int data_block_equal(void *context, const void *value1_le, const void *value2_le)
{
__le64 v1_le, v2_le;
uint64_t b1, b2;
@@ -318,7 +318,7 @@ static int data_block_equal(void *context, void *value1_le, void *value2_le)
return b1 == b2;
}
-static void subtree_inc(void *context, void *value)
+static void subtree_inc(void *context, const void *value)
{
struct dm_btree_info *info = context;
__le64 root_le;
@@ -329,7 +329,7 @@ static void subtree_inc(void *context, void *value)
dm_tm_inc(info->tm, root);
}
-static void subtree_dec(void *context, void *value)
+static void subtree_dec(void *context, const void *value)
{
struct dm_btree_info *info = context;
__le64 root_le;
@@ -341,7 +341,7 @@ static void subtree_dec(void *context, void *value)
DMERR("btree delete failed\n");
}
-static int subtree_equal(void *context, void *value1_le, void *value2_le)
+static int subtree_equal(void *context, const void *value1_le, const void *value2_le)
{
__le64 v1_le, v2_le;
memcpy(&v1_le, value1_le, sizeof(v1_le));
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 5409607d4875..c15d13cb0ad7 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -227,6 +227,78 @@ struct thin_c {
/*----------------------------------------------------------------*/
/*
+ * wake_worker() is used when new work is queued and when pool_resume is
+ * ready to continue deferred IO processing.
+ */
+static void wake_worker(struct pool *pool)
+{
+ queue_work(pool->wq, &pool->worker);
+}
+
+/*----------------------------------------------------------------*/
+
+static int bio_detain(struct pool *pool, struct dm_cell_key *key, struct bio *bio,
+ struct dm_bio_prison_cell **cell_result)
+{
+ int r;
+ struct dm_bio_prison_cell *cell_prealloc;
+
+ /*
+ * Allocate a cell from the prison's mempool.
+ * This might block but it can't fail.
+ */
+ cell_prealloc = dm_bio_prison_alloc_cell(pool->prison, GFP_NOIO);
+
+ r = dm_bio_detain(pool->prison, key, bio, cell_prealloc, cell_result);
+ if (r)
+ /*
+ * We reused an old cell; we can get rid of
+ * the new one.
+ */
+ dm_bio_prison_free_cell(pool->prison, cell_prealloc);
+
+ return r;
+}
+
+static void cell_release(struct pool *pool,
+ struct dm_bio_prison_cell *cell,
+ struct bio_list *bios)
+{
+ dm_cell_release(pool->prison, cell, bios);
+ dm_bio_prison_free_cell(pool->prison, cell);
+}
+
+static void cell_release_no_holder(struct pool *pool,
+ struct dm_bio_prison_cell *cell,
+ struct bio_list *bios)
+{
+ dm_cell_release_no_holder(pool->prison, cell, bios);
+ dm_bio_prison_free_cell(pool->prison, cell);
+}
+
+static void cell_defer_no_holder_no_free(struct thin_c *tc,
+ struct dm_bio_prison_cell *cell)
+{
+ struct pool *pool = tc->pool;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ dm_cell_release_no_holder(pool->prison, cell, &pool->deferred_bios);
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ wake_worker(pool);
+}
+
+static void cell_error(struct pool *pool,
+ struct dm_bio_prison_cell *cell)
+{
+ dm_cell_error(pool->prison, cell);
+ dm_bio_prison_free_cell(pool->prison, cell);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
* A global list of pools that uses a struct mapped_device as a key.
*/
static struct dm_thin_pool_table {
@@ -330,14 +402,20 @@ static void requeue_io(struct thin_c *tc)
* target.
*/
+static bool block_size_is_power_of_two(struct pool *pool)
+{
+ return pool->sectors_per_block_shift >= 0;
+}
+
static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
{
+ struct pool *pool = tc->pool;
sector_t block_nr = bio->bi_sector;
- if (tc->pool->sectors_per_block_shift < 0)
- (void) sector_div(block_nr, tc->pool->sectors_per_block);
+ if (block_size_is_power_of_two(pool))
+ block_nr >>= pool->sectors_per_block_shift;
else
- block_nr >>= tc->pool->sectors_per_block_shift;
+ (void) sector_div(block_nr, pool->sectors_per_block);
return block_nr;
}
@@ -348,12 +426,12 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
sector_t bi_sector = bio->bi_sector;
bio->bi_bdev = tc->pool_dev->bdev;
- if (tc->pool->sectors_per_block_shift < 0)
- bio->bi_sector = (block * pool->sectors_per_block) +
- sector_div(bi_sector, pool->sectors_per_block);
- else
+ if (block_size_is_power_of_two(pool))
bio->bi_sector = (block << pool->sectors_per_block_shift) |
(bi_sector & (pool->sectors_per_block - 1));
+ else
+ bio->bi_sector = (block * pool->sectors_per_block) +
+ sector_div(bi_sector, pool->sectors_per_block);
}
static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -420,15 +498,6 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio,
issue(tc, bio);
}
-/*
- * wake_worker() is used when new work is queued and when pool_resume is
- * ready to continue deferred IO processing.
- */
-static void wake_worker(struct pool *pool)
-{
- queue_work(pool->wq, &pool->worker);
-}
-
/*----------------------------------------------------------------*/
/*
@@ -515,14 +584,14 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell)
unsigned long flags;
spin_lock_irqsave(&pool->lock, flags);
- dm_cell_release(cell, &pool->deferred_bios);
+ cell_release(pool, cell, &pool->deferred_bios);
spin_unlock_irqrestore(&tc->pool->lock, flags);
wake_worker(pool);
}
/*
- * Same as cell_defer except it omits the original holder of the cell.
+ * Same as cell_defer above, except it omits the original holder of the cell.
*/
static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
@@ -530,7 +599,7 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c
unsigned long flags;
spin_lock_irqsave(&pool->lock, flags);
- dm_cell_release_no_holder(cell, &pool->deferred_bios);
+ cell_release_no_holder(pool, cell, &pool->deferred_bios);
spin_unlock_irqrestore(&pool->lock, flags);
wake_worker(pool);
@@ -540,13 +609,15 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
{
if (m->bio)
m->bio->bi_end_io = m->saved_bi_end_io;
- dm_cell_error(m->cell);
+ cell_error(m->tc->pool, m->cell);
list_del(&m->list);
mempool_free(m, m->tc->pool->mapping_pool);
}
+
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
{
struct thin_c *tc = m->tc;
+ struct pool *pool = tc->pool;
struct bio *bio;
int r;
@@ -555,7 +626,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
bio->bi_end_io = m->saved_bi_end_io;
if (m->err) {
- dm_cell_error(m->cell);
+ cell_error(pool, m->cell);
goto out;
}
@@ -567,7 +638,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
if (r) {
DMERR_LIMIT("dm_thin_insert_block() failed");
- dm_cell_error(m->cell);
+ cell_error(pool, m->cell);
goto out;
}
@@ -585,7 +656,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
out:
list_del(&m->list);
- mempool_free(m, tc->pool->mapping_pool);
+ mempool_free(m, pool->mapping_pool);
}
static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
@@ -736,7 +807,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
if (r < 0) {
mempool_free(m, pool->mapping_pool);
DMERR_LIMIT("dm_kcopyd_copy() failed");
- dm_cell_error(cell);
+ cell_error(pool, cell);
}
}
}
@@ -802,7 +873,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
if (r < 0) {
mempool_free(m, pool->mapping_pool);
DMERR_LIMIT("dm_kcopyd_zero() failed");
- dm_cell_error(cell);
+ cell_error(pool, cell);
}
}
}
@@ -908,13 +979,13 @@ static void retry_on_resume(struct bio *bio)
spin_unlock_irqrestore(&pool->lock, flags);
}
-static void no_space(struct dm_bio_prison_cell *cell)
+static void no_space(struct pool *pool, struct dm_bio_prison_cell *cell)
{
struct bio *bio;
struct bio_list bios;
bio_list_init(&bios);
- dm_cell_release(cell, &bios);
+ cell_release(pool, cell, &bios);
while ((bio = bio_list_pop(&bios)))
retry_on_resume(bio);
@@ -932,7 +1003,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
struct dm_thin_new_mapping *m;
build_virtual_key(tc->td, block, &key);
- if (dm_bio_detain(tc->pool->prison, &key, bio, &cell))
+ if (bio_detain(tc->pool, &key, bio, &cell))
return;
r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
@@ -944,7 +1015,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
* on this block.
*/
build_data_key(tc->td, lookup_result.block, &key2);
- if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) {
+ if (bio_detain(tc->pool, &key2, bio, &cell2)) {
cell_defer_no_holder(tc, cell);
break;
}
@@ -1020,13 +1091,13 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
break;
case -ENOSPC:
- no_space(cell);
+ no_space(tc->pool, cell);
break;
default:
DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
__func__, r);
- dm_cell_error(cell);
+ cell_error(tc->pool, cell);
break;
}
}
@@ -1044,7 +1115,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
* of being broken so we have nothing further to do here.
*/
build_data_key(tc->td, lookup_result->block, &key);
- if (dm_bio_detain(pool->prison, &key, bio, &cell))
+ if (bio_detain(pool, &key, bio, &cell))
return;
if (bio_data_dir(bio) == WRITE && bio->bi_size)
@@ -1065,12 +1136,13 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
{
int r;
dm_block_t data_block;
+ struct pool *pool = tc->pool;
/*
* Remap empty bios (flushes) immediately, without provisioning.
*/
if (!bio->bi_size) {
- inc_all_io_entry(tc->pool, bio);
+ inc_all_io_entry(pool, bio);
cell_defer_no_holder(tc, cell);
remap_and_issue(tc, bio, 0);
@@ -1097,14 +1169,14 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
break;
case -ENOSPC:
- no_space(cell);
+ no_space(pool, cell);
break;
default:
DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
__func__, r);
- set_pool_mode(tc->pool, PM_READ_ONLY);
- dm_cell_error(cell);
+ set_pool_mode(pool, PM_READ_ONLY);
+ cell_error(pool, cell);
break;
}
}
@@ -1112,6 +1184,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
static void process_bio(struct thin_c *tc, struct bio *bio)
{
int r;
+ struct pool *pool = tc->pool;
dm_block_t block = get_bio_block(tc, bio);
struct dm_bio_prison_cell *cell;
struct dm_cell_key key;
@@ -1122,7 +1195,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
* being provisioned so we have nothing further to do here.
*/
build_virtual_key(tc->td, block, &key);
- if (dm_bio_detain(tc->pool->prison, &key, bio, &cell))
+ if (bio_detain(pool, &key, bio, &cell))
return;
r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
@@ -1130,9 +1203,9 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
case 0:
if (lookup_result.shared) {
process_shared_bio(tc, bio, block, &lookup_result);
- cell_defer_no_holder(tc, cell);
+ cell_defer_no_holder(tc, cell); /* FIXME: pass this cell into process_shared? */
} else {
- inc_all_io_entry(tc->pool, bio);
+ inc_all_io_entry(pool, bio);
cell_defer_no_holder(tc, cell);
remap_and_issue(tc, bio, lookup_result.block);
@@ -1141,7 +1214,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
case -ENODATA:
if (bio_data_dir(bio) == READ && tc->origin_dev) {
- inc_all_io_entry(tc->pool, bio);
+ inc_all_io_entry(pool, bio);
cell_defer_no_holder(tc, cell);
remap_to_origin_and_issue(tc, bio);
@@ -1378,7 +1451,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
dm_block_t block = get_bio_block(tc, bio);
struct dm_thin_device *td = tc->td;
struct dm_thin_lookup_result result;
- struct dm_bio_prison_cell *cell1, *cell2;
+ struct dm_bio_prison_cell cell1, cell2;
+ struct dm_bio_prison_cell *cell_result;
struct dm_cell_key key;
thin_hook_bio(tc, bio);
@@ -1420,18 +1494,18 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
}
build_virtual_key(tc->td, block, &key);
- if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1))
+ if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result))
return DM_MAPIO_SUBMITTED;
build_data_key(tc->td, result.block, &key);
- if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) {
- cell_defer_no_holder(tc, cell1);
+ if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2, &cell_result)) {
+ cell_defer_no_holder_no_free(tc, &cell1);
return DM_MAPIO_SUBMITTED;
}
inc_all_io_entry(tc->pool, bio);
- cell_defer_no_holder(tc, cell2);
- cell_defer_no_holder(tc, cell1);
+ cell_defer_no_holder_no_free(tc, &cell2);
+ cell_defer_no_holder_no_free(tc, &cell1);
remap(tc, bio, result.block);
return DM_MAPIO_REMAPPED;
@@ -1938,7 +2012,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
pt->data_dev = data_dev;
pt->low_water_blocks = low_water_blocks;
pt->adjusted_pf = pt->requested_pf = pf;
- ti->num_flush_requests = 1;
+ ti->num_flush_bios = 1;
/*
* Only need to enable discards if the pool should pass
@@ -1946,7 +2020,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
* processing will cause mappings to be removed from the btree.
*/
if (pf.discard_enabled && pf.discard_passdown) {
- ti->num_discard_requests = 1;
+ ti->num_discard_bios = 1;
/*
* Setting 'discards_supported' circumvents the normal
@@ -2414,11 +2488,6 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}
-static bool block_size_is_power_of_two(struct pool *pool)
-{
- return pool->sectors_per_block_shift >= 0;
-}
-
static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
{
struct pool *pool = pt->pool;
@@ -2432,15 +2501,8 @@ static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
if (pt->adjusted_pf.discard_passdown) {
data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
limits->discard_granularity = data_limits->discard_granularity;
- } else if (block_size_is_power_of_two(pool))
+ } else
limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
- else
- /*
- * Use largest power of 2 that is a factor of sectors_per_block
- * but at least DATA_DEV_BLOCK_SIZE_MIN_SECTORS.
- */
- limits->discard_granularity = max(1 << (ffs(pool->sectors_per_block) - 1),
- DATA_DEV_BLOCK_SIZE_MIN_SECTORS) << SECTOR_SHIFT;
}
static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -2588,17 +2650,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (r)
goto bad_thin_open;
- ti->num_flush_requests = 1;
+ ti->num_flush_bios = 1;
ti->flush_supported = true;
ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
/* In case the pool supports discards, pass them on. */
if (tc->pool->pf.discard_enabled) {
ti->discards_supported = true;
- ti->num_discard_requests = 1;
+ ti->num_discard_bios = 1;
ti->discard_zeroes_data_unsupported = true;
- /* Discard requests must be split on a block boundary */
- ti->split_discard_requests = true;
+ /* Discard bios must be split on a block boundary */
+ ti->split_discard_bios = true;
}
dm_put(pool_md);
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index 69a5c3b3b340..c99003e0d47a 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -25,7 +25,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
/*
* Silently drop discards, avoiding -EOPNOTSUPP.
*/
- ti->num_discard_requests = 1;
+ ti->num_discard_bios = 1;
return 0;
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3dd2df2f9f91..5b213550af0b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -986,12 +986,13 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
-static void __map_bio(struct dm_target *ti, struct dm_target_io *tio)
+static void __map_bio(struct dm_target_io *tio)
{
int r;
sector_t sector;
struct mapped_device *md;
struct bio *clone = &tio->clone;
+ struct dm_target *ti = tio->ti;
clone->bi_end_io = clone_endio;
clone->bi_private = tio;
@@ -1032,32 +1033,54 @@ struct clone_info {
unsigned short idx;
};
+static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len)
+{
+ bio->bi_sector = sector;
+ bio->bi_size = to_bytes(len);
+}
+
+static void bio_setup_bv(struct bio *bio, unsigned short idx, unsigned short bv_count)
+{
+ bio->bi_idx = idx;
+ bio->bi_vcnt = idx + bv_count;
+ bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+}
+
+static void clone_bio_integrity(struct bio *bio, struct bio *clone,
+ unsigned short idx, unsigned len, unsigned offset,
+ unsigned trim)
+{
+ if (!bio_integrity(bio))
+ return;
+
+ bio_integrity_clone(clone, bio, GFP_NOIO);
+
+ if (trim)
+ bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len);
+}
+
/*
* Creates a little bio that just does part of a bvec.
*/
-static void split_bvec(struct dm_target_io *tio, struct bio *bio,
- sector_t sector, unsigned short idx, unsigned int offset,
- unsigned int len, struct bio_set *bs)
+static void clone_split_bio(struct dm_target_io *tio, struct bio *bio,
+ sector_t sector, unsigned short idx,
+ unsigned offset, unsigned len)
{
struct bio *clone = &tio->clone;
struct bio_vec *bv = bio->bi_io_vec + idx;
*clone->bi_io_vec = *bv;
- clone->bi_sector = sector;
+ bio_setup_sector(clone, sector, len);
+
clone->bi_bdev = bio->bi_bdev;
clone->bi_rw = bio->bi_rw;
clone->bi_vcnt = 1;
- clone->bi_size = to_bytes(len);
clone->bi_io_vec->bv_offset = offset;
clone->bi_io_vec->bv_len = clone->bi_size;
clone->bi_flags |= 1 << BIO_CLONED;
- if (bio_integrity(bio)) {
- bio_integrity_clone(clone, bio, GFP_NOIO);
- bio_integrity_trim(clone,
- bio_sector_offset(bio, idx, offset), len);
- }
+ clone_bio_integrity(bio, clone, idx, len, offset, 1);
}
/*
@@ -1065,29 +1088,23 @@ static void split_bvec(struct dm_target_io *tio, struct bio *bio,
*/
static void clone_bio(struct dm_target_io *tio, struct bio *bio,
sector_t sector, unsigned short idx,
- unsigned short bv_count, unsigned int len,
- struct bio_set *bs)
+ unsigned short bv_count, unsigned len)
{
struct bio *clone = &tio->clone;
+ unsigned trim = 0;
__bio_clone(clone, bio);
- clone->bi_sector = sector;
- clone->bi_idx = idx;
- clone->bi_vcnt = idx + bv_count;
- clone->bi_size = to_bytes(len);
- clone->bi_flags &= ~(1 << BIO_SEG_VALID);
-
- if (bio_integrity(bio)) {
- bio_integrity_clone(clone, bio, GFP_NOIO);
-
- if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
- bio_integrity_trim(clone,
- bio_sector_offset(bio, idx, 0), len);
- }
+ bio_setup_sector(clone, sector, len);
+ bio_setup_bv(clone, idx, bv_count);
+
+ if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
+ trim = 1;
+ clone_bio_integrity(bio, clone, idx, len, 0, trim);
}
static struct dm_target_io *alloc_tio(struct clone_info *ci,
- struct dm_target *ti, int nr_iovecs)
+ struct dm_target *ti, int nr_iovecs,
+ unsigned target_bio_nr)
{
struct dm_target_io *tio;
struct bio *clone;
@@ -1098,96 +1115,104 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
tio->io = ci->io;
tio->ti = ti;
memset(&tio->info, 0, sizeof(tio->info));
- tio->target_request_nr = 0;
+ tio->target_bio_nr = target_bio_nr;
return tio;
}
-static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
- unsigned request_nr, sector_t len)
+static void __clone_and_map_simple_bio(struct clone_info *ci,
+ struct dm_target *ti,
+ unsigned target_bio_nr, sector_t len)
{
- struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs);
+ struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
struct bio *clone = &tio->clone;
- tio->target_request_nr = request_nr;
-
/*
* Discard requests require the bio's inline iovecs be initialized.
* ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
* and discard, so no need for concern about wasted bvec allocations.
*/
-
__bio_clone(clone, ci->bio);
- if (len) {
- clone->bi_sector = ci->sector;
- clone->bi_size = to_bytes(len);
- }
+ if (len)
+ bio_setup_sector(clone, ci->sector, len);
- __map_bio(ti, tio);
+ __map_bio(tio);
}
-static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
- unsigned num_requests, sector_t len)
+static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
+ unsigned num_bios, sector_t len)
{
- unsigned request_nr;
+ unsigned target_bio_nr;
- for (request_nr = 0; request_nr < num_requests; request_nr++)
- __issue_target_request(ci, ti, request_nr, len);
+ for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++)
+ __clone_and_map_simple_bio(ci, ti, target_bio_nr, len);
}
-static int __clone_and_map_empty_flush(struct clone_info *ci)
+static int __send_empty_flush(struct clone_info *ci)
{
unsigned target_nr = 0;
struct dm_target *ti;
BUG_ON(bio_has_data(ci->bio));
while ((ti = dm_table_get_target(ci->map, target_nr++)))
- __issue_target_requests(ci, ti, ti->num_flush_requests, 0);
+ __send_duplicate_bios(ci, ti, ti->num_flush_bios, 0);
return 0;
}
-/*
- * Perform all io with a single clone.
- */
-static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti)
+static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
+ sector_t sector, int nr_iovecs,
+ unsigned short idx, unsigned short bv_count,
+ unsigned offset, unsigned len,
+ unsigned split_bvec)
{
struct bio *bio = ci->bio;
struct dm_target_io *tio;
+ unsigned target_bio_nr;
+ unsigned num_target_bios = 1;
- tio = alloc_tio(ci, ti, bio->bi_max_vecs);
- clone_bio(tio, bio, ci->sector, ci->idx, bio->bi_vcnt - ci->idx,
- ci->sector_count, ci->md->bs);
- __map_bio(ti, tio);
- ci->sector_count = 0;
+ /*
+ * Does the target want to receive duplicate copies of the bio?
+ */
+ if (bio_data_dir(bio) == WRITE && ti->num_write_bios)
+ num_target_bios = ti->num_write_bios(ti, bio);
+
+ for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
+ tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr);
+ if (split_bvec)
+ clone_split_bio(tio, bio, sector, idx, offset, len);
+ else
+ clone_bio(tio, bio, sector, idx, bv_count, len);
+ __map_bio(tio);
+ }
}
-typedef unsigned (*get_num_requests_fn)(struct dm_target *ti);
+typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);
-static unsigned get_num_discard_requests(struct dm_target *ti)
+static unsigned get_num_discard_bios(struct dm_target *ti)
{
- return ti->num_discard_requests;
+ return ti->num_discard_bios;
}
-static unsigned get_num_write_same_requests(struct dm_target *ti)
+static unsigned get_num_write_same_bios(struct dm_target *ti)
{
- return ti->num_write_same_requests;
+ return ti->num_write_same_bios;
}
typedef bool (*is_split_required_fn)(struct dm_target *ti);
static bool is_split_required_for_discard(struct dm_target *ti)
{
- return ti->split_discard_requests;
+ return ti->split_discard_bios;
}
-static int __clone_and_map_changing_extent_only(struct clone_info *ci,
- get_num_requests_fn get_num_requests,
- is_split_required_fn is_split_required)
+static int __send_changing_extent_only(struct clone_info *ci,
+ get_num_bios_fn get_num_bios,
+ is_split_required_fn is_split_required)
{
struct dm_target *ti;
sector_t len;
- unsigned num_requests;
+ unsigned num_bios;
do {
ti = dm_table_find_target(ci->map, ci->sector);
@@ -1200,8 +1225,8 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci,
* reconfiguration might also have changed that since the
* check was performed.
*/
- num_requests = get_num_requests ? get_num_requests(ti) : 0;
- if (!num_requests)
+ num_bios = get_num_bios ? get_num_bios(ti) : 0;
+ if (!num_bios)
return -EOPNOTSUPP;
if (is_split_required && !is_split_required(ti))
@@ -1209,7 +1234,7 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci,
else
len = min(ci->sector_count, max_io_len(ci->sector, ti));
- __issue_target_requests(ci, ti, num_requests, len);
+ __send_duplicate_bios(ci, ti, num_bios, len);
ci->sector += len;
} while (ci->sector_count -= len);
@@ -1217,108 +1242,129 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci,
return 0;
}
-static int __clone_and_map_discard(struct clone_info *ci)
+static int __send_discard(struct clone_info *ci)
{
- return __clone_and_map_changing_extent_only(ci, get_num_discard_requests,
- is_split_required_for_discard);
+ return __send_changing_extent_only(ci, get_num_discard_bios,
+ is_split_required_for_discard);
}
-static int __clone_and_map_write_same(struct clone_info *ci)
+static int __send_write_same(struct clone_info *ci)
{
- return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL);
+ return __send_changing_extent_only(ci, get_num_write_same_bios, NULL);
}
-static int __clone_and_map(struct clone_info *ci)
+/*
+ * Find maximum number of sectors / bvecs we can process with a single bio.
+ */
+static sector_t __len_within_target(struct clone_info *ci, sector_t max, int *idx)
{
struct bio *bio = ci->bio;
- struct dm_target *ti;
- sector_t len = 0, max;
- struct dm_target_io *tio;
+ sector_t bv_len, total_len = 0;
- if (unlikely(bio->bi_rw & REQ_DISCARD))
- return __clone_and_map_discard(ci);
- else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
- return __clone_and_map_write_same(ci);
+ for (*idx = ci->idx; max && (*idx < bio->bi_vcnt); (*idx)++) {
+ bv_len = to_sector(bio->bi_io_vec[*idx].bv_len);
- ti = dm_table_find_target(ci->map, ci->sector);
- if (!dm_target_is_valid(ti))
- return -EIO;
-
- max = max_io_len(ci->sector, ti);
+ if (bv_len > max)
+ break;
- if (ci->sector_count <= max) {
- /*
- * Optimise for the simple case where we can do all of
- * the remaining io with a single clone.
- */
- __clone_and_map_simple(ci, ti);
+ max -= bv_len;
+ total_len += bv_len;
+ }
- } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
- /*
- * There are some bvecs that don't span targets.
- * Do as many of these as possible.
- */
- int i;
- sector_t remaining = max;
- sector_t bv_len;
+ return total_len;
+}
- for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) {
- bv_len = to_sector(bio->bi_io_vec[i].bv_len);
+static int __split_bvec_across_targets(struct clone_info *ci,
+ struct dm_target *ti, sector_t max)
+{
+ struct bio *bio = ci->bio;
+ struct bio_vec *bv = bio->bi_io_vec + ci->idx;
+ sector_t remaining = to_sector(bv->bv_len);
+ unsigned offset = 0;
+ sector_t len;
- if (bv_len > remaining)
- break;
+ do {
+ if (offset) {
+ ti = dm_table_find_target(ci->map, ci->sector);
+ if (!dm_target_is_valid(ti))
+ return -EIO;
- remaining -= bv_len;
- len += bv_len;
+ max = max_io_len(ci->sector, ti);
}
- tio = alloc_tio(ci, ti, bio->bi_max_vecs);
- clone_bio(tio, bio, ci->sector, ci->idx, i - ci->idx, len,
- ci->md->bs);
- __map_bio(ti, tio);
+ len = min(remaining, max);
+
+ __clone_and_map_data_bio(ci, ti, ci->sector, 1, ci->idx, 0,
+ bv->bv_offset + offset, len, 1);
ci->sector += len;
ci->sector_count -= len;
- ci->idx = i;
+ offset += to_bytes(len);
+ } while (remaining -= len);
- } else {
- /*
- * Handle a bvec that must be split between two or more targets.
- */
- struct bio_vec *bv = bio->bi_io_vec + ci->idx;
- sector_t remaining = to_sector(bv->bv_len);
- unsigned int offset = 0;
+ ci->idx++;
- do {
- if (offset) {
- ti = dm_table_find_target(ci->map, ci->sector);
- if (!dm_target_is_valid(ti))
- return -EIO;
+ return 0;
+}
- max = max_io_len(ci->sector, ti);
- }
+/*
+ * Select the correct strategy for processing a non-flush bio.
+ */
+static int __split_and_process_non_flush(struct clone_info *ci)
+{
+ struct bio *bio = ci->bio;
+ struct dm_target *ti;
+ sector_t len, max;
+ int idx;
- len = min(remaining, max);
+ if (unlikely(bio->bi_rw & REQ_DISCARD))
+ return __send_discard(ci);
+ else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
+ return __send_write_same(ci);
- tio = alloc_tio(ci, ti, 1);
- split_bvec(tio, bio, ci->sector, ci->idx,
- bv->bv_offset + offset, len, ci->md->bs);
+ ti = dm_table_find_target(ci->map, ci->sector);
+ if (!dm_target_is_valid(ti))
+ return -EIO;
- __map_bio(ti, tio);
+ max = max_io_len(ci->sector, ti);
- ci->sector += len;
- ci->sector_count -= len;
- offset += to_bytes(len);
- } while (remaining -= len);
+ /*
+ * Optimise for the simple case where we can do all of
+ * the remaining io with a single clone.
+ */
+ if (ci->sector_count <= max) {
+ __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs,
+ ci->idx, bio->bi_vcnt - ci->idx, 0,
+ ci->sector_count, 0);
+ ci->sector_count = 0;
+ return 0;
+ }
- ci->idx++;
+ /*
+ * There are some bvecs that don't span targets.
+ * Do as many of these as possible.
+ */
+ if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
+ len = __len_within_target(ci, max, &idx);
+
+ __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs,
+ ci->idx, idx - ci->idx, 0, len, 0);
+
+ ci->sector += len;
+ ci->sector_count -= len;
+ ci->idx = idx;
+
+ return 0;
}
- return 0;
+ /*
+ * Handle a bvec that must be split between two or more targets.
+ */
+ return __split_bvec_across_targets(ci, ti, max);
}
/*
- * Split the bio into several clones and submit it to targets.
+ * Entry point to split a bio into clones and submit them to the targets.
*/
static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
{
@@ -1342,16 +1388,17 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
ci.idx = bio->bi_idx;
start_io_acct(ci.io);
+
if (bio->bi_rw & REQ_FLUSH) {
ci.bio = &ci.md->flush_bio;
ci.sector_count = 0;
- error = __clone_and_map_empty_flush(&ci);
+ error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
} else {
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
while (ci.sector_count && !error)
- error = __clone_and_map(&ci);
+ error = __split_and_process_non_flush(&ci);
}
/* drop the extra reference count */
@@ -2420,7 +2467,7 @@ static void dm_queue_flush(struct mapped_device *md)
*/
struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
- struct dm_table *live_map, *map = ERR_PTR(-EINVAL);
+ struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
struct queue_limits limits;
int r;
@@ -2443,10 +2490,12 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
dm_table_put(live_map);
}
- r = dm_calculate_queue_limits(table, &limits);
- if (r) {
- map = ERR_PTR(r);
- goto out;
+ if (!live_map) {
+ r = dm_calculate_queue_limits(table, &limits);
+ if (r) {
+ map = ERR_PTR(r);
+ goto out;
+ }
}
map = __bind(md, table, &limits);
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
index ceb359050a59..19b268795415 100644
--- a/drivers/md/persistent-data/Kconfig
+++ b/drivers/md/persistent-data/Kconfig
@@ -1,6 +1,6 @@
config DM_PERSISTENT_DATA
tristate
- depends on BLK_DEV_DM && EXPERIMENTAL
+ depends on BLK_DEV_DM
select LIBCRC32C
select DM_BUFIO
---help---
diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile
index d8e7cb767c1e..ff528792c358 100644
--- a/drivers/md/persistent-data/Makefile
+++ b/drivers/md/persistent-data/Makefile
@@ -1,5 +1,7 @@
obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o
dm-persistent-data-objs := \
+ dm-array.o \
+ dm-bitset.o \
dm-block-manager.o \
dm-space-map-common.o \
dm-space-map-disk.o \
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
new file mode 100644
index 000000000000..172147eb1d40
--- /dev/null
+++ b/drivers/md/persistent-data/dm-array.c
@@ -0,0 +1,808 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-array.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/export.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "array"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * The array is implemented as a fully populated btree, which points to
+ * blocks that contain the packed values. This is more space efficient
+ * than just using a btree since we don't store 1 key per value.
+ */
+struct array_block {
+ __le32 csum;
+ __le32 max_entries;
+ __le32 nr_entries;
+ __le32 value_size;
+ __le64 blocknr; /* Block this node is supposed to live in. */
+} __packed;
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Validator methods. As usual we calculate a checksum, and also write the
+ * block location into the header (paranoia about ssds remapping areas by
+ * mistake).
+ */
+#define CSUM_XOR 595846735
+
+static void array_block_prepare_for_write(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t size_of_block)
+{
+ struct array_block *bh_le = dm_block_data(b);
+
+ bh_le->blocknr = cpu_to_le64(dm_block_location(b));
+ bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
+ size_of_block - sizeof(__le32),
+ CSUM_XOR));
+}
+
+static int array_block_check(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t size_of_block)
+{
+ struct array_block *bh_le = dm_block_data(b);
+ __le32 csum_disk;
+
+ if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
+ DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu",
+ (unsigned long long) le64_to_cpu(bh_le->blocknr),
+ (unsigned long long) dm_block_location(b));
+ return -ENOTBLK;
+ }
+
+ csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
+ size_of_block - sizeof(__le32),
+ CSUM_XOR));
+ if (csum_disk != bh_le->csum) {
+ DMERR_LIMIT("array_block_check failed: csum %u != wanted %u",
+ (unsigned) le32_to_cpu(csum_disk),
+ (unsigned) le32_to_cpu(bh_le->csum));
+ return -EILSEQ;
+ }
+
+ return 0;
+}
+
+static struct dm_block_validator array_validator = {
+ .name = "array",
+ .prepare_for_write = array_block_prepare_for_write,
+ .check = array_block_check
+};
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Functions for manipulating the array blocks.
+ */
+
+/*
+ * Returns a pointer to a value within an array block.
+ *
+ * index - The index into _this_ specific block.
+ */
+static void *element_at(struct dm_array_info *info, struct array_block *ab,
+ unsigned index)
+{
+ unsigned char *entry = (unsigned char *) (ab + 1);
+
+ entry += index * info->value_type.size;
+
+ return entry;
+}
+
+/*
+ * Utility function that calls one of the value_type methods on every value
+ * in an array block.
+ */
+static void on_entries(struct dm_array_info *info, struct array_block *ab,
+ void (*fn)(void *, const void *))
+{
+ unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);
+
+ for (i = 0; i < nr_entries; i++)
+ fn(info->value_type.context, element_at(info, ab, i));
+}
+
+/*
+ * Increment every value in an array block.
+ */
+static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab)
+{
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ if (vt->inc)
+ on_entries(info, ab, vt->inc);
+}
+
+/*
+ * Decrement every value in an array block.
+ */
+static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab)
+{
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ if (vt->dec)
+ on_entries(info, ab, vt->dec);
+}
+
+/*
+ * Each array block can hold this many values.
+ */
+static uint32_t calc_max_entries(size_t value_size, size_t size_of_block)
+{
+ return (size_of_block - sizeof(struct array_block)) / value_size;
+}
+
+/*
+ * Allocate a new array block. The caller will need to unlock block.
+ */
+static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
+ uint32_t max_entries,
+ struct dm_block **block, struct array_block **ab)
+{
+ int r;
+
+ r = dm_tm_new_block(info->btree_info.tm, &array_validator, block);
+ if (r)
+ return r;
+
+ (*ab) = dm_block_data(*block);
+ (*ab)->max_entries = cpu_to_le32(max_entries);
+ (*ab)->nr_entries = cpu_to_le32(0);
+ (*ab)->value_size = cpu_to_le32(info->value_type.size);
+
+ return 0;
+}
+
+/*
+ * Pad an array block out with a particular value. Every instance will
+ * cause an increment of the value_type. new_nr must always be more than
+ * the current number of entries.
+ */
+static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
+ const void *value, unsigned new_nr)
+{
+ unsigned i;
+ uint32_t nr_entries;
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
+ BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
+
+ nr_entries = le32_to_cpu(ab->nr_entries);
+ for (i = nr_entries; i < new_nr; i++) {
+ if (vt->inc)
+ vt->inc(vt->context, value);
+ memcpy(element_at(info, ab, i), value, vt->size);
+ }
+ ab->nr_entries = cpu_to_le32(new_nr);
+}
+
+/*
+ * Remove some entries from the back of an array block. Every value
+ * removed will be decremented. new_nr must be <= the current number of
+ * entries.
+ */
+static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
+ unsigned new_nr)
+{
+ unsigned i;
+ uint32_t nr_entries;
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
+ BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
+
+ nr_entries = le32_to_cpu(ab->nr_entries);
+ for (i = nr_entries; i > new_nr; i--)
+ if (vt->dec)
+ vt->dec(vt->context, element_at(info, ab, i - 1));
+ ab->nr_entries = cpu_to_le32(new_nr);
+}
+
+/*
+ * Read locks a block, and coerces it to an array block. The caller must
+ * unlock 'block' when finished.
+ */
+static int get_ablock(struct dm_array_info *info, dm_block_t b,
+ struct dm_block **block, struct array_block **ab)
+{
+ int r;
+
+ r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block);
+ if (r)
+ return r;
+
+ *ab = dm_block_data(*block);
+ return 0;
+}
+
+/*
+ * Unlocks an array block.
+ */
+static int unlock_ablock(struct dm_array_info *info, struct dm_block *block)
+{
+ return dm_tm_unlock(info->btree_info.tm, block);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Btree manipulation.
+ */
+
+/*
+ * Looks up an array block in the btree, and then read locks it.
+ *
+ * index is the index of the index of the array_block, (ie. the array index
+ * / max_entries).
+ */
+static int lookup_ablock(struct dm_array_info *info, dm_block_t root,
+ unsigned index, struct dm_block **block,
+ struct array_block **ab)
+{
+ int r;
+ uint64_t key = index;
+ __le64 block_le;
+
+ r = dm_btree_lookup(&info->btree_info, root, &key, &block_le);
+ if (r)
+ return r;
+
+ return get_ablock(info, le64_to_cpu(block_le), block, ab);
+}
+
+/*
+ * Insert an array block into the btree. The block is _not_ unlocked.
+ */
+static int insert_ablock(struct dm_array_info *info, uint64_t index,
+ struct dm_block *block, dm_block_t *root)
+{
+ __le64 block_le = cpu_to_le64(dm_block_location(block));
+
+ __dm_bless_for_disk(block_le);
+ return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
+}
+
+/*
+ * Looks up an array block in the btree. Then shadows it, and updates the
+ * btree to point to this new shadow. 'root' is an input/output parameter
+ * for both the current root block, and the new one.
+ */
+static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
+ unsigned index, struct dm_block **block,
+ struct array_block **ab)
+{
+ int r, inc;
+ uint64_t key = index;
+ dm_block_t b;
+ __le64 block_le;
+
+ /*
+ * lookup
+ */
+ r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
+ if (r)
+ return r;
+ b = le64_to_cpu(block_le);
+
+ /*
+ * shadow
+ */
+ r = dm_tm_shadow_block(info->btree_info.tm, b,
+ &array_validator, block, &inc);
+ if (r)
+ return r;
+
+ *ab = dm_block_data(*block);
+ if (inc)
+ inc_ablock_entries(info, *ab);
+
+ /*
+ * Reinsert.
+ *
+ * The shadow op will often be a noop. Only insert if it really
+ * copied data.
+ */
+ if (dm_block_location(*block) != b)
+ r = insert_ablock(info, index, *block, root);
+
+ return r;
+}
+
+/*
+ * Allocate an new array block, and fill it with some values.
+ */
+static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block,
+ uint32_t max_entries,
+ unsigned block_index, uint32_t nr,
+ const void *value, dm_block_t *root)
+{
+ int r;
+ struct dm_block *block;
+ struct array_block *ab;
+
+ r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
+ if (r)
+ return r;
+
+ fill_ablock(info, ab, value, nr);
+ r = insert_ablock(info, block_index, block, root);
+ unlock_ablock(info, block);
+
+ return r;
+}
+
+static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block,
+ unsigned begin_block, unsigned end_block,
+ unsigned max_entries, const void *value,
+ dm_block_t *root)
+{
+ int r = 0;
+
+ for (; !r && begin_block != end_block; begin_block++)
+ r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root);
+
+ return r;
+}
+
+/*
+ * There are a bunch of functions involved with resizing an array. This
+ * structure holds information that commonly needed by them. Purely here
+ * to reduce parameter count.
+ */
+struct resize {
+ /*
+ * Describes the array.
+ */
+ struct dm_array_info *info;
+
+ /*
+ * The current root of the array. This gets updated.
+ */
+ dm_block_t root;
+
+ /*
+ * Metadata block size. Used to calculate the nr entries in an
+ * array block.
+ */
+ size_t size_of_block;
+
+ /*
+ * Maximum nr entries in an array block.
+ */
+ unsigned max_entries;
+
+ /*
+ * nr of completely full blocks in the array.
+ *
+ * 'old' refers to before the resize, 'new' after.
+ */
+ unsigned old_nr_full_blocks, new_nr_full_blocks;
+
+ /*
+ * Number of entries in the final block. 0 iff only full blocks in
+ * the array.
+ */
+ unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block;
+
+ /*
+ * The default value used when growing the array.
+ */
+ const void *value;
+};
+
+/*
+ * Removes a consecutive set of array blocks from the btree. The values
+ * in block are decremented as a side effect of the btree remove.
+ *
+ * begin_index - the index of the first array block to remove.
+ * end_index - the one-past-the-end value. ie. this block is not removed.
+ */
+static int drop_blocks(struct resize *resize, unsigned begin_index,
+ unsigned end_index)
+{
+ int r;
+
+ while (begin_index != end_index) {
+ uint64_t key = begin_index++;
+ r = dm_btree_remove(&resize->info->btree_info, resize->root,
+ &key, &resize->root);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/*
+ * Calculates how many blocks are needed for the array.
+ */
+static unsigned total_nr_blocks_needed(unsigned nr_full_blocks,
+ unsigned nr_entries_in_last_block)
+{
+ return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0);
+}
+
+/*
+ * Shrink an array.
+ */
+static int shrink(struct resize *resize)
+{
+ int r;
+ unsigned begin, end;
+ struct dm_block *block;
+ struct array_block *ab;
+
+ /*
+ * Lose some blocks from the back?
+ */
+ if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) {
+ begin = total_nr_blocks_needed(resize->new_nr_full_blocks,
+ resize->new_nr_entries_in_last_block);
+ end = total_nr_blocks_needed(resize->old_nr_full_blocks,
+ resize->old_nr_entries_in_last_block);
+
+ r = drop_blocks(resize, begin, end);
+ if (r)
+ return r;
+ }
+
+ /*
+ * Trim the new tail block
+ */
+ if (resize->new_nr_entries_in_last_block) {
+ r = shadow_ablock(resize->info, &resize->root,
+ resize->new_nr_full_blocks, &block, &ab);
+ if (r)
+ return r;
+
+ trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block);
+ unlock_ablock(resize->info, block);
+ }
+
+ return 0;
+}
+
+/*
+ * Grow an array.
+ */
+static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries)
+{
+ int r;
+ struct dm_block *block;
+ struct array_block *ab;
+
+ r = shadow_ablock(resize->info, &resize->root,
+ resize->old_nr_full_blocks, &block, &ab);
+ if (r)
+ return r;
+
+ fill_ablock(resize->info, ab, resize->value, new_nr_entries);
+ unlock_ablock(resize->info, block);
+
+ return r;
+}
+
+static int grow_add_tail_block(struct resize *resize)
+{
+ return insert_new_ablock(resize->info, resize->size_of_block,
+ resize->max_entries,
+ resize->new_nr_full_blocks,
+ resize->new_nr_entries_in_last_block,
+ resize->value, &resize->root);
+}
+
+static int grow_needs_more_blocks(struct resize *resize)
+{
+ int r;
+
+ if (resize->old_nr_entries_in_last_block > 0) {
+ r = grow_extend_tail_block(resize, resize->max_entries);
+ if (r)
+ return r;
+ }
+
+ r = insert_full_ablocks(resize->info, resize->size_of_block,
+ resize->old_nr_full_blocks,
+ resize->new_nr_full_blocks,
+ resize->max_entries, resize->value,
+ &resize->root);
+ if (r)
+ return r;
+
+ if (resize->new_nr_entries_in_last_block)
+ r = grow_add_tail_block(resize);
+
+ return r;
+}
+
+static int grow(struct resize *resize)
+{
+ if (resize->new_nr_full_blocks > resize->old_nr_full_blocks)
+ return grow_needs_more_blocks(resize);
+
+ else if (resize->old_nr_entries_in_last_block)
+ return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block);
+
+ else
+ return grow_add_tail_block(resize);
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * These are the value_type functions for the btree elements, which point
+ * to array blocks.
+ */
+static void block_inc(void *context, const void *value)
+{
+ __le64 block_le;
+ struct dm_array_info *info = context;
+
+ memcpy(&block_le, value, sizeof(block_le));
+ dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
+}
+
+static void block_dec(void *context, const void *value)
+{
+ int r;
+ uint64_t b;
+ __le64 block_le;
+ uint32_t ref_count;
+ struct dm_block *block;
+ struct array_block *ab;
+ struct dm_array_info *info = context;
+
+ memcpy(&block_le, value, sizeof(block_le));
+ b = le64_to_cpu(block_le);
+
+ r = dm_tm_ref(info->btree_info.tm, b, &ref_count);
+ if (r) {
+ DMERR_LIMIT("couldn't get reference count for block %llu",
+ (unsigned long long) b);
+ return;
+ }
+
+ if (ref_count == 1) {
+ /*
+ * We're about to drop the last reference to this ablock.
+ * So we need to decrement the ref count of the contents.
+ */
+ r = get_ablock(info, b, &block, &ab);
+ if (r) {
+ DMERR_LIMIT("couldn't get array block %llu",
+ (unsigned long long) b);
+ return;
+ }
+
+ dec_ablock_entries(info, ab);
+ unlock_ablock(info, block);
+ }
+
+ dm_tm_dec(info->btree_info.tm, b);
+}
+
+static int block_equal(void *context, const void *value1, const void *value2)
+{
+ return !memcmp(value1, value2, sizeof(__le64));
+}
+
+/*----------------------------------------------------------------*/
+
+void dm_array_info_init(struct dm_array_info *info,
+ struct dm_transaction_manager *tm,
+ struct dm_btree_value_type *vt)
+{
+ struct dm_btree_value_type *bvt = &info->btree_info.value_type;
+
+ memcpy(&info->value_type, vt, sizeof(info->value_type));
+ info->btree_info.tm = tm;
+ info->btree_info.levels = 1;
+
+ bvt->context = info;
+ bvt->size = sizeof(__le64);
+ bvt->inc = block_inc;
+ bvt->dec = block_dec;
+ bvt->equal = block_equal;
+}
+EXPORT_SYMBOL_GPL(dm_array_info_init);
+
+int dm_array_empty(struct dm_array_info *info, dm_block_t *root)
+{
+ return dm_btree_empty(&info->btree_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_array_empty);
+
+static int array_resize(struct dm_array_info *info, dm_block_t root,
+ uint32_t old_size, uint32_t new_size,
+ const void *value, dm_block_t *new_root)
+{
+ int r;
+ struct resize resize;
+
+ if (old_size == new_size)
+ return 0;
+
+ resize.info = info;
+ resize.root = root;
+ resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+ resize.max_entries = calc_max_entries(info->value_type.size,
+ resize.size_of_block);
+
+ resize.old_nr_full_blocks = old_size / resize.max_entries;
+ resize.old_nr_entries_in_last_block = old_size % resize.max_entries;
+ resize.new_nr_full_blocks = new_size / resize.max_entries;
+ resize.new_nr_entries_in_last_block = new_size % resize.max_entries;
+ resize.value = value;
+
+ r = ((new_size > old_size) ? grow : shrink)(&resize);
+ if (r)
+ return r;
+
+ *new_root = resize.root;
+ return 0;
+}
+
+int dm_array_resize(struct dm_array_info *info, dm_block_t root,
+ uint32_t old_size, uint32_t new_size,
+ const void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value)
+{
+ int r = array_resize(info, root, old_size, new_size, value, new_root);
+ __dm_unbless_for_disk(value);
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_resize);
+
+int dm_array_del(struct dm_array_info *info, dm_block_t root)
+{
+ return dm_btree_del(&info->btree_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_array_del);
+
+int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
+ uint32_t index, void *value_le)
+{
+ int r;
+ struct dm_block *block;
+ struct array_block *ab;
+ size_t size_of_block;
+ unsigned entry, max_entries;
+
+ size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+ max_entries = calc_max_entries(info->value_type.size, size_of_block);
+
+ r = lookup_ablock(info, root, index / max_entries, &block, &ab);
+ if (r)
+ return r;
+
+ entry = index % max_entries;
+ if (entry >= le32_to_cpu(ab->nr_entries))
+ r = -ENODATA;
+ else
+ memcpy(value_le, element_at(info, ab, entry),
+ info->value_type.size);
+
+ unlock_ablock(info, block);
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_get_value);
+
+static int array_set_value(struct dm_array_info *info, dm_block_t root,
+ uint32_t index, const void *value, dm_block_t *new_root)
+{
+ int r;
+ struct dm_block *block;
+ struct array_block *ab;
+ size_t size_of_block;
+ unsigned max_entries;
+ unsigned entry;
+ void *old_value;
+ struct dm_btree_value_type *vt = &info->value_type;
+
+ size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
+ max_entries = calc_max_entries(info->value_type.size, size_of_block);
+
+ r = shadow_ablock(info, &root, index / max_entries, &block, &ab);
+ if (r)
+ return r;
+ *new_root = root;
+
+ entry = index % max_entries;
+ if (entry >= le32_to_cpu(ab->nr_entries)) {
+ r = -ENODATA;
+ goto out;
+ }
+
+ old_value = element_at(info, ab, entry);
+ if (vt->dec &&
+ (!vt->equal || !vt->equal(vt->context, old_value, value))) {
+ vt->dec(vt->context, old_value);
+ if (vt->inc)
+ vt->inc(vt->context, value);
+ }
+
+ memcpy(old_value, value, info->value_type.size);
+
+out:
+ unlock_ablock(info, block);
+ return r;
+}
+
+int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
+ uint32_t index, const void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value)
+{
+ int r;
+
+ r = array_set_value(info, root, index, value, new_root);
+ __dm_unbless_for_disk(value);
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_array_set_value);
+
+struct walk_info {
+ struct dm_array_info *info;
+ int (*fn)(void *context, uint64_t key, void *leaf);
+ void *context;
+};
+
+static int walk_ablock(void *context, uint64_t *keys, void *leaf)
+{
+ struct walk_info *wi = context;
+
+ int r;
+ unsigned i;
+ __le64 block_le;
+ unsigned nr_entries, max_entries;
+ struct dm_block *block;
+ struct array_block *ab;
+
+ memcpy(&block_le, leaf, sizeof(block_le));
+ r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab);
+ if (r)
+ return r;
+
+ max_entries = le32_to_cpu(ab->max_entries);
+ nr_entries = le32_to_cpu(ab->nr_entries);
+ for (i = 0; i < nr_entries; i++) {
+ r = wi->fn(wi->context, keys[0] * max_entries + i,
+ element_at(wi->info, ab, i));
+
+ if (r)
+ break;
+ }
+
+ unlock_ablock(wi->info, block);
+ return r;
+}
+
+int dm_array_walk(struct dm_array_info *info, dm_block_t root,
+ int (*fn)(void *, uint64_t key, void *leaf),
+ void *context)
+{
+ struct walk_info wi;
+
+ wi.info = info;
+ wi.fn = fn;
+ wi.context = context;
+
+ return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi);
+}
+EXPORT_SYMBOL_GPL(dm_array_walk);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h
new file mode 100644
index 000000000000..ea177d6fa58f
--- /dev/null
+++ b/drivers/md/persistent-data/dm-array.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#ifndef _LINUX_DM_ARRAY_H
+#define _LINUX_DM_ARRAY_H
+
+#include "dm-btree.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * The dm-array is a persistent version of an array. It packs the data
+ * more efficiently than a btree which will result in less disk space use,
+ * and a performance boost. The element get and set operations are still
+ * O(ln(n)), but with a much smaller constant.
+ *
+ * The value type structure is reused from the btree type to support proper
+ * reference counting of values.
+ *
+ * The arrays implicitly know their length, and bounds are checked for
+ * lookups and updated. It doesn't store this in an accessible place
+ * because it would waste a whole metadata block. Make sure you store the
+ * size along with the array root in your encompassing data.
+ *
+ * Array entries are indexed via an unsigned integer starting from zero.
+ * Arrays are not sparse; if you resize an array to have 'n' entries then
+ * 'n - 1' will be the last valid index.
+ *
+ * Typical use:
+ *
+ * a) initialise a dm_array_info structure. This describes the array
+ * values and ties it into a specific transaction manager. It holds no
+ * instance data; the same info can be used for many similar arrays if
+ * you wish.
+ *
+ * b) Get yourself a root. The root is the index of a block of data on the
+ * disk that holds a particular instance of an array. You may have a
+ * pre existing root in your metadata that you wish to use, or you may
+ * want to create a brand new, empty array with dm_array_empty().
+ *
+ * Like the other data structures in this library, dm_array objects are
+ * immutable between transactions. Update functions will return you the
+ * root for a _new_ array. If you've incremented the old root, via
+ * dm_tm_inc(), before calling the update function you may continue to use
+ * it in parallel with the new root.
+ *
+ * c) resize an array with dm_array_resize().
+ *
+ * d) Get a value from the array with dm_array_get_value().
+ *
+ * e) Set a value in the array with dm_array_set_value().
+ *
+ * f) Walk an array of values in index order with dm_array_walk(). More
+ * efficient than making many calls to dm_array_get_value().
+ *
+ * g) Destroy the array with dm_array_del(). This tells the transaction
+ * manager that you're no longer using this data structure so it can
+ * recycle it's blocks. (dm_array_dec() would be a better name for it,
+ * but del is in keeping with dm_btree_del()).
+ */
+
+/*
+ * Describes an array. Don't initialise this structure yourself, use the
+ * init function below.
+ */
+struct dm_array_info {
+ struct dm_transaction_manager *tm;
+ struct dm_btree_value_type value_type;
+ struct dm_btree_info btree_info;
+};
+
+/*
+ * Sets up a dm_array_info structure. You don't need to do anything with
+ * this structure when you finish using it.
+ *
+ * info - the structure being filled in.
+ * tm - the transaction manager that should supervise this structure.
+ * vt - describes the leaf values.
+ */
+void dm_array_info_init(struct dm_array_info *info,
+ struct dm_transaction_manager *tm,
+ struct dm_btree_value_type *vt);
+
+/*
+ * Create an empty, zero length array.
+ *
+ * info - describes the array
+ * root - on success this will be filled out with the root block
+ */
+int dm_array_empty(struct dm_array_info *info, dm_block_t *root);
+
+/*
+ * Resizes the array.
+ *
+ * info - describes the array
+ * root - the root block of the array on disk
+ * old_size - the caller is responsible for remembering the size of
+ * the array
+ * new_size - can be bigger or smaller than old_size
+ * value - if we're growing the array the new entries will have this value
+ * new_root - on success, points to the new root block
+ *
+ * If growing the inc function for 'value' will be called the appropriate
+ * number of times. So if the caller is holding a reference they may want
+ * to drop it.
+ */
+int dm_array_resize(struct dm_array_info *info, dm_block_t root,
+ uint32_t old_size, uint32_t new_size,
+ const void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value);
+
+/*
+ * Frees a whole array. The value_type's decrement operation will be called
+ * for all values in the array
+ */
+int dm_array_del(struct dm_array_info *info, dm_block_t root);
+
+/*
+ * Lookup a value in the array
+ *
+ * info - describes the array
+ * root - root block of the array
+ * index - array index
+ * value - the value to be read. Will be in on-disk format of course.
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
+ uint32_t index, void *value);
+
+/*
+ * Set an entry in the array.
+ *
+ * info - describes the array
+ * root - root block of the array
+ * index - array index
+ * value - value to be written to disk. Make sure you confirm the value is
+ * in on-disk format with__dm_bless_for_disk() before calling.
+ * new_root - the new root block
+ *
+ * The old value being overwritten will be decremented, the new value
+ * incremented.
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
+ uint32_t index, const void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value);
+
+/*
+ * Walk through all the entries in an array.
+ *
+ * info - describes the array
+ * root - root block of the array
+ * fn - called back for every element
+ * context - passed to the callback
+ */
+int dm_array_walk(struct dm_array_info *info, dm_block_t root,
+ int (*fn)(void *context, uint64_t key, void *leaf),
+ void *context);
+
+/*----------------------------------------------------------------*/
+
+#endif /* _LINUX_DM_ARRAY_H */
diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c
new file mode 100644
index 000000000000..cd9a86d4cdf0
--- /dev/null
+++ b/drivers/md/persistent-data/dm-bitset.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-bitset.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/export.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "bitset"
+#define BITS_PER_ARRAY_ENTRY 64
+
+/*----------------------------------------------------------------*/
+
+static struct dm_btree_value_type bitset_bvt = {
+ .context = NULL,
+ .size = sizeof(__le64),
+ .inc = NULL,
+ .dec = NULL,
+ .equal = NULL,
+};
+
+/*----------------------------------------------------------------*/
+
+void dm_disk_bitset_init(struct dm_transaction_manager *tm,
+ struct dm_disk_bitset *info)
+{
+ dm_array_info_init(&info->array_info, tm, &bitset_bvt);
+ info->current_index_set = false;
+}
+EXPORT_SYMBOL_GPL(dm_disk_bitset_init);
+
+int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *root)
+{
+ return dm_array_empty(&info->array_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_bitset_empty);
+
+int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t old_nr_entries, uint32_t new_nr_entries,
+ bool default_value, dm_block_t *new_root)
+{
+ uint32_t old_blocks = dm_div_up(old_nr_entries, BITS_PER_ARRAY_ENTRY);
+ uint32_t new_blocks = dm_div_up(new_nr_entries, BITS_PER_ARRAY_ENTRY);
+ __le64 value = default_value ? cpu_to_le64(~0) : cpu_to_le64(0);
+
+ __dm_bless_for_disk(&value);
+ return dm_array_resize(&info->array_info, root, old_blocks, new_blocks,
+ &value, new_root);
+}
+EXPORT_SYMBOL_GPL(dm_bitset_resize);
+
+int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root)
+{
+ return dm_array_del(&info->array_info, root);
+}
+EXPORT_SYMBOL_GPL(dm_bitset_del);
+
+int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
+ dm_block_t *new_root)
+{
+ int r;
+ __le64 value;
+
+ if (!info->current_index_set)
+ return 0;
+
+ value = cpu_to_le64(info->current_bits);
+
+ __dm_bless_for_disk(&value);
+ r = dm_array_set_value(&info->array_info, root, info->current_index,
+ &value, new_root);
+ if (r)
+ return r;
+
+ info->current_index_set = false;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_flush);
+
+static int read_bits(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t array_index)
+{
+ int r;
+ __le64 value;
+
+ r = dm_array_get_value(&info->array_info, root, array_index, &value);
+ if (r)
+ return r;
+
+ info->current_bits = le64_to_cpu(value);
+ info->current_index_set = true;
+ info->current_index = array_index;
+ return 0;
+}
+
+static int get_array_entry(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root)
+{
+ int r;
+ unsigned array_index = index / BITS_PER_ARRAY_ENTRY;
+
+ if (info->current_index_set) {
+ if (info->current_index == array_index)
+ return 0;
+
+ r = dm_bitset_flush(info, root, new_root);
+ if (r)
+ return r;
+ }
+
+ return read_bits(info, root, array_index);
+}
+
+int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root)
+{
+ int r;
+ unsigned b = index % BITS_PER_ARRAY_ENTRY;
+
+ r = get_array_entry(info, root, index, new_root);
+ if (r)
+ return r;
+
+ set_bit(b, (unsigned long *) &info->current_bits);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_set_bit);
+
+int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root)
+{
+ int r;
+ unsigned b = index % BITS_PER_ARRAY_ENTRY;
+
+ r = get_array_entry(info, root, index, new_root);
+ if (r)
+ return r;
+
+ clear_bit(b, (unsigned long *) &info->current_bits);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_clear_bit);
+
+int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root, bool *result)
+{
+ int r;
+ unsigned b = index % BITS_PER_ARRAY_ENTRY;
+
+ r = get_array_entry(info, root, index, new_root);
+ if (r)
+ return r;
+
+ *result = test_bit(b, (unsigned long *) &info->current_bits);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bitset_test_bit);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h
new file mode 100644
index 000000000000..e1b9bea14aa1
--- /dev/null
+++ b/drivers/md/persistent-data/dm-bitset.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#ifndef _LINUX_DM_BITSET_H
+#define _LINUX_DM_BITSET_H
+
+#include "dm-array.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * This bitset type is a thin wrapper round a dm_array of 64bit words. It
+ * uses a tiny, one word cache to reduce the number of array lookups and so
+ * increase performance.
+ *
+ * Like the dm-array that it's based on, the caller needs to keep track of
+ * the size of the bitset separately. The underlying dm-array implicitly
+ * knows how many words it's storing and will return -ENODATA if you try
+ * and access an out of bounds word. However, an out of bounds bit in the
+ * final word will _not_ be detected, you have been warned.
+ *
+ * Bits are indexed from zero.
+
+ * Typical use:
+ *
+ * a) Initialise a dm_disk_bitset structure with dm_disk_bitset_init().
+ * This describes the bitset and includes the cache. It's not called it
+ * dm_bitset_info in line with other data structures because it does
+ * include instance data.
+ *
+ * b) Get yourself a root. The root is the index of a block of data on the
+ * disk that holds a particular instance of an bitset. You may have a
+ * pre existing root in your metadata that you wish to use, or you may
+ * want to create a brand new, empty bitset with dm_bitset_empty().
+ *
+ * Like the other data structures in this library, dm_bitset objects are
+ * immutable between transactions. Update functions will return you the
+ * root for a _new_ array. If you've incremented the old root, via
+ * dm_tm_inc(), before calling the update function you may continue to use
+ * it in parallel with the new root.
+ *
+ * Even read operations may trigger the cache to be flushed and as such
+ * return a root for a new, updated bitset.
+ *
+ * c) resize a bitset with dm_bitset_resize().
+ *
+ * d) Set a bit with dm_bitset_set_bit().
+ *
+ * e) Clear a bit with dm_bitset_clear_bit().
+ *
+ * f) Test a bit with dm_bitset_test_bit().
+ *
+ * g) Flush all updates from the cache with dm_bitset_flush().
+ *
+ * h) Destroy the bitset with dm_bitset_del(). This tells the transaction
+ * manager that you're no longer using this data structure so it can
+ * recycle it's blocks. (dm_bitset_dec() would be a better name for it,
+ * but del is in keeping with dm_btree_del()).
+ */
+
+/*
+ * Opaque object. Unlike dm_array_info, you should have one of these per
+ * bitset. Initialise with dm_disk_bitset_init().
+ */
+struct dm_disk_bitset {
+ struct dm_array_info array_info;
+
+ uint32_t current_index;
+ uint64_t current_bits;
+
+ bool current_index_set:1;
+};
+
+/*
+ * Sets up a dm_disk_bitset structure. You don't need to do anything with
+ * this structure when you finish using it.
+ *
+ * tm - the transaction manager that should supervise this structure
+ * info - the structure being initialised
+ */
+void dm_disk_bitset_init(struct dm_transaction_manager *tm,
+ struct dm_disk_bitset *info);
+
+/*
+ * Create an empty, zero length bitset.
+ *
+ * info - describes the bitset
+ * new_root - on success, points to the new root block
+ */
+int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root);
+
+/*
+ * Resize the bitset.
+ *
+ * info - describes the bitset
+ * old_root - the root block of the array on disk
+ * old_nr_entries - the number of bits in the old bitset
+ * new_nr_entries - the number of bits you want in the new bitset
+ * default_value - the value for any new bits
+ * new_root - on success, points to the new root block
+ */
+int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t old_root,
+ uint32_t old_nr_entries, uint32_t new_nr_entries,
+ bool default_value, dm_block_t *new_root);
+
+/*
+ * Frees the bitset.
+ */
+int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root);
+
+/*
+ * Set a bit.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * index - the bit index
+ * new_root - on success, points to the new root block
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root);
+
+/*
+ * Clears a bit.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * index - the bit index
+ * new_root - on success, points to the new root block
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root);
+
+/*
+ * Tests a bit.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * index - the bit index
+ * new_root - on success, points to the new root block (cached values may have been written)
+ * result - the bit value you're after
+ *
+ * -ENODATA will be returned if the index is out of bounds.
+ */
+int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root,
+ uint32_t index, dm_block_t *new_root, bool *result);
+
+/*
+ * Flush any cached changes to disk.
+ *
+ * info - describes the bitset
+ * root - the root block of the bitset
+ * new_root - on success, points to the new root block
+ */
+int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
+ dm_block_t *new_root);
+
+/*----------------------------------------------------------------*/
+
+#endif /* _LINUX_DM_BITSET_H */
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index accbb05f17b6..37d367bb9aa8 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -64,6 +64,7 @@ struct ro_spine {
void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
int exit_ro_spine(struct ro_spine *s);
int ro_step(struct ro_spine *s, dm_block_t new_child);
+void ro_pop(struct ro_spine *s);
struct btree_node *ro_node(struct ro_spine *s);
struct shadow_spine {
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index f199a0c4ed04..cf9fd676ae44 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -164,6 +164,13 @@ int ro_step(struct ro_spine *s, dm_block_t new_child)
return r;
}
+void ro_pop(struct ro_spine *s)
+{
+ BUG_ON(!s->count);
+ --s->count;
+ unlock_block(s->info, s->nodes[s->count]);
+}
+
struct btree_node *ro_node(struct ro_spine *s)
{
struct dm_block *block;
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 4caf66918cdb..35865425e4b4 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -807,3 +807,55 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
return r ? r : count;
}
EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
+
+/*
+ * FIXME: We shouldn't use a recursive algorithm when we have limited stack
+ * space. Also this only works for single level trees.
+ */
+static int walk_node(struct ro_spine *s, dm_block_t block,
+ int (*fn)(void *context, uint64_t *keys, void *leaf),
+ void *context)
+{
+ int r;
+ unsigned i, nr;
+ struct btree_node *n;
+ uint64_t keys;
+
+ r = ro_step(s, block);
+ n = ro_node(s);
+
+ nr = le32_to_cpu(n->header.nr_entries);
+ for (i = 0; i < nr; i++) {
+ if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
+ r = walk_node(s, value64(n, i), fn, context);
+ if (r)
+ goto out;
+ } else {
+ keys = le64_to_cpu(*key_ptr(n, i));
+ r = fn(context, &keys, value_ptr(n, i));
+ if (r)
+ goto out;
+ }
+ }
+
+out:
+ ro_pop(s);
+ return r;
+}
+
+int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
+ int (*fn)(void *context, uint64_t *keys, void *leaf),
+ void *context)
+{
+ int r;
+ struct ro_spine spine;
+
+ BUG_ON(info->levels > 1);
+
+ init_ro_spine(&spine, info);
+ r = walk_node(&spine, root, fn, context);
+ exit_ro_spine(&spine);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_walk);
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index a2cd50441ca1..8672d159e0b5 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -58,21 +58,21 @@ struct dm_btree_value_type {
* somewhere.) This method is _not_ called for insertion of a new
* value: It is assumed the ref count is already 1.
*/
- void (*inc)(void *context, void *value);
+ void (*inc)(void *context, const void *value);
/*
* This value is being deleted. The btree takes care of freeing
* the memory pointed to by @value. Often the del function just
* needs to decrement a reference count somewhere.
*/
- void (*dec)(void *context, void *value);
+ void (*dec)(void *context, const void *value);
/*
* A test for equality between two values. When a value is
* overwritten with a new one, the old one has the dec method
* called _unless_ the new and old value are deemed equal.
*/
- int (*equal)(void *context, void *value1, void *value2);
+ int (*equal)(void *context, const void *value1, const void *value2);
};
/*
@@ -142,4 +142,13 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
uint64_t *result_keys);
+/*
+ * Iterate through the a btree, calling fn() on each entry.
+ * It only works for single level trees and is internally recursive, so
+ * monitor stack usage carefully.
+ */
+int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
+ int (*fn)(void *context, uint64_t *keys, void *leaf),
+ void *context);
+
#endif /* _LINUX_DM_BTREE_H */