summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
authorPhilipp Reisner <philipp.reisner@linbit.com>2009-04-29 17:36:31 +0200
committerPhilipp Reisner <philipp.reisner@linbit.com>2009-07-29 10:25:54 +0200
commitc733785235d1cf1e37e70446182a21c06e69d519 (patch)
tree5c244ed206531f28dc42668b9e432c9d90c040b9 /drivers/block
parent1ff8d0aa6fb4011b8f49dcb79919946c1bd32196 (diff)
Tracking DRBD mainline
2fcb72c00e0fb35120665943cbe7003739bd1009 get rid of unneccessary fsync_bdev
2721065e39000629e87296676fd077bb0970417b Merge branch 'drbd-8.2' into drbd-8.3
93edff9e37a122b5f2d23ed2baef0dfeef1c5a17 Merge branch 'drbd-8.0' into drbd-8.2
0fb44e5ef678346808a9ab06a259112af7814fc8 dead code removal
13bb2bb8dd3fddbae88a1fcba757b567208e3ae1 fix config/deconfig race
4c0505a621e8c06eeac9a7d831621ea949770fe2 need to hold req_lock for_drbd_set_state (recently introduced bogon)
590cb50f41337c39b7d8095991fa1d144dbc3fd7 remove unnecessary variable, fix comment formatting
73abcf4b8f53f26fa4a6d524916fba142342df33 make bm_lock message moreinformative; hunting configuration race condition
0efb38921eff65f7f05aa2e028833aea5ed5b8f4 adjust loglevel: some dev_dbg should rather be warn, or info at least
50ddc402f72ab5b4486e0efa912a16b6f3d3012a bitmap compression stats: output percentage saved, not 'factor'
1737021364ea6da4d08bd08e99d1083587d271d4 compressed bitmap: finally settle for _one_ encoding
c63bd973e2d52baeceb425592af371bf8f4fe2cd introduce disable_sendpage module parameter
a87458a4243968229b9db85e680d464aae3c9bd4 fix double send_cnt accounting of _drbd_no_send_page fallbacks

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/drbd/drbd_bitmap.c4
-rw-r--r--drivers/block/drbd/drbd_buildtag.c4
-rw-r--r--drivers/block/drbd/drbd_int.h18
-rw-r--r--drivers/block/drbd/drbd_main.c171
-rw-r--r--drivers/block/drbd/drbd_nl.c156
-rw-r--r--drivers/block/drbd/drbd_receiver.c100
-rw-r--r--drivers/block/drbd/drbd_vli.h369
-rw-r--r--drivers/block/drbd/drbd_worker.c6
8 files changed, 295 insertions, 533 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index e602c778e712..0614f2c96f15 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -115,10 +115,10 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
trylock_failed = down_trylock(&b->bm_change);
if (trylock_failed) {
- dev_dbg(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
+ dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
current == mdev->receiver.task ? "receiver" :
current == mdev->asender.task ? "asender" :
- current == mdev->worker.task ? "worker" : "?",
+ current == mdev->worker.task ? "worker" : current->comm,
why, b->bm_why ?: "?",
b->bm_task == mdev->receiver.task ? "receiver" :
b->bm_task == mdev->asender.task ? "asender" :
diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c
index 617078b3dc33..a58ad76078a5 100644
--- a/drivers/block/drbd/drbd_buildtag.c
+++ b/drivers/block/drbd/drbd_buildtag.c
@@ -2,6 +2,6 @@
#include <linux/drbd_config.h>
const char *drbd_buildtag(void)
{
- return "GIT-hash: 1a59b007715215697968cfaed3f2f159d262c030 drbd/drbd_nl.c"
- " build by phil@fat-tyre, 2009-04-22 11:36:29";
+ return "GIT-hash: 29ef4c01e46b0a269d7bec39d5178be06097fead drbd/Kconfig drbd/Makefile drbd/Makefile-2.6 drbd/drbd_actlog.c drbd/drbd_bitmap.c drbd/drbd_int.h drbd/drbd_main.c drbd/drbd_nl.c drbd/drbd_proc.c drbd/drbd_receiver.c drbd/drbd_req.c drbd/drbd_req.h drbd/drbd_tracing.c drbd/drbd_tracing.h drbd/drbd_worker.c drbd/drbd_wrappers.h drbd/linux/drbd_config.h"
+ " build by phil@fat-tyre, 2009-04-29 15:43:41";
}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 94138cc08943..ba43fa57b750 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -58,6 +58,7 @@
/* module parameter, defined in drbd_main.c */
extern unsigned int minor_count;
+extern int disable_sendpage;
extern int allow_oos;
extern unsigned int cn_idx;
@@ -525,12 +526,10 @@ struct p_discard {
/* Valid values for the encoding field.
* Bump proto version when changing this. */
enum drbd_bitmap_code {
- RLE_VLI_Bytes = 0,
- RLE_VLI_BitsFibD_0_1 = 1,
- RLE_VLI_BitsFibD_1_1 = 2,
- RLE_VLI_BitsFibD_1_2 = 3,
- RLE_VLI_BitsFibD_2_3 = 4,
- RLE_VLI_BitsFibD_3_5 = 5,
+ /* RLE_VLI_Bytes = 0,
+ * and other bit variants had been defined during
+ * algorithm evaluation. */
+ RLE_VLI_Bits = 2,
};
struct p_compressed_bm {
@@ -777,6 +776,13 @@ enum {
BITMAP_IO_QUEUED, /* Started bitmap IO */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
NET_CONGESTED, /* The data socket is congested */
+
+ CONFIG_PENDING, /* serialization of (re)configuration requests.
+ * if set, also prevents the device from dying */
+ DEVICE_DYING, /* device became unconfigured,
+ * but worker thread is still handling the cleanup.
+ * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed,
+ * while this is set. */
};
struct drbd_bitmap; /* opaque for drbd_conf */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 4c84365aeeef..79cb0183f817 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -92,6 +92,7 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
* this becomes the boot parameter drbd.minor_count */
module_param(minor_count, uint, 0444);
+module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
module_param(cn_idx, uint, 0444);
@@ -112,6 +113,7 @@ module_param(fault_devs, int, 0644);
/* module parameter, defined */
unsigned int minor_count = 32;
+int disable_sendpage;
int allow_oos;
unsigned int cn_idx = CN_IDX_DRBD;
@@ -931,6 +933,20 @@ int __drbd_set_state(struct drbd_conf *mdev,
dev_info(DEV, "%s\n", pb);
}
+ /* solve the race between becoming unconfigured,
+ * worker doing the cleanup, and
+ * admin reconfiguring us:
+ * on (re)configure, first set CONFIG_PENDING,
+ * then wait for a potentially exiting worker,
+ * start the worker, and schedule one no_op.
+ * then proceed with configuration.
+ */
+ if (ns.disk == D_DISKLESS &&
+ ns.conn == C_STANDALONE &&
+ ns.role == R_SECONDARY &&
+ !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
+ set_bit(DEVICE_DYING, &mdev->flags);
+
mdev->state.i = ns.i;
wake_up(&mdev->misc_wait);
wake_up(&mdev->state_wait);
@@ -1192,9 +1208,9 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
mdev->resync = NULL;
lc_free(mdev->act_log);
mdev->act_log = NULL;
- __no_warn(local, drbd_free_bc(mdev->bc););
- wmb(); /* see begin of drbd_nl_disk_conf() */
- __no_warn(local, mdev->bc = NULL;);
+ __no_warn(local,
+ drbd_free_bc(mdev->bc);
+ mdev->bc = NULL;);
if (mdev->md_io_tmpp)
__free_page(mdev->md_io_tmpp);
@@ -1219,10 +1235,14 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
/* Terminate worker thread if we are unconfigured - it will be
restarted as needed... */
- if (ns.disk == D_DISKLESS && ns.conn == C_STANDALONE && ns.role == R_SECONDARY) {
+ if (ns.disk == D_DISKLESS &&
+ ns.conn == C_STANDALONE &&
+ ns.role == R_SECONDARY) {
if (os.aftr_isp != ns.aftr_isp)
resume_next_sg(mdev);
- drbd_thread_stop_nowait(&mdev->worker);
+ /* set in __drbd_set_state, unless CONFIG_PENDING was set */
+ if (test_bit(DEVICE_DYING, &mdev->flags))
+ drbd_thread_stop_nowait(&mdev->worker);
}
drbd_md_sync(mdev);
@@ -1327,6 +1347,7 @@ int drbd_thread_start(struct drbd_thread *thi)
thi->t_state = Restarting;
dev_info(DEV, "Restarting %s thread (from %s [%d])\n",
me, current->comm, current->pid);
+ /* fall through */
case Running:
case Restarting:
default:
@@ -1704,102 +1725,6 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
(struct p_header *)&p, sizeof(p));
}
-/* returns
- * positive: number of payload bytes needed in this packet.
- * zero: incompressible. */
-int fill_bitmap_rle_bytes(struct drbd_conf *mdev,
- struct p_compressed_bm *p,
- struct bm_xfer_ctx *c)
-{
- unsigned long plain_bits;
- unsigned long tmp;
- unsigned long rl;
- void *buffer;
- unsigned n;
- unsigned len;
- unsigned toggle;
-
- /* may we use this feature? */
- if ((mdev->sync_conf.use_rle_encoding == 0) ||
- (mdev->agreed_pro_version < 90))
- return 0;
-
- if (c->bit_offset >= c->bm_bits)
- return 0; /* nothing to do. */
-
- /* use at most thus many bytes */
- len = BM_PACKET_VLI_BYTES_MAX;
- buffer = p->code;
- /* plain bits covered in this code string */
- plain_bits = 0;
-
- /* p->encoding & 0x80 stores whether the first
- * run length is set.
- * bit offset is implicit.
- * start with toggle == 2 to be able to tell the first iteration */
- toggle = 2;
-
- /* see how much plain bits we can stuff into one packet
- * using RLE and VLI. */
- do {
- tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
- : _drbd_bm_find_next(mdev, c->bit_offset);
- if (tmp == -1UL)
- tmp = c->bm_bits;
- rl = tmp - c->bit_offset;
-
- if (toggle == 2) { /* first iteration */
- if (rl == 0) {
- /* the first checked bit was set,
- * store start value, */
- DCBP_set_start(p, 1);
- /* but skip encoding of zero run length */
- toggle = !toggle;
- continue;
- }
- DCBP_set_start(p, 0);
- }
-
- /* paranoia: catch zero runlength.
- * can only happen if bitmap is modified while we scan it. */
- if (rl == 0) {
- dev_err(DEV, "unexpected zero runlength while encoding bitmap "
- "t:%u bo:%lu\n", toggle, c->bit_offset);
- return -1;
- }
-
- n = vli_encode_bytes(buffer, rl, len);
- if (n == 0) /* buffer full */
- break;
-
- toggle = !toggle;
- buffer += n;
- len -= n;
- plain_bits += rl;
- c->bit_offset = tmp;
- } while (len && c->bit_offset < c->bm_bits);
-
- len = BM_PACKET_VLI_BYTES_MAX - len;
-
- if (plain_bits < (len << 3)) {
- /* incompressible with this method.
- * we need to rewind both word and bit position. */
- c->bit_offset -= plain_bits;
- bm_xfer_ctx_bit_to_word_offset(c);
- c->bit_offset = c->word_offset * BITS_PER_LONG;
- return 0;
- }
-
- /* RLE + VLI was able to compress it just fine.
- * update c->word_offset. */
- bm_xfer_ctx_bit_to_word_offset(c);
-
- /* store pad_bits */
- DCBP_set_pad_bits(p, 0);
-
- return len;
-}
-
int fill_bitmap_rle_bits(struct drbd_conf *mdev,
struct p_compressed_bm *p,
struct bm_xfer_ctx *c)
@@ -1826,8 +1751,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev,
/* plain bits covered in this code string */
plain_bits = 0;
- /* p->encoding & 0x80 stores whether the first
- * run length is set.
+ /* p->encoding & 0x80 stores whether the first run length is set.
* bit offset is implicit.
* start with toggle == 2 to be able to tell the first iteration */
toggle = 2;
@@ -1904,15 +1828,13 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
int len;
int ok;
- if (0)
- len = fill_bitmap_rle_bytes(mdev, p, c);
- else
- len = fill_bitmap_rle_bits(mdev, p, c);
+ len = fill_bitmap_rle_bits(mdev, p, c);
if (len < 0)
return FAILED;
+
if (len) {
- DCBP_set_code(p, 0 ? RLE_VLI_Bytes : RLE_VLI_BitsFibD_3_5);
+ DCBP_set_code(p, RLE_VLI_Bits);
ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h,
sizeof(*p) + len, 0);
@@ -2191,7 +2113,7 @@ STATIC int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
kunmap(page);
if (sent == size)
mdev->send_cnt += size>>9;
- return sent;
+ return sent == size;
}
int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
@@ -2201,21 +2123,14 @@ int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
int sent, ok;
int len = size;
- /* PARANOIA. if this ever triggers,
- * something in the layers above us is really kaputt.
- *one roundtrip later:
- * doh. it triggered. so XFS _IS_ really kaputt ...
- * oh well...
- */
- if ((page_count(page) < 1) || PageSlab(page)) {
- /* e.g. XFS meta- & log-data is in slab pages, which have a
- * page_count of 0 and/or have PageSlab() set...
- */
- sent = _drbd_no_send_page(mdev, page, offset, size);
- if (likely(sent > 0))
- len -= sent;
- goto out;
- }
+ /* e.g. XFS meta- & log-data is in slab pages, which have a
+ * page_count of 0 and/or have PageSlab() set.
+ * we cannot use send_page for those, as that does get_page();
+ * put_page(); and would cause either a VM_BUG directly, or
+ * __page_cache_release a page that would actually still be referenced
+ * by someone, leading to some obscure delayed Oops somewhere else. */
+ if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
+ return _drbd_no_send_page(mdev, page, offset, size);
drbd_update_congested(mdev);
set_fs(KERNEL_DS);
@@ -2241,7 +2156,6 @@ int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
set_fs(oldfs);
clear_bit(NET_CONGESTED, &mdev->flags);
-out:
ok = (len == 0);
if (likely(ok))
mdev->send_cnt += size>>9;
@@ -2643,8 +2557,11 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
D_ASSERT(mdev->net_conf == NULL);
drbd_set_my_capacity(mdev, 0);
- drbd_bm_resize(mdev, 0);
- drbd_bm_cleanup(mdev);
+ if (mdev->bitmap) {
+ /* maybe never allocated. */
+ drbd_bm_resize(mdev, 0);
+ drbd_bm_cleanup(mdev);
+ }
drbd_free_resources(mdev);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 3b46a934c2d6..c72c5adbbfd6 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -27,7 +27,6 @@
#include <linux/module.h>
#include <linux/in.h>
#include <linux/fs.h>
-#include <linux/buffer_head.h> /* for fsync_bdev */
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/connector.h>
@@ -353,8 +352,6 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
if (forced)
dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
- fsync_bdev(mdev->this_bdev);
-
/* Wait until nothing is on the fly :) */
wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
@@ -493,18 +490,15 @@ char *ppsize(char *buf, unsigned long long size)
* waits for ap_bio_cnt == 0. -> deadlock.
* but this cannot happen, actually, because:
* R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
- * (not connected, * or bad/no disk on peer):
+ * (not connected, or bad/no disk on peer):
* see drbd_fail_request_early, ap_bio_cnt is zero.
* R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
* peer may not initiate a resize.
*/
void drbd_suspend_io(struct drbd_conf *mdev)
{
- int in_flight;
set_bit(SUSPEND_IO, &mdev->flags);
- in_flight = atomic_read(&mdev->ap_bio_cnt);
- if (in_flight)
- wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
+ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
}
void drbd_resume_io(struct drbd_conf *mdev)
@@ -761,6 +755,36 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
}
}
+/* serialize deconfig (worker exiting, doing cleanup)
+ * and reconfig (drbdsetup disk, drbdsetup net)
+ *
+ * wait for a potentially exiting worker, then restart it,
+ * or start a new one.
+ */
+static void drbd_reconfig_start(struct drbd_conf *mdev)
+{
+ wait_event(mdev->state_wait, test_and_set_bit(CONFIG_PENDING, &mdev->flags));
+ wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
+ drbd_thread_start(&mdev->worker);
+}
+
+/* if still unconfigured, stops worker again.
+ * if configured now, clears CONFIG_PENDING.
+ * wakes potential waiters */
+static void drbd_reconfig_done(struct drbd_conf *mdev)
+{
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.disk == D_DISKLESS &&
+ mdev->state.conn == C_STANDALONE &&
+ mdev->state.role == R_SECONDARY) {
+ set_bit(DEVICE_DYING, &mdev->flags);
+ drbd_thread_stop_nowait(&mdev->worker);
+ } else
+ clear_bit(CONFIG_PENDING, &mdev->flags);
+ spin_unlock_irq(&mdev->req_lock);
+ wake_up(&mdev->state_wait);
+}
+
/* does always return 0;
* interesting return code is in reply->ret_code */
STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
@@ -774,33 +798,18 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
struct inode *inode, *inode2;
struct lru_cache *resync_lru = NULL;
union drbd_state ns, os;
- int rv, ntries = 0;
+ int rv;
int cp_discovered = 0;
int hardsect;
+ drbd_reconfig_start(mdev);
+
/* if you want to reconfigure, please tear down first */
if (mdev->state.disk > D_DISKLESS) {
retcode = ERR_DISK_CONFIGURED;
goto fail;
}
- /*
- * We may have gotten here very quickly from a detach. Wait for a bit
- * then fail.
- */
- while (1) {
- __no_warn(local, nbc = mdev->bc;);
- if (nbc == NULL)
- break;
- if (ntries++ >= 5) {
- dev_warn(DEV, "drbd_nl_disk_conf: mdev->bc not NULL.\n");
- retcode = ERR_DISK_CONFIGURED;
- goto fail;
- }
- __set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ/10);
- }
-
nbc = kmalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
if (!nbc) {
retcode = ERR_NOMEM;
@@ -808,17 +817,11 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
}
memset(&nbc->md, 0, sizeof(struct drbd_md));
-
- if (!(nlp->flags & DRBD_NL_SET_DEFAULTS) && inc_local(mdev)) {
- memcpy(&nbc->dc, &mdev->bc->dc, sizeof(struct disk_conf));
- dec_local(mdev);
- } else {
- memset(&nbc->dc, 0, sizeof(struct disk_conf));
- nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF;
- nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF;
- nbc->dc.fencing = DRBD_FENCING_DEF;
- nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
- }
+ memset(&nbc->dc, 0, sizeof(struct disk_conf));
+ nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF;
+ nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF;
+ nbc->dc.fencing = DRBD_FENCING_DEF;
+ nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) {
retcode = ERR_MANDATORY_TAG;
@@ -882,13 +885,6 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
goto release_bdev_fail;
}
- if (!mdev->bitmap) {
- if (drbd_bm_init(mdev)) {
- retcode = ERR_NOMEM;
- goto release_bdev_fail;
- }
- }
-
nbc->md_bdev = inode2->i_bdev;
if (bd_claim(nbc->md_bdev,
(nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
@@ -949,7 +945,9 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
drbd_suspend_io(mdev);
+ /* also wait for the last barrier ack. */
wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt));
+
retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
drbd_resume_io(mdev);
if (retcode < SS_SUCCESS)
@@ -958,9 +956,15 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
if (!inc_local_if_state(mdev, D_ATTACHING))
goto force_diskless;
- drbd_thread_start(&mdev->worker);
drbd_md_set_sector_offsets(mdev, nbc);
+ if (!mdev->bitmap) {
+ if (drbd_bm_init(mdev)) {
+ retcode = ERR_NOMEM;
+ goto force_diskless_dec;
+ }
+ }
+
retcode = drbd_md_read(mdev, nbc);
if (retcode != NO_ERROR)
goto force_diskless_dec;
@@ -1150,6 +1154,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
dec_local(mdev);
reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
return 0;
force_diskless_dec:
@@ -1175,18 +1180,14 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
lc_free(resync_lru);
reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
return 0;
}
STATIC int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
{
- fsync_bdev(mdev->this_bdev);
reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
-
- __set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ/20); /* 50ms; Time for worker to finally terminate */
-
return 0;
}
@@ -1208,6 +1209,8 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
void *int_dig_vv = NULL;
struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
+ drbd_reconfig_start(mdev);
+
if (mdev->state.conn > C_STANDALONE) {
retcode = ERR_NET_CONFIGURED;
goto fail;
@@ -1219,28 +1222,23 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
goto fail;
}
- if (!(nlp->flags & DRBD_NL_SET_DEFAULTS) && inc_net(mdev)) {
- memcpy(new_conf, mdev->net_conf, sizeof(struct net_conf));
- dec_net(mdev);
- } else {
- memset(new_conf, 0, sizeof(struct net_conf));
- new_conf->timeout = DRBD_TIMEOUT_DEF;
- new_conf->try_connect_int = DRBD_CONNECT_INT_DEF;
- new_conf->ping_int = DRBD_PING_INT_DEF;
- new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF;
- new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF;
- new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
- new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF;
- new_conf->ko_count = DRBD_KO_COUNT_DEF;
- new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF;
- new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF;
- new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF;
- new_conf->want_lose = 0;
- new_conf->two_primaries = 0;
- new_conf->wire_protocol = DRBD_PROT_C;
- new_conf->ping_timeo = DRBD_PING_TIMEO_DEF;
- new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF;
- }
+ memset(new_conf, 0, sizeof(struct net_conf));
+ new_conf->timeout = DRBD_TIMEOUT_DEF;
+ new_conf->try_connect_int = DRBD_CONNECT_INT_DEF;
+ new_conf->ping_int = DRBD_PING_INT_DEF;
+ new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF;
+ new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF;
+ new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
+ new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF;
+ new_conf->ko_count = DRBD_KO_COUNT_DEF;
+ new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF;
+ new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF;
+ new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF;
+ new_conf->want_lose = 0;
+ new_conf->two_primaries = 0;
+ new_conf->wire_protocol = DRBD_PROT_C;
+ new_conf->ping_timeo = DRBD_PING_TIMEO_DEF;
+ new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF;
if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
retcode = ERR_MANDATORY_TAG;
@@ -1367,7 +1365,12 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
}
}
- D_ASSERT(mdev->net_conf == NULL);
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->net_conf != NULL) {
+ retcode = ERR_NET_CONFIGURED;
+ spin_unlock_irq(&mdev->req_lock);
+ goto fail;
+ }
mdev->net_conf = new_conf;
mdev->send_cnt = 0;
@@ -1400,13 +1403,13 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
mdev->int_dig_out=int_dig_out;
mdev->int_dig_in=int_dig_in;
mdev->int_dig_vv=int_dig_vv;
+ spin_unlock_irq(&mdev->req_lock);
retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE);
- if (retcode >= SS_SUCCESS)
- drbd_thread_start(&mdev->worker);
kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
return 0;
fail:
@@ -1421,6 +1424,7 @@ fail:
kfree(new_conf);
reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
return 0;
}
@@ -1938,8 +1942,10 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
if (skip_initial_sync) {
drbd_send_uuids_skip_initial_sync(mdev);
_drbd_uuid_set(mdev, UI_BITMAP, 0);
+ spin_lock_irq(&mdev->req_lock);
_drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
CS_VERBOSE, NULL);
+ spin_unlock_irq(&mdev->req_lock);
}
}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 26ac8fd0e1f4..078ce728a2fc 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3225,105 +3225,55 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
return (s == c->bm_bits) ? DONE : OK;
}
-
-static enum receive_bitmap_ret
-recv_bm_rle_bytes(struct drbd_conf *mdev,
- struct p_compressed_bm *p,
- struct bm_xfer_ctx *c)
-{
- u64 rl;
- unsigned char *buf = p->code;
- unsigned long s;
- unsigned long e;
- int len = p->head.length - (p->code - p->head.payload);
- int toggle;
- int n;
-
- s = c->bit_offset;
-
- /* decoding. the payload of bitmap rle packets is VLI encoded
- * runlength of set and unset bits, starting with set/unset as defined
- * in p->encoding & 0x80. */
- for (toggle = DCBP_get_start(p); len; s += rl, toggle = !toggle) {
- if (s >= c->bm_bits) {
- dev_err(DEV, "bitmap overflow (s:%lu) while decoding bitmap RLE packet\n", s);
- return FAILED;
- }
-
- n = vli_decode_bytes(&rl, buf, len);
- if (n == 0) /* incomplete buffer! */
- return FAILED;
- buf += n;
- len -= n;
-
- if (rl == 0) {
- dev_err(DEV, "unexpected zero runlength while decoding bitmap RLE packet\n");
- return FAILED;
- }
-
- /* unset bits: ignore, because of x | 0 == x. */
- if (!toggle)
- continue;
-
- /* set bits: merge into bitmap. */
- e = s + rl -1;
- if (e >= c->bm_bits) {
- dev_err(DEV, "bitmap overflow (e:%lu) while decoding bitmap RLE packet\n", e);
- return FAILED;
- }
- _drbd_bm_set_bits(mdev, s, e);
- }
-
- c->bit_offset = s;
- bm_xfer_ctx_bit_to_word_offset(c);
-
- return (s == c->bm_bits) ? DONE : OK;
-}
-
static enum receive_bitmap_ret
decode_bitmap_c(struct drbd_conf *mdev,
struct p_compressed_bm *p,
struct bm_xfer_ctx *c)
{
- switch (DCBP_get_code(p)) {
- /* no default! I want the compiler to warn me! */
- case RLE_VLI_BitsFibD_0_1:
- case RLE_VLI_BitsFibD_1_1:
- case RLE_VLI_BitsFibD_1_2:
- case RLE_VLI_BitsFibD_2_3:
- break; /* TODO */
- case RLE_VLI_BitsFibD_3_5:
+ if (DCBP_get_code(p) == RLE_VLI_Bits)
return recv_bm_rle_bits(mdev, p, c);
- case RLE_VLI_Bytes:
- return recv_bm_rle_bytes(mdev, p, c);
- }
+
+ /* other variants had been implemented for evaluation,
+ * but have been dropped as this one turned out to be "best"
+ * during all our tests. */
+
dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
+ drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
return FAILED;
}
void INFO_bm_xfer_stats(struct drbd_conf *mdev,
const char *direction, struct bm_xfer_ctx *c)
{
- unsigned plain_would_take = sizeof(struct p_header) *
+ /* what would it take to transfer it "plaintext" */
+ unsigned plain = sizeof(struct p_header) *
((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
+ c->bm_words * sizeof(long);
unsigned total = c->bytes[0] + c->bytes[1];
- unsigned q, r;
+ unsigned r;
/* total can not be zero. but just in case: */
if (total == 0)
return;
- q = plain_would_take / total;
- r = plain_would_take % total;
- r = (r > UINT_MAX/100) ? (r / (total+99/100)) : (100 * r / total);
+ /* don't report if not compressed */
+ if (total >= plain)
+ return;
+
+ /* total < plain. check for overflow, still */
+ r = (total > UINT_MAX/1000) ? (total / (plain/1000))
+ : (1000 * total / plain);
+
+ if (r > 1000)
+ r = 1000;
+ r = 1000 - r;
dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
- "total %u; compression factor: %u.%02u\n",
+ "total %u; compression: %u.%u%%\n",
direction,
c->bytes[1], c->packets[1],
c->bytes[0], c->packets[0],
- total, q, r);
+ total, r/10, r % 10);
}
/* Since we are processing the bitfield from lower addresses to higher,
@@ -3689,9 +3639,9 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev)
* we already released the socket!? */
i = atomic_read(&mdev->pp_in_use);
if (i)
- dev_dbg(DEV, "pp_in_use = %u, expected 0\n", i);
+ dev_info(DEV, "pp_in_use = %u, expected 0\n", i);
if (!list_empty(&mdev->net_ee))
- dev_dbg(DEV, "net_ee not empty!\n");
+ dev_info(DEV, "net_ee not empty!\n");
D_ASSERT(list_empty(&mdev->read_ee));
D_ASSERT(list_empty(&mdev->active_ee));
diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h
index 8f5e8186852d..fc824006e721 100644
--- a/drivers/block/drbd/drbd_vli.h
+++ b/drivers/block/drbd/drbd_vli.h
@@ -37,15 +37,15 @@
*
* We never actually need to encode a "zero" (runlengths are positive).
* But then we have to store the value of the first bit.
- * So we can as well have the "zero" be a valid runlength,
- * and start encoding/decoding by "number of _set_ bits" by convention.
+ * The first bit of information thus shall encode if the first runlength
+ * gives the number of set or unset bits.
*
* We assume that large areas are either completely set or unset,
* which gives good compression with any runlength method,
* even when encoding the runlength as fixed size 32bit/64bit integers.
*
* Still, there may be areas where the polarity flips every few bits,
- * and encoding the runlength sequence of those ares with fix size
+ * and encoding the runlength sequence of those areas with fix size
* integers would be much worse than plaintext.
*
* We want to encode small runlength values with minimum code length,
@@ -53,105 +53,147 @@
*
* Thus we need a Variable Length Integer encoding, VLI.
*
- * For runlength < 8, we produce more code bits than plaintext input.
- * we need to send incompressible chunks as plaintext, skip over them
+ * For some cases, we produce more code bits than plaintext input.
+ * We need to send incompressible chunks as plaintext, skip over them
* and then see if the next chunk compresses better.
*
- * We don't care too much about "excellent" compression ratio
- * for large runlengths, 249 bit/24 bit still gives a factor of > 10.
+ * We don't care too much about "excellent" compression ratio for large
+ * runlengths (all set/all clear): whether we achieve a factor of 100
+ * or 1000 is not that much of an issue.
+ * We do not want to waste too much on short runlengths in the "noisy"
+ * parts of the bitmap, though.
*
- * We care for cpu time needed to actually encode/decode
- * into the transmitted byte stream.
+ * There are endless variants of VLI, we experimented with:
+ * * simple byte-based
+ * * various bit based with different code word length.
*
- * There are endless variants of VLI.
- * For this special purpose, we just need something that is "good enough",
- * and easy to understand and code, fast to encode and decode,
- * and does not consume memory.
+ * To avoid yet another configuration parameter (choice of bitmap compression
+ * algorithm) which was difficult to explain and tune, we just chose the one
+ * variant that turned out best in all test cases.
+ * Based on real world usage patterns, with device sizes ranging from a few GiB
+ * to several TiB, file server/mailserver/webserver/mysql/postgres,
+ * mostly idle to really busy, the all time winner (though sometimes only
+ * marginally better) is:
*/
/*
- * buf points to the current position in the tranfered byte stream.
- * stream is by definition little endian.
- * *buf_len gives the remaining number of bytes at that position.
- * *out will receive the decoded value.
- * returns number of bytes consumed,
- * or 0 if not enough bytes left in buffer (which would be invalid input).
- */
-static inline int vli_decode_bytes(u64 *out, unsigned char *buf, unsigned buf_len)
-{
- u64 tmp = 0;
- unsigned bytes; /* extra bytes after code byte */
+ * encoding is "visualised" as
+ * __little endian__ bitstream, least significant bit first (left most)
+ *
+ * this particular encoding is chosen so that the prefix code
+ * starts as unary encoding the level, then modified so that
+ * 10 levels can be described in 8bit, with minimal overhead
+ * for the smaller levels.
+ *
+ * Number of data bits follow fibonacci sequence, with the exception of the
+ * last level (+1 data bit, so it makes 64bit total). The only worse code when
+ * encoding bit polarity runlength is 1 plain bits => 2 code bits.
+prefix data bits max val Nº data bits
+0 x 0x2 1
+10 x 0x4 1
+110 xx 0x8 2
+1110 xxx 0x10 3
+11110 xxx xx 0x30 5
+111110 xx xxxxxx 0x130 8
+11111100 xxxxxxxx xxxxx 0x2130 13
+11111110 xxxxxxxx xxxxxxxx xxxxx 0x202130 21
+11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202130 34
+11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56
+ * maximum encodable value: 0x100000400202130 == 2**56 + some */
+
+/* compression "table":
+ transmitted x 0.29
+ as plaintext x ........................
+ x ........................
+ x ........................
+ x 0.59 0.21........................
+ x ........................................................
+ x .. c ...................................................
+ x 0.44.. o ...................................................
+ x .......... d ...................................................
+ x .......... e ...................................................
+ X............. ...................................................
+ x.............. b ...................................................
+2.0x............... i ...................................................
+ #X................ t ...................................................
+ #................. s ........................... plain bits ..........
+-+-----------------------------------------------------------------------
+ 1 16 32 64
+*/
- if (buf_len == 0)
- return 0;
+/* LEVEL: (total bits, prefix bits, prefix value),
+ * sorted ascending by number of total bits.
+ * The rest of the code table is calculated at compiletime from this. */
- switch(*buf) {
- case 0xff: bytes = 8; break;
- case 0xfe: bytes = 7; break;
- case 0xfd: bytes = 6; break;
- case 0xfc: bytes = 5; break;
- case 0xfb: bytes = 4; break;
- case 0xfa: bytes = 3; break;
- case 0xf9: bytes = 2; break;
- default:
- *out = *buf;
- return 1;
- }
+/* fibonacci data 1, 1, ... */
+#define VLI_L_1_1() do { \
+ LEVEL( 2, 1, 0x00); \
+ LEVEL( 3, 2, 0x01); \
+ LEVEL( 5, 3, 0x03); \
+ LEVEL( 7, 4, 0x07); \
+ LEVEL(10, 5, 0x0f); \
+ LEVEL(14, 6, 0x1f); \
+ LEVEL(21, 8, 0x3f); \
+ LEVEL(29, 8, 0x7f); \
+ LEVEL(42, 8, 0xbf); \
+ LEVEL(64, 8, 0xff); \
+ } while (0)
- if (buf_len <= bytes)
- return 0;
+/* finds a suitable level to decode the least significant part of in.
+ * returns number of bits consumed.
+ *
+ * BUG() for bad input, as that would mean a buggy code table. */
+static inline int vli_decode_bits(u64 *out, const u64 in)
+{
+ u64 adj = 1;
+
+#define LEVEL(t,b,v) \
+ do { \
+ if ((in & ((1 << b) -1)) == v) { \
+ *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \
+ return t; \
+ } \
+ adj += 1ULL << (t - b); \
+ } while (0)
+
+ VLI_L_1_1();
- /* no pointer cast assignment, there may be funny alignment
- * requirements on certain architectures */
- memcpy(&tmp, buf+1, bytes);
- *out = le64_to_cpu(tmp);
- return bytes+1;
+ /* NOT REACHED, if VLI_LEVELS code table is defined properly */
+ BUG();
+#undef LEVEL
}
-/*
- * similarly, encode n into buf.
- * returns consumed bytes,
- * or zero if not enough room left in buffer
- * (in which case the buf is left unchanged).
- *
- * encoding is little endian, first byte codes how much bytes follow.
- * first byte <= 0xf8 means just this byte, value = code byte.
- * first byte == 0xf9 .. 0xff: (code byte - 0xf7) data bytes follow.
- */
-static inline int vli_encode_bytes(unsigned char *buf, u64 n, unsigned buf_len)
+/* return number of code bits needed,
+ * or negative error number */
+static inline int __vli_encode_bits(u64 *out, const u64 in)
{
- unsigned bytes; /* _extra_ bytes after code byte */
-
- if (buf_len == 0)
- return 0;
+ u64 max = 0;
+ u64 adj = 1;
- if (n <= 0xf8) {
- *buf = (unsigned char)n;
- return 1;
- }
+ if (in == 0)
+ return -EINVAL;
- bytes = (n < (1ULL << 32))
- ? (n < (1ULL << 16)) ? 2
- : (n < (1ULL << 24)) ? 3 : 4
- : (n < (1ULL << 48)) ?
- (n < (1ULL << 40)) ? 5 : 6
- : (n < (1ULL << 56)) ? 7 : 8;
+#define LEVEL(t,b,v) do { \
+ max += 1ULL << (t - b); \
+ if (in <= max) { \
+ if (out) \
+ *out = ((in - adj) << b) | v; \
+ return t; \
+ } \
+ adj = max + 1; \
+ } while (0)
- if (buf_len <= bytes)
- return 0;
+ VLI_L_1_1();
- /* no pointer cast assignment, there may be funny alignment
- * requirements on certain architectures */
- *buf++ = 0xf7 + bytes; /* code, 0xf9 .. 0xff */
- n = cpu_to_le64(n);
- memcpy(buf, &n, bytes); /* plain */
- return bytes+1;
+ return -EOVERFLOW;
+#undef LEVEL
}
-/* ================================================================== */
+#undef VLI_L_1_1
-/* And here the more involved variants of VLI.
- *
+/* code from here down is independent of the actually used bit code */
+
+/*
* Code length is determined by some unique (e.g. unary) prefix.
* This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
* not a byte stream.
@@ -287,164 +329,6 @@ static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
return bits;
}
-/* we still need to actually define the code. */
-
-/*
- * encoding is "visualised" as
- * __little endian__ bitstream, least significant bit first (left most)
- *
- * this particular encoding is chosen so that the prefix code
- * starts as unary encoding the level, then modified so that
- * 11 levels can be described in 8bit, with minimal overhead
- * for the smaller levels.
- *
- * Number of data bits follow fibonacci sequence, with the exception of the
- * last level (+1 data bit, so it makes 64bit total). The only worse code when
- * encoding bit polarity runlength is 2 plain bits => 3 code bits.
-prefix data bits max val Nº data bits
-0 0x1 0
-10 x 0x3 1
-110 x 0x5 1
-1110 xx 0x9 2
-11110 xxx 0x11 3
-1111100 x xxxx 0x31 5
-1111101 x xxxxxxx 0x131 8
-11111100 xxxxxxxx xxxxx 0x2131 13
-11111110 xxxxxxxx xxxxxxxx xxxxx 0x202131 21
-11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202131 34
-11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56
- * maximum encodable value: 0x100000400202131 == 2**56 + some */
-
-/* LEVEL: (total bits, prefix bits, prefix value),
- * sorted ascending by number of total bits.
- * The rest of the code table is calculated at compiletime from this. */
-
-/* fibonacci data 0, 1, ... */
-#define VLI_L_0_1() do { \
- LEVEL( 1, 1, 0x00); \
- LEVEL( 3, 2, 0x01); \
- LEVEL( 4, 3, 0x03); \
- LEVEL( 6, 4, 0x07); \
- LEVEL( 8, 5, 0x0f); \
- LEVEL(12, 7, 0x1f); \
- LEVEL(15, 7, 0x5f); \
- LEVEL(21, 8, 0x3f); \
- LEVEL(29, 8, 0x7f); \
- LEVEL(42, 8, 0xbf); \
- LEVEL(64, 8, 0xff); \
- } while (0)
-
-/* Some variants, differeing in number of levels, prefix value, and number of
- * databits in each level. I tried a lot of variants. Those where the number
- * of data bits follows the fibonacci sequence (with a certain offset) simply
- * "look best" ;-)
- * All of these can encode at least "2 ** 56". */
-
-/* fibonacci data 1, 1, ... */
-#define VLI_L_1_1() do { \
- LEVEL( 2, 1, 0x00); \
- LEVEL( 3, 2, 0x01); \
- LEVEL( 5, 3, 0x03); \
- LEVEL( 7, 4, 0x07); \
- LEVEL(10, 5, 0x0f); \
- LEVEL(14, 6, 0x1f); \
- LEVEL(21, 8, 0x3f); \
- LEVEL(29, 8, 0x7f); \
- LEVEL(42, 8, 0xbf); \
- LEVEL(64, 8, 0xff); \
- } while (0)
-
-/* fibonacci data 1, 2, ... */
-#define VLI_L_1_2() do { \
- LEVEL( 2, 1, 0x00); \
- LEVEL( 4, 2, 0x01); \
- LEVEL( 6, 3, 0x03); \
- LEVEL( 9, 4, 0x07); \
- LEVEL(13, 5, 0x0f); \
- LEVEL(19, 6, 0x1f); \
- LEVEL(28, 7, 0x3f); \
- LEVEL(42, 8, 0x7f); \
- LEVEL(64, 8, 0xff); \
- } while (0)
-
-/* fibonacci data 2, 3, ... */
-#define VLI_L_2_3() do { \
- LEVEL( 3, 1, 0x00); \
- LEVEL( 5, 2, 0x01); \
- LEVEL( 8, 3, 0x03); \
- LEVEL(12, 4, 0x07); \
- LEVEL(18, 5, 0x0f); \
- LEVEL(27, 6, 0x1f); \
- LEVEL(41, 7, 0x3f); \
- LEVEL(64, 7, 0x5f); \
- } while (0)
-
-/* fibonacci data 3, 5, ... */
-#define VLI_L_3_5() do { \
- LEVEL( 4, 1, 0x00); \
- LEVEL( 7, 2, 0x01); \
- LEVEL(11, 3, 0x03); \
- LEVEL(17, 4, 0x07); \
- LEVEL(26, 5, 0x0f); \
- LEVEL(40, 6, 0x1f); \
- LEVEL(64, 6, 0x3f); \
- } while (0)
-
-/* CONFIG */
-#ifndef VLI_LEVELS
-#define VLI_LEVELS() VLI_L_3_5()
-#endif
-
-/* finds a suitable level to decode the least significant part of in.
- * returns number of bits consumed.
- *
- * BUG() for bad input, as that would mean a buggy code table. */
-static inline int vli_decode_bits(u64 *out, const u64 in)
-{
- u64 adj = 1;
-
-#define LEVEL(t,b,v) \
- do { \
- if ((in & ((1 << b) -1)) == v) { \
- *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \
- return t; \
- } \
- adj += 1ULL << (t - b); \
- } while (0)
-
- VLI_LEVELS();
-
- /* NOT REACHED, if VLI_LEVELS code table is defined properly */
- BUG();
-#undef LEVEL
-}
-
-/* return number of code bits needed,
- * or negative error number */
-static inline int __vli_encode_bits(u64 *out, const u64 in)
-{
- u64 max = 0;
- u64 adj = 1;
-
- if (in == 0)
- return -EINVAL;
-
-#define LEVEL(t,b,v) do { \
- max += 1ULL << (t - b); \
- if (in <= max) { \
- if (out) \
- *out = ((in - adj) << b) | v; \
- return t; \
- } \
- adj = max + 1; \
- } while (0)
-
- VLI_LEVELS();
-
- return -EOVERFLOW;
-#undef LEVEL
-}
-
/* encodes @in as vli into @bs;
* return values
@@ -464,11 +348,4 @@ static inline int vli_encode_bits(struct bitstream *bs, u64 in)
return bitstream_put_bits(bs, code, bits);
}
-#undef VLI_L_0_1
-#undef VLI_L_1_1
-#undef VLI_L_1_2
-#undef VLI_L_2_3
-#undef VLI_L_3_5
-
-#undef VLI_LEVELS
#endif
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index a39ba573743e..4be8814fd50d 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1428,6 +1428,8 @@ int drbd_worker(struct drbd_thread *thi)
NS(conn, C_NETWORK_FAILURE));
}
}
+ D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags));
+ D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags));
spin_lock_irq(&mdev->data.work.q_lock);
i = 0;
@@ -1460,5 +1462,9 @@ int drbd_worker(struct drbd_thread *thi)
dev_info(DEV, "worker terminated\n");
+ clear_bit(DEVICE_DYING, &mdev->flags);
+ clear_bit(CONFIG_PENDING, &mdev->flags);
+ wake_up(&mdev->state_wait);
+
return 0;
}