diff options
author | Philipp Reisner <philipp.reisner@linbit.com> | 2009-04-29 17:36:31 +0200 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2009-07-29 10:25:54 +0200 |
commit | c733785235d1cf1e37e70446182a21c06e69d519 (patch) | |
tree | 5c244ed206531f28dc42668b9e432c9d90c040b9 /drivers/block | |
parent | 1ff8d0aa6fb4011b8f49dcb79919946c1bd32196 (diff) |
Tracking DRBD mainline
2fcb72c00e0fb35120665943cbe7003739bd1009 get rid of unnecessary fsync_bdev
2721065e39000629e87296676fd077bb0970417b Merge branch 'drbd-8.2' into drbd-8.3
93edff9e37a122b5f2d23ed2baef0dfeef1c5a17 Merge branch 'drbd-8.0' into drbd-8.2
0fb44e5ef678346808a9ab06a259112af7814fc8 dead code removal
13bb2bb8dd3fddbae88a1fcba757b567208e3ae1 fix config/deconfig race
4c0505a621e8c06eeac9a7d831621ea949770fe2 need to hold req_lock for _drbd_set_state (recently introduced bogon)
590cb50f41337c39b7d8095991fa1d144dbc3fd7 remove unnecessary variable, fix comment formatting
73abcf4b8f53f26fa4a6d524916fba142342df33 make bm_lock message more informative; hunting configuration race condition
0efb38921eff65f7f05aa2e028833aea5ed5b8f4 adjust loglevel: some dev_dbg should rather be warn, or info at least
50ddc402f72ab5b4486e0efa912a16b6f3d3012a bitmap compression stats: output percentage saved, not 'factor'
1737021364ea6da4d08bd08e99d1083587d271d4 compressed bitmap: finally settle for _one_ encoding
c63bd973e2d52baeceb425592af371bf8f4fe2cd introduce disable_sendpage module parameter
a87458a4243968229b9db85e680d464aae3c9bd4 fix double send_cnt accounting of _drbd_no_send_page fallbacks
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 4 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_buildtag.c | 4 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 18 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 171 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 156 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 100 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_vli.h | 369 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 6 |
8 files changed, 295 insertions, 533 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e602c778e712..0614f2c96f15 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -115,10 +115,10 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) trylock_failed = down_trylock(&b->bm_change); if (trylock_failed) { - dev_dbg(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", + dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", current == mdev->receiver.task ? "receiver" : current == mdev->asender.task ? "asender" : - current == mdev->worker.task ? "worker" : "?", + current == mdev->worker.task ? "worker" : current->comm, why, b->bm_why ?: "?", b->bm_task == mdev->receiver.task ? "receiver" : b->bm_task == mdev->asender.task ? "asender" : diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c index 617078b3dc33..a58ad76078a5 100644 --- a/drivers/block/drbd/drbd_buildtag.c +++ b/drivers/block/drbd/drbd_buildtag.c @@ -2,6 +2,6 @@ #include <linux/drbd_config.h> const char *drbd_buildtag(void) { - return "GIT-hash: 1a59b007715215697968cfaed3f2f159d262c030 drbd/drbd_nl.c" - " build by phil@fat-tyre, 2009-04-22 11:36:29"; + return "GIT-hash: 29ef4c01e46b0a269d7bec39d5178be06097fead drbd/Kconfig drbd/Makefile drbd/Makefile-2.6 drbd/drbd_actlog.c drbd/drbd_bitmap.c drbd/drbd_int.h drbd/drbd_main.c drbd/drbd_nl.c drbd/drbd_proc.c drbd/drbd_receiver.c drbd/drbd_req.c drbd/drbd_req.h drbd/drbd_tracing.c drbd/drbd_tracing.h drbd/drbd_worker.c drbd/drbd_wrappers.h drbd/linux/drbd_config.h" + " build by phil@fat-tyre, 2009-04-29 15:43:41"; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 94138cc08943..ba43fa57b750 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -58,6 +58,7 @@ /* module parameter, defined in drbd_main.c */ extern unsigned int minor_count; +extern int disable_sendpage; extern int allow_oos; extern 
unsigned int cn_idx; @@ -525,12 +526,10 @@ struct p_discard { /* Valid values for the encoding field. * Bump proto version when changing this. */ enum drbd_bitmap_code { - RLE_VLI_Bytes = 0, - RLE_VLI_BitsFibD_0_1 = 1, - RLE_VLI_BitsFibD_1_1 = 2, - RLE_VLI_BitsFibD_1_2 = 3, - RLE_VLI_BitsFibD_2_3 = 4, - RLE_VLI_BitsFibD_3_5 = 5, + /* RLE_VLI_Bytes = 0, + * and other bit variants had been defined during + * algorithm evaluation. */ + RLE_VLI_Bits = 2, }; struct p_compressed_bm { @@ -777,6 +776,13 @@ enum { BITMAP_IO_QUEUED, /* Started bitmap IO */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ NET_CONGESTED, /* The data socket is congested */ + + CONFIG_PENDING, /* serialization of (re)configuration requests. + * if set, also prevents the device from dying */ + DEVICE_DYING, /* device became unconfigured, + * but worker thread is still handling the cleanup. + * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, + * while this is set. */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4c84365aeeef..79cb0183f817 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -92,6 +92,7 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!"); /* thanks to these macros, if compiled into the kernel (not-module), * this becomes the boot parameter drbd.minor_count */ module_param(minor_count, uint, 0444); +module_param(disable_sendpage, bool, 0644); module_param(allow_oos, bool, 0); module_param(cn_idx, uint, 0444); @@ -112,6 +113,7 @@ module_param(fault_devs, int, 0644); /* module parameter, defined */ unsigned int minor_count = 32; +int disable_sendpage; int allow_oos; unsigned int cn_idx = CN_IDX_DRBD; @@ -931,6 +933,20 @@ int __drbd_set_state(struct drbd_conf *mdev, dev_info(DEV, "%s\n", pb); } + /* solve the race between becoming unconfigured, + * worker doing the cleanup, and + * admin reconfiguring us: + * on (re)configure, first 
set CONFIG_PENDING, + * then wait for a potentially exiting worker, + * start the worker, and schedule one no_op. + * then proceed with configuration. + */ + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY && + !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) + set_bit(DEVICE_DYING, &mdev->flags); + mdev->state.i = ns.i; wake_up(&mdev->misc_wait); wake_up(&mdev->state_wait); @@ -1192,9 +1208,9 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->resync = NULL; lc_free(mdev->act_log); mdev->act_log = NULL; - __no_warn(local, drbd_free_bc(mdev->bc);); - wmb(); /* see begin of drbd_nl_disk_conf() */ - __no_warn(local, mdev->bc = NULL;); + __no_warn(local, + drbd_free_bc(mdev->bc); + mdev->bc = NULL;); if (mdev->md_io_tmpp) __free_page(mdev->md_io_tmpp); @@ -1219,10 +1235,14 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Terminate worker thread if we are unconfigured - it will be restarted as needed... */ - if (ns.disk == D_DISKLESS && ns.conn == C_STANDALONE && ns.role == R_SECONDARY) { + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY) { if (os.aftr_isp != ns.aftr_isp) resume_next_sg(mdev); - drbd_thread_stop_nowait(&mdev->worker); + /* set in __drbd_set_state, unless CONFIG_PENDING was set */ + if (test_bit(DEVICE_DYING, &mdev->flags)) + drbd_thread_stop_nowait(&mdev->worker); } drbd_md_sync(mdev); @@ -1327,6 +1347,7 @@ int drbd_thread_start(struct drbd_thread *thi) thi->t_state = Restarting; dev_info(DEV, "Restarting %s thread (from %s [%d])\n", me, current->comm, current->pid); + /* fall through */ case Running: case Restarting: default: @@ -1704,102 +1725,6 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) (struct p_header *)&p, sizeof(p)); } -/* returns - * positive: number of payload bytes needed in this packet. - * zero: incompressible. 
*/ -int fill_bitmap_rle_bytes(struct drbd_conf *mdev, - struct p_compressed_bm *p, - struct bm_xfer_ctx *c) -{ - unsigned long plain_bits; - unsigned long tmp; - unsigned long rl; - void *buffer; - unsigned n; - unsigned len; - unsigned toggle; - - /* may we use this feature? */ - if ((mdev->sync_conf.use_rle_encoding == 0) || - (mdev->agreed_pro_version < 90)) - return 0; - - if (c->bit_offset >= c->bm_bits) - return 0; /* nothing to do. */ - - /* use at most thus many bytes */ - len = BM_PACKET_VLI_BYTES_MAX; - buffer = p->code; - /* plain bits covered in this code string */ - plain_bits = 0; - - /* p->encoding & 0x80 stores whether the first - * run length is set. - * bit offset is implicit. - * start with toggle == 2 to be able to tell the first iteration */ - toggle = 2; - - /* see how much plain bits we can stuff into one packet - * using RLE and VLI. */ - do { - tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset) - : _drbd_bm_find_next(mdev, c->bit_offset); - if (tmp == -1UL) - tmp = c->bm_bits; - rl = tmp - c->bit_offset; - - if (toggle == 2) { /* first iteration */ - if (rl == 0) { - /* the first checked bit was set, - * store start value, */ - DCBP_set_start(p, 1); - /* but skip encoding of zero run length */ - toggle = !toggle; - continue; - } - DCBP_set_start(p, 0); - } - - /* paranoia: catch zero runlength. - * can only happen if bitmap is modified while we scan it. */ - if (rl == 0) { - dev_err(DEV, "unexpected zero runlength while encoding bitmap " - "t:%u bo:%lu\n", toggle, c->bit_offset); - return -1; - } - - n = vli_encode_bytes(buffer, rl, len); - if (n == 0) /* buffer full */ - break; - - toggle = !toggle; - buffer += n; - len -= n; - plain_bits += rl; - c->bit_offset = tmp; - } while (len && c->bit_offset < c->bm_bits); - - len = BM_PACKET_VLI_BYTES_MAX - len; - - if (plain_bits < (len << 3)) { - /* incompressible with this method. - * we need to rewind both word and bit position. 
*/ - c->bit_offset -= plain_bits; - bm_xfer_ctx_bit_to_word_offset(c); - c->bit_offset = c->word_offset * BITS_PER_LONG; - return 0; - } - - /* RLE + VLI was able to compress it just fine. - * update c->word_offset. */ - bm_xfer_ctx_bit_to_word_offset(c); - - /* store pad_bits */ - DCBP_set_pad_bits(p, 0); - - return len; -} - int fill_bitmap_rle_bits(struct drbd_conf *mdev, struct p_compressed_bm *p, struct bm_xfer_ctx *c) @@ -1826,8 +1751,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, /* plain bits covered in this code string */ plain_bits = 0; - /* p->encoding & 0x80 stores whether the first - * run length is set. + /* p->encoding & 0x80 stores whether the first run length is set. * bit offset is implicit. * start with toggle == 2 to be able to tell the first iteration */ toggle = 2; @@ -1904,15 +1828,13 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, int len; int ok; - if (0) - len = fill_bitmap_rle_bytes(mdev, p, c); - else - len = fill_bitmap_rle_bits(mdev, p, c); + len = fill_bitmap_rle_bits(mdev, p, c); if (len < 0) return FAILED; + if (len) { - DCBP_set_code(p, 0 ? RLE_VLI_Bytes : RLE_VLI_BitsFibD_3_5); + DCBP_set_code(p, RLE_VLI_Bits); ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h, sizeof(*p) + len, 0); @@ -2191,7 +2113,7 @@ STATIC int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, kunmap(page); if (sent == size) mdev->send_cnt += size>>9; - return sent; + return sent == size; } int _drbd_send_page(struct drbd_conf *mdev, struct page *page, @@ -2201,21 +2123,14 @@ int _drbd_send_page(struct drbd_conf *mdev, struct page *page, int sent, ok; int len = size; - /* PARANOIA. if this ever triggers, - * something in the layers above us is really kaputt. - *one roundtrip later: - * doh. it triggered. so XFS _IS_ really kaputt ... - * oh well... - */ - if ((page_count(page) < 1) || PageSlab(page)) { - /* e.g. XFS meta- & log-data is in slab pages, which have a - * page_count of 0 and/or have PageSlab() set... 
- */ - sent = _drbd_no_send_page(mdev, page, offset, size); - if (likely(sent > 0)) - len -= sent; - goto out; - } + /* e.g. XFS meta- & log-data is in slab pages, which have a + * page_count of 0 and/or have PageSlab() set. + * we cannot use send_page for those, as that does get_page(); + * put_page(); and would cause either a VM_BUG directly, or + * __page_cache_release a page that would actually still be referenced + * by someone, leading to some obscure delayed Oops somewhere else. */ + if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) + return _drbd_no_send_page(mdev, page, offset, size); drbd_update_congested(mdev); set_fs(KERNEL_DS); @@ -2241,7 +2156,6 @@ int _drbd_send_page(struct drbd_conf *mdev, struct page *page, set_fs(oldfs); clear_bit(NET_CONGESTED, &mdev->flags); -out: ok = (len == 0); if (likely(ok)) mdev->send_cnt += size>>9; @@ -2643,8 +2557,11 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) D_ASSERT(mdev->net_conf == NULL); drbd_set_my_capacity(mdev, 0); - drbd_bm_resize(mdev, 0); - drbd_bm_cleanup(mdev); + if (mdev->bitmap) { + /* maybe never allocated. 
*/ + drbd_bm_resize(mdev, 0); + drbd_bm_cleanup(mdev); + } drbd_free_resources(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3b46a934c2d6..c72c5adbbfd6 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -27,7 +27,6 @@ #include <linux/module.h> #include <linux/in.h> #include <linux/fs.h> -#include <linux/buffer_head.h> /* for fsync_bdev */ #include <linux/file.h> #include <linux/slab.h> #include <linux/connector.h> @@ -353,8 +352,6 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (forced) dev_warn(DEV, "Forced to consider local data as UpToDate!\n"); - fsync_bdev(mdev->this_bdev); - /* Wait until nothing is on the fly :) */ wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); @@ -493,18 +490,15 @@ char *ppsize(char *buf, unsigned long long size) * waits for ap_bio_cnt == 0. -> deadlock. * but this cannot happen, actually, because: * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable - * (not connected, * or bad/no disk on peer): + * (not connected, or bad/no disk on peer): * see drbd_fail_request_early, ap_bio_cnt is zero. * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET: * peer may not initiate a resize. */ void drbd_suspend_io(struct drbd_conf *mdev) { - int in_flight; set_bit(SUSPEND_IO, &mdev->flags); - in_flight = atomic_read(&mdev->ap_bio_cnt); - if (in_flight) - wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); } void drbd_resume_io(struct drbd_conf *mdev) @@ -761,6 +755,36 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu } } +/* serialize deconfig (worker exiting, doing cleanup) + * and reconfig (drbdsetup disk, drbdsetup net) + * + * wait for a potentially exiting worker, then restart it, + * or start a new one. 
+ */ +static void drbd_reconfig_start(struct drbd_conf *mdev) +{ + wait_event(mdev->state_wait, test_and_set_bit(CONFIG_PENDING, &mdev->flags)); + wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); + drbd_thread_start(&mdev->worker); +} + +/* if still unconfigured, stops worker again. + * if configured now, clears CONFIG_PENDING. + * wakes potential waiters */ +static void drbd_reconfig_done(struct drbd_conf *mdev) +{ + spin_lock_irq(&mdev->req_lock); + if (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY) { + set_bit(DEVICE_DYING, &mdev->flags); + drbd_thread_stop_nowait(&mdev->worker); + } else + clear_bit(CONFIG_PENDING, &mdev->flags); + spin_unlock_irq(&mdev->req_lock); + wake_up(&mdev->state_wait); +} + /* does always return 0; * interesting return code is in reply->ret_code */ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, @@ -774,33 +798,18 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp struct inode *inode, *inode2; struct lru_cache *resync_lru = NULL; union drbd_state ns, os; - int rv, ntries = 0; + int rv; int cp_discovered = 0; int hardsect; + drbd_reconfig_start(mdev); + /* if you want to reconfigure, please tear down first */ if (mdev->state.disk > D_DISKLESS) { retcode = ERR_DISK_CONFIGURED; goto fail; } - /* - * We may have gotten here very quickly from a detach. Wait for a bit - * then fail. 
- */ - while (1) { - __no_warn(local, nbc = mdev->bc;); - if (nbc == NULL) - break; - if (ntries++ >= 5) { - dev_warn(DEV, "drbd_nl_disk_conf: mdev->bc not NULL.\n"); - retcode = ERR_DISK_CONFIGURED; - goto fail; - } - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ/10); - } - nbc = kmalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); if (!nbc) { retcode = ERR_NOMEM; @@ -808,17 +817,11 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } memset(&nbc->md, 0, sizeof(struct drbd_md)); - - if (!(nlp->flags & DRBD_NL_SET_DEFAULTS) && inc_local(mdev)) { - memcpy(&nbc->dc, &mdev->bc->dc, sizeof(struct disk_conf)); - dec_local(mdev); - } else { - memset(&nbc->dc, 0, sizeof(struct disk_conf)); - nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; - nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; - nbc->dc.fencing = DRBD_FENCING_DEF; - nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; - } + memset(&nbc->dc, 0, sizeof(struct disk_conf)); + nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; + nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; + nbc->dc.fencing = DRBD_FENCING_DEF; + nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) { retcode = ERR_MANDATORY_TAG; @@ -882,13 +885,6 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto release_bdev_fail; } - if (!mdev->bitmap) { - if (drbd_bm_init(mdev)) { - retcode = ERR_NOMEM; - goto release_bdev_fail; - } - } - nbc->md_bdev = inode2->i_bdev; if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || @@ -949,7 +945,9 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->known_size = drbd_get_capacity(nbc->backing_bdev); drbd_suspend_io(mdev); + /* also wait for the last barrier ack. 
*/ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt)); + retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); drbd_resume_io(mdev); if (retcode < SS_SUCCESS) @@ -958,9 +956,15 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (!inc_local_if_state(mdev, D_ATTACHING)) goto force_diskless; - drbd_thread_start(&mdev->worker); drbd_md_set_sector_offsets(mdev, nbc); + if (!mdev->bitmap) { + if (drbd_bm_init(mdev)) { + retcode = ERR_NOMEM; + goto force_diskless_dec; + } + } + retcode = drbd_md_read(mdev, nbc); if (retcode != NO_ERROR) goto force_diskless_dec; @@ -1150,6 +1154,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); dec_local(mdev); reply->ret_code = retcode; + drbd_reconfig_done(mdev); return 0; force_diskless_dec: @@ -1175,18 +1180,14 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp lc_free(resync_lru); reply->ret_code = retcode; + drbd_reconfig_done(mdev); return 0; } STATIC int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - fsync_bdev(mdev->this_bdev); reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ/20); /* 50ms; Time for worker to finally terminate */ - return 0; } @@ -1208,6 +1209,8 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, void *int_dig_vv = NULL; struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; + drbd_reconfig_start(mdev); + if (mdev->state.conn > C_STANDALONE) { retcode = ERR_NET_CONFIGURED; goto fail; @@ -1219,28 +1222,23 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (!(nlp->flags & DRBD_NL_SET_DEFAULTS) && inc_net(mdev)) { - memcpy(new_conf, mdev->net_conf, sizeof(struct net_conf)); - dec_net(mdev); - } 
else { - memset(new_conf, 0, sizeof(struct net_conf)); - new_conf->timeout = DRBD_TIMEOUT_DEF; - new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; - new_conf->ping_int = DRBD_PING_INT_DEF; - new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; - new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; - new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; - new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; - new_conf->ko_count = DRBD_KO_COUNT_DEF; - new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; - new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; - new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF; - new_conf->want_lose = 0; - new_conf->two_primaries = 0; - new_conf->wire_protocol = DRBD_PROT_C; - new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; - new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; - } + memset(new_conf, 0, sizeof(struct net_conf)); + new_conf->timeout = DRBD_TIMEOUT_DEF; + new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; + new_conf->ping_int = DRBD_PING_INT_DEF; + new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; + new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; + new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; + new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; + new_conf->ko_count = DRBD_KO_COUNT_DEF; + new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; + new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; + new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF; + new_conf->want_lose = 0; + new_conf->two_primaries = 0; + new_conf->wire_protocol = DRBD_PROT_C; + new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; + new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { retcode = ERR_MANDATORY_TAG; @@ -1367,7 +1365,12 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } - D_ASSERT(mdev->net_conf == NULL); + spin_lock_irq(&mdev->req_lock); + if (mdev->net_conf != NULL) { + retcode = ERR_NET_CONFIGURED; + spin_unlock_irq(&mdev->req_lock); + goto fail; + } mdev->net_conf = new_conf; mdev->send_cnt = 0; @@ -1400,13 +1403,13 
@@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->int_dig_out=int_dig_out; mdev->int_dig_in=int_dig_in; mdev->int_dig_vv=int_dig_vv; + spin_unlock_irq(&mdev->req_lock); retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE); - if (retcode >= SS_SUCCESS) - drbd_thread_start(&mdev->worker); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); reply->ret_code = retcode; + drbd_reconfig_done(mdev); return 0; fail: @@ -1421,6 +1424,7 @@ fail: kfree(new_conf); reply->ret_code = retcode; + drbd_reconfig_done(mdev); return 0; } @@ -1938,8 +1942,10 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl if (skip_initial_sync) { drbd_send_uuids_skip_initial_sync(mdev); _drbd_uuid_set(mdev, UI_BITMAP, 0); + spin_lock_irq(&mdev->req_lock); _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); + spin_unlock_irq(&mdev->req_lock); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 26ac8fd0e1f4..078ce728a2fc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3225,105 +3225,55 @@ recv_bm_rle_bits(struct drbd_conf *mdev, return (s == c->bm_bits) ? DONE : OK; } - -static enum receive_bitmap_ret -recv_bm_rle_bytes(struct drbd_conf *mdev, - struct p_compressed_bm *p, - struct bm_xfer_ctx *c) -{ - u64 rl; - unsigned char *buf = p->code; - unsigned long s; - unsigned long e; - int len = p->head.length - (p->code - p->head.payload); - int toggle; - int n; - - s = c->bit_offset; - - /* decoding. the payload of bitmap rle packets is VLI encoded - * runlength of set and unset bits, starting with set/unset as defined - * in p->encoding & 0x80. 
*/ - for (toggle = DCBP_get_start(p); len; s += rl, toggle = !toggle) { - if (s >= c->bm_bits) { - dev_err(DEV, "bitmap overflow (s:%lu) while decoding bitmap RLE packet\n", s); - return FAILED; - } - - n = vli_decode_bytes(&rl, buf, len); - if (n == 0) /* incomplete buffer! */ - return FAILED; - buf += n; - len -= n; - - if (rl == 0) { - dev_err(DEV, "unexpected zero runlength while decoding bitmap RLE packet\n"); - return FAILED; - } - - /* unset bits: ignore, because of x | 0 == x. */ - if (!toggle) - continue; - - /* set bits: merge into bitmap. */ - e = s + rl -1; - if (e >= c->bm_bits) { - dev_err(DEV, "bitmap overflow (e:%lu) while decoding bitmap RLE packet\n", e); - return FAILED; - } - _drbd_bm_set_bits(mdev, s, e); - } - - c->bit_offset = s; - bm_xfer_ctx_bit_to_word_offset(c); - - return (s == c->bm_bits) ? DONE : OK; -} - static enum receive_bitmap_ret decode_bitmap_c(struct drbd_conf *mdev, struct p_compressed_bm *p, struct bm_xfer_ctx *c) { - switch (DCBP_get_code(p)) { - /* no default! I want the compiler to warn me! */ - case RLE_VLI_BitsFibD_0_1: - case RLE_VLI_BitsFibD_1_1: - case RLE_VLI_BitsFibD_1_2: - case RLE_VLI_BitsFibD_2_3: - break; /* TODO */ - case RLE_VLI_BitsFibD_3_5: + if (DCBP_get_code(p) == RLE_VLI_Bits) return recv_bm_rle_bits(mdev, p, c); - case RLE_VLI_Bytes: - return recv_bm_rle_bytes(mdev, p, c); - } + + /* other variants had been implemented for evaluation, + * but have been dropped as this one turned out to be "best" + * during all our tests. 
*/ + dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); + drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); return FAILED; } void INFO_bm_xfer_stats(struct drbd_conf *mdev, const char *direction, struct bm_xfer_ctx *c) { - unsigned plain_would_take = sizeof(struct p_header) * + /* what would it take to transfer it "plaintext" */ + unsigned plain = sizeof(struct p_header) * ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) + c->bm_words * sizeof(long); unsigned total = c->bytes[0] + c->bytes[1]; - unsigned q, r; + unsigned r; /* total can not be zero. but just in case: */ if (total == 0) return; - q = plain_would_take / total; - r = plain_would_take % total; - r = (r > UINT_MAX/100) ? (r / (total+99/100)) : (100 * r / total); + /* don't report if not compressed */ + if (total >= plain) + return; + + /* total < plain. check for overflow, still */ + r = (total > UINT_MAX/1000) ? (total / (plain/1000)) + : (1000 * total / plain); + + if (r > 1000) + r = 1000; + r = 1000 - r; dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " - "total %u; compression factor: %u.%02u\n", + "total %u; compression: %u.%u%%\n", direction, c->bytes[1], c->packets[1], c->bytes[0], c->packets[0], - total, q, r); + total, r/10, r % 10); } /* Since we are processing the bitfield from lower addresses to higher, @@ -3689,9 +3639,9 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) * we already released the socket!? 
*/ i = atomic_read(&mdev->pp_in_use); if (i) - dev_dbg(DEV, "pp_in_use = %u, expected 0\n", i); + dev_info(DEV, "pp_in_use = %u, expected 0\n", i); if (!list_empty(&mdev->net_ee)) - dev_dbg(DEV, "net_ee not empty!\n"); + dev_info(DEV, "net_ee not empty!\n"); D_ASSERT(list_empty(&mdev->read_ee)); D_ASSERT(list_empty(&mdev->active_ee)); diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h index 8f5e8186852d..fc824006e721 100644 --- a/drivers/block/drbd/drbd_vli.h +++ b/drivers/block/drbd/drbd_vli.h @@ -37,15 +37,15 @@ * * We never actually need to encode a "zero" (runlengths are positive). * But then we have to store the value of the first bit. - * So we can as well have the "zero" be a valid runlength, - * and start encoding/decoding by "number of _set_ bits" by convention. + * The first bit of information thus shall encode if the first runlength + * gives the number of set or unset bits. * * We assume that large areas are either completely set or unset, * which gives good compression with any runlength method, * even when encoding the runlength as fixed size 32bit/64bit integers. * * Still, there may be areas where the polarity flips every few bits, - * and encoding the runlength sequence of those ares with fix size + * and encoding the runlength sequence of those areas with fix size * integers would be much worse than plaintext. * * We want to encode small runlength values with minimum code length, @@ -53,105 +53,147 @@ * * Thus we need a Variable Length Integer encoding, VLI. * - * For runlength < 8, we produce more code bits than plaintext input. - * we need to send incompressible chunks as plaintext, skip over them + * For some cases, we produce more code bits than plaintext input. + * We need to send incompressible chunks as plaintext, skip over them * and then see if the next chunk compresses better. * - * We don't care too much about "excellent" compression ratio - * for large runlengths, 249 bit/24 bit still gives a factor of > 10. 
+ * We don't care too much about "excellent" compression ratio for large + * runlengths (all set/all clear): whether we achieve a factor of 100 + * or 1000 is not that much of an issue. + * We do not want to waste too much on short runlengths in the "noisy" + * parts of the bitmap, though. * - * We care for cpu time needed to actually encode/decode - * into the transmitted byte stream. + * There are endless variants of VLI, we experimented with: + * * simple byte-based + * * various bit based with different code word length. * - * There are endless variants of VLI. - * For this special purpose, we just need something that is "good enough", - * and easy to understand and code, fast to encode and decode, - * and does not consume memory. + * To avoid yet an other configuration parameter (choice of bitmap compression + * algorithm) which was difficult to explain and tune, we just chose the one + * variant that turned out best in all test cases. + * Based on real world usage patterns, with device sizes ranging from a few GiB + * to several TiB, file server/mailserver/webserver/mysql/postgress, + * mostly idle to really busy, the all time winner (though sometimes only + * marginally better) is: */ /* - * buf points to the current position in the tranfered byte stream. - * stream is by definition little endian. - * *buf_len gives the remaining number of bytes at that position. - * *out will receive the decoded value. - * returns number of bytes consumed, - * or 0 if not enough bytes left in buffer (which would be invalid input). 
- */ -static inline int vli_decode_bytes(u64 *out, unsigned char *buf, unsigned buf_len) -{ - u64 tmp = 0; - unsigned bytes; /* extra bytes after code byte */ + * encoding is "visualised" as + * __little endian__ bitstream, least significant bit first (left most) + * + * this particular encoding is chosen so that the prefix code + * starts as unary encoding the level, then modified so that + * 10 levels can be described in 8bit, with minimal overhead + * for the smaller levels. + * + * Number of data bits follow fibonacci sequence, with the exception of the + * last level (+1 data bit, so it makes 64bit total). The only worse code when + * encoding bit polarity runlength is 1 plain bits => 2 code bits. +prefix data bits max val Nº data bits +0 x 0x2 1 +10 x 0x4 1 +110 xx 0x8 2 +1110 xxx 0x10 3 +11110 xxx xx 0x30 5 +111110 xx xxxxxx 0x130 8 +11111100 xxxxxxxx xxxxx 0x2130 13 +11111110 xxxxxxxx xxxxxxxx xxxxx 0x202130 21 +11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202130 34 +11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56 + * maximum encodable value: 0x100000400202130 == 2**56 + some */ + +/* compression "table": + transmitted x 0.29 + as plaintext x ........................ + x ........................ + x ........................ + x 0.59 0.21........................ + x ........................................................ + x .. c ................................................... + x 0.44.. o ................................................... + x .......... d ................................................... + x .......... e ................................................... + X............. ................................................... + x.............. b ................................................... +2.0x............... i ................................................... + #X................ t ................................................... + #................. s ........................... 
plain bits .......... +-+----------------------------------------------------------------------- + 1 16 32 64 +*/ - if (buf_len == 0) - return 0; +/* LEVEL: (total bits, prefix bits, prefix value), + * sorted ascending by number of total bits. + * The rest of the code table is calculated at compiletime from this. */ - switch(*buf) { - case 0xff: bytes = 8; break; - case 0xfe: bytes = 7; break; - case 0xfd: bytes = 6; break; - case 0xfc: bytes = 5; break; - case 0xfb: bytes = 4; break; - case 0xfa: bytes = 3; break; - case 0xf9: bytes = 2; break; - default: - *out = *buf; - return 1; - } +/* fibonacci data 1, 1, ... */ +#define VLI_L_1_1() do { \ + LEVEL( 2, 1, 0x00); \ + LEVEL( 3, 2, 0x01); \ + LEVEL( 5, 3, 0x03); \ + LEVEL( 7, 4, 0x07); \ + LEVEL(10, 5, 0x0f); \ + LEVEL(14, 6, 0x1f); \ + LEVEL(21, 8, 0x3f); \ + LEVEL(29, 8, 0x7f); \ + LEVEL(42, 8, 0xbf); \ + LEVEL(64, 8, 0xff); \ + } while (0) - if (buf_len <= bytes) - return 0; +/* finds a suitable level to decode the least significant part of in. + * returns number of bits consumed. + * + * BUG() for bad input, as that would mean a buggy code table. */ +static inline int vli_decode_bits(u64 *out, const u64 in) +{ + u64 adj = 1; + +#define LEVEL(t,b,v) \ + do { \ + if ((in & ((1 << b) -1)) == v) { \ + *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \ + return t; \ + } \ + adj += 1ULL << (t - b); \ + } while (0) + + VLI_L_1_1(); - /* no pointer cast assignment, there may be funny alignment - * requirements on certain architectures */ - memcpy(&tmp, buf+1, bytes); - *out = le64_to_cpu(tmp); - return bytes+1; + /* NOT REACHED, if VLI_LEVELS code table is defined properly */ + BUG(); +#undef LEVEL } -/* - * similarly, encode n into buf. - * returns consumed bytes, - * or zero if not enough room left in buffer - * (in which case the buf is left unchanged). - * - * encoding is little endian, first byte codes how much bytes follow. - * first byte <= 0xf8 means just this byte, value = code byte. - * first byte == 0xf9 .. 
0xff: (code byte - 0xf7) data bytes follow. - */ -static inline int vli_encode_bytes(unsigned char *buf, u64 n, unsigned buf_len) +/* return number of code bits needed, + * or negative error number */ +static inline int __vli_encode_bits(u64 *out, const u64 in) { - unsigned bytes; /* _extra_ bytes after code byte */ - - if (buf_len == 0) - return 0; + u64 max = 0; + u64 adj = 1; - if (n <= 0xf8) { - *buf = (unsigned char)n; - return 1; - } + if (in == 0) + return -EINVAL; - bytes = (n < (1ULL << 32)) - ? (n < (1ULL << 16)) ? 2 - : (n < (1ULL << 24)) ? 3 : 4 - : (n < (1ULL << 48)) ? - (n < (1ULL << 40)) ? 5 : 6 - : (n < (1ULL << 56)) ? 7 : 8; +#define LEVEL(t,b,v) do { \ + max += 1ULL << (t - b); \ + if (in <= max) { \ + if (out) \ + *out = ((in - adj) << b) | v; \ + return t; \ + } \ + adj = max + 1; \ + } while (0) - if (buf_len <= bytes) - return 0; + VLI_L_1_1(); - /* no pointer cast assignment, there may be funny alignment - * requirements on certain architectures */ - *buf++ = 0xf7 + bytes; /* code, 0xf9 .. 0xff */ - n = cpu_to_le64(n); - memcpy(buf, &n, bytes); /* plain */ - return bytes+1; + return -EOVERFLOW; +#undef LEVEL } -/* ================================================================== */ +#undef VLI_L_1_1 -/* And here the more involved variants of VLI. - * +/* code from here down is independend of actually used bit code */ + +/* * Code length is determined by some unique (e.g. unary) prefix. * This encodes arbitrary bit length, not whole bytes: we have a bit-stream, * not a byte stream. @@ -287,164 +329,6 @@ static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits) return bits; } -/* we still need to actually define the code. 
*/ - -/* - * encoding is "visualised" as - * __little endian__ bitstream, least significant bit first (left most) - * - * this particular encoding is chosen so that the prefix code - * starts as unary encoding the level, then modified so that - * 11 levels can be described in 8bit, with minimal overhead - * for the smaller levels. - * - * Number of data bits follow fibonacci sequence, with the exception of the - * last level (+1 data bit, so it makes 64bit total). The only worse code when - * encoding bit polarity runlength is 2 plain bits => 3 code bits. -prefix data bits max val Nº data bits -0 0x1 0 -10 x 0x3 1 -110 x 0x5 1 -1110 xx 0x9 2 -11110 xxx 0x11 3 -1111100 x xxxx 0x31 5 -1111101 x xxxxxxx 0x131 8 -11111100 xxxxxxxx xxxxx 0x2131 13 -11111110 xxxxxxxx xxxxxxxx xxxxx 0x202131 21 -11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202131 34 -11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56 - * maximum encodable value: 0x100000400202131 == 2**56 + some */ - -/* LEVEL: (total bits, prefix bits, prefix value), - * sorted ascending by number of total bits. - * The rest of the code table is calculated at compiletime from this. */ - -/* fibonacci data 0, 1, ... */ -#define VLI_L_0_1() do { \ - LEVEL( 1, 1, 0x00); \ - LEVEL( 3, 2, 0x01); \ - LEVEL( 4, 3, 0x03); \ - LEVEL( 6, 4, 0x07); \ - LEVEL( 8, 5, 0x0f); \ - LEVEL(12, 7, 0x1f); \ - LEVEL(15, 7, 0x5f); \ - LEVEL(21, 8, 0x3f); \ - LEVEL(29, 8, 0x7f); \ - LEVEL(42, 8, 0xbf); \ - LEVEL(64, 8, 0xff); \ - } while (0) - -/* Some variants, differeing in number of levels, prefix value, and number of - * databits in each level. I tried a lot of variants. Those where the number - * of data bits follows the fibonacci sequence (with a certain offset) simply - * "look best" ;-) - * All of these can encode at least "2 ** 56". */ - -/* fibonacci data 1, 1, ... 
*/ -#define VLI_L_1_1() do { \ - LEVEL( 2, 1, 0x00); \ - LEVEL( 3, 2, 0x01); \ - LEVEL( 5, 3, 0x03); \ - LEVEL( 7, 4, 0x07); \ - LEVEL(10, 5, 0x0f); \ - LEVEL(14, 6, 0x1f); \ - LEVEL(21, 8, 0x3f); \ - LEVEL(29, 8, 0x7f); \ - LEVEL(42, 8, 0xbf); \ - LEVEL(64, 8, 0xff); \ - } while (0) - -/* fibonacci data 1, 2, ... */ -#define VLI_L_1_2() do { \ - LEVEL( 2, 1, 0x00); \ - LEVEL( 4, 2, 0x01); \ - LEVEL( 6, 3, 0x03); \ - LEVEL( 9, 4, 0x07); \ - LEVEL(13, 5, 0x0f); \ - LEVEL(19, 6, 0x1f); \ - LEVEL(28, 7, 0x3f); \ - LEVEL(42, 8, 0x7f); \ - LEVEL(64, 8, 0xff); \ - } while (0) - -/* fibonacci data 2, 3, ... */ -#define VLI_L_2_3() do { \ - LEVEL( 3, 1, 0x00); \ - LEVEL( 5, 2, 0x01); \ - LEVEL( 8, 3, 0x03); \ - LEVEL(12, 4, 0x07); \ - LEVEL(18, 5, 0x0f); \ - LEVEL(27, 6, 0x1f); \ - LEVEL(41, 7, 0x3f); \ - LEVEL(64, 7, 0x5f); \ - } while (0) - -/* fibonacci data 3, 5, ... */ -#define VLI_L_3_5() do { \ - LEVEL( 4, 1, 0x00); \ - LEVEL( 7, 2, 0x01); \ - LEVEL(11, 3, 0x03); \ - LEVEL(17, 4, 0x07); \ - LEVEL(26, 5, 0x0f); \ - LEVEL(40, 6, 0x1f); \ - LEVEL(64, 6, 0x3f); \ - } while (0) - -/* CONFIG */ -#ifndef VLI_LEVELS -#define VLI_LEVELS() VLI_L_3_5() -#endif - -/* finds a suitable level to decode the least significant part of in. - * returns number of bits consumed. - * - * BUG() for bad input, as that would mean a buggy code table. 
*/ -static inline int vli_decode_bits(u64 *out, const u64 in) -{ - u64 adj = 1; - -#define LEVEL(t,b,v) \ - do { \ - if ((in & ((1 << b) -1)) == v) { \ - *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \ - return t; \ - } \ - adj += 1ULL << (t - b); \ - } while (0) - - VLI_LEVELS(); - - /* NOT REACHED, if VLI_LEVELS code table is defined properly */ - BUG(); -#undef LEVEL -} - -/* return number of code bits needed, - * or negative error number */ -static inline int __vli_encode_bits(u64 *out, const u64 in) -{ - u64 max = 0; - u64 adj = 1; - - if (in == 0) - return -EINVAL; - -#define LEVEL(t,b,v) do { \ - max += 1ULL << (t - b); \ - if (in <= max) { \ - if (out) \ - *out = ((in - adj) << b) | v; \ - return t; \ - } \ - adj = max + 1; \ - } while (0) - - VLI_LEVELS(); - - return -EOVERFLOW; -#undef LEVEL -} - /* encodes @in as vli into @bs; * return values @@ -464,11 +348,4 @@ static inline int vli_encode_bits(struct bitstream *bs, u64 in) return bitstream_put_bits(bs, code, bits); } -#undef VLI_L_0_1 -#undef VLI_L_1_1 -#undef VLI_L_1_2 -#undef VLI_L_2_3 -#undef VLI_L_3_5 - -#undef VLI_LEVELS #endif diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a39ba573743e..4be8814fd50d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1428,6 +1428,8 @@ int drbd_worker(struct drbd_thread *thi) NS(conn, C_NETWORK_FAILURE)); } } + D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags)); + D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags)); spin_lock_irq(&mdev->data.work.q_lock); i = 0; @@ -1460,5 +1462,9 @@ int drbd_worker(struct drbd_thread *thi) dev_info(DEV, "worker terminated\n"); + clear_bit(DEVICE_DYING, &mdev->flags); + clear_bit(CONFIG_PENDING, &mdev->flags); + wake_up(&mdev->state_wait); + return 0; } |